aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/README77
-rw-r--r--Documentation/ABI/obsolete/devfs13
-rw-r--r--Documentation/ABI/stable/syscalls10
-rw-r--r--Documentation/ABI/stable/sysfs-module30
-rw-r--r--Documentation/ABI/testing/sysfs-class16
-rw-r--r--Documentation/ABI/testing/sysfs-devices25
-rw-r--r--Documentation/Changes15
-rw-r--r--Documentation/CodingStyle100
-rw-r--r--Documentation/DocBook/Makefile3
-rw-r--r--Documentation/DocBook/genericirq.tmpl474
-rw-r--r--Documentation/DocBook/kernel-api.tmpl58
-rw-r--r--Documentation/DocBook/kernel-locking.tmpl2
-rw-r--r--Documentation/DocBook/libata.tmpl104
-rw-r--r--Documentation/DocBook/mtdnand.tmpl17
-rw-r--r--Documentation/DocBook/videobook.tmpl2
-rw-r--r--Documentation/IPMI.txt4
-rw-r--r--Documentation/IRQ.txt22
-rw-r--r--Documentation/RCU/checklist.txt44
-rw-r--r--Documentation/RCU/torture.txt34
-rw-r--r--Documentation/RCU/whatisRCU.txt13
-rw-r--r--Documentation/README.DAC9606
-rw-r--r--Documentation/SubmitChecklist57
-rw-r--r--Documentation/arm/IXP4xx2
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Overview.txt35
-rw-r--r--Documentation/arm/Samsung-S3C24XX/S3C2412.txt120
-rw-r--r--Documentation/arm/Samsung-S3C24XX/S3C2413.txt21
-rw-r--r--Documentation/arm/Sharp-LH/ADC-LH7-Touchscreen61
-rw-r--r--Documentation/arm/Sharp-LH/LCDPanels59
-rw-r--r--Documentation/atomic_ops.txt28
-rw-r--r--Documentation/console/console.txt144
-rw-r--r--Documentation/devices.txt140
-rw-r--r--Documentation/digiepca.txt2
-rw-r--r--Documentation/driver-model/overview.txt2
-rw-r--r--Documentation/fb/fbcon.txt180
-rw-r--r--Documentation/feature-removal-schedule.txt112
-rw-r--r--Documentation/filesystems/Locking9
-rw-r--r--Documentation/filesystems/automount-support.txt2
-rw-r--r--Documentation/filesystems/configfs/configfs_example.c19
-rw-r--r--Documentation/filesystems/devfs/ChangeLog1977
-rw-r--r--Documentation/filesystems/devfs/README1959
-rw-r--r--Documentation/filesystems/devfs/ToDo40
-rw-r--r--Documentation/filesystems/devfs/boot-options65
-rw-r--r--Documentation/filesystems/ext3.txt8
-rw-r--r--Documentation/filesystems/fuse.txt118
-rw-r--r--Documentation/filesystems/inotify.txt130
-rw-r--r--Documentation/filesystems/porting7
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.txt146
-rw-r--r--Documentation/filesystems/vfs.txt6
-rw-r--r--Documentation/hwmon/abituguru59
-rw-r--r--Documentation/hwmon/abituguru-datasheet312
-rw-r--r--Documentation/hwmon/lm7031
-rw-r--r--Documentation/hwmon/lm8317
-rw-r--r--Documentation/hwmon/smsc47m192102
-rw-r--r--Documentation/hwmon/sysfs-interface274
-rw-r--r--Documentation/hwmon/userspace-tools17
-rw-r--r--Documentation/hwmon/w83791d113
-rw-r--r--Documentation/i2c/busses/i2c-i8013
-rw-r--r--Documentation/i2c/busses/i2c-nforce22
-rw-r--r--Documentation/i2c/busses/i2c-ocores51
-rw-r--r--Documentation/i2c/busses/i2c-piix440
-rw-r--r--Documentation/i2c/busses/scx200_acb19
-rw-r--r--Documentation/ia64/aliasing.txt208
-rw-r--r--Documentation/infiniband/ipoib.txt12
-rw-r--r--Documentation/initrd.txt24
-rw-r--r--Documentation/ioctl-number.txt3
-rw-r--r--Documentation/irqflags-tracing.txt57
-rw-r--r--Documentation/isdn/README.gigaset7
-rw-r--r--Documentation/kbuild/makefiles.txt8
-rw-r--r--Documentation/kdump/gdbmacros.txt2
-rw-r--r--Documentation/kdump/kdump.txt420
-rw-r--r--Documentation/kernel-parameters.txt57
-rw-r--r--Documentation/keys-request-key.txt54
-rw-r--r--Documentation/keys.txt89
-rw-r--r--Documentation/lockdep-design.txt197
-rw-r--r--Documentation/md.txt67
-rw-r--r--Documentation/memory-barriers.txt382
-rw-r--r--Documentation/networking/README.ipw220010
-rw-r--r--Documentation/networking/bonding.txt323
-rw-r--r--Documentation/networking/ip-sysctl.txt7
-rw-r--r--Documentation/networking/ipvs-sysctl.txt143
-rw-r--r--Documentation/networking/netdevices.txt8
-rw-r--r--Documentation/networking/pktgen.txt2
-rw-r--r--Documentation/networking/tuntap.txt11
-rw-r--r--Documentation/pci.txt18
-rw-r--r--Documentation/pcmcia/crc32hash.c32
-rw-r--r--Documentation/pcmcia/devicetable.txt36
-rw-r--r--Documentation/pi-futex.txt121
-rw-r--r--Documentation/power/devices.txt90
-rw-r--r--Documentation/power/swsusp.txt84
-rw-r--r--Documentation/power/video.txt4
-rw-r--r--Documentation/powerpc/booting-without-of.txt4
-rw-r--r--Documentation/robust-futexes.txt2
-rw-r--r--Documentation/rt-mutex-design.txt781
-rw-r--r--Documentation/rt-mutex.txt79
-rw-r--r--Documentation/rtc.txt7
-rw-r--r--Documentation/scsi/00-INDEX2
-rw-r--r--Documentation/scsi/ChangeLog.megaraid_sas29
-rw-r--r--Documentation/scsi/aacraid.txt8
-rw-r--r--Documentation/scsi/cpqfc.txt272
-rw-r--r--Documentation/scsi/hptiop.txt92
-rw-r--r--Documentation/scsi/ppa.txt2
-rw-r--r--Documentation/scsi/tmscsim.txt2
-rw-r--r--Documentation/serial/driver9
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt123
-rw-r--r--Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl58
-rw-r--r--Documentation/sparc/sbus_drivers.txt95
-rw-r--r--Documentation/sparse.txt36
-rw-r--r--Documentation/sysctl/vm.txt30
-rw-r--r--Documentation/sysrq.txt5
-rw-r--r--Documentation/tty.txt7
-rw-r--r--Documentation/usb/usbmon.txt32
-rw-r--r--Documentation/video4linux/CARDLIST.bttv4
-rw-r--r--Documentation/video4linux/CARDLIST.cx8810
-rw-r--r--Documentation/video4linux/CARDLIST.saa71341
-rw-r--r--Documentation/video4linux/CARDLIST.tuner3
-rw-r--r--Documentation/video4linux/CQcam.txt203
-rw-r--r--Documentation/video4linux/README.pvrusb2212
-rw-r--r--Documentation/video4linux/Zoran23
-rw-r--r--Documentation/video4linux/bttv/CONTRIBUTORS8
-rw-r--r--Documentation/video4linux/cx2341x/fw-calling.txt69
-rw-r--r--Documentation/video4linux/cx2341x/fw-decoder-api.txt319
-rw-r--r--Documentation/video4linux/cx2341x/fw-dma.txt94
-rw-r--r--Documentation/video4linux/cx2341x/fw-encoder-api.txt694
-rw-r--r--Documentation/video4linux/cx2341x/fw-memory.txt141
-rw-r--r--Documentation/video4linux/cx2341x/fw-osd-api.txt342
-rw-r--r--Documentation/video4linux/cx2341x/fw-upload.txt49
-rw-r--r--Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt54
-rw-r--r--Documentation/video4linux/et61x251.txt52
-rw-r--r--Documentation/video4linux/ibmcam.txt168
-rw-r--r--Documentation/video4linux/ov511.txt32
-rw-r--r--Documentation/video4linux/sn9c102.txt78
-rw-r--r--Documentation/video4linux/v4lgrab.c192
-rw-r--r--Documentation/video4linux/w9968cf.txt162
-rw-r--r--Documentation/video4linux/zc0301.txt80
-rw-r--r--Documentation/vm/page_migration114
-rw-r--r--Documentation/w1/masters/ds249018
-rw-r--r--Documentation/w1/w1.generic18
-rw-r--r--Documentation/w1/w1.netlink98
-rw-r--r--Documentation/watchdog/pcwd-watchdog.txt75
-rw-r--r--Documentation/watchdog/src/watchdog-simple.c15
-rw-r--r--Documentation/watchdog/src/watchdog-test.c68
-rw-r--r--Documentation/watchdog/watchdog-api.txt56
-rw-r--r--Documentation/watchdog/watchdog.txt23
-rw-r--r--Documentation/x86_64/boot-options.txt21
144 files changed, 9161 insertions, 6044 deletions
diff --git a/Documentation/ABI/README b/Documentation/ABI/README
new file mode 100644
index 000000000000..9feaf16f1617
--- /dev/null
+++ b/Documentation/ABI/README
@@ -0,0 +1,77 @@
1This directory attempts to document the ABI between the Linux kernel and
2userspace, and the relative stability of these interfaces. Due to the
3ever-changing nature of Linux, and the differing maturity levels, these
4interfaces should be used by userspace programs in different ways.
5
6We have four different levels of ABI stability, as shown by the four
7different subdirectories in this location. Interfaces may change levels
8of stability according to the rules described below.
9
10The different levels of stability are:
11
12 stable/
13 This directory documents the interfaces that the developer has
14 defined to be stable. Userspace programs are free to use these
15 interfaces with no restrictions, and backward compatibility for
16 them will be guaranteed for at least 2 years. Most interfaces
17 (like syscalls) are expected to never change and always be
18 available.
19
20 testing/
21 This directory documents interfaces that are felt to be stable,
22 as the main development of this interface has been completed.
23 The interface can be changed to add new features, but the
24 current interface will not break by doing this, unless grave
25 errors or security problems are found in them. Userspace
26 programs can start to rely on these interfaces, but they must be
27 aware of changes that can occur before these interfaces move to
28 be marked stable. Programs that use these interfaces are
29 strongly encouraged to add their name to the description of
30 these interfaces, so that the kernel developers can easily
31 notify them if any changes occur (see the description of the
32 layout of the files below for details on how to do this.)
33
34 obsolete/
35 This directory documents interfaces that are still remaining in
36 the kernel, but are marked to be removed at some later point in
37 time. The description of the interface will document the reason
38 why it is obsolete and when it can be expected to be removed.
39 The file Documentation/feature-removal-schedule.txt may describe
40 some of these interfaces, giving a schedule for when they will
41 be removed.
42
43 removed/
44 This directory contains a list of the old interfaces that have
45 been removed from the kernel.
46
47Every file in these directories will contain the following information:
48
49What: Short description of the interface
50Date: Date created
51KernelVersion: Kernel version this feature first showed up in.
52Contact: Primary contact for this interface (may be a mailing list)
53Description: Long description of the interface and how to use it.
54Users: All users of this interface who wish to be notified when
55 it changes. This is very important for interfaces in
56 the "testing" stage, so that kernel developers can work
57 with userspace developers to ensure that things do not
58 break in ways that are unacceptable. It is also
59 important to get feedback for these interfaces to make
60 sure they are working in a proper way and do not need to
61 be changed further.
62
63
64How things move between levels:
65
66Interfaces in stable may move to obsolete, as long as the proper
67notification is given.
68
69Interfaces may be removed from obsolete and the kernel as long as the
70documented amount of time has gone by.
71
72Interfaces in the testing state can move to the stable state when the
73developers feel they are finished. They cannot be removed from the
74kernel tree without going through the obsolete state first.
75
76It's up to the developer to place their interfaces in the category they
77wish for them to start out in.
diff --git a/Documentation/ABI/obsolete/devfs b/Documentation/ABI/obsolete/devfs
new file mode 100644
index 000000000000..b8b87399bc8f
--- /dev/null
+++ b/Documentation/ABI/obsolete/devfs
@@ -0,0 +1,13 @@
1What: devfs
2Date: July 2005
3Contact: Greg Kroah-Hartman <gregkh@suse.de>
4Description:
5 devfs has been unmaintained for a number of years, has unfixable
6 races, contains a naming policy within the kernel that is
7 against the LSB, and can be replaced by using udev.
8 The files fs/devfs/*, include/linux/devfs_fs*.h will be removed,
9 along with the assorted devfs function calls throughout the
10 kernel tree.
11
12Users:
13
diff --git a/Documentation/ABI/stable/syscalls b/Documentation/ABI/stable/syscalls
new file mode 100644
index 000000000000..c3ae3e7d6a0c
--- /dev/null
+++ b/Documentation/ABI/stable/syscalls
@@ -0,0 +1,10 @@
1What: The kernel syscall interface
2Description:
3 This interface matches much of the POSIX interface and is based
4 on it and other Unix based interfaces. It will only be added to
5 over time, and not have things removed from it.
6
7 Note that this interface is different for every architecture
8 that Linux supports. Please see the architecture-specific
9 documentation for details on the syscall numbers that are to be
10 mapped to each syscall.
diff --git a/Documentation/ABI/stable/sysfs-module b/Documentation/ABI/stable/sysfs-module
new file mode 100644
index 000000000000..75be43118335
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-module
@@ -0,0 +1,30 @@
1What: /sys/module
2Description:
3 The /sys/module tree consists of the following structure:
4
5 /sys/module/MODULENAME
6 The name of the module that is in the kernel. This
7 module name will show up either if the module is built
8 directly into the kernel, or if it is loaded as a
9 dynamic module.
10
11 /sys/module/MODULENAME/parameters
12 This directory contains individual files that are each
13 individual parameters of the module that are able to be
14 changed at runtime. See the individual module
15 documentation as to the contents of these parameters and
16 what they accomplish.
17
18 Note: The individual parameter names and values are not
19 considered stable, only the fact that they will be
20 placed in this location within sysfs. See the
21 individual driver documentation for details as to the
22 stability of the different parameters.
23
24 /sys/module/MODULENAME/refcnt
25 If the module is able to be unloaded from the kernel, this file
26 will contain the current reference count of the module.
27
28 Note: If the module is built into the kernel, or if the
29 CONFIG_MODULE_UNLOAD kernel configuration value is not enabled,
30 this file will not be present.
diff --git a/Documentation/ABI/testing/sysfs-class b/Documentation/ABI/testing/sysfs-class
new file mode 100644
index 000000000000..4b0cb891e46e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class
@@ -0,0 +1,16 @@
1What: /sys/class/
2Date: February 2006
3Contact: Greg Kroah-Hartman <gregkh@suse.de>
4Description:
5 The /sys/class directory will consist of a group of
6 subdirectories describing individual classes of devices
7 in the kernel. The individual directories will consist
8 of either subdirectories, or symlinks to other
9 directories.
10
11 All programs that use this directory tree must be able
12 to handle both subdirectories and symlinks in order to
13 work properly.
14
15Users:
16 udev <linux-hotplug-devel@lists.sourceforge.net>
diff --git a/Documentation/ABI/testing/sysfs-devices b/Documentation/ABI/testing/sysfs-devices
new file mode 100644
index 000000000000..6a25671ee5f6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices
@@ -0,0 +1,25 @@
1What: /sys/devices
2Date: February 2006
3Contact: Greg Kroah-Hartman <gregkh@suse.de>
4Description:
5 The /sys/devices tree contains a snapshot of the
6 internal state of the kernel device tree. Devices will
7 be added and removed dynamically as the machine runs,
8 and between different kernel versions, the layout of the
9 devices within this tree will change.
10
11 Please do not rely on the format of this tree because of
12 this. If a program wishes to find different things in
13 the tree, please use the /sys/class structure and rely
14 on the symlinks there to point to the proper location
15 within the /sys/devices tree of the individual devices.
16 Or rely on the uevent messages to notify programs of
17 devices being added and removed from this tree to find
18 the location of those devices.
19
20 Note that sometimes not all devices along the directory
21 chain will have emitted uevent messages, so userspace
22 programs must be able to handle such occurrences.
23
24Users:
25 udev <linux-hotplug-devel@lists.sourceforge.net>
diff --git a/Documentation/Changes b/Documentation/Changes
index b02f476c2973..488272074c36 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -181,8 +181,8 @@ Intel IA32 microcode
181-------------------- 181--------------------
182 182
183A driver has been added to allow updating of Intel IA32 microcode, 183A driver has been added to allow updating of Intel IA32 microcode,
184accessible as both a devfs regular file and as a normal (misc) 184accessible as a normal (misc) character device. If you are not using
185character device. If you are not using devfs you may need to: 185udev you may need to:
186 186
187mkdir /dev/cpu 187mkdir /dev/cpu
188mknod /dev/cpu/microcode c 10 184 188mknod /dev/cpu/microcode c 10 184
@@ -201,7 +201,9 @@ with programs using shared memory.
201udev 201udev
202---- 202----
203udev is a userspace application for populating /dev dynamically with 203udev is a userspace application for populating /dev dynamically with
204only entries for devices actually present. udev replaces devfs. 204only entries for devices actually present. udev replaces the basic
205functionality of devfs, while allowing persistent device naming for
206devices.
205 207
206FUSE 208FUSE
207---- 209----
@@ -231,18 +233,13 @@ The PPP driver has been restructured to support multilink and to
231enable it to operate over diverse media layers. If you use PPP, 233enable it to operate over diverse media layers. If you use PPP,
232upgrade pppd to at least 2.4.0. 234upgrade pppd to at least 2.4.0.
233 235
234If you are not using devfs, you must have the device file /dev/ppp 236If you are not using udev, you must have the device file /dev/ppp
235which can be made by: 237which can be made by:
236 238
237mknod /dev/ppp c 108 0 239mknod /dev/ppp c 108 0
238 240
239as root. 241as root.
240 242
241If you use devfsd and build ppp support as modules, you will need
242the following in your /etc/devfsd.conf file:
243
244LOOKUP PPP MODLOAD
245
246Isdn4k-utils 243Isdn4k-utils
247------------ 244------------
248 245
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index ce5d2c038cf5..6d2412ec91ed 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -155,7 +155,83 @@ problem, which is called the function-growth-hormone-imbalance syndrome.
155See next chapter. 155See next chapter.
156 156
157 157
158 Chapter 5: Functions 158 Chapter 5: Typedefs
159
160Please don't use things like "vps_t".
161
162It's a _mistake_ to use typedef for structures and pointers. When you see a
163
164 vps_t a;
165
166in the source, what does it mean?
167
168In contrast, if it says
169
170 struct virtual_container *a;
171
172you can actually tell what "a" is.
173
174Lots of people think that typedefs "help readability". Not so. They are
175useful only for:
176
177 (a) totally opaque objects (where the typedef is actively used to _hide_
178 what the object is).
179
180 Example: "pte_t" etc. opaque objects that you can only access using
181 the proper accessor functions.
182
183 NOTE! Opaqueness and "accessor functions" are not good in themselves.
184 The reason we have them for things like pte_t etc. is that there
185 really is absolutely _zero_ portably accessible information there.
186
187 (b) Clear integer types, where the abstraction _helps_ avoid confusion
188 whether it is "int" or "long".
189
190 u8/u16/u32 are perfectly fine typedefs, although they fit into
191 category (d) better than here.
192
193 NOTE! Again - there needs to be a _reason_ for this. If something is
194 "unsigned long", then there's no reason to do
195
196 typedef unsigned long myflags_t;
197
198 but if there is a clear reason for why it under certain circumstances
199 might be an "unsigned int" and under other configurations might be
200 "unsigned long", then by all means go ahead and use a typedef.
201
202 (c) when you use sparse to literally create a _new_ type for
203 type-checking.
204
205 (d) New types which are identical to standard C99 types, in certain
206 exceptional circumstances.
207
208 Although it would only take a short amount of time for the eyes and
209 brain to become accustomed to the standard types like 'uint32_t',
210 some people object to their use anyway.
211
212 Therefore, the Linux-specific 'u8/u16/u32/u64' types and their
213 signed equivalents which are identical to standard types are
214 permitted -- although they are not mandatory in new code of your
215 own.
216
217 When editing existing code which already uses one or the other set
218 of types, you should conform to the existing choices in that code.
219
220 (e) Types safe for use in userspace.
221
222 In certain structures which are visible to userspace, we cannot
223 require C99 types and cannot use the 'u32' form above. Thus, we
224 use __u32 and similar types in all structures which are shared
225 with userspace.
226
227Maybe there are other cases too, but the rule should basically be to NEVER
228EVER use a typedef unless you can clearly match one of those rules.
229
230In general, a pointer, or a struct that has elements that can reasonably
231be directly accessed should _never_ be a typedef.
232
233
234 Chapter 6: Functions
159 235
160Functions should be short and sweet, and do just one thing. They should 236Functions should be short and sweet, and do just one thing. They should
161fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24, 237fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24,
@@ -183,7 +259,7 @@ and it gets confused. You know you're brilliant, but maybe you'd like
183to understand what you did 2 weeks from now. 259to understand what you did 2 weeks from now.
184 260
185 261
186 Chapter 6: Centralized exiting of functions 262 Chapter 7: Centralized exiting of functions
187 263
188Albeit deprecated by some people, the equivalent of the goto statement is 264Albeit deprecated by some people, the equivalent of the goto statement is
189used frequently by compilers in form of the unconditional jump instruction. 265used frequently by compilers in form of the unconditional jump instruction.
@@ -220,7 +296,7 @@ out:
220 return result; 296 return result;
221} 297}
222 298
223 Chapter 7: Commenting 299 Chapter 8: Commenting
224 300
225Comments are good, but there is also a danger of over-commenting. NEVER 301Comments are good, but there is also a danger of over-commenting. NEVER
226try to explain HOW your code works in a comment: it's much better to 302try to explain HOW your code works in a comment: it's much better to
@@ -240,7 +316,7 @@ When commenting the kernel API functions, please use the kerneldoc format.
240See the files Documentation/kernel-doc-nano-HOWTO.txt and scripts/kernel-doc 316See the files Documentation/kernel-doc-nano-HOWTO.txt and scripts/kernel-doc
241for details. 317for details.
242 318
243 Chapter 8: You've made a mess of it 319 Chapter 9: You've made a mess of it
244 320
245That's OK, we all do. You've probably been told by your long-time Unix 321That's OK, we all do. You've probably been told by your long-time Unix
246user helper that "GNU emacs" automatically formats the C sources for 322user helper that "GNU emacs" automatically formats the C sources for
@@ -288,7 +364,7 @@ re-formatting you may want to take a look at the man page. But
288remember: "indent" is not a fix for bad programming. 364remember: "indent" is not a fix for bad programming.
289 365
290 366
291 Chapter 9: Configuration-files 367 Chapter 10: Configuration-files
292 368
293For configuration options (arch/xxx/Kconfig, and all the Kconfig files), 369For configuration options (arch/xxx/Kconfig, and all the Kconfig files),
294somewhat different indentation is used. 370somewhat different indentation is used.
@@ -313,7 +389,7 @@ support for file-systems, for instance) should be denoted (DANGEROUS), other
313experimental options should be denoted (EXPERIMENTAL). 389experimental options should be denoted (EXPERIMENTAL).
314 390
315 391
316 Chapter 10: Data structures 392 Chapter 11: Data structures
317 393
318Data structures that have visibility outside the single-threaded 394Data structures that have visibility outside the single-threaded
319environment they are created and destroyed in should always have 395environment they are created and destroyed in should always have
@@ -344,7 +420,7 @@ Remember: if another thread can find your data structure, and you don't
344have a reference count on it, you almost certainly have a bug. 420have a reference count on it, you almost certainly have a bug.
345 421
346 422
347 Chapter 11: Macros, Enums and RTL 423 Chapter 12: Macros, Enums and RTL
348 424
349Names of macros defining constants and labels in enums are capitalized. 425Names of macros defining constants and labels in enums are capitalized.
350 426
@@ -399,7 +475,7 @@ The cpp manual deals with macros exhaustively. The gcc internals manual also
399covers RTL which is used frequently with assembly language in the kernel. 475covers RTL which is used frequently with assembly language in the kernel.
400 476
401 477
402 Chapter 12: Printing kernel messages 478 Chapter 13: Printing kernel messages
403 479
404Kernel developers like to be seen as literate. Do mind the spelling 480Kernel developers like to be seen as literate. Do mind the spelling
405of kernel messages to make a good impression. Do not use crippled 481of kernel messages to make a good impression. Do not use crippled
@@ -410,7 +486,7 @@ Kernel messages do not have to be terminated with a period.
410Printing numbers in parentheses (%d) adds no value and should be avoided. 486Printing numbers in parentheses (%d) adds no value and should be avoided.
411 487
412 488
413 Chapter 13: Allocating memory 489 Chapter 14: Allocating memory
414 490
415The kernel provides the following general purpose memory allocators: 491The kernel provides the following general purpose memory allocators:
416kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API 492kmalloc(), kzalloc(), kcalloc(), and vmalloc(). Please refer to the API
@@ -429,7 +505,7 @@ from void pointer to any other pointer type is guaranteed by the C programming
429language. 505language.
430 506
431 507
432 Chapter 14: The inline disease 508 Chapter 15: The inline disease
433 509
434There appears to be a common misperception that gcc has a magic "make me 510There appears to be a common misperception that gcc has a magic "make me
435faster" speedup option called "inline". While the use of inlines can be 511faster" speedup option called "inline". While the use of inlines can be
@@ -457,7 +533,7 @@ something it would have done anyway.
457 533
458 534
459 535
460 Chapter 15: References 536 Appendix I: References
461 537
462The C Programming Language, Second Edition 538The C Programming Language, Second Edition
463by Brian W. Kernighan and Dennis M. Ritchie. 539by Brian W. Kernighan and Dennis M. Ritchie.
@@ -481,4 +557,4 @@ Kernel CodingStyle, by greg@kroah.com at OLS 2002:
481http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/ 557http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
482 558
483-- 559--
484Last updated on 30 December 2005 by a community effort on LKML. 560Last updated on 30 April 2006.
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 5a2882d275ba..66e1cf733571 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -10,7 +10,8 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
10 kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ 10 kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
11 procfs-guide.xml writing_usb_driver.xml \ 11 procfs-guide.xml writing_usb_driver.xml \
12 kernel-api.xml journal-api.xml lsm.xml usb.xml \ 12 kernel-api.xml journal-api.xml lsm.xml usb.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml
14 15
15### 16###
16# The build process is as follows (targets): 17# The build process is as follows (targets):
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
new file mode 100644
index 000000000000..0f4a4b6321e4
--- /dev/null
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -0,0 +1,474 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="Generic-IRQ-Guide">
6 <bookinfo>
7 <title>Linux generic IRQ handling</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Thomas</firstname>
12 <surname>Gleixner</surname>
13 <affiliation>
14 <address>
15 <email>tglx@linutronix.de</email>
16 </address>
17 </affiliation>
18 </author>
19 <author>
20 <firstname>Ingo</firstname>
21 <surname>Molnar</surname>
22 <affiliation>
23 <address>
24 <email>mingo@elte.hu</email>
25 </address>
26 </affiliation>
27 </author>
28 </authorgroup>
29
30 <copyright>
31 <year>2005-2006</year>
32 <holder>Thomas Gleixner</holder>
33 </copyright>
34 <copyright>
35 <year>2005-2006</year>
36 <holder>Ingo Molnar</holder>
37 </copyright>
38
39 <legalnotice>
40 <para>
41 This documentation is free software; you can redistribute
42 it and/or modify it under the terms of the GNU General Public
43 License version 2 as published by the Free Software Foundation.
44 </para>
45
46 <para>
47 This program is distributed in the hope that it will be
48 useful, but WITHOUT ANY WARRANTY; without even the implied
49 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
50 See the GNU General Public License for more details.
51 </para>
52
53 <para>
54 You should have received a copy of the GNU General Public
55 License along with this program; if not, write to the Free
56 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
57 MA 02111-1307 USA
58 </para>
59
60 <para>
61 For more details see the file COPYING in the source
62 distribution of Linux.
63 </para>
64 </legalnotice>
65 </bookinfo>
66
67<toc></toc>
68
69 <chapter id="intro">
70 <title>Introduction</title>
71 <para>
72 The generic interrupt handling layer is designed to provide a
73 complete abstraction of interrupt handling for device drivers.
74 It is able to handle all the different types of interrupt controller
75 hardware. Device drivers use generic API functions to request, enable,
76 disable and free interrupts. The drivers do not have to know anything
77 about interrupt hardware details, so they can be used on different
78 platforms without code changes.
79 </para>
80 <para>
81 This documentation is provided to developers who want to implement
82 an interrupt subsystem for their architecture, with the help
83 of the generic IRQ handling layer.
84 </para>
85 </chapter>
86
87 <chapter id="rationale">
88 <title>Rationale</title>
89 <para>
90 The original implementation of interrupt handling in Linux uses
91 the __do_IRQ() super-handler, which is able to deal with every
92 type of interrupt logic.
93 </para>
94 <para>
95 Originally, Russell King identified different types of handlers to
96 build a quite universal set for the ARM interrupt handler
97 implementation in Linux 2.5/2.6. He distinguished between:
98 <itemizedlist>
99 <listitem><para>Level type</para></listitem>
100 <listitem><para>Edge type</para></listitem>
101 <listitem><para>Simple type</para></listitem>
102 </itemizedlist>
103 In the SMP world of the __do_IRQ() super-handler another type
104 was identified:
105 <itemizedlist>
106 <listitem><para>Per CPU type</para></listitem>
107 </itemizedlist>
108 </para>
109 <para>
110 This split implementation of highlevel IRQ handlers allows us to
111 optimize the flow of the interrupt handling for each specific
112 interrupt type. This reduces complexity in that particular codepath
113 and allows the optimized handling of a given type.
114 </para>
115 <para>
116 The original general IRQ implementation used hw_interrupt_type
117 structures and their ->ack(), ->end() [etc.] callbacks to
118 differentiate the flow control in the super-handler. This leads to
119 a mix of flow logic and lowlevel hardware logic, and it also leads
120 to unnecessary code duplication: for example in i386, there is a
121 ioapic_level_irq and a ioapic_edge_irq irq-type which share many
122 of the lowlevel details but have different flow handling.
123 </para>
124 <para>
125 A more natural abstraction is the clean separation of the
126 'irq flow' and the 'chip details'.
127 </para>
128 <para>
129 Analysing a couple of architectures' IRQ subsystem implementations
130 reveals that most of them can use a generic set of 'irq flow'
131 methods and only need to add the chip level specific code.
132 The separation is also valuable for (sub)architectures
133 which need specific quirks in the irq flow itself but not in the
134 chip-details - and thus provides a more transparent IRQ subsystem
135 design.
136 </para>
137 <para>
138 Each interrupt descriptor is assigned its own highlevel flow
139 handler, which is normally one of the generic
140 implementations. (This highlevel flow handler implementation also
141 makes it simple to provide demultiplexing handlers which can be
142 found in embedded platforms on various architectures.)
143 </para>
144 <para>
145 The separation makes the generic interrupt handling layer more
146 flexible and extensible. For example, an (sub)architecture can
147 use a generic irq-flow implementation for 'level type' interrupts
148 and add a (sub)architecture specific 'edge type' implementation.
149 </para>
150 <para>
151 To make the transition to the new model easier and prevent the
152 breakage of existing implementations, the __do_IRQ() super-handler
153 is still available. This leads to a kind of duality for the time
154 being. Over time the new model should be used in more and more
155 architectures, as it enables smaller and cleaner IRQ subsystems.
156 </para>
157 </chapter>
158 <chapter id="bugs">
159 <title>Known Bugs And Assumptions</title>
160 <para>
161 None (knock on wood).
162 </para>
163 </chapter>
164
165 <chapter id="Abstraction">
166 <title>Abstraction layers</title>
167 <para>
168 There are three main levels of abstraction in the interrupt code:
169 <orderedlist>
170 <listitem><para>Highlevel driver API</para></listitem>
171 <listitem><para>Highlevel IRQ flow handlers</para></listitem>
172 <listitem><para>Chiplevel hardware encapsulation</para></listitem>
173 </orderedlist>
174 </para>
175 <sect1>
176 <title>Interrupt control flow</title>
177 <para>
178 Each interrupt is described by an interrupt descriptor structure
179 irq_desc. The interrupt is referenced by an 'unsigned int' numeric
180 value which selects the corresponding interrupt description structure
181 in the descriptor structures array.
182 The descriptor structure contains status information and pointers
183 to the interrupt flow method and the interrupt chip structure
184 which are assigned to this interrupt.
185 </para>
186 <para>
187 Whenever an interrupt triggers, the lowlevel arch code calls into
188 the generic interrupt code by calling desc->handle_irq().
189 This highlevel IRQ handling function only uses desc->chip primitives
190 referenced by the assigned chip descriptor structure.
191 </para>
192 </sect1>
193 <sect1>
194 <title>Highlevel Driver API</title>
195 <para>
196 The highlevel Driver API consists of the following functions:
197 <itemizedlist>
198 <listitem><para>request_irq()</para></listitem>
199 <listitem><para>free_irq()</para></listitem>
200 <listitem><para>disable_irq()</para></listitem>
201 <listitem><para>enable_irq()</para></listitem>
202 <listitem><para>disable_irq_nosync() (SMP only)</para></listitem>
203 <listitem><para>synchronize_irq() (SMP only)</para></listitem>
204 <listitem><para>set_irq_type()</para></listitem>
205 <listitem><para>set_irq_wake()</para></listitem>
206 <listitem><para>set_irq_data()</para></listitem>
207 <listitem><para>set_irq_chip()</para></listitem>
208 <listitem><para>set_irq_chip_data()</para></listitem>
209 </itemizedlist>
210 See the autogenerated function documentation for details.
211 </para>
212 </sect1>
213 <sect1>
214 <title>Highlevel IRQ flow handlers</title>
215 <para>
216 The generic layer provides a set of pre-defined irq-flow methods:
217 <itemizedlist>
218 <listitem><para>handle_level_irq</para></listitem>
219 <listitem><para>handle_edge_irq</para></listitem>
220 <listitem><para>handle_simple_irq</para></listitem>
221 <listitem><para>handle_percpu_irq</para></listitem>
222 </itemizedlist>
223 The interrupt flow handlers (either predefined or architecture
224 specific) are assigned to specific interrupts by the architecture
225 either during bootup or during device initialization.
226 </para>
227 <sect2>
228 <title>Default flow implementations</title>
229 <sect3>
230 <title>Helper functions</title>
231 <para>
232 The helper functions call the chip primitives and
233 are used by the default flow implementations.
234 The following helper functions are implemented (simplified excerpt):
235 <programlisting>
236default_enable(irq)
237{
238 desc->chip->unmask(irq);
239}
240
241default_disable(irq)
242{
243 if (!delay_disable(irq))
244 desc->chip->mask(irq);
245}
246
247default_ack(irq)
248{
249 chip->ack(irq);
250}
251
252default_mask_ack(irq)
253{
254 if (chip->mask_ack) {
255 chip->mask_ack(irq);
256 } else {
257 chip->mask(irq);
258 chip->ack(irq);
259 }
260}
261
262noop(irq)
263{
264}
265
266 </programlisting>
267 </para>
268 </sect3>
269 </sect2>
270 <sect2>
271 <title>Default flow handler implementations</title>
272 <sect3>
273 <title>Default Level IRQ flow handler</title>
274 <para>
275 handle_level_irq provides a generic implementation
276 for level-triggered interrupts.
277 </para>
278 <para>
279 The following control flow is implemented (simplified excerpt):
280 <programlisting>
281desc->chip->start();
282handle_IRQ_event(desc->action);
283desc->chip->end();
284 </programlisting>
285 </para>
286 </sect3>
287 <sect3>
288 <title>Default Edge IRQ flow handler</title>
289 <para>
290 handle_edge_irq provides a generic implementation
291 for edge-triggered interrupts.
292 </para>
293 <para>
294 The following control flow is implemented (simplified excerpt):
295 <programlisting>
296if (desc->status &amp; running) {
297 desc->chip->hold();
298 desc->status |= pending | masked;
299 return;
300}
301desc->chip->start();
302desc->status |= running;
303do {
304 if (desc->status &amp; masked)
305 desc->chip->enable();
306 desc->status &amp;= ~pending;
307 handle_IRQ_event(desc->action);
308} while (desc->status &amp; pending);
309desc->status &amp;= ~running;
310desc->chip->end();
311 </programlisting>
312 </para>
313 </sect3>
314 <sect3>
315 <title>Default simple IRQ flow handler</title>
316 <para>
317 handle_simple_irq provides a generic implementation
318 for simple interrupts.
319 </para>
320 <para>
321 Note: The simple flow handler does not call any
322 handler/chip primitives.
323 </para>
324 <para>
325 The following control flow is implemented (simplified excerpt):
326 <programlisting>
327handle_IRQ_event(desc->action);
328 </programlisting>
329 </para>
330 </sect3>
331 <sect3>
332 <title>Default per CPU flow handler</title>
333 <para>
334 handle_percpu_irq provides a generic implementation
335 for per CPU interrupts.
336 </para>
337 <para>
338 Per CPU interrupts are only available on SMP and
339 the handler provides a simplified version without
340 locking.
341 </para>
342 <para>
343 The following control flow is implemented (simplified excerpt):
344 <programlisting>
345desc->chip->start();
346handle_IRQ_event(desc->action);
347desc->chip->end();
348 </programlisting>
349 </para>
350 </sect3>
351 </sect2>
352 <sect2>
353 <title>Quirks and optimizations</title>
354 <para>
355 The generic functions are intended for 'clean' architectures and chips,
356 which have no platform-specific IRQ handling quirks. If an architecture
357 needs to implement quirks on the 'flow' level then it can do so by
358 overriding the highlevel irq-flow handler.
359 </para>
360 </sect2>
361 <sect2>
362 <title>Delayed interrupt disable</title>
363 <para>
364 This per interrupt selectable feature, which was introduced by Russell
365 King in the ARM interrupt implementation, does not mask an interrupt
366 at the hardware level when disable_irq() is called. The interrupt is
367 kept enabled and is masked in the flow handler when an interrupt event
368 happens. This prevents losing edge interrupts on hardware which does
369 not store an edge interrupt event while the interrupt is disabled at
370 the hardware level. When an interrupt arrives while the IRQ_DISABLED
371 flag is set, then the interrupt is masked at the hardware level and
372 the IRQ_PENDING bit is set. When the interrupt is re-enabled by
373 enable_irq() the pending bit is checked and if it is set, the
374 interrupt is resent either via hardware or by a software resend
375 mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when
376 you want to use the delayed interrupt disable feature and your
377 hardware is not capable of retriggering an interrupt.)
378 The delayed interrupt disable can be runtime enabled, per interrupt,
379 by setting the IRQ_DELAYED_DISABLE flag in the irq_desc status field.
380 </para>
381 </sect2>
382 </sect1>
383 <sect1>
384 <title>Chiplevel hardware encapsulation</title>
385 <para>
386 The chip level hardware descriptor structure irq_chip
387 contains all the direct chip relevant functions, which
388 can be utilized by the irq flow implementations.
389 <itemizedlist>
390 <listitem><para>ack()</para></listitem>
391 <listitem><para>mask_ack() - Optional, recommended for performance</para></listitem>
392 <listitem><para>mask()</para></listitem>
393 <listitem><para>unmask()</para></listitem>
394 <listitem><para>retrigger() - Optional</para></listitem>
395 <listitem><para>set_type() - Optional</para></listitem>
396 <listitem><para>set_wake() - Optional</para></listitem>
397 </itemizedlist>
398 These primitives are strictly intended to mean what they say: ack means
399 ACK, masking means masking of an IRQ line, etc. It is up to the flow
400 handler(s) to use these basic units of lowlevel functionality.
401 </para>
402 </sect1>
403 </chapter>
404
405 <chapter id="doirq">
406 <title>__do_IRQ entry point</title>
407 <para>
408 The original implementation __do_IRQ() is an alternative entry
409 point for all types of interrupts.
410 </para>
411 <para>
412 This handler turned out to be not suitable for all
413 interrupt hardware and was therefore reimplemented with split
414 functionality for edge/level/simple/percpu interrupts. This is not
415 only a functional optimization. It also shortens code paths for
416 interrupts.
417 </para>
418 <para>
419 To make use of the split implementation, replace the call to
420 __do_IRQ by a call to desc->handle_irq() and associate
421 the appropriate handler function to desc->handle_irq().
422 In most cases the generic handler implementations should
423 be sufficient.
424 </para>
425 </chapter>
426
427 <chapter id="locking">
428 <title>Locking on SMP</title>
429 <para>
430 The locking of chip registers is up to the architecture that
431 defines the chip primitives. There is a chip->lock field that can be used
432 for serialization, but the generic layer does not touch it. The per-irq
433 structure is protected via desc->lock, by the generic layer.
434 </para>
435 </chapter>
436 <chapter id="structs">
437 <title>Structures</title>
438 <para>
439 This chapter contains the autogenerated documentation of the structures which are
440 used in the generic IRQ layer.
441 </para>
442!Iinclude/linux/irq.h
443 </chapter>
444
445 <chapter id="pubfunctions">
446 <title>Public Functions Provided</title>
447 <para>
448 This chapter contains the autogenerated documentation of the kernel API functions
449 which are exported.
450 </para>
451!Ekernel/irq/manage.c
452!Ekernel/irq/chip.c
453 </chapter>
454
455 <chapter id="intfunctions">
456 <title>Internal Functions Provided</title>
457 <para>
458 This chapter contains the autogenerated documentation of the internal functions.
459 </para>
460!Ikernel/irq/handle.c
461!Ikernel/irq/chip.c
462 </chapter>
463
464 <chapter id="credits">
465 <title>Credits</title>
466 <para>
467 The following people have contributed to this document:
468 <orderedlist>
469 <listitem><para>Thomas Gleixner <email>tglx@linutronix.de</email></para></listitem>
470 <listitem><para>Ingo Molnar <email>mingo@elte.hu</email></para></listitem>
471 </orderedlist>
472 </para>
473 </chapter>
474</book>
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index ca02e04a906c..1ae4dc0fd856 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -62,6 +62,8 @@
62 <sect1><title>Internal Functions</title> 62 <sect1><title>Internal Functions</title>
63!Ikernel/exit.c 63!Ikernel/exit.c
64!Ikernel/signal.c 64!Ikernel/signal.c
65!Iinclude/linux/kthread.h
66!Ekernel/kthread.c
65 </sect1> 67 </sect1>
66 68
67 <sect1><title>Kernel objects manipulation</title> 69 <sect1><title>Kernel objects manipulation</title>
@@ -114,9 +116,33 @@ X!Ilib/string.c
114 </sect1> 116 </sect1>
115 </chapter> 117 </chapter>
116 118
119 <chapter id="kernel-lib">
120 <title>Basic Kernel Library Functions</title>
121
122 <para>
123 The Linux kernel provides more basic utility functions.
124 </para>
125
126 <sect1><title>Bitmap Operations</title>
127!Elib/bitmap.c
128!Ilib/bitmap.c
129 </sect1>
130
131 <sect1><title>Command-line Parsing</title>
132!Elib/cmdline.c
133 </sect1>
134
135 <sect1><title>CRC Functions</title>
136!Elib/crc16.c
137!Elib/crc32.c
138!Elib/crc-ccitt.c
139 </sect1>
140 </chapter>
141
117 <chapter id="mm"> 142 <chapter id="mm">
118 <title>Memory Management in Linux</title> 143 <title>Memory Management in Linux</title>
119 <sect1><title>The Slab Cache</title> 144 <sect1><title>The Slab Cache</title>
145!Iinclude/linux/slab.h
120!Emm/slab.c 146!Emm/slab.c
121 </sect1> 147 </sect1>
122 <sect1><title>User Space Memory Access</title> 148 <sect1><title>User Space Memory Access</title>
@@ -280,12 +306,13 @@ X!Ekernel/module.c
280 <sect1><title>MTRR Handling</title> 306 <sect1><title>MTRR Handling</title>
281!Earch/i386/kernel/cpu/mtrr/main.c 307!Earch/i386/kernel/cpu/mtrr/main.c
282 </sect1> 308 </sect1>
309
283 <sect1><title>PCI Support Library</title> 310 <sect1><title>PCI Support Library</title>
284!Edrivers/pci/pci.c 311!Edrivers/pci/pci.c
285!Edrivers/pci/pci-driver.c 312!Edrivers/pci/pci-driver.c
286!Edrivers/pci/remove.c 313!Edrivers/pci/remove.c
287!Edrivers/pci/pci-acpi.c 314!Edrivers/pci/pci-acpi.c
288<!-- kerneldoc does not understand to __devinit 315<!-- kerneldoc does not understand __devinit
289X!Edrivers/pci/search.c 316X!Edrivers/pci/search.c
290 --> 317 -->
291!Edrivers/pci/msi.c 318!Edrivers/pci/msi.c
@@ -314,9 +341,11 @@ X!Earch/i386/kernel/mca.c
314 </sect1> 341 </sect1>
315 </chapter> 342 </chapter>
316 343
317 <chapter id="devfs"> 344 <chapter id="firmware">
318 <title>The Device File System</title> 345 <title>Firmware Interfaces</title>
319!Efs/devfs/base.c 346 <sect1><title>DMI Interfaces</title>
347!Edrivers/firmware/dmi_scan.c
348 </sect1>
320 </chapter> 349 </chapter>
321 350
322 <chapter id="sysfs"> 351 <chapter id="sysfs">
@@ -331,6 +360,18 @@ X!Earch/i386/kernel/mca.c
331!Esecurity/security.c 360!Esecurity/security.c
332 </chapter> 361 </chapter>
333 362
363 <chapter id="audit">
364 <title>Audit Interfaces</title>
365!Ekernel/audit.c
366!Ikernel/auditsc.c
367!Ikernel/auditfilter.c
368 </chapter>
369
370 <chapter id="accounting">
371 <title>Accounting Framework</title>
372!Ikernel/acct.c
373 </chapter>
374
334 <chapter id="pmfuncs"> 375 <chapter id="pmfuncs">
335 <title>Power Management</title> 376 <title>Power Management</title>
336!Ekernel/power/pm.c 377!Ekernel/power/pm.c
@@ -390,7 +431,6 @@ X!Edrivers/pnp/system.c
390 </sect1> 431 </sect1>
391 </chapter> 432 </chapter>
392 433
393
394 <chapter id="blkdev"> 434 <chapter id="blkdev">
395 <title>Block Devices</title> 435 <title>Block Devices</title>
396!Eblock/ll_rw_blk.c 436!Eblock/ll_rw_blk.c
@@ -401,6 +441,14 @@ X!Edrivers/pnp/system.c
401!Edrivers/char/misc.c 441!Edrivers/char/misc.c
402 </chapter> 442 </chapter>
403 443
444 <chapter id="parportdev">
445 <title>Parallel Port Devices</title>
446!Iinclude/linux/parport.h
447!Edrivers/parport/ieee1284.c
448!Edrivers/parport/share.c
449!Idrivers/parport/daisy.c
450 </chapter>
451
404 <chapter id="viddev"> 452 <chapter id="viddev">
405 <title>Video4Linux</title> 453 <title>Video4Linux</title>
406!Edrivers/media/video/videodev.c 454!Edrivers/media/video/videodev.c
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 158ffe9bfade..644c3884fab9 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -1590,7 +1590,7 @@ the amount of locking which needs to be done.
1590 <para> 1590 <para>
1591 Our final dilemma is this: when can we actually destroy the 1591 Our final dilemma is this: when can we actually destroy the
1592 removed element? Remember, a reader might be stepping through 1592 removed element? Remember, a reader might be stepping through
1593 this element in the list right now: it we free this element and 1593 this element in the list right now: if we free this element and
1594 the <symbol>next</symbol> pointer changes, the reader will jump 1594 the <symbol>next</symbol> pointer changes, the reader will jump
1595 off into garbage and crash. We need to wait until we know that 1595 off into garbage and crash. We need to wait until we know that
1596 all the readers who were traversing the list when we deleted the 1596 all the readers who were traversing the list when we deleted the
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
index f869b03929db..e97c32314541 100644
--- a/Documentation/DocBook/libata.tmpl
+++ b/Documentation/DocBook/libata.tmpl
@@ -169,6 +169,22 @@ void (*tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
169 169
170 </sect2> 170 </sect2>
171 171
172 <sect2><title>PIO data read/write</title>
173 <programlisting>
174void (*data_xfer) (struct ata_device *, unsigned char *, unsigned int, int);
175 </programlisting>
176
177 <para>
178All bmdma-style drivers must implement this hook. This is the low-level
179operation that actually copies the data bytes during a PIO data
180transfer.
181Typically the driver
182will choose one of ata_pio_data_xfer_noirq(), ata_pio_data_xfer(), or
183ata_mmio_data_xfer().
184 </para>
185
186 </sect2>
187
172 <sect2><title>ATA command execute</title> 188 <sect2><title>ATA command execute</title>
173 <programlisting> 189 <programlisting>
174void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf); 190void (*exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
@@ -204,11 +220,10 @@ command.
204 <programlisting> 220 <programlisting>
205u8 (*check_status)(struct ata_port *ap); 221u8 (*check_status)(struct ata_port *ap);
206u8 (*check_altstatus)(struct ata_port *ap); 222u8 (*check_altstatus)(struct ata_port *ap);
207u8 (*check_err)(struct ata_port *ap);
208 </programlisting> 223 </programlisting>
209 224
210 <para> 225 <para>
211 Reads the Status/AltStatus/Error ATA shadow register from 226 Reads the Status/AltStatus ATA shadow register from
212 hardware. On some hardware, reading the Status register has 227 hardware. On some hardware, reading the Status register has
213 the side effect of clearing the interrupt condition. 228 the side effect of clearing the interrupt condition.
214 Most drivers for taskfile-based hardware use 229 Most drivers for taskfile-based hardware use
@@ -269,23 +284,6 @@ void (*set_mode) (struct ata_port *ap);
269 284
270 </sect2> 285 </sect2>
271 286
272 <sect2><title>Reset ATA bus</title>
273 <programlisting>
274void (*phy_reset) (struct ata_port *ap);
275 </programlisting>
276
277 <para>
278 The very first step in the probe phase. Actions vary depending
279 on the bus type, typically. After waking up the device and probing
280 for device presence (PATA and SATA), typically a soft reset
281 (SRST) will be performed. Drivers typically use the helper
282 functions ata_bus_reset() or sata_phy_reset() for this hook.
283 Many SATA drivers use sata_phy_reset() or call it from within
284 their own phy_reset() functions.
285 </para>
286
287 </sect2>
288
289 <sect2><title>Control PCI IDE BMDMA engine</title> 287 <sect2><title>Control PCI IDE BMDMA engine</title>
290 <programlisting> 288 <programlisting>
291void (*bmdma_setup) (struct ata_queued_cmd *qc); 289void (*bmdma_setup) (struct ata_queued_cmd *qc);
@@ -354,16 +352,74 @@ int (*qc_issue) (struct ata_queued_cmd *qc);
354 352
355 </sect2> 353 </sect2>
356 354
357 <sect2><title>Timeout (error) handling</title> 355 <sect2><title>Exception and probe handling (EH)</title>
358 <programlisting> 356 <programlisting>
359void (*eng_timeout) (struct ata_port *ap); 357void (*eng_timeout) (struct ata_port *ap);
358void (*phy_reset) (struct ata_port *ap);
359 </programlisting>
360
361 <para>
362Deprecated. Use ->error_handler() instead.
363 </para>
364
365 <programlisting>
366void (*freeze) (struct ata_port *ap);
367void (*thaw) (struct ata_port *ap);
368 </programlisting>
369
370 <para>
371ata_port_freeze() is called when HSM violations or some other
372condition disrupts normal operation of the port. A frozen port
373is not allowed to perform any operation until the port is
374thawed, which usually follows a successful reset.
375 </para>
376
377 <para>
378The optional ->freeze() callback can be used for freezing the port
379hardware-wise (e.g. mask interrupt and stop DMA engine). If a
380port cannot be frozen hardware-wise, the interrupt handler
381must ack and clear interrupts unconditionally while the port
382is frozen.
383 </para>
384 <para>
385The optional ->thaw() callback is called to perform the opposite of ->freeze():
386prepare the port for normal operation once again. Unmask interrupts,
387start DMA engine, etc.
388 </para>
389
390 <programlisting>
391void (*error_handler) (struct ata_port *ap);
392 </programlisting>
393
394 <para>
395->error_handler() is a driver's hook into probe, hotplug, and recovery
396and other exceptional conditions. The primary responsibility of an
397implementation is to call ata_do_eh() or ata_bmdma_drive_eh() with a set
398of EH hooks as arguments:
399 </para>
400
401 <para>
402'prereset' hook (may be NULL) is called during an EH reset, before any other actions
403are taken.
404 </para>
405
406 <para>
407'postreset' hook (may be NULL) is called after the EH reset is performed.
408 </para>
409
410 <para>
411Based on existing conditions, severity of the problem, and hardware capabilities,
412either 'softreset' (may be NULL) or 'hardreset' (may be NULL) will be
413called to perform the low-level EH reset.
414 </para>
415
416 <programlisting>
417void (*post_internal_cmd) (struct ata_queued_cmd *qc);
360 </programlisting> 418 </programlisting>
361 419
362 <para> 420 <para>
363This is a high level error handling function, called from the 421Perform any hardware-specific actions necessary to finish processing
364error handling thread, when a command times out. Most newer 422after executing a probe-time or EH-time command via ata_exec_internal().
365hardware will implement its own error handling code here. IDE BMDMA
366drivers may use the helper function ata_eng_timeout().
367 </para> 423 </para>
368 424
369 </sect2> 425 </sect2>
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 6e463d0db266..a8c8cce50633 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -109,7 +109,7 @@
109 for most of the implementations. These functions can be replaced by the 109 for most of the implementations. These functions can be replaced by the
110 board driver if neccecary. Those functions are called via pointers in the 110 board driver if neccecary. Those functions are called via pointers in the
111 NAND chip description structure. The board driver can set the functions which 111 NAND chip description structure. The board driver can set the functions which
112 should be replaced by board dependend functions before calling nand_scan(). 112 should be replaced by board dependent functions before calling nand_scan().
113 If the function pointer is NULL on entry to nand_scan() then the pointer 113 If the function pointer is NULL on entry to nand_scan() then the pointer
114 is set to the default function which is suitable for the detected chip type. 114 is set to the default function which is suitable for the detected chip type.
115 </para></listitem> 115 </para></listitem>
@@ -133,7 +133,7 @@
133 [REPLACEABLE]</para><para> 133 [REPLACEABLE]</para><para>
134 Replaceable members hold hardware related functions which can be 134 Replaceable members hold hardware related functions which can be
135 provided by the board driver. The board driver can set the functions which 135 provided by the board driver. The board driver can set the functions which
136 should be replaced by board dependend functions before calling nand_scan(). 136 should be replaced by board dependent functions before calling nand_scan().
137 If the function pointer is NULL on entry to nand_scan() then the pointer 137 If the function pointer is NULL on entry to nand_scan() then the pointer
138 is set to the default function which is suitable for the detected chip type. 138 is set to the default function which is suitable for the detected chip type.
139 </para></listitem> 139 </para></listitem>
@@ -156,9 +156,8 @@
156 <title>Basic board driver</title> 156 <title>Basic board driver</title>
157 <para> 157 <para>
158 For most boards it will be sufficient to provide just the 158 For most boards it will be sufficient to provide just the
159 basic functions and fill out some really board dependend 159 basic functions and fill out some really board dependent
160 members in the nand chip description structure. 160 members in the nand chip description structure.
161 See drivers/mtd/nand/skeleton for reference.
162 </para> 161 </para>
163 <sect1> 162 <sect1>
164 <title>Basic defines</title> 163 <title>Basic defines</title>
@@ -189,9 +188,9 @@ static unsigned long baseaddr;
189 <sect1> 188 <sect1>
190 <title>Partition defines</title> 189 <title>Partition defines</title>
191 <para> 190 <para>
192 If you want to divide your device into parititions, then 191 If you want to divide your device into partitions, then
193 enable the configuration switch CONFIG_MTD_PARITIONS and define 192 enable the configuration switch CONFIG_MTD_PARTITIONS and define
194 a paritioning scheme suitable to your board. 193 a partitioning scheme suitable to your board.
195 </para> 194 </para>
196 <programlisting> 195 <programlisting>
197#define NUM_PARTITIONS 2 196#define NUM_PARTITIONS 2
@@ -1295,7 +1294,9 @@ in this page</entry>
1295 </para> 1294 </para>
1296!Idrivers/mtd/nand/nand_base.c 1295!Idrivers/mtd/nand/nand_base.c
1297!Idrivers/mtd/nand/nand_bbt.c 1296!Idrivers/mtd/nand/nand_bbt.c
1298!Idrivers/mtd/nand/nand_ecc.c 1297<!-- No internal functions for kernel-doc:
1298X!Idrivers/mtd/nand/nand_ecc.c
1299-->
1299 </chapter> 1300 </chapter>
1300 1301
1301 <chapter id="credits"> 1302 <chapter id="credits">
diff --git a/Documentation/DocBook/videobook.tmpl b/Documentation/DocBook/videobook.tmpl
index fdff984a5161..b629da33951d 100644
--- a/Documentation/DocBook/videobook.tmpl
+++ b/Documentation/DocBook/videobook.tmpl
@@ -976,7 +976,7 @@ static int camera_close(struct video_device *dev)
976 <title>Interrupt Handling</title> 976 <title>Interrupt Handling</title>
977 <para> 977 <para>
978 Our example handler is for an ISA bus device. If it was PCI you would be 978 Our example handler is for an ISA bus device. If it was PCI you would be
979 able to share the interrupt and would have set SA_SHIRQ to indicate a 979 able to share the interrupt and would have set IRQF_SHARED to indicate a
980 shared IRQ. We pass the device pointer as the interrupt routine argument. We 980 shared IRQ. We pass the device pointer as the interrupt routine argument. We
981 don't need to since we only support one card but doing this will make it 981 don't need to since we only support one card but doing this will make it
982 easier to upgrade the driver for multiple devices in the future. 982 easier to upgrade the driver for multiple devices in the future.
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
index bf1cf98d2a27..0256805b548f 100644
--- a/Documentation/IPMI.txt
+++ b/Documentation/IPMI.txt
@@ -10,7 +10,7 @@ standard for controlling intelligent devices that monitor a system.
10It provides for dynamic discovery of sensors in the system and the 10It provides for dynamic discovery of sensors in the system and the
11ability to monitor the sensors and be informed when the sensor's 11ability to monitor the sensors and be informed when the sensor's
12values change or go outside certain boundaries. It also has a 12values change or go outside certain boundaries. It also has a
13standardized database for field-replacable units (FRUs) and a watchdog 13standardized database for field-replaceable units (FRUs) and a watchdog
14timer. 14timer.
15 15
16To use this, you need an interface to an IPMI controller in your 16To use this, you need an interface to an IPMI controller in your
@@ -64,7 +64,7 @@ situation, you need to read the section below named 'The SI Driver' or
64IPMI defines a standard watchdog timer. You can enable this with the 64IPMI defines a standard watchdog timer. You can enable this with the
65'IPMI Watchdog Timer' config option. If you compile the driver into 65'IPMI Watchdog Timer' config option. If you compile the driver into
66the kernel, then via a kernel command-line option you can have the 66the kernel, then via a kernel command-line option you can have the
67watchdog timer start as soon as it intitializes. It also have a lot 67watchdog timer start as soon as it initializes. It also have a lot
68of other options, see the 'Watchdog' section below for more details. 68of other options, see the 'Watchdog' section below for more details.
69Note that you can also have the watchdog continue to run if it is 69Note that you can also have the watchdog continue to run if it is
70closed (by default it is disabled on close). Go into the 'Watchdog 70closed (by default it is disabled on close). Go into the 'Watchdog
diff --git a/Documentation/IRQ.txt b/Documentation/IRQ.txt
new file mode 100644
index 000000000000..1011e7175021
--- /dev/null
+++ b/Documentation/IRQ.txt
@@ -0,0 +1,22 @@
1What is an IRQ?
2
3An IRQ is an interrupt request from a device.
4Currently they can come in over a pin, or over a packet.
5Several devices may be connected to the same pin thus
6sharing an IRQ.
7
8An IRQ number is a kernel identifier used to talk about a hardware
9interrupt source. Typically this is an index into the global irq_desc
10array, but except for what linux/interrupt.h implements the details
11are architecture specific.
12
13An IRQ number is an enumeration of the possible interrupt sources on a
14machine. Typically what is enumerated is the number of input pins on
15all of the interrupt controllers in the system. In the case of ISA
16what is enumerated are the 16 input pins on the two i8259 interrupt
17controllers.
18
19Architectures can assign additional meaning to the IRQ numbers, and
20are encouraged to in the case where there is any manual configuration
21of the hardware involved. The ISA IRQs are a classic example of
22assigning this kind of additional meaning.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 49e27cc19385..1d50cf0c905e 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -144,9 +144,47 @@ over a rather long period of time, but improvements are always welcome!
144 whether the increased speed is worth it. 144 whether the increased speed is worth it.
145 145
1468. Although synchronize_rcu() is a bit slower than is call_rcu(), 1468. Although synchronize_rcu() is a bit slower than is call_rcu(),
147 it usually results in simpler code. So, unless update performance 147 it usually results in simpler code. So, unless update
148 is important or the updaters cannot block, synchronize_rcu() 148 performance is critically important or the updaters cannot block,
149 should be used in preference to call_rcu(). 149 synchronize_rcu() should be used in preference to call_rcu().
150
151 An especially important property of the synchronize_rcu()
152 primitive is that it automatically self-limits: if grace periods
153 are delayed for whatever reason, then the synchronize_rcu()
154 primitive will correspondingly delay updates. In contrast,
155 code using call_rcu() should explicitly limit update rate in
156 cases where grace periods are delayed, as failing to do so can
157 result in excessive realtime latencies or even OOM conditions.
158
159 Ways of gaining this self-limiting property when using call_rcu()
160 include:
161
162 a. Keeping a count of the number of data-structure elements
163 used by the RCU-protected data structure, including those
164 waiting for a grace period to elapse. Enforce a limit
165 on this number, stalling updates as needed to allow
166 previously deferred frees to complete.
167
168 Alternatively, limit only the number awaiting deferred
169 free rather than the total number of elements.
170
171 b. Limiting update rate. For example, if updates occur only
172 once per hour, then no explicit rate limiting is required,
173 unless your system is already badly broken. The dcache
174 subsystem takes this approach -- updates are guarded
175 by a global lock, limiting their rate.
176
177 c. Trusted update -- if updates can only be done manually by
178 superuser or some other trusted user, then it might not
179 be necessary to automatically limit them. The theory
180 here is that superuser already has lots of ways to crash
181 the machine.
182
183 d. Use call_rcu_bh() rather than call_rcu(), in order to take
184 advantage of call_rcu_bh()'s faster grace periods.
185
186 e. Periodically invoke synchronize_rcu(), permitting a limited
187 number of updates per grace period.
150 188
1519. All RCU list-traversal primitives, which include 1899. All RCU list-traversal primitives, which include
152 list_for_each_rcu(), list_for_each_entry_rcu(), 190 list_for_each_rcu(), list_for_each_entry_rcu(),
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index e4c38152f7f7..a4948591607d 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -7,7 +7,7 @@ The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
7implementations. It creates an rcutorture kernel module that can 7implementations. It creates an rcutorture kernel module that can
8be loaded to run a torture test. The test periodically outputs 8be loaded to run a torture test. The test periodically outputs
9status messages via printk(), which can be examined via the dmesg 9status messages via printk(), which can be examined via the dmesg
10command (perhaps grepping for "rcutorture"). The test is started 10command (perhaps grepping for "torture"). The test is started
11when the module is loaded, and stops when the module is unloaded. 11when the module is loaded, and stops when the module is unloaded.
12 12
13However, actually setting this config option to "y" results in the system 13However, actually setting this config option to "y" results in the system
@@ -35,6 +35,19 @@ stat_interval The number of seconds between output of torture
35 be printed -only- when the module is unloaded, and this 35 be printed -only- when the module is unloaded, and this
36 is the default. 36 is the default.
37 37
38shuffle_interval
39 The number of seconds to keep the test threads affinitied
40 to a particular subset of the CPUs. Used in conjunction
41 with test_no_idle_hz.
42
43test_no_idle_hz Whether or not to test the ability of RCU to operate in
44 a kernel that disables the scheduling-clock interrupt to
45 idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
46
47torture_type The type of RCU to test: "rcu" for the rcu_read_lock()
48 API, "rcu_bh" for the rcu_read_lock_bh() API, and "srcu"
49 for the "srcu_read_lock()" API.
50
38verbose Enable debug printk()s. Default is disabled. 51verbose Enable debug printk()s. Default is disabled.
39 52
40 53
@@ -42,14 +55,14 @@ OUTPUT
42 55
43The statistics output is as follows: 56The statistics output is as follows:
44 57
45 rcutorture: --- Start of test: nreaders=16 stat_interval=0 verbose=0 58 rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0
46 rcutorture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 59 rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915
47 rcutorture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0 60 rcu-torture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0
48 rcutorture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0 61 rcu-torture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0
49 rcutorture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0 62 rcu-torture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0
50 rcutorture: --- End of test 63 rcu-torture: --- End of test
51 64
52The command "dmesg | grep rcutorture:" will extract this information on 65The command "dmesg | grep torture:" will extract this information on
53most systems. On more esoteric configurations, it may be necessary to 66most systems. On more esoteric configurations, it may be necessary to
54use other commands to access the output of the printk()s used by 67use other commands to access the output of the printk()s used by
55the RCU torture test. The printk()s use KERN_ALERT, so they should 68the RCU torture test. The printk()s use KERN_ALERT, so they should
@@ -115,8 +128,9 @@ The following script may be used to torture RCU:
115 modprobe rcutorture 128 modprobe rcutorture
116 sleep 100 129 sleep 100
117 rmmod rcutorture 130 rmmod rcutorture
118 dmesg | grep rcutorture: 131 dmesg | grep torture:
119 132
120The output can be manually inspected for the error flag of "!!!". 133The output can be manually inspected for the error flag of "!!!".
121One could of course create a more elaborate script that automatically 134One could of course create a more elaborate script that automatically
122checked for such errors. 135checked for such errors. The "rmmod" command forces a "SUCCESS" or
136"FAILURE" indication to be printk()ed.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 07cb93b82ba9..4f41a60e5111 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -184,7 +184,17 @@ synchronize_rcu()
184 blocking, it registers a function and argument which are invoked 184 blocking, it registers a function and argument which are invoked
185 after all ongoing RCU read-side critical sections have completed. 185 after all ongoing RCU read-side critical sections have completed.
186 This callback variant is particularly useful in situations where 186 This callback variant is particularly useful in situations where
187 it is illegal to block. 187 it is illegal to block or where update-side performance is
188 critically important.
189
190 However, the call_rcu() API should not be used lightly, as use
191 of the synchronize_rcu() API generally results in simpler code.
192 In addition, the synchronize_rcu() API has the nice property
193 of automatically limiting update rate should grace periods
194 be delayed. This property results in system resilience in face
195 of denial-of-service attacks. Code using call_rcu() should limit
196 update rate in order to gain this same sort of resilience. See
197 checklist.txt for some approaches to limiting the update rate.
188 198
189rcu_assign_pointer() 199rcu_assign_pointer()
190 200
@@ -790,7 +800,6 @@ RCU pointer update:
790 800
791RCU grace period: 801RCU grace period:
792 802
793 synchronize_kernel (deprecated)
794 synchronize_net 803 synchronize_net
795 synchronize_sched 804 synchronize_sched
796 synchronize_rcu 805 synchronize_rcu
diff --git a/Documentation/README.DAC960 b/Documentation/README.DAC960
index 98ea617a0dd6..0e8f618ab534 100644
--- a/Documentation/README.DAC960
+++ b/Documentation/README.DAC960
@@ -78,9 +78,9 @@ also known as "System Drives", and Drive Groups are also called "Packs". Both
78terms are in use in the Mylex documentation; I have chosen to standardize on 78terms are in use in the Mylex documentation; I have chosen to standardize on
79the more generic "Logical Drive" and "Drive Group". 79the more generic "Logical Drive" and "Drive Group".
80 80
81DAC960 RAID disk devices are named in the style of the Device File System 81DAC960 RAID disk devices are named in the style of the obsolete Device File
82(DEVFS). The device corresponding to Logical Drive D on Controller C is 82System (DEVFS). The device corresponding to Logical Drive D on Controller C
83referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1 83is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1
84through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on 84through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on
85Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI 85Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI
86disks the device names will not change in the event of a disk drive failure. 86disks the device names will not change in the event of a disk drive failure.
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
new file mode 100644
index 000000000000..8230098da529
--- /dev/null
+++ b/Documentation/SubmitChecklist
@@ -0,0 +1,57 @@
1Linux Kernel patch submittal checklist
2~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3
4Here are some basic things that developers should do if they
5want to see their kernel patch submittals accepted quicker.
6
7These are all above and beyond the documentation that is provided
8in Documentation/SubmittingPatches and elsewhere about submitting
9Linux kernel patches.
10
11
12
13- Builds cleanly with applicable or modified CONFIG options =y, =m, and =n.
14 No gcc warnings/errors, no linker warnings/errors.
15
16- Passes allnoconfig, allmodconfig
17
18- Builds on multiple CPU arch-es by using local cross-compile tools
19 or something like PLM at OSDL.
20
21- ppc64 is a good architecture for cross-compilation checking because it
22 tends to use `unsigned long' for 64-bit quantities.
23
24- Matches kernel coding style(!)
25
26- Any new or modified CONFIG options don't muck up the config menu.
27
28- All new Kconfig options have help text.
29
30- Has been carefully reviewed with respect to relevant Kconfig
31 combinations. This is very hard to get right with testing --
32 brainpower pays off here.
33
34- Checks cleanly with sparse.
35
36- Use 'make checkstack' and 'make namespacecheck' and fix any
37 problems that they find. Note: checkstack does not point out
38 problems explicitly, but any one function that uses more than
39 512 bytes on the stack is a candidate for change.
40
41- Include kernel-doc to document global kernel APIs. (Not required
42 for static functions, but OK there also.) Use 'make htmldocs'
43 or 'make mandocs' to check the kernel-doc and fix any issues.
44
45- Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
46 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
47 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously
48 enabled.
49
50- Has been build- and runtime tested with and without CONFIG_SMP and
51 CONFIG_PREEMPT.
52
53- If the patch affects IO/Disk, etc: has been tested with and without
54 CONFIG_LBD.
55
56
572006-APR-27
diff --git a/Documentation/arm/IXP4xx b/Documentation/arm/IXP4xx
index d4c6d3aa0c25..43edb4ecf27d 100644
--- a/Documentation/arm/IXP4xx
+++ b/Documentation/arm/IXP4xx
@@ -85,7 +85,7 @@ IXP4xx provides two methods of accessing PCI memory space:
852) If > 64MB of memory space is required, the IXP4xx can be 852) If > 64MB of memory space is required, the IXP4xx can be
86 configured to use indirect registers to access PCI This allows 86 configured to use indirect registers to access PCI This allows
87 for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus. 87 for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
88 The disadvantadge of this is that every PCI access requires 88 The disadvantage of this is that every PCI access requires
89 three local register accesses plus a spinlock, but in some 89 three local register accesses plus a spinlock, but in some
90 cases the performance hit is acceptable. In addition, you cannot 90 cases the performance hit is acceptable. In addition, you cannot
91 mmap() PCI devices in this case due to the indirect nature 91 mmap() PCI devices in this case due to the indirect nature
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
index 8c6ee684174c..3e46d2a31158 100644
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt
@@ -7,11 +7,13 @@ Introduction
7------------ 7------------
8 8
9 The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported 9 The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
10 by the 's3c2410' architecture of ARM Linux. Currently the S3C2410 and 10 by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
11 the S3C2440 are supported CPUs. 11 S3C2440 and S3C2442 devices are supported.
12 12
13 Support for the S3C2400 series is in progress. 13 Support for the S3C2400 series is in progress.
14 14
15 Support for the S3C2412 and S3C2413 CPUs is being merged.
16
15 17
16Configuration 18Configuration
17------------- 19-------------
@@ -43,9 +45,18 @@ Machines
43 45
44 Samsung's own development board, geared for PDA work. 46 Samsung's own development board, geared for PDA work.
45 47
48 Samsung/Aiji SMDK2412
49
50 The S3C2412 version of the SMDK2440.
51
52 Samsung/Aiji SMDK2413
53
54 The S3C2412 version of the SMDK2440.
55
46 Samsung/Meritech SMDK2440 56 Samsung/Meritech SMDK2440
47 57
48 The S3C2440 compatible version of the SMDK2440 58 The S3C2440 compatible version of the SMDK2440, which has the
59 option of an S3C2440 or S3C2442 CPU module.
49 60
50 Thorcom VR1000 61 Thorcom VR1000
51 62
@@ -211,24 +222,6 @@ Port Contributors
211 Lucas Correia Villa Real (S3C2400 port) 222 Lucas Correia Villa Real (S3C2400 port)
212 223
213 224
214Document Changes
215----------------
216
217 05 Sep 2004 - BJD - Added Document Changes section
218 05 Sep 2004 - BJD - Added Klaus Fetscher to list of contributors
219 25 Oct 2004 - BJD - Added Dimitry Andric to list of contributors
220 25 Oct 2004 - BJD - Updated the MTD from the 2.6.9 merge
221 21 Jan 2005 - BJD - Added rx3715, added Shannon to contributors
222 10 Feb 2005 - BJD - Added Guillaume Gourat to contributors
223 02 Mar 2005 - BJD - Added SMDK2440 to list of machines
224 06 Mar 2005 - BJD - Added Christer Weinigel
225 08 Mar 2005 - BJD - Added LCVR to list of people, updated introduction
226 08 Mar 2005 - BJD - Added section on adding machines
227 09 Sep 2005 - BJD - Added section on platform data
228 11 Feb 2006 - BJD - Added I2C, RTC and Watchdog sections
229 11 Feb 2006 - BJD - Added Osiris machine, and S3C2400 information
230
231
232Document Author 225Document Author
233--------------- 226---------------
234 227
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
new file mode 100644
index 000000000000..cb82a7fc7901
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
@@ -0,0 +1,120 @@
1 S3C2412 ARM Linux Overview
2 ==========================
3
4Introduction
5------------
6
7 The S3C2412 is part of the S3C24XX range of ARM9 System-on-Chip CPUs
8 from Samsung. This part has an ARM926-EJS core, capable of running up
9 to 266MHz (see data-sheet for more information).
10
11
12Clock
13-----
14
15 The core clock code provides a set of clocks to the drivers, and allows
16 for source selection and a number of other features.
17
18
19Power
20-----
21
22 No support for suspend/resume to RAM in the current system.
23
24
25DMA
26---
27
28 No current support for DMA.
29
30
31GPIO
32----
33
34 There is support for setting the GPIO to input/output/special function
35 and reading or writing to them.
36
37
38UART
39----
40
41 The UART hardware is similar to the S3C2440, and is supported by the
42 s3c2410 driver in the drivers/serial directory.
43
44
45NAND
46----
47
48 The NAND hardware is similar to the S3C2440, and is supported by the
49 s3c2410 driver in the drivers/mtd/nand directory.
50
51
52USB Host
53--------
54
55 The USB hardware is similar to the S3C2410, with extended clock source
56 control. The OHCI portion is supported by the ohci-s3c2410 driver, and
57 the clock control selection is supported by the core clock code.
58
59
60USB Device
61----------
62
63 No current support in the kernel
64
65
66IRQs
67----
68
69 All the standard, and external interrupt sources are supported. The
70 extra sub-sources are not yet supported.
71
72
73RTC
74---
75
76 The RTC hardware is similar to the S3C2410, and is supported by the
77 s3c2410-rtc driver.
78
79
80Watchdog
81--------
82
83 The watchdog hardware is the same as the S3C2410, and is supported by
84 the s3c2410_wdt driver.
85
86
87MMC/SD/SDIO
88-----------
89
90 No current support for the MMC/SD/SDIO block.
91
92IIC
93---
94
95 The IIC hardware is the same as the S3C2410, and is supported by the
96 i2c-s3c24xx driver.
97
98
99IIS
100---
101
102 No current support for the IIS interface.
103
104
105SPI
106---
107
108 No current support for the SPI interfaces.
109
110
111ATA
112---
113
114 No current support for the on-board ATA block.
115
116
117Document Author
118---------------
119
120Ben Dooks, (c) 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
new file mode 100644
index 000000000000..ab2a88858f12
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
@@ -0,0 +1,21 @@
1 S3C2413 ARM Linux Overview
2 ==========================
3
4Introduction
5------------
6
7 The S3C2413 is an extended version of the S3C2412, with a camera
8 interface and mobile DDR memory support. See the S3C2412 support
9 documentation for more information.
10
11
12Camera Interface
13---------------
14
15 This block is currently not supported.
16
17
18Document Author
19---------------
20
21Ben Dooks, (c) 2006 Simtec Electronics
diff --git a/Documentation/arm/Sharp-LH/ADC-LH7-Touchscreen b/Documentation/arm/Sharp-LH/ADC-LH7-Touchscreen
new file mode 100644
index 000000000000..1e6a23fdf2fc
--- /dev/null
+++ b/Documentation/arm/Sharp-LH/ADC-LH7-Touchscreen
@@ -0,0 +1,61 @@
1README on the ADC/Touchscreen Controller
2========================================
3
4The LH79524 and LH7A404 include a built-in Analog to Digital
5controller (ADC) that is used to process input from a touchscreen.
6The driver only implements a four-wire touch panel protocol.
7
8The touchscreen driver is maintenance free except for the pen-down or
9touch threshold. Some resistive displays and board combinations may
10require tuning of this threshold. The driver exposes some of its
11internal state in the sys filesystem. If the kernel is configured
12with it, CONFIG_SYSFS, and sysfs is mounted at /sys, there will be a
13directory
14
15 /sys/devices/platform/adc-lh7.0
16
17containing these files.
18
19 -r--r--r-- 1 root root 4096 Jan 1 00:00 samples
20 -rw-r--r-- 1 root root 4096 Jan 1 00:00 threshold
21 -r--r--r-- 1 root root 4096 Jan 1 00:00 threshold_range
22
23The threshold is the current touch threshold. It defaults to 750 on
24most targets.
25
26 # cat threshold
27 750
28
29The threshold_range contains the range of valid values for the
30threshold. Values outside of this range will be silently ignored.
31
32 # cat threshold_range
33 0 1023
34
35To change the threshold, write a value to the threshold file.
36
37 # echo 500 > threshold
38 # cat threshold
39 500
40
41The samples file contains the most recently sampled values from the
42ADC. There are 12. Shown below are typical sampled values after
43the pen has been released. The first two and last two samples are for
44detecting whether or not the pen is down. The third through sixth are
45X coordinate samples. The seventh through tenth are Y coordinate
46samples.
47
48 # cat samples
49 1023 1023 0 0 0 0 530 529 530 529 1023 1023
50
51To determine a reasonable threshold, press on the touch panel with an
52appropriate stylus and read the values from samples.
53
54 # cat samples
55 1023 676 92 103 101 102 855 919 922 922 1023 679
56
57The first and eleventh samples are discarded. Thus, the important
58values are the second and twelfth which are used to determine if the
59pen is down. When both are below the threshold, the driver registers
60that the pen is down. When either is above the threshold, it
61registers that the pen is up.
diff --git a/Documentation/arm/Sharp-LH/LCDPanels b/Documentation/arm/Sharp-LH/LCDPanels
new file mode 100644
index 000000000000..fb1b21c2f2f4
--- /dev/null
+++ b/Documentation/arm/Sharp-LH/LCDPanels
@@ -0,0 +1,59 @@
1README on the LCD Panels
2========================
3
4Configuration options for several LCD panels, available from Logic PD,
5are included in the kernel source. This README will help you
6understand the configuration data and give you some guidance for
7adding support for other panels if you wish.
8
9
10lcd-panels.h
11------------
12
13There is no way, at present, to detect which panel is attached to the
14system at runtime. Thus the kernel configuration is static. The file
15arch/arm/mach-lh7a40x/lcd-panels.h (or similar) defines all of the
16panel specific parameters.
17
18It should be possible for this data to be shared among several device
19families. The current layout may be insufficiently general, but it is
20amenable to improvement.
21
22
23PIXEL_CLOCK
24-----------
25
26The panel data sheets will give a range of acceptable pixel clocks.
27The fundamental LCDCLK input frequency is divided down by a PCD
28constant in field '.tim2'. It may happen that it is impossible to set
29the pixel clock within this range. A clock which is too slow will
30tend to flicker. For the highest quality image, set the clock as high
31as possible.
32
33
34MARGINS
35-------
36
37These values may be difficult to glean from the panel data sheet. In
38the case of the Sharp panels, the upper margin is explicitly called
39out as a specific number of lines from the top of the frame. The
40other values may not matter as much as the panels tend to
41automatically center the image.
42
43
44Sync Sense
45----------
46
47The sense of the hsync and vsync pulses may be called out in the data
48sheet. On one panel, the sense of these pulses determines the height
49of the visible region on the panel. Most of the Sharp panels use
50negative sense sync pulses set by the TIM2_IHS and TIM2_IVS bits in
51'.tim2'.
52
53
54Pel Layout
55----------
56
57The Sharp color TFT panels are all configured for 16 bit direct color
58modes. The amba-lcd driver sets the pel mode to 565 for 5 bits each
59of red and blue and 6 bits of green.
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 23a1c2402bcc..2a63d5662a93 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -157,13 +157,13 @@ For example, smp_mb__before_atomic_dec() can be used like so:
157 smp_mb__before_atomic_dec(); 157 smp_mb__before_atomic_dec();
158 atomic_dec(&obj->ref_count); 158 atomic_dec(&obj->ref_count);
159 159
160It makes sure that all memory operations preceeding the atomic_dec() 160It makes sure that all memory operations preceding the atomic_dec()
161call are strongly ordered with respect to the atomic counter 161call are strongly ordered with respect to the atomic counter
162operation. In the above example, it guarentees that the assignment of 162operation. In the above example, it guarantees that the assignment of
163"1" to obj->dead will be globally visible to other cpus before the 163"1" to obj->dead will be globally visible to other cpus before the
164atomic counter decrement. 164atomic counter decrement.
165 165
166Without the explicitl smp_mb__before_atomic_dec() call, the 166Without the explicit smp_mb__before_atomic_dec() call, the
167implementation could legally allow the atomic counter update visible 167implementation could legally allow the atomic counter update visible
168to other cpus before the "obj->dead = 1;" assignment. 168to other cpus before the "obj->dead = 1;" assignment.
169 169
@@ -173,11 +173,11 @@ ordering with respect to memory operations after an atomic_dec() call
173(smp_mb__{before,after}_atomic_inc()). 173(smp_mb__{before,after}_atomic_inc()).
174 174
175A missing memory barrier in the cases where they are required by the 175A missing memory barrier in the cases where they are required by the
176atomic_t implementation above can have disasterous results. Here is 176atomic_t implementation above can have disastrous results. Here is
177an example, which follows a pattern occuring frequently in the Linux 177an example, which follows a pattern occurring frequently in the Linux
178kernel. It is the use of atomic counters to implement reference 178kernel. It is the use of atomic counters to implement reference
179counting, and it works such that once the counter falls to zero it can 179counting, and it works such that once the counter falls to zero it can
180be guarenteed that no other entity can be accessing the object: 180be guaranteed that no other entity can be accessing the object:
181 181
182static void obj_list_add(struct obj *obj) 182static void obj_list_add(struct obj *obj)
183{ 183{
@@ -291,9 +291,9 @@ to the size of an "unsigned long" C data type, and are least of that
291size. The endianness of the bits within each "unsigned long" are the 291size. The endianness of the bits within each "unsigned long" are the
292native endianness of the cpu. 292native endianness of the cpu.
293 293
294 void set_bit(unsigned long nr, volatils unsigned long *addr); 294 void set_bit(unsigned long nr, volatile unsigned long *addr);
295 void clear_bit(unsigned long nr, volatils unsigned long *addr); 295 void clear_bit(unsigned long nr, volatile unsigned long *addr);
296 void change_bit(unsigned long nr, volatils unsigned long *addr); 296 void change_bit(unsigned long nr, volatile unsigned long *addr);
297 297
298These routines set, clear, and change, respectively, the bit number 298These routines set, clear, and change, respectively, the bit number
299indicated by "nr" on the bit mask pointed to by "ADDR". 299indicated by "nr" on the bit mask pointed to by "ADDR".
@@ -301,9 +301,9 @@ indicated by "nr" on the bit mask pointed to by "ADDR".
301They must execute atomically, yet there are no implicit memory barrier 301They must execute atomically, yet there are no implicit memory barrier
302semantics required of these interfaces. 302semantics required of these interfaces.
303 303
304 int test_and_set_bit(unsigned long nr, volatils unsigned long *addr); 304 int test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
305 int test_and_clear_bit(unsigned long nr, volatils unsigned long *addr); 305 int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
306 int test_and_change_bit(unsigned long nr, volatils unsigned long *addr); 306 int test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
307 307
308Like the above, except that these routines return a boolean which 308Like the above, except that these routines return a boolean which
309indicates whether the changed bit was set _BEFORE_ the atomic bit 309indicates whether the changed bit was set _BEFORE_ the atomic bit
@@ -335,7 +335,7 @@ subsequent memory operation is made visible. For example:
335 /* ... */; 335 /* ... */;
336 obj->killed = 1; 336 obj->killed = 1;
337 337
338The implementation of test_and_set_bit() must guarentee that 338The implementation of test_and_set_bit() must guarantee that
339"obj->dead = 1;" is visible to cpus before the atomic memory operation 339"obj->dead = 1;" is visible to cpus before the atomic memory operation
340done by test_and_set_bit() becomes visible. Likewise, the atomic 340done by test_and_set_bit() becomes visible. Likewise, the atomic
341memory operation done by test_and_set_bit() must become visible before 341memory operation done by test_and_set_bit() must become visible before
@@ -474,7 +474,7 @@ Now, as far as memory barriers go, as long as spin_lock()
474strictly orders all subsequent memory operations (including 474strictly orders all subsequent memory operations (including
475the cas()) with respect to itself, things will be fine. 475the cas()) with respect to itself, things will be fine.
476 476
477Said another way, _atomic_dec_and_lock() must guarentee that 477Said another way, _atomic_dec_and_lock() must guarantee that
478a counter dropping to zero is never made visible before the 478a counter dropping to zero is never made visible before the
479spinlock being acquired. 479spinlock being acquired.
480 480
diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt
new file mode 100644
index 000000000000..d3e17447321c
--- /dev/null
+++ b/Documentation/console/console.txt
@@ -0,0 +1,144 @@
1Console Drivers
2===============
3
4The linux kernel has 2 general types of console drivers. The first type is
5assigned by the kernel to all the virtual consoles during the boot process.
6This type will be called 'system driver', and only one system driver is allowed
7to exist. The system driver is persistent and it can never be unloaded, though
8it may become inactive.
9
10The second type has to be explicitly loaded and unloaded. This will be called
11'modular driver' by this document. Multiple modular drivers can coexist at
12any time with each driver sharing the console with other drivers including
13the system driver. However, modular drivers cannot take over the console
14that is currently occupied by another modular driver. (Exception: Drivers that
15call take_over_console() will succeed in the takeover regardless of the type
16of driver occupying the consoles.) They can only take over the console that is
17occupied by the system driver. By the same token, if the modular driver is
18released by the console, the system driver will take over.
19
20Modular drivers, from the programmer's point of view, have to call:
21
22 take_over_console() - load and bind driver to console layer
23 give_up_console() - unbind and unload driver
24
25In newer kernels, the following are also available:
26
27 register_con_driver()
28 unregister_con_driver()
29
30If sysfs is enabled, the contents of /sys/class/vtconsole can be
31examined. This shows the console backends currently registered by the
32system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
33
34 ls /sys/class/vtconsole
35 . .. vtcon0 vtcon1
36
37Each directory in /sys/class/vtconsole has 3 files:
38
39 ls /sys/class/vtconsole/vtcon0
40 . .. bind name uevent
41
42What do these files signify?
43
44 1. bind - this is a read/write file. It shows the status of the driver if
45 read, or acts to bind or unbind the driver to the virtual consoles
46 when written to. The possible values are:
47
48 0 - means the driver is not bound and if echo'ed, commands the driver
49 to unbind
50
51 1 - means the driver is bound and if echo'ed, commands the driver to
52 bind
53
54 2. name - read-only file. Shows the name of the driver in this format:
55
56 cat /sys/class/vtconsole/vtcon0/name
57 (S) VGA+
58
59 '(S)' stands for a (S)ystem driver, ie, it cannot be directly
60 commanded to bind or unbind
61
62 'VGA+' is the name of the driver
63
64 cat /sys/class/vtconsole/vtcon1/name
65 (M) frame buffer device
66
67 In this case, '(M)' stands for a (M)odular driver, one that can be
68 directly commanded to bind or unbind.
69
70 3. uevent - ignore this file
71
72When unbinding, the modular driver is detached first, and then the system
73driver takes over the consoles vacated by the driver. Binding, on the other
74hand, will bind the driver to the consoles that are currently occupied by a
75system driver.
76
77NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
78
79Device Drivers -> Character devices -> Support for binding and unbinding
80console drivers
81
82NOTE2: If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
83unbinding will not succeed. An example of an application that sets the console
84to KD_GRAPHICS is X.
85
86How useful is this feature? This is very useful for console driver
87developers. By unbinding the driver from the console layer, one can unload the
88driver, make changes, recompile, reload and rebind the driver without any need
89for rebooting the kernel. For regular users who may want to switch from
90framebuffer console to VGA console and vice versa, this feature also makes
91this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
92for more details).
93
94Notes for developers:
95=====================
96
97take_over_console() is now broken up into:
98
99 register_con_driver()
100 bind_con_driver() - private function
101
102give_up_console() is a wrapper to unregister_con_driver(), and a driver must
103be fully unbound for this call to succeed. con_is_bound() will check if the
104driver is bound or not.
105
106Guidelines for console driver writers:
107=====================================
108
109In order for binding to and unbinding from the console to properly work,
110console drivers must follow these guidelines:
111
1121. All drivers, except system drivers, must call either register_con_driver()
113 or take_over_console(). register_con_driver() will just add the driver to
114 the console's internal list. It won't take over the
115 console. take_over_console(), as its name implies, will also take over (or
116 bind to) the console.
117
1182. All resources allocated during con->con_init() must be released in
119 con->con_deinit().
120
1213. All resources allocated in con->con_startup() must be released when the
122 driver, which was previously bound, becomes unbound. The console layer
123 does not have a complementary call to con->con_startup() so it's up to the
124 driver to check when it's legal to release these resources. Calling
125 con_is_bound() in con->con_deinit() will help. If the call returns
126 false, then it's safe to release the resources. This balance has to be
127 ensured because con->con_startup() can be called again when a request to
128 rebind the driver to the console arrives.
129
1304. Upon exit of the driver, ensure that the driver is totally unbound. If the
131 condition is satisfied, then the driver must call unregister_con_driver()
132 or give_up_console().
133
1345. unregister_con_driver() can also be called on conditions which make it
135 impossible for the driver to service console requests. This can happen
136 with the framebuffer console that suddenly lost all of its drivers.
137
138The current crop of console drivers should still work correctly, but binding
139and unbinding them may cause problems. With minimal fixes, these drivers can
140be made to work correctly.
141
142==========================
143Antonino Daplas <adaplas@pol.net>
144
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index b369a8c46a73..4aaf68fafebe 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -3,7 +3,7 @@
3 3
4 Maintained by Torben Mathiasen <device@lanana.org> 4 Maintained by Torben Mathiasen <device@lanana.org>
5 5
6 Last revised: 25 January 2005 6 Last revised: 15 May 2006
7 7
8This list is the Linux Device List, the official registry of allocated 8This list is the Linux Device List, the official registry of allocated
9device numbers and /dev directory nodes for the Linux operating 9device numbers and /dev directory nodes for the Linux operating
@@ -94,7 +94,6 @@ Your cooperation is appreciated.
94 9 = /dev/urandom Faster, less secure random number gen. 94 9 = /dev/urandom Faster, less secure random number gen.
95 10 = /dev/aio Asynchronous I/O notification interface 95 10 = /dev/aio Asynchronous I/O notification interface
96 11 = /dev/kmsg Writes to this come out as printk's 96 11 = /dev/kmsg Writes to this come out as printk's
97 12 = /dev/oldmem Access to crash dump from kexec kernel
98 1 block RAM disk 97 1 block RAM disk
99 0 = /dev/ram0 First RAM disk 98 0 = /dev/ram0 First RAM disk
100 1 = /dev/ram1 Second RAM disk 99 1 = /dev/ram1 Second RAM disk
@@ -262,13 +261,13 @@ Your cooperation is appreciated.
262 NOTE: These devices permit both read and write access. 261 NOTE: These devices permit both read and write access.
263 262
264 7 block Loopback devices 263 7 block Loopback devices
265 0 = /dev/loop0 First loopback device 264 0 = /dev/loop0 First loop device
266 1 = /dev/loop1 Second loopback device 265 1 = /dev/loop1 Second loop device
267 ... 266 ...
268 267
269 The loopback devices are used to mount filesystems not 268 The loop devices are used to mount filesystems not
270 associated with block devices. The binding to the 269 associated with block devices. The binding to the
271 loopback devices is handled by mount(8) or losetup(8). 270 loop devices is handled by mount(8) or losetup(8).
272 271
273 8 block SCSI disk devices (0-15) 272 8 block SCSI disk devices (0-15)
274 0 = /dev/sda First SCSI disk whole disk 273 0 = /dev/sda First SCSI disk whole disk
@@ -943,7 +942,7 @@ Your cooperation is appreciated.
943 240 = /dev/ftlp FTL on 16th Memory Technology Device 942 240 = /dev/ftlp FTL on 16th Memory Technology Device
944 943
945 Partitions are handled in the same way as for IDE 944 Partitions are handled in the same way as for IDE
946 disks (see major number 3) expect that the partition 945 disks (see major number 3) except that the partition
947 limit is 15 rather than 63 per disk (same as SCSI.) 946 limit is 15 rather than 63 per disk (same as SCSI.)
948 947
949 45 char isdn4linux ISDN BRI driver 948 45 char isdn4linux ISDN BRI driver
@@ -1168,7 +1167,7 @@ Your cooperation is appreciated.
1168 The filename of the encrypted container and the passwords 1167 The filename of the encrypted container and the passwords
1169 are sent via ioctls (using the sdmount tool) to the master 1168 are sent via ioctls (using the sdmount tool) to the master
1170 node which then activates them via one of the 1169 node which then activates them via one of the
1171 /dev/scramdisk/x nodes for loopback mounting (all handled 1170 /dev/scramdisk/x nodes for loop mounting (all handled
1172 through the sdmount tool). 1171 through the sdmount tool).
1173 1172
1174 Requested by: andy@scramdisklinux.org 1173 Requested by: andy@scramdisklinux.org
@@ -2538,18 +2537,32 @@ Your cooperation is appreciated.
2538 0 = /dev/usb/lp0 First USB printer 2537 0 = /dev/usb/lp0 First USB printer
2539 ... 2538 ...
2540 15 = /dev/usb/lp15 16th USB printer 2539 15 = /dev/usb/lp15 16th USB printer
2541 16 = /dev/usb/mouse0 First USB mouse
2542 ...
2543 31 = /dev/usb/mouse15 16th USB mouse
2544 32 = /dev/usb/ez0 First USB firmware loader
2545 ...
2546 47 = /dev/usb/ez15 16th USB firmware loader
2547 48 = /dev/usb/scanner0 First USB scanner 2540 48 = /dev/usb/scanner0 First USB scanner
2548 ... 2541 ...
2549 63 = /dev/usb/scanner15 16th USB scanner 2542 63 = /dev/usb/scanner15 16th USB scanner
2550 64 = /dev/usb/rio500 Diamond Rio 500 2543 64 = /dev/usb/rio500 Diamond Rio 500
2551 65 = /dev/usb/usblcd USBLCD Interface (info@usblcd.de) 2544 65 = /dev/usb/usblcd USBLCD Interface (info@usblcd.de)
2552 66 = /dev/usb/cpad0 Synaptics cPad (mouse/LCD) 2545 66 = /dev/usb/cpad0 Synaptics cPad (mouse/LCD)
2546 96 = /dev/usb/hiddev0 1st USB HID device
2547 ...
2548 111 = /dev/usb/hiddev15 16th USB HID device
2549 112 = /dev/usb/auer0 1st auerswald ISDN device
2550 ...
2551 127 = /dev/usb/auer15 16th auerswald ISDN device
2552 128 = /dev/usb/brlvgr0 First Braille Voyager device
2553 ...
2554 131 = /dev/usb/brlvgr3 Fourth Braille Voyager device
2555 132 = /dev/usb/idmouse ID Mouse (fingerprint scanner) device
2556 133 = /dev/usb/sisusbvga1 First SiSUSB VGA device
2557 ...
2558 140 = /dev/usb/sisusbvga8 Eighth SiSUSB VGA device
2559 144 = /dev/usb/lcd USB LCD device
2560 160 = /dev/usb/legousbtower0 1st USB Legotower device
2561 ...
2562 175 = /dev/usb/legousbtower15 16th USB Legotower device
2563 240 = /dev/usb/dabusb0 First dabusb device
2564 ...
2565 243 = /dev/usb/dabusb3 Fourth dabusb device
2553 2566
2554180 block USB block devices 2567180 block USB block devices
2555 0 = /dev/uba First USB block device 2568 0 = /dev/uba First USB block device
@@ -2710,6 +2723,17 @@ Your cooperation is appreciated.
2710 1 = /dev/cpu/1/msr MSRs on CPU 1 2723 1 = /dev/cpu/1/msr MSRs on CPU 1
2711 ... 2724 ...
2712 2725
2726202 block Xen Virtual Block Device
2727 0 = /dev/xvda First Xen VBD whole disk
2728 16 = /dev/xvdb Second Xen VBD whole disk
2729 32 = /dev/xvdc Third Xen VBD whole disk
2730 ...
2731 240 = /dev/xvdp Sixteenth Xen VBD whole disk
2732
2733 Partitions are handled in the same way as for IDE
2734 disks (see major number 3) except that the limit on
2735 partitions is 15.
2736
2713203 char CPU CPUID information 2737203 char CPU CPUID information
2714 0 = /dev/cpu/0/cpuid CPUID on CPU 0 2738 0 = /dev/cpu/0/cpuid CPUID on CPU 0
2715 1 = /dev/cpu/1/cpuid CPUID on CPU 1 2739 1 = /dev/cpu/1/cpuid CPUID on CPU 1
@@ -2747,11 +2771,27 @@ Your cooperation is appreciated.
2747 46 = /dev/ttyCPM0 PPC CPM (SCC or SMC) - port 0 2771 46 = /dev/ttyCPM0 PPC CPM (SCC or SMC) - port 0
2748 ... 2772 ...
2749 47 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 5 2773 47 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 5
2750 50 = /dev/ttyIOC40 Altix serial card 2774 50 = /dev/ttyIOC0 Altix serial card
2775 ...
2776 81 = /dev/ttyIOC31 Altix serial card
2777 82 = /dev/ttyVR0 NEC VR4100 series SIU
2778 83 = /dev/ttyVR1 NEC VR4100 series DSIU
2779 84 = /dev/ttyIOC84 Altix ioc4 serial card
2780 ...
2781 115 = /dev/ttyIOC115 Altix ioc4 serial card
2782 116 = /dev/ttySIOC0 Altix ioc3 serial card
2783 ...
2784 147 = /dev/ttySIOC31 Altix ioc3 serial card
2785 148 = /dev/ttyPSC0 PPC PSC - port 0
2786 ...
2787 153 = /dev/ttyPSC5 PPC PSC - port 5
2788 154 = /dev/ttyAT0 ATMEL serial port 0
2789 ...
2790 169 = /dev/ttyAT15 ATMEL serial port 15
2791 170 = /dev/ttyNX0 Hilscher netX serial port 0
2751 ... 2792 ...
2752 81 = /dev/ttyIOC431 Altix serial card 2793 185 = /dev/ttyNX15 Hilscher netX serial port 15
2753 82 = /dev/ttyVR0 NEC VR4100 series SIU 2794 186 = /dev/ttyJ0 JTAG1 DCC protocol based serial port emulation
2754 83 = /dev/ttyVR1 NEC VR4100 series DSIU
2755 2795
2756205 char Low-density serial ports (alternate device) 2796205 char Low-density serial ports (alternate device)
2757 0 = /dev/culu0 Callout device for ttyLU0 2797 0 = /dev/culu0 Callout device for ttyLU0
@@ -2786,8 +2826,8 @@ Your cooperation is appreciated.
2786 50 = /dev/cuioc40 Callout device for ttyIOC40 2826 50 = /dev/cuioc40 Callout device for ttyIOC40
2787 ... 2827 ...
2788 81 = /dev/cuioc431 Callout device for ttyIOC431 2828 81 = /dev/cuioc431 Callout device for ttyIOC431
2789 82 = /dev/cuvr0 Callout device for ttyVR0 2829 82 = /dev/cuvr0 Callout device for ttyVR0
2790 83 = /dev/cuvr1 Callout device for ttyVR1 2830 83 = /dev/cuvr1 Callout device for ttyVR1
2791 2831
2792 2832
2793206 char OnStream SC-x0 tape devices 2833206 char OnStream SC-x0 tape devices
@@ -2897,7 +2937,6 @@ Your cooperation is appreciated.
2897 ... 2937 ...
2898 196 = /dev/dvb/adapter3/video0 first video decoder of fourth card 2938 196 = /dev/dvb/adapter3/video0 first video decoder of fourth card
2899 2939
2900
2901216 char Bluetooth RFCOMM TTY devices 2940216 char Bluetooth RFCOMM TTY devices
2902 0 = /dev/rfcomm0 First Bluetooth RFCOMM TTY device 2941 0 = /dev/rfcomm0 First Bluetooth RFCOMM TTY device
2903 1 = /dev/rfcomm1 Second Bluetooth RFCOMM TTY device 2942 1 = /dev/rfcomm1 Second Bluetooth RFCOMM TTY device
@@ -3002,12 +3041,43 @@ Your cooperation is appreciated.
3002 ioctl()'s can be used to rewind the tape regardless of 3041 ioctl()'s can be used to rewind the tape regardless of
3003 the device used to access it. 3042 the device used to access it.
3004 3043
3005231 char InfiniBand MAD 3044231 char InfiniBand
3006 0 = /dev/infiniband/umad0 3045 0 = /dev/infiniband/umad0
3007 1 = /dev/infiniband/umad1 3046 1 = /dev/infiniband/umad1
3008 ... 3047 ...
3048 63 = /dev/infiniband/umad63 64th InfiniBand MAD device
3049 64 = /dev/infiniband/issm0 First InfiniBand IsSM device
3050 65 = /dev/infiniband/issm1 Second InfiniBand IsSM device
3051 ...
3052 127 = /dev/infiniband/issm63 64th InfiniBand IsSM device
3053 128 = /dev/infiniband/uverbs0 First InfiniBand verbs device
3054 129 = /dev/infiniband/uverbs1 Second InfiniBand verbs device
3055 ...
3056 159 = /dev/infiniband/uverbs31 32nd InfiniBand verbs device
3057
3058232 char Biometric Devices
3059 0 = /dev/biometric/sensor0/fingerprint first fingerprint sensor on first device
3060 1 = /dev/biometric/sensor0/iris first iris sensor on first device
3061 2 = /dev/biometric/sensor0/retina first retina sensor on first device
3062 3 = /dev/biometric/sensor0/voiceprint first voiceprint sensor on first device
3063 4 = /dev/biometric/sensor0/facial first facial sensor on first device
3064 5 = /dev/biometric/sensor0/hand first hand sensor on first device
3065 ...
3066 10 = /dev/biometric/sensor1/fingerprint first fingerprint sensor on second device
3067 ...
3068 20 = /dev/biometric/sensor2/fingerprint first fingerprint sensor on third device
3069 ...
3070
3071233 char PathScale InfiniPath interconnect
3072 0 = /dev/ipath Primary device for programs (any unit)
3073 1 = /dev/ipath0 Access specifically to unit 0
3074 2 = /dev/ipath1 Access specifically to unit 1
3075 ...
3076 4 = /dev/ipath3 Access specifically to unit 3
3077 129 = /dev/ipath_sma Device used by Subnet Management Agent
3078 130 = /dev/ipath_diag Device used by diagnostics programs
3009 3079
3010232-239 UNASSIGNED 3080234-239 UNASSIGNED
3011 3081
3012240-254 char LOCAL/EXPERIMENTAL USE 3082240-254 char LOCAL/EXPERIMENTAL USE
3013240-254 block LOCAL/EXPERIMENTAL USE 3083240-254 block LOCAL/EXPERIMENTAL USE
@@ -3021,6 +3091,28 @@ Your cooperation is appreciated.
3021 This major is reserved to assist the expansion to a 3091 This major is reserved to assist the expansion to a
3022 larger number space. No device nodes with this major 3092 larger number space. No device nodes with this major
3023 should ever be created on the filesystem. 3093 should ever be created on the filesystem.
3094 (This is probably not true anymore, but I'll leave it
3095 for now /Torben)
3096
3097---LARGE MAJORS!!!!!---
3098
3099256 char Equinox SST multi-port serial boards
3100 0 = /dev/ttyEQ0 First serial port on first Equinox SST board
3101 127 = /dev/ttyEQ127 Last serial port on first Equinox SST board
3102 128 = /dev/ttyEQ128 First serial port on second Equinox SST board
3103 ...
3104 1027 = /dev/ttyEQ1027 Last serial port on eighth Equinox SST board
3105
3106256 block Resident Flash Disk Flash Translation Layer
3107 0 = /dev/rfda First RFD FTL layer
3108 16 = /dev/rfdb Second RFD FTL layer
3109 ...
3110 240 = /dev/rfdp 16th RFD FTL layer
3111
3112257 char Phoenix Technologies Cryptographic Services Driver
3113 0 = /dev/ptlsec Crypto Services Driver
3114
3115
3024 3116
3025 **** ADDITIONAL /dev DIRECTORY ENTRIES 3117 **** ADDITIONAL /dev DIRECTORY ENTRIES
3026 3118
diff --git a/Documentation/digiepca.txt b/Documentation/digiepca.txt
index 88820fe38dad..f2560e22f2c9 100644
--- a/Documentation/digiepca.txt
+++ b/Documentation/digiepca.txt
@@ -2,7 +2,7 @@ NOTE: This driver is obsolete. Digi provides a 2.6 driver (dgdm) at
2http://www.digi.com for PCI cards. They no longer maintain this driver, 2http://www.digi.com for PCI cards. They no longer maintain this driver,
3and have no 2.6 driver for ISA cards. 3and have no 2.6 driver for ISA cards.
4 4
5This driver requires a number of user-space tools. They can be aquired from 5This driver requires a number of user-space tools. They can be acquired from
6http://www.digi.com, but only work with 2.4 kernels. 6http://www.digi.com, but only work with 2.4 kernels.
7 7
8 8
diff --git a/Documentation/driver-model/overview.txt b/Documentation/driver-model/overview.txt
index ac4a7a737e43..2050c9ffc629 100644
--- a/Documentation/driver-model/overview.txt
+++ b/Documentation/driver-model/overview.txt
@@ -18,7 +18,7 @@ Traditional driver models implemented some sort of tree-like structure
18(sometimes just a list) for the devices they control. There wasn't any 18(sometimes just a list) for the devices they control. There wasn't any
19uniformity across the different bus types. 19uniformity across the different bus types.
20 20
21The current driver model provides a comon, uniform data model for describing 21The current driver model provides a common, uniform data model for describing
22a bus and the devices that can appear under the bus. The unified bus 22a bus and the devices that can appear under the bus. The unified bus
23model includes a set of common attributes which all busses carry, and a set 23model includes a set of common attributes which all busses carry, and a set
24of common callbacks, such as device discovery during bus probing, bus 24of common callbacks, such as device discovery during bus probing, bus
diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt
index 08dce0f631bf..f373df12ed4c 100644
--- a/Documentation/fb/fbcon.txt
+++ b/Documentation/fb/fbcon.txt
@@ -135,10 +135,10 @@ C. Boot options
135 135
136 The angle can be changed anytime afterwards by 'echoing' the same 136 The angle can be changed anytime afterwards by 'echoing' the same
137 numbers to any one of the 2 attributes found in 137 numbers to any one of the 2 attributes found in
138 /sys/class/graphics/fb{x} 138 /sys/class/graphics/fbcon
139 139
140 con_rotate - rotate the display of the active console 140 rotate - rotate the display of the active console
141 con_rotate_all - rotate the display of all consoles 141 rotate_all - rotate the display of all consoles
142 142
143 Console rotation will only become available if Console Rotation 143 Console rotation will only become available if Console Rotation
144 Support is compiled in your kernel. 144 Support is compiled in your kernel.
@@ -148,5 +148,177 @@ C. Boot options
148 Actually, the underlying fb driver is totally ignorant of console 148 Actually, the underlying fb driver is totally ignorant of console
149 rotation. 149 rotation.
150 150
151--- 151D. Attaching, Detaching and Unloading
152
153Before going into how to attach, detach and unload the framebuffer console, an
154illustration of the dependencies may help.
155
156The console layer, as with most subsystems, needs a driver that interfaces with
157the hardware. Thus, in a VGA console:
158
159console ---> VGA driver ---> hardware.
160
161Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
162from the console layer before unloading the driver. The VGA driver cannot be
163unloaded if it is still bound to the console layer. (See
164Documentation/console/console.txt for more information).
165
166This is more complicated in the case of the framebuffer console (fbcon),
167because fbcon is an intermediate layer between the console and the drivers:
168
169console ---> fbcon ---> fbdev drivers ---> hardware
170
171The fbdev drivers cannot be unloaded if they're bound to fbcon, and fbcon cannot
172be unloaded if it's bound to the console layer.
173
174So to unload the fbdev drivers, one must first unbind fbcon from the console,
175then unbind the fbdev drivers from fbcon. Fortunately, unbinding fbcon from
176the console layer will automatically unbind framebuffer drivers from
177fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
178fbcon.
179
180So, how do we unbind fbcon from the console? Part of the answer is in
181Documentation/console/console.txt. To summarize:
182
183Echo a value to the bind file that represents the framebuffer console
184driver. So assuming vtcon1 represents fbcon, then:
185
186echo 1 > /sys/class/vtconsole/vtcon1/bind - attach framebuffer console to
187 console layer
188echo 0 > /sys/class/vtconsole/vtcon1/bind - detach framebuffer console from
189 console layer
190
191If fbcon is detached from the console layer, your boot console driver (which is
192usually VGA text mode) will take over. A few drivers (rivafb and i810fb) will
193restore VGA text mode for you. With the rest, before detaching fbcon, you
194must take a few additional steps to make sure that your VGA text mode is
195restored properly. The following is one of the several methods that you can do:
196
1971. Download or install vbetool. This utility is included with most
198 distributions nowadays, and is usually part of the suspend/resume tool.
199
2002. In your kernel configuration, ensure that CONFIG_FRAMEBUFFER_CONSOLE is set
201 to 'y' or 'm'. Enable one or more of your favorite framebuffer drivers.
202
2033. Boot into text mode and as root run:
204
205 vbetool vbestate save > <vga state file>
206
207 The above command saves the register contents of your graphics
208 hardware to <vga state file>. You need to do this step only once as
209 the state file can be reused.
210
2114. If fbcon is compiled as a module, load fbcon by doing:
212
213 modprobe fbcon
214
2155. Now to detach fbcon:
216
217 vbetool vbestate restore < <vga state file> && \
218 echo 0 > /sys/class/vtconsole/vtcon1/bind
219
2206. That's it, you're back to VGA mode. And if you compiled fbcon as a module,
221 you can unload it by 'rmmod fbcon'
222
2237. To reattach fbcon:
224
225 echo 1 > /sys/class/vtconsole/vtcon1/bind
226
2278. Once fbcon is unbound, all drivers registered to the system will also
228become unbound. This means that fbcon and individual framebuffer drivers
229can be unloaded or reloaded at will. Reloading the drivers or fbcon will
230automatically bind the console, fbcon and the drivers together. Unloading
231all the drivers without unloading fbcon will make it impossible for the
232console to bind fbcon.
233
234Notes for vesafb users:
235=======================
236
237Unfortunately, if your bootline includes a vga=xxx parameter that sets the
238hardware in graphics mode, such as when loading vesafb, vgacon will not load.
239Instead, vgacon will replace the default boot console with dummycon, and you
240won't get any display after detaching fbcon. Your machine is still alive, so
241you can reattach vesafb. However, to reattach vesafb, you need to do one of
242the following:
243
244Variation 1:
245
246 a. Before detaching fbcon, do
247
248 vbetool vbestate save > <vesa state file> # do once for each vesafb mode,
249 # the file can be reused
250
251 b. Detach fbcon as in step 5.
252
253 c. Attach fbcon
254
255 vbetool vbestate restore < <vesa state file> && \
256 echo 1 > /sys/class/vtconsole/vtcon1/bind
257
258Variation 2:
259
260 a. Before detaching fbcon, do:
261 echo <ID> > /sys/class/tty/console/bind
262
263
264 vbetool vbemode get
265
266 b. Take note of the mode number
267
268 c. Detach fbcon as in step 5.
269
270 d. Attach fbcon:
271
272 vbetool vbemode set <mode number> && \
273 echo 1 > /sys/class/vtconsole/vtcon1/bind
274
275Samples:
276========
277
278Here are 2 sample bash scripts that you can use to bind or unbind the
279framebuffer console driver if you are in an X86 box:
280
281---------------------------------------------------------------------------
282#!/bin/bash
283# Unbind fbcon
284
285# Change this to where your actual vgastate file is located
286# Or Use VGASTATE=$1 to indicate the state file at runtime
287VGASTATE=/tmp/vgastate
288
289# path to vbetool
290VBETOOL=/usr/local/bin
291
292
293for (( i = 0; i < 16; i++))
294do
295 if test -x /sys/class/vtconsole/vtcon$i; then
296 if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
297 = 1 ]; then
298 if test -x $VBETOOL/vbetool; then
299 echo Unbinding vtcon$i
300 $VBETOOL/vbetool vbestate restore < $VGASTATE
301 echo 0 > /sys/class/vtconsole/vtcon$i/bind
302 fi
303 fi
304 fi
305done
306
307---------------------------------------------------------------------------
308#!/bin/bash
309# Bind fbcon
310
311for (( i = 0; i < 16; i++))
312do
313 if test -x /sys/class/vtconsole/vtcon$i; then
314 if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
315 = 1 ]; then
316 echo Binding vtcon$i
317 echo 1 > /sys/class/vtconsole/vtcon$i/bind
318 fi
319 fi
320done
321---------------------------------------------------------------------------
322
323--
152Antonino Daplas <adaplas@pol.net> 324Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 43ab119963d5..99f219a01e0e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -6,17 +6,6 @@ be removed from this file.
6 6
7--------------------------- 7---------------------------
8 8
9What: devfs
10When: July 2005
11Files: fs/devfs/*, include/linux/devfs_fs*.h and assorted devfs
12 function calls throughout the kernel tree
13Why: It has been unmaintained for a number of years, has unfixable
14 races, contains a naming policy within the kernel that is
15 against the LSB, and can be replaced by using udev.
16Who: Greg Kroah-Hartman <greg@kroah.com>
17
18---------------------------
19
20What: RAW driver (CONFIG_RAW_DRIVER) 9What: RAW driver (CONFIG_RAW_DRIVER)
21When: December 2005 10When: December 2005
22Why: declared obsolete since kernel 2.6.3 11Why: declared obsolete since kernel 2.6.3
@@ -33,27 +22,12 @@ Who: Adrian Bunk <bunk@stusta.de>
33 22
34--------------------------- 23---------------------------
35 24
36What: RCU API moves to EXPORT_SYMBOL_GPL
37When: April 2006
38Files: include/linux/rcupdate.h, kernel/rcupdate.c
39Why: Outside of Linux, the only implementations of anything even
40 vaguely resembling RCU that I am aware of are in DYNIX/ptx,
41 VM/XA, Tornado, and K42. I do not expect anyone to port binary
42 drivers or kernel modules from any of these, since the first two
43 are owned by IBM and the last two are open-source research OSes.
44 So these will move to GPL after a grace period to allow
45 people, who might be using implementations that I am not aware
46 of, to adjust to this upcoming change.
47Who: Paul E. McKenney <paulmck@us.ibm.com>
48
49---------------------------
50
51What: raw1394: requests of type RAW1394_REQ_ISO_SEND, RAW1394_REQ_ISO_LISTEN 25What: raw1394: requests of type RAW1394_REQ_ISO_SEND, RAW1394_REQ_ISO_LISTEN
52When: November 2005 26When: November 2006
53Why: Deprecated in favour of the new ioctl-based rawiso interface, which is 27Why: Deprecated in favour of the new ioctl-based rawiso interface, which is
54 more efficient. You should really be using libraw1394 for raw1394 28 more efficient. You should really be using libraw1394 for raw1394
55 access anyway. 29 access anyway.
56Who: Jody McIntyre <scjody@steamballoon.com> 30Who: Jody McIntyre <scjody@modernduck.com>
57 31
58--------------------------- 32---------------------------
59 33
@@ -147,16 +121,6 @@ Who: NeilBrown <neilb@suse.de>
147 121
148--------------------------- 122---------------------------
149 123
150What: au1x00_uart driver
151When: January 2006
152Why: The 8250 serial driver now has the ability to deal with the differences
153 between the standard 8250 family of UARTs and their slightly strange
154 brother on Alchemy SOCs. The loss of features is not considered an
155 issue.
156Who: Ralf Baechle <ralf@linux-mips.org>
157
158---------------------------
159
160What: eepro100 network driver 124What: eepro100 network driver
161When: January 2007 125When: January 2007
162Why: replaced by the e100 driver 126Why: replaced by the e100 driver
@@ -192,6 +156,16 @@ Who: Jean Delvare <khali@linux-fr.org>
192 156
193--------------------------- 157---------------------------
194 158
159What: Unused EXPORT_SYMBOL/EXPORT_SYMBOL_GPL exports
160 (temporary transition config option provided until then)
161 The transition config option will also be removed at the same time.
162When: before 2.6.19
163Why: Unused symbols are both increasing the size of the kernel binary
164 and are often a sign of "wrong API"
165Who: Arjan van de Ven <arjan@linux.intel.com>
166
167---------------------------
168
195What: remove EXPORT_SYMBOL(tasklist_lock) 169What: remove EXPORT_SYMBOL(tasklist_lock)
196When: August 2006 170When: August 2006
197Files: kernel/fork.c 171Files: kernel/fork.c
@@ -212,15 +186,6 @@ Who: Greg Kroah-Hartman <gregkh@suse.de>
212 186
213--------------------------- 187---------------------------
214 188
215What: Support for NEC DDB5074 and DDB5476 evaluation boards.
216When: June 2006
217Why: Board specific code doesn't build anymore since ~2.6.0 and no
218 users have complained indicating there is no more need for these
219 boards. This should really be considered a last call.
220Who: Ralf Baechle <ralf@linux-mips.org>
221
222---------------------------
223
224What: USB driver API moves to EXPORT_SYMBOL_GPL 189What: USB driver API moves to EXPORT_SYMBOL_GPL
225When: February 2008 190When: February 2008
226Files: include/linux/usb.h, drivers/usb/core/driver.c 191Files: include/linux/usb.h, drivers/usb/core/driver.c
@@ -248,3 +213,56 @@ Why: The interface no longer has any callers left in the kernel. It
248Who: Nick Piggin <npiggin@suse.de> 213Who: Nick Piggin <npiggin@suse.de>
249 214
250--------------------------- 215---------------------------
216
217What: Support for the MIPS EV96100 evaluation board
218When: September 2006
219Why: Has not built since at least November 15, 2003; apparently
220 no user base left.
221Who: Ralf Baechle <ralf@linux-mips.org>
222
223---------------------------
224
225What: Support for the Momentum / PMC-Sierra Jaguar ATX evaluation board
226When: September 2006
227Why: Has not built for quite some time, and was never popular,
228 due to the platform being replaced by successor models. Apparently
229 no user base left. It also is one of the last users of
230 WANT_PAGE_VIRTUAL.
231Who: Ralf Baechle <ralf@linux-mips.org>
232
233---------------------------
234
235What: Support for the Momentum Ocelot, Ocelot 3, Ocelot C and Ocelot G
236When: September 2006
237Why: Some no longer build and apparently there is no user base left
238 for these platforms.
239Who: Ralf Baechle <ralf@linux-mips.org>
240
241---------------------------
242
243What: Support for MIPS Technologies' Atlas and SEAD evaluation boards
244When: September 2006
245Why: Some no longer build and apparently there is no user base left
246 for these platforms. Hardware has been out of production for several years.
247Who: Ralf Baechle <ralf@linux-mips.org>
248
249---------------------------
250
251What: Support for the IT8172-based platforms, ITE 8172G and Globespan IVR
252When: September 2006
253Why: Code has not built since at least 2.6.0; apparently there is
254 no user base left for these platforms. Hardware out of production
255 for several years and hardly a trace of the manufacturer left on
256 the net.
257Who: Ralf Baechle <ralf@linux-mips.org>
258
259---------------------------
260
261What: Interrupt only SA_* flags
262When: January 2007
263Why: The interrupt related SA_* flags are replaced by IRQF_* to move them
264 out of the signal namespace.
265
266Who: Thomas Gleixner <tglx@linutronix.de>
267
268---------------------------
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 1045da582b9b..d31efbbdfe50 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -99,7 +99,7 @@ prototypes:
99 int (*sync_fs)(struct super_block *sb, int wait); 99 int (*sync_fs)(struct super_block *sb, int wait);
100 void (*write_super_lockfs) (struct super_block *); 100 void (*write_super_lockfs) (struct super_block *);
101 void (*unlockfs) (struct super_block *); 101 void (*unlockfs) (struct super_block *);
102 int (*statfs) (struct super_block *, struct kstatfs *); 102 int (*statfs) (struct dentry *, struct kstatfs *);
103 int (*remount_fs) (struct super_block *, int *, char *); 103 int (*remount_fs) (struct super_block *, int *, char *);
104 void (*clear_inode) (struct inode *); 104 void (*clear_inode) (struct inode *);
105 void (*umount_begin) (struct super_block *); 105 void (*umount_begin) (struct super_block *);
@@ -142,15 +142,16 @@ see also dquot_operations section.
142 142
143--------------------------- file_system_type --------------------------- 143--------------------------- file_system_type ---------------------------
144prototypes: 144prototypes:
145 struct super_block *(*get_sb) (struct file_system_type *, int, 145 int (*get_sb) (struct file_system_type *, int,
146 const char *, void *); 146 const char *, void *, struct vfsmount *);
147 void (*kill_sb) (struct super_block *); 147 void (*kill_sb) (struct super_block *);
148locking rules: 148locking rules:
149 may block BKL 149 may block BKL
150get_sb yes yes 150get_sb yes yes
151kill_sb yes yes 151kill_sb yes yes
152 152
153->get_sb() returns error or a locked superblock (exclusive on ->s_umount). 153->get_sb() returns error or 0 with locked superblock attached to the vfsmount
154(exclusive on ->s_umount).
154->kill_sb() takes a write-locked superblock, does all shutdown work on it, 155->kill_sb() takes a write-locked superblock, does all shutdown work on it,
155unlocks and drops the reference. 156unlocks and drops the reference.
156 157
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 58c65a1713e5..7cac200e2a85 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -19,7 +19,7 @@ following procedure:
19 19
20 (2) Have the follow_link() op do the following steps: 20 (2) Have the follow_link() op do the following steps:
21 21
22 (a) Call do_kern_mount() to call the appropriate filesystem to set up a 22 (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
23 superblock and gain a vfsmount structure representing it. 23 superblock and gain a vfsmount structure representing it.
24 24
25 (b) Copy the nameidata provided as an argument and substitute the dentry 25 (b) Copy the nameidata provided as an argument and substitute the dentry
diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c
index 3d4713a6c207..2d6a14a463e0 100644
--- a/Documentation/filesystems/configfs/configfs_example.c
+++ b/Documentation/filesystems/configfs/configfs_example.c
@@ -264,6 +264,15 @@ static struct config_item_type simple_child_type = {
264}; 264};
265 265
266 266
267struct simple_children {
268 struct config_group group;
269};
270
271static inline struct simple_children *to_simple_children(struct config_item *item)
272{
273 return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
274}
275
267static struct config_item *simple_children_make_item(struct config_group *group, const char *name) 276static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
268{ 277{
269 struct simple_child *simple_child; 278 struct simple_child *simple_child;
@@ -304,7 +313,13 @@ static ssize_t simple_children_attr_show(struct config_item *item,
304"items have only one attribute that is readable and writeable.\n"); 313"items have only one attribute that is readable and writeable.\n");
305} 314}
306 315
316static void simple_children_release(struct config_item *item)
317{
318 kfree(to_simple_children(item));
319}
320
307static struct configfs_item_operations simple_children_item_ops = { 321static struct configfs_item_operations simple_children_item_ops = {
322 .release = simple_children_release,
308 .show_attribute = simple_children_attr_show, 323 .show_attribute = simple_children_attr_show,
309}; 324};
310 325
@@ -345,10 +360,6 @@ static struct configfs_subsystem simple_children_subsys = {
345 * children of its own. 360 * children of its own.
346 */ 361 */
347 362
348struct simple_children {
349 struct config_group group;
350};
351
352static struct config_group *group_children_make_group(struct config_group *group, const char *name) 363static struct config_group *group_children_make_group(struct config_group *group, const char *name)
353{ 364{
354 struct simple_children *simple_children; 365 struct simple_children *simple_children;
diff --git a/Documentation/filesystems/devfs/ChangeLog b/Documentation/filesystems/devfs/ChangeLog
deleted file mode 100644
index e5aba5246d7c..000000000000
--- a/Documentation/filesystems/devfs/ChangeLog
+++ /dev/null
@@ -1,1977 +0,0 @@
1/* -*- auto-fill -*- */
2===============================================================================
3Changes for patch v1
4
5- creation of devfs
6
7- modified miscellaneous character devices to support devfs
8===============================================================================
9Changes for patch v2
10
11- bug fix with manual inode creation
12===============================================================================
13Changes for patch v3
14
15- bugfixes
16
17- documentation improvements
18
19- created a couple of scripts (one to save&restore a devfs and the
20 other to set up compatibility symlinks)
21
22- devfs support for SCSI discs. New name format is: sd_hHcCiIlL
23===============================================================================
24Changes for patch v4
25
26- bugfix for the directory reading code
27
28- bugfix for compilation with kerneld
29
30- devfs support for generic hard discs
31
32- rationalisation of the various watchdog drivers
33===============================================================================
34Changes for patch v5
35
36- support for mounting directly from entries in the devfs (it doesn't
37 need to be mounted to do this), including the root filesystem.
38 Mounting of swap partitions also works. Hence, now if you set
39 CONFIG_DEVFS_ONLY to 'Y' then you won't be able to access your discs
40 via ordinary device nodes. Naturally, the default is 'N' so that you
41 can still use your old device nodes. If you want to mount from devfs
42 entries, make sure you use: append = "root=/dev/sd_..." in your
43 lilo.conf. It seems LILO looks for the device number (major&minor)
44 and writes that into the kernel image :-(
45
46- support for character memory devices (/dev/null, /dev/zero, /dev/full
47 and so on). Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
48===============================================================================
49Changes for patch v6
50
51- support for subdirectories
52
53- support for symbolic links (created by devfs_mk_symlink(), no
54 support yet for creation via symlink(2))
55
56- SCSI disc naming now cast in stone, with the format:
57 /dev/sd/c0b1t2u3 controller=0, bus=1, ID=2, LUN=3, whole disc
58 /dev/sd/c0b1t2u3p4 controller=0, bus=1, ID=2, LUN=3, 4th partition
59
60- loop devices now appear in devfs
61
62- tty devices, console, serial ports, etc. now appear in devfs
63 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
64
65- bugs with mounting devfs-only devices now fixed
66===============================================================================
67Changes for patch v7
68
69- SCSI CD-ROMS, tapes and generic devices now appear in devfs
70===============================================================================
71Changes for patch v8
72
73- bugfix with no-rewind SCSI tapes
74
75- RAMDISCs now appear in devfs
76
77- better cleaning up of devfs entries created by various modules
78
79- interface change to <devfs_register>
80===============================================================================
81Changes for patch v9
82
83- the v8 patch was corrupted somehow, which would affect the patch for
84 linux/fs/filesystems.c
85 I've also fixed the v8 patch file on the WWW
86
87- MetaDevices (/dev/md*) should now appear in devfs
88===============================================================================
89Changes for patch v10
90
91- bugfix in meta device support for devfs
92
93- created this ChangeLog file
94
95- added devfs support to the floppy driver
96
97- added support for creating sockets in a devfs
98===============================================================================
99Changes for patch v11
100
101- added DEVFS_FL_HIDE_UNREG flag
102
103- incorporated better patch for ttyname() in libc 5.4.43 from H.J. Lu.
104
105- interface change to <devfs_mk_symlink>
106
107- support for creating symlinks with symlink(2)
108
109- parallel port printer (/dev/lp*) now appears in devfs
110===============================================================================
111Changes for patch v12
112
113- added inode check to <devfs_fill_file> function
114
115- improved devfs support when mounting from devfs
116
117- added call to <<release>> operation when removing swap areas on
118 devfs devices
119
120- increased NR_SUPER to 128 to support large numbers of devfs mounts
121 (for chroot(2) gaols)
122
123- fixed bug in SCSI disc support: was generating incorrect minors if
124 SCSI ID's did not start at 0 and increase by 1
125
126- support symlink traversal when mounting root
127===============================================================================
128Changes for patch v13
129
130- added devfs support to soundcard driver
131 Thanks to Eric Dumas <dumas@linux.eu.org> and
132 C. Scott Ananian <cananian@alumni.princeton.edu>
133
134- added devfs support to the joystick driver
135
136- loop driver now has its own subdirectory "/dev/loop/"
137
138- created <devfs_get_flags> and <devfs_set_flags> functions
139
140- fix problem with SCSI disc compatibility names (sd{a,b,c,d,e,f})
141 which assumes ID's start at 0 and increase by 1. Also only create
142 devfs entries for SCSI disc partitions which actually exist
143 Show new names in partition check
144 Thanks to Jakub Jelinek <jj@sunsite.ms.mff.cuni.cz>
145===============================================================================
146Changes for patch v14
147
148- bug fix in floppy driver: would not compile without
149 CONFIG_DEVFS_FS='Y'
150 Thanks to Jurgen Botz <jbotz@nova.botz.org>
151
152- bug fix in loop driver
153 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
154
155- do not create devfs entries for printers not configured
156 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
157
158- do not create devfs entries for serial ports not present
159 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
160
161- ensure <tty_register_devfs> is exported from tty_io.c
162 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
163
164- allow unregistering of devfs symlink entries
165
166- fixed bug in SCSI disc naming introduced in last patch version
167===============================================================================
168Changes for patch v15
169
170- ported to kernel 2.1.81
171===============================================================================
172Changes for patch v16
173
174- created <devfs_set_symlink_destination> function
175
176- moved DEVFS_SUPER_MAGIC into header file
177
178- added DEVFS_FL_HIDE flag
179
180- created <devfs_get_maj_min>
181
182- created <devfs_get_handle_from_inode>
183
184- fixed bugs in searching by major&minor
185
186- changed interface to <devfs_unregister>, <devfs_fill_file> and
187 <devfs_find_handle>
188
189- fixed inode times when symlink created with symlink(2)
190
191- change tty driver to do auto-creation of devfs entries
192 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
193
194- fixed bug in genhd.c: whole disc (non-SCSI) was not registered to
195 devfs
196
197- updated libc 5.4.43 patch for ttyname()
198===============================================================================
199Changes for patch v17
200
201- added CONFIG_DEVFS_TTY_COMPAT
202 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
203
204- bugfix in devfs support for drivers/char/lp.c
205 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
206
207- clean up serial driver so that PCMCIA devices unregister correctly
208 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
209
210- fixed bug in genhd.c: whole disc (non-SCSI) was not registered to
211 devfs [was missing in patch v16]
212
213- updated libc 5.4.43 patch for ttyname() [was missing in patch v16]
214
215- all SCSI devices now registered in /dev/sg
216
217- support removal of devfs entries via unlink(2)
218===============================================================================
219Changes for patch v18
220
221- added floppy/?u720 floppy entry
222
223- fixed kerneld support for entries in devfs subdirectories
224
225- incorporated latest patch for ttyname() in libc 5.4.43 from H.J. Lu.
226===============================================================================
227Changes for patch v19
228
229- bug fix when looking up unregistered entries: kerneld was not called
230
231- fixes for kernel 2.1.86 (now requires 2.1.86)
232===============================================================================
233Changes for patch v20
234
235- only create available floppy entries
236 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
237
238- new IDE naming scheme following SCSI format (i.e. /dev/id/c0b0t0u0p1
239 instead of /dev/hda1)
240 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
241
242- new XT disc naming scheme following SCSI format (i.e. /dev/xd/c0t0p1
243 instead of /dev/xda1)
244 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
245
246- new non-standard CD-ROM names (i.e. /dev/sbp/c#t#)
247 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
248
249- allow symlink traversal when mounting the root filesystem
250
251- Create entries for MD devices at MD init
252 Thanks to Christophe Leroy <christophe.leroy5@capway.com>
253===============================================================================
254Changes for patch v21
255
256- ported to kernel 2.1.91
257===============================================================================
258Changes for patch v22
259
260- SCSI host number patch ("scsihosts=" kernel option)
261 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
262===============================================================================
263Changes for patch v23
264
265- Fixed persistence bug with device numbers for manually created
266 device files
267
268- Fixed problem with recreating symlinks with different content
269
270- Added CONFIG_DEVFS_MOUNT (mount devfs on /dev at boot time)
271===============================================================================
272Changes for patch v24
273
274- Switched from CONFIG_KERNELD to CONFIG_KMOD: module autoloading
275 should now work again
276
277- Hide entries which are manually unlinked
278
279- Always invalidate devfs dentry cache when registering entries
280
281- Support removal of devfs directories via rmdir(2)
282
283- Ensure directories created by <devfs_mk_dir> are visible
284
285- Default no access for "other" for floppy device
286===============================================================================
287Changes for patch v25
288
289- Updates to CREDITS file and minor IDE numbering change
290 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
291
292- Invalidate devfs dentry cache when making directories
293
294- Invalidate devfs dentry cache when removing entries
295
296- More informative message if root FS mount fails when devfs
297 configured
298
299- Fixed persistence bug with fifos
300===============================================================================
301Changes for patch v26
302
303- ported to kernel 2.1.97
304
305- Changed serial directory from "/dev/serial" to "/dev/tts" and
306 "/dev/consoles" to "/dev/vc" to be more friendly to new procps
307===============================================================================
308Changes for patch v27
309
310- Added support for IDE4 and IDE5
311 Thanks to Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
312
313- Documented "scsihosts=" boot parameter
314
315- Print process command when debugging kerneld/kmod
316
317- Added debugging for register/unregister/change operations
318
319- Added "devfs=" boot options
320
321- Hide unregistered entries by default
322===============================================================================
323Changes for patch v28
324
325- No longer lock/unlock superblock in <devfs_put_super> (cope with
326 recent VFS interface change)
327
328- Do not automatically change ownership/protection of /dev/tty
329
330- Drop negative dentries when they are released
331
332- Manage dcache more efficiently
333===============================================================================
334Changes for patch v29
335
336- Added DEVFS_FL_AUTO_DEVNUM flag
337===============================================================================
338Changes for patch v30
339
340- No longer set unnecessary methods
341
342- Ported to kernel 2.1.99-pre3
343===============================================================================
344Changes for patch v31
345
346- Added PID display to <call_kerneld> debugging message
347
348- Added "diread" and "diwrite" options
349
350- Ported to kernel 2.1.102
351
352- Fixed persistence problem with permissions
353===============================================================================
354Changes for patch v32
355
356- Fixed devfs support in drivers/block/md.c
357===============================================================================
358Changes for patch v33
359
360- Support legacy device nodes
361
362- Fixed bug where recreated inodes were hidden
363
364- New IDE naming scheme: everything is under /dev/ide
365===============================================================================
366Changes for patch v34
367
368- Improved debugging in <get_vfs_inode>
369
370- Prevent duplicate calls to <devfs_mk_dir> in SCSI layer
371
372- No longer free old dentries in <devfs_mk_dir>
373
374- Free all dentries for a given entry when deleting inodes
375===============================================================================
376Changes for patch v35
377
378- Ported to kernel 2.1.105 (sound driver changes)
379===============================================================================
380Changes for patch v36
381
382- Fixed sound driver port
383===============================================================================
384Changes for patch v37
385
386- Minor documentation tweaks
387===============================================================================
388Changes for patch v38
389
390- More documentation tweaks
391
392- Fix for sound driver port
393
394- Removed ttyname-patch (grab libc 5.4.44 instead)
395
396- Ported to kernel 2.1.107-pre2 (loop driver fix)
397===============================================================================
398Changes for patch v39
399
400- Ported to kernel 2.1.107 (hd.c hunk broke due to spelling "fixes"). Sigh
401
402- Removed many #ifdef's, replaced with trickery in include/devfs_fs.h
403===============================================================================
404Changes for patch v40
405
406- Fix for sound driver port
407
408- Limit auto-device numbering to majors 128 to 239
409===============================================================================
410Changes for patch v41
411
412- Fixed inode times persistence problem
413===============================================================================
414Changes for patch v42
415
416- Ported to kernel 2.1.108 (drivers/scsi/hosts.c hunk broke)
417===============================================================================
418Changes for patch v43
419
420- Fixed spelling in <devfs_readlink> debug
421
422- Fixed bug in <devfs_setup> parsing "dilookup"
423
424- More #ifdef's removed
425
426- Supported Sparc keyboard (/dev/kbd)
427
428- Supported DSP56001 digital signal processor (/dev/dsp56k)
429
430- Supported Apple Desktop Bus (/dev/adb)
431
432- Supported Coda network file system (/dev/cfs*)
433===============================================================================
434Changes for patch v44
435
436- Fixed devfs inode leak when manually recreating inodes
437
438- Fixed permission persistence problem when recreating inodes
439===============================================================================
440Changes for patch v45
441
442- Ported to kernel 2.1.110
443===============================================================================
444Changes for patch v46
445
446- Ported to kernel 2.1.112-pre1
447
448- Removed harmless "unused variable" compiler warning
449
450- Fixed modes for manually recreated device nodes
451===============================================================================
452Changes for patch v47
453
454- Added NULL devfs inode warning in <devfs_read_inode>
455
456- Force all inode nlink values to 1
457===============================================================================
458Changes for patch v48
459
460- Added "dimknod" option
461
462- Set inode nlink to 0 when freeing dentries
463
464- Added support for virtual console capture devices (/dev/vcs*)
465 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
466
467- Fixed modes for manually recreated symlinks
468===============================================================================
469Changes for patch v49
470
471- Ported to kernel 2.1.113
472===============================================================================
473Changes for patch v50
474
475- Fixed bugs in recreated directories and symlinks
476===============================================================================
477Changes for patch v51
478
479- Improved robustness of rc.devfs script
480 Thanks to Roderich Schupp <rsch@experteam.de>
481
482- Fixed bugs in recreated device nodes
483
484- Fixed bug in currently unused <devfs_get_handle_from_inode>
485
486- Defined new <devfs_handle_t> type
487
488- Improved debugging when getting entries
489
490- Fixed bug where directories could be emptied
491
492- Ported to kernel 2.1.115
493===============================================================================
494Changes for patch v52
495
496- Replaced dummy .epoch inode with .devfsd character device
497
498- Modified rc.devfs to take account of above change
499
500- Removed spurious driver warning messages when CONFIG_DEVFS_FS=n
501
502- Implemented devfsd protocol revision 0
503===============================================================================
504Changes for patch v53
505
506- Ported to kernel 2.1.116 (kmod change broke hunk)
507
508- Updated Documentation/Configure.help
509
510- Test and tty pattern patch for rc.devfs script
511 Thanks to Roderich Schupp <rsch@experteam.de>
512
513- Added soothing message to warning in <devfs_d_iput>
514===============================================================================
515Changes for patch v54
516
517- Ported to kernel 2.1.117
518
519- Fixed default permissions in sound driver
520
521- Added support for frame buffer devices (/dev/fb*)
522===============================================================================
523Changes for patch v55
524
525- Ported to kernel 2.1.119
526
527- Use GCC extensions for structure initialisations
528
529- Implemented async open notification
530
531- Incremented devfsd protocol revision to 1
532===============================================================================
533Changes for patch v56
534
535- Ported to kernel 2.1.120-pre3
536
537- Moved async open notification to end of <devfs_open>
538===============================================================================
539Changes for patch v57
540
541- Ported to kernel 2.1.121
542
543- Prepended "/dev/" to module load request
544
545- Renamed <call_kerneld> to <call_kmod>
546
547- Created sample modules.conf file
548===============================================================================
549Changes for patch v58
550
551- Fixed typo "AYSNC" -> "ASYNC"
552===============================================================================
553Changes for patch v59
554
555- Added open flag for files
556===============================================================================
557Changes for patch v60
558
559- Ported to kernel 2.1.123-pre2
560===============================================================================
561Changes for patch v61
562
563- Set i_blocks=0 and i_blksize=1024 in <devfs_read_inode>
564===============================================================================
565Changes for patch v62
566
567- Ported to kernel 2.1.123
568===============================================================================
569Changes for patch v63
570
571- Ported to kernel 2.1.124-pre2
572===============================================================================
573Changes for patch v64
574
575- Fixed Unix98 pty support
576
577- Increased buffer size in <get_partition_list> to avoid crash and
578 burn
579===============================================================================
580Changes for patch v65
581
582- More Unix98 pty support fixes
583
584- Added test for empty <<name>> in <devfs_find_handle>
585
586- Renamed <generate_path> to <devfs_generate_path> and published
587
588- Created /dev/root symlink
589 Thanks to Roderich Schupp <rsch@ExperTeam.de>
590 with further modifications by me
591===============================================================================
592Changes for patch v66
593
594- Yet more Unix98 pty support fixes (now tested)
595
596- Created <devfs_get_fops>
597
598- Support media change checks when CONFIG_DEVFS_ONLY=y
599
600- Abolished Unix98-style PTY names for old PTY devices
601===============================================================================
602Changes for patch v67
603
604- Added inline declaration for dummy <devfs_generate_path>
605
606- Removed spurious "unable to register... in devfs" messages when
607 CONFIG_DEVFS_FS=n
608
609- Fixed misc. devices when CONFIG_DEVFS_FS=n
610
611- Limit auto-device numbering to majors 144 to 239
612===============================================================================
613Changes for patch v68
614
615- Hide unopened virtual consoles from directory listings
616
617- Added support for video capture devices
618
619- Ported to kernel 2.1.125
620===============================================================================
621Changes for patch v69
622
623- Fix for CONFIG_VT=n
624===============================================================================
625Changes for patch v70
626
627- Added support for non-OSS/Free sound cards
628===============================================================================
629Changes for patch v71
630
631- Ported to kernel 2.1.126-pre2
632===============================================================================
633Changes for patch v72
634
635- #ifdef's for CONFIG_DEVFS_DISABLE_OLD_NAMES removed
636===============================================================================
637Changes for patch v73
638
639- CONFIG_DEVFS_DISABLE_OLD_NAMES replaced with "nocompat" boot option
640
641- CONFIG_DEVFS_BOOT_OPTIONS removed: boot options always available
642===============================================================================
643Changes for patch v74
644
645- Removed CONFIG_DEVFS_MOUNT and "mount" boot option and replaced with
646 "nomount" boot option
647
648- Documentation updates
649
650- Updated sample modules.conf
651===============================================================================
652Changes for patch v75
653
654- Updated sample modules.conf
655
656- Remount devfs after initrd finishes
657
658- Ported to kernel 2.1.127
659
660- Added support for ISDN
661 Thanks to Christophe Leroy <christophe.leroy5@capway.com>
662===============================================================================
663Changes for patch v76
664
665- Updated an email address in ChangeLog
666
667- CONFIG_DEVFS_ONLY replaced with "only" boot option
668===============================================================================
669Changes for patch v77
670
671- Added DEVFS_FL_REMOVABLE flag
672
673- Check for disc change when listing directories with removable media
674 devices
675
676- Use DEVFS_FL_REMOVABLE in sd.c
677
678- Ported to kernel 2.1.128
679===============================================================================
680Changes for patch v78
681
682- Only call <scan_dir_for_removable> on first call to <devfs_readdir>
683
684- Ported to kernel 2.1.129-pre5
685
686- ISDN support improvements
687 Thanks to Christophe Leroy <christophe.leroy5@capway.com>
688===============================================================================
689Changes for patch v79
690
691- Ported to kernel 2.1.130
692
693- Renamed miscdevice "apm" to "apm_bios" to be consistent with
694 devices.txt
695===============================================================================
696Changes for patch v80
697
698- Ported to kernel 2.1.131
699
700- Updated <devfs_rmdir> for VFS change in 2.1.131
701===============================================================================
702Changes for patch v81
703
704- Fixed permissions on /dev/ptmx
705===============================================================================
706Changes for patch v82
707
708- Ported to kernel 2.1.132-pre4
709
710- Changed initial permissions on /dev/pts/*
711
712- Created <devfs_mk_compat>
713
714- Added "symlinks" boot option
715
716- Changed devfs_register_blkdev() back to register_blkdev() for IDE
717
718- Check for partitions on removable media in <devfs_lookup>
719===============================================================================
720Changes for patch v83
721
722- Fixed support for ramdisc when using string-based root FS name
723
724- Ported to kernel 2.2.0-pre1
725===============================================================================
726Changes for patch v84
727
728- Ported to kernel 2.2.0-pre7
729===============================================================================
730Changes for patch v85
731
732- Compile fixes for drivers/sound/sound_common.c (non-module) and
733 drivers/isdn/isdn_common.c
734 Thanks to Christophe Leroy <christophe.leroy5@capway.com>
735
736- Added support for registering regular files
737
738- Created <devfs_set_file_size>
739
740- Added /dev/cpu/mtrr as an alternative interface to /proc/mtrr
741
742- Update devfs inodes from entries if not changed through FS
743===============================================================================
744Changes for patch v86
745
746- Ported to kernel 2.2.0-pre9
747===============================================================================
748Changes for patch v87
749
750- Fixed bug when mounting non-devfs devices in a devfs
751===============================================================================
752Changes for patch v88
753
754- Fixed <devfs_fill_file> to only initialise temporary inodes
755
756- Trap for NULL fops in <devfs_register>
757
758- Return -ENODEV in <devfs_fill_file> for non-driver inodes
759
760- Fixed bug when unswapping non-devfs devices in a devfs
761===============================================================================
762Changes for patch v89
763
764- Switched to C data types in include/linux/devfs_fs.h
765
766- Switched from PATH_MAX to DEVFS_PATHLEN
767
768- Updated Documentation/filesystems/devfs/modules.conf to take account
769 of reverse scanning (!) by modprobe
770
771- Ported to kernel 2.2.0
772===============================================================================
773Changes for patch v90
774
775- CONFIG_DEVFS_DISABLE_OLD_TTY_NAMES replaced with "nottycompat" boot
776 option
777
778- CONFIG_DEVFS_TTY_COMPAT removed: existing "symlinks" boot option now
779 controls this. This means you must have libc 5.4.44 or later, or a
780 recent version of libc 6 if you use the "symlinks" option
781===============================================================================
782Changes for patch v91
783
784- Switch from <devfs_mk_symlink> to <devfs_mk_compat> in
785 drivers/char/vc_screen.c to fix problems with Midnight Commander
786===============================================================================
787Changes for patch v92
788
789- Ported to kernel 2.2.2-pre5
790===============================================================================
791Changes for patch v93
792
793- Modified <sd_name> in drivers/scsi/sd.c to cope with devices that
794 don't exist (which happens with new RAID autostart code printk()s)
795===============================================================================
796Changes for patch v94
797
798- Fixed bug in joystick driver: only first joystick was registered
799===============================================================================
800Changes for patch v95
801
802- Fixed another bug in joystick driver
803
804- Fixed <devfsd_read> to not overrun event buffer
805===============================================================================
806Changes for patch v96
807
808- Ported to kernel 2.2.5-2
809
810- Created <devfs_auto_unregister>
811
812- Fixed bugs: compatibility entries were not unregistered for:
813 loop driver
814 floppy driver
815 RAMDISC driver
816 IDE tape driver
817 SCSI CD-ROM driver
818 SCSI HDD driver
819===============================================================================
820Changes for patch v97
821
822- Fixed bugs: compatibility entries were not unregistered for:
823 ALSA sound driver
824 partitions in generic disc driver
825
826- Don't return unregistered entries in <devfs_find_handle>
827
828- Panic in <devfs_unregister> if entry unregistered
829
830- Don't panic in <devfs_auto_unregister> for duplicates
831===============================================================================
832Changes for patch v98
833
834- Don't unregister already unregistered entries in <unregister>
835
836- Register entry in <sd_detect>
837
838- Unregister entry in <sd_detach>
839
840- Changed to <devfs_*register_chrdev> in drivers/char/tty_io.c
841
842- Ported to kernel 2.2.7
843===============================================================================
844Changes for patch v99
845
846- Ported to kernel 2.2.8
847
848- Fixed bug in drivers/scsi/sd.c when >16 SCSI discs
849
850- Disable warning messages when unable to read partition table for
851 removable media
852===============================================================================
853Changes for patch v100
854
855- Ported to kernel 2.3.1-pre5
856
857- Added "oops-on-panic" boot option
858
859- Improved debugging in <devfs_register> and <devfs_unregister>
860
861- Register entry in <sr_detect>
862
863- Unregister entry in <sr_detach>
864
865- Register entry in <sg_detect>
866
867- Unregister entry in <sg_detach>
868
869- Added support for ALSA drivers
870===============================================================================
871Changes for patch v101
872
873- Ported to kernel 2.3.2
874===============================================================================
875Changes for patch v102
876
877- Update serial driver to register PCMCIA entries
878 Thanks to Roch-Alexandre Nomine-Beguin <roch@samarkand.infini.fr>
879
880- Updated an email address in ChangeLog
881
882- Hide virtual console capture entries from directory listings when
883 corresponding console device is not open
884===============================================================================
885Changes for patch v103
886
887- Ported to kernel 2.3.3
888===============================================================================
889Changes for patch v104
890
891- Added documentation for some functions
892
893- Added "doc" target to fs/devfs/Makefile
894
895- Added "v4l" directory for video4linux devices
896
897- Replaced call to <devfs_unregister> in <sd_detach> with call to
898 <devfs_register_partitions>
899
900- Moved registration for sr and sg drivers from detect() to attach()
901 methods
902
903- Register entries in <st_attach> and unregister in <st_detach>
904
905- Work around IDE driver treating CD-ROM as gendisk
906
907- Use <sed> instead of <tr> in rc.devfs
908
909- Updated ToDo list
910
911- Removed "oops-on-panic" boot option: now always Oops
912===============================================================================
913Changes for patch v105
914
915- Unregister SCSI host from <scsi_host_no_list> in <scsi_unregister>
916 Thanks to Zoltán Böszörményi <zboszor@mail.externet.hu>
917
918- Don't save /dev/log in rc.devfs
919
920- Ported to kernel 2.3.4-pre1
921===============================================================================
922Changes for patch v106
923
924- Fixed silly typo in drivers/scsi/st.c
925
926- Improved debugging in <devfs_register>
927===============================================================================
928Changes for patch v107
929
930- Added "diunlink" and "nokmod" boot options
931
932- Removed superfluous warning message in <devfs_d_iput>
933===============================================================================
934Changes for patch v108
935
936- Remove entries when unloading sound module
937===============================================================================
938Changes for patch v109
939
940- Ported to kernel 2.3.6-pre2
941===============================================================================
942Changes for patch v110
943
944- Took account of change to <d_alloc_root>
945===============================================================================
946Changes for patch v111
947
948- Created separate event queue for each mounted devfs
949
950- Removed <devfs_invalidate_dcache>
951
952- Created new ioctl()s for devfsd
953
954- Incremented devfsd protocol revision to 3
955
956- Fixed bug when re-creating directories: contents were lost
957
958- Block access to inodes until devfsd updates permissions
959===============================================================================
960Changes for patch v112
961
962- Modified patch so it applies against 2.3.5 and 2.3.6
963
964- Updated an email address in ChangeLog
965
966- Do not automatically change ownership/protection of /dev/tty<n>
967
968- Updated sample modules.conf
969
970- Switched to sending process uid/gid to devfsd
971
972- Renamed <call_kmod> to <try_modload>
973
974- Added DEVFSD_NOTIFY_LOOKUP event
975
976- Added DEVFSD_NOTIFY_CHANGE event
977
978- Added DEVFSD_NOTIFY_CREATE event
979
980- Incremented devfsd protocol revision to 4
981
982- Moved kernel-specific stuff to include/linux/devfs_fs_kernel.h
983===============================================================================
984Changes for patch v113
985
986- Ported to kernel 2.3.9
987
988- Restricted permissions on some block devices
989===============================================================================
990Changes for patch v114
991
992- Added support for /dev/netlink
993 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
994
995- Return EISDIR rather than EINVAL for read(2) on directories
996
997- Ported to kernel 2.3.10
998===============================================================================
999Changes for patch v115
1000
1001- Added support for all remaining character devices
1002 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
1003
1004- Cleaned up netlink support
1005===============================================================================
1006Changes for patch v116
1007
1008- Added support for /dev/parport%d
1009 Thanks to Tim Waugh <tim@cyberelk.demon.co.uk>
1010
1011- Fixed parallel port ATAPI tape driver
1012
1013- Fixed Atari SLM laser printer driver
1014===============================================================================
1015Changes for patch v117
1016
1017- Added support for COSA card
1018 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
1019
1020- Fixed drivers/char/ppdev.c: missing #include <linux/init.h>
1021
1022- Fixed drivers/char/ftape/zftape/zftape-init.c
1023 Thanks to Vladimir Popov <mashgrad@usa.net>
1024===============================================================================
1025Changes for patch v118
1026
1027- Ported to kernel 2.3.15-pre3
1028
1029- Fixed bug in loop driver
1030
1031- Unregister /dev/lp%d entries in drivers/char/lp.c
1032 Thanks to Maciej W. Rozycki <macro@ds2.pg.gda.pl>
1033===============================================================================
1034Changes for patch v119
1035
1036- Ported to kernel 2.3.16
1037===============================================================================
1038Changes for patch v120
1039
1040- Fixed bug in drivers/scsi/scsi.c
1041
1042- Added /dev/ppp
1043 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
1044
1045- Ported to kernel 2.3.17
1046===============================================================================
1047Changes for patch v121
1048
1049- Fixed bug in drivers/block/loop.c
1050
1051- Ported to kernel 2.3.18
1052===============================================================================
1053Changes for patch v122
1054
1055- Ported to kernel 2.3.19
1056===============================================================================
1057Changes for patch v123
1058
1059- Ported to kernel 2.3.20
1060===============================================================================
1061Changes for patch v124
1062
1063- Ported to kernel 2.3.21
1064===============================================================================
1065Changes for patch v125
1066
1067- Created <devfs_get_info>, <devfs_set_info>,
1068 <devfs_get_first_child> and <devfs_get_next_sibling>
1069 Added <<dir>> parameter to <devfs_register>, <devfs_mk_compat>,
1070 <devfs_mk_dir> and <devfs_find_handle>
1071 Work sponsored by SGI
1072
1073- Fixed apparent bug in COSA driver
1074
1075- Re-instated "scsihosts=" boot option
1076===============================================================================
1077Changes for patch v126
1078
1079- Always create /dev/pts if CONFIG_UNIX98_PTYS=y
1080
1081- Fixed call to <devfs_mk_dir> in drivers/block/ide-disk.c
1082 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
1083
1084- Allow multiple unregistrations
1085
1086- Created /dev/scsi hierarchy
1087 Work sponsored by SGI
1088===============================================================================
1089Changes for patch v127
1090
1091Work sponsored by SGI
1092
1093- No longer disable devpts if devfs enabled (caveat emptor)
1094
1095- Added flags array to struct gendisk and removed code from
1096 drivers/scsi/sd.c
1097
1098- Created /dev/discs hierarchy
1099===============================================================================
1100Changes for patch v128
1101
1102Work sponsored by SGI
1103
1104- Created /dev/cdroms hierarchy
1105===============================================================================
1106Changes for patch v129
1107
1108Work sponsored by SGI
1109
1110- Removed compatibility entries for sound devices
1111
1112- Removed compatibility entries for printer devices
1113
1114- Removed compatibility entries for video4linux devices
1115
1116- Removed compatibility entries for parallel port devices
1117
1118- Removed compatibility entries for frame buffer devices
1119===============================================================================
1120Changes for patch v130
1121
1122Work sponsored by SGI
1123
1124- Added major and minor number to devfsd protocol
1125
1126- Incremented devfsd protocol revision to 5
1127
1128- Removed compatibility entries for SoundBlaster CD-ROMs
1129
1130- Removed compatibility entries for netlink devices
1131
1132- Removed compatibility entries for SCSI generic devices
1133
1134- Removed compatibility entries for SCSI tape devices
1135===============================================================================
1136Changes for patch v131
1137
1138Work sponsored by SGI
1139
1140- Support info pointer for all devfs entry types
1141
1142- Added <<info>> parameter to <devfs_mk_dir> and <devfs_mk_symlink>
1143
1144- Removed /dev/st hierarchy
1145
1146- Removed /dev/sg hierarchy
1147
1148- Removed compatibility entries for loop devices
1149
1150- Removed compatibility entries for IDE tape devices
1151
1152- Removed compatibility entries for SCSI CD-ROMs
1153
1154- Removed /dev/sr hierarchy
1155===============================================================================
1156Changes for patch v132
1157
1158Work sponsored by SGI
1159
1160- Removed compatibility entries for floppy devices
1161
1162- Removed compatibility entries for RAMDISCs
1163
1164- Removed compatibility entries for meta-devices
1165
1166- Removed compatibility entries for SCSI discs
1167
1168- Created <devfs_make_root>
1169
1170- Removed /dev/sd hierarchy
1171
1172- Support "../" when searching devfs namespace
1173
1174- Created /dev/ide/host* hierarchy
1175
1176- Supported IDE hard discs in /dev/ide/host* hierarchy
1177
1178- Removed compatibility entries for IDE discs
1179
1180- Removed /dev/ide/hd hierarchy
1181
1182- Supported IDE CD-ROMs in /dev/ide/host* hierarchy
1183
1184- Removed compatibility entries for IDE CD-ROMs
1185
1186- Removed /dev/ide/cd hierarchy
1187===============================================================================
1188Changes for patch v133
1189
1190Work sponsored by SGI
1191
1192- Created <devfs_get_unregister_slave>
1193
1194- Fixed bug in fs/partitions/check.c when rescanning
1195===============================================================================
1196Changes for patch v134
1197
1198Work sponsored by SGI
1199
1200- Removed /dev/sd, /dev/sr, /dev/st and /dev/sg directories
1201
1202- Removed /dev/ide/hd directory
1203
1204- Exported <devfs_get_parent>
1205
1206- Created <devfs_register_tape> and /dev/tapes hierarchy
1207
1208- Removed /dev/ide/mt hierarchy
1209
1210- Removed /dev/ide/fd hierarchy
1211
1212- Ported to kernel 2.3.25
1213===============================================================================
1214Changes for patch v135
1215
1216Work sponsored by SGI
1217
1218- Removed compatibility entries for virtual console capture devices
1219
1220- Removed unused <devfs_set_symlink_destination>
1221
1222- Removed compatibility entries for serial devices
1223
1224- Removed compatibility entries for console devices
1225
1226- Do not hide entries from devfsd or children
1227
1228- Removed DEVFS_FL_TTY_COMPAT flag
1229
1230- Removed "nottycompat" boot option
1231
1232- Removed <devfs_mk_compat>
1233===============================================================================
1234Changes for patch v136
1235
1236Work sponsored by SGI
1237
1238- Moved BSD pty devices to /dev/pty
1239
1240- Added DEVFS_FL_WAIT flag
1241===============================================================================
1242Changes for patch v137
1243
1244Work sponsored by SGI
1245
1246- Really fixed bug in fs/partitions/check.c when rescanning
1247
1248- Support new "disc" naming scheme in <get_removable_partition>
1249
1250- Allow NULL fops in <devfs_register>
1251
1252- Removed redundant name functions in SCSI disc and IDE drivers
1253===============================================================================
1254Changes for patch v138
1255
1256Work sponsored by SGI
1257
1258- Fixed old bugs in drivers/block/paride/pt.c, drivers/char/tpqic02.c,
1259 drivers/net/wan/cosa.c and drivers/scsi/scsi.c
1260 Thanks to Sergey Kubushin <ksi@ksi-linux.com>
1261
1262- Fall back to major table if NULL fops given to <devfs_register>
1263===============================================================================
1264Changes for patch v139
1265
1266Work sponsored by SGI
1267
1268- Corrected and moved <get_blkfops> and <get_chrfops> declarations
1269 from arch/alpha/kernel/osf_sys.c to include/linux/fs.h
1270
1271- Removed name function from struct gendisk
1272
1273- Updated devfs FAQ
1274===============================================================================
1275Changes for patch v140
1276
1277Work sponsored by SGI
1278
1279- Ported to kernel 2.3.27
1280===============================================================================
1281Changes for patch v141
1282
1283Work sponsored by SGI
1284
1285- Bug fix in arch/m68k/atari/joystick.c
1286
1287- Moved ISDN and capi devices to /dev/isdn
1288===============================================================================
1289Changes for patch v142
1290
1291Work sponsored by SGI
1292
1293- Bug fix in drivers/block/ide-probe.c (patch confusion)
1294===============================================================================
1295Changes for patch v143
1296
1297Work sponsored by SGI
1298
1299- Bug fix in drivers/block/blkpg.c:partition_name()
1300===============================================================================
1301Changes for patch v144
1302
1303Work sponsored by SGI
1304
1305- Ported to kernel 2.3.29
1306
1307- Removed calls to <devfs_register> from cdu31a, cm206, mcd and mcdx
1308 CD-ROM drivers: generic driver handles this now
1309
1310- Moved joystick devices to /dev/joysticks
1311===============================================================================
1312Changes for patch v145
1313
1314Work sponsored by SGI
1315
1316- Ported to kernel 2.3.30-pre3
1317
1318- Register whole-disc entry even for invalid partition tables
1319
1320- Fixed bug in mounting root FS when initrd enabled
1321
1322- Fixed device entry leak with IDE CD-ROMs
1323
1324- Fixed compile problem with drivers/isdn/isdn_common.c
1325
1326- Moved COSA devices to /dev/cosa
1327
1328- Support fifos when unregistering
1329
1330- Created <devfs_register_series> and used in many drivers
1331
1332- Moved Coda devices to /dev/coda
1333
1334- Moved parallel port IDE tapes to /dev/pt
1335
1336- Moved parallel port IDE generic devices to /dev/pg
1337===============================================================================
1338Changes for patch v146
1339
1340Work sponsored by SGI
1341
1342- Removed obsolete DEVFS_FL_COMPAT and DEVFS_FL_TOLERANT flags
1343
1344- Fixed compile problem with fs/coda/psdev.c
1345
1346- Reinstate change to <devfs_register_blkdev> in
1347 drivers/block/ide-probe.c now that fs/isofs/inode.c is fixed
1348
1349- Switched to <devfs_register_blkdev> in drivers/block/floppy.c,
1350 drivers/scsi/sr.c and drivers/block/md.c
1351
1352- Moved DAC960 devices to /dev/dac960
1353===============================================================================
1354Changes for patch v147
1355
1356Work sponsored by SGI
1357
1358- Ported to kernel 2.3.32-pre4
1359===============================================================================
1360Changes for patch v148
1361
1362Work sponsored by SGI
1363
1364- Removed kmod support: use devfsd instead
1365
1366- Moved miscellaneous character devices to /dev/misc
1367===============================================================================
1368Changes for patch v149
1369
1370Work sponsored by SGI
1371
1372- Ensure include/linux/joystick.h is OK for user-space
1373
1374- Improved debugging in <get_vfs_inode>
1375
1376- Ensure dentries created by devfsd will be cleaned up
1377===============================================================================
1378Changes for patch v150
1379
1380Work sponsored by SGI
1381
1382- Ported to kernel 2.3.34
1383===============================================================================
1384Changes for patch v151
1385
1386Work sponsored by SGI
1387
1388- Ported to kernel 2.3.35-pre1
1389
1390- Created <devfs_get_name>
1391===============================================================================
1392Changes for patch v152
1393
1394Work sponsored by SGI
1395
1396- Updated sample modules.conf
1397
1398- Ported to kernel 2.3.36-pre1
1399===============================================================================
1400Changes for patch v153
1401
1402Work sponsored by SGI
1403
1404- Ported to kernel 2.3.42
1405
1406- Removed <devfs_fill_file>
1407===============================================================================
1408Changes for patch v154
1409
1410Work sponsored by SGI
1411
1412- Took account of device number changes for /dev/fb*
1413===============================================================================
1414Changes for patch v155
1415
1416Work sponsored by SGI
1417
1418- Ported to kernel 2.3.43-pre8
1419
1420- Moved /dev/tty0 to /dev/vc/0
1421
1422- Moved sequence number formatting from <_tty_make_name> to drivers
1423===============================================================================
1424Changes for patch v156
1425
1426Work sponsored by SGI
1427
1428- Fixed breakage in drivers/scsi/sd.c due to recent SCSI changes
1429===============================================================================
1430Changes for patch v157
1431
1432Work sponsored by SGI
1433
1434- Ported to kernel 2.3.45
1435===============================================================================
1436Changes for patch v158
1437
1438Work sponsored by SGI
1439
1440- Ported to kernel 2.3.46-pre2
1441===============================================================================
1442Changes for patch v159
1443
1444Work sponsored by SGI
1445
1446- Fixed drivers/block/md.c
1447 Thanks to Mike Galbraith <mikeg@weiden.de>
1448
1449- Documentation fixes
1450
1451- Moved device registration from <lp_init> to <lp_register>
1452 Thanks to Tim Waugh <twaugh@redhat.com>
1453===============================================================================
1454Changes for patch v160
1455
1456Work sponsored by SGI
1457
1458- Fixed drivers/char/joystick/joystick.c
1459 Thanks to Vojtech Pavlik <vojtech@suse.cz>
1460
1461- Documentation updates
1462
1463- Fixed arch/i386/kernel/mtrr.c if procfs and devfs not enabled
1464
1465- Fixed drivers/char/stallion.c
1466===============================================================================
1467Changes for patch v161
1468
1469Work sponsored by SGI
1470
1471- Remove /dev/ide when ide-mod is unloaded
1472
1473- Fixed bug in drivers/block/ide-probe.c when secondary but no primary
1474
1475- Added DEVFS_FL_NO_PERSISTENCE flag
1476
1477- Used new DEVFS_FL_NO_PERSISTENCE flag for Unix98 pty slaves
1478
1479- Removed unnecessary call to <update_devfs_inode_from_entry> in
1480 <devfs_readdir>
1481
1482- Only set auto-ownership for /dev/pty/s*
1483===============================================================================
1484Changes for patch v162
1485
1486Work sponsored by SGI
1487
1488- Set inode->i_size to correct size for symlinks
1489 Thanks to Jeremy Fitzhardinge <jeremy@goop.org>
1490
1491- Only give lookup() method to directories to comply with new VFS
1492 assumptions
1493
1494- Remove unnecessary tests in symlink methods
1495
1496- Don't kill existing block ops in <devfs_read_inode>
1497
1498- Restore auto-ownership for /dev/pty/m*
1499===============================================================================
1500Changes for patch v163
1501
1502Work sponsored by SGI
1503
1504- Don't create missing directories in <devfs_find_handle>
1505
1506- Removed Documentation/filesystems/devfs/mk-devlinks
1507
1508- Updated Documentation/filesystems/devfs/README
1509===============================================================================
1510Changes for patch v164
1511
1512Work sponsored by SGI
1513
1514- Fixed CONFIG_DEVFS breakage in drivers/char/serial.c introduced in
1515 linux-2.3.99-pre6-7
1516===============================================================================
1517Changes for patch v165
1518
1519Work sponsored by SGI
1520
1521- Ported to kernel 2.3.99-pre6
1522===============================================================================
1523Changes for patch v166
1524
1525Work sponsored by SGI
1526
1527- Added CONFIG_DEVFS_MOUNT
1528===============================================================================
1529Changes for patch v167
1530
1531Work sponsored by SGI
1532
1533- Updated Documentation/filesystems/devfs/README
1534
1535- Updated sample modules.conf
1536===============================================================================
1537Changes for patch v168
1538
1539Work sponsored by SGI
1540
1541- Disabled multi-mount capability (use VFS bindings instead)
1542
1543- Updated README from master HTML file
1544===============================================================================
1545Changes for patch v169
1546
1547Work sponsored by SGI
1548
1549- Removed multi-mount code
1550
1551- Removed compatibility macros: VFS has changed too much
1552===============================================================================
1553Changes for patch v170
1554
1555Work sponsored by SGI
1556
1557- Updated README from master HTML file
1558
1559- Merged devfs inode into devfs entry
1560===============================================================================
1561Changes for patch v171
1562
1563Work sponsored by SGI
1564
1565- Updated sample modules.conf
1566
1567- Removed dead code in <devfs_register> which used to call
1568 <free_dentries>
1569
1570- Ported to kernel 2.4.0-test2-pre3
1571===============================================================================
1572Changes for patch v172
1573
1574Work sponsored by SGI
1575
1576- Changed interface to <devfs_register>
1577
1578- Changed interface to <devfs_register_series>
1579===============================================================================
1580Changes for patch v173
1581
1582Work sponsored by SGI
1583
1584- Simplified interface to <devfs_mk_symlink>
1585
1586- Simplified interface to <devfs_mk_dir>
1587
1588- Simplified interface to <devfs_find_handle>
1589===============================================================================
1590Changes for patch v174
1591
1592Work sponsored by SGI
1593
1594- Updated README from master HTML file
1595===============================================================================
1596Changes for patch v175
1597
1598Work sponsored by SGI
1599
1600- DocBook update for fs/devfs/base.c
1601 Thanks to Tim Waugh <twaugh@redhat.com>
1602
1603- Removed stale fs/tunnel.c (was never used or completed)
1604===============================================================================
1605Changes for patch v176
1606
1607Work sponsored by SGI
1608
1609- Updated ToDo list
1610
1611- Removed sample modules.conf: now distributed with devfsd
1612
1613- Updated README from master HTML file
1614
1615- Ported to kernel 2.4.0-test3-pre4 (which had devfs-patch-v174)
1616===============================================================================
1617Changes for patch v177
1618
1619- Updated README from master HTML file
1620
1621- Documentation cleanups
1622
1623- Ensure <devfs_generate_path> terminates string for root entry
1624 Thanks to Tim Jansen <tim@tjansen.de>
1625
1626- Exported <devfs_get_name> to modules
1627
1628- Make <devfs_mk_symlink> send events to devfsd
1629
1630- Cleaned up option processing in <devfs_setup>
1631
1632- Fixed bugs in handling symlinks: could leak or cause Oops
1633
1634- Cleaned up directory handling by separating fops
1635 Thanks to Alexander Viro <viro@parcelfarce.linux.theplanet.co.uk>
1636===============================================================================
1637Changes for patch v178
1638
1639- Fixed handling of inverted options in <devfs_setup>
1640===============================================================================
1641Changes for patch v179
1642
1643- Adjusted <try_modload> to account for <devfs_generate_path> fix
1644===============================================================================
1645Changes for patch v180
1646
1647- Fixed !CONFIG_DEVFS_FS stub declaration of <devfs_get_info>
1648===============================================================================
1649Changes for patch v181
1650
1651- Answered question posed by Al Viro and removed his comments from <devfs_open>
1652
1653- Moved setting of registered flag after other fields are changed
1654
1655- Fixed race between <devfsd_close> and <devfsd_notify_one>
1656
1657- Global VFS changes added bogus BKL to devfsd_close(): removed
1658
1659- Widened locking in <devfs_readlink> and <devfs_follow_link>
1660
1661- Replaced <devfsd_read> stack usage with <devfsd_ioctl> kmalloc
1662
1663- Simplified locking in <devfsd_ioctl> and fixed memory leak
1664===============================================================================
1665Changes for patch v182
1666
1667- Created <devfs_*alloc_major> and <devfs_*alloc_devnum>
1668
1669- Removed broken devnum allocation and use <devfs_alloc_devnum>
1670
1671- Fixed old devnum leak by calling new <devfs_dealloc_devnum>
1672
1673- Created <devfs_*alloc_unique_number>
1674
1675- Fixed number leak for /dev/cdroms/cdrom%d
1676
1677- Fixed number leak for /dev/discs/disc%d
1678===============================================================================
1679Changes for patch v183
1680
1681- Fixed bug in <devfs_setup> which could hang boot process
1682===============================================================================
1683Changes for patch v184
1684
1685- Documentation typo fix for fs/devfs/util.c
1686
1687- Fixed drivers/char/stallion.c for devfs
1688
1689- Added DEVFSD_NOTIFY_DELETE event
1690
1691- Updated README from master HTML file
1692
1693- Removed #include <asm/segment.h> from fs/devfs/base.c
1694===============================================================================
1695Changes for patch v185
1696
1697- Made <block_semaphore> and <char_semaphore> in fs/devfs/util.c
1698 private
1699
1700- Fixed inode table races by removing it and using inode->u.generic_ip
1701 instead
1702
1703- Moved <devfs_read_inode> into <get_vfs_inode>
1704
1705- Moved <devfs_write_inode> into <devfs_notify_change>
1706===============================================================================
1707Changes for patch v186
1708
1709- Fixed race in <devfs_do_symlink> for uni-processor
1710
1711- Updated README from master HTML file
1712===============================================================================
1713Changes for patch v187
1714
1715- Fixed drivers/char/stallion.c for devfs
1716
1717- Fixed drivers/char/rocket.c for devfs
1718
1719- Fixed bug in <devfs_alloc_unique_number>: limited to 128 numbers
1720===============================================================================
1721Changes for patch v188
1722
1723- Updated major masks in fs/devfs/util.c up to Linus' "no new majors"
1724 proclamation. Block: were 126 now 122 free, char: were 26 now 19 free
1725
1726- Updated README from master HTML file
1727
1728- Removed remnant of multi-mount support in <devfs_mknod>
1729
1730- Removed unused DEVFS_FL_SHOW_UNREG flag
1731===============================================================================
1732Changes for patch v189
1733
1734- Removed nlink field from struct devfs_inode
1735
1736- Removed auto-ownership for /dev/pty/* (BSD ptys) and used
1737 DEVFS_FL_CURRENT_OWNER|DEVFS_FL_NO_PERSISTENCE for /dev/pty/s* (just
1738 like Unix98 pty slaves) and made /dev/pty/m* rw-rw-rw- access
1739===============================================================================
1740Changes for patch v190
1741
1742- Updated README from master HTML file
1743
1744- Replaced BKL with global rwsem to protect symlink data (quick and
1745 dirty hack)
1746===============================================================================
1747Changes for patch v191
1748
1749- Replaced global rwsem for symlink with per-link refcount
1750===============================================================================
1751Changes for patch v192
1752
1753- Removed unnecessary #ifdef CONFIG_DEVFS_FS from arch/i386/kernel/mtrr.c
1754
1755- Ported to kernel 2.4.10-pre11
1756
1757- Set inode->i_mapping->a_ops for block nodes in <get_vfs_inode>
1758===============================================================================
1759Changes for patch v193
1760
1761- Went back to global rwsem for symlinks (refcount scheme no good)
1762===============================================================================
1763Changes for patch v194
1764
1765- Fixed overrun in <devfs_link> by removing function (not needed)
1766
1767- Updated README from master HTML file
1768===============================================================================
1769Changes for patch v195
1770
1771- Fixed buffer underrun in <try_modload>
1772
1773- Moved down_read() from <search_for_entry_in_dir> to <find_entry>
1774===============================================================================
1775Changes for patch v196
1776
1777- Fixed race in <devfsd_ioctl> when setting event mask
1778 Thanks to Kari Hurtta <hurtta@leija.mh.fmi.fi>
1779
1780- Avoid deadlock in <devfs_follow_link> by using temporary buffer
1781===============================================================================
1782Changes for patch v197
1783
1784- First release of new locking code for devfs core (v1.0)
1785
1786- Fixed bug in drivers/cdrom/cdrom.c
1787===============================================================================
1788Changes for patch v198
1789
1790- Discard temporary buffer, now use "%s" for dentry names
1791
1792- Don't generate path in <try_modload>: use fake entry instead
1793
1794- Use "existing" directory in <_devfs_make_parent_for_leaf>
1795
1796- Use slab cache rather than fixed buffer for devfsd events
1797===============================================================================
1798Changes for patch v199
1799
1800- Removed obsolete usage of DEVFS_FL_NO_PERSISTENCE
1801
1802- Send DEVFSD_NOTIFY_REGISTERED events in <devfs_mk_dir>
1803
1804- Fixed locking bug in <devfs_d_revalidate_wait> due to typo
1805
1806- Do not send CREATE, CHANGE, ASYNC_OPEN or DELETE events from devfsd
1807 or children
1808===============================================================================
1809Changes for patch v200
1810
1811- Ported to kernel 2.5.1-pre2
1812===============================================================================
1813Changes for patch v201
1814
1815- Fixed bug in <devfsd_read>: was dereferencing freed pointer
1816===============================================================================
1817Changes for patch v202
1818
1819- Fixed bug in <devfsd_close>: was dereferencing freed pointer
1820
1821- Added process group check for devfsd privileges
1822===============================================================================
1823Changes for patch v203
1824
1825- Use SLAB_ATOMIC in <devfsd_notify_de> from <devfs_d_delete>
1826===============================================================================
1827Changes for patch v204
1828
1829- Removed long obsolete rc.devfs
1830
1831- Return old entry in <devfs_mk_dir> for 2.4.x kernels
1832
1833- Updated README from master HTML file
1834
1835- Increment refcount on module in <check_disc_changed>
1836
1837- Created <devfs_get_handle> and exported <devfs_put>
1838
1839- Increment refcount on module in <devfs_get_ops>
1840
1841- Created <devfs_put_ops> and used where needed to fix races
1842
1843- Added clarifying comments in response to preliminary EMC code review
1844
1845- Added poisoning to <devfs_put>
1846
1847- Improved debugging messages
1848
1849- Fixed unregister bugs in drivers/md/lvm-fs.c
1850===============================================================================
1851Changes for patch v205
1852
1853- Corrected (made useful) debugging message in <unregister>
1854
1855- Moved <kmem_cache_create> in <mount_devfs_fs> to <init_devfs_fs>
1856
1857- Fixed drivers/md/lvm-fs.c to create "lvm" entry
1858
1859- Added magic number to guard against scribbling drivers
1860
1861- Only return old entry in <devfs_mk_dir> if a directory
1862
1863- Defined macros for error and debug messages
1864
1865- Updated README from master HTML file
1866===============================================================================
1867Changes for patch v206
1868
1869- Added support for multiple Compaq cpqarray controllers
1870
1871- Fixed (rare, old) race in <devfs_lookup>
1872===============================================================================
1873Changes for patch v207
1874
1875- Fixed deadlock bug in <devfs_d_revalidate_wait>
1876
1877- Tag VFS deletable in <devfs_mk_symlink> if handle ignored
1878
1879- Updated README from master HTML file
1880===============================================================================
1881Changes for patch v208
1882
1883- Added KERN_* to remaining messages
1884
1885- Cleaned up declaration of <stat_read>
1886
1887- Updated README from master HTML file
1888===============================================================================
1889Changes for patch v209
1890
1891- Updated README from master HTML file
1892
1893- Removed silently introduced calls to lock_kernel() and
1894 unlock_kernel() due to recent VFS locking changes. BKL isn't
1895 required in devfs
1896
1897- Changed <devfs_rmdir> to allow later additions if not yet empty
1898
1899- Added calls to <devfs_register_partitions> in drivers/block/blkpg.c
1900 <add_partition> and <del_partition>
1901
1902- Fixed bug in <devfs_alloc_unique_number>: was clearing beyond
1903 bitfield
1904
1905- Fixed bitfield data type for <devfs_*alloc_devnum>
1906
1907- Made major bitfield type and initialiser 64 bit safe
1908===============================================================================
1909Changes for patch v210
1910
1911- Updated fs/devfs/util.c to fix shift warning on 64 bit machines
1912 Thanks to Anton Blanchard <anton@samba.org>
1913
1914- Updated README from master HTML file
1915===============================================================================
1916Changes for patch v211
1917
1918- Do not put miscellaneous character devices in /dev/misc if they
1919 specify their own directory (i.e. contain a '/' character)
1920
1921- Copied macro for error messages from fs/devfs/base.c to
1922 fs/devfs/util.c and made use of this macro
1923
1924- Removed 2.4.x compatibility code from fs/devfs/base.c
1925===============================================================================
1926Changes for patch v212
1927
1928- Added BKL to <devfs_open> because drivers still need it
1929===============================================================================
1930Changes for patch v213
1931
1932- Protected <scan_dir_for_removable> and <get_removable_partition>
1933 from changing directory contents
1934===============================================================================
1935Changes for patch v214
1936
1937- Switched to ISO C structure field initialisers
1938
1939- Switch to set_current_state() and move before add_wait_queue()
1940
1941- Updated README from master HTML file
1942
1943- Fixed devfs entry leak in <devfs_readdir> when *readdir fails
1944===============================================================================
1945Changes for patch v215
1946
1947- Created <devfs_find_and_unregister>
1948
1949- Switched many functions from <devfs_find_handle> to
1950 <devfs_find_and_unregister>
1951
1952- Switched many functions from <devfs_find_handle> to <devfs_get_handle>
1953===============================================================================
1954Changes for patch v216
1955
1956- Switched arch/ia64/sn/io/hcl.c from <devfs_find_handle> to
1957 <devfs_get_handle>
1958
1959- Removed deprecated <devfs_find_handle>
1960===============================================================================
1961Changes for patch v217
1962
1963- Exported <devfs_find_and_unregister> and <devfs_only> to modules
1964
1965- Updated README from master HTML file
1966
1967- Fixed module unload race in <devfs_open>
1968===============================================================================
1969Changes for patch v218
1970
1971- Removed DEVFS_FL_AUTO_OWNER flag
1972
1973- Switched lingering structure field initialiser to ISO C
1974
1975- Added locking when setting/clearing flags
1976
1977- Documentation fix in fs/devfs/util.c
diff --git a/Documentation/filesystems/devfs/README b/Documentation/filesystems/devfs/README
deleted file mode 100644
index aabfba24bc2e..000000000000
--- a/Documentation/filesystems/devfs/README
+++ /dev/null
@@ -1,1959 +0,0 @@
1Devfs (Device File System) FAQ
2
3
4Linux Devfs (Device File System) FAQ
5Richard Gooch
620-AUG-2002
7
8
9Document languages:
10
11
12
13
14
15
16
17-----------------------------------------------------------------------------
18
19NOTE: the master copy of this document is available online at:
20
21http://www.atnf.csiro.au/~rgooch/linux/docs/devfs.html
22and looks much better than the text version distributed with the
23kernel sources. A mirror site is available at:
24
25http://www.ras.ucalgary.ca/~rgooch/linux/docs/devfs.html
26
27There is also an optional daemon that may be used with devfs. You can
28find out more about it at:
29
30http://www.atnf.csiro.au/~rgooch/linux/
31
32A mailing list is available which you may subscribe to. Send
33email
34to majordomo@oss.sgi.com with the following line in the
35body of the message:
36subscribe devfs
37To unsubscribe, send the message body:
38unsubscribe devfs
39instead. The list is archived at
40
41http://oss.sgi.com/projects/devfs/archive/.
42
43-----------------------------------------------------------------------------
44
45Contents
46
47
48What is it?
49
50Why do it?
51
52Who else does it?
53
54How it works
55
56Operational issues (essential reading)
57
58Instructions for the impatient
59Permissions persistence across reboots
60Dealing with drivers without devfs support
61All the way with Devfs
62Other Issues
63Kernel Naming Scheme
64Devfsd Naming Scheme
65Old Compatibility Names
66SCSI Host Probing Issues
67
68
69
70Device drivers currently ported
71
72Allocation of Device Numbers
73
74Questions and Answers
75
76Making things work
77Alternatives to devfs
78What I don't like about devfs
79How to report bugs
80Strange kernel messages
81Compilation problems with devfsd
82
83
84Other resources
85
86Translations of this document
87
88
89-----------------------------------------------------------------------------
90
91
92What is it?
93
94Devfs is an alternative to "real" character and block special devices
95on your root filesystem. Kernel device drivers can register devices by
96name rather than major and minor numbers. These devices will appear in
97devfs automatically, with whatever default ownership and
98protection the driver specified. A daemon (devfsd) can be used to
99override these defaults. Devfs has been in the kernel since 2.3.46.
100
101NOTE that devfs is entirely optional. If you prefer the old
102disc-based device nodes, then simply leave CONFIG_DEVFS_FS=n (the
103default). In this case, nothing will change. ALSO NOTE that if you do
104enable devfs, the defaults are such that full compatibility is
105maintained with the old device names.
106
107There are two aspects to devfs: one is the underlying device
108namespace, which is a namespace just like any mounted filesystem. The
109other aspect is the filesystem code which provides a view of the
110device namespace. The reason I make a distinction is because devfs
111can be mounted many times, with each mount showing the same device
112namespace. Changes made are global to all mounted devfs filesystems.
113Also, because the devfs namespace exists without any devfs mounts, you
114can easily mount the root filesystem by referring to an entry in the
115devfs namespace.
116
117
118The cost of devfs is a small increase in kernel code size and memory
119usage. About 7 pages of code (some of that in __init sections) and 72
120bytes for each entry in the namespace. A modest system has only a
121couple of hundred device entries, so this costs a few more
122pages. Compare this with the suggestion to put /dev on a
123ramdisc.
124
125On a typical machine, the cost is under 0.2 percent. On a modest
126system with 64 MBytes of RAM, the cost is under 0.1 percent. The
127accusations of "bloatware" levelled at devfs are not justified.
128
129-----------------------------------------------------------------------------
130
131
132Why do it?
133
134There are several problems that devfs addresses. Some of these
135problems are more serious than others (depending on your point of
136view), and some can be solved without devfs. However, the totality of
137these problems really calls out for devfs.
138
139The choice is between a patchwork of inefficient user space solutions,
140which are complex and likely to be fragile, and a simple and efficient
141devfs which is robust.
142
143There have been many counter-proposals to devfs, all seeking to
144provide some of the benefits without actually implementing devfs. So
145far there has been an absence of code and no proposed alternative has
146been able to provide all the features that devfs does. Further,
147alternative proposals require far more complexity in user-space (and
148still deliver less functionality than devfs). Some people have the
149mantra of reducing "kernel bloat", but don't consider the effects on
150user-space.
151
152A good solution limits the total complexity of kernel-space and
153user-space.
154
155
156Major&minor allocation
157
158The existing scheme requires the allocation of major and minor device
159numbers for each and every device. This means that a central
160co-ordinating authority is required to issue these device numbers
161(unless you're developing a "private" device driver), in order to
162preserve uniqueness. Devfs shifts the burden to a namespace. This may
163not seem like a huge benefit, but actually it is. Since driver authors
164will naturally choose a device name which reflects the functionality
165of the device, there is far less potential for namespace conflict.
166Solving this requires a kernel change.
167
168/dev management
169
170Because you currently access devices through device nodes, these must
171be created by the system administrator. For standard devices you can
172usually find a MAKEDEV programme which creates all these (hundreds!)
173of nodes. This means that changes in the kernel must be reflected by
174changes in the MAKEDEV programme, or else the system administrator
175creates device nodes by hand.
176
177The basic problem is that there are two separate databases of
178major and minor numbers. One is in the kernel and one is in /dev (or
179in a MAKEDEV programme, if you want to look at it that way). This is
180duplication of information, which is not good practice.
181Solving this requires a kernel change.
182
183/dev growth
184
185A typical /dev has over 1200 nodes! Most of these devices simply don't
186exist because the hardware is not available. A huge /dev increases the
187time to access devices (I'm just referring to the dentry lookup times
188and the time taken to read inodes off disc: the next subsection shows
189some more horrors).
190
191An example of how big /dev can grow is if we consider SCSI devices:
192
193host 6 bits (say up to 64 hosts on a really big machine)
194channel 4 bits (say up to 16 SCSI buses per host)
195id 4 bits
196lun 3 bits
197partition 6 bits
198TOTAL 23 bits
199
200
201This requires 8 Mega (1024*1024) inodes if we want to store all
202possible device nodes. Even if we scrap everything but id,partition
203and assume a single host adapter with a single SCSI bus and only one
204logical unit per SCSI target (id), that's still 10 bits or 1024
205inodes. Each VFS inode takes around 256 bytes (kernel 2.1.78), so
206that's 256 kBytes of inode storage on disc (assuming real inodes take
207a similar amount of space as VFS inodes). This is actually not so bad,
208because disc is cheap these days. Embedded systems would care about
209256 kBytes of /dev inodes, but you could argue that embedded systems
210would have hand-tuned /dev directories. I've had to do just that on my
211embedded systems, but I would rather just leave it to devfs.
212
213Another issue is the time taken to lookup an inode when first
214referenced. Not only does this take time in scanning through a list in
215memory, but also the seek times to read the inodes off disc.
216This could be solved in user-space using a clever programme which
217scanned the kernel logs and deleted /dev entries which are not
218available and created them when they were available. This programme
219would need to be run every time a new module was loaded, which would
220slow things down a lot.
221
222There is an existing programme called scsidev which will automatically
223create device nodes for SCSI devices. It can do this by scanning files
224in /proc/scsi. Unfortunately, to extend this idea to other device
225nodes would require significant modifications to existing drivers (so
226they too would provide information in /proc). This is a non-trivial
227change (I should know: devfs has had to do something similar). Once
228you go to this much effort, you may as well use devfs itself (which
229also provides this information). Furthermore, such a system would
230likely be implemented in an ad-hoc fashion, as different drivers will
231provide their information in different ways.
232
233Devfs is much cleaner, because it (naturally) has a uniform mechanism
234to provide this information: the device nodes themselves!
235
236
237Node to driver file_operations translation
238
239There is an important difference between the way disc-based character
240and block nodes and devfs entries make the connection between an entry
241in /dev and the actual device driver.
242
243With the current 8 bit major and minor numbers the connection between
244disc-based c&b nodes and per-major drivers is done through a
245fixed-length table of 128 entries. The various filesystem types set
246the inode operations for c&b nodes to {chr,blk}dev_inode_operations,
247so when a device is opened a few quick levels of indirection bring us
248to the driver file_operations.
249
250For miscellaneous character devices a second step is required: there
251is a scan for the driver entry with the same minor number as the file
252that was opened, and the appropriate minor open method is called. This
253scanning is done *every time* you open a device node. Potentially, you
254may be searching through dozens of misc. entries before you find your
255open method. While not an enormous performance overhead, this does
256seem pointless.
257
258Linux *must* move beyond the 8 bit major and minor barrier,
259somehow. If we simply increase each to 16 bits, then the indexing
260scheme used for major driver lookup becomes untenable, because the
261major tables (one each for character and block devices) would need to
262be 64 k entries long (512 kBytes on x86, 1 MByte for 64 bit
263systems). So we would have to use a scheme like that used for
264miscellaneous character devices, which means the search time goes up
265linearly with the average number of major device drivers on your
266system. Not all "devices" are hardware, some are higher-level drivers
267like KGI, so you can get more "devices" without adding hardware.
268You can improve this by creating an ordered (balanced:-)
269binary tree, in which case your search time becomes log(N).
270Alternatively, you can use hashing to speed up the search.
271But why do that search at all if you don't have to? Once again, it
272seems pointless.
273
274Note that devfs doesn't use the major&minor system. For devfs
275entries, the connection is done when you lookup the /dev entry. When
276devfs_register() is called, an internal table is appended which has
277the entry name and the file_operations. If the dentry cache doesn't
278have the /dev entry already, this internal table is scanned to get the
279file_operations, and an inode is created. If the dentry cache already
280has the entry, there is *no lookup time* (other than the dentry scan
281itself, but we can't avoid that anyway, and besides Linux dentries
282cream other OS's which don't have them:-). Furthermore, the number of
283node entries in a devfs is only the number of available device
284entries, not the number of *conceivable* entries. Even if you remove
285unnecessary entries in a disc-based /dev, the number of conceivable
286entries remains the same: you just limit yourself in order to save
287space.
288
289Devfs provides a fast connection between a VFS node and the device
290driver, in a scalable way.
291
292/dev as a system administration tool
293
294Right now /dev contains a list of conceivable devices, most of which I
295don't have. Devfs only shows those devices available on my
296system. This means that listing /dev is a handy way of checking what
297devices are available.
298
299Major&minor size
300
301Existing major and minor numbers are limited to 8 bits each. This is
302now a limiting factor for some drivers, particularly the SCSI disc
303driver, which consumes a single major number. Only 16 discs are
304supported, and each disc may have only 15 partitions. Maybe this isn't
305a problem for you, but some of us are building huge Linux systems with
306disc arrays. With devfs an arbitrary pointer can be associated with
307each device entry, which can be used to give an effective 32 bit
308device identifier (i.e. that's like having a 32 bit minor
309number). Since this is private to the kernel, there are no C library
310compatibility issues which you would have with increasing major and
311minor number sizes. See the section on "Allocation of Device Numbers"
312for details on maintaining compatibility with userspace.
313
314Solving this requires a kernel change.
315
316Since writing this, the kernel has been modified so that the SCSI disc
317driver has more major numbers allocated to it and now supports up to
318128 discs. Since these major numbers are non-contiguous (a result of
319unplanned expansion), the implementation is a little more cumbersome
320than originally.
321
322Just like the changes to IPv4 to fix impending limitations in the
323address space, people find ways around the limitations. In the long
324run, however, solutions like IPv6 or devfs can't be put off forever.
325
326Read-only root filesystem
327
328Having your device nodes on the root filesystem means that you can't
329operate properly with a read-only root filesystem. This is because you
330want to change ownerships and protections of tty devices. Existing
331practice prevents you using a CD-ROM as your root filesystem for a
332*real* system. Sure, you can boot off a CD-ROM, but you can't change
333tty ownerships, so it's only good for installing.
334
335Also, you can't use a shared NFS root filesystem for a cluster of
336discless Linux machines (having tty ownerships changed on a common
337/dev is not good). Nor can you embed your root filesystem in a
338ROM-FS.
339
340You can get around this by creating a RAMDISC at boot time, making
341an ext2 filesystem in it, mounting it somewhere and copying the
342contents of /dev into it, then unmounting it and mounting it over
343/dev.
344
345A devfs is a cleaner way of solving this.
346
347Non-Unix root filesystem
348
349Non-Unix filesystems (such as NTFS) can't be used for a root
350filesystem because they variously don't support character and block
351special files or symbolic links. You can't have a separate disc-based
352or RAMDISC-based filesystem mounted on /dev because you need device
353nodes before you can mount these. Devfs can be mounted without any
354device nodes. Devlinks won't work because symlinks aren't supported.
355An alternative solution is to use initrd to mount a RAMDISC initial
356root filesystem (which is populated with a minimal set of device
357nodes), and then construct a new /dev in another RAMDISC, and finally
358switch to your non-Unix root filesystem. This requires clever boot
359scripts and a fragile and conceptually complex boot procedure.
360
361Devfs solves this in a robust and conceptually simple way.
362
363PTY security
364
365Current pseudo-tty (pty) devices are owned by root and read-writable
366by everyone. The user of a pty-pair cannot change
367ownership/protections without being suid-root.
368
369This could be solved with a secure user-space daemon which runs as
370root and does the actual creation of pty-pairs. Such a daemon would
371require modification to *every* programme that wants to use this new
372mechanism. It also slows down creation of pty-pairs.
373
374An alternative is to create a new open_pty() syscall which does much
375the same thing as the user-space daemon. Once again, this requires
376modifications to pty-handling programmes.
377
378The devfs solution allows a device driver to "tag" certain device
379files so that when an unopened device is opened, the ownerships are
380changed to the current euid and egid of the opening process, and the
381protections are changed to the default registered by the driver. When
382the device is closed ownership is set back to root and protections are
383set back to read-write for everybody. No programme need be changed.
384The devpts filesystem provides this auto-ownership feature for Unix98
385ptys. It doesn't support old-style pty devices, nor does it have all
386the other features of devfs.
387
388Intelligent device management
389
390Devfs implements a simple yet powerful protocol for communication with
391a device management daemon (devfsd) which runs in user space. It is
392possible to send a message (either synchronously or asynchronously) to
393devfsd on any event, such as registration/unregistration of device
394entries, opening and closing devices, looking up inodes, scanning
395directories and more. This has many possibilities. Some of these are
396already implemented. See:
397
398
399http://www.atnf.csiro.au/~rgooch/linux/
400
401Device entry registration events can be used by devfsd to change
402permissions of newly-created device nodes. This is one mechanism to
403control device permissions.
404
405Device entry registration/unregistration events can be used to run
406programmes or scripts. This can be used to provide automatic mounting
407of filesystems when a new block device medium is inserted into the
408drive.
409
410Asynchronous device open and close events can be used to implement
411clever permissions management. For example, the default permissions on
412/dev/dsp do not allow everybody to read from the device. This is
413sensible, as you don't want some remote user recording what you say at
414your console. However, the console user is also prevented from
415recording. This behaviour is not desirable. With asynchronous device
416open and close events, you can have devfsd run a programme or script
417when console devices are opened to change the ownerships for *other*
418device nodes (such as /dev/dsp). On closure, you can run a different
419script to restore permissions. An advantage of this scheme over
420modifying the C library tty handling is that this works even if your
421programme crashes (how many times have you seen the utmp database with
422lingering entries for non-existent logins?).
423
424Synchronous device open events can be used to perform intelligent
425device access protections. Before the device driver open() method is
426called, the daemon must first validate the open attempt, by running an
427external programme or script. This is far more flexible than access
428control lists, as access can be determined on the basis of other
429system conditions instead of just the UID and GID.
430
431Inode lookup events can be used to authenticate module autoload
432requests. Instead of using kmod directly, the event is sent to
433devfsd which can implement an arbitrary authentication before loading
434the module itself.
435
436Inode lookup events can also be used to construct arbitrary
437namespaces, without having to resort to populating devfs with symlinks
438to devices that don't exist.
439
440Speculative Device Scanning
441
442Consider an application (like cdparanoia) that wants to find all
443CD-ROM devices on the system (SCSI, IDE and other types), whether or
444not their respective modules are loaded. The application must
445speculatively open certain device nodes (such as /dev/sr0 for the SCSI
446CD-ROMs) in order to make sure the module is loaded. This requires
447that all Linux distributions follow the standard device naming scheme
448(last time I looked RedHat did things differently). Devfs solves the
449naming problem.
450
451The same application also wants to see which devices are actually
452available on the system. With the existing system it needs to read the
453/dev directory and speculatively open each /dev/sr* device to
454determine if the device exists or not. With a large /dev this is an
455inefficient operation, especially if there are many /dev/sr* nodes. A
456solution like scsidev could reduce the number of /dev/sr* entries (but
457of course that also requires all that inefficient directory scanning).
458
459With devfs, the application can open the /dev/sr directory
460(which triggers the module autoloading if required), and proceed to
461read /dev/sr. Since only the available devices will have
462entries, there are no inefficiencies in directory scanning or device
463openings.
464
465-----------------------------------------------------------------------------
466
467Who else does it?
468
469FreeBSD has a devfs implementation. Solaris and AIX each have a
470pseudo-devfs (something akin to scsidev but for all devices, with some
471unspecified kernel support). BeOS, Plan9 and QNX also have it. SGI's
472IRIX 6.4 and above also have a device filesystem.
473
474While we shouldn't just automatically do something because others do
475it, we should not ignore the work of others either. FreeBSD has a lot
476of competent people working on it, so their opinion should not be
477blithely ignored.
478
479-----------------------------------------------------------------------------
480
481
482How it works
483
484Registering device entries
485
486For every entry (device node) in a devfs-based /dev a driver must call
487devfs_register(). This adds the name of the device entry, the
488file_operations structure pointer and a few other things to an
489internal table. Device entries may be added and removed at any
490time. When a device entry is registered, it automagically appears in
491any mounted devfs'.
492
493Inode lookup
494
495When a lookup operation on an entry is performed and if there is no
496driver information for that entry devfs will attempt to call
497devfsd. If still no driver information can be found then a negative
498dentry is yielded and the next stage operation will be called by the
499VFS (such as create() or mknod() inode methods). If driver information
500can be found, an inode is created (if one does not exist already) and
501all is well.
502
503Manually creating device nodes
504
505The mknod() method allows you to create an ordinary named pipe in the
506devfs, or you can create a character or block special inode if one
507does not already exist. You may wish to create a character or block
508special inode so that you can set permissions and ownership. Later, if
509a device driver registers an entry with the same name, the
510permissions, ownership and times are retained. This is how you can set
511the protections on a device even before the driver is loaded. Once you
512create an inode it appears in the directory listing.
513
514Unregistering device entries
515
516A device driver calls devfs_unregister() to unregister an entry.
517
518Chroot() gaols
519
5202.2.x kernels
521
522The semantics of inode creation are different when devfs is mounted
523with the "explicit" option. Now, when a device entry is registered, it
524will not appear until you use mknod() to create the device. It doesn't
525matter if you mknod() before or after the device is registered with
526devfs_register(). The purpose of this behaviour is to support
527chroot(2) gaols, where you want to mount a minimal devfs inside the
528gaol. Only the devices you specifically want to be available (through
529your mknod() setup) will be accessible.
530
5312.4.x kernels
532
533As of kernel 2.3.99, the VFS has had the ability to rebind parts of
534the global filesystem namespace into another part of the namespace.
535This now works even at the leaf-node level, which means that
536individual files and device nodes may be bound into other parts of the
537namespace. This is like making links, but better, because it works
538across filesystems (unlike hard links) and works through chroot()
539gaols (unlike symbolic links).
540
541Because of these improvements to the VFS, the multi-mount capability
542in devfs is no longer needed. The administrator may create a minimal
543device tree inside a chroot(2) gaol by using VFS bindings. As this
544provides most of the features of the devfs multi-mount capability, I
545removed the multi-mount support code (after issuing an RFC). This
546yielded code size reductions and simplifications.
547
548If you want to construct a minimal chroot() gaol, the following
549command should suffice:
550
551mount --bind /dev/null /gaol/dev/null
552
553
554Repeat for other device nodes you want to expose. Simple!
555
556-----------------------------------------------------------------------------
557
558
559Operational issues
560
561
562Instructions for the impatient
563
564Nobody likes reading documentation. People just want to get in there
565and play. So this section tells you quickly the steps you need to take
566to run with devfs mounted over /dev. Skip these steps and you will end
567up with a nearly unbootable system. Subsequent sections describe the
568issues in more detail, and discuss non-essential configuration
569options.
570
571Devfsd
572OK, if you're reading this, I assume you want to play with
573devfs. First you should ensure that /usr/src/linux contains a
574recent kernel source tree. Then you need to compile devfsd, the device
575management daemon, available at
576
577http://www.atnf.csiro.au/~rgooch/linux/.
578Because the kernel has a naming scheme
579which is quite different from the old naming scheme, you need to
580install devfsd so that software and configuration files that use the
581old naming scheme will not break.
582
583Compile and install devfsd. You will be provided with a default
584configuration file /etc/devfsd.conf which will provide
585compatibility symlinks for the old naming scheme. Don't change this
586config file unless you know what you're doing. Even if you think you
587do know what you're doing, don't change it until you've followed all
588the steps below and booted a devfs-enabled system and verified that it
589works.
590
591Now edit your main system boot script so that devfsd is started at the
592very beginning (before any filesystem
593checks). /etc/rc.d/rc.sysinit is often the main boot script
594on systems with SysV-style boot scripts. On systems with BSD-style
595boot scripts it is often /etc/rc. Also check
596/sbin/rc.
597
598NOTE that the line you put into the boot
599script should be exactly:
600
601/sbin/devfsd /dev
602
603DO NOT use some special daemon-launching
604programme, otherwise the boot script may not wait for devfsd to finish
605initialising.
606
607System Libraries
608There may still be some problems because of broken software making
609assumptions about device names. In particular, some software does not
610handle devices which are symbolic links. If you are running a libc 5
611based system, install libc 5.4.44 (if you have libc 5.4.46, go back to
612libc 5.4.44, which is actually correct). If you are running a glibc
613based system, make sure you have glibc 2.1.3 or later.
614
615/etc/securetty
616PAM (Pluggable Authentication Modules) is supposed to be a flexible
617mechanism for providing better user authentication and access to
618services. Unfortunately, it's also fragile, complex and undocumented
619(check out RedHat 6.1, and probably other distributions as well). PAM
620has problems with symbolic links. Append the following lines to your
621/etc/securetty file:
622
623vc/1
624vc/2
625vc/3
626vc/4
627vc/5
628vc/6
629vc/7
630vc/8
631
632This will not weaken security. If you have a version of util-linux
633earlier than 2.10.h, please upgrade to 2.10.h or later. If you
634absolutely cannot upgrade, then also append the following lines to
635your /etc/securetty file:
636
6371
6382
6393
6404
6415
6426
6437
6448
645
646This may potentially weaken security by allowing root logins over the
647network (a password is still required, though). However, since there
648are problems with dealing with symlinks, I'm suspicious of the level
649of security offered in any case.
650
651XFree86
652While not essential, it's probably a good idea to upgrade to XFree86
6534.0, as patches went in to make it more devfs-friendly. If you don't,
654you'll probably need to apply the following patch to
655/etc/security/console.perms so that ordinary users can run
656startx. Note that not all distributions have this file (e.g. Debian),
657so if it's not present, don't worry about it.
658
659--- /etc/security/console.perms.orig Sat Apr 17 16:26:47 1999
660+++ /etc/security/console.perms Fri Feb 25 23:53:55 2000
661@@ -14,7 +14,7 @@
662 # man 5 console.perms
663
664 # file classes -- these are regular expressions
665-<console>=tty[0-9][0-9]* :[0-9]\.[0-9] :[0-9]
666+<console>=tty[0-9][0-9]* vc/[0-9][0-9]* :[0-9]\.[0-9] :[0-9]
667
668 # device classes -- these are shell-style globs
669 <floppy>=/dev/fd[0-1]*
670
671If the patch does not apply, then change the line:
672
673<console>=tty[0-9][0-9]* :[0-9]\.[0-9] :[0-9]
674
675with:
676
677<console>=tty[0-9][0-9]* vc/[0-9][0-9]* :[0-9]\.[0-9] :[0-9]
678
679
680Disable devpts
681I've had a report of devpts mounted on /dev/pts not working
682correctly. Since devfs will also manage /dev/pts, there is no
683need to mount devpts as well. You should either edit your
684/etc/fstab so devpts is not mounted, or disable devpts from
685your kernel configuration.
686
687Unsupported drivers
688Not all drivers have devfs support. If you depend on one of these
689drivers, you will need to create a script or tarfile that you can use
690at boot time to create device nodes as appropriate. There is a
691section which describes this. Another
692section lists the drivers which have
693devfs support.
694
695/dev/mouse
696
697Many distributions configure /dev/mouse to be the mouse device
698for XFree86 and GPM. I actually think this is a bad idea, because it
699adds another level of indirection. When looking at a config file, if
700you see /dev/mouse you're left wondering which mouse
701is being referred to. Hence I recommend putting the actual mouse
702device (for example /dev/psaux) into your
703/etc/X11/XF86Config file (and similarly for the GPM
704configuration file).
705
706Alternatively, use the same technique used for unsupported drivers
707described above.
708
709The Kernel
710Finally, you need to make sure devfs is compiled into your kernel. Set
711CONFIG_EXPERIMENTAL=y, CONFIG_DEVFS_FS=y and CONFIG_DEVFS_MOUNT=y by
712using your favourite configuration tool (e.g. make config or
713make xconfig) and then make clean and then recompile your kernel and
714modules. At boot, devfs will be mounted onto /dev.
715
716If you encounter problems booting (for example if you forgot a
717configuration step), you can pass devfs=nomount at the kernel
718boot command line. This will prevent the kernel from mounting devfs at
719boot time onto /dev.
720
721In general, a kernel built with CONFIG_DEVFS_FS=y but without mounting
722devfs onto /dev is completely safe, and requires no
723configuration changes. One exception to take note of is when
724LABEL= directives are used in /etc/fstab. In this
725case you will be unable to boot properly. This is because the
726mount(8) programme uses /proc/partitions as part of
727the volume label search process, and the device names it finds are not
728available, because setting CONFIG_DEVFS_FS=y changes the names in
729/proc/partitions, irrespective of whether devfs is mounted.
730
731Now you've finished all the steps required. You're now ready to boot
732your shiny new kernel. Enjoy.
733
734Changing the configuration
735
736OK, you've now booted a devfs-enabled system, and everything works.
737Now you may feel like changing the configuration (common targets are
738/etc/fstab and /etc/devfsd.conf). Since you have a
739system that works, if you make any changes and it doesn't work, you
740now know that you only have to restore your configuration files to the
741default and it will work again.
742
743
744Permissions persistence across reboots
745
746If you don't use mknod(2) to create a device file, nor use chmod(2) or
747chown(2) to change the ownerships/permissions, the inode ctime will
748remain at 0 (the epoch, 12 am, 1-JAN-1970, GMT). Anything with a ctime
749later than this has had its ownership/permissions changed. Hence, a
750simple script or programme may be used to tar up all changed inodes,
751prior to shutdown. Although effective, many consider this approach a
752kludge.
753
754A much better approach is to use devfsd to save and restore
755permissions. It may be configured to record changes in permissions and
756will save them in a database (in fact a directory tree), and restore
757these upon boot. This is an efficient method and results in immediate
758saving of current permissions (unlike the tar approach, which saves
759permissions at some unspecified future time).
760
761The default configuration file supplied with devfsd has config entries
762which you may uncomment to enable persistence management.
763
764If you decide to use the tar approach anyway, be aware that tar will
765first unlink(2) an inode before creating a new device node. The
766unlink(2) has the effect of breaking the connection between a devfs
767entry and the device driver. If you use the "devfs=only" boot option,
768you lose access to the device driver, requiring you to reload the
769module. I consider this a bug in tar (there is no real need to
770unlink(2) the inode first).
771
772Alternatively, you can use devfsd to provide more sophisticated
773management of device permissions. You can use devfsd to store
774permissions for whole groups of devices with a single configuration
775entry, rather than the conventional single entry per device entry.
776
777Permissions database stored in mounted-over /dev
778
779If you wish to save and restore your device permissions into the
780disc-based /dev while still mounting devfs onto /dev
781you may do so. This requires a 2.4.x kernel (in fact, 2.3.99 or
782later), which has the VFS binding facility. You need to do the
783following to set this up:
784
785
786
787make sure the kernel does not mount devfs at boot time
788
789
790make sure you have a correct /dev/console entry in your
791root file-system (where your disc-based /dev lives)
792
793create the /dev-state directory
794
795
796add the following lines near the very beginning of your boot
797scripts:
798
799mount --bind /dev /dev-state
800mount -t devfs none /dev
801devfsd /dev
802
803
804
805
806add the following lines to your /etc/devfsd.conf file:
807
808REGISTER ^pt[sy] IGNORE
809CREATE ^pt[sy] IGNORE
810CHANGE ^pt[sy] IGNORE
811DELETE ^pt[sy] IGNORE
812REGISTER .* COPY /dev-state/$devname $devpath
813CREATE .* COPY $devpath /dev-state/$devname
814CHANGE .* COPY $devpath /dev-state/$devname
815DELETE .* CFUNCTION GLOBAL unlink /dev-state/$devname
816RESTORE /dev-state
817
818Note that the sample devfsd.conf file contains these lines,
819as well as other sample configurations you may find useful. See the
820devfsd distribution.
821
822
823reboot.
824
825
826
827
828Permissions database stored in normal directory
829
830If you are using an older kernel which doesn't support VFS binding,
831then you won't be able to have the permissions database in a
832mounted-over /dev. However, you can still use a regular
833directory to store the database. The sample /etc/devfsd.conf
834file above may still be used. You will need to create the
835/dev-state directory prior to installing devfsd. If you have
836old permissions in /dev, then just copy (or move) the device
837nodes over to the new directory.
838
839Which method is better?
840
841The best method is to have the permissions database stored in the
842mounted-over /dev. This is because you will not need to copy
843device nodes over to /dev-state, and because it allows you to
844switch between devfs and non-devfs kernels, without requiring you to
845copy permissions between /dev-state (for devfs) and
846/dev (for non-devfs).
847
848
849Dealing with drivers without devfs support
850
851Currently, not all device drivers in the kernel have been modified to
852use devfs. Device drivers which do not yet have devfs support will not
853automagically appear in devfs. The simplest way to create device nodes
854for these drivers is to unpack a tarfile containing the required
855device nodes. You can do this in your boot scripts. All your drivers
856will now work as before.
857
858Hopefully for most people devfs will have enough support so that they
859can mount devfs directly over /dev without losing most functionality
860(i.e. losing access to various devices). As of 22-JAN-1998 (devfs
861patch version 10) I am now running this way. All the devices I have
862are available in devfs, so I don't lose anything.
863
864WARNING: if your configuration requires the old-style device names
865(i.e. /dev/hda1 or /dev/sda1), you must install devfsd and configure
866it to maintain compatibility entries. It is almost certain that you
867will require this. Note that the kernel creates a compatibility entry
868for the root device, so you don't need initrd.
869
870Note that you no longer need to mount devpts if you use Unix98 PTYs,
871as devfs can manage /dev/pts itself. This saves you some RAM, as you
872don't need to compile and install devpts. Note that some versions of
873glibc have a bug with Unix98 pty handling on devfs systems. Contact
874the glibc maintainers for a fix. Glibc 2.1.3 has the fix.
875
876Note also that apart from editing /etc/fstab, other things will need
877to be changed if you *don't* install devfsd. Some software (like the X
878server) hard-wire device names in their source. It really is much
879easier to install devfsd so that compatibility entries are created.
880You can then slowly migrate your system to using the new device names
881(for example, by starting with /etc/fstab), and then limiting the
882compatibility entries that devfsd creates.
883
884IF YOU CONFIGURE TO MOUNT DEVFS AT BOOT, MAKE SURE YOU INSTALL DEVFSD
885BEFORE YOU BOOT A DEVFS-ENABLED KERNEL!
886
887Now that devfs has gone into the 2.3.46 kernel, I'm getting a lot of
888reports back. Many of these are because people are trying to run
889without devfsd, and hence some things break. Please just run devfsd if
890things break. I want to concentrate on real bugs rather than
891misconfiguration problems at the moment. If people are willing to fix
892bugs/false assumptions in other code (i.e. glibc, X server) and submit
893that to the respective maintainers, that would be great.
894
895
896All the way with Devfs
897
898The devfs kernel patch creates a rationalised device tree. As stated
899above, if you want to keep using the old /dev naming scheme,
900you just need to configure devfsd appropriately (see the man
901page). People who prefer the old names can ignore this section. For
902those of us who like the rationalised names and an uncluttered
903/dev, read on.
904
905If you don't run devfsd, or don't enable compatibility entry
906management, then you will have to configure your system to use the new
907names. For example, you will then need to edit your
908/etc/fstab to use the new disc naming scheme. If you want to
909be able to boot non-devfs kernels, you will need compatibility
910symlinks in the underlying disc-based /dev pointing back to
911the old-style names for when you boot a kernel without devfs.
912
913You can selectively decide which devices you want compatibility
914entries for. For example, you may only want compatibility entries for
915BSD pseudo-terminal devices (otherwise you'll have to patch your C
916library or use Unix98 ptys instead). It's just a matter of putting in
917the correct regular expression into /etc/devfsd.conf.
918
919There are other choices of naming schemes that you may prefer. For
920example, I don't use the kernel-supplied
921names, because they are too verbose. A common misconception is
922that the kernel-supplied names are meant to be used directly in
923configuration files. This is not the case. They are designed to
924reflect the layout of the devices attached and to provide easy
925classification.
926
927If you like the kernel-supplied names, that's fine. If you don't then
928you should be using devfsd to construct a namespace more to your
929liking. Devfsd has built-in code to construct a
930namespace that is both logical and easy to
931manage. In essence, it creates a convenient abbreviation of the
932kernel-supplied namespace.
933
934You are of course free to build your own namespace. Devfsd has all the
935infrastructure required to make this easy for you. All you need do is
936write a script. You can even write some C code and devfsd can load the
937shared object as a callable extension.
938
939
940Other Issues
941
942The init programme
943Another thing to take note of is whether your init programme
944creates a Unix socket /dev/telinit. Some versions of init
945create /dev/telinit so that the telinit programme can
946communicate with the init process. If you have such a system you need
947to make sure that devfs is mounted over /dev *before* init
948starts. In other words, you can't leave the mounting of devfs to
949/etc/rc, since this is executed after init. Other
950versions of init require a named pipe /dev/initctl
951which must exist *before* init starts. Once again, you need to
952mount devfs and then create the named pipe *before* init
953starts.
954
955The default behaviour now is not to mount devfs onto /dev at
956boot time for 2.3.x and later kernels. You can correct this with the
957"devfs=mount" boot option. This solves any problems with init,
958and also prevents the dreaded:
959
960Cannot open initial console
961
962message. For 2.2.x kernels where you need to apply the devfs patch,
963the default is to mount.
964
965If you have automatic mounting of devfs onto /dev then you
966may need to create /dev/initctl in your boot scripts. The
967following lines should suffice:
968
969mknod /dev/initctl p
970kill -SIGUSR1 1 # tell init that /dev/initctl now exists
971
972Alternatively, if you don't want the kernel to mount devfs onto
973/dev then you could use the following procedure as a
974guideline for how to get around /dev/initctl problems:
975
976# cd /sbin
977# mv init init.real
978# cat > init
979#! /bin/sh
980mount -n -t devfs none /dev
981mknod /dev/initctl p
982exec /sbin/init.real $*
983[control-D]
984# chmod a+x init
985
986Note that newer versions of init create /dev/initctl
987automatically, so you don't have to worry about this.
988
989Module autoloading
990You will need to configure devfsd to enable module
991autoloading. The following lines should be placed in your
992/etc/devfsd.conf file:
993
994LOOKUP .* MODLOAD
995
996
997As of devfsd-v1.3.10, a generic /etc/modules.devfs
998configuration file is installed, which is used by the MODLOAD
999action. This should be sufficient for most configurations. If you
1000require further configuration, edit your /etc/modules.conf
1001file. The way module autoloading works with devfs is:
1002
1003
1004a process attempts to lookup a device node (e.g. /dev/fred)
1005
1006
1007if that device node does not exist, the full pathname is passed to
1008devfsd as a string
1009
1010
1011devfsd will pass the string to the modprobe programme (provided the
1012configuration line shown above is present), and specifies that
1013/etc/modules.devfs is the configuration file
1014
1015
1016/etc/modules.devfs includes /etc/modules.conf to
1017access local configurations
1018
1019modprobe will search its configuration files, looking for an alias
1020that translates the pathname into a module name
1021
1022
1023the translated pathname is then used to load the module.
1024
1025
1026If you wanted a lookup of /dev/fred to load the
1027mymod module, you would require the following configuration
1028line in /etc/modules.conf:
1029
1030alias /dev/fred mymod
1031
1032The /etc/modules.devfs configuration file provides many such
1033aliases for standard device names. If you look closely at this file,
1034you will note that some modules require multiple alias configuration
1035lines. This is required to support module autoloading for old and new
1036device names.
1037
1038Mounting root off a devfs device
1039If you wish to mount root off a devfs device when you pass the
1040"devfs=only" boot option, then you need to pass in the
1041"root=<device>" option to the kernel when booting. If you use
1042LILO, then you must have this in lilo.conf:
1043
1044append = "root=<device>"
1045
1046Surprised? Yep, so was I. It turns out if you have (as most people
1047do):
1048
1049root = <device>
1050
1051
1052then LILO will determine the device number of <device> and will
1053write that device number into a special place in the kernel image
1054before starting the kernel, and the kernel will use that device number
1055to mount the root filesystem. So, using the "append" variety ensures
1056that LILO passes the root filesystem device as a string, which devfs
1057can then use.
1058
1059Note that this isn't an issue if you don't pass "devfs=only".
1060
1061TTY issues
1062The ttyname(3) function in some versions of the C library makes
1063false assumptions about device entries which are symbolic links. The
1064tty(1) programme is one that depends on this function. I've
1065written a patch to libc 5.4.43 which fixes this. This has been
1066included in libc 5.4.44 and a similar fix is in glibc 2.1.3.
1067
1068
1069Kernel Naming Scheme
1070
1071The kernel provides a default naming scheme. This scheme is designed
1072to make it easy to search for specific devices or device types, and to
1073view the available devices. Some device types (such as hard discs),
1074have a directory of entries, making it easy to see what devices of
1075that class are available. Often, the entries are symbolic links into a
1076directory tree that reflects the topology of available devices. The
1077topological tree is useful for finding how your devices are arranged.
1078
1079Below is a list of the naming schemes for the most common drivers. A
1080list of reserved device names is
1081available for reference. Please send email to
1082rgooch@atnf.csiro.au to obtain an allocation. Please be
1083patient (the maintainer is busy). An alternative name may be allocated
1084instead of the requested name, at the discretion of the maintainer.
1085
1086Disc Devices
1087
1088All discs, whether SCSI, IDE or whatever, are placed under the
1089/dev/discs hierarchy:
1090
1091 /dev/discs/disc0 first disc
1092 /dev/discs/disc1 second disc
1093
1094
1095Each of these entries is a symbolic link to the directory for that
1096device. The device directory contains:
1097
1098 disc for the whole disc
1099 part* for individual partitions
1100
1101
1102CD-ROM Devices
1103
1104All CD-ROMs, whether SCSI, IDE or whatever, are placed under the
1105/dev/cdroms hierarchy:
1106
1107 /dev/cdroms/cdrom0 first CD-ROM
1108 /dev/cdroms/cdrom1 second CD-ROM
1109
1110
1111Each of these entries is a symbolic link to the real device entry for
1112that device.
1113
1114Tape Devices
1115
1116All tapes, whether SCSI, IDE or whatever, are placed under the
1117/dev/tapes hierarchy:
1118
1119 /dev/tapes/tape0 first tape
1120 /dev/tapes/tape1 second tape
1121
1122
1123Each of these entries is a symbolic link to the directory for that
1124device. The device directory contains:
1125
1126 mt for mode 0
1127 mtl for mode 1
1128 mtm for mode 2
1129 mta for mode 3
1130 mtn for mode 0, no rewind
1131 mtln for mode 1, no rewind
1132 mtmn for mode 2, no rewind
1133 mtan for mode 3, no rewind
1134
1135
1136SCSI Devices
1137
1138To uniquely identify any SCSI device requires the following
1139information:
1140
1141 controller (host adapter)
1142 bus (SCSI channel)
1143 target (SCSI ID)
1144 unit (Logical Unit Number)
1145
1146
1147All SCSI devices are placed under /dev/scsi (assuming devfs
1148is mounted on /dev). Hence, a SCSI device with the following
1149parameters: c=1,b=2,t=3,u=4 would appear as:
1150
1151 /dev/scsi/host1/bus2/target3/lun4 device directory
1152
1153
1154Inside this directory, a number of device entries may be created,
1155depending on which SCSI device-type drivers were installed.
1156
1157See the section on the disc naming scheme to see what entries the SCSI
1158disc driver creates.
1159
1160See the section on the tape naming scheme to see what entries the SCSI
1161tape driver creates.
1162
1163The SCSI CD-ROM driver creates:
1164
1165 cd
1166
1167
1168The SCSI generic driver creates:
1169
1170 generic
1171
1172
1173IDE Devices
1174
1175To uniquely identify any IDE device requires the following
1176information:
1177
1178 controller
1179 bus (aka. primary/secondary)
1180 target (aka. master/slave)
1181 unit
1182
1183
1184All IDE devices are placed under /dev/ide, and use a similar
1185naming scheme to the SCSI subsystem.
1186
1187XT Hard Discs
1188
1189All XT discs are placed under /dev/xd. The first XT disc has
1190the directory /dev/xd/disc0.
1191
1192TTY devices
1193
1194The tty devices now appear as:
1195
1196 New name Old-name Device Type
1197 -------- -------- -----------
1198 /dev/tts/{0,1,...} /dev/ttyS{0,1,...} Serial ports
1199 /dev/cua/{0,1,...} /dev/cua{0,1,...} Call out devices
1200 /dev/vc/0 /dev/tty Current virtual console
1201 /dev/vc/{1,2,...} /dev/tty{1...63} Virtual consoles
1202 /dev/vcc/{0,1,...} /dev/vcs{1...63} Virtual consoles
1203 /dev/pty/m{0,1,...} /dev/ptyp?? PTY masters
1204 /dev/pty/s{0,1,...} /dev/ttyp?? PTY slaves
1205
1206
1207RAMDISCS
1208
1209The RAMDISCS are placed in their own directory, and are named thus:
1210
1211 /dev/rd/{0,1,2,...}
1212
1213
1214Meta Devices
1215
1216The meta devices are placed in their own directory, and are named
1217thus:
1218
1219 /dev/md/{0,1,2,...}
1220
1221
1222Floppy discs
1223
1224Floppy discs are placed in the /dev/floppy directory.
1225
1226Loop devices
1227
1228Loop devices are placed in the /dev/loop directory.
1229
1230Sound devices
1231
1232Sound devices are placed in the /dev/sound directory
1233(audio, sequencer, ...).
1234
1235
1236Devfsd Naming Scheme
1237
1238Devfsd provides a naming scheme which is a convenient abbreviation of
1239the kernel-supplied namespace. In some
1240cases, the kernel-supplied naming scheme is quite convenient, so
1241devfsd does not provide another naming scheme. The convenience names
1242that devfsd creates are in fact the same names as the original devfs
1243kernel patch created (before Linus mandated the Big Name
1244Change). These are referred to as "new compatibility entries".
1245
1246In order to configure devfsd to create these convenience names, the
1247following lines should be placed in your /etc/devfsd.conf:
1248
1249REGISTER .* MKNEWCOMPAT
1250UNREGISTER .* RMNEWCOMPAT
1251
1252This will cause devfsd to create (and destroy) symbolic links which
1253point to the kernel-supplied names.
1254
1255SCSI Hard Discs
1256
1257All SCSI discs are placed under /dev/sd (assuming devfs is
1258mounted on /dev). Hence, a SCSI disc with the following
1259parameters: c=1,b=2,t=3,u=4 would appear as:
1260
1261 /dev/sd/c1b2t3u4 for the whole disc
1262 /dev/sd/c1b2t3u4p5 for the 5th partition
1263 /dev/sd/c1b2t3u4p5s6 for the 6th slice in the 5th partition
1264
1265
1266SCSI Tapes
1267
1268All SCSI tapes are placed under /dev/st. A similar naming
1269scheme is used as for SCSI discs. A SCSI tape with the
1270parameters: c=1,b=2,t=3,u=4 would appear as:
1271
1272 /dev/st/c1b2t3u4m0 for mode 0
1273 /dev/st/c1b2t3u4m1 for mode 1
1274 /dev/st/c1b2t3u4m2 for mode 2
1275 /dev/st/c1b2t3u4m3 for mode 3
1276 /dev/st/c1b2t3u4m0n for mode 0, no rewind
1277 /dev/st/c1b2t3u4m1n for mode 1, no rewind
1278 /dev/st/c1b2t3u4m2n for mode 2, no rewind
1279 /dev/st/c1b2t3u4m3n for mode 3, no rewind
1280
1281
1282SCSI CD-ROMs
1283
1284All SCSI CD-ROMs are placed under /dev/sr. A similar naming
1285scheme is used as for SCSI discs. A SCSI CD-ROM with the
1286parameters: c=1,b=2,t=3,u=4 would appear as:
1287
1288 /dev/sr/c1b2t3u4
1289
1290
1291SCSI Generic Devices
1292
1293The generic (aka. raw) interface for all SCSI devices is placed under
1294/dev/sg. A similar naming scheme is used as for SCSI discs. A
1295SCSI generic device with the parameters: c=1,b=2,t=3,u=4 would appear
1296as:
1297
1298 /dev/sg/c1b2t3u4
1299
1300
1301IDE Hard Discs
1302
1303All IDE discs are placed under /dev/ide/hd, using a similar
1304convention to SCSI discs. The following mappings exist between the new
1305and the old names:
1306
1307 /dev/hda /dev/ide/hd/c0b0t0u0
1308 /dev/hdb /dev/ide/hd/c0b0t1u0
1309 /dev/hdc /dev/ide/hd/c0b1t0u0
1310 /dev/hdd /dev/ide/hd/c0b1t1u0
1311
1312
1313IDE Tapes
1314
1315A similar naming scheme is used as for IDE discs. The entries will
1316appear in the /dev/ide/mt directory.
1317
1318IDE CD-ROM
1319
1320A similar naming scheme is used as for IDE discs. The entries will
1321appear in the /dev/ide/cd directory.
1322
1323IDE Floppies
1324
1325A similar naming scheme is used as for IDE discs. The entries will
1326appear in the /dev/ide/fd directory.
1327
1328XT Hard Discs
1329
1330All XT discs are placed under /dev/xd. The first XT disc
1331would appear as /dev/xd/c0t0.
1332
1333
1334Old Compatibility Names
1335
1336The old compatibility names are the legacy device names, such as
1337/dev/hda, /dev/sda, /dev/rtc and so on.
1338Devfsd can be configured to create compatibility symlinks so that you
1339may continue to use the old names in your configuration files and so
1340that old applications will continue to function correctly.
1341
1342In order to configure devfsd to create these legacy names, the
1343following lines should be placed in your /etc/devfsd.conf:
1344
1345REGISTER .* MKOLDCOMPAT
1346UNREGISTER .* RMOLDCOMPAT
1347
1348This will cause devfsd to create (and destroy) symbolic links which
1349point to the kernel-supplied names.
1350
1351
1352-----------------------------------------------------------------------------
1353
1354
1355Device drivers currently ported
1356
1357- All miscellaneous character devices support devfs (this is done
1358 transparently through misc_register())
1359
1360- SCSI discs and generic hard discs
1361
1362- Character memory devices (null, zero, full and so on)
1363 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
1364
1365- Loop devices (/dev/loop?)
1366
1367- TTY devices (console, serial ports, terminals and pseudo-terminals)
1368 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
1369
1370- SCSI tapes (/dev/scsi and /dev/tapes)
1371
1372- SCSI CD-ROMs (/dev/scsi and /dev/cdroms)
1373
1374- SCSI generic devices (/dev/scsi)
1375
1376- RAMDISCS (/dev/ram?)
1377
1378- Meta Devices (/dev/md*)
1379
1380- Floppy discs (/dev/floppy)
1381
1382- Parallel port printers (/dev/printers)
1383
1384- Sound devices (/dev/sound)
1385 Thanks to Eric Dumas <dumas@linux.eu.org> and
1386 C. Scott Ananian <cananian@alumni.princeton.edu>
1387
1388- Joysticks (/dev/joysticks)
1389
1390- Sparc keyboard (/dev/kbd)
1391
1392- DSP56001 digital signal processor (/dev/dsp56k)
1393
1394- Apple Desktop Bus (/dev/adb)
1395
1396- Coda network file system (/dev/cfs*)
1397
1398- Virtual console capture devices (/dev/vcc)
1399 Thanks to Dennis Hou <smilax@mindmeld.yi.org>
1400
1401- Frame buffer devices (/dev/fb)
1402
1403- Video capture devices (/dev/v4l)
1404
1405
1406-----------------------------------------------------------------------------
1407
1408
1409Allocation of Device Numbers
1410
1411Devfs allows you to write a driver which doesn't need to allocate a
1412device number (major&minor numbers) for the internal operation of the
1413kernel. However, there are a number of userspace programmes that use
1414the device number as a unique handle for a device. An example is the
1415find programme, which uses device numbers to determine whether
1416an inode is on a different filesystem than another inode. The device
1417number used is the one for the block device which a filesystem is
1418using. To preserve compatibility with userspace programmes, block
1419devices using devfs need to have unique device numbers allocated to
1420them. Furthermore, POSIX specifies device numbers, so some kind of
1421device number needs to be presented to userspace.
1422
1423The simplest option (especially when porting drivers to devfs) is to
1424keep using the old major and minor numbers. Devfs will take whatever
1425values are given for major&minor and pass them onto userspace.
1426
1427This device number is a 16 bit number, so this leaves plenty of space
1428for large numbers of discs and partitions. This scheme can also be
1429used for character devices, in particular the tty devices, which are
1430currently limited to 256 pseudo-ttys (this limits the total number of
1431simultaneous xterms and remote logins). Note that the device number
1432is limited to the range 36864-61439 (majors 144-239), in order to
1433avoid any possible conflicts with existing official allocations.
1434
1435Please note that using dynamically allocated block device numbers may
1436break the NFS daemons (both user and kernel mode), which expect dev_t
1437for a given device to be constant over the lifetime of remote mounts.
1438
1439A final note on this scheme: since it doesn't increase the size of
1440device numbers, there are no compatibility issues with userspace.
1441
1442-----------------------------------------------------------------------------
1443
1444
1445Questions and Answers
1446
1447
1448Making things work
1449Alternatives to devfs
1450What I don't like about devfs
1451How to report bugs
1452Strange kernel messages
1453Compilation problems with devfsd
1454
1455
1456
1457Making things work
1458
1459Here are some common questions and answers.
1460
1461
1462
1463Devfsd doesn't start
1464
1465Make sure you have compiled and installed devfsd
1466Make sure devfsd is being started from your boot
1467scripts
1468Make sure you have configured your kernel to enable devfs (see
1469below)
1470Make sure devfs is mounted (see below)
1471
1472
1473Devfsd is not managing all my permissions
1474
1475Make sure you are capturing the appropriate events. For example,
1476device entries created by the kernel generate REGISTER events,
1477but those created by devfsd generate CREATE events.
1478
1479
1480Devfsd is not capturing all REGISTER events
1481
1482See the previous entry: you may need to capture CREATE events.
1483
1484
1485X will not start
1486
1487Make sure you followed the steps
1488outlined above.
1489
1490
1491Why don't my network devices appear in devfs?
1492
1493This is not a bug. Network devices have their own, completely separate
1494namespace. They are accessed via socket(2) and
1495setsockopt(2) calls, and thus require no device nodes. I have
1496raised the possibility of moving network devices into the device
1497namespace, but have had no response.
1498
1499
1500How can I test if I have devfs compiled into my kernel?
1501
1502All filesystems built-in or currently loaded are listed in
1503/proc/filesystems. If you see a devfs entry, then
1504you know that devfs was compiled into your kernel. If you have
1505correctly configured and rebuilt your kernel, then devfs will be
1506built-in. If you think you've configured it in, but
1507/proc/filesystems doesn't show it, you've made a mistake.
1508Common mistakes include:
1509
1510Using a 2.2.x kernel without applying the devfs patch (if you
1511don't know how to patch your kernel, use 2.4.x instead, don't bother
1512asking me how to patch)
1513Forgetting to set CONFIG_EXPERIMENTAL=y
1514Forgetting to set CONFIG_DEVFS_FS=y
1515Forgetting to set CONFIG_DEVFS_MOUNT=y (if you want devfs
1516to be automatically mounted at boot)
1517Editing your .config manually, instead of using make
1518config or make xconfig
1519Forgetting to run make dep; make clean after changing the
1520configuration and before compiling
1521Forgetting to compile your kernel and modules
1522Forgetting to install your kernel
1523Forgetting to install your modules
1524
1525Please check twice that you've done all these steps before sending in
1526a bug report.
1527
1528
1529
1530How can I test if devfs is mounted on /dev?
1531
1532The device filesystem will always create an entry called
1533".devfsd", which is used to communicate with the daemon. Even
1534if the daemon is not running, this entry will exist. Testing for the
1535existence of this entry is the approved method of determining if devfs
1536is mounted or not. Note that the type of entry (i.e. regular file,
1537character device, named pipe, etc.) may change without notice. Only
1538the existence of the entry should be relied upon.
1539
1540
1541When I start devfsd, I see the error:
1542Error opening file: ".devfsd" No such file or directory?
1543
1544This means that devfs is not mounted. Make sure you have devfs mounted.
1545
1546
1547How do I mount devfs?
1548
1549First make sure you have devfs compiled into your kernel (see
1550above). Then you will either need to:
1551
1552set CONFIG_DEVFS_MOUNT=y in your kernel config
1553pass devfs=mount to your boot loader
1554mount devfs manually in your boot scripts with:
1555mount -t none devfs /dev
1556
1557
1558
1559Mount by volume LABEL=<label> doesn't work with
1560devfs
1561
1562Most probably you are not mounting devfs onto /dev. What
1563happens is that if your kernel config has CONFIG_DEVFS_FS=y
1564then the contents of /proc/partitions will have the devfs
1565names (such as scsi/host0/bus0/target0/lun0/part1). The
1566contents of /proc/partitions are used by mount(8) when
1567mounting by volume label. If devfs is not mounted on /dev,
1568then mount(8) will fail to find devices. The solution is to
1569make sure that devfs is mounted on /dev. See above for how to
1570do that.
1571
1572
1573I have extra or incorrect entries in /dev
1574
1575You may have stale entries in your dev-state area. Check for a
1576RESTORE configuration line in your devfsd configuration
1577(typically /etc/devfsd.conf). If you have this line, check
1578the contents of the specified directory for stale entries. Remove
1579any entries which are incorrect, then reboot.
1580
1581
1582I get "Unable to open initial console" messages at boot
1583
1584This usually happens when you don't have devfs automounted onto
1585/dev at boot time, and there is no valid
1586/dev/console entry on your root file-system. Create a valid
1587/dev/console device node.
1588
1589
1590
1591
1592
1593Alternatives to devfs
1594
1595I've attempted to collate all the anti-devfs proposals and explain
1596their limitations. Under construction.
1597
1598
1599Why not just pass device create/remove events to a daemon?
1600
1601Here the suggestion is to develop an API in the kernel so that devices
1602can register create and remove events, and a daemon listens for those
1603events. The daemon would then populate/depopulate /dev (which
1604resides on disc).
1605
1606This has several limitations:
1607
1608
1609it only works for modules loaded and unloaded (or devices inserted
1610and removed) after the kernel has finished booting. Without a database
1611of events, there is no way the daemon could fully populate
1612/dev
1613
1614
1615if you add a database to this scheme, the question is then how to
1616present that database to user-space. If you make it a list of strings
1617with embedded event codes which are passed through a pipe to the
1618daemon, then this is only of use to the daemon. I would argue that the
1619natural way to present this data is via a filesystem (since many of
1620the events will be of a hierarchical nature), such as devfs.
1621Presenting the data as a filesystem makes it easy for the user to see
1622what is available and also makes it easy to write scripts to scan the
1623"database"
1624
1625
1626the tight binding between device nodes and drivers is no longer
1627possible (requiring the otherwise perfectly avoidable
1628table lookups)
1629
1630
1631you cannot catch inode lookup events on /dev which means
1632that module autoloading requires device nodes to be created. This is a
1633problem, particularly for drivers where only a few inodes are created
1634from a potentially large set
1635
1636
1637this technique can't be used when the root FS is mounted
1638read-only
1639
1640
1641
1642
1643Just implement a better scsidev
1644
1645This suggestion involves taking the scsidev programme and
1646extending it to scan for all devices, not just SCSI devices. The
1647scsidev programme works by scanning /proc/scsi.
1648
1649Problems:
1650
1651
1652the kernel does not currently provide a list of all devices
1653available. Not all drivers register entries in /proc or
1654generate kernel messages
1655
1656
1657there is no uniform mechanism to register devices other than the
1658devfs API
1659
1660
1661implementing such an API is then the same as the
1662proposal above
1663
1664
1665
1666
1667Put /dev on a ramdisc
1668
1669This suggestion involves creating a ramdisc and populating it with
1670device nodes and then mounting it over /dev.
1671
1672Problems:
1673
1674
1675
1676this doesn't help when mounting the root filesystem, since you
1677still need a device node to do that
1678
1679
1680if you want to use this technique for the root device node as
1681well, you need to use initrd. This complicates the booting sequence
1682and makes it significantly harder to administer and configure. The
1683initrd is essentially opaque, robbing the system administrator of easy
1684configuration
1685
1686
1687insufficient information is available to correctly populate the
1688ramdisc. So we come back to the
1689proposal above to "solve" this
1690
1691
1692a ramdisc-based solution would take more kernel memory, since the
1693backing store would be (at best) normal VFS inodes and dentries, which
1694take 284 bytes and 112 bytes, respectively, for each entry. Compare
1695that to 72 bytes for devfs
1696
1697
1698
1699
1700Do nothing: there's no problem
1701
1702Sometimes people can be heard to claim that the existing scheme is
1703fine. This is what they're ignoring:
1704
1705
1706device number size (8 bits each for major and minor) is a real
1707limitation, and must be fixed somehow. Systems with large numbers of
1708SCSI devices, for example, will continue to consume the remaining
1709unallocated major numbers. USB will also need to push beyond the 8 bit
1710minor limitation
1711
1712
1713simply increasing the device number size is insufficient. Apart
1714from causing a lot of pain, it doesn't solve the management issues
1715of a /dev with thousands or more device nodes
1716
1717
1718ignoring the problem of a huge /dev will not make it go
1719away, and dismisses the legitimacy of a large number of people who
1720want a dynamic /dev
1721
1722
1723the standard response then becomes: "write a device management
1724daemon", which brings us back to the
1725proposal above
1726
1727
1728
1729
1730What I don't like about devfs
1731
1732Here are some common complaints about devfs, and some suggestions and
1733solutions that may make it more palatable for you. I can't please
1734everybody, but I do try :-)
1735
1736I hate the naming scheme
1737
1738First, remember that no naming scheme will please everybody. You hate
1739the scheme, others love it. Who's to say who's right and who's wrong?
1740Ultimately, the person who writes the code gets to choose, and what
1741exists now is a combination of the choices made by the
1742devfs author and the
1743kernel maintainer (Linus).
1744
1745However, not all is lost. If you want to create your own naming
1746scheme, it is a simple matter to write a standalone script, hack
1747devfsd, or write a script called by devfsd. You can create whatever
1748naming scheme you like.
1749
1750Further, if you want to remove all traces of the devfs naming scheme
1751from /dev, you can mount devfs elsewhere (say
1752/devfs) and populate /dev with links into
1753/devfs. This population can be automated using devfsd if you
1754wish.
1755
1756You can even use the VFS binding facility to make the links, rather
1757than using symbolic links. This way, you don't even have to see the
1758"destination" of these symbolic links.
1759
1760Devfs puts policy into the kernel
1761
1762There's already policy in the kernel. Device numbers are in fact
1763policy (why should the kernel dictate what device numbers I use?).
1764Face it, some policy has to be in the kernel. The real difference
1765between device names as policy and device numbers as policy is that
1766no one will use device numbers directly, because device
1767numbers are devoid of meaning to humans and are ugly. At least with
1768the devfs device names, (even though you can add your own naming
1769scheme) some people will use the devfs-supplied names directly. This
1770offends some people :-)
1771
1772Devfs is bloatware
1773
1774This is not even remotely true. As shown above,
1775both code and data size are quite modest.
1776
1777
1778How to report bugs
1779
1780If you have (or think you have) a bug with devfs, please follow the
1781steps below:
1782
1783
1784
1785make sure you have enabled debugging output when configuring your
1786kernel. You will need to set (at least) the following config options:
1787
1788CONFIG_DEVFS_DEBUG=y
1789CONFIG_DEBUG_KERNEL=y
1790CONFIG_DEBUG_SLAB=y
1791
1792
1793
1794please make sure you have the latest devfs patches applied. The
1795latest kernel version might not have the latest devfs patches applied
1796yet (Linus is very busy)
1797
1798
1799save a copy of your complete kernel logs (preferably by
1800using the dmesg programme) for later inclusion in your bug
1801report. You may need to use the -s switch to increase the
1802internal buffer size so you can capture all the boot messages.
1803Don't edit or trim the dmesg output
1804
1805
1806
1807
1808try booting with devfs=dall passed to the kernel boot
1809command line (read the documentation on your bootloader on how to do
1810this), and save the result to a file. This may be quite verbose, and
1811it may overflow the messages buffer, but try to get as much of it as
1812you can
1813
1814
1815send a copy of your devfsd configuration file(s)
1816
1817send the bug report to me first.
1818Don't expect that I will see it if you post it to the linux-kernel
1819mailing list. Include all the information listed above, plus
1820anything else that you think might be relevant. Put the string
1821devfs somewhere in the subject line, so my mail filters mark
1822it as urgent
1823
1824
1825
1826
1827Here is a general guide on how to ask questions in a way that greatly
1828improves your chances of getting a reply:
1829
1830http://www.tuxedo.org/~esr/faqs/smart-questions.html. If you have
1831a bug to report, you should also read
1832
1833http://www.chiark.greenend.org.uk/~sgtatham/bugs.html.
1834
1835
1836Strange kernel messages
1837
1838You may see devfs-related messages in your kernel logs. Below are some
1839messages and what they mean (and what you should do about them, if
1840anything).
1841
1842
1843
1844devfs_register(fred): could not append to parent, err: -17
1845
1846You need to check what the error code means, but usually 17 means
1847EEXIST. This means that a driver attempted to create an entry
1848fred in a directory, but there already was an entry with that
1849name. This is often caused by flawed boot scripts which untar a bunch
1850of inodes into /dev, as a way to restore permissions. This
1851message is harmless, as the device nodes will still
1852provide access to the driver (unless you use the devfs=only
1853boot option, which is only for dedicated souls:-). If you want to get
1854rid of these annoying messages, upgrade to devfsd-v1.3.20 and use the
1855recommended RESTORE directive to restore permissions.
1856
1857
1858devfs_mk_dir(bill): using old entry in dir: c1808724 ""
1859
1860This is similar to the message above, except that a driver attempted
1861to create a directory named bill, and the parent directory
1862has an entry with the same name. In this case, to ensure that drivers
1863continue to work properly, the old entry is re-used and given to the
1864driver. In 2.5 kernels, the driver is given a NULL entry, and thus,
1865under rare circumstances, may not create the required device nodes.
1866The solution is the same as above.
1867
1868
1869
1870
1871
1872Compilation problems with devfsd
1873
1874Usually, you can compile devfsd just by typing in
1875make in the source directory, followed by a make
1876install (as root). Sometimes, you may have problems, particularly
1877on broken configurations.
1878
1879
1880
1881error messages relating to DEVFSD_NOTIFY_DELETE
1882
1883This happened because you have an ancient set of kernel headers
1884installed in /usr/include/linux or /usr/src/linux.
1885Install kernel 2.4.10 or later. You may need to pass the
1886KERNEL_DIR variable to make (if you did not install
1887the new kernel sources as /usr/src/linux), or you may copy
1888the devfs_fs.h file in the kernel source tree into
1889/usr/include/linux.
1890
1891
1892
1893
1894-----------------------------------------------------------------------------
1895
1896
1897Other resources
1898
1899
1900
1901Douglas Gilbert has written a useful document at
1902
1903http://www.torque.net/sg/devfs_scsi.html which
1904explores the SCSI subsystem and how it interacts with devfs
1905
1906
1907Douglas Gilbert has written another useful document at
1908
1909http://www.torque.net/scsi/SCSI-2.4-HOWTO/ which
1910discusses the Linux SCSI subsystem in 2.4.
1911
1912
1913Johannes Erdfelt has started a discussion paper on Linux and
1914hot-swap devices, describing what the requirements are for a scalable
1915solution and how and why he's used devfs+devfsd. Note that this is an
1916early draft only, available in plain text form at:
1917
1918http://johannes.erdfelt.com/hotswap.txt.
1919Johannes has promised an HTML version will follow.
1920
1921
1922I presented an invited
1923paper
1924at the
1925
19262nd Annual Storage Management Workshop held in Miami, Florida,
1927U.S.A. in October 2000.
1928
1929
1930
1931
1932-----------------------------------------------------------------------------
1933
1934
1935Translations of this document
1936
1937This document has been translated into other languages.
1938
1939
1940
1941
1942The document master (in English) by rgooch@atnf.csiro.au is
1943available at
1944
1945http://www.atnf.csiro.au/~rgooch/linux/docs/devfs.html
1946
1947
1948
1949A Korean translation by viatoris@nownuri.net is available at
1950
1951http://your.destiny.pe.kr/devfs/devfs.html
1952
1953
1954
1955
1956-----------------------------------------------------------------------------
1957Most flags courtesy of ITA's
1958Flags of All Countries
1959used with permission.
diff --git a/Documentation/filesystems/devfs/ToDo b/Documentation/filesystems/devfs/ToDo
deleted file mode 100644
index afd5a8f2c19b..000000000000
--- a/Documentation/filesystems/devfs/ToDo
+++ /dev/null
@@ -1,40 +0,0 @@
1 Device File System (devfs) ToDo List
2
3 Richard Gooch <rgooch@atnf.csiro.au>
4
5 3-JUL-2000
6
7This is a list of things to be done for better devfs support in the
8Linux kernel. If you'd like to contribute to the devfs, please have a
9look at this list for anything that is unallocated. Also, if there are
10items missing (surely), please contact me so I can add them to the
11list (preferably with your name attached to them:-).
12
13
14- >256 ptys
15 Thanks to C. Scott Ananian <cananian@alumni.princeton.edu>
16
17- Amiga floppy driver (drivers/block/amiflop.c)
18
19- Atari floppy driver (drivers/block/ataflop.c)
20
21- SWIM3 (Super Woz Integrated Machine 3) floppy driver (drivers/block/swim3.c)
22
23- Amiga ZorroII ramdisc driver (drivers/block/z2ram.c)
24
25- Parallel port ATAPI CD-ROM (drivers/block/paride/pcd.c)
26
27- Parallel port ATAPI floppy (drivers/block/paride/pf.c)
28
29- AP1000 block driver (drivers/ap1000/ap.c, drivers/ap1000/ddv.c)
30
31- Archimedes floppy (drivers/acorn/block/fd1772.c)
32
33- MFM hard drive (drivers/acorn/block/mfmhd.c)
34
35- I2O block device (drivers/message/i2o/i2o_block.c)
36
37- ST-RAM device (arch/m68k/atari/stram.c)
38
39- Raw devices
40
diff --git a/Documentation/filesystems/devfs/boot-options b/Documentation/filesystems/devfs/boot-options
deleted file mode 100644
index df3d33b03e0a..000000000000
--- a/Documentation/filesystems/devfs/boot-options
+++ /dev/null
@@ -1,65 +0,0 @@
1/* -*- auto-fill -*- */
2
3 Device File System (devfs) Boot Options
4
5 Richard Gooch <rgooch@atnf.csiro.au>
6
7 18-AUG-2001
8
9
10When CONFIG_DEVFS_DEBUG is enabled, you can pass several boot options
11to the kernel to debug devfs. The boot options are prefixed by
12"devfs=", and are separated by commas. Spaces are not allowed. The
13syntax looks like this:
14
15devfs=<option1>,<option2>,<option3>
16
17and so on. For example, if you wanted to turn on debugging for module
18load requests and device registration, you would do:
19
20devfs=dmod,dreg
21
22You may prefix "no" to any option. This will invert the option.
23
24
25Debugging Options
26=================
27
28These require CONFIG_DEVFS_DEBUG to be enabled.
29Note that all debugging options have 'd' as the first character. By
30default all options are off. All debugging output is sent to the
31kernel logs. The debugging options do not take effect until the devfs
32version message appears (just prior to the root filesystem being
33mounted).
34
35These are the options:
36
37dmod print module load requests to <request_module>
38
39dreg print device register requests to <devfs_register>
40
41dunreg print device unregister requests to <devfs_unregister>
42
43dchange print device change requests to <devfs_set_flags>
44
45dilookup print inode lookup requests
46
47diget print VFS inode allocations
48
49diunlink print inode unlinks
50
51dichange print inode changes
52
53dimknod print calls to mknod(2)
54
55dall some debugging turned on
56
57
58Other Options
59=============
60
61These control the default behaviour of devfs. The options are:
62
63mount mount devfs onto /dev at boot time
64
65only disable non-devfs device nodes for devfs-capable drivers
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index afb1335c05d6..4aecc9bdb273 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -113,6 +113,14 @@ noquota
113grpquota 113grpquota
114usrquota 114usrquota
115 115
116bh (*) ext3 associates buffer heads to data pages to
117nobh (a) cache disk block mapping information
118 (b) link pages into transaction to provide
119 ordering guarantees.
120 "bh" option forces use of buffer heads.
121 "nobh" option tries to avoid associating buffer
122 heads (supported only for "writeback" mode).
123
116 124
117Specification 125Specification
118============= 126=============
diff --git a/Documentation/filesystems/fuse.txt b/Documentation/filesystems/fuse.txt
index 33f74310d161..a584f05403a4 100644
--- a/Documentation/filesystems/fuse.txt
+++ b/Documentation/filesystems/fuse.txt
@@ -18,6 +18,14 @@ Non-privileged mount (or user mount):
18 user. NOTE: this is not the same as mounts allowed with the "user" 18 user. NOTE: this is not the same as mounts allowed with the "user"
19 option in /etc/fstab, which is not discussed here. 19 option in /etc/fstab, which is not discussed here.
20 20
21Filesystem connection:
22
23 A connection between the filesystem daemon and the kernel. The
24 connection exists until either the daemon dies, or the filesystem is
25 umounted. Note that detaching (or lazy umounting) the filesystem
26 does _not_ break the connection, in this case it will exist until
27 the last reference to the filesystem is released.
28
21Mount owner: 29Mount owner:
22 30
23 The user who does the mounting. 31 The user who does the mounting.
@@ -86,16 +94,20 @@ Mount options
86 The default is infinite. Note that the size of read requests is 94 The default is infinite. Note that the size of read requests is
87 limited anyway to 32 pages (which is 128kbyte on i386). 95 limited anyway to 32 pages (which is 128kbyte on i386).
88 96
89Sysfs 97Control filesystem
90~~~~~ 98~~~~~~~~~~~~~~~~~~
99
100There's a control filesystem for FUSE, which can be mounted by:
91 101
92FUSE sets up the following hierarchy in sysfs: 102 mount -t fusectl none /sys/fs/fuse/connections
93 103
94 /sys/fs/fuse/connections/N/ 104Mounting it under the '/sys/fs/fuse/connections' directory makes it
105backwards compatible with earlier versions.
95 106
96where N is an increasing number allocated to each new connection. 107Under the fuse control filesystem each connection has a directory
108named by a unique number.
97 109
98For each connection the following attributes are defined: 110For each connection the following files exist within this directory:
99 111
100 'waiting' 112 'waiting'
101 113
@@ -110,7 +122,47 @@ For each connection the following attributes are defined:
110 connection. This means that all waiting requests will be aborted and an 122 connection. This means that all waiting requests will be aborted and an
111 error returned for all aborted and new requests. 123 error returned for all aborted and new requests.
112 124
113Only a privileged user may read or write these attributes. 125Only the owner of the mount may read or write these files.
126
127Interrupting filesystem operations
128~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
129
130If a process issuing a FUSE filesystem request is interrupted, the
131following will happen:
132
133 1) If the request is not yet sent to userspace AND the signal is
134 fatal (SIGKILL or unhandled fatal signal), then the request is
135 dequeued and returns immediately.
136
137 2) If the request is not yet sent to userspace AND the signal is not
138 fatal, then an 'interrupted' flag is set for the request. When
139 the request has been successfully transferred to userspace and
140 this flag is set, an INTERRUPT request is queued.
141
142 3) If the request is already sent to userspace, then an INTERRUPT
143 request is queued.
144
145INTERRUPT requests take precedence over other requests, so the
146userspace filesystem will receive queued INTERRUPTs before any others.
147
148The userspace filesystem may ignore the INTERRUPT requests entirely,
149or may honor them by sending a reply to the _original_ request, with
150the error set to EINTR.
151
152It is also possible that there's a race between processing the
153original request and its INTERRUPT request. There are two possibilities:
154
155 1) The INTERRUPT request is processed before the original request is
156 processed
157
158 2) The INTERRUPT request is processed after the original request has
159 been answered
160
161If the filesystem cannot find the original request, it should wait for
162some timeout and/or a number of new requests to arrive, after which it
163should reply to the INTERRUPT request with an EAGAIN error. In case
1641) the INTERRUPT request will be requeued. In case 2) the INTERRUPT
165reply will be ignored.
114 166
115Aborting a filesystem connection 167Aborting a filesystem connection
116~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 168~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -139,8 +191,8 @@ the filesystem. There are several ways to do this:
139 - Use forced umount (umount -f). Works in all cases but only if 191 - Use forced umount (umount -f). Works in all cases but only if
140 filesystem is still attached (it hasn't been lazy unmounted) 192 filesystem is still attached (it hasn't been lazy unmounted)
141 193
142 - Abort filesystem through the sysfs interface. Most powerful 194 - Abort filesystem through the FUSE control filesystem. Most
143 method, always works. 195 powerful method, always works.
144 196
145How do non-privileged mounts work? 197How do non-privileged mounts work?
146~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 198~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -304,25 +356,7 @@ Scenario 1 - Simple deadlock
304 | | for "file"] 356 | | for "file"]
305 | | *DEADLOCK* 357 | | *DEADLOCK*
306 358
307The solution for this is to allow requests to be interrupted while 359The solution for this is to allow the filesystem to be aborted.
308they are in userspace:
309
310 | [interrupted by signal] |
311 | <fuse_unlink() |
312 | [release semaphore] | [semaphore acquired]
313 | <sys_unlink() |
314 | | >fuse_unlink()
315 | | [queue req on fc->pending]
316 | | [wake up fc->waitq]
317 | | [sleep on req->waitq]
318
319If the filesystem daemon was single threaded, this will stop here,
320since there's no other thread to dequeue and execute the request.
321In this case the solution is to kill the FUSE daemon as well. If
322there are multiple serving threads, you just have to kill them as
323long as any remain.
324
325Moral: a filesystem which deadlocks, can soon find itself dead.
326 360
327Scenario 2 - Tricky deadlock 361Scenario 2 - Tricky deadlock
328---------------------------- 362----------------------------
@@ -355,24 +389,14 @@ but is caused by a pagefault.
355 | | [lock page] 389 | | [lock page]
356 | | * DEADLOCK * 390 | | * DEADLOCK *
357 391
358Solution is again to let the the request be interrupted (not 392Solution is basically the same as above.
359elaborated further).
360
361An additional problem is that while the write buffer is being
362copied to the request, the request must not be interrupted. This
363is because the destination address of the copy may not be valid
364after the request is interrupted.
365
366This is solved with doing the copy atomically, and allowing
367interruption while the page(s) belonging to the write buffer are
368faulted with get_user_pages(). The 'req->locked' flag indicates
369when the copy is taking place, and interruption is delayed until
370this flag is unset.
371 393
372Scenario 3 - Tricky deadlock with asynchronous read 394An additional problem is that while the write buffer is being copied
373--------------------------------------------------- 395to the request, the request must not be interrupted/aborted. This is
396because the destination address of the copy may not be valid after the
397request has returned.
374 398
375The same situation as above, except thread-1 will wait on page lock 399This is solved with doing the copy atomically, and allowing abort
376and hence it will be uninterruptible as well. The solution is to 400while the page(s) belonging to the write buffer are faulted with
377abort the connection with forced umount (if mount is attached) or 401get_user_pages(). The 'req->locked' flag indicates when the copy is
378through the abort attribute in sysfs. 402taking place, and abort is delayed until this flag is unset.
diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt
index 6d501903f68e..59a919f16144 100644
--- a/Documentation/filesystems/inotify.txt
+++ b/Documentation/filesystems/inotify.txt
@@ -69,17 +69,135 @@ Prototypes:
69 int inotify_rm_watch (int fd, __u32 mask); 69 int inotify_rm_watch (int fd, __u32 mask);
70 70
71 71
72(iii) Internal Kernel Implementation 72(iii) Kernel Interface
73 73
74Each inotify instance is associated with an inotify_device structure. 74Inotify's kernel API consists of a set of functions for managing watches and an
75event callback.
76
77To use the kernel API, you must first initialize an inotify instance with a set
78of inotify_operations. You are given an opaque inotify_handle, which you use
79for any further calls to inotify.
80
81 struct inotify_handle *ih = inotify_init(my_event_handler);
82
83You must provide a function for processing events and a function for destroying
84the inotify watch.
85
86 void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
87 u32 cookie, const char *name, struct inode *inode)
88
89 watch - the pointer to the inotify_watch that triggered this call
90 wd - the watch descriptor
91 mask - describes the event that occurred
92 cookie - an identifier for synchronizing events
93 name - the dentry name for affected files in a directory-based event
94 inode - the affected inode in a directory-based event
95
96 void destroy_watch(struct inotify_watch *watch)
97
98You may add watches by providing a pre-allocated and initialized inotify_watch
99structure and specifying the inode to watch along with an inotify event mask.
100You must pin the inode during the call. You will likely wish to embed the
101inotify_watch structure in a structure of your own which contains other
102information about the watch. Once you add an inotify watch, it is immediately
103subject to removal depending on filesystem events. You must grab a reference if
104you depend on the watch hanging around after the call.
105
106 inotify_init_watch(&my_watch->iwatch);
107 get_inotify_watch(&my_watch->iwatch); // optional
108 s32 wd = inotify_add_watch(ih, &my_watch->iwatch, inode, mask);
109 put_inotify_watch(&my_watch->iwatch); // optional
110
111You may use the watch descriptor (wd) or the address of the inotify_watch for
112other inotify operations. You must not directly read or manipulate data in the
113inotify_watch. Additionally, you must not call inotify_add_watch() more than
114once for a given inotify_watch structure, unless you have first called either
115inotify_rm_watch() or inotify_rm_wd().
116
117To determine if you have already registered a watch for a given inode, you may
118call inotify_find_watch(), which gives you both the wd and the watch pointer for
119the inotify_watch, or an error if the watch does not exist.
120
121 wd = inotify_find_watch(ih, inode, &watchp);
122
123You may use container_of() on the watch pointer to access your own data
124associated with a given watch. When an existing watch is found,
125inotify_find_watch() bumps the refcount before releasing its locks. You must
126put that reference with:
127
128 put_inotify_watch(watchp);
129
130Call inotify_find_update_watch() to update the event mask for an existing watch.
131inotify_find_update_watch() returns the wd of the updated watch, or an error if
132the watch does not exist.
133
134 wd = inotify_find_update_watch(ih, inode, mask);
135
136An existing watch may be removed by calling either inotify_rm_watch() or
137inotify_rm_wd().
138
139 int ret = inotify_rm_watch(ih, &my_watch->iwatch);
140 int ret = inotify_rm_wd(ih, wd);
141
142A watch may be removed while executing your event handler with the following:
143
144 inotify_remove_watch_locked(ih, iwatch);
145
146Call inotify_destroy() to remove all watches from your inotify instance and
147release it. If there are no outstanding references, inotify_destroy() will call
148your destroy_watch op for each watch.
149
150 inotify_destroy(ih);
151
152When inotify removes a watch, it sends an IN_IGNORED event to your callback.
153You may use this event as an indication to free the watch memory. Note that
154inotify may remove a watch due to filesystem events, as well as by your request.
155If you use IN_ONESHOT, inotify will remove the watch after the first event, at
156which point you may call the final put_inotify_watch.
157
158(iv) Kernel Interface Prototypes
159
160 struct inotify_handle *inotify_init(struct inotify_operations *ops);
161
162 inotify_init_watch(struct inotify_watch *watch);
163
164 s32 inotify_add_watch(struct inotify_handle *ih,
165 struct inotify_watch *watch,
166 struct inode *inode, u32 mask);
167
168 s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
169 struct inotify_watch **watchp);
170
171 s32 inotify_find_update_watch(struct inotify_handle *ih,
172 struct inode *inode, u32 mask);
173
174 int inotify_rm_wd(struct inotify_handle *ih, u32 wd);
175
176 int inotify_rm_watch(struct inotify_handle *ih,
177 struct inotify_watch *watch);
178
179 void inotify_remove_watch_locked(struct inotify_handle *ih,
180 struct inotify_watch *watch);
181
182 void inotify_destroy(struct inotify_handle *ih);
183
184 void get_inotify_watch(struct inotify_watch *watch);
185 void put_inotify_watch(struct inotify_watch *watch);
186
187
188(v) Internal Kernel Implementation
189
190Each inotify instance is represented by an inotify_handle structure.
191Inotify's userspace consumers also have an inotify_device which is
192associated with the inotify_handle, and on which events are queued.
75 193
76Each watch is associated with an inotify_watch structure. Watches are chained 194Each watch is associated with an inotify_watch structure. Watches are chained
77off of each associated device and each associated inode. 195off of each associated inotify_handle and each associated inode.
78 196
79See fs/inotify.c for the locking and lifetime rules. 197See fs/inotify.c and fs/inotify_user.c for the locking and lifetime rules.
80 198
81 199
82(iv) Rationale 200(vi) Rationale
83 201
84Q: What is the design decision behind not tying the watch to the open fd of 202Q: What is the design decision behind not tying the watch to the open fd of
85 the watched object? 203 the watched object?
@@ -145,7 +263,7 @@ A: The poor user-space interface is the second biggest problem with dnotify.
145 file descriptor-based one that allows basic file I/O and poll/select. 263 file descriptor-based one that allows basic file I/O and poll/select.
146 Obtaining the fd and managing the watches could have been done either via a 264 Obtaining the fd and managing the watches could have been done either via a
147 device file or a family of new system calls. We decided to implement a 265 device file or a family of new system calls. We decided to implement a
148 family of system calls because that is the preffered approach for new kernel 266 family of system calls because that is the preferred approach for new kernel
149 interfaces. The only real difference was whether we wanted to use open(2) 267 interfaces. The only real difference was whether we wanted to use open(2)
150 and ioctl(2) or a couple of new system calls. System calls beat ioctls. 268 and ioctl(2) or a couple of new system calls. System calls beat ioctls.
151 269
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 2f388460cbe7..5531694059ab 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -50,10 +50,11 @@ Turn your foo_read_super() into a function that would return 0 in case of
50success and negative number in case of error (-EINVAL unless you have more 50success and negative number in case of error (-EINVAL unless you have more
51informative error value to report). Call it foo_fill_super(). Now declare 51informative error value to report). Call it foo_fill_super(). Now declare
52 52
53struct super_block foo_get_sb(struct file_system_type *fs_type, 53int foo_get_sb(struct file_system_type *fs_type,
54 int flags, const char *dev_name, void *data) 54 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
55{ 55{
56 return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); 56 return get_sb_bdev(fs_type, flags, dev_name, data, foo_fill_super,
57 mnt);
57} 58}
58 59
59(or similar with s/bdev/nodev/ or s/bdev/single/, depending on the kind of 60(or similar with s/bdev/nodev/ or s/bdev/single/, depending on the kind of
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
index 60ab61e54e8a..25981e2e51be 100644
--- a/Documentation/filesystems/ramfs-rootfs-initramfs.txt
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -70,11 +70,13 @@ tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information.
70What is rootfs? 70What is rootfs?
71--------------- 71---------------
72 72
73Rootfs is a special instance of ramfs, which is always present in 2.6 systems. 73Rootfs is a special instance of ramfs (or tmpfs, if that's enabled), which is
74(It's used internally as the starting and stopping point for searches of the 74always present in 2.6 systems. You can't unmount rootfs for approximately the
75kernel's doubly-linked list of mount points.) 75same reason you can't kill the init process; rather than having special code
76to check for and handle an empty list, it's smaller and simpler for the kernel
77to just make sure certain lists can't become empty.
76 78
77Most systems just mount another filesystem over it and ignore it. The 79Most systems just mount another filesystem over rootfs and ignore it. The
78amount of space an empty instance of ramfs takes up is tiny. 80amount of space an empty instance of ramfs takes up is tiny.
79 81
80What is initramfs? 82What is initramfs?
@@ -92,14 +94,16 @@ out of that.
92 94
93All this differs from the old initrd in several ways: 95All this differs from the old initrd in several ways:
94 96
95 - The old initrd was a separate file, while the initramfs archive is linked 97 - The old initrd was always a separate file, while the initramfs archive is
96 into the linux kernel image. (The directory linux-*/usr is devoted to 98 linked into the linux kernel image. (The directory linux-*/usr is devoted
97 generating this archive during the build.) 99 to generating this archive during the build.)
98 100
99 - The old initrd file was a gzipped filesystem image (in some file format, 101 - The old initrd file was a gzipped filesystem image (in some file format,
100 such as ext2, that had to be built into the kernel), while the new 102 such as ext2, that needed a driver built into the kernel), while the new
101 initramfs archive is a gzipped cpio archive (like tar only simpler, 103 initramfs archive is a gzipped cpio archive (like tar only simpler,
102 see cpio(1) and Documentation/early-userspace/buffer-format.txt). 104 see cpio(1) and Documentation/early-userspace/buffer-format.txt). The
105 kernel's cpio extraction code is not only extremely small, it's also
106 __init data that can be discarded during the boot process.
103 107
104 - The program run by the old initrd (which was called /initrd, not /init) did 108 - The program run by the old initrd (which was called /initrd, not /init) did
105 some setup and then returned to the kernel, while the init program from 109 some setup and then returned to the kernel, while the init program from
@@ -124,13 +128,14 @@ Populating initramfs:
124 128
125The 2.6 kernel build process always creates a gzipped cpio format initramfs 129The 2.6 kernel build process always creates a gzipped cpio format initramfs
126archive and links it into the resulting kernel binary. By default, this 130archive and links it into the resulting kernel binary. By default, this
127archive is empty (consuming 134 bytes on x86). The config option 131archive is empty (consuming 134 bytes on x86).
128CONFIG_INITRAMFS_SOURCE (for some reason buried under devices->block devices 132
129in menuconfig, and living in usr/Kconfig) can be used to specify a source for 133The config option CONFIG_INITRAMFS_SOURCE (for some reason buried under
130the initramfs archive, which will automatically be incorporated into the 134devices->block devices in menuconfig, and living in usr/Kconfig) can be used
131resulting binary. This option can point to an existing gzipped cpio archive, a 135to specify a source for the initramfs archive, which will automatically be
132directory containing files to be archived, or a text file specification such 136incorporated into the resulting binary. This option can point to an existing
133as the following example: 137gzipped cpio archive, a directory containing files to be archived, or a text
138file specification such as the following example:
134 139
135 dir /dev 755 0 0 140 dir /dev 755 0 0
136 nod /dev/console 644 0 0 c 5 1 141 nod /dev/console 644 0 0 c 5 1
@@ -146,23 +151,84 @@ as the following example:
146Run "usr/gen_init_cpio" (after the kernel build) to get a usage message 151Run "usr/gen_init_cpio" (after the kernel build) to get a usage message
147documenting the above file format. 152documenting the above file format.
148 153
149One advantage of the text file is that root access is not required to 154One advantage of the configuration file is that root access is not required to
150set permissions or create device nodes in the new archive. (Note that those 155set permissions or create device nodes in the new archive. (Note that those
151two example "file" entries expect to find files named "init.sh" and "busybox" in 156two example "file" entries expect to find files named "init.sh" and "busybox" in
152a directory called "initramfs", under the linux-2.6.* directory. See 157a directory called "initramfs", under the linux-2.6.* directory. See
153Documentation/early-userspace/README for more details.) 158Documentation/early-userspace/README for more details.)
154 159
155The kernel does not depend on external cpio tools, gen_init_cpio is created 160The kernel does not depend on external cpio tools. If you specify a
156from usr/gen_init_cpio.c which is entirely self-contained, and the kernel's 161directory instead of a configuration file, the kernel's build infrastructure
157boot-time extractor is also (obviously) self-contained. However, if you _do_ 162creates a configuration file from that directory (usr/Makefile calls
158happen to have cpio installed, the following command line can extract the 163scripts/gen_initramfs_list.sh), and proceeds to package up that directory
159generated cpio image back into its component files: 164using the config file (by feeding it to usr/gen_init_cpio, which is created
165from usr/gen_init_cpio.c). The kernel's build-time cpio creation code is
166entirely self-contained, and the kernel's boot-time extractor is also
167(obviously) self-contained.
168
169The one thing you might need external cpio utilities installed for is creating
170or extracting your own preprepared cpio files to feed to the kernel build
171(instead of a config file or directory).
172
173The following command line can extract a cpio image (created either by the
174script below or by the kernel build) back into its component files:
160 175
161 cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames 176 cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
162 177
178The following shell script can create a prebuilt cpio archive you can
179use in place of the above config file:
180
181 #!/bin/sh
182
183 # Copyright 2006 Rob Landley <rob@landley.net> and TimeSys Corporation.
184 # Licensed under GPL version 2
185
186 if [ $# -ne 2 ]
187 then
188 echo "usage: mkinitramfs directory imagename.cpio.gz"
189 exit 1
190 fi
191
192 if [ -d "$1" ]
193 then
194 echo "creating $2 from $1"
195 (cd "$1"; find . | cpio -o -H newc | gzip) > "$2"
196 else
197 echo "First argument must be a directory"
198 exit 1
199 fi
200
201Note: The cpio man page contains some bad advice that will break your initramfs
202archive if you follow it. It says "A typical way to generate the list
203of filenames is with the find command; you should give find the -depth option
204to minimize problems with permissions on directories that are unwritable or not
205searchable." Don't do this when creating initramfs.cpio.gz images, it won't
206work. The Linux kernel cpio extractor won't create files in a directory that
207doesn't exist, so the directory entries must go before the files that go in
208those directories. The above script gets them in the right order.
209
210External initramfs images:
211--------------------------
212
213If the kernel has initrd support enabled, an external cpio.gz archive can also
214be passed into a 2.6 kernel in place of an initrd. In this case, the kernel
215will autodetect the type (initramfs, not initrd) and extract the external cpio
216archive into rootfs before trying to run /init.
217
218This has the memory efficiency advantages of initramfs (no ramdisk block
219device) but the separate packaging of initrd (which is nice if you have
220non-GPL code you'd like to run from initramfs, without conflating it with
221the GPL licensed Linux kernel binary).
222
223It can also be used to supplement the kernel's built-in initramfs image. The
224files in the external archive will overwrite any conflicting files in
225the built-in initramfs archive. Some distributors also prefer to customize
226a single kernel image with task-specific initramfs images, without recompiling.
227
163Contents of initramfs: 228Contents of initramfs:
164---------------------- 229----------------------
165 230
231An initramfs archive is a complete self-contained root filesystem for Linux.
166If you don't already understand what shared libraries, devices, and paths 232If you don't already understand what shared libraries, devices, and paths
167you need to get a minimal root filesystem up and running, here are some 233you need to get a minimal root filesystem up and running, here are some
168references: 234references:
@@ -176,13 +242,36 @@ code against, along with some related utilities. It is BSD licensed.
176 242
177I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net) 243I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
178myself. These are LGPL and GPL, respectively. (A self-contained initramfs 244myself. These are LGPL and GPL, respectively. (A self-contained initramfs
179package is planned for the busybox 1.2 release.) 245package is planned for the busybox 1.3 release.)
180 246
181In theory you could use glibc, but that's not well suited for small embedded 247In theory you could use glibc, but that's not well suited for small embedded
182uses like this. (A "hello world" program statically linked against glibc is 248uses like this. (A "hello world" program statically linked against glibc is
183over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do 249over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
184name lookups, even when otherwise statically linked.) 250name lookups, even when otherwise statically linked.)
185 251
252A good first step is to get initramfs to run a statically linked "hello world"
253program as init, and test it under an emulator like qemu (www.qemu.org) or
254User Mode Linux, like so:
255
256 cat > hello.c << EOF
257 #include <stdio.h>
258 #include <unistd.h>
259
260 int main(int argc, char *argv[])
261 {
262 printf("Hello world!\n");
263 sleep(999999999);
264 }
265 EOF
266 gcc -static hello.c -o init
267 echo init | cpio -o -H newc | gzip > test.cpio.gz
268 # Testing external initramfs using the initrd loading mechanism.
269 qemu -kernel /boot/vmlinuz -initrd test.cpio.gz /dev/zero
270
271When debugging a normal root filesystem, it's nice to be able to boot with
272"init=/bin/sh". The initramfs equivalent is "rdinit=/bin/sh", and it's
273just as useful.
274
186Why cpio rather than tar? 275Why cpio rather than tar?
187------------------------- 276-------------------------
188 277
@@ -241,7 +330,7 @@ the above threads) is:
241Future directions: 330Future directions:
242------------------ 331------------------
243 332
244Today (2.6.14), initramfs is always compiled in, but not always used. The 333Today (2.6.16), initramfs is always compiled in, but not always used. The
245kernel falls back to legacy boot code that is reached only if initramfs does 334kernel falls back to legacy boot code that is reached only if initramfs does
246not contain an /init program. The fallback is legacy code, there to ensure a 335not contain an /init program. The fallback is legacy code, there to ensure a
247smooth transition and allowing early boot functionality to gradually move to 336smooth transition and allowing early boot functionality to gradually move to
@@ -258,8 +347,9 @@ and so on.
258 347
259This kind of complexity (which inevitably includes policy) is rightly handled 348This kind of complexity (which inevitably includes policy) is rightly handled
260in userspace. Both klibc and busybox/uClibc are working on simple initramfs 349in userspace. Both klibc and busybox/uClibc are working on simple initramfs
261packages to drop into a kernel build, and when standard solutions are ready 350packages to drop into a kernel build.
262and widely deployed, the kernel's legacy early boot code will become obsolete
263and a candidate for the feature removal schedule.
264 351
265But that's a while off yet. 352The klibc package has now been accepted into Andrew Morton's 2.6.17-mm tree.
353The kernel's current early boot code (partition detection, etc) will probably
354be migrated into a default initramfs, automatically created and used by the
355kernel build.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 3a2e5520c1e3..9d3aed628bc1 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -113,8 +113,8 @@ members are defined:
113struct file_system_type { 113struct file_system_type {
114 const char *name; 114 const char *name;
115 int fs_flags; 115 int fs_flags;
116 struct super_block *(*get_sb) (struct file_system_type *, int, 116 int (*get_sb) (struct file_system_type *, int,
117 const char *, void *); 117 const char *, void *, struct vfsmount *);
118 void (*kill_sb) (struct super_block *); 118 void (*kill_sb) (struct super_block *);
119 struct module *owner; 119 struct module *owner;
120 struct file_system_type * next; 120 struct file_system_type * next;
@@ -211,7 +211,7 @@ struct super_operations {
211 int (*sync_fs)(struct super_block *sb, int wait); 211 int (*sync_fs)(struct super_block *sb, int wait);
212 void (*write_super_lockfs) (struct super_block *); 212 void (*write_super_lockfs) (struct super_block *);
213 void (*unlockfs) (struct super_block *); 213 void (*unlockfs) (struct super_block *);
214 int (*statfs) (struct super_block *, struct kstatfs *); 214 int (*statfs) (struct dentry *, struct kstatfs *);
215 int (*remount_fs) (struct super_block *, int *, char *); 215 int (*remount_fs) (struct super_block *, int *, char *);
216 void (*clear_inode) (struct inode *); 216 void (*clear_inode) (struct inode *);
217 void (*umount_begin) (struct super_block *); 217 void (*umount_begin) (struct super_block *);
diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru
new file mode 100644
index 000000000000..69cdb527d58f
--- /dev/null
+++ b/Documentation/hwmon/abituguru
@@ -0,0 +1,59 @@
1Kernel driver abituguru
2=======================
3
4Supported chips:
5 * Abit uGuru (Hardware Monitor part only)
6 Prefix: 'abituguru'
7 Addresses scanned: ISA 0x0E0
8 Datasheet: Not available, this driver is based on reverse engineering.
9 A "Datasheet" has been written based on the reverse engineering it
10 should be available in the same dir as this file under the name
11 abituguru-datasheet.
12
13Authors:
14 Hans de Goede <j.w.r.degoede@hhs.nl>,
15 (Initial reverse engineering done by Olle Sandberg
16 <ollebull@gmail.com>)
17
18
19Module Parameters
20-----------------
21
22* force: bool Force detection. Note this parameter only causes the
23 detection to be skipped, if the uGuru can't be read
24 the module initialization (insmod) will still fail.
25* fan_sensors: int Tell the driver how many fan speed sensors there are
26 on your motherboard. Default: 0 (autodetect).
27* pwms: int Tell the driver how many fan speed controls (fan
28 pwms) your motherboard has. Default: 0 (autodetect).
29* verbose: int How verbose should the driver be? (0-3):
30 0 normal output
31 1 + verbose error reporting
32 2 + sensors type probing info
33 3 + retryable error reporting
34 Default: 2 (the driver is still in the testing phase)
35
36Notice if you need any of the first three options above please insmod the
37driver with verbose set to 3 and mail me <j.w.r.degoede@hhs.nl> the output of:
38dmesg | grep abituguru
39
40
41Description
42-----------
43
44This driver supports the hardware monitoring features of the Abit uGuru chip
45found on Abit uGuru featuring motherboards (most modern Abit motherboards).
46
47The uGuru chip in reality is a Winbond W83L950D in disguise (despite Abit
48claiming it is "a new microprocessor designed by the ABIT Engineers").
49Unfortunatly this doesn't help since the W83L950D is a generic
50microcontroller with a custom Abit application running on it.
51
52Despite Abit not releasing any information regarding the uGuru, Olle
53Sandberg <ollebull@gmail.com> has managed to reverse engineer the sensor part
54of the uGuru. Without his work this driver would not have been possible.
55
56Known Issues
57------------
58
59The voltage and frequency control parts of the Abit uGuru are not supported.
diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet
new file mode 100644
index 000000000000..aef5a9b36846
--- /dev/null
+++ b/Documentation/hwmon/abituguru-datasheet
@@ -0,0 +1,312 @@
1uGuru datasheet
2===============
3
4First of all, what I know about uGuru is no fact based on any help, hints or
5datasheet from Abit. The data I have got on uGuru have I assembled through
6my weak knowledge in "backwards engineering".
7And just for the record, you may have noticed uGuru isn't a chip developed by
8Abit, as they claim it to be. It's really just a microprocessor (uC) created by
9Winbond (W83L950D). And no, reading the manual for this specific uC or
10mailing Winbond for help won't give any useful data about uGuru, as it is
11the program inside the uC that is responding to calls.
12
13Olle Sandberg <ollebull@gmail.com>, 2005-05-25
14
15
16Original version by Olle Sandberg who did the heavy lifting of the initial
17reverse engineering. This version has been almost fully rewritten for clarity
18and extended with write support and info on more databanks, the write support
19is once again reverse engineered by Olle, the additional databanks have been
20reverse engineered by me. I would like to express my thanks to Olle, this
21document and the Linux driver could not have been written without his efforts.
22
23Note: because of the lack of specs only the sensors part of the uGuru is
24described here and not the CPU / RAM / etc voltage & frequency control.
25
26Hans de Goede <j.w.r.degoede@hhs.nl>, 28-01-2006
27
28
29Detection
30=========
31
32As far as known the uGuru is always placed at and using the (ISA) I/O-ports
330xE0 and 0xE4, so we don't have to scan any port-range, just check what the two
34ports are holding for detection. We will refer to 0xE0 as CMD (command-port)
35and 0xE4 as DATA because Abit refers to them with these names.
36
37If DATA holds 0x00 or 0x08 and CMD holds 0x00 or 0xAC an uGuru could be
38present. We have to check for two different values at data-port, because
39after a reboot uGuru will hold 0x00 here, but if the driver is removed and
40later on attached again data-port will hold 0x08, more about this later.
41
42After wider testing of the Linux kernel driver some variants of the uGuru have
43turned up which will hold 0x00 instead of 0xAC at the CMD port, thus we also
44have to test CMD for two different values. On these uGuru's DATA will initially
45hold 0x09 and will only hold 0x08 after reading CMD first, so CMD must be read
46first!
47
48To be really sure an uGuru is present a test read of one or more register
49sets should be done.
50
51
52Reading / Writing
53=================
54
55Addressing
56----------
57
58The uGuru has a number of different addressing levels. The first addressing
59level we will call banks. A bank holds data for one or more sensors. The data
60in a bank for a sensor is one or more bytes large.
61
62The number of bytes is fixed for a given bank, you should always read or write
63that many bytes, reading / writing more will fail, the results when writing
64less than the number of bytes for a given bank are undetermined.
65
66See below for all known bank addresses, numbers of sensors in that bank,
67number of bytes data per sensor and contents/meaning of those bytes.
68
69Although both this document and the kernel driver have kept the sensor
70terminology for the addressing within a bank this is not 100% correct, in
71bank 0x24 for example the addressing within the bank selects a PWM output not
72a sensor.
73
74Notice that some banks have both a read and a write address this is how the
75uGuru determines if a read from or a write to the bank is taking place, thus
76when reading you should always use the read address and when writing the
77write address. The write address is always one (1) more than the read address.
78
79
80uGuru ready
81-----------
82
83Before you can read from or write to the uGuru you must first put the uGuru
84in "ready" mode.
85
86To put the uGuru in ready mode first write 0x00 to DATA and then wait for DATA
87to hold 0x09, DATA should read 0x09 within 250 read cycles.
88
89Next CMD _must_ be read and should hold 0xAC, usually CMD will hold 0xAC the
90first read but sometimes it takes a while before CMD holds 0xAC and thus it
91has to be read a number of times (max 50).
92
93After reading CMD, DATA should hold 0x08 which means that the uGuru is ready
94for input. As above DATA will usually hold 0x08 the first read but not always.
95This step can be skipped, but it is undetermined what happens if the uGuru has
96not yet reported 0x08 at DATA and you proceed with writing a bank address.
97
98
99Sending bank and sensor addresses to the uGuru
100----------------------------------------------
101
102First the uGuru must be in "ready" mode as described above, DATA should hold
1030x08 indicating that the uGuru wants input, in this case the bank address.
104
105Next write the bank address to DATA. After the bank address has been written
106wait for DATA to hold 0x08 again indicating that it wants / is ready for
107more input (max 250 reads).
108
109Once DATA holds 0x08 again write the sensor address to CMD.
110
111
112Reading
113-------
114
115First send the bank and sensor addresses as described above.
116Then for each byte of data you want to read wait for DATA to hold 0x01
117which indicates that the uGuru is ready to be read (max 250 reads) and once
118DATA holds 0x01 read the byte from CMD.
119
120Once all bytes have been read data will hold 0x09, but there is no reason to
121test for this. Notice that the number of bytes is bank address dependent see
122above and below.
123
124After completing a successful read it is advised to put the uGuru back in
125ready mode, so that it is ready for the next read / write cycle. This way
126if your program / driver is unloaded and later loaded again the detection
127algorithm described above will still work.
128
129
130
131Writing
132-------
133
134First send the bank and sensor addresses as described above.
135Then for each byte of data you want to write wait for DATA to hold 0x00
136which indicates that the uGuru is ready to be written (max 250 reads) and
137once DATA holds 0x00 write the byte to CMD.
138
139Once all bytes have been written wait for DATA to hold 0x01 (max 250 reads)
140don't ask why this is the way it is.
141
142Once DATA holds 0x01 read CMD it should hold 0xAC now.
143
144After completing a successful write it is advised to put the uGuru back in
145ready mode, so that it is ready for the next read / write cycle. This way
146if your program / driver is unloaded and later loaded again the detection
147algorithm described above will still work.
148
149
150Gotchas
151-------
152
153After wider testing of the Linux kernel driver some variants of the uGuru have
154turned up which do not hold 0x08 at DATA within 250 reads after writing the
155bank address. With these versions this happens quite frequently, using larger
156timeouts doesn't help, they just go offline for a second or 2, doing some
157internal calibration or whatever. Your code should be prepared to handle
158this and in case of no response in this specific case just go to sleep for a
159while and then retry.
160
161
162Address Map
163===========
164
165Bank 0x20 Alarms (R)
166--------------------
167This bank contains 0 sensors, iow the sensor address is ignored (but must be
168written) just use 0. Bank 0x20 contains 3 bytes:
169
170Byte 0:
171This byte holds the alarm flags for sensor 0-7 of Sensor Bank1, with bit 0
172corresponding to sensor 0, 1 to 1, etc.
173
174Byte 1:
175This byte holds the alarm flags for sensor 8-15 of Sensor Bank1, with bit 0
176corresponding to sensor 8, 1 to 9, etc.
177
178Byte 2:
179This byte holds the alarm flags for sensor 0-5 of Sensor Bank2, with bit 0
180corresponding to sensor 0, 1 to 1, etc.
181
182
183Bank 0x21 Sensor Bank1 Values / Readings (R)
184--------------------------------------------
185This bank contains 16 sensors, for each sensor it contains 1 byte.
186So far the following sensors are known to be available on all motherboards:
187Sensor 0 CPU temp
188Sensor 1 SYS temp
189Sensor 3 CPU core volt
190Sensor 4 DDR volt
191Sensor 10 DDR Vtt volt
192Sensor 15 PWM temp
193
194Byte 0:
195This byte holds the reading from the sensor. Sensors in Bank1 can be both
196volt and temp sensors, this is motherboard specific. The uGuru however does
197seem to know (be programmed with) what kind of sensor is attached see Sensor
198Bank1 Settings description.
199
200Volt sensors use a linear scale, a reading 0 corresponds with 0 volt and a
201reading of 255 with 3494 mV. The sensors for higher voltages however are
202connected through a division circuit. The currently known division circuits
203in use result in ranges of: 0-4361mV, 0-6248mV or 0-14510mV. 3.3 volt sources
204use the 0-4361mV range, 5 volt the 0-6248mV and 12 volt the 0-14510mV .
205
206Temp sensors also use a linear scale, a reading of 0 corresponds with 0 degree
207Celsius and a reading of 255 with 255 degrees Celsius.
208
209
210Bank 0x22 Sensor Bank1 Settings (R)
211Bank 0x23 Sensor Bank1 Settings (W)
212-----------------------------------
213
214This bank contains 16 sensors, for each sensor it contains 3 bytes. Each
215set of 3 bytes contains the settings for the sensor with the same sensor
216address in Bank 0x21 .
217
218Byte 0:
219Alarm behaviour for the selected sensor. A 1 enables the described behaviour.
220Bit 0: Give an alarm if measured temp is over the warning threshold (RW) *
221Bit 1: Give an alarm if measured volt is over the max threshold (RW) **
222Bit 2: Give an alarm if measured volt is under the min threshold (RW) **
223Bit 3: Beep if alarm (RW)
224Bit 4: 1 if alarm cause measured temp is over the warning threshold (R)
225Bit 5: 1 if alarm cause measured volt is over the max threshold (R)
226Bit 6: 1 if alarm cause measured volt is under the min threshold (R)
227Bit 7: Volt sensor: Shutdown if alarm persist for more than 4 seconds (RW)
228 Temp sensor: Shutdown if temp is over the shutdown threshold (RW)
229
230* This bit is only honored/used by the uGuru if a temp sensor is connected
231** This bit is only honored/used by the uGuru if a volt sensor is connected
232Note with some trickery this can be used to find out what kinda sensor is
233detected see the Linux kernel driver for an example with many comments on
234how to do this.
235
236Byte 1:
237Temp sensor: warning threshold (scale as bank 0x21)
238Volt sensor: min threshold (scale as bank 0x21)
239
240Byte 2:
241Temp sensor: shutdown threshold (scale as bank 0x21)
242Volt sensor: max threshold (scale as bank 0x21)
243
244
245Bank 0x24 PWM outputs for FAN's (R)
246Bank 0x25 PWM outputs for FAN's (W)
247-----------------------------------
248
249This bank contains 3 "sensors", for each sensor it contains 5 bytes.
250Sensor 0 usually controls the CPU fan
251Sensor 1 usually controls the NB (or chipset for single chip) fan
252Sensor 2 usually controls the System fan
253
254Byte 0:
255Flag 0x80 to enable control, Fan runs at 100% when disabled.
256low nibble (temp)sensor address at bank 0x21 used for control.
257
258Byte 1:
2590-255 = 0-12v (linear), specify voltage at which fan will rotate when under
260low threshold temp (specified in byte 3)
261
262Byte 2:
2630-255 = 0-12v (linear), specify voltage at which fan will rotate when above
264high threshold temp (specified in byte 4)
265
266Byte 3:
267Low threshold temp (scale as bank 0x21)
268
269byte 4:
270High threshold temp (scale as bank 0x21)
271
272
273Bank 0x26 Sensors Bank2 Values / Readings (R)
274---------------------------------------------
275
276This bank contains 6 sensors (AFAIK), for each sensor it contains 1 byte.
277So far the following sensors are known to be available on all motherboards:
278Sensor 0: CPU fan speed
279Sensor 1: NB (or chipset for single chip) fan speed
280Sensor 2: SYS fan speed
281
282Byte 0:
283This byte holds the reading from the sensor. 0-255 = 0-15300 (linear)
284
285
286Bank 0x27 Sensors Bank2 Settings (R)
287Bank 0x28 Sensors Bank2 Settings (W)
288------------------------------------
289
290This bank contains 6 sensors (AFAIK), for each sensor it contains 2 bytes.
291
292Byte 0:
293Alarm behaviour for the selected sensor. A 1 enables the described behaviour.
294Bit 0: Give an alarm if measured rpm is under the min threshold (RW)
295Bit 3: Beep if alarm (RW)
296Bit 7: Shutdown if alarm persist for more than 4 seconds (RW)
297
298Byte 1:
299min threshold (scale as bank 0x26)
300
301
302Warning for the adventurous
303===========================
304
305A word of caution to those who want to experiment and see if they can figure
306the voltage / clock programming out, I tried reading and only reading banks
3070-0x30 with the reading code used for the sensor banks (0x20-0x28) and this
308resulted in a _permanent_ reprogramming of the voltages, luckily I had the
309sensors part configured so that it would shutdown my system on any out of spec
310voltages which probably saved my computer (after a reboot I managed to
311immediately enter the bios and reload the defaults). This probably means that
312the read/write cycle for the non sensor part is different from the sensor part.
diff --git a/Documentation/hwmon/lm70 b/Documentation/hwmon/lm70
new file mode 100644
index 000000000000..2bdd3feebf53
--- /dev/null
+++ b/Documentation/hwmon/lm70
@@ -0,0 +1,31 @@
1Kernel driver lm70
2==================
3
4Supported chip:
5 * National Semiconductor LM70
6 Datasheet: http://www.national.com/pf/LM/LM70.html
7
8Author:
9 Kaiwan N Billimoria <kaiwan@designergraphix.com>
10
11Description
12-----------
13
14This driver implements support for the National Semiconductor LM70
15temperature sensor.
16
17The LM70 temperature sensor chip supports a single temperature sensor.
18It communicates with a host processor (or microcontroller) via an
19SPI/Microwire Bus interface.
20
21Communication with the LM70 is simple: when the temperature is to be sensed,
22the driver accesses the LM70 using SPI communication: 16 SCLK cycles
23comprise the MOSI/MISO loop. At the end of the transfer, the 11-bit 2's
24complement digital temperature (sent via the SIO line), is available in the
25driver for interpretation. This driver makes use of the kernel's in-core
26SPI support.
27
28Thanks to
29---------
30Jean Delvare <khali@linux-fr.org> for mentoring the hwmon-side driver
31development.
diff --git a/Documentation/hwmon/lm83 b/Documentation/hwmon/lm83
index 061d9ed8ff43..f7aad1489cb0 100644
--- a/Documentation/hwmon/lm83
+++ b/Documentation/hwmon/lm83
@@ -7,6 +7,10 @@ Supported chips:
7 Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e 7 Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
8 Datasheet: Publicly available at the National Semiconductor website 8 Datasheet: Publicly available at the National Semiconductor website
9 http://www.national.com/pf/LM/LM83.html 9 http://www.national.com/pf/LM/LM83.html
10 * National Semiconductor LM82
11 Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
12 Datasheet: Publicly available at the National Semiconductor website
13 http://www.national.com/pf/LM/LM82.html
10 14
11 15
12Author: Jean Delvare <khali@linux-fr.org> 16Author: Jean Delvare <khali@linux-fr.org>
@@ -15,10 +19,11 @@ Description
15----------- 19-----------
16 20
17The LM83 is a digital temperature sensor. It senses its own temperature as 21The LM83 is a digital temperature sensor. It senses its own temperature as
18well as the temperature of up to three external diodes. It is compatible 22well as the temperature of up to three external diodes. The LM82 is
19with many other devices such as the LM84 and all other ADM1021 clones. 23a stripped down version of the LM83 that only supports one external diode.
20The main difference between the LM83 and the LM84 in that the later can 24Both are compatible with many other devices such as the LM84 and all
21only sense the temperature of one external diode. 25other ADM1021 clones. The main difference between the LM83 and the LM84
26in that the later can only sense the temperature of one external diode.
22 27
23Using the adm1021 driver for a LM83 should work, but only two temperatures 28Using the adm1021 driver for a LM83 should work, but only two temperatures
24will be reported instead of four. 29will be reported instead of four.
@@ -30,12 +35,16 @@ contact us. Note that the LM90 can easily be misdetected as a LM83.
30 35
31Confirmed motherboards: 36Confirmed motherboards:
32 SBS P014 37 SBS P014
38 SBS PSL09
33 39
34Unconfirmed motherboards: 40Unconfirmed motherboards:
35 Gigabyte GA-8IK1100 41 Gigabyte GA-8IK1100
36 Iwill MPX2 42 Iwill MPX2
37 Soltek SL-75DRV5 43 Soltek SL-75DRV5
38 44
45The LM82 is confirmed to have been found on most AMD Geode reference
46designs and test platforms.
47
39The driver has been successfully tested by Magnus Forsström, who I'd 48The driver has been successfully tested by Magnus Forsström, who I'd
40like to thank here. More testers will be of course welcome. 49like to thank here. More testers will be of course welcome.
41 50
diff --git a/Documentation/hwmon/smsc47m192 b/Documentation/hwmon/smsc47m192
new file mode 100644
index 000000000000..45d6453cd435
--- /dev/null
+++ b/Documentation/hwmon/smsc47m192
@@ -0,0 +1,102 @@
1Kernel driver smsc47m192
2========================
3
4Supported chips:
5 * SMSC LPC47M192 and LPC47M997
6 Prefix: 'smsc47m192'
7 Addresses scanned: I2C 0x2c - 0x2d
8 Datasheet: The datasheet for LPC47M192 is publicly available from
9 http://www.smsc.com/
10 The LPC47M997 is compatible for hardware monitoring.
11
12Author: Hartmut Rick <linux@rick.claranet.de>
13 Special thanks to Jean Delvare for careful checking
14 of the code and many helpful comments and suggestions.
15
16
17Description
18-----------
19
20This driver implements support for the hardware sensor capabilities
21of the SMSC LPC47M192 and LPC47M997 Super-I/O chips.
22
23These chips support 3 temperature channels and 8 voltage inputs
24as well as CPU voltage VID input.
25
26They do also have fan monitoring and control capabilities, but
27these features are accessed via ISA bus and are not supported by this
28driver. Use the 'smsc47m1' driver for fan monitoring and control.
29
30Voltages and temperatures are measured by an 8-bit ADC, the resolution
31of the temperatures is 1 bit per degree C.
32Voltages are scaled such that the nominal voltage corresponds to
33192 counts, i.e. 3/4 of the full range. Thus the available range for
34each voltage channel is 0V ... 255/192*(nominal voltage), the resolution
35is 1 bit per (nominal voltage)/192.
36Both voltage and temperature values are scaled by 1000, the sys files
37show voltages in mV and temperatures in units of 0.001 degC.
38
39The +12V analog voltage input channel (in4_input) is multiplexed with
40bit 4 of the encoded CPU voltage. This means that you either get
41a +12V voltage measurement or a 5 bit CPU VID, but not both.
42The default setting is to use the pin as 12V input, and use only 4 bit VID.
43This driver assumes that the information in the configuration register
44is correct, i.e. that the BIOS has updated the configuration if
45the motherboard has this input wired to VID4.
46
47The temperature and voltage readings are updated once every 1.5 seconds.
48Reading them more often repeats the same values.
49
50
51sysfs interface
52---------------
53
54in0_input - +2.5V voltage input
55in1_input - CPU voltage input (nominal 2.25V)
56in2_input - +3.3V voltage input
57in3_input - +5V voltage input
58in4_input - +12V voltage input (may be missing if used as VID4)
59in5_input - Vcc voltage input (nominal 3.3V)
60 This is the supply voltage of the sensor chip itself.
61in6_input - +1.5V voltage input
62in7_input - +1.8V voltage input
63
64in[0-7]_min,
65in[0-7]_max - lower and upper alarm thresholds for in[0-7]_input reading
66
67 All voltages are read and written in mV.
68
69in[0-7]_alarm - alarm flags for voltage inputs
70 These files read '1' in case of alarm, '0' otherwise.
71
72temp1_input - chip temperature measured by on-chip diode
73temp[2-3]_input - temperature measured by external diodes (one of these would
74 typically be wired to the diode inside the CPU)
75
76temp[1-3]_min,
77temp[1-3]_max - lower and upper alarm thresholds for temperatures
78
79temp[1-3]_offset - temperature offset registers
80 The chip adds the offsets stored in these registers to
81 the corresponding temperature readings.
82 Note that temp1 and temp2 offsets share the same register,
83 they cannot both be different from zero at the same time.
84 Writing a non-zero number to one of them will reset the other
85 offset to zero.
86
87 All temperatures and offsets are read and written in
88 units of 0.001 degC.
89
90temp[1-3]_alarm - alarm flags for temperature inputs, '1' in case of alarm,
91 '0' otherwise.
92temp[2-3]_input_fault - diode fault flags for temperature inputs 2 and 3.
93 A fault is detected if the two pins for the corresponding
94 sensor are open or shorted, or any of the two is shorted
95 to ground or Vcc. '1' indicates a diode fault.
96
97cpu0_vid - CPU voltage as received from the CPU
98
99vrm - CPU VID standard used for decoding CPU voltage
100
101 The *_min, *_max, *_offset and vrm files can be read and
102 written, all others are read-only.
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index a0d0ab24288e..d1d390aaf620 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -3,15 +3,15 @@ Naming and data format standards for sysfs files
3 3
4The libsensors library offers an interface to the raw sensors data 4The libsensors library offers an interface to the raw sensors data
5through the sysfs interface. See libsensors documentation and source for 5through the sysfs interface. See libsensors documentation and source for
6more further information. As of writing this document, libsensors 6further information. As of writing this document, libsensors
7(from lm_sensors 2.8.3) is heavily chip-dependant. Adding or updating 7(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating
8support for any given chip requires modifying the library's code. 8support for any given chip requires modifying the library's code.
9This is because libsensors was written for the procfs interface 9This is because libsensors was written for the procfs interface
10older kernel modules were using, which wasn't standardized enough. 10older kernel modules were using, which wasn't standardized enough.
11Recent versions of libsensors (from lm_sensors 2.8.2 and later) have 11Recent versions of libsensors (from lm_sensors 2.8.2 and later) have
12support for the sysfs interface, though. 12support for the sysfs interface, though.
13 13
14The new sysfs interface was designed to be as chip-independant as 14The new sysfs interface was designed to be as chip-independent as
15possible. 15possible.
16 16
17Note that motherboards vary widely in the connections to sensor chips. 17Note that motherboards vary widely in the connections to sensor chips.
@@ -24,7 +24,7 @@ range using external resistors. Since the values of these resistors
24can change from motherboard to motherboard, the conversions cannot be 24can change from motherboard to motherboard, the conversions cannot be
25hard coded into the driver and have to be done in user space. 25hard coded into the driver and have to be done in user space.
26 26
27For this reason, even if we aim at a chip-independant libsensors, it will 27For this reason, even if we aim at a chip-independent libsensors, it will
28still require a configuration file (e.g. /etc/sensors.conf) for proper 28still require a configuration file (e.g. /etc/sensors.conf) for proper
29values conversion, labeling of inputs and hiding of unused inputs. 29values conversion, labeling of inputs and hiding of unused inputs.
30 30
@@ -39,15 +39,16 @@ If you are developing a userspace application please send us feedback on
39this standard. 39this standard.
40 40
41Note that this standard isn't completely established yet, so it is subject 41Note that this standard isn't completely established yet, so it is subject
42to changes, even important ones. One more reason to use the library instead 42to changes. If you are writing a new hardware monitoring driver those
43of accessing sysfs files directly. 43features can't seem to fit in this interface, please contact us with your
44extension proposal. Keep in mind that backward compatibility must be
45preserved.
44 46
45Each chip gets its own directory in the sysfs /sys/devices tree. To 47Each chip gets its own directory in the sysfs /sys/devices tree. To
46find all sensor chips, it is easier to follow the symlinks from 48find all sensor chips, it is easier to follow the device symlinks from
47/sys/i2c/devices/ 49/sys/class/hwmon/hwmon*.
48 50
49All sysfs values are fixed point numbers. To get the true value of some 51All sysfs values are fixed point numbers.
50of the values, you should divide by the specified value.
51 52
52There is only one value per file, unlike the older /proc specification. 53There is only one value per file, unlike the older /proc specification.
53The common scheme for files naming is: <type><number>_<item>. Usual 54The common scheme for files naming is: <type><number>_<item>. Usual
@@ -69,28 +70,40 @@ to cause an alarm) is chip-dependent.
69 70
70------------------------------------------------------------------------- 71-------------------------------------------------------------------------
71 72
73[0-*] denotes any positive number starting from 0
74[1-*] denotes any positive number starting from 1
75RO read only value
76RW read/write value
77
78Read/write values may be read-only for some chips, depending on the
79hardware implementation.
80
81All entries are optional, and should only be created in a given driver
82if the chip has the feature.
83
72************ 84************
73* Voltages * 85* Voltages *
74************ 86************
75 87
76in[0-8]_min Voltage min value. 88in[0-*]_min Voltage min value.
77 Unit: millivolt 89 Unit: millivolt
78 Read/Write 90 RW
79 91
80in[0-8]_max Voltage max value. 92in[0-*]_max Voltage max value.
81 Unit: millivolt 93 Unit: millivolt
82 Read/Write 94 RW
83 95
84in[0-8]_input Voltage input value. 96in[0-*]_input Voltage input value.
85 Unit: millivolt 97 Unit: millivolt
86 Read only 98 RO
99 Voltage measured on the chip pin.
87 Actual voltage depends on the scaling resistors on the 100 Actual voltage depends on the scaling resistors on the
88 motherboard, as recommended in the chip datasheet. 101 motherboard, as recommended in the chip datasheet.
89 This varies by chip and by motherboard. 102 This varies by chip and by motherboard.
90 Because of this variation, values are generally NOT scaled 103 Because of this variation, values are generally NOT scaled
91 by the chip driver, and must be done by the application. 104 by the chip driver, and must be done by the application.
92 However, some drivers (notably lm87 and via686a) 105 However, some drivers (notably lm87 and via686a)
93 do scale, with various degrees of success. 106 do scale, because of internal resistors built into a chip.
94 These drivers will output the actual voltage. 107 These drivers will output the actual voltage.
95 108
96 Typical usage: 109 Typical usage:
@@ -104,58 +117,72 @@ in[0-8]_input Voltage input value.
104 in7_* varies 117 in7_* varies
105 in8_* varies 118 in8_* varies
106 119
107cpu[0-1]_vid CPU core reference voltage. 120cpu[0-*]_vid CPU core reference voltage.
108 Unit: millivolt 121 Unit: millivolt
109 Read only. 122 RO
110 Not always correct. 123 Not always correct.
111 124
112vrm Voltage Regulator Module version number. 125vrm Voltage Regulator Module version number.
113 Read only. 126 RW (but changing it should no more be necessary)
114 Two digit number, first is major version, second is 127 Originally the VRM standard version multiplied by 10, but now
115 minor version. 128 an arbitrary number, as not all standards have a version
129 number.
116 Affects the way the driver calculates the CPU core reference 130 Affects the way the driver calculates the CPU core reference
117 voltage from the vid pins. 131 voltage from the vid pins.
118 132
133Also see the Alarms section for status flags associated with voltages.
134
119 135
120******** 136********
121* Fans * 137* Fans *
122******** 138********
123 139
124fan[1-3]_min Fan minimum value 140fan[1-*]_min Fan minimum value
125 Unit: revolution/min (RPM) 141 Unit: revolution/min (RPM)
126 Read/Write. 142 RW
127 143
128fan[1-3]_input Fan input value. 144fan[1-*]_input Fan input value.
129 Unit: revolution/min (RPM) 145 Unit: revolution/min (RPM)
130 Read only. 146 RO
131 147
132fan[1-3]_div Fan divisor. 148fan[1-*]_div Fan divisor.
133 Integer value in powers of two (1, 2, 4, 8, 16, 32, 64, 128). 149 Integer value in powers of two (1, 2, 4, 8, 16, 32, 64, 128).
150 RW
134 Some chips only support values 1, 2, 4 and 8. 151 Some chips only support values 1, 2, 4 and 8.
135 Note that this is actually an internal clock divisor, which 152 Note that this is actually an internal clock divisor, which
136 affects the measurable speed range, not the read value. 153 affects the measurable speed range, not the read value.
137 154
155Also see the Alarms section for status flags associated with fans.
156
157
138******* 158*******
139* PWM * 159* PWM *
140******* 160*******
141 161
142pwm[1-3] Pulse width modulation fan control. 162pwm[1-*] Pulse width modulation fan control.
143 Integer value in the range 0 to 255 163 Integer value in the range 0 to 255
144 Read/Write 164 RW
145 255 is max or 100%. 165 255 is max or 100%.
146 166
147pwm[1-3]_enable 167pwm[1-*]_enable
148 Switch PWM on and off. 168 Switch PWM on and off.
149 Not always present even if fan*_pwm is. 169 Not always present even if fan*_pwm is.
150 0 to turn off 170 0: turn off
151 1 to turn on in manual mode 171 1: turn on in manual mode
152 2 to turn on in automatic mode 172 2+: turn on in automatic mode
153 Read/Write 173 Check individual chip documentation files for automatic mode details.
174 RW
175
176pwm[1-*]_mode
177 0: DC mode
178 1: PWM mode
179 RW
154 180
155pwm[1-*]_auto_channels_temp 181pwm[1-*]_auto_channels_temp
156 Select which temperature channels affect this PWM output in 182 Select which temperature channels affect this PWM output in
157 auto mode. Bitfield, 1 is temp1, 2 is temp2, 4 is temp3 etc... 183 auto mode. Bitfield, 1 is temp1, 2 is temp2, 4 is temp3 etc...
158 Which values are possible depend on the chip used. 184 Which values are possible depend on the chip used.
185 RW
159 186
160pwm[1-*]_auto_point[1-*]_pwm 187pwm[1-*]_auto_point[1-*]_pwm
161pwm[1-*]_auto_point[1-*]_temp 188pwm[1-*]_auto_point[1-*]_temp
@@ -163,6 +190,7 @@ pwm[1-*]_auto_point[1-*]_temp_hyst
163 Define the PWM vs temperature curve. Number of trip points is 190 Define the PWM vs temperature curve. Number of trip points is
164 chip-dependent. Use this for chips which associate trip points 191 chip-dependent. Use this for chips which associate trip points
165 to PWM output channels. 192 to PWM output channels.
193 RW
166 194
167OR 195OR
168 196
@@ -172,50 +200,57 @@ temp[1-*]_auto_point[1-*]_temp_hyst
172 Define the PWM vs temperature curve. Number of trip points is 200 Define the PWM vs temperature curve. Number of trip points is
173 chip-dependent. Use this for chips which associate trip points 201 chip-dependent. Use this for chips which associate trip points
174 to temperature channels. 202 to temperature channels.
203 RW
175 204
176 205
177**************** 206****************
178* Temperatures * 207* Temperatures *
179**************** 208****************
180 209
181temp[1-3]_type Sensor type selection. 210temp[1-*]_type Sensor type selection.
182 Integers 1 to 4 or thermistor Beta value (typically 3435) 211 Integers 1 to 4 or thermistor Beta value (typically 3435)
183 Read/Write. 212 RW
184 1: PII/Celeron Diode 213 1: PII/Celeron Diode
185 2: 3904 transistor 214 2: 3904 transistor
186 3: thermal diode 215 3: thermal diode
187 4: thermistor (default/unknown Beta) 216 4: thermistor (default/unknown Beta)
188 Not all types are supported by all chips 217 Not all types are supported by all chips
189 218
190temp[1-4]_max Temperature max value. 219temp[1-*]_max Temperature max value.
191 Unit: millidegree Celcius 220 Unit: millidegree Celsius (or millivolt, see below)
192 Read/Write value. 221 RW
193 222
194temp[1-3]_min Temperature min value. 223temp[1-*]_min Temperature min value.
195 Unit: millidegree Celcius 224 Unit: millidegree Celsius
196 Read/Write value. 225 RW
197 226
198temp[1-3]_max_hyst 227temp[1-*]_max_hyst
199 Temperature hysteresis value for max limit. 228 Temperature hysteresis value for max limit.
200 Unit: millidegree Celcius 229 Unit: millidegree Celsius
201 Must be reported as an absolute temperature, NOT a delta 230 Must be reported as an absolute temperature, NOT a delta
202 from the max value. 231 from the max value.
203 Read/Write value. 232 RW
204 233
205temp[1-4]_input Temperature input value. 234temp[1-*]_input Temperature input value.
206 Unit: millidegree Celcius 235 Unit: millidegree Celsius
207 Read only value. 236 RO
208 237
209temp[1-4]_crit Temperature critical value, typically greater than 238temp[1-*]_crit Temperature critical value, typically greater than
210 corresponding temp_max values. 239 corresponding temp_max values.
211 Unit: millidegree Celcius 240 Unit: millidegree Celsius
212 Read/Write value. 241 RW
213 242
214temp[1-2]_crit_hyst 243temp[1-*]_crit_hyst
215 Temperature hysteresis value for critical limit. 244 Temperature hysteresis value for critical limit.
216 Unit: millidegree Celcius 245 Unit: millidegree Celsius
217 Must be reported as an absolute temperature, NOT a delta 246 Must be reported as an absolute temperature, NOT a delta
218 from the critical value. 247 from the critical value.
248 RW
249
250temp[1-4]_offset
251 Temperature offset which is added to the temperature reading
252 by the chip.
253 Unit: millidegree Celsius
219 Read/Write value. 254 Read/Write value.
220 255
221 If there are multiple temperature sensors, temp1_* is 256 If there are multiple temperature sensors, temp1_* is
@@ -225,6 +260,17 @@ temp[1-2]_crit_hyst
225 itself, for example the thermal diode inside the CPU or 260 itself, for example the thermal diode inside the CPU or
226 a thermistor nearby. 261 a thermistor nearby.
227 262
263Some chips measure temperature using external thermistors and an ADC, and
264report the temperature measurement as a voltage. Converting this voltage
265back to a temperature (or the other way around for limits) requires
266mathematical functions not available in the kernel, so the conversion
267must occur in user space. For these chips, all temp* files described
268above should contain values expressed in millivolt instead of millidegree
269Celsius. In other words, such temperature channels are handled as voltage
270channels by the driver.
271
272Also see the Alarms section for status flags associated with temperatures.
273
228 274
229************ 275************
230* Currents * 276* Currents *
@@ -233,25 +279,88 @@ temp[1-2]_crit_hyst
233Note that no known chip provides current measurements as of writing, 279Note that no known chip provides current measurements as of writing,
234so this part is theoretical, so to say. 280so this part is theoretical, so to say.
235 281
236curr[1-n]_max Current max value 282curr[1-*]_max Current max value
237 Unit: milliampere 283 Unit: milliampere
238 Read/Write. 284 RW
239 285
240curr[1-n]_min Current min value. 286curr[1-*]_min Current min value.
241 Unit: milliampere 287 Unit: milliampere
242 Read/Write. 288 RW
243 289
244curr[1-n]_input Current input value 290curr[1-*]_input Current input value
245 Unit: milliampere 291 Unit: milliampere
246 Read only. 292 RO
247 293
248 294
249********* 295**********
250* Other * 296* Alarms *
251********* 297**********
298
299Each channel or limit may have an associated alarm file, containing a
300boolean value. 1 means that an alarm condition exists, 0 means no alarm.
301
302Usually a given chip will either use channel-related alarms, or
303limit-related alarms, not both. The driver should just reflect the hardware
304implementation.
305
306in[0-*]_alarm
307fan[1-*]_alarm
308temp[1-*]_alarm
309 Channel alarm
310 0: no alarm
311 1: alarm
312 RO
313
314OR
315
316in[0-*]_min_alarm
317in[0-*]_max_alarm
318fan[1-*]_min_alarm
319temp[1-*]_min_alarm
320temp[1-*]_max_alarm
321temp[1-*]_crit_alarm
322 Limit alarm
323 0: no alarm
324 1: alarm
325 RO
326
327Each input channel may have an associated fault file. This can be used
328to notify open diodes, unconnected fans etc. where the hardware
329supports it. When this boolean has value 1, the measurement for that
330channel should not be trusted.
331
332in[0-*]_input_fault
333fan[1-*]_input_fault
334temp[1-*]_input_fault
335 Input fault condition
336 0: no fault occurred
337 1: fault condition
338 RO
339
340Some chips also offer the possibility to get beeped when an alarm occurs:
341
342beep_enable Master beep enable
343 0: no beeps
344 1: beeps
345 RW
346
347in[0-*]_beep
348fan[1-*]_beep
349temp[1-*]_beep
350 Channel beep
351 0: disable
352 1: enable
353 RW
354
355In theory, a chip could provide per-limit beep masking, but no such chip
356was seen so far.
357
358Old drivers provided a different, non-standard interface to alarms and
359beeps. These interface files are deprecated, but will be kept around
360for compatibility reasons:
252 361
253alarms Alarm bitmask. 362alarms Alarm bitmask.
254 Read only. 363 RO
255 Integer representation of one to four bytes. 364 Integer representation of one to four bytes.
256 A '1' bit means an alarm. 365 A '1' bit means an alarm.
257 Chips should be programmed for 'comparator' mode so that 366 Chips should be programmed for 'comparator' mode so that
@@ -259,35 +368,26 @@ alarms Alarm bitmask.
259 if it is still valid. 368 if it is still valid.
260 Generally a direct representation of a chip's internal 369 Generally a direct representation of a chip's internal
261 alarm registers; there is no standard for the position 370 alarm registers; there is no standard for the position
262 of individual bits. 371 of individual bits. For this reason, the use of this
372 interface file for new drivers is discouraged. Use
373 individual *_alarm and *_fault files instead.
263 Bits are defined in kernel/include/sensors.h. 374 Bits are defined in kernel/include/sensors.h.
264 375
265alarms_in Alarm bitmask relative to in (voltage) channels 376beep_mask Bitmask for beep.
266 Read only 377 Same format as 'alarms' with the same bit locations,
267 A '1' bit means an alarm, LSB corresponds to in0 and so on 378 use discouraged for the same reason. Use individual
268 Prefered to 'alarms' for newer chips 379 *_beep files instead.
269 380 RW
270alarms_fan Alarm bitmask relative to fan channels
271 Read only
272 A '1' bit means an alarm, LSB corresponds to fan1 and so on
273 Prefered to 'alarms' for newer chips
274
275alarms_temp Alarm bitmask relative to temp (temperature) channels
276 Read only
277 A '1' bit means an alarm, LSB corresponds to temp1 and so on
278 Prefered to 'alarms' for newer chips
279 381
280beep_enable Beep/interrupt enable
281 0 to disable.
282 1 to enable.
283 Read/Write
284 382
285beep_mask Bitmask for beep. 383*********
286 Same format as 'alarms' with the same bit locations. 384* Other *
287 Read/Write 385*********
288 386
289eeprom Raw EEPROM data in binary form. 387eeprom Raw EEPROM data in binary form.
290 Read only. 388 RO
291 389
292pec Enable or disable PEC (SMBus only) 390pec Enable or disable PEC (SMBus only)
293 Read/Write 391 0: disable
392 1: enable
393 RW
diff --git a/Documentation/hwmon/userspace-tools b/Documentation/hwmon/userspace-tools
index 2622aac65422..19900a8fe679 100644
--- a/Documentation/hwmon/userspace-tools
+++ b/Documentation/hwmon/userspace-tools
@@ -6,31 +6,32 @@ voltages, fans speed). They are often connected through an I2C bus, but some
6are also connected directly through the ISA bus. 6are also connected directly through the ISA bus.
7 7
8The kernel drivers make the data from the sensor chips available in the /sys 8The kernel drivers make the data from the sensor chips available in the /sys
9virtual filesystem. Userspace tools are then used to display or set or the 9virtual filesystem. Userspace tools are then used to display the measured
10data in a more friendly manner. 10values or configure the chips in a more friendly manner.
11 11
12Lm-sensors 12Lm-sensors
13---------- 13----------
14 14
15Core set of utilites that will allow you to obtain health information, 15Core set of utilities that will allow you to obtain health information,
16setup monitoring limits etc. You can get them on their homepage 16setup monitoring limits etc. You can get them on their homepage
17http://www.lm-sensors.nu/ or as a package from your Linux distribution. 17http://www.lm-sensors.nu/ or as a package from your Linux distribution.
18 18
19If from website: 19If from website:
20Get lmsensors from project web site. Please note, you need only userspace 20Get lm-sensors from project web site. Please note, you need only userspace
21part, so compile with "make user_install" target. 21part, so compile with "make user" and install with "make user_install".
22 22
23General hints to get things working: 23General hints to get things working:
24 24
250) get lm-sensors userspace utils 250) get lm-sensors userspace utils
261) compile all drivers in I2C section as modules in your kernel 261) compile all drivers in I2C and Hardware Monitoring sections as modules
27 in your kernel
272) run sensors-detect script, it will tell you what modules you need to load. 282) run sensors-detect script, it will tell you what modules you need to load.
283) load them and run "sensors" command, you should see some results. 293) load them and run "sensors" command, you should see some results.
294) fix sensors.conf, labels, limits, fan divisors 304) fix sensors.conf, labels, limits, fan divisors
305) if any more problems consult FAQ, or documentation 315) if any more problems consult FAQ, or documentation
31 32
32Other utilites 33Other utilities
33-------------- 34---------------
34 35
35If you want some graphical indicators of system health look for applications 36If you want some graphical indicators of system health look for applications
36like: gkrellm, ksensors, xsensors, wmtemp, wmsensors, wmgtemp, ksysguardd, 37like: gkrellm, ksensors, xsensors, wmtemp, wmsensors, wmgtemp, ksysguardd,
diff --git a/Documentation/hwmon/w83791d b/Documentation/hwmon/w83791d
new file mode 100644
index 000000000000..83a3836289c2
--- /dev/null
+++ b/Documentation/hwmon/w83791d
@@ -0,0 +1,113 @@
1Kernel driver w83791d
2=====================
3
4Supported chips:
5 * Winbond W83791D
6 Prefix: 'w83791d'
7 Addresses scanned: I2C 0x2c - 0x2f
8 Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83791Da.pdf
9
10Author: Charles Spirakis <bezaur@gmail.com>
11
12This driver was derived from the w83781d.c and w83792d.c source files.
13
14Credits:
15 w83781d.c:
16 Frodo Looijaard <frodol@dds.nl>,
17 Philip Edelbrock <phil@netroedge.com>,
18 and Mark Studebaker <mdsxyz123@yahoo.com>
19 w83792d.c:
20 Chunhao Huang <DZShen@Winbond.com.tw>,
21 Rudolf Marek <r.marek@sh.cvut.cz>
22
23Module Parameters
24-----------------
25
26* init boolean
27 (default 0)
28 Use 'init=1' to have the driver do extra software initializations.
29 The default behavior is to do the minimum initialization possible
30 and depend on the BIOS to properly setup the chip. If you know you
31 have a w83791d and you're having problems, try init=1 before trying
32 reset=1.
33
34* reset boolean
35 (default 0)
36 Use 'reset=1' to reset the chip (via index 0x40, bit 7). The default
37 behavior is no chip reset to preserve BIOS settings.
38
39* force_subclients=bus,caddr,saddr,saddr
40 This is used to force the i2c addresses for subclients of
41 a certain chip. Example usage is `force_subclients=0,0x2f,0x4a,0x4b'
42 to force the subclients of chip 0x2f on bus 0 to i2c addresses
43 0x4a and 0x4b.
44
45
46Description
47-----------
48
49This driver implements support for the Winbond W83791D chip.
50
51Detection of the chip can sometimes be foiled because it can be in an
52internal state that allows no clean access (Bank with ID register is not
53currently selected). If you know the address of the chip, use a 'force'
54parameter; this will put it into a more well-behaved state first.
55
56The driver implements three temperature sensors, five fan rotation speed
57sensors, and ten voltage sensors.
58
59Temperatures are measured in degrees Celsius and measurement resolution is 1
60degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when
61the temperature gets higher than the Overtemperature Shutdown value; it stays
62on until the temperature falls below the Hysteresis value.
63
64Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
65triggered if the rotation speed has dropped below a programmable limit. Fan
66readings can be divided by a programmable divider (1, 2, 4, 8 for fan 1/2/3
67and 1, 2, 4, 8, 16, 32, 64 or 128 for fan 4/5) to give the readings more
68range or accuracy.
69
70Voltage sensors (also known as IN sensors) report their values in millivolts.
71An alarm is triggered if the voltage has crossed a programmable minimum
72or maximum limit.
73
74Alarms are provided as output from a "realtime status register". The
75following bits are defined:
76
77bit - alarm on:
780 - Vcore
791 - VINR0
802 - +3.3VIN
813 - 5VDD
824 - temp1
835 - temp2
846 - fan1
857 - fan2
868 - +12VIN
879 - -12VIN
8810 - -5VIN
8911 - fan3
9012 - chassis
9113 - temp3
9214 - VINR1
9315 - reserved
9416 - tart1
9517 - tart2
9618 - tart3
9719 - VSB
9820 - VBAT
9921 - fan4
10022 - fan5
10123 - reserved
102
103When an alarm goes off, you can be warned by a beeping signal through your
104computer speaker. It is possible to enable all beeping globally, or only
105the beeping for some alarms.
106
107The driver only reads the chip values every 3 seconds; reading them more
108often will do no harm, but will return 'old' values.
109
110W83791D TODO:
111---------------
112Provide a patch for per-file alarms as discussed on the mailing list
113Provide a patch for smart-fan control (still need appropriate motherboard/fans)
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index fd4b2712d570..e46c23458242 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -21,8 +21,7 @@ Authors:
21Module Parameters 21Module Parameters
22----------------- 22-----------------
23 23
24* force_addr: int 24None.
25 Forcibly enable the ICH at the given address. EXTREMELY DANGEROUS!
26 25
27 26
28Description 27Description
diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
index d751282d9b2a..cd49c428a3ab 100644
--- a/Documentation/i2c/busses/i2c-nforce2
+++ b/Documentation/i2c/busses/i2c-nforce2
@@ -7,6 +7,8 @@ Supported adapters:
7 * nForce3 250Gb MCP 10de:00E4 7 * nForce3 250Gb MCP 10de:00E4
8 * nForce4 MCP 10de:0052 8 * nForce4 MCP 10de:0052
9 * nForce4 MCP-04 10de:0034 9 * nForce4 MCP-04 10de:0034
10 * nForce4 MCP51 10de:0264
11 * nForce4 MCP55 10de:0368
10 12
11Datasheet: not publicly available, but seems to be similar to the 13Datasheet: not publicly available, but seems to be similar to the
12 AMD-8111 SMBus 2.0 adapter. 14 AMD-8111 SMBus 2.0 adapter.
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
new file mode 100644
index 000000000000..cfcebb10d14e
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -0,0 +1,51 @@
1Kernel driver i2c-ocores
2
3Supported adapters:
4 * OpenCores.org I2C controller by Richard Herveille (see datasheet link)
5 Datasheet: http://www.opencores.org/projects.cgi/web/i2c/overview
6
7Author: Peter Korsgaard <jacmet@sunsite.dk>
8
9Description
10-----------
11
12i2c-ocores is an i2c bus driver for the OpenCores.org I2C controller
13IP core by Richard Herveille.
14
15Usage
16-----
17
18i2c-ocores uses the platform bus, so you need to provide a struct
19platform_device with the base address and interrupt number. The
20dev.platform_data of the device should also point to a struct
21ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the
22distance between registers and the input clock speed.
23
24E.G. something like:
25
26static struct resource ocores_resources[] = {
27 [0] = {
28 .start = MYI2C_BASEADDR,
29 .end = MYI2C_BASEADDR + 8,
30 .flags = IORESOURCE_MEM,
31 },
32 [1] = {
33 .start = MYI2C_IRQ,
34 .end = MYI2C_IRQ,
35 .flags = IORESOURCE_IRQ,
36 },
37};
38
39static struct ocores_i2c_platform_data myi2c_data = {
40 .regstep = 2, /* two bytes between registers */
41 .clock_khz = 50000, /* input clock of 50MHz */
42};
43
44static struct platform_device myi2c = {
45 .name = "ocores-i2c",
46 .dev = {
47 .platform_data = &myi2c_data,
48 },
49 .num_resources = ARRAY_SIZE(ocores_resources),
50 .resource = ocores_resources,
51};
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
index a1c8f581afed..921476333235 100644
--- a/Documentation/i2c/busses/i2c-piix4
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -6,6 +6,8 @@ Supported adapters:
6 Datasheet: Publicly available at the Intel website 6 Datasheet: Publicly available at the Intel website
7 * ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges 7 * ServerWorks OSB4, CSB5, CSB6 and HT-1000 southbridges
8 Datasheet: Only available via NDA from ServerWorks 8 Datasheet: Only available via NDA from ServerWorks
9 * ATI IXP southbridges IXP200, IXP300, IXP400
10 Datasheet: Not publicly available
9 * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge 11 * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
10 Datasheet: Publicly available at the SMSC website http://www.smsc.com 12 Datasheet: Publicly available at the SMSC website http://www.smsc.com
11 13
@@ -21,8 +23,6 @@ Module Parameters
21 Forcibly enable the PIIX4. DANGEROUS! 23 Forcibly enable the PIIX4. DANGEROUS!
22* force_addr: int 24* force_addr: int
23 Forcibly enable the PIIX4 at the given address. EXTREMELY DANGEROUS! 25 Forcibly enable the PIIX4 at the given address. EXTREMELY DANGEROUS!
24* fix_hstcfg: int
25 Fix config register. Needed on some boards (Force CPCI735).
26 26
27 27
28Description 28Description
@@ -63,10 +63,36 @@ The PIIX4E is just an new version of the PIIX4; it is supported as well.
63The PIIX/PIIX3 does not implement an SMBus or I2C bus, so you can't use 63The PIIX/PIIX3 does not implement an SMBus or I2C bus, so you can't use
64this driver on those mainboards. 64this driver on those mainboards.
65 65
66The ServerWorks Southbridges, the Intel 440MX, and the Victory766 are 66The ServerWorks Southbridges, the Intel 440MX, and the Victory66 are
67identical to the PIIX4 in I2C/SMBus support. 67identical to the PIIX4 in I2C/SMBus support.
68 68
69A few OSB4 southbridges are known to be misconfigured by the BIOS. In this 69If you own Force CPCI735 motherboard or other OSB4 based systems you may need
70case, you have you use the fix_hstcfg module parameter. Do not use it 70to change the SMBus Interrupt Select register so the SMBus controller uses
71unless you know you have to, because in some cases it also breaks 71the SMI mode.
72configuration on southbridges that don't need it. 72
731) Use lspci command and locate the PCI device with the SMBus controller:
74 00:0f.0 ISA bridge: ServerWorks OSB4 South Bridge (rev 4f)
75 The line may vary for different chipsets. Please consult the driver source
76 for all possible PCI ids (and lspci -n to match them). Let's assume the
77 device is located at 00:0f.0.
782) Now you just need to change the value in 0xD2 register. Get it first with
79 command: lspci -xxx -s 00:0f.0
80 If the value is 0x3 then you need to change it to 0x1
81 setpci -s 00:0f.0 d2.b=1
82
83Please note that you don't need to do that in all cases, just when the SMBus is
84not working properly.
85
86
87Hardware-specific issues
88------------------------
89
90This driver will refuse to load on IBM systems with an Intel PIIX4 SMBus.
91Some of these machines have an RFID EEPROM (24RF08) connected to the SMBus,
92which can easily get corrupted due to a state machine bug. These are mostly
93Thinkpad laptops, but desktop systems may also be affected. We have no list
94of all affected systems, so the only safe solution was to prevent access to
95the SMBus on all IBM systems (detected using DMI data.)
96
97For additional information, read:
98http://www2.lm-sensors.nu/~lm78/cvs/lm_sensors2/README.thinkpad
diff --git a/Documentation/i2c/busses/scx200_acb b/Documentation/i2c/busses/scx200_acb
index f50e69981ec6..7c07883d4dfc 100644
--- a/Documentation/i2c/busses/scx200_acb
+++ b/Documentation/i2c/busses/scx200_acb
@@ -2,14 +2,31 @@ Kernel driver scx200_acb
2 2
3Author: Christer Weinigel <wingel@nano-system.com> 3Author: Christer Weinigel <wingel@nano-system.com>
4 4
5The driver supersedes the older, never merged driver named i2c-nscacb.
6
5Module Parameters 7Module Parameters
6----------------- 8-----------------
7 9
8* base: int 10* base: up to 4 ints
9 Base addresses for the ACCESS.bus controllers on SCx200 and SC1100 devices 11 Base addresses for the ACCESS.bus controllers on SCx200 and SC1100 devices
10 12
13 By default the driver uses two base addresses 0x820 and 0x840.
14 If you want only one base address, specify the second as 0 so as to
15 override this default.
16
11Description 17Description
12----------- 18-----------
13 19
14Enable the use of the ACCESS.bus controller on the Geode SCx200 and 20Enable the use of the ACCESS.bus controller on the Geode SCx200 and
15SC1100 processors and the CS5535 and CS5536 Geode companion devices. 21SC1100 processors and the CS5535 and CS5536 Geode companion devices.
22
23Device-specific notes
24---------------------
25
26The SC1100 WRAP boards are known to use base addresses 0x810 and 0x820.
27If the scx200_acb driver is built into the kernel, add the following
28parameter to your boot command line:
29 scx200_acb.base=0x810,0x820
30If the scx200_acb driver is built as a module, add the following line to
31the file /etc/modprobe.conf instead:
32 options scx200_acb base=0x810,0x820
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
new file mode 100644
index 000000000000..38f9a52d1820
--- /dev/null
+++ b/Documentation/ia64/aliasing.txt
@@ -0,0 +1,208 @@
1 MEMORY ATTRIBUTE ALIASING ON IA-64
2
3 Bjorn Helgaas
4 <bjorn.helgaas@hp.com>
5 May 4, 2006
6
7
8MEMORY ATTRIBUTES
9
10 Itanium supports several attributes for virtual memory references.
11 The attribute is part of the virtual translation, i.e., it is
12 contained in the TLB entry. The ones of most interest to the Linux
13 kernel are:
14
15 WB Write-back (cacheable)
16 UC Uncacheable
17 WC Write-coalescing
18
19 System memory typically uses the WB attribute. The UC attribute is
20 used for memory-mapped I/O devices. The WC attribute is uncacheable
21 like UC is, but writes may be delayed and combined to increase
22 performance for things like frame buffers.
23
24 The Itanium architecture requires that we avoid accessing the same
25 page with both a cacheable mapping and an uncacheable mapping[1].
26
27 The design of the chipset determines which attributes are supported
28 on which regions of the address space. For example, some chipsets
29 support either WB or UC access to main memory, while others support
30 only WB access.
31
32MEMORY MAP
33
34 Platform firmware describes the physical memory map and the
35 supported attributes for each region. At boot-time, the kernel uses
36 the EFI GetMemoryMap() interface. ACPI can also describe memory
37 devices and the attributes they support, but Linux/ia64 currently
38 doesn't use this information.
39
40 The kernel uses the efi_memmap table returned from GetMemoryMap() to
41 learn the attributes supported by each region of physical address
42 space. Unfortunately, this table does not completely describe the
43 address space because some machines omit some or all of the MMIO
44 regions from the map.
45
46 The kernel maintains another table, kern_memmap, which describes the
47 memory Linux is actually using and the attribute for each region.
48 This contains only system memory; it does not contain MMIO space.
49
50 The kern_memmap table typically contains only a subset of the system
51 memory described by the efi_memmap. Linux/ia64 can't use all memory
52 in the system because of constraints imposed by the identity mapping
53 scheme.
54
55 The efi_memmap table is preserved unmodified because the original
56 boot-time information is required for kexec.
57
58KERNEL IDENTITY MAPPINGS
59
60 Linux/ia64 identity mappings are done with large pages, currently
61 either 16MB or 64MB, referred to as "granules." Cacheable mappings
62 are speculative[2], so the processor can read any location in the
63 page at any time, independent of the programmer's intentions. This
64 means that to avoid attribute aliasing, Linux can create a cacheable
65 identity mapping only when the entire granule supports cacheable
66 access.
67
68 Therefore, kern_memmap contains only full granule-sized regions that
69 can be referenced safely by an identity mapping.
70
71 Uncacheable mappings are not speculative, so the processor will
72 generate UC accesses only to locations explicitly referenced by
73 software. This allows UC identity mappings to cover granules that
74 are only partially populated, or populated with a combination of UC
75 and WB regions.
76
77USER MAPPINGS
78
79 User mappings are typically done with 16K or 64K pages. The smaller
80 page size allows more flexibility because only 16K or 64K has to be
81 homogeneous with respect to memory attributes.
82
83POTENTIAL ATTRIBUTE ALIASING CASES
84
85 There are several ways the kernel creates new mappings:
86
87 mmap of /dev/mem
88
89 This uses remap_pfn_range(), which creates user mappings. These
90 mappings may be either WB or UC. If the region being mapped
91 happens to be in kern_memmap, meaning that it may also be mapped
92 by a kernel identity mapping, the user mapping must use the same
93 attribute as the kernel mapping.
94
95 If the region is not in kern_memmap, the user mapping should use
96 an attribute reported as being supported in the EFI memory map.
97
98 Since the EFI memory map does not describe MMIO on some
99 machines, this should use an uncacheable mapping as a fallback.
100
101 mmap of /sys/class/pci_bus/.../legacy_mem
102
103 This is very similar to mmap of /dev/mem, except that legacy_mem
104 only allows mmap of the one megabyte "legacy MMIO" area for a
105 specific PCI bus. Typically this is the first megabyte of
106 physical address space, but it may be different on machines with
107 several VGA devices.
108
109 "X" uses this to access VGA frame buffers. Using legacy_mem
110 rather than /dev/mem allows multiple instances of X to talk to
111 different VGA cards.
112
113 The /dev/mem mmap constraints apply.
114
115 However, since this is for mapping legacy MMIO space, WB access
116 does not make sense. This matters on machines without legacy
117 VGA support: these machines may have WB memory for the entire
118 first megabyte (or even the entire first granule).
119
120 On these machines, we could mmap legacy_mem as WB, which would
121 be safe in terms of attribute aliasing, but X has no way of
122 knowing that it is accessing regular memory, not a frame buffer,
123 so the kernel should fail the mmap rather than doing it with WB.
124
125 read/write of /dev/mem
126
127 This uses copy_from_user(), which implicitly uses a kernel
128 identity mapping. This is obviously safe for things in
129 kern_memmap.
130
131 There may be corner cases of things that are not in kern_memmap,
132 but could be accessed this way. For example, registers in MMIO
133 space are not in kern_memmap, but could be accessed with a UC
134 mapping. This would not cause attribute aliasing. But
135 registers typically can be accessed only with four-byte or
136 eight-byte accesses, and the copy_from_user() path doesn't allow
137 any control over the access size, so this would be dangerous.
138
139 ioremap()
140
141 This returns a kernel identity mapping for use inside the
142 kernel.
143
144 If the region is in kern_memmap, we should use the attribute
145 specified there. Otherwise, if the EFI memory map reports that
146 the entire granule supports WB, we should use that (granules
147 that are partially reserved or occupied by firmware do not appear
148 in kern_memmap). Otherwise, we should use a UC mapping.
149
150PAST PROBLEM CASES
151
152 mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
153
154 The EFI memory map may not report these MMIO regions.
155
156 These must be allowed so that X will work. This means that
157 when the EFI memory map is incomplete, every /dev/mem mmap must
158 succeed. It may create either WB or UC user mappings, depending
159 on whether the region is in kern_memmap or the EFI memory map.
160
161 mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
162
163 See https://bugzilla.novell.com/show_bug.cgi?id=140858.
164
165 The EFI memory map reports the following attributes:
166 0x00000-0x9FFFF WB only
167 0xA0000-0xBFFFF UC only (VGA frame buffer)
168 0xC0000-0xFFFFF WB only
169
170 This mmap is done with user pages, not kernel identity mappings,
171 so it is safe to use WB mappings.
172
173 The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
174 which will use a granule-sized UC mapping covering 0-0xFFFFF. This
175 granule covers some WB-only memory, but since UC is non-speculative,
176 the processor will never generate an uncacheable reference to the
177 WB-only areas unless the driver explicitly touches them.
178
179 mmap of 0x0-0xFFFFF legacy_mem by "X"
180
181 If the EFI memory map reports this entire range as WB, there
182 is no VGA MMIO hole, and the mmap should fail or be done with
183 a WB mapping.
184
185 There's no easy way for X to determine whether the 0xA0000-0xBFFFF
186 region is a frame buffer or just memory, so I think it's best to
187 just fail this mmap request rather than using a WB mapping. As
188 far as I know, there's no need to map legacy_mem with WB
189 mappings.
190
191 Otherwise, a UC mapping of the entire region is probably safe.
192 The VGA hole means the region will not be in kern_memmap. The
193 HP sx1000 chipset doesn't support UC access to the memory surrounding
194 the VGA hole, but X doesn't need that area anyway and should not
195 reference it.
196
197 mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
198
199 The EFI memory map reports the following attributes:
200 0x00000-0xFFFFF WB only (no VGA MMIO hole)
201
202 This is a special case of the previous case, and the mmap should
203 fail for the same reason as above.
204
205NOTES
206
207 [1] SDM rev 2.2, vol 2, sec 4.4.1.
208 [2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 5c5a4ccce76a..187035560d7f 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -1,10 +1,10 @@
1IP OVER INFINIBAND 1IP OVER INFINIBAND
2 2
3 The ib_ipoib driver is an implementation of the IP over InfiniBand 3 The ib_ipoib driver is an implementation of the IP over InfiniBand
4 protocol as specified by the latest Internet-Drafts issued by the 4 protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
5 IETF ipoib working group. It is a "native" implementation in the 5 working group. It is a "native" implementation in the sense of
6 sense of setting the interface type to ARPHRD_INFINIBAND and the 6 setting the interface type to ARPHRD_INFINIBAND and the hardware
7 hardware address length to 20 (earlier proprietary implementations 7 address length to 20 (earlier proprietary implementations
8 masqueraded to the kernel as ethernet interfaces). 8 masqueraded to the kernel as ethernet interfaces).
9 9
10Partitions and P_Keys 10Partitions and P_Keys
@@ -53,3 +53,7 @@ References
53 53
54 IETF IP over InfiniBand (ipoib) Working Group 54 IETF IP over InfiniBand (ipoib) Working Group
55 http://ietf.org/html.charters/ipoib-charter.html 55 http://ietf.org/html.charters/ipoib-charter.html
56 Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
57 http://ietf.org/rfc/rfc4391.txt
58 IP over InfiniBand (IPoIB) Architecture (RFC 4392)
59 http://ietf.org/rfc/rfc4392.txt
diff --git a/Documentation/initrd.txt b/Documentation/initrd.txt
index 7de1c80cd719..b1b6440237a6 100644
--- a/Documentation/initrd.txt
+++ b/Documentation/initrd.txt
@@ -67,8 +67,7 @@ initrd adds the following new options:
67 as the last process has closed it, all data is freed and /dev/initrd 67 as the last process has closed it, all data is freed and /dev/initrd
68 can't be opened anymore. 68 can't be opened anymore.
69 69
70 root=/dev/ram0 (without devfs) 70 root=/dev/ram0
71 root=/dev/rd/0 (with devfs)
72 71
73 initrd is mounted as root, and the normal boot procedure is followed, 72 initrd is mounted as root, and the normal boot procedure is followed,
74 with the RAM disk still mounted as root. 73 with the RAM disk still mounted as root.
@@ -90,8 +89,7 @@ you're building an install floppy), the root file system creation
90procedure should create the /initrd directory. 89procedure should create the /initrd directory.
91 90
92If initrd will not be mounted in some cases, its content is still 91If initrd will not be mounted in some cases, its content is still
93accessible if the following device has been created (note that this 92accessible if the following device has been created:
94does not work if using devfs):
95 93
96# mknod /dev/initrd b 1 250 94# mknod /dev/initrd b 1 250
97# chmod 400 /dev/initrd 95# chmod 400 /dev/initrd
@@ -119,8 +117,7 @@ We'll describe the loopback device method:
119 (if space is critical, you may want to use the Minix FS instead of Ext2) 117 (if space is critical, you may want to use the Minix FS instead of Ext2)
120 3) mount the file system, e.g. 118 3) mount the file system, e.g.
121 # mount -t ext2 -o loop initrd /mnt 119 # mount -t ext2 -o loop initrd /mnt
122 4) create the console device (not necessary if using devfs, but it can't 120 4) create the console device:
123 hurt to do it anyway):
124 # mkdir /mnt/dev 121 # mkdir /mnt/dev
125 # mknod /mnt/dev/console c 5 1 122 # mknod /mnt/dev/console c 5 1
126 5) copy all the files that are needed to properly use the initrd 123 5) copy all the files that are needed to properly use the initrd
@@ -152,12 +149,7 @@ have to be given:
152 149
153 root=/dev/ram0 init=/linuxrc rw 150 root=/dev/ram0 init=/linuxrc rw
154 151
155if not using devfs, or 152(rw is only necessary if writing to the initrd file system.)
156
157 root=/dev/rd/0 init=/linuxrc rw
158
159if using devfs. (rw is only necessary if writing to the initrd file
160system.)
161 153
162With LOADLIN, you simply execute 154With LOADLIN, you simply execute
163 155
@@ -217,9 +209,9 @@ following command:
217# exec chroot . what-follows <dev/console >dev/console 2>&1 209# exec chroot . what-follows <dev/console >dev/console 2>&1
218 210
219Where what-follows is a program under the new root, e.g. /sbin/init 211Where what-follows is a program under the new root, e.g. /sbin/init
220If the new root file system will be used with devfs and has no valid 212If the new root file system will be used with udev and has no valid
221/dev directory, devfs must be mounted before invoking chroot in order to 213/dev directory, udev must be initialized before invoking chroot in order
222provide /dev/console. 214to provide /dev/console.
223 215
224Note: implementation details of pivot_root may change with time. In order 216Note: implementation details of pivot_root may change with time. In order
225to ensure compatibility, the following points should be observed: 217to ensure compatibility, the following points should be observed:
@@ -236,7 +228,7 @@ Now, the initrd can be unmounted and the memory allocated by the RAM
236disk can be freed: 228disk can be freed:
237 229
238# umount /initrd 230# umount /initrd
239# blockdev --flushbufs /dev/ram0 # /dev/rd/0 if using devfs 231# blockdev --flushbufs /dev/ram0
240 232
241It is also possible to use initrd with an NFS-mounted root, see the 233It is also possible to use initrd with an NFS-mounted root, see the
242pivot_root(8) man page for details. 234pivot_root(8) man page for details.
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index 171a44ebd939..edc04d74ae23 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -85,7 +85,9 @@ Code Seq# Include File Comments
85 <mailto:maassen@uni-freiburg.de> 85 <mailto:maassen@uni-freiburg.de>
86'C' all linux/soundcard.h 86'C' all linux/soundcard.h
87'D' all asm-s390/dasd.h 87'D' all asm-s390/dasd.h
88'E' all linux/input.h
88'F' all linux/fb.h 89'F' all linux/fb.h
90'H' all linux/hiddev.h
89'I' all linux/isdn.h 91'I' all linux/isdn.h
90'J' 00-1F drivers/scsi/gdth_ioctl.h 92'J' 00-1F drivers/scsi/gdth_ioctl.h
91'K' all linux/kd.h 93'K' all linux/kd.h
@@ -117,7 +119,6 @@ Code Seq# Include File Comments
117'c' 00-7F linux/comstats.h conflict! 119'c' 00-7F linux/comstats.h conflict!
118'c' 00-7F linux/coda.h conflict! 120'c' 00-7F linux/coda.h conflict!
119'd' 00-FF linux/char/drm/drm/h conflict! 121'd' 00-FF linux/char/drm/drm/h conflict!
120'd' 00-1F linux/devfs_fs.h conflict!
121'd' 00-DF linux/video_decoder.h conflict! 122'd' 00-DF linux/video_decoder.h conflict!
122'd' F0-FF linux/digi1.h 123'd' F0-FF linux/digi1.h
123'e' all linux/digi1.h conflict! 124'e' all linux/digi1.h conflict!
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
new file mode 100644
index 000000000000..6a444877ee0b
--- /dev/null
+++ b/Documentation/irqflags-tracing.txt
@@ -0,0 +1,57 @@
1IRQ-flags state tracing
2
3started by Ingo Molnar <mingo@redhat.com>
4
5the "irq-flags tracing" feature "traces" hardirq and softirq state, in
6that it gives interested subsystems an opportunity to be notified of
7every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
8happens in the kernel.
9
10CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING
11and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging
12code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and
13CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
14are locking APIs that are not used in IRQ context. (the one exception
15for rwsems is worked around)
16
17architecture support for this is certainly not in the "trivial"
18category, because lots of lowlevel assembly code deals with irq-flags
19state changes. But an architecture can be irq-flags-tracing enabled in a
20rather straightforward and risk-free manner.
21
22Architectures that want to support this need to do a couple of
23code-organizational changes first:
24
25- move their irq-flags manipulation code from their asm/system.h header
26 to asm/irqflags.h
27
28- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
29 the linux/irqflags.h code can inject callbacks and can construct the
30 real local_irq_disable()/etc APIs.
31
32- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
33
34and then a couple of functional changes are needed as well to implement
35irq-flags-tracing support:
36
37- in lowlevel entry code add (build-conditional) calls to the
38 trace_hardirqs_off()/trace_hardirqs_on() functions. The lock validator
39 closely guards whether the 'real' irq-flags matches the 'virtual'
40 irq-flags state, and complains loudly (and turns itself off) if the
41 two do not match. Usually most of the time for arch support for
42 irq-flags-tracing is spent in this state: look at the lockdep
43 complaint, try to figure out the assembly code we did not cover yet,
44 fix and repeat. Once the system has booted up and works without a
45 lockdep complaint in the irq-flags-tracing functions arch support is
46 complete.
47- if the architecture has non-maskable interrupts then those need to be
48 excluded from the irq-tracing [and lock validation] mechanism via
49 lockdep_off()/lockdep_on().
50
51in general there is no risk from having an incomplete irq-flags-tracing
52implementation in an architecture: lockdep will detect that and will
53turn itself off. I.e. the lock validator will still be reliable. There
54should be no crashes due to irq-tracing bugs. (except if the assembly
55changes break other code by modifying conditions or registers that
56shouldn't be)
57
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
index 85a64defd385..fa0d4cca964a 100644
--- a/Documentation/isdn/README.gigaset
+++ b/Documentation/isdn/README.gigaset
@@ -124,7 +124,8 @@ GigaSet 307x Device Driver
124 124
125 You can use some configuration tool of your distribution to configure this 125 You can use some configuration tool of your distribution to configure this
126 "modem" or configure pppd/wvdial manually. There are some example ppp 126 "modem" or configure pppd/wvdial manually. There are some example ppp
127 configuration files and chat scripts in the gigaset-VERSION/ppp directory. 127 configuration files and chat scripts in the gigaset-VERSION/ppp directory
128 in the driver packages from http://sourceforge.net/projects/gigaset307x/.
128 Please note that the USB drivers are not able to change the state of the 129 Please note that the USB drivers are not able to change the state of the
129 control lines (the M105 driver can be configured to use some undocumented 130 control lines (the M105 driver can be configured to use some undocumented
130 control requests, if you really need the control lines, though). This means 131 control requests, if you really need the control lines, though). This means
@@ -164,8 +165,8 @@ GigaSet 307x Device Driver
164 165
165 If you want both of these at once, you are out of luck. 166 If you want both of these at once, you are out of luck.
166 167
167 You can also use /sys/module/<name>/parameters/cidmode for changing 168 You can also use /sys/class/tty/ttyGxy/cidmode for changing the CID mode
168 the CID mode setting (<name> is usb_gigaset or bas_gigaset). 169 setting (ttyGxy is ttyGU0 or ttyGB0).
169 170
170 171
1713. Troubleshooting 1723. Troubleshooting
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index a9c00facdf40..14ef3868a328 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -1123,6 +1123,14 @@ The top Makefile exports the following variables:
1123 $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE). The user may 1123 $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE). The user may
1124 override this value on the command line if desired. 1124 override this value on the command line if desired.
1125 1125
1126 INSTALL_MOD_STRIP
1127
1128 If this variable is specified, it will cause modules to be stripped
1129 after they are installed. If INSTALL_MOD_STRIP is '1', then the
1130 default option --strip-debug will be used. Otherwise,
1131 INSTALL_MOD_STRIP will be used as the option(s) to the strip command.
1132
1133
1126=== 8 Makefile language 1134=== 8 Makefile language
1127 1135
1128The kernel Makefiles are designed to run with GNU Make. The Makefiles 1136The kernel Makefiles are designed to run with GNU Make. The Makefiles
diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/kdump/gdbmacros.txt
index dcf5580380ab..9b9b454b048a 100644
--- a/Documentation/kdump/gdbmacros.txt
+++ b/Documentation/kdump/gdbmacros.txt
@@ -175,7 +175,7 @@ end
175document trapinfo 175document trapinfo
176 Run info threads and lookup pid of thread #1 176 Run info threads and lookup pid of thread #1
177 'trapinfo <pid>' will tell you by which trap & possibly 177 'trapinfo <pid>' will tell you by which trap & possibly
178 addresthe kernel paniced. 178 address the kernel panicked.
179end 179end
180 180
181 181
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 212cf3c21abf..08bafa8c1caa 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -1,155 +1,325 @@
1Documentation for kdump - the kexec-based crash dumping solution 1================================================================
2Documentation for Kdump - The kexec-based Crash Dumping Solution
2================================================================ 3================================================================
3 4
4DESIGN 5This document includes overview, setup and installation, and analysis
5====== 6information.
6 7
7Kdump uses kexec to reboot to a second kernel whenever a dump needs to be 8Overview
8taken. This second kernel is booted with very little memory. The first kernel 9========
9reserves the section of memory that the second kernel uses. This ensures that
10on-going DMA from the first kernel does not corrupt the second kernel.
11 10
12All the necessary information about Core image is encoded in ELF format and 11Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
13stored in reserved area of memory before crash. Physical address of start of 12dump of the system kernel's memory needs to be taken (for example, when
14ELF header is passed to new kernel through command line parameter elfcorehdr=. 13the system panics). The system kernel's memory image is preserved across
14the reboot and is accessible to the dump-capture kernel.
15 15
16On i386, the first 640 KB of physical memory is needed to boot, irrespective 16You can use common Linux commands, such as cp and scp, to copy the
17of where the kernel loads. Hence, this region is backed up by kexec just before 17memory image to a dump file on the local disk, or across the network to
18rebooting into the new kernel. 18a remote system.
19 19
20In the second kernel, "old memory" can be accessed in two ways. 20Kdump and kexec are currently supported on the x86, x86_64, and ppc64
21architectures.
21 22
22- The first one is through a /dev/oldmem device interface. A capture utility 23When the system kernel boots, it reserves a small section of memory for
23 can read the device file and write out the memory in raw format. This is raw 24the dump-capture kernel. This ensures that ongoing Direct Memory Access
24 dump of memory and analysis/capture tool should be intelligent enough to 25(DMA) from the system kernel does not corrupt the dump-capture kernel.
25 determine where to look for the right information. ELF headers (elfcorehdr=) 26The kexec -p command loads the dump-capture kernel into this reserved
26 can become handy here. 27memory.
27 28
28- The second interface is through /proc/vmcore. This exports the dump as an ELF 29On x86 machines, the first 640 KB of physical memory is needed to boot,
29 format file which can be written out using any file copy command 30regardless of where the kernel loads. Therefore, kexec backs up this
30 (cp, scp, etc). Further, gdb can be used to perform limited debugging on 31region just before rebooting into the dump-capture kernel.
31 the dump file. This method ensures methods ensure that there is correct
32 ordering of the dump pages (corresponding to the first 640 KB that has been
33 relocated).
34 32
35SETUP 33All of the necessary information about the system kernel's core image is
36===== 34encoded in the ELF format, and stored in a reserved area of memory
35before a crash. The physical address of the start of the ELF header is
36passed to the dump-capture kernel through the elfcorehdr= boot
37parameter.
38
39With the dump-capture kernel, you can access the memory image, or "old
40memory," in two ways:
41
42- Through a /dev/oldmem device interface. A capture utility can read the
43 device file and write out the memory in raw format. This is a raw dump
44 of memory. Analysis and capture tools must be intelligent enough to
45 determine where to look for the right information.
46
47- Through /proc/vmcore. This exports the dump as an ELF-format file that
48 you can write out using file copy commands such as cp or scp. Further,
49 you can use analysis tools such as the GNU Debugger (GDB) and the Crash
50 tool to debug the dump file. This method ensures that the dump pages are
51 correctly ordered.
52
53
54Setup and Installation
55======================
56
57Install kexec-tools and the Kdump patch
58---------------------------------------
59
601) Log in as the root user.
61
622) Download the kexec-tools user-space package from the following URL:
63
64 http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz
65
663) Unpack the tarball with the tar command, as follows:
67
68 tar xvpzf kexec-tools-1.101.tar.gz
69
704) Download the latest consolidated Kdump patch from the following URL:
71
72 http://lse.sourceforge.net/kdump/
73
74 (This location is being used until all the user-space Kdump patches
75 are integrated with the kexec-tools package.)
76
775) Change to the kexec-tools-1.101 directory, as follows:
78
79 cd kexec-tools-1.101
80
816) Apply the consolidated patch to the kexec-tools-1.101 source tree
82 with the patch command, as follows. (Modify the path to the downloaded
83 patch as necessary.)
84
85 patch -p1 < /path-to-kdump-patch/kexec-tools-1.101-kdump.patch
86
877) Configure the package, as follows:
88
89 ./configure
90
918) Compile the package, as follows:
92
93 make
94
959) Install the package, as follows:
96
97 make install
98
99
100Download and build the system and dump-capture kernels
101------------------------------------------------------
102
103Download the mainline (vanilla) kernel source code (2.6.13-rc1 or newer)
104from http://www.kernel.org. Two kernels must be built: a system kernel
105and a dump-capture kernel. Use the following steps to configure these
106kernels with the necessary kexec and Kdump features:
107
108System kernel
109-------------
110
1111) Enable "kexec system call" in "Processor type and features."
112
113 CONFIG_KEXEC=y
114
1152) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
116 filesystems." This is usually enabled by default.
117
118 CONFIG_SYSFS=y
119
120 Note that "sysfs file system support" might not appear in the "Pseudo
121 filesystems" menu if "Configure standard kernel features (for small
122 systems)" is not enabled in "General Setup." In this case, check the
123 .config file itself to ensure that sysfs is turned on, as follows:
124
125 grep 'CONFIG_SYSFS' .config
126
1273) Enable "Compile the kernel with debug info" in "Kernel hacking."
128
129 CONFIG_DEBUG_INFO=y
130
131 This causes the kernel to be built with debug symbols. The dump
132 analysis tools require a vmlinux with debug symbols in order to read
133 and analyze a dump file.
134
1354) Make and install the kernel and its modules. Update the boot loader
136 (such as grub, yaboot, or lilo) configuration files as necessary.
137
1385) Boot the system kernel with the boot parameter "crashkernel=Y@X",
139 where Y specifies how much memory to reserve for the dump-capture kernel
140 and X specifies the beginning of this reserved memory. For example,
141 "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
142 starting at physical address 0x01000000 for the dump-capture kernel.
143
144 On x86 and x86_64, use "crashkernel=64M@16M".
145
146 On ppc64, use "crashkernel=128M@32M".
147
148
149The dump-capture kernel
150-----------------------
37 151
381) Download the upstream kexec-tools userspace package from 1521) Under "General setup," append "-kdump" to the current string in
39 http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz. 153 "Local version."
40 154
41 Apply the latest consolidated kdump patch on top of kexec-tools-1.101 1552) On x86, enable high memory support under "Processor type and
42 from http://lse.sourceforge.net/kdump/. This arrangment has been made 156 features":
43 till all the userspace patches supporting kdump are integrated with 157
44 upstream kexec-tools userspace. 158 CONFIG_HIGHMEM64G=y
45 159 or
462) Download and build the appropriate (2.6.13-rc1 onwards) vanilla kernels. 160 CONFIG_HIGHMEM4G
47 Two kernels need to be built in order to get this feature working. 160 CONFIG_HIGHMEM4G=y
48 Following are the steps to properly configure the two kernels specific 1623) On x86 and x86_64, disable symmetric multi-processing support
49 to kexec and kdump features: 163 under "Processor type and features":
50 164
51 A) First kernel or regular kernel: 165 CONFIG_SMP=n
52 ---------------------------------- 166 (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
53 a) Enable "kexec system call" feature (in Processor type and features). 167 when loading the dump-capture kernel, see section "Load the Dump-capture
54 CONFIG_KEXEC=y 168 Kernel".)
55 b) Enable "sysfs file system support" (in Pseudo filesystems). 169
56 CONFIG_SYSFS=y 1704) On ppc64, disable NUMA support and enable EMBEDDED support:
57 c) make 171
58 d) Boot into first kernel with the command line parameter "crashkernel=Y@X". 172 CONFIG_NUMA=n
59 Use appropriate values for X and Y. Y denotes how much memory to reserve 173 CONFIG_EMBEDDED=y
60 for the second kernel, and X denotes at what physical address the 174 CONFIG_EEH=n for the dump-capture kernel
61 reserved memory section starts. For example: "crashkernel=64M@16M". 175
62 1765) Enable "kernel crash dumps" support under "Processor type and
63 177 features":
64 B) Second kernel or dump capture kernel: 178
65 --------------------------------------- 179 CONFIG_CRASH_DUMP=y
66 a) For i386 architecture enable Highmem support 180
67 CONFIG_HIGHMEM=y 1816) Use a suitable value for "Physical address where the kernel is
68 b) Enable "kernel crash dumps" feature (under "Processor type and features") 182 loaded" (under "Processor type and features"). This only appears when
69 CONFIG_CRASH_DUMP=y 183 "kernel crash dumps" is enabled. By default this value is 0x1000000
70 c) Make sure a suitable value for "Physical address where the kernel is 184 (16MB). It should be the same as X in the "crashkernel=Y@X" boot
71 loaded" (under "Processor type and features"). By default this value 185 parameter discussed above.
72 is 0x1000000 (16MB) and it should be same as X (See option d above), 186
73 e.g., 16 MB or 0x1000000. 187 On x86 and x86_64, use "CONFIG_PHYSICAL_START=0x1000000".
74 CONFIG_PHYSICAL_START=0x1000000 188
75 d) Enable "/proc/vmcore support" (Optional, under "Pseudo filesystems"). 189 On ppc64 the value is automatically set at 32MB when
76 CONFIG_PROC_VMCORE=y 190 CONFIG_CRASH_DUMP is set.
77 191
783) After booting to regular kernel or first kernel, load the second kernel 1927) Optionally enable "/proc/vmcore support" under "Filesystems" ->
79 using the following command: 193 "Pseudo filesystems".
80 194
81 kexec -p <second-kernel> --args-linux --elf32-core-headers 195 CONFIG_PROC_VMCORE=y
82 --append="root=<root-dev> init 1 irqpoll maxcpus=1" 196 (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
83 197
84 Notes: 1988) Make and install the kernel and its modules. DO NOT add this kernel
85 ====== 199 to the boot loader configuration files.
86 i) <second-kernel> has to be a vmlinux image ie uncompressed elf image. 200
87 bzImage will not work, as of now. 201
88 ii) --args-linux has to be speicfied as if kexec it loading an elf image, 202Load the Dump-capture Kernel
89 it needs to know that the arguments supplied are of linux type. 203============================
90 iii) By default ELF headers are stored in ELF64 format to support systems 204
91 with more than 4GB memory. Option --elf32-core-headers forces generation 205After booting to the system kernel, load the dump-capture kernel using
92 of ELF32 headers. The reason for this option being, as of now gdb can 206the following command:
93 not open vmcore file with ELF64 headers on a 32 bit systems. So ELF32 207
94 headers can be used if one has non-PAE systems and hence memory less 208 kexec -p <dump-capture-kernel> \
95 than 4GB. 209 --initrd=<initrd-for-dump-capture-kernel> --args-linux \
96 iv) Specify "irqpoll" as command line parameter. This reduces driver 210 --append="root=<root-dev> init 1 irqpoll"
97 initialization failures in second kernel due to shared interrupts. 211
98 v) <root-dev> needs to be specified in a format corresponding to the root 212
99 device name in the output of mount command. 213Notes on loading the dump-capture kernel:
100 vi) If you have built the drivers required to mount root file system as 214
101 modules in <second-kernel>, then, specify 215* <dump-capture-kernel> must be a vmlinux image (that is, an
102 --initrd=<initrd-for-second-kernel>. 216 uncompressed ELF image). bzImage does not work at this time.
103 vii) Specify maxcpus=1 as, if during first kernel run, if panic happens on 217
104 non-boot cpus, second kernel doesn't seem to be boot up all the cpus. 218* By default, the ELF headers are stored in ELF64 format to support
105 The other option is to always built the second kernel without SMP 219 systems with more than 4GB memory. The --elf32-core-headers option can
106 support ie CONFIG_SMP=n 220 be used to force the generation of ELF32 headers. This is necessary
107 221 because GDB currently cannot open vmcore files with ELF64 headers on
1084) After successfully loading the second kernel as above, if a panic occurs 222 32-bit systems. ELF32 headers can be used on non-PAE systems (that is,
109 system reboots into the second kernel. A module can be written to force 223 less than 4GB of memory).
110 the panic or "ALT-SysRq-c" can be used initiate a crash dump for testing 224
111 purposes. 225* The "irqpoll" boot parameter reduces driver initialization failures
112 226 due to shared interrupts in the dump-capture kernel.
1135) Once the second kernel has booted, write out the dump file using 227
228* You must specify <root-dev> in the format corresponding to the root
229 device name in the output of mount command.
230
231* "init 1" boots the dump-capture kernel into single-user mode without
232 networking. If you want networking, use "init 3."
233
234
235Kernel Panic
236============
237
238After successfully loading the dump-capture kernel as previously
239described, the system will reboot into the dump-capture kernel if a
240system crash is triggered. Trigger points are located in panic(),
241die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
242
243The following conditions will execute a crash trigger point:
244
245If a hard lockup is detected and "NMI watchdog" is configured, the system
246will boot into the dump-capture kernel ( die_nmi() ).
247
248If die() is called, and it happens to be a thread with pid 0 or 1, or die()
249is called inside interrupt context or die() is called and panic_on_oops is set,
250the system will boot into the dump-capture kernel.
251
252On powerpc systems, when a soft-reset is generated, die() is called by all cpus and the system will boot into the dump-capture kernel.
253
254For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
255"echo c > /proc/sysrq-trigger", or by writing a module to force the panic.
256
257Write Out the Dump File
258=======================
259
260After the dump-capture kernel is booted, write out the dump file with
261the following command:
114 262
115 cp /proc/vmcore <dump-file> 263 cp /proc/vmcore <dump-file>
116 264
117 Dump memory can also be accessed as a /dev/oldmem device for a linear/raw 265You can also access dumped memory as a /dev/oldmem device for a linear
118 view. To create the device, type: 266and raw view. To create the device, use the following command:
119 267
120 mknod /dev/oldmem c 1 12 268 mknod /dev/oldmem c 1 12
121 269
122 Use "dd" with suitable options for count, bs and skip to access specific 270Use the dd command with suitable options for count, bs, and skip to
123 portions of the dump. 271access specific portions of the dump.
124 272
125 Entire memory: dd if=/dev/oldmem of=oldmem.001 273To see the entire memory, use the following command:
126 274
275 dd if=/dev/oldmem of=oldmem.001
127 276
128ANALYSIS 277
278Analysis
129======== 279========
130Limited analysis can be done using gdb on the dump file copied out of
131/proc/vmcore. Use vmlinux built with -g and run
132 280
133 gdb vmlinux <dump-file> 281Before analyzing the dump image, you should reboot into a stable kernel.
282
283You can do limited analysis using GDB on the dump file copied out of
284/proc/vmcore. Use the debug vmlinux built with -g and run the following
285command:
286
287 gdb vmlinux <dump-file>
134 288
135Stack trace for the task on processor 0, register display, memory display 289Stack trace for the task on processor 0, register display, and memory
136work fine. 290display work fine.
137 291
138Note: gdb cannot analyse core files generated in ELF64 format for i386. 292Note: GDB cannot analyze core files generated in ELF64 format for x86.
293On systems with a maximum of 4GB of memory, you can generate
294ELF32-format headers using the --elf32-core-headers kernel option on the
295dump kernel.
139 296
140Latest "crash" (crash-4.0-2.18) as available on Dave Anderson's site 297You can also use the Crash utility to analyze dump files in Kdump
141http://people.redhat.com/~anderson/ works well with kdump format. 298format. Crash is available on Dave Anderson's site at the following URL:
142 299
300 http://people.redhat.com/~anderson/
301
302
303To Do
304=====
143 305
144TODO 3061) Provide a kernel pages filtering mechanism, so core file size is not
145==== 307 extreme on systems with huge memory banks.
1461) Provide a kernel pages filtering mechanism so that core file size is not
147 insane on systems having huge memory banks.
1482) Relocatable kernel can help in maintaining multiple kernels for crashdump
149 and same kernel as the first kernel can be used to capture the dump.
150 308
3092) Relocatable kernel can help in maintaining multiple kernels for
310 crash_dump, and the same kernel as the system kernel can be used to
311 capture the dump.
151 312
152CONTACT 313
314Contact
153======= 315=======
316
154Vivek Goyal (vgoyal@in.ibm.com) 317Vivek Goyal (vgoyal@in.ibm.com)
155Maneesh Soni (maneesh@in.ibm.com) 318Maneesh Soni (maneesh@in.ibm.com)
319
320
321Trademark
322=========
323
324Linux is a trademark of Linus Torvalds in the United States, other
325countries, or both.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index b3a6187e5305..149f62ba14a5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -35,7 +35,6 @@ parameter is applicable:
35 APM Advanced Power Management support is enabled. 35 APM Advanced Power Management support is enabled.
36 AX25 Appropriate AX.25 support is enabled. 36 AX25 Appropriate AX.25 support is enabled.
37 CD Appropriate CD support is enabled. 37 CD Appropriate CD support is enabled.
38 DEVFS devfs support is enabled.
39 DRM Direct Rendering Management support is enabled. 38 DRM Direct Rendering Management support is enabled.
40 EDD BIOS Enhanced Disk Drive Services (EDD) is enabled 39 EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
41 EFI EFI Partitioning (GPT) is enabled 40 EFI EFI Partitioning (GPT) is enabled
@@ -61,6 +60,7 @@ parameter is applicable:
61 MTD MTD support is enabled. 60 MTD MTD support is enabled.
62 NET Appropriate network support is enabled. 61 NET Appropriate network support is enabled.
63 NUMA NUMA support is enabled. 62 NUMA NUMA support is enabled.
63 GENERIC_TIME The generic timeofday code is enabled.
64 NFS Appropriate NFS support is enabled. 64 NFS Appropriate NFS support is enabled.
65 OSS OSS sound support is enabled. 65 OSS OSS sound support is enabled.
66 PARIDE The ParIDE subsystem is enabled. 66 PARIDE The ParIDE subsystem is enabled.
@@ -147,6 +147,9 @@ running once the system is up.
147 acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA 147 acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
148 Format: <irq>,<irq>... 148 Format: <irq>,<irq>...
149 149
150 acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
151 Format: To spoof as Windows 98: ="Microsoft Windows"
152
150 acpi_osi= [HW,ACPI] empty param disables _OSI 153 acpi_osi= [HW,ACPI] empty param disables _OSI
151 154
152 acpi_serialize [HW,ACPI] force serialization of AML methods 155 acpi_serialize [HW,ACPI] force serialization of AML methods
@@ -176,6 +179,11 @@ running once the system is up.
176 override platform specific driver. 179 override platform specific driver.
177 See also Documentation/acpi-hotkey.txt. 180 See also Documentation/acpi-hotkey.txt.
178 181
182 acpi_pm_good [IA-32,X86-64]
183 Override the pmtimer bug detection: force the kernel
184 to assume that this machine's pmtimer latches its value
185 and always returns good values.
186
179 enable_timer_pin_1 [i386,x86-64] 187 enable_timer_pin_1 [i386,x86-64]
180 Enable PIN 1 of APIC timer 188 Enable PIN 1 of APIC timer
181 Can be useful to work around chipset bugs 189 Can be useful to work around chipset bugs
@@ -338,10 +346,11 @@ running once the system is up.
338 Value can be changed at runtime via 346 Value can be changed at runtime via
339 /selinux/checkreqprot. 347 /selinux/checkreqprot.
340 348
341 clock= [BUGS=IA-32,HW] gettimeofday timesource override. 349 clock= [BUGS=IA-32, HW] gettimeofday clocksource override.
342 Forces specified timesource (if avaliable) to be used 350 [Deprecated]
343 when calculating gettimeofday(). If specicified 351 Forces specified clocksource (if available) to be used
344 timesource is not avalible, it defaults to PIT. 352 when calculating gettimeofday(). If specified
353 clocksource is not available, it defaults to PIT.
345 Format: { pit | tsc | cyclone | pmtmr } 354 Format: { pit | tsc | cyclone | pmtmr }
346 355
347 disable_8254_timer 356 disable_8254_timer
@@ -426,13 +435,19 @@ running once the system is up.
426 435
427 debug [KNL] Enable kernel debugging (events log level). 436 debug [KNL] Enable kernel debugging (events log level).
428 437
438 debug_locks_verbose=
439 [KNL] verbose self-tests
440 Format=<0|1>
441 Print debugging info while doing the locking API
442 self-tests.
443 We default to 0 (no extra messages), setting it to
444 1 will print _a lot_ more information - normally
445 only useful to kernel developers.
446
429 decnet= [HW,NET] 447 decnet= [HW,NET]
430 Format: <area>[,<node>] 448 Format: <area>[,<node>]
431 See also Documentation/networking/decnet.txt. 449 See also Documentation/networking/decnet.txt.
432 450
433 devfs= [DEVFS]
434 See Documentation/filesystems/devfs/boot-options.
435
436 dhash_entries= [KNL] 451 dhash_entries= [KNL]
437 Set number of hash buckets for dentry cache. 452 Set number of hash buckets for dentry cache.
438 453
@@ -1402,6 +1417,15 @@ running once the system is up.
1402 If enabled at boot time, /selinux/disable can be used 1417 If enabled at boot time, /selinux/disable can be used
1403 later to disable prior to initial policy load. 1418 later to disable prior to initial policy load.
1404 1419
1420 selinux_compat_net =
1421 [SELINUX] Set initial selinux_compat_net flag value.
1422 Format: { "0" | "1" }
1423 0 -- use new secmark-based packet controls
1424 1 -- use legacy packet controls
1425 Default value is 0 (preferred).
1426 Value can be changed at runtime via
1427 /selinux/compat_net.
1428
1405 serialnumber [BUGS=IA-32] 1429 serialnumber [BUGS=IA-32]
1406 1430
1407 sg_def_reserved_size= [SCSI] 1431 sg_def_reserved_size= [SCSI]
@@ -1605,6 +1629,10 @@ running once the system is up.
1605 1629
1606 time Show timing data prefixed to each printk message line 1630 time Show timing data prefixed to each printk message line
1607 1631
1632 clocksource= [GENERIC_TIME] Override the default clocksource
1633 Override the default clocksource and use the clocksource
1634 with the name specified.
1635
1608 tipar.timeout= [HW,PPT] 1636 tipar.timeout= [HW,PPT]
1609 Set communications timeout in tenths of a second 1637 Set communications timeout in tenths of a second
1610 (default 15). 1638 (default 15).
@@ -1646,6 +1674,10 @@ running once the system is up.
1646 usbhid.mousepoll= 1674 usbhid.mousepoll=
1647 [USBHID] The interval which mice are to be polled at. 1675 [USBHID] The interval which mice are to be polled at.
1648 1676
1677 vdso= [IA-32]
1678 vdso=1: enable VDSO (default)
1679 vdso=0: disable VDSO mapping
1680
1649 video= [FB] Frame buffer configuration 1681 video= [FB] Frame buffer configuration
1650 See Documentation/fb/modedb.txt. 1682 See Documentation/fb/modedb.txt.
1651 1683
@@ -1662,9 +1694,14 @@ running once the system is up.
1662 decrease the size and leave more room for directly 1694 decrease the size and leave more room for directly
1663 mapped kernel RAM. 1695 mapped kernel RAM.
1664 1696
1665 vmhalt= [KNL,S390] 1697 vmhalt= [KNL,S390] Perform z/VM CP command after system halt.
1698 Format: <command>
1699
1700 vmpanic= [KNL,S390] Perform z/VM CP command after kernel panic.
1701 Format: <command>
1666 1702
1667 vmpoff= [KNL,S390] 1703 vmpoff= [KNL,S390] Perform z/VM CP command after power off.
1704 Format: <command>
1668 1705
1669 waveartist= [HW,OSS] 1706 waveartist= [HW,OSS]
1670 Format: <io>,<irq>,<dma>,<dma2> 1707 Format: <io>,<irq>,<dma>,<dma2>
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt
index 22488d791168..c1f64fdf84cb 100644
--- a/Documentation/keys-request-key.txt
+++ b/Documentation/keys-request-key.txt
@@ -3,16 +3,23 @@
3 =================== 3 ===================
4 4
5The key request service is part of the key retention service (refer to 5The key request service is part of the key retention service (refer to
6Documentation/keys.txt). This document explains more fully how that the 6Documentation/keys.txt). This document explains more fully how the requesting
7requesting algorithm works. 7algorithm works.
8 8
9The process starts by either the kernel requesting a service by calling 9The process starts by either the kernel requesting a service by calling
10request_key(): 10request_key*():
11 11
12 struct key *request_key(const struct key_type *type, 12 struct key *request_key(const struct key_type *type,
13 const char *description, 13 const char *description,
14 const char *callout_string); 14 const char *callout_string);
15 15
16or:
17
18 struct key *request_key_with_auxdata(const struct key_type *type,
19 const char *description,
20 const char *callout_string,
21 void *aux);
22
16Or by userspace invoking the request_key system call: 23Or by userspace invoking the request_key system call:
17 24
18 key_serial_t request_key(const char *type, 25 key_serial_t request_key(const char *type,
@@ -20,16 +27,26 @@ Or by userspace invoking the request_key system call:
20 const char *callout_info, 27 const char *callout_info,
21 key_serial_t dest_keyring); 28 key_serial_t dest_keyring);
22 29
23The main difference between the two access points is that the in-kernel 30The main difference between the access points is that the in-kernel interface
24interface does not need to link the key to a keyring to prevent it from being 31does not need to link the key to a keyring to prevent it from being immediately
25immediately destroyed. The kernel interface returns a pointer directly to the 32destroyed. The kernel interface returns a pointer directly to the key, and
26key, and it's up to the caller to destroy the key. 33it's up to the caller to destroy the key.
34
35The request_key_with_auxdata() call is like the in-kernel request_key() call,
36except that it permits auxiliary data to be passed to the upcaller (the default
37is NULL). This is only useful for those key types that define their own upcall
38mechanism rather than using /sbin/request-key.
27 39
28The userspace interface links the key to a keyring associated with the process 40The userspace interface links the key to a keyring associated with the process
29to prevent the key from going away, and returns the serial number of the key to 41to prevent the key from going away, and returns the serial number of the key to
30the caller. 42the caller.
31 43
32 44
45The following example assumes that the key types involved don't define their
46own upcall mechanisms. If they do, then those should be substituted for the
47forking and execution of /sbin/request-key.
48
49
33=========== 50===========
34THE PROCESS 51THE PROCESS
35=========== 52===========
@@ -40,8 +57,8 @@ A request proceeds in the following manner:
40 interface]. 57 interface].
41 58
42 (2) request_key() searches the process's subscribed keyrings to see if there's 59 (2) request_key() searches the process's subscribed keyrings to see if there's
43 a suitable key there. If there is, it returns the key. If there isn't, and 60 a suitable key there. If there is, it returns the key. If there isn't,
44 callout_info is not set, an error is returned. Otherwise the process 61 and callout_info is not set, an error is returned. Otherwise the process
45 proceeds to the next step. 62 proceeds to the next step.
46 63
47 (3) request_key() sees that A doesn't have the desired key yet, so it creates 64 (3) request_key() sees that A doesn't have the desired key yet, so it creates
@@ -62,7 +79,7 @@ A request proceeds in the following manner:
62 instantiation. 79 instantiation.
63 80
64 (7) The program may want to access another key from A's context (say a 81 (7) The program may want to access another key from A's context (say a
65 Kerberos TGT key). It just requests the appropriate key, and the keyring 82 Kerberos TGT key). It just requests the appropriate key, and the keyring
66 search notes that the session keyring has auth key V in its bottom level. 83 search notes that the session keyring has auth key V in its bottom level.
67 84
68 This will permit it to then search the keyrings of process A with the 85 This will permit it to then search the keyrings of process A with the
@@ -79,10 +96,11 @@ A request proceeds in the following manner:
79(10) The program then exits 0 and request_key() deletes key V and returns key 96(10) The program then exits 0 and request_key() deletes key V and returns key
80 U to the caller. 97 U to the caller.
81 98
82This also extends further. If key W (step 7 above) didn't exist, key W would be 99This also extends further. If key W (step 7 above) didn't exist, key W would
83created uninstantiated, another auth key (X) would be created (as per step 3) 100be created uninstantiated, another auth key (X) would be created (as per step
84and another copy of /sbin/request-key spawned (as per step 4); but the context 1013) and another copy of /sbin/request-key spawned (as per step 4); but the
85specified by auth key X will still be process A, as it was in auth key V. 102context specified by auth key X will still be process A, as it was in auth key
103V.
86 104
87This is because process A's keyrings can't simply be attached to 105This is because process A's keyrings can't simply be attached to
88/sbin/request-key at the appropriate places because (a) execve will discard two 106/sbin/request-key at the appropriate places because (a) execve will discard two
@@ -118,17 +136,17 @@ A search of any particular keyring proceeds in the following fashion:
118 136
119 (2) It considers all the non-keyring keys within that keyring and, if any key 137 (2) It considers all the non-keyring keys within that keyring and, if any key
120 matches the criteria specified, calls key_permission(SEARCH) on it to see 138 matches the criteria specified, calls key_permission(SEARCH) on it to see
121 if the key is allowed to be found. If it is, that key is returned; if 139 if the key is allowed to be found. If it is, that key is returned; if
122 not, the search continues, and the error code is retained if of higher 140 not, the search continues, and the error code is retained if of higher
123 priority than the one currently set. 141 priority than the one currently set.
124 142
125 (3) It then considers all the keyring-type keys in the keyring it's currently 143 (3) It then considers all the keyring-type keys in the keyring it's currently
126 searching. It calls key_permission(SEARCH) on each keyring, and if this 144 searching. It calls key_permission(SEARCH) on each keyring, and if this
127 grants permission, it recurses, executing steps (2) and (3) on that 145 grants permission, it recurses, executing steps (2) and (3) on that
128 keyring. 146 keyring.
129 147
130The process stops immediately a valid key is found with permission granted to 148The process stops immediately a valid key is found with permission granted to
131use it. Any error from a previous match attempt is discarded and the key is 149use it. Any error from a previous match attempt is discarded and the key is
132returned. 150returned.
133 151
134When search_process_keyrings() is invoked, it performs the following searches 152When search_process_keyrings() is invoked, it performs the following searches
@@ -153,7 +171,7 @@ The moment one succeeds, all pending errors are discarded and the found key is
153returned. 171returned.
154 172
155Only if all these fail does the whole thing fail with the highest priority 173Only if all these fail does the whole thing fail with the highest priority
156error. Note that several errors may have come from LSM. 174error. Note that several errors may have come from LSM.
157 175
158The error priority is: 176The error priority is:
159 177
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index aaa01b0e3ee9..e373f0212843 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -19,6 +19,7 @@ This document has the following sections:
19 - Key overview 19 - Key overview
20 - Key service overview 20 - Key service overview
21 - Key access permissions 21 - Key access permissions
22 - SELinux support
22 - New procfs files 23 - New procfs files
23 - Userspace system call interface 24 - Userspace system call interface
24 - Kernel services 25 - Kernel services
@@ -232,6 +233,39 @@ For changing the ownership, group ID or permissions mask, being the owner of
232the key or having the sysadmin capability is sufficient. 233the key or having the sysadmin capability is sufficient.
233 234
234 235
236===============
237SELINUX SUPPORT
238===============
239
240The security class "key" has been added to SELinux so that mandatory access
241controls can be applied to keys created within various contexts. This support
242is preliminary, and is likely to change quite significantly in the near future.
243Currently, all of the basic permissions explained above are provided in SELinux
244as well; SELinux is simply invoked after all basic permission checks have been
245performed.
246
247The value of the file /proc/self/attr/keycreate influences the labeling of
248newly-created keys. If the contents of that file correspond to an SELinux
249security context, then the key will be assigned that context. Otherwise, the
250key will be assigned the current context of the task that invoked the key
251creation request. Tasks must be granted explicit permission to assign a
252particular context to newly-created keys, using the "create" permission in the
253key security class.
254
255The default keyrings associated with users will be labeled with the default
256context of the user if and only if the login programs have been instrumented to
257properly initialize keycreate during the login process. Otherwise, they will
258be labeled with the context of the login program itself.
259
260Note, however, that the default keyrings associated with the root user are
261labeled with the default kernel context, since they are created early in the
262boot process, before root has a chance to log in.
263
264The keyrings associated with new threads are each labeled with the context of
265their associated thread, and both session and process keyrings are handled
266similarly.
267
268
235================ 269================
236NEW PROCFS FILES 270NEW PROCFS FILES
237================ 271================
@@ -241,9 +275,17 @@ about the status of the key service:
241 275
242 (*) /proc/keys 276 (*) /proc/keys
243 277
244 This lists all the keys on the system, giving information about their 278 This lists the keys that are currently viewable by the task reading the
245 type, description and permissions. The payload of the key is not available 279 file, giving information about their type, description and permissions.
246 this way: 280 It is not possible to view the payload of the key this way, though some
281 information about it may be given.
282
283 The only keys included in the list are those that grant View permission to
284 the reading process whether or not it possesses them. Note that LSM
285 security checks are still performed, and may further filter out keys that
286 the current process is not authorised to view.
287
288 The contents of the file look like this:
247 289
248 SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY 290 SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY
249 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4 291 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4
@@ -271,7 +313,7 @@ about the status of the key service:
271 (*) /proc/key-users 313 (*) /proc/key-users
272 314
273 This file lists the tracking data for each user that has at least one key 315 This file lists the tracking data for each user that has at least one key
274 on the system. Such data includes quota information and statistics: 316 on the system. Such data includes quota information and statistics:
275 317
276 [root@andromeda root]# cat /proc/key-users 318 [root@andromeda root]# cat /proc/key-users
277 0: 46 45/45 1/100 13/10000 319 0: 46 45/45 1/100 13/10000
@@ -738,6 +780,17 @@ payload contents" for more information.
738 See also Documentation/keys-request-key.txt. 780 See also Documentation/keys-request-key.txt.
739 781
740 782
783(*) To search for a key, passing auxiliary data to the upcaller, call:
784
785 struct key *request_key_with_auxdata(const struct key_type *type,
786 const char *description,
787 const char *callout_string,
788 void *aux);
789
790 This is identical to request_key(), except that the auxiliary data is
791 passed to the key_type->request_key() op if it exists.
792
793
741(*) When it is no longer required, the key should be released using: 794(*) When it is no longer required, the key should be released using:
742 795
743 void key_put(struct key *key); 796 void key_put(struct key *key);
@@ -935,6 +988,16 @@ The structure has a number of fields, some of which are mandatory:
935 It is not safe to sleep in this method; the caller may hold spinlocks. 988 It is not safe to sleep in this method; the caller may hold spinlocks.
936 989
937 990
991 (*) void (*revoke)(struct key *key);
992
993 This method is optional. It is called to discard part of the payload
994 data upon a key being revoked. The caller will have the key semaphore
995 write-locked.
996
997 It is safe to sleep in this method, though care should be taken to avoid
998 a deadlock against the key semaphore.
999
1000
938 (*) void (*destroy)(struct key *key); 1001 (*) void (*destroy)(struct key *key);
939 1002
940 This method is optional. It is called to discard the payload data on a key 1003 This method is optional. It is called to discard the payload data on a key
@@ -979,6 +1042,24 @@ The structure has a number of fields, some of which are mandatory:
979 as might happen when the userspace buffer is accessed. 1042 as might happen when the userspace buffer is accessed.
980 1043
981 1044
1045 (*) int (*request_key)(struct key *key, struct key *authkey, const char *op,
1046 void *aux);
1047
1048 This method is optional. If provided, request_key() and
1049 request_key_with_auxdata() will invoke this function rather than
1050 upcalling to /sbin/request-key to operate upon a key of this type.
1051
1052 The aux parameter is as passed to request_key_with_auxdata() or is NULL
1053 otherwise. Also passed are the key to be operated upon, the
1054 authorisation key for this operation and the operation type (currently
1055 only "create").
1056
1057 This function should return only when the upcall is complete. Upon return
1058 the authorisation key will be revoked, and the target key will be
1059 negatively instantiated if it is still uninstantiated. The error will be
1060 returned to the caller of request_key*().
1061
1062
982============================ 1063============================
983REQUEST-KEY CALLBACK SERVICE 1064REQUEST-KEY CALLBACK SERVICE
984============================ 1065============================
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
new file mode 100644
index 000000000000..00d93605bfd3
--- /dev/null
+++ b/Documentation/lockdep-design.txt
@@ -0,0 +1,197 @@
1Runtime locking correctness validator
2=====================================
3
4started by Ingo Molnar <mingo@redhat.com>
5additions by Arjan van de Ven <arjan@linux.intel.com>
6
7Lock-class
8----------
9
10The basic object the validator operates upon is a 'class' of locks.
11
12A class of locks is a group of locks that are logically the same with
13respect to locking rules, even if the locks may have multiple (possibly
14tens of thousands of) instantiations. For example a lock in the inode
15struct is one class, while each inode has its own instantiation of that
16lock class.
17
18The validator tracks the 'state' of lock-classes, and it tracks
19dependencies between different lock-classes. The validator maintains a
20rolling proof that the state and the dependencies are correct.
21
22Unlike a lock instantiation, the lock-class itself never goes away: when
23a lock-class is used for the first time after bootup it gets registered,
24and all subsequent uses of that lock-class will be attached to this
25lock-class.
26
27State
28-----
29
30The validator tracks lock-class usage history into 5 separate state bits:
31
32- 'ever held in hardirq context' [ == hardirq-safe ]
33- 'ever held in softirq context' [ == softirq-safe ]
34- 'ever held with hardirqs enabled' [ == hardirq-unsafe ]
35- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ]
36
37- 'ever used' [ == !unused ]
38
39Single-lock state rules:
40------------------------
41
42A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
43following states are exclusive, and only one of them is allowed to be
44set for any lock-class:
45
46 <hardirq-safe> and <hardirq-unsafe>
47 <softirq-safe> and <softirq-unsafe>
48
49The validator detects and reports lock usage that violates these
50single-lock state rules.
51
52Multi-lock dependency rules:
53----------------------------
54
55The same lock-class must not be acquired twice, because this could lead
56to lock recursion deadlocks.
57
58Furthermore, two locks may not be taken in different order:
59
60 <L1> -> <L2>
61 <L2> -> <L1>
62
63because this could lead to lock inversion deadlocks. (The validator
64finds such dependencies in arbitrary complexity, i.e. there can be any
65other locking sequence between the acquire-lock operations, the
66validator will still track all dependencies between locks.)
67
68Furthermore, the following usage based lock dependencies are not allowed
69between any two lock-classes:
70
71 <hardirq-safe> -> <hardirq-unsafe>
72 <softirq-safe> -> <softirq-unsafe>
73
74The first rule comes from the fact that a hardirq-safe lock could be
75taken by a hardirq context, interrupting a hardirq-unsafe lock - and
76thus could result in a lock inversion deadlock. Likewise, a softirq-safe
77lock could be taken by a softirq context, interrupting a softirq-unsafe
78lock.
79
80The above rules are enforced for any locking sequence that occurs in the
81kernel: when acquiring a new lock, the validator checks whether there is
82any rule violation between the new lock and any of the held locks.
83
84When a lock-class changes its state, the following aspects of the above
85dependency rules are enforced:
86
87- if a new hardirq-safe lock is discovered, we check whether it
88 took any hardirq-unsafe lock in the past.
89
90- if a new softirq-safe lock is discovered, we check whether it took
91 any softirq-unsafe lock in the past.
92
93- if a new hardirq-unsafe lock is discovered, we check whether any
94 hardirq-safe lock took it in the past.
95
96- if a new softirq-unsafe lock is discovered, we check whether any
97 softirq-safe lock took it in the past.
98
99(Again, we do these checks too on the basis that an interrupt context
100could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
101could lead to a lock inversion deadlock - even if that lock scenario did
102not trigger in practice yet.)
103
104Exception: Nested data dependencies leading to nested locking
105-------------------------------------------------------------
106
107There are a few cases where the Linux kernel acquires more than one
108instance of the same lock-class. Such cases typically happen when there
109is some sort of hierarchy within objects of the same type. In these
110cases there is an inherent "natural" ordering between the two objects
111(defined by the properties of the hierarchy), and the kernel grabs the
112locks in this fixed order on each of the objects.
113
114An example of such an object hierarchy that results in "nested locking"
115is that of a "whole disk" block-dev object and a "partition" block-dev
116object; the partition is "part of" the whole device and as long as one
117always takes the whole disk lock as a higher lock than the partition
118lock, the lock ordering is fully correct. The validator does not
119automatically detect this natural ordering, as the locking rule behind
120the ordering is not static.
121
122In order to teach the validator about this correct usage model, new
123versions of the various locking primitives were added that allow you to
124specify a "nesting level". An example call, for the block device mutex,
125looks like this:
126
127enum bdev_bd_mutex_lock_class
128{
129 BD_MUTEX_NORMAL,
130 BD_MUTEX_WHOLE,
131 BD_MUTEX_PARTITION
132};
133
134 mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
135
136In this case the locking is done on a bdev object that is known to be a
137partition.
138
139The validator treats a lock that is taken in such a nested fashion as a
140separate (sub)class for the purposes of validation.
141
142Note: When changing code to use the _nested() primitives, be careful and
143check really thoroughly that the hierarchy is correctly mapped; otherwise
144you can get false positives or false negatives.
145
146Proof of 100% correctness:
147--------------------------
148
149The validator achieves perfect, mathematical 'closure' (proof of locking
150correctness) in the sense that for every simple, standalone single-task
151locking sequence that occurred at least once during the lifetime of the
152kernel, the validator proves it with a 100% certainty that no
153combination and timing of these locking sequences can cause any class of
154lock related deadlock. [*]
155
156I.e. complex multi-CPU and multi-task locking scenarios do not have to
157occur in practice to prove a deadlock: only the simple 'component'
158locking chains have to occur at least once (anytime, in any
159task/context) for the validator to be able to prove correctness. (For
160example, complex deadlocks that would normally need more than 3 CPUs and
161a very unlikely constellation of tasks, irq-contexts and timings to
162occur, can be detected on a plain, lightly loaded single-CPU system as
163well!)
164
165This radically decreases the complexity of locking related QA of the
166kernel: what has to be done during QA is to trigger as many "simple"
167single-task locking dependencies in the kernel as possible, at least
168once, to prove locking correctness - instead of having to trigger every
169possible combination of locking interaction between CPUs, combined with
170every possible hardirq and softirq nesting scenario (which is impossible
171to do in practice).
172
173[*] assuming that the validator itself is 100% correct, and no other
174 part of the system corrupts the state of the validator in any way.
175 We also assume that all NMI/SMM paths [which could interrupt
176 even hardirq-disabled codepaths] are correct and do not interfere
177 with the validator. We also assume that the 64-bit 'chain hash'
178 value is unique for every lock-chain in the system. Also, lock
179 recursion must not be higher than 20.
180
181Performance:
182------------
183
184The above rules require _massive_ amounts of runtime checking. If we did
185that for every lock taken and for every irqs-enable event, it would
186render the system practically unusably slow. The complexity of checking
187is O(N^2), so even with just a few hundred lock-classes we'd have to do
188tens of thousands of checks for every event.
189
190This problem is solved by checking any given 'locking scenario' (unique
191sequence of locks taken after each other) only once. A simple stack of
192held locks is maintained, and a lightweight 64-bit hash value is
193calculated, which hash is unique for every lock chain. The hash value,
194when the chain is validated for the first time, is then put into a hash
195table, which hash-table can be checked in a lockfree manner. If the
196locking chain occurs again later on, the hash table tells us that we
197don't have to validate the chain again.
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 03a13c462cf2..0668f9dc9d29 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -200,6 +200,17 @@ All md devices contain:
200 This can be written only while the array is being assembled, not 200 This can be written only while the array is being assembled, not
201 after it is started. 201 after it is started.
202 202
203 layout
204 The "layout" for the array for the particular level. This is
205 simply a number that is interpreted differently by different
206 levels. It can be written while assembling an array.
207
208 resync_start
209 The point at which resync should start. If no resync is needed,
210 this will be a very large number. At array creation it will
211 default to 0, though starting the array as 'clean' will
212 set it much larger.
213
203 new_dev 214 new_dev
204 This file can be written but not read. The value written should 215 This file can be written but not read. The value written should
205 be a block device number as major:minor. e.g. 8:0 216 be a block device number as major:minor. e.g. 8:0
@@ -207,6 +218,54 @@ All md devices contain:
207 available. It will then appear at md/dev-XXX (depending on the 218 available. It will then appear at md/dev-XXX (depending on the
208 name of the device) and further configuration is then possible. 219 name of the device) and further configuration is then possible.
209 220
221 safe_mode_delay
222 When an md array has seen no write requests for a certain period
223 of time, it will be marked as 'clean'. When another write
224 request arrives, the array is marked as 'dirty' before the write
225 commences. This is known as 'safe_mode'.
226 The 'certain period' is controlled by this file which stores the
227 period as a number of seconds. The default is 200msec (0.200).
228 Writing a value of 0 disables safemode.
229
230 array_state
231 This file contains a single word which describes the current
232 state of the array. In many cases, the state can be set by
233 writing the word for the desired state, however some states
234 cannot be explicitly set, and some transitions are not allowed.
235
236 clear
237 No devices, no size, no level
238 Writing is equivalent to STOP_ARRAY ioctl
239 inactive
240 May have some settings, but array is not active
241 all IO results in error
242 When written, doesn't tear down array, but just stops it
243 suspended (not supported yet)
244 All IO requests will block. The array can be reconfigured.
245 Writing this, if accepted, will block until array is quiescent
246 readonly
247 no resync can happen. no superblocks get written.
248 write requests fail
249 read-auto
250 like readonly, but behaves like 'clean' on a write request.
251
252 clean - no pending writes, but otherwise active.
253 When written to inactive array, starts without resync
254 If a write request arrives then
255 if metadata is known, mark 'dirty' and switch to 'active'.
256 if not known, block and switch to write-pending
257 If written to an active array that has pending writes, then fails.
258 active
259 fully active: IO and resync can be happening.
260 When written to inactive array, starts with resync
261
262 write-pending
263 clean, but writes are blocked waiting for 'active' to be written.
264
265 active-idle
266 like active, but no writes have been seen for a while (safe_mode_delay).
267
268
210 sync_speed_min 269 sync_speed_min
211 sync_speed_max 270 sync_speed_max
212 These are similar to /proc/sys/dev/raid/speed_limit_{min,max} 271 These are similar to /proc/sys/dev/raid/speed_limit_{min,max}
@@ -250,10 +309,18 @@ Each directory contains:
250 faulty - device has been kicked from active use due to 309 faulty - device has been kicked from active use due to
251 a detected fault 310 a detected fault
252 in_sync - device is a fully in-sync member of the array 311 in_sync - device is a fully in-sync member of the array
312 writemostly - device will only be subject to read
313 requests if there are no other options.
314 This applies only to raid1 arrays.
253 spare - device is working, but not a full member. 315 spare - device is working, but not a full member.
254 This includes spares that are in the process 316 This includes spares that are in the process
255 of being recovered to 317 of being recovered to
256 This list may grow in future. 318 This list may grow in future.
319 This can be written to.
320 Writing "faulty" simulates a failure on the device.
321 Writing "remove" removes the device from the array.
322 Writing "writemostly" sets the writemostly flag.
323 Writing "-writemostly" clears the writemostly flag.
257 324
258 errors 325 errors
259 An approximate count of read errors that have been detected on 326 An approximate count of read errors that have been detected on
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c61d8b876fdb..28d1bc3edb1c 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -19,6 +19,7 @@ Contents:
19 - Control dependencies. 19 - Control dependencies.
20 - SMP barrier pairing. 20 - SMP barrier pairing.
21 - Examples of memory barrier sequences. 21 - Examples of memory barrier sequences.
22 - Read memory barriers vs load speculation.
22 23
23 (*) Explicit kernel barriers. 24 (*) Explicit kernel barriers.
24 25
@@ -248,7 +249,7 @@ And there are a number of things that _must_ or _must_not_ be assumed:
248 we may get either of: 249 we may get either of:
249 250
250 STORE *A = X; Y = LOAD *A; 251 STORE *A = X; Y = LOAD *A;
251 STORE *A = Y; 252 STORE *A = Y = X;
252 253
253 254
254========================= 255=========================
@@ -261,9 +262,14 @@ What is required is some way of intervening to instruct the compiler and the
261CPU to restrict the order. 262CPU to restrict the order.
262 263
263Memory barriers are such interventions. They impose a perceived partial 264Memory barriers are such interventions. They impose a perceived partial
264ordering between the memory operations specified on either side of the barrier. 265ordering over the memory operations on either side of the barrier.
265They request that the sequence of memory events generated appears to other 266
266parts of the system as if the barrier is effective on that CPU. 267Such enforcement is important because the CPUs and other devices in a system
268can use a variety of tricks to improve performance - including reordering,
269deferral and combination of memory operations; speculative loads; speculative
270branch prediction and various types of caching. Memory barriers are used to
271override or suppress these tricks, allowing the code to sanely control the
272interaction of multiple CPUs and/or devices.
267 273
268 274
269VARIETIES OF MEMORY BARRIER 275VARIETIES OF MEMORY BARRIER
@@ -281,7 +287,7 @@ Memory barriers come in four basic varieties:
281 A write barrier is a partial ordering on stores only; it is not required 287 A write barrier is a partial ordering on stores only; it is not required
282 to have any effect on loads. 288 to have any effect on loads.
283 289
284 A CPU can be viewed as as commiting a sequence of store operations to the 290 A CPU can be viewed as committing a sequence of store operations to the
285 memory system as time progresses. All stores before a write barrier will 291 memory system as time progresses. All stores before a write barrier will
286 occur in the sequence _before_ all the stores after the write barrier. 292 occur in the sequence _before_ all the stores after the write barrier.
287 293
@@ -344,9 +350,12 @@ Memory barriers come in four basic varieties:
344 350
345 (4) General memory barriers. 351 (4) General memory barriers.
346 352
347 A general memory barrier is a combination of both a read memory barrier 353 A general memory barrier gives a guarantee that all the LOAD and STORE
348 and a write memory barrier. It is a partial ordering over both loads and 354 operations specified before the barrier will appear to happen before all
349 stores. 355 the LOAD and STORE operations specified after the barrier with respect to
356 the other components of the system.
357
358 A general memory barrier is a partial ordering over both loads and stores.
350 359
351 General memory barriers imply both read and write memory barriers, and so 360 General memory barriers imply both read and write memory barriers, and so
352 can substitute for either. 361 can substitute for either.
@@ -409,7 +418,7 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
409 indirect effect will be the order in which the second CPU sees the effects 418 indirect effect will be the order in which the second CPU sees the effects
410 of the first CPU's accesses occur, but see the next point: 419 of the first CPU's accesses occur, but see the next point:
411 420
412 (*) There is no guarantee that the a CPU will see the correct order of effects 421 (*) There is no guarantee that a CPU will see the correct order of effects
413 from a second CPU's accesses, even _if_ the second CPU uses a memory 422 from a second CPU's accesses, even _if_ the second CPU uses a memory
414 barrier, unless the first CPU _also_ uses a matching memory barrier (see 423 barrier, unless the first CPU _also_ uses a matching memory barrier (see
415 the subsection on "SMP Barrier Pairing"). 424 the subsection on "SMP Barrier Pairing").
@@ -457,8 +466,8 @@ Whilst this may seem like a failure of coherency or causality maintenance, it
457isn't, and this behaviour can be observed on certain real CPUs (such as the DEC 466isn't, and this behaviour can be observed on certain real CPUs (such as the DEC
458Alpha). 467Alpha).
459 468
460To deal with this, a data dependency barrier must be inserted between the 469To deal with this, a data dependency barrier or better must be inserted
461address load and the data load: 470between the address load and the data load:
462 471
463 CPU 1 CPU 2 472 CPU 1 CPU 2
464 =============== =============== 473 =============== ===============
@@ -480,7 +489,7 @@ lines. The pointer P might be stored in an odd-numbered cache line, and the
480variable B might be stored in an even-numbered cache line. Then, if the 489variable B might be stored in an even-numbered cache line. Then, if the
481even-numbered bank of the reading CPU's cache is extremely busy while the 490even-numbered bank of the reading CPU's cache is extremely busy while the
482odd-numbered bank is idle, one can see the new value of the pointer P (&B), 491odd-numbered bank is idle, one can see the new value of the pointer P (&B),
483but the old value of the variable B (1). 492but the old value of the variable B (2).
484 493
485 494
486Another example of where data dependency barriers might be required is where a 495Another example of where data dependency barriers might be required is where a
@@ -546,9 +555,9 @@ write barrier, though, again, a general barrier is viable:
546 =============== =============== 555 =============== ===============
547 a = 1; 556 a = 1;
548 <write barrier> 557 <write barrier>
549 b = 2; x = a; 558 b = 2; x = b;
550 <read barrier> 559 <read barrier>
551 y = b; 560 y = a;
552 561
553Or: 562Or:
554 563
@@ -563,6 +572,18 @@ Or:
563Basically, the read barrier always has to be there, even though it can be of 572Basically, the read barrier always has to be there, even though it can be of
564the "weaker" type. 573the "weaker" type.
565 574
575[!] Note that the stores before the write barrier would normally be expected to
576match the loads after the read barrier or data dependency barrier, and vice
577versa:
578
579 CPU 1 CPU 2
580 =============== ===============
581 a = 1; }---- --->{ v = c
582 b = 2; } \ / { w = d
583 <write barrier> \ <read barrier>
584 c = 3; } / \ { x = a;
585 d = 4; }---- --->{ y = b;
586
566 587
567EXAMPLES OF MEMORY BARRIER SEQUENCES 588EXAMPLES OF MEMORY BARRIER SEQUENCES
568------------------------------------ 589------------------------------------
@@ -581,7 +602,7 @@ Consider the following sequence of events:
581 602
582This sequence of events is committed to the memory coherence system in an order 603This sequence of events is committed to the memory coherence system in an order
583that the rest of the system might perceive as the unordered set of { STORE A, 604that the rest of the system might perceive as the unordered set of { STORE A,
584STORE B, STORE C } all occuring before the unordered set of { STORE D, STORE E 605STORE B, STORE C } all occurring before the unordered set of { STORE D, STORE E
585}: 606}:
586 607
587 +-------+ : : 608 +-------+ : :
@@ -600,8 +621,8 @@ STORE B, STORE C } all occuring before the unordered set of { STORE D, STORE E
600 | | +------+ 621 | | +------+
601 +-------+ : : 622 +-------+ : :
602 | 623 |
603 | Sequence in which stores committed to memory system 624 | Sequence in which stores are committed to the
604 | by CPU 1 625 | memory system by CPU 1
605 V 626 V
606 627
607 628
@@ -683,14 +704,12 @@ then the following will occur:
683 | : : | | 704 | : : | |
684 | : : | CPU 2 | 705 | : : | CPU 2 |
685 | +-------+ | | 706 | +-------+ | |
686 \ | X->9 |------>| | 707 | | X->9 |------>| |
687 \ +-------+ | | 708 | +-------+ | |
688 ----->| B->2 | | | 709 Makes sure all effects ---> \ ddddddddddddddddd | |
689 +-------+ | | 710 prior to the store of C \ +-------+ | |
690 Makes sure all effects ---> ddddddddddddddddd | | 711 are perceptible to ----->| B->2 |------>| |
691 prior to the store of C +-------+ | | 712 subsequent loads +-------+ | |
692 are perceptible to | B->2 |------>| |
693 successive loads +-------+ | |
694 : : +-------+ 713 : : +-------+
695 714
696 715
@@ -699,73 +718,239 @@ following sequence of events:
699 718
700 CPU 1 CPU 2 719 CPU 1 CPU 2
701 ======================= ======================= 720 ======================= =======================
721 { A = 0, B = 9 }
702 STORE A=1 722 STORE A=1
703 STORE B=2
704 STORE C=3
705 <write barrier> 723 <write barrier>
706 STORE D=4 724 STORE B=2
707 STORE E=5
708 LOAD A
709 LOAD B 725 LOAD B
710 LOAD C 726 LOAD A
711 LOAD D
712 LOAD E
713 727
714Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in 728Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
715some effectively random order, despite the write barrier issued by CPU 1: 729some effectively random order, despite the write barrier issued by CPU 1:
716 730
717 +-------+ : : 731 +-------+ : : : :
718 | | +------+ 732 | | +------+ +-------+
719 | |------>| C=3 | } 733 | |------>| A=1 |------ --->| A->0 |
720 | | : +------+ } 734 | | +------+ \ +-------+
721 | | : | A=1 | } 735 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
722 | | : +------+ } 736 | | +------+ | +-------+
723 | CPU 1 | : | B=2 | }--- 737 | |------>| B=2 |--- | : :
724 | | +------+ } \ 738 | | +------+ \ | : : +-------+
725 | | wwwwwwwwwwwww} \ 739 +-------+ : : \ | +-------+ | |
726 | | +------+ } \ : : +-------+ 740 ---------->| B->2 |------>| |
727 | | : | E=5 | } \ +-------+ | | 741 | +-------+ | CPU 2 |
728 | | : +------+ } \ { | C->3 |------>| | 742 | | A->0 |------>| |
729 | |------>| D=4 | } \ { +-------+ : | | 743 | +-------+ | |
730 | | +------+ \ { | E->5 | : | | 744 | : : +-------+
731 +-------+ : : \ { +-------+ : | | 745 \ : :
732 Transfer -->{ | A->1 | : | CPU 2 | 746 \ +-------+
733 from CPU 1 { +-------+ : | | 747 ---->| A->1 |
734 to CPU 2 { | D->4 | : | | 748 +-------+
735 { +-------+ : | | 749 : :
736 { | B->2 |------>| |
737 +-------+ | |
738 : : +-------+
739
740
741If, however, a read barrier were to be placed between the load of C and the
742load of D on CPU 2, then the partial ordering imposed by CPU 1 will be
743perceived correctly by CPU 2.
744 750
745 +-------+ : : 751
746 | | +------+ 752If, however, a read barrier were to be placed between the load of B and the
747 | |------>| C=3 | } 753load of A on CPU 2:
748 | | : +------+ } 754
749 | | : | A=1 | }--- 755 CPU 1 CPU 2
750 | | : +------+ } \ 756 ======================= =======================
751 | CPU 1 | : | B=2 | } \ 757 { A = 0, B = 9 }
752 | | +------+ \ 758 STORE A=1
753 | | wwwwwwwwwwwwwwww \ 759 <write barrier>
754 | | +------+ \ : : +-------+ 760 STORE B=2
755 | | : | E=5 | } \ +-------+ | | 761 LOAD B
756 | | : +------+ }--- \ { | C->3 |------>| | 762 <read barrier>
757 | |------>| D=4 | } \ \ { +-------+ : | | 763 LOAD A
758 | | +------+ \ -->{ | B->2 | : | | 764
759 +-------+ : : \ { +-------+ : | | 765then the partial ordering imposed by CPU 1 will be perceived correctly by CPU
760 \ { | A->1 | : | CPU 2 | 7662:
761 \ +-------+ | | 767
762 At this point the read ----> \ rrrrrrrrrrrrrrrrr | | 768 +-------+ : : : :
763 barrier causes all effects \ +-------+ | | 769 | | +------+ +-------+
764 prior to the storage of C \ { | E->5 | : | | 770 | |------>| A=1 |------ --->| A->0 |
765 to be perceptible to CPU 2 -->{ +-------+ : | | 771 | | +------+ \ +-------+
766 { | D->4 |------>| | 772 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
767 +-------+ | | 773 | | +------+ | +-------+
768 : : +-------+ 774 | |------>| B=2 |--- | : :
775 | | +------+ \ | : : +-------+
776 +-------+ : : \ | +-------+ | |
777 ---------->| B->2 |------>| |
778 | +-------+ | CPU 2 |
779 | : : | |
780 | : : | |
781 At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
782 barrier causes all effects \ +-------+ | |
783 prior to the storage of B ---->| A->1 |------>| |
784 to be perceptible to CPU 2 +-------+ | |
785 : : +-------+
786
787
788To illustrate this more completely, consider what could happen if the code
789contained a load of A either side of the read barrier:
790
791 CPU 1 CPU 2
792 ======================= =======================
793 { A = 0, B = 9 }
794 STORE A=1
795 <write barrier>
796 STORE B=2
797 LOAD B
798 LOAD A [first load of A]
799 <read barrier>
800 LOAD A [second load of A]
801
802Even though the two loads of A both occur after the load of B, they may both
803come up with different values:
804
805 +-------+ : : : :
806 | | +------+ +-------+
807 | |------>| A=1 |------ --->| A->0 |
808 | | +------+ \ +-------+
809 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
810 | | +------+ | +-------+
811 | |------>| B=2 |--- | : :
812 | | +------+ \ | : : +-------+
813 +-------+ : : \ | +-------+ | |
814 ---------->| B->2 |------>| |
815 | +-------+ | CPU 2 |
816 | : : | |
817 | : : | |
818 | +-------+ | |
819 | | A->0 |------>| 1st |
820 | +-------+ | |
821 At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
822 barrier causes all effects \ +-------+ | |
823 prior to the storage of B ---->| A->1 |------>| 2nd |
824 to be perceptible to CPU 2 +-------+ | |
825 : : +-------+
826
827
828But it may be that the update to A from CPU 1 becomes perceptible to CPU 2
829before the read barrier completes anyway:
830
831 +-------+ : : : :
832 | | +------+ +-------+
833 | |------>| A=1 |------ --->| A->0 |
834 | | +------+ \ +-------+
835 | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
836 | | +------+ | +-------+
837 | |------>| B=2 |--- | : :
838 | | +------+ \ | : : +-------+
839 +-------+ : : \ | +-------+ | |
840 ---------->| B->2 |------>| |
841 | +-------+ | CPU 2 |
842 | : : | |
843 \ : : | |
844 \ +-------+ | |
845 ---->| A->1 |------>| 1st |
846 +-------+ | |
847 rrrrrrrrrrrrrrrrr | |
848 +-------+ | |
849 | A->1 |------>| 2nd |
850 +-------+ | |
851 : : +-------+
852
853
854The guarantee is that the second load will always come up with A == 1 if the
855load of B came up with B == 2. No such guarantee exists for the first load of
856A; that may come up with either A == 0 or A == 1.
857
858
859READ MEMORY BARRIERS VS LOAD SPECULATION
860----------------------------------------
861
862Many CPUs speculate with loads: that is they see that they will need to load an
863item from memory, and they find a time where they're not using the bus for any
864other loads, and so do the load in advance - even though they haven't actually
865got to that point in the instruction execution flow yet. This permits the
866actual load instruction to potentially complete immediately because the CPU
867already has the value to hand.
868
869It may turn out that the CPU didn't actually need the value - perhaps because a
870branch circumvented the load - in which case it can discard the value or just
871cache it for later use.
872
873Consider:
874
875 CPU 1 CPU 2
876 ======================= =======================
877 LOAD B
878 DIVIDE } Divide instructions generally
879 DIVIDE } take a long time to perform
880 LOAD A
881
882Which might appear as this:
883
884 : : +-------+
885 +-------+ | |
886 --->| B->2 |------>| |
887 +-------+ | CPU 2 |
888 : :DIVIDE | |
889 +-------+ | |
890 The CPU being busy doing a ---> --->| A->0 |~~~~ | |
891 division speculates on the +-------+ ~ | |
892 LOAD of A : : ~ | |
893 : :DIVIDE | |
894 : : ~ | |
895 Once the divisions are complete --> : : ~-->| |
896 the CPU can then perform the : : | |
897 LOAD with immediate effect : : +-------+
898
899
900Placing a read barrier or a data dependency barrier just before the second
901load:
902
903 CPU 1 CPU 2
904 ======================= =======================
905 LOAD B
906 DIVIDE
907 DIVIDE
908 <read barrier>
909 LOAD A
910
911will force any value speculatively obtained to be reconsidered to an extent
912dependent on the type of barrier used. If there was no change made to the
913speculated memory location, then the speculated value will just be used:
914
915 : : +-------+
916 +-------+ | |
917 --->| B->2 |------>| |
918 +-------+ | CPU 2 |
919 : :DIVIDE | |
920 +-------+ | |
921 The CPU being busy doing a ---> --->| A->0 |~~~~ | |
922 division speculates on the +-------+ ~ | |
923 LOAD of A : : ~ | |
924 : :DIVIDE | |
925 : : ~ | |
926 : : ~ | |
927 rrrrrrrrrrrrrrrr~ | |
928 : : ~ | |
929 : : ~-->| |
930 : : | |
931 : : +-------+
932
933
934but if there was an update or an invalidation from another CPU pending, then
935the speculation will be cancelled and the value reloaded:
936
937 : : +-------+
938 +-------+ | |
939 --->| B->2 |------>| |
940 +-------+ | CPU 2 |
941 : :DIVIDE | |
942 +-------+ | |
943 The CPU being busy doing a ---> --->| A->0 |~~~~ | |
944 division speculates on the +-------+ ~ | |
945 LOAD of A : : ~ | |
946 : :DIVIDE | |
947 : : ~ | |
948 : : ~ | |
949 rrrrrrrrrrrrrrrrr | |
950 +-------+ | |
951 The speculation is discarded ---> --->| A->1 |------>| |
952 and an updated value is +-------+ | |
953 retrieved : : +-------+
769 954
770 955
771======================== 956========================
@@ -901,7 +1086,7 @@ IMPLICIT KERNEL MEMORY BARRIERS
901=============================== 1086===============================
902 1087
903Some of the other functions in the linux kernel imply memory barriers, amongst 1088Some of the other functions in the linux kernel imply memory barriers, amongst
904which are locking, scheduling and memory allocation functions. 1089which are locking and scheduling functions.
905 1090
906This specification is a _minimum_ guarantee; any particular architecture may 1091This specification is a _minimum_ guarantee; any particular architecture may
907provide more substantial guarantees, but these may not be relied upon outside 1092provide more substantial guarantees, but these may not be relied upon outside
@@ -966,6 +1151,20 @@ equivalent to a full barrier, but a LOCK followed by an UNLOCK is not.
966 barriers is that the effects of instructions outside of a critical section may 1151 barriers is that the effects of instructions outside of a critical section may
967 seep into the inside of the critical section. 1152 seep into the inside of the critical section.
968 1153
1154A LOCK followed by an UNLOCK may not be assumed to be a full memory barrier
1155because it is possible for an access preceding the LOCK to happen after the
1156LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
1157two accesses can themselves then cross:
1158
1159 *A = a;
1160 LOCK
1161 UNLOCK
1162 *B = b;
1163
1164may occur as:
1165
1166 LOCK, STORE *B, STORE *A, UNLOCK
1167
969Locks and semaphores may not provide any guarantee of ordering on UP compiled 1168Locks and semaphores may not provide any guarantee of ordering on UP compiled
970systems, and so cannot be counted on in such a situation to actually achieve 1169systems, and so cannot be counted on in such a situation to actually achieve
971anything at all - especially with respect to I/O accesses - unless combined 1170anything at all - especially with respect to I/O accesses - unless combined
@@ -1016,8 +1215,6 @@ Other functions that imply barriers:
1016 1215
1017 (*) schedule() and similar imply full memory barriers. 1216 (*) schedule() and similar imply full memory barriers.
1018 1217
1019 (*) Memory allocation and release functions imply full memory barriers.
1020
1021 1218
1022================================= 1219=================================
1023INTER-CPU LOCKING BARRIER EFFECTS 1220INTER-CPU LOCKING BARRIER EFFECTS
@@ -1269,9 +1466,8 @@ instruction itself is complete.
1269 1466
1270On a UP system - where this wouldn't be a problem - the smp_mb() is just a 1467On a UP system - where this wouldn't be a problem - the smp_mb() is just a
1271compiler barrier, thus making sure the compiler emits the instructions in the 1468compiler barrier, thus making sure the compiler emits the instructions in the
1272right order without actually intervening in the CPU. Since there there's only 1469right order without actually intervening in the CPU. Since there's only one
1273one CPU, that CPU's dependency ordering logic will take care of everything 1470CPU, that CPU's dependency ordering logic will take care of everything else.
1274else.
1275 1471
1276 1472
1277ATOMIC OPERATIONS 1473ATOMIC OPERATIONS
@@ -1448,9 +1644,9 @@ functions:
1448 1644
1449 The PCI bus, amongst others, defines an I/O space concept - which on such 1645 The PCI bus, amongst others, defines an I/O space concept - which on such
1450 CPUs as i386 and x86_64 cpus readily maps to the CPU's concept of I/O 1646 CPUs as i386 and x86_64 cpus readily maps to the CPU's concept of I/O
1451 space. However, it may also mapped as a virtual I/O space in the CPU's 1647 space. However, it may also be mapped as a virtual I/O space in the CPU's
1452 memory map, particularly on those CPUs that don't support alternate 1648 memory map, particularly on those CPUs that don't support alternate I/O
1453 I/O spaces. 1649 spaces.
1454 1650
1455 Accesses to this space may be fully synchronous (as on i386), but 1651 Accesses to this space may be fully synchronous (as on i386), but
1456 intermediary bridges (such as the PCI host bridge) may not fully honour 1652 intermediary bridges (such as the PCI host bridge) may not fully honour
diff --git a/Documentation/networking/README.ipw2200 b/Documentation/networking/README.ipw2200
index acb30c5dcff3..4f2a40f1dbc6 100644
--- a/Documentation/networking/README.ipw2200
+++ b/Documentation/networking/README.ipw2200
@@ -14,8 +14,8 @@ Copyright (C) 2004-2006, Intel Corporation
14 14
15README.ipw2200 15README.ipw2200
16 16
17Version: 1.0.8 17Version: 1.1.2
18Date : October 20, 2005 18Date : March 30, 2006
19 19
20 20
21Index 21Index
@@ -103,7 +103,7 @@ file.
103 103
1041.1. Overview of Features 1041.1. Overview of Features
105----------------------------------------------- 105-----------------------------------------------
106The current release (1.0.8) supports the following features: 106The current release (1.1.2) supports the following features:
107 107
108+ BSS mode (Infrastructure, Managed) 108+ BSS mode (Infrastructure, Managed)
109+ IBSS mode (Ad-Hoc) 109+ IBSS mode (Ad-Hoc)
@@ -247,8 +247,8 @@ and can set the contents via echo. For example:
247% cat /sys/bus/pci/drivers/ipw2200/debug_level 247% cat /sys/bus/pci/drivers/ipw2200/debug_level
248 248
249Will report the current debug level of the driver's logging subsystem 249Will report the current debug level of the driver's logging subsystem
250(only available if CONFIG_IPW_DEBUG was configured when the driver was 250(only available if CONFIG_IPW2200_DEBUG was configured when the driver
251built). 251was built).
252 252
253You can set the debug level via: 253You can set the debug level via:
254 254
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
index 8d8b4e5ea184..afac780445cd 100644
--- a/Documentation/networking/bonding.txt
+++ b/Documentation/networking/bonding.txt
@@ -1,7 +1,7 @@
1 1
2 Linux Ethernet Bonding Driver HOWTO 2 Linux Ethernet Bonding Driver HOWTO
3 3
4 Latest update: 21 June 2005 4 Latest update: 24 April 2006
5 5
6Initial release : Thomas Davis <tadavis at lbl.gov> 6Initial release : Thomas Davis <tadavis at lbl.gov>
7Corrections, HA extensions : 2000/10/03-15 : 7Corrections, HA extensions : 2000/10/03-15 :
@@ -12,6 +12,8 @@ Corrections, HA extensions : 2000/10/03-15 :
12 - Jay Vosburgh <fubar at us dot ibm dot com> 12 - Jay Vosburgh <fubar at us dot ibm dot com>
13 13
14Reorganized and updated Feb 2005 by Jay Vosburgh 14Reorganized and updated Feb 2005 by Jay Vosburgh
15Added Sysfs information: 2006/04/24
16 - Mitch Williams <mitch.a.williams at intel.com>
15 17
16Introduction 18Introduction
17============ 19============
@@ -38,61 +40,62 @@ Table of Contents
382. Bonding Driver Options 402. Bonding Driver Options
39 41
403. Configuring Bonding Devices 423. Configuring Bonding Devices
413.1 Configuration with sysconfig support 433.1 Configuration with Sysconfig Support
423.1.1 Using DHCP with sysconfig 443.1.1 Using DHCP with Sysconfig
433.1.2 Configuring Multiple Bonds with sysconfig 453.1.2 Configuring Multiple Bonds with Sysconfig
443.2 Configuration with initscripts support 463.2 Configuration with Initscripts Support
453.2.1 Using DHCP with initscripts 473.2.1 Using DHCP with Initscripts
463.2.2 Configuring Multiple Bonds with initscripts 483.2.2 Configuring Multiple Bonds with Initscripts
473.3 Configuring Bonding Manually 493.3 Configuring Bonding Manually with Ifenslave
483.3.1 Configuring Multiple Bonds Manually 503.3.1 Configuring Multiple Bonds Manually
513.4 Configuring Bonding Manually via Sysfs
49 52
505. Querying Bonding Configuration 534. Querying Bonding Configuration
515.1 Bonding Configuration 544.1 Bonding Configuration
525.2 Network Configuration 554.2 Network Configuration
53 56
546. Switch Configuration 575. Switch Configuration
55 58
567. 802.1q VLAN Support 596. 802.1q VLAN Support
57 60
588. Link Monitoring 617. Link Monitoring
598.1 ARP Monitor Operation 627.1 ARP Monitor Operation
608.2 Configuring Multiple ARP Targets 637.2 Configuring Multiple ARP Targets
618.3 MII Monitor Operation 647.3 MII Monitor Operation
62 65
639. Potential Trouble Sources 668. Potential Trouble Sources
649.1 Adventures in Routing 678.1 Adventures in Routing
659.2 Ethernet Device Renaming 688.2 Ethernet Device Renaming
669.3 Painfully Slow Or No Failed Link Detection By Miimon 698.3 Painfully Slow Or No Failed Link Detection By Miimon
67 70
6810. SNMP agents 719. SNMP agents
69 72
7011. Promiscuous mode 7310. Promiscuous mode
71 74
7212. Configuring Bonding for High Availability 7511. Configuring Bonding for High Availability
7312.1 High Availability in a Single Switch Topology 7611.1 High Availability in a Single Switch Topology
7412.2 High Availability in a Multiple Switch Topology 7711.2 High Availability in a Multiple Switch Topology
7512.2.1 HA Bonding Mode Selection for Multiple Switch Topology 7811.2.1 HA Bonding Mode Selection for Multiple Switch Topology
7612.2.2 HA Link Monitoring for Multiple Switch Topology 7911.2.2 HA Link Monitoring for Multiple Switch Topology
77 80
7813. Configuring Bonding for Maximum Throughput 8112. Configuring Bonding for Maximum Throughput
7913.1 Maximum Throughput in a Single Switch Topology 8212.1 Maximum Throughput in a Single Switch Topology
8013.1.1 MT Bonding Mode Selection for Single Switch Topology 8312.1.1 MT Bonding Mode Selection for Single Switch Topology
8113.1.2 MT Link Monitoring for Single Switch Topology 8412.1.2 MT Link Monitoring for Single Switch Topology
8213.2 Maximum Throughput in a Multiple Switch Topology 8512.2 Maximum Throughput in a Multiple Switch Topology
8313.2.1 MT Bonding Mode Selection for Multiple Switch Topology 8612.2.1 MT Bonding Mode Selection for Multiple Switch Topology
8413.2.2 MT Link Monitoring for Multiple Switch Topology 8712.2.2 MT Link Monitoring for Multiple Switch Topology
85 88
8614. Switch Behavior Issues 8913. Switch Behavior Issues
8714.1 Link Establishment and Failover Delays 9013.1 Link Establishment and Failover Delays
8814.2 Duplicated Incoming Packets 9113.2 Duplicated Incoming Packets
89 92
9015. Hardware Specific Considerations 9314. Hardware Specific Considerations
9115.1 IBM BladeCenter 9414.1 IBM BladeCenter
92 95
9316. Frequently Asked Questions 9615. Frequently Asked Questions
94 97
9517. Resources and Links 9816. Resources and Links
96 99
97 100
981. Bonding Driver Installation 1011. Bonding Driver Installation
@@ -156,6 +159,9 @@ you're trying to build it for. Some distros (e.g., Red Hat from 7.1
156onwards) do not have /usr/include/linux symbolically linked to the 159onwards) do not have /usr/include/linux symbolically linked to the
157default kernel source include directory. 160default kernel source include directory.
158 161
162SECOND IMPORTANT NOTE:
163 If you plan to configure bonding using sysfs, you do not need
164to use ifenslave.
159 165
1602. Bonding Driver Options 1662. Bonding Driver Options
161========================= 167=========================
@@ -270,7 +276,7 @@ mode
270 In bonding version 2.6.2 or later, when a failover 276 In bonding version 2.6.2 or later, when a failover
271 occurs in active-backup mode, bonding will issue one 277 occurs in active-backup mode, bonding will issue one
272 or more gratuitous ARPs on the newly active slave. 278 or more gratuitous ARPs on the newly active slave.
273 One gratutious ARP is issued for the bonding master 279 One gratuitous ARP is issued for the bonding master
274 interface and each VLAN interface configured above 280 interface and each VLAN interface configured above
275 it, provided that the interface has at least one IP 281 it, provided that the interface has at least one IP
276 address configured. Gratuitous ARPs issued for VLAN 282 address configured. Gratuitous ARPs issued for VLAN
@@ -377,7 +383,7 @@ mode
377 When a link is reconnected or a new slave joins the 383 When a link is reconnected or a new slave joins the
378 bond the receive traffic is redistributed among all 384 bond the receive traffic is redistributed among all
379 active slaves in the bond by initiating ARP Replies 385 active slaves in the bond by initiating ARP Replies
380 with the selected mac address to each of the 386 with the selected MAC address to each of the
381 clients. The updelay parameter (detailed below) must 387 clients. The updelay parameter (detailed below) must
382 be set to a value equal to or greater than the switch's 388 be set to a value equal to or greater than the switch's
383 forwarding delay so that the ARP Replies sent to the 389 forwarding delay so that the ARP Replies sent to the
@@ -498,11 +504,12 @@ not exist, and the layer2 policy is the only policy.
4983. Configuring Bonding Devices 5043. Configuring Bonding Devices
499============================== 505==============================
500 506
501 There are, essentially, two methods for configuring bonding: 507 You can configure bonding using either your distro's network
502with support from the distro's network initialization scripts, and 508initialization scripts, or manually using either ifenslave or the
503without. Distros generally use one of two packages for the network 509sysfs interface. Distros generally use one of two packages for the
504initialization scripts: initscripts or sysconfig. Recent versions of 510network initialization scripts: initscripts or sysconfig. Recent
505these packages have support for bonding, while older versions do not. 511versions of these packages have support for bonding, while older
512versions do not.
506 513
507 We will first describe the options for configuring bonding for 514 We will first describe the options for configuring bonding for
508distros using versions of initscripts and sysconfig with full or 515distros using versions of initscripts and sysconfig with full or
@@ -530,7 +537,7 @@ $ grep ifenslave /sbin/ifup
530 If this returns any matches, then your initscripts or 537 If this returns any matches, then your initscripts or
531sysconfig has support for bonding. 538sysconfig has support for bonding.
532 539
5333.1 Configuration with sysconfig support 5403.1 Configuration with Sysconfig Support
534---------------------------------------- 541----------------------------------------
535 542
536 This section applies to distros using a version of sysconfig 543 This section applies to distros using a version of sysconfig
@@ -538,7 +545,7 @@ with bonding support, for example, SuSE Linux Enterprise Server 9.
538 545
539 SuSE SLES 9's networking configuration system does support 546 SuSE SLES 9's networking configuration system does support
540bonding, however, at this writing, the YaST system configuration 547bonding, however, at this writing, the YaST system configuration
541frontend does not provide any means to work with bonding devices. 548front end does not provide any means to work with bonding devices.
542Bonding devices can be managed by hand, however, as follows. 549Bonding devices can be managed by hand, however, as follows.
543 550
544 First, if they have not already been configured, configure the 551 First, if they have not already been configured, configure the
@@ -660,7 +667,7 @@ format can be found in an example ifcfg template file:
660 Note that the template does not document the various BONDING_ 667 Note that the template does not document the various BONDING_
661settings described above, but does describe many of the other options. 668settings described above, but does describe many of the other options.
662 669
6633.1.1 Using DHCP with sysconfig 6703.1.1 Using DHCP with Sysconfig
664------------------------------- 671-------------------------------
665 672
666 Under sysconfig, configuring a device with BOOTPROTO='dhcp' 673 Under sysconfig, configuring a device with BOOTPROTO='dhcp'
@@ -670,7 +677,7 @@ attempt to obtain the device address from DHCP prior to adding any of
670the slave devices. Without active slaves, the DHCP requests are not 677the slave devices. Without active slaves, the DHCP requests are not
671sent to the network. 678sent to the network.
672 679
6733.1.2 Configuring Multiple Bonds with sysconfig 6803.1.2 Configuring Multiple Bonds with Sysconfig
674----------------------------------------------- 681-----------------------------------------------
675 682
676 The sysconfig network initialization system is capable of 683 The sysconfig network initialization system is capable of
@@ -685,7 +692,7 @@ ifcfg-bondX files.
685options in the ifcfg-bondX file, it is not necessary to add them to 692options in the ifcfg-bondX file, it is not necessary to add them to
686the system /etc/modules.conf or /etc/modprobe.conf configuration file. 693the system /etc/modules.conf or /etc/modprobe.conf configuration file.
687 694
6883.2 Configuration with initscripts support 6953.2 Configuration with Initscripts Support
689------------------------------------------ 696------------------------------------------
690 697
691 This section applies to distros using a version of initscripts 698 This section applies to distros using a version of initscripts
@@ -756,7 +763,7 @@ options for your configuration.
756will restart the networking subsystem and your bond link should be now 763will restart the networking subsystem and your bond link should be now
757up and running. 764up and running.
758 765
7593.2.1 Using DHCP with initscripts 7663.2.1 Using DHCP with Initscripts
760--------------------------------- 767---------------------------------
761 768
762 Recent versions of initscripts (the version supplied with 769 Recent versions of initscripts (the version supplied with
@@ -768,7 +775,7 @@ above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
768and add a line consisting of "TYPE=Bonding". Note that the TYPE value 775and add a line consisting of "TYPE=Bonding". Note that the TYPE value
769is case sensitive. 776is case sensitive.
770 777
7713.2.2 Configuring Multiple Bonds with initscripts 7783.2.2 Configuring Multiple Bonds with Initscripts
772------------------------------------------------- 779-------------------------------------------------
773 780
774 At this writing, the initscripts package does not directly 781 At this writing, the initscripts package does not directly
@@ -784,8 +791,8 @@ Fedora Core kernels, and has been seen on RHEL 4 as well. On kernels
784exhibiting this problem, it will be impossible to configure multiple 791exhibiting this problem, it will be impossible to configure multiple
785bonds with differing parameters. 792bonds with differing parameters.
786 793
7873.3 Configuring Bonding Manually 7943.3 Configuring Bonding Manually with Ifenslave
788-------------------------------- 795-----------------------------------------------
789 796
790 This section applies to distros whose network initialization 797 This section applies to distros whose network initialization
791scripts (the sysconfig or initscripts package) do not have specific 798scripts (the sysconfig or initscripts package) do not have specific
@@ -889,11 +896,139 @@ install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
889 This may be repeated any number of times, specifying a new and 896 This may be repeated any number of times, specifying a new and
890unique name in place of bond1 for each subsequent instance. 897unique name in place of bond1 for each subsequent instance.
891 898
8993.4 Configuring Bonding Manually via Sysfs
900------------------------------------------
901
902 Starting with version 3.0, Channel Bonding may be configured
903via the sysfs interface. This interface allows dynamic configuration
904of all bonds in the system without unloading the module. It also
905allows for adding and removing bonds at runtime. Ifenslave is no
906longer required, though it is still supported.
907
908 Use of the sysfs interface allows you to use multiple bonds
909with different configurations without having to reload the module.
910It also allows you to use multiple, differently configured bonds when
911bonding is compiled into the kernel.
912
913 You must have the sysfs filesystem mounted to configure
914bonding this way. The examples in this document assume that you
915are using the standard mount point for sysfs, e.g. /sys. If your
916sysfs filesystem is mounted elsewhere, you will need to adjust the
917example paths accordingly.
918
919Creating and Destroying Bonds
920-----------------------------
921To add a new bond foo:
922# echo +foo > /sys/class/net/bonding_masters
923
924To remove an existing bond bar:
925# echo -bar > /sys/class/net/bonding_masters
926
927To show all existing bonds:
928# cat /sys/class/net/bonding_masters
929
930NOTE: due to 4K size limitation of sysfs files, this list may be
931truncated if you have more than a few hundred bonds. This is unlikely
932to occur under normal operating conditions.
933
934Adding and Removing Slaves
935--------------------------
936 Interfaces may be enslaved to a bond using the file
937/sys/class/net/<bond>/bonding/slaves. The semantics for this file
938are the same as for the bonding_masters file.
939
940To enslave interface eth0 to bond bond0:
941# ifconfig bond0 up
942# echo +eth0 > /sys/class/net/bond0/bonding/slaves
943
944To free slave eth0 from bond bond0:
945# echo -eth0 > /sys/class/net/bond0/bonding/slaves
946
947 NOTE: The bond must be up before slaves can be added. All
948slaves are freed when the interface is brought down.
949
950 When an interface is enslaved to a bond, symlinks between the
951two are created in the sysfs filesystem. In this case, you would get
952/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and
953/sys/class/net/eth0/master pointing to /sys/class/net/bond0.
954
955 This means that you can tell quickly whether or not an
956interface is enslaved by looking for the master symlink. Thus:
957# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves
958will free eth0 from whatever bond it is enslaved to, regardless of
959the name of the bond interface.
960
961Changing a Bond's Configuration
962-------------------------------
963 Each bond may be configured individually by manipulating the
964files located in /sys/class/net/<bond name>/bonding
965
966 The names of these files correspond directly with the command-
967line parameters described elsewhere in this file, and, with the
968exception of arp_ip_target, they accept the same values. To see the
969current setting, simply cat the appropriate file.
970
971 A few examples will be given here; for specific usage
972guidelines for each parameter, see the appropriate section in this
973document.
974
975To configure bond0 for balance-alb mode:
976# ifconfig bond0 down
977# echo 6 > /sys/class/net/bond0/bonding/mode
978 - or -
979# echo balance-alb > /sys/class/net/bond0/bonding/mode
980 NOTE: The bond interface must be down before the mode can be
981changed.
982
983To enable MII monitoring on bond0 with a 1 second interval:
984# echo 1000 > /sys/class/net/bond0/bonding/miimon
985 NOTE: If ARP monitoring is enabled, it will be disabled when MII
986monitoring is enabled, and vice-versa.
987
988To add ARP targets:
989# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
990# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
991 NOTE: up to 10 target addresses may be specified.
992
993To remove an ARP target:
994# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
995
996Example Configuration
997---------------------
998 We begin with the same example that is shown in section 3.3,
999executed with sysfs, and without using ifenslave.
1000
1001 To make a simple bond of two e100 devices (presumed to be eth0
1002and eth1), and have it persist across reboots, edit the appropriate
1003file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the
1004following:
1005
1006modprobe bonding
1007modprobe e100
1008echo balance-alb > /sys/class/net/bond0/bonding/mode
1009ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
1010echo 100 > /sys/class/net/bond0/bonding/miimon
1011echo +eth0 > /sys/class/net/bond0/bonding/slaves
1012echo +eth1 > /sys/class/net/bond0/bonding/slaves
1013
1014 To add a second bond, with two e1000 interfaces in
1015active-backup mode, using ARP monitoring, add the following lines to
1016your init script:
1017
1018modprobe e1000
1019echo +bond1 > /sys/class/net/bonding_masters
1020echo active-backup > /sys/class/net/bond1/bonding/mode
1021ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up
1022echo +192.168.2.100 > /sys/class/net/bond1/bonding/arp_ip_target
1023echo 2000 > /sys/class/net/bond1/bonding/arp_interval
1024echo +eth2 > /sys/class/net/bond1/bonding/slaves
1025echo +eth3 > /sys/class/net/bond1/bonding/slaves
1026
892 1027
8935. Querying Bonding Configuration 10284. Querying Bonding Configuration
894================================= 1029=================================
895 1030
8965.1 Bonding Configuration 10314.1 Bonding Configuration
897------------------------- 1032-------------------------
898 1033
899 Each bonding device has a read-only file residing in the 1034 Each bonding device has a read-only file residing in the
@@ -923,7 +1058,7 @@ generally as follows:
923 The precise format and contents will change depending upon the 1058 The precise format and contents will change depending upon the
924bonding configuration, state, and version of the bonding driver. 1059bonding configuration, state, and version of the bonding driver.
925 1060
9265.2 Network configuration 10614.2 Network configuration
927------------------------- 1062-------------------------
928 1063
929 The network configuration can be inspected using the ifconfig 1064 The network configuration can be inspected using the ifconfig
@@ -958,7 +1093,7 @@ eth1 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
958 collisions:0 txqueuelen:100 1093 collisions:0 txqueuelen:100
959 Interrupt:9 Base address:0x1400 1094 Interrupt:9 Base address:0x1400
960 1095
9616. Switch Configuration 10965. Switch Configuration
962======================= 1097=======================
963 1098
964 For this section, "switch" refers to whatever system the 1099 For this section, "switch" refers to whatever system the
@@ -991,7 +1126,7 @@ transmit policy for an EtherChannel group; all three will interoperate
991with another EtherChannel group. 1126with another EtherChannel group.
992 1127
993 1128
9947. 802.1q VLAN Support 11296. 802.1q VLAN Support
995====================== 1130======================
996 1131
997 It is possible to configure VLAN devices over a bond interface 1132 It is possible to configure VLAN devices over a bond interface
@@ -1042,7 +1177,7 @@ underlying device -- i.e. the bonding interface -- to promiscuous
1042mode, which might not be what you want. 1177mode, which might not be what you want.
1043 1178
1044 1179
10458. Link Monitoring 11807. Link Monitoring
1046================== 1181==================
1047 1182
1048 The bonding driver at present supports two schemes for 1183 The bonding driver at present supports two schemes for
@@ -1053,7 +1188,7 @@ monitor.
1053bonding driver itself, it is not possible to enable both ARP and MII 1188bonding driver itself, it is not possible to enable both ARP and MII
1054monitoring simultaneously. 1189monitoring simultaneously.
1055 1190
10568.1 ARP Monitor Operation 11917.1 ARP Monitor Operation
1057------------------------- 1192-------------------------
1058 1193
1059 The ARP monitor operates as its name suggests: it sends ARP 1194 The ARP monitor operates as its name suggests: it sends ARP
@@ -1071,7 +1206,7 @@ those slaves will stay down. If networking monitoring (tcpdump, etc)
1071shows the ARP requests and replies on the network, then it may be that 1206shows the ARP requests and replies on the network, then it may be that
1072your device driver is not updating last_rx and trans_start. 1207your device driver is not updating last_rx and trans_start.
1073 1208
10748.2 Configuring Multiple ARP Targets 12097.2 Configuring Multiple ARP Targets
1075------------------------------------ 1210------------------------------------
1076 1211
1077 While ARP monitoring can be done with just one target, it can 1212 While ARP monitoring can be done with just one target, it can
@@ -1094,7 +1229,7 @@ alias bond0 bonding
1094options bond0 arp_interval=60 arp_ip_target=192.168.0.100 1229options bond0 arp_interval=60 arp_ip_target=192.168.0.100
1095 1230
1096 1231
10978.3 MII Monitor Operation 12327.3 MII Monitor Operation
1098------------------------- 1233-------------------------
1099 1234
1100 The MII monitor monitors only the carrier state of the local 1235 The MII monitor monitors only the carrier state of the local
@@ -1120,14 +1255,14 @@ does not support or had some error in processing both the MII register
1120and ethtool requests), then the MII monitor will assume the link is 1255and ethtool requests), then the MII monitor will assume the link is
1121up. 1256up.
1122 1257
11239. Potential Sources of Trouble 12588. Potential Sources of Trouble
1124=============================== 1259===============================
1125 1260
11269.1 Adventures in Routing 12618.1 Adventures in Routing
1127------------------------- 1262-------------------------
1128 1263
1129 When bonding is configured, it is important that the slave 1264 When bonding is configured, it is important that the slave
1130devices not have routes that supercede routes of the master (or, 1265devices not have routes that supersede routes of the master (or,
1131generally, not have routes at all). For example, suppose the bonding 1266generally, not have routes at all). For example, suppose the bonding
1132device bond0 has two slaves, eth0 and eth1, and the routing table is 1267device bond0 has two slaves, eth0 and eth1, and the routing table is
1133as follows: 1268as follows:
@@ -1154,11 +1289,11 @@ by the state of the routing table.
1154 1289
1155 The solution here is simply to ensure that slaves do not have 1290 The solution here is simply to ensure that slaves do not have
1156routes of their own, and if for some reason they must, those routes do 1291routes of their own, and if for some reason they must, those routes do
1157not supercede routes of their master. This should generally be the 1292not supersede routes of their master. This should generally be the
1158case, but unusual configurations or errant manual or automatic static 1293case, but unusual configurations or errant manual or automatic static
1159route additions may cause trouble. 1294route additions may cause trouble.
1160 1295
11619.2 Ethernet Device Renaming 12968.2 Ethernet Device Renaming
1162---------------------------- 1297----------------------------
1163 1298
1164 On systems with network configuration scripts that do not 1299 On systems with network configuration scripts that do not
@@ -1207,7 +1342,7 @@ modprobe with --ignore-install to cause the normal action to then take
1207place. Full documentation on this can be found in the modprobe.conf 1342place. Full documentation on this can be found in the modprobe.conf
1208and modprobe manual pages. 1343and modprobe manual pages.
1209 1344
12109.3. Painfully Slow Or No Failed Link Detection By Miimon 13458.3. Painfully Slow Or No Failed Link Detection By Miimon
1211--------------------------------------------------------- 1346---------------------------------------------------------
1212 1347
1213 By default, bonding enables the use_carrier option, which 1348 By default, bonding enables the use_carrier option, which
@@ -1235,7 +1370,7 @@ carrier state. It has no way to determine the state of devices on or
1235beyond other ports of a switch, or if a switch is refusing to pass 1370beyond other ports of a switch, or if a switch is refusing to pass
1236traffic while still maintaining carrier on. 1371traffic while still maintaining carrier on.
1237 1372
123810. SNMP agents 13739. SNMP agents
1239=============== 1374===============
1240 1375
1241 If running SNMP agents, the bonding driver should be loaded 1376 If running SNMP agents, the bonding driver should be loaded
@@ -1281,7 +1416,7 @@ ifDescr, the association between the IP address and IfIndex remains
1281and SNMP functions such as Interface_Scan_Next will report that 1416and SNMP functions such as Interface_Scan_Next will report that
1282association. 1417association.
1283 1418
128411. Promiscuous mode 141910. Promiscuous mode
1285==================== 1420====================
1286 1421
1287 When running network monitoring tools, e.g., tcpdump, it is 1422 When running network monitoring tools, e.g., tcpdump, it is
@@ -1308,7 +1443,7 @@ sending to peers that are unassigned or if the load is unbalanced.
1308the active slave changes (e.g., due to a link failure), the 1443the active slave changes (e.g., due to a link failure), the
1309promiscuous setting will be propagated to the new active slave. 1444promiscuous setting will be propagated to the new active slave.
1310 1445
131112. Configuring Bonding for High Availability 144611. Configuring Bonding for High Availability
1312============================================= 1447=============================================
1313 1448
1314 High Availability refers to configurations that provide 1449 High Availability refers to configurations that provide
@@ -1318,7 +1453,7 @@ goal is to provide the maximum availability of network connectivity
1318(i.e., the network always works), even though other configurations 1453(i.e., the network always works), even though other configurations
1319could provide higher throughput. 1454could provide higher throughput.
1320 1455
132112.1 High Availability in a Single Switch Topology 145611.1 High Availability in a Single Switch Topology
1322-------------------------------------------------- 1457--------------------------------------------------
1323 1458
1324 If two hosts (or a host and a single switch) are directly 1459 If two hosts (or a host and a single switch) are directly
@@ -1332,7 +1467,7 @@ the load will be rebalanced across the remaining devices.
1332 See Section 13, "Configuring Bonding for Maximum Throughput" 1467 See Section 12, "Configuring Bonding for Maximum Throughput"
1333for information on configuring bonding with one peer device. 1468for information on configuring bonding with one peer device.
1334 1469
133512.2 High Availability in a Multiple Switch Topology 147011.2 High Availability in a Multiple Switch Topology
1336---------------------------------------------------- 1471----------------------------------------------------
1337 1472
1338 With multiple switches, the configuration of bonding and the 1473 With multiple switches, the configuration of bonding and the
@@ -1359,7 +1494,7 @@ switches (ISL, or inter switch link), and multiple ports connecting to
1359the outside world ("port3" on each switch). There is no technical 1494the outside world ("port3" on each switch). There is no technical
1360reason that this could not be extended to a third switch. 1495reason that this could not be extended to a third switch.
1361 1496
136212.2.1 HA Bonding Mode Selection for Multiple Switch Topology 149711.2.1 HA Bonding Mode Selection for Multiple Switch Topology
1363------------------------------------------------------------- 1498-------------------------------------------------------------
1364 1499
1365 In a topology such as the example above, the active-backup and 1500 In a topology such as the example above, the active-backup and
@@ -1381,7 +1516,7 @@ broadcast: This mode is really a special purpose mode, and is suitable
1381 necessary for some specific one-way traffic to reach both 1516 necessary for some specific one-way traffic to reach both
1382 independent networks, then the broadcast mode may be suitable. 1517 independent networks, then the broadcast mode may be suitable.
1383 1518
138412.2.2 HA Link Monitoring Selection for Multiple Switch Topology 151911.2.2 HA Link Monitoring Selection for Multiple Switch Topology
1385---------------------------------------------------------------- 1520----------------------------------------------------------------
1386 1521
1387 The choice of link monitoring ultimately depends upon your 1522 The choice of link monitoring ultimately depends upon your
@@ -1402,10 +1537,10 @@ regardless of which switch is active, the ARP monitor has a suitable
1402target to query. 1537target to query.
1403 1538
1404 1539
140513. Configuring Bonding for Maximum Throughput 154012. Configuring Bonding for Maximum Throughput
1406============================================== 1541==============================================
1407 1542
140813.1 Maximizing Throughput in a Single Switch Topology 154312.1 Maximizing Throughput in a Single Switch Topology
1409------------------------------------------------------ 1544------------------------------------------------------
1410 1545
1411 In a single switch configuration, the best method to maximize 1546 In a single switch configuration, the best method to maximize
@@ -1476,7 +1611,7 @@ destination to make load balancing decisions. The behavior of each
1476mode is described below. 1611mode is described below.
1477 1612
1478 1613
147913.1.1 MT Bonding Mode Selection for Single Switch Topology 161412.1.1 MT Bonding Mode Selection for Single Switch Topology
1480----------------------------------------------------------- 1615-----------------------------------------------------------
1481 1616
1482 This configuration is the easiest to set up and to understand, 1617 This configuration is the easiest to set up and to understand,
@@ -1607,7 +1742,7 @@ balance-alb: This mode is everything that balance-tlb is, and more.
1607 device driver must support changing the hardware address while 1742 device driver must support changing the hardware address while
1608 the device is open. 1743 the device is open.
1609 1744
161013.1.2 MT Link Monitoring for Single Switch Topology 174512.1.2 MT Link Monitoring for Single Switch Topology
1611---------------------------------------------------- 1746----------------------------------------------------
1612 1747
1613 The choice of link monitoring may largely depend upon which 1748 The choice of link monitoring may largely depend upon which
@@ -1616,7 +1751,7 @@ support the use of the ARP monitor, and are thus restricted to using
1616the MII monitor (which does not provide as high a level of end to end 1751the MII monitor (which does not provide as high a level of end to end
1617assurance as the ARP monitor). 1752assurance as the ARP monitor).
1618 1753
161913.2 Maximum Throughput in a Multiple Switch Topology 175412.2 Maximum Throughput in a Multiple Switch Topology
1620----------------------------------------------------- 1755-----------------------------------------------------
1621 1756
1622 Multiple switches may be utilized to optimize for throughput 1757 Multiple switches may be utilized to optimize for throughput
@@ -1651,7 +1786,7 @@ a single 72 port switch.
1651can be equipped with an additional network device connected to an 1786can be equipped with an additional network device connected to an
1652external network; this host then additionally acts as a gateway. 1787external network; this host then additionally acts as a gateway.
1653 1788
165413.2.1 MT Bonding Mode Selection for Multiple Switch Topology 178912.2.1 MT Bonding Mode Selection for Multiple Switch Topology
1655------------------------------------------------------------- 1790-------------------------------------------------------------
1656 1791
1657 In actual practice, the bonding mode typically employed in 1792 In actual practice, the bonding mode typically employed in
@@ -1664,7 +1799,7 @@ packets has arrived). When employed in this fashion, the balance-rr
1664mode allows individual connections between two hosts to effectively 1799mode allows individual connections between two hosts to effectively
1665utilize greater than one interface's bandwidth. 1800utilize greater than one interface's bandwidth.
1666 1801
166713.2.2 MT Link Monitoring for Multiple Switch Topology 180212.2.2 MT Link Monitoring for Multiple Switch Topology
1668------------------------------------------------------ 1803------------------------------------------------------
1669 1804
1670 Again, in actual practice, the MII monitor is most often used 1805 Again, in actual practice, the MII monitor is most often used
@@ -1674,10 +1809,10 @@ advantages over the MII monitor are mitigated by the volume of probes
1674needed as the number of systems involved grows (remember that each 1809needed as the number of systems involved grows (remember that each
1675host in the network is configured with bonding). 1810host in the network is configured with bonding).
1676 1811
167714. Switch Behavior Issues 181213. Switch Behavior Issues
1678========================== 1813==========================
1679 1814
168014.1 Link Establishment and Failover Delays 181513.1 Link Establishment and Failover Delays
1681------------------------------------------- 1816-------------------------------------------
1682 1817
1683 Some switches exhibit undesirable behavior with regard to the 1818 Some switches exhibit undesirable behavior with regard to the
@@ -1712,7 +1847,7 @@ switches take a long time to go into backup mode, it may be desirable
1712to not activate a backup interface immediately after a link goes down. 1847to not activate a backup interface immediately after a link goes down.
1713Failover may be delayed via the downdelay bonding module option. 1848Failover may be delayed via the downdelay bonding module option.
1714 1849
171514.2 Duplicated Incoming Packets 185013.2 Duplicated Incoming Packets
1716-------------------------------- 1851--------------------------------
1717 1852
1718 It is not uncommon to observe a short burst of duplicated 1853 It is not uncommon to observe a short burst of duplicated
@@ -1751,14 +1886,14 @@ behavior, it can be induced by clearing the MAC forwarding table (on
1751most Cisco switches, the privileged command "clear mac address-table 1886most Cisco switches, the privileged command "clear mac address-table
1752dynamic" will accomplish this). 1887dynamic" will accomplish this).
1753 1888
175415. Hardware Specific Considerations 188914. Hardware Specific Considerations
1755==================================== 1890====================================
1756 1891
1757 This section contains additional information for configuring 1892 This section contains additional information for configuring
1758bonding on specific hardware platforms, or for interfacing bonding 1893bonding on specific hardware platforms, or for interfacing bonding
1759with particular switches or other devices. 1894with particular switches or other devices.
1760 1895
176115.1 IBM BladeCenter 189614.1 IBM BladeCenter
1762-------------------- 1897--------------------
1763 1898
1764 This applies to the JS20 and similar systems. 1899 This applies to the JS20 and similar systems.
@@ -1861,7 +1996,7 @@ bonding driver.
1861avoid fail-over delay issues when using bonding. 1996avoid fail-over delay issues when using bonding.
1862 1997
1863 1998
186416. Frequently Asked Questions 199915. Frequently Asked Questions
1865============================== 2000==============================
1866 2001
18671. Is it SMP safe? 20021. Is it SMP safe?
@@ -1925,7 +2060,7 @@ not have special switch requirements, but do need device drivers that
1925support specific features (described in the appropriate section under 2060support specific features (described in the appropriate section under
1926module parameters, above). 2061module parameters, above).
1927 2062
1928 In 802.3ad mode, it works with with systems that support IEEE 2063 In 802.3ad mode, it works with systems that support IEEE
1929802.3ad Dynamic Link Aggregation. Most managed and many unmanaged 2064802.3ad Dynamic Link Aggregation. Most managed and many unmanaged
1930switches currently available support 802.3ad. 2065switches currently available support 802.3ad.
1931 2066
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f12007b80a46..d46338af6002 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -362,6 +362,13 @@ tcp_workaround_signed_windows - BOOLEAN
362 not receive a window scaling option from them. 362 not receive a window scaling option from them.
363 Default: 0 363 Default: 0
364 364
365tcp_slow_start_after_idle - BOOLEAN
366 If set, provide RFC2861 behavior and time out the congestion
367 window after an idle period. An idle period is defined as
368 the current RTO. If unset, the congestion window will not
369 be timed out after an idle period.
370 Default: 1
371
365IP Variables: 372IP Variables:
366 373
367ip_local_port_range - 2 INTEGERS 374ip_local_port_range - 2 INTEGERS
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
new file mode 100644
index 000000000000..4ccdbca03811
--- /dev/null
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -0,0 +1,143 @@
1/proc/sys/net/ipv4/vs/* Variables:
2
3am_droprate - INTEGER
4 default 10
5
6 It sets the always mode drop rate, which is used in the mode 3
7 of the drop_rate defense.
8
9amemthresh - INTEGER
10 default 1024
11
12 It sets the available memory threshold (in pages), which is
13 used in the automatic modes of defense. When there is not
14 enough available memory, the respective strategy will be
15 enabled and the variable is automatically set to 2, otherwise
16 the strategy is disabled and the variable is set to 1.
17
18cache_bypass - BOOLEAN
19 0 - disabled (default)
20 not 0 - enabled
21
22 If it is enabled, forward packets to the original destination
23 directly when no cache server is available and destination
24 address is not local (iph->daddr is RTN_UNICAST). It is mostly
25 used in transparent web cache cluster.
26
27debug_level - INTEGER
28 0 - transmission error messages (default)
29 1 - non-fatal error messages
30 2 - configuration
31 3 - destination trash
32 4 - drop entry
33 5 - service lookup
34 6 - scheduling
35 7 - connection new/expire, lookup and synchronization
36 8 - state transition
37 9 - binding destination, template checks and applications
38 10 - IPVS packet transmission
39 11 - IPVS packet handling (ip_vs_in/ip_vs_out)
40 12 or more - packet traversal
41
42 Only available when IPVS is compiled with CONFIG_IP_VS_DEBUG enabled.
43
44 Higher debugging levels include the messages for lower debugging
45 levels, so setting debug level 2, includes level 0, 1 and 2
46 messages. Thus, logging becomes more and more verbose the higher
47 the level.
48
49drop_entry - INTEGER
50 0 - disabled (default)
51
52 The drop_entry defense is to randomly drop entries in the
53 connection hash table, just in order to collect back some
54 memory for new connections. In the current code, the
55 drop_entry procedure can be activated every second, then it
56 randomly scans 1/32 of the whole and drops entries that are in
57 the SYN-RECV/SYNACK state, which should be effective against
58 syn-flooding attack.
59
60 The valid values of drop_entry are from 0 to 3, where 0 means
61 that this strategy is always disabled, 1 and 2 mean automatic
62 modes (when there is not enough available memory, the strategy
63 is enabled and the variable is automatically set to 2,
64 otherwise the strategy is disabled and the variable is set to
65 1), and 3 means that the strategy is always enabled.
66
67drop_packet - INTEGER
68 0 - disabled (default)
69
70 The drop_packet defense is designed to drop 1/rate packets
71 before forwarding them to real servers. If the rate is 1, then
72 drop all the incoming packets.
73
74 The value definition is the same as that of the drop_entry. In
75 the automatic mode, the rate is determined by the following
76 formula: rate = amemthresh / (amemthresh - available_memory)
77 when available memory is less than the available memory
78 threshold. When the mode 3 is set, the always mode drop rate
79 is controlled by the /proc/sys/net/ipv4/vs/am_droprate.
80
81expire_nodest_conn - BOOLEAN
82 0 - disabled (default)
83 not 0 - enabled
84
85 The default value is 0, the load balancer will silently drop
86 packets when its destination server is not available. It may
87 be useful, when user-space monitoring program deletes the
88 destination server (because of server overload or wrong
89 detection) and adds back the server later, and the connections
90 to the server can continue.
91
92 If this feature is enabled, the load balancer will expire the
93 connection immediately when a packet arrives and its
94 destination server is not available, then the client program
95 will be notified that the connection is closed. This is
96 equivalent to the feature some people require to flush
97 connections when its destination is not available.
98
99expire_quiescent_template - BOOLEAN
100 0 - disabled (default)
101 not 0 - enabled
102
103 When set to a non-zero value, the load balancer will expire
104 persistent templates when the destination server is quiescent.
105 This may be useful, when a user makes a destination server
106 quiescent by setting its weight to 0 and it is desired that
107 subsequent otherwise persistent connections are sent to a
108 different destination server. By default new persistent
109 connections are allowed to quiescent destination servers.
110
111 If this feature is enabled, the load balancer will expire the
112 persistence template if it is to be used to schedule a new
113 connection and the destination server is quiescent.
114
115nat_icmp_send - BOOLEAN
116 0 - disabled (default)
117 not 0 - enabled
118
119 It controls sending icmp error messages (ICMP_DEST_UNREACH)
120 for VS/NAT when the load balancer receives packets from real
121 servers but the connection entries don't exist.
122
123secure_tcp - INTEGER
124 0 - disabled (default)
125
126 The secure_tcp defense is to use a more complicated state
127 transition table and some possible short timeouts of each
128 state. In the VS/NAT, it delays entering the ESTABLISHED state
129 until the real server starts to send data and ACK packet
130 (after 3-way handshake).
131
132 The value definition is the same as that of drop_entry or
133 drop_packet.
134
135sync_threshold - INTEGER
136 default 3
137
138 It sets synchronization threshold, which is the minimum number
139 of incoming packets that a connection needs to receive before
140 the connection will be synchronized. A connection will be
141 synchronized, every time the number of its incoming packets
142 modulo 50 equals the threshold. The range of the threshold is
143 from 0 to 49.
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index 3c0a5ba614d7..847cedb238f6 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -42,9 +42,9 @@ dev->get_stats:
42 Context: nominally process, but don't sleep inside an rwlock 42 Context: nominally process, but don't sleep inside an rwlock
43 43
44dev->hard_start_xmit: 44dev->hard_start_xmit:
45 Synchronization: dev->xmit_lock spinlock. 45 Synchronization: netif_tx_lock spinlock.
46 When the driver sets NETIF_F_LLTX in dev->features this will be 46 When the driver sets NETIF_F_LLTX in dev->features this will be
47 called without holding xmit_lock. In this case the driver 47 called without holding netif_tx_lock. In this case the driver
48 has to lock by itself when needed. It is recommended to use a try lock 48 has to lock by itself when needed. It is recommended to use a try lock
49 for this and return -1 when the spin lock fails. 49 for this and return -1 when the spin lock fails.
50 The locking there should also properly protect against 50 The locking there should also properly protect against
@@ -62,12 +62,12 @@ dev->hard_start_xmit:
62 Only valid when NETIF_F_LLTX is set. 62 Only valid when NETIF_F_LLTX is set.
63 63
64dev->tx_timeout: 64dev->tx_timeout:
65 Synchronization: dev->xmit_lock spinlock. 65 Synchronization: netif_tx_lock spinlock.
66 Context: BHs disabled 66 Context: BHs disabled
67 Notes: netif_queue_stopped() is guaranteed true 67 Notes: netif_queue_stopped() is guaranteed true
68 68
69dev->set_multicast_list: 69dev->set_multicast_list:
70 Synchronization: dev->xmit_lock spinlock. 70 Synchronization: netif_tx_lock spinlock.
71 Context: BHs disabled 71 Context: BHs disabled
72 72
73dev->poll: 73dev->poll:
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 278771c9ad99..44f2f769e865 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -74,7 +74,7 @@ Examples:
74 pgset "pkt_size 9014" sets packet size to 9014 74 pgset "pkt_size 9014" sets packet size to 9014
75 pgset "frags 5" packet will consist of 5 fragments 75 pgset "frags 5" packet will consist of 5 fragments
76 pgset "count 200000" sets number of packets to send, set to zero 76 pgset "count 200000" sets number of packets to send, set to zero
77 for continious sends untill explicitl stopped. 77 for continuous sends until explicitly stopped.
78 78
79 pgset "delay 5000" adds delay to hard_start_xmit(). nanoseconds 79 pgset "delay 5000" adds delay to hard_start_xmit(). nanoseconds
80 80
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
index 76750fb9151a..839cbb71388b 100644
--- a/Documentation/networking/tuntap.txt
+++ b/Documentation/networking/tuntap.txt
@@ -39,10 +39,13 @@ Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
39 mknod /dev/net/tun c 10 200 39 mknod /dev/net/tun c 10 200
40 40
41 Set permissions: 41 Set permissions:
42 e.g. chmod 0700 /dev/net/tun 42 e.g. chmod 0666 /dev/net/tun
43 if you want the device only accessible by root. Giving regular users the 43 There's no harm in allowing the device to be accessible by non-root users,
44 right to assign network devices is NOT a good idea. Users could assign 44 since CAP_NET_ADMIN is required for creating network devices or for
45 bogus network interfaces to trick firewalls or administrators. 45 connecting to network devices which aren't owned by the user in question.
46 If you want to create persistent devices and give ownership of them to
47 unprivileged users, then you need the /dev/net/tun device to be usable by
48 those users.
46 49
47 Driver module autoloading 50 Driver module autoloading
48 51
diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index 66bbbf1d1ef6..2b395e478961 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -213,11 +213,19 @@ have been remapped by the kernel.
213 213
214 See Documentation/IO-mapping.txt for how to access device memory. 214 See Documentation/IO-mapping.txt for how to access device memory.
215 215
216 You still need to call request_region() for I/O regions and 216 The device driver needs to call pci_request_region() to make sure
217request_mem_region() for memory regions to make sure nobody else is using the 217no other device is already using the same resource. The driver is expected
218same device. 218to determine MMIO and IO Port resource availability _before_ calling
219 219pci_enable_device(). Conversely, drivers should call pci_release_region()
220 All interrupt handlers should be registered with SA_SHIRQ and use the devid 220_after_ calling pci_disable_device(). The idea is to prevent two devices
221colliding on the same address range.
222
223Generic flavors of pci_request_region() are request_mem_region()
224(for MMIO ranges) and request_region() (for IO Port ranges).
225Use these for address resources that are not described by "normal" PCI
226interfaces (e.g. BAR).
227
228 All interrupt handlers should be registered with IRQF_SHARED and use the devid
221to map IRQs to devices (remember that all PCI interrupts are shared). 229to map IRQs to devices (remember that all PCI interrupts are shared).
222 230
223 231
diff --git a/Documentation/pcmcia/crc32hash.c b/Documentation/pcmcia/crc32hash.c
new file mode 100644
index 000000000000..cbc36d299af8
--- /dev/null
+++ b/Documentation/pcmcia/crc32hash.c
@@ -0,0 +1,32 @@
1/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
2/* Usage example:
3$ ./crc32hash "Dual Speed"
4*/
5
6#include <string.h>
7#include <stdio.h>
8#include <ctype.h>
9#include <stdlib.h>
10
11unsigned int crc32(unsigned char const *p, unsigned int len)
12{
13 int i;
14 unsigned int crc = 0;
15 while (len--) {
16 crc ^= *p++;
17 for (i = 0; i < 8; i++)
18 crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
19 }
20 return crc;
21}
22
23int main(int argc, char **argv) {
24 unsigned int result;
25 if (argc != 2) {
26 printf("no string passed as argument\n");
27 return -1;
28 }
29 result = crc32(argv[1], strlen(argv[1]));
30 printf("0x%x\n", result);
31 return 0;
32}
diff --git a/Documentation/pcmcia/devicetable.txt b/Documentation/pcmcia/devicetable.txt
index 3351c0355143..199afd100cf2 100644
--- a/Documentation/pcmcia/devicetable.txt
+++ b/Documentation/pcmcia/devicetable.txt
@@ -27,37 +27,7 @@ pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
27The hex value after "pa" is the hash of product ID string 1, after "pb" for 27The hex value after "pa" is the hash of product ID string 1, after "pb" for
28string 2 and so on. 28string 2 and so on.
29 29
30Alternatively, you can use this small tool to determine the crc32 hash. 30Alternatively, you can use crc32hash (see Documentation/pcmcia/crc32hash.c)
31simply pass the string you want to evaluate as argument to this program, 31to determine the crc32 hash. Simply pass the string you want to evaluate
32e.g. 32as argument to this program, e.g.:
33$ ./crc32hash "Dual Speed" 33$ ./crc32hash "Dual Speed"
34
35-------------------------------------------------------------------------
36/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
37#include <string.h>
38#include <stdio.h>
39#include <ctype.h>
40#include <stdlib.h>
41
42unsigned int crc32(unsigned char const *p, unsigned int len)
43{
44 int i;
45 unsigned int crc = 0;
46 while (len--) {
47 crc ^= *p++;
48 for (i = 0; i < 8; i++)
49 crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
50 }
51 return crc;
52}
53
54int main(int argc, char **argv) {
55 unsigned int result;
56 if (argc != 2) {
57 printf("no string passed as argument\n");
58 return -1;
59 }
60 result = crc32(argv[1], strlen(argv[1]));
61 printf("0x%x\n", result);
62 return 0;
63}
diff --git a/Documentation/pi-futex.txt b/Documentation/pi-futex.txt
new file mode 100644
index 000000000000..5d61dacd21f6
--- /dev/null
+++ b/Documentation/pi-futex.txt
@@ -0,0 +1,121 @@
1Lightweight PI-futexes
2----------------------
3
4We are calling them lightweight for 3 reasons:
5
6 - in the user-space fastpath a PI-enabled futex involves no kernel work
7 (or any other PI complexity) at all. No registration, no extra kernel
8 calls - just pure fast atomic ops in userspace.
9
10 - even in the slowpath, the system call and scheduling pattern is very
11 similar to normal futexes.
12
13 - the in-kernel PI implementation is streamlined around the mutex
14 abstraction, with strict rules that keep the implementation
15 relatively simple: only a single owner may own a lock (i.e. no
16 read-write lock support), only the owner may unlock a lock, no
17 recursive locking, etc.
18
19Priority Inheritance - why?
20---------------------------
21
22The short reply: user-space PI helps achieving/improving determinism for
23user-space applications. In the best-case, it can help achieve
24determinism and well-bound latencies. Even in the worst-case, PI will
25improve the statistical distribution of locking related application
26delays.
27
28The longer reply:
29-----------------
30
31Firstly, sharing locks between multiple tasks is a common programming
32technique that often cannot be replaced with lockless algorithms. As we
33can see it in the kernel [which is a quite complex program in itself],
34lockless structures are rather the exception than the norm - the current
35ratio of lockless vs. locky code for shared data structures is somewhere
36between 1:10 and 1:100. Lockless is hard, and the complexity of lockless
37algorithms often endangers the ability to do robust reviews of said code.
38I.e. critical RT apps often choose lock structures to protect critical
39data structures, instead of lockless algorithms. Furthermore, there are
40cases (like shared hardware, or other resource limits) where lockless
41access is mathematically impossible.
42
43Media players (such as Jack) are an example of reasonable application
44design with multiple tasks (with multiple priority levels) sharing
45short-held locks: for example, a highprio audio playback thread is
46combined with medium-prio construct-audio-data threads and low-prio
47display-colory-stuff threads. Add video and decoding to the mix and
48we've got even more priority levels.
49
50So once we accept that synchronization objects (locks) are an
51unavoidable fact of life, and once we accept that multi-task userspace
52apps have a very fair expectation of being able to use locks, we've got
53to think about how to offer the option of a deterministic locking
54implementation to user-space.
55
56Most of the technical counter-arguments against doing priority
57inheritance only apply to kernel-space locks. But user-space locks are
58different, there we cannot disable interrupts or make the task
59non-preemptible in a critical section, so the 'use spinlocks' argument
60does not apply (user-space spinlocks have the same priority inversion
61problems as other user-space locking constructs). Fact is, pretty much
62the only technique that currently enables good determinism for userspace
63locks (such as futex-based pthread mutexes) is priority inheritance:
64
65Currently (without PI), if a high-prio and a low-prio task share a lock
66[this is a quite common scenario for most non-trivial RT applications],
67even if all critical sections are coded carefully to be deterministic
68(i.e. all critical sections are short in duration and only execute a
69limited number of instructions), the kernel cannot guarantee any
70deterministic execution of the high-prio task: any medium-priority task
71could preempt the low-prio task while it holds the shared lock and
72executes the critical section, and could delay it indefinitely.
73
74Implementation:
75---------------
76
77As mentioned before, the userspace fastpath of PI-enabled pthread
78mutexes involves no kernel work at all - they behave quite similarly to
79normal futex-based locks: a 0 value means unlocked, and a value==TID
80means locked. (This is the same method as used by list-based robust
81futexes.) Userspace uses atomic ops to lock/unlock these mutexes without
82entering the kernel.
83
84To handle the slowpath, we have added two new futex ops:
85
86 FUTEX_LOCK_PI
87 FUTEX_UNLOCK_PI
88
89If the lock-acquire fastpath fails, [i.e. an atomic transition from 0 to
90TID fails], then FUTEX_LOCK_PI is called. The kernel does all the
91remaining work: if there is no futex-queue attached to the futex address
92yet then the code looks up the task that owns the futex [it has put its
93own TID into the futex value], and attaches a 'PI state' structure to
94the futex-queue. The pi_state includes an rt-mutex, which is a PI-aware,
95kernel-based synchronization object. The 'other' task is made the owner
96of the rt-mutex, and the FUTEX_WAITERS bit is atomically set in the
97futex value. Then this task tries to lock the rt-mutex, on which it
98blocks. Once it returns, it has the mutex acquired, and it sets the
99futex value to its own TID and returns. Userspace has no other work to
100perform - it now owns the lock, and futex value contains
101FUTEX_WAITERS|TID.
102
103If the unlock side fastpath succeeds, [i.e. userspace manages to do a
104TID -> 0 atomic transition of the futex value], then no kernel work is
105triggered.
106
107If the unlock fastpath fails (because the FUTEX_WAITERS bit is set),
108then FUTEX_UNLOCK_PI is called, and the kernel unlocks the futex on the
109behalf of userspace - and it also unlocks the attached
110pi_state->rt_mutex and thus wakes up any potential waiters.
111
112Note that under this approach, contrary to previous PI-futex approaches,
113there is no prior 'registration' of a PI-futex. [which is not quite
114possible anyway, due to existing ABI properties of pthread mutexes.]
115
116Also, under this scheme, 'robustness' and 'PI' are two orthogonal
117properties of futexes, and all four combinations are possible: futex,
118robust-futex, PI-futex, robust+PI-futex.
119
120More details about priority inheritance can be found in
121Documentation/rtmutex.txt.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index f987afe43e28..fba1e05c47c7 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -135,96 +135,6 @@ HW.
135 135
136FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from 136FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from
137scratch. That probably means stop accepting upstream requests, the 137scratch. That probably means stop accepting upstream requests, the
138actual policy of what to do with them beeing specific to a given
139driver. It's acceptable for a network driver to just drop packets
140while a block driver is expected to block the queue so no request is
141lost. (Use IDE as an example on how to do that). FREEZE requires no
142power state change, and it's expected for drivers to be able to
143quickly transition back to operating state.
144
145SUSPEND -- like FREEZE, but also put hardware into low-power state. If
146there's need to distinguish several levels of sleep, additional flag
147is probably best way to do that.
148
149Transitions are only from a resumed state to a suspended state, never
150between 2 suspended states. (ON -> FREEZE or ON -> SUSPEND can happen,
151FREEZE -> SUSPEND or SUSPEND -> FREEZE can not).
152
153All events are:
154
155[NOTE NOTE NOTE: If you are driver author, you should not care; you
156should only look at event, and ignore flags.]
157
158#Prepare for suspend -- userland is still running but we are going to
159#enter suspend state. This gives drivers chance to load firmware from
160#disk and store it in memory, or do other activities taht require
161#operating userland, ability to kmalloc GFP_KERNEL, etc... All of these
162#are forbiden once the suspend dance is started.. event = ON, flags =
163#PREPARE_TO_SUSPEND
164
165Apm standby -- prepare for APM event. Quiesce devices to make life
166easier for APM BIOS. event = FREEZE, flags = APM_STANDBY
167
168Apm suspend -- same as APM_STANDBY, but it we should probably avoid
169spinning down disks. event = FREEZE, flags = APM_SUSPEND
170
171System halt, reboot -- quiesce devices to make life easier for BIOS. event
172= FREEZE, flags = SYSTEM_HALT or SYSTEM_REBOOT
173
174System shutdown -- at least disks need to be spun down, or data may be
175lost. Quiesce devices, just to make life easier for BIOS. event =
176FREEZE, flags = SYSTEM_SHUTDOWN
177
178Kexec -- turn off DMAs and put hardware into some state where new
179kernel can take over. event = FREEZE, flags = KEXEC
180
181Powerdown at end of swsusp -- very similar to SYSTEM_SHUTDOWN, except wake
182may need to be enabled on some devices. This actually has at least 3
183subtypes, system can reboot, enter S4 and enter S5 at the end of
184swsusp. event = FREEZE, flags = SWSUSP and one of SYSTEM_REBOOT,
185SYSTEM_SHUTDOWN, SYSTEM_S4
186
187Suspend to ram -- put devices into low power state. event = SUSPEND,
188flags = SUSPEND_TO_RAM
189
190Freeze for swsusp snapshot -- stop DMA and interrupts. No need to put
191devices into low power mode, but you must be able to reinitialize
192device from scratch in resume method. This has two flavors, its done
193once on suspending kernel, once on resuming kernel. event = FREEZE,
194flags = DURING_SUSPEND or DURING_RESUME
195
196Device detach requested from /sys -- deinitialize device; proably same as
197SYSTEM_SHUTDOWN, I do not understand this one too much. probably event
198= FREEZE, flags = DEV_DETACH.
199
200#These are not really events sent:
201#
202#System fully on -- device is working normally; this is probably never
203#passed to suspend() method... event = ON, flags = 0
204#
205#Ready after resume -- userland is now running, again. Time to free any
206#memory you ate during prepare to suspend... event = ON, flags =
207#READY_AFTER_RESUME
208#
209
210
211pm_message_t meaning
212
213pm_message_t has two fields. event ("major"), and flags. If driver
214does not know event code, it aborts the request, returning error. Some
215drivers may need to deal with special cases based on the actual type
216of suspend operation being done at the system level. This is why
217there are flags.
218
219Event codes are:
220
221ON -- no need to do anything except special cases like broken
222HW.
223
224# NOTIFICATION -- pretty much same as ON?
225
226FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from
227scratch. That probably means stop accepting upstream requests, the
228actual policy of what to do with them being specific to a given 138actual policy of what to do with them being specific to a given
229driver. It's acceptable for a network driver to just drop packets 139driver. It's acceptable for a network driver to just drop packets
230while a block driver is expected to block the queue so no request is 140while a block driver is expected to block the queue so no request is
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index d7814a113ee1..823b2cf6e3dc 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -18,10 +18,11 @@ Some warnings, first.
18 * 18 *
19 * (*) suspend/resume support is needed to make it safe. 19 * (*) suspend/resume support is needed to make it safe.
20 * 20 *
21 * If you have any filesystems on USB devices mounted before suspend, 21 * If you have any filesystems on USB devices mounted before software suspend,
22 * they won't be accessible after resume and you may lose data, as though 22 * they won't be accessible after resume and you may lose data, as though
23 * you have unplugged the USB devices with mounted filesystems on them 23 * you have unplugged the USB devices with mounted filesystems on them;
24 * (see the FAQ below for details). 24 * see the FAQ below for details. (This is not true for more traditional
25 * power states like "standby", which normally don't turn USB off.)
25 26
26You need to append resume=/dev/your_swap_partition to kernel command 27You need to append resume=/dev/your_swap_partition to kernel command
27line. Then you suspend by 28line. Then you suspend by
@@ -204,7 +205,7 @@ Q: There don't seem to be any generally useful behavioral
204distinctions between SUSPEND and FREEZE. 205distinctions between SUSPEND and FREEZE.
205 206
206A: Doing SUSPEND when you are asked to do FREEZE is always correct, 207A: Doing SUSPEND when you are asked to do FREEZE is always correct,
207but it may be unneccessarily slow. If you want USB to stay simple, 208but it may be unneccessarily slow. If you want your driver to stay simple,
208slowness may not matter to you. It can always be fixed later. 209slowness may not matter to you. It can always be fixed later.
209 210
210For devices like disk it does matter, you do not want to spindown for 211For devices like disk it does matter, you do not want to spindown for
@@ -349,25 +350,72 @@ Q: How do I make suspend more verbose?
349 350
350A: If you want to see any non-error kernel messages on the virtual 351A: If you want to see any non-error kernel messages on the virtual
351terminal the kernel switches to during suspend, you have to set the 352terminal the kernel switches to during suspend, you have to set the
352kernel console loglevel to at least 5, for example by doing 353kernel console loglevel to at least 4 (KERN_WARNING), for example by
353 354doing
354 echo 5 > /proc/sys/kernel/printk 355
356 # save the old loglevel
357 read LOGLEVEL DUMMY < /proc/sys/kernel/printk
358 # set the loglevel so we see the progress bar.
359 # if the level is higher than needed, we leave it alone.
360 if [ $LOGLEVEL -lt 5 ]; then
361 echo 5 > /proc/sys/kernel/printk
362 fi
363
364 IMG_SZ=0
365 read IMG_SZ < /sys/power/image_size
366 echo -n disk > /sys/power/state
367 RET=$?
368 #
369 # the logic here is:
370 # if image_size > 0 (without kernel support, IMG_SZ will be zero),
371 # then try again with image_size set to zero.
372 if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size
373 echo 0 > /sys/power/image_size
374 echo -n disk > /sys/power/state
375 RET=$?
376 fi
377
378 # restore previous loglevel
379 echo $LOGLEVEL > /proc/sys/kernel/printk
380 exit $RET
355 381
356Q: Is this true that if I have a mounted filesystem on a USB device and 382Q: Is this true that if I have a mounted filesystem on a USB device and
357I suspend to disk, I can lose data unless the filesystem has been mounted 383I suspend to disk, I can lose data unless the filesystem has been mounted
358with "sync"? 384with "sync"?
359 385
360A: That's right. It depends on your hardware, and it could be true even for 386A: That's right ... if you disconnect that device, you may lose data.
361suspend-to-RAM. In fact, even with "-o sync" you can lose data if your 387In fact, even with "-o sync" you can lose data if your programs have
362programs have information in buffers they haven't written out to disk. 388information in buffers they haven't written out to a disk you disconnect,
389or if you disconnect before the device finished saving data you wrote.
363 390
364If you're lucky, your hardware will support low-power modes for USB 391Software suspend normally powers down USB controllers, which is equivalent
365controllers while the system is asleep. Lots of hardware doesn't, 392to disconnecting all USB devices attached to your system.
366however. Shutting off the power to a USB controller is equivalent to
367unplugging all the attached devices.
368 393
369Remember that it's always a bad idea to unplug a disk drive containing a 394Your system might well support low-power modes for its USB controllers
370mounted filesystem. With USB that's true even when your system is asleep! 395while the system is asleep, maintaining the connection, using true sleep
371The safest thing is to unmount all USB-based filesystems before suspending 396modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the
372and remount them after resuming. 397/sys/power/state file; write "standby" or "mem".) We've not seen any
398hardware that can use these modes through software suspend, although in
399theory some systems might support "platform" or "firmware" modes that
400won't break the USB connections.
373 401
402Remember that it's always a bad idea to unplug a disk drive containing a
403mounted filesystem. That's true even when your system is asleep! The
404safest thing is to unmount all filesystems on removable media (such as USB,
405Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays)
406before suspending; then remount them after resuming.
407
408Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
409compiled with the similar configuration files. Anyway I found that
410suspend to disk (and resume) is much slower on 2.6.16 compared to
4112.6.15. Any idea for why that might happen or how can I speed it up?
412
413A: This is because the size of the suspend image is now greater than
414for 2.6.15 (by saving more data we can get more responsive system
415after resume).
416
417There's the /sys/power/image_size knob that controls the size of the
418image. If you set it to 0 (eg. by echo 0 > /sys/power/image_size as
419root), the 2.6.15 behavior should be restored. If it is still too
420slow, take a look at suspend.sf.net -- userland suspend is faster and
421supports LZF compression to speed it up further.
diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt
index 43a889f8f08d..d859faa3a463 100644
--- a/Documentation/power/video.txt
+++ b/Documentation/power/video.txt
@@ -90,6 +90,7 @@ Table of known working notebooks:
90Model hack (or "how to do it") 90Model hack (or "how to do it")
91------------------------------------------------------------------------------ 91------------------------------------------------------------------------------
92Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI 92Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI
93Acer TM 230 s3_bios (2)
93Acer TM 242FX vbetool (6) 94Acer TM 242FX vbetool (6)
94Acer TM C110 video_post (8) 95Acer TM C110 video_post (8)
95Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8) 96Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
@@ -115,6 +116,7 @@ Dell D610 vga=normal and X (possibly vbestate (6) too, but not tested)
115Dell Inspiron 4000 ??? (*) 116Dell Inspiron 4000 ??? (*)
116Dell Inspiron 500m ??? (*) 117Dell Inspiron 500m ??? (*)
117Dell Inspiron 510m ??? 118Dell Inspiron 510m ???
119Dell Inspiron 5150 vbetool needed (6)
118Dell Inspiron 600m ??? (*) 120Dell Inspiron 600m ??? (*)
119Dell Inspiron 8200 ??? (*) 121Dell Inspiron 8200 ??? (*)
120Dell Inspiron 8500 ??? (*) 122Dell Inspiron 8500 ??? (*)
@@ -125,6 +127,7 @@ HP NX7000 ??? (*)
125HP Pavilion ZD7000 vbetool post needed, need open-source nv driver for X 127HP Pavilion ZD7000 vbetool post needed, need open-source nv driver for X
126HP Omnibook XE3 athlon version none (1) 128HP Omnibook XE3 athlon version none (1)
127HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV 129HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV
130HP Omnibook XE3L-GF vbetool (6)
128HP Omnibook 5150 none (1), (S1 also works OK) 131HP Omnibook 5150 none (1), (S1 also works OK)
129IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work. 132IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work.
130IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(] 133IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(]
@@ -157,6 +160,7 @@ Sony Vaio vgn-s260 X or boot-radeon can init it (5)
157Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will be blank unless you return to X. 160Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will be blank unless you return to X.
158Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4) 161Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4)
159Toshiba Libretto L5 none (1) 162Toshiba Libretto L5 none (1)
163Toshiba Libretto 100CT/110CT vbetool (6)
160Toshiba Portege 3020CT s3_mode (3) 164Toshiba Portege 3020CT s3_mode (3)
161Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK) 165Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK)
162Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK) 166Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK)
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 217e51768b87..3c62e66e1fcc 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1436,9 +1436,9 @@ platforms are moved over to use the flattened-device-tree model.
1436 interrupts = <1d 3>; 1436 interrupts = <1d 3>;
1437 interrupt-parent = <40000>; 1437 interrupt-parent = <40000>;
1438 num-channels = <4>; 1438 num-channels = <4>;
1439 channel-fifo-len = <24>; 1439 channel-fifo-len = <18>;
1440 exec-units-mask = <000000fe>; 1440 exec-units-mask = <000000fe>;
1441 descriptor-types-mask = <073f1127>; 1441 descriptor-types-mask = <012b0ebf>;
1442 }; 1442 };
1443 1443
1444 1444
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index df82d75245a0..76e8064b8c3a 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -95,7 +95,7 @@ comparison. If the thread has registered a list, then normally the list
95is empty. If the thread/process crashed or terminated in some incorrect 95is empty. If the thread/process crashed or terminated in some incorrect
96way then the list might be non-empty: in this case the kernel carefully 96way then the list might be non-empty: in this case the kernel carefully
97walks the list [not trusting it], and marks all locks that are owned by 97walks the list [not trusting it], and marks all locks that are owned by
98this thread with the FUTEX_OWNER_DEAD bit, and wakes up one waiter (if 98this thread with the FUTEX_OWNER_DIED bit, and wakes up one waiter (if
99any). 99any).
100 100
101The list is guaranteed to be private and per-thread at do_exit() time, 101The list is guaranteed to be private and per-thread at do_exit() time,
diff --git a/Documentation/rt-mutex-design.txt b/Documentation/rt-mutex-design.txt
new file mode 100644
index 000000000000..c472ffacc2f6
--- /dev/null
+++ b/Documentation/rt-mutex-design.txt
@@ -0,0 +1,781 @@
1#
2# Copyright (c) 2006 Steven Rostedt
3# Licensed under the GNU Free Documentation License, Version 1.2
4#
5
6RT-mutex implementation design
7------------------------------
8
9This document tries to describe the design of the rtmutex.c implementation.
10It doesn't describe the reasons why rtmutex.c exists. For that please see
11Documentation/rt-mutex.txt. This document does explain problems
12that happen without this code, but only as background needed to understand
13what the code actually is doing.
14
15The goal of this document is to help others understand the priority
16inheritance (PI) algorithm that is used, as well as reasons for the
17decisions that were made to implement PI in the manner that was done.
18
19
20Unbounded Priority Inversion
21----------------------------
22
23Priority inversion is when a lower priority process executes while a higher
24priority process wants to run. This happens for several reasons, and
25most of the time it can't be helped. Anytime a high priority process wants
26to use a resource that a lower priority process has (a mutex for example),
27the high priority process must wait until the lower priority process is done
28with the resource. This is a priority inversion. What we want to prevent
29is something called unbounded priority inversion. That is when the high
30priority process is prevented from running by a lower priority process for
31an undetermined amount of time.
32
33The classic example of unbounded priority inversion is where you have three
34processes, let's call them processes A, B, and C, where A is the highest
35priority process, C is the lowest, and B is in between. A tries to grab a lock
36that C owns and must wait and lets C run to release the lock. But in the
37meantime, B executes, and since B is of a higher priority than C, it preempts C,
38but by doing so, it is in fact preempting A which is a higher priority process.
39Now there's no way of knowing how long A will be sleeping waiting for C
40to release the lock, because for all we know, B is a CPU hog and will
41never give C a chance to release the lock. This is called unbounded priority
42inversion.
43
44Here's a little ASCII art to show the problem.
45
46 grab lock L1 (owned by C)
47 |
48A ---+
49 C preempted by B
50 |
51C +----+
52
53B +-------->
54 B now keeps A from running.
55
56
57Priority Inheritance (PI)
58-------------------------
59
60There are several ways to solve this issue, but other ways are out of scope
61for this document. Here we only discuss PI.
62
63PI is where a process inherits the priority of another process if the other
64process blocks on a lock owned by the current process. To make this easier
65to understand, let's use the previous example, with processes A, B, and C again.
66
67This time, when A blocks on the lock owned by C, C would inherit the priority
68of A. So now if B becomes runnable, it would not preempt C, since C now has
69the high priority of A. As soon as C releases the lock, it loses its
70inherited priority, and A then can continue with the resource that C had.
71
72Terminology
73-----------
74
75Here I explain some terminology that is used in this document to help describe
76the design that is used to implement PI.
77
78PI chain - The PI chain is an ordered series of locks and processes that cause
79 processes to inherit priorities from a previous process that is
80 blocked on one of its locks. This is described in more detail
81 later in this document.
82
83mutex - In this document, to differentiate from locks that implement
84 PI and spin locks that are used in the PI code, from now on
85 the PI locks will be called a mutex.
86
87lock - In this document from now on, I will use the term lock when
88 referring to spin locks that are used to protect parts of the PI
89 algorithm. These locks disable preemption for UP (when
90 CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from
91 entering critical sections simultaneously.
92
93spin lock - Same as lock above.
94
95waiter - A waiter is a struct that is stored on the stack of a blocked
96 process. Since the scope of the waiter is within the code for
97 a process being blocked on the mutex, it is fine to allocate
98 the waiter on the process's stack (local variable). This
99 structure holds a pointer to the task, as well as the mutex that
100 the task is blocked on. It also has the plist node structures to
101 place the task in the waiter_list of a mutex as well as the
102 pi_list of a mutex owner task (described below).
103
104 waiter is sometimes used in reference to the task that is waiting
105 on a mutex. This is the same as waiter->task.
106
107waiters - A list of processes that are blocked on a mutex.
108
109top waiter - The highest priority process waiting on a specific mutex.
110
111top pi waiter - The highest priority process waiting on one of the mutexes
112 that a specific process owns.
113
114Note: task and process are used interchangeably in this document, mostly to
115 differentiate between two processes that are being described together.
116
117
118PI chain
119--------
120
121The PI chain is a list of processes and mutexes that may cause priority
122inheritance to take place. Multiple chains may converge, but a chain
123would never diverge, since a process can't be blocked on more than one
124mutex at a time.
125
126Example:
127
128 Process: A, B, C, D, E
129 Mutexes: L1, L2, L3, L4
130
131 A owns: L1
132 B blocked on L1
133 B owns L2
134 C blocked on L2
135 C owns L3
136 D blocked on L3
137 D owns L4
138 E blocked on L4
139
140The chain would be:
141
142 E->L4->D->L3->C->L2->B->L1->A
143
144To show where two chains merge, we could add another process F and
145another mutex L5 where B owns L5 and F is blocked on mutex L5.
146
147The chain for F would be:
148
149 F->L5->B->L1->A
150
151Since a process may own more than one mutex, but never be blocked on more than
152one, the chains merge.
153
154Here we show both chains:
155
156 E->L4->D->L3->C->L2-+
157 |
158 +->B->L1->A
159 |
160 F->L5-+
161
162For PI to work, the processes at the right end of these chains (or we may
163also call it the Top of the chain) must be equal to or higher in priority
164than the processes to the left or below in the chain.
165
166Also since a mutex may have more than one process blocked on it, we can
167have multiple chains merge at mutexes. If we add another process G that is
168blocked on mutex L2:
169
170 G->L2->B->L1->A
171
172And once again, to show how this can grow I will show the merging chains
173again.
174
175 E->L4->D->L3->C-+
176 +->L2-+
177 | |
178 G-+ +->B->L1->A
179 |
180 F->L5-+
181
182
183Plist
184-----
185
186Before I go further and talk about how the PI chain is stored through lists
187on both mutexes and processes, I'll explain the plist. This is similar to
188the struct list_head functionality that is already in the kernel.
189The implementation of plist is out of scope for this document, but it is
190very important to understand what it does.
191
192There are a few differences between plist and list, the most important one
193being that plist is a priority sorted linked list. This means that the
194priorities of the plist are sorted, such that it takes O(1) to retrieve the
195highest priority item in the list. Obviously this is useful to store processes
196based on their priorities.
197
198Another difference, which is important for implementation, is that, unlike
199list, the head of the list is a different element than the nodes of a list.
200So the head of the list is declared as struct plist_head and nodes that will
201be added to the list are declared as struct plist_node.
202
203
204Mutex Waiter List
205-----------------
206
207Every mutex keeps track of all the waiters that are blocked on itself. The mutex
208has a plist to store these waiters by priority. This list is protected by
209a spin lock that is located in the struct of the mutex. This lock is called
210wait_lock. Since the modification of the waiter list is never done in
211interrupt context, the wait_lock can be taken without disabling interrupts.
212
213
214Task PI List
215------------
216
217To keep track of the PI chains, each process has its own PI list. This is
218a list of all top waiters of the mutexes that are owned by the process.
219Note that this list only holds the top waiters and not all waiters that are
220blocked on mutexes owned by the process.
221
222The top of the task's PI list is always the highest priority task that
223is waiting on a mutex that is owned by the task. So if the task has
224inherited a priority, it will always be the priority of the task that is
225at the top of this list.
226
227This list is stored in the task structure of a process as a plist called
228pi_list. This list is protected by a spin lock also in the task structure,
229called pi_lock. This lock may also be taken in interrupt context, so when
230locking the pi_lock, interrupts must be disabled.
231
232
233Depth of the PI Chain
234---------------------
235
236The maximum depth of the PI chain is not dynamic, and could actually be
237defined. But it is very complex to figure out, since it depends on all
238the nesting of mutexes. Let's look at the example where we have 3 mutexes,
239L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
240The following shows a locking order of L1->L2->L3, but may not actually
241be directly nested that way.
242
243void func1(void)
244{
245 mutex_lock(L1);
246
247 /* do anything */
248
249 mutex_unlock(L1);
250}
251
252void func2(void)
253{
254 mutex_lock(L1);
255 mutex_lock(L2);
256
257 /* do something */
258
259 mutex_unlock(L2);
260 mutex_unlock(L1);
261}
262
263void func3(void)
264{
265 mutex_lock(L2);
266 mutex_lock(L3);
267
268 /* do something else */
269
270 mutex_unlock(L3);
271 mutex_unlock(L2);
272}
273
274void func4(void)
275{
276 mutex_lock(L3);
277
278 /* do something again */
279
280 mutex_unlock(L3);
281}
282
283Now we add 4 processes that run each of these functions separately.
284Processes A, B, C, and D which run functions func1, func2, func3 and func4
285respectively, and such that D runs first and A last. With D being preempted
286in func4 in the "do something again" area, we have a locking that follows:
287
288D owns L3
289 C blocked on L3
290 C owns L2
291 B blocked on L2
292 B owns L1
293 A blocked on L1
294
295And thus we have the chain A->L1->B->L2->C->L3->D.
296
297This gives us a PI depth of 4 (four processes), but looking at any of the
298functions individually, it seems as though they only have at most a locking
299depth of two. So, although the locking depth is defined at compile time,
300it still is very difficult to find the possibilities of that depth.
301
302Now since mutexes can be defined by user-land applications, we don't want a DoS
303type of application that nests large amounts of mutexes to create a large
304PI chain, and have the code holding spin locks while looking at a large
305amount of data. So to prevent this, the implementation not only implements
306a maximum lock depth, but also only holds at most two different locks at a
307time, as it walks the PI chain. More about this below.
308
309
310Mutex owner and flags
311---------------------
312
313The mutex structure contains a pointer to the owner of the mutex. If the
314mutex is not owned, this owner is set to NULL. Since all architectures
315have the task structure on at least a four byte alignment (and if this is
316not true, the rtmutex.c code will be broken!), this allows for the two
317least significant bits to be used as flags. This part is also described
318in Documentation/rt-mutex.txt, but will also be briefly described here.
319
320Bit 0 is used as the "Pending Owner" flag. This is described later.
321Bit 1 is used as the "Has Waiters" flag. This is also described later
322 in more detail, but is set whenever there are waiters on a mutex.
323
324
325cmpxchg Tricks
326--------------
327
328Some architectures implement an atomic cmpxchg (Compare and Exchange). This
329is used (when applicable) to keep the fast path of grabbing and releasing
330mutexes short.
331
332cmpxchg is basically the following function performed atomically:
333
334unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
335{
336 unsigned long T = *A;
337 if (*A == *B) {
338 *A = *C;
339 }
340 return T;
341}
342#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
343
344This is really nice to have, since it allows you to only update a variable
345if the variable is what you expect it to be. You know if it succeeded if
346the return value (the old value of A) is equal to B.
347
348The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
349the architecture does not support CMPXCHG, then this macro is simply set
350to fail every time. But if CMPXCHG is supported, then this will
351help out extremely to keep the fast path short.
352
353The use of rt_mutex_cmpxchg with the flags in the owner field helps optimize
354the system for architectures that support it. This will also be explained
355later in this document.
356
357
358Priority adjustments
359--------------------
360
361The implementation of the PI code in rtmutex.c has several places that a
362process must adjust its priority. With the help of the pi_list of a
363process this is rather easy to know what needs to be adjusted.
364
365The functions implementing the task adjustments are rt_mutex_adjust_prio,
366__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock
367to already be taken), rt_mutex_getprio, and rt_mutex_setprio.
368
369rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio.
370
371rt_mutex_getprio returns the priority that the task should have. Either the
372task's own normal priority, or if a process of a higher priority is waiting on
373a mutex owned by the task, then that higher priority should be returned.
374Since the pi_list of a task holds a priority-ordered list of all the top
375waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs
376to compare the top pi waiter to its own normal priority, and return the higher
377priority back.
378
379(Note: if looking at the code, you will notice that the lower number of
380 prio is returned. This is because the prio field in the task structure
381 is an inverse order of the actual priority. So a "prio" of 5 is
382 of higher priority than a "prio" of 10.)
383
384__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
385result does not equal the task's current priority, then rt_mutex_setprio
386is called to adjust the priority of the task to the new priority.
387Note that rt_mutex_setprio is defined in kernel/sched.c to implement the
388actual change in priority.
389
390It is interesting to note that __rt_mutex_adjust_prio can either increase
391or decrease the priority of the task. In the case that a higher priority
392process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio
393would increase/boost the task's priority. But if a higher priority task
394were for some reason to leave the mutex (timeout or signal), this same function
395would decrease/unboost the priority of the task. That is because the pi_list
396always contains the highest priority task that is waiting on a mutex owned
397by the task, so we only need to compare the priority of that top pi waiter
398to the normal priority of the given task.
399
400
401High level overview of the PI chain walk
402----------------------------------------
403
404The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
405
406The implementation has gone through several iterations, and has ended up
407with what we believe is the best. It walks the PI chain by only grabbing
408at most two locks at a time, and is very efficient.
409
410The rt_mutex_adjust_prio_chain can be used either to boost or lower process
411priorities.
412
413rt_mutex_adjust_prio_chain is called with a task to be checked for PI
414(de)boosting (the owner of a mutex that a process is blocking on), a flag to
415check for deadlocking, the mutex that the task owns, and a pointer to a waiter
416that is the process's waiter struct that is blocked on the mutex (although this
417parameter may be NULL for deboosting).
418
419For this explanation, I will not mention deadlock detection. This explanation
420will try to stay at a high level.
421
422When this function is called, there are no locks held. That also means
423that the state of the owner and lock can change when entered into this function.
424
425Before this function is called, the task has already had rt_mutex_adjust_prio
426performed on it. This means that the task is set to the priority that it
427should be at, but the plist nodes of the task's waiter have not been updated
428with the new priorities, and that this task may not be in the proper locations
429in the pi_lists and wait_lists that the task is blocked on. This function
430solves all that.
431
432A loop is entered, where task is the owner to be checked for PI changes that
433was passed by parameter (for the first iteration). The pi_lock of this task is
434taken to prevent any more changes to the pi_list of the task. This also
435prevents new tasks from completing the blocking on a mutex that is owned by this
436task.
437
438If the task is not blocked on a mutex then the loop is exited. We are at
439the top of the PI chain.
440
441A check is now done to see if the original waiter (the process that is blocked
442on the current mutex) is the top pi waiter of the task. That is, is this
443waiter on the top of the task's pi_list. If it is not, it either means that
444there is another process higher in priority that is blocked on one of the
445mutexes that the task owns, or that the waiter has just woken up via a signal
446or timeout and has left the PI chain. In either case, the loop is exited, since
447we don't need to do any more changes to the priority of the current task, or any
448task that owns a mutex that this current task is waiting on. A priority chain
449walk is only needed when a new top pi waiter is made to a task.
450
451The next check sees if the task's waiter plist node has the priority equal to
452the priority the task is set at. If they are equal, then we are done with
453the loop. Remember that the function started with the priority of the
454task adjusted, but the plist nodes that hold the task in other processes'
455pi_lists have not been adjusted.
456
457Next, we look at the mutex that the task is blocked on. The mutex's wait_lock
458is taken. This is done by a spin_trylock, because the locking order of the
459pi_lock and wait_lock goes in the opposite direction. If we fail to grab the
460lock, the pi_lock is released, and we restart the loop.
461
462Now that we have both the pi_lock of the task as well as the wait_lock of
463the mutex the task is blocked on, we update the task's waiter's plist node
464that is located on the mutex's wait_list.
465
466Now we release the pi_lock of the task.
467
468Next the owner of the mutex has its pi_lock taken, so we can update the
469task's entry in the owner's pi_list. If the task is the highest priority
470process on the mutex's wait_list, then we remove the previous top waiter
471from the owner's pi_list, and replace it with the task.
472
473Note: It is possible that the task was the current top waiter on the mutex,
474 in which case the task is not yet on the pi_list of the waiter. This
475 is OK, since plist_del does nothing if the plist node is not on any
476 list.
477
478If the task was not the top waiter of the mutex, but it was before we
479did the priority updates, that means we are deboosting/lowering the
480task. In this case, the task is removed from the pi_list of the owner,
481and the new top waiter is added.
482
483Lastly, we unlock both the pi_lock of the task, as well as the mutex's
484wait_lock, and continue the loop again. On the next iteration of the
485loop, the previous owner of the mutex will be the task that will be
486processed.
487
488Note: One might think that the owner of this mutex might have changed
489 since we just grab the mutex's wait_lock. And one could be right.
490 The important thing to remember is that the owner could not have
491 become the task that is being processed in the PI chain, since
492 we have taken that task's pi_lock at the beginning of the loop.
493 So as long as there is an owner of this mutex that is not the same
494 process as the task being worked on, we are OK.
495
496 Looking closely at the code, one might be confused. The check for the
497 end of the PI chain is when the task isn't blocked on anything or the
498 task's waiter structure "task" element is NULL. This check is
499 protected only by the task's pi_lock. But the code to unlock the mutex
500 sets the task's waiter structure "task" element to NULL with only
501 the protection of the mutex's wait_lock, which was not taken yet.
502 Isn't this a race condition if the task becomes the new owner?
503
504 The answer is No! The trick is the spin_trylock of the mutex's
505 wait_lock. If we fail that lock, we release the pi_lock of the
506 task and continue the loop, doing the end of PI chain check again.
507
508 In the code to release the lock, the wait_lock of the mutex is held
509 the entire time, and it is not let go when we grab the pi_lock of the
510 new owner of the mutex. So if the switch of a new owner were to happen
511 after the check for end of the PI chain and the grabbing of the
512 wait_lock, the unlocking code would spin on the new owner's pi_lock
513 but never give up the wait_lock. So the PI chain loop is guaranteed to
514 fail the spin_trylock on the wait_lock, release the pi_lock, and
515 try again.
516
517 If you don't quite understand the above, that's OK. You don't have to,
518 unless you really want to make a proof out of it ;)
519
520
521Pending Owners and Lock stealing
522--------------------------------
523
524One of the flags in the owner field of the mutex structure is "Pending Owner".
525What this means is that an owner was chosen by the process releasing the
526mutex, but that owner has yet to wake up and actually take the mutex.
527
528Why is this important? Why can't we just give the mutex to another process
529and be done with it?
530
531The PI code is to help with real-time processes, and to let the highest
532priority process run as long as possible with little latencies and delays.
533If a high priority process owns a mutex that a lower priority process is
534blocked on, when the mutex is released it would be given to the lower priority
535process. What if the higher priority process wants to take that mutex again?
536The high priority process would fail to take that mutex that it just gave up
537and it would need to boost the lower priority process to run with full
538latency of that critical section (since the low priority process just entered
539it).
540
541There's no reason a high priority process that gives up a mutex should be
542penalized if it tries to take that mutex again. If the new owner of the
543mutex has not woken up yet, there's no reason that the higher priority process
544could not take that mutex away.
545
546To solve this, we introduced Pending Ownership and Lock Stealing. When a
547new process is given a mutex that it was blocked on, it is only given
548pending ownership. This means that it's the new owner, unless a higher
549priority process comes in and tries to grab that mutex. If a higher priority
550process does come along and wants that mutex, we let the higher priority
551process "steal" the mutex from the pending owner (only if it is still pending)
552and continue with the mutex.
553
554
555Taking of a mutex (The walk through)
556------------------------------------
557
558OK, now let's take a look at the detailed walk through of what happens when
559taking a mutex.
560
561The first thing that is tried is the fast taking of the mutex. This is
562done when we have CMPXCHG enabled (otherwise the fast taking automatically
563fails). Only when the owner field of the mutex is NULL can the lock be
564taken with the CMPXCHG and nothing else needs to be done.
565
566If there is contention on the lock, whether it is owned or has a pending owner,
567we go about the slow path (rt_mutex_slowlock).
568
569The slow path function is where the task's waiter structure is created on
570the stack. This is because the waiter structure is only needed for the
571scope of this function. The waiter structure holds the nodes to store
572the task on the wait_list of the mutex, and if need be, the pi_list of
573the owner.
574
575The wait_lock of the mutex is taken since the slow path of unlocking the
576mutex also takes this lock.
577
578We then call try_to_take_rt_mutex. This is where the architecture that
579does not implement CMPXCHG would always grab the lock (if there's no
580contention).
581
582try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
583slow path. The first thing that is done here is an atomic setting of
584the "Has Waiters" flag of the mutex's owner field. Yes, this could really
585be false, because if the mutex has no owner, there are no waiters and
586the current task also won't have any waiters. But we don't have the lock
587yet, so we assume we are going to be a waiter. The reason for this is to
588play nice for those architectures that do have CMPXCHG. By setting this flag
589now, the owner of the mutex can't release the mutex without going into the
590slow unlock path, and it would then need to grab the wait_lock, which this
591code currently holds. So setting the "Has Waiters" flag forces the owner
592to synchronize with this code.
593
594Now that we know that we can't have any races with the owner releasing the
595mutex, we check to see if we can take the ownership. This is done if the
596mutex doesn't have an owner, or if we can steal the mutex from a pending
597owner. Let's look at the situations we have here.
598
599 1) Has owner that is pending
600 ----------------------------
601
602 The mutex has an owner, but it hasn't woken up and the mutex flag
603 "Pending Owner" is set. The first check is to see if the owner isn't the
604 current task. This is because this function is also used for the pending
605 owner to grab the mutex. When a pending owner wakes up, it checks to see
606 if it can take the mutex, and this is done if the owner is already set to
607 itself. If so, we succeed and leave the function, clearing the "Pending
608 Owner" bit.
609
610 If the pending owner is not current, we check to see if the current priority is
611 higher than the pending owner. If not, we fail the function and return.
612
613 There's also something special about a pending owner. That is a pending owner
614 is never blocked on a mutex. So there is no PI chain to worry about. It also
615 means that if the mutex doesn't have any waiters, there's no accounting needed
616 to update the pending owner's pi_list, since we only worry about processes
617 blocked on the current mutex.
618
619 If there are waiters on this mutex, and we just stole the ownership, we need
620 to take the top waiter, remove it from the pi_list of the pending owner, and
621 add it to the current pi_list. Note that at this moment, the pending owner
622 is no longer on the list of waiters. This is fine, since the pending owner
623 would add itself back when it realizes that it had the ownership stolen
624 from itself. When the pending owner tries to grab the mutex, it will fail
625 in try_to_take_rt_mutex if the owner field points to another process.
626
627 2) No owner
628 -----------
629
630 If there is no owner (or we successfully stole the lock), we set the owner
631 of the mutex to current, and set the flag of "Has Waiters" if the current
632 mutex actually has waiters, or we clear the flag if it doesn't. See, it was
633 OK that we set that flag early, since now it is cleared.
634
635 3) Failed to grab ownership
636 ---------------------------
637
638 The most interesting case is when we fail to take ownership. This means that
639 there exists an owner, or there's a pending owner with equal or higher
640 priority than the current task.
641
642We'll continue on the failed case.
643
644If the mutex has a timeout, we set up a timer to go off to break us out
645of this mutex if we failed to get it after a specified amount of time.
646
647Now we enter a loop that will continue to try to take ownership of the mutex, or
648fail from a timeout or signal.
649
650Once again we try to take the mutex. This will usually fail the first time
651in the loop, since it had just failed to get the mutex. But the second time
652in the loop, this would likely succeed, since the task would likely be
653the pending owner.
654
655If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done
656here.
657
658The waiter structure has a "task" field that points to the task that is blocked
659on the mutex. This field can be NULL the first time it goes through the loop
660or if the task is a pending owner and had its mutex stolen. If the "task"
661field is NULL then we need to set up the accounting for it.
662
663Task blocks on mutex
664--------------------
665
666The accounting of a mutex and process is done with the waiter structure of
667the process. The "task" field is set to the process, and the "lock" field
668to the mutex. The plist nodes are initialized to the process's current
669priority.
670
671Since the wait_lock was taken at the entry of the slow lock, we can safely
672add the waiter to the wait_list. If the current process is the highest
673priority process currently waiting on this mutex, then we remove the
674previous top waiter process (if it exists) from the pi_list of the owner,
675and add the current process to that list. Since the pi_list of the owner
676has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
677should adjust its priority accordingly.
678
679If the owner is also blocked on a lock, and had its pi_list changed
680(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
681and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
682
683Now all locks are released, and if the current process is still blocked on a
684mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
685
686Waking up in the loop
687---------------------
688
689The schedule can then wake up for a few reasons.
690 1) we were given pending ownership of the mutex.
691 2) we received a signal and were TASK_INTERRUPTIBLE
692 3) we had a timeout and were TASK_INTERRUPTIBLE
693
694In any of these cases, we continue the loop and once again try to grab the
695ownership of the mutex. If we succeed, we exit the loop, otherwise we continue
696and on signal and timeout, will exit the loop, or if we had the mutex stolen
697we just simply add ourselves back on the lists and go back to sleep.
698
699Note: For various reasons, because of timeout and signals, the steal mutex
700 algorithm needs to be careful. This is because the current process is
701 still on the wait_list. And because of dynamic changing of priorities,
702 especially on SCHED_OTHER tasks, the current process can be the
703 highest priority task on the wait_list.
704
705Failed to get mutex on Timeout or Signal
706----------------------------------------
707
708If a timeout or signal occurred, the waiter's "task" field would not be
709NULL and the task needs to be taken off the wait_list of the mutex and perhaps
710pi_list of the owner. If this process was a high priority process, then
711the rt_mutex_adjust_prio_chain needs to be executed again on the owner,
712but this time it will be lowering the priorities.
713
714
715Unlocking the Mutex
716-------------------
717
718The unlocking of a mutex also has a fast path for those architectures with
719CMPXCHG. Since the taking of a mutex on contention always sets the
720"Has Waiters" flag of the mutex's owner, we use this to know if we need to
721take the slow path when unlocking the mutex. If the mutex doesn't have any
722waiters, the owner field of the mutex would equal the current process and
723the mutex can be unlocked by just replacing the owner field with NULL.
724
725If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
726the slow unlock path is taken.
727
728The first thing done in the slow unlock path is to take the wait_lock of the
729mutex. This synchronizes the locking and unlocking of the mutex.
730
731A check is made to see if the mutex has waiters or not. On architectures that
732do not have CMPXCHG, this is the location that the owner of the mutex will
733determine if a waiter needs to be awoken or not. On architectures that
734do have CMPXCHG, that check is done in the fast path, but it is still needed
735in the slow path too. If a waiter of a mutex woke up because of a signal
736or timeout between the time the owner failed the fast path CMPXCHG check and
737the grabbing of the wait_lock, the mutex may not have any waiters, thus the
738owner still needs to make this check. If there are no waiters then the mutex
739owner field is set to NULL, the wait_lock is released and nothing more is
740needed.
741
742If there are waiters, then we need to wake one up and give that waiter
743pending ownership.
744
745On the wake up code, the pi_lock of the current owner is taken. The top
746waiter of the lock is found and removed from the wait_list of the mutex
747as well as the pi_list of the current owner. The task field of the new
748pending owner's waiter structure is set to NULL, and the owner field of the
749mutex is set to the new owner with the "Pending Owner" bit set, as well
750as the "Has Waiters" bit if there still are other processes blocked on the
751mutex.
752
753The pi_lock of the previous owner is released, and the new pending owner's
754pi_lock is taken. Remember that this is the trick to prevent the race
755condition in rt_mutex_adjust_prio_chain from adding itself as a waiter
756on the mutex.
757
758We now clear the "pi_blocked_on" field of the new pending owner, and if
759the mutex still has waiters pending, we add the new top waiter to the pi_list
760of the pending owner.
761
762Finally we unlock the pi_lock of the pending owner and wake it up.
763
764
765Contact
766-------
767
768For updates on this document, please email Steven Rostedt <rostedt@goodmis.org>
769
770
771Credits
772-------
773
774Author: Steven Rostedt <rostedt@goodmis.org>
775
776Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
777
778Updates
779-------
780
781This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/rt-mutex.txt b/Documentation/rt-mutex.txt
new file mode 100644
index 000000000000..243393d882ee
--- /dev/null
+++ b/Documentation/rt-mutex.txt
@@ -0,0 +1,79 @@
1RT-mutex subsystem with PI support
2----------------------------------
3
4RT-mutexes with priority inheritance are used to support PI-futexes,
5which enable pthread_mutex_t priority inheritance attributes
6(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
7about PI-futexes.]
8
9This technology was developed in the -rt tree and streamlined for
10pthread_mutex support.
11
12Basic principles:
13-----------------
14
15RT-mutexes extend the semantics of simple mutexes by the priority
16inheritance protocol.
17
18A low priority owner of a rt-mutex inherits the priority of a higher
19priority waiter until the rt-mutex is released. If the temporarily
20boosted owner blocks on a rt-mutex itself it propagates the priority
21boosting to the owner of the other rt_mutex it gets blocked on. The
22priority boosting is immediately removed once the rt_mutex has been
23unlocked.
24
25This approach allows us to shorten the block of high-prio tasks on
26mutexes which protect shared resources. Priority inheritance is not a
27magic bullet for poorly designed applications, but it allows
28well-designed applications to use userspace locks in critical parts of
29a high priority thread, without losing determinism.
30
31The enqueueing of the waiters into the rtmutex waiter list is done in
32priority order. For same priorities FIFO order is chosen. For each
33rtmutex, only the top priority waiter is enqueued into the owner's
34priority waiters list. This list too queues in priority order. Whenever
35the top priority waiter of a task changes (for example it timed out or
36got a signal), the priority of the owner task is readjusted. [The
37priority enqueueing is handled by "plists", see include/linux/plist.h
38for more details.]
39
40RT-mutexes are optimized for fastpath operations and have no internal
41locking overhead when locking an uncontended mutex or unlocking a mutex
42without waiters. The optimized fastpath operations require cmpxchg
43support. [If that is not available then the rt-mutex internal spinlock
44is used]
45
46The state of the rt-mutex is tracked via the owner field of the rt-mutex
47structure:
48
49rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
50are used to keep track of the "owner is pending" and "rtmutex has
51waiters" state.
52
53 owner bit1 bit0
54 NULL 0 0 mutex is free (fast acquire possible)
55 NULL 0 1 invalid state
56 NULL 1 0 Transitional state*
57 NULL 1 1 invalid state
58 taskpointer 0 0 mutex is held (fast release possible)
59 taskpointer 0 1 task is pending owner
60 taskpointer 1 0 mutex is held and has waiters
61 taskpointer 1 1 task is pending owner and mutex has waiters
62
63Pending-ownership handling is a performance optimization:
64pending-ownership is assigned to the first (highest priority) waiter of
65the mutex, when the mutex is released. The thread is woken up and once
66it starts executing it can acquire the mutex. Until the mutex is taken
67by it (bit 0 is cleared) a competing higher priority thread can "steal"
68the mutex which puts the woken up thread back on the waiters list.
69
70The pending-ownership optimization is especially important for the
71uninterrupted workflow of high-prio tasks which repeatedly
72take/release locks that have lower-prio waiters. Without this
73optimization the higher-prio thread would ping-pong to the lower-prio
74task [because at unlock time we always assign a new owner].
75
76(*) The "mutex has waiters" bit gets set to take the lock. If the lock
77doesn't already have an owner, this bit is quickly cleared if there are
78no waiters. So this is a transitional state to synchronize with looking
79at the owner field of the mutex and the mutex owner releasing the lock.
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
index 95d17b3e2eee..2a58f985795a 100644
--- a/Documentation/rtc.txt
+++ b/Documentation/rtc.txt
@@ -44,8 +44,10 @@ normal timer interrupt, which is 100Hz.
44Programming and/or enabling interrupt frequencies greater than 64Hz is 44Programming and/or enabling interrupt frequencies greater than 64Hz is
45only allowed by root. This is perhaps a bit conservative, but we don't want 45only allowed by root. This is perhaps a bit conservative, but we don't want
46an evil user generating lots of IRQs on a slow 386sx-16, where it might have 46an evil user generating lots of IRQs on a slow 386sx-16, where it might have
47a negative impact on performance. Note that the interrupt handler is only 47a negative impact on performance. This 64Hz limit can be changed by writing
48a few lines of code to minimize any possibility of this effect. 48a different value to /proc/sys/dev/rtc/max-user-freq. Note that the
49interrupt handler is only a few lines of code to minimize any possibility
50of this effect.
49 51
50Also, if the kernel time is synchronized with an external source, the 52Also, if the kernel time is synchronized with an external source, the
51kernel will write the time back to the CMOS clock every 11 minutes. In 53kernel will write the time back to the CMOS clock every 11 minutes. In
@@ -81,6 +83,7 @@ that will be using this driver.
81 */ 83 */
82 84
83#include <stdio.h> 85#include <stdio.h>
86#include <stdlib.h>
84#include <linux/rtc.h> 87#include <linux/rtc.h>
85#include <sys/ioctl.h> 88#include <sys/ioctl.h>
86#include <sys/time.h> 89#include <sys/time.h>
diff --git a/Documentation/scsi/00-INDEX b/Documentation/scsi/00-INDEX
index e7da8c3a255b..12354830c6b0 100644
--- a/Documentation/scsi/00-INDEX
+++ b/Documentation/scsi/00-INDEX
@@ -30,8 +30,6 @@ aic7xxx.txt
30 - info on driver for Adaptec controllers 30 - info on driver for Adaptec controllers
31aic7xxx_old.txt 31aic7xxx_old.txt
32 - info on driver for Adaptec controllers, old generation 32 - info on driver for Adaptec controllers, old generation
33cpqfc.txt
34 - info on driver for Compaq Tachyon TS adapters
35dpti.txt 33dpti.txt
36 - info on driver for DPT SmartRAID and Adaptec I2O RAID based adapters 34 - info on driver for DPT SmartRAID and Adaptec I2O RAID based adapters
37dtc3x80.txt 35dtc3x80.txt
diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
index 2dafa63bd370..d9e5960dafd5 100644
--- a/Documentation/scsi/ChangeLog.megaraid_sas
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -1,3 +1,32 @@
1
21 Release Date : Sun May 14 22:49:52 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
32 Current Version : 00.00.03.01
43 Older Version : 00.00.02.04
5
6i. Added support for ZCR controller.
7
8 New device id 0x413 added.
9
10ii. Bug fix : Disable controller interrupt before firing INIT cmd to FW.
11
12 Interrupt is enabled after required initialization is over.
13 This is done to ensure that driver is ready to handle interrupts when
14 it is generated by the controller.
15
16 -Sumant Patro <Sumant.Patro@lsil.com>
17
181 Release Date : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
192 Current Version : 00.00.02.04
203 Older Version : 00.00.02.04
21
22i. Remove superfluous instance_lock
23
24 gets rid of the otherwise superfluous instance_lock and avoids an unsafe
25 unsynchronized access in the error handler.
26
27 - Christoph Hellwig <hch@lst.de>
28
29
11 Release Date : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com> 301 Release Date : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
22 Current Version : 00.00.02.04 312 Current Version : 00.00.02.04
33 Older Version : 00.00.02.04 323 Older Version : 00.00.02.04
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index 820fd0793502..be55670851a4 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -24,10 +24,10 @@ Supported Cards/Chipsets
24 9005:0285:9005:0296 Adaptec 2240S (SabreExpress) 24 9005:0285:9005:0296 Adaptec 2240S (SabreExpress)
25 9005:0285:9005:0290 Adaptec 2410SA (Jaguar) 25 9005:0285:9005:0290 Adaptec 2410SA (Jaguar)
26 9005:0285:9005:0293 Adaptec 21610SA (Corsair-16) 26 9005:0285:9005:0293 Adaptec 21610SA (Corsair-16)
27 9005:0285:103c:3227 Adaptec 2610SA (Bearcat) 27 9005:0285:103c:3227 Adaptec 2610SA (Bearcat HP release)
28 9005:0285:9005:0292 Adaptec 2810SA (Corsair-8) 28 9005:0285:9005:0292 Adaptec 2810SA (Corsair-8)
29 9005:0285:9005:0294 Adaptec Prowler 29 9005:0285:9005:0294 Adaptec Prowler
30 9005:0286:9005:029d Adaptec 2420SA (Intruder) 30 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release)
31 9005:0286:9005:029c Adaptec 2620SA (Intruder) 31 9005:0286:9005:029c Adaptec 2620SA (Intruder)
32 9005:0286:9005:029b Adaptec 2820SA (Intruder) 32 9005:0286:9005:029b Adaptec 2820SA (Intruder)
33 9005:0286:9005:02a7 Adaptec 2830SA (Skyray) 33 9005:0286:9005:02a7 Adaptec 2830SA (Skyray)
@@ -38,7 +38,7 @@ Supported Cards/Chipsets
38 9005:0285:9005:0297 Adaptec 4005SAS (AvonPark) 38 9005:0285:9005:0297 Adaptec 4005SAS (AvonPark)
39 9005:0285:9005:0299 Adaptec 4800SAS (Marauder-X) 39 9005:0285:9005:0299 Adaptec 4800SAS (Marauder-X)
40 9005:0285:9005:029a Adaptec 4805SAS (Marauder-E) 40 9005:0285:9005:029a Adaptec 4805SAS (Marauder-E)
41 9005:0286:9005:02a2 Adaptec 4810SAS (Hurricane) 41 9005:0286:9005:02a2 Adaptec 3800SAS (Hurricane44)
42 1011:0046:9005:0364 Adaptec 5400S (Mustang) 42 1011:0046:9005:0364 Adaptec 5400S (Mustang)
43 1011:0046:9005:0365 Adaptec 5400S (Mustang) 43 1011:0046:9005:0365 Adaptec 5400S (Mustang)
44 9005:0283:9005:0283 Adaptec Catapult (3210S with arc firmware) 44 9005:0283:9005:0283 Adaptec Catapult (3210S with arc firmware)
@@ -72,7 +72,7 @@ Supported Cards/Chipsets
72 9005:0286:9005:02a1 ICP ICP9087MA (Lancer) 72 9005:0286:9005:02a1 ICP ICP9087MA (Lancer)
73 9005:0286:9005:02a4 ICP ICP9085LI (Marauder-X) 73 9005:0286:9005:02a4 ICP ICP9085LI (Marauder-X)
74 9005:0286:9005:02a5 ICP ICP5085BR (Marauder-E) 74 9005:0286:9005:02a5 ICP ICP5085BR (Marauder-E)
75 9005:0286:9005:02a3 ICP ICP5085AU (Hurricane) 75 9005:0286:9005:02a3 ICP ICP5445AU (Hurricane44)
76 9005:0286:9005:02a6 ICP ICP9067MA (Intruder-6) 76 9005:0286:9005:02a6 ICP ICP9067MA (Intruder-6)
77 9005:0286:9005:02a9 ICP ICP5087AU (Skyray) 77 9005:0286:9005:02a9 ICP ICP5087AU (Skyray)
78 9005:0286:9005:02aa ICP ICP5047AU (Skyray) 78 9005:0286:9005:02aa ICP ICP5047AU (Skyray)
diff --git a/Documentation/scsi/cpqfc.txt b/Documentation/scsi/cpqfc.txt
deleted file mode 100644
index dd33e61c0645..000000000000
--- a/Documentation/scsi/cpqfc.txt
+++ /dev/null
@@ -1,272 +0,0 @@
1Notes for CPQFCTS driver for Compaq Tachyon TS
2Fibre Channel Host Bus Adapter, PCI 64-bit, 66MHz
3for Linux (RH 6.1, 6.2 kernel 2.2.12-32, 2.2.14-5)
4SMP tested
5Tested in single and dual HBA configuration, 32 and 64bit busses,
633 and 66MHz. Only supports FC-AL.
7SEST size 512 Exchanges (simultaneous I/Os) limited by module kmalloc()
8 max of 128k bytes contiguous.
9
10Ver 2.5.4 Oct 03, 2002
11 * fixed memcpy of sense buffer in ioctl to copy the smaller defined size
12Ver 2.5.3 Aug 01, 2002
13 * fix the passthru ioctl to handle the Scsi_Cmnd->request being a pointer
14Ver 2.5.1 Jul 30, 2002
15 * fix ioctl to pay attention to the specified LUN.
16Ver 2.5.0 Nov 29, 2001
17 * eliminated io_request_lock. This change makes the driver specific
18 to the 2.5.x kernels.
19 * silenced excessively noisy printks.
20
21Ver 2.1.2 July 23, 2002
22 * initialize DumCmnd->lun in cpqfcTS_ioctl (used in fcFindLoggedInPorts as LUN index)
23
24Ver 2.1.1 Oct 18, 2001
25 * reinitialize Cmnd->SCp.sent_command (used to identify commands as
26 passthrus) on calling scsi_done, since the scsi mid layer does not
27 use (or reinitialize) this field to prevent subsequent comands from
28 having it set incorrectly.
29
30Ver 2.1.0 Aug 27, 2001
31 * Revise driver to use new kernel 2.4.x PCI DMA API, instead of
32 virt_to_bus(). (enables driver to work w/ ia64 systems with >2Gb RAM.)
33 Rework main scatter-gather code to handle cases where SG element
34 lengths are larger than 0x7FFFF bytes and use as many scatter
35 gather pages as necessary. (Steve Cameron)
36 * Makefile changes to bring cpqfc into line w/ rest of SCSI drivers
37 (thanks to Keith Owens)
38
39Ver 2.0.5 Aug 06, 2001
40 * Reject non-existent luns in the driver rather than letting the
41 hardware do it. (some HW behaves differently than others in this area.)
42 * Changed Makefile to rely on "make dep" instead of explicit dependencies
43 * ifdef'ed out fibre channel analyzer triggering debug code
44 * fixed a jiffies wrapping issue
45
46Ver 2.0.4 Aug 01, 2001
47 * Incorporated fix for target device reset from Steeleye
48 * Fixed passthrough ioctl so it doesn't hang.
49 * Fixed hang in launch_FCworker_thread() that occurred on some machines.
50 * Avoid problem when number of volumes in a single cabinet > 8
51
52Ver 2.0.2 July 23, 2001
53 Changed the semiphore changes so the driver would compile in 2.4.7.
54 This version is for 2.4.7 and beyond.
55
56Ver 2.0.1 May 7, 2001
57 Merged version 1.3.6 fixes into version 2.0.0.
58
59Ver 2.0.0 May 7, 2001
60 Fixed problem so spinlock is being initialized to UNLOCKED.
61 Fixed updated driver so it compiles in the 2.4 tree.
62
63 Ver 1.3.6 Feb 27, 2001
64 Added Target_Device_Reset function for SCSI error handling
65 Fixed problem with not reseting addressing mode after implicit logout
66
67
68Ver 1.3.4 Sep 7, 2000
69 Added Modinfo information
70 Fixed problem with statically linking the driver
71
72Ver 1.3.3, Aug 23, 2000
73 Fixed device/function number in ioctl
74
75Ver 1.3.2, July 27, 2000
76 Add include for Alpha compile on 2.2.14 kernel (cpq*i2c.c)
77 Change logic for different FCP-RSP sense_buffer location for HSG80 target
78 And search for Agilent Tachyon XL2 HBAs (not finished! - in test)
79
80Tested with
81(storage):
82 Compaq RA-4x000, RAID firmware ver 2.40 - 2.54
83 Seagate FC drives model ST39102FC, rev 0006
84 Hitachi DK31CJ-72FC rev J8A8
85 IBM DDYF-T18350R rev F60K
86 Compaq FC-SCSI bridge w/ DLT 35/70 Gb DLT (tape)
87(servers):
88 Compaq PL-1850R
89 Compaq PL-6500 Xeon (400MHz)
90 Compaq PL-8500 (500MHz, 66MHz, 64bit PCI)
91 Compaq Alpha DS20 (RH 6.1)
92(hubs):
93 Vixel Rapport 1000 (7-port "dumb")
94 Gadzoox Gibralter (12-port "dumb")
95 Gadzoox Capellix 2000, 3000
96(switches):
97 Brocade 2010, 2400, 2800, rev 2.0.3a (& later)
98 Gadzoox 3210 (Fabric blade beta)
99 Vixel 7100 (Fabric beta firmare - known hot plug issues)
100using "qa_test" (esp. io_test script) suite modified from Unix tests.
101
102Installation:
103make menuconfig
104 (select SCSI low-level, Compaq FC HBA)
105make modules
106make modules_install
107
108e.g. insmod -f cpqfc
109
110Due to Fabric/switch delays, driver requires 4 seconds
111to initialize. If adapters are found, there will be a entries at
112/proc/scsi/cpqfcTS/*
113
114sample contents of startup messages
115
116*************************
117 scsi_register allocating 3596 bytes for CPQFCHBA
118 ioremap'd Membase: c887e600
119 HBA Tachyon RevId 1.2
120Allocating 119808 for 576 Exchanges @ c0dc0000
121Allocating 112904 for LinkQ @ c0c20000 (576 elements)
122Allocating 110600 for TachSEST for 512 Exchanges
123 cpqfcTS: writing IMQ BASE 7C0000h PI 7C4000h
124 cpqfcTS: SEST c0e40000(virt): Wrote base E40000h @ c887e740
125cpqfcTS: New FC port 0000E8h WWN: 500507650642499D SCSI Chan/Trgt 0/0
126cpqfcTS: New FC port 0000EFh WWN: 50000E100000D5A6 SCSI Chan/Trgt 0/1
127cpqfcTS: New FC port 0000E4h WWN: 21000020370097BB SCSI Chan/Trgt 0/2
128cpqfcTS: New FC port 0000E2h WWN: 2100002037009946 SCSI Chan/Trgt 0/3
129cpqfcTS: New FC port 0000E1h WWN: 21000020370098FE SCSI Chan/Trgt 0/4
130cpqfcTS: New FC port 0000E0h WWN: 21000020370097B2 SCSI Chan/Trgt 0/5
131cpqfcTS: New FC port 0000DCh WWN: 2100002037006CC1 SCSI Chan/Trgt 0/6
132cpqfcTS: New FC port 0000DAh WWN: 21000020370059F6 SCSI Chan/Trgt 0/7
133cpqfcTS: New FC port 00000Fh WWN: 500805F1FADB0E20 SCSI Chan/Trgt 0/8
134cpqfcTS: New FC port 000008h WWN: 500805F1FADB0EBA SCSI Chan/Trgt 0/9
135cpqfcTS: New FC port 000004h WWN: 500805F1FADB1EB9 SCSI Chan/Trgt 0/10
136cpqfcTS: New FC port 000002h WWN: 500805F1FADB1ADE SCSI Chan/Trgt 0/11
137cpqfcTS: New FC port 000001h WWN: 500805F1FADBA2CA SCSI Chan/Trgt 0/12
138scsi4 : Compaq FibreChannel HBA Tachyon TS HPFC-5166A/1.2: WWN 500508B200193F50
139 on PCI bus 0 device 0xa0fc irq 5 IObaseL 0x3400, MEMBASE 0xc6ef8600
140PCI bus width 32 bits, bus speed 33 MHz
141FCP-SCSI Driver v1.3.0
142GBIC detected: Short-wave. LPSM 0h Monitor
143scsi : 5 hosts.
144 Vendor: IBM Model: DDYF-T18350R Rev: F60K
145 Type: Direct-Access ANSI SCSI revision: 03
146Detected scsi disk sdb at scsi4, channel 0, id 0, lun 0
147 Vendor: HITACHI Model: DK31CJ-72FC Rev: J8A8
148 Type: Direct-Access ANSI SCSI revision: 02
149Detected scsi disk sdc at scsi4, channel 0, id 1, lun 0
150 Vendor: SEAGATE Model: ST39102FC Rev: 0006
151 Type: Direct-Access ANSI SCSI revision: 02
152Detected scsi disk sdd at scsi4, channel 0, id 2, lun 0
153 Vendor: SEAGATE Model: ST39102FC Rev: 0006
154 Type: Direct-Access ANSI SCSI revision: 02
155Detected scsi disk sde at scsi4, channel 0, id 3, lun 0
156 Vendor: SEAGATE Model: ST39102FC Rev: 0006
157 Type: Direct-Access ANSI SCSI revision: 02
158Detected scsi disk sdf at scsi4, channel 0, id 4, lun 0
159 Vendor: SEAGATE Model: ST39102FC Rev: 0006
160 Type: Direct-Access ANSI SCSI revision: 02
161Detected scsi disk sdg at scsi4, channel 0, id 5, lun 0
162 Vendor: SEAGATE Model: ST39102FC Rev: 0006
163 Type: Direct-Access ANSI SCSI revision: 02
164Detected scsi disk sdh at scsi4, channel 0, id 6, lun 0
165 Vendor: SEAGATE Model: ST39102FC Rev: 0006
166 Type: Direct-Access ANSI SCSI revision: 02
167Detected scsi disk sdi at scsi4, channel 0, id 7, lun 0
168 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.48
169 Type: Direct-Access ANSI SCSI revision: 02
170Detected scsi disk sdj at scsi4, channel 0, id 8, lun 0
171 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.48
172 Type: Direct-Access ANSI SCSI revision: 02
173Detected scsi disk sdk at scsi4, channel 0, id 8, lun 1
174 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.40
175 Type: Direct-Access ANSI SCSI revision: 02
176Detected scsi disk sdl at scsi4, channel 0, id 9, lun 0
177 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.40
178 Type: Direct-Access ANSI SCSI revision: 02
179Detected scsi disk sdm at scsi4, channel 0, id 9, lun 1
180 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.54
181 Type: Direct-Access ANSI SCSI revision: 02
182Detected scsi disk sdn at scsi4, channel 0, id 10, lun 0
183 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.54
184 Type: Direct-Access ANSI SCSI revision: 02
185Detected scsi disk sdo at scsi4, channel 0, id 11, lun 0
186 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.54
187 Type: Direct-Access ANSI SCSI revision: 02
188Detected scsi disk sdp at scsi4, channel 0, id 11, lun 1
189 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.54
190 Type: Direct-Access ANSI SCSI revision: 02
191Detected scsi disk sdq at scsi4, channel 0, id 12, lun 0
192 Vendor: COMPAQ Model: LOGICAL VOLUME Rev: 2.54
193 Type: Direct-Access ANSI SCSI revision: 02
194Detected scsi disk sdr at scsi4, channel 0, id 12, lun 1
195resize_dma_pool: unknown device type 12
196resize_dma_pool: unknown device type 12
197SCSI device sdb: hdwr sector= 512 bytes. Sectors= 35843670 [17501 MB] [17.5 GB]
198 sdb: sdb1
199SCSI device sdc: hdwr sector= 512 bytes. Sectors= 144410880 [70513 MB] [70.5 GB]
200 sdc: sdc1
201SCSI device sdd: hdwr sector= 512 bytes. Sectors= 17783240 [8683 MB] [8.7 GB]
202 sdd: sdd1
203SCSI device sde: hdwr sector= 512 bytes. Sectors= 17783240 [8683 MB] [8.7 GB]
204 sde: sde1
205SCSI device sdf: hdwr sector= 512 bytes. Sectors= 17783240 [8683 MB] [8.7 GB]
206 sdf: sdf1
207SCSI device sdg: hdwr sector= 512 bytes. Sectors= 17783240 [8683 MB] [8.7 GB]
208 sdg: sdg1
209SCSI device sdh: hdwr sector= 512 bytes. Sectors= 17783240 [8683 MB] [8.7 GB]
210 sdh: sdh1
211SCSI device sdi: hdwr sector= 512 bytes. Sectors= 17783240 [8683 MB] [8.7 GB]
212 sdi: sdi1
213SCSI device sdj: hdwr sector= 512 bytes. Sectors= 2056160 [1003 MB] [1.0 GB]
214 sdj: sdj1
215SCSI device sdk: hdwr sector= 512 bytes. Sectors= 2052736 [1002 MB] [1.0 GB]
216 sdk: sdk1
217SCSI device sdl: hdwr sector= 512 bytes. Sectors= 17764320 [8673 MB] [8.7 GB]
218 sdl: sdl1
219SCSI device sdm: hdwr sector= 512 bytes. Sectors= 8380320 [4091 MB] [4.1 GB]
220 sdm: sdm1
221SCSI device sdn: hdwr sector= 512 bytes. Sectors= 17764320 [8673 MB] [8.7 GB]
222 sdn: sdn1
223SCSI device sdo: hdwr sector= 512 bytes. Sectors= 17764320 [8673 MB] [8.7 GB]
224 sdo: sdo1
225SCSI device sdp: hdwr sector= 512 bytes. Sectors= 17764320 [8673 MB] [8.7 GB]
226 sdp: sdp1
227SCSI device sdq: hdwr sector= 512 bytes. Sectors= 2056160 [1003 MB] [1.0 GB]
228 sdq: sdq1
229SCSI device sdr: hdwr sector= 512 bytes. Sectors= 2052736 [1002 MB] [1.0 GB]
230 sdr: sdr1
231
232*************************
233
234If a GBIC of type Short-wave, Long-wave, or Copper is detected, it will
235print out; otherwise, "none" is displayed. If the cabling is correct
236and a loop circuit is completed, you should see "Monitor"; otherwise,
237"LoopFail" (on open circuit) or some LPSM number/state with bit 3 set.
238
239
240ERRATA:
2411. Normally, Linux Scsi queries FC devices with INQUIRY strings. All LUNs
242found according to INQUIRY should get READ commands at sector 0 to find
243partition table, etc. Older kernels only query the first 4 devices. Some
244Linux kernels only look for one LUN per target (i.e. FC device).
245
2462. Physically removing a device, or a malfunctioning system which hides a
247device, leads to a 30-second timeout and subsequent _abort call.
248In some process contexts, this will hang the kernel (crashing the system).
249Single bit errors in frames and virtually all hot plugging events are
250gracefully handled with internal driver timer and Abort processing.
251
2523. Some SCSI drives with error conditions will not handle the 7 second timeout
253in this software driver, leading to infinite retries on timed out SCSI commands.
254The 7 secs balances the need to quickly recover from lost frames (esp. on sequence
255initiatives) and time needed by older/slower/error-state drives in responding.
256This can be easily changed in "Exchanges[].timeOut".
257
2584. Due to the nature of FC soft addressing, there is no assurance that the
259same LUNs (drives) will have the same path (e.g. /dev/sdb1) from one boot to
260next. Dynamic soft address changes (i.e. 24-bit FC port_id) are
261supported during run time (e.g. due to hot plug event) by the use of WWN to
262SCSI Nexus (channel/target/LUN) mapping.
263
2645. Compaq RA4x00 firmware version 2.54 and later supports SSP (Selective
265Storage Presentation), which maps LUNs to a WWN. If RA4x00 firmware prior
2662.54 (e.g. older controller) is used, or the FC HBA is replaced (another WWN
267is used), logical volumes on the RA4x00 will no longer be visible.
268
269
270Send questions/comments to:
271Amy Vanzant-Hodge (fibrechannel@compaq.com)
272
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt
new file mode 100644
index 000000000000..d28a31247d4c
--- /dev/null
+++ b/Documentation/scsi/hptiop.txt
@@ -0,0 +1,92 @@
1HIGHPOINT ROCKETRAID 3xxx RAID DRIVER (hptiop)
2
3Controller Register Map
4-------------------------
5
6The controller IOP is accessed via PCI BAR0.
7
8 BAR0 offset Register
9 0x10 Inbound Message Register 0
10 0x14 Inbound Message Register 1
11 0x18 Outbound Message Register 0
12 0x1C Outbound Message Register 1
13 0x20 Inbound Doorbell Register
14 0x24 Inbound Interrupt Status Register
15 0x28 Inbound Interrupt Mask Register
16 0x30 Outbound Interrupt Status Register
17 0x34 Outbound Interrupt Mask Register
18 0x40 Inbound Queue Port
19 0x44 Outbound Queue Port
20
21
22I/O Request Workflow
23----------------------
24
25All queued requests are handled via inbound/outbound queue port.
26A request packet can be allocated in either IOP or host memory.
27
28To send a request to the controller:
29
30 - Get a free request packet by reading the inbound queue port or
31 allocate a free request in host DMA coherent memory.
32
33 The value returned from the inbound queue port is an offset
34 relative to the IOP BAR0.
35
36 Requests allocated in host memory must be aligned on a 32-byte boundary.
37
38 - Fill the packet.
39
40 - Post the packet to IOP by writing it to inbound queue. For requests
41 allocated in IOP memory, write the offset to inbound queue port. For
42 requests allocated in host memory, write (0x80000000|(bus_addr>>5))
43 to the inbound queue port.
44
45 - The IOP processes the request. When the request is completed, it
46 will be put into outbound queue. An outbound interrupt will be
47 generated.
48
49 For requests allocated in IOP memory, the request offset is posted to
50 outbound queue.
51
52 For requests allocated in host memory, (0x80000000|(bus_addr>>5))
53 is posted to the outbound queue. If IOP_REQUEST_FLAG_OUTPUT_CONTEXT
54 flag is set in the request, the low 32-bit context value will be
55 posted instead.
56
57 - The host reads the outbound queue and completes the request.
58
59 For requests allocated in IOP memory, the host driver frees the request
60 by writing it to the outbound queue.
61
62Non-queued requests (reset/flush etc) can be sent via inbound message
63register 0. An outbound message with the same value indicates the completion
64of an inbound message.
65
66
67User-level Interface
68---------------------
69
70The driver exposes the following sysfs attributes:
71
72 NAME R/W Description
73 driver-version R driver version string
74 firmware-version R firmware version string
75
76The driver registers char device "hptiop" to communicate with HighPoint RAID
77management software. Its ioctl routine acts as a general binary interface
78between the IOP firmware and HighPoint RAID management software. New management
79functions can be implemented in application/firmware without modification
80in driver code.
81
82
83-----------------------------------------------------------------------------
84Copyright (C) 2006 HighPoint Technologies, Inc. All Rights Reserved.
85
86 This file is distributed in the hope that it will be useful,
87 but WITHOUT ANY WARRANTY; without even the implied warranty of
88 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
89 GNU General Public License for more details.
90
91 linux@highpoint-tech.com
92 http://www.highpoint-tech.com
diff --git a/Documentation/scsi/ppa.txt b/Documentation/scsi/ppa.txt
index 0dac88d86d87..5d9223bc1bd5 100644
--- a/Documentation/scsi/ppa.txt
+++ b/Documentation/scsi/ppa.txt
@@ -12,5 +12,3 @@ http://www.torque.net/parport/
12Email list for Linux Parport 12Email list for Linux Parport
13linux-parport@torque.net 13linux-parport@torque.net
14 14
15Email for problems with ZIP or ZIP Plus drivers
16campbell@torque.net
diff --git a/Documentation/scsi/tmscsim.txt b/Documentation/scsi/tmscsim.txt
index e165229adf50..df7a02bfb5bf 100644
--- a/Documentation/scsi/tmscsim.txt
+++ b/Documentation/scsi/tmscsim.txt
@@ -109,7 +109,7 @@ than the 33.33 MHz being in the PCI spec.
109 109
110If you want to share the IRQ with another device and the driver refuses to 110If you want to share the IRQ with another device and the driver refuses to
111do so, you might succeed with changing the DC390_IRQ type in tmscsim.c to 111do so, you might succeed with changing the DC390_IRQ type in tmscsim.c to
112SA_SHIRQ | SA_INTERRUPT. 112IRQF_SHARED | IRQF_DISABLED.
113 113
114 114
1153.Features 1153.Features
diff --git a/Documentation/serial/driver b/Documentation/serial/driver
index df82116a9f26..88ad615dd338 100644
--- a/Documentation/serial/driver
+++ b/Documentation/serial/driver
@@ -214,12 +214,13 @@ hardware.
214 The interaction of the iflag bits is as follows (parity error 214 The interaction of the iflag bits is as follows (parity error
215 given as an example): 215 given as an example):
216 Parity error INPCK IGNPAR 216 Parity error INPCK IGNPAR
217 None n/a n/a character received 217 n/a 0 n/a character received, marked as
218 Yes n/a 0 character discarded
219 Yes 0 1 character received, marked as
220 TTY_NORMAL 218 TTY_NORMAL
221 Yes 1 1 character received, marked as 219 None 1 n/a character received, marked as
220 TTY_NORMAL
221 Yes 1 0 character received, marked as
222 TTY_PARITY 222 TTY_PARITY
223 Yes 1 1 character discarded
223 224
224 Other flags may be used (eg, xon/xoff characters) if your 225 Other flags may be used (eg, xon/xoff characters) if your
225 hardware supports hardware "soft" flow control. 226 hardware supports hardware "soft" flow control.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 0ee2c7dfc482..f61af23dd85d 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -366,7 +366,9 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
366 366
367 Module for C-Media CMI8338 and 8738 PCI sound cards. 367 Module for C-Media CMI8338 and 8738 PCI sound cards.
368 368
369 mpu_port - 0x300,0x310,0x320,0x330, 0 = disable (default) 369 mpu_port - 0x300,0x310,0x320,0x330 = legacy port,
370 1 = integrated PCI port,
371 0 = disable (default)
370 fm_port - 0x388 (default), 0 = disable (default) 372 fm_port - 0x388 (default), 0 = disable (default)
371 soft_ac3 - Software-conversion of raw SPDIF packets (model 033 only) 373 soft_ac3 - Software-conversion of raw SPDIF packets (model 033 only)
372 (default = 1) 374 (default = 1)
@@ -468,7 +470,23 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
468 470
469 Module for multifunction CS5535 companion PCI device 471 Module for multifunction CS5535 companion PCI device
470 472
473 The power-management is supported.
474
475 Module snd-darla20
476 ------------------
477
478 Module for Echoaudio Darla20
479
480 This module supports multiple cards.
481 The driver requires the firmware loader support on kernel.
482
483 Module snd-darla24
484 ------------------
485
486 Module for Echoaudio Darla24
487
471 This module supports multiple cards. 488 This module supports multiple cards.
489 The driver requires the firmware loader support on kernel.
472 490
473 Module snd-dt019x 491 Module snd-dt019x
474 ----------------- 492 -----------------
@@ -497,6 +515,14 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
497 515
498 The power-management is supported. 516 The power-management is supported.
499 517
518 Module snd-echo3g
519 -----------------
520
521 Module for Echoaudio 3G cards (Gina3G/Layla3G)
522
523 This module supports multiple cards.
524 The driver requires the firmware loader support on kernel.
525
500 Module snd-emu10k1 526 Module snd-emu10k1
501 ------------------ 527 ------------------
502 528
@@ -655,6 +681,22 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
655 681
656 The power-management is supported. 682 The power-management is supported.
657 683
684 Module snd-gina20
685 -----------------
686
687 Module for Echoaudio Gina20
688
689 This module supports multiple cards.
690 The driver requires the firmware loader support on kernel.
691
692 Module snd-gina24
693 -----------------
694
695 Module for Echoaudio Gina24
696
697 This module supports multiple cards.
698 The driver requires the firmware loader support on kernel.
699
658 Module snd-gusclassic 700 Module snd-gusclassic
659 --------------------- 701 ---------------------
660 702
@@ -707,8 +749,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
707 Module snd-hda-intel 749 Module snd-hda-intel
708 -------------------- 750 --------------------
709 751
710 Module for Intel HD Audio (ICH6, ICH6M, ICH7), ATI SB450, 752 Module for Intel HD Audio (ICH6, ICH6M, ESB2, ICH7, ICH8),
711 VIA VT8251/VT8237A 753 ATI SB450, SB600, RS600,
754 VIA VT8251/VT8237A,
755 SIS966, ULI M5461
712 756
713 model - force the model name 757 model - force the model name
714 position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size) 758 position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
@@ -756,12 +800,18 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
756 basic fixed pin assignment w/o SPDIF 800 basic fixed pin assignment w/o SPDIF
757 auto auto-config reading BIOS (default) 801 auto auto-config reading BIOS (default)
758 802
759 ALC882/883/885 803 ALC882/885
760 3stack-dig 3-jack with SPDIF I/O 804 3stack-dig 3-jack with SPDIF I/O
761 6stack-dig 6-jack digital with SPDIF I/O 805 6stack-dig 6-jack digital with SPDIF I/O
762 auto auto-config reading BIOS (default) 806 auto auto-config reading BIOS (default)
763 807
764 ALC861 808 ALC883/888
809 3stack-dig 3-jack with SPDIF I/O
810 6stack-dig 6-jack digital with SPDIF I/O
811 6stack-dig-demo 6-stack digital for Intel demo board
812 auto auto-config reading BIOS (default)
813
814 ALC861/660
765 3stack 3-jack 815 3stack 3-jack
766 3stack-dig 3-jack with SPDIF I/O 816 3stack-dig 3-jack with SPDIF I/O
767 6stack-dig 6-jack with SPDIF I/O 817 6stack-dig 6-jack with SPDIF I/O
@@ -778,6 +828,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
778 AD1981 828 AD1981
779 basic 3-jack (default) 829 basic 3-jack (default)
780 hp HP nx6320 830 hp HP nx6320
831 thinkpad Lenovo Thinkpad T60/X60/Z60
781 832
782 AD1986A 833 AD1986A
783 6stack 6-jack, separate surrounds (default) 834 6stack 6-jack, separate surrounds (default)
@@ -932,6 +983,30 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
932 driver isn't configured properly or you want to try another 983 driver isn't configured properly or you want to try another
933 type for testing. 984 type for testing.
934 985
986 Module snd-indigo
987 -----------------
988
989 Module for Echoaudio Indigo
990
991 This module supports multiple cards.
992 The driver requires the firmware loader support on kernel.
993
994 Module snd-indigodj
995 -------------------
996
997 Module for Echoaudio Indigo DJ
998
999 This module supports multiple cards.
1000 The driver requires the firmware loader support on kernel.
1001
1002 Module snd-indigoio
1003 -------------------
1004
1005 Module for Echoaudio Indigo IO
1006
1007 This module supports multiple cards.
1008 The driver requires the firmware loader support on kernel.
1009
935 Module snd-intel8x0 1010 Module snd-intel8x0
936 ------------------- 1011 -------------------
937 1012
@@ -1031,6 +1106,22 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1031 1106
1032 This module supports multiple cards. 1107 This module supports multiple cards.
1033 1108
1109 Module snd-layla20
1110 ------------------
1111
1112 Module for Echoaudio Layla20
1113
1114 This module supports multiple cards.
1115 The driver requires the firmware loader support on kernel.
1116
1117 Module snd-layla24
1118 ------------------
1119
1120 Module for Echoaudio Layla24
1121
1122 This module supports multiple cards.
1123 The driver requires the firmware loader support on kernel.
1124
1034 Module snd-maestro3 1125 Module snd-maestro3
1035 ------------------- 1126 -------------------
1036 1127
@@ -1051,6 +1142,14 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1051 1142
1052 The power-management is supported. 1143 The power-management is supported.
1053 1144
1145 Module snd-mia
1146 ---------------
1147
1148 Module for Echoaudio Mia
1149
1150 This module supports multiple cards.
1151 The driver requires the firmware loader support on kernel.
1152
1054 Module snd-miro 1153 Module snd-miro
1055 --------------- 1154 ---------------
1056 1155
@@ -1083,6 +1182,14 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1083 When no hotplug fw loader is available, you need to load the 1182 When no hotplug fw loader is available, you need to load the
1084 firmware via mixartloader utility in alsa-tools package. 1183 firmware via mixartloader utility in alsa-tools package.
1085 1184
1185 Module snd-mona
1186 ---------------
1187
1188 Module for Echoaudio Mona
1189
1190 This module supports multiple cards.
1191 The driver requires the firmware loader support on kernel.
1192
1086 Module snd-mpu401 1193 Module snd-mpu401
1087 ----------------- 1194 -----------------
1088 1195
@@ -1633,9 +1740,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1633 1740
1634 About capture IBL, see the description of snd-vx222 module. 1741 About capture IBL, see the description of snd-vx222 module.
1635 1742
1636 Note: the driver is build only when CONFIG_ISA is set. 1743 Note: snd-vxp440 driver is merged to snd-vxpocket driver since
1637
1638 Note2: snd-vxp440 driver is merged to snd-vxpocket driver since
1639 ALSA 1.0.10. 1744 ALSA 1.0.10.
1640 1745
1641 The power-management is supported. 1746 The power-management is supported.
@@ -1662,8 +1767,6 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1662 1767
1663 Module for Sound Core PDAudioCF sound card. 1768 Module for Sound Core PDAudioCF sound card.
1664 1769
1665 Note: the driver is build only when CONFIG_ISA is set.
1666
1667 The power-management is supported. 1770 The power-management is supported.
1668 1771
1669 1772
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index 1faf76383bab..69866d5997a4 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -1149,7 +1149,7 @@
1149 } 1149 }
1150 chip->port = pci_resource_start(pci, 0); 1150 chip->port = pci_resource_start(pci, 0);
1151 if (request_irq(pci->irq, snd_mychip_interrupt, 1151 if (request_irq(pci->irq, snd_mychip_interrupt,
1152 SA_INTERRUPT|SA_SHIRQ, "My Chip", chip)) { 1152 IRQF_DISABLED|IRQF_SHARED, "My Chip", chip)) {
1153 printk(KERN_ERR "cannot grab irq %d\n", pci->irq); 1153 printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
1154 snd_mychip_free(chip); 1154 snd_mychip_free(chip);
1155 return -EBUSY; 1155 return -EBUSY;
@@ -1323,7 +1323,7 @@
1323 <programlisting> 1323 <programlisting>
1324<![CDATA[ 1324<![CDATA[
1325 if (request_irq(pci->irq, snd_mychip_interrupt, 1325 if (request_irq(pci->irq, snd_mychip_interrupt,
1326 SA_INTERRUPT|SA_SHIRQ, "My Chip", chip)) { 1326 IRQF_DISABLED|IRQF_SHARED, "My Chip", chip)) {
1327 printk(KERN_ERR "cannot grab irq %d\n", pci->irq); 1327 printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
1328 snd_mychip_free(chip); 1328 snd_mychip_free(chip);
1329 return -EBUSY; 1329 return -EBUSY;
@@ -1342,7 +1342,7 @@
1342 1342
1343 <para> 1343 <para>
1344 On the PCI bus, the interrupts can be shared. Thus, 1344 On the PCI bus, the interrupts can be shared. Thus,
1345 <constant>SA_SHIRQ</constant> is given as the interrupt flag of 1345 <constant>IRQF_SHARED</constant> is given as the interrupt flag of
1346 <function>request_irq()</function>. 1346 <function>request_irq()</function>.
1347 </para> 1347 </para>
1348 1348
@@ -3048,7 +3048,7 @@ struct _snd_pcm_runtime {
3048 </para> 3048 </para>
3049 3049
3050 <para> 3050 <para>
3051 If you aquire a spinlock in the interrupt handler, and the 3051 If you acquire a spinlock in the interrupt handler, and the
3052 lock is used in other pcm callbacks, too, then you have to 3052 lock is used in other pcm callbacks, too, then you have to
3053 release the lock before calling 3053 release the lock before calling
3054 <function>snd_pcm_period_elapsed()</function>, because 3054 <function>snd_pcm_period_elapsed()</function>, because
@@ -4215,7 +4215,7 @@ struct _snd_pcm_runtime {
4215 <programlisting> 4215 <programlisting>
4216<![CDATA[ 4216<![CDATA[
4217 struct snd_rawmidi *rmidi; 4217 struct snd_rawmidi *rmidi;
4218 snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, port, integrated, 4218 snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, port, info_flags,
4219 irq, irq_flags, &rmidi); 4219 irq, irq_flags, &rmidi);
4220]]> 4220]]>
4221 </programlisting> 4221 </programlisting>
@@ -4242,15 +4242,36 @@ struct _snd_pcm_runtime {
4242 </para> 4242 </para>
4243 4243
4244 <para> 4244 <para>
4245 The 5th argument is bitflags for additional information.
4245 When the i/o port address above is a part of the PCI i/o 4246 When the i/o port address above is a part of the PCI i/o
4246 region, the MPU401 i/o port might have been already allocated 4247 region, the MPU401 i/o port might have been already allocated
4247 (reserved) by the driver itself. In such a case, pass non-zero 4248 (reserved) by the driver itself. In such a case, pass a bit flag
4248 to the 5th argument 4249 <constant>MPU401_INFO_INTEGRATED</constant>,
4249 (<parameter>integrated</parameter>). Otherwise, pass 0 to it,
4250 and 4250 and
4251 the mpu401-uart layer will allocate the i/o ports by itself. 4251 the mpu401-uart layer will allocate the i/o ports by itself.
4252 </para> 4252 </para>
4253 4253
4254 <para>
4255 When the controller supports only the input or output MIDI stream,
4256 pass <constant>MPU401_INFO_INPUT</constant> or
4257 <constant>MPU401_INFO_OUTPUT</constant> bitflag, respectively.
4258 Then the rawmidi instance is created as a single stream.
4259 </para>
4260
4261 <para>
4262 <constant>MPU401_INFO_MMIO</constant> bitflag is used to change
4263 the access method to MMIO (via readb and writeb) instead of
4264 inb and outb. In this case, you have to pass the iomapped address
4265 to <function>snd_mpu401_uart_new()</function>.
4266 </para>
4267
4268 <para>
4269 When <constant>MPU401_INFO_TX_IRQ</constant> is set, the output
4270 stream isn't checked in the default interrupt handler. The driver
4271 needs to call <function>snd_mpu401_uart_interrupt_tx()</function>
4272 by itself to start processing the output stream in irq handler.
4273 </para>
4274
4254 <para> 4275 <para>
4255 Usually, the port address corresponds to the command port and 4276 Usually, the port address corresponds to the command port and
4256 port + 1 corresponds to the data port. If not, you may change 4277 port + 1 corresponds to the data port. If not, you may change
@@ -5333,7 +5354,7 @@ struct _snd_pcm_runtime {
5333 <informalexample> 5354 <informalexample>
5334 <programlisting> 5355 <programlisting>
5335<![CDATA[ 5356<![CDATA[
5336 snd_info_set_text_ops(entry, chip, read_size, my_proc_read); 5357 snd_info_set_text_ops(entry, chip, my_proc_read);
5337]]> 5358]]>
5338 </programlisting> 5359 </programlisting>
5339 </informalexample> 5360 </informalexample>
@@ -5394,7 +5415,6 @@ struct _snd_pcm_runtime {
5394 <informalexample> 5415 <informalexample>
5395 <programlisting> 5416 <programlisting>
5396<![CDATA[ 5417<![CDATA[
5397 entry->c.text.write_size = 256;
5398 entry->c.text.write = my_proc_write; 5418 entry->c.text.write = my_proc_write;
5399]]> 5419]]>
5400 </programlisting> 5420 </programlisting>
@@ -5402,22 +5422,6 @@ struct _snd_pcm_runtime {
5402 </para> 5422 </para>
5403 5423
5404 <para> 5424 <para>
5405 The buffer size for read is set to 1024 implicitly by
5406 <function>snd_info_set_text_ops()</function>. It should suffice
5407 in most cases (the size will be aligned to
5408 <constant>PAGE_SIZE</constant> anyway), but if you need to handle
5409 very large text files, you can set it explicitly, too.
5410
5411 <informalexample>
5412 <programlisting>
5413<![CDATA[
5414 entry->c.text.read_size = 65536;
5415]]>
5416 </programlisting>
5417 </informalexample>
5418 </para>
5419
5420 <para>
5421 For the write callback, you can use 5425 For the write callback, you can use
5422 <function>snd_info_get_line()</function> to get a text line, and 5426 <function>snd_info_get_line()</function> to get a text line, and
5423 <function>snd_info_get_str()</function> to retrieve a string from 5427 <function>snd_info_get_str()</function> to retrieve a string from
@@ -5562,7 +5566,7 @@ struct _snd_pcm_runtime {
5562 power status.</para></listitem> 5566 power status.</para></listitem>
5563 <listitem><para>Call <function>snd_pcm_suspend_all()</function> to suspend the running PCM streams.</para></listitem> 5567 <listitem><para>Call <function>snd_pcm_suspend_all()</function> to suspend the running PCM streams.</para></listitem>
5564 <listitem><para>If AC97 codecs are used, call 5568 <listitem><para>If AC97 codecs are used, call
5565 <function>snd_ac97_resume()</function> for each codec.</para></listitem> 5569 <function>snd_ac97_suspend()</function> for each codec.</para></listitem>
5566 <listitem><para>Save the register values if necessary.</para></listitem> 5570 <listitem><para>Save the register values if necessary.</para></listitem>
5567 <listitem><para>Stop the hardware if necessary.</para></listitem> 5571 <listitem><para>Stop the hardware if necessary.</para></listitem>
5568 <listitem><para>Disable the PCI device by calling 5572 <listitem><para>Disable the PCI device by calling
diff --git a/Documentation/sparc/sbus_drivers.txt b/Documentation/sparc/sbus_drivers.txt
index 876195dc2aef..4b9351624f13 100644
--- a/Documentation/sparc/sbus_drivers.txt
+++ b/Documentation/sparc/sbus_drivers.txt
@@ -25,42 +25,84 @@ the bits necessary to run your device. The most commonly
25used members of this structure, and their typical usage, 25used members of this structure, and their typical usage,
26will be detailed below. 26will be detailed below.
27 27
28 Here is how probing is performed by an SBUS driver 28 Here is a piece of skeleton code for performing a device
29under Linux: 29probe in an SBUS driver under Linux:
30 30
31 static void init_one_mydevice(struct sbus_dev *sdev) 31 static int __devinit mydevice_probe_one(struct sbus_dev *sdev)
32 { 32 {
33 struct mydevice *mp = kzalloc(sizeof(*mp), GFP_KERNEL);
34
35 if (!mp)
36 return -ENODEV;
37
38 ...
39 dev_set_drvdata(&sdev->ofdev.dev, mp);
40 return 0;
33 ... 41 ...
34 } 42 }
35 43
36 static int mydevice_match(struct sbus_dev *sdev) 44 static int __devinit mydevice_probe(struct of_device *dev,
45 const struct of_device_id *match)
37 { 46 {
38 if (some_criteria(sdev)) 47 struct sbus_dev *sdev = to_sbus_device(&dev->dev);
39 return 1; 48
40 return 0; 49 return mydevice_probe_one(sdev);
41 } 50 }
42 51
43 static void mydevice_probe(void) 52 static int __devexit mydevice_remove(struct of_device *dev)
44 { 53 {
45 struct sbus_bus *sbus; 54 struct sbus_dev *sdev = to_sbus_device(&dev->dev);
46 struct sbus_dev *sdev; 55 struct mydevice *mp = dev_get_drvdata(&dev->dev);
47 56
48 for_each_sbus(sbus) { 57 return mydevice_remove_one(sdev, mp);
49 for_each_sbusdev(sdev, sbus) {
50 if (mydevice_match(sdev))
51 init_one_mydevice(sdev);
52 }
53 }
54 } 58 }
55 59
56 All this does is walk through all SBUS devices in the 60 static struct of_device_id mydevice_match[] = {
57system, checks each to see if it is of the type which 61 {
58your driver is written for, and if so it calls the init 62 .name = "mydevice",
59routine to attach the device and prepare to drive it. 63 },
64 {},
65 };
66
67 MODULE_DEVICE_TABLE(of, mydevice_match);
60 68
61 "init_one_mydevice" might do things like allocate software 69 static struct of_platform_driver mydevice_driver = {
62state structures, map in I/O registers, place the hardware 70 .name = "mydevice",
63into an initialized state, etc. 71 .match_table = mydevice_match,
72 .probe = mydevice_probe,
73 .remove = __devexit_p(mydevice_remove),
74 };
75
76 static int __init mydevice_init(void)
77 {
78 return of_register_driver(&mydevice_driver, &sbus_bus_type);
79 }
80
81 static void __exit mydevice_exit(void)
82 {
83 of_unregister_driver(&mydevice_driver);
84 }
85
86 module_init(mydevice_init);
87 module_exit(mydevice_exit);
88
89 The mydevice_match table is a series of entries which
90describes what SBUS devices your driver is meant for. In the
91simplest case you specify a string for the 'name' field. Every
92SBUS device with a 'name' property matching your string will
93be passed one-by-one to your .probe method.
94
95 You should store away your device private state structure
96pointer in the drvdata area so that you can retrieve it later on
97in your .remove method.
98
99 Any memory allocated, registers mapped, IRQs registered,
100etc. must be undone by your .remove method so that all resources
101of your device are released by the time it returns.
102
103 You should _NOT_ use the for_each_sbus(), for_each_sbusdev(),
104and for_all_sbusdev() interfaces. They are deprecated, will be
105removed, and no new driver should reference them ever.
64 106
65 Mapping and Accessing I/O Registers 107 Mapping and Accessing I/O Registers
66 108
@@ -263,10 +305,3 @@ discussed above and plus it handles both PCI and SBUS boards.
263 Lance driver abuses consistent mappings for data transfer. 305 Lance driver abuses consistent mappings for data transfer.
264It is a nifty trick which we do not particularly recommend... 306It is a nifty trick which we do not particularly recommend...
265Just check it out and know that it's legal. 307Just check it out and know that it's legal.
266
267 Bad examples, do NOT use
268
269 drivers/video/cgsix.c
270 This one uses result of sbus_ioremap as if it is an address.
271This does NOT work on sparc64 and therefore is broken. We will
272convert it at a later date.
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt
index 3f1c5464b1c9..5a311c38dd1a 100644
--- a/Documentation/sparse.txt
+++ b/Documentation/sparse.txt
@@ -1,5 +1,6 @@
1Copyright 2004 Linus Torvalds 1Copyright 2004 Linus Torvalds
2Copyright 2004 Pavel Machek <pavel@suse.cz> 2Copyright 2004 Pavel Machek <pavel@suse.cz>
3Copyright 2006 Bob Copeland <me@bobcopeland.com>
3 4
4Using sparse for typechecking 5Using sparse for typechecking
5~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -41,15 +42,8 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian
41vs cpu-endian vs whatever), and there the constant "0" really _is_ 42vs cpu-endian vs whatever), and there the constant "0" really _is_
42special. 43special.
43 44
44Use 45Getting sparse
45 46~~~~~~~~~~~~~~
46 make C=[12] CF=-Wbitwise
47
48or you don't get any checking at all.
49
50
51Where to get sparse
52~~~~~~~~~~~~~~~~~~~
53 47
54With git, you can just get it from 48With git, you can just get it from
55 49
@@ -57,7 +51,7 @@ With git, you can just get it from
57 51
58and DaveJ has tar-balls at 52and DaveJ has tar-balls at
59 53
60 http://www.codemonkey.org.uk/projects/git-snapshots/sparse/ 54 http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
61 55
62 56
63Once you have it, just do 57Once you have it, just do
@@ -65,8 +59,20 @@ Once you have it, just do
65 make 59 make
66 make install 60 make install
67 61
68as your regular user, and it will install sparse in your ~/bin directory. 62as a regular user, and it will install sparse in your ~/bin directory.
69After that, doing a kernel make with "make C=1" will run sparse on all the 63
70C files that get recompiled, or with "make C=2" will run sparse on the 64Using sparse
71files whether they need to be recompiled or not (ie the latter is fast way 65~~~~~~~~~~~~
72to check the whole tree if you have already built it). 66
67Do a kernel make with "make C=1" to run sparse on all the C files that get
68recompiled, or use "make C=2" to run sparse on the files whether they need to
69be recompiled or not. The latter is a fast way to check the whole tree if you
70have already built it.
71
72The optional make variable CF can be used to pass arguments to sparse. The
73build system passes -Wbitwise to sparse automatically. To perform endianness
74checks, you may define __CHECK_ENDIAN__:
75
76 make C=2 CF="-D__CHECK_ENDIAN__"
77
78These checks are disabled by default as they generate a host of warnings.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index a46c10fcddfc..7cee90223d3a 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -28,7 +28,8 @@ Currently, these files are in /proc/sys/vm:
28- block_dump 28- block_dump
29- drop-caches 29- drop-caches
30- zone_reclaim_mode 30- zone_reclaim_mode
31- zone_reclaim_interval 31- min_unmapped_ratio
32- panic_on_oom
32 33
33============================================================== 34==============================================================
34 35
@@ -166,15 +167,28 @@ use of files and builds up large slab caches. However, the slab
166shrink operation is global, may take a long time and free slabs 167shrink operation is global, may take a long time and free slabs
167in all nodes of the system. 168in all nodes of the system.
168 169
169================================================================ 170=============================================================
170 171
171zone_reclaim_interval: 172min_unmapped_ratio:
172 173
173The time allowed for off node allocations after zone reclaim 174This is available only on NUMA kernels.
174has failed to reclaim enough pages to allow a local allocation.
175 175
176Time is set in seconds and set by default to 30 seconds. 176A percentage of the file backed pages in each zone. Zone reclaim will only
177occur if more than this percentage of pages are file backed and unmapped.
178This is to ensure that a minimal amount of local pages is still available for
179file I/O even if the node is overallocated.
177 180
178Reduce the interval if undesired off node allocations occur. However, too 181The default is 1 percent.
179frequent scans will have a negative impact onoff node allocation performance. 182
183=============================================================
184
185panic_on_oom
186
187This enables or disables panic on out-of-memory feature. If this is set to 1,
188the kernel panics when out-of-memory happens. If this is set to 0, the kernel
189will kill some rogue process, called oom_killer. Usually, oom_killer can kill
190rogue processes and system will survive. If you want to panic the system
191rather than killing rogue processes, set this to 1.
192
193The default value is 0.
180 194
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index ad0bedf678b3..e0188a23fd5e 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -115,8 +115,9 @@ trojan program is running at console and which could grab your password
115when you would try to login. It will kill all programs on given console 115when you would try to login. It will kill all programs on given console
116and thus letting you make sure that the login prompt you see is actually 116and thus letting you make sure that the login prompt you see is actually
117the one from init, not some trojan program. 117the one from init, not some trojan program.
118IMPORTANT:In its true form it is not a true SAK like the one in :IMPORTANT 118IMPORTANT: In its true form it is not a true SAK like the one in a :IMPORTANT
119IMPORTANT:c2 compliant systems, and it should be mistook as such. :IMPORTANT 119IMPORTANT: c2 compliant system, and it should not be mistaken as :IMPORTANT
120IMPORTANT: such. :IMPORTANT
120 It seems others find it useful as (System Attention Key) which is 121 It seems others find it useful as (System Attention Key) which is
121useful when you want to exit a program that will not let you switch consoles. 122useful when you want to exit a program that will not let you switch consoles.
122(For example, X or a svgalib program.) 123(For example, X or a svgalib program.)
diff --git a/Documentation/tty.txt b/Documentation/tty.txt
index 8ff7bc2a0811..dab56604745d 100644
--- a/Documentation/tty.txt
+++ b/Documentation/tty.txt
@@ -80,13 +80,6 @@ receive_buf() - Hand buffers of bytes from the driver to the ldisc
80 for processing. Semantics currently rather 80 for processing. Semantics currently rather
81 mysterious 8( 81 mysterious 8(
82 82
83receive_room() - Can be called by the driver layer at any time when
84 the ldisc is opened. The ldisc must be able to
85 handle the reported amount of data at that instant.
86 Synchronization between active receive_buf and
87 receive_room calls is down to the driver not the
88 ldisc. Must not sleep.
89
90write_wakeup() - May be called at any point between open and close. 83write_wakeup() - May be called at any point between open and close.
91 The TTY_DO_WRITE_WAKEUP flag indicates if a call 84 The TTY_DO_WRITE_WAKEUP flag indicates if a call
92 is needed but always races versus calls. Thus the 85 is needed but always races versus calls. Thus the
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
index 63cb7edd177e..e65ec828d7aa 100644
--- a/Documentation/usb/usbmon.txt
+++ b/Documentation/usb/usbmon.txt
@@ -29,14 +29,13 @@ if usbmon is built into the kernel.
29 29
30# mount -t debugfs none_debugs /sys/kernel/debug 30# mount -t debugfs none_debugs /sys/kernel/debug
31# modprobe usbmon 31# modprobe usbmon
32#
32 33
33Verify that bus sockets are present. 34Verify that bus sockets are present.
34 35
35[root@lembas zaitcev]# ls /sys/kernel/debug/usbmon 36# ls /sys/kernel/debug/usbmon
361s 1t 2s 2t 3s 3t 4s 4t 371s 1t 2s 2t 3s 3t 4s 4t
37[root@lembas zaitcev]# 38#
38
39# ls /sys/kernel
40 39
412. Find which bus connects to the desired device 402. Find which bus connects to the desired device
42 41
@@ -76,7 +75,7 @@ that the file size is not excessive for your favourite editor.
76 75
77* Raw text data format 76* Raw text data format
78 77
79The '0t' type data consists of a stream of events, such as URB submission, 78The '1t' type data consists of a stream of events, such as URB submission,
80URB callback, submission error. Every event is a text line, which consists 79URB callback, submission error. Every event is a text line, which consists
81of whitespace separated words. The number of position of words may depend 80of whitespace separated words. The number of position of words may depend
82on the event type, but there is a set of words, common for all types. 81on the event type, but there is a set of words, common for all types.
@@ -97,20 +96,25 @@ Here is the list of words, from left to right:
97 Zi Zo Isochronous input and output 96 Zi Zo Isochronous input and output
98 Ii Io Interrupt input and output 97 Ii Io Interrupt input and output
99 Bi Bo Bulk input and output 98 Bi Bo Bulk input and output
100 Device address and Endpoint number are decimal numbers with leading zeroes 99 Device address and Endpoint number are 3-digit and 2-digit (respectively)
101 or 3 and 2 positions, correspondingly. 100 decimal numbers, with leading zeroes.
102- URB Status. This field makes no sense for submissions, but is present 101- URB Status. In most cases, this field contains a number, sometimes negative,
103 to help scripts with parsing. In error case, it contains the error code. 102 which represents a "status" field of the URB. This field makes no sense for
104 In case of a setup packet, it contains a Setup Tag. If scripts read a number 103 submissions, but is present anyway to help scripts with parsing. When an
105 in this field, they proceed to read Data Length. Otherwise, they read 104 error occurs, the field contains the error code. In case of a submission of
106 the setup packet before reading the Data Length. 105 a Control packet, this field contains a Setup Tag instead of an error code.
106 It is easy to tell whether the Setup Tag is present because it is never a
107 number. Thus if scripts find a number in this field, they proceed to read
108 Data Length. If they find something else, like a letter, they read the setup
109 packet before reading the Data Length.
107- Setup packet, if present, consists of 5 words: one of each for bmRequestType, 110- Setup packet, if present, consists of 5 words: one of each for bmRequestType,
108 bRequest, wValue, wIndex, wLength, as specified by the USB Specification 2.0. 111 bRequest, wValue, wIndex, wLength, as specified by the USB Specification 2.0.
109 These words are safe to decode if Setup Tag was 's'. Otherwise, the setup 112 These words are safe to decode if Setup Tag was 's'. Otherwise, the setup
110 packet was present, but not captured, and the fields contain filler. 113 packet was present, but not captured, and the fields contain filler.
111- Data Length. This is the actual length in the URB. 114- Data Length. For submissions, this is the requested length. For callbacks,
115 this is the actual length.
112- Data tag. The usbmon may not always capture data, even if length is nonzero. 116- Data tag. The usbmon may not always capture data, even if length is nonzero.
113 Only if tag is '=', the data words are present. 117 The data words are present only if this tag is '='.
114- Data words follow, in big endian hexadecimal format. Notice that they are 118- Data words follow, in big endian hexadecimal format. Notice that they are
115 not machine words, but really just a byte stream split into words to make 119 not machine words, but really just a byte stream split into words to make
116 it easier to read. Thus, the last word may contain from one to four bytes. 120 it easier to read. Thus, the last word may contain from one to four bytes.
diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv
index b72706c58a44..4efa4645885f 100644
--- a/Documentation/video4linux/CARDLIST.bttv
+++ b/Documentation/video4linux/CARDLIST.bttv
@@ -87,7 +87,7 @@
87 86 -> Osprey 101/151 w/ svid 87 86 -> Osprey 101/151 w/ svid
88 87 -> Osprey 200/201/250/251 88 87 -> Osprey 200/201/250/251
89 88 -> Osprey 200/250 [0070:ff01] 89 88 -> Osprey 200/250 [0070:ff01]
90 89 -> Osprey 210/220 90 89 -> Osprey 210/220/230
91 90 -> Osprey 500 [0070:ff02] 91 90 -> Osprey 500 [0070:ff02]
92 91 -> Osprey 540 [0070:ff04] 92 91 -> Osprey 540 [0070:ff04]
93 92 -> Osprey 2000 [0070:ff03] 93 92 -> Osprey 2000 [0070:ff03]
@@ -111,7 +111,7 @@
111110 -> IVC-100 [ff00:a132] 111110 -> IVC-100 [ff00:a132]
112111 -> IVC-120G [ff00:a182,ff01:a182,ff02:a182,ff03:a182,ff04:a182,ff05:a182,ff06:a182,ff07:a182,ff08:a182,ff09:a182,ff0a:a182,ff0b:a182,ff0c:a182,ff0d:a182,ff0e:a182,ff0f:a182] 112111 -> IVC-120G [ff00:a182,ff01:a182,ff02:a182,ff03:a182,ff04:a182,ff05:a182,ff06:a182,ff07:a182,ff08:a182,ff09:a182,ff0a:a182,ff0b:a182,ff0c:a182,ff0d:a182,ff0e:a182,ff0f:a182]
113112 -> pcHDTV HD-2000 TV [7063:2000] 113112 -> pcHDTV HD-2000 TV [7063:2000]
114113 -> Twinhan DST + clones [11bd:0026,1822:0001,270f:fc00] 114113 -> Twinhan DST + clones [11bd:0026,1822:0001,270f:fc00,1822:0026]
115114 -> Winfast VC100 [107d:6607] 115114 -> Winfast VC100 [107d:6607]
116115 -> Teppro TEV-560/InterVision IV-560 116115 -> Teppro TEV-560/InterVision IV-560
117116 -> SIMUS GVC1100 [aa6a:82b2] 117116 -> SIMUS GVC1100 [aa6a:82b2]
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88
index 3b39a91b24bd..00d9a1f2a54c 100644
--- a/Documentation/video4linux/CARDLIST.cx88
+++ b/Documentation/video4linux/CARDLIST.cx88
@@ -15,7 +15,7 @@
15 14 -> KWorld/VStream XPert DVB-T [17de:08a6] 15 14 -> KWorld/VStream XPert DVB-T [17de:08a6]
16 15 -> DViCO FusionHDTV DVB-T1 [18ac:db00] 16 15 -> DViCO FusionHDTV DVB-T1 [18ac:db00]
17 16 -> KWorld LTV883RF 17 16 -> KWorld LTV883RF
18 17 -> DViCO FusionHDTV 3 Gold-Q [18ac:d810] 18 17 -> DViCO FusionHDTV 3 Gold-Q [18ac:d810,18ac:d800]
19 18 -> Hauppauge Nova-T DVB-T [0070:9002,0070:9001] 19 18 -> Hauppauge Nova-T DVB-T [0070:9002,0070:9001]
20 19 -> Conexant DVB-T reference design [14f1:0187] 20 19 -> Conexant DVB-T reference design [14f1:0187]
21 20 -> Provideo PV259 [1540:2580] 21 20 -> Provideo PV259 [1540:2580]
@@ -40,8 +40,14 @@
40 39 -> KWorld DVB-S 100 [17de:08b2] 40 39 -> KWorld DVB-S 100 [17de:08b2]
41 40 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid [0070:9400,0070:9402] 41 40 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid [0070:9400,0070:9402]
42 41 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid (Low Profile) [0070:9800,0070:9802] 42 41 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid (Low Profile) [0070:9800,0070:9802]
43 42 -> digitalnow DNTV Live! DVB-T Pro [1822:0025] 43 42 -> digitalnow DNTV Live! DVB-T Pro [1822:0025,1822:0019]
44 43 -> KWorld/VStream XPert DVB-T with cx22702 [17de:08a1] 44 43 -> KWorld/VStream XPert DVB-T with cx22702 [17de:08a1]
45 44 -> DViCO FusionHDTV DVB-T Dual Digital [18ac:db50,18ac:db54] 45 44 -> DViCO FusionHDTV DVB-T Dual Digital [18ac:db50,18ac:db54]
46 45 -> KWorld HardwareMpegTV XPert [17de:0840] 46 45 -> KWorld HardwareMpegTV XPert [17de:0840]
47 46 -> DViCO FusionHDTV DVB-T Hybrid [18ac:db40,18ac:db44] 47 46 -> DViCO FusionHDTV DVB-T Hybrid [18ac:db40,18ac:db44]
48 47 -> pcHDTV HD5500 HDTV [7063:5500]
49 48 -> Kworld MCE 200 Deluxe [17de:0841]
50 49 -> PixelView PlayTV P7000 [1554:4813]
51 50 -> NPG Tech Real TV FM Top 10 [14f1:0842]
52 51 -> WinFast DTV2000 H [107d:665e]
53 52 -> Geniatech DVB-S [14f1:0084]
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index bca50903233f..9068b669f5ee 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -93,3 +93,4 @@
93 92 -> AVerMedia A169 B1 [1461:6360] 93 92 -> AVerMedia A169 B1 [1461:6360]
94 93 -> Medion 7134 Bridge #2 [16be:0005] 94 93 -> Medion 7134 Bridge #2 [16be:0005]
95 94 -> LifeView FlyDVB-T Hybrid Cardbus [5168:3306,5168:3502] 95 94 -> LifeView FlyDVB-T Hybrid Cardbus [5168:3306,5168:3502]
96 95 -> LifeView FlyVIDEO3000 (NTSC) [5169:0138]
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
index 1bcdac67dd8c..44134f04b82a 100644
--- a/Documentation/video4linux/CARDLIST.tuner
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -62,7 +62,7 @@ tuner=60 - Thomson DTT 761X (ATSC/NTSC)
62tuner=61 - Tena TNF9533-D/IF/TNF9533-B/DF 62tuner=61 - Tena TNF9533-D/IF/TNF9533-B/DF
63tuner=62 - Philips TEA5767HN FM Radio 63tuner=62 - Philips TEA5767HN FM Radio
64tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner 64tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner
65tuner=64 - LG TDVS-H062F/TUA6034 65tuner=64 - LG TDVS-H06xF
66tuner=65 - Ymec TVF66T5-B/DFF 66tuner=65 - Ymec TVF66T5-B/DFF
67tuner=66 - LG TALN series 67tuner=66 - LG TALN series
68tuner=67 - Philips TD1316 Hybrid Tuner 68tuner=67 - Philips TD1316 Hybrid Tuner
@@ -71,3 +71,4 @@ tuner=69 - Tena TNF 5335 and similar models
71tuner=70 - Samsung TCPN 2121P30A 71tuner=70 - Samsung TCPN 2121P30A
72tuner=71 - Xceive xc3028 72tuner=71 - Xceive xc3028
73tuner=72 - Thomson FE6600 73tuner=72 - Thomson FE6600
74tuner=73 - Samsung TCPG 6121P30A
diff --git a/Documentation/video4linux/CQcam.txt b/Documentation/video4linux/CQcam.txt
index 464e4cec94cb..ade8651e2443 100644
--- a/Documentation/video4linux/CQcam.txt
+++ b/Documentation/video4linux/CQcam.txt
@@ -185,207 +185,10 @@ this work is documented at the video4linux2 site listed below.
185 185
1869.0 --- A sample program using v4lgrabber, 1869.0 --- A sample program using v4lgrabber,
187 187
188This program is a simple image grabber that will copy a frame from the 188v4lgrab is a simple image grabber that will copy a frame from the
189first video device, /dev/video0 to standard output in portable pixmap 189first video device, /dev/video0 to standard output in portable pixmap
190format (.ppm) Using this like: 'v4lgrab | convert - c-qcam.jpg' 190format (.ppm) To produce .jpg output, you can use it like this:
191produced this picture of me at 191'v4lgrab | convert - c-qcam.jpg'
192 http://mug.sys.virginia.edu/~drf5n/extras/c-qcam.jpg
193
194-------------------- 8< ---------------- 8< -----------------------------
195
196/* Simple Video4Linux image grabber. */
197/*
198 * Video4Linux Driver Test/Example Framegrabbing Program
199 *
200 * Compile with:
201 * gcc -s -Wall -Wstrict-prototypes v4lgrab.c -o v4lgrab
202 * Use as:
203 * v4lgrab >image.ppm
204 *
205 * Copyright (C) 1998-05-03, Phil Blundell <philb@gnu.org>
206 * Copied from http://www.tazenda.demon.co.uk/phil/vgrabber.c
207 * with minor modifications (Dave Forrest, drf5n@virginia.edu).
208 *
209 */
210
211#include <unistd.h>
212#include <sys/types.h>
213#include <sys/stat.h>
214#include <fcntl.h>
215#include <stdio.h>
216#include <sys/ioctl.h>
217#include <stdlib.h>
218
219#include <linux/types.h>
220#include <linux/videodev.h>
221
222#define FILE "/dev/video0"
223
224/* Stole this from tvset.c */
225
226#define READ_VIDEO_PIXEL(buf, format, depth, r, g, b) \
227{ \
228 switch (format) \
229 { \
230 case VIDEO_PALETTE_GREY: \
231 switch (depth) \
232 { \
233 case 4: \
234 case 6: \
235 case 8: \
236 (r) = (g) = (b) = (*buf++ << 8);\
237 break; \
238 \
239 case 16: \
240 (r) = (g) = (b) = \
241 *((unsigned short *) buf); \
242 buf += 2; \
243 break; \
244 } \
245 break; \
246 \
247 \
248 case VIDEO_PALETTE_RGB565: \
249 { \
250 unsigned short tmp = *(unsigned short *)buf; \
251 (r) = tmp&0xF800; \
252 (g) = (tmp<<5)&0xFC00; \
253 (b) = (tmp<<11)&0xF800; \
254 buf += 2; \
255 } \
256 break; \
257 \
258 case VIDEO_PALETTE_RGB555: \
259 (r) = (buf[0]&0xF8)<<8; \
260 (g) = ((buf[0] << 5 | buf[1] >> 3)&0xF8)<<8; \
261 (b) = ((buf[1] << 2 ) & 0xF8)<<8; \
262 buf += 2; \
263 break; \
264 \
265 case VIDEO_PALETTE_RGB24: \
266 (r) = buf[0] << 8; (g) = buf[1] << 8; \
267 (b) = buf[2] << 8; \
268 buf += 3; \
269 break; \
270 \
271 default: \
272 fprintf(stderr, \
273 "Format %d not yet supported\n", \
274 format); \
275 } \
276}
277
278int get_brightness_adj(unsigned char *image, long size, int *brightness) {
279 long i, tot = 0;
280 for (i=0;i<size*3;i++)
281 tot += image[i];
282 *brightness = (128 - tot/(size*3))/3;
283 return !((tot/(size*3)) >= 126 && (tot/(size*3)) <= 130);
284}
285
286int main(int argc, char ** argv)
287{
288 int fd = open(FILE, O_RDONLY), f;
289 struct video_capability cap;
290 struct video_window win;
291 struct video_picture vpic;
292
293 unsigned char *buffer, *src;
294 int bpp = 24, r, g, b;
295 unsigned int i, src_depth;
296
297 if (fd < 0) {
298 perror(FILE);
299 exit(1);
300 }
301
302 if (ioctl(fd, VIDIOCGCAP, &cap) < 0) {
303 perror("VIDIOGCAP");
304 fprintf(stderr, "(" FILE " not a video4linux device?)\n");
305 close(fd);
306 exit(1);
307 }
308
309 if (ioctl(fd, VIDIOCGWIN, &win) < 0) {
310 perror("VIDIOCGWIN");
311 close(fd);
312 exit(1);
313 }
314
315 if (ioctl(fd, VIDIOCGPICT, &vpic) < 0) {
316 perror("VIDIOCGPICT");
317 close(fd);
318 exit(1);
319 }
320
321 if (cap.type & VID_TYPE_MONOCHROME) {
322 vpic.depth=8;
323 vpic.palette=VIDEO_PALETTE_GREY; /* 8bit grey */
324 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
325 vpic.depth=6;
326 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
327 vpic.depth=4;
328 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
329 fprintf(stderr, "Unable to find a supported capture format.\n");
330 close(fd);
331 exit(1);
332 }
333 }
334 }
335 } else {
336 vpic.depth=24;
337 vpic.palette=VIDEO_PALETTE_RGB24;
338
339 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
340 vpic.palette=VIDEO_PALETTE_RGB565;
341 vpic.depth=16;
342
343 if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
344 vpic.palette=VIDEO_PALETTE_RGB555;
345 vpic.depth=15;
346
347 if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
348 fprintf(stderr, "Unable to find a supported capture format.\n");
349 return -1;
350 }
351 }
352 }
353 }
354
355 buffer = malloc(win.width * win.height * bpp);
356 if (!buffer) {
357 fprintf(stderr, "Out of memory.\n");
358 exit(1);
359 }
360
361 do {
362 int newbright;
363 read(fd, buffer, win.width * win.height * bpp);
364 f = get_brightness_adj(buffer, win.width * win.height, &newbright);
365 if (f) {
366 vpic.brightness += (newbright << 8);
367 if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
368 perror("VIDIOSPICT");
369 break;
370 }
371 }
372 } while (f);
373
374 fprintf(stdout, "P6\n%d %d 255\n", win.width, win.height);
375
376 src = buffer;
377
378 for (i = 0; i < win.width * win.height; i++) {
379 READ_VIDEO_PIXEL(src, vpic.palette, src_depth, r, g, b);
380 fputc(r>>8, stdout);
381 fputc(g>>8, stdout);
382 fputc(b>>8, stdout);
383 }
384
385 close(fd);
386 return 0;
387}
388-------------------- 8< ---------------- 8< -----------------------------
389 192
390 193
39110.0 --- Other Information 19410.0 --- Other Information
diff --git a/Documentation/video4linux/README.pvrusb2 b/Documentation/video4linux/README.pvrusb2
new file mode 100644
index 000000000000..c73a32c34528
--- /dev/null
+++ b/Documentation/video4linux/README.pvrusb2
@@ -0,0 +1,212 @@
1
2$Id$
3Mike Isely <isely@pobox.com>
4
5 pvrusb2 driver
6
7Background:
8
9 This driver is intended for the "Hauppauge WinTV PVR USB 2.0", which
10 is a USB 2.0 hosted TV Tuner. This driver is a work in progress.
11 Its history started with the reverse-engineering effort by Björn
12 Danielsson <pvrusb2@dax.nu> whose web page can be found here:
13
14 http://pvrusb2.dax.nu/
15
16 From there Aurelien Alleaume <slts@free.fr> began an effort to
17 create a video4linux compatible driver. I began with Aurelien's
18 last known snapshot and evolved the driver to the state it is in
19 here.
20
21 More information on this driver can be found at:
22
23 http://www.isely.net/pvrusb2.html
24
25
26 This driver has a strong separation of layers. They are very
27 roughly:
28
29 1a. Low level wire-protocol implementation with the device.
30
31 1b. I2C adaptor implementation and corresponding I2C client drivers
32 implemented elsewhere in V4L.
33
34 1c. High level hardware driver implementation which coordinates all
35 activities that ensure correct operation of the device.
36
37 2. A "context" layer which manages instancing of driver, setup,
38 tear-down, arbitration, and interaction with high level
39 interfaces appropriately as devices are hotplugged in the
40 system.
41
42 3. High level interfaces which glue the driver to various published
43 Linux APIs (V4L, sysfs, maybe DVB in the future).
44
45 The most important shearing layer is between the top 2 layers. A
46 lot of work went into the driver to ensure that any kind of
47 conceivable API can be laid on top of the core driver. (Yes, the
48 driver internally leverages V4L to do its work but that really has
49 nothing to do with the API published by the driver to the outside
50 world.) The architecture allows for different APIs to
51 simultaneously access the driver. I have a strong sense of fairness
52 about APIs and also feel that it is a good design principle to keep
53 implementation and interface isolated from each other. Thus while
54 right now the V4L high level interface is the most complete, the
55 sysfs high level interface will work equally well for similar
56 functions, and there's no reason I see right now why it shouldn't be
57 possible to produce a DVB high level interface that can sit right
58 alongside V4L.
59
60 NOTE: Complete documentation on the pvrusb2 driver is contained in
61 the html files within the doc directory; these are exactly the same
62 as what is on the web site at the time. Browse those files
63 (especially the FAQ) before asking questions.
64
65
66Building
67
68 To build these modules essentially amounts to just running "Make",
69 but you need the kernel source tree nearby and you will likely also
70 want to set a few controlling environment variables first in order
71 to link things up with that source tree. Please see the Makefile
72 here for comments that explain how to do that.
73
74
75Source file list / functional overview:
76
77 (Note: The term "module" used below generally refers to loosely
78 defined functional units within the pvrusb2 driver and bears no
79 relation to the Linux kernel's concept of a loadable module.)
80
81 pvrusb2-audio.[ch] - This is glue logic that resides between this
82 driver and the msp3400.ko I2C client driver (which is found
83 elsewhere in V4L).
84
85 pvrusb2-context.[ch] - This module implements the context for an
86 instance of the driver. Everything else eventually ties back to
87 or is otherwise instanced within the data structures implemented
88 here. Hotplugging is ultimately coordinated here. All high level
89 interfaces tie into the driver through this module. This module
90 helps arbitrate each interface's access to the actual driver core,
91 and is designed to allow concurrent access through multiple
92 instances of multiple interfaces (thus you can for example change
93 the tuner's frequency through sysfs while simultaneously streaming
94 video through V4L out to an instance of mplayer).
95
96 pvrusb2-debug.h - This header defines a printk() wrapper and a mask
97 of debugging bit definitions for the various kinds of debug
98 messages that can be enabled within the driver.
99
100 pvrusb2-debugifc.[ch] - This module implements a crude command line
101 oriented debug interface into the driver. Aside from being part
102 of the process for implementing manual firmware extraction (see
103 the pvrusb2 web site mentioned earlier), probably I'm the only one
104 who has ever used this. It is mainly a debugging aid.
105
106 pvrusb2-eeprom.[ch] - This is glue logic that resides between this
107 driver and the tveeprom.ko module, which is itself implemented
108 elsewhere in V4L.
109
110 pvrusb2-encoder.[ch] - This module implements all protocol needed to
111 interact with the Conexant mpeg2 encoder chip within the pvrusb2
112 device. It is a crude echo of corresponding logic in ivtv,
113 however the design goals (strict isolation) and physical layer
114 (proxy through USB instead of PCI) are enough different that this
115 implementation had to be completely different.
116
117 pvrusb2-hdw-internal.h - This header defines the core data structure
118 in the driver used to track ALL internal state related to control
119 of the hardware. Nobody outside of the core hardware-handling
120 modules should have any business using this header. All external
121 access to the driver should be through one of the high level
122 interfaces (e.g. V4L, sysfs, etc), and in fact even those high
123 level interfaces are restricted to the API defined in
124 pvrusb2-hdw.h and NOT this header.
125
126 pvrusb2-hdw.h - This header defines the full internal API for
127 controlling the hardware. High level interfaces (e.g. V4L, sysfs)
128 will work through here.
129
130 pvrusb2-hdw.c - This module implements all the various bits of logic
131 that handle overall control of a specific pvrusb2 device.
132 (Policy, instantiation, and arbitration of pvrusb2 devices fall
133 within the jurisdiction of pvrusb2-context, not here).
134
135 pvrusb2-i2c-chips-*.c - These modules implement the glue logic to
136 tie together and configure various I2C modules as they attach to
137 the I2C bus. There are two versions of this file. The "v4l2"
138 version is intended to be used in-tree alongside V4L, where we
139 implement just the logic that makes sense for a pure V4L
140 environment. The "all" version is intended for use outside of
141 V4L, where we might encounter other possibly "challenging" modules
142 from ivtv or older kernel snapshots (or even the support modules
143 in the standalone snapshot).
144
145 pvrusb2-i2c-cmd-v4l1.[ch] - This module implements generic V4L1
146 compatible commands to the I2C modules. It is here where state
147 changes inside the pvrusb2 driver are translated into V4L1
148 commands that are in turn sent to the various I2C modules.
149
150 pvrusb2-i2c-cmd-v4l2.[ch] - This module implements generic V4L2
151 compatible commands to the I2C modules. It is here where state
152 changes inside the pvrusb2 driver are translated into V4L2
153 commands that are in turn sent to the various I2C modules.
154
155 pvrusb2-i2c-core.[ch] - This module provides an implementation of a
156 kernel-friendly I2C adaptor driver, through which other external
157 I2C client drivers (e.g. msp3400, tuner, lirc) may connect and
158 operate corresponding chips within the pvrusb2 device. It is
159 through here that other V4L modules can reach into this driver to
160 operate specific pieces (and those modules are in turn driven by
161 glue logic which is coordinated by pvrusb2-hdw, doled out by
162 pvrusb2-context, and then ultimately made available to users
163 through one of the high level interfaces).
164
165 pvrusb2-io.[ch] - This module implements a very low level ring of
166 transfer buffers, required in order to stream data from the
167 device. This module is *very* low level. It only operates the
168 buffers and makes no attempt to define any policy or mechanism for
169 how such buffers might be used.
170
171 pvrusb2-ioread.[ch] - This module layers on top of pvrusb2-io.[ch]
172 to provide a streaming API usable by a read() system call style of
173 I/O. Right now this is the only layer on top of pvrusb2-io.[ch],
174 however the underlying architecture here was intended to allow for
175 other styles of I/O to be implemented with additional modules, like
176 mmap()'ed buffers or something even more exotic.
177
178 pvrusb2-main.c - This is the top level of the driver. Module level
179 and USB core entry points are here. This is our "main".
180
181 pvrusb2-sysfs.[ch] - This is the high level interface which ties the
182 pvrusb2 driver into sysfs. Through this interface you can do
183 everything with the driver except actually stream data.
184
185 pvrusb2-tuner.[ch] - This is glue logic that resides between this
186 driver and the tuner.ko I2C client driver (which is found
187 elsewhere in V4L).
188
189 pvrusb2-util.h - This header defines some common macros used
190 throughout the driver. These macros are not really specific to
191 the driver, but they had to go somewhere.
192
193 pvrusb2-v4l2.[ch] - This is the high level interface which ties the
194 pvrusb2 driver into video4linux. It is through here that V4L
195 applications can open and operate the driver in the usual V4L
196 ways. Note that **ALL** V4L functionality is published only
197 through here and nowhere else.
198
199 pvrusb2-video-*.[ch] - This is glue logic that resides between this
200 driver and the saa711x.ko I2C client driver (which is found
201 elsewhere in V4L). Note that saa711x.ko used to be known as
202 saa7115.ko in ivtv. There are two versions of this; one is
203 selected depending on the particular saa711[5x].ko that is found.
204
205 pvrusb2.h - This header contains compile time tunable parameters
206 (and at the moment the driver has very little that needs to be
207 tuned).
208
209
210 -Mike Isely
211 isely@pobox.com
212
diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran
index be9f21b84555..040a2c841ae9 100644
--- a/Documentation/video4linux/Zoran
+++ b/Documentation/video4linux/Zoran
@@ -33,6 +33,21 @@ Inputs/outputs: Composite and S-video
33Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps) 33Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
34Card number: 7 34Card number: 7
35 35
36AverMedia 6 Eyes AVS6EYES:
37* Zoran zr36067 PCI controller
38* Zoran zr36060 MJPEG codec
39* Samsung ks0127 TV decoder
40* Conexant bt866 TV encoder
41Drivers to use: videodev, i2c-core, i2c-algo-bit,
42 videocodec, ks0127, bt866, zr36060, zr36067
43Inputs/outputs: Six physical inputs. 1-6 are composite,
44 1-2, 3-4, 5-6 doubles as S-video,
45 1-3 triples as component.
46 One composite output.
47Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
48Card number: 8
49Not autodetected, card=8 is necessary.
50
36Linux Media Labs LML33: 51Linux Media Labs LML33:
37* Zoran zr36067 PCI controller 52* Zoran zr36067 PCI controller
38* Zoran zr36060 MJPEG codec 53* Zoran zr36060 MJPEG codec
@@ -192,6 +207,10 @@ Micronas vpx3220a TV decoder
192was introduced in 1996, is used in the DC30 and DC30+ and 207was introduced in 1996, is used in the DC30 and DC30+ and
193can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC 44, PAL 60, SECAM,NTSC Comb 208can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC 44, PAL 60, SECAM,NTSC Comb
194 209
210Samsung ks0127 TV decoder
211is used in the AVS6EYES card and
212can handle: NTSC-M/N/44, PAL-M/N/B/G/H/I/D/K/L and SECAM
213
195=========================== 214===========================
196 215
1971.2 What the TV encoder can do an what not 2161.2 What the TV encoder can do an what not
@@ -221,6 +240,10 @@ ITT mse3000 TV encoder
221was introduced in 1991, is used in the DC10 old 240was introduced in 1991, is used in the DC10 old
222can generate: PAL , NTSC , SECAM 241can generate: PAL , NTSC , SECAM
223 242
243Conexant bt866 TV encoder
244is used in AVS6EYES, and
245can generate: NTSC/PAL, PAL-M, PAL-N
246
224The adv717x, should be able to produce PAL N. But you find nothing PAL N 247The adv717x, should be able to produce PAL N. But you find nothing PAL N
225specific in the registers. Seem that you have to reuse a other standard 248specific in the registers. Seem that you have to reuse a other standard
226to generate PAL N, maybe it would work if you use the PAL M settings. 249to generate PAL N, maybe it would work if you use the PAL M settings.
diff --git a/Documentation/video4linux/bttv/CONTRIBUTORS b/Documentation/video4linux/bttv/CONTRIBUTORS
index aef49db8847d..8aad6dd93d6b 100644
--- a/Documentation/video4linux/bttv/CONTRIBUTORS
+++ b/Documentation/video4linux/bttv/CONTRIBUTORS
@@ -1,4 +1,4 @@
1Contributors to bttv: 1Contributors to bttv:
2 2
3Michael Chu <mmchu@pobox.com> 3Michael Chu <mmchu@pobox.com>
4 AverMedia fix and more flexible card recognition 4 AverMedia fix and more flexible card recognition
@@ -8,8 +8,8 @@ Alan Cox <alan@redhat.com>
8 8
9Chris Kleitsch 9Chris Kleitsch
10 Hardware I2C 10 Hardware I2C
11 11
12Gerd Knorr <kraxel@cs.tu-berlin.de> 12Gerd Knorr <kraxel@cs.tu-berlin.de>
13 Radio card (ITT sound processor) 13 Radio card (ITT sound processor)
14 14
15bigfoot <bigfoot@net-way.net> 15bigfoot <bigfoot@net-way.net>
@@ -18,7 +18,7 @@ Ragnar Hojland Espinosa <ragnar@macula.net>
18 18
19 19
20+ many more (please mail me if you are missing in this list and would 20+ many more (please mail me if you are missing in this list and would
21 like to be mentioned) 21 like to be mentioned)
22 22
23 23
24 24
diff --git a/Documentation/video4linux/cx2341x/fw-calling.txt b/Documentation/video4linux/cx2341x/fw-calling.txt
new file mode 100644
index 000000000000..8d21181de537
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-calling.txt
@@ -0,0 +1,69 @@
1This page describes how to make calls to the firmware api.
2
3How to call
4===========
5
6The preferred calling convention is known as the firmware mailbox. The
7mailboxes are basically a fixed length array that serves as the call-stack.
8
9Firmware mailboxes can be located by searching the encoder and decoder memory
10for a 16 byte signature. That signature will be located on a 256-byte boundary.
11
12Signature:
130x78, 0x56, 0x34, 0x12, 0x12, 0x78, 0x56, 0x34,
140x34, 0x12, 0x78, 0x56, 0x56, 0x34, 0x12, 0x78
15
16The firmware implements 20 mailboxes of 20 32-bit words. The first 10 are
17reserved for API calls. The second 10 are used by the firmware for event
18notification.
19
20 Index Name
21 ----- ----
22 0 Flags
23 1 Command
24 2 Return value
25 3 Timeout
26 4-19 Parameter/Result
27
28
29The flags are defined in the following table. The direction is from the
30perspective of the firmware.
31
32 Bit Direction Purpose
33 --- --------- -------
34 2 O Firmware has processed the command.
35 1 I Driver has finished setting the parameters.
36 0 I Driver is using this mailbox.
37
38
39The command is a 32-bit enumerator. The API specifics may be found in the
40fw-*-api.txt documents.
41
42The return value is a 32-bit enumerator. Only two values are currently defined:
430=success and -1=command undefined.
44
45There are 16 32-bit parameter/result fields. The driver populates these fields
46with values for all the parameters required by the call. The firmware overwrites
47these fields with result values returned by the call. The API specifics may be
48found in the fw-*-api.txt documents.
49
50The timeout value protects the card from a hung driver thread. If the driver
51doesn't handle the completed call within the timeout specified, the firmware
52will reset that mailbox.
53
54To make an API call, the driver iterates over each mailbox looking for the
55first one available (bit 0 has been cleared). The driver sets that bit, fills
56in the command enumerator, the timeout value and any required parameters. The
57driver then sets the parameter ready bit (bit 1). The firmware scans the
58mailboxes for pending commands, processes them, sets the result code, populates
59the result value array with that call's return values and sets the call
60complete bit (bit 2). Once bit 2 is set, the driver should retrieve the results
61and clear all the flags. If the driver does not perform this task within the
62time set in the timeout register, the firmware will reset that mailbox.
63
64Event notifications are sent from the firmware to the host. The host tells the
65firmware which events it is interested in via an API call. That call tells the
66firmware which notification mailbox to use. The firmware signals the host via
67an interrupt. Only the 16 Results fields are used, the Flags, Command, Return
68value and Timeout words are not used.
69
diff --git a/Documentation/video4linux/cx2341x/fw-decoder-api.txt b/Documentation/video4linux/cx2341x/fw-decoder-api.txt
new file mode 100644
index 000000000000..9df4fb3ea0f2
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-decoder-api.txt
@@ -0,0 +1,319 @@
1Decoder firmware API description
2================================
3
4Note: this API is part of the decoder firmware, so it's cx23415 only.
5
6-------------------------------------------------------------------------------
7
8Name CX2341X_DEC_PING_FW
9Enum 0/0x00
10Description
11 This API call does nothing. It may be used to check if the firmware
12 is responding.
13
14-------------------------------------------------------------------------------
15
16Name CX2341X_DEC_START_PLAYBACK
17Enum 1/0x01
18Description
19 Begin or resume playback.
20Param[0]
21 0 based frame number in GOP to begin playback from.
22Param[1]
23 Specifies the number of muted audio frames to play before normal
24 audio resumes.
25
26-------------------------------------------------------------------------------
27
28Name CX2341X_DEC_STOP_PLAYBACK
29Enum 2/0x02
30Description
31 Ends playback and clears all decoder buffers. If PTS is not zero,
32 playback stops at specified PTS.
33Param[0]
34 Display 0=last frame, 1=black
35Param[1]
36 PTS low
37Param[2]
38 PTS high
39
40-------------------------------------------------------------------------------
41
42Name CX2341X_DEC_SET_PLAYBACK_SPEED
43Enum 3/0x03
44Description
45 Playback stream at speed other than normal. There are two modes of
46 operation:
47 Smooth: host transfers entire stream and firmware drops unused
48 frames.
49 Coarse: host drops frames based on indexing as required to achieve
50 desired speed.
51Param[0]
52 Bitmap:
53 0:7 0 normal
54 1 fast only "1.5 times"
55 n nX fast, 1/nX slow
56 30 Framedrop:
57 '0' during 1.5 times play, every other B frame is dropped
58 '1' during 1.5 times play, stream is unchanged (bitrate
59 must not exceed 8mbps)
60 31 Speed:
61 '0' slow
62 '1' fast
63Param[1]
64 Direction: 0=forward, 1=reverse
65Param[2]
66 Picture mask:
67 1=I frames
68 3=I, P frames
69 7=I, P, B frames
70Param[3]
71 B frames per GOP (for reverse play only)
72Param[4]
73 Mute audio: 0=disable, 1=enable
74Param[5]
75 Display 0=frame, 1=field
76Param[6]
77 Specifies the number of muted audio frames to play before normal audio
78 resumes.
79
80-------------------------------------------------------------------------------
81
82Name CX2341X_DEC_STEP_VIDEO
83Enum 5/0x05
84Description
85 Each call to this API steps the playback to the next unit defined below
86 in the current playback direction.
87Param[0]
88 0=frame, 1=top field, 2=bottom field
89
90-------------------------------------------------------------------------------
91
92Name CX2341X_DEC_SET_DMA_BLOCK_SIZE
93Enum 8/0x08
94Description
95 Set DMA transfer block size. Counterpart to API 0xC9
96Param[0]
97 DMA transfer block size in bytes. A different size may be specified
98 when issuing the DMA transfer command.
99
100-------------------------------------------------------------------------------
101
102Name CX2341X_DEC_GET_XFER_INFO
103Enum 9/0x09
104Description
105 This API call may be used to detect an end of stream condition.
106Result[0]
107 Stream type
108Result[1]
109 Address offset
110Result[2]
111 Maximum bytes to transfer
112Result[3]
113 Buffer fullness
114
115-------------------------------------------------------------------------------
116
117Name CX2341X_DEC_GET_DMA_STATUS
118Enum 10/0x0A
119Description
120 Status of the last DMA transfer
121Result[0]
122 Bit 1 set means transfer complete
123 Bit 2 set means DMA error
124 Bit 3 set means linked list error
125Result[1]
126 DMA type: 0=MPEG, 1=OSD, 2=YUV
127
128-------------------------------------------------------------------------------
129
130Name CX2341X_DEC_SCHED_DMA_FROM_HOST
131Enum 11/0x0B
132Description
133 Setup DMA from host operation. Counterpart to API 0xCC
134Param[0]
135 Memory address of link list
136Param[1]
137 Total # of bytes to transfer
138Param[2]
139 DMA type (0=MPEG, 1=OSD, 2=YUV)
140
141-------------------------------------------------------------------------------
142
143Name CX2341X_DEC_PAUSE_PLAYBACK
144Enum 13/0x0D
145Description
146 Freeze playback immediately. In this mode, when internal buffers are
147 full, no more data will be accepted and data request IRQs will be
148 masked.
149Param[0]
150 Display: 0=last frame, 1=black
151
152-------------------------------------------------------------------------------
153
154Name CX2341X_DEC_HALT_FW
155Enum 14/0x0E
156Description
157 The firmware is halted and no further API calls are serviced until
158 the firmware is uploaded again.
159
160-------------------------------------------------------------------------------
161
162Name CX2341X_DEC_SET_STANDARD
163Enum 16/0x10
164Description
165 Selects display standard
166Param[0]
167 0=NTSC, 1=PAL
168
169-------------------------------------------------------------------------------
170
171Name CX2341X_DEC_GET_VERSION
172Enum 17/0x11
173Description
174 Returns decoder firmware version information
175Result[0]
176 Version bitmask:
177 Bits 0:15 build
178 Bits 16:23 minor
179 Bits 24:31 major
180
181-------------------------------------------------------------------------------
182
183Name CX2341X_DEC_SET_STREAM_INPUT
184Enum 20/0x14
185Description
186 Select decoder stream input port
187Param[0]
188 0=memory (default), 1=streaming
189
190-------------------------------------------------------------------------------
191
192Name CX2341X_DEC_GET_TIMING_INFO
193Enum 21/0x15
194Description
195 Returns timing information from start of playback
196Result[0]
197 Frame count by decode order
198Result[1]
199 Video PTS bits 0:31 by display order
200Result[2]
201 Video PTS bit 32 by display order
202Result[3]
203 SCR bits 0:31 by display order
204Result[4]
205 SCR bit 32 by display order
206
207-------------------------------------------------------------------------------
208
209Name CX2341X_DEC_SET_AUDIO_MODE
210Enum 22/0x16
211Description
212 Select audio mode
213Param[0]
214 Dual mono mode action
215Param[1]
216 Stereo mode action:
217 0=Stereo, 1=Left, 2=Right, 3=Mono, 4=Swap, -1=Unchanged
218
219-------------------------------------------------------------------------------
220
221Name CX2341X_DEC_SET_EVENT_NOTIFICATION
222Enum 23/0x17
223Description
224 Setup firmware to notify the host about a particular event.
225 Counterpart to API 0xD5
226Param[0]
227 Event: 0=Audio mode change between stereo and dual channel
228Param[1]
229 Notification 0=disabled, 1=enabled
230Param[2]
231 Interrupt bit
232Param[3]
233 Mailbox slot, -1 if no mailbox required.
234
235-------------------------------------------------------------------------------
236
237Name CX2341X_DEC_SET_DISPLAY_BUFFERS
238Enum 24/0x18
239Description
240 Number of display buffers. To decode all frames in reverse playback you
241 must use nine buffers.
242Param[0]
243 0=six buffers, 1=nine buffers
244
245-------------------------------------------------------------------------------
246
247Name CX2341X_DEC_EXTRACT_VBI
248Enum 25/0x19
249Description
250 Extracts VBI data
251Param[0]
252 0=extract from extension & user data, 1=extract from private packets
253Result[0]
254 VBI table location
255Result[1]
256 VBI table size
257
258-------------------------------------------------------------------------------
259
260Name CX2341X_DEC_SET_DECODER_SOURCE
261Enum 26/0x1A
262Description
263 Selects decoder source. Ensure that the parameters passed to this
264 API match the encoder settings.
265Param[0]
266 Mode: 0=MPEG from host, 1=YUV from encoder, 2=YUV from host
267Param[1]
268 YUV picture width
269Param[2]
270 YUV picture height
271Param[3]
272 Bitmap: see Param[0] of API 0xBD
273
274-------------------------------------------------------------------------------
275
276Name CX2341X_DEC_SET_AUDIO_OUTPUT
277Enum 27/0x1B
278Description
279 Select audio output format
280Param[0]
281 Bitmask:
282 0:1 Data size:
283 '00' 16 bit
284 '01' 20 bit
285 '10' 24 bit
286 2:7 Unused
287 8:9 Mode:
288 '00' 2 channels
289 '01' 4 channels
290 '10' 6 channels
291 '11' 6 channels with one line data mode
292 (for left justified MSB first mode, 20 bit only)
293 10:11 Unused
294 12:13 Channel format:
295 '00' right justified MSB first mode
296 '01' left justified MSB first mode
297 '10' I2S mode
298 14:15 Unused
299 16:21 Right justify bit count
300 22:31 Unused
301
302-------------------------------------------------------------------------------
303
304Name CX2341X_DEC_SET_AV_DELAY
305Enum 28/0x1C
306Description
307 Set audio/video delay in 90Khz ticks
308Param[0]
309 0=A/V in sync, negative=audio lags, positive=video lags
310
311-------------------------------------------------------------------------------
312
313Name CX2341X_DEC_SET_PREBUFFERING
314Enum 30/0x1E
315Description
316 Decoder prebuffering, when enabled up to 128KB are buffered for
317 streams <8mbps or 640KB for streams >8mbps
318Param[0]
319 0=off, 1=on
diff --git a/Documentation/video4linux/cx2341x/fw-dma.txt b/Documentation/video4linux/cx2341x/fw-dma.txt
new file mode 100644
index 000000000000..8123e262d5b6
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-dma.txt
@@ -0,0 +1,94 @@
1This page describes the structures and procedures used by the cx2341x DMA
2engine.
3
4Introduction
5============
6
7The cx2341x PCI interface is busmaster capable. This means it has a DMA
8engine to efficiently transfer large volumes of data between the card and main
9memory without requiring help from a CPU. Like most hardware, it must operate
10on contiguous physical memory. This is difficult to come by in large quantities
11on virtual memory machines.
12
13Therefore, it also supports a technique called "scatter-gather". The card can
14transfer multiple buffers in one operation. Instead of allocating one large
15contiguous buffer, the driver can allocate several smaller buffers.
16
17In practice, I've seen the average transfer to be roughly 80K, but transfers
18above 128K were not uncommon, particularly at startup. The 128K figure is
19important, because that is the largest block that the kernel can normally
20allocate. Even still, 128K blocks are hard to come by, so the driver writer is
21urged to choose a smaller block size and learn the scatter-gather technique.
22
23Mailbox #10 is reserved for DMA transfer information.
24
25Flow
26====
27
28This section describes, in general, the order of events when handling DMA
29transfers. Detailed information follows this section.
30
31- The card raises the Encoder interrupt.
32- The driver reads the transfer type, offset and size from Mailbox #10.
33- The driver constructs the scatter-gather array from enough free dma buffers
34 to cover the size.
35- The driver schedules the DMA transfer via the ScheduleDMAtoHost API call.
36- The card raises the DMA Complete interrupt.
37- The driver checks the DMA status register for any errors.
38- The driver post-processes the newly transferred buffers.
39
40NOTE! It is possible that the Encoder and DMA Complete interrupts get raised
41simultaneously. (End of the last, start of the next, etc.)
42
43Mailbox #10
44===========
45
46The Flags, Command, Return Value and Timeout fields are ignored.
47
48Name: Mailbox #10
49Results[0]: Type: 0: MPEG.
50Results[1]: Offset: The position relative to the card's memory space.
51Results[2]: Size: The exact number of bytes to transfer.
52
53My speculation is that since the StartCapture API has a capture type of "RAW"
54available, the type field will have other values that correspond to YUV
55and PCM data.
56
57Scatter-Gather Array
58====================
59
60The scatter-gather array is a contiguously allocated block of memory that
61tells the card the source and destination of each data-block to transfer.
62Card "addresses" are derived from the offset supplied by Mailbox #10. Host
63addresses are the physical memory location of the target DMA buffer.
64
65Each S-G array element is a struct of three 32-bit words. The first word is
66the source address, the second is the destination address. Both take up the
67entire 32 bits. The lowest 16 bits of the third word is the transfer byte
68count. The high-bit of the third word is the "last" flag. The last-flag tells
69the card to raise the DMA_DONE interrupt. From hard personal experience, if
70you forget to set this bit, the card will still "work" but the stream will
71most likely get corrupted.
72
73The transfer count must be a multiple of 256. Therefore, the driver will need
74to track how much data in the target buffer is valid and deal with it
75accordingly.
76
77Array Element:
78
79- 32-bit Source Address
80- 32-bit Destination Address
81- 16-bit reserved (high bit is the last flag)
82- 16-bit byte count
83
84DMA Transfer Status
85===================
86
87Register 0x0004 holds the DMA Transfer Status:
88
89Bit
904 Scatter-Gather array error
913 DMA write error
922 DMA read error
931 write completed
940 read completed
diff --git a/Documentation/video4linux/cx2341x/fw-encoder-api.txt b/Documentation/video4linux/cx2341x/fw-encoder-api.txt
new file mode 100644
index 000000000000..001c68644b08
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-encoder-api.txt
@@ -0,0 +1,694 @@
1Encoder firmware API description
2================================
3
4-------------------------------------------------------------------------------
5
6Name CX2341X_ENC_PING_FW
7Enum 128/0x80
8Description
9 Does nothing. Can be used to check if the firmware is responding.
10
11-------------------------------------------------------------------------------
12
13Name CX2341X_ENC_START_CAPTURE
14Enum 129/0x81
15Description
16 Commences the capture of video, audio and/or VBI data. All encoding
17 parameters must be initialized prior to this API call. Captures frames
18 continuously or until a predefined number of frames have been captured.
19Param[0]
20 Capture stream type:
21 0=MPEG
22 1=Raw
23 2=Raw passthrough
24 3=VBI
25
26Param[1]
27 Bitmask:
28 Bit 0 when set, captures YUV
29 Bit 1 when set, captures PCM audio
30 Bit 2 when set, captures VBI (same as param[0]=3)
31 Bit 3 when set, the capture destination is the decoder
32 (same as param[0]=2)
33 Bit 4 when set, the capture destination is the host
34 Note: this parameter is only meaningful for RAW capture type.
35
36-------------------------------------------------------------------------------
37
38Name CX2341X_ENC_STOP_CAPTURE
39Enum 130/0x82
40Description
41 Ends a capture in progress
42Param[0]
43 0=stop at end of GOP (generates IRQ)
44 1=stop immediate (no IRQ)
45Param[1]
46 Stream type to stop, see param[0] of API 0x81
47Param[2]
48 Subtype, see param[1] of API 0x81
49
50-------------------------------------------------------------------------------
51
52Name CX2341X_ENC_SET_AUDIO_ID
53Enum 137/0x89
54Description
55 Assigns the transport stream ID of the encoded audio stream
56Param[0]
57 Audio Stream ID
58
59-------------------------------------------------------------------------------
60
61Name CX2341X_ENC_SET_VIDEO_ID
62Enum 139/0x8B
63Description
64 Set video transport stream ID
65Param[0]
66 Video stream ID
67
68-------------------------------------------------------------------------------
69
70Name CX2341X_ENC_SET_PCR_ID
71Enum 141/0x8D
72Description
73 Assigns the transport stream ID for PCR packets
74Param[0]
75 PCR Stream ID
76
77-------------------------------------------------------------------------------
78
79Name CX2341X_ENC_SET_FRAME_RATE
80Enum 143/0x8F
81Description
82 Set video frames per second. Change occurs at start of new GOP.
83Param[0]
84 0=30fps
85 1=25fps
86
87-------------------------------------------------------------------------------
88
89Name CX2341X_ENC_SET_FRAME_SIZE
90Enum 145/0x91
91Description
92 Select video stream encoding resolution.
93Param[0]
94 Height in lines. Default 480
95Param[1]
96 Width in pixels. Default 720
97
98-------------------------------------------------------------------------------
99
100Name CX2341X_ENC_SET_BIT_RATE
101Enum 149/0x95
102Description
103 Assign average video stream bitrate. Note on the last three params:
104 Param[3] and [4] seem to be always 0, param [5] doesn't seem to be used.
105Param[0]
106 0=variable bitrate, 1=constant bitrate
107Param[1]
108 bitrate in bits per second
109Param[2]
110 peak bitrate in bits per second, divided by 400
111Param[3]
112 Mux bitrate in bits per second, divided by 400. May be 0 (default).
113Param[4]
114 Rate Control VBR Padding
115Param[5]
116 VBV Buffer used by encoder
117
118-------------------------------------------------------------------------------
119
120Name CX2341X_ENC_SET_GOP_PROPERTIES
121Enum 151/0x97
122Description
123 Setup the GOP structure
124Param[0]
125 GOP size (maximum is 34)
126Param[1]
127 Number of B frames between the I and P frame, plus 1.
128 For example: IBBPBBPBBPBB --> GOP size: 12, number of B frames: 2+1 = 3
129 Note that GOP size must be a multiple of (B-frames + 1).
130
131-------------------------------------------------------------------------------
132
133Name CX2341X_ENC_SET_ASPECT_RATIO
134Enum 153/0x99
135Description
136 Sets the encoding aspect ratio. Changes in the aspect ratio take effect
137 at the start of the next GOP.
138Param[0]
139 '0000' forbidden
140 '0001' 1:1 square
141 '0010' 4:3
142 '0011' 16:9
143 '0100' 2.21:1
144 '0101' reserved
145 ....
146 '1111' reserved
147
148-------------------------------------------------------------------------------
149
150Name CX2341X_ENC_SET_DNR_FILTER_MODE
151Enum 155/0x9B
152Description
153 Assign Dynamic Noise Reduction operating mode
154Param[0]
155 Bit0: Spatial filter, set=auto, clear=manual
156 Bit1: Temporal filter, set=auto, clear=manual
157Param[1]
158 Median filter:
159 0=Disabled
160 1=Horizontal
161 2=Vertical
162 3=Horiz/Vert
163 4=Diagonal
164
165-------------------------------------------------------------------------------
166
167Name CX2341X_ENC_SET_DNR_FILTER_PROPS
168Enum 157/0x9D
169Description
170 These Dynamic Noise Reduction filter values are only meaningful when
171 the respective filter is set to "manual" (See API 0x9B)
172Param[0]
173 Spatial filter: default 0, range 0:15
174Param[1]
175 Temporal filter: default 0, range 0:31
176
177-------------------------------------------------------------------------------
178
179Name CX2341X_ENC_SET_CORING_LEVELS
180Enum 159/0x9F
181Description
182 Assign Dynamic Noise Reduction median filter properties.
183Param[0]
184 Threshold above which the luminance median filter is enabled.
185 Default: 0, range 0:255
186Param[1]
187 Threshold below which the luminance median filter is enabled.
188 Default: 255, range 0:255
189Param[2]
190 Threshold above which the chrominance median filter is enabled.
191 Default: 0, range 0:255
192Param[3]
193 Threshold below which the chrominance median filter is enabled.
194 Default: 255, range 0:255
195
196-------------------------------------------------------------------------------
197
198Name CX2341X_ENC_SET_SPATIAL_FILTER_TYPE
199Enum 161/0xA1
200Description
201 Assign spatial prefilter parameters
202Param[0]
203 Luminance filter
204 0=Off
205 1=1D Horizontal
206 2=1D Vertical
207 3=2D H/V Separable (default)
208 4=2D Symmetric non-separable
209Param[1]
210 Chrominance filter
211 0=Off
212 1=1D Horizontal (default)
213
214-------------------------------------------------------------------------------
215
216Name CX2341X_ENC_SET_3_2_PULLDOWN
217Enum 177/0xB1
218Description
219 3:2 pulldown properties
220Param[0]
221 0=enabled
222 1=disabled
223
224-------------------------------------------------------------------------------
225
226Name CX2341X_ENC_SET_VBI_LINE
227Enum 183/0xB7
228Description
229 Selects VBI line number.
230Param[0]
231 Bits 0:4 line number
232 Bit 31 0=top_field, 1=bottom_field
233 Bits 0:31 all set specifies "all lines"
234Param[1]
235 VBI line information features: 0=disabled, 1=enabled
236Param[2]
237 Slicing: 0=None, 1=Closed Caption
238 Almost certainly not implemented. Set to 0.
239Param[3]
240 Luminance samples in this line.
241 Almost certainly not implemented. Set to 0.
242Param[4]
243 Chrominance samples in this line
244 Almost certainly not implemented. Set to 0.
245
246-------------------------------------------------------------------------------
247
248Name CX2341X_ENC_SET_STREAM_TYPE
249Enum 185/0xB9
250Description
251 Assign stream type
252 Note: Transport stream is not working in recent firmwares.
253 And in older firmwares the timestamps in the TS seem to be
254 unreliable.
255Param[0]
256 0=Program stream
257 1=Transport stream
258 2=MPEG1 stream
259 3=PES A/V stream
260 5=PES Video stream
261 7=PES Audio stream
262 10=DVD stream
263 11=VCD stream
264 12=SVCD stream
265 13=DVD_S1 stream
266 14=DVD_S2 stream
267
268-------------------------------------------------------------------------------
269
270Name CX2341X_ENC_SET_OUTPUT_PORT
271Enum 187/0xBB
272Description
273 Assign stream output port. Normally 0 when the data is copied through
274 the PCI bus (DMA), and 1 when the data is streamed to another chip
275 (pvrusb and cx88-blackbird).
276Param[0]
277 0=Memory (default)
278 1=Streaming
279 2=Serial
280Param[1]
281 Unknown, but leaving this to 0 seems to work best. Indications are that
282 this might have to do with USB support, although passing anything but 0
283 only breaks things.
284
285-------------------------------------------------------------------------------
286
287Name CX2341X_ENC_SET_AUDIO_PROPERTIES
288Enum 189/0xBD
289Description
290 Set audio stream properties, may be called while encoding is in progress.
291 Note: all bitfields are consistent with ISO11172 documentation except
292 bits 2:3 which ISO docs define as:
293 '11' Layer I
294 '10' Layer II
295 '01' Layer III
296 '00' Undefined
297 This discrepancy may indicate a possible error in the documentation.
298 Testing indicated that only Layer II is actually working, and that
299 the minimum bitrate should be 192 kbps.
300Param[0]
301 Bitmask:
302 0:1 '00' 44.1Khz
303 '01' 48Khz
304 '10' 32Khz
305 '11' reserved
306
307 2:3 '01'=Layer I
308 '10'=Layer II
309
310 4:7 Bitrate:
311 Index | Layer I | Layer II
312 ------+-------------+------------
313 '0000' | free format | free format
314 '0001' | 32 kbit/s | 32 kbit/s
315 '0010' | 64 kbit/s | 48 kbit/s
316 '0011' | 96 kbit/s | 56 kbit/s
317 '0100' | 128 kbit/s | 64 kbit/s
318 '0101' | 160 kbit/s | 80 kbit/s
319 '0110' | 192 kbit/s | 96 kbit/s
320 '0111' | 224 kbit/s | 112 kbit/s
321 '1000' | 256 kbit/s | 128 kbit/s
322 '1001' | 288 kbit/s | 160 kbit/s
323 '1010' | 320 kbit/s | 192 kbit/s
324 '1011' | 352 kbit/s | 224 kbit/s
325 '1100' | 384 kbit/s | 256 kbit/s
326 '1101' | 416 kbit/s | 320 kbit/s
327 '1110' | 448 kbit/s | 384 kbit/s
328 Note: For Layer II, not all combinations of total bitrate
329 and mode are allowed. See ISO11172-3 3-Annex B, Table 3-B.2
330
331 8:9 '00'=Stereo
332 '01'=JointStereo
333 '10'=Dual
334 '11'=Mono
335 Note: testing seems to indicate that Mono and possibly
336 JointStereo are not working (default to stereo).
337 Dual does work, though.
338
339 10:11 Mode Extension used in joint_stereo mode.
340 In Layer I and II they indicate which subbands are in
341 intensity_stereo. All other subbands are coded in stereo.
342 '00' subbands 4-31 in intensity_stereo, bound==4
343 '01' subbands 8-31 in intensity_stereo, bound==8
344 '10' subbands 12-31 in intensity_stereo, bound==12
345 '11' subbands 16-31 in intensity_stereo, bound==16
346
347 12:13 Emphasis:
348 '00' None
349 '01' 50/15uS
350 '10' reserved
351 '11' CCITT J.17
352
353 14 CRC:
354 '0' off
355 '1' on
356
357 15 Copyright:
358 '0' off
359 '1' on
360
361 16 Generation:
362 '0' copy
363 '1' original
364
365-------------------------------------------------------------------------------
366
367Name CX2341X_ENC_HALT_FW
368Enum 195/0xC3
369Description
370 The firmware is halted and no further API calls are serviced until the
371 firmware is uploaded again.
372
373-------------------------------------------------------------------------------
374
375Name CX2341X_ENC_GET_VERSION
376Enum 196/0xC4
377Description
378 Returns the version of the encoder firmware.
379Result[0]
380 Version bitmask:
381 Bits 0:15 build
382 Bits 16:23 minor
383 Bits 24:31 major
384
385-------------------------------------------------------------------------------
386
387Name CX2341X_ENC_SET_GOP_CLOSURE
388Enum 197/0xC5
389Description
390 Assigns the GOP open/close property.
391Param[0]
392 0=Open
393 1=Closed
394
395-------------------------------------------------------------------------------
396
397Name CX2341X_ENC_GET_SEQ_END
398Enum 198/0xC6
399Description
400 Obtains the sequence end code of the encoder's buffer. When a capture
401 is stopped a number of interrupts are still generated, the last of
402 which will have Result[0] set to 1 and Result[1] will contain the size
403 of the buffer.
404Result[0]
405 State of the transfer (1 if last buffer)
406Result[1]
407 If Result[0] is 1, this contains the size of the last buffer, undefined
408 otherwise.
409
410-------------------------------------------------------------------------------
411
412Name CX2341X_ENC_SET_PGM_INDEX_INFO
413Enum 199/0xC7
414Description
415 Sets the Program Index Information.
416Param[0]
417 Picture Mask:
418 0=No index capture
419 1=I frames
420 3=I,P frames
421 7=I,P,B frames
422Param[1]
423 Elements requested (up to 400)
424Result[0]
425 Offset in SDF memory of the table.
426Result[1]
427 Number of allocated elements up to a maximum of Param[1]
428
429-------------------------------------------------------------------------------
430
431Name CX2341X_ENC_SET_VBI_CONFIG
432Enum 200/0xC8
433Description
434 Configure VBI settings
435Param[0]
436 Bitmap:
437 0 Mode '0' Sliced, '1' Raw
438 1:3 Insertion:
439 '000' insert in extension & user data
440 '001' insert in private packets
441 '010' separate stream and user data
442 '111' separate stream and private data
443 8:15 Stream ID (normally 0xBD)
444Param[1]
445 Frames per interrupt (max 8). Only valid in raw mode.
446Param[2]
447 Total raw VBI frames. Only valid in raw mode.
448Param[3]
449 Start codes
450Param[4]
451 Stop codes
452Param[5]
453 Lines per frame
454Param[6]
455 Byte per line
456Result[0]
457 Observed frames per interrupt in raw mode only. Range 1 to Param[1]
458Result[1]
459 Observed number of frames in raw mode. Range 1 to Param[2]
460Result[2]
461 Memory offset to start of raw VBI data
462
463-------------------------------------------------------------------------------
464
465Name CX2341X_ENC_SET_DMA_BLOCK_SIZE
466Enum 201/0xC9
467Description
468 Set DMA transfer block size
469Param[0]
470 DMA transfer block size in bytes or frames. When unit is bytes,
471 supported block sizes are 2^7, 2^8 and 2^9 bytes.
472Param[1]
473 Unit: 0=bytes, 1=frames
474
475-------------------------------------------------------------------------------
476
477Name CX2341X_ENC_GET_PREV_DMA_INFO_MB_10
478Enum 202/0xCA
479Description
480 Returns information on the previous DMA transfer in conjunction with
481 bit 27 of the interrupt mask. Uses mailbox 10.
482Result[0]
483 Type of stream
484Result[1]
485 Address Offset
486Result[2]
487 Maximum size of transfer
488
489-------------------------------------------------------------------------------
490
491Name CX2341X_ENC_GET_PREV_DMA_INFO_MB_9
492Enum 203/0xCB
493Description
494 Returns information on the previous DMA transfer in conjunction with
495 bit 27 of the interrupt mask. Uses mailbox 9.
496Result[0]
497 Status bits:
498 Bit 0 set indicates transfer complete
499 Bit 2 set indicates transfer error
500 Bit 4 set indicates linked list error
501Result[1]
502 DMA type
503Result[2]
504 Presentation Time Stamp bits 0..31
505Result[3]
506 Presentation Time Stamp bit 32
507
508-------------------------------------------------------------------------------
509
510Name CX2341X_ENC_SCHED_DMA_TO_HOST
511Enum 204/0xCC
512Description
513 Setup DMA to host operation
514Param[0]
515 Memory address of link list
516Param[1]
517 Length of link list (wtf: what units ???)
518Param[2]
519 DMA type (0=MPEG)
520
521-------------------------------------------------------------------------------
522
523Name CX2341X_ENC_INITIALIZE_INPUT
524Enum 205/0xCD
525Description
526 Initializes the video input
527
528-------------------------------------------------------------------------------
529
530Name CX2341X_ENC_SET_FRAME_DROP_RATE
531Enum 208/0xD0
532Description
533 For each frame captured, skip specified number of frames.
534Param[0]
535 Number of frames to skip
536
537-------------------------------------------------------------------------------
538
539Name CX2341X_ENC_PAUSE_ENCODER
540Enum 210/0xD2
541Description
542 During a pause condition, all frames are dropped instead of being encoded.
543Param[0]
544 0=Pause encoding
545 1=Continue encoding
546
547-------------------------------------------------------------------------------
548
549Name CX2341X_ENC_REFRESH_INPUT
550Enum 211/0xD3
551Description
552 Refreshes the video input
553
554-------------------------------------------------------------------------------
555
556Name CX2341X_ENC_SET_COPYRIGHT
557Enum 212/0xD4
558Description
559 Sets stream copyright property
560Param[0]
561 0=Stream is not copyrighted
562 1=Stream is copyrighted
563
564-------------------------------------------------------------------------------
565
566Name CX2341X_ENC_SET_EVENT_NOTIFICATION
567Enum 213/0xD5
568Description
569 Setup firmware to notify the host about a particular event. Host must
570 unmask the interrupt bit.
571Param[0]
572 Event (0=refresh encoder input)
573Param[1]
574 Notification 0=disabled 1=enabled
575Param[2]
576 Interrupt bit
577Param[3]
578 Mailbox slot, -1 if no mailbox required.
579
580-------------------------------------------------------------------------------
581
582Name CX2341X_ENC_SET_NUM_VSYNC_LINES
583Enum 214/0xD6
584Description
585 Depending on the analog video decoder used, this assigns the number
586 of lines for field 1 and 2.
587Param[0]
588 Field 1 number of lines:
589 0x00EF for SAA7114
590 0x00F0 for SAA7115
591 0x0105 for Micronas
592Param[1]
593 Field 2 number of lines:
594 0x00EF for SAA7114
595 0x00F0 for SAA7115
596 0x0106 for Micronas
597
598-------------------------------------------------------------------------------
599
600Name CX2341X_ENC_SET_PLACEHOLDER
601Enum 215/0xD7
602Description
603 Provides a mechanism of inserting custom user data in the MPEG stream.
604Param[0]
605 0=extension & user data
606 1=private packet with stream ID 0xBD
607Param[1]
608 Rate at which to insert data, in units of frames (for private packet)
609 or GOPs (for ext. & user data)
610Param[2]
611 Number of data DWORDs (below) to insert
612Param[3]
613 Custom data 0
614Param[4]
615 Custom data 1
616Param[5]
617 Custom data 2
618Param[6]
619 Custom data 3
620Param[7]
621 Custom data 4
622Param[8]
623 Custom data 5
624Param[9]
625 Custom data 6
626Param[10]
627 Custom data 7
628Param[11]
629 Custom data 8
630
631-------------------------------------------------------------------------------
632
633Name CX2341X_ENC_MUTE_VIDEO
634Enum 217/0xD9
635Description
636 Video muting
637Param[0]
638 Bit usage:
639 0 '0'=video not muted
640 '1'=video muted, creates frames with the YUV color defined below
641 1:7 Unused
642 8:15 V chrominance information
643 16:23 U chrominance information
644 24:31 Y luminance information
645
646-------------------------------------------------------------------------------
647
648Name CX2341X_ENC_MUTE_AUDIO
649Enum 218/0xDA
650Description
651 Audio muting
652Param[0]
653 0=audio not muted
654 1=audio muted (produces silent mpeg audio stream)
655
656-------------------------------------------------------------------------------
657
658Name CX2341X_ENC_UNKNOWN
659Enum 219/0xDB
660Description
661 Unknown API, it's used by Hauppauge though.
662Param[0]
663 0 This is the value Hauppauge uses, Unknown what it means.
664
665-------------------------------------------------------------------------------
666
667Name CX2341X_ENC_MISC
668Enum 220/0xDC
669Description
670 Miscellaneous actions. It is not 100% known what it does. It's really a
671 sort of ioctl call. The first parameter is a command number, the second
672 the value.
673Param[0]
674 Command number:
675 1=set initial SCR value when starting encoding.
676 2=set quality mode (apparently some test setting).
677 3=setup advanced VIM protection handling (supposedly only for the cx23416
678 for raw YUV).
679 Actually it looks like this should be 0 for saa7114/5 based card and 1
680 for cx25840 based cards.
681 4=generate artificial PTS timestamps
682 5=USB flush mode
683 6=something to do with the quantization matrix
684 7=set navigation pack insertion for DVD
685 8=enable scene change detection (seems to be a failure)
686 9=set history parameters of the video input module
687 10=set input field order of VIM
688 11=set quantization matrix
689 12=reset audio interface
690 13=set audio volume delay
691 14=set audio delay
692
693Param[1]
694 Command value.
diff --git a/Documentation/video4linux/cx2341x/fw-memory.txt b/Documentation/video4linux/cx2341x/fw-memory.txt
new file mode 100644
index 000000000000..ef0aad3f88fc
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-memory.txt
@@ -0,0 +1,141 @@
1This document describes the cx2341x memory map and documents some of the register
2space.
3
4Warning! This information was figured out from searching through the memory and
5registers, this information may not be correct and is certainly not complete, and
6was not derived from anything more than searching through the memory space with
7commands like:
8
9 ivtvctl -O min=0x02000000,max=0x020000ff
10
11So take this as is, I'm always searching for more stuff, it's a large
12register space :-).
13
14Memory Map
15==========
16
17The cx2341x exposes its entire 64M memory space to the PCI host via the PCI BAR0
18(Base Address Register 0). The addresses here are offsets relative to the
19address held in BAR0.
20
210x00000000-0x00ffffff Encoder memory space
220x00000000-0x0003ffff Encode.rom
23 ???-??? MPEG buffer(s)
24 ???-??? Raw video capture buffer(s)
25 ???-??? Raw audio capture buffer(s)
26 ???-??? Display buffers (6 or 9)
27
280x01000000-0x01ffffff Decoder memory space
290x01000000-0x0103ffff Decode.rom
30 ???-??? MPEG buffer(s)
310x0114b000-0x0115afff Audio.rom (deprecated?)
32
330x02000000-0x0200ffff Register Space
34
35Registers
36=========
37
38The registers occupy the 64k space starting at the 0x02000000 offset from BAR0.
39All of these registers are 32 bits wide.
40
41DMA Registers 0x000-0xff:
42
43 0x00 - Control:
44 0=reset/cancel, 1=read, 2=write, 4=stop
45 0x04 - DMA status:
46 1=read busy, 2=write busy, 4=read error, 8=write error, 16=link list error
47 0x08 - pci DMA pointer for read link list
48 0x0c - pci DMA pointer for write link list
49 0x10 - read/write DMA enable:
50 1=read enable, 2=write enable
51 0x14 - always 0xffffffff, if set any lower instability occurs, 0x00 crashes
52 0x18 - ??
53 0x1c - always 0x20 or 32, smaller values slow down DMA transactions
54 0x20 - always value of 0x780a010a
55 0x24-0x3c - usually just random values???
56 0x40 - Interrupt status
57 0x44 - Write a bit here and it shows up in Interrupt status 0x40
58 0x48 - Interrupt Mask
59 0x4C - always value of 0xfffdffff,
60 if changed to 0xffffffff DMA write interrupts break.
61 0x50 - always 0xffffffff
62 0x54 - always 0xffffffff (0x4c, 0x50, 0x54 seem like interrupt masks; there are
63 3 processors on chip, Java ones, VPU, SPU, APU, maybe these are the
64 interrupt masks???).
65 0x60-0x7C - random values
66 0x80 - first write linked list reg, for Encoder Memory addr
67 0x84 - first write linked list reg, for pci memory addr
68 0x88 - first write linked list reg, for length of buffer in memory addr
69 (OR this with 0x80000000 for the last link)
70 0x8c-0xcc - rest of write linked list reg, 8 sets of 3 total, DMA goes here
71 from linked list addr in reg 0x0c, firmware must push through or
72 something.
73 0xe0 - first (and only) read linked list reg, for pci memory addr
74 0xe4 - first (and only) read linked list reg, for Decoder memory addr
75 0xe8 - first (and only) read linked list reg, for length of buffer
76 0xec-0xff - Nothing seems to be in these registers, 0xec-f4 are 0x00000000.
77
78Memory locations for Encoder Buffers 0x700-0x7ff:
79
80These registers show offsets of memory locations pertaining to each
81buffer area used for encoding, have to shift them by <<1 first.
82
830x07F8: Encoder SDRAM refresh
840x07FC: Encoder SDRAM pre-charge
85
86Memory locations for Decoder Buffers 0x800-0x8ff:
87
88These registers show offsets of memory locations pertaining to each
89buffer area used for decoding, have to shift them by <<1 first.
90
910x08F8: Decoder SDRAM refresh
920x08FC: Decoder SDRAM pre-charge
93
94Other memory locations:
95
960x2800: Video Display Module control
970x2D00: AO (audio output?) control
980x2D24: Bytes Flushed
990x7000: LSB I2C write clock bit (inverted)
1000x7004: LSB I2C write data bit (inverted)
1010x7008: LSB I2C read clock bit
1020x700c: LSB I2C read data bit
1030x9008: GPIO get input state
1040x900c: GPIO set output state
1050x9020: GPIO direction (Bit7 (GPIO 0..7) - 0:input, 1:output)
1060x9050: SPU control
1070x9054: Reset HW blocks
1080x9058: VPU control
1090xA018: Bit6: interrupt pending?
1100xA064: APU command
111
112
113Interrupt Status Register
114=========================
115
116The definition of the bits in the interrupt status register 0x0040, and the
117interrupt mask 0x0048. If a bit is cleared in the mask, then we want our ISR to
118execute.
119
120Bit
12131 Encoder Start Capture
12230 Encoder EOS
12329 Encoder VBI capture
12428 Encoder Video Input Module reset event
12527 Encoder DMA complete
12626
12725 Decoder copy protect detection event
12824 Decoder audio mode change detection event
12923
13022 Decoder data request
13121 Decoder I-Frame? done
13220 Decoder DMA complete
13319 Decoder VBI re-insertion
13418 Decoder DMA err (linked-list bad)
135
136Missing
137Encoder API call completed
138Decoder API call completed
139Encoder API post(?)
140Decoder API post(?)
141Decoder VTRACE event
diff --git a/Documentation/video4linux/cx2341x/fw-osd-api.txt b/Documentation/video4linux/cx2341x/fw-osd-api.txt
new file mode 100644
index 000000000000..da98ae30a37a
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-osd-api.txt
@@ -0,0 +1,342 @@
1OSD firmware API description
2============================
3
4Note: this API is part of the decoder firmware, so it's cx23415 only.
5
6-------------------------------------------------------------------------------
7
8Name CX2341X_OSD_GET_FRAMEBUFFER
9Enum 65/0x41
10Description
11 Return base and length of contiguous OSD memory.
12Result[0]
13 OSD base address
14Result[1]
15 OSD length
16
17-------------------------------------------------------------------------------
18
19Name CX2341X_OSD_GET_PIXEL_FORMAT
20Enum 66/0x42
21Description
22 Query OSD format
23Result[0]
24 0=8bit index, 4=AlphaRGB 8:8:8:8
25
26-------------------------------------------------------------------------------
27
28Name CX2341X_OSD_SET_PIXEL_FORMAT
29Enum 67/0x43
30Description
31 Assign pixel format
32Param[0]
33 0=8bit index, 4=AlphaRGB 8:8:8:8
34
35-------------------------------------------------------------------------------
36
37Name CX2341X_OSD_GET_STATE
38Enum 68/0x44
39Description
40 Query OSD state
41Result[0]
42 Bit 0 0=off, 1=on
43 Bits 1:2 alpha control
44 Bits 3:5 pixel format
45
46-------------------------------------------------------------------------------
47
48Name CX2341X_OSD_SET_STATE
49Enum 69/0x45
50Description
51 OSD switch
52Param[0]
53 0=off, 1=on
54
55-------------------------------------------------------------------------------
56
57Name CX2341X_OSD_GET_OSD_COORDS
58Enum 70/0x46
59Description
60 Retrieve coordinates of OSD area blended with video
61Result[0]
62 OSD buffer address
63Result[1]
64 Stride in pixels
65Result[2]
66 Lines in OSD buffer
67Result[3]
68 Horizontal offset in buffer
69Result[4]
70 Vertical offset in buffer
71
72-------------------------------------------------------------------------------
73
74Name CX2341X_OSD_SET_OSD_COORDS
75Enum 71/0x47
76Description
77 Assign the coordinates of the OSD area to blend with video
78Param[0]
79 buffer address
80Param[1]
81 buffer stride in pixels
82Param[2]
83 lines in buffer
84Param[3]
85 horizontal offset
86Param[4]
87 vertical offset
88
89-------------------------------------------------------------------------------
90
91Name CX2341X_OSD_GET_SCREEN_COORDS
92Enum 72/0x48
93Description
94 Retrieve OSD screen area coordinates
95Result[0]
96 top left horizontal offset
97Result[1]
98 top left vertical offset
99Result[2]
100 bottom right horizontal offset
101Result[3]
102 bottom right vertical offset
103
104-------------------------------------------------------------------------------
105
106Name CX2341X_OSD_SET_SCREEN_COORDS
107Enum 73/0x49
108Description
109 Assign the coordinates of the screen area to blend with video
110Param[0]
111 top left horizontal offset
112Param[1]
113 top left vertical offset
114Param[2]
115 bottom right horizontal offset
116Param[3]
117 bottom right vertical offset
118
119-------------------------------------------------------------------------------
120
121Name CX2341X_OSD_GET_GLOBAL_ALPHA
122Enum 74/0x4A
123Description
124 Retrieve OSD global alpha
125Result[0]
126 global alpha: 0=off, 1=on
127Result[1]
128 bits 0:7 global alpha
129
130-------------------------------------------------------------------------------
131
132Name CX2341X_OSD_SET_GLOBAL_ALPHA
133Enum 75/0x4B
134Description
135 Update global alpha
136Param[0]
137 global alpha: 0=off, 1=on
138Param[1]
139 global alpha (8 bits)
140Param[2]
141 local alpha: 0=on, 1=off
142
143-------------------------------------------------------------------------------
144
145Name CX2341X_OSD_SET_BLEND_COORDS
146Enum 76/0x4C
147Description
148 Move start of blending area within display buffer
149Param[0]
150 horizontal offset in buffer
151Param[1]
152 vertical offset in buffer
153
154-------------------------------------------------------------------------------
155
156Name CX2341X_OSD_GET_FLICKER_STATE
157Enum 79/0x4F
158Description
159 Retrieve flicker reduction module state
160Result[0]
161 flicker state: 0=off, 1=on
162
163-------------------------------------------------------------------------------
164
165Name CX2341X_OSD_SET_FLICKER_STATE
166Enum 80/0x50
167Description
168 Set flicker reduction module state
169Param[0]
170 State: 0=off, 1=on
171
172-------------------------------------------------------------------------------
173
174Name CX2341X_OSD_BLT_COPY
175Enum 82/0x52
176Description
177 BLT copy
178Param[0]
179'0000' zero
180'0001' ~destination AND ~source
181'0010' ~destination AND source
182'0011' ~destination
183'0100' destination AND ~source
184'0101' ~source
185'0110' destination XOR source
186'0111' ~destination OR ~source
187'1000' destination AND source
188'1001' destination XNOR source
189'1010' source
190'1011' ~destination OR source
191'1100' destination
192'1101' destination OR ~source
193'1110' destination OR source
194'1111' one
195
196Param[1]
197 Resulting alpha blending
198 '01' source_alpha
199 '10' destination_alpha
200 '11' source_alpha*destination_alpha+1
201 (zero if both source and destination alpha are zero)
202Param[2]
203 '00' output_pixel = source_pixel
204
205 '01' if source_alpha=0:
206 output_pixel = destination_pixel
207 if 256 > source_alpha > 1:
208 output_pixel = ((source_alpha + 1)*source_pixel +
209 (255 - source_alpha)*destination_pixel)/256
210
211 '10' if destination_alpha=0:
212 output_pixel = source_pixel
213 if 255 > destination_alpha > 0:
214 output_pixel = ((255 - destination_alpha)*source_pixel +
215 (destination_alpha + 1)*destination_pixel)/256
216
217 '11' if source_alpha=0:
218 source_temp = 0
219 if source_alpha=255:
220 source_temp = source_pixel*256
221 if 255 > source_alpha > 0:
222 source_temp = source_pixel*(source_alpha + 1)
223 if destination_alpha=0:
224 destination_temp = 0
225 if destination_alpha=255:
226 destination_temp = destination_pixel*256
227 if 255 > destination_alpha > 0:
228 destination_temp = destination_pixel*(destination_alpha + 1)
229 output_pixel = (source_temp + destination_temp)/256
230Param[3]
231 width
232Param[4]
233 height
234Param[5]
235 destination pixel mask
236Param[6]
237 destination rectangle start address
238Param[7]
239 destination stride in dwords
240Param[8]
241 source stride in dwords
242Param[9]
243 source rectangle start address
244
245-------------------------------------------------------------------------------
246
247Name CX2341X_OSD_BLT_FILL
248Enum 83/0x53
249Description
250 BLT fill color
251Param[0]
252 Same as Param[0] on API 0x52
253Param[1]
254 Same as Param[1] on API 0x52
255Param[2]
256 Same as Param[2] on API 0x52
257Param[3]
258 width
259Param[4]
260 height
261Param[5]
262 destination pixel mask
263Param[6]
264 destination rectangle start address
265Param[7]
266 destination stride in dwords
267Param[8]
268 color fill value
269
270-------------------------------------------------------------------------------
271
272Name CX2341X_OSD_BLT_TEXT
273Enum 84/0x54
274Description
275 BLT for 8 bit alpha text source
276Param[0]
277 Same as Param[0] on API 0x52
278Param[1]
279 Same as Param[1] on API 0x52
280Param[2]
281 Same as Param[2] on API 0x52
282Param[3]
283 width
284Param[4]
285 height
286Param[5]
287 destination pixel mask
288Param[6]
289 destination rectangle start address
290Param[7]
291 destination stride in dwords
292Param[8]
293 source stride in dwords
294Param[9]
295 source rectangle start address
296Param[10]
297 color fill value
298
299-------------------------------------------------------------------------------
300
301Name CX2341X_OSD_SET_FRAMEBUFFER_WINDOW
302Enum 86/0x56
303Description
304 Positions the main output window on the screen. The coordinates must be
305 such that the entire window fits on the screen.
306Param[0]
307 window width
308Param[1]
309 window height
310Param[2]
311 top left window corner horizontal offset
312Param[3]
313 top left window corner vertical offset
314
315-------------------------------------------------------------------------------
316
317Name CX2341X_OSD_SET_CHROMA_KEY
318Enum 96/0x60
319Description
320 Chroma key switch and color
321Param[0]
322 state: 0=off, 1=on
323Param[1]
324 color
325
326-------------------------------------------------------------------------------
327
328Name CX2341X_OSD_GET_ALPHA_CONTENT_INDEX
329Enum 97/0x61
330Description
331 Retrieve alpha content index
332Result[0]
333 alpha content index, Range 0:15
334
335-------------------------------------------------------------------------------
336
337Name CX2341X_OSD_SET_ALPHA_CONTENT_INDEX
338Enum 98/0x62
339Description
340 Assign alpha content index
341Param[0]
342 alpha content index, range 0:15
diff --git a/Documentation/video4linux/cx2341x/fw-upload.txt b/Documentation/video4linux/cx2341x/fw-upload.txt
new file mode 100644
index 000000000000..60c502ce3215
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-upload.txt
@@ -0,0 +1,49 @@
1This document describes how to upload the cx2341x firmware to the card.
2
3How to find
4===========
5
6See the web pages of the various projects that use this chip for information
7on how to obtain the firmware.
8
9The firmware stored in a Windows driver can be detected as follows:
10
11- Each firmware image is 256k bytes.
12- The 1st 32-bit word of the Encoder image is 0x00000da7
13- The 1st 32-bit word of the Decoder image is 0x000003a7
14- The 2nd 32-bit word of both images is 0xaa55bb66
15
16How to load
17===========
18
19- Issue the FWapi command to stop the encoder if it is running. Wait for the
20 command to complete.
21- Issue the FWapi command to stop the decoder if it is running. Wait for the
22 command to complete.
23- Issue the I2C command to the digitizer to stop emitting VSYNC events.
24- Issue the FWapi command to halt the encoder's firmware.
25- Sleep for 10ms.
26- Issue the FWapi command to halt the decoder's firmware.
27- Sleep for 10ms.
28- Write 0x00000000 to register 0x2800 to stop the Video Display Module.
29- Write 0x00000005 to register 0x2D00 to stop the AO (audio output?).
30- Write 0x00000000 to register 0xA064 to ping? the APU.
31- Write 0xFFFFFFFE to register 0x9058 to stop the VPU.
32- Write 0xFFFFFFFF to register 0x9054 to reset the HW blocks.
33- Write 0x00000001 to register 0x9050 to stop the SPU.
34- Sleep for 10ms.
35- Write 0x0000001A to register 0x07FC to init the Encoder SDRAM's pre-charge.
36- Write 0x80000640 to register 0x07F8 to init the Encoder SDRAM's refresh to 1us.
37- Write 0x0000001A to register 0x08FC to init the Decoder SDRAM's pre-charge.
38- Write 0x80000640 to register 0x08F8 to init the Decoder SDRAM's refresh to 1us.
39- Sleep for 512ms. (600ms is recommended)
40- Transfer the encoder's firmware image to offset 0 in Encoder memory space.
41- Transfer the decoder's firmware image to offset 0 in Decoder memory space.
42- Use a read-modify-write operation to Clear bit 0 of register 0x9050 to
43 re-enable the SPU.
44- Sleep for 1 second.
45- Use a read-modify-write operation to Clear bits 3 and 0 of register 0x9058
46 to re-enable the VPU.
47- Sleep for 1 second.
48- Issue status API commands to both firmware images to verify.
49
diff --git a/Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt b/Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt
new file mode 100644
index 000000000000..93fec32a1188
--- /dev/null
+++ b/Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt
@@ -0,0 +1,54 @@
1The controls for the mux are GPIO [0,1] for source, and GPIO 2 for muting.
2
3GPIO0 GPIO1
4 0 0 TV Audio
5 1 0 FM radio
6 0 1 Line-In
7 1 1 Mono tuner bypass or CD passthru (tuner specific)
8
9GPIO 16 (I believe) is tied to the IR port (if present).
10
11------------------------------------------------------------------------------------
12
13From the data sheet:
14 Register 24'h20004 PCI Interrupt Status
15 bit [18] IR_SMP_INT Set when 32 input samples have been collected over
16 gpio[16] pin into GP_SAMPLE register.
17
18What's missing from the data sheet:
19
20Setup 4KHz sampling rate (roughly 2x oversampled; good enough for our RC5
21compat remote)
22set register 0x35C050 to 0xa80a80
23
24enable sampling
25set register 0x35C054 to 0x5
26
27Of course, enable the IRQ bit 18 in the interrupt mask register (and
28provide a handler).
29
30GP_SAMPLE register is at 0x35C058
31
32Bits are then right shifted into the GP_SAMPLE register at the specified
33rate; you get an interrupt when a full DWORD is received.
34You need to recover the actual RC5 bits out of the (oversampled) IR sensor
35bits. (Hint: look for the 0/1 and 1/0 crossings of the RC5 bi-phase data) An
36actual raw RC5 code will span 2-3 DWORDS, depending on the actual alignment.
37
38I'm pretty sure when no IR signal is present the receiver is always in a
39marking state(1); but stray light, etc can cause intermittent noise values
40as well. Remember, this is a free running sample of the IR receiver state
41over time, so don't assume any sample starts at any particular place.
42
43http://www.atmel.com/dyn/resources/prod_documents/doc2817.pdf
44This data sheet (google search) seems to have a lovely description of the
45RC5 basics
46
47http://users.pandora.be/nenya/electronics/rc5/ and more data
48
49http://www.ee.washington.edu/circuit_archive/text/ir_decode.txt
50and even a reference to how to decode a bi-phase data stream.
51
52http://www.xs4all.nl/~sbp/knowledge/ir/rc5.htm
53still more info
54
diff --git a/Documentation/video4linux/et61x251.txt b/Documentation/video4linux/et61x251.txt
index 29340282ab5f..cd584f20a997 100644
--- a/Documentation/video4linux/et61x251.txt
+++ b/Documentation/video4linux/et61x251.txt
@@ -1,9 +1,9 @@
1 1
2 ET61X[12]51 PC Camera Controllers 2 ET61X[12]51 PC Camera Controllers
3 Driver for Linux 3 Driver for Linux
4 ================================= 4 =================================
5 5
6 - Documentation - 6 - Documentation -
7 7
8 8
9Index 9Index
@@ -156,46 +156,46 @@ Name: video_nr
156Type: short array (min = 0, max = 64) 156Type: short array (min = 0, max = 64)
157Syntax: <-1|n[,...]> 157Syntax: <-1|n[,...]>
158Description: Specify V4L2 minor mode number: 158Description: Specify V4L2 minor mode number:
159 -1 = use next available 159 -1 = use next available
160 n = use minor number n 160 n = use minor number n
161 You can specify up to 64 cameras this way. 161 You can specify up to 64 cameras this way.
162 For example: 162 For example:
163 video_nr=-1,2,-1 would assign minor number 2 to the second 163 video_nr=-1,2,-1 would assign minor number 2 to the second
164 registered camera and use auto for the first one and for every 164 registered camera and use auto for the first one and for every
165 other camera. 165 other camera.
166Default: -1 166Default: -1
167------------------------------------------------------------------------------- 167-------------------------------------------------------------------------------
168Name: force_munmap 168Name: force_munmap
169Type: bool array (min = 0, max = 64) 169Type: bool array (min = 0, max = 64)
170Syntax: <0|1[,...]> 170Syntax: <0|1[,...]>
171Description: Force the application to unmap previously mapped buffer memory 171Description: Force the application to unmap previously mapped buffer memory
172 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not 172 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not
173 all the applications support this feature. This parameter is 173 all the applications support this feature. This parameter is
174 specific for each detected camera. 174 specific for each detected camera.
175 0 = do not force memory unmapping 175 0 = do not force memory unmapping
176 1 = force memory unmapping (save memory) 176 1 = force memory unmapping (save memory)
177Default: 0 177Default: 0
178------------------------------------------------------------------------------- 178-------------------------------------------------------------------------------
179Name: frame_timeout 179Name: frame_timeout
180Type: uint array (min = 0, max = 64) 180Type: uint array (min = 0, max = 64)
181Syntax: <n[,...]> 181Syntax: <n[,...]>
182Description: Timeout for a video frame in seconds. This parameter is 182Description: Timeout for a video frame in seconds. This parameter is
183 specific for each detected camera. This parameter can be 183 specific for each detected camera. This parameter can be
184 changed at runtime thanks to the /sys filesystem interface. 184 changed at runtime thanks to the /sys filesystem interface.
185Default: 2 185Default: 2
186------------------------------------------------------------------------------- 186-------------------------------------------------------------------------------
187Name: debug 187Name: debug
188Type: ushort 188Type: ushort
189Syntax: <n> 189Syntax: <n>
190Description: Debugging information level, from 0 to 3: 190Description: Debugging information level, from 0 to 3:
191 0 = none (use carefully) 191 0 = none (use carefully)
192 1 = critical errors 192 1 = critical errors
193 2 = significant informations 193 2 = significant informations
194 3 = more verbose messages 194 3 = more verbose messages
195 Level 3 is useful for testing only, when only one device 195 Level 3 is useful for testing only, when only one device
196 is used at the same time. It also shows some more informations 196 is used at the same time. It also shows some more informations
197 about the hardware being detected. This module parameter can be 197 about the hardware being detected. This module parameter can be
198 changed at runtime thanks to the /sys filesystem interface. 198 changed at runtime thanks to the /sys filesystem interface.
199Default: 2 199Default: 2
200------------------------------------------------------------------------------- 200-------------------------------------------------------------------------------
201 201
diff --git a/Documentation/video4linux/ibmcam.txt b/Documentation/video4linux/ibmcam.txt
index 4a40a2e99451..397a94eb77b8 100644
--- a/Documentation/video4linux/ibmcam.txt
+++ b/Documentation/video4linux/ibmcam.txt
@@ -21,7 +21,7 @@ Internal interface: Video For Linux (V4L)
21Supported controls: 21Supported controls:
22- by V4L: Contrast, Brightness, Color, Hue 22- by V4L: Contrast, Brightness, Color, Hue
23- by driver options: frame rate, lighting conditions, video format, 23- by driver options: frame rate, lighting conditions, video format,
24 default picture settings, sharpness. 24 default picture settings, sharpness.
25 25
26SUPPORTED CAMERAS: 26SUPPORTED CAMERAS:
27 27
@@ -191,66 +191,66 @@ init_model2_sat Integer 0..255 [0x34] init_model2_sat=65
191init_model2_yb Integer 0..255 [0xa0] init_model2_yb=200 191init_model2_yb Integer 0..255 [0xa0] init_model2_yb=200
192 192
193debug You don't need this option unless you are a developer. 193debug You don't need this option unless you are a developer.
194 If you are a developer then you will see in the code 194 If you are a developer then you will see in the code
195 what values do what. 0=off. 195 what values do what. 0=off.
196 196
197flags This is a bit mask, and you can combine any number of 197flags This is a bit mask, and you can combine any number of
198 bits to produce what you want. Usually you don't want 198 bits to produce what you want. Usually you don't want
199 any of extra features this option provides: 199 any of extra features this option provides:
200 200
201 FLAGS_RETRY_VIDIOCSYNC 1 This bit allows to retry failed 201 FLAGS_RETRY_VIDIOCSYNC 1 This bit allows to retry failed
202 VIDIOCSYNC ioctls without failing. 202 VIDIOCSYNC ioctls without failing.
203 Will work with xawtv, will not 203 Will work with xawtv, will not
204 with xrealproducer. Default is 204 with xrealproducer. Default is
205 not set. 205 not set.
206 FLAGS_MONOCHROME 2 Activates monochrome (b/w) mode. 206 FLAGS_MONOCHROME 2 Activates monochrome (b/w) mode.
207 FLAGS_DISPLAY_HINTS 4 Shows colored pixels which have 207 FLAGS_DISPLAY_HINTS 4 Shows colored pixels which have
208 magic meaning to developers. 208 magic meaning to developers.
209 FLAGS_OVERLAY_STATS 8 Shows tiny numbers on screen, 209 FLAGS_OVERLAY_STATS 8 Shows tiny numbers on screen,
210 useful only for debugging. 210 useful only for debugging.
211 FLAGS_FORCE_TESTPATTERN 16 Shows blue screen with numbers. 211 FLAGS_FORCE_TESTPATTERN 16 Shows blue screen with numbers.
212 FLAGS_SEPARATE_FRAMES 32 Shows each frame separately, as 212 FLAGS_SEPARATE_FRAMES 32 Shows each frame separately, as
213 it was received from the camera. 213 it was received from the camera.
214 Default (not set) is to mix the 214 Default (not set) is to mix the
215 preceding frame in to compensate 215 preceding frame in to compensate
216 for occasional loss of Isoc data 216 for occasional loss of Isoc data
217 on high frame rates. 217 on high frame rates.
218 FLAGS_CLEAN_FRAMES 64 Forces "cleanup" of each frame 218 FLAGS_CLEAN_FRAMES 64 Forces "cleanup" of each frame
219 prior to use; relevant only if 219 prior to use; relevant only if
220 FLAGS_SEPARATE_FRAMES is set. 220 FLAGS_SEPARATE_FRAMES is set.
221 Default is not to clean frames, 221 Default is not to clean frames,
222 this is a little faster but may 222 this is a little faster but may
223 produce flicker if frame rate is 223 produce flicker if frame rate is
224 too high and Isoc data gets lost. 224 too high and Isoc data gets lost.
225 FLAGS_NO_DECODING 128 This flag turns the video stream 225 FLAGS_NO_DECODING 128 This flag turns the video stream
226 decoder off, and dumps the raw 226 decoder off, and dumps the raw
227 Isoc data from the camera into 227 Isoc data from the camera into
228 the reading process. Useful to 228 the reading process. Useful to
229 developers, but not to users. 229 developers, but not to users.
230 230
231framerate This setting controls frame rate of the camera. This is 231framerate This setting controls frame rate of the camera. This is
232 an approximate setting (in terms of "worst" ... "best") 232 an approximate setting (in terms of "worst" ... "best")
233 because camera changes frame rate depending on amount 233 because camera changes frame rate depending on amount
234 of light available. Setting 0 is slowest, 6 is fastest. 234 of light available. Setting 0 is slowest, 6 is fastest.
235 Beware - fast settings are very demanding and may not 235 Beware - fast settings are very demanding and may not
236 work well with all video sizes. Be conservative. 236 work well with all video sizes. Be conservative.
237 237
238hue_correction This highly optional setting allows to adjust the 238hue_correction This highly optional setting allows to adjust the
239 hue of the image in a way slightly different from 239 hue of the image in a way slightly different from
240 what usual "hue" control does. Both controls affect 240 what usual "hue" control does. Both controls affect
241 YUV colorspace: regular "hue" control adjusts only 241 YUV colorspace: regular "hue" control adjusts only
242 U component, and this "hue_correction" option similarly 242 U component, and this "hue_correction" option similarly
243 adjusts only V component. However usually it is enough 243 adjusts only V component. However usually it is enough
244 to tweak only U or V to compensate for colored light or 244 to tweak only U or V to compensate for colored light or
245 color temperature; this option simply allows more 245 color temperature; this option simply allows more
246 complicated correction when and if it is necessary. 246 complicated correction when and if it is necessary.
247 247
248init_brightness These settings specify _initial_ values which will be 248init_brightness These settings specify _initial_ values which will be
249init_contrast used to set up the camera. If your V4L application has 249init_contrast used to set up the camera. If your V4L application has
250init_color its own controls to adjust the picture then these 250init_color its own controls to adjust the picture then these
251init_hue controls will be used too. These options allow you to 251init_hue controls will be used too. These options allow you to
252 preconfigure the camera when it gets connected, before 252 preconfigure the camera when it gets connected, before
253 any V4L application connects to it. Good for webcams. 253 any V4L application connects to it. Good for webcams.
254 254
255init_model2_rg These initial settings alter color balance of the 255init_model2_rg These initial settings alter color balance of the
256init_model2_rg2 camera on hardware level. All four settings may be used 256init_model2_rg2 camera on hardware level. All four settings may be used
@@ -258,47 +258,47 @@ init_model2_sat to tune the camera to specific lighting conditions. These
258init_model2_yb settings only apply to Model 2 cameras. 258init_model2_yb settings only apply to Model 2 cameras.
259 259
260lighting This option selects one of three hardware-defined 260lighting This option selects one of three hardware-defined
261 photosensitivity settings of the camera. 0=bright light, 261 photosensitivity settings of the camera. 0=bright light,
262 1=Medium (default), 2=Low light. This setting affects 262 1=Medium (default), 2=Low light. This setting affects
263 frame rate: the dimmer the lighting the lower the frame 263 frame rate: the dimmer the lighting the lower the frame
264 rate (because longer exposure time is needed). The 264 rate (because longer exposure time is needed). The
265 Model 2 cameras allow values more than 2 for this option, 265 Model 2 cameras allow values more than 2 for this option,
266 thus enabling extremely high sensitivity at cost of frame 266 thus enabling extremely high sensitivity at cost of frame
267 rate, color saturation and imaging sensor noise. 267 rate, color saturation and imaging sensor noise.
268 268
269sharpness This option controls smoothing (noise reduction) 269sharpness This option controls smoothing (noise reduction)
270 made by camera. Setting 0 is most smooth, setting 6 270 made by camera. Setting 0 is most smooth, setting 6
271 is most sharp. Be aware that CMOS sensor used in the 271 is most sharp. Be aware that CMOS sensor used in the
272 camera is pretty noisy, so if you choose 6 you will 272 camera is pretty noisy, so if you choose 6 you will
273 be greeted with "snowy" image. Default is 4. Model 2 273 be greeted with "snowy" image. Default is 4. Model 2
274 cameras do not support this feature. 274 cameras do not support this feature.
275 275
276size This setting chooses one of several image sizes that are 276size This setting chooses one of several image sizes that are
277 supported by this driver. Cameras may support more, but 277 supported by this driver. Cameras may support more, but
278 it's difficult to reverse-engineer all formats. 278 it's difficult to reverse-engineer all formats.
279 Following video sizes are supported: 279 Following video sizes are supported:
280 280
281 size=0 128x96 (Model 1 only) 281 size=0 128x96 (Model 1 only)
282 size=1 160x120 282 size=1 160x120
283 size=2 176x144 283 size=2 176x144
284 size=3 320x240 (Model 2 only) 284 size=3 320x240 (Model 2 only)
285 size=4 352x240 (Model 2 only) 285 size=4 352x240 (Model 2 only)
286 size=5 352x288 286 size=5 352x288
287 size=6 640x480 (Model 3 only) 287 size=6 640x480 (Model 3 only)
288 288
289 The 352x288 is the native size of the Model 1 sensor 289 The 352x288 is the native size of the Model 1 sensor
290 array, so it's the best resolution the camera can 290 array, so it's the best resolution the camera can
291 yield. The best resolution of Model 2 is 176x144, and 291 yield. The best resolution of Model 2 is 176x144, and
292 larger images are produced by stretching the bitmap. 292 larger images are produced by stretching the bitmap.
293 Model 3 has sensor with 640x480 grid, and it works too, 293 Model 3 has sensor with 640x480 grid, and it works too,
294 but the frame rate will be exceptionally low (1-2 FPS); 294 but the frame rate will be exceptionally low (1-2 FPS);
295 it may be still OK for some applications, like security. 295 it may be still OK for some applications, like security.
296 Choose the image size you need. The smaller image can 296 Choose the image size you need. The smaller image can
297 support faster frame rate. Default is 352x288. 297 support faster frame rate. Default is 352x288.
298 298
299For more information and the Troubleshooting FAQ visit this URL: 299For more information and the Troubleshooting FAQ visit this URL:
300 300
301 http://www.linux-usb.org/ibmcam/ 301 http://www.linux-usb.org/ibmcam/
302 302
303WHAT NEEDS TO BE DONE: 303WHAT NEEDS TO BE DONE:
304 304
diff --git a/Documentation/video4linux/ov511.txt b/Documentation/video4linux/ov511.txt
index 142741e3c578..79af610d4ba5 100644
--- a/Documentation/video4linux/ov511.txt
+++ b/Documentation/video4linux/ov511.txt
@@ -81,7 +81,7 @@ MODULE PARAMETERS:
81 TYPE: integer (Boolean) 81 TYPE: integer (Boolean)
82 DEFAULT: 1 82 DEFAULT: 1
83 DESC: Brightness is normally under automatic control and can't be set 83 DESC: Brightness is normally under automatic control and can't be set
84 manually by the video app. Set to 0 for manual control. 84 manually by the video app. Set to 0 for manual control.
85 85
86 NAME: autogain 86 NAME: autogain
87 TYPE: integer (Boolean) 87 TYPE: integer (Boolean)
@@ -97,13 +97,13 @@ MODULE PARAMETERS:
97 TYPE: integer (0-6) 97 TYPE: integer (0-6)
98 DEFAULT: 3 98 DEFAULT: 3
99 DESC: Sets the threshold for printing debug messages. The higher the value, 99 DESC: Sets the threshold for printing debug messages. The higher the value,
100 the more is printed. The levels are cumulative, and are as follows: 100 the more is printed. The levels are cumulative, and are as follows:
101 0=no debug messages 101 0=no debug messages
102 1=init/detection/unload and other significant messages 102 1=init/detection/unload and other significant messages
103 2=some warning messages 103 2=some warning messages
104 3=config/control function calls 104 3=config/control function calls
105 4=most function calls and data parsing messages 105 4=most function calls and data parsing messages
106 5=highly repetitive messages 106 5=highly repetitive messages
107 107
108 NAME: snapshot 108 NAME: snapshot
109 TYPE: integer (Boolean) 109 TYPE: integer (Boolean)
@@ -116,24 +116,24 @@ MODULE PARAMETERS:
116 TYPE: integer (1-4 for OV511, 1-31 for OV511+) 116 TYPE: integer (1-4 for OV511, 1-31 for OV511+)
117 DEFAULT: 1 117 DEFAULT: 1
118 DESC: Number of cameras allowed to stream simultaneously on a single bus. 118 DESC: Number of cameras allowed to stream simultaneously on a single bus.
119 Values higher than 1 reduce the data rate of each camera, allowing two 119 Values higher than 1 reduce the data rate of each camera, allowing two
120 or more to be used at once. If you have a complicated setup involving 120 or more to be used at once. If you have a complicated setup involving
121 both OV511 and OV511+ cameras, trial-and-error may be necessary for 121 both OV511 and OV511+ cameras, trial-and-error may be necessary for
122 finding the optimum setting. 122 finding the optimum setting.
123 123
124 NAME: compress 124 NAME: compress
125 TYPE: integer (Boolean) 125 TYPE: integer (Boolean)
126 DEFAULT: 0 126 DEFAULT: 0
127 DESC: Set this to 1 to turn on the camera's compression engine. This can 127 DESC: Set this to 1 to turn on the camera's compression engine. This can
128 potentially increase the frame rate at the expense of quality, if you 128 potentially increase the frame rate at the expense of quality, if you
129 have a fast CPU. You must load the proper compression module for your 129 have a fast CPU. You must load the proper compression module for your
130 camera before starting your application (ov511_decomp or ov518_decomp). 130 camera before starting your application (ov511_decomp or ov518_decomp).
131 131
132 NAME: testpat 132 NAME: testpat
133 TYPE: integer (Boolean) 133 TYPE: integer (Boolean)
134 DEFAULT: 0 134 DEFAULT: 0
135 DESC: This configures the camera's sensor to transmit a colored test-pattern 135 DESC: This configures the camera's sensor to transmit a colored test-pattern
136 instead of an image. This does not work correctly yet. 136 instead of an image. This does not work correctly yet.
137 137
138 NAME: dumppix 138 NAME: dumppix
139 TYPE: integer (0-2) 139 TYPE: integer (0-2)
diff --git a/Documentation/video4linux/sn9c102.txt b/Documentation/video4linux/sn9c102.txt
index 142920bc011f..1d20895b4354 100644
--- a/Documentation/video4linux/sn9c102.txt
+++ b/Documentation/video4linux/sn9c102.txt
@@ -1,9 +1,9 @@
1 1
2 SN9C10x PC Camera Controllers 2 SN9C10x PC Camera Controllers
3 Driver for Linux 3 Driver for Linux
4 ============================= 4 =============================
5 5
6 - Documentation - 6 - Documentation -
7 7
8 8
9Index 9Index
@@ -176,46 +176,46 @@ Name: video_nr
176Type: short array (min = 0, max = 64) 176Type: short array (min = 0, max = 64)
177Syntax: <-1|n[,...]> 177Syntax: <-1|n[,...]>
178Description: Specify V4L2 minor mode number: 178Description: Specify V4L2 minor mode number:
179 -1 = use next available 179 -1 = use next available
180 n = use minor number n 180 n = use minor number n
181 You can specify up to 64 cameras this way. 181 You can specify up to 64 cameras this way.
182 For example: 182 For example:
183 video_nr=-1,2,-1 would assign minor number 2 to the second 183 video_nr=-1,2,-1 would assign minor number 2 to the second
184 recognized camera and use auto for the first one and for every 184 recognized camera and use auto for the first one and for every
185 other camera. 185 other camera.
186Default: -1 186Default: -1
187------------------------------------------------------------------------------- 187-------------------------------------------------------------------------------
188Name: force_munmap 188Name: force_munmap
189Type: bool array (min = 0, max = 64) 189Type: bool array (min = 0, max = 64)
190Syntax: <0|1[,...]> 190Syntax: <0|1[,...]>
191Description: Force the application to unmap previously mapped buffer memory 191Description: Force the application to unmap previously mapped buffer memory
192 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not 192 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not
193 all the applications support this feature. This parameter is 193 all the applications support this feature. This parameter is
194 specific for each detected camera. 194 specific for each detected camera.
195 0 = do not force memory unmapping 195 0 = do not force memory unmapping
196 1 = force memory unmapping (save memory) 196 1 = force memory unmapping (save memory)
197Default: 0 197Default: 0
198------------------------------------------------------------------------------- 198-------------------------------------------------------------------------------
199Name: frame_timeout 199Name: frame_timeout
200Type: uint array (min = 0, max = 64) 200Type: uint array (min = 0, max = 64)
201Syntax: <n[,...]> 201Syntax: <n[,...]>
202Description: Timeout for a video frame in seconds. This parameter is 202Description: Timeout for a video frame in seconds. This parameter is
203 specific for each detected camera. This parameter can be 203 specific for each detected camera. This parameter can be
204 changed at runtime thanks to the /sys filesystem interface. 204 changed at runtime thanks to the /sys filesystem interface.
205Default: 2 205Default: 2
206------------------------------------------------------------------------------- 206-------------------------------------------------------------------------------
207Name: debug 207Name: debug
208Type: ushort 208Type: ushort
209Syntax: <n> 209Syntax: <n>
210Description: Debugging information level, from 0 to 3: 210Description: Debugging information level, from 0 to 3:
211 0 = none (use carefully) 211 0 = none (use carefully)
212 1 = critical errors 212 1 = critical errors
213 2 = significant informations 213 2 = significant informations
214 3 = more verbose messages 214 3 = more verbose messages
215 Level 3 is useful for testing only, when only one device 215 Level 3 is useful for testing only, when only one device
216 is used. It also shows some more information about the 216 is used. It also shows some more information about the
217 hardware being detected. This parameter can be changed at 217 hardware being detected. This parameter can be changed at
218 runtime thanks to the /sys filesystem interface. 218 runtime thanks to the /sys filesystem interface.
219Default: 2 219Default: 2
220------------------------------------------------------------------------------- 220-------------------------------------------------------------------------------
221 221
@@ -280,24 +280,24 @@ Byte # Value Description
2800x04 0xC4 Frame synchronisation pattern. 2800x04 0xC4 Frame synchronisation pattern.
2810x05 0x96 Frame synchronisation pattern. 2810x05 0x96 Frame synchronisation pattern.
2820x06 0xXX Unknown meaning. The exact value depends on the chip; 2820x06 0xXX Unknown meaning. The exact value depends on the chip;
283 possible values are 0x00, 0x01 and 0x20. 283 possible values are 0x00, 0x01 and 0x20.
2840x07 0xXX Variable value, whose bits are ff00uzzc, where ff is a 2840x07 0xXX Variable value, whose bits are ff00uzzc, where ff is a
285 frame counter, u is unknown, zz is a size indicator 285 frame counter, u is unknown, zz is a size indicator
286 (00 = VGA, 01 = SIF, 10 = QSIF) and c stands for 286 (00 = VGA, 01 = SIF, 10 = QSIF) and c stands for
287 "compression enabled" (1 = yes, 0 = no). 287 "compression enabled" (1 = yes, 0 = no).
2880x08 0xXX Brightness sum inside Auto-Exposure area (low-byte). 2880x08 0xXX Brightness sum inside Auto-Exposure area (low-byte).
2890x09 0xXX Brightness sum inside Auto-Exposure area (high-byte). 2890x09 0xXX Brightness sum inside Auto-Exposure area (high-byte).
290 For a pure white image, this number will be equal to 500 290 For a pure white image, this number will be equal to 500
291 times the area of the specified AE area. For images 291 times the area of the specified AE area. For images
292 that are not pure white, the value scales down according 292 that are not pure white, the value scales down according
293 to relative whiteness. 293 to relative whiteness.
2940x0A 0xXX Brightness sum outside Auto-Exposure area (low-byte). 2940x0A 0xXX Brightness sum outside Auto-Exposure area (low-byte).
2950x0B 0xXX Brightness sum outside Auto-Exposure area (high-byte). 2950x0B 0xXX Brightness sum outside Auto-Exposure area (high-byte).
296 For a pure white image, this number will be equal to 125 296 For a pure white image, this number will be equal to 125
297 times the area outside of the specified AE area. For 297 times the area outside of the specified AE area. For
298 images that are not pure white, the value scales down 298 images that are not pure white, the value scales down
299 according to relative whiteness. 299 according to relative whiteness.
300 according to relative whiteness. 300 according to relative whiteness.
301 301
302The following bytes are used by the SN9C103 bridge only: 302The following bytes are used by the SN9C103 bridge only:
303 303
diff --git a/Documentation/video4linux/v4lgrab.c b/Documentation/video4linux/v4lgrab.c
new file mode 100644
index 000000000000..079b628481cf
--- /dev/null
+++ b/Documentation/video4linux/v4lgrab.c
@@ -0,0 +1,192 @@
1/* Simple Video4Linux image grabber. */
2/*
3 * Video4Linux Driver Test/Example Framegrabbing Program
4 *
5 * Compile with:
6 * gcc -s -Wall -Wstrict-prototypes v4lgrab.c -o v4lgrab
7 * Use as:
8 * v4lgrab >image.ppm
9 *
10 * Copyright (C) 1998-05-03, Phil Blundell <philb@gnu.org>
11 * Copied from http://www.tazenda.demon.co.uk/phil/vgrabber.c
12 * with minor modifications (Dave Forrest, drf5n@virginia.edu).
13 *
14 */
15
16#include <unistd.h>
17#include <sys/types.h>
18#include <sys/stat.h>
19#include <fcntl.h>
20#include <stdio.h>
21#include <sys/ioctl.h>
22#include <stdlib.h>
23
24#include <linux/types.h>
25#include <linux/videodev.h>
26
27#define FILE "/dev/video0"
28
29/* Stole this from tvset.c */
30
/*
 * READ_VIDEO_PIXEL - decode one pixel from a raw capture buffer.
 *
 * buf     byte pointer lvalue into the frame; it is advanced past the
 *         pixel that was consumed
 * format  VIDEO_PALETTE_* value describing the pixel layout
 * depth   bits per pixel; only consulted for VIDEO_PALETTE_GREY
 * r, g, b lvalues receiving the components, each left in the upper byte
 *         of a 16-bit value (the caller shifts right by 8 for output)
 *
 * The 16-bit cases load the sample through an unsigned short pointer
 * cast, i.e. in host byte order.
 * NOTE(review): that cast assumes buf is suitably aligned - confirm.
 *
 * For an unsupported format, or a grey depth that is not 4/6/8/16, the
 * components are set to 0 and buf is left where it was, so the caller
 * never reads unassigned values.
 *
 * Wrapped in do { } while (0) so that "READ_VIDEO_PIXEL(...);" is a
 * single statement and stays safe inside an unbraced if/else.
 */
#define READ_VIDEO_PIXEL(buf, format, depth, r, g, b)			\
do {									\
	switch (format) {						\
	case VIDEO_PALETTE_GREY:					\
		switch (depth) {					\
		case 4:							\
		case 6:							\
		case 8:							\
			(r) = (g) = (b) = (*(buf)++ << 8);		\
			break;						\
									\
		case 16:						\
			(r) = (g) = (b) =				\
				*((unsigned short *)(buf));		\
			(buf) += 2;					\
			break;						\
									\
		default:						\
			(r) = (g) = (b) = 0;				\
			break;						\
		}							\
		break;							\
									\
	case VIDEO_PALETTE_RGB565: {					\
		unsigned short tmp = *(unsigned short *)(buf);		\
		(r) = tmp & 0xF800;					\
		(g) = (tmp << 5) & 0xFC00;				\
		(b) = (tmp << 11) & 0xF800;				\
		(buf) += 2;						\
		break;							\
	}								\
									\
	case VIDEO_PALETTE_RGB555:					\
		(r) = ((buf)[0] & 0xF8) << 8;				\
		(g) = (((buf)[0] << 5 | (buf)[1] >> 3) & 0xF8) << 8;	\
		(b) = (((buf)[1] << 2) & 0xF8) << 8;			\
		(buf) += 2;						\
		break;							\
									\
	case VIDEO_PALETTE_RGB24:					\
		(r) = (buf)[0] << 8;					\
		(g) = (buf)[1] << 8;					\
		(b) = (buf)[2] << 8;					\
		(buf) += 3;						\
		break;							\
									\
	default:							\
		fprintf(stderr,						\
			"Format %d not yet supported\n",		\
			format);					\
		(r) = (g) = (b) = 0;					\
		break;							\
	}								\
} while (0)
82
/*
 * Work out how far the mean sample value of a frame is from mid-grey.
 *
 * image       frame data; size * 3 bytes are read from it
 * size        number of pixels in the frame
 * brightness  out: suggested correction, (128 - mean) / 3
 *
 * Returns non-zero when the mean lies outside 126..130 (the caller should
 * apply *brightness and grab again), 0 when it is already inside that
 * window.  An empty frame (size <= 0) has no mean; it reports "no
 * adjustment needed" with *brightness = 0 instead of dividing by zero.
 */
int get_brightness_adj(const unsigned char *image, long size, int *brightness) {
	long nbytes = size * 3;
	long total = 0;
	long mean;
	long i;

	if (size <= 0) {
		*brightness = 0;
		return 0;
	}

	for (i = 0; i < nbytes; i++)
		total += image[i];

	mean = total / nbytes;
	*brightness = (128 - mean) / 3;
	return !(mean >= 126 && mean <= 130);
}
90
91int main(int argc, char ** argv)
92{
93 int fd = open(FILE, O_RDONLY), f;
94 struct video_capability cap;
95 struct video_window win;
96 struct video_picture vpic;
97
98 unsigned char *buffer, *src;
99 int bpp = 24, r, g, b;
100 unsigned int i, src_depth;
101
102 if (fd < 0) {
103 perror(FILE);
104 exit(1);
105 }
106
107 if (ioctl(fd, VIDIOCGCAP, &cap) < 0) {
108 perror("VIDIOGCAP");
109 fprintf(stderr, "(" FILE " not a video4linux device?)\n");
110 close(fd);
111 exit(1);
112 }
113
114 if (ioctl(fd, VIDIOCGWIN, &win) < 0) {
115 perror("VIDIOCGWIN");
116 close(fd);
117 exit(1);
118 }
119
120 if (ioctl(fd, VIDIOCGPICT, &vpic) < 0) {
121 perror("VIDIOCGPICT");
122 close(fd);
123 exit(1);
124 }
125
126 if (cap.type & VID_TYPE_MONOCHROME) {
127 vpic.depth=8;
128 vpic.palette=VIDEO_PALETTE_GREY; /* 8bit grey */
129 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
130 vpic.depth=6;
131 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
132 vpic.depth=4;
133 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
134 fprintf(stderr, "Unable to find a supported capture format.\n");
135 close(fd);
136 exit(1);
137 }
138 }
139 }
140 } else {
141 vpic.depth=24;
142 vpic.palette=VIDEO_PALETTE_RGB24;
143
144 if(ioctl(fd, VIDIOCSPICT, &vpic) < 0) {
145 vpic.palette=VIDEO_PALETTE_RGB565;
146 vpic.depth=16;
147
148 if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
149 vpic.palette=VIDEO_PALETTE_RGB555;
150 vpic.depth=15;
151
152 if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
153 fprintf(stderr, "Unable to find a supported capture format.\n");
154 return -1;
155 }
156 }
157 }
158 }
159
160 buffer = malloc(win.width * win.height * bpp);
161 if (!buffer) {
162 fprintf(stderr, "Out of memory.\n");
163 exit(1);
164 }
165
166 do {
167 int newbright;
168 read(fd, buffer, win.width * win.height * bpp);
169 f = get_brightness_adj(buffer, win.width * win.height, &newbright);
170 if (f) {
171 vpic.brightness += (newbright << 8);
172 if(ioctl(fd, VIDIOCSPICT, &vpic)==-1) {
173 perror("VIDIOSPICT");
174 break;
175 }
176 }
177 } while (f);
178
179 fprintf(stdout, "P6\n%d %d 255\n", win.width, win.height);
180
181 src = buffer;
182
183 for (i = 0; i < win.width * win.height; i++) {
184 READ_VIDEO_PIXEL(src, vpic.palette, src_depth, r, g, b);
185 fputc(r>>8, stdout);
186 fputc(g>>8, stdout);
187 fputc(b>>8, stdout);
188 }
189
190 close(fd);
191 return 0;
192}
diff --git a/Documentation/video4linux/w9968cf.txt b/Documentation/video4linux/w9968cf.txt
index 3b704f2aae6d..0d53ce774b01 100644
--- a/Documentation/video4linux/w9968cf.txt
+++ b/Documentation/video4linux/w9968cf.txt
@@ -1,9 +1,9 @@
1 1
2 W996[87]CF JPEG USB Dual Mode Camera Chip 2 W996[87]CF JPEG USB Dual Mode Camera Chip
3 Driver for Linux 2.6 (basic version) 3 Driver for Linux 2.6 (basic version)
4 ========================================= 4 =========================================
5 5
6 - Documentation - 6 - Documentation -
7 7
8 8
9Index 9Index
@@ -188,57 +188,57 @@ Name: ovmod_load
188Type: bool 188Type: bool
189Syntax: <0|1> 189Syntax: <0|1>
190Description: Automatic 'ovcamchip' module loading: 0 disabled, 1 enabled. 190Description: Automatic 'ovcamchip' module loading: 0 disabled, 1 enabled.
191 If enabled, 'insmod' searches for the required 'ovcamchip' 191 If enabled, 'insmod' searches for the required 'ovcamchip'
192 module in the system, according to its configuration, and 192 module in the system, according to its configuration, and
193 loads that module automatically. This action is performed as 193 loads that module automatically. This action is performed as
194 soon as the 'w9968cf' module is loaded into memory. 194 soon as the 'w9968cf' module is loaded into memory.
195Default: 1 195Default: 1
196Note: The kernel must be compiled with the CONFIG_KMOD option 196Note: The kernel must be compiled with the CONFIG_KMOD option
197 enabled for the 'ovcamchip' module to be loaded and for 197 enabled for the 'ovcamchip' module to be loaded and for
198 this parameter to be present. 198 this parameter to be present.
199------------------------------------------------------------------------------- 199-------------------------------------------------------------------------------
200Name: simcams 200Name: simcams
201Type: int 201Type: int
202Syntax: <n> 202Syntax: <n>
203Description: Number of cameras allowed to stream simultaneously. 203Description: Number of cameras allowed to stream simultaneously.
204 n may vary from 0 to 32. 204 n may vary from 0 to 32.
205Default: 32 205Default: 32
206------------------------------------------------------------------------------- 206-------------------------------------------------------------------------------
207Name: video_nr 207Name: video_nr
208Type: int array (min = 0, max = 32) 208Type: int array (min = 0, max = 32)
209Syntax: <-1|n[,...]> 209Syntax: <-1|n[,...]>
210Description: Specify V4L minor mode number. 210Description: Specify V4L minor mode number.
211 -1 = use next available 211 -1 = use next available
212 n = use minor number n 212 n = use minor number n
213 You can specify up to 32 cameras this way. 213 You can specify up to 32 cameras this way.
214 For example: 214 For example:
215 video_nr=-1,2,-1 would assign minor number 2 to the second 215 video_nr=-1,2,-1 would assign minor number 2 to the second
216 recognized camera and use auto for the first one and for every 216 recognized camera and use auto for the first one and for every
217 other camera. 217 other camera.
218Default: -1 218Default: -1
219------------------------------------------------------------------------------- 219-------------------------------------------------------------------------------
220Name: packet_size 220Name: packet_size
221Type: int array (min = 0, max = 32) 221Type: int array (min = 0, max = 32)
222Syntax: <n[,...]> 222Syntax: <n[,...]>
223Description: Specify the maximum data payload size in bytes for alternate 223Description: Specify the maximum data payload size in bytes for alternate
224 settings, for each device. n is scaled between 63 and 1023. 224 settings, for each device. n is scaled between 63 and 1023.
225Default: 1023 225Default: 1023
226------------------------------------------------------------------------------- 226-------------------------------------------------------------------------------
227Name: max_buffers 227Name: max_buffers
228Type: int array (min = 0, max = 32) 228Type: int array (min = 0, max = 32)
229Syntax: <n[,...]> 229Syntax: <n[,...]>
230Description: For advanced users. 230Description: For advanced users.
231 Specify the maximum number of video frame buffers to allocate 231 Specify the maximum number of video frame buffers to allocate
232 for each device, from 2 to 32. 232 for each device, from 2 to 32.
233Default: 2 233Default: 2
234------------------------------------------------------------------------------- 234-------------------------------------------------------------------------------
235Name: double_buffer 235Name: double_buffer
236Type: bool array (min = 0, max = 32) 236Type: bool array (min = 0, max = 32)
237Syntax: <0|1[,...]> 237Syntax: <0|1[,...]>
238Description: Hardware double buffering: 0 disabled, 1 enabled. 238Description: Hardware double buffering: 0 disabled, 1 enabled.
239 It should be enabled if you want smooth video output: if you 239 It should be enabled if you want smooth video output: if you
240 obtain out of sync. video, disable it, or try to 240 obtain out of sync. video, disable it, or try to
241 decrease the 'clockdiv' module parameter value. 241 decrease the 'clockdiv' module parameter value.
242Default: 1 for every device. 242Default: 1 for every device.
243------------------------------------------------------------------------------- 243-------------------------------------------------------------------------------
244Name: clamping 244Name: clamping
@@ -251,9 +251,9 @@ Name: filter_type
251Type: int array (min = 0, max = 32) 251Type: int array (min = 0, max = 32)
252Syntax: <0|1|2[,...]> 252Syntax: <0|1|2[,...]>
253Description: Video filter type. 253Description: Video filter type.
254 0 none, 1 (1-2-1) 3-tap filter, 2 (2-3-6-3-2) 5-tap filter. 254 0 none, 1 (1-2-1) 3-tap filter, 2 (2-3-6-3-2) 5-tap filter.
255 The filter is used to reduce noise and aliasing artifacts 255 The filter is used to reduce noise and aliasing artifacts
256 produced by the CCD or CMOS image sensor. 256 produced by the CCD or CMOS image sensor.
257Default: 0 for every device. 257Default: 0 for every device.
258------------------------------------------------------------------------------- 258-------------------------------------------------------------------------------
259Name: largeview 259Name: largeview
@@ -266,9 +266,9 @@ Name: upscaling
266Type: bool array (min = 0, max = 32) 266Type: bool array (min = 0, max = 32)
267Syntax: <0|1[,...]> 267Syntax: <0|1[,...]>
268Description: Software scaling (for non-compressed video only): 268Description: Software scaling (for non-compressed video only):
269 0 disabled, 1 enabled. 269 0 disabled, 1 enabled.
270 Disable it if you have a slow CPU or you don't have enough 270 Disable it if you have a slow CPU or you don't have enough
271 memory. 271 memory.
272Default: 0 for every device. 272Default: 0 for every device.
273Note: If 'w9968cf-vpp' is not present, this parameter is set to 0. 273Note: If 'w9968cf-vpp' is not present, this parameter is set to 0.
274------------------------------------------------------------------------------- 274-------------------------------------------------------------------------------
@@ -276,36 +276,36 @@ Name: decompression
276Type: int array (min = 0, max = 32) 276Type: int array (min = 0, max = 32)
277Syntax: <0|1|2[,...]> 277Syntax: <0|1|2[,...]>
278Description: Software video decompression: 278Description: Software video decompression:
279 0 = disables decompression 279 0 = disables decompression
280 (doesn't allow formats needing decompression). 280 (doesn't allow formats needing decompression).
281 1 = forces decompression 281 1 = forces decompression
282 (allows formats needing decompression only). 282 (allows formats needing decompression only).
283 2 = allows any permitted formats. 283 2 = allows any permitted formats.
284 Formats supporting (de)compressed video are YUV422P and 284 Formats supporting (de)compressed video are YUV422P and
285 YUV420P/YUV420 in any resolutions where width and height are 285 YUV420P/YUV420 in any resolutions where width and height are
286 multiples of 16. 286 multiples of 16.
287Default: 2 for every device. 287Default: 2 for every device.
288Note: If 'w9968cf-vpp' is not present, forcing decompression is not 288Note: If 'w9968cf-vpp' is not present, forcing decompression is not
289 allowed; in this case this parameter is set to 2. 289 allowed; in this case this parameter is set to 2.
290------------------------------------------------------------------------------- 290-------------------------------------------------------------------------------
291Name: force_palette 291Name: force_palette
292Type: int array (min = 0, max = 32) 292Type: int array (min = 0, max = 32)
293Syntax: <0|9|10|13|15|8|7|1|6|3|4|5[,...]> 293Syntax: <0|9|10|13|15|8|7|1|6|3|4|5[,...]>
294Description: Force picture palette. 294Description: Force picture palette.
295 In order: 295 In order:
296 0 = Off - allows any of the following formats: 296 0 = Off - allows any of the following formats:
297 9 = UYVY 16 bpp - Original video, compression disabled 297 9 = UYVY 16 bpp - Original video, compression disabled
298 10 = YUV420 12 bpp - Original video, compression enabled 298 10 = YUV420 12 bpp - Original video, compression enabled
299 13 = YUV422P 16 bpp - Original video, compression enabled 299 13 = YUV422P 16 bpp - Original video, compression enabled
300 15 = YUV420P 12 bpp - Original video, compression enabled 300 15 = YUV420P 12 bpp - Original video, compression enabled
301 8 = YUYV 16 bpp - Software conversion from UYVY 301 8 = YUYV 16 bpp - Software conversion from UYVY
302 7 = YUV422 16 bpp - Software conversion from UYVY 302 7 = YUV422 16 bpp - Software conversion from UYVY
303 1 = GREY 8 bpp - Software conversion from UYVY 303 1 = GREY 8 bpp - Software conversion from UYVY
304 6 = RGB555 16 bpp - Software conversion from UYVY 304 6 = RGB555 16 bpp - Software conversion from UYVY
305 3 = RGB565 16 bpp - Software conversion from UYVY 305 3 = RGB565 16 bpp - Software conversion from UYVY
306 4 = RGB24 24 bpp - Software conversion from UYVY 306 4 = RGB24 24 bpp - Software conversion from UYVY
307 5 = RGB32 32 bpp - Software conversion from UYVY 307 5 = RGB32 32 bpp - Software conversion from UYVY
308 When not 0, this parameter will override 'decompression'. 308 When not 0, this parameter will override 'decompression'.
309Default: 0 for every device. Initial palette is 9 (UYVY). 309Default: 0 for every device. Initial palette is 9 (UYVY).
310Note: If 'w9968cf-vpp' is not present, this parameter is set to 9. 310Note: If 'w9968cf-vpp' is not present, this parameter is set to 9.
311------------------------------------------------------------------------------- 311-------------------------------------------------------------------------------
@@ -313,77 +313,77 @@ Name: force_rgb
313Type: bool array (min = 0, max = 32) 313Type: bool array (min = 0, max = 32)
314Syntax: <0|1[,...]> 314Syntax: <0|1[,...]>
315Description: Read RGB video data instead of BGR: 315Description: Read RGB video data instead of BGR:
316 1 = use RGB component ordering. 316 1 = use RGB component ordering.
317 0 = use BGR component ordering. 317 0 = use BGR component ordering.
318 This parameter has effect when using RGBX palettes only. 318 This parameter has effect when using RGBX palettes only.
319Default: 0 for every device. 319Default: 0 for every device.
320------------------------------------------------------------------------------- 320-------------------------------------------------------------------------------
321Name: autobright 321Name: autobright
322Type: bool array (min = 0, max = 32) 322Type: bool array (min = 0, max = 32)
323Syntax: <0|1[,...]> 323Syntax: <0|1[,...]>
324Description: Image sensor automatically changes brightness: 324Description: Image sensor automatically changes brightness:
325 0 = no, 1 = yes 325 0 = no, 1 = yes
326Default: 0 for every device. 326Default: 0 for every device.
327------------------------------------------------------------------------------- 327-------------------------------------------------------------------------------
328Name: autoexp 328Name: autoexp
329Type: bool array (min = 0, max = 32) 329Type: bool array (min = 0, max = 32)
330Syntax: <0|1[,...]> 330Syntax: <0|1[,...]>
331Description: Image sensor automatically changes exposure: 331Description: Image sensor automatically changes exposure:
332 0 = no, 1 = yes 332 0 = no, 1 = yes
333Default: 1 for every device. 333Default: 1 for every device.
334------------------------------------------------------------------------------- 334-------------------------------------------------------------------------------
335Name: lightfreq 335Name: lightfreq
336Type: int array (min = 0, max = 32) 336Type: int array (min = 0, max = 32)
337Syntax: <50|60[,...]> 337Syntax: <50|60[,...]>
338Description: Light frequency in Hz: 338Description: Light frequency in Hz:
339 50 for European and Asian lighting, 60 for American lighting. 339 50 for European and Asian lighting, 60 for American lighting.
340Default: 50 for every device. 340Default: 50 for every device.
341------------------------------------------------------------------------------- 341-------------------------------------------------------------------------------
342Name: bandingfilter 342Name: bandingfilter
343Type: bool array (min = 0, max = 32) 343Type: bool array (min = 0, max = 32)
344Syntax: <0|1[,...]> 344Syntax: <0|1[,...]>
345Description: Banding filter to reduce effects of fluorescent 345Description: Banding filter to reduce effects of fluorescent
346 lighting: 346 lighting:
347 0 disabled, 1 enabled. 347 0 disabled, 1 enabled.
348 This filter tries to reduce the pattern of horizontal 348 This filter tries to reduce the pattern of horizontal
349 light/dark bands caused by some (usually fluorescent) lighting. 349 light/dark bands caused by some (usually fluorescent) lighting.
350Default: 0 for every device. 350Default: 0 for every device.
351------------------------------------------------------------------------------- 351-------------------------------------------------------------------------------
352Name: clockdiv 352Name: clockdiv
353Type: int array (min = 0, max = 32) 353Type: int array (min = 0, max = 32)
354Syntax: <-1|n[,...]> 354Syntax: <-1|n[,...]>
355Description: Force pixel clock divisor to a specific value (for experts): 355Description: Force pixel clock divisor to a specific value (for experts):
356 n may vary from 0 to 127. 356 n may vary from 0 to 127.
357 -1 for automatic value. 357 -1 for automatic value.
358 See also the 'double_buffer' module parameter. 358 See also the 'double_buffer' module parameter.
359Default: -1 for every device. 359Default: -1 for every device.
360------------------------------------------------------------------------------- 360-------------------------------------------------------------------------------
361Name: backlight 361Name: backlight
362Type: bool array (min = 0, max = 32) 362Type: bool array (min = 0, max = 32)
363Syntax: <0|1[,...]> 363Syntax: <0|1[,...]>
364Description: Objects are lit from behind: 364Description: Objects are lit from behind:
365 0 = no, 1 = yes 365 0 = no, 1 = yes
366Default: 0 for every device. 366Default: 0 for every device.
367------------------------------------------------------------------------------- 367-------------------------------------------------------------------------------
368Name: mirror 368Name: mirror
369Type: bool array (min = 0, max = 32) 369Type: bool array (min = 0, max = 32)
370Syntax: <0|1[,...]> 370Syntax: <0|1[,...]>
371Description: Reverse image horizontally: 371Description: Reverse image horizontally:
372 0 = no, 1 = yes 372 0 = no, 1 = yes
373Default: 0 for every device. 373Default: 0 for every device.
374------------------------------------------------------------------------------- 374-------------------------------------------------------------------------------
375Name: monochrome 375Name: monochrome
376Type: bool array (min = 0, max = 32) 376Type: bool array (min = 0, max = 32)
377Syntax: <0|1[,...]> 377Syntax: <0|1[,...]>
378Description: The image sensor is monochrome: 378Description: The image sensor is monochrome:
379 0 = no, 1 = yes 379 0 = no, 1 = yes
380Default: 0 for every device. 380Default: 0 for every device.
381------------------------------------------------------------------------------- 381-------------------------------------------------------------------------------
382Name: brightness 382Name: brightness
383Type: long array (min = 0, max = 32) 383Type: long array (min = 0, max = 32)
384Syntax: <n[,...]> 384Syntax: <n[,...]>
385Description: Set picture brightness (0-65535). 385Description: Set picture brightness (0-65535).
386 This parameter has no effect if 'autobright' is enabled. 386 This parameter has no effect if 'autobright' is enabled.
387Default: 31000 for every device. 387Default: 31000 for every device.
388------------------------------------------------------------------------------- 388-------------------------------------------------------------------------------
389Name: hue 389Name: hue
@@ -414,23 +414,23 @@ Name: debug
414Type: int 414Type: int
415Syntax: <n> 415Syntax: <n>
416Description: Debugging information level, from 0 to 6: 416Description: Debugging information level, from 0 to 6:
417 0 = none (use carefully) 417 0 = none (use carefully)
418 1 = critical errors 418 1 = critical errors
419 2 = significant informations 419 2 = significant informations
420 3 = configuration or general messages 420 3 = configuration or general messages
421 4 = warnings 421 4 = warnings
422 5 = called functions 422 5 = called functions
423 6 = function internals 423 6 = function internals
424 Level 5 and 6 are useful for testing only, when only one 424 Level 5 and 6 are useful for testing only, when only one
425 device is used. 425 device is used.
426Default: 2 426Default: 2
427------------------------------------------------------------------------------- 427-------------------------------------------------------------------------------
428Name: specific_debug 428Name: specific_debug
429Type: bool 429Type: bool
430Syntax: <0|1> 430Syntax: <0|1>
431Description: Enable or disable specific debugging messages: 431Description: Enable or disable specific debugging messages:
432 0 = print messages concerning every level <= 'debug' level. 432 0 = print messages concerning every level <= 'debug' level.
433 1 = print messages concerning the level indicated by 'debug'. 433 1 = print messages concerning the level indicated by 'debug'.
434Default: 0 434Default: 0
435------------------------------------------------------------------------------- 435-------------------------------------------------------------------------------
436 436
diff --git a/Documentation/video4linux/zc0301.txt b/Documentation/video4linux/zc0301.txt
index f55262c6733b..f406f5e80046 100644
--- a/Documentation/video4linux/zc0301.txt
+++ b/Documentation/video4linux/zc0301.txt
@@ -1,9 +1,9 @@
1 1
2 ZC0301 Image Processor and Control Chip 2 ZC0301 and ZC0301P Image Processor and Control Chip
3 Driver for Linux 3 Driver for Linux
4 ======================================= 4 ===================================================
5 5
6 - Documentation - 6 - Documentation -
7 7
8 8
9Index 9Index
@@ -51,13 +51,13 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
51 51
524. Overview and features 524. Overview and features
53======================== 53========================
54This driver supports the video interface of the devices mounting the ZC0301 54This driver supports the video interface of the devices mounting the ZC0301 or
55Image Processor and Control Chip. 55ZC0301P Image Processors and Control Chips.
56 56
57The driver relies on the Video4Linux2 and USB core modules. It has been 57The driver relies on the Video4Linux2 and USB core modules. It has been
58designed to run properly on SMP systems as well. 58designed to run properly on SMP systems as well.
59 59
60The latest version of the ZC0301 driver can be found at the following URL: 60The latest version of the ZC0301[P] driver can be found at the following URL:
61http://www.linux-projects.org/ 61http://www.linux-projects.org/
62 62
63Some of the features of the driver are: 63Some of the features of the driver are:
@@ -117,7 +117,7 @@ supported by the USB Audio driver thanks to the ALSA API:
117 117
118And finally: 118And finally:
119 119
120 # USB Multimedia devices 120 # V4L USB devices
121 # 121 #
122 CONFIG_USB_ZC0301=m 122 CONFIG_USB_ZC0301=m
123 123
@@ -146,46 +146,46 @@ Name: video_nr
146Type: short array (min = 0, max = 64) 146Type: short array (min = 0, max = 64)
147Syntax: <-1|n[,...]> 147Syntax: <-1|n[,...]>
148Description: Specify V4L2 minor mode number: 148Description: Specify V4L2 minor mode number:
149 -1 = use next available 149 -1 = use next available
150 n = use minor number n 150 n = use minor number n
151 You can specify up to 64 cameras this way. 151 You can specify up to 64 cameras this way.
152 For example: 152 For example:
153 video_nr=-1,2,-1 would assign minor number 2 to the second 153 video_nr=-1,2,-1 would assign minor number 2 to the second
154 registered camera and use auto for the first one and for every 154 registered camera and use auto for the first one and for every
155 other camera. 155 other camera.
156Default: -1 156Default: -1
157------------------------------------------------------------------------------- 157-------------------------------------------------------------------------------
158Name: force_munmap 158Name: force_munmap
159Type: bool array (min = 0, max = 64) 159Type: bool array (min = 0, max = 64)
160Syntax: <0|1[,...]> 160Syntax: <0|1[,...]>
161Description: Force the application to unmap previously mapped buffer memory 161Description: Force the application to unmap previously mapped buffer memory
162 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not 162 before calling any VIDIOC_S_CROP or VIDIOC_S_FMT ioctl's. Not
163 all the applications support this feature. This parameter is 163 all the applications support this feature. This parameter is
164 specific for each detected camera. 164 specific for each detected camera.
165 0 = do not force memory unmapping 165 0 = do not force memory unmapping
166 1 = force memory unmapping (save memory) 166 1 = force memory unmapping (save memory)
167Default: 0 167Default: 0
168------------------------------------------------------------------------------- 168-------------------------------------------------------------------------------
169Name: frame_timeout 169Name: frame_timeout
170Type: uint array (min = 0, max = 64) 170Type: uint array (min = 0, max = 64)
171Syntax: <n[,...]> 171Syntax: <n[,...]>
172Description: Timeout for a video frame in seconds. This parameter is 172Description: Timeout for a video frame in seconds. This parameter is
173 specific for each detected camera. This parameter can be 173 specific for each detected camera. This parameter can be
174 changed at runtime thanks to the /sys filesystem interface. 174 changed at runtime thanks to the /sys filesystem interface.
175Default: 2 175Default: 2
176------------------------------------------------------------------------------- 176-------------------------------------------------------------------------------
177Name: debug 177Name: debug
178Type: ushort 178Type: ushort
179Syntax: <n> 179Syntax: <n>
180Description: Debugging information level, from 0 to 3: 180Description: Debugging information level, from 0 to 3:
181 0 = none (use carefully) 181 0 = none (use carefully)
182 1 = critical errors 182 1 = critical errors
183 2 = significant informations 183 2 = significant informations
184 3 = more verbose messages 184 3 = more verbose messages
185 Level 3 is useful for testing only, when only one device 185 Level 3 is useful for testing only, when only one device
186 is used at the same time. It also shows some more informations 186 is used at the same time. It also shows some more informations
187 about the hardware being detected. This module parameter can be 187 about the hardware being detected. This module parameter can be
188 changed at runtime thanks to the /sys filesystem interface. 188 changed at runtime thanks to the /sys filesystem interface.
189Default: 2 189Default: 2
190------------------------------------------------------------------------------- 190-------------------------------------------------------------------------------
191 191
@@ -204,11 +204,25 @@ Vendor ID Product ID
2040x041e 0x4017 2040x041e 0x4017
2050x041e 0x401c 2050x041e 0x401c
2060x041e 0x401e 2060x041e 0x401e
2070x041e 0x401f
2080x041e 0x4022
2070x041e 0x4034 2090x041e 0x4034
2080x041e 0x4035 2100x041e 0x4035
2110x041e 0x4036
2120x041e 0x403a
2130x0458 0x7007
2140x0458 0x700C
2150x0458 0x700f
2160x046d 0x08ae
2170x055f 0xd003
2180x055f 0xd004
2090x046d 0x08ae 2190x046d 0x08ae
2100x0ac8 0x0301 2200x0ac8 0x0301
2210x0ac8 0x301b
2220x0ac8 0x303b
2230x10fd 0x0128
2110x10fd 0x8050 2240x10fd 0x8050
2250x10fd 0x804e
212 226
213The list above does not imply that all those devices work with this driver: up 227The list above does not imply that all those devices work with this driver: up
214until now only the ones that mount the following image sensors are supported; 228until now only the ones that mount the following image sensors are supported;
@@ -217,6 +231,7 @@ kernel messages will always tell you whether this is the case:
217Model Manufacturer 231Model Manufacturer
218----- ------------ 232----- ------------
219PAS202BCB PixArt Imaging, Inc. 233PAS202BCB PixArt Imaging, Inc.
234PB-0330 Photobit Corporation
220 235
221 236
2229. Notes for V4L2 application developers 2379. Notes for V4L2 application developers
@@ -250,5 +265,6 @@ the fingerprint is: '88E8 F32F 7244 68BA 3958 5D40 99DA 5D2A FCE6 35A4'.
250 been taken from the documentation of the ZC030x Video4Linux1 driver written 265 been taken from the documentation of the ZC030x Video4Linux1 driver written
251 by Andrew Birkett <andy@nobugs.org>; 266 by Andrew Birkett <andy@nobugs.org>;
252- The initialization values of the ZC0301 controller connected to the PAS202BCB 267- The initialization values of the ZC0301 controller connected to the PAS202BCB
253 image sensor have been taken from the SPCA5XX driver maintained by 268 and PB-0330 image sensors have been taken from the SPCA5XX driver maintained
254 Michel Xhaard <mxhaard@magic.fr>. 269 by Michel Xhaard <mxhaard@magic.fr>;
270- Stanislav Lechev donated one camera.
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
index 0dd4ef30c361..99f89aa10169 100644
--- a/Documentation/vm/page_migration
+++ b/Documentation/vm/page_migration
@@ -26,8 +26,13 @@ a process are located. See also the numa_maps manpage in the numactl package.
26Manual migration is useful if for example the scheduler has relocated 26Manual migration is useful if for example the scheduler has relocated
27a process to a processor on a distant node. A batch scheduler or an 27a process to a processor on a distant node. A batch scheduler or an
28administrator may detect the situation and move the pages of the process 28administrator may detect the situation and move the pages of the process
29nearer to the new processor. At some point in the future we may have 29nearer to the new processor. The kernel itself only provides
30some mechanism in the scheduler that will automatically move the pages. 30manual page migration support. Automatic page migration may be implemented
31through user space processes that move pages. A special function call
32"move_pages" allows the moving of individual pages within a process.
33A NUMA profiler may f.e. obtain a log showing frequent off node
34accesses and may use the result to move pages to more advantageous
35locations.
31 36
32Larger installations usually partition the system using cpusets into 37Larger installations usually partition the system using cpusets into
33sections of nodes. Paul Jackson has equipped cpusets with the ability to 38sections of nodes. Paul Jackson has equipped cpusets with the ability to
@@ -62,22 +67,14 @@ A. In kernel use of migrate_pages()
62 It also prevents the swapper or other scans to encounter 67 It also prevents the swapper or other scans to encounter
63 the page. 68 the page.
64 69
652. Generate a list of newly allocates page. These pages will contain the 702. We need to have a function of type new_page_t that can be
66 contents of the pages from the first list after page migration is 71 passed to migrate_pages(). This function should figure out
67 complete. 72 how to allocate the correct new page given the old page.
68 73
693. The migrate_pages() function is called which attempts 743. The migrate_pages() function is called which attempts
70 to do the migration. It returns the moved pages in the 75 to do the migration. It will call the function to allocate
71 list specified as the third parameter and the failed 76 the new page for each page that is considered for
72 migrations in the fourth parameter. The first parameter 77 moving.
73 will contain the pages that could still be retried.
74
754. The leftover pages of various types are returned
76 to the LRU using putback_to_lru_pages() or otherwise
77 disposed of. The pages will still have the refcount as
78 increased by isolate_lru_pages() if putback_to_lru_pages() is not
79 used! The kernel may want to handle the various cases of failures in
80 different ways.
81 78
82B. How migrate_pages() works 79B. How migrate_pages() works
83---------------------------- 80----------------------------
@@ -93,83 +90,58 @@ Steps:
93 90
942. Insure that writeback is complete. 912. Insure that writeback is complete.
95 92
963. Make sure that the page has assigned swap cache entry if 933. Prep the new page that we want to move to. It is locked
97 it is an anonyous page. The swap cache reference is necessary
98 to preserve the information contain in the page table maps while
99 page migration occurs.
100
1014. Prep the new page that we want to move to. It is locked
102 and set to not being uptodate so that all accesses to the new 94 and set to not being uptodate so that all accesses to the new
103 page immediately lock while the move is in progress. 95 page immediately lock while the move is in progress.
104 96
1055. All the page table references to the page are either dropped (file 974. The new page is prepped with some settings from the old page so that
106 backed pages) or converted to swap references (anonymous pages). 98 accesses to the new page will discover a page with the correct settings.
107 This should decrease the reference count. 99
1005. All the page table references to the page are converted
101 to migration entries or dropped (nonlinear vmas).
102 This decreases the mapcount of a page. If the resulting
103 mapcount is not zero then we do not migrate the page.
104 All user space processes that attempt to access the page
105 will now wait on the page lock.
108 106
1096. The radix tree lock is taken. This will cause all processes trying 1076. The radix tree lock is taken. This will cause all processes trying
110 to reestablish a pte to block on the radix tree spinlock. 108 to access the page via the mapping to block on the radix tree spinlock.
111 109
1127. The refcount of the page is examined and we back out if references remain 1107. The refcount of the page is examined and we back out if references remain
113 otherwise we know that we are the only one referencing this page. 111 otherwise we know that we are the only one referencing this page.
114 112
1158. The radix tree is checked and if it does not contain the pointer to this 1138. The radix tree is checked and if it does not contain the pointer to this
116 page then we back out because someone else modified the mapping first. 114 page then we back out because someone else modified the radix tree.
117
1189. The mapping is checked. If the mapping is gone then a truncate action may
119 be in progress and we back out.
120
12110. The new page is prepped with some settings from the old page so that
122 accesses to the new page will be discovered to have the correct settings.
123 115
12411. The radix tree is changed to point to the new page. 1169. The radix tree is changed to point to the new page.
125 117
12612. The reference count of the old page is dropped because the radix tree 11810. The reference count of the old page is dropped because the radix tree
127 reference is gone. 119 reference is gone. A reference to the new page is established because
120 the new page is referenced to by the radix tree.
128 121
12913. The radix tree lock is dropped. With that lookups become possible again 12211. The radix tree lock is dropped. With that lookups in the mapping
130 and other processes will move from spinning on the tree lock to sleeping on 123 become possible again. Processes will move from spinning on the tree_lock
131 the locked new page. 124 to sleeping on the locked new page.
132 125
13314. The page contents are copied to the new page. 12612. The page contents are copied to the new page.
134 127
13515. The remaining page flags are copied to the new page. 12813. The remaining page flags are copied to the new page.
136 129
13716. The old page flags are cleared to indicate that the page does 13014. The old page flags are cleared to indicate that the page does
138 not use any information anymore. 131 not provide any information anymore.
139 132
14017. Queued up writeback on the new page is triggered. 13315. Queued up writeback on the new page is triggered.
141 134
14218. If swap pte's were generated for the page then replace them with real 13516. If migration entries were inserted into the page table then replace them with real ptes. Doing
143 ptes. This will reenable access for processes not blocked by the page lock. 136 so will enable access for user space processes not already waiting for
137 the page lock.
144 138
14519. The page locks are dropped from the old and new page. 13919. The page locks are dropped from the old and new page.
146 Processes waiting on the page lock can continue. 140 Processes waiting on the page lock will redo their page faults
141 and will reach the new page.
147 142
14820. The new page is moved to the LRU and can be scanned by the swapper 14320. The new page is moved to the LRU and can be scanned by the swapper
149 etc again. 144 etc again.
150 145
151TODO list 146Christoph Lameter, May 8, 2006.
152---------
153
154- Page migration requires the use of swap handles to preserve the
155 information of the anonymous page table entries. This means that swap
156 space is reserved but never used. The maximum number of swap handles used
157 is determined by CHUNK_SIZE (see mm/mempolicy.c) per ongoing migration.
158 Reservation of pages could be avoided by having a special type of swap
159 handle that does not require swap space and that would only track the page
160 references. Something like that was proposed by Marcelo Tosatti in the
161 past (search for migration cache on lkml or linux-mm@kvack.org).
162
163- Page migration unmaps ptes for file backed pages and requires page
164 faults to reestablish these ptes. This could be optimized by somehow
165 recording the references before migration and then reestablish them later.
166 However, there are several locking challenges that have to be overcome
167 before this is possible.
168
169- Page migration generates read ptes for anonymous pages. Dirty page
170 faults are required to make the pages writable again. It may be possible
171 to generate a pte marked dirty if it is known that the page is dirty and
172 that this process has the only reference to that page.
173
174Christoph Lameter, March 8, 2006.
175 147
diff --git a/Documentation/w1/masters/ds2490 b/Documentation/w1/masters/ds2490
new file mode 100644
index 000000000000..44a4918bd7f2
--- /dev/null
+++ b/Documentation/w1/masters/ds2490
@@ -0,0 +1,18 @@
1Kernel driver ds2490
2====================
3
4Supported chips:
5 * Maxim DS2490 based
6
7Author: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
8
9
10Description
11-----------
12
13The Maxim/Dallas Semiconductor DS2490 is a chip
14which allows to build USB <-> W1 bridges.
15
16DS9490(R) is a USB <-> W1 bus master device
17which has 0x81 family ID integrated chip and DS2490
18low-level operational chip.
diff --git a/Documentation/w1/w1.generic b/Documentation/w1/w1.generic
index f937fbe1cacb..4c6509dd4789 100644
--- a/Documentation/w1/w1.generic
+++ b/Documentation/w1/w1.generic
@@ -27,8 +27,19 @@ When a w1 master driver registers with the w1 subsystem, the following occurs:
27 27
28When a device is found on the bus, w1 core checks if driver for it's family is 28When a device is found on the bus, w1 core checks if driver for it's family is
29loaded. If so, the family driver is attached to the slave. 29loaded. If so, the family driver is attached to the slave.
30If there is no driver for the family, a simple sysfs entry is created 30If there is no driver for the family, default one is assigned, which allows to perform
31for the slave device. 31almost any kind of operations. Each logical operation is a transaction
32in nature, which can contain several (two or one) low-level operations.
33Let's see how one can read EEPROM context:
341. one must write control buffer, i.e. buffer containing command byte
35and two byte address. At this step bus is reset and appropriate device
36is selected using either W1_SKIP_ROM or W1_MATCH_ROM command.
37Then provided control buffer is being written to the wire.
382. reading. This will issue reading eeprom response.
39
40It is possible that between 1. and 2. w1 master thread will reset bus for searching
41and slave device will be even removed, but in this case 0xff will
42be read, since no device was selected.
32 43
33 44
34W1 device families 45W1 device families
@@ -89,4 +100,5 @@ driver - (standard) symlink to the w1 driver
89name - the device name, usually the same as the directory name 100name - the device name, usually the same as the directory name
90w1_slave - (optional) a binary file whose meaning depends on the 101w1_slave - (optional) a binary file whose meaning depends on the
91 family driver 102 family driver
92 103rw - (optional) created for slave devices which do not have
104 appropriate family driver. Allows to read/write binary data.
diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink
new file mode 100644
index 000000000000..3640c7c87d45
--- /dev/null
+++ b/Documentation/w1/w1.netlink
@@ -0,0 +1,98 @@
1Userspace communication protocol over connector [1].
2
3
4Message types.
5=============
6
7There are three types of messages between w1 core and userspace:
81. Events. They are generated each time new master or slave device found
9 either due to automatic or requested search.
102. Userspace commands. Includes read/write and search/alarm search commands.
113. Replies to userspace commands.
12
13
14Protocol.
15========
16
17[struct cn_msg] - connector header. Its length field is equal to the size of the attached data.
18[struct w1_netlink_msg] - w1 netlink header.
19 __u8 type - message type.
20 W1_SLAVE_ADD/W1_SLAVE_REMOVE - slave add/remove events.
21 W1_MASTER_ADD/W1_MASTER_REMOVE - master add/remove events.
22 W1_MASTER_CMD - userspace command for bus master device (search/alarm search).
23 W1_SLAVE_CMD - userspace command for slave device (read/write/ search/alarm search
24 for bus master device where given slave device found).
25 __u8 res - reserved
26 __u16 len - size of attached to this header data.
27 union {
28 __u8 id; - slave unique device id
29 struct w1_mst {
30 __u32 id; - master's id.
31 __u32 res; - reserved
32 } mst;
33 } id;
34
35[struct w1_netlink_cmd] - command for given master or slave device.
36 __u8 cmd - command opcode.
37 W1_CMD_READ - read command.
38 W1_CMD_WRITE - write command.
39 W1_CMD_SEARCH - search command.
40 W1_CMD_ALARM_SEARCH - alarm search command.
41 __u8 res - reserved
42 __u16 len - length of data for this command.
43 For read command data must be allocated like for write command.
44 __u8 data[0] - data for this command.
45
46
47Each connector message can include one or more w1_netlink_msg with zero or more attached w1_netlink_cmd messages.
48
49For event messages there are no w1_netlink_cmd embedded structures, only connector header
50and w1_netlink_msg structure with "len" field being zero and filled type (one of event types)
51and id - either 8 bytes of slave unique id in host order, or master's id, which is assigned
52to bus master device when it is added to w1 core.
53
54Currently replies to userspace commands are only generated for read command request.
55One reply is generated exactly for one w1_netlink_cmd read request.
56Replies are not combined when sent - i.e. typical reply messages looks like the following:
57[cn_msg][w1_netlink_msg][w1_netlink_cmd]
58cn_msg.len = sizeof(struct w1_netlink_msg) + sizeof(struct w1_netlink_cmd) + cmd->len;
59w1_netlink_msg.len = sizeof(struct w1_netlink_cmd) + cmd->len;
60w1_netlink_cmd.len = cmd->len;
61
62
63Operation steps in w1 core when new command is received.
64=======================================================
65
66When new message (w1_netlink_msg) is received w1 core detects if it is master or slave request,
67according to w1_netlink_msg.type field.
68Then master or slave device is searched for.
69When found, master device (requested or the one on which the slave device is found) is locked.
70If slave command is requested, then reset/select procedure is started to select given device.
71
72Then all requested in w1_netlink_msg operations are performed one by one.
73If command requires reply (like read command) it is sent on command completion.
74
75When all commands (w1_netlink_cmd) are processed master device is unlocked
76and next w1_netlink_msg header processing started.
77
78
79Connector [1] specific documentation.
80====================================
81
82Each connector message includes two u32 fields as "address".
83w1 uses CN_W1_IDX and CN_W1_VAL defined in include/linux/connector.h header.
84Each message also includes sequence and acknowledge numbers.
85Sequence number for event messages is appropriate bus master sequence number increased with
86each event message sent "through" this master.
87Sequence number for userspace requests is set by userspace application.
88Sequence number for reply is the same as was in request, and
89acknowledge number is set to seq+1.
90
91
92Additional documentation, source code examples.
93============================================
94
951. Documentation/connector
962. http://tservice.net.ru/~s0mbre/archive/w1
97This archive includes userspace application w1d.c which
98uses read/write/search commands for all master/slave devices found on the bus.
diff --git a/Documentation/watchdog/pcwd-watchdog.txt b/Documentation/watchdog/pcwd-watchdog.txt
index 12187a33e310..d9ee6336c1d4 100644
--- a/Documentation/watchdog/pcwd-watchdog.txt
+++ b/Documentation/watchdog/pcwd-watchdog.txt
@@ -22,78 +22,9 @@
22 to run the program with an "&" to run it in the background!) 22 to run the program with an "&" to run it in the background!)
23 23
24 If you want to write a program to be compatible with the PC Watchdog 24 If you want to write a program to be compatible with the PC Watchdog
25 driver, simply do the following: 25 driver, simply use or modify the watchdog test program:
26 26 Documentation/watchdog/src/watchdog-test.c
27-- Snippet of code -- 27
28/*
29 * Watchdog Driver Test Program
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <unistd.h>
36#include <fcntl.h>
37#include <sys/ioctl.h>
38#include <linux/types.h>
39#include <linux/watchdog.h>
40
41int fd;
42
43/*
44 * This function simply sends an IOCTL to the driver, which in turn ticks
45 * the PC Watchdog card to reset its internal timer so it doesn't trigger
46 * a computer reset.
47 */
48void keep_alive(void)
49{
50 int dummy;
51
52 ioctl(fd, WDIOC_KEEPALIVE, &dummy);
53}
54
55/*
56 * The main program. Run the program with "-d" to disable the card,
57 * or "-e" to enable the card.
58 */
59int main(int argc, char *argv[])
60{
61 fd = open("/dev/watchdog", O_WRONLY);
62
63 if (fd == -1) {
64 fprintf(stderr, "Watchdog device not enabled.\n");
65 fflush(stderr);
66 exit(-1);
67 }
68
69 if (argc > 1) {
70 if (!strncasecmp(argv[1], "-d", 2)) {
71 ioctl(fd, WDIOC_SETOPTIONS, WDIOS_DISABLECARD);
72 fprintf(stderr, "Watchdog card disabled.\n");
73 fflush(stderr);
74 exit(0);
75 } else if (!strncasecmp(argv[1], "-e", 2)) {
76 ioctl(fd, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
77 fprintf(stderr, "Watchdog card enabled.\n");
78 fflush(stderr);
79 exit(0);
80 } else {
81 fprintf(stderr, "-d to disable, -e to enable.\n");
82 fprintf(stderr, "run by itself to tick the card.\n");
83 fflush(stderr);
84 exit(0);
85 }
86 } else {
87 fprintf(stderr, "Watchdog Ticking Away!\n");
88 fflush(stderr);
89 }
90
91 while(1) {
92 keep_alive();
93 sleep(1);
94 }
95}
96-- End snippet --
97 28
98 Other IOCTL functions include: 29 Other IOCTL functions include:
99 30
diff --git a/Documentation/watchdog/src/watchdog-simple.c b/Documentation/watchdog/src/watchdog-simple.c
new file mode 100644
index 000000000000..85cf17c48669
--- /dev/null
+++ b/Documentation/watchdog/src/watchdog-simple.c
@@ -0,0 +1,15 @@
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
3
/*
 * Minimal watchdog daemon.
 *
 * Opens /dev/watchdog and pings it every 10 seconds by writing a single
 * byte to the device.  The command-line arguments are not used.
 *
 * Exits with status 1 if the device cannot be opened or if a ping
 * cannot be written; otherwise it loops forever.
 */
int main(int argc, const char *argv[]) {
	int fd;

	(void)argc;
	(void)argv;

	fd = open("/dev/watchdog", O_WRONLY);
	if (fd == -1) {
		perror("watchdog");
		exit(1);
	}
	while (1) {
		/*
		 * A failed ping must not go unnoticed: report it and stop
		 * instead of sleeping and retrying silently.
		 */
		if (write(fd, "\0", 1) != 1) {
			perror("watchdog");
			exit(1);
		}
		fsync(fd);
		sleep(10);
	}
}
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
new file mode 100644
index 000000000000..65f6c19cb865
--- /dev/null
+++ b/Documentation/watchdog/src/watchdog-test.c
@@ -0,0 +1,68 @@
1/*
2 * Watchdog Driver Test Program
3 */
4
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <unistd.h>
9#include <fcntl.h>
10#include <sys/ioctl.h>
11#include <linux/types.h>
12#include <linux/watchdog.h>
13
14int fd;
15
16/*
17 * This function simply sends an IOCTL to the driver, which in turn ticks
18 * the PC Watchdog card to reset its internal timer so it doesn't trigger
19 * a computer reset.
20 */
21void keep_alive(void)
22{
23 int dummy;
24
25 ioctl(fd, WDIOC_KEEPALIVE, &dummy);
26}
27
28/*
29 * The main program. Run the program with "-d" to disable the card,
30 * or "-e" to enable the card.
31 */
32int main(int argc, char *argv[])
33{
34 fd = open("/dev/watchdog", O_WRONLY);
35
36 if (fd == -1) {
37 fprintf(stderr, "Watchdog device not enabled.\n");
38 fflush(stderr);
39 exit(-1);
40 }
41
42 if (argc > 1) {
43 if (!strncasecmp(argv[1], "-d", 2)) {
44 ioctl(fd, WDIOC_SETOPTIONS, WDIOS_DISABLECARD);
45 fprintf(stderr, "Watchdog card disabled.\n");
46 fflush(stderr);
47 exit(0);
48 } else if (!strncasecmp(argv[1], "-e", 2)) {
49 ioctl(fd, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
50 fprintf(stderr, "Watchdog card enabled.\n");
51 fflush(stderr);
52 exit(0);
53 } else {
54 fprintf(stderr, "-d to disable, -e to enable.\n");
55 fprintf(stderr, "run by itself to tick the card.\n");
56 fflush(stderr);
57 exit(0);
58 }
59 } else {
60 fprintf(stderr, "Watchdog Ticking Away!\n");
61 fflush(stderr);
62 }
63
64 while(1) {
65 keep_alive();
66 sleep(1);
67 }
68}
diff --git a/Documentation/watchdog/watchdog-api.txt b/Documentation/watchdog/watchdog-api.txt
index 21ed51173662..958ff3d48be3 100644
--- a/Documentation/watchdog/watchdog-api.txt
+++ b/Documentation/watchdog/watchdog-api.txt
@@ -34,22 +34,7 @@ activates as soon as /dev/watchdog is opened and will reboot unless
34the watchdog is pinged within a certain time, this time is called the 34the watchdog is pinged within a certain time, this time is called the
35timeout or margin. The simplest way to ping the watchdog is to write 35timeout or margin. The simplest way to ping the watchdog is to write
36some data to the device. So a very simple watchdog daemon would look 36some data to the device. So a very simple watchdog daemon would look
37like this: 37like this source file: see Documentation/watchdog/src/watchdog-simple.c
38
39#include <stdlib.h>
40#include <fcntl.h>
41
42int main(int argc, const char *argv[]) {
43 int fd=open("/dev/watchdog",O_WRONLY);
44 if (fd==-1) {
45 perror("watchdog");
46 exit(1);
47 }
48 while(1) {
49 write(fd, "\0", 1);
50 sleep(10);
51 }
52}
53 38
54A more advanced driver could for example check that a HTTP server is 39A more advanced driver could for example check that a HTTP server is
55still responding before doing the write call to ping the watchdog. 40still responding before doing the write call to ping the watchdog.
@@ -110,7 +95,40 @@ current timeout using the GETTIMEOUT ioctl.
110 ioctl(fd, WDIOC_GETTIMEOUT, &timeout); 95 ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
111 printf("The timeout was is %d seconds\n", timeout); 96 printf("The timeout was is %d seconds\n", timeout);
112 97
113Envinronmental monitoring: 98Pretimeouts:
99
100Some watchdog timers can be set to have a trigger go off before the
101actual time they will reset the system. This can be done with an NMI,
102interrupt, or other mechanism. This allows Linux to record useful
103information (like panic information and kernel coredumps) before it
104resets.
105
106 pretimeout = 10;
107 ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
108
109Note that the pretimeout is the number of seconds before the time
110when the timeout will go off. It is not the number of seconds until
111the pretimeout. So, for instance, if you set the timeout to 60 seconds
112and the pretimeout to 10 seconds, the pretimeout will go off in 50
113seconds. Setting a pretimeout to zero disables it.
114
115There is also a get function for getting the pretimeout:
116
117 ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
118 printf("The pretimeout is %d seconds\n", timeout);
119
120Not all watchdog drivers will support a pretimeout.
121
122Get the number of seconds before reboot:
123
124Some watchdog drivers have the ability to report the remaining time
125before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
126that returns the number of seconds before reboot.
127
128 ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
129 printf("The time left is %d seconds\n", timeleft);
130
131Environmental monitoring:
114 132
115All watchdog drivers are required to return more information about the system, 133All watchdog drivers are required to return more information about the system,
116some do temperature, fan and power level monitoring, some can tell you 134some do temperature, fan and power level monitoring, some can tell you
@@ -169,6 +187,10 @@ The watchdog saw a keepalive ping since it was last queried.
169 187
170 WDIOF_SETTIMEOUT Can set/get the timeout 188 WDIOF_SETTIMEOUT Can set/get the timeout
171 189
190The watchdog can do pretimeouts.
191
192 WDIOF_PRETIMEOUT Pretimeout (in seconds), get/set
193
172 194
173For those drivers that return any bits set in the option field, the 195For those drivers that return any bits set in the option field, the
174GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current 196GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
diff --git a/Documentation/watchdog/watchdog.txt b/Documentation/watchdog/watchdog.txt
index dffda29c8799..4b1ff69cc19a 100644
--- a/Documentation/watchdog/watchdog.txt
+++ b/Documentation/watchdog/watchdog.txt
@@ -65,28 +65,7 @@ The external event interfaces on the WDT boards are not currently supported.
65Minor numbers are however allocated for it. 65Minor numbers are however allocated for it.
66 66
67 67
68Example Watchdog Driver 68Example Watchdog Driver: see Documentation/watchdog/src/watchdog-simple.c
69-----------------------
70
71#include <stdio.h>
72#include <unistd.h>
73#include <fcntl.h>
74
75int main(int argc, const char *argv[])
76{
77 int fd=open("/dev/watchdog",O_WRONLY);
78 if(fd==-1)
79 {
80 perror("watchdog");
81 exit(1);
82 }
83 while(1)
84 {
85 write(fd,"\0",1);
86 fsync(fd);
87 sleep(10);
88 }
89}
90 69
91 70
92Contact Information 71Contact Information
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index f2cd6ef53ff3..6887d44d2661 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -205,6 +205,27 @@ IOMMU
205 pages Prereserve that many 128K pages for the software IO bounce buffering. 205 pages Prereserve that many 128K pages for the software IO bounce buffering.
206 force Force all IO through the software TLB. 206 force Force all IO through the software TLB.
207 207
208 calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
209 calgary=[translate_empty_slots]
210 calgary=[disable=<PCI bus number>]
211
212 64k,...,8M - Set the size of each PCI slot's translation table
213 when using the Calgary IOMMU. This is the size of the translation
214 table itself in main memory. The smallest table, 64k, covers an IO
215 space of 32MB; the largest, 8MB table, can cover an IO space of
216 4GB. Normally the kernel will make the right choice by itself.
217
218 translate_empty_slots - Enable translation even on slots that have
219 no devices attached to them, in case a device will be hotplugged
220 in the future.
221
222 disable=<PCI bus number> - Disable translation on a given PHB. For
223 example, the built-in graphics adapter resides on the first bridge
224 (PCI bus number 0); if translation (isolation) is enabled on this
225 bridge, X servers that access the hardware directly from user
226 space might stop working. Use this option if you have devices that
227 are accessed from userspace directly on some PCI host bridge.
228
208Debugging 229Debugging
209 230
210 oops=panic Always panic on oopses. Default is to just kill the process, 231 oops=panic Always panic on oopses. Default is to just kill the process,