aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/00-INDEX22
-rw-r--r--Documentation/ABI/obsolete/o2cb11
-rw-r--r--Documentation/ABI/stable/o2cb10
-rw-r--r--Documentation/ABI/stable/sysfs-class-ubi212
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci11
-rw-r--r--Documentation/ABI/testing/sysfs-class-bdi50
-rw-r--r--Documentation/ABI/testing/sysfs-ibft23
-rw-r--r--Documentation/ABI/testing/sysfs-ocfs289
-rw-r--r--Documentation/DMA-API.txt69
-rw-r--r--Documentation/DMA-attributes.txt24
-rw-r--r--Documentation/DMA-mapping.txt38
-rw-r--r--Documentation/DocBook/Makefile10
-rw-r--r--Documentation/DocBook/debugobjects.tmpl391
-rw-r--r--Documentation/DocBook/kernel-api.tmpl61
-rw-r--r--Documentation/DocBook/kernel-locking.tmpl35
-rw-r--r--Documentation/DocBook/kgdb.tmpl441
-rw-r--r--Documentation/DocBook/mac80211.tmpl335
-rw-r--r--Documentation/DocBook/rapidio.tmpl1
-rw-r--r--Documentation/DocBook/writing_usb_driver.tmpl14
-rw-r--r--Documentation/HOWTO30
-rw-r--r--Documentation/PCI/00-INDEX12
-rw-r--r--Documentation/PCI/PCIEBUS-HOWTO.txt (renamed from Documentation/PCIEBUS-HOWTO.txt)12
-rw-r--r--Documentation/PCI/pci-error-recovery.txt (renamed from Documentation/pci-error-recovery.txt)0
-rw-r--r--Documentation/PCI/pci.txt (renamed from Documentation/pci.txt)2
-rw-r--r--Documentation/PCI/pcieaer-howto.txt (renamed from Documentation/pcieaer-howto.txt)2
-rw-r--r--Documentation/SubmittingPatches106
-rw-r--r--Documentation/arm/Samsung-S3C24XX/NAND.txt30
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Overview.txt2
-rw-r--r--Documentation/block/biodoc.txt2
-rw-r--r--Documentation/braille-console.txt34
-rw-r--r--Documentation/cciss.txt5
-rw-r--r--Documentation/cdrom/cdrom-standard.tex2
-rw-r--r--Documentation/cgroups.txt7
-rw-r--r--Documentation/cli-sti-removal.txt2
-rw-r--r--Documentation/controllers/devices.txt48
-rw-r--r--Documentation/controllers/resource_counter.txt181
-rw-r--r--Documentation/cpu-freq/governors.txt8
-rw-r--r--Documentation/cpu-freq/user-guide.txt14
-rw-r--r--Documentation/cpusets.txt105
-rw-r--r--Documentation/debugging-via-ohci1394.txt16
-rw-r--r--Documentation/device-mapper/dm-crypt.txt52
-rw-r--r--Documentation/dontdiff3
-rw-r--r--Documentation/early-userspace/README4
-rw-r--r--Documentation/fb/gxfb.txt52
-rw-r--r--Documentation/fb/intelfb.txt2
-rw-r--r--Documentation/fb/lxfb.txt52
-rw-r--r--Documentation/fb/metronomefb.txt16
-rw-r--r--Documentation/fb/modedb.txt4
-rw-r--r--Documentation/feature-removal-schedule.txt119
-rw-r--r--Documentation/filesystems/00-INDEX6
-rw-r--r--Documentation/filesystems/Locking5
-rw-r--r--Documentation/filesystems/ext4.txt12
-rw-r--r--Documentation/filesystems/nfs-rdma.txt256
-rw-r--r--Documentation/filesystems/nfsroot.txt (renamed from Documentation/nfsroot.txt)0
-rw-r--r--Documentation/filesystems/proc.txt59
-rw-r--r--Documentation/filesystems/rpc-cache.txt (renamed from Documentation/rpc-cache.txt)0
-rw-r--r--Documentation/filesystems/seq_file.txt294
-rw-r--r--Documentation/filesystems/sysfs-pci.txt1
-rw-r--r--Documentation/filesystems/sysfs.txt9
-rw-r--r--Documentation/filesystems/tmpfs.txt12
-rw-r--r--Documentation/filesystems/vfat.txt15
-rw-r--r--Documentation/filesystems/vfs.txt4
-rw-r--r--Documentation/filesystems/xfs.txt15
-rw-r--r--Documentation/firmware_class/firmware_sample_driver.c115
-rw-r--r--Documentation/firmware_class/firmware_sample_firmware_class.c207
-rw-r--r--Documentation/gpio.txt10
-rw-r--r--Documentation/highuid.txt2
-rw-r--r--Documentation/hwmon/adt74733
-rw-r--r--Documentation/hwmon/ibmaem37
-rw-r--r--Documentation/hwmon/sysfs-interface33
-rw-r--r--Documentation/hwmon/w83l785ts3
-rw-r--r--Documentation/i2c/functionality95
-rw-r--r--Documentation/i2c/smbus-protocol81
-rw-r--r--Documentation/i2c/writing-clients21
-rw-r--r--Documentation/i386/IO-APIC.txt2
-rw-r--r--Documentation/i386/boot.txt64
-rw-r--r--Documentation/ia64/kvm.txt82
-rw-r--r--Documentation/ide/ide-tape.txt211
-rw-r--r--Documentation/ide/ide.txt146
-rw-r--r--Documentation/ide/warm-plug-howto.txt13
-rw-r--r--Documentation/ioctl-number.txt2
-rw-r--r--Documentation/kbuild/kconfig-language.txt41
-rw-r--r--Documentation/kbuild/modules.txt9
-rw-r--r--Documentation/kdump/kdump.txt5
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt99
-rw-r--r--Documentation/kernel-docs.txt8
-rw-r--r--Documentation/kernel-parameters.txt110
-rw-r--r--Documentation/keys-request-key.txt11
-rw-r--r--Documentation/keys.txt59
-rw-r--r--Documentation/kobject.txt2
-rw-r--r--Documentation/kprobes.txt51
-rw-r--r--Documentation/laptops/acer-wmi.txt2
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt141
-rw-r--r--Documentation/leds-class.txt12
-rw-r--r--Documentation/lguest/lguest.c142
-rw-r--r--Documentation/lguest/lguest.txt19
-rw-r--r--Documentation/magic-number.txt1
-rw-r--r--Documentation/md.txt6
-rw-r--r--Documentation/memory-barriers.txt16
-rw-r--r--Documentation/mips/AU1xxx_IDE.README46
-rw-r--r--Documentation/networking/00-INDEX5
-rw-r--r--Documentation/networking/arcnet.txt2
-rw-r--r--Documentation/networking/bcm43xx.txt89
-rw-r--r--Documentation/networking/bridge.txt2
-rw-r--r--Documentation/networking/can.txt8
-rw-r--r--Documentation/networking/ip-sysctl.txt12
-rw-r--r--Documentation/networking/phy.txt38
-rw-r--r--Documentation/networking/s2io.txt6
-rw-r--r--Documentation/networking/sk98lin.txt568
-rw-r--r--Documentation/networking/wan-router.txt621
-rw-r--r--Documentation/nmi_watchdog.txt3
-rw-r--r--Documentation/oops-tracing.txt4
-rw-r--r--Documentation/power/devices.txt5
-rw-r--r--Documentation/powerpc/booting-without-of.txt666
-rw-r--r--Documentation/powerpc/kvm_440.txt41
-rw-r--r--Documentation/powerpc/mpc52xx-device-tree-bindings.txt23
-rw-r--r--Documentation/powerpc/phyp-assisted-dump.txt127
-rw-r--r--Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c96
-rw-r--r--Documentation/prctl/disable-tsc-on-off-stress-test.c95
-rw-r--r--Documentation/prctl/disable-tsc-test.c94
-rw-r--r--Documentation/s390/CommonIO11
-rw-r--r--Documentation/s390/kvm.txt125
-rw-r--r--Documentation/s390/s390dbf.txt21
-rw-r--r--Documentation/sched-rt-group.txt59
-rw-r--r--Documentation/scheduler/00-INDEX2
-rw-r--r--Documentation/scheduler/sched-design.txt165
-rw-r--r--Documentation/scheduler/sched-rt-group.txt177
-rw-r--r--Documentation/scsi/ChangeLog.megaraid_sas22
-rw-r--r--Documentation/scsi/st.txt12
-rw-r--r--Documentation/smart-config.txt98
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt48
-rw-r--r--Documentation/spi/spi-summary15
-rw-r--r--Documentation/spi/spidev168
-rw-r--r--Documentation/spi/spidev_fdx.c158
-rw-r--r--Documentation/spinlocks.txt22
-rw-r--r--Documentation/sysrq.txt2
-rw-r--r--Documentation/thermal/sysfs-api.txt33
-rw-r--r--Documentation/timers/highres.txt (renamed from Documentation/hrtimers/highres.txt)2
-rw-r--r--Documentation/timers/hrtimers.txt (renamed from Documentation/hrtimers/hrtimers.txt)0
-rw-r--r--Documentation/timers/timer_stats.txt (renamed from Documentation/hrtimer/timer_stats.txt)0
-rw-r--r--Documentation/unaligned-memory-access.txt4
-rw-r--r--Documentation/usb/anchors.txt50
-rw-r--r--Documentation/usb/callbacks.txt132
-rw-r--r--Documentation/usb/persist.txt43
-rw-r--r--Documentation/usb/usb-serial.txt7
-rw-r--r--Documentation/video4linux/CARDLIST.au08284
-rw-r--r--Documentation/video4linux/CARDLIST.bttv2
-rw-r--r--Documentation/video4linux/CARDLIST.cx238853
-rw-r--r--Documentation/video4linux/CARDLIST.cx889
-rw-r--r--Documentation/video4linux/CARDLIST.em28xx2
-rw-r--r--Documentation/video4linux/CARDLIST.saa713416
-rw-r--r--Documentation/video4linux/cx18.txt36
-rw-r--r--Documentation/video4linux/extract_xc3028.pl46
-rw-r--r--Documentation/vm/hugetlbpage.txt7
-rw-r--r--Documentation/vm/numa_memory_policy.txt281
-rw-r--r--Documentation/vm/pagemap.txt77
-rw-r--r--Documentation/vm/slabinfo.c37
-rw-r--r--Documentation/x86/pat.txt100
-rw-r--r--Documentation/x86_64/boot-options.txt5
159 files changed, 6762 insertions, 3110 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
index fc8e7c7d182f..1977fab38656 100644
--- a/Documentation/00-INDEX
+++ b/Documentation/00-INDEX
@@ -25,8 +25,6 @@ DMA-API.txt
25 - DMA API, pci_ API & extensions for non-consistent memory machines. 25 - DMA API, pci_ API & extensions for non-consistent memory machines.
26DMA-ISA-LPC.txt 26DMA-ISA-LPC.txt
27 - How to do DMA with ISA (and LPC) devices. 27 - How to do DMA with ISA (and LPC) devices.
28DMA-mapping.txt
29 - info for PCI drivers using DMA portably across all platforms.
30DocBook/ 28DocBook/
31 - directory with DocBook templates etc. for kernel documentation. 29 - directory with DocBook templates etc. for kernel documentation.
32HOWTO 30HOWTO
@@ -43,8 +41,6 @@ ManagementStyle
43 - how to (attempt to) manage kernel hackers. 41 - how to (attempt to) manage kernel hackers.
44MSI-HOWTO.txt 42MSI-HOWTO.txt
45 - the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ. 43 - the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ.
46PCIEBUS-HOWTO.txt
47 - a guide describing the PCI Express Port Bus driver.
48RCU/ 44RCU/
49 - directory with info on RCU (read-copy update). 45 - directory with info on RCU (read-copy update).
50README.DAC960 46README.DAC960
@@ -167,10 +163,8 @@ highuid.txt
167 - notes on the change from 16 bit to 32 bit user/group IDs. 163 - notes on the change from 16 bit to 32 bit user/group IDs.
168hpet.txt 164hpet.txt
169 - High Precision Event Timer Driver for Linux. 165 - High Precision Event Timer Driver for Linux.
170hrtimer/ 166timers/
171 - info on the timer_stats debugging facility for timer (ab)use. 167 - info on timer-related topics.
172hrtimers/
173 - info on the hrtimers subsystem for high-resolution kernel timers.
174hw_random.txt 168hw_random.txt
175 - info on Linux support for random number generator in i8xx chipsets. 169 - info on Linux support for random number generator in i8xx chipsets.
176hwmon/ 170hwmon/
@@ -271,8 +265,6 @@ netlabel/
271 - directory with information on the NetLabel subsystem. 265 - directory with information on the NetLabel subsystem.
272networking/ 266networking/
273 - directory with info on various aspects of networking with Linux. 267 - directory with info on various aspects of networking with Linux.
274nfsroot.txt
275 - short guide on setting up a diskless box with NFS root filesystem.
276nmi_watchdog.txt 268nmi_watchdog.txt
277 - info on NMI watchdog for SMP systems. 269 - info on NMI watchdog for SMP systems.
278nommu-mmap.txt 270nommu-mmap.txt
@@ -289,12 +281,6 @@ parport.txt
289 - how to use the parallel-port driver. 281 - how to use the parallel-port driver.
290parport-lowlevel.txt 282parport-lowlevel.txt
291 - description and usage of the low level parallel port functions. 283 - description and usage of the low level parallel port functions.
292pci-error-recovery.txt
293 - info on PCI error recovery.
294pci.txt
295 - info on the PCI subsystem for device driver authors.
296pcieaer-howto.txt
297 - the PCI Express Advanced Error Reporting Driver Guide HOWTO.
298pcmcia/ 284pcmcia/
299 - info on the Linux PCMCIA driver. 285 - info on the Linux PCMCIA driver.
300pi-futex.txt 286pi-futex.txt
@@ -321,8 +307,6 @@ robust-futexes.txt
321 - a description of what robust futexes are. 307 - a description of what robust futexes are.
322rocket.txt 308rocket.txt
323 - info on the Comtrol RocketPort multiport serial driver. 309 - info on the Comtrol RocketPort multiport serial driver.
324rpc-cache.txt
325 - introduction to the caching mechanisms in the sunrpc layer.
326rt-mutex-design.txt 310rt-mutex-design.txt
327 - description of the RealTime mutex implementation design. 311 - description of the RealTime mutex implementation design.
328rt-mutex.txt 312rt-mutex.txt
@@ -345,8 +329,6 @@ sgi-visws.txt
345 - short blurb on the SGI Visual Workstations. 329 - short blurb on the SGI Visual Workstations.
346sh/ 330sh/
347 - directory with info on porting Linux to a new architecture. 331 - directory with info on porting Linux to a new architecture.
348smart-config.txt
349 - description of the Smart Config makefile feature.
350sound/ 332sound/
351 - directory with info on sound card support. 333 - directory with info on sound card support.
352sparc/ 334sparc/
diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb
new file mode 100644
index 000000000000..9c49d8e6c0cc
--- /dev/null
+++ b/Documentation/ABI/obsolete/o2cb
@@ -0,0 +1,11 @@
1What: /sys/o2cb symlink
2Date: Dec 2005
3KernelVersion: 2.6.16
4Contact: ocfs2-devel@oss.oracle.com
5Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will
6 be removed when new versions of ocfs2-tools which know to look
7 in /sys/fs/o2cb are sufficiently prevalent. Don't code new
8 software to look here, it should try /sys/fs/o2cb instead.
9 See Documentation/ABI/stable/o2cb for more information on usage.
10Users: ocfs2-tools. It's sufficient to mail proposed changes to
11 ocfs2-devel@oss.oracle.com.
diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb
new file mode 100644
index 000000000000..5eb1545e0b8d
--- /dev/null
+++ b/Documentation/ABI/stable/o2cb
@@ -0,0 +1,10 @@
1What: /sys/fs/o2cb/ (was /sys/o2cb)
2Date: Dec 2005
3KernelVersion: 2.6.16
4Contact: ocfs2-devel@oss.oracle.com
5Description: Ocfs2-tools looks at 'interface-revision' for versioning
6 information. Each logmask/ file controls a set of debug prints
7 and can be written into with the strings "allow", "deny", or
8 "off". Reading the file returns the current state.
9Users: ocfs2-tools. It's sufficient to mail proposed changes to
10 ocfs2-devel@oss.oracle.com.
diff --git a/Documentation/ABI/stable/sysfs-class-ubi b/Documentation/ABI/stable/sysfs-class-ubi
new file mode 100644
index 000000000000..18d471d9faea
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-ubi
@@ -0,0 +1,212 @@
1What: /sys/class/ubi/
2Date: July 2006
3KernelVersion: 2.6.22
4Contact: Artem Bityutskiy <dedekind@infradead.org>
5Description:
6 The ubi/ class sub-directory belongs to the UBI subsystem and
7 provides general UBI information, per-UBI device information
8 and per-UBI volume information.
9
10What: /sys/class/ubi/version
11Date: July 2006
12KernelVersion: 2.6.22
13Contact: Artem Bityutskiy <dedekind@infradead.org>
14Description:
15 This file contains the version of the latest supported UBI on-media
16 format. Currently it is 1, and there is no plan to change this.
17 However, if in the future UBI needs on-flash format changes
18 which cannot be done in a compatible manner, a new format
19 version will be added. So this is a mechanism for possible
20 future backward-compatible (but forward-incompatible)
21 improvements.
22
23What: /sys/class/ubiX/
24Date: July 2006
25KernelVersion: 2.6.22
26Contact: Artem Bityutskiy <dedekind@infradead.org>
27Description:
28 The /sys/class/ubi0, /sys/class/ubi1, etc directories describe
29 UBI devices (UBI device 0, 1, etc). They contain general UBI
30 device information and per UBI volume information (each UBI
31 device may have many UBI volumes)
32
33What: /sys/class/ubi/ubiX/avail_eraseblocks
34Date: July 2006
35KernelVersion: 2.6.22
36Contact: Artem Bityutskiy <dedekind@infradead.org>
37Description:
38 Amount of available logical eraseblocks. For example, one may
39 create a new UBI volume which has this amount of logical
40 eraseblocks.
41
42What: /sys/class/ubi/ubiX/bad_peb_count
43Date: July 2006
44KernelVersion: 2.6.22
45Contact: Artem Bityutskiy <dedekind@infradead.org>
46Description:
47 Count of bad physical eraseblocks on the underlying MTD device.
48
49What: /sys/class/ubi/ubiX/bgt_enabled
50Date: July 2006
51KernelVersion: 2.6.22
52Contact: Artem Bityutskiy <dedekind@infradead.org>
53Description:
54 Contains ASCII "0\n" if the UBI background thread is disabled,
55 and ASCII "1\n" if it is enabled.
56
57What: /sys/class/ubi/ubiX/dev
58Date: July 2006
59KernelVersion: 2.6.22
60Contact: Artem Bityutskiy <dedekind@infradead.org>
61Description:
62 Major and minor numbers of the character device corresponding
63 to this UBI device (in <major>:<minor> format).
64
65What: /sys/class/ubi/ubiX/eraseblock_size
66Date: July 2006
67KernelVersion: 2.6.22
68Contact: Artem Bityutskiy <dedekind@infradead.org>
69Description:
70 Maximum logical eraseblock size this UBI device may provide. UBI
71 volumes may have smaller logical eraseblock size because of their
72 alignment.
73
74What: /sys/class/ubi/ubiX/max_ec
75Date: July 2006
76KernelVersion: 2.6.22
77Contact: Artem Bityutskiy <dedekind@infradead.org>
78Description:
79 Maximum physical eraseblock erase counter value.
80
81What: /sys/class/ubi/ubiX/max_vol_count
82Date: July 2006
83KernelVersion: 2.6.22
84Contact: Artem Bityutskiy <dedekind@infradead.org>
85Description:
86 Maximum number of volumes which this UBI device may have.
87
88What: /sys/class/ubi/ubiX/min_io_size
89Date: July 2006
90KernelVersion: 2.6.22
91Contact: Artem Bityutskiy <dedekind@infradead.org>
92Description:
93 Minimum input/output unit size. All the I/O may only be done
94 in multiples of the contained number.
95
96What: /sys/class/ubi/ubiX/mtd_num
97Date: January 2008
98KernelVersion: 2.6.25
99Contact: Artem Bityutskiy <dedekind@infradead.org>
100Description:
101 Number of the underlying MTD device.
102
103What: /sys/class/ubi/ubiX/reserved_for_bad
104Date: July 2006
105KernelVersion: 2.6.22
106Contact: Artem Bityutskiy <dedekind@infradead.org>
107Description:
108 Number of physical eraseblocks reserved for bad block handling.
109
110What: /sys/class/ubi/ubiX/total_eraseblocks
111Date: July 2006
112KernelVersion: 2.6.22
113Contact: Artem Bityutskiy <dedekind@infradead.org>
114Description:
115 Total number of good (not marked as bad) physical eraseblocks on
116 the underlying MTD device.
117
118What: /sys/class/ubi/ubiX/volumes_count
119Date: July 2006
120KernelVersion: 2.6.22
121Contact: Artem Bityutskiy <dedekind@infradead.org>
122Description:
123 Count of volumes on this UBI device.
124
125What: /sys/class/ubi/ubiX/ubiX_Y/
126Date: July 2006
127KernelVersion: 2.6.22
128Contact: Artem Bityutskiy <dedekind@infradead.org>
129Description:
130 The /sys/class/ubi/ubiX/ubiX_0/, /sys/class/ubi/ubiX/ubiX_1/,
131 etc directories describe UBI volumes on UBI device X (volumes
132 0, 1, etc).
133
134What: /sys/class/ubi/ubiX/ubiX_Y/alignment
135Date: July 2006
136KernelVersion: 2.6.22
137Contact: Artem Bityutskiy <dedekind@infradead.org>
138Description:
139 Volume alignment - the value the logical eraseblock size of
140 this volume has to be aligned on. For example, 2048 means that
141 logical eraseblock size is multiple of 2048. In other words,
142 volume logical eraseblock size is UBI device logical eraseblock
143 size aligned to the alignment value.
144
145What: /sys/class/ubi/ubiX/ubiX_Y/corrupted
146Date: July 2006
147KernelVersion: 2.6.22
148Contact: Artem Bityutskiy <dedekind@infradead.org>
149Description:
150 Contains ASCII "0\n" if the UBI volume is OK, and ASCII "1\n"
151 if it is corrupted (e.g., due to an interrupted volume update).
152
153What: /sys/class/ubi/ubiX/ubiX_Y/data_bytes
154Date: July 2006
155KernelVersion: 2.6.22
156Contact: Artem Bityutskiy <dedekind@infradead.org>
157Description:
158 The amount of data this volume contains. This value makes sense
159 only for static volumes, and for dynamic volumes it is equivalent
160 to the total volume size in bytes.
161
162What: /sys/class/ubi/ubiX/ubiX_Y/dev
163Date: July 2006
164KernelVersion: 2.6.22
165Contact: Artem Bityutskiy <dedekind@infradead.org>
166Description:
167 Major and minor numbers of the character device corresponding
168 to this UBI volume (in <major>:<minor> format).
169
170What: /sys/class/ubi/ubiX/ubiX_Y/name
171Date: July 2006
172KernelVersion: 2.6.22
173Contact: Artem Bityutskiy <dedekind@infradead.org>
174Description:
175 Volume name.
176
177What: /sys/class/ubi/ubiX/ubiX_Y/reserved_ebs
178Date: July 2006
179KernelVersion: 2.6.22
180Contact: Artem Bityutskiy <dedekind@infradead.org>
181Description:
182 Count of physical eraseblocks reserved for this volume.
183 Equivalent to the volume size in logical eraseblocks.
184
185What: /sys/class/ubi/ubiX/ubiX_Y/type
186Date: July 2006
187KernelVersion: 2.6.22
188Contact: Artem Bityutskiy <dedekind@infradead.org>
189Description:
190 Volume type. Contains ASCII "dynamic\n" for dynamic volumes and
191 "static\n" for static volumes.
192
193What: /sys/class/ubi/ubiX/ubiX_Y/upd_marker
194Date: July 2006
195KernelVersion: 2.6.22
196Contact: Artem Bityutskiy <dedekind@infradead.org>
197Description:
198 Contains ASCII "0\n" if the update marker is not set for this
199 volume, and "1\n" if it is set. The update marker is set when
200 volume update starts, and cleaned when it ends. So the presence
201 of the update marker indicates that the volume is being updated
202 at the moment or the update was interrupted. The latter may be
203 checked using the "corrupted" sysfs file.
204
205What: /sys/class/ubi/ubiX/ubiX_Y/usable_eb_size
206Date: July 2006
207KernelVersion: 2.6.22
208Contact: Artem Bityutskiy <dedekind@infradead.org>
209Description:
210 Logical eraseblock size of this volume. Equivalent to logical
211 eraseblock size of the device aligned on the volume alignment
212 value.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
new file mode 100644
index 000000000000..ceddcff4082a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -0,0 +1,11 @@
1What: /sys/bus/pci/devices/.../vpd
2Date: February 2008
3Contact: Ben Hutchings <bhutchings@solarflare.com>
4Description:
5 A file named vpd in a device directory will be a
6 binary file containing the Vital Product Data for the
7 device. It should follow the VPD format defined in
8 PCI Specification 2.1 or 2.2, but users should consider
9 that some devices may have malformatted data. If the
10 underlying VPD has a writable section then the
11 corresponding section of this file will be writable.
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
new file mode 100644
index 000000000000..5f500977b42f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -0,0 +1,50 @@
1What: /sys/class/bdi/<bdi>/
2Date: January 2008
3Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
4Description:
5
6Provide a place in sysfs for the backing_dev_info object. This allows
7setting and retrieving various BDI specific variables.
8
9The <bdi> identifier can be either of the following:
10
11MAJOR:MINOR
12
13 Device number for block devices, or value of st_dev on
14 non-block filesystems which provide their own BDI, such as NFS
15 and FUSE.
16
17MAJOR:MINOR-fuseblk
18
19 Value of st_dev on fuseblk filesystems.
20
21default
22
23 The default backing dev, used for non-block device backed
24 filesystems which do not provide their own BDI.
25
26Files under /sys/class/bdi/<bdi>/
27---------------------------------
28
29read_ahead_kb (read-write)
30
31 Size of the read-ahead window in kilobytes
32
33min_ratio (read-write)
34
35 Under normal circumstances each device is given a part of the
36 total write-back cache that relates to its current average
37 writeout speed in relation to the other devices.
38
39 The 'min_ratio' parameter allows assigning a minimum
40 percentage of the write-back cache to a particular device.
41 For example, this is useful for providing a minimum QoS.
42
43max_ratio (read-write)
44
45 Allows limiting a particular device to use not more than the
46 given percentage of the write-back cache. This is useful in
47 situations where we want to avoid one device taking all or
48 most of the write-back cache. For example in case of an NFS
49 mount that is prone to get stuck, or a FUSE mount which cannot
50 be trusted to play fair.
diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft
new file mode 100644
index 000000000000..c2b7d1154bec
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ibft
@@ -0,0 +1,23 @@
1What: /sys/firmware/ibft/initiator
2Date: November 2007
3Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
4Description: The /sys/firmware/ibft/initiator directory will contain
5 files that expose the iSCSI Boot Firmware Table initiator data.
6 Usually this contains the Initiator name.
7
8What: /sys/firmware/ibft/targetX
9Date: November 2007
10Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
11Description: The /sys/firmware/ibft/targetX directory will contain
12 files that expose the iSCSI Boot Firmware Table target data.
13 Usually this contains the target's IP address, boot LUN,
14 target name, and what NIC it is associated with. It can also
15 contain the CHAP name (and password), the reverse CHAP
16 name (and password).
17
18What: /sys/firmware/ibft/ethernetX
19Date: November 2007
20Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
21Description: The /sys/firmware/ibft/ethernetX directory will contain
22 files that expose the iSCSI Boot Firmware Table NIC data.
23 This can include the IP address, MAC, and gateway of the NIC.
diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2
new file mode 100644
index 000000000000..b7cc516a8a8a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ocfs2
@@ -0,0 +1,89 @@
1What: /sys/fs/ocfs2/
2Date: April 2008
3Contact: ocfs2-devel@oss.oracle.com
4Description:
5 The /sys/fs/ocfs2 directory contains knobs used by the
6 ocfs2-tools to interact with the filesystem.
7
8What: /sys/fs/ocfs2/max_locking_protocol
9Date: April 2008
10Contact: ocfs2-devel@oss.oracle.com
11Description:
12 The /sys/fs/ocfs2/max_locking_protocol file displays version
13 of ocfs2 locking supported by the filesystem. This version
14 covers how ocfs2 uses distributed locking between cluster
15 nodes.
16
17 The protocol version has a major and minor number. Two
18 cluster nodes can interoperate if they have an identical
19 major number and an overlapping minor number - thus,
20 a node with version 1.10 can interoperate with a node
21 sporting version 1.8, as long as both use the 1.8 protocol.
22
23 Reading from this file returns a single line, the major
24 number and minor number joined by a period, eg "1.10".
25
26 This file is read-only. The value is compiled into the
27 driver.
28
29What: /sys/fs/ocfs2/loaded_cluster_plugins
30Date: April 2008
31Contact: ocfs2-devel@oss.oracle.com
32Description:
33 The /sys/fs/ocfs2/loaded_cluster_plugins file describes
34 the available plugins to support ocfs2 cluster operation.
35 A cluster plugin is required to use ocfs2 in a cluster.
36 There are currently two available plugins:
37
38 * 'o2cb' - The classic o2cb cluster stack that ocfs2 has
39 used since its inception.
40 * 'user' - A plugin supporting userspace cluster software
41 in conjunction with fs/dlm.
42
43 Reading from this file returns the names of all loaded
44 plugins, one per line.
45
46 This file is read-only. Its contents may change as
47 plugins are loaded or removed.
48
49What: /sys/fs/ocfs2/active_cluster_plugin
50Date: April 2008
51Contact: ocfs2-devel@oss.oracle.com
52Description:
53 The /sys/fs/ocfs2/active_cluster_plugin displays which
54 cluster plugin is currently in use by the filesystem.
55 The active plugin will appear in the loaded_cluster_plugins
56 file as well. Only one plugin can be used at a time.
57
58 Reading from this file returns the name of the active plugin
59 on a single line.
60
61 This file is read-only. Which plugin is active depends on
62 the cluster stack in use. The contents may change
63 when all filesystems are unmounted and the cluster stack
64 is changed.
65
66What: /sys/fs/ocfs2/cluster_stack
67Date: April 2008
68Contact: ocfs2-devel@oss.oracle.com
69Description:
70 The /sys/fs/ocfs2/cluster_stack file contains the name
71 of current ocfs2 cluster stack. This value is set by
72 userspace tools when bringing the cluster stack online.
73
74 Cluster stack names are 4 characters in length.
75
76 When the 'o2cb' cluster stack is used, the 'o2cb' cluster
77 plugin is active. All other cluster stacks use the 'user'
78 cluster plugin.
79
80 Reading from this file returns the name of the current
81 cluster stack on a single line.
82
83 Writing a new stack name to this file changes the current
84 cluster stack unless there are mounted ocfs2 filesystems.
85 If there are mounted filesystems, attempts to change the
86 stack return an error.
87
88Users:
89 ocfs2-tools <ocfs2-tools-devel@oss.oracle.com>
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index b939ebb62871..80d150458c80 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -145,7 +145,7 @@ Part Ic - DMA addressing limitations
145int 145int
146dma_supported(struct device *dev, u64 mask) 146dma_supported(struct device *dev, u64 mask)
147int 147int
148pci_dma_supported(struct device *dev, u64 mask) 148pci_dma_supported(struct pci_dev *hwdev, u64 mask)
149 149
150Checks to see if the device can support DMA to the memory described by 150Checks to see if the device can support DMA to the memory described by
151mask. 151mask.
@@ -189,7 +189,7 @@ dma_addr_t
189dma_map_single(struct device *dev, void *cpu_addr, size_t size, 189dma_map_single(struct device *dev, void *cpu_addr, size_t size,
190 enum dma_data_direction direction) 190 enum dma_data_direction direction)
191dma_addr_t 191dma_addr_t
192pci_map_single(struct device *dev, void *cpu_addr, size_t size, 192pci_map_single(struct pci_dev *hwdev, void *cpu_addr, size_t size,
193 int direction) 193 int direction)
194 194
195Maps a piece of processor virtual memory so it can be accessed by the 195Maps a piece of processor virtual memory so it can be accessed by the
@@ -395,6 +395,71 @@ Notes: You must do this:
395 395
396See also dma_map_single(). 396See also dma_map_single().
397 397
398dma_addr_t
399dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
400 enum dma_data_direction dir,
401 struct dma_attrs *attrs)
402
403void
404dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
405 size_t size, enum dma_data_direction dir,
406 struct dma_attrs *attrs)
407
408int
409dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
410 int nents, enum dma_data_direction dir,
411 struct dma_attrs *attrs)
412
413void
414dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
415 int nents, enum dma_data_direction dir,
416 struct dma_attrs *attrs)
417
418The four functions above are just like the counterpart functions
419without the _attrs suffixes, except that they pass an optional
420struct dma_attrs*.
421
422struct dma_attrs encapsulates a set of "dma attributes". For the
423definition of struct dma_attrs see linux/dma-attrs.h.
424
425The interpretation of dma attributes is architecture-specific, and
426each attribute should be documented in Documentation/DMA-attributes.txt.
427
428If struct dma_attrs* is NULL, the semantics of each of these
429functions is identical to those of the corresponding function
430without the _attrs suffix. As a result dma_map_single_attrs()
431can generally replace dma_map_single(), etc.
432
433As an example of the use of the *_attrs functions, here's how
434you could pass an attribute DMA_ATTR_FOO when mapping memory
435for DMA:
436
437#include <linux/dma-attrs.h>
438/* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and
439 * documented in Documentation/DMA-attributes.txt */
440...
441
442 DEFINE_DMA_ATTRS(attrs);
443 dma_set_attr(DMA_ATTR_FOO, &attrs);
444 ....
445 n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attrs);
446 ....
447
448Architectures that care about DMA_ATTR_FOO would check for its
449presence in their implementations of the mapping and unmapping
450routines, e.g.:
451
452void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
453 size_t size, enum dma_data_direction dir,
454 struct dma_attrs *attrs)
455{
456 ....
457 int foo = dma_get_attr(DMA_ATTR_FOO, attrs);
458 ....
459 if (foo)
460 /* twizzle the frobnozzle */
461 ....
462
398 463
399Part II - Advanced dma_ usage 464Part II - Advanced dma_ usage
400----------------------------- 465-----------------------------
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
new file mode 100644
index 000000000000..6d772f84b477
--- /dev/null
+++ b/Documentation/DMA-attributes.txt
@@ -0,0 +1,24 @@
1 DMA attributes
2 ==============
3
4This document describes the semantics of the DMA attributes that are
5defined in linux/dma-attrs.h.
6
7DMA_ATTR_WRITE_BARRIER
8----------------------
9
10DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA
11to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces
12all pending DMA writes to complete, and thus provides a mechanism to
13strictly order DMA from a device across all intervening busses and
14bridges. This barrier is not specific to a particular type of
15interconnect, it applies to the system as a whole, and so its
16implementation must account for the idiosyncrasies of the system all
17the way from the DMA device to memory.
18
19As an example of a situation where DMA_ATTR_WRITE_BARRIER would be
20useful, suppose that a device does a DMA write to indicate that data is
21ready and available in memory. The DMA of the "completion indication"
22could race with data DMA. Mapping the memory used for completion
23indications with DMA_ATTR_WRITE_BARRIER would prevent the race.
24
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index d84f89dbf921..b463ecd0c7ce 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -315,11 +315,11 @@ you should do:
315 315
316 dma_addr_t dma_handle; 316 dma_addr_t dma_handle;
317 317
318 cpu_addr = pci_alloc_consistent(dev, size, &dma_handle); 318 cpu_addr = pci_alloc_consistent(pdev, size, &dma_handle);
319 319
320where dev is a struct pci_dev *. You should pass NULL for PCI like buses 320where pdev is a struct pci_dev *. This may be called in interrupt context.
321where devices don't have struct pci_dev (like ISA, EISA). This may be 321You should use dma_alloc_coherent (see DMA-API.txt) for buses
322called in interrupt context. 322where devices don't have struct pci_dev (like ISA, EISA).
323 323
324This argument is needed because the DMA translations may be bus 324This argument is needed because the DMA translations may be bus
325specific (and often is private to the bus which the device is attached 325specific (and often is private to the bus which the device is attached
@@ -332,7 +332,7 @@ __get_free_pages (but takes size instead of a page order). If your
332driver needs regions sized smaller than a page, you may prefer using 332driver needs regions sized smaller than a page, you may prefer using
333the pci_pool interface, described below. 333the pci_pool interface, described below.
334 334
335The consistent DMA mapping interfaces, for non-NULL dev, will by 335The consistent DMA mapping interfaces, for non-NULL pdev, will by
336default return a DMA address which is SAC (Single Address Cycle) 336default return a DMA address which is SAC (Single Address Cycle)
337addressable. Even if the device indicates (via PCI dma mask) that it 337addressable. Even if the device indicates (via PCI dma mask) that it
338may address the upper 32-bits and thus perform DAC cycles, consistent 338may address the upper 32-bits and thus perform DAC cycles, consistent
@@ -354,9 +354,9 @@ buffer you receive will not cross a 64K boundary.
354 354
355To unmap and free such a DMA region, you call: 355To unmap and free such a DMA region, you call:
356 356
357 pci_free_consistent(dev, size, cpu_addr, dma_handle); 357 pci_free_consistent(pdev, size, cpu_addr, dma_handle);
358 358
359where dev, size are the same as in the above call and cpu_addr and 359where pdev, size are the same as in the above call and cpu_addr and
360dma_handle are the values pci_alloc_consistent returned to you. 360dma_handle are the values pci_alloc_consistent returned to you.
361This function may not be called in interrupt context. 361This function may not be called in interrupt context.
362 362
@@ -371,9 +371,9 @@ Create a pci_pool like this:
371 371
372 struct pci_pool *pool; 372 struct pci_pool *pool;
373 373
374 pool = pci_pool_create(name, dev, size, align, alloc); 374 pool = pci_pool_create(name, pdev, size, align, alloc);
375 375
376The "name" is for diagnostics (like a kmem_cache name); dev and size 376The "name" is for diagnostics (like a kmem_cache name); pdev and size
377are as above. The device's hardware alignment requirement for this 377are as above. The device's hardware alignment requirement for this
378type of data is "align" (which is expressed in bytes, and must be a 378type of data is "align" (which is expressed in bytes, and must be a
379power of two). If your device has no boundary crossing restrictions, 379power of two). If your device has no boundary crossing restrictions,
@@ -472,11 +472,11 @@ To map a single region, you do:
472 void *addr = buffer->ptr; 472 void *addr = buffer->ptr;
473 size_t size = buffer->len; 473 size_t size = buffer->len;
474 474
475 dma_handle = pci_map_single(dev, addr, size, direction); 475 dma_handle = pci_map_single(pdev, addr, size, direction);
476 476
477and to unmap it: 477and to unmap it:
478 478
479 pci_unmap_single(dev, dma_handle, size, direction); 479 pci_unmap_single(pdev, dma_handle, size, direction);
480 480
481You should call pci_unmap_single when the DMA activity is finished, e.g. 481You should call pci_unmap_single when the DMA activity is finished, e.g.
482from the interrupt which told you that the DMA transfer is done. 482from the interrupt which told you that the DMA transfer is done.
@@ -493,17 +493,17 @@ Specifically:
493 unsigned long offset = buffer->offset; 493 unsigned long offset = buffer->offset;
494 size_t size = buffer->len; 494 size_t size = buffer->len;
495 495
496 dma_handle = pci_map_page(dev, page, offset, size, direction); 496 dma_handle = pci_map_page(pdev, page, offset, size, direction);
497 497
498 ... 498 ...
499 499
500 pci_unmap_page(dev, dma_handle, size, direction); 500 pci_unmap_page(pdev, dma_handle, size, direction);
501 501
502Here, "offset" means byte offset within the given page. 502Here, "offset" means byte offset within the given page.
503 503
504With scatterlists, you map a region gathered from several regions by: 504With scatterlists, you map a region gathered from several regions by:
505 505
506 int i, count = pci_map_sg(dev, sglist, nents, direction); 506 int i, count = pci_map_sg(pdev, sglist, nents, direction);
507 struct scatterlist *sg; 507 struct scatterlist *sg;
508 508
509 for_each_sg(sglist, sg, count, i) { 509 for_each_sg(sglist, sg, count, i) {
@@ -527,7 +527,7 @@ accessed sg->address and sg->length as shown above.
527 527
528To unmap a scatterlist, just call: 528To unmap a scatterlist, just call:
529 529
530 pci_unmap_sg(dev, sglist, nents, direction); 530 pci_unmap_sg(pdev, sglist, nents, direction);
531 531
532Again, make sure DMA activity has already finished. 532Again, make sure DMA activity has already finished.
533 533
@@ -550,11 +550,11 @@ correct copy of the DMA buffer.
550So, firstly, just map it with pci_map_{single,sg}, and after each DMA 550So, firstly, just map it with pci_map_{single,sg}, and after each DMA
551transfer call either: 551transfer call either:
552 552
553 pci_dma_sync_single_for_cpu(dev, dma_handle, size, direction); 553 pci_dma_sync_single_for_cpu(pdev, dma_handle, size, direction);
554 554
555or: 555or:
556 556
557 pci_dma_sync_sg_for_cpu(dev, sglist, nents, direction); 557 pci_dma_sync_sg_for_cpu(pdev, sglist, nents, direction);
558 558
559as appropriate. 559as appropriate.
560 560
@@ -562,7 +562,7 @@ Then, if you wish to let the device get at the DMA area again,
562finish accessing the data with the cpu, and then before actually 562finish accessing the data with the cpu, and then before actually
563giving the buffer to the hardware call either: 563giving the buffer to the hardware call either:
564 564
565 pci_dma_sync_single_for_device(dev, dma_handle, size, direction); 565 pci_dma_sync_single_for_device(pdev, dma_handle, size, direction);
566 566
567or: 567or:
568 568
@@ -739,7 +739,7 @@ failure can be determined by:
739 739
740 dma_addr_t dma_handle; 740 dma_addr_t dma_handle;
741 741
742 dma_handle = pci_map_single(dev, addr, size, direction); 742 dma_handle = pci_map_single(pdev, addr, size, direction);
743 if (pci_dma_mapping_error(dma_handle)) { 743 if (pci_dma_mapping_error(dma_handle)) {
744 /* 744 /*
745 * reduce current DMA mapping usage, 745 * reduce current DMA mapping usage,
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 300e1707893f..0eb0d027eb32 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -9,9 +9,10 @@
9DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ 9DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
10 kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ 10 kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
11 procfs-guide.xml writing_usb_driver.xml networking.xml \ 11 procfs-guide.xml writing_usb_driver.xml networking.xml \
12 kernel-api.xml filesystems.xml lsm.xml usb.xml \ 12 kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml 14 genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
15 mac80211.xml debugobjects.xml
15 16
16### 17###
17# The build process is as follows (targets): 18# The build process is as follows (targets):
@@ -186,8 +187,11 @@ quiet_cmd_fig2png = FIG2PNG $@
186 187
187### 188###
188# Rule to convert a .c file to inline XML documentation 189# Rule to convert a .c file to inline XML documentation
190 gen_xml = :
191 quiet_gen_xml = echo ' GEN $@'
192silent_gen_xml = :
189%.xml: %.c 193%.xml: %.c
190 @echo ' GEN $@' 194 @$($(quiet)gen_xml)
191 @( \ 195 @( \
192 echo "<programlisting>"; \ 196 echo "<programlisting>"; \
193 expand --tabs=8 < $< | \ 197 expand --tabs=8 < $< | \
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
new file mode 100644
index 000000000000..7f5f218015fe
--- /dev/null
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -0,0 +1,391 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="debug-objects-guide">
6 <bookinfo>
7 <title>Debug objects life time</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Thomas</firstname>
12 <surname>Gleixner</surname>
13 <affiliation>
14 <address>
15 <email>tglx@linutronix.de</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <copyright>
22 <year>2008</year>
23 <holder>Thomas Gleixner</holder>
24 </copyright>
25
26 <legalnotice>
27 <para>
28 This documentation is free software; you can redistribute
29 it and/or modify it under the terms of the GNU General Public
30 License version 2 as published by the Free Software Foundation.
31 </para>
32
33 <para>
34 This program is distributed in the hope that it will be
35 useful, but WITHOUT ANY WARRANTY; without even the implied
36 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
37 See the GNU General Public License for more details.
38 </para>
39
40 <para>
41 You should have received a copy of the GNU General Public
42 License along with this program; if not, write to the Free
43 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
44 MA 02111-1307 USA
45 </para>
46
47 <para>
48 For more details see the file COPYING in the source
49 distribution of Linux.
50 </para>
51 </legalnotice>
52 </bookinfo>
53
54<toc></toc>
55
56 <chapter id="intro">
57 <title>Introduction</title>
58 <para>
59 debugobjects is a generic infrastructure to track the life time
60 of kernel objects and validate the operations on those.
61 </para>
62 <para>
63 debugobjects is useful to check for the following error patterns:
64 <itemizedlist>
65 <listitem><para>Activation of uninitialized objects</para></listitem>
66 <listitem><para>Initialization of active objects</para></listitem>
67 <listitem><para>Usage of freed/destroyed objects</para></listitem>
68 </itemizedlist>
69 </para>
70 <para>
71 debugobjects is not changing the data structure of the real
72 object so it can be compiled in with a minimal runtime impact
73 and enabled on demand with a kernel command line option.
74 </para>
75 </chapter>
76
77 <chapter id="howto">
78 <title>Howto use debugobjects</title>
79 <para>
80 A kernel subsystem needs to provide a data structure which
81 describes the object type and add calls into the debug code at
82 appropriate places. The data structure to describe the object
83 type needs at minimum the name of the object type. Optional
84 functions can and should be provided to fixup detected problems
85 so the kernel can continue to work and the debug information can
86 be retrieved from a live system instead of hard core debugging
87 with serial consoles and stack trace transcripts from the
88 monitor.
89 </para>
90 <para>
91 The debug calls provided by debugobjects are:
92 <itemizedlist>
93 <listitem><para>debug_object_init</para></listitem>
94 <listitem><para>debug_object_init_on_stack</para></listitem>
95 <listitem><para>debug_object_activate</para></listitem>
96 <listitem><para>debug_object_deactivate</para></listitem>
97 <listitem><para>debug_object_destroy</para></listitem>
98 <listitem><para>debug_object_free</para></listitem>
99 </itemizedlist>
100 Each of these functions takes the address of the real object and
101 a pointer to the object type specific debug description
102 structure.
103 </para>
104 <para>
105 Each detected error is reported in the statistics and a limited
106 number of errors are printk'ed including a full stack trace.
107 </para>
108 <para>
109 The statistics are available via debugfs/debug_objects/stats.
110 They provide information about the number of warnings and the
111 number of successful fixups along with information about the
112 usage of the internal tracking objects and the state of the
113 internal tracking objects pool.
114 </para>
115 </chapter>
116 <chapter id="debugfunctions">
117 <title>Debug functions</title>
118 <sect1 id="prototypes">
119 <title>Debug object function reference</title>
120!Elib/debugobjects.c
121 </sect1>
122 <sect1 id="debug_object_init">
123 <title>debug_object_init</title>
124 <para>
125 This function is called whenever the initialization function
126 of a real object is called.
127 </para>
128 <para>
129 When the real object is already tracked by debugobjects it is
130 checked, whether the object can be initialized. Initializing
131 is not allowed for active and destroyed objects. When
132 debugobjects detects an error, then it calls the fixup_init
133 function of the object type description structure if provided
134 by the caller. The fixup function can correct the problem
135 before the real initialization of the object happens. E.g. it
136 can deactivate an active object in order to prevent damage to
137 the subsystem.
138 </para>
139 <para>
140 When the real object is not yet tracked by debugobjects,
141 debugobjects allocates a tracker object for the real object
142 and sets the tracker object state to ODEBUG_STATE_INIT. It
143 verifies that the object is not on the caller's stack. If it is
144 on the caller's stack then a limited number of warnings
145 including a full stack trace is printk'ed. The calling code
146 must use debug_object_init_on_stack() and remove the object
147 before leaving the function which allocated it. See next
148 section.
149 </para>
150 </sect1>
151
152 <sect1 id="debug_object_init_on_stack">
153 <title>debug_object_init_on_stack</title>
154 <para>
155 This function is called whenever the initialization function
156 of a real object which resides on the stack is called.
157 </para>
158 <para>
159 When the real object is already tracked by debugobjects it is
160 checked, whether the object can be initialized. Initializing
161 is not allowed for active and destroyed objects. When
162 debugobjects detects an error, then it calls the fixup_init
163 function of the object type description structure if provided
164 by the caller. The fixup function can correct the problem
165 before the real initialization of the object happens. E.g. it
166 can deactivate an active object in order to prevent damage to
167 the subsystem.
168 </para>
169 <para>
170 When the real object is not yet tracked by debugobjects,
171 debugobjects allocates a tracker object for the real object
172 and sets the tracker object state to ODEBUG_STATE_INIT. It
173 verifies that the object is on the caller's stack.
174 </para>
175 <para>
176 An object which is on the stack must be removed from the
177 tracker by calling debug_object_free() before the function
178 which allocates the object returns. Otherwise we keep track of
179 stale objects.
180 </para>
181 </sect1>
182
183 <sect1 id="debug_object_activate">
184 <title>debug_object_activate</title>
185 <para>
186 This function is called whenever the activation function of a
187 real object is called.
188 </para>
189 <para>
190 When the real object is already tracked by debugobjects it is
191 checked, whether the object can be activated. Activating is
192 not allowed for active and destroyed objects. When
193 debugobjects detects an error, then it calls the
194 fixup_activate function of the object type description
195 structure if provided by the caller. The fixup function can
196 correct the problem before the real activation of the object
197 happens. E.g. it can deactivate an active object in order to
198 prevent damage to the subsystem.
199 </para>
200 <para>
201 When the real object is not yet tracked by debugobjects then
202 the fixup_activate function is called if available. This is
203 necessary to allow the legitimate activation of statically
204 allocated and initialized objects. The fixup function checks
205 whether the object is valid and calls the debug_object_init()
206 function to initialize the tracking of this object.
207 </para>
208 <para>
209 When the activation is legitimate, then the state of the
210 associated tracker object is set to ODEBUG_STATE_ACTIVE.
211 </para>
212 </sect1>
213
214 <sect1 id="debug_object_deactivate">
215 <title>debug_object_deactivate</title>
216 <para>
217 This function is called whenever the deactivation function of
218 a real object is called.
219 </para>
220 <para>
221 When the real object is tracked by debugobjects it is checked,
222 whether the object can be deactivated. Deactivating is not
223 allowed for untracked or destroyed objects.
224 </para>
225 <para>
226 When the deactivation is legitimate, then the state of the
227 associated tracker object is set to ODEBUG_STATE_INACTIVE.
228 </para>
229 </sect1>
230
231 <sect1 id="debug_object_destroy">
232 <title>debug_object_destroy</title>
233 <para>
234 This function is called to mark an object destroyed. This is
235 useful to prevent the usage of invalid objects, which are
236 still available in memory: either statically allocated objects
237 or objects which are freed later.
238 </para>
239 <para>
240 When the real object is tracked by debugobjects it is checked,
241 whether the object can be destroyed. Destruction is not
242 allowed for active and destroyed objects. When debugobjects
243 detects an error, then it calls the fixup_destroy function of
244 the object type description structure if provided by the
245 caller. The fixup function can correct the problem before the
246 real destruction of the object happens. E.g. it can deactivate
247 an active object in order to prevent damage to the subsystem.
248 </para>
249 <para>
250 When the destruction is legitimate, then the state of the
251 associated tracker object is set to ODEBUG_STATE_DESTROYED.
252 </para>
253 </sect1>
254
255 <sect1 id="debug_object_free">
256 <title>debug_object_free</title>
257 <para>
258 This function is called before an object is freed.
259 </para>
260 <para>
261 When the real object is tracked by debugobjects it is checked,
262 whether the object can be freed. Free is not allowed for
263 active objects. When debugobjects detects an error, then it
264 calls the fixup_free function of the object type description
265 structure if provided by the caller. The fixup function can
266 correct the problem before the real free of the object
267 happens. E.g. it can deactivate an active object in order to
268 prevent damage to the subsystem.
269 </para>
270 <para>
271 Note that debug_object_free removes the object from the
272 tracker. Later usage of the object is detected by the other
273 debug checks.
274 </para>
275 </sect1>
276 </chapter>
277 <chapter id="fixupfunctions">
278 <title>Fixup functions</title>
279 <sect1 id="debug_obj_descr">
280 <title>Debug object type description structure</title>
281!Iinclude/linux/debugobjects.h
282 </sect1>
283 <sect1 id="fixup_init">
284 <title>fixup_init</title>
285 <para>
286 This function is called from the debug code whenever a problem
287 in debug_object_init is detected. The function takes the
288 address of the object and the state which is currently
289 recorded in the tracker.
290 </para>
291 <para>
292 Called from debug_object_init when the object state is:
293 <itemizedlist>
294 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
295 </itemizedlist>
296 </para>
297 <para>
298 The function returns 1 when the fixup was successful,
299 otherwise 0. The return value is used to update the
300 statistics.
301 </para>
302 <para>
303 Note that the function needs to call the debug_object_init()
304 function again, after the damage has been repaired in order to
305 keep the state consistent.
306 </para>
307 </sect1>
308
309 <sect1 id="fixup_activate">
310 <title>fixup_activate</title>
311 <para>
312 This function is called from the debug code whenever a problem
313 in debug_object_activate is detected.
314 </para>
315 <para>
316 Called from debug_object_activate when the object state is:
317 <itemizedlist>
318 <listitem><para>ODEBUG_STATE_NOTAVAILABLE</para></listitem>
319 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
320 </itemizedlist>
321 </para>
322 <para>
323 The function returns 1 when the fixup was successful,
324 otherwise 0. The return value is used to update the
325 statistics.
326 </para>
327 <para>
328 Note that the function needs to call the debug_object_activate()
329 function again after the damage has been repaired in order to
330 keep the state consistent.
331 </para>
332 <para>
333 The activation of statically initialized objects is a special
334 case. When debug_object_activate() has no tracked object for
335 this object address then fixup_activate() is called with
336 object state ODEBUG_STATE_NOTAVAILABLE. The fixup function
337 needs to check whether this is a legitimate case of a
338 statically initialized object or not. In case it is, it calls
339 debug_object_init() and debug_object_activate() to make the
340 object known to the tracker and marked active. In this case
341 the function should return 0 because this is not a real fixup.
342 </para>
343 </sect1>
344
345 <sect1 id="fixup_destroy">
346 <title>fixup_destroy</title>
347 <para>
348 This function is called from the debug code whenever a problem
349 in debug_object_destroy is detected.
350 </para>
351 <para>
352 Called from debug_object_destroy when the object state is:
353 <itemizedlist>
354 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
355 </itemizedlist>
356 </para>
357 <para>
358 The function returns 1 when the fixup was successful,
359 otherwise 0. The return value is used to update the
360 statistics.
361 </para>
362 </sect1>
363 <sect1 id="fixup_free">
364 <title>fixup_free</title>
365 <para>
366 This function is called from the debug code whenever a problem
367 in debug_object_free is detected. Further it can be called
368 from the debug checks in kfree/vfree, when an active object is
369 detected from the debug_check_no_obj_freed() sanity checks.
370 </para>
371 <para>
372 Called from debug_object_free() or debug_check_no_obj_freed()
373 when the object state is:
374 <itemizedlist>
375 <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
376 </itemizedlist>
377 </para>
378 <para>
379 The function returns 1 when the fixup was successful,
380 otherwise 0. The return value is used to update the
381 statistics.
382 </para>
383 </sect1>
384 </chapter>
385 <chapter id="bugs">
386 <title>Known Bugs And Assumptions</title>
387 <para>
388 None (knock on wood).
389 </para>
390 </chapter>
391</book>
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index dc0f30c3e571..b7b1482f6e04 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -119,7 +119,7 @@ X!Ilib/string.c
119!Elib/string.c 119!Elib/string.c
120 </sect1> 120 </sect1>
121 <sect1><title>Bit Operations</title> 121 <sect1><title>Bit Operations</title>
122!Iinclude/asm-x86/bitops_32.h 122!Iinclude/asm-x86/bitops.h
123 </sect1> 123 </sect1>
124 </chapter> 124 </chapter>
125 125
@@ -297,11 +297,6 @@ X!Earch/x86/kernel/mca_32.c
297!Ikernel/acct.c 297!Ikernel/acct.c
298 </chapter> 298 </chapter>
299 299
300 <chapter id="pmfuncs">
301 <title>Power Management</title>
302!Ekernel/power/pm.c
303 </chapter>
304
305 <chapter id="devdrivers"> 300 <chapter id="devdrivers">
306 <title>Device drivers infrastructure</title> 301 <title>Device drivers infrastructure</title>
307 <sect1><title>Device Drivers Base</title> 302 <sect1><title>Device Drivers Base</title>
@@ -650,4 +645,58 @@ X!Idrivers/video/console/fonts.c
650!Edrivers/i2c/i2c-core.c 645!Edrivers/i2c/i2c-core.c
651 </chapter> 646 </chapter>
652 647
648 <chapter id="clk">
649 <title>Clock Framework</title>
650
651 <para>
652 The clock framework defines programming interfaces to support
653 software management of the system clock tree.
654 This framework is widely used with System-On-Chip (SOC) platforms
655 to support power management and various devices which may need
656 custom clock rates.
657 Note that these "clocks" don't relate to timekeeping or real
658 time clocks (RTCs), each of which has separate frameworks.
659 These <structname>struct clk</structname> instances may be used
660 to manage for example a 96 MHz signal that is used to shift bits
661 into and out of peripherals or busses, or otherwise trigger
662 synchronous state machine transitions in system hardware.
663 </para>
664
665 <para>
666 Power management is supported by explicit software clock gating:
667 unused clocks are disabled, so the system doesn't waste power
668 changing the state of transistors that aren't in active use.
669 On some systems this may be backed by hardware clock gating,
670 where clocks are gated without being disabled in software.
671 Sections of chips that are powered but not clocked may be able
672 to retain their last state.
673 This low power state is often called a <emphasis>retention
674 mode</emphasis>.
675 This mode still incurs leakage currents, especially with finer
676 circuit geometries, but for CMOS circuits power is mostly used
677 by clocked state changes.
678 </para>
679
680 <para>
681 Power-aware drivers only enable their clocks when the device
682 they manage is in active use. Also, system sleep states often
683 differ according to which clock domains are active: while a
684 "standby" state may allow wakeup from several active domains, a
685 "mem" (suspend-to-RAM) state may require a more wholesale shutdown
686 of clocks derived from higher speed PLLs and oscillators, limiting
687 the number of possible wakeup event sources. A driver's suspend
688 method may need to be aware of system-specific clock constraints
689 on the target sleep state.
690 </para>
691
692 <para>
693 Some platforms support programmable clock generators. These
694 can be used by external chips of various kinds, such as other
695 CPUs, multimedia codecs, and devices with strict requirements
696 for interface clocking.
697 </para>
698
699!Iinclude/linux/clk.h
700 </chapter>
701
653</book> 702</book>
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 2e9d6b41f034..2510763295d0 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -241,7 +241,7 @@
241 </para> 241 </para>
242 <para> 242 <para>
243 The third type is a semaphore 243 The third type is a semaphore
244 (<filename class="headerfile">include/asm/semaphore.h</filename>): it 244 (<filename class="headerfile">include/linux/semaphore.h</filename>): it
245 can have more than one holder at any time (the number decided at 245 can have more than one holder at any time (the number decided at
246 initialization time), although it is most commonly used as a 246 initialization time), although it is most commonly used as a
247 single-holder lock (a mutex). If you can't get a semaphore, your 247 single-holder lock (a mutex). If you can't get a semaphore, your
@@ -290,7 +290,7 @@
290 <para> 290 <para>
291 If you have a data structure which is only ever accessed from 291 If you have a data structure which is only ever accessed from
292 user context, then you can use a simple semaphore 292 user context, then you can use a simple semaphore
293 (<filename>linux/asm/semaphore.h</filename>) to protect it. This 293 (<filename>include/linux/semaphore.h</filename>) to protect it. This
294 is the most trivial case: you initialize the semaphore to the number 294 is the most trivial case: you initialize the semaphore to the number
295 of resources available (usually 1), and call 295 of resources available (usually 1), and call
296 <function>down_interruptible()</function> to grab the semaphore, and 296 <function>down_interruptible()</function> to grab the semaphore, and
@@ -703,6 +703,31 @@
703</sect1> 703</sect1>
704</chapter> 704</chapter>
705 705
706<chapter id="trylock-functions">
707 <title>The trylock Functions</title>
708 <para>
709 There are functions that try to acquire a lock only once and immediately
710 return a value telling about success or failure to acquire the lock.
711 They can be used if you need no access to the data protected with the lock
712 when some other thread is holding the lock. You should acquire the lock
713 later if you then need access to the data protected with the lock.
714 </para>
715
716 <para>
717 <function>spin_trylock()</function> does not spin but returns non-zero if
718 it acquires the spinlock on the first try or 0 if not. This function can
719 be used in all contexts like <function>spin_lock</function>: you must have
720 disabled the contexts that might interrupt you and acquire the spin lock.
721 </para>
722
723 <para>
724 <function>mutex_trylock()</function> does not suspend your task
725 but returns non-zero if it could lock the mutex on the first try
726 or 0 if not. This function cannot be safely used in hardware or software
727 interrupt contexts despite not sleeping.
728 </para>
729</chapter>
730
706 <chapter id="Examples"> 731 <chapter id="Examples">
707 <title>Common Examples</title> 732 <title>Common Examples</title>
708 <para> 733 <para>
@@ -854,7 +879,7 @@ The change is shown below, in standard patch format: the
854 }; 879 };
855 880
856-static DEFINE_MUTEX(cache_lock); 881-static DEFINE_MUTEX(cache_lock);
857+static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; 882+static DEFINE_SPINLOCK(cache_lock);
858 static LIST_HEAD(cache); 883 static LIST_HEAD(cache);
859 static unsigned int cache_num = 0; 884 static unsigned int cache_num = 0;
860 #define MAX_CACHE_SIZE 10 885 #define MAX_CACHE_SIZE 10
@@ -1238,7 +1263,7 @@ Here is the "lock-per-object" implementation:
1238- int popularity; 1263- int popularity;
1239 }; 1264 };
1240 1265
1241 static spinlock_t cache_lock = SPIN_LOCK_UNLOCKED; 1266 static DEFINE_SPINLOCK(cache_lock);
1242@@ -77,6 +84,7 @@ 1267@@ -77,6 +84,7 @@
1243 obj-&gt;id = id; 1268 obj-&gt;id = id;
1244 obj-&gt;popularity = 0; 1269 obj-&gt;popularity = 0;
@@ -1656,7 +1681,7 @@ the amount of locking which needs to be done.
1656 #include &lt;linux/slab.h&gt; 1681 #include &lt;linux/slab.h&gt;
1657 #include &lt;linux/string.h&gt; 1682 #include &lt;linux/string.h&gt;
1658+#include &lt;linux/rcupdate.h&gt; 1683+#include &lt;linux/rcupdate.h&gt;
1659 #include &lt;asm/semaphore.h&gt; 1684 #include &lt;linux/semaphore.h&gt;
1660 #include &lt;asm/errno.h&gt; 1685 #include &lt;asm/errno.h&gt;
1661 1686
1662 struct object 1687 struct object
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
new file mode 100644
index 000000000000..e8acd1f03456
--- /dev/null
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -0,0 +1,441 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="kgdbOnLinux">
6 <bookinfo>
7 <title>Using kgdb and the kgdb Internals</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Jason</firstname>
12 <surname>Wessel</surname>
13 <affiliation>
14 <address>
15 <email>jason.wessel@windriver.com</email>
16 </address>
17 </affiliation>
18 </author>
19 </authorgroup>
20
21 <authorgroup>
22 <author>
23 <firstname>Tom</firstname>
24 <surname>Rini</surname>
25 <affiliation>
26 <address>
27 <email>trini@kernel.crashing.org</email>
28 </address>
29 </affiliation>
30 </author>
31 </authorgroup>
32
33 <authorgroup>
34 <author>
35 <firstname>Amit S.</firstname>
36 <surname>Kale</surname>
37 <affiliation>
38 <address>
39 <email>amitkale@linsyssoft.com</email>
40 </address>
41 </affiliation>
42 </author>
43 </authorgroup>
44
45 <copyright>
46 <year>2008</year>
47 <holder>Wind River Systems, Inc.</holder>
48 </copyright>
49 <copyright>
50 <year>2004-2005</year>
51 <holder>MontaVista Software, Inc.</holder>
52 </copyright>
53 <copyright>
54 <year>2004</year>
55 <holder>Amit S. Kale</holder>
56 </copyright>
57
58 <legalnotice>
59 <para>
60 This file is licensed under the terms of the GNU General Public License
61 version 2. This program is licensed "as is" without any warranty of any
62 kind, whether express or implied.
63 </para>
64
65 </legalnotice>
66 </bookinfo>
67
68<toc></toc>
69 <chapter id="Introduction">
70 <title>Introduction</title>
71 <para>
72 kgdb is a source level debugger for the Linux kernel. It is used along
73 with gdb to debug a linux kernel. The expectation is that gdb can
74 be used to "break in" to the kernel to inspect memory, variables
75 and look through call stack information similar to what an
76 application developer would use gdb for. It is possible to place
77 breakpoints in kernel code and perform some limited execution
78 stepping.
79 </para>
80 <para>
81 Two machines are required for using kgdb. One of these machines is a
82 development machine and the other is a test machine. The kernel
83 to be debugged runs on the test machine. The development machine
84 runs an instance of gdb against the vmlinux file which contains
85 the symbols (not boot image such as bzImage, zImage, uImage...).
86 In gdb the developer specifies the connection parameters and
87 connects to kgdb. The type of connection a developer makes with
88 gdb depends on the availability of kgdb I/O modules compiled as
89 builtin's or kernel modules in the test machine's kernel.
90 </para>
91 </chapter>
92 <chapter id="CompilingAKernel">
93 <title>Compiling a kernel</title>
94 <para>
95 To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
96 "Prompt for development and/or incomplete code/drivers"
97 (CONFIG_EXPERIMENTAL) in "General setup", then under the
98 "Kernel debugging" select "KGDB: kernel debugging with remote gdb".
99 </para>
100 <para>
101 Next you should choose one or more I/O drivers to interconnect debugging
102 host and debugged target. Early boot debugging requires a KGDB
103 I/O driver that supports early debugging and the driver must be
104 built into the kernel directly. Kgdb I/O driver configuration
105 takes place via kernel or module parameters, see following
106 chapter.
107 </para>
108 <para>
109 The kgdb test compile options are described in the kgdb test suite chapter.
110 </para>
111
112 </chapter>
113 <chapter id="EnableKGDB">
114 <title>Enable kgdb for debugging</title>
115 <para>
116 In order to use kgdb you must activate it by passing configuration
117 information to one of the kgdb I/O drivers. If you do not pass any
118 configuration information kgdb will not do anything at all. Kgdb
119 will only actively hook up to the kernel trap hooks if a kgdb I/O
120 driver is loaded and configured. If you unconfigure a kgdb I/O
121 driver, kgdb will unregister all the kernel hook points.
122 </para>
123 <para>
124 All drivers can be reconfigured at run time, if
125 <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
126 are enabled, by echo'ing a new config string to
127 <constant>/sys/module/&lt;driver&gt;/parameters/&lt;option&gt;</constant>.
128 The driver can be unconfigured by passing an empty string. You cannot
129 change the configuration while the debugger is attached. Make sure
130 to detach the debugger with the <constant>detach</constant> command
131 prior to trying to unconfigure a kgdb I/O driver.
132 </para>
133 <sect1 id="kgdbwait">
134 <title>Kernel parameter: kgdbwait</title>
135 <para>
136 The Kernel command line option <constant>kgdbwait</constant> makes
137 kgdb wait for a debugger connection during booting of a kernel. You
138 can only use this option if you compiled a kgdb I/O driver into the
139 kernel and you specified the I/O driver configuration as a kernel
140 command line option. The kgdbwait parameter should always follow the
141 configuration parameter for the kgdb I/O driver in the kernel
142 command line else the I/O driver will not be configured prior to
143 asking the kernel to use it to wait.
144 </para>
145 <para>
146 The kernel will stop and wait as early as the I/O driver and
147 architecture will allow when you use this option. If you build the
148 kgdb I/O driver as a kernel module kgdbwait will not do anything.
149 </para>
150 </sect1>
151 <sect1 id="kgdboc">
152 <title>Kernel parameter: kgdboc</title>
153 <para>
154 The kgdboc driver was originally an abbreviation meant to stand for
155 "kgdb over console". Kgdboc is designed to work with a single
156 serial port. It was meant to cover the circumstance
157 where you wanted to use a serial console as your primary console as
158 well as using it to perform kernel debugging. Of course you can
159 also use kgdboc without assigning a console to the same port.
160 </para>
161 <sect2 id="UsingKgdboc">
162 <title>Using kgdboc</title>
163 <para>
164 You can configure kgdboc via sysfs or a module or kernel boot line
165 parameter depending on if you build with CONFIG_KGDBOC as a module
166 or built-in.
167 <orderedlist>
168 <listitem><para>From the module load or built-in</para>
169 <para><constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
170 <para>
171 The example here would be if your console port was typically ttyS0, you would use something like <constant>kgdboc=ttyS0,115200</constant> or on the ARM Versatile AB you would likely use <constant>kgdboc=ttyAMA0,115200</constant>
172 </para>
173 </listitem>
174 <listitem><para>From sysfs</para>
175 <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para>
176 </listitem>
177 </orderedlist>
178 </para>
179 <para>
180 NOTE: Kgdboc does not support interrupting the target via the
181 gdb remote protocol. You must manually send a sysrq-g unless you
182 have a proxy that splits console output to a terminal program and
183 has a separate port for the debugger to connect to that sends the
184 sysrq-g for you.
185 </para>
186 <para>When using kgdboc with no debugger proxy, you can end up
187 connecting the debugger at one of two entry points. If an
188 exception occurs after you have loaded kgdboc a message should print
189 on the console stating it is waiting for the debugger. In this case you
190 disconnect your terminal program and then connect the debugger in
191 its place. If you want to interrupt the target system and forcibly
192 enter a debug session you have to issue a Sysrq sequence and then
193 type the letter <constant>g</constant>. Then you disconnect the
194 terminal session and connect gdb. Your options if you don't like
195 this are to hack gdb to send the sysrq-g for you as well as on the
196 initial connect, or to use a debugger proxy that allows an
197 unmodified gdb to do the debugging.
198 </para>
199 </sect2>
200 </sect1>
201 <sect1 id="kgdbcon">
202 <title>Kernel parameter: kgdbcon</title>
203 <para>
204 Kgdb supports using the gdb serial protocol to send console messages
205 to the debugger when the debugger is connected and running. There
206 are two ways to activate this feature.
207 <orderedlist>
208 <listitem><para>Activate with the kernel command line option:</para>
209 <para><constant>kgdbcon</constant></para>
210 </listitem>
211 <listitem><para>Use sysfs before configuring an io driver</para>
212 <para>
213 <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
214 </para>
215 <para>
216 NOTE: If you do this after you configure the kgdb I/O driver, the
217 setting will not take effect until the next point the I/O is
218 reconfigured.
219 </para>
220 </listitem>
221 </orderedlist>
222 </para>
223 <para>
224 IMPORTANT NOTE: Using this option with kgdb over the console
225 (kgdboc) is not supported.
226 </para>
227 </sect1>
228 </chapter>
229 <chapter id="ConnectingGDB">
230 <title>Connecting gdb</title>
231 <para>
232 If you are using kgdboc, you need to have used kgdbwait as a boot
233 argument, issued a sysrq-g, or the system you are going to debug
234 has already taken an exception and is waiting for the debugger to
235 attach before you can connect gdb.
236 </para>
237 <para>
238 If you are using a kgdb I/O driver other than kgdboc,
239 you should be able to connect and the target will automatically
240 respond.
241 </para>
242 <para>
243 Example (using a serial port):
244 </para>
245 <programlisting>
246 % gdb ./vmlinux
247 (gdb) set remotebaud 115200
248 (gdb) target remote /dev/ttyS0
249 </programlisting>
250 <para>
251 Example (kgdb to a terminal server on tcp port 2012):
252 </para>
253 <programlisting>
254 % gdb ./vmlinux
255 (gdb) target remote 192.168.2.2:2012
256 </programlisting>
257 <para>
258 Once connected, you can debug a kernel the way you would debug an
259 application program.
260 </para>
261 <para>
262 If you are having problems connecting or something is going
263 seriously wrong while debugging, it will most often be the case
264 that you want to enable gdb to be verbose about its target
265 communications. You do this prior to issuing the <constant>target
266 remote</constant> command by typing in: <constant>set remote debug 1</constant>
267 </para>
268 </chapter>
269 <chapter id="KGDBTestSuite">
270 <title>kgdb Test Suite</title>
271 <para>
272 When kgdb is enabled in the kernel config you can also elect to
273 enable the config parameter KGDB_TESTS. Turning this on will
274 enable a special kgdb I/O module which is designed to test the
275 kgdb internal functions.
276 </para>
277 <para>
278 The kgdb tests are mainly intended for developers to test the kgdb
279 internals as well as a tool for developing a new kgdb architecture
280 specific implementation. These tests are not really for end users
281 of the Linux kernel. The primary source of documentation would be
282 to look in the drivers/misc/kgdbts.c file.
283 </para>
284 <para>
285 The kgdb test suite can also be configured at compile time to run
286 the core set of tests by setting the kernel config parameter
287 KGDB_TESTS_ON_BOOT. This particular option is aimed at automated
288 regression testing and does not require modifying the kernel boot
289 config arguments. If this is turned on, the kgdb test suite can
290 be disabled by specifying "kgdbts=" as a kernel boot argument.
291 </para>
292 </chapter>
293 <chapter id="CommonBackEndReq">
294 <title>KGDB Internals</title>
295 <sect1 id="kgdbArchitecture">
296 <title>Architecture Specifics</title>
297 <para>
298 Kgdb is organized into three basic components:
299 <orderedlist>
300 <listitem><para>kgdb core</para>
301 <para>
302 The kgdb core is found in kernel/kgdb.c. It contains:
303 <itemizedlist>
304 <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
305 <listitem><para>A generic OS exception handler which includes sync'ing the processors into a stopped state on a multi-CPU system.</para></listitem>
306 <listitem><para>The API to talk to the kgdb I/O drivers</para></listitem>
307 <listitem><para>The API to make calls to the arch specific kgdb implementation</para></listitem>
308 <listitem><para>The logic to perform safe memory reads and writes to memory while using the debugger</para></listitem>
309 <listitem><para>A full implementation for software breakpoints unless overridden by the arch</para></listitem>
310 </itemizedlist>
311 </para>
312 </listitem>
313 <listitem><para>kgdb arch specific implementation</para>
314 <para>
315 This implementation is generally found in arch/*/kernel/kgdb.c.
316 As an example, arch/x86/kernel/kgdb.c contains the specifics to
317 implement HW breakpoint as well as the initialization to
318 dynamically register and unregister for the trap handlers on
319 this architecture. The arch specific portion implements:
320 <itemizedlist>
321 <listitem><para>contains an arch specific trap catcher which
322 invokes kgdb_handle_exception() to start kgdb about doing its
323 work</para></listitem>
324 <listitem><para>translation to and from gdb specific packet format to pt_regs</para></listitem>
325 <listitem><para>Registration and unregistration of architecture specific trap hooks</para></listitem>
326 <listitem><para>Any special exception handling and cleanup</para></listitem>
327 <listitem><para>NMI exception handling and cleanup</para></listitem>
328 <listitem><para>(optional)HW breakpoints</para></listitem>
329 </itemizedlist>
330 </para>
331 </listitem>
332 <listitem><para>kgdb I/O driver</para>
333 <para>
334 Each kgdb I/O driver has to provide an implementation for the following:
335 <itemizedlist>
336 <listitem><para>configuration via builtin or module</para></listitem>
337 <listitem><para>dynamic configuration and kgdb hook registration calls</para></listitem>
338 <listitem><para>read and write character interface</para></listitem>
339 <listitem><para>A cleanup handler for unconfiguring from the kgdb core</para></listitem>
340 <listitem><para>(optional) Early debug methodology</para></listitem>
341 </itemizedlist>
342 Any given kgdb I/O driver has to operate very closely with the
343 hardware and must do it in such a way that does not enable
344 interrupts or change other parts of the system context without
345 completely restoring them. The kgdb core will repeatedly "poll"
346 a kgdb I/O driver for characters when it needs input. The I/O
347 driver is expected to return immediately if there is no data
348 available. Doing so allows for the future possibility to touch
349 watch dog hardware in such a way as to have a target system not
350 reset when these are enabled.
351 </para>
352 </listitem>
353 </orderedlist>
354 </para>
355 <para>
356 If you are intent on adding kgdb architecture specific support
357 for a new architecture, the architecture should define
358 <constant>HAVE_ARCH_KGDB</constant> in the architecture specific
359 Kconfig file. This will enable kgdb for the architecture, and
360 at that point you must create an architecture specific kgdb
361 implementation.
362 </para>
363 <para>
364 There are a few flags which must be set on every architecture in
365 their &lt;asm/kgdb.h&gt; file. These are:
366 <itemizedlist>
367 <listitem>
368 <para>
369 NUMREGBYTES: The size in bytes of all of the registers, so
370 that we can ensure they will all fit into a packet.
371 </para>
372 <para>
373 BUFMAX: The size in bytes of the buffer GDB will read into.
374 This must be larger than NUMREGBYTES.
375 </para>
376 <para>
377 CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
378 flush_cache_range or flush_icache_range. On some architectures,
379 these functions may not be safe to call on SMP since we keep other
380 CPUs in a holding pattern.
381 </para>
382 </listitem>
383 </itemizedlist>
384 </para>
385 <para>
386 There are also the following functions for the common backend,
387 found in kernel/kgdb.c, that must be supplied by the
388 architecture-specific backend unless marked as (optional), in
389 which case a default function may be used if the architecture
390 does not need to provide a specific implementation.
391 </para>
392!Iinclude/linux/kgdb.h
393 </sect1>
394 <sect1 id="kgdbocDesign">
395 <title>kgdboc internals</title>
396 <para>
397 The kgdboc driver is actually a very thin driver that relies on the
398 underlying low level to the hardware driver having "polling hooks"
399 to which the tty driver is attached. In the initial
400 implementation of kgdboc the serial_core was changed to expose a
401 low level uart hook for doing polled mode reading and writing of a
402 single character while in an atomic context. When kgdb makes an I/O
403 request to the debugger, kgdboc invokes a call back in the serial
404 core which in turn uses the call back in the uart driver. It is
405 certainly possible to extend kgdboc to work with non-uart based
406 consoles in the future.
407 </para>
408 <para>
409 When using kgdboc with a uart, the uart driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
410#ifdef CONFIG_CONSOLE_POLL
411 .poll_get_char = serial8250_get_poll_char,
412 .poll_put_char = serial8250_put_poll_char,
413#endif
414 </programlisting>
415 Any implementation specifics around creating a polling driver use the
416 <constant>#ifdef CONFIG_CONSOLE_POLL</constant>, as shown above.
417 Keep in mind that polling hooks have to be implemented in such a way
418 that they can be called from an atomic context and have to restore
419 the state of the uart chip on return such that the system can return
420 to normal when the debugger detaches. You need to be very careful
421 with any kind of lock you consider, because failing here is most likely
422 going to mean pressing the reset button.
423 </para>
424 </sect1>
425 </chapter>
426 <chapter id="credits">
427 <title>Credits</title>
428 <para>
429 The following people have contributed to this document:
430 <orderedlist>
431 <listitem><para>Amit Kale<email>amitkale@linsyssoft.com</email></para></listitem>
432 <listitem><para>Tom Rini<email>trini@kernel.crashing.org</email></para></listitem>
433 </orderedlist>
434 In March 2008 this document was completely rewritten by:
435 <itemizedlist>
436 <listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
437 </itemizedlist>
438 </para>
439 </chapter>
440</book>
441
diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl
new file mode 100644
index 000000000000..b651e0a4b1c0
--- /dev/null
+++ b/Documentation/DocBook/mac80211.tmpl
@@ -0,0 +1,335 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="mac80211-developers-guide">
6 <bookinfo>
7 <title>The mac80211 subsystem for kernel developers</title>
8
9 <authorgroup>
10 <author>
11 <firstname>Johannes</firstname>
12 <surname>Berg</surname>
13 <affiliation>
14 <address><email>johannes@sipsolutions.net</email></address>
15 </affiliation>
16 </author>
17 </authorgroup>
18
19 <copyright>
20 <year>2007</year>
21 <year>2008</year>
22 <holder>Johannes Berg</holder>
23 </copyright>
24
25 <legalnotice>
26 <para>
27 This documentation is free software; you can redistribute
28 it and/or modify it under the terms of the GNU General Public
29 License version 2 as published by the Free Software Foundation.
30 </para>
31
32 <para>
33 This documentation is distributed in the hope that it will be
34 useful, but WITHOUT ANY WARRANTY; without even the implied
35 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
36 See the GNU General Public License for more details.
37 </para>
38
39 <para>
40 You should have received a copy of the GNU General Public
41 License along with this documentation; if not, write to the Free
42 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
43 MA 02111-1307 USA
44 </para>
45
46 <para>
47 For more details see the file COPYING in the source
48 distribution of Linux.
49 </para>
50 </legalnotice>
51
52 <abstract>
53!Pinclude/net/mac80211.h Introduction
54!Pinclude/net/mac80211.h Warning
55 </abstract>
56 </bookinfo>
57
58 <toc></toc>
59
60<!--
61Generally, this document shall be ordered by increasing complexity.
62It is important to note that readers should be able to read only
63the first few sections to get a working driver and only advanced
64usage should require reading the full document.
65-->
66
67 <part>
68 <title>The basic mac80211 driver interface</title>
69 <partintro>
70 <para>
71 You should read and understand the information contained
72 within this part of the book while implementing a driver.
73 In some chapters, advanced usage is noted, that may be
74 skipped at first.
75 </para>
76 <para>
77 This part of the book only covers station and monitor mode
78 functionality, additional information required to implement
79 the other modes is covered in the second part of the book.
80 </para>
81 </partintro>
82
83 <chapter id="basics">
84 <title>Basic hardware handling</title>
85 <para>TBD</para>
86 <para>
87 This chapter shall contain information on getting a hw
88 struct allocated and registered with mac80211.
89 </para>
90 <para>
91 Since it is required to allocate rates/modes before registering
92 a hw struct, this chapter shall also contain information on setting
93 up the rate/mode structs.
94 </para>
95 <para>
96 Additionally, some discussion about the callbacks and
97 the general programming model should be in here, including
98 the definition of ieee80211_ops which will be referred to
99 a lot.
100 </para>
101 <para>
102 Finally, a discussion of hardware capabilities should be done
103 with references to other parts of the book.
104 </para>
105<!-- intentionally multiple !F lines to get proper order -->
106!Finclude/net/mac80211.h ieee80211_hw
107!Finclude/net/mac80211.h ieee80211_hw_flags
108!Finclude/net/mac80211.h SET_IEEE80211_DEV
109!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR
110!Finclude/net/mac80211.h ieee80211_ops
111!Finclude/net/mac80211.h ieee80211_alloc_hw
112!Finclude/net/mac80211.h ieee80211_register_hw
113!Finclude/net/mac80211.h ieee80211_get_tx_led_name
114!Finclude/net/mac80211.h ieee80211_get_rx_led_name
115!Finclude/net/mac80211.h ieee80211_get_assoc_led_name
116!Finclude/net/mac80211.h ieee80211_get_radio_led_name
117!Finclude/net/mac80211.h ieee80211_unregister_hw
118!Finclude/net/mac80211.h ieee80211_free_hw
119 </chapter>
120
121 <chapter id="phy-handling">
122 <title>PHY configuration</title>
123 <para>TBD</para>
124 <para>
125 This chapter should describe PHY handling including
126 start/stop callbacks and the various structures used.
127 </para>
128!Finclude/net/mac80211.h ieee80211_conf
129!Finclude/net/mac80211.h ieee80211_conf_flags
130 </chapter>
131
132 <chapter id="iface-handling">
133 <title>Virtual interfaces</title>
134 <para>TBD</para>
135 <para>
136 This chapter should describe virtual interface basics
137 that are relevant to the driver (VLANs, MGMT etc are not.)
138 It should explain the use of the add_iface/remove_iface
139 callbacks as well as the interface configuration callbacks.
140 </para>
141 <para>Things related to AP mode should be discussed there.</para>
142 <para>
143 Things related to supporting multiple interfaces should be
144 in the appropriate chapter, a BIG FAT note should be here about
145 this though and the recommendation to allow only a single
146 interface in STA mode at first!
147 </para>
148!Finclude/net/mac80211.h ieee80211_if_types
149!Finclude/net/mac80211.h ieee80211_if_init_conf
150!Finclude/net/mac80211.h ieee80211_if_conf
151 </chapter>
152
153 <chapter id="rx-tx">
154 <title>Receive and transmit processing</title>
155 <sect1>
156 <title>what should be here</title>
157 <para>TBD</para>
158 <para>
159 This should describe the receive and transmit
160 paths in mac80211/the drivers as well as
161 transmit status handling.
162 </para>
163 </sect1>
164 <sect1>
165 <title>Frame format</title>
166!Pinclude/net/mac80211.h Frame format
167 </sect1>
168 <sect1>
169 <title>Alignment issues</title>
170 <para>TBD</para>
171 </sect1>
172 <sect1>
173 <title>Calling into mac80211 from interrupts</title>
174!Pinclude/net/mac80211.h Calling mac80211 from interrupts
175 </sect1>
176 <sect1>
177 <title>functions/definitions</title>
178!Finclude/net/mac80211.h ieee80211_rx_status
179!Finclude/net/mac80211.h mac80211_rx_flags
180!Finclude/net/mac80211.h ieee80211_tx_control
181!Finclude/net/mac80211.h ieee80211_tx_status_flags
182!Finclude/net/mac80211.h ieee80211_rx
183!Finclude/net/mac80211.h ieee80211_rx_irqsafe
184!Finclude/net/mac80211.h ieee80211_tx_status
185!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe
186!Finclude/net/mac80211.h ieee80211_rts_get
187!Finclude/net/mac80211.h ieee80211_rts_duration
188!Finclude/net/mac80211.h ieee80211_ctstoself_get
189!Finclude/net/mac80211.h ieee80211_ctstoself_duration
190!Finclude/net/mac80211.h ieee80211_generic_frame_duration
191!Finclude/net/mac80211.h ieee80211_get_hdrlen_from_skb
192!Finclude/net/mac80211.h ieee80211_get_hdrlen
193!Finclude/net/mac80211.h ieee80211_wake_queue
194!Finclude/net/mac80211.h ieee80211_stop_queue
195!Finclude/net/mac80211.h ieee80211_start_queues
196!Finclude/net/mac80211.h ieee80211_stop_queues
197!Finclude/net/mac80211.h ieee80211_wake_queues
198 </sect1>
199 </chapter>
200
201 <chapter id="filters">
202 <title>Frame filtering</title>
203!Pinclude/net/mac80211.h Frame filtering
204!Finclude/net/mac80211.h ieee80211_filter_flags
205 </chapter>
206 </part>
207
208 <part id="advanced">
209 <title>Advanced driver interface</title>
210 <partintro>
211 <para>
212 Information contained within this part of the book is
213 of interest only for advanced interaction of mac80211
214 with drivers to exploit more hardware capabilities and
215 improve performance.
216 </para>
217 </partintro>
218
219 <chapter id="hardware-crypto-offload">
220 <title>Hardware crypto acceleration</title>
221!Pinclude/net/mac80211.h Hardware crypto acceleration
222<!-- intentionally multiple !F lines to get proper order -->
223!Finclude/net/mac80211.h set_key_cmd
224!Finclude/net/mac80211.h ieee80211_key_conf
225!Finclude/net/mac80211.h ieee80211_key_alg
226!Finclude/net/mac80211.h ieee80211_key_flags
227 </chapter>
228
229 <chapter id="qos">
230 <title>Multiple queues and QoS support</title>
231 <para>TBD</para>
232!Finclude/net/mac80211.h ieee80211_tx_queue_params
233!Finclude/net/mac80211.h ieee80211_tx_queue_stats_data
234!Finclude/net/mac80211.h ieee80211_tx_queue
235 </chapter>
236
237 <chapter id="AP">
238 <title>Access point mode support</title>
239 <para>TBD</para>
240 <para>Some parts of the if_conf should be discussed here instead</para>
241 <para>
242 Insert notes about VLAN interfaces with hw crypto here or
243 in the hw crypto chapter.
244 </para>
245!Finclude/net/mac80211.h ieee80211_get_buffered_bc
246!Finclude/net/mac80211.h ieee80211_beacon_get
247 </chapter>
248
249 <chapter id="multi-iface">
250 <title>Supporting multiple virtual interfaces</title>
251 <para>TBD</para>
252 <para>
253 Note: WDS with identical MAC address should almost always be OK
254 </para>
255 <para>
256 Insert notes about having multiple virtual interfaces with
257 different MAC addresses here, note which configurations are
258 supported by mac80211, add notes about supporting hw crypto
259 with it.
260 </para>
261 </chapter>
262
263 <chapter id="hardware-scan-offload">
264 <title>Hardware scan offload</title>
265 <para>TBD</para>
266!Finclude/net/mac80211.h ieee80211_scan_completed
267 </chapter>
268 </part>
269
270 <part id="rate-control">
271 <title>Rate control interface</title>
272 <partintro>
273 <para>TBD</para>
274 <para>
275 This part of the book describes the rate control algorithm
276 interface and how it relates to mac80211 and drivers.
277 </para>
278 </partintro>
279 <chapter id="dummy">
280 <title>dummy chapter</title>
281 <para>TBD</para>
282 </chapter>
283 </part>
284
285 <part id="internal">
286 <title>Internals</title>
287 <partintro>
288 <para>TBD</para>
289 <para>
290 This part of the book describes mac80211 internals.
291 </para>
292 </partintro>
293
294 <chapter id="key-handling">
295 <title>Key handling</title>
296 <sect1>
297 <title>Key handling basics</title>
298!Pnet/mac80211/key.c Key handling basics
299 </sect1>
300 <sect1>
301 <title>MORE TBD</title>
302 <para>TBD</para>
303 </sect1>
304 </chapter>
305
306 <chapter id="rx-processing">
307 <title>Receive processing</title>
308 <para>TBD</para>
309 </chapter>
310
311 <chapter id="tx-processing">
312 <title>Transmit processing</title>
313 <para>TBD</para>
314 </chapter>
315
316 <chapter id="sta-info">
317 <title>Station info handling</title>
318 <sect1>
319 <title>Programming information</title>
320!Fnet/mac80211/sta_info.h sta_info
321!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags
322 </sect1>
323 <sect1>
324 <title>STA information lifetime rules</title>
325!Pnet/mac80211/sta_info.c STA information lifetime rules
326 </sect1>
327 </chapter>
328
329 <chapter id="synchronisation">
330 <title>Synchronisation</title>
331 <para>TBD</para>
332 <para>Locking, lots of RCU</para>
333 </chapter>
334 </part>
335</book>
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
index b9e143e28c64..54eb26b57372 100644
--- a/Documentation/DocBook/rapidio.tmpl
+++ b/Documentation/DocBook/rapidio.tmpl
@@ -133,7 +133,6 @@
133!Idrivers/rapidio/rio-sysfs.c 133!Idrivers/rapidio/rio-sysfs.c
134 </sect1> 134 </sect1>
135 <sect1 id="PPC32_support"><title>PPC32 support</title> 135 <sect1 id="PPC32_support"><title>PPC32 support</title>
136!Iarch/powerpc/kernel/rio.c
137!Earch/powerpc/sysdev/fsl_rio.c 136!Earch/powerpc/sysdev/fsl_rio.c
138!Iarch/powerpc/sysdev/fsl_rio.c 137!Iarch/powerpc/sysdev/fsl_rio.c
139 </sect1> 138 </sect1>
diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl
index d4188d4ff535..eeff19ca831b 100644
--- a/Documentation/DocBook/writing_usb_driver.tmpl
+++ b/Documentation/DocBook/writing_usb_driver.tmpl
@@ -100,8 +100,8 @@
100 useful documents, at the USB home page (see Resources). An excellent 100 useful documents, at the USB home page (see Resources). An excellent
101 introduction to the Linux USB subsystem can be found at the USB Working 101 introduction to the Linux USB subsystem can be found at the USB Working
102 Devices List (see Resources). It explains how the Linux USB subsystem is 102 Devices List (see Resources). It explains how the Linux USB subsystem is
103 structured and introduces the reader to the concept of USB urbs, which 103 structured and introduces the reader to the concept of USB urbs
104 are essential to USB drivers. 104 (USB Request Blocks), which are essential to USB drivers.
105 </para> 105 </para>
106 <para> 106 <para>
107 The first thing a Linux USB driver needs to do is register itself with 107 The first thing a Linux USB driver needs to do is register itself with
@@ -162,8 +162,8 @@ static int __init usb_skel_init(void)
162module_init(usb_skel_init); 162module_init(usb_skel_init);
163 </programlisting> 163 </programlisting>
164 <para> 164 <para>
165 When the driver is unloaded from the system, it needs to unregister 165 When the driver is unloaded from the system, it needs to deregister
166 itself with the USB subsystem. This is done with the usb_unregister 166 itself with the USB subsystem. This is done with the usb_deregister
167 function: 167 function:
168 </para> 168 </para>
169 <programlisting> 169 <programlisting>
@@ -232,7 +232,7 @@ static int skel_probe(struct usb_interface *interface,
232 were passed to the USB subsystem will be called from a user program trying 232 were passed to the USB subsystem will be called from a user program trying
233 to talk to the device. The first function called will be open, as the 233 to talk to the device. The first function called will be open, as the
234 program tries to open the device for I/O. We increment our private usage 234 program tries to open the device for I/O. We increment our private usage
235 count and save off a pointer to our internal structure in the file 235 count and save a pointer to our internal structure in the file
236 structure. This is done so that future calls to file operations will 236 structure. This is done so that future calls to file operations will
237 enable the driver to determine which device the user is addressing. All 237 enable the driver to determine which device the user is addressing. All
238 of this is done with the following code: 238 of this is done with the following code:
@@ -252,8 +252,8 @@ file->private_data = dev;
252 send to the device based on the size of the write urb it has created (this 252 send to the device based on the size of the write urb it has created (this
253 size depends on the size of the bulk out end point that the device has). 253 size depends on the size of the bulk out end point that the device has).
254 Then it copies the data from user space to kernel space, points the urb to 254 Then it copies the data from user space to kernel space, points the urb to
255 the data and submits the urb to the USB subsystem. This can be shown in 255 the data and submits the urb to the USB subsystem. This can be seen in
256 he following code: 256 the following code:
257 </para> 257 </para>
258 <programlisting> 258 <programlisting>
259/* we can only write as much as 1 urb will hold */ 259/* we can only write as much as 1 urb will hold */
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 54835610b3d6..0291ade44c17 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -249,9 +249,11 @@ process is as follows:
249 release a new -rc kernel every week. 249 release a new -rc kernel every week.
250 - Process continues until the kernel is considered "ready", the 250 - Process continues until the kernel is considered "ready", the
251 process should last around 6 weeks. 251 process should last around 6 weeks.
252 - A list of known regressions present in each -rc release is 252 - Known regressions in each release are periodically posted to the
253 tracked at the following URI: 253 linux-kernel mailing list. The goal is to reduce the length of
254 http://kernelnewbies.org/known_regressions 254 that list to zero before declaring the kernel to be "ready," but, in
255 the real world, a small number of regressions often remain at
256 release time.
255 257
256It is worth mentioning what Andrew Morton wrote on the linux-kernel 258It is worth mentioning what Andrew Morton wrote on the linux-kernel
257mailing list about kernel releases: 259mailing list about kernel releases:
@@ -261,7 +263,7 @@ mailing list about kernel releases:
261 263
2622.6.x.y -stable kernel tree 2642.6.x.y -stable kernel tree
263--------------------------- 265---------------------------
264Kernels with 4 digit versions are -stable kernels. They contain 266Kernels with 4-part versions are -stable kernels. They contain
265relatively small and critical fixes for security problems or significant 267relatively small and critical fixes for security problems or significant
266regressions discovered in a given 2.6.x kernel. 268regressions discovered in a given 2.6.x kernel.
267 269
@@ -273,7 +275,10 @@ If no 2.6.x.y kernel is available, then the highest numbered 2.6.x
273kernel is the current stable kernel. 275kernel is the current stable kernel.
274 276
2752.6.x.y are maintained by the "stable" team <stable@kernel.org>, and are 2772.6.x.y are maintained by the "stable" team <stable@kernel.org>, and are
276released almost every other week. 278released as needs dictate. The normal release period is approximately
279two weeks, but it can be longer if there are no pressing problems. A
280security-related problem, instead, can cause a release to happen almost
281instantly.
277 282
278The file Documentation/stable_kernel_rules.txt in the kernel tree 283The file Documentation/stable_kernel_rules.txt in the kernel tree
279documents what kinds of changes are acceptable for the -stable tree, and 284documents what kinds of changes are acceptable for the -stable tree, and
@@ -298,7 +303,9 @@ a while Andrew or the subsystem maintainer pushes it on to Linus for
298inclusion in mainline. 303inclusion in mainline.
299 304
300It is heavily encouraged that all new patches get tested in the -mm tree 305It is heavily encouraged that all new patches get tested in the -mm tree
301before they are sent to Linus for inclusion in the main kernel tree. 306before they are sent to Linus for inclusion in the main kernel tree. Code
307which does not make an appearance in -mm before the opening of the merge
308window will prove hard to merge into the mainline.
302 309
303These kernels are not appropriate for use on systems that are supposed 310These kernels are not appropriate for use on systems that are supposed
304to be stable and they are more risky to run than any of the other 311to be stable and they are more risky to run than any of the other
@@ -354,11 +361,12 @@ Here is a list of some of the different kernel trees available:
354 - SCSI, James Bottomley <James.Bottomley@SteelEye.com> 361 - SCSI, James Bottomley <James.Bottomley@SteelEye.com>
355 git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git 362 git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
356 363
364 - x86, Ingo Molnar <mingo@elte.hu>
365 git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git
366
357 quilt trees: 367 quilt trees:
358 - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de> 368 - USB, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
359 kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ 369 kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
360 - x86-64, partly i386, Andi Kleen <ak@suse.de>
361 ftp.firstfloor.org:/pub/ak/x86_64/quilt/
362 370
363 Other kernel trees can be found listed at http://git.kernel.org/ and in 371 Other kernel trees can be found listed at http://git.kernel.org/ and in
364 the MAINTAINERS file. 372 the MAINTAINERS file.
@@ -392,8 +400,8 @@ If you want to be advised of the future bug reports, you can subscribe to the
392bugme-new mailing list (only new bug reports are mailed here) or to the 400bugme-new mailing list (only new bug reports are mailed here) or to the
393bugme-janitor mailing list (every change in the bugzilla is mailed here) 401bugme-janitor mailing list (every change in the bugzilla is mailed here)
394 402
395 http://lists.osdl.org/mailman/listinfo/bugme-new 403 http://lists.linux-foundation.org/mailman/listinfo/bugme-new
396 http://lists.osdl.org/mailman/listinfo/bugme-janitors 404 http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
397 405
398 406
399 407
diff --git a/Documentation/PCI/00-INDEX b/Documentation/PCI/00-INDEX
new file mode 100644
index 000000000000..49f43946c6b6
--- /dev/null
+++ b/Documentation/PCI/00-INDEX
@@ -0,0 +1,12 @@
100-INDEX
2 - this file
3PCI-DMA-mapping.txt
4 - info for PCI drivers using DMA portably across all platforms
5PCIEBUS-HOWTO.txt
6 - a guide describing the PCI Express Port Bus driver
7pci-error-recovery.txt
8 - info on PCI error recovery
9pci.txt
10 - info on the PCI subsystem for device driver authors
11pcieaer-howto.txt
12 - the PCI Express Advanced Error Reporting Driver Guide HOWTO
diff --git a/Documentation/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt
index c93f42a74d7e..9a07e38631b0 100644
--- a/Documentation/PCIEBUS-HOWTO.txt
+++ b/Documentation/PCI/PCIEBUS-HOWTO.txt
@@ -56,9 +56,9 @@ advantages of using the PCI Express Port Bus driver are listed below:
56 56
57 - Allow service drivers implemented in an independent 57 - Allow service drivers implemented in an independent
58 staged approach. 58 staged approach.
59 59
60 - Allow one service driver to run on multiple PCI-PCI Bridge 60 - Allow one service driver to run on multiple PCI-PCI Bridge
61 Port devices. 61 Port devices.
62 62
63 - Manage and distribute resources of a PCI-PCI Bridge Port 63 - Manage and distribute resources of a PCI-PCI Bridge Port
64 device to requested service drivers. 64 device to requested service drivers.
@@ -82,7 +82,7 @@ Model requires some minimal changes on existing service drivers that
82imposes no impact on the functionality of existing service drivers. 82imposes no impact on the functionality of existing service drivers.
83 83
84A service driver is required to use the two APIs shown below to 84A service driver is required to use the two APIs shown below to
85register its service with the PCI Express Port Bus driver (see 85register its service with the PCI Express Port Bus driver (see
86section 5.2.1 & 5.2.2). It is important that a service driver 86section 5.2.1 & 5.2.2). It is important that a service driver
87initializes the pcie_port_service_driver data structure, included in 87initializes the pcie_port_service_driver data structure, included in
88header file /include/linux/pcieport_if.h, before calling these APIs. 88header file /include/linux/pcieport_if.h, before calling these APIs.
@@ -137,7 +137,7 @@ driver.
137static int __init aerdrv_service_init(void) 137static int __init aerdrv_service_init(void)
138{ 138{
139 int retval = 0; 139 int retval = 0;
140 140
141 retval = pcie_port_service_register(&root_aerdrv); 141 retval = pcie_port_service_register(&root_aerdrv);
142 if (!retval) { 142 if (!retval) {
143 /* 143 /*
@@ -147,7 +147,7 @@ static int __init aerdrv_service_init(void)
147 return retval; 147 return retval;
148} 148}
149 149
150static void __exit aerdrv_service_exit(void) 150static void __exit aerdrv_service_exit(void)
151{ 151{
152 pcie_port_service_unregister(&root_aerdrv); 152 pcie_port_service_unregister(&root_aerdrv);
153} 153}
@@ -175,7 +175,7 @@ same physical Root Port. Both service drivers call pci_enable_msi to
175request MSI based interrupts. A service driver may not know whether 175request MSI based interrupts. A service driver may not know whether
176any other service drivers have run on this Root Port. If either one 176any other service drivers have run on this Root Port. If either one
177of them calls pci_disable_msi, it puts the other service driver 177of them calls pci_disable_msi, it puts the other service driver
178in a wrong interrupt mode. 178in a wrong interrupt mode.
179 179
180To avoid this situation all service drivers are not permitted to 180To avoid this situation all service drivers are not permitted to
181switch interrupt mode on its device. The PCI Express Port Bus driver 181switch interrupt mode on its device. The PCI Express Port Bus driver
diff --git a/Documentation/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt
index 6650af432523..6650af432523 100644
--- a/Documentation/pci-error-recovery.txt
+++ b/Documentation/PCI/pci-error-recovery.txt
diff --git a/Documentation/pci.txt b/Documentation/PCI/pci.txt
index d2c2e6e2b224..8d4dc6250c58 100644
--- a/Documentation/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -119,7 +119,7 @@ initialization with a pointer to a structure describing the driver
119 the power state of a device before reboot. 119 the power state of a device before reboot.
120 e.g. drivers/net/e100.c. 120 e.g. drivers/net/e100.c.
121 121
122 err_handler See Documentation/pci-error-recovery.txt 122 err_handler See Documentation/PCI/pci-error-recovery.txt
123 123
124 124
125The ID table is an array of struct pci_device_id entries ending with an 125The ID table is an array of struct pci_device_id entries ending with an
diff --git a/Documentation/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
index d5da86170106..16c251230c82 100644
--- a/Documentation/pcieaer-howto.txt
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -13,7 +13,7 @@ Reporting (AER) driver and provides information on how to use it, as
13well as how to enable the drivers of endpoint devices to conform with 13well as how to enable the drivers of endpoint devices to conform with
14PCI Express AER driver. 14PCI Express AER driver.
15 15
161.2 Copyright © Intel Corporation 2006. 161.2 Copyright © Intel Corporation 2006.
17 17
181.3 What is the PCI Express AER Driver? 181.3 What is the PCI Express AER Driver?
19 19
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 08a1ed1cb5d8..118ca6e9404f 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -183,7 +183,7 @@ Even if the maintainer did not respond in step #4, make sure to ALWAYS
183copy the maintainer when you change their code. 183copy the maintainer when you change their code.
184 184
185For small patches you may want to CC the Trivial Patch Monkey 185For small patches you may want to CC the Trivial Patch Monkey
186trivial@kernel.org managed by Adrian Bunk; which collects "trivial" 186trivial@kernel.org managed by Jesper Juhl; which collects "trivial"
187patches. Trivial patches must qualify for one of the following rules: 187patches. Trivial patches must qualify for one of the following rules:
188 Spelling fixes in documentation 188 Spelling fixes in documentation
189 Spelling fixes which could break grep(1) 189 Spelling fixes which could break grep(1)
@@ -196,7 +196,7 @@ patches. Trivial patches must qualify for one of the following rules:
196 since people copy, as long as it's trivial) 196 since people copy, as long as it's trivial)
197 Any fix by the author/maintainer of the file (ie. patch monkey 197 Any fix by the author/maintainer of the file (ie. patch monkey
198 in re-transmission mode) 198 in re-transmission mode)
199URL: <http://www.kernel.org/pub/linux/kernel/people/bunk/trivial/> 199URL: <http://www.kernel.org/pub/linux/kernel/people/juhl/trivial/>
200 200
201 201
202 202
@@ -327,8 +327,54 @@ Some people also put extra tags at the end. They'll just be ignored for
327now, but you can do this to mark internal company procedures or just 327now, but you can do this to mark internal company procedures or just
328point out some special detail about the sign-off. 328point out some special detail about the sign-off.
329 329
330If you are a subsystem or branch maintainer, sometimes you need to slightly
331modify patches you receive in order to merge them, because the code is not
332exactly the same in your tree and the submitters'. If you stick strictly to
333rule (c), you should ask the submitter to rediff, but this is a totally
334counter-productive waste of time and energy. Rule (b) allows you to adjust
335the code, but then it is very impolite to change one submitter's code and
336make him endorse your bugs. To solve this problem, it is recommended that
337you add a line between the last Signed-off-by header and yours, indicating
338the nature of your changes. While there is nothing mandatory about this, it
339seems like prepending the description with your mail and/or name, all
340enclosed in square brackets, is noticeable enough to make it obvious that
341you are responsible for last-minute changes. Example :
330 342
33113) When to use Acked-by: 343 Signed-off-by: Random J Developer <random@developer.example.org>
344 [lucky@maintainer.example.org: struct foo moved from foo.c to foo.h]
345 Signed-off-by: Lucky K Maintainer <lucky@maintainer.example.org>
346
347This practice is particularly helpful if you maintain a stable branch and
348want at the same time to credit the author, track changes, merge the fix,
349and protect the submitter from complaints. Note that under no circumstances
350can you change the author's identity (the From header), as it is the one
351which appears in the changelog.
352
353Special note to back-porters: It seems to be a common and useful practice
354to insert an indication of the origin of a patch at the top of the commit
355message (just after the subject line) to facilitate tracking. For instance,
356here's what we see in 2.6-stable :
357
358 Date: Tue May 13 19:10:30 2008 +0000
359
360 SCSI: libiscsi regression in 2.6.25: fix nop timer handling
361
362 commit 4cf1043593db6a337f10e006c23c69e5fc93e722 upstream
363
364And here's what appears in 2.4 :
365
366 Date: Tue May 13 22:12:27 2008 +0200
367
368 wireless, airo: waitbusy() won't delay
369
370 [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a]
371
372Whatever the format, this information provides a valuable help to people
373tracking your trees, and to people trying to trouble-shoot bugs in your
374tree.
375
376
37713) When to use Acked-by: and Cc:
332 378
333The Signed-off-by: tag indicates that the signer was involved in the 379The Signed-off-by: tag indicates that the signer was involved in the
334development of the patch, or that he/she was in the patch's delivery path. 380development of the patch, or that he/she was in the patch's delivery path.
@@ -349,11 +395,59 @@ Acked-by: does not necessarily indicate acknowledgement of the entire patch.
349For example, if a patch affects multiple subsystems and has an Acked-by: from 395For example, if a patch affects multiple subsystems and has an Acked-by: from
350one subsystem maintainer then this usually indicates acknowledgement of just 396one subsystem maintainer then this usually indicates acknowledgement of just
351the part which affects that maintainer's code. Judgement should be used here. 397the part which affects that maintainer's code. Judgement should be used here.
352 When in doubt people should refer to the original discussion in the mailing 398When in doubt people should refer to the original discussion in the mailing
353list archives. 399list archives.
354 400
401If a person has had the opportunity to comment on a patch, but has not
402provided such comments, you may optionally add a "Cc:" tag to the patch.
403This is the only tag which might be added without an explicit action by the
404person it names. This tag documents that potentially interested parties
405have been included in the discussion.
406
407
40814) Using Tested-by: and Reviewed-by:
409
410A Tested-by: tag indicates that the patch has been successfully tested (in
411some environment) by the person named. This tag informs maintainers that
412some testing has been performed, provides a means to locate testers for
413future patches, and ensures credit for the testers.
414
415Reviewed-by:, instead, indicates that the patch has been reviewed and found
416acceptable according to the Reviewer's Statement:
417
418 Reviewer's statement of oversight
419
420 By offering my Reviewed-by: tag, I state that:
421
422 (a) I have carried out a technical review of this patch to
423 evaluate its appropriateness and readiness for inclusion into
424 the mainline kernel.
425
426 (b) Any problems, concerns, or questions relating to the patch
427 have been communicated back to the submitter. I am satisfied
428 with the submitter's response to my comments.
429
430 (c) While there may be things that could be improved with this
431 submission, I believe that it is, at this time, (1) a
432 worthwhile modification to the kernel, and (2) free of known
433 issues which would argue against its inclusion.
434
435 (d) While I have reviewed the patch and believe it to be sound, I
436 do not (unless explicitly stated elsewhere) make any
437 warranties or guarantees that it will achieve its stated
438 purpose or function properly in any given situation.
439
440A Reviewed-by tag is a statement of opinion that the patch is an
441appropriate modification of the kernel without any remaining serious
442technical issues. Any interested reviewer (who has done the work) can
443offer a Reviewed-by tag for a patch. This tag serves to give credit to
444reviewers and to inform maintainers of the degree of review which has been
445done on the patch. Reviewed-by: tags, when supplied by reviewers known to
446understand the subject area and to perform thorough reviews, will normally
447increase the likelihood of your patch getting into the kernel.
448
355 449
35614) The canonical patch format 45015) The canonical patch format
357 451
358The canonical patch subject line is: 452The canonical patch subject line is:
359 453
@@ -512,7 +606,7 @@ They provide type safety, have no length limitations, no formatting
512limitations, and under gcc they are as cheap as macros. 606limitations, and under gcc they are as cheap as macros.
513 607
514Macros should only be used for cases where a static inline is clearly 608Macros should only be used for cases where a static inline is clearly
515suboptimal [there a few, isolated cases of this in fast paths], 609suboptimal [there are a few, isolated cases of this in fast paths],
516or where it is impossible to use a static inline function [such as 610or where it is impossible to use a static inline function [such as
517string-izing]. 611string-izing].
518 612
diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/Samsung-S3C24XX/NAND.txt
new file mode 100644
index 000000000000..bc478a3409b8
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/NAND.txt
@@ -0,0 +1,30 @@
1 S3C24XX NAND Support
2 ====================
3
4Introduction
5------------
6
7Small Page NAND
8---------------
9
10The driver uses a 512 byte (1 page) ECC code for this setup. The
11ECC code is not directly compatible with the default kernel ECC
12code, so the driver enforces its own OOB layout and ECC parameters
13
14Large Page NAND
15---------------
16
17The driver is capable of handling NAND flash with a 2KiB page
18size, with support for hardware ECC generation and correction.
19
20Unlike the 512 byte page mode, the driver generates ECC data for
21each 256 byte block in a 2KiB page. This means that more than
22one error in a page can be rectified. It also means that the
23OOB layout remains the default kernel layout for these flashes.
24
25
26Document Author
27---------------
28
29Ben Dooks, Copyright 2007 Simtec Electronics
30
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
index c31b76fa66c4..d04e1e30c47f 100644
--- a/Documentation/arm/Samsung-S3C24XX/Overview.txt
+++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt
@@ -156,6 +156,8 @@ NAND
156 controller. If there are any problems the latest linux-mtd 156 controller. If there are any problems the latest linux-mtd
157 code can be found from http://www.linux-mtd.infradead.org/ 157 code can be found from http://www.linux-mtd.infradead.org/
158 158
159 For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt
160
159 161
160Serial 162Serial
161------ 163------
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 93f223b9723f..4dbb8be1c991 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1097,7 +1097,7 @@ lock themselves, if required. Drivers that explicitly used the
1097io_request_lock for serialization need to be modified accordingly. 1097io_request_lock for serialization need to be modified accordingly.
1098Usually it's as easy as adding a global lock: 1098Usually it's as easy as adding a global lock:
1099 1099
1100 static spinlock_t my_driver_lock = SPIN_LOCK_UNLOCKED; 1100 static DEFINE_SPINLOCK(my_driver_lock);
1101 1101
1102and passing the address to that lock to blk_init_queue(). 1102and passing the address to that lock to blk_init_queue().
1103 1103
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt
new file mode 100644
index 000000000000..000b0fbdc105
--- /dev/null
+++ b/Documentation/braille-console.txt
@@ -0,0 +1,34 @@
1 Linux Braille Console
2
3To get early boot messages on a braille device (before userspace screen
4readers can start), you first need to compile the support for the usual serial
5console (see serial-console.txt), and for braille device (in Device Drivers -
6Accessibility).
7
8Then you need to specify a console=brl, option on the kernel command line, the
9format is:
10
11 console=brl,serial_options...
12
13where serial_options... are the same as described in serial-console.txt
14
15So for instance you can use console=brl,ttyS0 if the braille device is connected
16to the first serial port, and console=brl,ttyS0,115200 to override the baud rate
17to 115200, etc.
18
19By default, the braille device will just show the last kernel message (console
20mode). To review previous messages, press the Insert key to switch to the VT
21review mode. In review mode, the arrow keys let you browse the VT content, the
22page up/down keys go to the top/bottom of the screen, and the home key goes back
23to the cursor, thus providing a very basic screen-reviewing facility.
24
25Sound feedback can be obtained by adding the braille_console.sound=1 kernel
26parameter.
27
28For simplicity, only one braille console can be enabled, other uses of
29console=brl,... will be discarded. Also note that it does not interfere with
30the console selection mechanism described in serial-console.txt.
31
32For now, only the VisioBraille device is supported.
33
34Samuel Thibault <samuel.thibault@ens-lyon.org>
diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt
index e65736c6b8bc..63e59b8847c5 100644
--- a/Documentation/cciss.txt
+++ b/Documentation/cciss.txt
@@ -21,6 +21,11 @@ This driver is known to work with the following cards:
21 * SA E200 21 * SA E200
22 * SA E200i 22 * SA E200i
23 * SA E500 23 * SA E500
24 * SA P212
25 * SA P410
26 * SA P410i
27 * SA P411
28 * SA P812
24 29
25Detecting drive failures: 30Detecting drive failures:
26------------------------- 31-------------------------
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
index c713aeb020c4..c06233fe52ac 100644
--- a/Documentation/cdrom/cdrom-standard.tex
+++ b/Documentation/cdrom/cdrom-standard.tex
@@ -777,7 +777,7 @@ Note that a driver must have one static structure, $<device>_dops$, while
777it may have as many structures $<device>_info$ as there are minor devices 777it may have as many structures $<device>_info$ as there are minor devices
778active. $Register_cdrom()$ builds a linked list from these. 778active. $Register_cdrom()$ builds a linked list from these.
779 779
780\subsection{$Int\ unregister_cdrom(struct\ cdrom_device_info * cdi)$} 780\subsection{$Void\ unregister_cdrom(struct\ cdrom_device_info * cdi)$}
781 781
782Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes 782Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes
783the minor device from the list. If it was the last registered minor for 783the minor device from the list. If it was the last registered minor for
diff --git a/Documentation/cgroups.txt b/Documentation/cgroups.txt
index 31d12e21ff8a..824fc0274471 100644
--- a/Documentation/cgroups.txt
+++ b/Documentation/cgroups.txt
@@ -310,8 +310,8 @@ and then start a subshell 'sh' in that cgroup:
310 cd /dev/cgroup 310 cd /dev/cgroup
311 mkdir Charlie 311 mkdir Charlie
312 cd Charlie 312 cd Charlie
313 /bin/echo 2-3 > cpus 313 /bin/echo 2-3 > cpuset.cpus
314 /bin/echo 1 > mems 314 /bin/echo 1 > cpuset.mems
315 /bin/echo $$ > tasks 315 /bin/echo $$ > tasks
316 sh 316 sh
317 # The subshell 'sh' is now running in cgroup Charlie 317 # The subshell 'sh' is now running in cgroup Charlie
@@ -500,8 +500,7 @@ post-attachment activity that requires memory allocations or blocking.
500 500
501void fork(struct cgroup_subsys *ss, struct task_struct *task) 501void fork(struct cgroup_subsys *ss, struct task_struct *task)
502 502
503Called when a task is forked into a cgroup. Also called during 503Called when a task is forked into a cgroup.
504registration for all existing tasks.
505 504
506void exit(struct cgroup_subsys *ss, struct task_struct *task) 505void exit(struct cgroup_subsys *ss, struct task_struct *task)
507 506
diff --git a/Documentation/cli-sti-removal.txt b/Documentation/cli-sti-removal.txt
index 0223c9d20331..60932b02fcb3 100644
--- a/Documentation/cli-sti-removal.txt
+++ b/Documentation/cli-sti-removal.txt
@@ -43,7 +43,7 @@ would execute while the cli()-ed section is executing.
43 43
44but from now on a more direct method of locking has to be used: 44but from now on a more direct method of locking has to be used:
45 45
46 spinlock_t driver_lock = SPIN_LOCK_UNLOCKED; 46 DEFINE_SPINLOCK(driver_lock);
47 struct driver_data; 47 struct driver_data;
48 48
49 irq_handler (...) 49 irq_handler (...)
diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt
new file mode 100644
index 000000000000..4dcea42432c2
--- /dev/null
+++ b/Documentation/controllers/devices.txt
@@ -0,0 +1,48 @@
1Device Whitelist Controller
2
31. Description:
4
5Implement a cgroup to track and enforce open and mknod restrictions
6on device files. A device cgroup associates a device access
7whitelist with each cgroup. A whitelist entry has 4 fields.
8'type' is a (all), c (char), or b (block). 'all' means it applies
9to all types and all major and minor numbers. Major and minor are
10either an integer or * for all. Access is a composition of r
11(read), w (write), and m (mknod).
12
13The root device cgroup starts with rwm to 'all'. A child device
14cgroup gets a copy of the parent. Administrators can then remove
15devices from the whitelist or add new entries. A child cgroup can
16never receive a device access which is denied its parent. However
17when a device access is removed from a parent it will not also be
18removed from the child(ren).
19
202. User Interface
21
22An entry is added using devices.allow, and removed using
23devices.deny. For instance
24
25 echo 'c 1:3 mr' > /cgroups/1/devices.allow
26
27allows cgroup 1 to read and mknod the device usually known as
28/dev/null. Doing
29
30 echo a > /cgroups/1/devices.deny
31
32will remove the default 'a *:* mrw' entry.
33
343. Security
35
36Any task can move itself between cgroups. This clearly won't
37suffice, but we can decide the best way to adequately restrict
38movement as people get some experience with this. We may just want
39to require CAP_SYS_ADMIN, which at least is a separate bit from
40CAP_MKNOD. We may want to just refuse moving to a cgroup which
41isn't a descendent of the current one. Or we may want to use
42CAP_MAC_ADMIN, since we really are trying to lock down root.
43
44CAP_SYS_ADMIN is needed to modify the whitelist or move another
45task to a new cgroup. (Again we'll probably want to change that).
46
47A cgroup may not be granted more permissions than the cgroup's
48parent has.
diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt
new file mode 100644
index 000000000000..f196ac1d7d25
--- /dev/null
+++ b/Documentation/controllers/resource_counter.txt
@@ -0,0 +1,181 @@
1
2 The Resource Counter
3
4The resource counter, declared at include/linux/res_counter.h,
5is supposed to facilitate the resource management by controllers
6by providing common stuff for accounting.
7
8This "stuff" includes the res_counter structure and routines
9to work with it.
10
11
12
131. Crucial parts of the res_counter structure
14
15 a. unsigned long long usage
16
17 The usage value shows the amount of a resource that is consumed
18 by a group at a given time. The units of measurement should be
19 determined by the controller that uses this counter. E.g. it can
20 be bytes, items or any other unit the controller operates on.
21
22 b. unsigned long long max_usage
23
24 The maximal value of the usage over time.
25
26 This value is useful when gathering statistical information about
27 the particular group, as it shows the actual resource requirements
28 for a particular group, not just some usage snapshot.
29
30 c. unsigned long long limit
31
32 The maximal allowed amount of resource to consume by the group. In
33 case the group requests more resources, so that the usage value
34 would exceed the limit, the resource allocation is rejected (see
35 the next section).
36
37 d. unsigned long long failcnt
38
39 The failcnt stands for "failures counter". This is the number of
40 resource allocation attempts that failed.
41
42 e. spinlock_t lock
43
44 Protects changes of the above values.
45
46
47
482. Basic accounting routines
49
50 a. void res_counter_init(struct res_counter *rc)
51
52 Initializes the resource counter. As usual, should be the first
53 routine called for a new counter.
54
55 b. int res_counter_charge[_locked]
56 (struct res_counter *rc, unsigned long val)
57
58 When a resource is about to be allocated it has to be accounted
59 with the appropriate resource counter (controller should determine
60 which one to use on its own). This operation is called "charging".
61
62 It is not very important which operation - resource allocation
63 or charging - is performed first, but
64 * if the allocation is performed first, this may create a
65 temporary resource over-usage by the time resource counter is
66 charged;
67 * if the charging is performed first, then it should be uncharged
68 on the error path (if one is taken).
69
70 c. void res_counter_uncharge[_locked]
71 (struct res_counter *rc, unsigned long val)
72
73 When a resource is released (freed) it should be de-accounted
74 from the resource counter it was accounted to. This is called
75 "uncharging".
76
77 The _locked routines imply that the res_counter->lock is taken.
78
79
80 2.1 Other accounting routines
81
82 There are more routines that may help you with common needs, like
83 checking whether the limit is reached or resetting the max_usage
84 value. They are all declared in include/linux/res_counter.h.
85
86
87
883. Analyzing the resource counter registrations
89
90 a. If the failcnt value constantly grows, this means that the counter's
91 limit is too tight. Either the group is misbehaving and consumes too
92 many resources, or the configuration is not suitable for the group
93 and the limit should be increased.
94
95 b. The max_usage value can be used to quickly tune the group. One may
96 set the limits to maximal values and either load the container with
97 a common pattern or leave one for a while. After this the max_usage
98 value shows the amount of memory the container would require during
99 its common activity.
100
101 Setting the limit a bit above this value gives a pretty good
102 configuration that works in most of the cases.
103
104 c. If the max_usage is much less than the limit, but the failcnt value
105 is growing, then the group tries to allocate a big chunk of resource
106 at once.
107
108 d. If the max_usage is much less than the limit, but the failcnt value
109 is 0, then this group has been given a higher limit than it
110 requires. It is better to lower the limit a bit, leaving more resource
111 for other groups.
112
113
114
1154. Communication with the control groups subsystem (cgroups)
116
117All the resource controllers that are using cgroups and resource counters
118should provide files (in the cgroup filesystem) to work with the resource
119counter fields. They are recommended to adhere to the following rules:
120
121 a. File names
122
123 Field name File name
124 ---------------------------------------------------
125 usage usage_in_<unit_of_measurement>
126 max_usage max_usage_in_<unit_of_measurement>
127 limit limit_in_<unit_of_measurement>
128 failcnt failcnt
129 lock no file :)
130
131 b. Reading from file should show the corresponding field value in the
132 appropriate format.
133
134 c. Writing to file
135
136 Field Expected behavior
137 ----------------------------------
138 usage prohibited
139 max_usage reset to usage
140 limit set the limit
141 failcnt reset to zero
142
143
144
1455. Usage example
146
147 a. Declare a task group (take a look at cgroups subsystem for this) and
148 fold a res_counter into it
149
150 struct my_group {
151 struct res_counter res;
152
153 <other fields>
154 }
155
156 b. Put hooks in resource allocation/release paths
157
158 int alloc_something(...)
159 {
160 if (res_counter_charge(res_counter_ptr, amount) < 0)
161 return -ENOMEM;
162
163 <allocate the resource and return to the caller>
164 }
165
166 void release_something(...)
167 {
168 res_counter_uncharge(res_counter_ptr, amount);
169
170 <release the resource>
171 }
172
173 In order to keep the usage value self-consistent, both the
174 "res_counter_ptr" and the "amount" in release_something() should be
175 the same as they were in alloc_something() when the resource being
176 released was allocated.
177
178 c. Provide the way to read res_counter values and set them (the cgroups
179 still can help with it).
180
181 d. Compile and run :)
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index 6a9c55bd556b..dcec0564d040 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -129,14 +129,6 @@ to its default value of '80' it means that between the checking
129intervals the CPU needs to be on average more than 80% in use to then 129intervals the CPU needs to be on average more than 80% in use to then
130decide that the CPU frequency needs to be increased. 130decide that the CPU frequency needs to be increased.
131 131
132sampling_down_factor: this parameter controls the rate that the CPU
133makes a decision on when to decrease the frequency. When set to its
134default value of '5' it means that at 1/5 the sampling_rate the kernel
135makes a decision to lower the frequency. Five "lower rate" decisions
136have to be made in a row before the CPU frequency is actually lower.
137If set to '1' then the frequency decreases as quickly as it increases,
138if set to '2' it decreases at half the rate of the increase.
139
140ignore_nice_load: this parameter takes a value of '0' or '1'. When 132ignore_nice_load: this parameter takes a value of '0' or '1'. When
141set to '0' (its default), all processes are counted towards the 133set to '0' (its default), all processes are counted towards the
142'cpu utilisation' value. When set to '1', the processes that are 134'cpu utilisation' value. When set to '1', the processes that are
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index af3b925ece08..6c442d8426b5 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -154,6 +154,11 @@ scaling_governor, and by "echoing" the name of another
154 that some governors won't load - they only 154 that some governors won't load - they only
155 work on some specific architectures or 155 work on some specific architectures or
156 processors. 156 processors.
157
158cpuinfo_cur_freq : Current speed of the CPU, in kHz.
159
160scaling_available_frequencies : List of available frequencies, in kHz.
161
157scaling_min_freq and 162scaling_min_freq and
158scaling_max_freq show the current "policy limits" (in 163scaling_max_freq show the current "policy limits" (in
159 kHz). By echoing new values into these 164 kHz). By echoing new values into these
@@ -162,6 +167,15 @@ scaling_max_freq show the current "policy limits" (in
162 first set scaling_max_freq, then 167 first set scaling_max_freq, then
163 scaling_min_freq. 168 scaling_min_freq.
164 169
170affected_cpus : List of CPUs that require software coordination
171 of frequency.
172
173related_cpus : List of CPUs that need some sort of frequency
174 coordination, whether software or hardware.
175
176scaling_driver : Hardware driver for cpufreq.
177
178 scaling_cur_freq : Current frequency of the CPU, in kHz.
165 179
166If you have selected the "userspace" governor which allows you to 180If you have selected the "userspace" governor which allows you to
167set the CPU operating frequency to a specific value, you can read out 181set the CPU operating frequency to a specific value, you can read out
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index ad2bb3b3acc1..353504de3084 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -8,6 +8,7 @@ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
8Modified by Paul Jackson <pj@sgi.com> 8Modified by Paul Jackson <pj@sgi.com>
9Modified by Christoph Lameter <clameter@sgi.com> 9Modified by Christoph Lameter <clameter@sgi.com>
10Modified by Paul Menage <menage@google.com> 10Modified by Paul Menage <menage@google.com>
11Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
11 12
12CONTENTS: 13CONTENTS:
13========= 14=========
@@ -20,7 +21,8 @@ CONTENTS:
20 1.5 What is memory_pressure ? 21 1.5 What is memory_pressure ?
21 1.6 What is memory spread ? 22 1.6 What is memory spread ?
22 1.7 What is sched_load_balance ? 23 1.7 What is sched_load_balance ?
23 1.8 How do I use cpusets ? 24 1.8 What is sched_relax_domain_level ?
25 1.9 How do I use cpusets ?
242. Usage Examples and Syntax 262. Usage Examples and Syntax
25 2.1 Basic Usage 27 2.1 Basic Usage
26 2.2 Adding/removing cpus 28 2.2 Adding/removing cpus
@@ -169,6 +171,7 @@ files describing that cpuset:
169 - memory_migrate flag: if set, move pages to cpusets nodes 171 - memory_migrate flag: if set, move pages to cpusets nodes
170 - cpu_exclusive flag: is cpu placement exclusive? 172 - cpu_exclusive flag: is cpu placement exclusive?
171 - mem_exclusive flag: is memory placement exclusive? 173 - mem_exclusive flag: is memory placement exclusive?
174 - mem_hardwall flag: is memory allocation hardwalled?
172 - memory_pressure: measure of how much paging pressure in cpuset 175 - memory_pressure: measure of how much paging pressure in cpuset
173 176
174In addition, the root cpuset only has the following file: 177In addition, the root cpuset only has the following file:
@@ -196,7 +199,7 @@ using the sched_setaffinity, mbind and set_mempolicy system calls.
196The following rules apply to each cpuset: 199The following rules apply to each cpuset:
197 200
198 - Its CPUs and Memory Nodes must be a subset of its parents. 201 - Its CPUs and Memory Nodes must be a subset of its parents.
199 - It can only be marked exclusive if its parent is. 202 - It can't be marked exclusive unless its parent is.
200 - If its cpu or memory is exclusive, they may not overlap any sibling. 203 - If its cpu or memory is exclusive, they may not overlap any sibling.
201 204
202These rules, and the natural hierarchy of cpusets, enable efficient 205These rules, and the natural hierarchy of cpusets, enable efficient
@@ -220,17 +223,18 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than
220a direct ancestor or descendent, may share any of the same CPUs or 223a direct ancestor or descendent, may share any of the same CPUs or
221Memory Nodes. 224Memory Nodes.
222 225
223A cpuset that is mem_exclusive restricts kernel allocations for 226A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled",
224page, buffer and other data commonly shared by the kernel across 227i.e. it restricts kernel allocations for page, buffer and other data
225multiple users. All cpusets, whether mem_exclusive or not, restrict 228commonly shared by the kernel across multiple users. All cpusets,
226allocations of memory for user space. This enables configuring a 229whether hardwalled or not, restrict allocations of memory for user
227system so that several independent jobs can share common kernel data, 230space. This enables configuring a system so that several independent
228such as file system pages, while isolating each jobs user allocation in 231jobs can share common kernel data, such as file system pages, while
229its own cpuset. To do this, construct a large mem_exclusive cpuset to 232isolating each job's user allocation in its own cpuset. To do this,
230hold all the jobs, and construct child, non-mem_exclusive cpusets for 233construct a large mem_exclusive cpuset to hold all the jobs, and
231each individual job. Only a small amount of typical kernel memory, 234construct child, non-mem_exclusive cpusets for each individual job.
232such as requests from interrupt handlers, is allowed to be taken 235Only a small amount of typical kernel memory, such as requests from
233outside even a mem_exclusive cpuset. 236interrupt handlers, is allowed to be taken outside even a
237mem_exclusive cpuset.
234 238
235 239
2361.5 What is memory_pressure ? 2401.5 What is memory_pressure ?
@@ -341,7 +345,7 @@ is modified to perform an inline check for this PF_SPREAD_PAGE task
341flag, and if set, a call to a new routine cpuset_mem_spread_node() 345flag, and if set, a call to a new routine cpuset_mem_spread_node()
342returns the node to prefer for the allocation. 346returns the node to prefer for the allocation.
343 347
344Similarly, setting 'memory_spread_cache' turns on the flag 348Similarly, setting 'memory_spread_slab' turns on the flag
345PF_SPREAD_SLAB, and appropriately marked slab caches will allocate 349PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
346pages from the node returned by cpuset_mem_spread_node(). 350pages from the node returned by cpuset_mem_spread_node().
347 351
@@ -497,7 +501,73 @@ the cpuset code to update these sched domains, it compares the new
497partition requested with the current, and updates its sched domains, 501partition requested with the current, and updates its sched domains,
498removing the old and adding the new, for each change. 502removing the old and adding the new, for each change.
499 503
5001.8 How do I use cpusets ? 504
5051.8 What is sched_relax_domain_level ?
506--------------------------------------
507
508In sched domain, the scheduler migrates tasks in 2 ways; periodic load
509balance on tick, and at time of some schedule events.
510
511When a task is woken up, the scheduler tries to move the task to an idle CPU.
512For example, if a task A running on CPU X activates another task B
513on the same CPU X, and if CPU Y is X's sibling and is idle,
514then the scheduler migrates task B to CPU Y so that task B can start on
515CPU Y without waiting for task A on CPU X.
516
517And if a CPU runs out of tasks in its runqueue, the CPU tries to pull
518extra tasks from other busy CPUs to help them before it goes
519idle.
520
521Of course it takes some searching cost to find movable tasks and/or
522idle CPUs, so the scheduler might not search all CPUs in the domain
523every time. In fact, in some architectures, the searching ranges on
524events are limited to the same socket or node where the CPU is located,
525while the load balance on tick searches all.
526
527For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
528is idle while CPU X and the siblings are busy, scheduler can't migrate
529woken task B from X to Z since it is out of its searching range.
530As a result, task B on CPU X needs to wait for task A or for load balance
531on the next tick. For some applications in special situations, waiting
5321 tick may be too long.
533
534The 'sched_relax_domain_level' file allows you to request changing
535this searching range as you like. This file takes an int value which
536indicates the size of the searching range in levels, ideally as follows;
537the initial value is -1, which indicates the cpuset has no request.
538
539 -1 : no request. use system default or follow request of others.
540 0 : no search.
541 1 : search siblings (hyperthreads in a core).
542 2 : search cores in a package.
543 3 : search cpus in a node [= system wide on non-NUMA system]
544 ( 4 : search nodes in a chunk of node [on NUMA system] )
545 ( 5 : search system wide [on NUMA system] )
546
547This file is per-cpuset and affects the sched domain that the cpuset
548belongs to. Therefore if the flag 'sched_load_balance' of a cpuset
549is disabled, then 'sched_relax_domain_level' has no effect since
550there is no sched domain belonging to the cpuset.
551
552If multiple cpusets are overlapping and hence they form a single sched
553domain, the largest value among those is used. Be careful, if one
554requests 0 and others are -1 then 0 is used.
555
556Note that modifying this file will have both good and bad effects,
557and whether it is acceptable or not will depend on your situation.
558Don't modify this file if you are not sure.
559
560If your situation is:
561 - The migration costs between each cpu can be assumed considerably
562 small (for you) due to your special application's behavior or
563 special hardware support for CPU cache etc.
564 - The searching cost doesn't have an impact (for you) or you can make
565 the searching cost small enough by managing cpuset to compact etc.
566 - Low latency is required even if it sacrifices cache hit rate etc.
567then increasing 'sched_relax_domain_level' would benefit you.
568
569
5701.9 How do I use cpusets ?
501-------------------------- 571--------------------------
502 572
503In order to minimize the impact of cpusets on critical kernel 573In order to minimize the impact of cpusets on critical kernel
@@ -639,7 +709,10 @@ Now you want to do something with this cpuset.
639 709
640In this directory you can find several files: 710In this directory you can find several files:
641# ls 711# ls
642cpus cpu_exclusive mems mem_exclusive tasks 712cpu_exclusive memory_migrate mems tasks
713cpus memory_pressure notify_on_release
714mem_exclusive memory_spread_page sched_load_balance
715mem_hardwall memory_spread_slab sched_relax_domain_level
643 716
644Reading them will give you information about the state of this cpuset: 717Reading them will give you information about the state of this cpuset:
645the CPUs and Memory Nodes it can use, the processes that are using 718the CPUs and Memory Nodes it can use, the processes that are using
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
index c360d4e91b48..59a91e5c6909 100644
--- a/Documentation/debugging-via-ohci1394.txt
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -41,15 +41,19 @@ to a working state and enables physical DMA by default for all remote nodes.
41This can be turned off by ohci1394's module parameter phys_dma=0. 41This can be turned off by ohci1394's module parameter phys_dma=0.
42 42
43The alternative firewire-ohci driver in drivers/firewire uses filtered physical 43The alternative firewire-ohci driver in drivers/firewire uses filtered physical
44DMA, hence is not yet suitable for remote debugging. 44DMA by default, which is more secure but not suitable for remote debugging.
45Compile the driver with CONFIG_FIREWIRE_OHCI_REMOTE_DMA (Kernel hacking menu:
46Remote debugging over FireWire with firewire-ohci) to get unfiltered physical
47DMA.
45 48
46Because ohci1394 depends on the PCI enumeration to be completed, an 49Because ohci1394 and firewire-ohci depend on the PCI enumeration to be
47initialization routine which runs pretty early (long before console_init() 50completed, an initialization routine which runs pretty early has been
48which makes the printk buffer appear on the console can be called) was written. 51implemented for x86. This routine runs long before console_init() can be
52called, i.e. before the printk buffer appears on the console.
49 53
50To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu: 54To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
51Provide code for enabling DMA over FireWire early on boot) and pass the 55Remote debugging over FireWire early on boot) and pass the parameter
52parameter "ohci1394_dma=early" to the recompiled kernel on boot. 56"ohci1394_dma=early" to the recompiled kernel on boot.
53 57
54Tools 58Tools
55----- 59-----
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
new file mode 100644
index 000000000000..6680cab2c705
--- /dev/null
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -0,0 +1,52 @@
1dm-crypt
2=========
3
4Device-Mapper's "crypt" target provides transparent encryption of block devices
5using the kernel crypto API.
6
7Parameters: <cipher> <key> <iv_offset> <device path> <offset>
8
9<cipher>
10 Encryption cipher and an optional IV generation mode.
11 (In format cipher-chainmode-ivopts:ivmode).
12 Examples:
13 des
14 aes-cbc-essiv:sha256
15 twofish-ecb
16
17 /proc/crypto contains supported crypto modes
18
19<key>
20 Key used for encryption. It is encoded as a hexadecimal number.
21 You can only use key sizes that are valid for the selected cipher.
22
23<iv_offset>
24 The IV offset is a sector count that is added to the sector number
25 before creating the IV.
26
27<device path>
28 This is the device that is going to be used as backend and contains the
29 encrypted data. You can specify it as a path like /dev/xxx or a device
30 number <major>:<minor>.
31
32<offset>
33 Starting sector within the device where the encrypted data begins.
34
35Example scripts
36===============
37LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
38encryption with dm-crypt using the 'cryptsetup' utility, see
39http://luks.endorphin.org/
40
41[[
42#!/bin/sh
43# Create a crypt device using dmsetup
44dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
45]]
46
47[[
48#!/bin/sh
49# Create a crypt device using cryptsetup and LUKS header with default cipher
50cryptsetup luksFormat $1
51cryptsetup luksOpen $1 crypt1
52]]
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index c09a96b99354..881e6dd03aea 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -47,7 +47,6 @@
47.mm 47.mm
4853c700_d.h 4853c700_d.h
4953c8xx_d.h* 4953c8xx_d.h*
50BitKeeper
51COPYING 50COPYING
52CREDITS 51CREDITS
53CVS 52CVS
@@ -142,6 +141,7 @@ mkprep
142mktables 141mktables
143mktree 142mktree
144modpost 143modpost
144modules.order
145modversions.h* 145modversions.h*
146offset.h 146offset.h
147offsets.h 147offsets.h
@@ -172,6 +172,7 @@ sm_tbl*
172split-include 172split-include
173tags 173tags
174tftpboot.img 174tftpboot.img
175timeconst.h
175times.h* 176times.h*
176tkparse 177tkparse
177trix_boot.h 178trix_boot.h
diff --git a/Documentation/early-userspace/README b/Documentation/early-userspace/README
index 766d320c8eb6..e35d83052192 100644
--- a/Documentation/early-userspace/README
+++ b/Documentation/early-userspace/README
@@ -89,8 +89,8 @@ the 2.7 era (it missed the boat for 2.5).
89You can obtain somewhat infrequent snapshots of klibc from 89You can obtain somewhat infrequent snapshots of klibc from
90ftp://ftp.kernel.org/pub/linux/libs/klibc/ 90ftp://ftp.kernel.org/pub/linux/libs/klibc/
91 91
92For active users, you are better off using the klibc BitKeeper 92For active users, you are better off using the klibc git
93repositories, at http://klibc.bkbits.net/ 93repository, at http://git.kernel.org/?p=libs/klibc/klibc.git
94 94
95The standalone klibc distribution currently provides three components, 95The standalone klibc distribution currently provides three components,
96in addition to the klibc library: 96in addition to the klibc library:
diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt
new file mode 100644
index 000000000000..2f640903bbb2
--- /dev/null
+++ b/Documentation/fb/gxfb.txt
@@ -0,0 +1,52 @@
1[This file is cloned from VesaFB/aty128fb]
2
3What is gxfb?
4=================
5
6This is a graphics framebuffer driver for AMD Geode GX2 based processors.
7
8Advantages:
9
10 * No need to use AMD's VSA code (or other VESA emulation layer) in the
11 BIOS.
12 * It provides a nice large console (128 cols + 48 lines with 1024x768)
13 without using tiny, unreadable fonts.
14 * You can run XF68_FBDev on top of /dev/fb0
15 * Most important: boot logo :-)
16
17Disadvantages:
18
19 * graphic mode is slower than text mode...
20
21
22How to use it?
23==============
24
25Switching modes is done using gxfb.mode_option=<resolution>... boot
26parameter or using `fbset' program.
27
28See Documentation/fb/modedb.txt for more information on modedb
29resolutions.
30
31
32X11
33===
34
35XF68_FBDev should generally work fine, but it is non-accelerated.
36
37
38Configuration
39=============
40
41You can pass kernel command line options to gxfb with gxfb.<option>.
42For example, gxfb.mode_option=800x600@75.
43Accepted options:
44
45mode_option - specify the video mode. Of the form
46 <x>x<y>[-<bpp>][@<refresh>]
47vram - size of video ram (normally auto-detected)
48vt_switch - enable vt switching during suspend/resume. The vt
49 switch is slow, but harmless.
50
51--
52Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/intelfb.txt b/Documentation/fb/intelfb.txt
index da5ee74219e8..27a3160650a4 100644
--- a/Documentation/fb/intelfb.txt
+++ b/Documentation/fb/intelfb.txt
@@ -14,6 +14,8 @@ graphics devices. These would include:
14 Intel 915GM 14 Intel 915GM
15 Intel 945G 15 Intel 945G
16 Intel 945GM 16 Intel 945GM
17 Intel 965G
18 Intel 965GM
17 19
18B. List of available options 20B. List of available options
19 21
diff --git a/Documentation/fb/lxfb.txt b/Documentation/fb/lxfb.txt
new file mode 100644
index 000000000000..38b3ca6f6ca7
--- /dev/null
+++ b/Documentation/fb/lxfb.txt
@@ -0,0 +1,52 @@
1[This file is cloned from VesaFB/aty128fb]
2
3What is lxfb?
4=================
5
6This is a graphics framebuffer driver for AMD Geode LX based processors.
7
8Advantages:
9
10 * No need to use AMD's VSA code (or other VESA emulation layer) in the
11 BIOS.
12 * It provides a nice large console (128 cols + 48 lines with 1024x768)
13 without using tiny, unreadable fonts.
14 * You can run XF68_FBDev on top of /dev/fb0
15 * Most important: boot logo :-)
16
17Disadvantages:
18
19 * graphic mode is slower than text mode...
20
21
22How to use it?
23==============
24
25Switching modes is done using lxfb.mode_option=<resolution>... boot
26parameter or using `fbset' program.
27
28See Documentation/fb/modedb.txt for more information on modedb
29resolutions.
30
31
32X11
33===
34
35XF68_FBDev should generally work fine, but it is non-accelerated.
36
37
38Configuration
39=============
40
41You can pass kernel command line options to lxfb with lxfb.<option>.
42For example, lxfb.mode_option=800x600@75.
43Accepted options:
44
45mode_option - specify the video mode. Of the form
46 <x>x<y>[-<bpp>][@<refresh>]
47vram - size of video ram (normally auto-detected)
48vt_switch - enable vt switching during suspend/resume. The vt
49 switch is slow, but harmless.
50
51--
52Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/metronomefb.txt b/Documentation/fb/metronomefb.txt
index b9a2e7b7e838..237ca412582d 100644
--- a/Documentation/fb/metronomefb.txt
+++ b/Documentation/fb/metronomefb.txt
@@ -1,7 +1,7 @@
1 Metronomefb 1 Metronomefb
2 ----------- 2 -----------
3Maintained by Jaya Kumar <jayakumar.lkml.gmail.com> 3Maintained by Jaya Kumar <jayakumar.lkml.gmail.com>
4Last revised: Nov 20, 2007 4Last revised: Mar 10, 2008
5 5
6Metronomefb is a driver for the Metronome display controller. The controller 6Metronomefb is a driver for the Metronome display controller. The controller
7is from E-Ink Corporation. It is intended to be used to drive the E-Ink 7is from E-Ink Corporation. It is intended to be used to drive the E-Ink
@@ -11,20 +11,18 @@ display media here http://www.e-ink.com/products/matrix/metronome.html .
11Metronome is interfaced to the host CPU through the AMLCD interface. The 11Metronome is interfaced to the host CPU through the AMLCD interface. The
12host CPU generates the control information and the image in a framebuffer 12host CPU generates the control information and the image in a framebuffer
13which is then delivered to the AMLCD interface by a host specific method. 13which is then delivered to the AMLCD interface by a host specific method.
14Currently, that's implemented for the PXA's LCDC controller. The display and 14The display and error status are each pulled through individual GPIOs.
15error status are each pulled through individual GPIOs.
16 15
17Metronomefb was written for the PXA255/gumstix/lyre combination and 16Metronomefb is platform independent and depends on a board specific driver
18therefore currently has board set specific code in it. If other boards based on 17to do all physical IO work. Currently, an example is implemented for the
19other architectures are available, then the host specific code can be separated 18PXA board used in the AM-200 EPD devkit. This example is am200epd.c
20and abstracted out.
21 19
22Metronomefb requires waveform information which is delivered via the AMLCD 20Metronomefb requires waveform information which is delivered via the AMLCD
23interface to the metronome controller. The waveform information is expected to 21interface to the metronome controller. The waveform information is expected to
24be delivered from userspace via the firmware class interface. The waveform file 22be delivered from userspace via the firmware class interface. The waveform file
25can be compressed as long as your udev or hotplug script is aware of the need 23can be compressed as long as your udev or hotplug script is aware of the need
26to uncompress it before delivering it. metronomefb will ask for waveform.wbf 24to uncompress it before delivering it. metronomefb will ask for metronome.wbf
27which would typically go into /lib/firmware/waveform.wbf depending on your 25which would typically go into /lib/firmware/metronome.wbf depending on your
28udev/hotplug setup. I have only tested with a single waveform file which was 26udev/hotplug setup. I have only tested with a single waveform file which was
29originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for. 27originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for.
30Caution should be exercised when manipulating the waveform as there may be 28Caution should be exercised when manipulating the waveform as there may be
diff --git a/Documentation/fb/modedb.txt b/Documentation/fb/modedb.txt
index 4fcdb4cf4cca..ec4dee75a354 100644
--- a/Documentation/fb/modedb.txt
+++ b/Documentation/fb/modedb.txt
@@ -125,8 +125,12 @@ There may be more modes.
125 amifb - Amiga chipset frame buffer 125 amifb - Amiga chipset frame buffer
126 aty128fb - ATI Rage128 / Pro frame buffer 126 aty128fb - ATI Rage128 / Pro frame buffer
127 atyfb - ATI Mach64 frame buffer 127 atyfb - ATI Mach64 frame buffer
128 pm2fb - Permedia 2/2V frame buffer
129 pm3fb - Permedia 3 frame buffer
130 sstfb - Voodoo 1/2 (SST1) chipset frame buffer
128 tdfxfb - 3D Fx frame buffer 131 tdfxfb - 3D Fx frame buffer
129 tridentfb - Trident (Cyber)blade chipset frame buffer 132 tridentfb - Trident (Cyber)blade chipset frame buffer
133 vt8623fb - VIA 8623 frame buffer
130 134
131BTW, only a few drivers use this at the moment. Others are to follow 135BTW, only a few drivers use this at the moment. Others are to follow
132(feel free to send patches). 136(feel free to send patches).
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index bf0e3df8e7a1..46ece3fba6f9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -128,15 +128,6 @@ Who: Arjan van de Ven <arjan@linux.intel.com>
128 128
129--------------------------- 129---------------------------
130 130
131What: vm_ops.nopage
132When: Soon, provided in-kernel callers have been converted
133Why: This interface is replaced by vm_ops.fault, but it has been around
134 forever, is used by a lot of drivers, and doesn't cost much to
135 maintain.
136Who: Nick Piggin <npiggin@suse.de>
137
138---------------------------
139
140What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment 131What: PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
141When: October 2008 132When: October 2008
142Why: The stacking of class devices makes these values misleading and 133Why: The stacking of class devices makes these values misleading and
@@ -147,6 +138,24 @@ Who: Kay Sievers <kay.sievers@suse.de>
147 138
148--------------------------- 139---------------------------
149 140
141What: find_task_by_pid
142When: 2.6.26
143Why: With pid namespaces, calling this function will return the
144 wrong task when called from inside a namespace.
145
146 The best way to save a task pid and find a task by this
147 pid later, is to find this task's struct pid pointer (or get
148 it directly from the task) and call pid_task() later.
149
150 If someone really needs to get a task by its pid_t, then
151 he most likely needs the find_task_by_vpid() to get the
152 task from the same namespace as the current task is in, but
153 this may be not so in general.
154
155Who: Pavel Emelyanov <xemul@openvz.org>
156
157---------------------------
158
150What: ACPI procfs interface 159What: ACPI procfs interface
151When: July 2008 160When: July 2008
152Why: ACPI sysfs conversion should be finished by January 2008. 161Why: ACPI sysfs conversion should be finished by January 2008.
@@ -203,16 +212,8 @@ Who: linuxppc-dev@ozlabs.org
203 212
204--------------------------- 213---------------------------
205 214
206What: sk98lin network driver
207When: Feburary 2008
208Why: In kernel tree version of driver is unmaintained. Sk98lin driver
209 replaced by the skge driver.
210Who: Stephen Hemminger <shemminger@linux-foundation.org>
211
212---------------------------
213
214What: i386/x86_64 bzImage symlinks 215What: i386/x86_64 bzImage symlinks
215When: April 2008 216When: April 2010
216 217
217Why: The i386/x86_64 merge provides a symlink to the old bzImage 218Why: The i386/x86_64 merge provides a symlink to the old bzImage
218 location so not yet updated user space tools, e.g. package 219 location so not yet updated user space tools, e.g. package
@@ -221,8 +222,6 @@ Who: Thomas Gleixner <tglx@linutronix.de>
221 222
222--------------------------- 223---------------------------
223 224
224---------------------------
225
226What: i2c-i810, i2c-prosavage and i2c-savage4 225What: i2c-i810, i2c-prosavage and i2c-savage4
227When: May 2008 226When: May 2008
228Why: These drivers are superseded by i810fb, intelfb and savagefb. 227Why: These drivers are superseded by i810fb, intelfb and savagefb.
@@ -230,33 +229,6 @@ Who: Jean Delvare <khali@linux-fr.org>
230 229
231--------------------------- 230---------------------------
232 231
233What: bcm43xx wireless network driver
234When: 2.6.26
235Files: drivers/net/wireless/bcm43xx
236Why: This driver's functionality has been replaced by the
237 mac80211-based b43 and b43legacy drivers.
238Who: John W. Linville <linville@tuxdriver.com>
239
240---------------------------
241
242What: ieee80211 softmac wireless networking component
243When: 2.6.26 (or after removal of bcm43xx and port of zd1211rw to mac80211)
244Files: net/ieee80211/softmac
245Why: No in-kernel drivers will depend on it any longer.
246Who: John W. Linville <linville@tuxdriver.com>
247
248---------------------------
249
250What: rc80211-simple rate control algorithm for mac80211
251When: 2.6.26
252Files: net/mac80211/rc80211-simple.c
253Why: This algorithm was provided for reference but always exhibited bad
254 responsiveness and performance and has some serious flaws. It has been
255 replaced by rc80211-pid.
256Who: Stefano Brivio <stefano.brivio@polimi.it>
257
258---------------------------
259
260What (Why): 232What (Why):
261 - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files 233 - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files
262 (superseded by xt_TOS/xt_tos target & match) 234 (superseded by xt_TOS/xt_tos target & match)
@@ -298,17 +270,6 @@ Who: Michael Buesch <mb@bu3sch.de>
298 270
299--------------------------- 271---------------------------
300 272
301What: Solaris/SunOS syscall and binary support on Sparc
302When: 2.6.26
303Why: Largely unmaintained and almost entirely unused. File system
304 layering used to divert library and dynamic linker searches to
305 /usr/gnemul is extremely buggy and unfixable. Making it work
306 is largely pointless as without a lot of work only the most
307 trivial of Solaris binaries can work with the emulation code.
308Who: David S. Miller <davem@davemloft.net>
309
310---------------------------
311
312What: init_mm export 273What: init_mm export
313When: 2.6.26 274When: 2.6.26
314Why: Not used in-tree. The current out-of-tree users used it to 275Why: Not used in-tree. The current out-of-tree users used it to
@@ -318,3 +279,45 @@ Why: Not used in-tree. The current out-of-tree users used it to
318 code / infrastructure should be in the kernel and not in some 279 code / infrastructure should be in the kernel and not in some
319 out-of-tree driver. 280 out-of-tree driver.
320Who: Thomas Gleixner <tglx@linutronix.de> 281Who: Thomas Gleixner <tglx@linutronix.de>
282
283----------------------------
284
285What: usedac i386 kernel parameter
286When: 2.6.27
287Why: replaced by allowdac and no dac combination
288Who: Glauber Costa <gcosta@redhat.com>
289
290---------------------------
291
292What: old style serial driver for ColdFire (CONFIG_SERIAL_COLDFIRE)
293When: 2.6.28
294Why: This driver still uses the old interface and has been replaced
295 by CONFIG_SERIAL_MCF.
296Who: Sebastian Siewior <sebastian@breakpoint.cc>
297
298---------------------------
299
300What: /sys/o2cb symlink
301When: January 2010
302Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb
303 exists as a symlink for backwards compatibility for old versions of
304 ocfs2-tools. 2 years should be sufficient time to phase in new versions
305 which know to look in /sys/fs/o2cb.
306Who: ocfs2-devel@oss.oracle.com
307
308---------------------------
309
310What: asm/semaphore.h
311When: 2.6.26
312Why: Implementation became generic; users should now include
313 linux/semaphore.h instead.
314Who: Matthew Wilcox <willy@linux.intel.com>
315
316---------------------------
317
318What: CONFIG_THERMAL_HWMON
319When: January 2009
320Why: This option was introduced just to allow older lm-sensors userspace
321 to keep working over the upgrade to 2.6.26. At the scheduled time of
322 removal fixed lm-sensors (2.x or 3.x) should be readily available.
323Who: Rene Herman <rene.herman@gmail.com>
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index e68021c08fbd..52cd611277a3 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -66,6 +66,8 @@ mandatory-locking.txt
66 - info on the Linux implementation of Sys V mandatory file locking. 66 - info on the Linux implementation of Sys V mandatory file locking.
67ncpfs.txt 67ncpfs.txt
68 - info on Novell Netware(tm) filesystem using NCP protocol. 68 - info on Novell Netware(tm) filesystem using NCP protocol.
69nfsroot.txt
70 - short guide on setting up a diskless box with NFS root filesystem.
69ntfs.txt 71ntfs.txt
70 - info and mount options for the NTFS filesystem (Windows NT). 72 - info and mount options for the NTFS filesystem (Windows NT).
71ocfs2.txt 73ocfs2.txt
@@ -82,6 +84,10 @@ relay.txt
82 - info on relay, for efficient streaming from kernel to user space. 84 - info on relay, for efficient streaming from kernel to user space.
83romfs.txt 85romfs.txt
84 - description of the ROMFS filesystem. 86 - description of the ROMFS filesystem.
87rpc-cache.txt
88 - introduction to the caching mechanisms in the sunrpc layer.
89seq_file.txt
90 - how to use the seq_file API
85sharedsubtree.txt 91sharedsubtree.txt
86 - a description of shared subtrees for namespaces. 92 - a description of shared subtrees for namespaces.
87smbfs.txt 93smbfs.txt
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 42d4b30b1045..8b22d7d8b991 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -92,7 +92,6 @@ prototypes:
92 void (*destroy_inode)(struct inode *); 92 void (*destroy_inode)(struct inode *);
93 void (*dirty_inode) (struct inode *); 93 void (*dirty_inode) (struct inode *);
94 int (*write_inode) (struct inode *, int); 94 int (*write_inode) (struct inode *, int);
95 void (*put_inode) (struct inode *);
96 void (*drop_inode) (struct inode *); 95 void (*drop_inode) (struct inode *);
97 void (*delete_inode) (struct inode *); 96 void (*delete_inode) (struct inode *);
98 void (*put_super) (struct super_block *); 97 void (*put_super) (struct super_block *);
@@ -115,7 +114,6 @@ alloc_inode: no no no
115destroy_inode: no 114destroy_inode: no
116dirty_inode: no (must not sleep) 115dirty_inode: no (must not sleep)
117write_inode: no 116write_inode: no
118put_inode: no
119drop_inode: no !!!inode_lock!!! 117drop_inode: no !!!inode_lock!!!
120delete_inode: no 118delete_inode: no
121put_super: yes yes no 119put_super: yes yes no
@@ -511,7 +509,6 @@ prototypes:
511 void (*open)(struct vm_area_struct*); 509 void (*open)(struct vm_area_struct*);
512 void (*close)(struct vm_area_struct*); 510 void (*close)(struct vm_area_struct*);
513 int (*fault)(struct vm_area_struct*, struct vm_fault *); 511 int (*fault)(struct vm_area_struct*, struct vm_fault *);
514 struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
515 int (*page_mkwrite)(struct vm_area_struct *, struct page *); 512 int (*page_mkwrite)(struct vm_area_struct *, struct page *);
516 513
517locking rules: 514locking rules:
@@ -519,7 +516,6 @@ locking rules:
519open: no yes 516open: no yes
520close: no yes 517close: no yes
521fault: no yes 518fault: no yes
522nopage: no yes
523page_mkwrite: no yes no 519page_mkwrite: no yes no
524 520
525 ->page_mkwrite() is called when a previously read-only page is 521 ->page_mkwrite() is called when a previously read-only page is
@@ -537,4 +533,3 @@ NULL.
537 533
538ipc/shm.c::shm_delete() - may need BKL. 534ipc/shm.c::shm_delete() - may need BKL.
539->read() and ->write() in many drivers are (probably) missing BKL. 535->read() and ->write() in many drivers are (probably) missing BKL.
540drivers/sgi/char/graphics.c::sgi_graphics_nopage() - may need BKL.
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 560f88dc7090..0c5086db8352 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -139,8 +139,16 @@ commit=nrsec (*) Ext4 can be told to sync all its data and metadata
139 Setting it to very large values will improve 139 Setting it to very large values will improve
140 performance. 140 performance.
141 141
142barrier=1 This enables/disables barriers. barrier=0 disables 142barrier=<0|1(*)> This enables/disables the use of write barriers in
143 it, barrier=1 enables it. 143 the jbd code. barrier=0 disables, barrier=1 enables.
144 This also requires an IO stack which can support
145 barriers, and if jbd gets an error on a barrier
146 write, it will disable again with a warning.
147 Write barriers enforce proper on-disk ordering
148 of journal commits, making volatile disk write caches
149 safe to use, at some performance penalty. If
150 your disks are battery-backed in one way or another,
151 disabling barriers may safely improve performance.
144 152
145orlov (*) This enables the new Orlov block allocator. It is 153orlov (*) This enables the new Orlov block allocator. It is
146 enabled by default. 154 enabled by default.
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
new file mode 100644
index 000000000000..d0ec45ae4e7d
--- /dev/null
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -0,0 +1,256 @@
1################################################################################
2# #
3# NFS/RDMA README #
4# #
5################################################################################
6
7 Author: NetApp and Open Grid Computing
8 Date: April 15, 2008
9
10Table of Contents
11~~~~~~~~~~~~~~~~~
12 - Overview
13 - Getting Help
14 - Installation
15 - Check RDMA and NFS Setup
16 - NFS/RDMA Setup
17
18Overview
19~~~~~~~~
20
21 This document describes how to install and setup the Linux NFS/RDMA client
22 and server software.
23
24 The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
25 was first included in the following release, Linux 2.6.25.
26
27 In our testing, we have obtained excellent performance results (full 10Gbit
28 wire bandwidth at minimal client CPU) under many workloads. The code passes
29 the full Connectathon test suite and operates over both Infiniband and iWARP
30 RDMA adapters.
31
32Getting Help
33~~~~~~~~~~~~
34
35 If you get stuck, you can ask questions on the
36
37 nfs-rdma-devel@lists.sourceforge.net
38
39 mailing list.
40
41Installation
42~~~~~~~~~~~~
43
44 These instructions are a step by step guide to building a machine for
45 use with NFS/RDMA.
46
47 - Install an RDMA device
48
49 Any device supported by the drivers in drivers/infiniband/hw is acceptable.
50
51 Testing has been performed using several Mellanox-based IB cards, the
52 Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
53
54 - Install a Linux distribution and tools
55
56 The first kernel release to contain both the NFS/RDMA client and server was
57 Linux 2.6.25. Therefore, a distribution compatible with this and subsequent
58 Linux kernel releases should be installed.
59
60 The procedures described in this document have been tested with
61 distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
62
63 - Install nfs-utils-1.1.1 or greater on the client
64
65 An NFS/RDMA mount point can only be obtained by using the mount.nfs
66 command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs
67 you are using, type:
68
69 > /sbin/mount.nfs -V
70
71 If the version is less than 1.1.1 or the command does not exist,
72 then you will need to install the latest version of nfs-utils.
73
74 Download the latest package from:
75
76 http://www.kernel.org/pub/linux/utils/nfs
77
78 Uncompress the package and follow the installation instructions.
79
80 If you will not be using GSS and NFSv4, the installation process
81 can be simplified by disabling these features when running configure:
82
83 > ./configure --disable-gss --disable-nfsv4
84
85 For more information on this see the package's README and INSTALL files.
86
87 After building the nfs-utils package, there will be a mount.nfs binary in
88 the utils/mount directory. This binary can be used to initiate NFS v2, v3,
89 or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4.
90 The standard technique is to create a symlink called mount.nfs4 to mount.nfs.
91
92 NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed
93 on the NFS client machine. You do not need this specific version of
94 nfs-utils on the server. Furthermore, only the mount.nfs command from
95 nfs-utils-1.1.1 is needed on the client.
96
97 - Install a Linux kernel with NFS/RDMA
98
99 The NFS/RDMA client and server are both included in the mainline Linux
100 kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
101 kernel can be found at:
102
103 ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
104
105 Download the sources and place them in an appropriate location.
106
107 - Configure the RDMA stack
108
109 Make sure your kernel configuration has RDMA support enabled. Under
110 Device Drivers -> InfiniBand support, update the kernel configuration
111 to enable InfiniBand support [NOTE: the option name is misleading. Enabling
112 InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
113
114 Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
115 iWARP adapter support (amso, cxgb3, etc.).
116
117 If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
118
119 - Configure the NFS client and server
120
121 Your kernel configuration must also have NFS file system support and/or
122 NFS server support enabled. These and other NFS related configuration
123 options can be found under File Systems -> Network File Systems.
124
125 - Build, install, reboot
126
127 The NFS/RDMA code will be enabled automatically if NFS and RDMA
128 are turned on. The NFS/RDMA client and server are configured via the hidden
129 SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
130 value of SUNRPC_XPRT_RDMA will be:
131
132 - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
133 and server will not be built
134 - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
135 in this case the NFS/RDMA client and server will be built as modules
136 - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
137 and server will be built into the kernel
138
139 Therefore, if you have followed the steps above and turned on NFS and RDMA,
140 the NFS/RDMA client and server will be built.
141
142 Build a new kernel, install it, boot it.
143
144Check RDMA and NFS Setup
145~~~~~~~~~~~~~~~~~~~~~~~~
146
147 Before configuring the NFS/RDMA software, it is a good idea to test
148 your new kernel to ensure that the kernel is working correctly.
149 In particular, it is a good idea to verify that the RDMA stack
150 is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
151 is working properly.
152
153 - Check RDMA Setup
154
155 If you built the RDMA components as modules, load them at
156 this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
157 card:
158
159 > modprobe ib_mthca
160 > modprobe ib_ipoib
161
162 If you are using InfiniBand, make sure there is a Subnet Manager (SM)
163 running on the network. If your IB switch has an embedded SM, you can
164 use it. Otherwise, you will need to run an SM, such as OpenSM, on one
165 of your end nodes.
166
167 If an SM is running on your network, you should see the following:
168
169 > cat /sys/class/infiniband/driverX/ports/1/state
170 4: ACTIVE
171
172 where driverX is mthca0, ipath5, ehca3, etc.
173
174 To further test the InfiniBand software stack, use IPoIB (this
175 assumes you have two IB hosts named host1 and host2):
176
177 host1> ifconfig ib0 a.b.c.x
178 host2> ifconfig ib0 a.b.c.y
179 host1> ping a.b.c.y
180 host2> ping a.b.c.x
181
182 For other device types, follow the appropriate procedures.
183
184 - Check NFS Setup
185
186 For the NFS components enabled above (client and/or server),
187 test their functionality over standard Ethernet using TCP/IP or UDP/IP.
188
189NFS/RDMA Setup
190~~~~~~~~~~~~~~
191
192 We recommend that you use two machines, one to act as the client and
193 one to act as the server.
194
195 One time configuration:
196
197 - On the server system, configure the /etc/exports file and
198 start the NFS/RDMA server.
199
200 Exports entries with the following formats have been tested:
201
202 /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
203 /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
204
205 The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the
206 client's iWARP address(es) for an RNIC.
207
208 NOTE: The "insecure" option must be used because the NFS/RDMA client does not
209 use a reserved port.
210
211 Each time a machine boots:
212
213 - Load and configure the RDMA drivers
214
215 For InfiniBand using a Mellanox adapter:
216
217 > modprobe ib_mthca
218 > modprobe ib_ipoib
219 > ifconfig ib0 a.b.c.d
220
221 NOTE: use unique addresses for the client and server
222
223 - Start the NFS server
224
225 If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
226 load the RDMA transport module:
227
228 > modprobe svcrdma
229
230 Regardless of how the server was built (module or built-in), start the server:
231
232 > /etc/init.d/nfs start
233
234 or
235
236 > service nfs start
237
238 Instruct the server to listen on the RDMA transport:
239
240 > echo rdma 2050 > /proc/fs/nfsd/portlist
241
242 - On the client system
243
244 If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
245 load the RDMA client module:
246
247 > modprobe xprtrdma
248
249 Regardless of how the client was built (module or built-in), issue the mount.nfs command:
250
251 > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050
252
253 To verify that the mount is using RDMA, run "cat /proc/mounts" and check the
254 "proto" field for the given mount.
255
256 Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/nfsroot.txt b/Documentation/filesystems/nfsroot.txt
index 31b329172343..31b329172343 100644
--- a/Documentation/nfsroot.txt
+++ b/Documentation/filesystems/nfsroot.txt
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 518ebe609e2b..dbc3c6a3650f 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -43,6 +43,7 @@ Table of Contents
43 2.13 /proc/<pid>/oom_score - Display current oom-killer score 43 2.13 /proc/<pid>/oom_score - Display current oom-killer score
44 2.14 /proc/<pid>/io - Display the IO accounting fields 44 2.14 /proc/<pid>/io - Display the IO accounting fields
45 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings 45 2.15 /proc/<pid>/coredump_filter - Core dump filtering settings
46 2.16 /proc/<pid>/mountinfo - Information about mounts
46 47
47------------------------------------------------------------------------------ 48------------------------------------------------------------------------------
48Preface 49Preface
@@ -462,11 +463,17 @@ SwapTotal: 0 kB
462SwapFree: 0 kB 463SwapFree: 0 kB
463Dirty: 968 kB 464Dirty: 968 kB
464Writeback: 0 kB 465Writeback: 0 kB
466AnonPages: 861800 kB
465Mapped: 280372 kB 467Mapped: 280372 kB
466Slab: 684068 kB 468Slab: 284364 kB
469SReclaimable: 159856 kB
470SUnreclaim: 124508 kB
471PageTables: 24448 kB
472NFS_Unstable: 0 kB
473Bounce: 0 kB
474WritebackTmp: 0 kB
467CommitLimit: 7669796 kB 475CommitLimit: 7669796 kB
468Committed_AS: 100056 kB 476Committed_AS: 100056 kB
469PageTables: 24448 kB
470VmallocTotal: 112216 kB 477VmallocTotal: 112216 kB
471VmallocUsed: 428 kB 478VmallocUsed: 428 kB
472VmallocChunk: 111088 kB 479VmallocChunk: 111088 kB
@@ -502,8 +509,17 @@ VmallocChunk: 111088 kB
502 on the disk 509 on the disk
503 Dirty: Memory which is waiting to get written back to the disk 510 Dirty: Memory which is waiting to get written back to the disk
504 Writeback: Memory which is actively being written back to the disk 511 Writeback: Memory which is actively being written back to the disk
512 AnonPages: Non-file backed pages mapped into userspace page tables
505 Mapped: files which have been mmaped, such as libraries 513 Mapped: files which have been mmaped, such as libraries
506 Slab: in-kernel data structures cache 514 Slab: in-kernel data structures cache
515SReclaimable: Part of Slab, that might be reclaimed, such as caches
516 SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
517 PageTables: amount of memory dedicated to the lowest level of page
518 tables.
519NFS_Unstable: NFS pages sent to the server, but not yet committed to stable
520 storage
521 Bounce: Memory used for block device "bounce buffers"
522WritebackTmp: Memory used by FUSE for temporary writeback buffers
507 CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'), 523 CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
508 this is the total amount of memory currently available to 524 this is the total amount of memory currently available to
509 be allocated on the system. This limit is only adhered to 525 be allocated on the system. This limit is only adhered to
@@ -530,8 +546,6 @@ Committed_AS: The amount of memory presently allocated on the system.
530 above) will not be permitted. This is useful if one needs 546 above) will not be permitted. This is useful if one needs
531 to guarantee that processes will not fail due to lack of 547 to guarantee that processes will not fail due to lack of
532 memory once that memory has been successfully allocated. 548 memory once that memory has been successfully allocated.
533 PageTables: amount of memory dedicated to the lowest level of page
534 tables.
535VmallocTotal: total size of vmalloc memory area 549VmallocTotal: total size of vmalloc memory area
536 VmallocUsed: amount of vmalloc area which is used 550 VmallocUsed: amount of vmalloc area which is used
537VmallocChunk: largest contiguous block of vmalloc area which is free 551VmallocChunk: largest contiguous block of vmalloc area which is free
@@ -2348,4 +2362,41 @@ For example:
2348 $ echo 0x7 > /proc/self/coredump_filter 2362 $ echo 0x7 > /proc/self/coredump_filter
2349 $ ./some_program 2363 $ ./some_program
2350 2364
23652.16 /proc/<pid>/mountinfo - Information about mounts
2366--------------------------------------------------------
2367
2368This file contains lines of the form:
2369
237036 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
2371(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
2372
2373(1) mount ID: unique identifier of the mount (may be reused after umount)
2374(2) parent ID: ID of parent (or of self for the top of the mount tree)
2375(3) major:minor: value of st_dev for files on filesystem
2376(4) root: root of the mount within the filesystem
2377(5) mount point: mount point relative to the process's root
2378(6) mount options: per mount options
2379(7) optional fields: zero or more fields of the form "tag[:value]"
2380(8) separator: marks the end of the optional fields
2381(9) filesystem type: name of filesystem of the form "type[.subtype]"
2382(10) mount source: filesystem specific information or "none"
2383(11) super options: per super block options
2384
2385Parsers should ignore all unrecognised optional fields. Currently the
2386possible optional fields are:
2387
2388shared:X mount is shared in peer group X
2389master:X mount is slave to peer group X
2390propagate_from:X mount is slave and receives propagation from peer group X (*)
2391unbindable mount is unbindable
2392
2393(*) X is the closest dominant peer group under the process's root. If
2394X is the immediate master of the mount, or if there's no dominant peer
2395group under the same root, then only the "master:X" field is present
2396and not the "propagate_from:X" field.
2397
2398For more information on mount propagation see:
2399
2400 Documentation/filesystems/sharedsubtree.txt
2401
2351------------------------------------------------------------------------------ 2402------------------------------------------------------------------------------
diff --git a/Documentation/rpc-cache.txt b/Documentation/filesystems/rpc-cache.txt
index 8a382bea6808..8a382bea6808 100644
--- a/Documentation/rpc-cache.txt
+++ b/Documentation/filesystems/rpc-cache.txt
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt
new file mode 100644
index 000000000000..b843743aa0b5
--- /dev/null
+++ b/Documentation/filesystems/seq_file.txt
@@ -0,0 +1,294 @@
1The seq_file interface
2
3 Copyright 2003 Jonathan Corbet <corbet@lwn.net>
4 This file is originally from the LWN.net Driver Porting series at
5 http://lwn.net/Articles/driver-porting/
6
7
8There are numerous ways for a device driver (or other kernel component) to
9provide information to the user or system administrator. One useful
10technique is the creation of virtual files, in debugfs, /proc or elsewhere.
11Virtual files can provide human-readable output that is easy to get at
12without any special utility programs; they can also make life easier for
13script writers. It is not surprising that the use of virtual files has
14grown over the years.
15
16Creating those files correctly has always been a bit of a challenge,
17however. It is not that hard to make a virtual file which returns a
18string. But life gets trickier if the output is long - anything greater
19than an application is likely to read in a single operation. Handling
20multiple reads (and seeks) requires careful attention to the reader's
21position within the virtual file - that position is, likely as not, in the
22middle of a line of output. The kernel has traditionally had a number of
23implementations that got this wrong.
24
25The 2.6 kernel contains a set of functions (implemented by Alexander Viro)
26which are designed to make it easy for virtual file creators to get it
27right.
28
29The seq_file interface is available via <linux/seq_file.h>. There are
30three aspects to seq_file:
31
32 * An iterator interface which lets a virtual file implementation
33 step through the objects it is presenting.
34
35 * Some utility functions for formatting objects for output without
36 needing to worry about things like output buffers.
37
38 * A set of canned file_operations which implement most operations on
39 the virtual file.
40
41We'll look at the seq_file interface via an extremely simple example: a
42loadable module which creates a file called /proc/sequence. The file, when
43read, simply produces a set of increasing integer values, one per line. The
44sequence will continue until the user loses patience and finds something
45better to do. The file is seekable, in that one can do something like the
46following:
47
48 dd if=/proc/sequence of=out1 count=1
49 dd if=/proc/sequence skip=1 of=out2 count=1
50
51Then concatenate the output files out1 and out2 and get the right
52result. Yes, it is a thoroughly useless module, but the point is to show
53how the mechanism works without getting lost in other details. (Those
54wanting to see the full source for this module can find it at
55http://lwn.net/Articles/22359/).
56
57
58The iterator interface
59
60Modules implementing a virtual file with seq_file must implement a simple
61iterator object that allows stepping through the data of interest.
62Iterators must be able to move to a specific position - like the file they
63implement - but the interpretation of that position is up to the iterator
64itself. A seq_file implementation that is formatting firewall rules, for
65example, could interpret position N as the Nth rule in the chain.
66Positioning can thus be done in whatever way makes the most sense for the
67generator of the data, which need not be aware of how a position translates
68to an offset in the virtual file. The one obvious exception is that a
69position of zero should indicate the beginning of the file.
70
71The /proc/sequence iterator just uses the count of the next number it
72will output as its position.
73
74Four functions must be implemented to make the iterator work. The first,
75called start(), takes a position as an argument and returns an iterator
76which will start reading at that position. For our simple sequence example,
77the start() function looks like:
78
79 static void *ct_seq_start(struct seq_file *s, loff_t *pos)
80 {
81 loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
82 if (! spos)
83 return NULL;
84 *spos = *pos;
85 return spos;
86 }
87
88The entire data structure for this iterator is a single loff_t value
89holding the current position. There is no upper bound for the sequence
90iterator, but that will not be the case for most other seq_file
91implementations; in most cases the start() function should check for a
92"past end of file" condition and return NULL if need be.
93
94For more complicated applications, the private field of the seq_file
95structure can be used. There is also a special value which can be returned
96by the start() function called SEQ_START_TOKEN; it can be used if you wish
97to instruct your show() function (described below) to print a header at the
98top of the output. SEQ_START_TOKEN should only be used if the offset is
99zero, however.
100
101The next function to implement is called, amazingly, next(); its job is to
102move the iterator forward to the next position in the sequence. The
103example module can simply increment the position by one; more useful
104modules will do what is needed to step through some data structure. The
105next() function returns a new iterator, or NULL if the sequence is
106complete. Here's the example version:
107
108 static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
109 {
110 loff_t *spos = v;
111 *pos = ++*spos;
112 return spos;
113 }
114
115The stop() function is called when iteration is complete; its job, of
116course, is to clean up. If dynamic memory is allocated for the iterator,
117stop() is the place to free it.
118
119 static void ct_seq_stop(struct seq_file *s, void *v)
120 {
121 kfree(v);
122 }
123
124Finally, the show() function should format the object currently pointed to
125by the iterator for output. The example module's show() function is:
126
127 static int ct_seq_show(struct seq_file *s, void *v)
128 {
129 loff_t *spos = v;
130 seq_printf(s, "%lld\n", (long long)*spos);
131 return 0;
132 }
133
134If all is well, the show() function should return zero. A negative error
135code in the usual manner indicates that something went wrong; it will be
136passed back to user space. This function can also return SEQ_SKIP, which
137causes the current item to be skipped; if the show() function has already
138generated output before returning SEQ_SKIP, that output will be dropped.
139
140We will look at seq_printf() in a moment. But first, the definition of the
141seq_file iterator is finished by creating a seq_operations structure with
142the four functions we have just defined:
143
144 static const struct seq_operations ct_seq_ops = {
145 .start = ct_seq_start,
146 .next = ct_seq_next,
147 .stop = ct_seq_stop,
148 .show = ct_seq_show
149 };
150
151This structure will be needed to tie our iterator to the /proc file in
152a little bit.
153
154It's worth noting that the iterator value returned by start() and
155manipulated by the other functions is considered to be completely opaque by
156the seq_file code. It can thus be anything that is useful in stepping
157through the data to be output. Counters can be useful, but it could also be
158a direct pointer into an array or linked list. Anything goes, as long as
159the programmer is aware that things can happen between calls to the
160iterator function. However, the seq_file code (by design) will not sleep
161between the calls to start() and stop(), so holding a lock during that time
162is a reasonable thing to do. The seq_file code will also avoid taking any
163other locks while the iterator is active.
164
165
166Formatted output
167
168The seq_file code manages positioning within the output created by the
169iterator and getting it into the user's buffer. But, for that to work, that
170output must be passed to the seq_file code. Some utility functions have
171been defined which make this task easy.
172
173Most code will simply use seq_printf(), which works pretty much like
174printk(), but which requires the seq_file pointer as an argument. It is
175common to ignore the return value from seq_printf(), but a function
176producing complicated output may want to check that value and quit if
177something non-zero is returned; an error return means that the seq_file
178buffer has been filled and further output will be discarded.
179
180For straight character output, the following functions may be used:
181
182 int seq_putc(struct seq_file *m, char c);
183 int seq_puts(struct seq_file *m, const char *s);
184 int seq_escape(struct seq_file *m, const char *s, const char *esc);
185
186The first two output a single character and a string, just like one would
187expect. seq_escape() is like seq_puts(), except that any character in s
188which is in the string esc will be represented in octal form in the output.
189
190There is also a pair of functions for printing filenames:
191
192 int seq_path(struct seq_file *m, struct path *path, char *esc);
193 int seq_path_root(struct seq_file *m, struct path *path,
194 struct path *root, char *esc);
195
196Here, path indicates the file of interest, and esc is a set of characters
197which should be escaped in the output. A call to seq_path() will output
198the path relative to the current process's filesystem root. If a different
199root is desired, it can be used with seq_path_root(). Note that, if it
200turns out that path cannot be reached from root, the value of root will be
201changed in seq_path_root() to a root which *does* work.
202
203
204Making it all work
205
206So far, we have a nice set of functions which can produce output within the
207seq_file system, but we have not yet turned them into a file that a user
208can see. Creating a file within the kernel requires, of course, the
209creation of a set of file_operations which implement the operations on that
210file. The seq_file interface provides a set of canned operations which do
211most of the work. The virtual file author still must implement the open()
212method, however, to hook everything up. The open function is often a single
213line, as in the example module:
214
215 static int ct_open(struct inode *inode, struct file *file)
216 {
217 return seq_open(file, &ct_seq_ops);
218 }
219
220Here, the call to seq_open() takes the seq_operations structure we created
221before, and gets set up to iterate through the virtual file.
222
223On a successful open, seq_open() stores the struct seq_file pointer in
224file->private_data. If you have an application where the same iterator can
225be used for more than one file, you can store an arbitrary pointer in the
226private field of the seq_file structure; that value can then be retrieved
227by the iterator functions.
228
229The other operations of interest - read(), llseek(), and release() - are
230all implemented by the seq_file code itself. So a virtual file's
231file_operations structure will look like:
232
233 static const struct file_operations ct_file_ops = {
234 .owner = THIS_MODULE,
235 .open = ct_open,
236 .read = seq_read,
237 .llseek = seq_lseek,
238 .release = seq_release
239 };
240
241There is also a seq_release_private() which passes the contents of the
242seq_file private field to kfree() before releasing the structure.
243
244The final step is the creation of the /proc file itself. In the example
245code, that is done in the initialization code in the usual way:
246
247 static int ct_init(void)
248 {
249 struct proc_dir_entry *entry;
250
251 entry = create_proc_entry("sequence", 0, NULL);
252 if (entry)
253 entry->proc_fops = &ct_file_ops;
254 return 0;
255 }
256
257 module_init(ct_init);
258
259And that is pretty much it.
260
261
262seq_list
263
264If your file will be iterating through a linked list, you may find these
265routines useful:
266
267 struct list_head *seq_list_start(struct list_head *head,
268 loff_t pos);
269 struct list_head *seq_list_start_head(struct list_head *head,
270 loff_t pos);
271 struct list_head *seq_list_next(void *v, struct list_head *head,
272 loff_t *ppos);
273
274These helpers will interpret pos as a position within the list and iterate
275accordingly. Your start() and next() functions need only invoke the
276seq_list_* helpers with a pointer to the appropriate list_head structure.
277
278
279The extra-simple version
280
281For extremely simple virtual files, there is an even easier interface. A
282module can define only the show() function, which should create all the
283output that the virtual file will contain. The file's open() method then
284calls:
285
286 int single_open(struct file *file,
287 int (*show)(struct seq_file *m, void *p),
288 void *data);
289
290When output time comes, the show() function will be called once. The data
291value given to single_open() can be found in the private field of the
292seq_file structure. When using single_open(), the programmer should use
293single_release() instead of seq_release() in the file_operations structure
294to avoid a memory leak.
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
index 5daa2aaec2c5..68ef48839c04 100644
--- a/Documentation/filesystems/sysfs-pci.txt
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -36,6 +36,7 @@ files, each with their own function.
36 local_cpus nearby CPU mask (cpumask, ro) 36 local_cpus nearby CPU mask (cpumask, ro)
37 resource PCI resource host addresses (ascii, ro) 37 resource PCI resource host addresses (ascii, ro)
38 resource0..N PCI resource N, if present (binary, mmap) 38 resource0..N PCI resource N, if present (binary, mmap)
39 resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
39 rom PCI ROM resource, if present (binary, ro) 40 rom PCI ROM resource, if present (binary, ro)
40 subsystem_device PCI subsystem device (ascii, ro) 41 subsystem_device PCI subsystem device (ascii, ro)
41 subsystem_vendor PCI subsystem vendor (ascii, ro) 42 subsystem_vendor PCI subsystem vendor (ascii, ro)
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 4598ef7b622b..7f27b8f840d0 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -176,8 +176,10 @@ implementations:
176 Recall that an attribute should only be exporting one value, or an 176 Recall that an attribute should only be exporting one value, or an
177 array of similar values, so this shouldn't be that expensive. 177 array of similar values, so this shouldn't be that expensive.
178 178
179 This allows userspace to do partial reads and seeks arbitrarily over 179 This allows userspace to do partial reads and forward seeks
180 the entire file at will. 180 arbitrarily over the entire file at will. If userspace seeks back to
181 zero or does a pread(2) with an offset of '0' the show() method will
182 be called again, rearmed, to fill the buffer.
181 183
182- On write(2), sysfs expects the entire buffer to be passed during the 184- On write(2), sysfs expects the entire buffer to be passed during the
183 first write. Sysfs then passes the entire buffer to the store() 185 first write. Sysfs then passes the entire buffer to the store()
@@ -192,6 +194,9 @@ implementations:
192 194
193Other notes: 195Other notes:
194 196
197- Writing causes the show() method to be rearmed regardless of current
198 file position.
199
195- The buffer will always be PAGE_SIZE bytes in length. On i386, this 200- The buffer will always be PAGE_SIZE bytes in length. On i386, this
196 is 4096. 201 is 4096.
197 202
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
index 145e44086358..222437efd75a 100644
--- a/Documentation/filesystems/tmpfs.txt
+++ b/Documentation/filesystems/tmpfs.txt
@@ -92,6 +92,18 @@ NodeList format is a comma-separated list of decimal numbers and ranges,
92a range being two hyphen-separated decimal numbers, the smallest and 92a range being two hyphen-separated decimal numbers, the smallest and
93largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15 93largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15
94 94
95NUMA memory allocation policies have optional flags that can be used in
96conjunction with their modes. These optional flags can be specified
97when tmpfs is mounted by appending them to the mode before the NodeList.
98See Documentation/vm/numa_memory_policy.txt for a list of all available
99memory allocation policy mode flags.
100
101 =static is equivalent to MPOL_F_STATIC_NODES
102 =relative is equivalent to MPOL_F_RELATIVE_NODES
103
104For example, mpol=bind=static:NodeList is the equivalent of an
105allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES.
106
95Note that trying to mount a tmpfs with an mpol option will fail if the 107Note that trying to mount a tmpfs with an mpol option will fail if the
96running kernel does not support NUMA; and will fail if its nodelist 108running kernel does not support NUMA; and will fail if its nodelist
97specifies a node which is not online. If your system relies on that 109specifies a node which is not online. If your system relies on that
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index fcc123ffa252..2d5e1e582e13 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -17,6 +17,21 @@ dmask=### -- The permission mask for the directory.
17fmask=### -- The permission mask for files. 17fmask=### -- The permission mask for files.
18 The default is the umask of current process. 18 The default is the umask of current process.
19 19
20allow_utime=### -- This option controls the permission check of mtime/atime.
21
22 20 - If current process is in group of file's group ID,
23 you can change timestamp.
24 2 - Other users can change timestamp.
25
26 The default is set from `dmask' option. (If the directory is
27 writable, utime(2) is also allowed. I.e. ~dmask & 022)
28
29 Normally utime(2) checks current process is owner of
30 the file, or it has CAP_FOWNER capability. But FAT
31 filesystem doesn't have uid/gid on disk, so normal
32 check is too inflexible. With this option you can
33 relax it.
34
20codepage=### -- Sets the codepage number for converting to shortname 35codepage=### -- Sets the codepage number for converting to shortname
21 characters on FAT filesystem. 36 characters on FAT filesystem.
22 By default, FAT_DEFAULT_CODEPAGE setting is used. 37 By default, FAT_DEFAULT_CODEPAGE setting is used.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 81e5be6e6e35..b7522c6cbae3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -205,7 +205,6 @@ struct super_operations {
205 205
206 void (*dirty_inode) (struct inode *); 206 void (*dirty_inode) (struct inode *);
207 int (*write_inode) (struct inode *, int); 207 int (*write_inode) (struct inode *, int);
208 void (*put_inode) (struct inode *);
209 void (*drop_inode) (struct inode *); 208 void (*drop_inode) (struct inode *);
210 void (*delete_inode) (struct inode *); 209 void (*delete_inode) (struct inode *);
211 void (*put_super) (struct super_block *); 210 void (*put_super) (struct super_block *);
@@ -246,9 +245,6 @@ or bottom half).
246 inode to disc. The second parameter indicates whether the write 245 inode to disc. The second parameter indicates whether the write
247 should be synchronous or not, not all filesystems check this flag. 246 should be synchronous or not, not all filesystems check this flag.
248 247
249 put_inode: called when the VFS inode is removed from the inode
250 cache.
251
252 drop_inode: called when the last access to the inode is dropped, 248 drop_inode: called when the last access to the inode is dropped,
253 with the inode_lock spinlock held. 249 with the inode_lock spinlock held.
254 250
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 74aeb142ae5f..0a1668ba2600 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -52,16 +52,15 @@ When mounting an XFS filesystem, the following options are accepted.
52 and also gets the setgid bit set if it is a directory itself. 52 and also gets the setgid bit set if it is a directory itself.
53 53
54 ihashsize=value 54 ihashsize=value
55 Sets the number of hash buckets available for hashing the 55 In memory inode hashes have been removed, so this option has
56 in-memory inodes of the specified mount point. If a value 56 no function as of August 2007. Option is deprecated.
57 of zero is used, the value selected by the default algorithm
58 will be displayed in /proc/mounts.
59 57
60 ikeep/noikeep 58 ikeep/noikeep
61 When inode clusters are emptied of inodes, keep them around 59 When ikeep is specified, XFS does not delete empty inode clusters
62 on the disk (ikeep) - this is the traditional XFS behaviour 60 and keeps them around on disk. ikeep is the traditional XFS
63 and is still the default for now. Using the noikeep option, 61 behaviour. When noikeep is specified, empty inode clusters
64 inode clusters are returned to the free space pool. 62 are returned to the free space pool. The default is noikeep for
63 non-DMAPI mounts, while ikeep is the default when DMAPI is in use.
65 64
66 inode64 65 inode64
67 Indicates that XFS is allowed to create inodes at any location 66 Indicates that XFS is allowed to create inodes at any location
diff --git a/Documentation/firmware_class/firmware_sample_driver.c b/Documentation/firmware_class/firmware_sample_driver.c
deleted file mode 100644
index 6865cbe075ec..000000000000
--- a/Documentation/firmware_class/firmware_sample_driver.c
+++ /dev/null
@@ -1,115 +0,0 @@
1/*
2 * firmware_sample_driver.c -
3 *
4 * Copyright (c) 2003 Manuel Estrada Sainz
5 *
6 * Sample code on how to use request_firmware() from drivers.
7 *
8 */
9
10#include <linux/module.h>
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/device.h>
14#include <linux/string.h>
15
16#include "linux/firmware.h"
17
18static struct device ghost_device = {
19 .bus_id = "ghost0",
20};
21
22
23static void sample_firmware_load(char *firmware, int size)
24{
25 u8 buf[size+1];
26 memcpy(buf, firmware, size);
27 buf[size] = '\0';
28 printk(KERN_INFO "firmware_sample_driver: firmware: %s\n", buf);
29}
30
31static void sample_probe_default(void)
32{
33 /* uses the default method to get the firmware */
34 const struct firmware *fw_entry;
35 printk(KERN_INFO "firmware_sample_driver: a ghost device got inserted :)\n");
36
37 if(request_firmware(&fw_entry, "sample_driver_fw", &ghost_device)!=0)
38 {
39 printk(KERN_ERR
40 "firmware_sample_driver: Firmware not available\n");
41 return;
42 }
43
44 sample_firmware_load(fw_entry->data, fw_entry->size);
45
46 release_firmware(fw_entry);
47
48 /* finish setting up the device */
49}
50static void sample_probe_specific(void)
51{
52 /* Uses some specific hotplug support to get the firmware from
53 * userspace directly into the hardware, or via some sysfs file */
54
55 /* NOTE: This currently doesn't work */
56
57 printk(KERN_INFO "firmware_sample_driver: a ghost device got inserted :)\n");
58
59 if(request_firmware(NULL, "sample_driver_fw", &ghost_device)!=0)
60 {
61 printk(KERN_ERR
62 "firmware_sample_driver: Firmware load failed\n");
63 return;
64 }
65
66 /* request_firmware blocks until userspace finished, so at
67 * this point the firmware should be already in the device */
68
69 /* finish setting up the device */
70}
71static void sample_probe_async_cont(const struct firmware *fw, void *context)
72{
73 if(!fw){
74 printk(KERN_ERR
75 "firmware_sample_driver: firmware load failed\n");
76 return;
77 }
78
79 printk(KERN_INFO "firmware_sample_driver: device pointer \"%s\"\n",
80 (char *)context);
81 sample_firmware_load(fw->data, fw->size);
82}
83static void sample_probe_async(void)
84{
85 /* Let's say that I can't sleep */
86 int error;
87 error = request_firmware_nowait (THIS_MODULE, FW_ACTION_NOHOTPLUG,
88 "sample_driver_fw", &ghost_device,
89 "my device pointer",
90 sample_probe_async_cont);
91 if(error){
92 printk(KERN_ERR
93 "firmware_sample_driver:"
94 " request_firmware_nowait failed\n");
95 }
96}
97
98static int sample_init(void)
99{
100 device_initialize(&ghost_device);
101 /* since there is no real hardware insertion I just call the
102 * sample probe functions here */
103 sample_probe_specific();
104 sample_probe_default();
105 sample_probe_async();
106 return 0;
107}
108static void __exit sample_exit(void)
109{
110}
111
112module_init (sample_init);
113module_exit (sample_exit);
114
115MODULE_LICENSE("GPL");
diff --git a/Documentation/firmware_class/firmware_sample_firmware_class.c b/Documentation/firmware_class/firmware_sample_firmware_class.c
deleted file mode 100644
index 2de62854f0e5..000000000000
--- a/Documentation/firmware_class/firmware_sample_firmware_class.c
+++ /dev/null
@@ -1,207 +0,0 @@
1/*
2 * firmware_sample_firmware_class.c -
3 *
4 * Copyright (c) 2003 Manuel Estrada Sainz
5 *
6 * NOTE: This is just a probe of concept, if you think that your driver would
7 * be well served by this mechanism please contact me first.
8 *
9 * DON'T USE THIS CODE AS IS
10 *
11 */
12
13#include <linux/device.h>
14#include <linux/module.h>
15#include <linux/init.h>
16#include <linux/timer.h>
17#include <linux/slab.h>
18#include <linux/string.h>
19#include <linux/firmware.h>
20
21
22MODULE_AUTHOR("Manuel Estrada Sainz");
23MODULE_DESCRIPTION("Hackish sample for using firmware class directly");
24MODULE_LICENSE("GPL");
25
26static inline struct class_device *to_class_dev(struct kobject *obj)
27{
28 return container_of(obj,struct class_device,kobj);
29}
30static inline
31struct class_device_attribute *to_class_dev_attr(struct attribute *_attr)
32{
33 return container_of(_attr,struct class_device_attribute,attr);
34}
35
36int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr);
37int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr);
38
39struct firmware_priv {
40 char fw_id[FIRMWARE_NAME_MAX];
41 s32 loading:2;
42 u32 abort:1;
43};
44
45extern struct class firmware_class;
46
47static ssize_t firmware_loading_show(struct class_device *class_dev, char *buf)
48{
49 struct firmware_priv *fw_priv = class_get_devdata(class_dev);
50 return sprintf(buf, "%d\n", fw_priv->loading);
51}
52static ssize_t firmware_loading_store(struct class_device *class_dev,
53 const char *buf, size_t count)
54{
55 struct firmware_priv *fw_priv = class_get_devdata(class_dev);
56 int prev_loading = fw_priv->loading;
57
58 fw_priv->loading = simple_strtol(buf, NULL, 10);
59
60 switch(fw_priv->loading){
61 case -1:
62 /* abort load an panic */
63 break;
64 case 1:
65 /* setup load */
66 break;
67 case 0:
68 if(prev_loading==1){
69 /* finish load and get the device back to working
70 * state */
71 }
72 break;
73 }
74
75 return count;
76}
77static CLASS_DEVICE_ATTR(loading, 0644,
78 firmware_loading_show, firmware_loading_store);
79
80static ssize_t firmware_data_read(struct kobject *kobj,
81 struct bin_attribute *bin_attr,
82 char *buffer, loff_t offset, size_t count)
83{
84 struct class_device *class_dev = to_class_dev(kobj);
85 struct firmware_priv *fw_priv = class_get_devdata(class_dev);
86
87 /* read from the devices firmware memory */
88
89 return count;
90}
91static ssize_t firmware_data_write(struct kobject *kobj,
92 struct bin_attribute *bin_attr,
93 char *buffer, loff_t offset, size_t count)
94{
95 struct class_device *class_dev = to_class_dev(kobj);
96 struct firmware_priv *fw_priv = class_get_devdata(class_dev);
97
98 /* write to the devices firmware memory */
99
100 return count;
101}
102static struct bin_attribute firmware_attr_data = {
103 .attr = {.name = "data", .mode = 0644},
104 .size = 0,
105 .read = firmware_data_read,
106 .write = firmware_data_write,
107};
108static int fw_setup_class_device(struct class_device *class_dev,
109 const char *fw_name,
110 struct device *device)
111{
112 int retval;
113 struct firmware_priv *fw_priv;
114
115 fw_priv = kzalloc(sizeof(struct firmware_priv), GFP_KERNEL);
116 if (!fw_priv) {
117 retval = -ENOMEM;
118 goto out;
119 }
120
121 memset(class_dev, 0, sizeof(*class_dev));
122
123 strncpy(fw_priv->fw_id, fw_name, FIRMWARE_NAME_MAX);
124 fw_priv->fw_id[FIRMWARE_NAME_MAX-1] = '\0';
125
126 strncpy(class_dev->class_id, device->bus_id, BUS_ID_SIZE);
127 class_dev->class_id[BUS_ID_SIZE-1] = '\0';
128 class_dev->dev = device;
129
130 class_dev->class = &firmware_class,
131 class_set_devdata(class_dev, fw_priv);
132 retval = class_device_register(class_dev);
133 if (retval){
134 printk(KERN_ERR "%s: class_device_register failed\n",
135 __FUNCTION__);
136 goto error_free_fw_priv;
137 }
138
139 retval = sysfs_create_bin_file(&class_dev->kobj, &firmware_attr_data);
140 if (retval){
141 printk(KERN_ERR "%s: sysfs_create_bin_file failed\n",
142 __FUNCTION__);
143 goto error_unreg_class_dev;
144 }
145
146 retval = class_device_create_file(class_dev,
147 &class_device_attr_loading);
148 if (retval){
149 printk(KERN_ERR "%s: class_device_create_file failed\n",
150 __FUNCTION__);
151 goto error_remove_data;
152 }
153
154 goto out;
155
156error_remove_data:
157 sysfs_remove_bin_file(&class_dev->kobj, &firmware_attr_data);
158error_unreg_class_dev:
159 class_device_unregister(class_dev);
160error_free_fw_priv:
161 kfree(fw_priv);
162out:
163 return retval;
164}
165static void fw_remove_class_device(struct class_device *class_dev)
166{
167 struct firmware_priv *fw_priv = class_get_devdata(class_dev);
168
169 class_device_remove_file(class_dev, &class_device_attr_loading);
170 sysfs_remove_bin_file(&class_dev->kobj, &firmware_attr_data);
171 class_device_unregister(class_dev);
172}
173
174static struct class_device *class_dev;
175
176static struct device my_device = {
177 .bus_id = "my_dev0",
178};
179
180static int __init firmware_sample_init(void)
181{
182 int error;
183
184 device_initialize(&my_device);
185 class_dev = kmalloc(sizeof(struct class_device), GFP_KERNEL);
186 if(!class_dev)
187 return -ENOMEM;
188
189 error = fw_setup_class_device(class_dev, "my_firmware_image",
190 &my_device);
191 if(error){
192 kfree(class_dev);
193 return error;
194 }
195 return 0;
196
197}
198static void __exit firmware_sample_exit(void)
199{
200 struct firmware_priv *fw_priv = class_get_devdata(class_dev);
201 fw_remove_class_device(class_dev);
202 kfree(fw_priv);
203 kfree(class_dev);
204}
205module_init(firmware_sample_init);
206module_exit(firmware_sample_exit);
207
diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 54630095aa3c..c35ca9e40d4c 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -107,6 +107,16 @@ type of GPIO controller, and on one particular board 80-95 with an FPGA.
107The numbers need not be contiguous; either of those platforms could also 107The numbers need not be contiguous; either of those platforms could also
108use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. 108use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders.
109 109
110If you want to initialize a structure with an invalid GPIO number, use
111some negative number (perhaps "-EINVAL"); that will never be valid. To
112test if a number could reference a GPIO, you may use this predicate:
113
114 int gpio_is_valid(int number);
115
116A number that's not valid will be rejected by calls which may request
117or free GPIOs (see below). Other numbers may also be rejected; for
118example, a number might be valid but unused on a given board.
119
110Whether a platform supports multiple GPIO controllers is currently a 120Whether a platform supports multiple GPIO controllers is currently a
111platform-specific implementation issue. 121platform-specific implementation issue.
112 122
diff --git a/Documentation/highuid.txt b/Documentation/highuid.txt
index 76034d9dbfc0..6bad6f1d1cac 100644
--- a/Documentation/highuid.txt
+++ b/Documentation/highuid.txt
@@ -28,8 +28,6 @@ What's left to be done for 32-bit UIDs on all Linux architectures:
28 uses the 32-bit UID system calls properly otherwise. 28 uses the 32-bit UID system calls properly otherwise.
29 29
30 This affects at least: 30 This affects at least:
31 SunOS emulation
32 Solaris emulation
33 iBCS on Intel 31 iBCS on Intel
34 32
35 sparc32 emulation on sparc64 33 sparc32 emulation on sparc64
diff --git a/Documentation/hwmon/adt7473 b/Documentation/hwmon/adt7473
index 22d8b19046ab..2126de34c711 100644
--- a/Documentation/hwmon/adt7473
+++ b/Documentation/hwmon/adt7473
@@ -69,7 +69,8 @@ point2: Set the pwm speed at a higher temperature bound.
69 69
70The ADT7473 will scale the pwm between the lower and higher pwm speed when 70The ADT7473 will scale the pwm between the lower and higher pwm speed when
71the temperature is between the two temperature boundaries. PWM values range 71the temperature is between the two temperature boundaries. PWM values range
72from 0 (off) to 255 (full speed). 72from 0 (off) to 255 (full speed). Fan speed will be set to maximum when the
73temperature sensor associated with the PWM control exceeds temp#_max.
73 74
74Notes 75Notes
75----- 76-----
diff --git a/Documentation/hwmon/ibmaem b/Documentation/hwmon/ibmaem
new file mode 100644
index 000000000000..2fefaf582a43
--- /dev/null
+++ b/Documentation/hwmon/ibmaem
@@ -0,0 +1,37 @@
1Kernel driver ibmaem
2======================
3
4Supported systems:
5 * Any recent IBM System X server with Active Energy Manager support.
6 This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2,
7 x3950 M2, and certain HS2x/LS2x/QS2x blades. The IPMI host interface
8 driver ("ipmi-si") needs to be loaded for this driver to do anything.
9 Prefix: 'ibmaem'
10 Datasheet: Not available
11
12Author: Darrick J. Wong
13
14Description
15-----------
16
17This driver implements sensor reading support for the energy and power
18meters available on various IBM System X hardware through the BMC. All
19sensor banks will be exported as platform devices; this driver can talk
20to both v1 and v2 interfaces. This driver is completely separate from the
21older ibmpex driver.
22
23The v1 AEM interface has a simple set of features to monitor energy use.
24There is a register that displays an estimate of raw energy consumption
25since the last BMC reset, and a power sensor that returns average power
26use over a configurable interval.
27
28The v2 AEM interface is a bit more sophisticated, being able to present
29a wider range of energy and power use registers, the power cap as
30set by the AEM software, and temperature sensors.
31
32Special Features
33----------------
34
35The "power_cap" value displays the current system power cap, as set by
36the Active Energy Manager software. Setting the power cap from the host
37is not currently supported.
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index f4a8ebc1ef1a..2d845730d4e0 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -2,17 +2,12 @@ Naming and data format standards for sysfs files
2------------------------------------------------ 2------------------------------------------------
3 3
4The libsensors library offers an interface to the raw sensors data 4The libsensors library offers an interface to the raw sensors data
5through the sysfs interface. See libsensors documentation and source for 5through the sysfs interface. Since lm-sensors 3.0.0, libsensors is
6further information. As of writing this document, libsensors 6completely chip-independent. It assumes that all the kernel drivers
7(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating 7implement the standard sysfs interface described in this document.
8support for any given chip requires modifying the library's code. 8This makes adding or updating support for any given chip very easy, as
9This is because libsensors was written for the procfs interface 9libsensors, and applications using it, do not need to be modified.
10older kernel modules were using, which wasn't standardized enough. 10This is a major improvement compared to lm-sensors 2.
11Recent versions of libsensors (from lm_sensors 2.8.2 and later) have
12support for the sysfs interface, though.
13
14The new sysfs interface was designed to be as chip-independent as
15possible.
16 11
17Note that motherboards vary widely in the connections to sensor chips. 12Note that motherboards vary widely in the connections to sensor chips.
18There is no standard that ensures, for example, that the second 13There is no standard that ensures, for example, that the second
@@ -35,19 +30,17 @@ access this data in a simple and consistent way. That said, such programs
35will have to implement conversion, labeling and hiding of inputs. For 30will have to implement conversion, labeling and hiding of inputs. For
36this reason, it is still not recommended to bypass the library. 31this reason, it is still not recommended to bypass the library.
37 32
38If you are developing a userspace application please send us feedback on
39this standard.
40
41Note that this standard isn't completely established yet, so it is subject
42to changes. If you are writing a new hardware monitoring driver those
43features can't seem to fit in this interface, please contact us with your
44extension proposal. Keep in mind that backward compatibility must be
45preserved.
46
47Each chip gets its own directory in the sysfs /sys/devices tree. To 33Each chip gets its own directory in the sysfs /sys/devices tree. To
48find all sensor chips, it is easier to follow the device symlinks from 34find all sensor chips, it is easier to follow the device symlinks from
49/sys/class/hwmon/hwmon*. 35/sys/class/hwmon/hwmon*.
50 36
37Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes
38in the "physical" device directory. Since lm-sensors 3.0.1, attributes found
39in the hwmon "class" device directory are also supported. Complex drivers
40(e.g. drivers for multifunction chips) may want to use this possibility to
41avoid namespace pollution. The only drawback will be that older versions of
42libsensors won't support the driver in question.
43
51All sysfs values are fixed point numbers. 44All sysfs values are fixed point numbers.
52 45
53There is only one value per file, unlike the older /proc specification. 46There is only one value per file, unlike the older /proc specification.
diff --git a/Documentation/hwmon/w83l785ts b/Documentation/hwmon/w83l785ts
index 1841cedc25b2..bd1fa9d4468d 100644
--- a/Documentation/hwmon/w83l785ts
+++ b/Documentation/hwmon/w83l785ts
@@ -33,7 +33,8 @@ Known Issues
33------------ 33------------
34 34
35On some systems (Asus), the BIOS is known to interfere with the driver 35On some systems (Asus), the BIOS is known to interfere with the driver
36and cause read errors. The driver will retry a given number of times 36and cause read errors. Or maybe the W83L785TS-S chip is simply unreliable,
37we don't really know. The driver will retry a given number of times
37(5 by default) and then give up, returning the old value (or 0 if 38(5 by default) and then give up, returning the old value (or 0 if
38there is no old value). It seems to work well enough so that you should 39there is no old value). It seems to work well enough so that you should
39not notice anything. Thanks to James Bolt for helping test this feature. 40not notice anything. Thanks to James Bolt for helping test this feature.
diff --git a/Documentation/i2c/functionality b/Documentation/i2c/functionality
index 60cca249e452..42c17c1fb3cd 100644
--- a/Documentation/i2c/functionality
+++ b/Documentation/i2c/functionality
@@ -51,26 +51,38 @@ A few combinations of the above flags are also defined for your convenience:
51 the transparent emulation layer) 51 the transparent emulation layer)
52 52
53 53
54ALGORITHM/ADAPTER IMPLEMENTATION 54ADAPTER IMPLEMENTATION
55-------------------------------- 55----------------------
56 56
57When you write a new algorithm driver, you will have to implement a 57When you write a new adapter driver, you will have to implement a
58function callback `functionality', that gets an i2c_adapter structure 58function callback `functionality'. Typical implementations are given
59pointer as its only parameter: 59below.
60 60
61 struct i2c_algorithm { 61A typical SMBus-only adapter would list all the SMBus transactions it
62 /* Many other things of course; check <linux/i2c.h>! */ 62supports. This example comes from the i2c-piix4 driver:
63 u32 (*functionality) (struct i2c_adapter *); 63
64 static u32 piix4_func(struct i2c_adapter *adapter)
65 {
66 return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
67 I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
68 I2C_FUNC_SMBUS_BLOCK_DATA;
64 } 69 }
65 70
66A typically implementation is given below, from i2c-algo-bit.c: 71A typical full-I2C adapter would use the following (from the i2c-pxa
72driver):
67 73
68 static u32 bit_func(struct i2c_adapter *adap) 74 static u32 i2c_pxa_functionality(struct i2c_adapter *adap)
69 { 75 {
70 return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_10BIT_ADDR | 76 return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
71 I2C_FUNC_PROTOCOL_MANGLING;
72 } 77 }
73 78
79I2C_FUNC_SMBUS_EMUL includes all the SMBus transactions (with the
80addition of I2C block transactions) which i2c-core can emulate using
81I2C_FUNC_I2C without any help from the adapter driver. The idea is
82to let the client drivers check for the support of SMBus functions
83without having to care whether the said functions are implemented in
84hardware by the adapter, or emulated in software by i2c-core on top
85of an I2C adapter.
74 86
75 87
76CLIENT CHECKING 88CLIENT CHECKING
@@ -78,36 +90,33 @@ CLIENT CHECKING
78 90
79Before a client tries to attach to an adapter, or even do tests to check 91Before a client tries to attach to an adapter, or even do tests to check
80whether one of the devices it supports is present on an adapter, it should 92whether one of the devices it supports is present on an adapter, it should
81check whether the needed functionality is present. There are two functions 93check whether the needed functionality is present. The typical way to do
82defined which should be used instead of calling the functionality hook 94this is (from the lm75 driver):
83in the algorithm structure directly:
84
85 /* Return the functionality mask */
86 extern u32 i2c_get_functionality (struct i2c_adapter *adap);
87
88 /* Return 1 if adapter supports everything we need, 0 if not. */
89 extern int i2c_check_functionality (struct i2c_adapter *adap, u32 func);
90 95
91This is a typical way to use these functions (from the writing-clients 96 static int lm75_detect(...)
92document):
93 int foo_detect_client(struct i2c_adapter *adapter, int address,
94 unsigned short flags, int kind)
95 { 97 {
96 /* Define needed variables */ 98 (...)
97 99 if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
98 /* As the very first action, we check whether the adapter has the 100 I2C_FUNC_SMBUS_WORD_DATA))
99 needed functionality: we need the SMBus read_word_data, 101 goto exit;
100 write_word_data and write_byte functions in this example. */ 102 (...)
101 if (!i2c_check_functionality(adapter,I2C_FUNC_SMBUS_WORD_DATA |
102 I2C_FUNC_SMBUS_WRITE_BYTE))
103 goto ERROR0;
104
105 /* Now we can do the real detection */
106
107 ERROR0:
108 /* Return an error */
109 } 103 }
110 104
105Here, the lm75 driver checks if the adapter can do both SMBus byte data
106and SMBus word data transactions. If not, then the driver won't work on
107this adapter and there's no point in going on. If the check above is
108successful, then the driver knows that it can call the following
109functions: i2c_smbus_read_byte_data(), i2c_smbus_write_byte_data(),
110i2c_smbus_read_word_data() and i2c_smbus_write_word_data(). As a rule of
111thumb, the functionality constants you test for with
112i2c_check_functionality() should match exactly the i2c_smbus_* functions
113which your driver is calling.
114
115Note that the check above doesn't tell whether the functionalities are
116implemented in hardware by the underlying adapter or emulated in
117software by i2c-core. Client drivers don't have to care about this, as
118i2c-core will transparently implement SMBus transactions on top of I2C
119adapters.
111 120
112 121
113CHECKING THROUGH /DEV 122CHECKING THROUGH /DEV
@@ -116,19 +125,19 @@ CHECKING THROUGH /DEV
116If you try to access an adapter from a userspace program, you will have 125If you try to access an adapter from a userspace program, you will have
117to use the /dev interface. You will still have to check whether the 126to use the /dev interface. You will still have to check whether the
118functionality you need is supported, of course. This is done using 127functionality you need is supported, of course. This is done using
119the I2C_FUNCS ioctl. An example, adapted from the lm_sensors i2cdetect 128the I2C_FUNCS ioctl. An example, adapted from the i2cdetect program, is
120program, is below: 129below:
121 130
122 int file; 131 int file;
123 if (file = open("/dev/i2c-0",O_RDWR) < 0) { 132 if ((file = open("/dev/i2c-0", O_RDWR)) < 0) {
124 /* Some kind of error handling */ 133 /* Some kind of error handling */
125 exit(1); 134 exit(1);
126 } 135 }
127 if (ioctl(file,I2C_FUNCS,&funcs) < 0) { 136 if (ioctl(file, I2C_FUNCS, &funcs) < 0) {
128 /* Some kind of error handling */ 137 /* Some kind of error handling */
129 exit(1); 138 exit(1);
130 } 139 }
131 if (! (funcs & I2C_FUNC_SMBUS_QUICK)) { 140 if (!(funcs & I2C_FUNC_SMBUS_QUICK)) {
132 /* Oops, the needed functionality (SMBus write_quick function) is 141 /* Oops, the needed functionality (SMBus write_quick function) is
133 not available! */ 142 not available! */
134 exit(1); 143 exit(1);
diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
index 8a653c60d25a..03f08fb491cc 100644
--- a/Documentation/i2c/smbus-protocol
+++ b/Documentation/i2c/smbus-protocol
@@ -1,5 +1,6 @@
1SMBus Protocol Summary 1SMBus Protocol Summary
2====================== 2======================
3
3The following is a summary of the SMBus protocol. It applies to 4The following is a summary of the SMBus protocol. It applies to
4all revisions of the protocol (1.0, 1.1, and 2.0). 5all revisions of the protocol (1.0, 1.1, and 2.0).
5Certain protocol features which are not supported by 6Certain protocol features which are not supported by
@@ -8,6 +9,7 @@ this package are briefly described at the end of this document.
8Some adapters understand only the SMBus (System Management Bus) protocol, 9Some adapters understand only the SMBus (System Management Bus) protocol,
9which is a subset from the I2C protocol. Fortunately, many devices use 10which is a subset from the I2C protocol. Fortunately, many devices use
10only the same subset, which makes it possible to put them on an SMBus. 11only the same subset, which makes it possible to put them on an SMBus.
12
11If you write a driver for some I2C device, please try to use the SMBus 13If you write a driver for some I2C device, please try to use the SMBus
12commands if at all possible (if the device uses only that subset of the 14commands if at all possible (if the device uses only that subset of the
13I2C protocol). This makes it possible to use the device driver on both 15I2C protocol). This makes it possible to use the device driver on both
@@ -15,7 +17,12 @@ SMBus adapters and I2C adapters (the SMBus command set is automatically
15translated to I2C on I2C adapters, but plain I2C commands can not be 17translated to I2C on I2C adapters, but plain I2C commands can not be
16handled at all on most pure SMBus adapters). 18handled at all on most pure SMBus adapters).
17 19
18Below is a list of SMBus commands. 20Below is a list of SMBus protocol operations, and the functions executing
21them. Note that the names used in the SMBus protocol specifications usually
22don't match these function names. For some of the operations which pass a
23single data byte, the functions using SMBus protocol operation names execute
24a different protocol operation entirely.
25
19 26
20Key to symbols 27Key to symbols
21============== 28==============
@@ -35,17 +42,16 @@ Count (8 bits): A data byte containing the length of a block operation.
35[..]: Data sent by I2C device, as opposed to data sent by the host adapter. 42[..]: Data sent by I2C device, as opposed to data sent by the host adapter.
36 43
37 44
38SMBus Write Quick 45SMBus Quick Command: i2c_smbus_write_quick()
39================= 46=============================================
40 47
41This sends a single bit to the device, at the place of the Rd/Wr bit. 48This sends a single bit to the device, at the place of the Rd/Wr bit.
42There is no equivalent Read Quick command.
43 49
44A Addr Rd/Wr [A] P 50A Addr Rd/Wr [A] P
45 51
46 52
47SMBus Read Byte 53SMBus Receive Byte: i2c_smbus_read_byte()
48=============== 54==========================================
49 55
50This reads a single byte from a device, without specifying a device 56This reads a single byte from a device, without specifying a device
51register. Some devices are so simple that this interface is enough; for 57register. Some devices are so simple that this interface is enough; for
@@ -55,17 +61,17 @@ the previous SMBus command.
55S Addr Rd [A] [Data] NA P 61S Addr Rd [A] [Data] NA P
56 62
57 63
58SMBus Write Byte 64SMBus Send Byte: i2c_smbus_write_byte()
59================ 65========================================
60 66
61This is the reverse of Read Byte: it sends a single byte to a device. 67This operation is the reverse of Receive Byte: it sends a single byte
62See Read Byte for more information. 68to a device. See Receive Byte for more information.
63 69
64S Addr Wr [A] Data [A] P 70S Addr Wr [A] Data [A] P
65 71
66 72
67SMBus Read Byte Data 73SMBus Read Byte: i2c_smbus_read_byte_data()
68==================== 74============================================
69 75
70This reads a single byte from a device, from a designated register. 76This reads a single byte from a device, from a designated register.
71The register is specified through the Comm byte. 77The register is specified through the Comm byte.
@@ -73,30 +79,30 @@ The register is specified through the Comm byte.
73S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P 79S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
74 80
75 81
76SMBus Read Word Data 82SMBus Read Word: i2c_smbus_read_word_data()
77==================== 83============================================
78 84
79This command is very like Read Byte Data; again, data is read from a 85This operation is very like Read Byte; again, data is read from a
80device, from a designated register that is specified through the Comm 86device, from a designated register that is specified through the Comm
81byte. But this time, the data is a complete word (16 bits). 87byte. But this time, the data is a complete word (16 bits).
82 88
83S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P 89S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
84 90
85 91
86SMBus Write Byte Data 92SMBus Write Byte: i2c_smbus_write_byte_data()
87===================== 93==============================================
88 94
89This writes a single byte to a device, to a designated register. The 95This writes a single byte to a device, to a designated register. The
90register is specified through the Comm byte. This is the opposite of 96register is specified through the Comm byte. This is the opposite of
91the Read Byte Data command. 97the Read Byte operation.
92 98
93S Addr Wr [A] Comm [A] Data [A] P 99S Addr Wr [A] Comm [A] Data [A] P
94 100
95 101
96SMBus Write Word Data 102SMBus Write Word: i2c_smbus_write_word_data()
97===================== 103==============================================
98 104
99This is the opposite operation of the Read Word Data command. 16 bits 105This is the opposite of the Read Word operation. 16 bits
100of data is written to a device, to the designated register that is 106of data is written to a device, to the designated register that is
101specified through the Comm byte. 107specified through the Comm byte.
102 108
@@ -113,8 +119,8 @@ S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A]
113 S Addr Rd [A] [DataLow] A [DataHigh] NA P 119 S Addr Rd [A] [DataLow] A [DataHigh] NA P
114 120
115 121
116SMBus Block Read 122SMBus Block Read: i2c_smbus_read_block_data()
117================ 123==============================================
118 124
119This command reads a block of up to 32 bytes from a device, from a 125This command reads a block of up to 32 bytes from a device, from a
120designated register that is specified through the Comm byte. The amount 126designated register that is specified through the Comm byte. The amount
@@ -124,8 +130,8 @@ S Addr Wr [A] Comm [A]
124 S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P 130 S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
125 131
126 132
127SMBus Block Write 133SMBus Block Write: i2c_smbus_write_block_data()
128================= 134================================================
129 135
130The opposite of the Block Read command, this writes up to 32 bytes to 136The opposite of the Block Read command, this writes up to 32 bytes to
131a device, to a designated register that is specified through the 137a device, to a designated register that is specified through the
@@ -134,10 +140,11 @@ Comm byte. The amount of data is specified in the Count byte.
134S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P 140S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P
135 141
136 142
137SMBus Block Process Call 143SMBus Block Write - Block Read Process Call
138======================== 144===========================================
139 145
140SMBus Block Process Call was introduced in Revision 2.0 of the specification. 146SMBus Block Write - Block Read Process Call was introduced in
147Revision 2.0 of the specification.
141 148
142This command selects a device register (through the Comm byte), sends 149This command selects a device register (through the Comm byte), sends
1431 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return. 1501 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return.
@@ -159,13 +166,16 @@ alerting device's address.
159 166
160Packet Error Checking (PEC) 167Packet Error Checking (PEC)
161=========================== 168===========================
169
162Packet Error Checking was introduced in Revision 1.1 of the specification. 170Packet Error Checking was introduced in Revision 1.1 of the specification.
163 171
164PEC adds a CRC-8 error-checking byte to all transfers. 172PEC adds a CRC-8 error-checking byte to transfers using it, immediately
173before the terminating STOP.
165 174
166 175
167Address Resolution Protocol (ARP) 176Address Resolution Protocol (ARP)
168================================= 177=================================
178
169The Address Resolution Protocol was introduced in Revision 2.0 of 179The Address Resolution Protocol was introduced in Revision 2.0 of
170the specification. It is a higher-layer protocol which uses the 180the specification. It is a higher-layer protocol which uses the
171messages above. 181messages above.
@@ -177,14 +187,17 @@ require PEC checksums.
177 187
178I2C Block Transactions 188I2C Block Transactions
179====================== 189======================
190
180The following I2C block transactions are supported by the 191The following I2C block transactions are supported by the
181SMBus layer and are described here for completeness. 192SMBus layer and are described here for completeness.
193They are *NOT* defined by the SMBus specification.
194
182I2C block transactions do not limit the number of bytes transferred 195I2C block transactions do not limit the number of bytes transferred
183but the SMBus layer places a limit of 32 bytes. 196but the SMBus layer places a limit of 32 bytes.
184 197
185 198
186I2C Block Read 199I2C Block Read: i2c_smbus_read_i2c_block_data()
187============== 200================================================
188 201
189This command reads a block of bytes from a device, from a 202This command reads a block of bytes from a device, from a
190designated register that is specified through the Comm byte. 203designated register that is specified through the Comm byte.
@@ -203,8 +216,8 @@ S Addr Wr [A] Comm1 [A] Comm2 [A]
203 S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P 216 S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
204 217
205 218
206I2C Block Write 219I2C Block Write: i2c_smbus_write_i2c_block_data()
207=============== 220==================================================
208 221
209The opposite of the Block Read command, this writes bytes to 222The opposite of the Block Read command, this writes bytes to
210a device, to a designated register that is specified through the 223a device, to a designated register that is specified through the
@@ -212,5 +225,3 @@ Comm byte. Note that command lengths of 0, 2, or more bytes are
212supported as they are indistinguishable from data. 225supported as they are indistinguishable from data.
213 226
214S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P 227S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P
215
216
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index bfb0a5520817..d4cd4126d1ad 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -25,12 +25,23 @@ routines, and should be zero-initialized except for fields with data you
25provide. A client structure holds device-specific information like the 25provide. A client structure holds device-specific information like the
26driver model device node, and its I2C address. 26driver model device node, and its I2C address.
27 27
28/* iff driver uses driver model ("new style") binding model: */
29
30static struct i2c_device_id foo_idtable[] = {
31 { "foo", my_id_for_foo },
32 { "bar", my_id_for_bar },
33 { }
34};
35
36MODULE_DEVICE_TABLE(i2c, foo_idtable);
37
28static struct i2c_driver foo_driver = { 38static struct i2c_driver foo_driver = {
29 .driver = { 39 .driver = {
30 .name = "foo", 40 .name = "foo",
31 }, 41 },
32 42
33 /* iff driver uses driver model ("new style") binding model: */ 43 /* iff driver uses driver model ("new style") binding model: */
44 .id_table = foo_idtable,
34 .probe = foo_probe, 45 .probe = foo_probe,
35 .remove = foo_remove, 46 .remove = foo_remove,
36 47
@@ -164,7 +175,8 @@ I2C device drivers using this binding model work just like any other
164kind of driver in Linux: they provide a probe() method to bind to 175kind of driver in Linux: they provide a probe() method to bind to
165those devices, and a remove() method to unbind. 176those devices, and a remove() method to unbind.
166 177
167 static int foo_probe(struct i2c_client *client); 178 static int foo_probe(struct i2c_client *client,
179 const struct i2c_device_id *id);
168 static int foo_remove(struct i2c_client *client); 180 static int foo_remove(struct i2c_client *client);
169 181
170Remember that the i2c_driver does not create those client handles. The 182Remember that the i2c_driver does not create those client handles. The
@@ -172,10 +184,9 @@ handle may be used during foo_probe(). If foo_probe() reports success
172(zero not a negative status code) it may save the handle and use it until 184(zero not a negative status code) it may save the handle and use it until
173foo_remove() returns. That binding model is used by most Linux drivers. 185foo_remove() returns. That binding model is used by most Linux drivers.
174 186
175Drivers match devices when i2c_client.driver_name and the driver name are 187The probe function is called when an entry in the id_table name field
176the same; this approach is used in several other busses that don't have 188matches the device's name. It is passed the entry that was matched so
177device typing support in the hardware. The driver and module name should 189the driver knows which one in the table matched.
178match, so hotplug/coldplug mechanisms will modprobe the driver.
179 190
180 191
181Device Creation (Standard driver model) 192Device Creation (Standard driver model)
diff --git a/Documentation/i386/IO-APIC.txt b/Documentation/i386/IO-APIC.txt
index f95166645d29..30b4c714fbe1 100644
--- a/Documentation/i386/IO-APIC.txt
+++ b/Documentation/i386/IO-APIC.txt
@@ -70,7 +70,7 @@ Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD:
70 70
71These INTA-D PCI IRQs are always 'local to the card', their real meaning 71These INTA-D PCI IRQs are always 'local to the card', their real meaning
72depends on which slot they are in. If you look at the daisy chaining diagram, 72depends on which slot they are in. If you look at the daisy chaining diagram,
73a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ2 of 73a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of
74the PCI chipset. Most cards issue INTA, this creates optimal distribution 74the PCI chipset. Most cards issue INTA, this creates optimal distribution
75between the PIRQ lines. (distributing IRQ sources properly is not a 75between the PIRQ lines. (distributing IRQ sources properly is not a
76necessity, PCI IRQs can be shared at will, but it's good for performance 76necessity, PCI IRQs can be shared at will, but it's good for performance
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index fc49b79bc1ab..95ad15c3b01f 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -40,8 +40,18 @@ Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
40 Introduce relocatable_kernel and kernel_alignment fields. 40 Introduce relocatable_kernel and kernel_alignment fields.
41 41
42Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of 42Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
43 the boot command line 43 the boot command line.
44 44
45Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol.
46 Introduced hardware_subarch and hardware_subarch_data
47 and KEEP_SEGMENTS flag in load_flags.
48
49Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
50 payload. Introduced payload_offset and payload_length
51 fields to aid in locating the payload.
52
53Protocol 2.09: (Kernel 2.6.26) Added a field holding a 64-bit physical
54 pointer to a singly linked list of struct setup_data.
45 55
46**** MEMORY LAYOUT 56**** MEMORY LAYOUT
47 57
@@ -170,6 +180,10 @@ Offset Proto Name Meaning
1700238/4 2.06+ cmdline_size Maximum size of the kernel command line 1800238/4 2.06+ cmdline_size Maximum size of the kernel command line
171023C/4 2.07+ hardware_subarch Hardware subarchitecture 181023C/4 2.07+ hardware_subarch Hardware subarchitecture
1720240/8 2.07+ hardware_subarch_data Subarchitecture-specific data 1820240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
1830248/4 2.08+ payload_offset Offset of kernel payload
184024C/4 2.08+ payload_length Length of kernel payload
1850250/8 2.09+ setup_data 64-bit physical pointer to linked list
186 of struct setup_data
173 187
174(1) For backwards compatibility, if the setup_sects field contains 0, the 188(1) For backwards compatibility, if the setup_sects field contains 0, the
175 real value is 4. 189 real value is 4.
@@ -512,6 +526,32 @@ Protocol: 2.07+
512 526
513 A pointer to data that is specific to hardware subarch 527 A pointer to data that is specific to hardware subarch
514 528
529Field name: payload_offset
530Type: read
531Offset/size: 0x248/4
532Protocol: 2.08+
533
534 If non-zero then this field contains the offset from the end of the
535 real-mode code to the payload.
536
537 The payload may be compressed. The format of both the compressed and
538 uncompressed data should be determined using the standard magic
539 numbers. Currently only gzip compressed ELF is used.
540
541Field name: payload_length
542Type: read
543Offset/size: 0x24c/4
544Protocol: 2.08+
545
546 The length of the payload.
547
548**** THE IMAGE CHECKSUM
549
550From boot protocol version 2.08 onwards the CRC-32 is calculated over
551the entire file using the characteristic polynomial 0x04C11DB7 and an
552initial remainder of 0xffffffff. The checksum is appended to the
553file; therefore the CRC of the file up to the limit specified in the
554syssize field of the header is always 0.
515 555
516**** THE KERNEL COMMAND LINE 556**** THE KERNEL COMMAND LINE
517 557
@@ -544,6 +584,28 @@ command line is entered using the following protocol:
544 covered by setup_move_size, so you may need to adjust this 584 covered by setup_move_size, so you may need to adjust this
545 field. 585 field.
546 586
587Field name: setup_data
588Type: write (obligatory)
589Offset/size: 0x250/8
590Protocol: 2.09+
591
592 The 64-bit physical pointer to NULL terminated single linked list of
593 struct setup_data. This is used to define a more extensible boot
594 parameters passing mechanism. The definition of struct setup_data is
595 as follows:
596
597 struct setup_data {
598 u64 next;
599 u32 type;
600 u32 len;
601 u8 data[0];
602 };
603
604 Where, the next is a 64-bit physical pointer to the next node of
605 linked list, the next field of the last node is 0; the type is used
606 to identify the contents of data; the len is the length of data
607 field; the data holds the real payload.
608
547 609
548**** MEMORY LAYOUT OF THE REAL-MODE CODE 610**** MEMORY LAYOUT OF THE REAL-MODE CODE
549 611
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
new file mode 100644
index 000000000000..bec9d815da33
--- /dev/null
+++ b/Documentation/ia64/kvm.txt
@@ -0,0 +1,82 @@
1Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
2interfaces are not stable enough to use. So, please do not run
3critical applications in virtual machine. We will try our best to make it
4strong in future versions!
5 Guide: How to boot up guests on kvm/ia64
6
7This guide is to describe how to enable kvm support for IA-64 systems.
8
91. Get the kvm source from git.kernel.org.
10 Userspace source:
11 git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
12 Kernel Source:
13 git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
14
152. Compile the source code.
16 2.1 Compile userspace code:
17 (1)cd ./kvm-userspace
18 (2)./configure
19 (3)cd kernel
20 (4)make sync LINUX=$kernel_dir (kernel_dir is the directory of kernel source.)
21 (5)cd ..
22 (6)make qemu
23 (7)cd qemu; make install
24
25 2.2 Compile kernel source code:
26 (1) cd ./$kernel_dir
27 (2) make menuconfig
28 (3) Enter into virtualization option, and choose kvm.
29 (4) make
30 (5) Once (4) done, make modules_install
31 (6) Make initrd, and use new kernel to reboot up host machine.
32 (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
33 (8) insmod kvm.ko; insmod kvm-intel.ko
34
35Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu; otherwise, it may fail.
36
373. Get Guest Firmware named as Flash.fd, and put it under right place:
38 (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
39
40 (2) If you have no firmware at hand, Please download its source from
41 hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
42 you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
43
44 (3) Rename the firmware you own to Flash.fd, and copy it to /usr/local/share/qemu
45
464. Boot up Linux or Windows guests:
47 4.1 Create or install an image for guest boot. If you have xen experience, it should be easy.
48
49 4.2 Boot up guests using the following command.
50 /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
51 (xx is the number of virtual processors for the guest, now the maximum value is 4)
52
535. Known possible issues on some platforms with old Firmware.
54
55If you meet strange host crash issues, try to solve them in either of the following ways:
56
57(1): Upgrade your Firmware to the latest one.
58
59(2): Applying the below patch to kernel source.
60diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
61index 0b53344..f02b0f7 100644
62--- a/arch/ia64/kernel/pal.S
63+++ b/arch/ia64/kernel/pal.S
64@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
65 mov ar.pfs = loc1
66 mov rp = loc0
67 ;;
68- srlz.d // seralize restoration of psr.l
69+ srlz.i // seralize restoration of psr.l
70+ ;;
71 br.ret.sptk.many b0
72 END(ia64_pal_call_static)
73
746. Bug report:
75 If you find any issues when using kvm/ia64, please post the bug info to the kvm-ia64-devel mailing list.
76 https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
77
78Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
79
80
81 Xiantao Zhang <xiantao.zhang@intel.com>
82 2008.3.10
diff --git a/Documentation/ide/ide-tape.txt b/Documentation/ide/ide-tape.txt
index 658f271a373f..3f348a0b21d8 100644
--- a/Documentation/ide/ide-tape.txt
+++ b/Documentation/ide/ide-tape.txt
@@ -1,146 +1,65 @@
1/* 1IDE ATAPI streaming tape driver.
2 * IDE ATAPI streaming tape driver. 2
3 * 3This driver is a part of the Linux ide driver.
4 * This driver is a part of the Linux ide driver. 4
5 * 5The driver, in co-operation with ide.c, basically traverses the
6 * The driver, in co-operation with ide.c, basically traverses the 6request-list for the block device interface. The character device
7 * request-list for the block device interface. The character device 7interface, on the other hand, creates new requests, adds them
8 * interface, on the other hand, creates new requests, adds them 8to the request-list of the block device, and waits for their completion.
9 * to the request-list of the block device, and waits for their completion. 9
10 * 10The block device major and minor numbers are determined from the
11 * Pipelined operation mode is now supported on both reads and writes. 11tape's relative position in the ide interfaces, as explained in ide.c.
12 * 12
13 * The block device major and minor numbers are determined from the 13The character device interface consists of the following devices:
14 * tape's relative position in the ide interfaces, as explained in ide.c. 14
15 * 15ht0 major 37, minor 0 first IDE tape, rewind on close.
16 * The character device interface consists of the following devices: 16ht1 major 37, minor 1 second IDE tape, rewind on close.
17 * 17...
18 * ht0 major 37, minor 0 first IDE tape, rewind on close. 18nht0 major 37, minor 128 first IDE tape, no rewind on close.
19 * ht1 major 37, minor 1 second IDE tape, rewind on close. 19nht1 major 37, minor 129 second IDE tape, no rewind on close.
20 * ... 20...
21 * nht0 major 37, minor 128 first IDE tape, no rewind on close. 21
22 * nht1 major 37, minor 129 second IDE tape, no rewind on close. 22The general magnetic tape commands compatible interface, as defined by
23 * ... 23include/linux/mtio.h, is accessible through the character device.
24 * 24
25 * The general magnetic tape commands compatible interface, as defined by 25General ide driver configuration options, such as the interrupt-unmask
26 * include/linux/mtio.h, is accessible through the character device. 26flag, can be configured by issuing an ioctl to the block device interface,
27 * 27as any other ide device.
28 * General ide driver configuration options, such as the interrupt-unmask 28
29 * flag, can be configured by issuing an ioctl to the block device interface, 29Our own ide-tape ioctl's can be issued to either the block device or
30 * as any other ide device. 30the character device interface.
31 * 31
32 * Our own ide-tape ioctl's can be issued to either the block device or 32Maximal throughput with minimal bus load will usually be achieved in the
33 * the character device interface. 33following scenario:
34 * 34
35 * Maximal throughput with minimal bus load will usually be achieved in the 35 1. ide-tape is operating in the pipelined operation mode.
36 * following scenario: 36 2. No buffering is performed by the user backup program.
37 * 37
38 * 1. ide-tape is operating in the pipelined operation mode. 38Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive.
39 * 2. No buffering is performed by the user backup program. 39
40 * 40Here are some words from the first releases of hd.c, which are quoted
41 * Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive. 41in ide.c and apply here as well:
42 * 42
43 * Here are some words from the first releases of hd.c, which are quoted 43| Special care is recommended. Have Fun!
44 * in ide.c and apply here as well: 44
45 * 45Possible improvements:
46 * | Special care is recommended. Have Fun! 46
47 * 471. Support for the ATAPI overlap protocol.
48 * 48
49 * An overview of the pipelined operation mode. 49In order to maximize bus throughput, we currently use the DSC
50 * 50overlap method which enables ide.c to service requests from the
51 * In the pipelined write mode, we will usually just add requests to our 51other device while the tape is busy executing a command. The
52 * pipeline and return immediately, before we even start to service them. The 52DSC overlap method involves polling the tape's status register
53 * user program will then have enough time to prepare the next request while 53for the DSC bit, and servicing the other device while the tape
54 * we are still busy servicing previous requests. In the pipelined read mode, 54isn't ready.
55 * the situation is similar - we add read-ahead requests into the pipeline, 55
56 * before the user even requested them. 56In the current QIC development standard (December 1995),
57 * 57it is recommended that new tape drives will *in addition*
58 * The pipeline can be viewed as a "safety net" which will be activated when 58implement the ATAPI overlap protocol, which is used for the
59 * the system load is high and prevents the user backup program from keeping up 59same purpose - efficient use of the IDE bus, but is interrupt
60 * with the current tape speed. At this point, the pipeline will get 60driven and thus has much less CPU overhead.
61 * shorter and shorter but the tape will still be streaming at the same speed. 61
62 * Assuming we have enough pipeline stages, the system load will hopefully 62ATAPI overlap is likely to be supported in most new ATAPI
63 * decrease before the pipeline is completely empty, and the backup program 63devices, including new ATAPI cdroms, and thus provides us
64 * will be able to "catch up" and refill the pipeline again. 64a method by which we can achieve higher throughput when
65 * 65sharing a (fast) ATA-2 disk with any (slow) new ATAPI device.
66 * When using the pipelined mode, it would be best to disable any type of
67 * buffering done by the user program, as ide-tape already provides all the
68 * benefits in the kernel, where it can be done in a more efficient way.
69 * As we will usually not block the user program on a request, the most
70 * efficient user code will then be a simple read-write-read-... cycle.
71 * Any additional logic will usually just slow down the backup process.
72 *
73 * Using the pipelined mode, I get a constant over 400 KBps throughput,
74 * which seems to be the maximum throughput supported by my tape.
75 *
76 * However, there are some downfalls:
77 *
78 * 1. We use memory (for data buffers) in proportional to the number
79 * of pipeline stages (each stage is about 26 KB with my tape).
80 * 2. In the pipelined write mode, we cheat and postpone error codes
81 * to the user task. In read mode, the actual tape position
82 * will be a bit further than the last requested block.
83 *
84 * Concerning (1):
85 *
86 * 1. We allocate stages dynamically only when we need them. When
87 * we don't need them, we don't consume additional memory. In
88 * case we can't allocate stages, we just manage without them
89 * (at the expense of decreased throughput) so when Linux is
90 * tight in memory, we will not pose additional difficulties.
91 *
92 * 2. The maximum number of stages (which is, in fact, the maximum
93 * amount of memory) which we allocate is limited by the compile
94 * time parameter IDETAPE_MAX_PIPELINE_STAGES.
95 *
96 * 3. The maximum number of stages is a controlled parameter - We
97 * don't start from the user defined maximum number of stages
98 * but from the lower IDETAPE_MIN_PIPELINE_STAGES (again, we
99 * will not even allocate this amount of stages if the user
100 * program can't handle the speed). We then implement a feedback
101 * loop which checks if the pipeline is empty, and if it is, we
102 * increase the maximum number of stages as necessary until we
103 * reach the optimum value which just manages to keep the tape
104 * busy with minimum allocated memory or until we reach
105 * IDETAPE_MAX_PIPELINE_STAGES.
106 *
107 * Concerning (2):
108 *
109 * In pipelined write mode, ide-tape can not return accurate error codes
110 * to the user program since we usually just add the request to the
111 * pipeline without waiting for it to be serviced. In case an error
112 * occurs, I will report it on the next user request.
113 *
114 * In the pipelined read mode, subsequent read requests or forward
115 * filemark spacing will perform correctly, as we preserve all blocks
116 * and filemarks which we encountered during our excess read-ahead.
117 *
118 * For accurate tape positioning and error reporting, disabling
119 * pipelined mode might be the best option.
120 *
121 * You can enable/disable/tune the pipelined operation mode by adjusting
122 * the compile time parameters below.
123 *
124 *
125 * Possible improvements.
126 *
127 * 1. Support for the ATAPI overlap protocol.
128 *
129 * In order to maximize bus throughput, we currently use the DSC
130 * overlap method which enables ide.c to service requests from the
131 * other device while the tape is busy executing a command. The
132 * DSC overlap method involves polling the tape's status register
133 * for the DSC bit, and servicing the other device while the tape
134 * isn't ready.
135 *
136 * In the current QIC development standard (December 1995),
137 * it is recommended that new tape drives will *in addition*
138 * implement the ATAPI overlap protocol, which is used for the
139 * same purpose - efficient use of the IDE bus, but is interrupt
140 * driven and thus has much less CPU overhead.
141 *
142 * ATAPI overlap is likely to be supported in most new ATAPI
143 * devices, including new ATAPI cdroms, and thus provides us
144 * a method by which we can achieve higher throughput when
145 * sharing a (fast) ATA-2 disk with any (slow) new ATAPI device.
146 */
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
index 818676aad45a..0c78f4b1d9d9 100644
--- a/Documentation/ide/ide.txt
+++ b/Documentation/ide/ide.txt
@@ -71,29 +71,6 @@ This driver automatically probes for most IDE interfaces (including all PCI
71ones), for the drives/geometries attached to those interfaces, and for the IRQ 71ones), for the drives/geometries attached to those interfaces, and for the IRQ
72lines being used by the interfaces (normally 14, 15 for ide0/ide1). 72lines being used by the interfaces (normally 14, 15 for ide0/ide1).
73 73
74For special cases, interfaces may be specified using kernel "command line"
75options. For example,
76
77 ide3=0x168,0x36e,10 /* ioports 0x168-0x16f,0x36e, irq 10 */
78
79Normally the irq number need not be specified, as ide.c will probe for it:
80
81 ide3=0x168,0x36e /* ioports 0x168-0x16f,0x36e */
82
83The standard port, and irq values are these:
84
85 ide0=0x1f0,0x3f6,14
86 ide1=0x170,0x376,15
87 ide2=0x1e8,0x3ee,11
88 ide3=0x168,0x36e,10
89
90Note that the first parameter reserves 8 contiguous ioports, whereas the
91second value denotes a single ioport. If in doubt, do a 'cat /proc/ioports'.
92
93In all probability the device uses these ports and IRQs if it is attached
94to the appropriate ide channel. Pass the parameter for the correct ide
95channel to the kernel, as explained above.
96
97Any number of interfaces may share a single IRQ if necessary, at a slight 74Any number of interfaces may share a single IRQ if necessary, at a slight
98performance penalty, whether on separate cards or a single VLB card. 75performance penalty, whether on separate cards or a single VLB card.
99The IDE driver automatically detects and handles this. However, this may 76The IDE driver automatically detects and handles this. However, this may
@@ -105,27 +82,26 @@ Drives are normally found by auto-probing and/or examining the CMOS/BIOS data.
105For really weird situations, the apparent (fdisk) geometry can also be specified 82For really weird situations, the apparent (fdisk) geometry can also be specified
106on the kernel "command line" using LILO. The format of such lines is: 83on the kernel "command line" using LILO. The format of such lines is:
107 84
108 hdx=cyls,heads,sects 85 ide_core.chs=[interface_number.device_number]:cyls,heads,sects
109or hdx=cdrom 86or ide_core.cdrom=[interface_number.device_number]
110 87
111where hdx can be any of hda through hdh, Three values are required 88For example:
112(cyls,heads,sects). For example:
113 89
114 hdc=1050,32,64 hdd=cdrom 90 ide_core.chs=1.0:1050,32,64 ide_core.cdrom=1.1
115 91
116either {hda,hdb} or {hdc,hdd}. The results of successful auto-probing may 92The results of successful auto-probing may override the physical geometry/irq
117override the physical geometry/irq specified, though the "original" geometry 93specified, though the "original" geometry may be retained as the "logical"
118may be retained as the "logical" geometry for partitioning purposes (fdisk). 94geometry for partitioning purposes (fdisk).
119 95
120If the auto-probing during boot time confuses a drive (ie. the drive works 96If the auto-probing during boot time confuses a drive (ie. the drive works
121with hd.c but not with ide.c), then an command line option may be specified 97with hd.c but not with ide.c), then an command line option may be specified
122for each drive for which you'd like the drive to skip the hardware 98for each drive for which you'd like the drive to skip the hardware
123probe/identification sequence. For example: 99probe/identification sequence. For example:
124 100
125 hdb=noprobe 101 ide_core.noprobe=0.1
126or 102or
127 hdc=768,16,32 103 ide_core.chs=1.0:768,16,32
128 hdc=noprobe 104 ide_core.noprobe=1.0
129 105
130Note that when only one IDE device is attached to an interface, it should be 106Note that when only one IDE device is attached to an interface, it should be
131jumpered as "single" or "master", *not* "slave". Many folks have had 107jumpered as "single" or "master", *not* "slave". Many folks have had
@@ -141,9 +117,9 @@ If for some reason your cdrom drive is *not* found at boot time, you can force
141the probe to look harder by supplying a kernel command line parameter 117the probe to look harder by supplying a kernel command line parameter
142via LILO, such as: 118via LILO, such as:
143 119
144 hdc=cdrom /* hdc = "master" on second interface */ 120 ide_core.cdrom=1.0 /* "master" on second interface (hdc) */
145or 121or
146 hdd=cdrom /* hdd = "slave" on second interface */ 122 ide_core.cdrom=1.1 /* "slave" on second interface (hdd) */
147 123
148For example, a GW2000 system might have a hard drive on the primary 124For example, a GW2000 system might have a hard drive on the primary
149interface (/dev/hda) and an IDE cdrom drive on the secondary interface 125interface (/dev/hda) and an IDE cdrom drive on the secondary interface
@@ -184,13 +160,6 @@ provided it is mounted with the default block size of 1024 (as above).
184Please pass on any feedback on any of this stuff to the maintainer, 160Please pass on any feedback on any of this stuff to the maintainer,
185whose address can be found in linux/MAINTAINERS. 161whose address can be found in linux/MAINTAINERS.
186 162
187Note that if BOTH hd.c and ide.c are configured into the kernel,
188hd.c will normally be allowed to control the primary IDE interface.
189This is useful for older hardware that may be incompatible with ide.c,
190and still allows newer hardware to run on the 2nd/3rd/4th IDE ports
191under control of ide.c. To have ide.c also "take over" the primary
192IDE port in this situation, use the "command line" parameter: ide0=0x1f0
193
194The IDE driver is modularized. The high level disk/CD-ROM/tape/floppy 163The IDE driver is modularized. The high level disk/CD-ROM/tape/floppy
195drivers can always be compiled as loadable modules, the chipset drivers 164drivers can always be compiled as loadable modules, the chipset drivers
196can only be compiled into the kernel, and the core code (ide.c) can be 165can only be compiled into the kernel, and the core code (ide.c) can be
@@ -204,9 +173,7 @@ to /etc/modprobe.conf.
204 173
205When ide.c is used as a module, you can pass command line parameters to the 174When ide.c is used as a module, you can pass command line parameters to the
206driver using the "options=" keyword to insmod, while replacing any ',' with 175driver using the "options=" keyword to insmod, while replacing any ',' with
207';'. For example: 176';'.
208
209 insmod ide.o options="ide0=serialize ide1=serialize ide2=0x1e8;0x3ee;11"
210 177
211 178
212================================================================================ 179================================================================================
@@ -214,81 +181,48 @@ driver using the "options=" keyword to insmod, while replacing any ',' with
214Summary of ide driver parameters for kernel command line 181Summary of ide driver parameters for kernel command line
215-------------------------------------------------------- 182--------------------------------------------------------
216 183
217 "hdx=" is recognized for all "x" from "a" to "u", such as "hdc". 184For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672)
218 185you need to explicitly enable probing by using "probe" kernel parameter,
219 "idex=" is recognized for all "x" from "0" to "9", such as "ide1". 186i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use:
220
221 "hdx=noprobe" : drive may be present, but do not probe for it
222
223 "hdx=none" : drive is NOT present, ignore cmos and do not probe
224
225 "hdx=nowerr" : ignore the WRERR_STAT bit on this drive
226
227 "hdx=cdrom" : drive is present, and is a cdrom drive
228
229 "hdx=cyl,head,sect" : disk drive is present, with specified geometry
230
231 "hdx=autotune" : driver will attempt to tune interface speed
232 to the fastest PIO mode supported,
233 if possible for this drive only.
234 Not fully supported by all chipset types,
235 and quite likely to cause trouble with
236 older/odd IDE drives.
237
238 "hdx=nodma" : disallow DMA
239
240 "idebus=xx" : inform IDE driver of VESA/PCI bus speed in MHz,
241 where "xx" is between 20 and 66 inclusive,
242 used when tuning chipset PIO modes.
243 For PCI bus, 25 is correct for a P75 system,
244 30 is correct for P90,P120,P180 systems,
245 and 33 is used for P100,P133,P166 systems.
246 If in doubt, use idebus=33 for PCI.
247 As for VLB, it is safest to not specify it.
248 Bigger values are safer than smaller ones.
249 187
250 "idex=base" : probe for an interface at the addr specified, 188* "ali14xx.probe" boot option when ali14xx driver is built-in the kernel
251 where "base" is usually 0x1f0 or 0x170
252 and "ctl" is assumed to be "base"+0x206
253 189
254 "idex=base,ctl" : specify both base and ctl 190* "probe" module parameter when ali14xx driver is compiled as module
191 ("modprobe ali14xx probe")
255 192
256 "idex=base,ctl,irq" : specify base, ctl, and irq number 193Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
195kernel parameter to enable probing for VLB version of the chipset (PCI ones
195are detected automatically).
257 196
258 "idex=serialize" : do not overlap operations on idex. Please note 197You also need to use "probe" kernel parameter for ide-4drives driver
259 that you will have to specify this option for 198(support for IDE generic chipset with four drives on one port).
260 both the respective primary and secondary channel
261 to take effect.
262 199
263 "idex=four" : four drives on idex and ide(x^1) share same ports 200To enable support for IDE doublers on Amiga use "doubler" kernel parameter
201for gayle host driver (i.e. "gayle.doubler" if the driver is built-in).
264 202
265 "idex=reset" : reset interface after probe 203To force ignoring cable detection (this should be needed only if you're using
204short 40-wire cable which cannot be automatically detected - if this is not
205the case please report it as a bug instead) use "ignore_cable" kernel parameter:
266 206
267 "idex=ata66" : informs the interface that it has an 80c cable 207* "ide_core.ignore_cable=[interface_number]" boot option if IDE is built-in
268 for chipsets that are ATA-66 capable, but the 208 (i.e. "ide_core.ignore_cable=1" to force ignoring cable for "ide1")
269 ability to bit test for detection is currently
270 unknown.
271 209
272 "ide=reverse" : formerly called to pci sub-system, but now local. 210* "ignore_cable=[interface_number]" module parameter (for ide_core module)
211 if IDE is compiled as module
273 212
274 "ide=doubler" : probe/support IDE doublers on Amiga 213Other kernel parameters for ide_core are:
275 214
276There may be more options than shown -- use the source, Luke! 215* "nodma=[interface_number.device_number]" to disallow DMA for a device
277 216
278Everything else is rejected with a "BAD OPTION" message. 217* "noflush=[interface_number.device_number]" to disable flush requests
279 218
280For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672) 219* "noprobe=[interface_number.device_number]" to skip probing
281you need to explicitly enable probing by using "probe" kernel parameter,
282i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use:
283 220
284* "ali14xx.probe" boot option when ali14xx driver is built-in the kernel 221* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
285 222
286* "probe" module parameter when ali14xx driver is compiled as module 223* "cdrom=[interface_number.device_number]" to force device as a CD-ROM
287 ("modprobe ali14xx probe")
288 224
289Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb" 225* "chs=[interface_number.device_number]" to force device as a disk (using CHS)
290kernel paremeter to enable probing for VLB version of the chipset (PCI ones
291are detected automatically).
292 226
293================================================================================ 227================================================================================
294 228
diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt
new file mode 100644
index 000000000000..d5885468b072
--- /dev/null
+++ b/Documentation/ide/warm-plug-howto.txt
@@ -0,0 +1,13 @@
1
2IDE warm-plug HOWTO
3===================
4
5To warm-plug devices on a port 'idex':
6
7# echo -n "1" > /sys/class/ide_port/idex/delete_devices
8
9unplug old device(s) and plug new device(s)
10
11# echo -n "1" > /sys/class/ide_port/idex/scan
12
13done
diff --git a/Documentation/ioctl-number.txt b/Documentation/ioctl-number.txt
index c18363bd8d11..240ce7a56c40 100644
--- a/Documentation/ioctl-number.txt
+++ b/Documentation/ioctl-number.txt
@@ -183,6 +183,8 @@ Code Seq# Include File Comments
1830xAC 00-1F linux/raw.h 1830xAC 00-1F linux/raw.h
1840xAD 00 Netfilter device in development: 1840xAD 00 Netfilter device in development:
185 <mailto:rusty@rustcorp.com.au> 185 <mailto:rusty@rustcorp.com.au>
1860xAE all linux/kvm.h Kernel-based Virtual Machine
187 <mailto:kvm-devel@lists.sourceforge.net>
1860xB0 all RATIO devices in development: 1880xB0 all RATIO devices in development:
187 <mailto:vgo@ratio.de> 189 <mailto:vgo@ratio.de>
1880xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca> 1900xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca>
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 649cb8799890..c412c245848f 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -104,14 +104,15 @@ applicable everywhere (see syntax).
104 Reverse dependencies can only be used with boolean or tristate 104 Reverse dependencies can only be used with boolean or tristate
105 symbols. 105 symbols.
106 Note: 106 Note:
107 select is evil.... select will by brute force set a symbol 107 select should be used with care. select will force
108 equal to 'y' without visiting the dependencies. So abusing 108 a symbol to a value without visiting the dependencies.
109 select you are able to select a symbol FOO even if FOO depends 109 By abusing select you are able to select a symbol FOO even
110 on BAR that is not set. In general use select only for 110 if FOO depends on BAR that is not set.
111 non-visible symbols (no prompts anywhere) and for symbols with 111 In general use select only for non-visible symbols
112 no dependencies. That will limit the usefulness but on the 112 (no prompts anywhere) and for symbols with no dependencies.
113 other hand avoid the illegal configurations all over. kconfig 113 That will limit the usefulness but on the other hand avoid
114 should one day warn about such things. 114 the illegal configurations all over.
115 kconfig should one day warn about such things.
115 116
116- numerical ranges: "range" <symbol> <symbol> ["if" <expr>] 117- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
117 This allows to limit the range of possible input values for int 118 This allows to limit the range of possible input values for int
@@ -376,27 +377,3 @@ config FOO
376 377
377limits FOO to module (=m) or disabled (=n). 378limits FOO to module (=m) or disabled (=n).
378 379
379
380Build limited by a third config symbol which may be =y or =m
381~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
382A common idiom that we see (and sometimes have problems with) is this:
383
384When option C in B (module or subsystem) uses interfaces from A (module
385or subsystem), and both A and B are tristate (could be =y or =m if they
386were independent of each other, but they aren't), then we need to limit
387C such that it cannot be built statically if A is built as a loadable
388module. (C already depends on B, so there is no dependency issue to
389take care of here.)
390
391If A is linked statically into the kernel image, C can be built
392statically or as loadable module(s). However, if A is built as loadable
393module(s), then C must be restricted to loadable module(s) also. This
394can be expressed in kconfig language as:
395
396config C
397 depends on A = y || A = B
398
399or for real examples, use this command in a kernel tree:
400
401$ find . -name Kconfig\* | xargs grep -ns "depends on.*=.*||.*=" | grep -v orig
402
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index 1d247d59ad56..1821c077b435 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -486,7 +486,7 @@ Module.symvers contains a list of all exported symbols from a kernel build.
486 Sometimes, an external module uses exported symbols from another 486 Sometimes, an external module uses exported symbols from another
487 external module. Kbuild needs to have full knowledge on all symbols 487 external module. Kbuild needs to have full knowledge on all symbols
488 to avoid spitting out warnings about undefined symbols. 488 to avoid spitting out warnings about undefined symbols.
489 Two solutions exist to let kbuild know all symbols of more than 489 Three solutions exist to let kbuild know all symbols of more than
490 one external module. 490 one external module.
491 The method with a top-level kbuild file is recommended but may be 491 The method with a top-level kbuild file is recommended but may be
492 impractical in certain situations. 492 impractical in certain situations.
@@ -523,6 +523,13 @@ Module.symvers contains a list of all exported symbols from a kernel build.
523 containing the sum of all symbols defined and not part of the 523 containing the sum of all symbols defined and not part of the
524 kernel. 524 kernel.
525 525
526 Use make variable KBUILD_EXTRA_SYMBOLS in the Makefile
527 If it is impractical to copy Module.symvers from another
528 module, you can assign a space separated list of files to
529 KBUILD_EXTRA_SYMBOLS in your Makefile. These files will be
530 loaded by modpost during the initialisation of its symbol
531 tables.
532
526=== 8. Tips & Tricks 533=== 8. Tips & Tricks
527 534
528--- 8.1 Testing for CONFIG_FOO_BAR 535--- 8.1 Testing for CONFIG_FOO_BAR
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index d0ac72cc19ff..b8e52c0355d3 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -245,6 +245,8 @@ The syntax is:
245 crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset] 245 crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
246 range=start-[end] 246 range=start-[end]
247 247
248 'start' is inclusive and 'end' is exclusive.
249
248For example: 250For example:
249 251
250 crashkernel=512M-2G:64M,2G-:128M 252 crashkernel=512M-2G:64M,2G-:128M
@@ -253,10 +255,11 @@ This would mean:
253 255
254 1) if the RAM is smaller than 512M, then don't reserve anything 256 1) if the RAM is smaller than 512M, then don't reserve anything
255 (this is the "rescue" case) 257 (this is the "rescue" case)
256 2) if the RAM size is between 512M and 2G, then reserve 64M 258 2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
257 3) if the RAM size is larger than 2G, then reserve 128M 259 3) if the RAM size is larger than 2G, then reserve 128M
258 260
259 261
262
260Boot into System Kernel 263Boot into System Kernel
261======================= 264=======================
262 265
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index 2075c0658bf5..0bd32748a467 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -1,6 +1,105 @@
1kernel-doc nano-HOWTO 1kernel-doc nano-HOWTO
2===================== 2=====================
3 3
4How to format kernel-doc comments
5---------------------------------
6
7In order to provide embedded, 'C' friendly, easy to maintain,
8but consistent and extractable documentation of the functions and
9data structures in the Linux kernel, the Linux kernel has adopted
10a consistent style for documenting functions and their parameters,
11and structures and their members.
12
13The format for this documentation is called the kernel-doc format.
14It is documented in this Documentation/kernel-doc-nano-HOWTO.txt file.
15
16This style embeds the documentation within the source files, using
17a few simple conventions. The scripts/kernel-doc perl script, some
18SGML templates in Documentation/DocBook, and other tools understand
19these conventions, and are used to extract this embedded documentation
20into various documents.
21
22In order to provide good documentation of kernel functions and data
23structures, please use the following conventions to format your
24kernel-doc comments in Linux kernel source.
25
26We definitely need kernel-doc formatted documentation for functions
27that are exported to loadable modules using EXPORT_SYMBOL.
28
29We also look to provide kernel-doc formatted documentation for
30functions externally visible to other kernel files (not marked
31"static").
32
33We also recommend providing kernel-doc formatted documentation
34for private (file "static") routines, for consistency of kernel
35source code layout. But this is lower priority and at the
36discretion of the MAINTAINER of that kernel source file.
37
38Data structures visible in kernel include files should also be
39documented using kernel-doc formatted comments.
40
41The opening comment mark "/**" is reserved for kernel-doc comments.
42Only comments so marked will be considered by the kernel-doc scripts,
43and any comment so marked must be in kernel-doc format. Do not use
44"/**" to begin a comment block unless the comment block contains
45kernel-doc formatted comments. The closing comment marker for
46kernel-doc comments can be either "*/" or "**/".
47
48Kernel-doc comments should be placed just before the function
49or data structure being described.
50
51Example kernel-doc function comment:
52
53/**
54 * foobar() - short function description of foobar
55 * @arg1: Describe the first argument to foobar.
56 * @arg2: Describe the second argument to foobar.
57 * One can provide multiple line descriptions
58 * for arguments.
59 *
60 * A longer description, with more discussion of the function foobar()
61 * that might be useful to those using or modifying it. Begins with
62 * empty comment line, and may include additional embedded empty
63 * comment lines.
64 *
65 * The longer description can have multiple paragraphs.
66 **/
67
68The first line, with the short description, must be on a single line.
69
70The @argument descriptions must begin on the very next line following
71this opening short function description line, with no intervening
72empty comment lines.
73
74Example kernel-doc data structure comment.
75
76/**
77 * struct blah - the basic blah structure
78 * @mem1: describe the first member of struct blah
79 * @mem2: describe the second member of struct blah,
80 * perhaps with more lines and words.
81 *
82 * Longer description of this structure.
83 **/
84
85The kernel-doc function comments describe each parameter to the
86function, in order, with the @name lines.
87
88The kernel-doc data structure comments describe each structure member
89in the data structure, with the @name lines.
90
91The longer description formatting is "reflowed", losing your line
92breaks. So presenting carefully formatted lists within these
93descriptions won't work so well; derived documentation will lose
94the formatting.
95
96See the section below "How to add extractable documentation to your
97source files" for more details and notes on how to format kernel-doc
98comments.
99
100Components of the kernel-doc system
101-----------------------------------
102
4Many places in the source tree have extractable documentation in the 103Many places in the source tree have extractable documentation in the
5form of block comments above functions. The components of this system 104form of block comments above functions. The components of this system
6are: 105are:
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
index 5a4ef48224ae..28cdc2af2131 100644
--- a/Documentation/kernel-docs.txt
+++ b/Documentation/kernel-docs.txt
@@ -715,14 +715,14 @@
715 715
716 * Name: "Gary's Encyclopedia - The Linux Kernel" 716 * Name: "Gary's Encyclopedia - The Linux Kernel"
717 Author: Gary (I suppose...). 717 Author: Gary (I suppose...).
718 URL: http://www.lisoleg.net/cgi-bin/lisoleg.pl?view=kernel.htm 718 URL: http://slencyclopedia.berlios.de/index.html
719 Keywords: links, not found here?. 719 Keywords: linux, community, everything!
720 Description: Gary's Encyclopedia exists to allow the rapid finding 720 Description: Gary's Encyclopedia exists to allow the rapid finding
721 of documentation and other information of interest to GNU/Linux 721 of documentation and other information of interest to GNU/Linux
722 users. It has about 4000 links to external pages in 150 major 722 users. It has about 4000 links to external pages in 150 major
723 categories. This link is for kernel-specific links, documents, 723 categories. This link is for kernel-specific links, documents,
724 sites... Look there if you could not find here what you were 724 sites... This list is now hosted by developer.Berlios.de,
725 looking for. 725 but seems not to have been updated since sometime in 1999.
726 726
727 * Name: "The home page of Linux-MM" 727 * Name: "The home page of Linux-MM"
728 Author: The Linux-MM team. 728 Author: The Linux-MM team.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 508e2a2c9864..f1e970acad4c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -170,11 +170,6 @@ and is between 256 and 4096 characters. It is defined in the file
170 acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA 170 acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
171 Format: <irq>,<irq>... 171 Format: <irq>,<irq>...
172 172
173 acpi_new_pts_ordering [HW,ACPI]
174 Enforce the ACPI 2.0 ordering of the _PTS control
175 method wrt putting devices into low power states
176 default: pre ACPI 2.0 ordering of _PTS
177
178 acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT 173 acpi_no_auto_ssdt [HW,ACPI] Disable automatic loading of SSDT
179 174
180 acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS 175 acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
@@ -371,6 +366,12 @@ and is between 256 and 4096 characters. It is defined in the file
371 possible to determine what the correct size should be. 366 possible to determine what the correct size should be.
372 This option provides an override for these situations. 367 This option provides an override for these situations.
373 368
369 security= [SECURITY] Choose a security module to enable at boot.
370 If this boot parameter is not specified, only the first
371 security module asking for security registration will be
372 loaded. An invalid security module name will be treated
373 as if no module has been chosen.
374
374 capability.disable= 375 capability.disable=
375 [SECURITY] Disable capabilities. This would normally 376 [SECURITY] Disable capabilities. This would normally
376 be used only if an alternative security model is to be 377 be used only if an alternative security model is to be
@@ -380,6 +381,10 @@ and is between 256 and 4096 characters. It is defined in the file
380 ccw_timeout_log [S390] 381 ccw_timeout_log [S390]
381 See Documentation/s390/CommonIO for details. 382 See Documentation/s390/CommonIO for details.
382 383
384 cgroup_disable= [KNL] Disable a particular controller
385 Format: {name of the controller(s) to disable}
386 {Currently supported controllers - "memory"}
387
383 checkreqprot [SELINUX] Set initial checkreqprot flag value. 388 checkreqprot [SELINUX] Set initial checkreqprot flag value.
384 Format: { "0" | "1" } 389 Format: { "0" | "1" }
385 See security/selinux/Kconfig help text. 390 See security/selinux/Kconfig help text.
@@ -393,9 +398,6 @@ and is between 256 and 4096 characters. It is defined in the file
393 cio_ignore= [S390] 398 cio_ignore= [S390]
394 See Documentation/s390/CommonIO for details. 399 See Documentation/s390/CommonIO for details.
395 400
396 cio_msg= [S390]
397 See Documentation/s390/CommonIO for details.
398
399 clock= [BUGS=X86-32, HW] gettimeofday clocksource override. 401 clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
400 [Deprecated] 402 [Deprecated]
401 Forces specified clocksource (if available) to be used 403 Forces specified clocksource (if available) to be used
@@ -491,6 +493,11 @@ and is between 256 and 4096 characters. It is defined in the file
491 switching to the matching ttyS device later. The 493 switching to the matching ttyS device later. The
492 options are the same as for ttyS, above. 494 options are the same as for ttyS, above.
493 495
496 If the device connected to the port is not a TTY but a braille
497 device, prepend "brl," before the device type, for instance
498 console=brl,ttyS0
499 For now, only VisioBraille is supported.
500
494 earlycon= [KNL] Output early console device and options. 501 earlycon= [KNL] Output early console device and options.
495 uart[8250],io,<addr>[,options] 502 uart[8250],io,<addr>[,options]
496 uart[8250],mmio,<addr>[,options] 503 uart[8250],mmio,<addr>[,options]
@@ -551,6 +558,8 @@ and is between 256 and 4096 characters. It is defined in the file
551 1 will print _a lot_ more information - normally 558 1 will print _a lot_ more information - normally
552 only useful to kernel developers. 559 only useful to kernel developers.
553 560
561 debug_objects [KNL] Enable object debugging
562
554 decnet.addr= [HW,NET] 563 decnet.addr= [HW,NET]
555 Format: <area>[,<node>] 564 Format: <area>[,<node>]
556 See also Documentation/networking/decnet.txt. 565 See also Documentation/networking/decnet.txt.
@@ -622,8 +631,7 @@ and is between 256 and 4096 characters. It is defined in the file
622 eata= [HW,SCSI] 631 eata= [HW,SCSI]
623 632
624 edd= [EDD] 633 edd= [EDD]
625 Format: {"of[f]" | "sk[ipmbr]"} 634 Format: {"off" | "on" | "skip[mbr]"}
626 See comment in arch/i386/boot/edd.S
627 635
628 eisa_irq_edge= [PARISC,HW] 636 eisa_irq_edge= [PARISC,HW]
629 See header of drivers/parisc/eisa.c. 637 See header of drivers/parisc/eisa.c.
@@ -678,6 +686,12 @@ and is between 256 and 4096 characters. It is defined in the file
678 floppy= [HW] 686 floppy= [HW]
679 See Documentation/floppy.txt. 687 See Documentation/floppy.txt.
680 688
689 force_pal_cache_flush
690 [IA-64] Avoid check_sal_cache_flush which may hang on
691 buggy SAL_CACHE_FLUSH implementations. Using this
692 parameter will force ia64_sal_cache_flush to call
693 ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
694
681 gamecon.map[2|3]= 695 gamecon.map[2|3]=
682 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad 696 [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
683 support via parallel port (up to 5 devices per port) 697 support via parallel port (up to 5 devices per port)
@@ -764,11 +778,7 @@ and is between 256 and 4096 characters. It is defined in the file
764 Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]] 778 Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
765 779
766 ide= [HW] (E)IDE subsystem 780 ide= [HW] (E)IDE subsystem
767 Format: ide=nodma or ide=doubler or ide=reverse 781 Format: ide=nodma or ide=doubler
768 See Documentation/ide/ide.txt.
769
770 ide?= [HW] (E)IDE subsystem
771 Format: ide?=noprobe or chipset specific parameters.
772 See Documentation/ide/ide.txt. 782 See Documentation/ide/ide.txt.
773 783
774 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed 784 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
@@ -813,6 +823,19 @@ and is between 256 and 4096 characters. It is defined in the file
813 823
814 inttest= [IA64] 824 inttest= [IA64]
815 825
826 iommu= [x86]
827 off
828 force
829 noforce
830 biomerge
831 panic
832 nopanic
833 merge
834 nomerge
835 forcesac
836 soft
837
838
816 intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option 839 intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
817 off 840 off
818 Disable intel iommu driver. 841 Disable intel iommu driver.
@@ -829,6 +852,10 @@ and is between 256 and 4096 characters. It is defined in the file
829 than 32 bit addressing. The default is to look 852 than 32 bit addressing. The default is to look
830 for translation below 32 bit and if not available 853 for translation below 32 bit and if not available
831 then look in the higher range. 854 then look in the higher range.
855 strict [Default Off]
856 With this option on every unmap_single operation will
857 result in a hardware IOTLB flush operation as opposed
858 to batching them for performance.
832 859
833 io_delay= [X86-32,X86-64] I/O delay method 860 io_delay= [X86-32,X86-64] I/O delay method
834 0x80 861 0x80
@@ -845,7 +872,7 @@ and is between 256 and 4096 characters. It is defined in the file
845 arch/alpha/kernel/core_marvel.c. 872 arch/alpha/kernel/core_marvel.c.
846 873
847 ip= [IP_PNP] 874 ip= [IP_PNP]
848 See Documentation/nfsroot.txt. 875 See Documentation/filesystems/nfsroot.txt.
849 876
850 ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards 877 ip2= [HW] Set IO/IRQ pairs for up to 4 IntelliPort boards
851 See comment before ip2_setup() in 878 See comment before ip2_setup() in
@@ -929,8 +956,15 @@ and is between 256 and 4096 characters. It is defined in the file
929 kstack=N [X86-32,X86-64] Print N words from the kernel stack 956 kstack=N [X86-32,X86-64] Print N words from the kernel stack
930 in oops dumps. 957 in oops dumps.
931 958
959 kgdboc= [HW] kgdb over consoles.
960 Requires a tty driver that supports console polling.
961 (only serial supported for now)
962 Format: <serial_device>[,baud]
963
932 l2cr= [PPC] 964 l2cr= [PPC]
933 965
966 l3cr= [PPC]
967
934 lapic [X86-32,APIC] Enable the local APIC even if BIOS 968 lapic [X86-32,APIC] Enable the local APIC even if BIOS
935 disabled it. 969 disabled it.
936 970
@@ -1063,9 +1097,6 @@ and is between 256 and 4096 characters. It is defined in the file
1063 mac5380= [HW,SCSI] Format: 1097 mac5380= [HW,SCSI] Format:
1064 <can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags> 1098 <can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
1065 1099
1066 mac53c9x= [HW,SCSI] Format:
1067 <num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
1068
1069 machvec= [IA64] Force the use of a particular machine-vector 1100 machvec= [IA64] Force the use of a particular machine-vector
1070 (machvec) in a generic kernel. 1101 (machvec) in a generic kernel.
1071 Example: machvec=hpzx1_swiotlb 1102 Example: machvec=hpzx1_swiotlb
@@ -1135,6 +1166,11 @@ and is between 256 and 4096 characters. It is defined in the file
1135 or 1166 or
1136 memmap=0x10000$0x18690000 1167 memmap=0x10000$0x18690000
1137 1168
1169 memtest= [KNL,X86_64] Enable memtest
1170 Format: <integer>
1171 range: 0,4 : pattern number
1172 default : 0 <disable>
1173
1138 meye.*= [HW] Set MotionEye Camera parameters 1174 meye.*= [HW] Set MotionEye Camera parameters
1139 See Documentation/video4linux/meye.txt. 1175 See Documentation/video4linux/meye.txt.
1140 1176
@@ -1172,6 +1208,11 @@ and is between 256 and 4096 characters. It is defined in the file
1172 mtdparts= [MTD] 1208 mtdparts= [MTD]
1173 See drivers/mtd/cmdlinepart.c. 1209 See drivers/mtd/cmdlinepart.c.
1174 1210
1211 mtdset= [ARM]
1212 ARM/S3C2412 JIVE boot control
1213
1214 See arch/arm/mach-s3c2412/mach-jive.c
1215
1175 mtouchusb.raw_coordinates= 1216 mtouchusb.raw_coordinates=
1176 [HW] Make the MicroTouch USB driver use raw coordinates 1217 [HW] Make the MicroTouch USB driver use raw coordinates
1177 ('y', default) or cooked coordinates ('n') 1218 ('y', default) or cooked coordinates ('n')
@@ -1199,10 +1240,10 @@ and is between 256 and 4096 characters. It is defined in the file
1199 file if at all. 1240 file if at all.
1200 1241
1201 nfsaddrs= [NFS] 1242 nfsaddrs= [NFS]
1202 See Documentation/nfsroot.txt. 1243 See Documentation/filesystems/nfsroot.txt.
1203 1244
1204 nfsroot= [NFS] nfs root filesystem for disk-less boxes. 1245 nfsroot= [NFS] nfs root filesystem for disk-less boxes.
1205 See Documentation/nfsroot.txt. 1246 See Documentation/filesystems/nfsroot.txt.
1206 1247
1207 nfs.callback_tcpport= 1248 nfs.callback_tcpport=
1208 [NFS] set the TCP port on which the NFSv4 callback 1249 [NFS] set the TCP port on which the NFSv4 callback
@@ -1252,8 +1293,16 @@ and is between 256 and 4096 characters. It is defined in the file
1252 noexec [IA-64] 1293 noexec [IA-64]
1253 1294
1254 noexec [X86-32,X86-64] 1295 noexec [X86-32,X86-64]
1296 On X86-32 available only on PAE configured kernels.
1255 noexec=on: enable non-executable mappings (default) 1297 noexec=on: enable non-executable mappings (default)
1256 noexec=off: disable nn-executable mappings 1298 noexec=off: disable non-executable mappings
1299
1300 noexec32 [X86-64]
1301 This affects only 32-bit executables.
1302 noexec32=on: enable non-executable mappings (default)
1303 read doesn't imply executable mappings
1304 noexec32=off: disable non-executable mappings
1305 read implies executable mappings
1257 1306
1258 nofxsr [BUGS=X86-32] Disables x86 floating point extended 1307 nofxsr [BUGS=X86-32] Disables x86 floating point extended
1259 register save and restore. The kernel will only save 1308 register save and restore. The kernel will only save
@@ -1340,6 +1389,10 @@ and is between 256 and 4096 characters. It is defined in the file
1340 1389
1341 nowb [ARM] 1390 nowb [ARM]
1342 1391
1392 nptcg= [IA64] Override max number of concurrent global TLB
1393 purges which is reported from either PAL_VM_SUMMARY or
1394 SAL PALO.
1395
1343 numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. 1396 numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
1344 one of ['zone', 'node', 'default'] can be specified 1397 one of ['zone', 'node', 'default'] can be specified
1345 This can be set from sysctl after boot. 1398 This can be set from sysctl after boot.
@@ -1347,6 +1400,13 @@ and is between 256 and 4096 characters. It is defined in the file
1347 1400
1348 nr_uarts= [SERIAL] maximum number of UARTs to be registered. 1401 nr_uarts= [SERIAL] maximum number of UARTs to be registered.
1349 1402
1403 olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
1404 Rather than timing out after 20 ms if an EC
1405 command is not properly ACKed, override the length
1406 of the timeout. We have interrupts disabled while
1407 waiting for the ACK, so if this is set too high
1408 interrupts *may* be lost!
1409
1350 opl3= [HW,OSS] 1410 opl3= [HW,OSS]
1351 Format: <io> 1411 Format: <io>
1352 1412
@@ -1429,10 +1489,6 @@ and is between 256 and 4096 characters. It is defined in the file
1429 nomsi [MSI] If the PCI_MSI kernel config parameter is 1489 nomsi [MSI] If the PCI_MSI kernel config parameter is
1430 enabled, this kernel boot option can be used to 1490 enabled, this kernel boot option can be used to
1431 disable the use of MSI interrupts system-wide. 1491 disable the use of MSI interrupts system-wide.
1432 nosort [X86-32] Don't sort PCI devices according to
1433 order given by the PCI BIOS. This sorting is
1434 done to get a device order compatible with
1435 older kernels.
1436 biosirq [X86-32] Use PCI BIOS calls to get the interrupt 1492 biosirq [X86-32] Use PCI BIOS calls to get the interrupt
1437 routing table. These calls are known to be buggy 1493 routing table. These calls are known to be buggy
1438 on several machines and they hang the machine 1494 on several machines and they hang the machine
@@ -1474,6 +1530,8 @@ and is between 256 and 4096 characters. It is defined in the file
1474 This is normally done in pci_enable_device(), 1530 This is normally done in pci_enable_device(),
1475 so this option is a temporary workaround 1531 so this option is a temporary workaround
1476 for broken drivers that don't call it. 1532 for broken drivers that don't call it.
1533 skip_isa_align [X86] do not align io start addr, so can
1534 handle more pci cards
1477 firmware [ARM] Do not re-enumerate the bus but instead 1535 firmware [ARM] Do not re-enumerate the bus but instead
1478 just use the configuration from the 1536 just use the configuration from the
1479 bootloader. This is currently used on 1537 bootloader. This is currently used on
diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt
index 266955d23ee6..09b55e461740 100644
--- a/Documentation/keys-request-key.txt
+++ b/Documentation/keys-request-key.txt
@@ -11,26 +11,29 @@ request_key*():
11 11
12 struct key *request_key(const struct key_type *type, 12 struct key *request_key(const struct key_type *type,
13 const char *description, 13 const char *description,
14 const char *callout_string); 14 const char *callout_info);
15 15
16or: 16or:
17 17
18 struct key *request_key_with_auxdata(const struct key_type *type, 18 struct key *request_key_with_auxdata(const struct key_type *type,
19 const char *description, 19 const char *description,
20 const char *callout_string, 20 const char *callout_info,
21 size_t callout_len,
21 void *aux); 22 void *aux);
22 23
23or: 24or:
24 25
25 struct key *request_key_async(const struct key_type *type, 26 struct key *request_key_async(const struct key_type *type,
26 const char *description, 27 const char *description,
27 const char *callout_string); 28 const char *callout_info,
29 size_t callout_len);
28 30
29or: 31or:
30 32
31 struct key *request_key_async_with_auxdata(const struct key_type *type, 33 struct key *request_key_async_with_auxdata(const struct key_type *type,
32 const char *description, 34 const char *description,
33 const char *callout_string, 35 const char *callout_info,
36 size_t callout_len,
34 void *aux); 37 void *aux);
35 38
36Or by userspace invoking the request_key system call: 39Or by userspace invoking the request_key system call:
diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 51652d39e61c..d5c7a57d1700 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -170,7 +170,8 @@ The key service provides a number of features besides keys:
170 amount of description and payload space that can be consumed. 170 amount of description and payload space that can be consumed.
171 171
172 The user can view information on this and other statistics through procfs 172 The user can view information on this and other statistics through procfs
173 files. 173 files. The root user may also alter the quota limits through sysctl files
174 (see the section "New procfs files").
174 175
175 Process-specific and thread-specific keyrings are not counted towards a 176 Process-specific and thread-specific keyrings are not counted towards a
176 user's quota. 177 user's quota.
@@ -329,6 +330,27 @@ about the status of the key service:
329 <bytes>/<max> Key size quota 330 <bytes>/<max> Key size quota
330 331
331 332
333Four new sysctl files have been added also for the purpose of controlling the
334quota limits on keys:
335
336 (*) /proc/sys/kernel/keys/root_maxkeys
337 /proc/sys/kernel/keys/root_maxbytes
338
339 These files hold the maximum number of keys that root may have and the
340 maximum total number of bytes of data that root may have stored in those
341 keys.
342
343 (*) /proc/sys/kernel/keys/maxkeys
344 /proc/sys/kernel/keys/maxbytes
345
346 These files hold the maximum number of keys that each non-root user may
347 have and the maximum total number of bytes of data that each of those
348 users may have stored in their keys.
349
350Root may alter these by writing each new limit as a decimal number string to
351the appropriate file.
352
353
332=============================== 354===============================
333USERSPACE SYSTEM CALL INTERFACE 355USERSPACE SYSTEM CALL INTERFACE
334=============================== 356===============================
@@ -711,6 +733,27 @@ The keyctl syscall functions are:
711 The assumed authoritative key is inherited across fork and exec. 733 The assumed authoritative key is inherited across fork and exec.
712 734
713 735
736 (*) Get the LSM security context attached to a key.
737
738 long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer,
739 size_t buflen)
740
741 This function returns a string that represents the LSM security context
742 attached to a key in the buffer provided.
743
744 Unless there's an error, it always returns the amount of data it could
745 produce, even if that's too big for the buffer, but it won't copy more
746 than requested to userspace. If the buffer pointer is NULL then no copy
747 will take place.
748
749 A NUL character is included at the end of the string if the buffer is
750 sufficiently big. This is included in the returned count. If no LSM is
751 in force then an empty string will be returned.
752
753 A process must have view permission on the key for this function to be
754 successful.
755
756
714=============== 757===============
715KERNEL SERVICES 758KERNEL SERVICES
716=============== 759===============
@@ -771,7 +814,7 @@ payload contents" for more information.
771 814
772 struct key *request_key(const struct key_type *type, 815 struct key *request_key(const struct key_type *type,
773 const char *description, 816 const char *description,
774 const char *callout_string); 817 const char *callout_info);
775 818
776 This is used to request a key or keyring with a description that matches 819 This is used to request a key or keyring with a description that matches
777 the description specified according to the key type's match function. This 820 the description specified according to the key type's match function. This
@@ -793,24 +836,28 @@ payload contents" for more information.
793 836
794 struct key *request_key_with_auxdata(const struct key_type *type, 837 struct key *request_key_with_auxdata(const struct key_type *type,
795 const char *description, 838 const char *description,
796 const char *callout_string, 839 const void *callout_info,
840 size_t callout_len,
797 void *aux); 841 void *aux);
798 842
799 This is identical to request_key(), except that the auxiliary data is 843 This is identical to request_key(), except that the auxiliary data is
800 passed to the key_type->request_key() op if it exists. 844 passed to the key_type->request_key() op if it exists, and the callout_info
845 is a blob of length callout_len, if given (the length may be 0).
801 846
802 847
803(*) A key can be requested asynchronously by calling one of: 848(*) A key can be requested asynchronously by calling one of:
804 849
805 struct key *request_key_async(const struct key_type *type, 850 struct key *request_key_async(const struct key_type *type,
806 const char *description, 851 const char *description,
807 const char *callout_string); 852 const void *callout_info,
853 size_t callout_len);
808 854
809 or: 855 or:
810 856
811 struct key *request_key_async_with_auxdata(const struct key_type *type, 857 struct key *request_key_async_with_auxdata(const struct key_type *type,
812 const char *description, 858 const char *description,
813 const char *callout_string, 859 const void *callout_info,
860 size_t callout_len,
814 void *aux); 861 void *aux);
815 862
816 which are asynchronous equivalents of request_key() and 863 which are asynchronous equivalents of request_key() and
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index bf3256e04027..51a8021ee532 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -305,7 +305,7 @@ should not be manipulated by any other user.
305 305
306A kset keeps its children in a standard kernel linked list. Kobjects point 306A kset keeps its children in a standard kernel linked list. Kobjects point
307back to their containing kset via their kset field. In almost all cases, 307back to their containing kset via their kset field. In almost all cases,
308the kobjects belonging to a ket have that kset (or, strictly, its embedded 308the kobjects belonging to a kset have that kset (or, strictly, its embedded
309kobject) in their parent. 309kobject) in their parent.
310 310
311As a kset contains a kobject within it, it should always be dynamically 311As a kset contains a kobject within it, it should always be dynamically
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index be89f393274f..6877e7187113 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -37,6 +37,11 @@ registration function such as register_kprobe() specifies where
37the probe is to be inserted and what handler is to be called when 37the probe is to be inserted and what handler is to be called when
38the probe is hit. 38the probe is hit.
39 39
40There are also register_/unregister_*probes() functions for batch
41registration/unregistration of a group of *probes. These functions
42can speed up the unregistration process when you have to unregister
43a lot of probes at once.
44
40The next three subsections explain how the different types of 45The next three subsections explain how the different types of
41probes work. They explain certain things that you'll need to 46probes work. They explain certain things that you'll need to
42know in order to make the best use of Kprobes -- e.g., the 47know in order to make the best use of Kprobes -- e.g., the
@@ -190,10 +195,11 @@ code mapping.
1904. API Reference 1954. API Reference
191 196
192The Kprobes API includes a "register" function and an "unregister" 197The Kprobes API includes a "register" function and an "unregister"
193function for each type of probe. Here are terse, mini-man-page 198function for each type of probe. The API also includes "register_*probes"
194specifications for these functions and the associated probe handlers 199and "unregister_*probes" functions for (un)registering arrays of probes.
195that you'll write. See the files in the samples/kprobes/ sub-directory 200Here are terse, mini-man-page specifications for these functions and
196for examples. 201the associated probe handlers that you'll write. See the files in the
202samples/kprobes/ sub-directory for examples.
197 203
1984.1 register_kprobe 2044.1 register_kprobe
199 205
@@ -319,6 +325,43 @@ void unregister_kretprobe(struct kretprobe *rp);
319Removes the specified probe. The unregister function can be called 325Removes the specified probe. The unregister function can be called
320at any time after the probe has been registered. 326at any time after the probe has been registered.
321 327
328NOTE:
329If the functions find an incorrect probe (ex. an unregistered probe),
330they clear the addr field of the probe.
331
3324.5 register_*probes
333
334#include <linux/kprobes.h>
335int register_kprobes(struct kprobe **kps, int num);
336int register_kretprobes(struct kretprobe **rps, int num);
337int register_jprobes(struct jprobe **jps, int num);
338
339Registers each of the num probes in the specified array. If any
340error occurs during registration, all probes in the array, up to
341the bad probe, are safely unregistered before the register_*probes
342function returns.
343- kps/rps/jps: an array of pointers to *probe data structures
344- num: the number of array entries.
345
346NOTE:
347You have to allocate (or define) an array of pointers and set all
348of the array entries before using these functions.
349
3504.6 unregister_*probes
351
352#include <linux/kprobes.h>
353void unregister_kprobes(struct kprobe **kps, int num);
354void unregister_kretprobes(struct kretprobe **rps, int num);
355void unregister_jprobes(struct jprobe **jps, int num);
356
357Removes each of the num probes in the specified array at once.
358
359NOTE:
360If the functions find some incorrect probes (ex. unregistered
361probes) in the specified array, they clear the addr field of those
362incorrect probes. However, other probes in the array are
363unregistered correctly.
364
3225. Kprobes Features and Limitations 3655. Kprobes Features and Limitations
323 366
324Kprobes allows multiple probes at the same address. Currently, 367Kprobes allows multiple probes at the same address. Currently,
diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt
index 23df051dbf69..79b7dbd22141 100644
--- a/Documentation/laptops/acer-wmi.txt
+++ b/Documentation/laptops/acer-wmi.txt
@@ -80,7 +80,7 @@ once you enable the radio, will depend on your hardware and driver combination.
80e.g. With the BCM4318 on the Acer Aspire 5020 series: 80e.g. With the BCM4318 on the Acer Aspire 5020 series:
81 81
82ndiswrapper: Light blinks on when transmitting 82ndiswrapper: Light blinks on when transmitting
83bcm43xx/b43: Solid light, blinks off when transmitting 83b43: Solid light, blinks off when transmitting
84 84
85Wireless radio control is unconditionally enabled - all Acer laptops that support 85Wireless radio control is unconditionally enabled - all Acer laptops that support
86acer-wmi come with built-in wireless. However, should you feel so inclined to 86acer-wmi come with built-in wireless. However, should you feel so inclined to
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index 76cb428435da..64b3f146e4b0 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1,7 +1,7 @@
1 ThinkPad ACPI Extras Driver 1 ThinkPad ACPI Extras Driver
2 2
3 Version 0.19 3 Version 0.20
4 January 06th, 2008 4 April 09th, 2008
5 5
6 Borislav Deianov <borislav@users.sf.net> 6 Borislav Deianov <borislav@users.sf.net>
7 Henrique de Moraes Holschuh <hmh@hmh.eng.br> 7 Henrique de Moraes Holschuh <hmh@hmh.eng.br>
@@ -18,6 +18,11 @@ This driver used to be named ibm-acpi until kernel 2.6.21 and release
18moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel 18moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
192.6.22, and release 0.14. 192.6.22, and release 0.14.
20 20
21The driver is named "thinkpad-acpi". In some places, like module
22names, "thinkpad_acpi" is used because of userspace issues.
23
24"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
25long due to length limitations on some Linux kernel versions.
21 26
22Status 27Status
23------ 28------
@@ -498,7 +503,7 @@ generate input device EV_KEY events.
498In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW 503In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
499events for switches: 504events for switches:
500 505
501SW_RADIO T60 and later hardware rfkill rocker switch 506SW_RFKILL_ALL T60 and later hardware rfkill rocker switch
502SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A 507SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A
503 508
504Non hot-key ACPI HKEY event map: 509Non hot-key ACPI HKEY event map:
@@ -571,6 +576,47 @@ netlink interface and the input layer interface, and don't bother at all
571with hotkey_report_mode. 576with hotkey_report_mode.
572 577
573 578
579Brightness hotkey notes:
580
581These are the current sane choices for brightness key mapping in
582thinkpad-acpi:
583
584For IBM and Lenovo models *without* ACPI backlight control (the ones on
585which thinkpad-acpi will autoload its backlight interface by default,
586and on which ACPI video does not export a backlight interface):
587
5881. Don't enable or map the brightness hotkeys in thinkpad-acpi, as
589 these older firmware versions unfortunately won't respect the hotkey
590 mask for brightness keys anyway, and always react to them. This
591 usually works fine, unless X.org drivers are doing something to block
592 the BIOS. In that case, use (3) below. This is the default mode of
593 operation.
594
5952. Enable the hotkeys, but map them to something else that is NOT
596 KEY_BRIGHTNESS_UP/DOWN or any other keycode that would cause
597 userspace to try to change the backlight level, and use that as an
598 on-screen-display hint.
599
6003. IF AND ONLY IF X.org drivers find a way to block the firmware from
601 automatically changing the brightness, enable the hotkeys and map
602 them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN, and feed that to
603 something that calls xbacklight. thinkpad-acpi will not be able to
604 change brightness in that case either, so you should disable its
605 backlight interface.
606
607For Lenovo models *with* ACPI backlight control:
608
6091. Load up ACPI video and use that. ACPI video will report ACPI
610 events for brightness change keys. Do not mess with thinkpad-acpi
611 defaults in this case. thinkpad-acpi should not have anything to do
612 with backlight events in a scenario where ACPI video is loaded:
613 brightness hotkeys must be disabled, and the backlight interface is
614 to be kept disabled as well. This is the default mode of operation.
615
6162. Do *NOT* load up ACPI video, enable the hotkeys in thinkpad-acpi,
617 and map them to KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN. Process
618 these keys in userspace somehow (e.g. by calling xbacklight).
619
574Bluetooth 620Bluetooth
575--------- 621---------
576 622
@@ -647,16 +693,31 @@ while others are still having problems. For more information:
647 693
648https://bugs.freedesktop.org/show_bug.cgi?id=2000 694https://bugs.freedesktop.org/show_bug.cgi?id=2000
649 695
650ThinkLight control -- /proc/acpi/ibm/light 696ThinkLight control
651------------------------------------------ 697------------------
698
699procfs: /proc/acpi/ibm/light
700sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
652 701
653The current status of the ThinkLight can be found in this file. A few 702procfs notes:
654models which do not make the status available will show it as 703
655"unknown". The available commands are: 704The ThinkLight status can be read and set through the procfs interface. A
705few models which do not make the status available will show the ThinkLight
706status as "unknown". The available commands are:
656 707
657 echo on > /proc/acpi/ibm/light 708 echo on > /proc/acpi/ibm/light
658 echo off > /proc/acpi/ibm/light 709 echo off > /proc/acpi/ibm/light
659 710
711sysfs notes:
712
713The ThinkLight sysfs interface is documented by the LED class
714documentation, in Documentation/leds-class.txt. The ThinkLight LED name
715is "tpacpi::thinklight".
716
717Due to limitations in the sysfs LED class, if the status of the thinklight
718cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
719It is impossible to know if the status returned through sysfs is valid.
720
660Docking / undocking -- /proc/acpi/ibm/dock 721Docking / undocking -- /proc/acpi/ibm/dock
661------------------------------------------ 722------------------------------------------
662 723
@@ -815,28 +876,63 @@ The cmos command interface is prone to firmware split-brain problems, as
815in newer ThinkPads it is just a compatibility layer. Do not use it, it is 876in newer ThinkPads it is just a compatibility layer. Do not use it, it is
816exported just as a debug tool. 877exported just as a debug tool.
817 878
818LED control -- /proc/acpi/ibm/led 879LED control
819--------------------------------- 880-----------
881
882procfs: /proc/acpi/ibm/led
883sysfs attributes: as per LED class, see below for names
884
885Some of the LED indicators can be controlled through this feature. On
886some older ThinkPad models, it is possible to query the status of the
887LED indicators as well. Newer ThinkPads cannot query the real status
888of the LED indicators.
820 889
821Some of the LED indicators can be controlled through this feature. The 890procfs notes:
822available commands are: 891
892The available commands are:
823 893
824 echo '<led number> on' >/proc/acpi/ibm/led 894 echo '<LED number> on' >/proc/acpi/ibm/led
825 echo '<led number> off' >/proc/acpi/ibm/led 895 echo '<LED number> off' >/proc/acpi/ibm/led
826 echo '<led number> blink' >/proc/acpi/ibm/led 896 echo '<LED number> blink' >/proc/acpi/ibm/led
827 897
828The <led number> range is 0 to 7. The set of LEDs that can be 898The <LED number> range is 0 to 7. The set of LEDs that can be
829controlled varies from model to model. Here is the mapping on the X40: 899controlled varies from model to model. Here is the common ThinkPad
900mapping:
830 901
831 0 - power 902 0 - power
832 1 - battery (orange) 903 1 - battery (orange)
833 2 - battery (green) 904 2 - battery (green)
834 3 - UltraBase 905 3 - UltraBase/dock
835 4 - UltraBay 906 4 - UltraBay
907 5 - UltraBase battery slot
908 6 - (unknown)
836 7 - standby 909 7 - standby
837 910
838All of the above can be turned on and off and can be made to blink. 911All of the above can be turned on and off and can be made to blink.
839 912
913sysfs notes:
914
915The ThinkPad LED sysfs interface is described in detail by the LED class
916documentation, in Documentation/leds-class.txt.
917
918The LEDs are named (in LED ID order, from 0 to 7):
919"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
920"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
921"tpacpi::unknown_led", "tpacpi::standby".
922
923Due to limitations in the sysfs LED class, if the status of the LED
924indicators cannot be read due to an error, thinkpad-acpi will report it as
925a brightness of zero (same as LED off).
926
927If the thinkpad firmware doesn't support reading the current status,
928trying to read the current LED brightness will just return whatever
929brightness was last written to that attribute.
930
931These LEDs can blink using hardware acceleration. To request that a
932ThinkPad indicator LED should blink in hardware accelerated mode, use the
933"timer" trigger, and leave the delay_on and delay_off parameters set to
934zero (to request hardware acceleration autodetection).
935
840ACPI sounds -- /proc/acpi/ibm/beep 936ACPI sounds -- /proc/acpi/ibm/beep
841---------------------------------- 937----------------------------------
842 938
@@ -1090,6 +1186,15 @@ it there will be the following attributes:
1090 dim the display. 1186 dim the display.
1091 1187
1092 1188
1189WARNING:
1190
1191 Whatever you do, do NOT ever call the thinkpad-acpi backlight-level change
1192 interface and the ACPI-based backlight level change interface
1193 (available on newer BIOSes, and driven by the Linux ACPI video driver)
1194 at the same time. The two will interact in bad ways, do funny things,
1195 and maybe reduce the life of the backlight lamps by needlessly kicking
1196 its level up and down at every change.
1197
1093Volume control -- /proc/acpi/ibm/volume 1198Volume control -- /proc/acpi/ibm/volume
1094--------------------------------------- 1199---------------------------------------
1095 1200
diff --git a/Documentation/leds-class.txt b/Documentation/leds-class.txt
index 56757c751d6f..18860ad9935a 100644
--- a/Documentation/leds-class.txt
+++ b/Documentation/leds-class.txt
@@ -19,6 +19,12 @@ optimises away.
19 19
20Complex triggers whilst available to all LEDs have LED specific 20Complex triggers whilst available to all LEDs have LED specific
21parameters and work on a per LED basis. The timer trigger is an example. 21parameters and work on a per LED basis. The timer trigger is an example.
22The timer trigger will periodically change the LED brightness between
23LED_OFF and the current brightness setting. The "on" and "off" time can
24be specified via /sys/class/leds/<device>/delay_{on,off} in milliseconds.
25You can change the brightness value of an LED independently of the timer
26trigger. However, if you set the brightness value to LED_OFF it will
27also disable the timer trigger.
22 28
23You can change triggers in a similar manner to the way an IO scheduler 29You can change triggers in a similar manner to the way an IO scheduler
24is chosen (via /sys/class/leds/<device>/trigger). Trigger specific 30is chosen (via /sys/class/leds/<device>/trigger). Trigger specific
@@ -63,9 +69,9 @@ value if it is called with *delay_on==0 && *delay_off==0 parameters. In
63this case the driver should give back the chosen value through delay_on 69this case the driver should give back the chosen value through delay_on
64and delay_off parameters to the leds subsystem. 70and delay_off parameters to the leds subsystem.
65 71
66Any call to the brightness_set() callback function should cancel the 72Setting the brightness to zero with the brightness_set() callback function
67previously programmed hardware blinking function so setting the brightness 73should completely turn off the LED and cancel the previously programmed
68to 0 can also cancel the blinking of the LED. 74hardware blinking function, if any.
69 75
70 76
71Known Issues 77Known Issues
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index bec5a32e4095..82fafe0429fe 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,7 +1,7 @@
1/*P:100 This is the Launcher code, a simple program which lays out the 1/*P:100 This is the Launcher code, a simple program which lays out the
2 * "physical" memory for the new Guest by mapping the kernel image and the 2 * "physical" memory for the new Guest by mapping the kernel image and
3 * virtual devices, then reads repeatedly from /dev/lguest to run the Guest. 3 * the virtual devices, then opens /dev/lguest to tell the kernel
4:*/ 4 * about the Guest and control it. :*/
5#define _LARGEFILE64_SOURCE 5#define _LARGEFILE64_SOURCE
6#define _GNU_SOURCE 6#define _GNU_SOURCE
7#include <stdio.h> 7#include <stdio.h>
@@ -43,7 +43,7 @@
43#include "linux/virtio_console.h" 43#include "linux/virtio_console.h"
44#include "linux/virtio_ring.h" 44#include "linux/virtio_ring.h"
45#include "asm-x86/bootparam.h" 45#include "asm-x86/bootparam.h"
46/*L:110 We can ignore the 38 include files we need for this program, but I do 46/*L:110 We can ignore the 39 include files we need for this program, but I do
47 * want to draw attention to the use of kernel-style types. 47 * want to draw attention to the use of kernel-style types.
48 * 48 *
49 * As Linus said, "C is a Spartan language, and so should your naming be." I 49 * As Linus said, "C is a Spartan language, and so should your naming be." I
@@ -131,6 +131,9 @@ struct device
131 /* Any queues attached to this device */ 131 /* Any queues attached to this device */
132 struct virtqueue *vq; 132 struct virtqueue *vq;
133 133
134 /* Handle status being finalized (ie. feature bits stable). */
135 void (*ready)(struct device *me);
136
134 /* Device-specific data. */ 137 /* Device-specific data. */
135 void *priv; 138 void *priv;
136}; 139};
@@ -154,6 +157,9 @@ struct virtqueue
154 157
155 /* The routine to call when the Guest pings us. */ 158 /* The routine to call when the Guest pings us. */
156 void (*handle_output)(int fd, struct virtqueue *me); 159 void (*handle_output)(int fd, struct virtqueue *me);
160
161 /* Outstanding buffers */
162 unsigned int inflight;
157}; 163};
158 164
159/* Remember the arguments to the program so we can "reboot" */ 165/* Remember the arguments to the program so we can "reboot" */
@@ -320,7 +326,7 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
320 err(1, "Reading program headers"); 326 err(1, "Reading program headers");
321 327
322 /* Try all the headers: there are usually only three. A read-only one, 328 /* Try all the headers: there are usually only three. A read-only one,
323 * a read-write one, and a "note" section which isn't loadable. */ 329 * a read-write one, and a "note" section which we don't load. */
324 for (i = 0; i < ehdr->e_phnum; i++) { 330 for (i = 0; i < ehdr->e_phnum; i++) {
325 /* If this isn't a loadable segment, we ignore it */ 331 /* If this isn't a loadable segment, we ignore it */
326 if (phdr[i].p_type != PT_LOAD) 332 if (phdr[i].p_type != PT_LOAD)
@@ -387,7 +393,7 @@ static unsigned long load_kernel(int fd)
387 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) 393 if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
388 return map_elf(fd, &hdr); 394 return map_elf(fd, &hdr);
389 395
390 /* Otherwise we assume it's a bzImage, and try to unpack it */ 396 /* Otherwise we assume it's a bzImage, and try to load it. */
391 return load_bzimage(fd); 397 return load_bzimage(fd);
392} 398}
393 399
@@ -433,12 +439,12 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
433 return len; 439 return len;
434} 440}
435 441
436/* Once we know how much memory we have, we can construct simple linear page 442/* Once we know how much memory we have we can construct simple linear page
437 * tables which set virtual == physical which will get the Guest far enough 443 * tables which set virtual == physical which will get the Guest far enough
438 * into the boot to create its own. 444 * into the boot to create its own.
439 * 445 *
440 * We lay them out of the way, just below the initrd (which is why we need to 446 * We lay them out of the way, just below the initrd (which is why we need to
441 * know its size). */ 447 * know its size here). */
442static unsigned long setup_pagetables(unsigned long mem, 448static unsigned long setup_pagetables(unsigned long mem,
443 unsigned long initrd_size) 449 unsigned long initrd_size)
444{ 450{
@@ -699,6 +705,7 @@ static unsigned get_vq_desc(struct virtqueue *vq,
699 errx(1, "Looped descriptor"); 705 errx(1, "Looped descriptor");
700 } while ((i = next_desc(vq, i)) != vq->vring.num); 706 } while ((i = next_desc(vq, i)) != vq->vring.num);
701 707
708 vq->inflight++;
702 return head; 709 return head;
703} 710}
704 711
@@ -716,6 +723,7 @@ static void add_used(struct virtqueue *vq, unsigned int head, int len)
716 /* Make sure buffer is written before we update index. */ 723 /* Make sure buffer is written before we update index. */
717 wmb(); 724 wmb();
718 vq->vring.used->idx++; 725 vq->vring.used->idx++;
726 vq->inflight--;
719} 727}
720 728
721/* This actually sends the interrupt for this virtqueue */ 729/* This actually sends the interrupt for this virtqueue */
@@ -723,8 +731,9 @@ static void trigger_irq(int fd, struct virtqueue *vq)
723{ 731{
724 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; 732 unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
725 733
726 /* If they don't want an interrupt, don't send one. */ 734 /* If they don't want an interrupt, don't send one, unless empty. */
727 if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) 735 if ((vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
736 && vq->inflight)
728 return; 737 return;
729 738
730 /* Send the Guest an interrupt to tell them we used something up. */ 739 /* Send the Guest an interrupt to tell them we used something up. */
@@ -850,7 +859,8 @@ static void handle_console_output(int fd, struct virtqueue *vq)
850 * 859 *
851 * Handling output for network is also simple: we get all the output buffers 860 * Handling output for network is also simple: we get all the output buffers
852 * and write them (ignoring the first element) to this device's file descriptor 861 * and write them (ignoring the first element) to this device's file descriptor
853 * (stdout). */ 862 * (/dev/net/tun).
863 */
854static void handle_net_output(int fd, struct virtqueue *vq) 864static void handle_net_output(int fd, struct virtqueue *vq)
855{ 865{
856 unsigned int head, out, in; 866 unsigned int head, out, in;
@@ -924,24 +934,40 @@ static void enable_fd(int fd, struct virtqueue *vq)
924 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd)); 934 write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
925} 935}
926 936
927/* Resetting a device is fairly easy. */ 937/* When the Guest tells us they updated the status field, we handle it. */
928static void reset_device(struct device *dev) 938static void update_device_status(struct device *dev)
929{ 939{
930 struct virtqueue *vq; 940 struct virtqueue *vq;
931 941
932 verbose("Resetting device %s\n", dev->name); 942 /* This is a reset. */
933 /* Clear the status. */ 943 if (dev->desc->status == 0) {
934 dev->desc->status = 0; 944 verbose("Resetting device %s\n", dev->name);
935 945
936 /* Clear any features they've acked. */ 946 /* Clear any features they've acked. */
937 memset(get_feature_bits(dev) + dev->desc->feature_len, 0, 947 memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
938 dev->desc->feature_len); 948 dev->desc->feature_len);
939 949
940 /* Zero out the virtqueues. */ 950 /* Zero out the virtqueues. */
941 for (vq = dev->vq; vq; vq = vq->next) { 951 for (vq = dev->vq; vq; vq = vq->next) {
942 memset(vq->vring.desc, 0, 952 memset(vq->vring.desc, 0,
943 vring_size(vq->config.num, getpagesize())); 953 vring_size(vq->config.num, getpagesize()));
944 vq->last_avail_idx = 0; 954 vq->last_avail_idx = 0;
955 }
956 } else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
957 warnx("Device %s configuration FAILED", dev->name);
958 } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
959 unsigned int i;
960
961 verbose("Device %s OK: offered", dev->name);
962 for (i = 0; i < dev->desc->feature_len; i++)
963 verbose(" %08x", get_feature_bits(dev)[i]);
964 verbose(", accepted");
965 for (i = 0; i < dev->desc->feature_len; i++)
966 verbose(" %08x", get_feature_bits(dev)
967 [dev->desc->feature_len+i]);
968
969 if (dev->ready)
970 dev->ready(dev);
945 } 971 }
946} 972}
947 973
@@ -953,9 +979,9 @@ static void handle_output(int fd, unsigned long addr)
953 979
954 /* Check each device and virtqueue. */ 980 /* Check each device and virtqueue. */
955 for (i = devices.dev; i; i = i->next) { 981 for (i = devices.dev; i; i = i->next) {
956 /* Notifications to device descriptors reset the device. */ 982 /* Notifications to device descriptors update device status. */
957 if (from_guest_phys(addr) == i->desc) { 983 if (from_guest_phys(addr) == i->desc) {
958 reset_device(i); 984 update_device_status(i);
959 return; 985 return;
960 } 986 }
961 987
@@ -1003,8 +1029,8 @@ static void handle_input(int fd)
1003 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0) 1029 if (select(devices.max_infd+1, &fds, NULL, NULL, &poll) == 0)
1004 break; 1030 break;
1005 1031
1006 /* Otherwise, call the device(s) which have readable 1032 /* Otherwise, call the device(s) which have readable file
1007 * file descriptors and a method of handling them. */ 1033 * descriptors and a method of handling them. */
1008 for (i = devices.dev; i; i = i->next) { 1034 for (i = devices.dev; i; i = i->next) {
1009 if (i->handle_input && FD_ISSET(i->fd, &fds)) { 1035 if (i->handle_input && FD_ISSET(i->fd, &fds)) {
1010 int dev_fd; 1036 int dev_fd;
@@ -1015,8 +1041,7 @@ static void handle_input(int fd)
1015 * should no longer service it. Networking and 1041 * should no longer service it. Networking and
1016 * console do this when there's no input 1042 * console do this when there's no input
1017 * buffers to deliver into. Console also uses 1043 * buffers to deliver into. Console also uses
1018 * it when it discovers that stdin is 1044 * it when it discovers that stdin is closed. */
1019 * closed. */
1020 FD_CLR(i->fd, &devices.infds); 1045 FD_CLR(i->fd, &devices.infds);
1021 /* Tell waker to ignore it too, by sending a 1046 /* Tell waker to ignore it too, by sending a
1022 * negative fd number (-1, since 0 is a valid 1047 * negative fd number (-1, since 0 is a valid
@@ -1033,7 +1058,8 @@ static void handle_input(int fd)
1033 * 1058 *
1034 * All devices need a descriptor so the Guest knows it exists, and a "struct 1059 * All devices need a descriptor so the Guest knows it exists, and a "struct
1035 * device" so the Launcher can keep track of it. We have common helper 1060 * device" so the Launcher can keep track of it. We have common helper
1036 * routines to allocate and manage them. */ 1061 * routines to allocate and manage them.
1062 */
1037 1063
1038/* The layout of the device page is a "struct lguest_device_desc" followed by a 1064/* The layout of the device page is a "struct lguest_device_desc" followed by a
1039 * number of virtqueue descriptors, then two sets of feature bits, then an 1065 * number of virtqueue descriptors, then two sets of feature bits, then an
@@ -1078,7 +1104,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1078 struct virtqueue **i, *vq = malloc(sizeof(*vq)); 1104 struct virtqueue **i, *vq = malloc(sizeof(*vq));
1079 void *p; 1105 void *p;
1080 1106
1081 /* First we need some pages for this virtqueue. */ 1107 /* First we need some memory for this virtqueue. */
1082 pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1) 1108 pages = (vring_size(num_descs, getpagesize()) + getpagesize() - 1)
1083 / getpagesize(); 1109 / getpagesize();
1084 p = get_pages(pages); 1110 p = get_pages(pages);
@@ -1087,6 +1113,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1087 vq->next = NULL; 1113 vq->next = NULL;
1088 vq->last_avail_idx = 0; 1114 vq->last_avail_idx = 0;
1089 vq->dev = dev; 1115 vq->dev = dev;
1116 vq->inflight = 0;
1090 1117
1091 /* Initialize the configuration. */ 1118 /* Initialize the configuration. */
1092 vq->config.num = num_descs; 1119 vq->config.num = num_descs;
@@ -1122,7 +1149,7 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs,
1122} 1149}
1123 1150
1124/* The first half of the feature bitmask is for us to advertise features. The 1151/* The first half of the feature bitmask is for us to advertise features. The
1125 * second half if for the Guest to accept features. */ 1152 * second half is for the Guest to accept features. */
1126static void add_feature(struct device *dev, unsigned bit) 1153static void add_feature(struct device *dev, unsigned bit)
1127{ 1154{
1128 u8 *features = get_feature_bits(dev); 1155 u8 *features = get_feature_bits(dev);
@@ -1151,7 +1178,9 @@ static void set_config(struct device *dev, unsigned len, const void *conf)
1151} 1178}
1152 1179
1153/* This routine does all the creation and setup of a new device, including 1180/* This routine does all the creation and setup of a new device, including
1154 * calling new_dev_desc() to allocate the descriptor and device memory. */ 1181 * calling new_dev_desc() to allocate the descriptor and device memory.
1182 *
1183 * See what I mean about userspace being boring? */
1155static struct device *new_device(const char *name, u16 type, int fd, 1184static struct device *new_device(const char *name, u16 type, int fd,
1156 bool (*handle_input)(int, struct device *)) 1185 bool (*handle_input)(int, struct device *))
1157{ 1186{
@@ -1167,6 +1196,7 @@ static struct device *new_device(const char *name, u16 type, int fd,
1167 dev->handle_input = handle_input; 1196 dev->handle_input = handle_input;
1168 dev->name = name; 1197 dev->name = name;
1169 dev->vq = NULL; 1198 dev->vq = NULL;
1199 dev->ready = NULL;
1170 1200
1171 /* Append to device list. Prepending to a single-linked list is 1201 /* Append to device list. Prepending to a single-linked list is
1172 * easier, but the user expects the devices to be arranged on the bus 1202 * easier, but the user expects the devices to be arranged on the bus
@@ -1345,6 +1375,7 @@ static void setup_tun_net(const char *arg)
1345 1375
1346 /* Tell Guest what MAC address to use. */ 1376 /* Tell Guest what MAC address to use. */
1347 add_feature(dev, VIRTIO_NET_F_MAC); 1377 add_feature(dev, VIRTIO_NET_F_MAC);
1378 add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
1348 set_config(dev, sizeof(conf), &conf); 1379 set_config(dev, sizeof(conf), &conf);
1349 1380
1350 /* We don't need the socket any more; setup is done. */ 1381 /* We don't need the socket any more; setup is done. */
@@ -1383,7 +1414,6 @@ struct vblk_info
1383 * Launcher triggers interrupt to Guest. */ 1414 * Launcher triggers interrupt to Guest. */
1384 int done_fd; 1415 int done_fd;
1385}; 1416};
1386/*:*/
1387 1417
1388/*L:210 1418/*L:210
1389 * The Disk 1419 * The Disk
@@ -1396,7 +1426,7 @@ static bool service_io(struct device *dev)
1396 struct vblk_info *vblk = dev->priv; 1426 struct vblk_info *vblk = dev->priv;
1397 unsigned int head, out_num, in_num, wlen; 1427 unsigned int head, out_num, in_num, wlen;
1398 int ret; 1428 int ret;
1399 struct virtio_blk_inhdr *in; 1429 u8 *in;
1400 struct virtio_blk_outhdr *out; 1430 struct virtio_blk_outhdr *out;
1401 struct iovec iov[dev->vq->vring.num]; 1431 struct iovec iov[dev->vq->vring.num];
1402 off64_t off; 1432 off64_t off;
@@ -1414,7 +1444,7 @@ static bool service_io(struct device *dev)
1414 head, out_num, in_num); 1444 head, out_num, in_num);
1415 1445
1416 out = convert(&iov[0], struct virtio_blk_outhdr); 1446 out = convert(&iov[0], struct virtio_blk_outhdr);
1417 in = convert(&iov[out_num+in_num-1], struct virtio_blk_inhdr); 1447 in = convert(&iov[out_num+in_num-1], u8);
1418 off = out->sector * 512; 1448 off = out->sector * 512;
1419 1449
1420 /* The block device implements "barriers", where the Guest indicates 1450 /* The block device implements "barriers", where the Guest indicates
@@ -1428,7 +1458,7 @@ static bool service_io(struct device *dev)
1428 * It'd be nice if we supported eject, for example, but we don't. */ 1458 * It'd be nice if we supported eject, for example, but we don't. */
1429 if (out->type & VIRTIO_BLK_T_SCSI_CMD) { 1459 if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
1430 fprintf(stderr, "Scsi commands unsupported\n"); 1460 fprintf(stderr, "Scsi commands unsupported\n");
1431 in->status = VIRTIO_BLK_S_UNSUPP; 1461 *in = VIRTIO_BLK_S_UNSUPP;
1432 wlen = sizeof(*in); 1462 wlen = sizeof(*in);
1433 } else if (out->type & VIRTIO_BLK_T_OUT) { 1463 } else if (out->type & VIRTIO_BLK_T_OUT) {
1434 /* Write */ 1464 /* Write */
@@ -1451,7 +1481,7 @@ static bool service_io(struct device *dev)
1451 errx(1, "Write past end %llu+%u", off, ret); 1481 errx(1, "Write past end %llu+%u", off, ret);
1452 } 1482 }
1453 wlen = sizeof(*in); 1483 wlen = sizeof(*in);
1454 in->status = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); 1484 *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
1455 } else { 1485 } else {
1456 /* Read */ 1486 /* Read */
1457 1487
@@ -1464,10 +1494,10 @@ static bool service_io(struct device *dev)
1464 verbose("READ from sector %llu: %i\n", out->sector, ret); 1494 verbose("READ from sector %llu: %i\n", out->sector, ret);
1465 if (ret >= 0) { 1495 if (ret >= 0) {
1466 wlen = sizeof(*in) + ret; 1496 wlen = sizeof(*in) + ret;
1467 in->status = VIRTIO_BLK_S_OK; 1497 *in = VIRTIO_BLK_S_OK;
1468 } else { 1498 } else {
1469 wlen = sizeof(*in); 1499 wlen = sizeof(*in);
1470 in->status = VIRTIO_BLK_S_IOERR; 1500 *in = VIRTIO_BLK_S_IOERR;
1471 } 1501 }
1472 } 1502 }
1473 1503
@@ -1493,7 +1523,10 @@ static int io_thread(void *_dev)
1493 while (read(vblk->workpipe[0], &c, 1) == 1) { 1523 while (read(vblk->workpipe[0], &c, 1) == 1) {
1494 /* We acknowledge each request immediately to reduce latency, 1524 /* We acknowledge each request immediately to reduce latency,
1495 * rather than waiting until we've done them all. I haven't 1525 * rather than waiting until we've done them all. I haven't
1496 * measured to see if it makes any difference. */ 1526 * measured to see if it makes any difference.
1527 *
1528 * That would be an interesting test, wouldn't it? You could
1529 * also try having more than one I/O thread. */
1497 while (service_io(dev)) 1530 while (service_io(dev))
1498 write(vblk->done_fd, &c, 1); 1531 write(vblk->done_fd, &c, 1);
1499 } 1532 }
@@ -1501,7 +1534,7 @@ static int io_thread(void *_dev)
1501} 1534}
1502 1535
1503/* Now we've seen the I/O thread, we return to the Launcher to see what happens 1536/* Now we've seen the I/O thread, we return to the Launcher to see what happens
1504 * when the thread tells us it's completed some I/O. */ 1537 * when that thread tells us it's completed some I/O. */
1505static bool handle_io_finish(int fd, struct device *dev) 1538static bool handle_io_finish(int fd, struct device *dev)
1506{ 1539{
1507 char c; 1540 char c;
@@ -1573,11 +1606,12 @@ static void setup_block_file(const char *filename)
1573 * more work. */ 1606 * more work. */
1574 pipe(vblk->workpipe); 1607 pipe(vblk->workpipe);
1575 1608
1576 /* Create stack for thread and run it */ 1609 /* Create stack for thread and run it. Since the stack grows downwards, we
1610 * point the stack pointer to the end of this region. */
1577 stack = malloc(32768); 1611 stack = malloc(32768);
1578 /* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from 1612 /* SIGCHLD - We don't "wait" for our cloned thread, so prevent it from
1579 * becoming a zombie. */ 1613 * becoming a zombie. */
1580 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1) 1614 if (clone(io_thread, stack + 32768, CLONE_VM | SIGCHLD, dev) == -1)
1581 err(1, "Creating clone"); 1615 err(1, "Creating clone");
1582 1616
1583 /* We don't need to keep the I/O thread's end of the pipes open. */ 1617 /* We don't need to keep the I/O thread's end of the pipes open. */
@@ -1587,14 +1621,14 @@ static void setup_block_file(const char *filename)
1587 verbose("device %u: virtblock %llu sectors\n", 1621 verbose("device %u: virtblock %llu sectors\n",
1588 devices.device_num, le64_to_cpu(conf.capacity)); 1622 devices.device_num, le64_to_cpu(conf.capacity));
1589} 1623}
1590/* That's the end of device setup. :*/ 1624/* That's the end of device setup. */
1591 1625
1592/* Reboot */ 1626/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
1593static void __attribute__((noreturn)) restart_guest(void) 1627static void __attribute__((noreturn)) restart_guest(void)
1594{ 1628{
1595 unsigned int i; 1629 unsigned int i;
1596 1630
1597 /* Closing pipes causes the waker thread and io_threads to die, and 1631 /* Closing pipes causes the Waker thread and io_threads to die, and
1598 * closing /dev/lguest cleans up the Guest. Since we don't track all 1632 * closing /dev/lguest cleans up the Guest. Since we don't track all
1599 * open fds, we simply close everything beyond stderr. */ 1633 * open fds, we simply close everything beyond stderr. */
1600 for (i = 3; i < FD_SETSIZE; i++) 1634 for (i = 3; i < FD_SETSIZE; i++)
@@ -1603,7 +1637,7 @@ static void __attribute__((noreturn)) restart_guest(void)
1603 err(1, "Could not exec %s", main_args[0]); 1637 err(1, "Could not exec %s", main_args[0]);
1604} 1638}
1605 1639
1606/*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves 1640/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves
1607 * its input and output, and finally, lays it to rest. */ 1641 * its input and output, and finally, lays it to rest. */
1608static void __attribute__((noreturn)) run_guest(int lguest_fd) 1642static void __attribute__((noreturn)) run_guest(int lguest_fd)
1609{ 1643{
@@ -1644,7 +1678,7 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
1644 err(1, "Resetting break"); 1678 err(1, "Resetting break");
1645 } 1679 }
1646} 1680}
1647/* 1681/*L:240
1648 * This is the end of the Launcher. The good news: we are over halfway 1682 * This is the end of the Launcher. The good news: we are over halfway
1649 * through! The bad news: the most fiendish part of the code still lies ahead 1683 * through! The bad news: the most fiendish part of the code still lies ahead
1650 * of us. 1684 * of us.
@@ -1691,8 +1725,8 @@ int main(int argc, char *argv[])
1691 * device receive input from a file descriptor, we keep an fdset 1725 * device receive input from a file descriptor, we keep an fdset
1692 * (infds) and the maximum fd number (max_infd) with the head of the 1726 * (infds) and the maximum fd number (max_infd) with the head of the
1693 * list. We also keep a pointer to the last device. Finally, we keep 1727 * list. We also keep a pointer to the last device. Finally, we keep
1694 * the next interrupt number to hand out (1: remember that 0 is used by 1728 * the next interrupt number to use for devices (1: remember that 0 is
1695 * the timer). */ 1729 * used by the timer). */
1696 FD_ZERO(&devices.infds); 1730 FD_ZERO(&devices.infds);
1697 devices.max_infd = -1; 1731 devices.max_infd = -1;
1698 devices.lastdev = NULL; 1732 devices.lastdev = NULL;
@@ -1793,8 +1827,8 @@ int main(int argc, char *argv[])
1793 lguest_fd = tell_kernel(pgdir, start); 1827 lguest_fd = tell_kernel(pgdir, start);
1794 1828
1795 /* We fork off a child process, which wakes the Launcher whenever one 1829 /* We fork off a child process, which wakes the Launcher whenever one
1796 * of the input file descriptors needs attention. Otherwise we would 1830 * of the input file descriptors needs attention. We call this the
1797 * run the Guest until it tries to output something. */ 1831 * Waker, and we'll cover it in a moment. */
1798 waker_fd = setup_waker(lguest_fd); 1832 waker_fd = setup_waker(lguest_fd);
1799 1833
1800 /* Finally, run the Guest. This doesn't return. */ 1834 /* Finally, run the Guest. This doesn't return. */
diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt
index 722d4e7fbebe..29510dc51510 100644
--- a/Documentation/lguest/lguest.txt
+++ b/Documentation/lguest/lguest.txt
@@ -1,6 +1,7 @@
1Rusty's Remarkably Unreliable Guide to Lguest 1 __
2 - or, A Young Coder's Illustrated Hypervisor 2 (___()'`; Rusty's Remarkably Unreliable Guide to Lguest
3http://lguest.ozlabs.org 3 /, /` - or, A Young Coder's Illustrated Hypervisor
4 \\"--\\ http://lguest.ozlabs.org
4 5
5Lguest is designed to be a minimal hypervisor for the Linux kernel, for 6Lguest is designed to be a minimal hypervisor for the Linux kernel, for
6Linux developers and users to experiment with virtualization with the 7Linux developers and users to experiment with virtualization with the
@@ -41,12 +42,16 @@ Running Lguest:
41 CONFIG_PHYSICAL_ALIGN=0x100000) 42 CONFIG_PHYSICAL_ALIGN=0x100000)
42 43
43 "Device Drivers": 44 "Device Drivers":
45 "Block devices"
46 "Virtio block driver (EXPERIMENTAL)" = M/Y
44 "Network device support" 47 "Network device support"
45 "Universal TUN/TAP device driver support" = M/Y 48 "Universal TUN/TAP device driver support" = M/Y
46 (CONFIG_TUN=m) 49 "Virtio network driver (EXPERIMENTAL)" = M/Y
47 "Virtualization" 50 (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m)
48 "Linux hypervisor example code" = M/Y 51
49 (CONFIG_LGUEST=m) 52 "Virtualization"
53 "Linux hypervisor example code" = M/Y
54 (CONFIG_LGUEST=m)
50 55
51- A tool called "lguest" is available in this directory: type "make" 56- A tool called "lguest" is available in this directory: type "make"
52 to build it. If you didn't build your kernel in-tree, use "make 57 to build it. If you didn't build your kernel in-tree, use "make
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt
index bd450e797558..95070028d15e 100644
--- a/Documentation/magic-number.txt
+++ b/Documentation/magic-number.txt
@@ -95,7 +95,6 @@ RFCOMM_TTY_MAGIC 0x6d02 net/bluetooth/rfcomm/tty.c
95USB_SERIAL_PORT_MAGIC 0x7301 usb_serial_port drivers/usb/serial/usb-serial.h 95USB_SERIAL_PORT_MAGIC 0x7301 usb_serial_port drivers/usb/serial/usb-serial.h
96CG_MAGIC 0x00090255 ufs_cylinder_group include/linux/ufs_fs.h 96CG_MAGIC 0x00090255 ufs_cylinder_group include/linux/ufs_fs.h
97A2232_MAGIC 0x000a2232 gs_port drivers/char/ser_a2232.h 97A2232_MAGIC 0x000a2232 gs_port drivers/char/ser_a2232.h
98SOLARIS_SOCKET_MAGIC 0x000ADDED sol_socket_struct arch/sparc64/solaris/socksys.h
99RPORT_MAGIC 0x00525001 r_port drivers/char/rocket_int.h 98RPORT_MAGIC 0x00525001 r_port drivers/char/rocket_int.h
100LSEMAGIC 0x05091998 lse drivers/fc4/fc.c 99LSEMAGIC 0x05091998 lse drivers/fc4/fc.c
101GDTIOCTL_MAGIC 0x06030f07 gdth_iowr_str drivers/scsi/gdth_ioctl.h 100GDTIOCTL_MAGIC 0x06030f07 gdth_iowr_str drivers/scsi/gdth_ioctl.h
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 396cdd982c26..a8b430627473 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -450,3 +450,9 @@ These currently include
450 there are upper and lower limits (32768, 16). Default is 128. 450 there are upper and lower limits (32768, 16). Default is 128.
451 strip_cache_active (currently raid5 only) 451 strip_cache_active (currently raid5 only)
452 number of active entries in the stripe cache 452 number of active entries in the stripe cache
453 preread_bypass_threshold (currently raid5 only)
454 number of times a stripe requiring preread will be bypassed by
455 a stripe that does not require preread. For fairness defaults
456 to 1. Setting this to 0 disables bypass accounting and
457 requires preread stripes to wait until all full-width stripe-
458 writes are complete. Valid values are 0 to stripe_cache_size.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 1f506f7830ec..f5b7127f54ac 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -430,8 +430,8 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
430 430
431 [*] For information on bus mastering DMA and coherency please read: 431 [*] For information on bus mastering DMA and coherency please read:
432 432
433 Documentation/pci.txt 433 Documentation/PCI/pci.txt
434 Documentation/DMA-mapping.txt 434 Documentation/DMA-mapping.txt
435 Documentation/DMA-API.txt 435 Documentation/DMA-API.txt
436 436
437 437
@@ -994,7 +994,17 @@ The Linux kernel has eight basic CPU memory barriers:
994 DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends() 994 DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends()
995 995
996 996
997All CPU memory barriers unconditionally imply compiler barriers. 997All memory barriers except the data dependency barriers imply a compiler
998barrier. Data dependencies do not impose any additional compiler ordering.
999
1000Aside: In the case of data dependencies, the compiler would be expected to
1001issue the loads in the correct order (eg. `a[b]` would have to load the value
1002of b before loading a[b]), however there is no guarantee in the C specification
1003that the compiler may not speculate the value of b (eg. is equal to 1) and load
1004a before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the
1005problem of a compiler reloading b after having loaded a[b], thus having a newer
1006copy of b than a[b]. A consensus has not yet been reached about these problems,
1007however the ACCESS_ONCE macro is a good place to start looking.
998 1008
999SMP memory barriers are reduced to compiler barriers on uniprocessor compiled 1009SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
1000systems because it is assumed that a CPU will appear to be self-consistent, 1010systems because it is assumed that a CPU will appear to be self-consistent,
diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README
index 5c8334123f4f..25a6ed1aaa5b 100644
--- a/Documentation/mips/AU1xxx_IDE.README
+++ b/Documentation/mips/AU1xxx_IDE.README
@@ -46,8 +46,6 @@ Two files are introduced:
46 46
47 a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h' 47 a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h'
48 contains : struct _auide_hwif 48 contains : struct _auide_hwif
49 struct drive_list_entry dma_white_list
50 struct drive_list_entry dma_black_list
51 timing parameters for PIO mode 0/1/2/3/4 49 timing parameters for PIO mode 0/1/2/3/4
52 timing parameters for MWDMA 0/1/2 50 timing parameters for MWDMA 0/1/2
53 51
@@ -63,12 +61,6 @@ Four configs variables are introduced:
63 CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ - maximum transfer size 61 CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ - maximum transfer size
64 per descriptor 62 per descriptor
65 63
66If MWDMA is enabled and the connected hard disc is not on the white list, the
67kernel switches to a "safe mwdma mode" at boot time. In this mode the IDE
68performance is substantially slower than in full speed mwdma. In this case
69please add your hard disc to the white list (follow instruction from 'ADD NEW
70HARD DISC TO WHITE OR BLACK LIST' section).
71
72 64
73SUPPORTED IDE MODES 65SUPPORTED IDE MODES
74------------------- 66-------------------
@@ -120,44 +112,6 @@ CONFIG_IDEDMA_AUTO=y
120Also undefine 'IDE_AU1XXX_BURSTMODE' in 'drivers/ide/mips/au1xxx-ide.c' to 112Also undefine 'IDE_AU1XXX_BURSTMODE' in 'drivers/ide/mips/au1xxx-ide.c' to
121disable the burst support on DBDMA controller. 113disable the burst support on DBDMA controller.
122 114
123ADD NEW HARD DISC TO WHITE OR BLACK LIST
124----------------------------------------
125
126Step 1 : detect the model name of your hard disc
127
128 a) connect your hard disc to the AU1XXX
129
130 b) boot your kernel and get the hard disc model.
131
132 Example boot log:
133
134 --snipped--
135 Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2
136 ide: Assuming 50MHz system bus speed for PIO modes; override with idebus=xx
137 Au1xxx IDE(builtin) configured for MWDMA2
138 Probing IDE interface ide0...
139 hda: Maxtor 6E040L0, ATA DISK drive
140 ide0 at 0xac800000-0xac800007,0xac8001c0 on irq 64
141 hda: max request size: 64KiB
142 hda: 80293248 sectors (41110 MB) w/2048KiB Cache, CHS=65535/16/63, (U)DMA
143 --snipped--
144
145 In this example 'Maxtor 6E040L0'.
146
147Step 2 : edit 'include/asm-mips/mach-au1x00/au1xxx_ide.h'
148
149 Add your hard disc to the dma_white_list or dma_black_list structure.
150
151Step 3 : Recompile the kernel
152
153 Enable MWDMA support in the kernel configuration. Recompile the kernel and
154 reboot.
155
156Step 4 : Tests
157
158 If you have added a hard disc to the white list, please run some stress tests
159 for verification.
160
161 115
162ACKNOWLEDGMENTS 116ACKNOWLEDGMENTS
163--------------- 117---------------
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 02e56d447a8f..1634c6dcecae 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -84,9 +84,6 @@ policy-routing.txt
84 - IP policy-based routing 84 - IP policy-based routing
85ray_cs.txt 85ray_cs.txt
86 - Raylink Wireless LAN card driver info. 86 - Raylink Wireless LAN card driver info.
87sk98lin.txt
88 - Marvell Yukon Chipset / SysKonnect SK-98xx compliant Gigabit
89 Ethernet Adapter family driver info
90skfp.txt 87skfp.txt
91 - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info. 88 - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
92smc9.txt 89smc9.txt
@@ -103,8 +100,6 @@ tuntap.txt
103 - TUN/TAP device driver, allowing user space Rx/Tx of packets. 100 - TUN/TAP device driver, allowing user space Rx/Tx of packets.
104vortex.txt 101vortex.txt
105 - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards. 102 - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
106wan-router.txt
107 - WAN router documentation
108wavelan.txt 103wavelan.txt
109 - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver 104 - AT&T GIS (nee NCR) WaveLAN card: An Ethernet-like radio transceiver
110x25.txt 105x25.txt
diff --git a/Documentation/networking/arcnet.txt b/Documentation/networking/arcnet.txt
index 770fc41a78e8..796012540386 100644
--- a/Documentation/networking/arcnet.txt
+++ b/Documentation/networking/arcnet.txt
@@ -46,7 +46,7 @@ These are the ARCnet drivers for Linux.
46 46
47 47
48This new release (2.91) has been put together by David Woodhouse 48This new release (2.91) has been put together by David Woodhouse
49<dwmw2@cam.ac.uk>, in an attempt to tidy up the driver after adding support 49<dwmw2@infradead.org>, in an attempt to tidy up the driver after adding support
50for yet another chipset. Now the generic support has been separated from the 50for yet another chipset. Now the generic support has been separated from the
51individual chipset drivers, and the source files aren't quite so packed with 51individual chipset drivers, and the source files aren't quite so packed with
52#ifdefs! I've changed this file a bit, but kept it in the first person from 52#ifdefs! I've changed this file a bit, but kept it in the first person from
diff --git a/Documentation/networking/bcm43xx.txt b/Documentation/networking/bcm43xx.txt
deleted file mode 100644
index d602c8d6ff3e..000000000000
--- a/Documentation/networking/bcm43xx.txt
+++ /dev/null
@@ -1,89 +0,0 @@
1
2 BCM43xx Linux Driver Project
3 ============================
4
5Introduction
6------------
7
8Many of the wireless devices found in modern notebook computers are
9based on the wireless chips produced by Broadcom. These devices have
10been a problem for Linux users as there is no open-source driver
11available. In addition, Broadcom has not released specifications
12for the device, and driver availability has been limited to the
13binary-only form used in the GPL versions of AP hardware such as the
14Linksys WRT54G, and the Windows and OS X drivers. Before this project
15began, the only way to use these devices was to use the Windows or
16OS X drivers with either the Linuxant or ndiswrapper modules. There
17is a strong penalty if this method is used as loading the binary-only
18module "taints" the kernel, and no kernel developer will help diagnose
19any kernel problems.
20
21Development
22-----------
23
24This driver has been developed using
25a clean-room technique that is described at
26http://bcm-specs.sipsolutions.net/ReverseEngineeringProcess. For legal
27reasons, none of the clean-room crew works on the Linux driver,
28and none of the Linux developers sees anything but the specifications,
29which are the ultimate product of the reverse-engineering group.
30
31Software
32--------
33
34Since the release of the 2.6.17 kernel, the bcm43xx driver has been
35distributed with the kernel source, and is prebuilt in most, if not
36all, distributions. There is, however, additional software that is
37required. The firmware used by the chip is the intellectual property
38of Broadcom and they have not given the bcm43xx team redistribution
39rights to this firmware. Since we cannot legally redistribute
40the firmware we cannot include it with the driver. Furthermore, it
41cannot be placed in the downloadable archives of any distributing
42organization; therefore, the user is responsible for obtaining the
43firmware and placing it in the appropriate location so that the driver
44can find it when initializing.
45
46To help with this process, the bcm43xx developers provide a separate
47program named bcm43xx-fwcutter to "cut" the firmware out of a
48Windows or OS X driver and write the extracted files to the proper
49location. This program is usually provided with the distribution;
50however, it may be downloaded from
51
52http://developer.berlios.de/project/showfiles.php?group_id=4547
53
54The firmware is available in two versions. V3 firmware is used with
55the in-kernel bcm43xx driver that uses a software MAC layer called
56SoftMAC, and will have a microcode revision of 0x127 or smaller. The
57V4 firmware is used by an out-of-kernel driver employing a variation of
58the Devicescape MAC layer known as d80211. Once bcm43xx-d80211 reaches
59a satisfactory level of development, it will replace bcm43xx-softmac
60in the kernel as it is much more flexible and powerful.
61
62A source for the latest V3 firmware is
63
64http://downloads.openwrt.org/sources/wl_apsta-3.130.20.0.o
65
66Once this file is downloaded, the command
67'bcm43xx-fwcutter -w <dir> <filename>'
68will extract the microcode and write it to directory
69<dir>. The correct directory will depend on your distribution;
70however, most use '/lib/firmware'. Once this step is completed,
71the bcm43xx driver should load when the system is booted. To see
72any messages relating to the driver, issue the command 'dmesg |
73grep bcm43xx' from a terminal window. If there are any problems,
74please send that output to Bcm43xx-dev@lists.berlios.de.
75
76Although the driver has been in-kernel since 2.6.17, the earliest
77version is quite limited in its capability. Patches that include
78all features of later versions are available for the stable kernel
79versions from 2.6.18. These will be needed if you use a BCM4318,
80or a PCI Express version (BCM4311 and BCM4312). In addition, if you
81have an early BCM4306 and more than 1 GB RAM, your kernel will need
82to be patched. These patches, which are being updated regularly,
83are available at ftp://lwfinger.dynalias.org/patches. Look for
84combined_2.6.YY.patch. Of course you will need kernel source downloaded
85from kernel.org, or the source from your distribution.
86
87If you build your own kernel, please enable CONFIG_BCM43XX_DEBUG
88and CONFIG_IEEE80211_SOFTMAC_DEBUG. The log information provided is
89essential for solving any problems.
diff --git a/Documentation/networking/bridge.txt b/Documentation/networking/bridge.txt
index bdae2db4119c..bec69a8a1697 100644
--- a/Documentation/networking/bridge.txt
+++ b/Documentation/networking/bridge.txt
@@ -1,6 +1,6 @@
1In order to use the Ethernet bridging functionality, you'll need the 1In order to use the Ethernet bridging functionality, you'll need the
2userspace tools. These programs and documentation are available 2userspace tools. These programs and documentation are available
3at http://bridge.sourceforge.net. The download page is 3at http://www.linux-foundation.org/en/Net:Bridge. The download page is
4http://prdownloads.sourceforge.net/bridge. 4http://prdownloads.sourceforge.net/bridge.
5 5
6If you still have questions, don't hesitate to post to the mailing list 6If you still have questions, don't hesitate to post to the mailing list
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index f1b2de170929..641d2afacffa 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -281,10 +281,10 @@ solution for a couple of reasons:
281 sa_family_t can_family; 281 sa_family_t can_family;
282 int can_ifindex; 282 int can_ifindex;
283 union { 283 union {
284 struct { canid_t rx_id, tx_id; } tp16; 284 /* transport protocol class address info (e.g. ISOTP) */
285 struct { canid_t rx_id, tx_id; } tp20; 285 struct { canid_t rx_id, tx_id; } tp;
286 struct { canid_t rx_id, tx_id; } mcnet; 286
287 struct { canid_t rx_id, tx_id; } isotp; 287 /* reserved for future CAN protocols address information */
288 } can_addr; 288 } can_addr;
289 }; 289 };
290 290
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 17a6e46fbd43..17f1f91af35c 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -81,23 +81,23 @@ inet_peer_minttl - INTEGER
81 Minimum time-to-live of entries. Should be enough to cover fragment 81 Minimum time-to-live of entries. Should be enough to cover fragment
82 time-to-live on the reassembling side. This minimum time-to-live is 82 time-to-live on the reassembling side. This minimum time-to-live is
83 guaranteed if the pool size is less than inet_peer_threshold. 83 guaranteed if the pool size is less than inet_peer_threshold.
84 Measured in jiffies(1). 84 Measured in seconds.
85 85
86inet_peer_maxttl - INTEGER 86inet_peer_maxttl - INTEGER
87 Maximum time-to-live of entries. Unused entries will expire after 87 Maximum time-to-live of entries. Unused entries will expire after
88 this period of time if there is no memory pressure on the pool (i.e. 88 this period of time if there is no memory pressure on the pool (i.e.
89 when the number of entries in the pool is very small). 89 when the number of entries in the pool is very small).
90 Measured in jiffies(1). 90 Measured in seconds.
91 91
92inet_peer_gc_mintime - INTEGER 92inet_peer_gc_mintime - INTEGER
93 Minimum interval between garbage collection passes. This interval is 93 Minimum interval between garbage collection passes. This interval is
94 in effect under high memory pressure on the pool. 94 in effect under high memory pressure on the pool.
95 Measured in jiffies(1). 95 Measured in seconds.
96 96
97inet_peer_gc_maxtime - INTEGER 97inet_peer_gc_maxtime - INTEGER
98 Minimum interval between garbage collection passes. This interval is 98 Minimum interval between garbage collection passes. This interval is
99 in effect under low (or absent) memory pressure on the pool. 99 in effect under low (or absent) memory pressure on the pool.
100 Measured in jiffies(1). 100 Measured in seconds.
101 101
102TCP variables: 102TCP variables:
103 103
@@ -794,10 +794,6 @@ tag - INTEGER
794 Allows you to write a number, which can be used as required. 794 Allows you to write a number, which can be used as required.
795 Default value is 0. 795 Default value is 0.
796 796
797(1) Jiffie: internal timeunit for the kernel. On the i386 1/100s, on the
798Alpha 1/1024s. See the HZ define in /usr/include/asm/param.h for the exact
799value on your system.
800
801Alexey Kuznetsov. 797Alexey Kuznetsov.
802kuznet@ms2.inr.ac.ru 798kuznet@ms2.inr.ac.ru
803 799
diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index 0bc95eab1512..8df6a7b0e66c 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -1,7 +1,7 @@
1 1
2------- 2-------
3PHY Abstraction Layer 3PHY Abstraction Layer
4(Updated 2006-11-30) 4(Updated 2008-04-08)
5 5
6Purpose 6Purpose
7 7
@@ -291,3 +291,39 @@ Writing a PHY driver
291 Feel free to look at the Marvell, Cicada, and Davicom drivers in 291 Feel free to look at the Marvell, Cicada, and Davicom drivers in
292 drivers/net/phy/ for examples (the lxt and qsemi drivers have 292 drivers/net/phy/ for examples (the lxt and qsemi drivers have
293 not been tested as of this writing) 293 not been tested as of this writing)
294
295Board Fixups
296
297 Sometimes the specific interaction between the platform and the PHY requires
298 special handling. For instance, to change where the PHY's clock input is,
299 or to add a delay to account for latency issues in the data path. In order
300 to support such contingencies, the PHY Layer allows platform code to register
301 fixups to be run when the PHY is brought up (or subsequently reset).
302
303 When the PHY Layer brings up a PHY it checks to see if there are any fixups
304 registered for it, matching based on UID (contained in the PHY device's phy_id
305 field) and the bus identifier (contained in phydev->dev.bus_id). Both must
306 match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
307 wildcards for the bus ID and UID, respectively.
308
309 When a match is found, the PHY layer will invoke the run function associated
310 with the fixup. This function is passed a pointer to the phy_device of
311 interest. It should therefore only operate on that PHY.
312
313 The platform code can either register the fixup using phy_register_fixup():
314
315 int phy_register_fixup(const char *phy_id,
316 u32 phy_uid, u32 phy_uid_mask,
317 int (*run)(struct phy_device *));
318
319 Or using one of the two stubs, phy_register_fixup_for_uid() and
320 phy_register_fixup_for_id():
321
322 int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
323 int (*run)(struct phy_device *));
324 int phy_register_fixup_for_id(const char *phy_id,
325 int (*run)(struct phy_device *));
326
327 The stubs set one of the two matching criteria, and set the other one to
328 match anything.
329
diff --git a/Documentation/networking/s2io.txt b/Documentation/networking/s2io.txt
index 4bde53e85f3f..1e28e2ddb90a 100644
--- a/Documentation/networking/s2io.txt
+++ b/Documentation/networking/s2io.txt
@@ -83,9 +83,9 @@ Valid range: Limited by memory on system
83Default: 30 83Default: 30
84 84
85e. intr_type 85e. intr_type
86Specifies interrupt type. Possible values 1(INTA), 2(MSI), 3(MSI-X) 86Specifies interrupt type. Possible values 0(INTA), 2(MSI-X)
87Valid range: 1-3 87Valid values: 0, 2
88Default: 1 88Default: 2
89 89
905. Performance suggestions 905. Performance suggestions
91General: 91General:
diff --git a/Documentation/networking/sk98lin.txt b/Documentation/networking/sk98lin.txt
deleted file mode 100644
index 8590a954df1d..000000000000
--- a/Documentation/networking/sk98lin.txt
+++ /dev/null
@@ -1,568 +0,0 @@
1(C)Copyright 1999-2004 Marvell(R).
2All rights reserved
3===========================================================================
4
5sk98lin.txt created 13-Feb-2004
6
7Readme File for sk98lin v6.23
8Marvell Yukon/SysKonnect SK-98xx Gigabit Ethernet Adapter family driver for LINUX
9
10This file contains
11 1 Overview
12 2 Required Files
13 3 Installation
14 3.1 Driver Installation
15 3.2 Inclusion of adapter at system start
16 4 Driver Parameters
17 4.1 Per-Port Parameters
18 4.2 Adapter Parameters
19 5 Large Frame Support
20 6 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
21 7 Troubleshooting
22
23===========================================================================
24
25
261 Overview
27===========
28
29The sk98lin driver supports the Marvell Yukon and SysKonnect
30SK-98xx/SK-95xx compliant Gigabit Ethernet Adapter on Linux. It has
31been tested with Linux on Intel/x86 machines.
32***
33
34
352 Required Files
36=================
37
38The linux kernel source.
39No additional files required.
40***
41
42
433 Installation
44===============
45
46It is recommended to download the latest version of the driver from the
47SysKonnect web site www.syskonnect.com. If you have downloaded the latest
48driver, the Linux kernel has to be patched before the driver can be
49installed. For details on how to patch a Linux kernel, refer to the
50patch.txt file.
51
523.1 Driver Installation
53------------------------
54
55The following steps describe the actions that are required to install
56the driver and to start it manually. These steps should be carried
57out for the initial driver setup. Once confirmed to be ok, they can
58be included in the system start.
59
60NOTE 1: To perform the following tasks you need 'root' access.
61
62NOTE 2: In case of problems, please read the section "Troubleshooting"
63 below.
64
65The driver can either be integrated into the kernel or it can be compiled
66as a module. Select the appropriate option during the kernel
67configuration.
68
69Compile/use the driver as a module
70----------------------------------
71To compile the driver, go to the directory /usr/src/linux and
72execute the command "make menuconfig" or "make xconfig" and proceed as
73follows:
74
75To integrate the driver permanently into the kernel, proceed as follows:
76
771. Select the menu "Network device support" and then "Ethernet(1000Mbit)"
782. Mark "Marvell Yukon Chipset / SysKonnect SK-98xx family support"
79 with (*)
803. Build a new kernel when the configuration of the above options is
81 finished.
824. Install the new kernel.
835. Reboot your system.
84
85To use the driver as a module, proceed as follows:
86
871. Enable 'loadable module support' in the kernel.
882. For automatic driver start, enable the 'Kernel module loader'.
893. Select the menu "Network device support" and then "Ethernet(1000Mbit)"
904. Mark "Marvell Yukon Chipset / SysKonnect SK-98xx family support"
91 with (M)
925. Execute the command "make modules".
936. Execute the command "make modules_install".
94 The appropriate modules will be installed.
957. Reboot your system.
96
97
98Load the module manually
99------------------------
100To load the module manually, proceed as follows:
101
1021. Enter "modprobe sk98lin".
1032. If a Marvell Yukon or SysKonnect SK-98xx adapter is installed in
104 your computer and you have a /proc file system, execute the command:
105 "ls /proc/net/sk98lin/"
106 This should produce an output containing a line with the following
107 format:
108 eth0 eth1 ...
109 which indicates that your adapter has been found and initialized.
110
111 NOTE 1: If you have more than one Marvell Yukon or SysKonnect SK-98xx
112 adapter installed, the adapters will be listed as 'eth0',
113 'eth1', 'eth2', etc.
114 For each adapter, repeat steps 3 and 4 below.
115
116 NOTE 2: If you have other Ethernet adapters installed, your Marvell
117 Yukon or SysKonnect SK-98xx adapter will be mapped to the
118 next available number, e.g. 'eth1'. The mapping is executed
119 automatically.
120 The module installation message (displayed either in a system
121 log file or on the console) prints a line for each adapter
122 found containing the corresponding 'ethX'.
123
1243. Select an IP address and assign it to the respective adapter by
125 entering:
126 ifconfig eth0 <ip-address>
127 With this command, the adapter is connected to the Ethernet.
128
129 SK-98xx Gigabit Ethernet Server Adapters: The yellow LED on the adapter
130 is now active, the link status LED of the primary port is active and
131 the link status LED of the secondary port (on dual port adapters) is
132 blinking (if the ports are connected to a switch or hub).
133 SK-98xx V2.0 Gigabit Ethernet Adapters: The link status LED is active.
134 In addition, you will receive a status message on the console stating
135 "ethX: network connection up using port Y" and showing the selected
136 connection parameters (x stands for the ethernet device number
137 (0,1,2, etc), y stands for the port name (A or B)).
138
139 NOTE: If you are in doubt about IP addresses, ask your network
140 administrator for assistance.
141
1424. Your adapter should now be fully operational.
143 Use 'ping <otherstation>' to verify the connection to other computers
144 on your network.
1455. To check the adapter configuration view /proc/net/sk98lin/[devicename].
146 For example by executing:
147 "cat /proc/net/sk98lin/eth0"
148
149Unload the module
150-----------------
151To stop and unload the driver modules, proceed as follows:
152
1531. Execute the command "ifconfig eth0 down".
1542. Execute the command "rmmod sk98lin".
155
1563.2 Inclusion of adapter at system start
157-----------------------------------------
158
159Since a large number of different Linux distributions are
160available, we are unable to describe a general installation procedure
161for the driver module.
162Because the driver is now integrated in the kernel, installation should
163be easy, using the standard mechanism of your distribution.
164Refer to the distribution's manual for installation of ethernet adapters.
165
166***
167
1684 Driver Parameters
169====================
170
171Parameters can be set at the command line after the module has been
172loaded with the command 'modprobe'.
173In some distributions, the configuration tools are able to pass parameters
174to the driver module.
175
176If you use the kernel module loader, you can set driver parameters
177in the file /etc/modprobe.conf (or /etc/modules.conf in 2.4 or earlier).
178To set the driver parameters in this file, proceed as follows:
179
1801. Insert a line of the form :
181 options sk98lin ...
182 For "...", the same syntax is required as described for the command
183 line parameters of modprobe below.
1842. To activate the new parameters, either reboot your computer
185 or
186 unload and reload the driver.
187 The syntax of the driver parameters is:
188
189 modprobe sk98lin parameter=value1[,value2[,value3...]]
190
191 where value1 refers to the first adapter, value2 to the second etc.
192
193NOTE: All parameters are case sensitive. Write them exactly as shown
194 below.
195
196Example:
197Suppose you have two adapters. You want to set auto-negotiation
198on the first adapter to ON and on the second adapter to OFF.
199You also want to set DuplexCapabilities on the first adapter
200to FULL, and on the second adapter to HALF.
201Then, you must enter:
202
203 modprobe sk98lin AutoNeg_A=On,Off DupCap_A=Full,Half
204
205NOTE: The number of adapters that can be configured this way is
206 limited in the driver (file skge.c, constant SK_MAX_CARD_PARAM).
207 The current limit is 16. If you happen to install
208 more adapters, adjust this and recompile.
209
210
2114.1 Per-Port Parameters
212------------------------
213
214These settings are available for each port on the adapter.
215In the following description, '?' stands for the port for
216which you set the parameter (A or B).
217
218Speed
219-----
220Parameter: Speed_?
221Values: 10, 100, 1000, Auto
222Default: Auto
223
224This parameter is used to set the speed capabilities. It is only valid
225for the SK-98xx V2.0 copper adapters.
226Usually, the speed is negotiated between the two ports during link
227establishment. If this fails, a port can be forced to a specific setting
228with this parameter.
229
230Auto-Negotiation
231----------------
232Parameter: AutoNeg_?
233Values: On, Off, Sense
234Default: On
235
236The "Sense"-mode automatically detects whether the link partner supports
237auto-negotiation or not.
238
239Duplex Capabilities
240-------------------
241Parameter: DupCap_?
242Values: Half, Full, Both
243Default: Both
244
245This parameter is only relevant if auto-negotiation for this port is
246not set to "Sense". If auto-negotiation is set to "On", all three values
247are possible. If it is set to "Off", only "Full" and "Half" are allowed.
248This parameter is useful if your link partner does not support all
249possible combinations.
250
251Flow Control
252------------
253Parameter: FlowCtrl_?
254Values: Sym, SymOrRem, LocSend, None
255Default: SymOrRem
256
257This parameter can be used to set the flow control capabilities the
258port reports during auto-negotiation. It can be set for each port
259individually.
260Possible modes:
261 -- Sym = Symmetric: both link partners are allowed to send
262 PAUSE frames
263 -- SymOrRem = SymmetricOrRemote: both or only remote partner
264 are allowed to send PAUSE frames
265 -- LocSend = LocalSend: only local link partner is allowed
266 to send PAUSE frames
267 -- None = no link partner is allowed to send PAUSE frames
268
269NOTE: This parameter is ignored if auto-negotiation is set to "Off".
270
271Role in Master-Slave-Negotiation (1000Base-T only)
272--------------------------------------------------
273Parameter: Role_?
274Values: Auto, Master, Slave
275Default: Auto
276
277This parameter is only valid for the SK-9821 and SK-9822 adapters.
278For two 1000Base-T ports to communicate, one must take the role of the
279master (providing timing information), while the other must be the
280slave. Usually, this is negotiated between the two ports during link
281establishment. If this fails, a port can be forced to a specific setting
282with this parameter.
283
284
2854.2 Adapter Parameters
286-----------------------
287
288Connection Type (SK-98xx V2.0 copper adapters only)
289---------------
290Parameter: ConType
291Values: Auto, 100FD, 100HD, 10FD, 10HD
292Default: Auto
293
294The parameter 'ConType' is a combination of all five per-port parameters
295within one single parameter. This simplifies the configuration of both ports
296of an adapter card! The different values of this variable reflect the most
297meaningful combinations of port parameters.
298
299The following table shows the values of 'ConType' and the corresponding
300combinations of the per-port parameters:
301
302 ConType | DupCap AutoNeg FlowCtrl Role Speed
303 ----------+------------------------------------------------------
304 Auto | Both On SymOrRem Auto Auto
305 100FD | Full Off None Auto (ignored) 100
306 100HD | Half Off None Auto (ignored) 100
307 10FD | Full Off None Auto (ignored) 10
308 10HD | Half Off None Auto (ignored) 10
309
310Stating any other port parameter together with this 'ConType' variable
311will result in a merged configuration of those settings. This is due to
312the fact that the per-port parameters (e.g. Speed_?) have a higher
313priority than the combined variable 'ConType'.
314
315NOTE: This parameter is always used on both ports of the adapter card.
316
317Interrupt Moderation
318--------------------
319Parameter: Moderation
320Values: None, Static, Dynamic
321Default: None
322
323Interrupt moderation is employed to limit the maximum number of interrupts
324the driver has to serve. That is, one or more interrupts (which indicate any
325transmit or receive packet to be processed) are queued until the driver
326processes them. When the queued interrupts are served is determined by the
327'IntsPerSec' parameter, which is explained later below.
328
329Possible modes:
330
331 -- None - No interrupt moderation is applied on the adapter card.
332 Therefore, each transmit or receive interrupt is served immediately
333 as soon as it appears on the interrupt line of the adapter card.
334
335 -- Static - Interrupt moderation is applied on the adapter card.
336 All transmit and receive interrupts are queued until a complete
337 moderation interval ends. If such a moderation interval ends, all
338 queued interrupts are processed in one big bunch without any delay.
339 The term 'static' reflects the fact, that interrupt moderation is
340 always enabled, regardless how much network load is currently
341 passing via a particular interface. In addition, the duration of
342 the moderation interval has a fixed length that never changes while
343 the driver is operational.
344
345 -- Dynamic - Interrupt moderation might be applied on the adapter card,
346 depending on the load of the system. If the driver detects that the
347 system load is too high, the driver tries to shield the system against
348 too much network load by enabling interrupt moderation. If - at a later
349 time - the CPU utilization decreases again (or if the network load is
350 negligible) the interrupt moderation will automatically be disabled.
351
352Interrupt moderation should be used when the driver has to handle one or more
353interfaces with a high network load, which - as a consequence - leads also to a
354high CPU utilization. When moderation is applied in such high network load
355situations, CPU load might be reduced by 20-30%.
356
357NOTE: The drawback of using interrupt moderation is an increase of the round-
358trip-time (RTT), due to the queueing and serving of interrupts at dedicated
359moderation times.
360
361Interrupts per second
362---------------------
363Parameter: IntsPerSec
364Values: 30...40000 (interrupts per second)
365Default: 2000
366
367This parameter is only used if either static or dynamic interrupt moderation
368is used on a network adapter card. Using this parameter if no moderation is
369applied will lead to no action performed.
370
371This parameter determines the length of any interrupt moderation interval.
372Assuming that static interrupt moderation is to be used, an 'IntsPerSec'
373parameter value of 2000 will lead to an interrupt moderation interval of
374500 microseconds.
375
376NOTE: The duration of the moderation interval is to be chosen with care.
377At first glance, selecting a very long duration (e.g. only 100 interrupts per
378second) seems to be meaningful, but the increase of packet-processing delay
379is tremendous. On the other hand, selecting a very short moderation time might
380cancel out the benefit of any moderation being applied.
381
382
383Preferred Port
384--------------
385Parameter: PrefPort
386Values: A, B
387Default: A
388
389This is used to force the preferred port to A or B (on dual-port network
390adapters). The preferred port is the one that is used if both are detected
391as fully functional.
392
393RLMT Mode (Redundant Link Management Technology)
394------------------------------------------------
395Parameter: RlmtMode
396Values: CheckLinkState,CheckLocalPort, CheckSeg, DualNet
397Default: CheckLinkState
398
399RLMT monitors the status of the port. If the link of the active port
400fails, RLMT switches immediately to the standby link. The virtual link is
401maintained as long as at least one 'physical' link is up.
402
403Possible modes:
404
405 -- CheckLinkState - Check link state only: RLMT uses the link state
406 reported by the adapter hardware for each individual port to
407 determine whether a port can be used for all network traffic or
408 not.
409
410 -- CheckLocalPort - In this mode, RLMT monitors the network path
411 between the two ports of an adapter by regularly exchanging packets
412 between them. This mode requires a network configuration in which
413 the two ports are able to "see" each other (i.e. there must not be
414 any router between the ports).
415
416 -- CheckSeg - Check local port and segmentation: This mode supports the
417 same functions as the CheckLocalPort mode and additionally checks
418 network segmentation between the ports. Therefore, this mode is only
419 to be used if Gigabit Ethernet switches are installed on the network
420 that have been configured to use the Spanning Tree protocol.
421
422 -- DualNet - In this mode, ports A and B are used as separate devices.
423 If you have a dual port adapter, port A will be configured as eth0
424 and port B as eth1. Both ports can be used independently with
425 distinct IP addresses. The preferred port setting is not used.
426 RLMT is turned off.
427
428NOTE: RLMT modes CheckLocalPort (CLP) and CheckSeg (CLPSS) are designed to
429 operate in configurations where a network path between the ports on one
430 adapter exists. Moreover, they are not designed to work where adapters
431 are connected back-to-back.
432***
433
434
4355 Large Frame Support
436======================
437
438The driver supports large frames (also called jumbo frames). Using large
439frames can result in an improved throughput if transferring large amounts
440of data.
441To enable large frames, set the MTU (maximum transmission unit) of the
442interface to the desired value (up to 9000) by executing the following
443command:
444 ifconfig eth0 mtu 9000
445This will only work if you have two adapters connected back-to-back
446or if you use a switch that supports large frames. When using a switch,
447it should be configured to allow large frames and auto-negotiation should
448be set to OFF. The setting must be configured on all adapters that can be
449reached by the large frames. If one adapter is not set to receive large
450frames, it will simply drop them.
451
452You can switch back to the standard ethernet frame size by executing the
453following command:
454 ifconfig eth0 mtu 1500
455
456To permanently configure this setting, add a script with the 'ifconfig'
457line to the system startup sequence (named something like "S99sk98lin"
458in /etc/rc.d/rc2.d).
459***
460
461
4626 VLAN and Link Aggregation Support (IEEE 802.1, 802.1q, 802.3ad)
463==================================================================
464
465The Marvell Yukon/SysKonnect Linux drivers are able to support VLAN and
466Link Aggregation according to IEEE standards 802.1, 802.1q, and 802.3ad.
467These features are only available after installation of open source
468modules available on the Internet:
469For VLAN go to: http://www.candelatech.com/~greear/vlan.html
470For Link Aggregation go to: http://www.st.rim.or.jp/~yumo
471
472NOTE: SysKonnect GmbH does not offer any support for these open source
473 modules and does not take the responsibility for any kind of
474 failures or problems arising in connection with these modules.
475
476NOTE: Configuring Link Aggregation on a SysKonnect dual link adapter may
477 cause problems when unloading the driver.
478
479
4807 Troubleshooting
481==================
482
483If any problems occur during the installation process, check the
484following list:
485
486
487Problem: The SK-98xx adapter cannot be found by the driver.
488Solution: In /proc/pci search for the following entry:
489 'Ethernet controller: SysKonnect SK-98xx ...'
490 If this entry exists, the SK-98xx or SK-98xx V2.0 adapter has
491 been found by the system and should be operational.
492 If this entry does not exist or if the file '/proc/pci' is not
493 found, there may be a hardware problem or the PCI support may
494 not be enabled in your kernel.
495 The adapter can be checked using the diagnostics program which
496 is available on the SysKonnect web site:
497 www.syskonnect.com
498
499 Some COMPAQ machines have problems dealing with PCI under Linux.
500 This problem is described in the 'PCI howto' document
501 (included in some distributions or available from the
502 web, e.g. at 'www.linux.org').
503
504
505Problem: Programs such as 'ifconfig' or 'route' cannot be found or the
506 error message 'Operation not permitted' is displayed.
507Reason: You are not logged in as user 'root'.
508Solution: Logout and login as 'root' or change to 'root' via 'su'.
509
510
511Problem: Upon use of the command 'ping <address>' the message
512 "ping: sendto: Network is unreachable" is displayed.
513Reason: Your route is not set correctly.
514Solution: If you are using RedHat, you probably forgot to set up the
515 route in the 'network configuration'.
516 Check the existing routes with the 'route' command and check
517 if an entry for 'eth0' exists, and if so, if it is set correctly.
518
519
520Problem: The driver can be started, the adapter is connected to the
521 network, but you cannot receive or transmit any packets;
522 e.g. 'ping' does not work.
523Reason: There is an incorrect route in your routing table.
524Solution: Check the routing table with the command 'route' and read the
525 manual help pages dealing with routes (enter 'man route').
526
527NOTE: Although the 2.2.x kernel versions generate the routing entry
528 automatically, problems of this kind may occur here as well. We've
529 come across a situation in which the driver started correctly at
530 system start, but after the driver has been removed and reloaded,
531 the route of the adapter's network pointed to the 'dummy0' device
532 and had to be corrected manually.
533
534
535Problem: Your computer should act as a router between multiple
536 IP subnetworks (using multiple adapters), but computers in
537 other subnetworks cannot be reached.
538Reason: Either the router's kernel is not configured for IP forwarding
539 or the routing table and gateway configuration of at least one
540 computer is not working.
541
542Problem: Upon driver start, the following error message is displayed:
543 "eth0: -- ERROR --
544 Class: internal Software error
545 Nr: 0xcc
546 Msg: SkGeInitPort() cannot init running ports"
547Reason: You are using a driver compiled for single processor machines
548 on a multiprocessor machine with SMP (Symmetric MultiProcessor)
549 kernel.
550Solution: Configure your kernel appropriately and recompile the kernel or
551 the modules.
552
553
554
555If your problem is not listed here, please contact SysKonnect's technical
556support for help (linux@syskonnect.de).
557When contacting our technical support, please ensure that the following
558information is available:
559- System Manufacturer and HW Information (CPU, Memory... )
560- PCI-Boards in your system
561- Distribution
562- Kernel version
563- Driver version
564***
565
566
567
568***End of Readme File***
diff --git a/Documentation/networking/wan-router.txt b/Documentation/networking/wan-router.txt
deleted file mode 100644
index bc2ab419a74a..000000000000
--- a/Documentation/networking/wan-router.txt
+++ /dev/null
@@ -1,621 +0,0 @@
1------------------------------------------------------------------------------
2Linux WAN Router Utilities Package
3------------------------------------------------------------------------------
4Version 2.2.1
5Mar 28, 2001
6Author: Nenad Corbic <ncorbic@sangoma.com>
7Copyright (c) 1995-2001 Sangoma Technologies Inc.
8------------------------------------------------------------------------------
9
10INTRODUCTION
11
12Wide Area Networks (WANs) are used to interconnect Local Area Networks (LANs)
13and/or stand-alone hosts over vast distances with data transfer rates
14significantly higher than those achievable with commonly used dial-up
15connections.
16
17Usually an external device called `WAN router' sitting on your local network
18or connected to your machine's serial port provides physical connection to
19WAN. Although router's job may be as simple as taking your local network
20traffic, converting it to WAN format and piping it through the WAN link, these
21devices are notoriously expensive, with prices as much as 2 - 5 times higher
22than the price of a typical PC box.
23
24Alternatively, considering robustness and multitasking capabilities of Linux,
25an internal router can be built (most routers use some sort of stripped down
26Unix-like operating system anyway). With a number of relatively inexpensive WAN
27interface cards available on the market, a perfectly usable router can be
28built for less than half the price of an external router. Yet a Linux box
29acting as a router can still be used for other purposes, such as fire-walling,
30running FTP, WWW or DNS server, etc.
31
32This kernel module introduces the notion of a WAN Link Driver (WLD) to Linux
33operating system and provides generic hardware-independent services for such
34drivers. Why can existing Linux network device interface not be used for
35this purpose? Well, it can. However, there are a few key differences between
36a typical network interface (e.g. Ethernet) and a WAN link.
37
38Many WAN protocols, such as X.25 and frame relay, allow for multiple logical
39connections (known as `virtual circuits' in X.25 terminology) over a single
40physical link. Each such virtual circuit may (and almost always does) lead
41to a different geographical location and, therefore, different network. As a
42result, it is the virtual circuit, not the physical link, that represents a
43route and, therefore, a network interface in Linux terms.
44
45To further complicate things, virtual circuits are usually volatile in nature
46(excluding so called `permanent' virtual circuits or PVCs). With almost no
47time required to set up and tear down a virtual circuit, it is highly desirable
48to implement on-demand connections in order to minimize network charges. So
49unlike a typical network driver, the WAN driver must be able to handle multiple
50network interfaces and cope as multiple virtual circuits come into existence
51and go away dynamically.
52
53Last, but not least, WAN configuration is much more complex than that of say
54Ethernet and may well amount to several dozens of parameters. Some of them
55are "link-wide" while others are virtual circuit-specific. The same holds
56true for WAN statistics which is by far more extensive and extremely useful
57when troubleshooting WAN connections. Extending the ifconfig utility to suit
58these needs may be possible, but does not seem quite reasonable. Therefore, a
59WAN configuration utility and corresponding application programmer's interface
60is needed for this purpose.
61
62Most of these problems are taken care of by this module. Its goal is to
63provide a user with more-or-less standard look and feel for all WAN devices and
64assist a WAN device driver writer by providing common services, such as:
65
66 o User-level interface via /proc file system
67 o Centralized configuration
68 o Device management (setup, shutdown, etc.)
69 o Network interface management (dynamic creation/destruction)
70 o Protocol encapsulation/decapsulation
71
72To be able to use the Linux WAN Router you will also need a WAN Tools package
73available from
74
75 ftp.sangoma.com/pub/linux/current_wanpipe/wanpipe-X.Y.Z.tgz
76
77where X.Y.Z represents the wanpipe version number.
78
79For technical questions and/or comments please e-mail to ncorbic@sangoma.com.
80For general inquiries please contact Sangoma Technologies Inc. by
81
82 Hotline: 1-800-388-2475 (USA and Canada, toll free)
83 Phone: (905) 474-1990 ext: 106
84 Fax: (905) 474-9223
85 E-mail: dm@sangoma.com (David Mandelstam)
86 WWW: http://www.sangoma.com
87
88
89INSTALLATION
90
91Please read the WanpipeForLinux.pdf manual on how to
92install the WANPIPE tools and drivers properly.
93
94
95After installing wanpipe package: /usr/local/wanrouter/doc.
96On the ftp.sangoma.com : /linux/current_wanpipe/doc
97
98
99COPYRIGHT AND LICENSING INFORMATION
100
101This program is free software; you can redistribute it and/or modify it under
102the terms of the GNU General Public License as published by the Free Software
103Foundation; either version 2, or (at your option) any later version.
104
105This program is distributed in the hope that it will be useful, but WITHOUT
106ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
107FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
108
109You should have received a copy of the GNU General Public License along with
110this program; if not, write to the Free Software Foundation, Inc., 675 Mass
111Ave, Cambridge, MA 02139, USA.
112
113
114
115ACKNOWLEDGEMENTS
116
117This product is based on the WANPIPE(tm) Multiprotocol WAN Router developed
118by Sangoma Technologies Inc. for Linux 2.0.x and 2.2.x. Success of the WANPIPE
119together with the next major release of Linux kernel in summer 1996 commanded
120adequate changes to the WANPIPE code to take full advantage of new Linux
121features.
122
123Instead of continuing to develop a proprietary interface tied to Sangoma WAN
124cards, we decided to separate all hardware-independent code into a separate
125module and defined two levels of interfaces - one for user-level applications
126and another for kernel-level WAN drivers. WANPIPE is now implemented as a
127WAN driver compliant with the WAN Link Driver interface. Also a general
128purpose WAN configuration utility and a set of shell scripts was developed to
129support WAN router at the user level.
130
131Many useful ideas concerning hardware-independent interface implementation
132were given by Mike McLagan <mike.mclagan@linux.org> and his implementation
133of the Frame Relay router and drivers for Sangoma cards (dlci/sdla).
134
135With the new implementation of the APIs being incorporated into the WANPIPE,
136special thanks go to Alan Cox for providing insight into BSD sockets.
137
138Special thanks to all the WANPIPE users who performed field-testing, reported
139bugs and made valuable comments and suggestions that help us to improve this
140product.
141
142
143
144NEW IN THIS RELEASE
145
146 o Updated the WANCFG utility
147 Calls the pppconfig to configure the PPPD
148 for async connections.
149
150 o Added the PPPCONFIG utility
151 Used to configure the PPPD daemon for the
152 WANPIPE Async PPP and standard serial port.
153 The wancfg calls the pppconfig to configure
154 the pppd.
155
156 o Fixed the PCI autodetect feature.
157 The SLOT 0 was used as an autodetect option
158 however, some high end PC's slot numbers start
159 from 0.
160
161 o This release has been tested with the new backupd
162 daemon release.
163
164
165PRODUCT COMPONENTS AND RELATED FILES
166
167/etc: (or user defined)
168 wanpipe1.conf default router configuration file
169
170/lib/modules/X.Y.Z/misc:
171 wanrouter.o router kernel loadable module
172 af_wanpipe.o wanpipe api socket module
173
174/lib/modules/X.Y.Z/net:
175 sdladrv.o Sangoma SDLA support module
176 wanpipe.o Sangoma WANPIPE(tm) driver module
177
178/proc/net/wanrouter
179 Config reads current router configuration
180 Status reads current router status
181 {name} reads WAN driver statistics
182
183/usr/sbin:
184 wanrouter wanrouter start-up script
185 wanconfig wanrouter configuration utility
186 sdladump WANPIPE adapter memory dump utility
187 fpipemon Monitor for Frame Relay
188 cpipemon Monitor for Cisco HDLC
189 ppipemon Monitor for PPP
190 xpipemon Monitor for X25
191 wpkbdmon WANPIPE keyboard led monitor/debugger
192
193/usr/local/wanrouter:
194 README this file
195 COPYING GNU General Public License
196 Setup installation script
197 Filelist distribution definition file
198 wanrouter.rc meta-configuration file
199 (used by the Setup and wanrouter script)
200
201/usr/local/wanrouter/doc:
202 wanpipeForLinux.pdf WAN Router User's Manual
203
204/usr/local/wanrouter/patches:
205 wanrouter-v2213.gz patch for Linux kernels 2.2.11 up to 2.2.13.
206 wanrouter-v2214.gz patch for Linux kernel 2.2.14.
207 wanrouter-v2215.gz patch for Linux kernels 2.2.15 to 2.2.17.
208 wanrouter-v2218.gz patch for Linux kernels 2.2.18 and up.
209 wanrouter-v240.gz patch for Linux kernel 2.4.0.
210 wanrouter-v242.gz patch for Linux kernel 2.4.2 and up.
211 wanrouter-v2034.gz patch for Linux kernel 2.0.34
212 wanrouter-v2036.gz patch for Linux kernel 2.0.36 and up.
213
214/usr/local/wanrouter/patches/kdrivers:
215 Sources of the latest WANPIPE device drivers.
216 These are used to UPGRADE the linux kernel to the newest
217 version if the kernel source has already been patched with
218 WANPIPE drivers.
219
220/usr/local/wanrouter/samples:
221 interface sample interface configuration file
222 wanpipe1.cpri CHDLC primary port
223 wanpipe2.csec CHDLC secondary port
224 wanpipe1.fr Frame Relay protocol
225 wanpipe1.ppp PPP protocol
226 wanpipe1.asy CHDLC ASYNC protocol
227 wanpipe1.x25 X25 protocol
228 wanpipe1.stty Sync TTY driver (Used by Kernel PPPD daemon)
229 wanpipe1.atty Async TTY driver (Used by Kernel PPPD daemon)
230 wanrouter.rc sample meta-configuration file
231
232/usr/local/wanrouter/util:
233 * wan-tools utilities source code
234
235/usr/local/wanrouter/api/x25:
236 * x25 api sample programs.
237/usr/local/wanrouter/api/chdlc:
238 * chdlc api sample programs.
239/usr/local/wanrouter/api/fr:
240 * fr api sample programs.
241/usr/local/wanrouter/config/wancfg:
242 wancfg WANPIPE GUI configuration program.
243 Creates wanpipe#.conf files.
244/usr/local/wanrouter/config/cfgft1:
245 cfgft1 GUI CSU/DSU configuration program.
246
247/usr/include/linux:
248 wanrouter.h router API definitions
249 wanpipe.h WANPIPE API definitions
250 sdladrv.h SDLA support module API definitions
251 sdlasfm.h SDLA firmware module definitions
252 if_wanpipe.h WANPIPE Socket definitions
253 sdlapci.h WANPIPE PCI definitions
254
255
256/usr/src/linux/net/wanrouter:
257 * wanrouter source code
258
259/var/log:
260 wanrouter wanrouter start-up log (created by the Setup script)
261
262/var/lock: (or /var/lock/subsys for RedHat)
263 wanrouter wanrouter lock file (created by the Setup script)
264
265/usr/local/wanrouter/firmware:
266 fr514.sfm Frame relay firmware for Sangoma S508/S514 card
267 cdual514.sfm Dual Port Cisco HDLC firmware for Sangoma S508/S514 card
268 ppp514.sfm PPP Firmware for Sangoma S508 and S514 cards
269 x25_508.sfm X25 Firmware for Sangoma S508 card.
270
271
272REVISION HISTORY
273
2741.0.0 December 31, 1996 Initial version
275
2761.0.1 January 30, 1997 Status and statistics can be read via /proc
277 filesystem entries.
278
2791.0.2 April 30, 1997 Added UDP management via monitors.
280
2811.0.3 June 3, 1997 UDP management for multiple boards using Frame
282 Relay and PPP
283 Enabled continuous transmission of Configure
284 Request Packet for PPP (for 508 only)
285 Connection Timeout for PPP changed from 900 to 0
286 Flow Control Problem fixed for Frame Relay
287
2881.0.4 July 10, 1997 S508/FT1 monitoring capability in fpipemon and
289 ppipemon utilities.
290 Configurable TTL for UDP packets.
291 Multicast and Broadcast IP source addresses are
292 silently discarded.
293
2941.0.5 July 28, 1997 Configurable T391,T392,N391,N392,N393 for Frame
295 Relay in router.conf.
296 Configurable Memory Address through router.conf
297 for Frame Relay, PPP and X.25. (commenting this
298 out enables auto-detection).
299 Fixed freeing up received buffers using kfree()
300 for Frame Relay and X.25.
301 Protect sdla_peek() by calling save_flags(),
302 cli() and restore_flags().
303 Changed number of Trace elements from 32 to 20
304 Added DLCI specific data monitoring in FPIPEMON.
3052.0.0 Nov 07, 1997 Implemented protection of RACE conditions by
306 critical flags for FRAME RELAY and PPP.
307 DLCI List interrupt mode implemented.
308 IPX support in FRAME RELAY and PPP.
309 IPX Server Support (MARS)
310 More driver specific stats included in FPIPEMON
311 and PIPEMON.
312
3132.0.1 Nov 28, 1997 Bug Fixes for version 2.0.0.
314 Protection of "enable_irq()" while
315 "disable_irq()" has been enabled from any other
316 routine (for Frame Relay, PPP and X25).
317 Added additional Stats for Fpipemon and Ppipemon
318 Improved Load Sharing for multiple boards
319
3202.0.2 Dec 09, 1997 Support for PAP and CHAP for ppp has been
321 implemented.
322
3232.0.3 Aug 15, 1998 New release supporting Cisco HDLC, CIR for Frame
324 relay, Dynamic IP assignment for PPP and Inverse
325 Arp support for Frame-relay. Man Pages are
326 included for better support and a new utility
327 for configuring FT1 cards.
328
3292.0.4 Dec 09, 1998 Dual Port support for Cisco HDLC.
330 Support for HDLC (LAPB) API.
331 Supports BiSync Streaming code for S502E
332 and S503 cards.
333 Support for Streaming HDLC API.
334 Provides a BSD socket interface for
335 creating applications using BiSync
336 streaming.
337
3382.0.5 Aug 04, 1999 CHDLC initialization bug fix.
339 PPP interrupt driven driver:
340 Fix to the PPP line hangup problem.
341 New PPP firmware
342 Added comments to the startup SYSTEM ERROR messages
343 Xpipemon debugging application for the X25 protocol
344 New USER_MANUAL.txt
345 Fixed the odd boundary 4byte writes to the board.
346 BiSync Streaming code has been taken out.
347 Available as a patch.
348 Streaming HDLC API has been taken out.
349 Available as a patch.
350
3512.0.6 Aug 17, 1999 Increased debugging in startup scripts
352 Fixed installation bugs from 2.0.5
353 Kernel patch works for both 2.2.10 and 2.2.11 kernels.
354 There is no functional difference between the two packages
355
3562.0.7 Aug 26, 1999 o Merged X25API code into WANPIPE.
357 o Fixed a memory leak for X25API
358 o Updated the X25API code for 2.2.X kernels.
359 o Improved NEM handling.
360
3612.1.0 Oct 25, 1999 o New code for S514 PCI Card
362 o New CHDLC and Frame Relay drivers
363 o PPP and X25 are not supported in this release
364
3652.1.1 Nov 30, 1999 o PPP support for S514 PCI Cards
366
3672.1.3 Apr 06, 2000 o Socket based x25api
368 o Socket based chdlc api
369 o Socket based fr api
370 o Dual Port Receive only CHDLC support.
371 o Asynchronous CHDLC support (Secondary Port)
372 o cfgft1 GUI csu/dsu configurator
373 o wancfg GUI configuration file
374 configurator.
375 o Architectural directory changes.
376
377beta-2.1.4 Jul 2000 o Dynamic interface configuration:
378 Network interfaces reflect the state
379 of protocol layer. If the protocol becomes
380 disconnected, driver will bring down
381 the interface. Once the protocol reconnects
382 the interface will be brought up.
383
384 Note: This option is turned off by default.
385
386 o Dynamic wanrouter setup using 'wanconfig':
387 wanconfig utility can be used to
388 shutdown,restart,start or reconfigure
389 a virtual circuit dynamically.
390
391 Frame Relay: Each DLCI can be:
392 created,stopped,restarted and reconfigured
393 dynamically using wanconfig.
394
395 ex: wanconfig card wanpipe1 dev wp1_fr16 up
396
397 o Wanrouter startup via command line arguments:
398 wanconfig also supports wanrouter startup via command line
399 arguments. Thus, there is no need to create a wanpipe#.conf
400 configuration file.
401
402 o Socket based x25api update/bug fixes.
403 Added support for LCN numbers greater than 255.
404 Option to pass up modem messages.
405 Provided a PCI IRQ check, so a single S514
406 card is guaranteed to have a non-sharing interrupt.
407
408 o Fixes to the wancfg utility.
409 o New FT1 debugging support via *pipemon utilities.
410 o Frame Relay ARP support Enabled.
411
412beta3-2.1.4 Jul 2000 o X25 M_BIT Problem fix.
413 o Added the Multi-Port PPP
414 Updated utilities for the Multi-Port PPP.
415
4162.1.4 Aug 2000
417 o In X25API:
418 Maximum packet an application can send
419 to the driver has been extended to 4096 bytes.
420
421 Fixed the x25 startup bug. Enable
422 communications only after all interfaces
423 come up. HIGH SVC/PVC is used to calculate
424 the number of channels.
425 Enable protocol only after all interfaces
426 are enabled.
427
428 o Added an extra state to the FT1 config, kernel module.
429 o Updated the pipemon debuggers.
430
431 o Blocked the Multi-Port PPP from running on kernels
432 2.2.16 or greater, due to syncppp kernel module
433 change.
434
435beta1-2.1.5 Nov 15 2000
436 o Fixed the MultiPort PPP Support for kernels 2.2.16 and above.
437 2.2.X kernels only
438
439 o Secured the driver UDP debugging calls
440 - All illegal network debugging calls are reported to
441 the log.
442 - Defined a set of allowed commands, all other denied.
443
444 o Cpipemon
445 - Added set FT1 commands to the cpipemon. Thus CSU/DSU
446 configuration can be performed using cpipemon.
447 All systems that cannot run cfgft1 GUI utility should
448 use cpipemon to configure the on board CSU/DSU.
449
450
451 o Keyboard Led Monitor/Debugger
452 - A new utility /usr/sbin/wpkbdmon uses keyboard leds
453 to convey operational statistic information of the
454 Sangoma WANPIPE cards.
455 NUM_LOCK = Line State (On=connected, Off=disconnected)
456 CAPS_LOCK = Tx data (On=transmitting, Off=no tx data)
457 SCROLL_LOCK = Rx data (On=receiving, Off=no rx data)
458
459 o Hardware probe on module load and dynamic device allocation
460 - During WANPIPE module load, all Sangoma cards are probed
461 and found information is printed in the /var/log/messages.
462 - If no cards are found, the module load fails.
463 - Appropriate number of devices are dynamically loaded
464 based on the number of Sangoma cards found.
465
466 Note: The kernel configuration option
467 CONFIG_WANPIPE_CARDS has been taken out.
468
469 o Fixed the Frame Relay and Chdlc network interfaces so they are
470 compatible with libpcap libraries. Meaning, tcpdump, snort,
471 ethereal, and all other packet sniffers and debuggers work on
472 all WANPIPE network interfaces.
473 - Set the network interface encoding type to ARPHRD_PPP.
474 This tells the sniffers that data obtained from the
475 network interface is in pure IP format.
476 Fix for 2.2.X kernels only.
477
478 o True interface encoding option for Frame Relay and CHDLC
479 - The above fix sets the network interface encoding
480 type to ARPHRD_PPP, however some customers use
481 the encoding interface type to determine the
482 protocol running. Therefore, the TRUE ENCODING
483 option will set the interface type back to the
484 original value.
485
486 NOTE: If this option is used with Frame Relay and CHDLC
487 libpcap library support will be broken.
488 i.e. tcpdump will not work.
489 Fix for 2.2.x Kernels only.
490
491 o Ethernet Bridging over Frame Relay
492 - The Frame Relay bridging has been developed by
493 Kristian Hoffmann and Mark Wells.
494 - The Linux kernel bridge is used to send ethernet
495 data over the frame relay links.
496 For 2.2.X Kernels only.
497
498 o Added extensive 2.0.X support. Most new features of
499 2.1.5 for protocols Frame Relay, PPP and CHDLC are
500 supported under 2.0.X kernels.
501
502beta1-2.2.0 Dec 30 2000
503 o Updated drivers for 2.4.X kernels.
504 o Updated drivers for SMP support.
505 o X25API is now able to share PCI interrupts.
506 o Took out a general polling routine that was used
507 only by X25API.
508 o Added appropriate locks to the dynamic reconfiguration
509 code.
510 o Fixed a bug in the keyboard debug monitor.
511
512beta2-2.2.0 Jan 8 2001
513 o Patches for 2.4.0 kernel
514 o Patches for 2.2.18 kernel
515 o Minor updates to PPP and CHDLC drivers.
516 Note: No functional difference.
517
518beta3-2.2.0 Jan 10 2001
519 o I missed the 2.2.18 kernel patches in beta2-2.2.0
520 release. They are included in this release.
521
522Stable Release
5232.2.0 Feb 01 2001
524 o Bug fix in wancfg GUI configurator.
525 The edit function didn't work properly.
526
527
528beta1-2.2.1 Feb 09 2001
529 o WANPIPE TTY Driver emulation.
530 Two modes of operation Sync and Async.
531 Sync: Using the PPPD daemon, kernel SyncPPP layer
532 and the Wanpipe sync TTY driver: a PPP protocol
533 connection can be established via Sangoma adapter, over
534 a T1 leased line.
535
536 The 2.4.0 kernel PPP layer supports MULTILINK
537 protocol, that can be used to bundle any number of Sangoma
538 adapters (T1 lines) into one, under a single IP address.
539 Thus, efficiently obtaining multiple T1 throughput.
540
541 NOTE: The remote side must also implement MULTILINK PPP
542 protocol.
543
544 Async:Using the PPPD daemon, kernel AsyncPPP layer
545 and the WANPIPE async TTY driver: a PPP protocol
546 connection can be established via Sangoma adapter and
547 a modem, over a telephone line.
548
549 Thus, the WANPIPE async TTY driver simulates a serial
550 TTY driver that would normally be used to interface the
551 MODEM to the linux kernel.
552
553 o WANPIPE PPP Backup Utility
554 This utility will monitor the state of the PPP T1 line.
555 In case of failure, a dial up connection will be established
556 via pppd daemon, either via a serial tty driver (serial port),
557 or a WANPIPE async TTY driver (in case serial port is unavailable).
558
559 Furthermore, while in dial up mode, the primary PPP T1 link
560 will be monitored for signs of life.
561
562 If the PPP T1 link comes back to life, the dial up connection
563 will be shutdown and T1 line re-established.
564
565
566 o New Setup installation script.
567 Option to UPGRADE device drivers if the kernel source has
568 already been patched with WANPIPE.
569
570 Option to COMPILE WANPIPE modules against the currently
571 running kernel, thus no need for manual kernel and module
572 re-compilation.
573
574 o Updates and Bug Fixes to wancfg utility.
575
576beta2-2.2.1 Feb 20 2001
577
578 o Bug fixes to the CHDLC device drivers.
579 The driver had compilation problems under kernels
580 2.2.14 or lower.
581
582 o Bug fixes to the Setup installation script.
583 The device drivers compilation options didn't work
584 properly.
585
586 o Update to the wpbackupd daemon.
587 Optimized the cross-over times, between the primary
588 link and the backup dialup.
589
590beta3-2.2.1 Mar 02 2001
591 o Patches for 2.4.2 kernel.
592
593 o Bug fixes to util/ make files.
594 o Bug fixes to the Setup installation script.
595
596 o Took out the backupd support and made it into
597 a separate package.
598
599beta4-2.2.1 Mar 12 2001
600
601 o Fix to the Frame Relay Device driver.
602 IPSEC sends a packet of zero length
603 header to the frame relay driver. The
604 driver tries to push its own 2 byte header
605 into the packet, which causes the driver to
606 crash.
607
608 o Fix the WANPIPE re-configuration code.
609 Bug was found by trying to run the cfgft1 while the
610 interface was already running.
611
612 o Updates to cfgft1.
613 Writes a wanpipe#.cfgft1 configuration file
614 once the CSU/DSU is configured. This file
615 holds the current CSU/DSU configuration.
616
617
618
619>>>>>> END OF README <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
620
621
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
index c025a4561c10..757c729ee42e 100644
--- a/Documentation/nmi_watchdog.txt
+++ b/Documentation/nmi_watchdog.txt
@@ -23,8 +23,7 @@ kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
23may implicitly disable the NMI watchdog.] 23may implicitly disable the NMI watchdog.]
24 24
25For x86-64, the needed APIC is always compiled in, and the NMI watchdog is 25For x86-64, the needed APIC is always compiled in, and the NMI watchdog is
26always enabled with I/O-APIC mode (nmi_watchdog=1). Currently, local APIC 26always enabled with I/O-APIC mode (nmi_watchdog=1).
27mode (nmi_watchdog=2) does not work on x86-64.
28 27
29Using local APIC (nmi_watchdog=2) needs the first performance register, so 28Using local APIC (nmi_watchdog=2) needs the first performance register, so
30you can't use it for other purposes (such as high precision performance 29you can't use it for other purposes (such as high precision performance
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
index 7f60dfe642ca..b152e81da592 100644
--- a/Documentation/oops-tracing.txt
+++ b/Documentation/oops-tracing.txt
@@ -253,6 +253,10 @@ characters, each representing a particular tainted value.
253 253
254 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG. 254 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG.
255 255
256 9: 'A' if the ACPI table has been overridden.
257
258 10: 'W' if a warning has previously been issued by the kernel.
259
256The primary reason for the 'Tainted: ' string is to tell kernel 260The primary reason for the 'Tainted: ' string is to tell kernel
257debuggers if this is a clean kernel or if anything unusual has 261debuggers if this is a clean kernel or if anything unusual has
258occurred. Tainting is permanent: even if an offending module is 262occurred. Tainting is permanent: even if an offending module is
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 461e4f1dbec4..421e7d00ffd0 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -196,6 +196,11 @@ its parent; and can't be removed or suspended after that parent.
196 196
197The policy is that the device tree should match hardware bus topology. 197The policy is that the device tree should match hardware bus topology.
198(Or at least the control bus, for devices which use multiple busses.) 198(Or at least the control bus, for devices which use multiple busses.)
199In particular, this means that a device registration may fail if the parent of
200the device is suspending (ie. has been chosen by the PM core as the next
201device to suspend) or has already suspended, as well as after all of the other
202devices have been suspended. Device drivers must be prepared to cope with such
203situations.
199 204
200 205
201Suspending Devices 206Suspending Devices
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 7b4e8a70882c..1d2a772506cf 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -59,12 +59,39 @@ Table of Contents
59 p) Freescale Synchronous Serial Interface 59 p) Freescale Synchronous Serial Interface
60 q) USB EHCI controllers 60 q) USB EHCI controllers
61 61
62 VII - Specifying interrupt information for devices 62 VII - Marvell Discovery mv64[345]6x System Controller chips
63 1) The /system-controller node
64 2) Child nodes of /system-controller
65 a) Marvell Discovery MDIO bus
66 b) Marvell Discovery ethernet controller
67 c) Marvell Discovery PHY nodes
68 d) Marvell Discovery SDMA nodes
69 e) Marvell Discovery BRG nodes
70 f) Marvell Discovery CUNIT nodes
71 g) Marvell Discovery MPSCROUTING nodes
72 h) Marvell Discovery MPSCINTR nodes
73 i) Marvell Discovery MPSC nodes
74 j) Marvell Discovery Watch Dog Timer nodes
75 k) Marvell Discovery I2C nodes
76 l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes
77 m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes
78 n) Marvell Discovery GPP (General Purpose Pins) nodes
79 o) Marvell Discovery PCI host bridge node
80 p) Marvell Discovery CPU Error nodes
81 q) Marvell Discovery SRAM Controller nodes
82 r) Marvell Discovery PCI Error Handler nodes
83 s) Marvell Discovery Memory Controller nodes
84
85 VIII - Specifying interrupt information for devices
63 1) interrupts property 86 1) interrupts property
64 2) interrupt-parent property 87 2) interrupt-parent property
65 3) OpenPIC Interrupt Controllers 88 3) OpenPIC Interrupt Controllers
66 4) ISA Interrupt Controllers 89 4) ISA Interrupt Controllers
67 90
91 IX - Specifying GPIO information for devices
92 1) gpios property
93 2) gpio-controller nodes
94
68 Appendix A - Sample SOC node for MPC8540 95 Appendix A - Sample SOC node for MPC8540
69 96
70 97
@@ -1269,10 +1296,6 @@ platforms are moved over to use the flattened-device-tree model.
1269 1296
1270 Recommended properties: 1297 Recommended properties:
1271 1298
1272 - linux,network-index : This is the intended "index" of this
1273 network device. This is used by the bootwrapper to interpret
1274 MAC addresses passed by the firmware when no information other
1275 than indices is available to associate an address with a device.
1276 - phy-connection-type : a string naming the controller/PHY interface type, 1299 - phy-connection-type : a string naming the controller/PHY interface type,
1277 i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii", 1300 i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id", "sgmii",
1278 "tbi", or "rtbi". This property is only really needed if the connection 1301 "tbi", or "rtbi". This property is only really needed if the connection
@@ -1622,8 +1645,7 @@ platforms are moved over to use the flattened-device-tree model.
1622 - device_type : should be "network", "hldc", "uart", "transparent" 1645 - device_type : should be "network", "hldc", "uart", "transparent"
1623 "bisync", "atm", or "serial". 1646 "bisync", "atm", or "serial".
1624 - compatible : could be "ucc_geth" or "fsl_atm" and so on. 1647 - compatible : could be "ucc_geth" or "fsl_atm" and so on.
1625 - model : should be "UCC". 1648 - cell-index : the ucc number(1-8), corresponding to UCCx in UM.
1626 - device-id : the ucc number(1-8), corresponding to UCCx in UM.
1627 - reg : Offset and length of the register set for the device 1649 - reg : Offset and length of the register set for the device
1628 - interrupts : <a b> where a is the interrupt number and b is a 1650 - interrupts : <a b> where a is the interrupt number and b is a
1629 field that represents an encoding of the sense and level 1651 field that represents an encoding of the sense and level
@@ -1667,10 +1689,6 @@ platforms are moved over to use the flattened-device-tree model.
1667 - phy-handle : The phandle for the PHY connected to this controller. 1689 - phy-handle : The phandle for the PHY connected to this controller.
1668 1690
1669 Recommended properties: 1691 Recommended properties:
1670 - linux,network-index : This is the intended "index" of this
1671 network device. This is used by the bootwrapper to interpret
1672 MAC addresses passed by the firmware when no information other
1673 than indices is available to associate an address with a device.
1674 - phy-connection-type : a string naming the controller/PHY interface type, 1692 - phy-connection-type : a string naming the controller/PHY interface type,
1675 i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal 1693 i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal
1676 Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only), 1694 Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only),
@@ -1680,8 +1698,7 @@ platforms are moved over to use the flattened-device-tree model.
1680 ucc@2000 { 1698 ucc@2000 {
1681 device_type = "network"; 1699 device_type = "network";
1682 compatible = "ucc_geth"; 1700 compatible = "ucc_geth";
1683 model = "UCC"; 1701 cell-index = <1>;
1684 device-id = <1>;
1685 reg = <2000 200>; 1702 reg = <2000 200>;
1686 interrupts = <a0 0>; 1703 interrupts = <a0 0>;
1687 interrupt-parent = <700>; 1704 interrupt-parent = <700>;
@@ -1995,7 +2012,6 @@ platforms are moved over to use the flattened-device-tree model.
1995 interrupts = <20 8>; 2012 interrupts = <20 8>;
1996 interrupt-parent = <&PIC>; 2013 interrupt-parent = <&PIC>;
1997 phy-handle = <&PHY0>; 2014 phy-handle = <&PHY0>;
1998 linux,network-index = <0>;
1999 fsl,cpm-command = <12000300>; 2015 fsl,cpm-command = <12000300>;
2000 }; 2016 };
2001 2017
@@ -2217,12 +2233,6 @@ platforms are moved over to use the flattened-device-tree model.
2217 EMAC, that is the content of the current (bogus) "phy-port" 2233 EMAC, that is the content of the current (bogus) "phy-port"
2218 property. 2234 property.
2219 2235
2220 Recommended properties:
2221 - linux,network-index : This is the intended "index" of this
2222 network device. This is used by the bootwrapper to interpret
2223 MAC addresses passed by the firmware when no information other
2224 than indices is available to associate an address with a device.
2225
2226 Optional properties: 2236 Optional properties:
2227 - phy-address : 1 cell, optional, MDIO address of the PHY. If absent, 2237 - phy-address : 1 cell, optional, MDIO address of the PHY. If absent,
2228 a search is performed. 2238 a search is performed.
@@ -2246,7 +2256,6 @@ platforms are moved over to use the flattened-device-tree model.
2246 Example: 2256 Example:
2247 2257
2248 EMAC0: ethernet@40000800 { 2258 EMAC0: ethernet@40000800 {
2249 linux,network-index = <0>;
2250 device_type = "network"; 2259 device_type = "network";
2251 compatible = "ibm,emac-440gp", "ibm,emac"; 2260 compatible = "ibm,emac-440gp", "ibm,emac";
2252 interrupt-parent = <&UIC1>; 2261 interrupt-parent = <&UIC1>;
@@ -2592,6 +2601,17 @@ platforms are moved over to use the flattened-device-tree model.
2592 differ between different families. May be 2601 differ between different families. May be
2593 'virtex2p', 'virtex4', or 'virtex5'. 2602 'virtex2p', 'virtex4', or 'virtex5'.
2594 2603
2604 vi) Xilinx Uart 16550
2605
2606 Xilinx UART 16550 devices are very similar to the NS16550 but with
2607 different register spacing and an offset from the base address.
2608
2609 Required properties:
2610 - clock-frequency : Frequency of the clock input
2611 - reg-offset : A value of 3 is required
2612 - reg-shift : A value of 2 is required
2613
2614
2595 p) Freescale Synchronous Serial Interface 2615 p) Freescale Synchronous Serial Interface
2596 2616
2597 The SSI is a serial device that communicates with audio codecs. It can 2617 The SSI is a serial device that communicates with audio codecs. It can
@@ -2816,10 +2836,562 @@ platforms are moved over to use the flattened-device-tree model.
2816 big-endian; 2836 big-endian;
2817 }; 2837 };
2818 2838
2839 r) Freescale Display Interface Unit
2840
2841 The Freescale DIU is a LCD controller, with proper hardware, it can also
2842 drive DVI monitors.
2843
2844 Required properties:
2845 - compatible : should be "fsl,diu".
2846 - reg : should contain at least address and length of the DIU register
2847 set.
2848 - interrupts : one DIU interrupt should be described here.
2849
2850 Example (MPC8610HPCD)
2851 display@2c000 {
2852 compatible = "fsl,diu";
2853 reg = <0x2c000 100>;
2854 interrupts = <72 2>;
2855 interrupt-parent = <&mpic>;
2856 };
2857
2858 s) Freescale on board FPGA
2859
2860 This is the memory-mapped registers for on board FPGA.
2861
2862 Required properties:
2863 - compatible : should be "fsl,fpga-pixis".
2864 - reg : should contain the address and the length of the FPGA register
2865 set.
2866
2867 Example (MPC8610HPCD)
2868 board-control@e8000000 {
2869 compatible = "fsl,fpga-pixis";
2870 reg = <0xe8000000 32>;
2871 };
2872
2873VII - Marvell Discovery mv64[345]6x System Controller chips
2874===========================================================
2875
2876The Marvell mv64[345]60 series of system controller chips contain
2877many of the peripherals needed to implement a complete computer
2878system. In this section, we define device tree nodes to describe
2879the system controller chip itself and each of the peripherals
2880which it contains. Compatible string values for each node are
2881prefixed with the string "marvell,", for Marvell Technology Group Ltd.
2882
28831) The /system-controller node
2884
2885 This node is used to represent the system-controller and must be
2886 present when the system uses a system controller chip. The top-level
2887 system-controller node contains information that is global to all
2888 devices within the system controller chip. The node name begins
2889 with "system-controller" followed by the unit address, which is
2890 the base address of the memory-mapped register set for the system
2891 controller chip.
2892
2893 Required properties:
2894
2895 - ranges : Describes the translation of system controller addresses
2896 for memory mapped registers.
2897 - clock-frequency: Contains the main clock frequency for the system
2898 controller chip.
2899 - reg : This property defines the address and size of the
2900 memory-mapped registers contained within the system controller
2901 chip. The address specified in the "reg" property should match
2902 the unit address of the system-controller node.
2903 - #address-cells : Address representation for system controller
2904 devices. This field represents the number of cells needed to
2905 represent the address of the memory-mapped registers of devices
2906 within the system controller chip.
2907 - #size-cells : Size representation for the memory-mapped
2908 registers within the system controller chip.
2909 - #interrupt-cells : Defines the width of cells used to represent
2910 interrupts.
2911
2912 Optional properties:
2913
2914 - model : The specific model of the system controller chip. Such
2915 as, "mv64360", "mv64460", or "mv64560".
2916 - compatible : A string identifying the compatibility identifiers
2917 of the system controller chip.
2918
2919 The system-controller node contains child nodes for each system
2920 controller device that the platform uses. Nodes should not be created
2921 for devices which exist on the system controller chip but are not used
2922
2923 Example Marvell Discovery mv64360 system-controller node:
2924
2925 system-controller@f1000000 { /* Marvell Discovery mv64360 */
2926 #address-cells = <1>;
2927 #size-cells = <1>;
2928 model = "mv64360"; /* Default */
2929 compatible = "marvell,mv64360";
2930 clock-frequency = <133333333>;
2931 reg = <0xf1000000 0x10000>;
2932 virtual-reg = <0xf1000000>;
2933 ranges = <0x88000000 0x88000000 0x1000000 /* PCI 0 I/O Space */
2934 0x80000000 0x80000000 0x8000000 /* PCI 0 MEM Space */
2935 0xa0000000 0xa0000000 0x4000000 /* User FLASH */
2936 0x00000000 0xf1000000 0x0010000 /* Bridge's regs */
2937 0xf2000000 0xf2000000 0x0040000>;/* Integrated SRAM */
2938
2939 [ child node definitions... ]
2940 }
2941
29422) Child nodes of /system-controller
2943
2944 a) Marvell Discovery MDIO bus
2945
2946 The MDIO is a bus to which the PHY devices are connected. For each
2947 device that exists on this bus, a child node should be created. See
2948 the definition of the PHY node below for an example of how to define
2949 a PHY.
2950
2951 Required properties:
2952 - #address-cells : Should be <1>
2953 - #size-cells : Should be <0>
2954 - device_type : Should be "mdio"
2955 - compatible : Should be "marvell,mv64360-mdio"
2956
2957 Example:
2958
2959 mdio {
2960 #address-cells = <1>;
2961 #size-cells = <0>;
2962 device_type = "mdio";
2963 compatible = "marvell,mv64360-mdio";
2964
2965 ethernet-phy@0 {
2966 ......
2967 };
2968 };
2969
2970
2971 b) Marvell Discovery ethernet controller
2972
2973 The Discovery ethernet controller is described with two levels
2974 of nodes. The first level describes an ethernet silicon block
2975 and the second level describes up to 3 ethernet nodes within
2976 that block. The reason for the multiple levels is that the
2977 registers for the node are interleaved within a single set
2978 of registers. The "ethernet-block" level describes the
2979 shared register set, and the "ethernet" nodes describe ethernet
2980 port-specific properties.
2981
2982 Ethernet block node
2983
2984 Required properties:
2985 - #address-cells : <1>
2986 - #size-cells : <0>
2987 - compatible : "marvell,mv64360-eth-block"
2988 - reg : Offset and length of the register set for this block
2989
2990 Example Discovery Ethernet block node:
2991 ethernet-block@2000 {
2992 #address-cells = <1>;
2993 #size-cells = <0>;
2994 compatible = "marvell,mv64360-eth-block";
2995 reg = <0x2000 0x2000>;
2996 ethernet@0 {
2997 .......
2998 };
2999 };
3000
3001 Ethernet port node
3002
3003 Required properties:
3004 - device_type : Should be "network".
3005 - compatible : Should be "marvell,mv64360-eth".
3006 - reg : Should be <0>, <1>, or <2>, according to which registers
3007 within the silicon block the device uses.
3008 - interrupts : <a> where a is the interrupt number for the port.
3009 - interrupt-parent : the phandle for the interrupt controller
3010 that services interrupts for this device.
3011 - phy : the phandle for the PHY connected to this ethernet
3012 controller.
3013 - local-mac-address : 6 bytes, MAC address
3014
3015 Example Discovery Ethernet port node:
3016 ethernet@0 {
3017 device_type = "network";
3018 compatible = "marvell,mv64360-eth";
3019 reg = <0>;
3020 interrupts = <32>;
3021 interrupt-parent = <&PIC>;
3022 phy = <&PHY0>;
3023 local-mac-address = [ 00 00 00 00 00 00 ];
3024 };
3025
3026
3027
3028 c) Marvell Discovery PHY nodes
3029
3030 Required properties:
3031 - device_type : Should be "ethernet-phy"
3032 - interrupts : <a> where a is the interrupt number for this phy.
3033 - interrupt-parent : the phandle for the interrupt controller that
3034 services interrupts for this device.
3035 - reg : The ID number for the phy, usually a small integer
2819 3036
2820 More devices will be defined as this spec matures. 3037 Example Discovery PHY node:
3038 ethernet-phy@1 {
3039 device_type = "ethernet-phy";
3040 compatible = "broadcom,bcm5421";
3041 interrupts = <76>; /* GPP 12 */
3042 interrupt-parent = <&PIC>;
3043 reg = <1>;
3044 };
2821 3045
2822VII - Specifying interrupt information for devices 3046
3047 d) Marvell Discovery SDMA nodes
3048
3049 Represent DMA hardware associated with the MPSC (multiprotocol
3050 serial controllers).
3051
3052 Required properties:
3053 - compatible : "marvell,mv64360-sdma"
3054 - reg : Offset and length of the register set for this device
3055 - interrupts : <a> where a is the interrupt number for the DMA
3056 device.
3057 - interrupt-parent : the phandle for the interrupt controller
3058 that services interrupts for this device.
3059
3060 Example Discovery SDMA node:
3061 sdma@4000 {
3062 compatible = "marvell,mv64360-sdma";
3063 reg = <0x4000 0xc18>;
3064 virtual-reg = <0xf1004000>;
3065 interrupts = <36>;
3066 interrupt-parent = <&PIC>;
3067 };
3068
3069
3070 e) Marvell Discovery BRG nodes
3071
3072 Represent baud rate generator hardware associated with the MPSC
3073 (multiprotocol serial controllers).
3074
3075 Required properties:
3076 - compatible : "marvell,mv64360-brg"
3077 - reg : Offset and length of the register set for this device
3078 - clock-src : A value from 0 to 15 which selects the clock
3079 source for the baud rate generator. This value corresponds
3080 to the CLKS value in the BRGx configuration register. See
3081 the mv64x60 User's Manual.
3082 - clock-frequency : The frequency (in Hz) of the baud rate
3083 generator's input clock.
3084 - current-speed : The current speed setting (presumably by
3085 firmware) of the baud rate generator.
3086
3087 Example Discovery BRG node:
3088 brg@b200 {
3089 compatible = "marvell,mv64360-brg";
3090 reg = <0xb200 0x8>;
3091 clock-src = <8>;
3092 clock-frequency = <133333333>;
3093 current-speed = <9600>;
3094 };
3095
3096
3097 f) Marvell Discovery CUNIT nodes
3098
3099 Represent the Serial Communications Unit device hardware.
3100
3101 Required properties:
3102 - reg : Offset and length of the register set for this device
3103
3104 Example Discovery CUNIT node:
3105 cunit@f200 {
3106 reg = <0xf200 0x200>;
3107 };
3108
3109
3110 g) Marvell Discovery MPSCROUTING nodes
3111
3112 Represent the Discovery's MPSC routing hardware
3113
3114 Required properties:
3115 - reg : Offset and length of the register set for this device
3116
3117 Example Discovery MPSCROUTING node:
3118 mpscrouting@b400 {
3119 reg = <0xb400 0xc>;
3120 };
3121
3122
3123 h) Marvell Discovery MPSCINTR nodes
3124
3125 Represent the Discovery's MPSC DMA interrupt hardware registers
3126 (SDMA cause and mask registers).
3127
3128 Required properties:
3129 - reg : Offset and length of the register set for this device
3130
3131 Example Discovery MPSCINTR node:
3132 mpscintr@b800 {
3133 reg = <0xb800 0x100>;
3134 };
3135
3136
3137 i) Marvell Discovery MPSC nodes
3138
3139 Represent the Discovery's MPSC (Multiprotocol Serial Controller)
3140 serial port.
3141
3142 Required properties:
3143 - device_type : "serial"
3144 - compatible : "marvell,mv64360-mpsc"
3145 - reg : Offset and length of the register set for this device
3146 - sdma : the phandle for the SDMA node used by this port
3147 - brg : the phandle for the BRG node used by this port
3148 - cunit : the phandle for the CUNIT node used by this port
3149 - mpscrouting : the phandle for the MPSCROUTING node used by this port
3150 - mpscintr : the phandle for the MPSCINTR node used by this port
3151 - cell-index : the hardware index of this cell in the MPSC core
3152 - max_idle : value needed for MPSC CHR3 (Maximum Frame Length)
3153 register
3154 - interrupts : <a> where a is the interrupt number for the MPSC.
3155 - interrupt-parent : the phandle for the interrupt controller
3156 that services interrupts for this device.
3157
3158 Example Discovery MPSC node:
3159 mpsc@8000 {
3160 device_type = "serial";
3161 compatible = "marvell,mv64360-mpsc";
3162 reg = <0x8000 0x38>;
3163 virtual-reg = <0xf1008000>;
3164 sdma = <&SDMA0>;
3165 brg = <&BRG0>;
3166 cunit = <&CUNIT>;
3167 mpscrouting = <&MPSCROUTING>;
3168 mpscintr = <&MPSCINTR>;
3169 cell-index = <0>;
3170 max_idle = <40>;
3171 interrupts = <40>;
3172 interrupt-parent = <&PIC>;
3173 };
3174
3175
3176 j) Marvell Discovery Watch Dog Timer nodes
3177
3178 Represent the Discovery's watchdog timer hardware
3179
3180 Required properties:
3181 - compatible : "marvell,mv64360-wdt"
3182 - reg : Offset and length of the register set for this device
3183
3184 Example Discovery Watch Dog Timer node:
3185 wdt@b410 {
3186 compatible = "marvell,mv64360-wdt";
3187 reg = <0xb410 0x8>;
3188 };
3189
3190
3191 k) Marvell Discovery I2C nodes
3192
3193 Represent the Discovery's I2C hardware
3194
3195 Required properties:
3196 - device_type : "i2c"
3197 - compatible : "marvell,mv64360-i2c"
3198 - reg : Offset and length of the register set for this device
3199 - interrupts : <a> where a is the interrupt number for the I2C.
3200 - interrupt-parent : the phandle for the interrupt controller
3201 that services interrupts for this device.
3202
3203 Example Discovery I2C node:
3204 compatible = "marvell,mv64360-i2c";
3205 reg = <0xc000 0x20>;
3206 virtual-reg = <0xf100c000>;
3207 interrupts = <37>;
3208 interrupt-parent = <&PIC>;
3209 };
3210
3211
3212 l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes
3213
3214 Represent the Discovery's PIC hardware
3215
3216 Required properties:
3217 - #interrupt-cells : <1>
3218 - #address-cells : <0>
3219 - compatible : "marvell,mv64360-pic"
3220 - reg : Offset and length of the register set for this device
3221 - interrupt-controller
3222
3223 Example Discovery PIC node:
3224 pic {
3225 #interrupt-cells = <1>;
3226 #address-cells = <0>;
3227 compatible = "marvell,mv64360-pic";
3228 reg = <0x0 0x88>;
3229 interrupt-controller;
3230 };
3231
3232
3233 m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes
3234
3235 Represent the Discovery's MPP hardware
3236
3237 Required properties:
3238 - compatible : "marvell,mv64360-mpp"
3239 - reg : Offset and length of the register set for this device
3240
3241 Example Discovery MPP node:
3242 mpp@f000 {
3243 compatible = "marvell,mv64360-mpp";
3244 reg = <0xf000 0x10>;
3245 };
3246
3247
3248 n) Marvell Discovery GPP (General Purpose Pins) nodes
3249
3250 Represent the Discovery's GPP hardware
3251
3252 Required properties:
3253 - compatible : "marvell,mv64360-gpp"
3254 - reg : Offset and length of the register set for this device
3255
3256 Example Discovery GPP node:
3257 gpp@f100 {
3258 compatible = "marvell,mv64360-gpp";
3259 reg = <0xf100 0x20>;
3260 };
3261
3262
3263 o) Marvell Discovery PCI host bridge node
3264
3265 Represents the Discovery's PCI host bridge device. The properties
3266 for this node conform to Rev 2.1 of the PCI Bus Binding to IEEE
3267 1275-1994. A typical value for the compatible property is
3268 "marvell,mv64360-pci".
3269
3270 Example Discovery PCI host bridge node
3271 pci@80000000 {
3272 #address-cells = <3>;
3273 #size-cells = <2>;
3274 #interrupt-cells = <1>;
3275 device_type = "pci";
3276 compatible = "marvell,mv64360-pci";
3277 reg = <0xcf8 0x8>;
3278 ranges = <0x01000000 0x0 0x0
3279 0x88000000 0x0 0x01000000
3280 0x02000000 0x0 0x80000000
3281 0x80000000 0x0 0x08000000>;
3282 bus-range = <0 255>;
3283 clock-frequency = <66000000>;
3284 interrupt-parent = <&PIC>;
3285 interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
3286 interrupt-map = <
3287 /* IDSEL 0x0a */
3288 0x5000 0 0 1 &PIC 80
3289 0x5000 0 0 2 &PIC 81
3290 0x5000 0 0 3 &PIC 91
3291 0x5000 0 0 4 &PIC 93
3292
3293 /* IDSEL 0x0b */
3294 0x5800 0 0 1 &PIC 91
3295 0x5800 0 0 2 &PIC 93
3296 0x5800 0 0 3 &PIC 80
3297 0x5800 0 0 4 &PIC 81
3298
3299 /* IDSEL 0x0c */
3300 0x6000 0 0 1 &PIC 91
3301 0x6000 0 0 2 &PIC 93
3302 0x6000 0 0 3 &PIC 80
3303 0x6000 0 0 4 &PIC 81
3304
3305 /* IDSEL 0x0d */
3306 0x6800 0 0 1 &PIC 93
3307 0x6800 0 0 2 &PIC 80
3308 0x6800 0 0 3 &PIC 81
3309 0x6800 0 0 4 &PIC 91
3310 >;
3311 };
3312
3313
3314 p) Marvell Discovery CPU Error nodes
3315
3316 Represent the Discovery's CPU error handler device.
3317
3318 Required properties:
3319 - compatible : "marvell,mv64360-cpu-error"
3320 - reg : Offset and length of the register set for this device
3321 - interrupts : the interrupt number for this device
3322 - interrupt-parent : the phandle for the interrupt controller
3323 that services interrupts for this device.
3324
3325 Example Discovery CPU Error node:
3326 cpu-error@0070 {
3327 compatible = "marvell,mv64360-cpu-error";
3328 reg = <0x70 0x10 0x128 0x28>;
3329 interrupts = <3>;
3330 interrupt-parent = <&PIC>;
3331 };
3332
3333
3334 q) Marvell Discovery SRAM Controller nodes
3335
3336 Represent the Discovery's SRAM controller device.
3337
3338 Required properties:
3339 - compatible : "marvell,mv64360-sram-ctrl"
3340 - reg : Offset and length of the register set for this device
3341 - interrupts : the interrupt number for this device
3342 - interrupt-parent : the phandle for the interrupt controller
3343 that services interrupts for this device.
3344
3345 Example Discovery SRAM Controller node:
3346 sram-ctrl@0380 {
3347 compatible = "marvell,mv64360-sram-ctrl";
3348 reg = <0x380 0x80>;
3349 interrupts = <13>;
3350 interrupt-parent = <&PIC>;
3351 };
3352
3353
3354 r) Marvell Discovery PCI Error Handler nodes
3355
3356 Represent the Discovery's PCI error handler device.
3357
3358 Required properties:
3359 - compatible : "marvell,mv64360-pci-error"
3360 - reg : Offset and length of the register set for this device
3361 - interrupts : the interrupt number for this device
3362 - interrupt-parent : the phandle for the interrupt controller
3363 that services interrupts for this device.
3364
3365 Example Discovery PCI Error Handler node:
3366 pci-error@1d40 {
3367 compatible = "marvell,mv64360-pci-error";
3368 reg = <0x1d40 0x40 0xc28 0x4>;
3369 interrupts = <12>;
3370 interrupt-parent = <&PIC>;
3371 };
3372
3373
3374 s) Marvell Discovery Memory Controller nodes
3375
3376 Represent the Discovery's memory controller device.
3377
3378 Required properties:
3379 - compatible : "marvell,mv64360-mem-ctrl"
3380 - reg : Offset and length of the register set for this device
3381 - interrupts : the interrupt number for this device
3382 - interrupt-parent : the phandle for the interrupt controller
3383 that services interrupts for this device.
3384
3385 Example Discovery Memory Controller node:
3386 mem-ctrl@1400 {
3387 compatible = "marvell,mv64360-mem-ctrl";
3388 reg = <0x1400 0x60>;
3389 interrupts = <17>;
3390 interrupt-parent = <&PIC>;
3391 };
3392
3393
3394VIII - Specifying interrupt information for devices
2823=================================================== 3395===================================================
2824 3396
2825The device tree represents the busses and devices of a hardware 3397The device tree represents the busses and devices of a hardware
@@ -2905,6 +3477,54 @@ encodings listed below:
2905 2 = high to low edge sensitive type enabled 3477 2 = high to low edge sensitive type enabled
2906 3 = low to high edge sensitive type enabled 3478 3 = low to high edge sensitive type enabled
2907 3479
3480IX - Specifying GPIO information for devices
3481============================================
3482
34831) gpios property
3484-----------------
3485
3486Nodes that make use of GPIOs should define them using the `gpios' property,
3487format of which is: <&gpio-controller1-phandle gpio1-specifier
3488 &gpio-controller2-phandle gpio2-specifier
3489 0 /* holes are permitted, means no GPIO 3 */
3490 &gpio-controller4-phandle gpio4-specifier
3491 ...>;
3492
3493Note that gpio-specifier length is controller dependent.
3494
3495gpio-specifier may encode: bank, pin position inside the bank,
3496whether pin is open-drain and whether pin is logically inverted.
3497
3498Example of the node using GPIOs:
3499
3500 node {
3501 gpios = <&qe_pio_e 18 0>;
3502 };
3503
3504In this example gpio-specifier is "18 0" and encodes GPIO pin number,
3505and empty GPIO flags as accepted by the "qe_pio_e" gpio-controller.
3506
35072) gpio-controller nodes
3508------------------------
3509
3510Every GPIO controller node must have #gpio-cells property defined,
3511this information will be used to translate gpio-specifiers.
3512
3513Example of two SOC GPIO banks defined as gpio-controller nodes:
3514
3515 qe_pio_a: gpio-controller@1400 {
3516 #gpio-cells = <2>;
3517 compatible = "fsl,qe-pario-bank-a", "fsl,qe-pario-bank";
3518 reg = <0x1400 0x18>;
3519 gpio-controller;
3520 };
3521
3522 qe_pio_e: gpio-controller@1460 {
3523 #gpio-cells = <2>;
3524 compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank";
3525 reg = <0x1460 0x18>;
3526 gpio-controller;
3527 };
2908 3528
2909Appendix A - Sample SOC node for MPC8540 3529Appendix A - Sample SOC node for MPC8540
2910======================================== 3530========================================
diff --git a/Documentation/powerpc/kvm_440.txt b/Documentation/powerpc/kvm_440.txt
new file mode 100644
index 000000000000..c02a003fa03a
--- /dev/null
+++ b/Documentation/powerpc/kvm_440.txt
@@ -0,0 +1,41 @@
1Hollis Blanchard <hollisb@us.ibm.com>
215 Apr 2008
3
4Various notes on the implementation of KVM for PowerPC 440:
5
6To enforce isolation, host userspace, guest kernel, and guest userspace all
7run at user privilege level. Only the host kernel runs in supervisor mode.
8Executing privileged instructions in the guest traps into KVM (in the host
9kernel), where we decode and emulate them. Through this technique, unmodified
10440 Linux kernels can be run (slowly) as guests. Future performance work will
11focus on reducing the overhead and frequency of these traps.
12
13The usual code flow is started from userspace invoking a "run" ioctl, which
14causes KVM to switch into guest context. We use IVPR to hijack the host
15interrupt vectors while running the guest, which allows us to direct all
16interrupts to kvmppc_handle_interrupt(). At this point, we could either
17- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or
18- let the host interrupt handler run (e.g. when the decrementer fires), or
19- return to host userspace (e.g. when the guest performs device MMIO)
20
21Address spaces: We take advantage of the fact that Linux doesn't use the AS=1
22address space (in host or guest), which gives us virtual address space to use
23for guest mappings. While the guest is running, the host kernel remains mapped
24in AS=0, but the guest can only use AS=1 mappings.
25
26TLB entries: The TLB entries covering the host linear mapping remain
27present while running the guest. This reduces the overhead of lightweight
28exits, which are handled by KVM running in the host kernel. We keep three
29copies of the TLB:
30 - guest TLB: contents of the TLB as the guest sees it
31 - shadow TLB: the TLB that is actually in hardware while guest is running
32 - host TLB: to restore TLB state when context switching guest -> host
33When a TLB miss occurs because a mapping was not present in the shadow TLB,
34but was present in the guest TLB, KVM handles the fault without invoking the
35guest. Large guest pages are backed by multiple 4KB shadow pages through this
36mechanism.
37
38IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network
39and block IO, so those drivers must be enabled in the guest. It's possible
40that some qemu device emulation (e.g. e1000 or rtl8139) may also work with
41little effort.
diff --git a/Documentation/powerpc/mpc52xx-device-tree-bindings.txt b/Documentation/powerpc/mpc52xx-device-tree-bindings.txt
index 5e03610e186f..6f12f1c79c0c 100644
--- a/Documentation/powerpc/mpc52xx-device-tree-bindings.txt
+++ b/Documentation/powerpc/mpc52xx-device-tree-bindings.txt
@@ -186,6 +186,12 @@ Recommended soc5200 child nodes; populate as needed for your board
186name device_type compatible Description 186name device_type compatible Description
187---- ----------- ---------- ----------- 187---- ----------- ---------- -----------
188gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers 188gpt@<addr> gpt fsl,mpc5200-gpt General purpose timers
189gpt@<addr> gpt fsl,mpc5200-gpt-gpio General purpose
190 timers in GPIO mode
191gpio@<addr> fsl,mpc5200-gpio MPC5200 simple gpio
192 controller
193gpio@<addr> fsl,mpc5200-gpio-wkup MPC5200 wakeup gpio
194 controller
189rtc@<addr> rtc mpc5200-rtc Real time clock 195rtc@<addr> rtc mpc5200-rtc Real time clock
190mscan@<addr> mscan mpc5200-mscan CAN bus controller 196mscan@<addr> mscan mpc5200-mscan CAN bus controller
191pci@<addr> pci mpc5200-pci PCI bridge 197pci@<addr> pci mpc5200-pci PCI bridge
@@ -225,6 +231,23 @@ PSC in i2s mode: The mpc5200 and mpc5200b PSCs are not compatible when in
225i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the 231i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the
226compatible field. 232compatible field.
227 233
2347) GPIO controller nodes
235Each GPIO controller node should have the empty property gpio-controller and
236#gpio-cells set to 2. First cell is the GPIO number which is interpreted
237according to the bit numbers in the GPIO control registers. The second cell
238is for flags, which are currently unused.
239
2408) FEC nodes
241The FEC node can specify one of the following properties to configure
242the MII link:
243"fsl,7-wire-mode" - An empty property that specifies the link uses 7-wire
244 mode instead of MII
245"current-speed" - Specifies that the MII should be configured for a fixed
246 speed. This property should contain two cells. The
247 first cell specifies the speed in Mbps and the second
248 should be '0' for half duplex and '1' for full duplex
249"phy-handle" - Contains a phandle to an Ethernet PHY.
250
228IV - Extra Notes 251IV - Extra Notes
229================ 252================
230 253
diff --git a/Documentation/powerpc/phyp-assisted-dump.txt b/Documentation/powerpc/phyp-assisted-dump.txt
new file mode 100644
index 000000000000..c4682b982a2e
--- /dev/null
+++ b/Documentation/powerpc/phyp-assisted-dump.txt
@@ -0,0 +1,127 @@
1
2 Hypervisor-Assisted Dump
3 ------------------------
4 November 2007
5
6The goal of hypervisor-assisted dump is to enable the dump of
7a crashed system, and to do so from a fully-reset system, and
8to minimize the total elapsed time until the system is back
9in production use.
10
11As compared to kdump or other strategies, hypervisor-assisted
12dump offers several strong, practical advantages:
13
14-- Unlike kdump, the system has been reset, and loaded
15 with a fresh copy of the kernel. In particular,
16 PCI and I/O devices have been reinitialized and are
17 in a clean, consistent state.
18-- As the dump is performed, the dumped memory becomes
19 immediately available to the system for normal use.
20-- After the dump is completed, no further reboots are
21 required; the system will be fully usable, and running
22 in its normal production mode on its normal kernel.
23
24The above can only be accomplished by coordination with,
25and assistance from the hypervisor. The procedure is
26as follows:
27
28-- When a system crashes, the hypervisor will save
29 the low 256MB of RAM to a previously registered
30 save region. It will also save system state, system
31 registers, and hardware PTE's.
32
33-- After the low 256MB area has been saved, the
34 hypervisor will reset PCI and other hardware state.
35 It will *not* clear RAM. It will then launch the
36 bootloader, as normal.
37
38-- The freshly booted kernel will notice that there
39 is a new node (ibm,dump-kernel) in the device tree,
40 indicating that there is crash data available from
41 a previous boot. It will boot into only 256MB of RAM,
42 reserving the rest of system memory.
43
44-- Userspace tools will parse /sys/kernel/release_region
45 and read /proc/vmcore to obtain the contents of memory,
46 which holds the previous crashed kernel. The userspace
47 tools may copy this info to disk, or network, nas, san,
48 iscsi, etc. as desired.
49
50 For example: the values in /sys/kernel/release_region
51 would look something like this (address-range pairs).
52 CPU:0x177fee000-0x10000: HPTE:0x177ffe020-0x1000: /
53 DUMP:0x177fff020-0x10000000, 0x10000000-0x16F1D370A
54
55-- As the userspace tools complete saving a portion of
56 dump, they echo an offset and size to
57 /sys/kernel/release_region to release the reserved
58 memory back to general use.
59
60 An example of this is:
61 "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
62 which will release 256MB at the 1GB boundary.
63
64Please note that the hypervisor-assisted dump feature
65is only available on Power6-based systems with recent
66firmware versions.
67
68Implementation details:
69----------------------
70
71During boot, a check is made to see if firmware supports
72this feature on this particular machine. If it does, then
73we check to see if an active dump is waiting for us. If yes
74then everything but 256 MB of RAM is reserved during early
75boot. This area is released once we collect a dump from user
76land scripts that are run. If there is dump data, then
77the /sys/kernel/release_region file is created, and
78the reserved memory is held.
79
80If there is no waiting dump data, then only the highest
81256MB of the ram is reserved as a scratch area. This area
82is *not* released: this region will be kept permanently
83reserved, so that it can act as a receptacle for a copy
84of the low 256MB in the case a crash does occur. See,
85however, "open issues" below, as to whether
86such a reserved region is really needed.
87
88Currently the dump will be copied from /proc/vmcore to
89a new file upon user intervention. The starting address
90to be read and the range for each data point are provided
91in /sys/kernel/release_region.
92
93The tools to examine the dump will be the same as the ones
94used for kdump.
95
96General notes:
97--------------
98Security: please note that there are potential security issues
99with any sort of dump mechanism. In particular, plaintext
100(unencrypted) data, and possibly passwords, may be present in
101the dump data. Userspace tools must take adequate precautions to
102preserve security.
103
104Open issues/ToDo:
105------------
106 o The various code paths that tell the hypervisor that a crash
107 occurred, vs. it simply being a normal reboot, should be
108 reviewed, and possibly clarified/fixed.
109
110 o Instead of using /sys/kernel, should there be a /sys/dump
111 instead? There is a dump_subsys being created by the s390 code,
112 perhaps the pseries code should use a similar layout as well.
113
114 o Is reserving a 256MB region really required? The goal of
115 reserving a 256MB scratch area is to make sure that no
116 important crash data is clobbered when the hypervisor
117 saves low mem to the scratch area. But, if one could assure
118 that nothing important is located in some 256MB area, then
119 it would not need to be reserved. Something that can be
120 improved in subsequent versions.
121
122 o Still working with the kdump team to integrate this with kdump,
123 some work remains but this would not affect the current
124 patches.
125
126 o Still need to write a shell script, to copy the dump away.
127 Currently I am parsing it manually.
diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644
index 000000000000..f8e8e95e81fd
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,96 @@
1/*
2 * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
3 *
4 * Tests if the control register is updated correctly
5 * at context switches
6 *
7 * Warning: this test will cause a very high load for a few seconds
8 *
9 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <unistd.h>
14#include <signal.h>
15#include <inttypes.h>
16#include <wait.h>
17
18
19#include <sys/prctl.h>
20#include <linux/prctl.h>
21
22/* Get/set the process' ability to use the timestamp counter instruction */
23#ifndef PR_GET_TSC
24#define PR_GET_TSC 25
25#define PR_SET_TSC 26
26# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
27# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
28#endif
29
30uint64_t rdtsc() {
31uint32_t lo, hi;
32/* We cannot use "=A", since this would use %rax on x86_64 */
33__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
34return (uint64_t)hi << 32 | lo;
35}
36
37void sigsegv_expect(int sig)
38{
39 /* */
40}
41
42void segvtask(void)
43{
44 if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
45 {
46 perror("prctl");
47 exit(0);
48 }
49 signal(SIGSEGV, sigsegv_expect);
50 alarm(10);
51 rdtsc();
52 fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
53 exit(0);
54}
55
56
57void sigsegv_fail(int sig)
58{
59 fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
60 exit(0);
61}
62
63void rdtsctask(void)
64{
65 if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
66 {
67 perror("prctl");
68 exit(0);
69 }
70 signal(SIGSEGV, sigsegv_fail);
71 alarm(10);
72 for(;;) rdtsc();
73}
74
75
76int main(int argc, char **argv)
77{
78 int n_tasks = 100, i;
79
80 fprintf(stderr, "[No further output means we're allright]\n");
81
82 for (i=0; i<n_tasks; i++)
83 if (fork() == 0)
84 {
85 if (i & 1)
86 segvtask();
87 else
88 rdtsctask();
89 }
90
91 for (i=0; i<n_tasks; i++)
92 wait(NULL);
93
94 exit(0);
95}
96
diff --git a/Documentation/prctl/disable-tsc-on-off-stress-test.c b/Documentation/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644
index 000000000000..1fcd91445375
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,95 @@
1/*
2 * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
3 *
4 * Tests if the control register is updated correctly
5 * when set with prctl()
6 *
7 * Warning: this test will cause a very high load for a few seconds
8 *
9 */
10
11#include <stdio.h>
12#include <stdlib.h>
13#include <unistd.h>
14#include <signal.h>
15#include <inttypes.h>
16#include <wait.h>
17
18
19#include <sys/prctl.h>
20#include <linux/prctl.h>
21
22/* Get/set the process' ability to use the timestamp counter instruction */
23#ifndef PR_GET_TSC
24#define PR_GET_TSC 25
25#define PR_SET_TSC 26
26# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
27# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
28#endif
29
30/* snippet from wikipedia :-) */
31
32uint64_t rdtsc() {
33uint32_t lo, hi;
34/* We cannot use "=A", since this would use %rax on x86_64 */
35__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
36return (uint64_t)hi << 32 | lo;
37}
38
39int should_segv = 0;
40
41void sigsegv_cb(int sig)
42{
43 if (!should_segv)
44 {
45 fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
46 exit(0);
47 }
48 if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
49 {
50 perror("prctl");
51 exit(0);
52 }
53 should_segv = 0;
54
55 rdtsc();
56}
57
58void task(void)
59{
60 signal(SIGSEGV, sigsegv_cb);
61 alarm(10);
62 for(;;)
63 {
64 rdtsc();
65 if (should_segv)
66 {
67 fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
68 exit(0);
69 }
70 if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
71 {
72 perror("prctl");
73 exit(0);
74 }
75 should_segv = 1;
76 }
77}
78
79
80int main(int argc, char **argv)
81{
82 int n_tasks = 100, i;
83
84 fprintf(stderr, "[No further output means we're allright]\n");
85
86 for (i=0; i<n_tasks; i++)
87 if (fork() == 0)
88 task();
89
90 for (i=0; i<n_tasks; i++)
91 wait(NULL);
92
93 exit(0);
94}
95
diff --git a/Documentation/prctl/disable-tsc-test.c b/Documentation/prctl/disable-tsc-test.c
new file mode 100644
index 000000000000..843c81eac235
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-test.c
@@ -0,0 +1,94 @@
1/*
2 * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
3 *
4 * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
5 */
6
7#include <stdio.h>
8#include <stdlib.h>
9#include <unistd.h>
10#include <signal.h>
11#include <inttypes.h>
12
13
14#include <sys/prctl.h>
15#include <linux/prctl.h>
16
17/* Get/set the process' ability to use the timestamp counter instruction */
18#ifndef PR_GET_TSC
19#define PR_GET_TSC 25
20#define PR_SET_TSC 26
21# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
22# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
23#endif
24
25const char *tsc_names[] =
26{
27 [0] = "[not set]",
28 [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
29 [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
30};
31
32uint64_t rdtsc() {
33uint32_t lo, hi;
34/* We cannot use "=A", since this would use %rax on x86_64 */
35__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
36return (uint64_t)hi << 32 | lo;
37}
38
39void sigsegv_cb(int sig)
40{
41 int tsc_val = 0;
42
43 printf("[ SIG_SEGV ]\n");
44 printf("prctl(PR_GET_TSC, &tsc_val); ");
45 fflush(stdout);
46
47 if ( prctl(PR_GET_TSC, &tsc_val) == -1)
48 perror("prctl");
49
50 printf("tsc_val == %s\n", tsc_names[tsc_val]);
51 printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
52 fflush(stdout);
53 if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
54 perror("prctl");
55
56 printf("rdtsc() == ");
57}
58
59int main(int argc, char **argv)
60{
61 int tsc_val = 0;
62
63 signal(SIGSEGV, sigsegv_cb);
64
65 printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
66 printf("prctl(PR_GET_TSC, &tsc_val); ");
67 fflush(stdout);
68
69 if ( prctl(PR_GET_TSC, &tsc_val) == -1)
70 perror("prctl");
71
72 printf("tsc_val == %s\n", tsc_names[tsc_val]);
73 printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
74 printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
75 fflush(stdout);
76
77 if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
78 perror("prctl");
79
80 printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
81 printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
82 fflush(stdout);
83
84 if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
85 perror("prctl");
86
87 printf("rdtsc() == ");
88 fflush(stdout);
89 printf("%llu\n", (unsigned long long)rdtsc());
90 fflush(stdout);
91
92 exit(EXIT_SUCCESS);
93}
94
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO
index 8fbc0a852870..bf0baa19ec24 100644
--- a/Documentation/s390/CommonIO
+++ b/Documentation/s390/CommonIO
@@ -8,17 +8,6 @@ Command line parameters
8 8
9 Enable logging of debug information in case of ccw device timeouts. 9 Enable logging of debug information in case of ccw device timeouts.
10 10
11
12* cio_msg = yes | no
13
14 Determines whether information on found devices and sensed device
15 characteristics should be shown during startup or when new devices are
16 found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
17 0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
18
19 Default is off.
20
21
22* cio_ignore = {all} | 11* cio_ignore = {all} |
23 {<device> | <range of devices>} | 12 {<device> | <range of devices>} |
24 {!<device> | !<range of devices>} 13 {!<device> | !<range of devices>}
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt
new file mode 100644
index 000000000000..6f5ceb0f09fc
--- /dev/null
+++ b/Documentation/s390/kvm.txt
@@ -0,0 +1,125 @@
1*** BIG FAT WARNING ***
2The kvm module is currently in EXPERIMENTAL state for s390. This means that
3the interface to the module is not yet considered to remain stable. Thus, be
4prepared that we keep breaking your userspace application and guest
5compatibility over and over again until we feel happy with the result. Make sure
6your guest kernel, your host kernel, and your userspace launcher are in a
7consistent state.
8
9This documentation describes the unique ioctl calls to /dev/kvm, the resulting
10kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
11
121. ioctl calls to /dev/kvm
13KVM does support the following ioctls on s390 that are common with other
14architectures and do behave the same:
15KVM_GET_API_VERSION
16KVM_CREATE_VM (*) see note
17KVM_CHECK_EXTENSION
18KVM_GET_VCPU_MMAP_SIZE
19
20Notes:
21* KVM_CREATE_VM may fail on s390, if the calling process has multiple
22threads and has not called KVM_S390_ENABLE_SIE before.
23
24In addition, on s390 the following architecture specific ioctls are supported:
25ioctl: KVM_S390_ENABLE_SIE
26args: none
27see also: include/linux/kvm.h
28This call causes the kernel to switch on PGSTE in the user page table. This
29operation is needed in order to run a virtual machine, and it requires the
30calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
31will implicitly try to switch on PGSTE if the user process has not called
32KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
33before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
34observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
35operation, is not reversible, and will persist over the entire lifetime of
36the calling process. It does not have any user-visible effect other than a small
37performance penalty.
38
392. ioctl calls to the kvm-vm file descriptor
40KVM does support the following ioctls on s390 that are common with other
41architectures and do behave the same:
42KVM_CREATE_VCPU
43KVM_SET_USER_MEMORY_REGION (*) see note
44KVM_GET_DIRTY_LOG (**) see note
45
46Notes:
47* kvm does only allow exactly one memory slot on s390, which has to start
48 at guest absolute address zero and at a user address that is aligned on any
49 page boundary. This hardware "limitation" allows us to have a few unique
50 optimizations. The memory slot doesn't have to be filled
51 with memory actually, it may contain sparse holes. That said, with different
52 user memory layout this does still allow a large flexibility when
53 doing the guest memory setup.
54** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty
55log. This ioctl call is only needed for guest migration, and we intend to
56implement this one in the future.
57
58In addition, on s390 the following architecture specific ioctls for the kvm-vm
59file descriptor are supported:
60ioctl: KVM_S390_INTERRUPT
61args: struct kvm_s390_interrupt *
62see also: include/linux/kvm.h
63This ioctl is used to submit a floating interrupt for a virtual machine.
64Floating interrupts may be delivered to any virtual cpu in the configuration.
65Only some interrupt types defined in include/linux/kvm.h make sense when
66submitted as floating interrupts. The following interrupts are not considered
67to be useful as floating interrupts, and a call to inject them will result in
68-EINVAL error code: program interrupts and interprocessor signals. Valid
69floating interrupts are:
70KVM_S390_INT_VIRTIO
71KVM_S390_INT_SERVICE
72
733. ioctl calls to the kvm-vcpu file descriptor
74KVM does support the following ioctls on s390 that are common with other
75architectures and do behave the same:
76KVM_RUN
77KVM_GET_REGS
78KVM_SET_REGS
79KVM_GET_SREGS
80KVM_SET_SREGS
81KVM_GET_FPU
82KVM_SET_FPU
83
84In addition, on s390 the following architecture specific ioctls for the
85kvm-vcpu file descriptor are supported:
86ioctl: KVM_S390_INTERRUPT
87args: struct kvm_s390_interrupt *
88see also: include/linux/kvm.h
89This ioctl is used to submit an interrupt for a specific virtual cpu.
90Only some interrupt types defined in include/linux/kvm.h make sense when
91submitted for a specific cpu. The following interrupts are not considered
92to be useful, and a call to inject them will result in -EINVAL error code:
93service processor calls and virtio interrupts. Valid interrupt types are:
94KVM_S390_PROGRAM_INT
95KVM_S390_SIGP_STOP
96KVM_S390_RESTART
97KVM_S390_SIGP_SET_PREFIX
98KVM_S390_INT_EMERGENCY
99
100ioctl: KVM_S390_STORE_STATUS
101args: unsigned long
102see also: include/linux/kvm.h
103This ioctl stores the state of the cpu at the guest real address given as
104argument, unless one of the following values defined in include/linux/kvm.h
105is given as argument:
106KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
107absolute lowcore as defined by the principles of operation
108KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
109its prefix page just like the dump tool that comes with zipl. This is useful
110to create a system dump for use with lkcdutils or crash.
111
112ioctl: KVM_S390_SET_INITIAL_PSW
113args: struct kvm_s390_psw *
114see also: include/linux/kvm.h
115This ioctl can be used to set the processor status word (psw) of a stopped cpu
116prior to running it with KVM_RUN. Note that this call is not required to modify
117the psw during sie intercepts that fall back to userspace because struct kvm_run
118does contain the psw, and this value is evaluated during reentry of KVM_RUN
119after the intercept exit was recognized.
120
121ioctl: KVM_S390_INITIAL_RESET
122args: none
123see also: include/linux/kvm.h
124This ioctl can be used to perform an initial cpu reset as defined by the
125principles of operation. The target cpu has to be in stopped state.
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
index 0eb7c58916de..e05420973698 100644
--- a/Documentation/s390/s390dbf.txt
+++ b/Documentation/s390/s390dbf.txt
@@ -115,6 +115,27 @@ Return Value: Handle for generated debug area
115Description: Allocates memory for a debug log 115Description: Allocates memory for a debug log
116 Must not be called within an interrupt handler 116 Must not be called within an interrupt handler
117 117
118----------------------------------------------------------------------------
119debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
120 int buf_size, mode_t mode, uid_t uid,
121 gid_t gid);
122
123Parameter: name: Name of debug log (e.g. used for debugfs entry)
124 pages: Number of pages, which will be allocated per area
125 nr_areas: Number of debug areas
126 buf_size: Size of data area in each debug entry
127 mode: File mode for debugfs files. E.g. S_IRWXUGO
128 uid: User ID for debugfs files. Currently only 0 is
129 supported.
130 gid: Group ID for debugfs files. Currently only 0 is
131 supported.
132
133Return Value: Handle for generated debug area
134 NULL if register failed
135
136Description: Allocates memory for a debug log
137 Must not be called within an interrupt handler
138
118--------------------------------------------------------------------------- 139---------------------------------------------------------------------------
119void debug_unregister (debug_info_t * id); 140void debug_unregister (debug_info_t * id);
120 141
diff --git a/Documentation/sched-rt-group.txt b/Documentation/sched-rt-group.txt
deleted file mode 100644
index 1c6332f4543c..000000000000
--- a/Documentation/sched-rt-group.txt
+++ /dev/null
@@ -1,59 +0,0 @@
1
2
3Real-Time group scheduling.
4
5The problem space:
6
7In order to schedule multiple groups of realtime tasks each group must
8be assigned a fixed portion of the CPU time available. Without a minimum
9guarantee a realtime group can obviously fall short. A fuzzy upper limit
10is of no use since it cannot be relied upon. Which leaves us with just
11the single fixed portion.
12
13CPU time is divided by means of specifying how much time can be spent
14running in a given period. Say a frame fixed realtime renderer must
15deliver 25 frames a second, which yields a period of 0.04s. Now say
16it will also have to play some music and respond to input, leaving it
17with around 80% for the graphics. We can then give this group a runtime
18of 0.8 * 0.04s = 0.032s.
19
20This way the graphics group will have a 0.04s period with a 0.032s runtime
21limit.
22
23Now if the audio thread needs to refill the DMA buffer every 0.005s, but
24needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s
25= 0.00015s.
26
27
28The Interface:
29
30system wide:
31
32/proc/sys/kernel/sched_rt_period_us
33/proc/sys/kernel/sched_rt_runtime_us
34
35CONFIG_FAIR_USER_SCHED
36
37/sys/kernel/uids/<uid>/cpu_rt_runtime_us
38
39or
40
41CONFIG_FAIR_CGROUP_SCHED
42
43/cgroup/<cgroup>/cpu.rt_runtime_us
44
45[ time is specified in us because the interface is s32; this gives an
46 operating range of ~35m to 1us ]
47
48The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ].
49
50A runtime of -1 specifies runtime == period, ie. no limit.
51
52New groups get the period from /proc/sys/kernel/sched_rt_period_us and
53a runtime of 0.
54
55Settings are constrained to:
56
57 \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
58
59in order to keep the configuration schedulable.
diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX
index b5f5ca069b2d..fc234d093fbf 100644
--- a/Documentation/scheduler/00-INDEX
+++ b/Documentation/scheduler/00-INDEX
@@ -12,5 +12,7 @@ sched-domains.txt
12 - information on scheduling domains. 12 - information on scheduling domains.
13sched-nice-design.txt 13sched-nice-design.txt
14 - How and why the scheduler's nice levels are implemented. 14 - How and why the scheduler's nice levels are implemented.
15sched-rt-group.txt
16 - real-time group scheduling.
15sched-stats.txt 17sched-stats.txt
16 - information on schedstats (Linux Scheduler Statistics). 18 - information on schedstats (Linux Scheduler Statistics).
diff --git a/Documentation/scheduler/sched-design.txt b/Documentation/scheduler/sched-design.txt
deleted file mode 100644
index 1605bf0cba8b..000000000000
--- a/Documentation/scheduler/sched-design.txt
+++ /dev/null
@@ -1,165 +0,0 @@
1 Goals, Design and Implementation of the
2 new ultra-scalable O(1) scheduler
3
4
5 This is an edited version of an email Ingo Molnar sent to
6 lkml on 4 Jan 2002. It describes the goals, design, and
7 implementation of Ingo's new ultra-scalable O(1) scheduler.
8 Last Updated: 18 April 2002.
9
10
11Goal
12====
13
14The main goal of the new scheduler is to keep all the good things we know
15and love about the current Linux scheduler:
16
17 - good interactive performance even during high load: if the user
18 types or clicks then the system must react instantly and must execute
19 the user tasks smoothly, even during considerable background load.
20
21 - good scheduling/wakeup performance with 1-2 runnable processes.
22
23 - fairness: no process should stay without any timeslice for any
24 unreasonable amount of time. No process should get an unjustly high
25 amount of CPU time.
26
27 - priorities: less important tasks can be started with lower priority,
28 more important tasks with higher priority.
29
30 - SMP efficiency: no CPU should stay idle if there is work to do.
31
32 - SMP affinity: processes which run on one CPU should stay affine to
33 that CPU. Processes should not bounce between CPUs too frequently.
34
35 - plus additional scheduler features: RT scheduling, CPU binding.
36
37and the goal is also to add a few new things:
38
39 - fully O(1) scheduling. Are you tired of the recalculation loop
40 blowing the L1 cache away every now and then? Do you think the goodness
41 loop is taking a bit too long to finish if there are lots of runnable
42 processes? This new scheduler takes no prisoners: wakeup(), schedule(),
43 the timer interrupt are all O(1) algorithms. There is no recalculation
44 loop. There is no goodness loop either.
45
46 - 'perfect' SMP scalability. With the new scheduler there is no 'big'
47 runqueue_lock anymore - it's all per-CPU runqueues and locks - two
48 tasks on two separate CPUs can wake up, schedule and context-switch
49 completely in parallel, without any interlocking. All
50 scheduling-relevant data is structured for maximum scalability.
51
52 - better SMP affinity. The old scheduler has a particular weakness that
53 causes the random bouncing of tasks between CPUs if/when higher
54 priority/interactive tasks, this was observed and reported by many
55 people. The reason is that the timeslice recalculation loop first needs
56 every currently running task to consume its timeslice. But when this
57 happens on eg. an 8-way system, then this property starves an
58 increasing number of CPUs from executing any process. Once the last
59 task that has a timeslice left has finished using up that timeslice,
60 the recalculation loop is triggered and other CPUs can start executing
61 tasks again - after having idled around for a number of timer ticks.
62 The more CPUs, the worse this effect.
63
64 Furthermore, this same effect causes the bouncing effect as well:
65 whenever there is such a 'timeslice squeeze' of the global runqueue,
66 idle processors start executing tasks which are not affine to that CPU.
67 (because the affine tasks have finished off their timeslices already.)
68
69 The new scheduler solves this problem by distributing timeslices on a
70 per-CPU basis, without having any global synchronization or
71 recalculation.
72
73 - batch scheduling. A significant proportion of computing-intensive tasks
74 benefit from batch-scheduling, where timeslices are long and processes
75 are roundrobin scheduled. The new scheduler does such batch-scheduling
76 of the lowest priority tasks - so nice +19 jobs will get
77 'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
78 in essence SCHED_IDLE, from an interactiveness point of view.
79
80 - handle extreme loads more smoothly, without breakdown and scheduling
81 storms.
82
83 - O(1) RT scheduling. For those RT folks who are paranoid about the
84 O(nr_running) property of the goodness loop and the recalculation loop.
85
86 - run fork()ed children before the parent. Andrea has pointed out the
87 advantages of this a few months ago, but patches for this feature
88 do not work with the old scheduler as well as they should,
89 because idle processes often steal the new child before the fork()ing
90 CPU gets to execute it.
91
92
93Design
94======
95
96The core of the new scheduler contains the following mechanisms:
97
98 - *two* priority-ordered 'priority arrays' per CPU. There is an 'active'
99 array and an 'expired' array. The active array contains all tasks that
100 are affine to this CPU and have timeslices left. The expired array
101 contains all tasks which have used up their timeslices - but this array
102 is kept sorted as well. The active and expired array is not accessed
103 directly, it's accessed through two pointers in the per-CPU runqueue
104 structure. If all active tasks are used up then we 'switch' the two
105 pointers and from now on the ready-to-go (former-) expired array is the
106 active array - and the empty active array serves as the new collector
107 for expired tasks.
108
109 - there is a 64-bit bitmap cache for array indices. Finding the highest
110 priority task is thus a matter of two x86 BSFL bit-search instructions.
111
112the split-array solution enables us to have an arbitrary number of active
113and expired tasks, and the recalculation of timeslices can be done
114immediately when the timeslice expires. Because the arrays are always
115accessed through the pointers in the runqueue, switching the two arrays can
116be done very quickly.
117
118this is a hybrid priority-list approach coupled with roundrobin
119scheduling and the array-switch method of distributing timeslices.
120
121 - there is a per-task 'load estimator'.
122
123one of the toughest things to get right is good interactive feel during
124heavy system load. While playing with various scheduler variants i found
125that the best interactive feel is achieved not by 'boosting' interactive
126tasks, but by 'punishing' tasks that want to use more CPU time than there
127is available. This method is also much easier to do in an O(1) fashion.
128
129to establish the actual 'load' the task contributes to the system, a
130complex-looking but pretty accurate method is used: there is a 4-entry
131'history' ringbuffer of the task's activities during the last 4 seconds.
132This ringbuffer is operated without much overhead. The entries tell the
133scheduler a pretty accurate load-history of the task: has it used up more
134CPU time or less during the past N seconds. [the size '4' and the interval
135of 4x 1 seconds was found by lots of experimentation - this part is
136flexible and can be changed in both directions.]
137
138the penalty a task gets for generating more load than the CPU can handle
139is a priority decrease - there is a maximum amount to this penalty
140relative to their static priority, so even fully CPU-bound tasks will
141observe each other's priorities, and will share the CPU accordingly.
142
143the SMP load-balancer can be extended/switched with additional parallel
144computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
145can be supported easily by changing the load-balancer. Right now it's
146tuned for my SMP systems.
147
148i skipped the prev->mm == next->mm advantage - no workload i know of shows
149any sensitivity to this. It can be added back by sacrificing O(1)
150schedule() [the current and one-lower priority list can be searched for a
151that->mm == current->mm condition], but costs a fair number of cycles
152during a number of important workloads, so i wanted to avoid this as much
153as possible.
154
155- the SMP idle-task startup code was still racy and the new scheduler
156triggered this. So i streamlined the idle-setup code a bit. We do not call
157into schedule() before all processors have started up fully and all idle
158threads are in place.
159
160- the patch also cleans up a number of aspects of sched.c - moves code
161into other areas of the kernel where it's appropriate, and simplifies
162certain code paths and data constructs. As a result, the new scheduler's
163code is smaller than the old one.
164
165 Ingo
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
new file mode 100644
index 000000000000..14f901f639ee
--- /dev/null
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -0,0 +1,177 @@
1 Real-Time group scheduling
2 --------------------------
3
4CONTENTS
5========
6
71. Overview
8 1.1 The problem
9 1.2 The solution
102. The interface
11 2.1 System-wide settings
12 2.2 Default behaviour
13 2.3 Basis for grouping tasks
143. Future plans
15
16
171. Overview
18===========
19
20
211.1 The problem
22---------------
23
24Realtime scheduling is all about determinism, a group has to be able to rely on
25the amount of bandwidth (eg. CPU time) being constant. In order to schedule
26multiple groups of realtime tasks, each group must be assigned a fixed portion
27of the CPU time available. Without a minimum guarantee a realtime group can
28obviously fall short. A fuzzy upper limit is of no use since it cannot be
29relied upon. Which leaves us with just the single fixed portion.
30
311.2 The solution
32----------------
33
34CPU time is divided by means of specifying how much time can be spent running
35in a given period. We allocate this "run time" for each realtime group which
36the other realtime groups will not be permitted to use.
37
38Any time not allocated to a realtime group will be used to run normal priority
39tasks (SCHED_OTHER). Any allocated run time not used will also be picked up by
40SCHED_OTHER.
41
42Let's consider an example: a frame fixed realtime renderer must deliver 25
43frames a second, which yields a period of 0.04s per frame. Now say it will also
44have to play some music and respond to input, leaving it with around 80% CPU
45time dedicated for the graphics. We can then give this group a run time of 0.8
46* 0.04s = 0.032s.
47
48This way the graphics group will have a 0.04s period with a 0.032s run time
49limit. Now if the audio thread needs to refill the DMA buffer every 0.005s, but
50needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
510.00015s. So this group can be scheduled with a period of 0.005s and a run time
52of 0.00015s.
53
54The remaining CPU time will be used for user input and other tasks. Because
55realtime tasks have explicitly allocated the CPU time they need to perform
56their tasks, buffer underruns in the graphics or audio can be eliminated.
57
58NOTE: the above example is not fully implemented as of yet (2.6.25). We still
59lack an EDF scheduler to make non-uniform periods usable.
60
61
622. The Interface
63================
64
65
662.1 System wide settings
67------------------------
68
69The system wide settings are configured under the /proc virtual file system:
70
71/proc/sys/kernel/sched_rt_period_us:
72 The scheduling period that is equivalent to 100% CPU bandwidth
73
74/proc/sys/kernel/sched_rt_runtime_us:
75 A global limit on how much time realtime scheduling may use. Even without
76 CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime
77 processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth
78 available to all realtime groups.
79
80 * Time is specified in us because the interface is s32. This gives an
81 operating range from 1us to about 35 minutes.
82 * sched_rt_period_us takes values from 1 to INT_MAX.
83 * sched_rt_runtime_us takes values from -1 to (INT_MAX - 1).
84 * A run time of -1 specifies runtime == period, ie. no limit.
85
86
872.2 Default behaviour
88---------------------
89
90The default values are sched_rt_period_us = 1000000 (1s) and
91sched_rt_runtime_us = 950000 (0.95s). This gives 0.05s to be used by
92SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away
93realtime task will not lock up the machine but leave a little time to recover
94it. By setting runtime to -1 you'd get the old behaviour back.
95
96By default all bandwidth is assigned to the root group and new groups get the
97period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you
98want to assign bandwidth to another group, reduce the root group's bandwidth
99and assign some or all of the difference to another group.
100
101Realtime group scheduling means you have to assign a portion of total CPU
102bandwidth to the group before it will accept realtime tasks. Therefore you will
103not be able to run realtime tasks as any user other than root until you have
104done that, even if the user has the rights to run processes with realtime
105priority!
106
107
1082.3 Basis for grouping tasks
109----------------------------
110
111There are two compile-time settings for allocating CPU bandwidth. These are
112configured using the "Basis for grouping tasks" multiple choice menu under
113General setup > Group CPU Scheduler:
114
115a. CONFIG_USER_SCHED (aka "Basis for grouping tasks" = "user id")
116
117This lets you use the virtual files under
118"/sys/kernel/uids/<uid>/cpu_rt_runtime_us" to control the CPU time reserved for
119each user.
120
121The other option is:
122
123b. CONFIG_CGROUP_SCHED (aka "Basis for grouping tasks" = "Control groups")
124
125This uses the /cgroup virtual file system and "/cgroup/<cgroup>/cpu.rt_runtime_us"
126to control the CPU time reserved for each control group instead.
127
128For more information on working with control groups, you should read
129Documentation/cgroups.txt as well.
130
131Group settings are checked against the following limits in order to keep the configuration
132schedulable:
133
134 \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
135
136For now, this can be simplified to just the following (but see Future plans):
137
138 \Sum_{i} runtime_{i} <= global_runtime
139
140
1413. Future plans
142===============
143
144There is work in progress to make the scheduling period for each group
145("/sys/kernel/uids/<uid>/cpu_rt_period_us" or
146"/cgroup/<cgroup>/cpu.rt_period_us" respectively) configurable as well.
147
148The constraint on the period is that a subgroup must have a smaller or
149equal period to its parent. But realistically it's not very useful _yet_
150as it's prone to starvation without deadline scheduling.
151
152Consider two sibling groups A and B; both have 50% bandwidth, but A's
153period is twice the length of B's.
154
155* group A: period=100000us, runtime=50000us
156 - this runs for 0.05s once every 0.1s
157
158* group B: period= 50000us, runtime=25000us
159 - this runs for 0.025s twice every 0.1s (or once every 0.05 sec).
160
161This means that currently a while (1) loop in A will run for the full period of
162B and can starve B's tasks (assuming they are of lower priority) for a whole
163period.
164
165The next project will be SCHED_EDF (Earliest Deadline First scheduling) to bring
166full deadline scheduling to the linux kernel. Deadline scheduling the above
167groups and treating end of the period as a deadline will ensure that they both
168get their allocated time.
169
170Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
171the biggest challenge as the current linux PI infrastructure is geared towards
172the limited static priority levels 0-139. With deadline scheduling you need to
173do deadline inheritance (since priority is inversely proportional to the
174deadline delta (deadline - now)).
175
176This means the whole PI machinery will have to be reworked - and that is one of
177the most complex pieces of code we have.
diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
index 91c81db0ba71..716fcc1cafb5 100644
--- a/Documentation/scsi/ChangeLog.megaraid_sas
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -1,3 +1,25 @@
11 Release Date : Mon. March 10 11:02:31 PDT 2008 -
2 (emaild-id:megaraidlinux@lsi.com)
3 Sumant Patro
4 Bo Yang
5
62 Current Version : 00.00.03.20-RC1
73 Older Version : 00.00.03.16
8
91. Rollback the sense info implementation
10 Sense buffer ptr data type in the ioctl path is reverted back
11 to u32 * as in previous versions of driver.
12
132. Fixed the driver frame count.
14 The driver sent a wrong frame count to the firmware. Because this
15 particular command is sent to the drive, the FW sees continuous
16 chip resets and so the command times out.
17
183. Add the new controller(1078DE) support to the driver
19 and Increase the max_wait to 60 from 10 in the controller
20 operational status. With this max_wait increase, driver will
21 make sure the FW will finish the pending cmd for KDUMP case.
22
11 Release Date : Thur. Nov. 07 16:30:43 PST 2007 - 231 Release Date : Thur. Nov. 07 16:30:43 PST 2007 -
2 (emaild-id:megaraidlinux@lsi.com) 24 (emaild-id:megaraidlinux@lsi.com)
3 Sumant Patro 25 Sumant Patro
diff --git a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt
index b7be95b5bd24..40752602c050 100644
--- a/Documentation/scsi/st.txt
+++ b/Documentation/scsi/st.txt
@@ -2,7 +2,7 @@ This file contains brief information about the SCSI tape driver.
2The driver is currently maintained by Kai Mäkisara (email 2The driver is currently maintained by Kai Mäkisara (email
3Kai.Makisara@kolumbus.fi) 3Kai.Makisara@kolumbus.fi)
4 4
5Last modified: Mon Mar 7 21:14:44 2005 by kai.makisara 5Last modified: Sun Feb 24 21:59:07 2008 by kai.makisara
6 6
7 7
8BASICS 8BASICS
@@ -133,6 +133,11 @@ the defaults set by the user. The value -1 means the default is not set. The
133file 'dev' contains the device numbers corresponding to this device. The links 133file 'dev' contains the device numbers corresponding to this device. The links
134'device' and 'driver' point to the SCSI device and driver entries. 134'device' and 'driver' point to the SCSI device and driver entries.
135 135
136Each directory also contains the entry 'options' which shows the currently
137enabled driver and mode options. The value in the file is a bit mask where the
138bit definitions are the same as those used with MTSETDRVBUFFER in setting the
139options.
140
136A link named 'tape' is made from the SCSI device directory to the class 141A link named 'tape' is made from the SCSI device directory to the class
137directory corresponding to the mode 0 auto-rewind device (e.g., st0). 142directory corresponding to the mode 0 auto-rewind device (e.g., st0).
138 143
@@ -372,6 +377,11 @@ MTSETDRVBUFFER
372 MT_ST_SYSV sets the SYSV semantics (mode) 377 MT_ST_SYSV sets the SYSV semantics (mode)
373 MT_ST_NOWAIT enables immediate mode (i.e., don't wait for 378 MT_ST_NOWAIT enables immediate mode (i.e., don't wait for
374 the command to finish) for some commands (e.g., rewind) 379 the command to finish) for some commands (e.g., rewind)
380 MT_ST_SILI enables setting the SILI bit in SCSI commands when
381 reading in variable block mode to enhance performance when
382 reading blocks shorter than the byte count; set this only
383 if you are sure that the drive supports SILI and the HBA
384 correctly returns transfer residuals
375 MT_ST_DEBUGGING debugging (global; debugging must be 385 MT_ST_DEBUGGING debugging (global; debugging must be
376 compiled into the driver) 386 compiled into the driver)
377 MT_ST_SETBOOLEANS 387 MT_ST_SETBOOLEANS
diff --git a/Documentation/smart-config.txt b/Documentation/smart-config.txt
deleted file mode 100644
index 8467447b5a87..000000000000
--- a/Documentation/smart-config.txt
+++ /dev/null
@@ -1,98 +0,0 @@
1Smart CONFIG_* Dependencies
21 August 1999
3
4Michael Chastain <mec@shout.net>
5Werner Almesberger <almesber@lrc.di.epfl.ch>
6Martin von Loewis <martin@mira.isdn.cs.tu-berlin.de>
7
8Here is the problem:
9
10 Suppose that drivers/net/foo.c has the following lines:
11
12 #include <linux/config.h>
13
14 ...
15
16 #ifdef CONFIG_FOO_AUTOFROB
17 /* Code for auto-frobbing */
18 #else
19 /* Manual frobbing only */
20 #endif
21
22 ...
23
24 #ifdef CONFIG_FOO_MODEL_TWO
25 /* Code for model two */
26 #endif
27
28 Now suppose the user (the person building kernels) reconfigures the
29 kernel to change some unrelated setting. This will regenerate the
30 file include/linux/autoconf.h, which will cause include/linux/config.h
31 to be out of date, which will cause drivers/net/foo.c to be recompiled.
32
33 Most kernel sources, perhaps 80% of them, have at least one CONFIG_*
34 dependency somewhere. So changing _any_ CONFIG_* setting requires
35 almost _all_ of the kernel to be recompiled.
36
37Here is the solution:
38
39 We've made the dependency generator, mkdep.c, smarter. Instead of
40 generating this dependency:
41
42 drivers/net/foo.c: include/linux/config.h
43
44 It now generates these dependencies:
45
46 drivers/net/foo.c: \
47 include/config/foo/autofrob.h \
48 include/config/foo/model/two.h
49
50 So drivers/net/foo.c depends only on the CONFIG_* lines that
51 it actually uses.
52
53 A new program, split-include.c, runs at the beginning of
54 compilation (make bzImage or make zImage). split-include reads
55 include/linux/autoconf.h and updates the include/config/ tree,
56 writing one file per option. It updates only the files for options
57 that have changed.
58
59Flag Dependencies
60
61 Martin Von Loewis contributed another feature to this patch:
62 'flag dependencies'. The idea is that a .o file depends on
63 the compilation flags used to build it. The file foo.o has
64 its flags stored in .flags.foo.o.
65
66 Suppose the user changes the foo driver from resident to modular.
67 'make' will notice that the current foo.o was not compiled with
68 -DMODULE and will recompile foo.c.
69
70 All .o files made from C source have flag dependencies. So do .o
71 files made with ld, and .a files made with ar. However, .o files
72 made from assembly source do not have flag dependencies (nobody
73 needs this yet, but it would be good to fix).
74
75Per-source-file Flags
76
77 Flag dependencies also work with per-source-file flags.
78 You can specify compilation flags for individual source files
79 like this:
80
81 CFLAGS_foo.o = -DSPECIAL_FOO_DEFINE
82
83 This helps clean up drivers/net/Makefile, drivers/scsi/Makefile,
84 and several other Makefiles.
85
86Credit
87
88 Werner Almesberger had the original idea and wrote the first
89 version of this patch.
90
91 Michael Chastain picked it up and continued development. He is
92 now the principal author and maintainer. Please report any bugs
93 to him.
94
95 Martin von Loewis wrote flag dependencies, with some modifications
96 by Michael Chastain.
97
98 Thanks to all of the beta testers.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index e985cf5e0410..0bbee38acd26 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -284,6 +284,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
284 control correctly. If you have problems regarding this, try 284 control correctly. If you have problems regarding this, try
285 another ALSA compliant mixer (alsamixer works). 285 another ALSA compliant mixer (alsamixer works).
286 286
287 Module snd-aw2
288 --------------
289
290 Module for Audiowerk2 sound card
291
292 This module supports multiple cards.
293
287 Module snd-azt2320 294 Module snd-azt2320
288 ------------------ 295 ------------------
289 296
@@ -788,6 +795,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
788 lg-lw LG LW20/LW25 laptop 795 lg-lw LG LW20/LW25 laptop
789 tcl TCL S700 796 tcl TCL S700
790 clevo Clevo laptops (m520G, m665n) 797 clevo Clevo laptops (m520G, m665n)
798 medion Medion Rim 2150
791 test for testing/debugging purpose, almost all controls can be 799 test for testing/debugging purpose, almost all controls can be
792 adjusted. Appearing only when compiled with 800 adjusted. Appearing only when compiled with
793 $CONFIG_SND_DEBUG=y 801 $CONFIG_SND_DEBUG=y
@@ -818,19 +826,25 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
818 hippo_1 Hippo (Benq) with jack detection 826 hippo_1 Hippo (Benq) with jack detection
819 sony-assamd Sony ASSAMD 827 sony-assamd Sony ASSAMD
820 ultra Samsung Q1 Ultra Vista model 828 ultra Samsung Q1 Ultra Vista model
829 lenovo-3000 Lenovo 3000 y410
821 basic fixed pin assignment w/o SPDIF 830 basic fixed pin assignment w/o SPDIF
822 auto auto-config reading BIOS (default) 831 auto auto-config reading BIOS (default)
823 832
824 ALC268 833 ALC267/268
834 quanta-il1 Quanta IL1 mini-notebook
825 3stack 3-stack model 835 3stack 3-stack model
826 toshiba Toshiba A205 836 toshiba Toshiba A205
827 acer Acer laptops 837 acer Acer laptops
828 dell Dell OEM laptops (Vostro 1200) 838 dell Dell OEM laptops (Vostro 1200)
839 zepto Zepto laptops
829 test for testing/debugging purpose, almost all controls can be 840 test for testing/debugging purpose, almost all controls can be
830 adjusted. Appearing only when compiled with 841 adjusted. Appearing only when compiled with
831 $CONFIG_SND_DEBUG=y 842 $CONFIG_SND_DEBUG=y
832 auto auto-config reading BIOS (default) 843 auto auto-config reading BIOS (default)
833 844
845 ALC269
846 basic Basic preset
847
834 ALC662 848 ALC662
835 3stack-dig 3-stack (2-channel) with SPDIF 849 3stack-dig 3-stack (2-channel) with SPDIF
836 3stack-6ch 3-stack (6-channel) 850 3stack-6ch 3-stack (6-channel)
@@ -871,10 +885,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
871 lenovo-nb0763 Lenovo NB0763 885 lenovo-nb0763 Lenovo NB0763
872 lenovo-ms7195-dig Lenovo MS7195 886 lenovo-ms7195-dig Lenovo MS7195
873 haier-w66 Haier W66 887 haier-w66 Haier W66
874 6stack-hp HP machines with 6stack (Nettle boards)
875 3stack-hp HP machines with 3stack (Lucknow, Samba boards) 888 3stack-hp HP machines with 3stack (Lucknow, Samba boards)
876 6stack-dell Dell machines with 6stack (Inspiron 530) 889 6stack-dell Dell machines with 6stack (Inspiron 530)
877 mitac Mitac 8252D 890 mitac Mitac 8252D
891 clevo-m720 Clevo M720 laptop series
892 fujitsu-pi2515 Fujitsu AMILO Pi2515
878 auto auto-config reading BIOS (default) 893 auto auto-config reading BIOS (default)
879 894
880 ALC861/660 895 ALC861/660
@@ -911,6 +926,12 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
911 3stack 3-stack mode (default) 926 3stack 3-stack mode (default)
912 6stack 6-stack mode 927 6stack 6-stack mode
913 928
929 AD1884A / AD1883 / AD1984A / AD1984B
930 desktop 3-stack desktop (default)
931 laptop laptop with HP jack sensing
932 mobile mobile devices with HP jack sensing
933 thinkpad Lenovo Thinkpad X300
934
914 AD1884 935 AD1884
915 N/A 936 N/A
916 937
@@ -936,7 +957,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
936 laptop-automute 2-channel with EAPD and HP-automute (Lenovo N100) 957 laptop-automute 2-channel with EAPD and HP-automute (Lenovo N100)
937 ultra 2-channel with EAPD (Samsung Ultra tablet PC) 958 ultra 2-channel with EAPD (Samsung Ultra tablet PC)
938 959
939 AD1988 960 AD1988/AD1988B/AD1989A/AD1989B
940 6stack 6-jack 961 6stack 6-jack
941 6stack-dig ditto with SPDIF 962 6stack-dig ditto with SPDIF
942 3stack 3-jack 963 3stack 3-jack
@@ -979,6 +1000,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
979 dell-m26 Dell Inspiron 1501 1000 dell-m26 Dell Inspiron 1501
980 dell-m27 Dell Inspiron E1705/9400 1001 dell-m27 Dell Inspiron E1705/9400
981 gateway Gateway laptops with EAPD control 1002 gateway Gateway laptops with EAPD control
1003 panasonic Panasonic CF-74
982 1004
983 STAC9205/9254 1005 STAC9205/9254
984 ref Reference board 1006 ref Reference board
@@ -1017,6 +1039,16 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1017 3stack D965 3stack 1039 3stack D965 3stack
1018 5stack D965 5stack + SPDIF 1040 5stack D965 5stack + SPDIF
1019 dell-3stack Dell Dimension E520 1041 dell-3stack Dell Dimension E520
1042 dell-bios Fixes with Dell BIOS setup
1043
1044 STAC92HD71B*
1045 ref Reference board
1046 dell-m4-1 Dell desktops
1047 dell-m4-2 Dell desktops
1048
1049 STAC92HD73*
1050 ref Reference board
1051 dell-m6 Dell desktops
1020 1052
1021 STAC9872 1053 STAC9872
1022 vaio Setup for VAIO FE550G/SZ110 1054 vaio Setup for VAIO FE550G/SZ110
@@ -1590,6 +1622,16 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
1590 1622
1591 Power management is _not_ supported. 1623 Power management is _not_ supported.
1592 1624
1625 Module snd-pcsp
1626 -----------------
1627
1628 Module for internal PC-Speaker.
1629
1630 nforce_wa - enable NForce chipset workaround. Expect bad sound.
1631
1632 This module supports system beeps, some kind of PCM playback and
1633 even a few mixer controls.
1634
1593 Module snd-pcxhr 1635 Module snd-pcxhr
1594 ---------------- 1636 ----------------
1595 1637
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index 8861e47e5a2d..6d5f18143c50 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -116,6 +116,13 @@ low order bit. So when a chip's timing diagram shows the clock
116starting low (CPOL=0) and data stabilized for sampling during the 116starting low (CPOL=0) and data stabilized for sampling during the
117trailing clock edge (CPHA=1), that's SPI mode 1. 117trailing clock edge (CPHA=1), that's SPI mode 1.
118 118
119Note that the clock mode is relevant as soon as the chipselect goes
120active. So the master must set the clock to inactive before selecting
121a slave, and the slave can tell the chosen polarity by sampling the
122clock level when its select line goes active. That's why many devices
123support for example both modes 0 and 3: they don't care about polarity,
124and always clock data in/out on rising clock edges.
125
119 126
120How do these driver programming interfaces work? 127How do these driver programming interfaces work?
121------------------------------------------------ 128------------------------------------------------
@@ -379,8 +386,14 @@ any more such messages.
379 + when bidirectional reads and writes start ... by how its 386 + when bidirectional reads and writes start ... by how its
380 sequence of spi_transfer requests is arranged; 387 sequence of spi_transfer requests is arranged;
381 388
389 + which I/O buffers are used ... each spi_transfer wraps a
390 buffer for each transfer direction, supporting full duplex
391 (two pointers, maybe the same one in both cases) and half
392 duplex (one pointer is NULL) transfers;
393
382 + optionally defining short delays after transfers ... using 394 + optionally defining short delays after transfers ... using
383 the spi_transfer.delay_usecs setting; 395 the spi_transfer.delay_usecs setting (this delay can be the
396 only protocol effect, if the buffer length is zero);
384 397
385 + whether the chipselect becomes inactive after a transfer and 398 + whether the chipselect becomes inactive after a transfer and
386 any delay ... by using the spi_transfer.cs_change flag; 399 any delay ... by using the spi_transfer.cs_change flag;
diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev
index 5c8e1b988a08..ed2da5e5b28a 100644
--- a/Documentation/spi/spidev
+++ b/Documentation/spi/spidev
@@ -126,8 +126,8 @@ NOTES:
126FULL DUPLEX CHARACTER DEVICE API 126FULL DUPLEX CHARACTER DEVICE API
127================================ 127================================
128 128
129See the sample program below for one example showing the use of the full 129See the spidev_fdx.c sample program for one example showing the use of the
130duplex programming interface. (Although it doesn't perform a full duplex 130full duplex programming interface. (Although it doesn't perform a full duplex
131transfer.) The model is the same as that used in the kernel spi_sync() 131transfer.) The model is the same as that used in the kernel spi_sync()
132request; the individual transfers offer the same capabilities as are 132request; the individual transfers offer the same capabilities as are
133available to kernel drivers (except that it's not asynchronous). 133available to kernel drivers (except that it's not asynchronous).
@@ -141,167 +141,3 @@ and bitrate for each transfer segment.)
141 141
142To make a full duplex request, provide both rx_buf and tx_buf for the 142To make a full duplex request, provide both rx_buf and tx_buf for the
143same transfer. It's even OK if those are the same buffer. 143same transfer. It's even OK if those are the same buffer.
144
145
146SAMPLE PROGRAM
147==============
148
149-------------------------------- CUT HERE
150#include <stdio.h>
151#include <unistd.h>
152#include <stdlib.h>
153#include <fcntl.h>
154#include <string.h>
155
156#include <sys/ioctl.h>
157#include <sys/types.h>
158#include <sys/stat.h>
159
160#include <linux/types.h>
161#include <linux/spi/spidev.h>
162
163
164static int verbose;
165
166static void do_read(int fd, int len)
167{
168 unsigned char buf[32], *bp;
169 int status;
170
171 /* read at least 2 bytes, no more than 32 */
172 if (len < 2)
173 len = 2;
174 else if (len > sizeof(buf))
175 len = sizeof(buf);
176 memset(buf, 0, sizeof buf);
177
178 status = read(fd, buf, len);
179 if (status < 0) {
180 perror("read");
181 return;
182 }
183 if (status != len) {
184 fprintf(stderr, "short read\n");
185 return;
186 }
187
188 printf("read(%2d, %2d): %02x %02x,", len, status,
189 buf[0], buf[1]);
190 status -= 2;
191 bp = buf + 2;
192 while (status-- > 0)
193 printf(" %02x", *bp++);
194 printf("\n");
195}
196
197static void do_msg(int fd, int len)
198{
199 struct spi_ioc_transfer xfer[2];
200 unsigned char buf[32], *bp;
201 int status;
202
203 memset(xfer, 0, sizeof xfer);
204 memset(buf, 0, sizeof buf);
205
206 if (len > sizeof buf)
207 len = sizeof buf;
208
209 buf[0] = 0xaa;
210 xfer[0].tx_buf = (__u64) buf;
211 xfer[0].len = 1;
212
213 xfer[1].rx_buf = (__u64) buf;
214 xfer[1].len = len;
215
216 status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
217 if (status < 0) {
218 perror("SPI_IOC_MESSAGE");
219 return;
220 }
221
222 printf("response(%2d, %2d): ", len, status);
223 for (bp = buf; len; len--)
224 printf(" %02x", *bp++);
225 printf("\n");
226}
227
228static void dumpstat(const char *name, int fd)
229{
230 __u8 mode, lsb, bits;
231 __u32 speed;
232
233 if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) {
234 perror("SPI rd_mode");
235 return;
236 }
237 if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) {
238 perror("SPI rd_lsb_fist");
239 return;
240 }
241 if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) {
242 perror("SPI bits_per_word");
243 return;
244 }
245 if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) {
246 perror("SPI max_speed_hz");
247 return;
248 }
249
250 printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n",
251 name, mode, bits, lsb ? "(lsb first) " : "", speed);
252}
253
254int main(int argc, char **argv)
255{
256 int c;
257 int readcount = 0;
258 int msglen = 0;
259 int fd;
260 const char *name;
261
262 while ((c = getopt(argc, argv, "hm:r:v")) != EOF) {
263 switch (c) {
264 case 'm':
265 msglen = atoi(optarg);
266 if (msglen < 0)
267 goto usage;
268 continue;
269 case 'r':
270 readcount = atoi(optarg);
271 if (readcount < 0)
272 goto usage;
273 continue;
274 case 'v':
275 verbose++;
276 continue;
277 case 'h':
278 case '?':
279usage:
280 fprintf(stderr,
281 "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n",
282 argv[0]);
283 return 1;
284 }
285 }
286
287 if ((optind + 1) != argc)
288 goto usage;
289 name = argv[optind];
290
291 fd = open(name, O_RDWR);
292 if (fd < 0) {
293 perror("open");
294 return 1;
295 }
296
297 dumpstat(name, fd);
298
299 if (msglen)
300 do_msg(fd, msglen);
301
302 if (readcount)
303 do_read(fd, readcount);
304
305 close(fd);
306 return 0;
307}
diff --git a/Documentation/spi/spidev_fdx.c b/Documentation/spi/spidev_fdx.c
new file mode 100644
index 000000000000..fc354f760384
--- /dev/null
+++ b/Documentation/spi/spidev_fdx.c
@@ -0,0 +1,158 @@
1#include <stdio.h>
2#include <unistd.h>
3#include <stdlib.h>
4#include <fcntl.h>
5#include <string.h>
6
7#include <sys/ioctl.h>
8#include <sys/types.h>
9#include <sys/stat.h>
10
11#include <linux/types.h>
12#include <linux/spi/spidev.h>
13
14
15static int verbose;
16
17static void do_read(int fd, int len)
18{
19 unsigned char buf[32], *bp;
20 int status;
21
22 /* read at least 2 bytes, no more than 32 */
23 if (len < 2)
24 len = 2;
25 else if (len > sizeof(buf))
26 len = sizeof(buf);
27 memset(buf, 0, sizeof buf);
28
29 status = read(fd, buf, len);
30 if (status < 0) {
31 perror("read");
32 return;
33 }
34 if (status != len) {
35 fprintf(stderr, "short read\n");
36 return;
37 }
38
39 printf("read(%2d, %2d): %02x %02x,", len, status,
40 buf[0], buf[1]);
41 status -= 2;
42 bp = buf + 2;
43 while (status-- > 0)
44 printf(" %02x", *bp++);
45 printf("\n");
46}
47
48static void do_msg(int fd, int len)
49{
50 struct spi_ioc_transfer xfer[2];
51 unsigned char buf[32], *bp;
52 int status;
53
54 memset(xfer, 0, sizeof xfer);
55 memset(buf, 0, sizeof buf);
56
57 if (len > sizeof buf)
58 len = sizeof buf;
59
60 buf[0] = 0xaa;
61 xfer[0].tx_buf = (__u64) buf;
62 xfer[0].len = 1;
63
64 xfer[1].rx_buf = (__u64) buf;
65 xfer[1].len = len;
66
67 status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
68 if (status < 0) {
69 perror("SPI_IOC_MESSAGE");
70 return;
71 }
72
73 printf("response(%2d, %2d): ", len, status);
74 for (bp = buf; len; len--)
75 printf(" %02x", *bp++);
76 printf("\n");
77}
78
79static void dumpstat(const char *name, int fd)
80{
81 __u8 mode, lsb, bits;
82 __u32 speed;
83
84 if (ioctl(fd, SPI_IOC_RD_MODE, &mode) < 0) {
85 perror("SPI rd_mode");
86 return;
87 }
88 if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) {
89 perror("SPI rd_lsb_fist");
90 return;
91 }
92 if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) {
93 perror("SPI bits_per_word");
94 return;
95 }
96 if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) {
97 perror("SPI max_speed_hz");
98 return;
99 }
100
101 printf("%s: spi mode %d, %d bits %sper word, %d Hz max\n",
102 name, mode, bits, lsb ? "(lsb first) " : "", speed);
103}
104
105int main(int argc, char **argv)
106{
107 int c;
108 int readcount = 0;
109 int msglen = 0;
110 int fd;
111 const char *name;
112
113 while ((c = getopt(argc, argv, "hm:r:v")) != EOF) {
114 switch (c) {
115 case 'm':
116 msglen = atoi(optarg);
117 if (msglen < 0)
118 goto usage;
119 continue;
120 case 'r':
121 readcount = atoi(optarg);
122 if (readcount < 0)
123 goto usage;
124 continue;
125 case 'v':
126 verbose++;
127 continue;
128 case 'h':
129 case '?':
130usage:
131 fprintf(stderr,
132 "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n",
133 argv[0]);
134 return 1;
135 }
136 }
137
138 if ((optind + 1) != argc)
139 goto usage;
140 name = argv[optind];
141
142 fd = open(name, O_RDWR);
143 if (fd < 0) {
144 perror("open");
145 return 1;
146 }
147
148 dumpstat(name, fd);
149
150 if (msglen)
151 do_msg(fd, msglen);
152
153 if (readcount)
154 do_read(fd, readcount);
155
156 close(fd);
157 return 0;
158}
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 471e75389778..619699dde593 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -5,6 +5,28 @@ Please use DEFINE_SPINLOCK()/DEFINE_RWLOCK() or
5__SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate for static 5__SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate for static
6initialization. 6initialization.
7 7
8Most of the time, you can simply turn:
9
10 static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
11
12into:
13
14 static DEFINE_SPINLOCK(xxx_lock);
15
16Static structure member variables go from:
17
18 struct foo bar {
19 .lock = SPIN_LOCK_UNLOCKED;
20 };
21
22to:
23
24 struct foo bar {
25 .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
26 };
27
28Declarations of static rw_locks undergo a similar transformation.
29
8Dynamic initialization, when necessary, may be performed as 30Dynamic initialization, when necessary, may be performed as
9demonstrated below. 31demonstrated below.
10 32
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index 10c8f6922ef4..5ce0952aa065 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -85,6 +85,8 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
85'k' - Secure Access Key (SAK) Kills all programs on the current virtual 85'k' - Secure Access Key (SAK) Kills all programs on the current virtual
86 console. NOTE: See important comments below in SAK section. 86 console. NOTE: See important comments below in SAK section.
87 87
88'l' - Shows a stack backtrace for all active CPUs.
89
88'm' - Will dump current memory info to your console. 90'm' - Will dump current memory info to your console.
89 91
90'n' - Used to make RT tasks nice-able 92'n' - Used to make RT tasks nice-able
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index d9f28be75403..70d68ce8640a 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -108,10 +108,12 @@ and throttle appropriate devices.
108RO read only value 108RO read only value
109RW read/write value 109RW read/write value
110 110
111All thermal sysfs attributes will be represented under /sys/class/thermal 111Thermal sysfs attributes will be represented under /sys/class/thermal.
112Hwmon sysfs I/F extension is also available under /sys/class/hwmon
113if hwmon is compiled in or built as a module.
112 114
113Thermal zone device sys I/F, created once it's registered: 115Thermal zone device sys I/F, created once it's registered:
114|thermal_zone[0-*]: 116/sys/class/thermal/thermal_zone[0-*]:
115 |-----type: Type of the thermal zone 117 |-----type: Type of the thermal zone
116 |-----temp: Current temperature 118 |-----temp: Current temperature
117 |-----mode: Working mode of the thermal zone 119 |-----mode: Working mode of the thermal zone
@@ -119,7 +121,7 @@ Thermal zone device sys I/F, created once it's registered:
119 |-----trip_point_[0-*]_type: Trip point type 121 |-----trip_point_[0-*]_type: Trip point type
120 122
121Thermal cooling device sys I/F, created once it's registered: 123Thermal cooling device sys I/F, created once it's registered:
122|cooling_device[0-*]: 124/sys/class/thermal/cooling_device[0-*]:
123 |-----type : Type of the cooling device(processor/fan/...) 125 |-----type : Type of the cooling device(processor/fan/...)
124 |-----max_state: Maximum cooling state of the cooling device 126 |-----max_state: Maximum cooling state of the cooling device
125 |-----cur_state: Current cooling state of the cooling device 127 |-----cur_state: Current cooling state of the cooling device
@@ -130,10 +132,19 @@ They represent the relationship between a thermal zone and its associated coolin
130They are created/removed for each 132They are created/removed for each
131thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution. 133thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful execution.
132 134
133|thermal_zone[0-*] 135/sys/class/thermal/thermal_zone[0-*]
134 |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone 136 |-----cdev[0-*]: The [0-*]th cooling device in the current thermal zone
135 |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with 137 |-----cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with
136 138
139Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
140the generic thermal driver also creates a hwmon sysfs I/F for each _type_ of
141thermal zone device. E.g. the generic thermal driver registers one hwmon class device
142and builds the associated hwmon sysfs I/F for all the registered ACPI thermal zones.
143/sys/class/hwmon/hwmon[0-*]:
144 |-----name: The type of the thermal zone devices.
145 |-----temp[1-*]_input: The current temperature of thermal zone [1-*].
146 |-----temp[1-*]_crit: The critical trip point of thermal zone [1-*].
147Please read Documentation/hwmon/sysfs-interface for additional information.
137 148
138*************************** 149***************************
139* Thermal zone attributes * 150* Thermal zone attributes *
@@ -141,7 +152,10 @@ thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device successful e
141 152
142type Strings which represent the thermal zone type. 153type Strings which represent the thermal zone type.
143 This is given by thermal zone driver as part of registration. 154 This is given by thermal zone driver as part of registration.
144 Eg: "ACPI thermal zone" indicates it's a ACPI thermal device 155 Eg: "acpitz" indicates it's an ACPI thermal device.
156 In order to keep it consistent with hwmon sys attribute,
157 this should be a short, lowercase string,
158 containing neither spaces nor dashes.
145 RO 159 RO
146 Required 160 Required
147 161
@@ -218,7 +232,7 @@ the sys I/F structure will be built like this:
218/sys/class/thermal: 232/sys/class/thermal:
219 233
220|thermal_zone1: 234|thermal_zone1:
221 |-----type: ACPI thermal zone 235 |-----type: acpitz
222 |-----temp: 37000 236 |-----temp: 37000
223 |-----mode: kernel 237 |-----mode: kernel
224 |-----trip_point_0_temp: 100000 238 |-----trip_point_0_temp: 100000
@@ -243,3 +257,10 @@ the sys I/F structure will be built like this:
243 |-----type: Fan 257 |-----type: Fan
244 |-----max_state: 2 258 |-----max_state: 2
245 |-----cur_state: 0 259 |-----cur_state: 0
260
261/sys/class/hwmon:
262
263|hwmon0:
264 |-----name: acpitz
265 |-----temp1_input: 37000
266 |-----temp1_crit: 100000
diff --git a/Documentation/hrtimers/highres.txt b/Documentation/timers/highres.txt
index ce0e9a91e157..a73ecf5b4bdb 100644
--- a/Documentation/hrtimers/highres.txt
+++ b/Documentation/timers/highres.txt
@@ -98,7 +98,7 @@ System-level global event devices are used for the Linux periodic tick. Per-CPU
98event devices are used to provide local CPU functionality such as process 98event devices are used to provide local CPU functionality such as process
99accounting, profiling, and high resolution timers. 99accounting, profiling, and high resolution timers.
100 100
101The management layer assignes one or more of the folliwing functions to a clock 101The management layer assigns one or more of the following functions to a clock
102event device: 102event device:
103 - system global periodic tick (jiffies update) 103 - system global periodic tick (jiffies update)
104 - cpu local update_process_times 104 - cpu local update_process_times
diff --git a/Documentation/hrtimers/hrtimers.txt b/Documentation/timers/hrtimers.txt
index ce31f65e12e7..ce31f65e12e7 100644
--- a/Documentation/hrtimers/hrtimers.txt
+++ b/Documentation/timers/hrtimers.txt
diff --git a/Documentation/hrtimer/timer_stats.txt b/Documentation/timers/timer_stats.txt
index 20d368c59814..20d368c59814 100644
--- a/Documentation/hrtimer/timer_stats.txt
+++ b/Documentation/timers/timer_stats.txt
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
index 6223eace3c09..b0472ac5226a 100644
--- a/Documentation/unaligned-memory-access.txt
+++ b/Documentation/unaligned-memory-access.txt
@@ -57,7 +57,7 @@ here; a summary of the common scenarios is presented below:
57 unaligned access to be corrected. 57 unaligned access to be corrected.
58 - Some architectures are not capable of unaligned memory access, but will 58 - Some architectures are not capable of unaligned memory access, but will
59 silently perform a different memory access to the one that was requested, 59 silently perform a different memory access to the one that was requested,
60 resulting a a subtle code bug that is hard to detect! 60 resulting in a subtle code bug that is hard to detect!
61 61
62It should be obvious from the above that if your code causes unaligned 62It should be obvious from the above that if your code causes unaligned
63memory accesses to happen, your code will not work correctly on certain 63memory accesses to happen, your code will not work correctly on certain
@@ -209,7 +209,7 @@ memory and you wish to avoid unaligned access, its usage is as follows:
209 209
210 u32 value = get_unaligned((u32 *) data); 210 u32 value = get_unaligned((u32 *) data);
211 211
212These macros work work for memory accesses of any length (not just 32 bits as 212These macros work for memory accesses of any length (not just 32 bits as
213in the examples above). Be aware that when compared to standard access of 213in the examples above). Be aware that when compared to standard access of
214aligned memory, using these macros to access unaligned memory can be costly in 214aligned memory, using these macros to access unaligned memory can be costly in
215terms of performance. 215terms of performance.
diff --git a/Documentation/usb/anchors.txt b/Documentation/usb/anchors.txt
new file mode 100644
index 000000000000..7304bcf5a306
--- /dev/null
+++ b/Documentation/usb/anchors.txt
@@ -0,0 +1,50 @@
1What is anchor?
2===============
3
4A USB driver needs to support some callbacks requiring
5a driver to cease all IO to an interface. To do so, a
6driver has to keep track of the URBs it has submitted
7to know they've all completed or to call usb_kill_urb
8for them. The anchor is a data structure that takes care of
9keeping track of URBs and provides methods to deal with
10multiple URBs.
11
12Allocation and Initialisation
13=============================
14
15There's no API to allocate an anchor. It is simply declared
16as struct usb_anchor. init_usb_anchor() must be called to
17initialise the data structure.
18
19Deallocation
20============
21
22Once it has no more URBs associated with it, the anchor can be
23freed with normal memory management operations.
24
25Association and disassociation of URBs with anchors
26===================================================
27
28An association of URBs to an anchor is made by an explicit
29call to usb_anchor_urb(). The association is maintained until
30an URB is finished by (successful) completion. Thus disassociation
31is automatic. A function is provided to forcibly finish (kill)
32all URBs associated with an anchor.
33Furthermore, disassociation can be made with usb_unanchor_urb().
34
35Operations on multitudes of URBs
36================================
37
38usb_kill_anchored_urbs()
39------------------------
40
41This function kills all URBs associated with an anchor. The URBs
42are killed in the reverse temporal order they were submitted.
43This way no data can be reordered.
44
45usb_wait_anchor_empty_timeout()
46-------------------------------
47
48This function waits for all URBs associated with an anchor to finish
49or a timeout, whichever comes first. Its return value will tell you
50whether the timeout was reached.
diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt
new file mode 100644
index 000000000000..7c812411945b
--- /dev/null
+++ b/Documentation/usb/callbacks.txt
@@ -0,0 +1,132 @@
1What callbacks will usbcore do?
2===============================
3
4Usbcore will call into a driver through callbacks defined in the driver
5structure and through the completion handler of URBs a driver submits.
6Only the former are in the scope of this document. These two kinds of
7callbacks are completely independent of each other. Information on the
8completion callback can be found in Documentation/usb/URB.txt.
9
10The callbacks defined in the driver structure are:
11
121. Hotplugging callbacks:
13
14 * @probe: Called to see if the driver is willing to manage a particular
15 * interface on a device.
16 * @disconnect: Called when the interface is no longer accessible, usually
17 * because its device has been (or is being) disconnected or the
18 * driver module is being unloaded.
19
202. Odd backdoor through usbfs:
21
22 * @ioctl: Used for drivers that want to talk to userspace through
23 * the "usbfs" filesystem. This lets devices provide ways to
24 * expose information to user space regardless of where they
25 * do (or don't) show up otherwise in the filesystem.
26
273. Power management (PM) callbacks:
28
29 * @suspend: Called when the device is going to be suspended.
30 * @resume: Called when the device is being resumed.
31 * @reset_resume: Called when the suspended device has been reset instead
32 * of being resumed.
33
344. Device level operations:
35
36 * @pre_reset: Called when the device is about to be reset.
37 * @post_reset: Called after the device has been reset
38
39The ioctl interface (2) should be used only if you have a very good
40reason. Sysfs is preferred these days. The PM callbacks are covered
41separately in Documentation/usb/power-management.txt.
42
43Calling conventions
44===================
45
46All callbacks are mutually exclusive. There's no need for locking
47against other USB callbacks. All callbacks are called from a task
48context. You may sleep. However, it is important that all sleeps have a
49small fixed upper limit in time. In particular you must not call out to
50user space and await results.
51
52Hotplugging callbacks
53=====================
54
55These callbacks are intended to associate and disassociate a driver with
56an interface. A driver's bond to an interface is exclusive.
57
58The probe() callback
59--------------------
60
61int (*probe) (struct usb_interface *intf,
62 const struct usb_device_id *id);
63
64Accept or decline an interface. If you accept the device return 0,
65otherwise -ENODEV or -ENXIO. Other error codes should be used only if a
66genuine error occurred during initialisation which prevented a driver
67from accepting a device that would otherwise have been accepted.
68You are strongly encouraged to use usbcore's facility,
69usb_set_intfdata(), to associate a data structure with an interface, so
70that you know which internal state and identity you associate with a
71particular interface. The device will not be suspended and you may do IO
72to the interface you are called for and endpoint 0 of the device. Device
73initialisation that doesn't take too long is a good idea here.
74
75The disconnect() callback
76-------------------------
77
78void (*disconnect) (struct usb_interface *intf);
79
80This callback is a signal to break any connection with an interface.
81You are not allowed any IO to a device after returning from this
82callback. You also may not do any other operation that may interfere
83with another driver bound the interface, eg. a power management
84operation.
85If you are called due to a physical disconnection, all your URBs will be
86killed by usbcore. Note that in this case disconnect will be called some
87time after the physical disconnection. Thus your driver must be prepared
88to deal with failing IO even prior to the callback.
89
90Device level callbacks
91======================
92
93pre_reset
94---------
95
96int (*pre_reset)(struct usb_interface *intf);
97
98Another driver or user space is triggering a reset on the device which
99contains the interface passed as an argument. Cease IO and save any
100device state you need to restore.
101
102If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you
103are in atomic context.
104
105post_reset
106----------
107
108int (*post_reset)(struct usb_interface *intf);
109
110The reset has completed. Restore any saved device state and begin
111using the device again.
112
113If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you
114are in atomic context.
115
116Call sequences
117==============
118
119No callbacks other than probe will be invoked for an interface
120that isn't bound to your driver.
121
122Probe will never be called for an interface bound to a driver.
123Hence following a successful probe, disconnect will be called
124before there is another probe for the same interface.
125
126Once your driver is bound to an interface, disconnect can be
127called at any time except in between pre_reset and post_reset.
128pre_reset is always followed by post_reset, even if the reset
129failed or the device has been unplugged.
130
131suspend is always followed by one of: resume, reset_resume, or
132disconnect.
diff --git a/Documentation/usb/persist.txt b/Documentation/usb/persist.txt
index df54d645cbb5..d56cb1a11550 100644
--- a/Documentation/usb/persist.txt
+++ b/Documentation/usb/persist.txt
@@ -2,7 +2,7 @@
2 2
3 Alan Stern <stern@rowland.harvard.edu> 3 Alan Stern <stern@rowland.harvard.edu>
4 4
5 September 2, 2006 (Updated May 29, 2007) 5 September 2, 2006 (Updated February 25, 2008)
6 6
7 7
8 What is the problem? 8 What is the problem?
@@ -65,9 +65,10 @@ much better.)
65 65
66 What is the solution? 66 What is the solution?
67 67
68Setting CONFIG_USB_PERSIST will cause the kernel to work around these 68The kernel includes a feature called USB-persist. It tries to work
69issues. It enables a mode in which the core USB device data 69around these issues by allowing the core USB device data structures to
70structures are allowed to persist across a power-session disruption. 70persist across a power-session disruption.
71
71It works like this. If the kernel sees that a USB host controller is 72It works like this. If the kernel sees that a USB host controller is
72not in the expected state during resume (i.e., if the controller was 73not in the expected state during resume (i.e., if the controller was
73reset or otherwise had lost power) then it applies a persistence check 74reset or otherwise had lost power) then it applies a persistence check
@@ -80,28 +81,30 @@ re-enumeration shows that the device now attached to that port has the
80same descriptors as before, including the Vendor and Product IDs, then 81same descriptors as before, including the Vendor and Product IDs, then
81the kernel continues to use the same device structure. In effect, the 82the kernel continues to use the same device structure. In effect, the
82kernel treats the device as though it had merely been reset instead of 83kernel treats the device as though it had merely been reset instead of
83unplugged. 84unplugged. The same thing happens if the host controller is in the
85expected state but a USB device was unplugged and then replugged.
84 86
85If no device is now attached to the port, or if the descriptors are 87If no device is now attached to the port, or if the descriptors are
86different from what the kernel remembers, then the treatment is what 88different from what the kernel remembers, then the treatment is what
87you would expect. The kernel destroys the old device structure and 89you would expect. The kernel destroys the old device structure and
88behaves as though the old device had been unplugged and a new device 90behaves as though the old device had been unplugged and a new device
89plugged in, just as it would without the CONFIG_USB_PERSIST option. 91plugged in.
90 92
91The end result is that the USB device remains available and usable. 93The end result is that the USB device remains available and usable.
92Filesystem mounts and memory mappings are unaffected, and the world is 94Filesystem mounts and memory mappings are unaffected, and the world is
93now a good and happy place. 95now a good and happy place.
94 96
95Note that even when CONFIG_USB_PERSIST is set, the "persist" feature 97Note that the "USB-persist" feature will be applied only to those
96will be applied only to those devices for which it is enabled. You 98devices for which it is enabled. You can enable the feature by doing
97can enable the feature by doing (as root): 99(as root):
98 100
99 echo 1 >/sys/bus/usb/devices/.../power/persist 101 echo 1 >/sys/bus/usb/devices/.../power/persist
100 102
101where the "..." should be filled in with the device's ID. Disable 103where the "..." should be filled in with the device's ID. Disable
102the feature by writing 0 instead of 1. For hubs the feature is 104the feature by writing 0 instead of 1. For hubs the feature is
103automatically and permanently enabled, so you only have to worry about 105automatically and permanently enabled and the power/persist file
104setting it for devices where it really matters. 106doesn't even exist, so you only have to worry about setting it for
107devices where it really matters.
105 108
106 109
107 Is this the best solution? 110 Is this the best solution?
@@ -112,19 +115,19 @@ centralized Logical Volume Manager. Such a solution would allow you
112to plug in a USB flash device, create a persistent volume associated 115to plug in a USB flash device, create a persistent volume associated
113with it, unplug the flash device, plug it back in later, and still 116with it, unplug the flash device, plug it back in later, and still
114have the same persistent volume associated with the device. As such 117have the same persistent volume associated with the device. As such
115it would be more far-reaching than CONFIG_USB_PERSIST. 118it would be more far-reaching than USB-persist.
116 119
117On the other hand, writing a persistent volume manager would be a big 120On the other hand, writing a persistent volume manager would be a big
118job and using it would require significant input from the user. This 121job and using it would require significant input from the user. This
119solution is much quicker and easier -- and it exists now, a giant 122solution is much quicker and easier -- and it exists now, a giant
120point in its favor! 123point in its favor!
121 124
122Furthermore, the USB_PERSIST option applies to _all_ USB devices, not 125Furthermore, the USB-persist feature applies to _all_ USB devices, not
123just mass-storage devices. It might turn out to be equally useful for 126just mass-storage devices. It might turn out to be equally useful for
124other device types, such as network interfaces. 127other device types, such as network interfaces.
125 128
126 129
127 WARNING: Using CONFIG_USB_PERSIST can be dangerous!! 130 WARNING: USB-persist can be dangerous!!
128 131
129When recovering an interrupted power session the kernel does its best 132When recovering an interrupted power session the kernel does its best
130to make sure the USB device hasn't been changed; that is, the same 133to make sure the USB device hasn't been changed; that is, the same
@@ -133,10 +136,10 @@ aren't guaranteed to be 100% accurate.
133 136
134If you replace one USB device with another of the same type (same 137If you replace one USB device with another of the same type (same
135manufacturer, same IDs, and so on) there's an excellent chance the 138manufacturer, same IDs, and so on) there's an excellent chance the
136kernel won't detect the change. Serial numbers and other strings are 139kernel won't detect the change. The serial number string and other
137not compared. In many cases it wouldn't help if they were, because 140descriptors are compared with the kernel's stored values, but this
138manufacturers frequently omit serial numbers entirely in their 141might not help since manufacturers frequently omit serial numbers
139devices. 142entirely in their devices.
140 143
141Furthermore it's quite possible to leave a USB device exactly the same 144Furthermore it's quite possible to leave a USB device exactly the same
142while changing its media. If you replace the flash memory card in a 145while changing its media. If you replace the flash memory card in a
@@ -152,5 +155,5 @@ but yourself.
152YOU HAVE BEEN WARNED! USE AT YOUR OWN RISK! 155YOU HAVE BEEN WARNED! USE AT YOUR OWN RISK!
153 156
154That having been said, most of the time there shouldn't be any trouble 157That having been said, most of the time there shouldn't be any trouble
155at all. The "persist" feature can be extremely useful. Make the most 158at all. The USB-persist feature can be extremely useful. Make the
156of it. 159most of it.
diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt
index 8b077e43eee7..ff2c1ff57ba2 100644
--- a/Documentation/usb/usb-serial.txt
+++ b/Documentation/usb/usb-serial.txt
@@ -192,12 +192,9 @@ Keyspan USA-series Serial Adapters
192 192
193FTDI Single Port Serial Driver 193FTDI Single Port Serial Driver
194 194
195 This is a single port DB-25 serial adapter. More information about this 195 This is a single port DB-25 serial adapter.
196 device and the Linux driver can be found at:
197 http://reality.sgi.com/bryder_wellington/ftdi_sio/
198 196
199 For any questions or problems with this driver, please contact Bill Ryder 197 For any questions or problems with this driver, please contact Bill Ryder.
200 at bryder@sgi.com
201 198
202 199
203ZyXEL omni.net lcd plus ISDN TA 200ZyXEL omni.net lcd plus ISDN TA
diff --git a/Documentation/video4linux/CARDLIST.au0828 b/Documentation/video4linux/CARDLIST.au0828
new file mode 100644
index 000000000000..86d1c8e7b18f
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.au0828
@@ -0,0 +1,4 @@
1 0 -> Unknown board (au0828)
2 1 -> Hauppauge HVR950Q (au0828) [2040:7200,2040:7210,2040:7217,2040:721b,2040:721f,2040:7280,0fd9:0008]
3 2 -> Hauppauge HVR850 (au0828) [2040:7240]
4 3 -> DViCO FusionHDTV USB (au0828) [0fe9:d620]
diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv
index d97cf7cc6088..f32efb6fb12c 100644
--- a/Documentation/video4linux/CARDLIST.bttv
+++ b/Documentation/video4linux/CARDLIST.bttv
@@ -148,3 +148,5 @@
148147 -> VoodooTV 200 (USA) [121a:3000] 148147 -> VoodooTV 200 (USA) [121a:3000]
149148 -> DViCO FusionHDTV 2 [dbc0:d200] 149148 -> DViCO FusionHDTV 2 [dbc0:d200]
150149 -> Typhoon TV-Tuner PCI (50684) 150149 -> Typhoon TV-Tuner PCI (50684)
151150 -> Geovision GV-600 [008a:763c]
152151 -> Kozumi KTV-01C
diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885
index 0924e6e142c4..191194ea1e25 100644
--- a/Documentation/video4linux/CARDLIST.cx23885
+++ b/Documentation/video4linux/CARDLIST.cx23885
@@ -5,3 +5,6 @@
5 4 -> DViCO FusionHDTV5 Express [18ac:d500] 5 4 -> DViCO FusionHDTV5 Express [18ac:d500]
6 5 -> Hauppauge WinTV-HVR1500Q [0070:7790,0070:7797] 6 5 -> Hauppauge WinTV-HVR1500Q [0070:7790,0070:7797]
7 6 -> Hauppauge WinTV-HVR1500 [0070:7710,0070:7717] 7 6 -> Hauppauge WinTV-HVR1500 [0070:7710,0070:7717]
8 7 -> Hauppauge WinTV-HVR1200 [0070:71d1,0070:71d3]
9 8 -> Hauppauge WinTV-HVR1700 [0070:8101]
10 9 -> Hauppauge WinTV-HVR1400 [0070:8010]
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88
index bc5593bd9704..7cf5685d3645 100644
--- a/Documentation/video4linux/CARDLIST.cx88
+++ b/Documentation/video4linux/CARDLIST.cx88
@@ -57,3 +57,12 @@
57 56 -> Hauppauge WinTV-HVR1300 DVB-T/Hybrid MPEG Encoder [0070:9600,0070:9601,0070:9602] 57 56 -> Hauppauge WinTV-HVR1300 DVB-T/Hybrid MPEG Encoder [0070:9600,0070:9601,0070:9602]
58 57 -> ADS Tech Instant Video PCI [1421:0390] 58 57 -> ADS Tech Instant Video PCI [1421:0390]
59 58 -> Pinnacle PCTV HD 800i [11bd:0051] 59 58 -> Pinnacle PCTV HD 800i [11bd:0051]
60 59 -> DViCO FusionHDTV 5 PCI nano [18ac:d530]
61 60 -> Pinnacle Hybrid PCTV [12ab:1788]
62 61 -> Winfast TV2000 XP Global [107d:6f18]
63 62 -> PowerColor RA330 [14f1:ea3d]
64 63 -> Geniatech X8000-MT DVBT [14f1:8852]
65 64 -> DViCO FusionHDTV DVB-T PRO [18ac:db30]
66 65 -> DViCO FusionHDTV 7 Gold [18ac:d610]
67 66 -> Prolink Pixelview MPEG 8000GT [1554:4935]
68 67 -> Kworld PlusTV HD PCI 120 (ATSC 120) [17de:08c1]
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
index f40e09296f30..1d6a245c828f 100644
--- a/Documentation/video4linux/CARDLIST.em28xx
+++ b/Documentation/video4linux/CARDLIST.em28xx
@@ -14,4 +14,4 @@
14 13 -> Terratec Prodigy XS (em2880) [0ccd:0047] 14 13 -> Terratec Prodigy XS (em2880) [0ccd:0047]
15 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840) 15 14 -> Pixelview Prolink PlayTV USB 2.0 (em2820/em2840)
16 15 -> V-Gear PocketTV (em2800) 16 15 -> V-Gear PocketTV (em2800)
17 16 -> Hauppauge WinTV HVR 950 (em2880) [2040:6513] 17 16 -> Hauppauge WinTV HVR 950 (em2880) [2040:6513,2040:6517,2040:651b,2040:651f]
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
index 0424901ebc78..67937df1e974 100644
--- a/Documentation/video4linux/CARDLIST.saa7134
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -25,8 +25,8 @@
25 24 -> KNC One TV-Station DVR [1894:a006] 25 24 -> KNC One TV-Station DVR [1894:a006]
26 25 -> ASUS TV-FM 7133 [1043:4843] 26 25 -> ASUS TV-FM 7133 [1043:4843]
27 26 -> Pinnacle PCTV Stereo (saa7134) [11bd:002b] 27 26 -> Pinnacle PCTV Stereo (saa7134) [11bd:002b]
28 27 -> Manli MuchTV M-TV002/Behold TV 403 FM 28 27 -> Manli MuchTV M-TV002
29 28 -> Manli MuchTV M-TV001/Behold TV 401 29 28 -> Manli MuchTV M-TV001
30 29 -> Nagase Sangyo TransGear 3000TV [1461:050c] 30 29 -> Nagase Sangyo TransGear 3000TV [1461:050c]
31 30 -> Elitegroup ECS TVP3XP FM1216 Tuner Card(PAL-BG,FM) [1019:4cb4] 31 30 -> Elitegroup ECS TVP3XP FM1216 Tuner Card(PAL-BG,FM) [1019:4cb4]
32 31 -> Elitegroup ECS TVP3XP FM1236 Tuner Card (NTSC,FM) [1019:4cb5] 32 31 -> Elitegroup ECS TVP3XP FM1236 Tuner Card (NTSC,FM) [1019:4cb5]
@@ -128,6 +128,16 @@
128127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090] 128127 -> Beholder BeholdTV 507 FM/RDS / BeholdTV 509 FM [0000:5071,0000:507B,5ace:5070,5ace:5090]
129128 -> Beholder BeholdTV Columbus TVFM [0000:5201] 129128 -> Beholder BeholdTV Columbus TVFM [0000:5201]
130129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093] 130129 -> Beholder BeholdTV 607 / BeholdTV 609 [5ace:6070,5ace:6071,5ace:6072,5ace:6073,5ace:6090,5ace:6091,5ace:6092,5ace:6093]
131130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193] 131130 -> Beholder BeholdTV M6 / BeholdTV M6 Extra [5ace:6190,5ace:6193,5ace:6191]
132131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022] 132131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022]
133132 -> Genius TVGO AM11MCE 133132 -> Genius TVGO AM11MCE
134133 -> NXP Snake DVB-S reference design
135134 -> Medion/Creatix CTX953 Hybrid [16be:0010]
136135 -> MSI TV@nywhere A/D v1.1 [1462:8625]
137136 -> AVerMedia Cardbus TV/Radio (E506R) [1461:f436]
138137 -> AVerMedia Hybrid TV/Radio (A16D) [1461:f936]
139138 -> Avermedia M115 [1461:a836]
140139 -> Compro VideoMate T750 [185b:c900]
141140 -> Avermedia DVB-S Pro A700 [1461:a7a1]
142141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2]
143142 -> Beholder BeholdTV H6 [5ace:6290]
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt
new file mode 100644
index 000000000000..6842c262890f
--- /dev/null
+++ b/Documentation/video4linux/cx18.txt
@@ -0,0 +1,36 @@
1Some notes regarding the cx18 driver for the Conexant CX23418 MPEG
2encoder chip:
3
41) The only hardware currently supported is the Hauppauge HVR-1600
5 card and the Compro VideoMate H900 (note that this card only
6 supports analog input, it has no digital tuner!).
7
82) Some people have problems getting the i2c bus to work. Cause unknown.
9 The symptom is that the eeprom cannot be read and the card is
10 unusable.
11
123) The audio from the analog tuner is mono only. Probably caused by
13 incorrect audio register information in the datasheet. We are
14 waiting for updated information from Conexant.
15
164) VBI (raw or sliced) has not yet been implemented.
17
185) MPEG indexing is not yet implemented.
19
206) The driver is still a bit rough around the edges; this should
21 improve over time.
22
23
24Firmware:
25
26The firmware needs to be extracted from the Windows Hauppauge HVR-1600
27driver, available here:
28
29http://hauppauge.lightpath.net/software/install_cd/hauppauge_cd_3.4d1.zip
30
31Unzip, then copy the following files to the firmware directory
32and rename them as follows:
33
34Drivers/Driver18/hcw18apu.rom -> v4l-cx23418-apu.fw
35Drivers/Driver18/hcw18enc.rom -> v4l-cx23418-cpu.fw
36Drivers/Driver18/hcw18mlC.rom -> v4l-cx23418-dig.fw
diff --git a/Documentation/video4linux/extract_xc3028.pl b/Documentation/video4linux/extract_xc3028.pl
index cced8ac5c543..2cb816047fc1 100644
--- a/Documentation/video4linux/extract_xc3028.pl
+++ b/Documentation/video4linux/extract_xc3028.pl
@@ -686,11 +686,11 @@ sub main_firmware($$$$)
686 write_hunk(812664, 192); 686 write_hunk(812664, 192);
687 687
688 # 688 #
689 # Firmware 58, type: SCODE FW HAS IF (0x60000000), IF = 4.50 MHz id: NTSC/M Jp (0000000000002000), size: 192 689 # Firmware 58, type: SCODE FW MTS LCD NOGD MONO IF HAS IF (0x6002b004), IF = 4.50 MHz id: NTSC PAL/M PAL/N (000000000000b700), size: 192
690 # 690 #
691 691
692 write_le32(0x60000000); # Type 692 write_le32(0x6002b004); # Type
693 write_le64(0x00000000, 0x00002000); # ID 693 write_le64(0x00000000, 0x0000b700); # ID
694 write_le16(4500); # IF 694 write_le16(4500); # IF
695 write_le32(192); # Size 695 write_le32(192); # Size
696 write_hunk(807672, 192); 696 write_hunk(807672, 192);
@@ -706,10 +706,10 @@ sub main_firmware($$$$)
706 write_hunk(807864, 192); 706 write_hunk(807864, 192);
707 707
708 # 708 #
709 # Firmware 60, type: SCODE FW DTV78 ZARLINK456 HAS IF (0x62000100), IF = 4.76 MHz id: (0000000000000000), size: 192 709 # Firmware 60, type: SCODE FW DTV6 QAM DTV7 DTV78 DTV8 ZARLINK456 HAS IF (0x620003e0), IF = 4.76 MHz id: (0000000000000000), size: 192
710 # 710 #
711 711
712 write_le32(0x62000100); # Type 712 write_le32(0x620003e0); # Type
713 write_le64(0x00000000, 0x00000000); # ID 713 write_le64(0x00000000, 0x00000000); # ID
714 write_le16(4760); # IF 714 write_le16(4760); # IF
715 write_le32(192); # Size 715 write_le32(192); # Size
@@ -726,30 +726,30 @@ sub main_firmware($$$$)
726 write_hunk(811512, 192); 726 write_hunk(811512, 192);
727 727
728 # 728 #
729 # Firmware 62, type: SCODE FW DTV7 ZARLINK456 HAS IF (0x62000080), IF = 5.26 MHz id: (0000000000000000), size: 192 729 # Firmware 62, type: SCODE FW HAS IF (0x60000000), IF = 5.26 MHz id: (0000000000000000), size: 192
730 # 730 #
731 731
732 write_le32(0x62000080); # Type 732 write_le32(0x60000000); # Type
733 write_le64(0x00000000, 0x00000000); # ID 733 write_le64(0x00000000, 0x00000000); # ID
734 write_le16(5260); # IF 734 write_le16(5260); # IF
735 write_le32(192); # Size 735 write_le32(192); # Size
736 write_hunk(810552, 192); 736 write_hunk(810552, 192);
737 737
738 # 738 #
739 # Firmware 63, type: SCODE FW MONO HAS IF (0x60008000), IF = 5.32 MHz id: PAL/BG NICAM/B (0000000800000007), size: 192 739 # Firmware 63, type: SCODE FW MONO HAS IF (0x60008000), IF = 5.32 MHz id: PAL/BG A2 NICAM (0000000f00000007), size: 192
740 # 740 #
741 741
742 write_le32(0x60008000); # Type 742 write_le32(0x60008000); # Type
743 write_le64(0x00000008, 0x00000007); # ID 743 write_le64(0x0000000f, 0x00000007); # ID
744 write_le16(5320); # IF 744 write_le16(5320); # IF
745 write_le32(192); # Size 745 write_le32(192); # Size
746 write_hunk(810744, 192); 746 write_hunk(810744, 192);
747 747
748 # 748 #
749 # Firmware 64, type: SCODE FW DTV8 CHINA HAS IF (0x64000200), IF = 5.40 MHz id: (0000000000000000), size: 192 749 # Firmware 64, type: SCODE FW DTV7 DTV78 DTV8 DIBCOM52 CHINA HAS IF (0x65000380), IF = 5.40 MHz id: (0000000000000000), size: 192
750 # 750 #
751 751
752 write_le32(0x64000200); # Type 752 write_le32(0x65000380); # Type
753 write_le64(0x00000000, 0x00000000); # ID 753 write_le64(0x00000000, 0x00000000); # ID
754 write_le16(5400); # IF 754 write_le16(5400); # IF
755 write_le32(192); # Size 755 write_le32(192); # Size
@@ -766,50 +766,50 @@ sub main_firmware($$$$)
766 write_hunk(809592, 192); 766 write_hunk(809592, 192);
767 767
768 # 768 #
769 # Firmware 66, type: SCODE FW HAS IF (0x60000000), IF = 5.64 MHz id: PAL/BG A2/B (0000000200000007), size: 192 769 # Firmware 66, type: SCODE FW HAS IF (0x60000000), IF = 5.64 MHz id: PAL/BG A2 (0000000300000007), size: 192
770 # 770 #
771 771
772 write_le32(0x60000000); # Type 772 write_le32(0x60000000); # Type
773 write_le64(0x00000002, 0x00000007); # ID 773 write_le64(0x00000003, 0x00000007); # ID
774 write_le16(5640); # IF 774 write_le16(5640); # IF
775 write_le32(192); # Size 775 write_le32(192); # Size
776 write_hunk(808440, 192); 776 write_hunk(808440, 192);
777 777
778 # 778 #
779 # Firmware 67, type: SCODE FW HAS IF (0x60000000), IF = 5.74 MHz id: PAL/BG NICAM/B (0000000800000007), size: 192 779 # Firmware 67, type: SCODE FW HAS IF (0x60000000), IF = 5.74 MHz id: PAL/BG NICAM (0000000c00000007), size: 192
780 # 780 #
781 781
782 write_le32(0x60000000); # Type 782 write_le32(0x60000000); # Type
783 write_le64(0x00000008, 0x00000007); # ID 783 write_le64(0x0000000c, 0x00000007); # ID
784 write_le16(5740); # IF 784 write_le16(5740); # IF
785 write_le32(192); # Size 785 write_le32(192); # Size
786 write_hunk(808632, 192); 786 write_hunk(808632, 192);
787 787
788 # 788 #
789 # Firmware 68, type: SCODE FW DTV7 DIBCOM52 HAS IF (0x61000080), IF = 5.90 MHz id: (0000000000000000), size: 192 789 # Firmware 68, type: SCODE FW HAS IF (0x60000000), IF = 5.90 MHz id: (0000000000000000), size: 192
790 # 790 #
791 791
792 write_le32(0x61000080); # Type 792 write_le32(0x60000000); # Type
793 write_le64(0x00000000, 0x00000000); # ID 793 write_le64(0x00000000, 0x00000000); # ID
794 write_le16(5900); # IF 794 write_le16(5900); # IF
795 write_le32(192); # Size 795 write_le32(192); # Size
796 write_hunk(810360, 192); 796 write_hunk(810360, 192);
797 797
798 # 798 #
799 # Firmware 69, type: SCODE FW MONO HAS IF (0x60008000), IF = 6.00 MHz id: PAL/I (0000000000000010), size: 192 799 # Firmware 69, type: SCODE FW MONO HAS IF (0x60008000), IF = 6.00 MHz id: PAL/DK PAL/I SECAM/K3 SECAM/L SECAM/Lc NICAM (0000000c04c000f0), size: 192
800 # 800 #
801 801
802 write_le32(0x60008000); # Type 802 write_le32(0x60008000); # Type
803 write_le64(0x00000000, 0x00000010); # ID 803 write_le64(0x0000000c, 0x04c000f0); # ID
804 write_le16(6000); # IF 804 write_le16(6000); # IF
805 write_le32(192); # Size 805 write_le32(192); # Size
806 write_hunk(808824, 192); 806 write_hunk(808824, 192);
807 807
808 # 808 #
809 # Firmware 70, type: SCODE FW DTV6 QAM F6MHZ HAS IF (0x68000060), IF = 6.20 MHz id: (0000000000000000), size: 192 809 # Firmware 70, type: SCODE FW DTV6 QAM ATSC LG60 F6MHZ HAS IF (0x68050060), IF = 6.20 MHz id: (0000000000000000), size: 192
810 # 810 #
811 811
812 write_le32(0x68000060); # Type 812 write_le32(0x68050060); # Type
813 write_le64(0x00000000, 0x00000000); # ID 813 write_le64(0x00000000, 0x00000000); # ID
814 write_le16(6200); # IF 814 write_le16(6200); # IF
815 write_le32(192); # Size 815 write_le32(192); # Size
@@ -846,11 +846,11 @@ sub main_firmware($$$$)
846 write_hunk(809208, 192); 846 write_hunk(809208, 192);
847 847
848 # 848 #
849 # Firmware 74, type: SCODE FW MONO HAS IF (0x60008000), IF = 6.50 MHz id: SECAM/K3 (0000000004000000), size: 192 849 # Firmware 74, type: SCODE FW MONO HAS IF (0x60008000), IF = 6.50 MHz id: PAL/DK SECAM/K3 SECAM/L NICAM (0000000c044000e0), size: 192
850 # 850 #
851 851
852 write_le32(0x60008000); # Type 852 write_le32(0x60008000); # Type
853 write_le64(0x00000000, 0x04000000); # ID 853 write_le64(0x0000000c, 0x044000e0); # ID
854 write_le16(6500); # IF 854 write_le16(6500); # IF
855 write_le32(192); # Size 855 write_le32(192); # Size
856 write_hunk(811128, 192); 856 write_hunk(811128, 192);
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index f962d01bea2a..3102b81bef88 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -88,10 +88,9 @@ hugepages from the buddy allocator, if the normal pool is exhausted. As
88these surplus hugepages go out of use, they are freed back to the buddy 88these surplus hugepages go out of use, they are freed back to the buddy
89allocator. 89allocator.
90 90
91Caveat: Shrinking the pool via nr_hugepages while a surplus is in effect 91Caveat: Shrinking the pool via nr_hugepages such that it becomes less
92will allow the number of surplus huge pages to exceed the overcommit 92than the number of hugepages in use will convert the balance to surplus
93value, as the pool hugepages (which must have been in use for a surplus 93huge pages even if it would exceed the overcommit value. As long as
94hugepages to be allocated) will become surplus hugepages. As long as
95this condition holds, however, no more surplus huge pages will be 94this condition holds, however, no more surplus huge pages will be
96allowed on the system until one of the two sysctls is increased 95allowed on the system until one of the two sysctls is increased
97sufficiently, or the surplus huge pages go out of use and are freed. 96sufficiently, or the surplus huge pages go out of use and are freed.
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index dd4986497996..bad16d3f6a47 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -135,77 +135,58 @@ most general to most specific:
135 135
136Components of Memory Policies 136Components of Memory Policies
137 137
138 A Linux memory policy is a tuple consisting of a "mode" and an optional set 138 A Linux memory policy consists of a "mode", optional mode flags, and an
139 of nodes. The mode determine the behavior of the policy, while the 139 optional set of nodes. The mode determines the behavior of the policy,
140 optional set of nodes can be viewed as the arguments to the behavior. 140 the optional mode flags determine the behavior of the mode, and the
141 optional set of nodes can be viewed as the arguments to the policy
142 behavior.
141 143
142 Internally, memory policies are implemented by a reference counted 144 Internally, memory policies are implemented by a reference counted
143 structure, struct mempolicy. Details of this structure will be discussed 145 structure, struct mempolicy. Details of this structure will be discussed
144 in context, below, as required to explain the behavior. 146 in context, below, as required to explain the behavior.
145 147
146 Note: in some functions AND in the struct mempolicy itself, the mode
147 is called "policy". However, to avoid confusion with the policy tuple,
148 this document will continue to use the term "mode".
149
150 Linux memory policy supports the following 4 behavioral modes: 148 Linux memory policy supports the following 4 behavioral modes:
151 149
152 Default Mode--MPOL_DEFAULT: The behavior specified by this mode is 150 Default Mode--MPOL_DEFAULT: This mode is only used in the memory
153 context or scope dependent. 151 policy APIs. Internally, MPOL_DEFAULT is converted to the NULL
154 152 memory policy in all policy scopes. Any existing non-default policy
155 As mentioned in the Policy Scope section above, during normal 153 will simply be removed when MPOL_DEFAULT is specified. As a result,
156 system operation, the System Default Policy is hard coded to 154 MPOL_DEFAULT means "fall back to the next most specific policy scope."
157 contain the Default mode.
158
159 In this context, default mode means "local" allocation--that is
160 attempt to allocate the page from the node associated with the cpu
161 where the fault occurs. If the "local" node has no memory, or the
162 node's memory can be exhausted [no free pages available], local
163 allocation will "fallback to"--attempt to allocate pages from--
164 "nearby" nodes, in order of increasing "distance".
165 155
166 Implementation detail -- subject to change: "Fallback" uses 156 For example, a NULL or default task policy will fall back to the
167 a per node list of sibling nodes--called zonelists--built at 157 system default policy. A NULL or default vma policy will fall
168 boot time, or when nodes or memory are added or removed from 158 back to the task policy.
169 the system [memory hotplug]. These per node zonelist are
170 constructed with nodes in order of increasing distance based
171 on information provided by the platform firmware.
172 159
173 When a task/process policy or a shared policy contains the Default 160 When specified in one of the memory policy APIs, the Default mode
174 mode, this also means "local allocation", as described above. 161 does not use the optional set of nodes.
175 162
176 In the context of a VMA, Default mode means "fall back to task 163 It is an error for the set of nodes specified for this policy to
177 policy"--which may or may not specify Default mode. Thus, Default 164 be non-empty.
178 mode can not be counted on to mean local allocation when used
179 on a non-shared region of the address space. However, see
180 MPOL_PREFERRED below.
181
182 The Default mode does not use the optional set of nodes.
183 165
184 MPOL_BIND: This mode specifies that memory must come from the 166 MPOL_BIND: This mode specifies that memory must come from the
185 set of nodes specified by the policy. 167 set of nodes specified by the policy. Memory will be allocated from
186 168 the node in the set with sufficient free memory that is closest to
187 The memory policy APIs do not specify an order in which the nodes 169 the node where the allocation takes place.
188 will be searched. However, unlike "local allocation", the Bind
189 policy does not consider the distance between the nodes. Rather,
190 allocations will fallback to the nodes specified by the policy in
191 order of numeric node id. Like everything in Linux, this is subject
192 to change.
193 170
194 MPOL_PREFERRED: This mode specifies that the allocation should be 171 MPOL_PREFERRED: This mode specifies that the allocation should be
195 attempted from the single node specified in the policy. If that 172 attempted from the single node specified in the policy. If that
196 allocation fails, the kernel will search other nodes, exactly as 173 allocation fails, the kernel will search other nodes, in order of
197 it would for a local allocation that started at the preferred node 174 increasing distance from the preferred node based on information
198 in increasing distance from the preferred node. "Local" allocation 175 provided by the platform firmware.
199 policy can be viewed as a Preferred policy that starts at the node
200 containing the cpu where the allocation takes place. 176 containing the cpu where the allocation takes place.
201 177
202 Internally, the Preferred policy uses a single node--the 178 Internally, the Preferred policy uses a single node--the
203 preferred_node member of struct mempolicy. A "distinguished 179 preferred_node member of struct mempolicy. When the internal
204 value of this preferred_node, currently '-1', is interpreted 180 mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
205 as "the node containing the cpu where the allocation takes 181 the policy is interpreted as local allocation. "Local" allocation
206 place"--local allocation. This is the way to specify 182 policy can be viewed as a Preferred policy that starts at the node
207 local allocation for a specific range of addresses--i.e. for 183 containing the cpu where the allocation takes place.
208 VMA policies. 184
185 It is possible for the user to specify that local allocation is
186 always preferred by passing an empty nodemask with this mode.
187 If an empty nodemask is passed, the policy cannot use the
188 MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
189 below.
209 190
210 MPOL_INTERLEAVE: This mode specifies that page allocations be 191 MPOL_INTERLEAVE: This mode specifies that page allocations be
211 interleaved, on a page granularity, across the nodes specified in 192 interleaved, on a page granularity, across the nodes specified in
@@ -231,6 +212,154 @@ Components of Memory Policies
231 the temporary interleaved system default policy works in this 212 the temporary interleaved system default policy works in this
232 mode. 213 mode.
233 214
215 Linux memory policy supports the following optional mode flags:
216
217 MPOL_F_STATIC_NODES: This flag specifies that the nodemask passed by
218 the user should not be remapped if the task or VMA's set of allowed
219 nodes changes after the memory policy has been defined.
220
221 Without this flag, anytime a mempolicy is rebound because of a
222 change in the set of allowed nodes, the node (Preferred) or
223 nodemask (Bind, Interleave) is remapped to the new set of
224 allowed nodes. This may result in nodes being used that were
225 previously undesired.
226
227 With this flag, if the user-specified nodes overlap with the
228 nodes allowed by the task's cpuset, then the memory policy is
229 applied to their intersection. If the two sets of nodes do not
230 overlap, the Default policy is used.
231
232 For example, consider a task that is attached to a cpuset with
233 mems 1-3 that sets an Interleave policy over the same set. If
234 the cpuset's mems change to 3-5, the Interleave will now occur
235 over nodes 3, 4, and 5. With this flag, however, since only node
236 3 is allowed from the user's nodemask, the "interleave" only
237 occurs over that node. If no nodes from the user's nodemask are
238 now allowed, the Default behavior is used.
239
240 MPOL_F_STATIC_NODES cannot be combined with the
241 MPOL_F_RELATIVE_NODES flag. It also cannot be used for
242 MPOL_PREFERRED policies that were created with an empty nodemask
243 (local allocation).
244
245 MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed
246 by the user will be mapped relative to the task or VMA's
247 set of allowed nodes. The kernel stores the user-passed nodemask,
248 and if the allowed nodes change, then that original nodemask will
249 be remapped relative to the new set of allowed nodes.
250
251 Without this flag (and without MPOL_F_STATIC_NODES), anytime a
252 mempolicy is rebound because of a change in the set of allowed
253 nodes, the node (Preferred) or nodemask (Bind, Interleave) is
254 remapped to the new set of allowed nodes. That remap may not
255 preserve the relative nature of the user's passed nodemask to its
256 set of allowed nodes upon successive rebinds: a nodemask of
257 1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
258 allowed nodes is restored to its original state.
259
260 With this flag, the remap is done so that the node numbers from
261 the user's passed nodemask are relative to the set of allowed
262 nodes. In other words, if nodes 0, 2, and 4 are set in the user's
263 nodemask, the policy will be effected over the first (and in the
264 Bind or Interleave case, the third and fifth) nodes in the set of
265 allowed nodes. The nodemask passed by the user represents nodes
266 relative to task or VMA's set of allowed nodes.
267
268 If the user's nodemask includes nodes that are outside the range
269 of the new set of allowed nodes (for example, node 5 is set in
270 the user's nodemask when the set of allowed nodes is only 0-3),
271 then the remap wraps around to the beginning of the nodemask and,
272 if not already set, sets the node in the mempolicy nodemask.
273
274 For example, consider a task that is attached to a cpuset with
275 mems 2-5 that sets an Interleave policy over the same set with
276 MPOL_F_RELATIVE_NODES. If the cpuset's mems change to 3-7, the
277 interleave now occurs over nodes 3,5-6. If the cpuset's mems
278 then change to 0,2-3,5, then the interleave occurs over nodes
279 0,3,5.
280
281 Thanks to the consistent remapping, applications preparing
282 nodemasks to specify memory policies using this flag should
283 disregard their current, actual cpuset imposed memory placement
284 and prepare the nodemask as if they were always located on
285 memory nodes 0 to N-1, where N is the number of memory nodes the
286 policy is intended to manage. Let the kernel then remap to the
287 set of memory nodes allowed by the task's cpuset, as that may
288 change over time.
289
290 MPOL_F_RELATIVE_NODES cannot be combined with the
291 MPOL_F_STATIC_NODES flag. It also cannot be used for
292 MPOL_PREFERRED policies that were created with an empty nodemask
293 (local allocation).
294
295MEMORY POLICY REFERENCE COUNTING
296
297To resolve use/free races, struct mempolicy contains an atomic reference
298count field. Internal interfaces, mpol_get()/mpol_put() increment and
299decrement this reference count, respectively. mpol_put() will only free
300the structure back to the mempolicy kmem cache when the reference count
301goes to zero.
302
303When a new memory policy is allocated, its reference count is initialized
304to '1', representing the reference held by the task that is installing the
305new policy. When a pointer to a memory policy structure is stored in another
306structure, another reference is added, as the task's reference will be dropped
307on completion of the policy installation.
308
309During run-time "usage" of the policy, we attempt to minimize atomic operations
310on the reference count, as this can lead to cache lines bouncing between cpus
311and NUMA nodes. "Usage" here means one of the following:
312
3131) querying of the policy, either by the task itself [using the get_mempolicy()
314 API discussed below] or by another task using the /proc/<pid>/numa_maps
315 interface.
316
3172) examination of the policy to determine the policy mode and associated node
318 or node lists, if any, for page allocation. This is considered a "hot
319 path". Note that for MPOL_BIND, the "usage" extends across the entire
320 allocation process, which may sleep during page reclamation, because the
321 BIND policy nodemask is used, by reference, to filter ineligible nodes.
322
323We can avoid taking an extra reference during the usages listed above as
324follows:
325
3261) we never need to get/free the system default policy as this is never
327 changed nor freed, once the system is up and running.
328
3292) for querying the policy, we do not need to take an extra reference on the
330 target task's task policy nor vma policies because we always acquire the
331 task's mm's mmap_sem for read during the query. The set_mempolicy() and
332 mbind() APIs [see below] always acquire the mmap_sem for write when
333 installing or replacing task or vma policies. Thus, there is no possibility
334 of a task or thread freeing a policy while another task or thread is
335 querying it.
336
3373) Page allocation usage of task or vma policy occurs in the fault path where
338 we hold the mmap_sem for read. Again, because replacing the task or vma
339 policy requires that the mmap_sem be held for write, the policy can't be
340 freed out from under us while we're using it for page allocation.
341
3424) Shared policies require special consideration. One task can replace a
343 shared memory policy while another task, with a distinct mmap_sem, is
344 querying or allocating a page based on the policy. To resolve this
345 potential race, the shared policy infrastructure adds an extra reference
346 to the shared policy during lookup while holding a spin lock on the shared
347 policy management structure. This requires that we drop this extra
348 reference when we're finished "using" the policy. We must drop the
349 extra reference on shared policies in the same query/allocation paths
350 used for non-shared policies. For this reason, shared policies are marked
351 as such, and the extra reference is dropped "conditionally"--i.e., only
352 for shared policies.
353
354 Because of this extra reference counting, and because we must lookup
355 shared policies in a tree structure under spinlock, shared policies are
356 more expensive to use in the page allocation path. This is especially
357 true for shared policies on shared memory regions shared by tasks running
358 on different NUMA nodes. This extra overhead can be avoided by always
359 falling back to task or system default policy for shared memory regions,
360 or by prefaulting the entire shared memory region into memory and locking
361 it down. However, this might not be appropriate for all applications.
362
234MEMORY POLICY APIs 363MEMORY POLICY APIs
235 364
236Linux supports 3 system calls for controlling memory policy. These APIs 365Linux supports 3 system calls for controlling memory policy. These APIs
@@ -251,7 +380,9 @@ Set [Task] Memory Policy:
251 Sets the calling task's "task/process memory policy" to mode 380 Sets the calling task's "task/process memory policy" to mode
252 specified by the 'mode' argument and the set of nodes defined 381 specified by the 'mode' argument and the set of nodes defined
253 by 'nmask'. 'nmask' points to a bit mask of node ids containing 382 by 'nmask'. 'nmask' points to a bit mask of node ids containing
254 at least 'maxnode' ids. 383 at least 'maxnode' ids. Optional mode flags may be passed by
384 combining the 'mode' argument with the flag (for example:
385 MPOL_INTERLEAVE | MPOL_F_STATIC_NODES).
255 386
256 See the set_mempolicy(2) man page for more details 387 See the set_mempolicy(2) man page for more details
257 388
@@ -303,29 +434,19 @@ MEMORY POLICIES AND CPUSETS
303Memory policies work within cpusets as described above. For memory policies 434Memory policies work within cpusets as described above. For memory policies
304that require a node or set of nodes, the nodes are restricted to the set of 435that require a node or set of nodes, the nodes are restricted to the set of
305nodes whose memories are allowed by the cpuset constraints. If the nodemask 436nodes whose memories are allowed by the cpuset constraints. If the nodemask
306specified for the policy contains nodes that are not allowed by the cpuset, or 437specified for the policy contains nodes that are not allowed by the cpuset and
307the intersection of the set of nodes specified for the policy and the set of 438MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
308nodes with memory is the empty set, the policy is considered invalid 439specified for the policy and the set of nodes with memory is used. If the
309and cannot be installed. 440result is the empty set, the policy is considered invalid and cannot be
310 441installed. If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
311The interaction of memory policies and cpusets can be problematic for a 442onto and folded into the task's set of allowed nodes as previously described.
312couple of reasons: 443
313 444The interaction of memory policies and cpusets can be problematic when tasks
3141) the memory policy APIs take physical node id's as arguments. As mentioned 445in two cpusets share access to a memory region, such as shared memory segments
315 above, it is illegal to specify nodes that are not allowed in the cpuset. 446created by shmget() or mmap() with the MAP_ANONYMOUS and MAP_SHARED flags, and
316 The application must query the allowed nodes using the get_mempolicy() 447any of the tasks install shared policy on the region, only nodes whose
317 API with the MPOL_F_MEMS_ALLOWED flag to determine the allowed nodes and 448memories are allowed in both cpusets may be used in the policies. Obtaining
318 restrict itself to those nodes. However, the resources available to a 449this information requires "stepping outside" the memory policy APIs to use the
319 cpuset can be changed by the system administrator, or a workload manager 450cpuset information and requires that one know in what cpusets other tasks might
320 application, at any time. So, a task may still get errors attempting to 451be attaching to the shared region. Furthermore, if the cpusets' allowed
321 specify policy nodes, and must query the allowed memories again. 452memory sets are disjoint, "local" allocation is the only valid policy.
322
3232) when tasks in two cpusets share access to a memory region, such as shared
324 memory segments created by shmget() of mmap() with the MAP_ANONYMOUS and
325 MAP_SHARED flags, and any of the tasks install shared policy on the region,
326 only nodes whose memories are allowed in both cpusets may be used in the
327 policies. Obtaining this information requires "stepping outside" the
328 memory policy APIs to use the cpuset information and requires that one
329 know in what cpusets other task might be attaching to the shared region.
330 Furthermore, if the cpusets' allowed memory sets are disjoint, "local"
331 allocation is the only valid policy.
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
new file mode 100644
index 000000000000..ce72c0fe6177
--- /dev/null
+++ b/Documentation/vm/pagemap.txt
@@ -0,0 +1,77 @@
1pagemap, from the userspace perspective
2---------------------------------------
3
4pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow
5userspace programs to examine the page tables and related information by
6reading files in /proc.
7
8There are three components to pagemap:
9
10 * /proc/pid/pagemap. This file lets a userspace process find out which
11 physical frame each virtual page is mapped to. It contains one 64-bit
12 value for each virtual page, containing the following data (from
13 fs/proc/task_mmu.c, above pagemap_read):
14
15 * Bits 0-54 page frame number (PFN) if present
16 * Bits 0-4 swap type if swapped
17 * Bits 5-54 swap offset if swapped
18 * Bits 55-60 page shift (page size = 1<<page shift)
19 * Bit 61 reserved for future use
20 * Bit 62 page swapped
21 * Bit 63 page present
22
23 If the page is not present but in swap, then the PFN contains an
24 encoding of the swap file number and the page's offset into the
25 swap. Unmapped pages return a null PFN. This allows determining
26 precisely which pages are mapped (or in swap) and comparing mapped
27 pages between processes.
28
29 Efficient users of this interface will use /proc/pid/maps to
30 determine which areas of memory are actually mapped and llseek to
31 skip over unmapped regions.
32
33 * /proc/kpagecount. This file contains a 64-bit count of the number of
34 times each page is mapped, indexed by PFN.
35
36 * /proc/kpageflags. This file contains a 64-bit set of flags for each
37 page, indexed by PFN.
38
39 The flags are (from fs/proc/proc_misc.c, above kpageflags_read):
40
41 0. LOCKED
42 1. ERROR
43 2. REFERENCED
44 3. UPTODATE
45 4. DIRTY
46 5. LRU
47 6. ACTIVE
48 7. SLAB
49 8. WRITEBACK
50 9. RECLAIM
51 10. BUDDY
52
53Using pagemap to do something useful:
54
55The general procedure for using pagemap to find out about a process' memory
56usage goes like this:
57
58 1. Read /proc/pid/maps to determine which parts of the memory space are
59 mapped to what.
60 2. Select the maps you are interested in -- all of them, or a particular
61 library, or the stack or the heap, etc.
62 3. Open /proc/pid/pagemap and seek to the pages you would like to examine.
63 4. Read a u64 for each page from pagemap.
64 5. Open /proc/kpagecount and/or /proc/kpageflags. For each PFN you just
65 read, seek to that entry in the file, and read the data you want.
66
67For example, to find the "unique set size" (USS), which is the amount of
68memory that a process is using that is not shared with any other process,
69you can go through every map in the process, find the PFNs, look those up
70in kpagecount, and tally up the number of pages that are only referenced
71once.
72
73Other notes:
74
75Reading from any of the files will return -EINVAL if you are not starting
76the read on an 8-byte boundary (e.g., if you seeked an odd number of bytes
77into the file), or if the size of the read is not a multiple of 8 bytes.
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 22d7e3e4d60c..e4230ed16ee7 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -31,14 +31,14 @@ struct slabinfo {
31 int hwcache_align, object_size, objs_per_slab; 31 int hwcache_align, object_size, objs_per_slab;
32 int sanity_checks, slab_size, store_user, trace; 32 int sanity_checks, slab_size, store_user, trace;
33 int order, poison, reclaim_account, red_zone; 33 int order, poison, reclaim_account, red_zone;
34 unsigned long partial, objects, slabs; 34 unsigned long partial, objects, slabs, objects_partial, objects_total;
35 unsigned long alloc_fastpath, alloc_slowpath; 35 unsigned long alloc_fastpath, alloc_slowpath;
36 unsigned long free_fastpath, free_slowpath; 36 unsigned long free_fastpath, free_slowpath;
37 unsigned long free_frozen, free_add_partial, free_remove_partial; 37 unsigned long free_frozen, free_add_partial, free_remove_partial;
38 unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; 38 unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
39 unsigned long cpuslab_flush, deactivate_full, deactivate_empty; 39 unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
40 unsigned long deactivate_to_head, deactivate_to_tail; 40 unsigned long deactivate_to_head, deactivate_to_tail;
41 unsigned long deactivate_remote_frees; 41 unsigned long deactivate_remote_frees, order_fallback;
42 int numa[MAX_NODES]; 42 int numa[MAX_NODES];
43 int numa_partial[MAX_NODES]; 43 int numa_partial[MAX_NODES];
44} slabinfo[MAX_SLABS]; 44} slabinfo[MAX_SLABS];
@@ -293,7 +293,7 @@ int line = 0;
293void first_line(void) 293void first_line(void)
294{ 294{
295 if (show_activity) 295 if (show_activity)
296 printf("Name Objects Alloc Free %%Fast\n"); 296 printf("Name Objects Alloc Free %%Fast Fallb O\n");
297 else 297 else
298 printf("Name Objects Objsize Space " 298 printf("Name Objects Objsize Space "
299 "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); 299 "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
@@ -540,7 +540,8 @@ void slabcache(struct slabinfo *s)
540 return; 540 return;
541 541
542 store_size(size_str, slab_size(s)); 542 store_size(size_str, slab_size(s));
543 snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs); 543 snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
544 s->partial, s->cpu_slabs);
544 545
545 if (!line++) 546 if (!line++)
546 first_line(); 547 first_line();
@@ -572,11 +573,12 @@ void slabcache(struct slabinfo *s)
572 total_alloc = s->alloc_fastpath + s->alloc_slowpath; 573 total_alloc = s->alloc_fastpath + s->alloc_slowpath;
573 total_free = s->free_fastpath + s->free_slowpath; 574 total_free = s->free_fastpath + s->free_slowpath;
574 575
575 printf("%-21s %8ld %8ld %8ld %3ld %3ld \n", 576 printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n",
576 s->name, s->objects, 577 s->name, s->objects,
577 total_alloc, total_free, 578 total_alloc, total_free,
578 total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, 579 total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
579 total_free ? (s->free_fastpath * 100 / total_free) : 0); 580 total_free ? (s->free_fastpath * 100 / total_free) : 0,
581 s->order_fallback, s->order);
580 } 582 }
581 else 583 else
582 printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", 584 printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
@@ -776,7 +778,6 @@ void totals(void)
776 unsigned long used; 778 unsigned long used;
777 unsigned long long wasted; 779 unsigned long long wasted;
778 unsigned long long objwaste; 780 unsigned long long objwaste;
779 long long objects_in_partial_slabs;
780 unsigned long percentage_partial_slabs; 781 unsigned long percentage_partial_slabs;
781 unsigned long percentage_partial_objs; 782 unsigned long percentage_partial_objs;
782 783
@@ -790,18 +791,11 @@ void totals(void)
790 wasted = size - used; 791 wasted = size - used;
791 objwaste = s->slab_size - s->object_size; 792 objwaste = s->slab_size - s->object_size;
792 793
793 objects_in_partial_slabs = s->objects -
794 (s->slabs - s->partial - s ->cpu_slabs) *
795 s->objs_per_slab;
796
797 if (objects_in_partial_slabs < 0)
798 objects_in_partial_slabs = 0;
799
800 percentage_partial_slabs = s->partial * 100 / s->slabs; 794 percentage_partial_slabs = s->partial * 100 / s->slabs;
801 if (percentage_partial_slabs > 100) 795 if (percentage_partial_slabs > 100)
802 percentage_partial_slabs = 100; 796 percentage_partial_slabs = 100;
803 797
804 percentage_partial_objs = objects_in_partial_slabs * 100 798 percentage_partial_objs = s->objects_partial * 100
805 / s->objects; 799 / s->objects;
806 800
807 if (percentage_partial_objs > 100) 801 if (percentage_partial_objs > 100)
@@ -823,8 +817,8 @@ void totals(void)
823 min_objects = s->objects; 817 min_objects = s->objects;
824 if (used < min_used) 818 if (used < min_used)
825 min_used = used; 819 min_used = used;
826 if (objects_in_partial_slabs < min_partobj) 820 if (s->objects_partial < min_partobj)
827 min_partobj = objects_in_partial_slabs; 821 min_partobj = s->objects_partial;
828 if (percentage_partial_slabs < min_ppart) 822 if (percentage_partial_slabs < min_ppart)
829 min_ppart = percentage_partial_slabs; 823 min_ppart = percentage_partial_slabs;
830 if (percentage_partial_objs < min_ppartobj) 824 if (percentage_partial_objs < min_ppartobj)
@@ -848,8 +842,8 @@ void totals(void)
848 max_objects = s->objects; 842 max_objects = s->objects;
849 if (used > max_used) 843 if (used > max_used)
850 max_used = used; 844 max_used = used;
851 if (objects_in_partial_slabs > max_partobj) 845 if (s->objects_partial > max_partobj)
852 max_partobj = objects_in_partial_slabs; 846 max_partobj = s->objects_partial;
853 if (percentage_partial_slabs > max_ppart) 847 if (percentage_partial_slabs > max_ppart)
854 max_ppart = percentage_partial_slabs; 848 max_ppart = percentage_partial_slabs;
855 if (percentage_partial_objs > max_ppartobj) 849 if (percentage_partial_objs > max_ppartobj)
@@ -864,7 +858,7 @@ void totals(void)
864 858
865 total_objects += s->objects; 859 total_objects += s->objects;
866 total_used += used; 860 total_used += used;
867 total_partobj += objects_in_partial_slabs; 861 total_partobj += s->objects_partial;
868 total_ppart += percentage_partial_slabs; 862 total_ppart += percentage_partial_slabs;
869 total_ppartobj += percentage_partial_objs; 863 total_ppartobj += percentage_partial_objs;
870 864
@@ -1160,6 +1154,8 @@ void read_slab_dir(void)
1160 slab->hwcache_align = get_obj("hwcache_align"); 1154 slab->hwcache_align = get_obj("hwcache_align");
1161 slab->object_size = get_obj("object_size"); 1155 slab->object_size = get_obj("object_size");
1162 slab->objects = get_obj("objects"); 1156 slab->objects = get_obj("objects");
1157 slab->objects_partial = get_obj("objects_partial");
1158 slab->objects_total = get_obj("objects_total");
1163 slab->objs_per_slab = get_obj("objs_per_slab"); 1159 slab->objs_per_slab = get_obj("objs_per_slab");
1164 slab->order = get_obj("order"); 1160 slab->order = get_obj("order");
1165 slab->partial = get_obj("partial"); 1161 slab->partial = get_obj("partial");
@@ -1193,6 +1189,7 @@ void read_slab_dir(void)
1193 slab->deactivate_to_head = get_obj("deactivate_to_head"); 1189 slab->deactivate_to_head = get_obj("deactivate_to_head");
1194 slab->deactivate_to_tail = get_obj("deactivate_to_tail"); 1190 slab->deactivate_to_tail = get_obj("deactivate_to_tail");
1195 slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); 1191 slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
1192 slab->order_fallback = get_obj("order_fallback");
1196 chdir(".."); 1193 chdir("..");
1197 if (slab->name[0] == ':') 1194 if (slab->name[0] == ':')
1198 alias_targets++; 1195 alias_targets++;
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
new file mode 100644
index 000000000000..17965f927c15
--- /dev/null
+++ b/Documentation/x86/pat.txt
@@ -0,0 +1,100 @@
1
2PAT (Page Attribute Table)
3
4x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
5page level granularity. PAT is complementary to the MTRR settings which allow
6for setting of memory types over physical address ranges. However, PAT is
7more flexible than MTRR due to its capability to set attributes at page level
8and also due to the fact that there are no hardware limitations on number of
9such attribute settings allowed. Added flexibility comes with guidelines for
10not having memory type aliasing for the same physical memory with multiple
11virtual addresses.
12
13PAT allows for different types of memory attributes. The most commonly used
14ones that will be supported at this time are Write-back, Uncached,
15Write-combined and Uncached Minus.
16
17There are many different APIs in the kernel that allow setting of memory
18attributes at the page level. In order to avoid aliasing, these interfaces
19should be used thoughtfully. Below is a table of interfaces available,
20their intended usage and their memory attribute relationships. Internally,
21these APIs use a reserve_memtype()/free_memtype() interface on the physical
22address range to avoid any aliasing.
23
24
25-------------------------------------------------------------------
26API | RAM | ACPI,... | Reserved/Holes |
27-----------------------|----------|------------|------------------|
28 | | | |
29ioremap | -- | UC | UC |
30 | | | |
31ioremap_cache | -- | WB | WB |
32 | | | |
33ioremap_nocache | -- | UC | UC |
34 | | | |
35ioremap_wc | -- | -- | WC |
36 | | | |
37set_memory_uc | UC | -- | -- |
38 set_memory_wb | | | |
39 | | | |
40set_memory_wc | WC | -- | -- |
41 set_memory_wb | | | |
42 | | | |
43pci sysfs resource | -- | -- | UC |
44 | | | |
45pci sysfs resource_wc | -- | -- | WC |
46 is IORESOURCE_PREFETCH| | | |
47 | | | |
48pci proc | -- | -- | UC |
49 !PCIIOC_WRITE_COMBINE | | | |
50 | | | |
51pci proc | -- | -- | WC |
52 PCIIOC_WRITE_COMBINE | | | |
53 | | | |
54/dev/mem | -- | UC | UC |
55 read-write | | | |
56 | | | |
57/dev/mem | -- | UC | UC |
58 mmap SYNC flag | | | |
59 | | | |
60/dev/mem | -- | WB/WC/UC | WB/WC/UC |
61 mmap !SYNC flag | |(from exist-| (from exist- |
62 and | | ing alias)| ing alias) |
63 any alias to this area| | | |
64 | | | |
65/dev/mem | -- | WB | WB |
66 mmap !SYNC flag | | | |
67 no alias to this area | | | |
68 and | | | |
69 MTRR says WB | | | |
70 | | | |
71/dev/mem | -- | -- | UC_MINUS |
72 mmap !SYNC flag | | | |
73 no alias to this area | | | |
74 and | | | |
75 MTRR says !WB | | | |
76 | | | |
77-------------------------------------------------------------------
78
79Notes:
80
81-- in the above table mean "Not suggested usage for the API". Some of the --'s
82are strictly enforced by the kernel. Some others are not really enforced
83today, but may be enforced in future.
84
85For ioremap and pci access through /sys or /proc - The actual type returned
86can be more restrictive, in case of any existing aliasing for that address.
87For example: If there is an existing uncached mapping, a new ioremap_wc can
88return uncached mapping in place of write-combine requested.
89
90set_memory_[uc|wc] and set_memory_wb should be used in pairs, where driver will
91first make a region uc or wc and switch it back to wb after use.
92
93Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
94interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
95
96Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
97types.
98
99Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
100
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 34abae4e9442..b0c7b6c4abda 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -307,3 +307,8 @@ Debugging
307 stuck (default) 307 stuck (default)
308 308
309Miscellaneous 309Miscellaneous
310
311 nogbpages
312 Do not use GB pages for kernel direct mappings.
313 gbpages
314 Use GB pages for kernel direct mappings.