aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2007-05-07 23:37:51 -0400
committerPaul Mackerras <paulus@samba.org>2007-05-07 23:37:51 -0400
commit02bbc0f09c90cefdb2837605c96a66c5ce4ba2e1 (patch)
tree04ef573cd4de095c500c9fc3477f4278c0b36300 /Documentation
parent7487a2245b8841c77ba9db406cf99a483b9334e9 (diff)
parent5b94f675f57e4ff16c8fda09088d7480a84dcd91 (diff)
Merge branch 'linux-2.6'
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/DocBook/Makefile9
-rw-r--r--Documentation/DocBook/man/Makefile3
-rw-r--r--Documentation/blackfin/00-INDEX11
-rw-r--r--Documentation/blackfin/Filesystems169
-rw-r--r--Documentation/blackfin/cache-lock.txt48
-rw-r--r--Documentation/blackfin/cachefeatures.txt65
-rw-r--r--Documentation/dontdiff4
-rw-r--r--Documentation/driver-model/devres.txt2
-rw-r--r--Documentation/feature-removal-schedule.txt45
-rw-r--r--Documentation/filesystems/proc.txt31
-rw-r--r--Documentation/i2c/busses/i2c-nforce22
-rw-r--r--Documentation/i2c/porting-clients18
-rw-r--r--Documentation/i2c/summary29
-rw-r--r--Documentation/i2c/writing-clients415
-rw-r--r--Documentation/i386/boot.txt23
-rw-r--r--Documentation/ia64/aliasing-test.c247
-rw-r--r--Documentation/ia64/aliasing.txt71
-rw-r--r--Documentation/ia64/err_inject.txt1068
-rw-r--r--Documentation/input/input-programming.txt125
-rw-r--r--Documentation/kbuild/modules.txt2
-rw-r--r--Documentation/kernel-parameters.txt21
-rw-r--r--Documentation/pci.txt12
-rw-r--r--Documentation/pcmcia/driver.txt30
-rw-r--r--Documentation/power/interface.txt8
-rw-r--r--Documentation/power/pci.txt2
-rw-r--r--Documentation/scsi/aacraid.txt7
-rw-r--r--Documentation/scsi/ncr53c8xx.txt5
-rw-r--r--Documentation/sh/clk.txt32
-rw-r--r--Documentation/spi/pxa2xx2
-rw-r--r--Documentation/sysctl/vm.txt23
-rw-r--r--Documentation/sysrq.txt4
-rw-r--r--Documentation/usb/usb-serial.txt2
-rw-r--r--Documentation/vm/slabinfo.c943
-rw-r--r--Documentation/vm/slub.txt113
-rw-r--r--Documentation/x86_64/boot-options.txt14
-rw-r--r--Documentation/x86_64/fake-numa-for-cpusets66
-rw-r--r--Documentation/x86_64/machinecheck7
37 files changed, 3215 insertions, 463 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 867608ab3ca0..10b5cd6c54a0 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -41,7 +41,7 @@ psdocs: $(PS)
41PDF := $(patsubst %.xml, %.pdf, $(BOOKS)) 41PDF := $(patsubst %.xml, %.pdf, $(BOOKS))
42pdfdocs: $(PDF) 42pdfdocs: $(PDF)
43 43
44HTML := $(patsubst %.xml, %.html, $(BOOKS)) 44HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS)))
45htmldocs: $(HTML) 45htmldocs: $(HTML)
46 46
47MAN := $(patsubst %.xml, %.9, $(BOOKS)) 47MAN := $(patsubst %.xml, %.9, $(BOOKS))
@@ -152,6 +152,7 @@ quiet_cmd_db2man = MAN $@
152 @(which xmlto > /dev/null 2>&1) || \ 152 @(which xmlto > /dev/null 2>&1) || \
153 (echo "*** You need to install xmlto ***"; \ 153 (echo "*** You need to install xmlto ***"; \
154 exit 1) 154 exit 1)
155 $(Q)mkdir -p $(obj)/man
155 $(call cmd,db2man) 156 $(call cmd,db2man)
156 @touch $@ 157 @touch $@
157 158
@@ -212,11 +213,7 @@ clean-files := $(DOCBOOKS) \
212 $(patsubst %.xml, %.9, $(DOCBOOKS)) \ 213 $(patsubst %.xml, %.9, $(DOCBOOKS)) \
213 $(C-procfs-example) 214 $(C-procfs-example)
214 215
215clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) 216clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
216
217#man put files in man subdir - traverse down
218subdir- := man/
219
220 217
221# Declare the contents of the .PHONY variable as phony. We keep that 218# Declare the contents of the .PHONY variable as phony. We keep that
222# information in a variable so we can use it in if_changed and friends. 219# information in a variable so we can use it in if_changed and friends.
diff --git a/Documentation/DocBook/man/Makefile b/Documentation/DocBook/man/Makefile
deleted file mode 100644
index 4fb7ea0f7ac8..000000000000
--- a/Documentation/DocBook/man/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
1# Rules are put in Documentation/DocBook
2
3clean-files := *.9.gz *.sgml manpage.links manpage.refs
diff --git a/Documentation/blackfin/00-INDEX b/Documentation/blackfin/00-INDEX
new file mode 100644
index 000000000000..7cb3b356b249
--- /dev/null
+++ b/Documentation/blackfin/00-INDEX
@@ -0,0 +1,11 @@
100-INDEX
2 - This file
3
4cache-lock.txt
5 - HOWTO for blackfin cache locking.
6
7cachefeatures.txt
8 - Supported cache features.
9
10Filesystems
11 - Requirements for mounting the root file system.
diff --git a/Documentation/blackfin/Filesystems b/Documentation/blackfin/Filesystems
new file mode 100644
index 000000000000..51260a1b8032
--- /dev/null
+++ b/Documentation/blackfin/Filesystems
@@ -0,0 +1,169 @@
1/*
2 * File: Documentation/blackfin/Filesystems
3 * Based on:
4 * Author:
5 *
6 * Created:
7 * Description: This file describes how to mount the root file system on uClinux/Blackfin
8 *
9 * Rev: $Id: Filesystems 2384 2006-11-01 04:12:43Z magicyang $
10 *
11 * Modified:
12 * Copyright 2004-2006 Analog Devices Inc.
13 *
14 * Bugs: Enter bugs at http://blackfin.uclinux.org/
15 *
16 */
17
18 How to mount the root file system in uClinux/Blackfin
19 -----------------------------------------------------
20
211 Mounting EXT3 File system.
22 ------------------------
23
24 Creating an EXT3 File system for uClinux/Blackfin:
25
26
27Please follow the steps to form the EXT3 File system and mount the same as root
28file system.
29
30a Make an ext3 file system as large as you want the final root file
31 system.
32
33 mkfs.ext3 /dev/ram0 <your-rootfs-size-in-1k-blocks>
34
35b Mount this Empty file system on a free directory as:
36
37 mount -t ext3 /dev/ram0 ./test
38 where ./test is the empty directory.
39
40c Copy your root fs directory that you have so carefully made over.
41
42 cp -af /tmp/my_final_rootfs_files/* ./test
43
44 (For ex: cp -af uClinux-dist/romfs/* ./test)
45
46d If you have done everything right till now you should be able to see
47 the required "root" dir's (that's etc, root, bin, lib, sbin...)
48
49e Now unmount the file system
50
51 umount ./test
52
53f Create the root file system image.
54
55 dd if=/dev/ram0 bs=1k count=<your-rootfs-size-in-1k-blocks> \
56 > ext3fs.img
57
58
59Now you have to tell the kernel that it will be mounting this file system as
60rootfs.
61So do a make menuconfig under kernel and select the Ext3 journaling file system
62support under File system --> submenu.
63
64
652. Mounting EXT2 File system.
66 -------------------------
67
68By default the ext2 file system image will be created if you invoke make from
69the top uClinux-dist directory.
70
71
723. Mounting CRAMFS File System
73 ----------------------------
74
75To create a CRAMFS file system image execute the command
76
77 mkfs.cramfs ./test cramfs.img
78
79 where ./test is the target directory.
80
81
824. Mounting ROMFS File System
83 --------------------------
84
85To create a ROMFS file system image execute the command
86
87 genromfs -v -V "ROMdisk" -f romfs.img -d ./test
88
89 where ./test is the target directory
90
91
925. Mounting the JFFS2 Filesystem
93 -----------------------------
94
95To create a compressed JFFS filesystem (JFFS2), please execute the command
96
97 mkfs.jffs2 -d ./test -o jffs2.img
98
99 where ./test is the target directory.
100
101However, please make sure the following is in your kernel config.
102
103/*
104 * RAM/ROM/Flash chip drivers
105 */
106#define CONFIG_MTD_CFI 1
107#define CONFIG_MTD_ROM 1
108/*
109 * Mapping drivers for chip access
110 */
111#define CONFIG_MTD_COMPLEX_MAPPINGS 1
112#define CONFIG_MTD_BF533 1
113#undef CONFIG_MTD_UCLINUX
114
115Through the u-boot boot loader, use the jffs2.img in the corresponding
116partition made in linux-2.6.x/drivers/mtd/maps/bf533_flash.c.
117
118NOTE - Currently the Flash driver is available only for EZKIT. Watch out for a
119 STAMP driver soon.
120
121
1226. Mounting the NFS File system
123 -----------------------------
124
125 For mounting the NFS please do the following in the kernel config.
126
127 In Networking Support --> Networking options --> TCP/IP networking -->
128 IP: kernel level autoconfiguration
129
130 Enable BOOTP Support.
131
132 In Kernel hacking --> Compiled-in kernel boot parameter add the following
133
134 root=/dev/nfs rw ip=bootp
135
136 In File system --> Network File system, Enable
137
138 NFS file system support --> NFSv3 client support
139 Root File system on NFS
140
141 in uClibc menuconfig, do the following
142 In Networking Support
143 enable Remote Procedure Call (RPC) support
144 Full RPC Support
145
146 On the Host side, ensure that /etc/dhcpd.conf looks something like this
147
148 ddns-update-style ad-hoc;
149 allow bootp;
150 subnet 10.100.4.0 netmask 255.255.255.0 {
151 default-lease-time 122209600;
152 max-lease-time 31557600;
153 group {
154 host bf533 {
155 hardware ethernet 00:CF:52:49:C3:01;
156 fixed-address 10.100.4.50;
157 option root-path "/home/nfsmount";
158 }
159 }
160
161 ensure that /etc/exports looks something like this
162 /home/nfsmount *(rw,no_root_squash,no_all_squash)
163
164 run the following commands as root (may differ depending on your
165 distribution) :
166 - service nfs start
167 - service portmap start
168 - service dhcpd start
169 - /usr/sbin/exportfs
diff --git a/Documentation/blackfin/cache-lock.txt b/Documentation/blackfin/cache-lock.txt
new file mode 100644
index 000000000000..88ba1e6c31c3
--- /dev/null
+++ b/Documentation/blackfin/cache-lock.txt
@@ -0,0 +1,48 @@
1/*
2 * File: Documentation/blackfin/cache-lock.txt
3 * Based on:
4 * Author:
5 *
6 * Created:
7 * Description: This file describes how to lock code into the cache on uClinux/Blackfin
8 *
9 * Rev: $Id: cache-lock.txt 2384 2006-11-01 04:12:43Z magicyang $
10 *
11 * Modified:
12 * Copyright 2004-2006 Analog Devices Inc.
13 *
14 * Bugs: Enter bugs at http://blackfin.uclinux.org/
15 *
16 */
17
18How to lock your code in cache in uClinux/blackfin
19--------------------------------------------------
20
21There are only a few steps required to lock your code into the cache.
22Currently you can lock the code by Way.
23
24Below are the interfaces provided for locking the cache.
25
26
271. cache_grab_lock(int Ways);
28
29This function grabs the lock for locking your code into the cache specified
30by Ways.
31
32
332. cache_lock(int Ways);
34
35This function should be called after your critical code has been executed.
36Once the critical code exits, the code is now loaded into the cache. This
37function locks the code into the cache.
38
39
40So, the example sequence will be:
41
42 cache_grab_lock(WAY0_L); /* Grab the lock */
43
44 critical_code(); /* Execute the code of interest */
45
46 cache_lock(WAY0_L); /* Lock the cache */
47
48Where WAY0_L signifies WAY0 locking.
diff --git a/Documentation/blackfin/cachefeatures.txt b/Documentation/blackfin/cachefeatures.txt
new file mode 100644
index 000000000000..0fbec23becb5
--- /dev/null
+++ b/Documentation/blackfin/cachefeatures.txt
@@ -0,0 +1,65 @@
1/*
2 * File: Documentation/blackfin/cachefeatures.txt
3 * Based on:
4 * Author:
5 *
6 * Created:
7 * Description: This file lists the cache routines supported on Blackfin
8 *
9 * Rev: $Id: cachefeatures.txt 2384 2006-11-01 04:12:43Z magicyang $
10 *
11 * Modified:
12 * Copyright 2004-2006 Analog Devices Inc.
13 *
14 * Bugs: Enter bugs at http://blackfin.uclinux.org/
15 *
16 */
17
18 - Instruction and Data cache initialization.
19 icache_init();
20 dcache_init();
21
22 - Instruction and Data cache Invalidation Routines, when flushing the
23 same is not required.
24 _icache_invalidate();
25 _dcache_invalidate();
26
27 Also, for invalidating the entire instruction and data cache, the below
28 routines are provided (for another method of invalidation, refer to pages 267 and 287 of
29 the ADSP-BF533 Hardware Reference manual)
30
31 invalidate_entire_dcache();
32 invalidate_entire_icache();
33
34 - External Flushing of Instruction and data cache routines.
35
36 flush_instruction_cache();
37 flush_data_cache();
38
39 - Internal Flushing of Instruction and Data Cache.
40
41 icplb_flush();
42 dcplb_flush();
43
44 - Locking the cache.
45
46 cache_grab_lock();
47 cache_lock();
48
49 Please refer to linux-2.6.x/Documentation/blackfin/cache-lock.txt for how to
50 lock the cache.
51
52 Locking the cache is an optional feature.
53
54 - Miscellaneous cache functions.
55
56 flush_cache_all();
57 flush_cache_mm();
58 invalidate_dcache_range();
59 flush_dcache_range();
60 flush_dcache_page();
61 flush_cache_range();
62 flush_cache_page();
63 invalidate_dcache_range();
64 flush_page_to_ram();
65
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 63c2d0c55aa2..64e9f6c4826b 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -55,8 +55,8 @@ aic7*seq.h*
55aicasm 55aicasm
56aicdb.h* 56aicdb.h*
57asm 57asm
58asm-offsets.* 58asm-offsets.h
59asm_offsets.* 59asm_offsets.h
60autoconf.h* 60autoconf.h*
61bbootsect 61bbootsect
62bin2c 62bin2c
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 5163b85308f5..6c8d8f27db34 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -182,7 +182,7 @@ For example, you can do something like the following.
182 182
183 ... 183 ...
184 184
185 devres_close_group(dev, my_midlayer_something); 185 devres_close_group(dev, my_midlayer_create_something);
186 return 0; 186 return 0;
187 } 187 }
188 188
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 5c88ba1ea262..5f96cb33743e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -117,13 +117,6 @@ Who: Adrian Bunk <bunk@stusta.de>
117 117
118--------------------------- 118---------------------------
119 119
120What: pci_module_init(driver)
121When: January 2007
122Why: Is replaced by pci_register_driver(pci_driver).
123Who: Richard Knutsson <ricknu-0@student.ltu.se> and Greg Kroah-Hartman <gregkh@suse.de>
124
125---------------------------
126
127What: Usage of invalid timevals in setitimer 120What: Usage of invalid timevals in setitimer
128When: March 2007 121When: March 2007
129Why: POSIX requires to validate timevals in the setitimer call. This 122Why: POSIX requires to validate timevals in the setitimer call. This
@@ -190,18 +183,10 @@ Who: Jean Delvare <khali@linux-fr.org>
190 183
191--------------------------- 184---------------------------
192 185
193What: i2c_adapter.dev 186What: i2c_adapter.list
194 i2c_adapter.list
195When: July 2007 187When: July 2007
196Why: Superfluous, given i2c_adapter.class_dev: 188Why: Superfluous, this list duplicates the one maintained by the driver
197 * The "dev" was a stand-in for the physical device node that legacy 189 core.
198 drivers would not have; but now it's almost always present. Any
199 remaining legacy drivers must upgrade (they now trigger warnings).
200 * The "list" duplicates class device children.
201 The delay in removing this is so upgraded lm_sensors and libsensors
202 can get deployed. (Removal causes minor changes in the sysfs layout,
203 notably the location of the adapter type name and parenting the i2c
204 client hardware directly from their controller.)
205Who: Jean Delvare <khali@linux-fr.org>, 190Who: Jean Delvare <khali@linux-fr.org>,
206 David Brownell <dbrownell@users.sourceforge.net> 191 David Brownell <dbrownell@users.sourceforge.net>
207 192
@@ -314,3 +299,27 @@ Why: Code was merged, then submitter immediately disappeared leaving
314Who: David S. Miller <davem@davemloft.net> 299Who: David S. Miller <davem@davemloft.net>
315 300
316--------------------------- 301---------------------------
302
303What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
304When: December 2007
305Why: These functions are a leftover from 2.4 times. They have several
306 problems:
307 - Duplication of checks that are done in the device driver's
308 interrupt handler
309 - common I/O layer can't do device specific error recovery
310 - device driver can't be notified for conditions happening during
311 execution of the function
312 Device drivers should issue the read device characteristics and read
313 configuration data ccws and do the appropriate error handling
314 themselves.
315Who: Cornelia Huck <cornelia.huck@de.ibm.com>
316
317---------------------------
318
319What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers
320When: September 2007
321Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific
322 I2C-over-GPIO drivers.
323Who: Jean Delvare <khali@linux-fr.org>
324
325---------------------------
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 7aaf09b86a55..3f4b226572e7 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -122,21 +122,22 @@ subdirectory has the entries listed in Table 1-1.
122 122
123Table 1-1: Process specific entries in /proc 123Table 1-1: Process specific entries in /proc
124.............................................................................. 124..............................................................................
125 File Content 125 File Content
126 cmdline Command line arguments 126 clear_refs Clears page referenced bits shown in smaps output
127 cpu Current and last cpu in which it was executed (2.4)(smp) 127 cmdline Command line arguments
128 cwd Link to the current working directory 128 cpu Current and last cpu in which it was executed (2.4)(smp)
129 environ Values of environment variables 129 cwd Link to the current working directory
130 exe Link to the executable of this process 130 environ Values of environment variables
131 fd Directory, which contains all file descriptors 131 exe Link to the executable of this process
132 maps Memory maps to executables and library files (2.4) 132 fd Directory, which contains all file descriptors
133 mem Memory held by this process 133 maps Memory maps to executables and library files (2.4)
134 root Link to the root directory of this process 134 mem Memory held by this process
135 stat Process status 135 root Link to the root directory of this process
136 statm Process memory status information 136 stat Process status
137 status Process status in human readable form 137 statm Process memory status information
138 wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan 138 status Process status in human readable form
139 smaps Extension based on maps, presenting the rss size for each mapped file 139 wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan
140 smaps Extension based on maps, the rss size for each mapped file
140.............................................................................. 141..............................................................................
141 142
142For example, to get the status information of a process, all you have to do is 143For example, to get the status information of a process, all you have to do is
diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
index 7f61fbc03f7f..fae3495bcbaf 100644
--- a/Documentation/i2c/busses/i2c-nforce2
+++ b/Documentation/i2c/busses/i2c-nforce2
@@ -9,6 +9,8 @@ Supported adapters:
9 * nForce4 MCP-04 10de:0034 9 * nForce4 MCP-04 10de:0034
10 * nForce4 MCP51 10de:0264 10 * nForce4 MCP51 10de:0264
11 * nForce4 MCP55 10de:0368 11 * nForce4 MCP55 10de:0368
12 * nForce4 MCP61 10de:03EB
13 * nForce4 MCP65 10de:0446
12 14
13Datasheet: not publicly available, but seems to be similar to the 15Datasheet: not publicly available, but seems to be similar to the
14 AMD-8111 SMBus 2.0 adapter. 16 AMD-8111 SMBus 2.0 adapter.
diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients
index ca272b263a92..7bf82c08f6ca 100644
--- a/Documentation/i2c/porting-clients
+++ b/Documentation/i2c/porting-clients
@@ -1,4 +1,4 @@
1Revision 6, 2005-11-20 1Revision 7, 2007-04-19
2Jean Delvare <khali@linux-fr.org> 2Jean Delvare <khali@linux-fr.org>
3Greg KH <greg@kroah.com> 3Greg KH <greg@kroah.com>
4 4
@@ -20,6 +20,10 @@ yours for best results.
20 20
21Technical changes: 21Technical changes:
22 22
23* [Driver type] Any driver that was relying on i2c-isa has to be
24 converted to a proper isa, platform or pci driver. This is not
25 covered by this guide.
26
23* [Includes] Get rid of "version.h" and <linux/i2c-proc.h>. 27* [Includes] Get rid of "version.h" and <linux/i2c-proc.h>.
24 Includes typically look like that: 28 Includes typically look like that:
25 #include <linux/module.h> 29 #include <linux/module.h>
@@ -27,12 +31,10 @@ Technical changes:
27 #include <linux/slab.h> 31 #include <linux/slab.h>
28 #include <linux/jiffies.h> 32 #include <linux/jiffies.h>
29 #include <linux/i2c.h> 33 #include <linux/i2c.h>
30 #include <linux/i2c-isa.h> /* for ISA drivers */
31 #include <linux/hwmon.h> /* for hardware monitoring drivers */ 34 #include <linux/hwmon.h> /* for hardware monitoring drivers */
32 #include <linux/hwmon-sysfs.h> 35 #include <linux/hwmon-sysfs.h>
33 #include <linux/hwmon-vid.h> /* if you need VRM support */ 36 #include <linux/hwmon-vid.h> /* if you need VRM support */
34 #include <linux/err.h> /* for class registration */ 37 #include <linux/err.h> /* for class registration */
35 #include <asm/io.h> /* if you have I/O operations */
36 Please respect this inclusion order. Some extra headers may be 38 Please respect this inclusion order. Some extra headers may be
37 required for a given driver (e.g. "lm75.h"). 39 required for a given driver (e.g. "lm75.h").
38 40
@@ -69,20 +71,16 @@ Technical changes:
69 sensors mailing list <lm-sensors@lm-sensors.org> by providing a 71 sensors mailing list <lm-sensors@lm-sensors.org> by providing a
70 patch to the Documentation/hwmon/sysfs-interface file. 72 patch to the Documentation/hwmon/sysfs-interface file.
71 73
72* [Attach] For I2C drivers, the attach function should make sure 74* [Attach] The attach function should make sure that the adapter's
73 that the adapter's class has I2C_CLASS_HWMON (or whatever class is 75 class has I2C_CLASS_HWMON (or whatever class is suitable for your
74 suitable for your driver), using the following construct: 76 driver), using the following construct:
75 if (!(adapter->class & I2C_CLASS_HWMON)) 77 if (!(adapter->class & I2C_CLASS_HWMON))
76 return 0; 78 return 0;
77 ISA-only drivers of course don't need this.
78 Call i2c_probe() instead of i2c_detect(). 79 Call i2c_probe() instead of i2c_detect().
79 80
80* [Detect] As mentioned earlier, the flags parameter is gone. 81* [Detect] As mentioned earlier, the flags parameter is gone.
81 The type_name and client_name strings are replaced by a single 82 The type_name and client_name strings are replaced by a single
82 name string, which will be filled with a lowercase, short string. 83 name string, which will be filled with a lowercase, short string.
83 In i2c-only drivers, drop the i2c_is_isa_adapter check, it's
84 useless. Same for isa-only drivers, as the test would always be
85 true. Only hybrid drivers (which are quite rare) still need it.
86 The labels used for error paths are reduced to the number needed. 84 The labels used for error paths are reduced to the number needed.
87 It is advised that the labels are given descriptive names such as 85 It is advised that the labels are given descriptive names such as
88 exit and exit_free. Don't forget to properly set err before 86 exit and exit_free. Don't forget to properly set err before
diff --git a/Documentation/i2c/summary b/Documentation/i2c/summary
index 41dde8776791..aea60bf7e8f0 100644
--- a/Documentation/i2c/summary
+++ b/Documentation/i2c/summary
@@ -4,17 +4,23 @@ I2C and SMBus
4============= 4=============
5 5
6I2C (pronounce: I squared C) is a protocol developed by Philips. It is a 6I2C (pronounce: I squared C) is a protocol developed by Philips. It is a
7slow two-wire protocol (10-400 kHz), but it suffices for many types of 7slow two-wire protocol (variable speed, up to 400 kHz), with a high speed
8devices. 8extension (3.4 MHz). It provides an inexpensive bus for connecting many
9types of devices with infrequent or low bandwidth communications needs.
10I2C is widely used with embedded systems. Some systems use variants that
11don't meet branding requirements, and so are not advertised as being I2C.
9 12
10SMBus (System Management Bus) is a subset of the I2C protocol. Many 13SMBus (System Management Bus) is based on the I2C protocol, and is mostly
11modern mainboards have a System Management Bus. There are a lot of 14a subset of I2C protocols and signaling. Many I2C devices will work on an
12devices which can be connected to a SMBus; the most notable are modern 15SMBus, but some SMBus protocols add semantics beyond what is required to
13memory chips with EEPROM memories and chips for hardware monitoring. 16achieve I2C branding. Modern PC mainboards rely on SMBus. The most common
17devices connected through SMBus are RAM modules configured using I2C EEPROMs,
18and hardware monitoring chips.
14 19
15Because the SMBus is just a special case of the generalized I2C bus, we 20Because the SMBus is mostly a subset of the generalized I2C bus, we can
16can simulate the SMBus protocol on plain I2C busses. The reverse is 21use its protocols on many I2C systems. However, there are systems that don't
17regretfully impossible. 22meet both SMBus and I2C electrical constraints; and others which can't
23implement all the common SMBus protocol semantics or messages.
18 24
19 25
20Terminology 26Terminology
@@ -29,6 +35,7 @@ When we talk about I2C, we use the following terms:
29An Algorithm driver contains general code that can be used for a whole class 35An Algorithm driver contains general code that can be used for a whole class
30of I2C adapters. Each specific adapter driver depends on one algorithm 36of I2C adapters. Each specific adapter driver depends on one algorithm
31driver. 37driver.
38
32A Driver driver (yes, this sounds ridiculous, sorry) contains the general 39A Driver driver (yes, this sounds ridiculous, sorry) contains the general
33code to access some type of device. Each detected device gets its own 40code to access some type of device. Each detected device gets its own
34data in the Client structure. Usually, Driver and Client are more closely 41data in the Client structure. Usually, Driver and Client are more closely
@@ -40,6 +47,10 @@ a separate Adapter and Algorithm driver), and drivers for your I2C devices
40in this package. See the lm_sensors project http://www.lm-sensors.nu 47in this package. See the lm_sensors project http://www.lm-sensors.nu
41for device drivers. 48for device drivers.
42 49
50At this time, Linux only operates I2C (or SMBus) in master mode; you can't
51use these APIs to make a Linux system behave as a slave/device, either to
52speak a custom protocol or to emulate some other device.
53
43 54
44Included Bus Drivers 55Included Bus Drivers
45==================== 56====================
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index fbcff96f4ca1..3d8d36b0ad12 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -1,5 +1,5 @@
1This is a small guide for those who want to write kernel drivers for I2C 1This is a small guide for those who want to write kernel drivers for I2C
2or SMBus devices. 2or SMBus devices, using Linux as the protocol host/master (not slave).
3 3
4To set up a driver, you need to do several things. Some are optional, and 4To set up a driver, you need to do several things. Some are optional, and
5some things can be done slightly or completely different. Use this as a 5some things can be done slightly or completely different. Use this as a
@@ -29,8 +29,16 @@ static struct i2c_driver foo_driver = {
29 .driver = { 29 .driver = {
30 .name = "foo", 30 .name = "foo",
31 }, 31 },
32
33 /* iff driver uses driver model ("new style") binding model: */
34 .probe = foo_probe,
35 .remove = foo_remove,
36
37 /* else, driver uses "legacy" binding model: */
32 .attach_adapter = foo_attach_adapter, 38 .attach_adapter = foo_attach_adapter,
33 .detach_client = foo_detach_client, 39 .detach_client = foo_detach_client,
40
41 /* these may be used regardless of the driver binding model */
34 .shutdown = foo_shutdown, /* optional */ 42 .shutdown = foo_shutdown, /* optional */
35 .suspend = foo_suspend, /* optional */ 43 .suspend = foo_suspend, /* optional */
36 .resume = foo_resume, /* optional */ 44 .resume = foo_resume, /* optional */
@@ -40,7 +48,8 @@ static struct i2c_driver foo_driver = {
40The name field is the driver name, and must not contain spaces. It 48The name field is the driver name, and must not contain spaces. It
41should match the module name (if the driver can be compiled as a module), 49should match the module name (if the driver can be compiled as a module),
42although you can use MODULE_ALIAS (passing "foo" in this example) to add 50although you can use MODULE_ALIAS (passing "foo" in this example) to add
43another name for the module. 51another name for the module. If the driver name doesn't match the module
52name, the module won't be automatically loaded (hotplug/coldplug).
44 53
45All other fields are for call-back functions which will be explained 54All other fields are for call-back functions which will be explained
46below. 55below.
@@ -65,16 +74,13 @@ An example structure is below.
65 74
66 struct foo_data { 75 struct foo_data {
67 struct i2c_client client; 76 struct i2c_client client;
68 struct semaphore lock; /* For ISA access in `sensors' drivers. */
69 int sysctl_id; /* To keep the /proc directory entry for
70 `sensors' drivers. */
71 enum chips type; /* To keep the chips type for `sensors' drivers. */ 77 enum chips type; /* To keep the chips type for `sensors' drivers. */
72 78
73 /* Because the i2c bus is slow, it is often useful to cache the read 79 /* Because the i2c bus is slow, it is often useful to cache the read
74 information of a chip for some time (for example, 1 or 2 seconds). 80 information of a chip for some time (for example, 1 or 2 seconds).
75 It depends of course on the device whether this is really worthwhile 81 It depends of course on the device whether this is really worthwhile
76 or even sensible. */ 82 or even sensible. */
77 struct semaphore update_lock; /* When we are reading lots of information, 83 struct mutex update_lock; /* When we are reading lots of information,
78 another process should not update the 84 another process should not update the
79 below information */ 85 below information */
80 char valid; /* != 0 if the following fields are valid. */ 86 char valid; /* != 0 if the following fields are valid. */
@@ -95,8 +101,7 @@ some obscure clients). But we need generic reading and writing routines.
95I have found it useful to define foo_read and foo_write function for this. 101I have found it useful to define foo_read and foo_write function for this.
96For some cases, it will be easier to call the i2c functions directly, 102For some cases, it will be easier to call the i2c functions directly,
97but many chips have some kind of register-value idea that can easily 103but many chips have some kind of register-value idea that can easily
98be encapsulated. Also, some chips have both ISA and I2C interfaces, and 104be encapsulated.
99it useful to abstract from this (only for `sensors' drivers).
100 105
101The below functions are simple examples, and should not be copied 106The below functions are simple examples, and should not be copied
102literally. 107literally.
@@ -119,28 +124,101 @@ literally.
119 return i2c_smbus_write_word_data(client,reg,value); 124 return i2c_smbus_write_word_data(client,reg,value);
120 } 125 }
121 126
122For sensors code, you may have to cope with ISA registers too. Something
123like the below often works. Note the locking!
124
125 int foo_read_value(struct i2c_client *client, u8 reg)
126 {
127 int res;
128 if (i2c_is_isa_client(client)) {
129 down(&(((struct foo_data *) (client->data)) -> lock));
130 outb_p(reg,client->addr + FOO_ADDR_REG_OFFSET);
131 res = inb_p(client->addr + FOO_DATA_REG_OFFSET);
132 up(&(((struct foo_data *) (client->data)) -> lock));
133 return res;
134 } else
135 return i2c_smbus_read_byte_data(client,reg);
136 }
137
138Writing is done the same way.
139
140 127
141Probing and attaching 128Probing and attaching
142===================== 129=====================
143 130
131The Linux I2C stack was originally written to support access to hardware
132monitoring chips on PC motherboards, and thus it embeds some assumptions
133that are more appropriate to SMBus (and PCs) than to I2C. One of these
134assumptions is that most adapters and device drivers support the SMBUS_QUICK
135protocol to probe device presence. Another is that devices and their drivers
136can be sufficiently configured using only such probe primitives.
137
138As Linux and its I2C stack became more widely used in embedded systems
139and complex components such as DVB adapters, those assumptions became more
140problematic. Drivers for I2C devices that issue interrupts need more (and
141different) configuration information, as do drivers handling chip variants
142that can't be distinguished by protocol probing, or which need some board
143specific information to operate correctly.
144
145Accordingly, the I2C stack now has two models for associating I2C devices
146with their drivers: the original "legacy" model, and a newer one that's
147fully compatible with the Linux 2.6 driver model. These models do not mix,
148since the "legacy" model requires drivers to create "i2c_client" device
149objects after SMBus style probing, while the Linux driver model expects
150drivers to be given such device objects in their probe() routines.
151
152
153Standard Driver Model Binding ("New Style")
154-------------------------------------------
155
156System infrastructure, typically board-specific initialization code or
157boot firmware, reports what I2C devices exist. For example, there may be
158a table, in the kernel or from the boot loader, identifying I2C devices
159and linking them to board-specific configuration information about IRQs
160and other wiring artifacts, chip type, and so on. That could be used to
161create i2c_client objects for each I2C device.
162
163I2C device drivers using this binding model work just like any other
164kind of driver in Linux: they provide a probe() method to bind to
165those devices, and a remove() method to unbind.
166
167 static int foo_probe(struct i2c_client *client);
168 static int foo_remove(struct i2c_client *client);
169
170Remember that the i2c_driver does not create those client handles. The
171handle may be used during foo_probe(). If foo_probe() reports success
172(zero, not a negative status code) it may save the handle and use it until
173foo_remove() returns. That binding model is used by most Linux drivers.
174
175Drivers match devices when i2c_client.driver_name and the driver name are
176the same; this approach is used in several other busses that don't have
177device typing support in the hardware. The driver and module name should
178match, so hotplug/coldplug mechanisms will modprobe the driver.
179
180
181Device Creation (Standard driver model)
182---------------------------------------
183
184If you know for a fact that an I2C device is connected to a given I2C bus,
185you can instantiate that device by simply filling an i2c_board_info
186structure with the device address and driver name, and calling
187i2c_new_device(). This will create the device, then the driver core will
188take care of finding the right driver and will call its probe() method.
189If a driver supports different device types, you can specify the type you
190want using the type field. You can also specify an IRQ and platform data
191if needed.
192
193Sometimes you know that a device is connected to a given I2C bus, but you
194don't know the exact address it uses. This happens on TV adapters for
195example, where the same driver supports dozens of slightly different
196models, and I2C device addresses change from one model to the next. In
197that case, you can use the i2c_new_probed_device() variant, which is
198similar to i2c_new_device(), except that it takes an additional list of
199possible I2C addresses to probe. A device is created for the first
200responsive address in the list. If you expect more than one device to be
201present in the address range, simply call i2c_new_probed_device() that
202many times.
203
204The call to i2c_new_device() or i2c_new_probed_device() typically happens
205in the I2C bus driver. You may want to save the returned i2c_client
206reference for later use.
207
208
209Device Deletion (Standard driver model)
210---------------------------------------
211
212Each I2C device which has been created using i2c_new_device() or
213i2c_new_probed_device() can be unregistered by calling
214i2c_unregister_device(). If you don't call it explicitly, it will be
215called automatically before the underlying I2C bus itself is removed, as a
216device can't survive its parent in the device driver model.
217
218
219Legacy Driver Binding Model
220---------------------------
221
144Most i2c devices can be present on several i2c addresses; for some this 222Most i2c devices can be present on several i2c addresses; for some this
145is determined in hardware (by soldering some chip pins to Vcc or Ground), 223is determined in hardware (by soldering some chip pins to Vcc or Ground),
146for others this can be changed in software (by writing to specific client 224for others this can be changed in software (by writing to specific client
@@ -157,13 +235,9 @@ detection algorithm.
157You do not have to use this parameter interface; but don't try to use 235You do not have to use this parameter interface; but don't try to use
158function i2c_probe() if you don't. 236function i2c_probe() if you don't.
159 237
160NOTE: If you want to write a `sensors' driver, the interface is slightly
161 different! See below.
162
163 238
164 239Probing classes (Legacy model)
165Probing classes 240------------------------------
166---------------
167 241
168All parameters are given as lists of unsigned 16-bit integers. Lists are 242All parameters are given as lists of unsigned 16-bit integers. Lists are
169terminated by I2C_CLIENT_END. 243terminated by I2C_CLIENT_END.
@@ -210,8 +284,8 @@ Note that you *have* to call the defined variable `normal_i2c',
210without any prefix! 284without any prefix!
211 285
212 286
213Attaching to an adapter 287Attaching to an adapter (Legacy model)
214----------------------- 288--------------------------------------
215 289
216Whenever a new adapter is inserted, or for all adapters if the driver is 290Whenever a new adapter is inserted, or for all adapters if the driver is
217being registered, the callback attach_adapter() is called. Now is the 291being registered, the callback attach_adapter() is called. Now is the
@@ -237,17 +311,13 @@ them (unless a `force' parameter was used). In addition, addresses that
237are already in use (by some other registered client) are skipped. 311are already in use (by some other registered client) are skipped.
238 312
239 313
240The detect client function 314The detect client function (Legacy model)
241-------------------------- 315-----------------------------------------
242 316
243The detect client function is called by i2c_probe. The `kind' parameter 317The detect client function is called by i2c_probe. The `kind' parameter
244contains -1 for a probed detection, 0 for a forced detection, or a positive 318contains -1 for a probed detection, 0 for a forced detection, or a positive
245number for a forced detection with a chip type forced. 319number for a forced detection with a chip type forced.
246 320
247Below, some things are only needed if this is a `sensors' driver. Those
248parts are between /* SENSORS ONLY START */ and /* SENSORS ONLY END */
249markers.
250
251Returning an error different from -ENODEV in a detect function will cause 321Returning an error different from -ENODEV in a detect function will cause
252the detection to stop: other addresses and adapters won't be scanned. 322the detection to stop: other addresses and adapters won't be scanned.
253This should only be done on fatal or internal errors, such as a memory 323This should only be done on fatal or internal errors, such as a memory
@@ -256,64 +326,20 @@ shortage or i2c_attach_client failing.
256For now, you can ignore the `flags' parameter. It is there for future use. 326For now, you can ignore the `flags' parameter. It is there for future use.
257 327
258 int foo_detect_client(struct i2c_adapter *adapter, int address, 328 int foo_detect_client(struct i2c_adapter *adapter, int address,
259 unsigned short flags, int kind) 329 int kind)
260 { 330 {
261 int err = 0; 331 int err = 0;
262 int i; 332 int i;
263 struct i2c_client *new_client; 333 struct i2c_client *client;
264 struct foo_data *data; 334 struct foo_data *data;
265 const char *client_name = ""; /* For non-`sensors' drivers, put the real 335 const char *name = "";
266 name here! */
267 336
268 /* Let's see whether this adapter can support what we need. 337 /* Let's see whether this adapter can support what we need.
269 Please substitute the things you need here! 338 Please substitute the things you need here! */
270 For `sensors' drivers, add `! is_isa &&' to the if statement */
271 if (!i2c_check_functionality(adapter,I2C_FUNC_SMBUS_WORD_DATA | 339 if (!i2c_check_functionality(adapter,I2C_FUNC_SMBUS_WORD_DATA |
272 I2C_FUNC_SMBUS_WRITE_BYTE)) 340 I2C_FUNC_SMBUS_WRITE_BYTE))
273 goto ERROR0; 341 goto ERROR0;
274 342
275 /* SENSORS ONLY START */
276 const char *type_name = "";
277 int is_isa = i2c_is_isa_adapter(adapter);
278
279 /* Do this only if the chip can additionally be found on the ISA bus
280 (hybrid chip). */
281
282 if (is_isa) {
283
284 /* Discard immediately if this ISA range is already used */
285 /* FIXME: never use check_region(), only request_region() */
286 if (check_region(address,FOO_EXTENT))
287 goto ERROR0;
288
289 /* Probe whether there is anything on this address.
290 Some example code is below, but you will have to adapt this
291 for your own driver */
292
293 if (kind < 0) /* Only if no force parameter was used */ {
294 /* We may need long timeouts at least for some chips. */
295 #define REALLY_SLOW_IO
296 i = inb_p(address + 1);
297 if (inb_p(address + 2) != i)
298 goto ERROR0;
299 if (inb_p(address + 3) != i)
300 goto ERROR0;
301 if (inb_p(address + 7) != i)
302 goto ERROR0;
303 #undef REALLY_SLOW_IO
304
305 /* Let's just hope nothing breaks here */
306 i = inb_p(address + 5) & 0x7f;
307 outb_p(~i & 0x7f,address+5);
308 if ((inb_p(address + 5) & 0x7f) != (~i & 0x7f)) {
309 outb_p(i,address+5);
310 return 0;
311 }
312 }
313 }
314
315 /* SENSORS ONLY END */
316
317 /* OK. For now, we presume we have a valid client. We now create the 343 /* OK. For now, we presume we have a valid client. We now create the
318 client structure, even though we cannot fill it completely yet. 344 client structure, even though we cannot fill it completely yet.
319 But it allows us to access several i2c functions safely */ 345 But it allows us to access several i2c functions safely */
@@ -323,13 +349,12 @@ For now, you can ignore the `flags' parameter. It is there for future use.
323 goto ERROR0; 349 goto ERROR0;
324 } 350 }
325 351
326 new_client = &data->client; 352 client = &data->client;
327 i2c_set_clientdata(new_client, data); 353 i2c_set_clientdata(client, data);
328 354
329 new_client->addr = address; 355 client->addr = address;
330 new_client->adapter = adapter; 356 client->adapter = adapter;
331 new_client->driver = &foo_driver; 357 client->driver = &foo_driver;
332 new_client->flags = 0;
333 358
334 /* Now, we do the remaining detection. If no `force' parameter is used. */ 359 /* Now, we do the remaining detection. If no `force' parameter is used. */
335 360
@@ -337,19 +362,17 @@ For now, you can ignore the `flags' parameter. It is there for future use.
337 parameter was used. */ 362 parameter was used. */
338 if (kind < 0) { 363 if (kind < 0) {
339 /* The below is of course bogus */ 364 /* The below is of course bogus */
340 if (foo_read(new_client,FOO_REG_GENERIC) != FOO_GENERIC_VALUE) 365 if (foo_read(client, FOO_REG_GENERIC) != FOO_GENERIC_VALUE)
341 goto ERROR1; 366 goto ERROR1;
342 } 367 }
343 368
344 /* SENSORS ONLY START */
345
346 /* Next, specific detection. This is especially important for `sensors' 369 /* Next, specific detection. This is especially important for `sensors'
347 devices. */ 370 devices. */
348 371
349 /* Determine the chip type. Not needed if a `force_CHIPTYPE' parameter 372 /* Determine the chip type. Not needed if a `force_CHIPTYPE' parameter
350 was used. */ 373 was used. */
351 if (kind <= 0) { 374 if (kind <= 0) {
352 i = foo_read(new_client,FOO_REG_CHIPTYPE); 375 i = foo_read(client, FOO_REG_CHIPTYPE);
353 if (i == FOO_TYPE_1) 376 if (i == FOO_TYPE_1)
354 kind = chip1; /* As defined in the enum */ 377 kind = chip1; /* As defined in the enum */
355 else if (i == FOO_TYPE_2) 378 else if (i == FOO_TYPE_2)
@@ -363,63 +386,31 @@ For now, you can ignore the `flags' parameter. It is there for future use.
363 386
364 /* Now set the type and chip names */ 387 /* Now set the type and chip names */
365 if (kind == chip1) { 388 if (kind == chip1) {
366 type_name = "chip1"; /* For /proc entry */ 389 name = "chip1";
367 client_name = "CHIP 1";
368 } else if (kind == chip2) { 390 } else if (kind == chip2) {
369 type_name = "chip2"; /* For /proc entry */ 391 name = "chip2";
370 client_name = "CHIP 2";
371 } 392 }
372 393
373 /* Reserve the ISA region */
374 if (is_isa)
375 request_region(address,FOO_EXTENT,type_name);
376
377 /* SENSORS ONLY END */
378
379 /* Fill in the remaining client fields. */ 394 /* Fill in the remaining client fields. */
380 strcpy(new_client->name,client_name); 395 strlcpy(client->name, name, I2C_NAME_SIZE);
381
382 /* SENSORS ONLY BEGIN */
383 data->type = kind; 396 data->type = kind;
384 /* SENSORS ONLY END */ 397 mutex_init(&data->update_lock); /* Only if you use this field */
385
386 data->valid = 0; /* Only if you use this field */
387 init_MUTEX(&data->update_lock); /* Only if you use this field */
388 398
389 /* Any other initializations in data must be done here too. */ 399 /* Any other initializations in data must be done here too. */
390 400
391 /* Tell the i2c layer a new client has arrived */
392 if ((err = i2c_attach_client(new_client)))
393 goto ERROR3;
394
395 /* SENSORS ONLY BEGIN */
396 /* Register a new directory entry with module sensors. See below for
397 the `template' structure. */
398 if ((i = i2c_register_entry(new_client, type_name,
399 foo_dir_table_template,THIS_MODULE)) < 0) {
400 err = i;
401 goto ERROR4;
402 }
403 data->sysctl_id = i;
404
405 /* SENSORS ONLY END */
406
407 /* This function can write default values to the client registers, if 401 /* This function can write default values to the client registers, if
408 needed. */ 402 needed. */
409 foo_init_client(new_client); 403 foo_init_client(client);
404
405 /* Tell the i2c layer a new client has arrived */
406 if ((err = i2c_attach_client(client)))
407 goto ERROR1;
408
410 return 0; 409 return 0;
411 410
412 /* OK, this is not exactly good programming practice, usually. But it is 411 /* OK, this is not exactly good programming practice, usually. But it is
413 very code-efficient in this case. */ 412 very code-efficient in this case. */
414 413
415 ERROR4:
416 i2c_detach_client(new_client);
417 ERROR3:
418 ERROR2:
419 /* SENSORS ONLY START */
420 if (is_isa)
421 release_region(address,FOO_EXTENT);
422 /* SENSORS ONLY END */
423 ERROR1: 414 ERROR1:
424 kfree(data); 415 kfree(data);
425 ERROR0: 416 ERROR0:
@@ -427,8 +418,8 @@ For now, you can ignore the `flags' parameter. It is there for future use.
427 } 418 }
428 419
429 420
430Removing the client 421Removing the client (Legacy model)
431=================== 422==================================
432 423
433The detach_client call back function is called when a client should be 424The detach_client call back function is called when a client should be
434removed. It may actually fail, but only when panicking. This code is 425removed. It may actually fail, but only when panicking. This code is
@@ -436,22 +427,12 @@ much simpler than the attachment code, fortunately!
436 427
437 int foo_detach_client(struct i2c_client *client) 428 int foo_detach_client(struct i2c_client *client)
438 { 429 {
439 int err,i; 430 int err;
440
441 /* SENSORS ONLY START */
442 /* Deregister with the `i2c-proc' module. */
443 i2c_deregister_entry(((struct lm78_data *)(client->data))->sysctl_id);
444 /* SENSORS ONLY END */
445 431
446 /* Try to detach the client from i2c space */ 432 /* Try to detach the client from i2c space */
447 if ((err = i2c_detach_client(client))) 433 if ((err = i2c_detach_client(client)))
448 return err; 434 return err;
449 435
450 /* HYBRID SENSORS CHIP ONLY START */
451 if i2c_is_isa_client(client)
452 release_region(client->addr,LM78_EXTENT);
453 /* HYBRID SENSORS CHIP ONLY END */
454
455 kfree(i2c_get_clientdata(client)); 436 kfree(i2c_get_clientdata(client));
456 return 0; 437 return 0;
457 } 438 }
@@ -464,45 +445,34 @@ When the kernel is booted, or when your foo driver module is inserted,
464you have to do some initializing. Fortunately, just attaching (registering) 445you have to do some initializing. Fortunately, just attaching (registering)
465the driver module is usually enough. 446the driver module is usually enough.
466 447
467 /* Keep track of how far we got in the initialization process. If several
468 things have to initialized, and we fail halfway, only those things
469 have to be cleaned up! */
470 static int __initdata foo_initialized = 0;
471
472 static int __init foo_init(void) 448 static int __init foo_init(void)
473 { 449 {
474 int res; 450 int res;
475 printk("foo version %s (%s)\n",FOO_VERSION,FOO_DATE);
476 451
477 if ((res = i2c_add_driver(&foo_driver))) { 452 if ((res = i2c_add_driver(&foo_driver))) {
478 printk("foo: Driver registration failed, module not inserted.\n"); 453 printk("foo: Driver registration failed, module not inserted.\n");
479 foo_cleanup();
480 return res; 454 return res;
481 } 455 }
482 foo_initialized ++;
483 return 0; 456 return 0;
484 } 457 }
485 458
486 void foo_cleanup(void) 459 static void __exit foo_cleanup(void)
487 { 460 {
488 if (foo_initialized == 1) { 461 i2c_del_driver(&foo_driver);
489 if ((res = i2c_del_driver(&foo_driver))) {
490 printk("foo: Driver registration failed, module not removed.\n");
491 return;
492 }
493 foo_initialized --;
494 }
495 } 462 }
496 463
497 /* Substitute your own name and email address */ 464 /* Substitute your own name and email address */
498 MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>"); 465 MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>");
499 MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices"); 466 MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices");
500 467
468 /* a few non-GPL license types are also allowed */
469 MODULE_LICENSE("GPL");
470
501 module_init(foo_init); 471 module_init(foo_init);
502 module_exit(foo_cleanup); 472 module_exit(foo_cleanup);
503 473
504Note that some functions are marked by `__init', and some data structures 474Note that some functions are marked by `__init', and some data structures
505by `__init_data'. Hose functions and structures can be removed after 475by `__initdata'. These functions and structures can be removed after
506kernel booting (or module loading) is completed. 476kernel booting (or module loading) is completed.
507 477
508 478
@@ -632,110 +602,7 @@ General purpose routines
632Below all general purpose routines are listed, that were not mentioned 602Below all general purpose routines are listed, that were not mentioned
633before. 603before.
634 604
635 /* This call returns a unique low identifier for each registered adapter, 605 /* This call returns a unique low identifier for each registered adapter.
636 * or -1 if the adapter was not registered.
637 */ 606 */
638 extern int i2c_adapter_id(struct i2c_adapter *adap); 607 extern int i2c_adapter_id(struct i2c_adapter *adap);
639 608
640
641The sensors sysctl/proc interface
642=================================
643
644This section only applies if you write `sensors' drivers.
645
646Each sensors driver creates a directory in /proc/sys/dev/sensors for each
647registered client. The directory is called something like foo-i2c-4-65.
648The sensors module helps you to do this as easily as possible.
649
650The template
651------------
652
653You will need to define a ctl_table template. This template will automatically
654be copied to a newly allocated structure and filled in where necessary when
655you call sensors_register_entry.
656
657First, I will give an example definition.
658 static ctl_table foo_dir_table_template[] = {
659 { FOO_SYSCTL_FUNC1, "func1", NULL, 0, 0644, NULL, &i2c_proc_real,
660 &i2c_sysctl_real,NULL,&foo_func },
661 { FOO_SYSCTL_FUNC2, "func2", NULL, 0, 0644, NULL, &i2c_proc_real,
662 &i2c_sysctl_real,NULL,&foo_func },
663 { FOO_SYSCTL_DATA, "data", NULL, 0, 0644, NULL, &i2c_proc_real,
664 &i2c_sysctl_real,NULL,&foo_data },
665 { 0 }
666 };
667
668In the above example, three entries are defined. They can either be
669accessed through the /proc interface, in the /proc/sys/dev/sensors/*
670directories, as files named func1, func2 and data, or alternatively
671through the sysctl interface, in the appropriate table, with identifiers
672FOO_SYSCTL_FUNC1, FOO_SYSCTL_FUNC2 and FOO_SYSCTL_DATA.
673
674The third, sixth and ninth parameters should always be NULL, and the
675fourth should always be 0. The fifth is the mode of the /proc file;
6760644 is safe, as the file will be owned by root:root.
677
678The seventh and eighth parameters should be &i2c_proc_real and
679&i2c_sysctl_real if you want to export lists of reals (scaled
680integers). You can also use your own function for them, as usual.
681Finally, the last parameter is the call-back to gather the data
682(see below) if you use the *_proc_real functions.
683
684
685Gathering the data
686------------------
687
688The call back functions (foo_func and foo_data in the above example)
689can be called in several ways; the operation parameter determines
690what should be done:
691
692 * If operation == SENSORS_PROC_REAL_INFO, you must return the
693 magnitude (scaling) in nrels_mag;
694 * If operation == SENSORS_PROC_REAL_READ, you must read information
695 from the chip and return it in results. The number of integers
696 to display should be put in nrels_mag;
697 * If operation == SENSORS_PROC_REAL_WRITE, you must write the
698 supplied information to the chip. nrels_mag will contain the number
699 of integers, results the integers themselves.
700
701The *_proc_real functions will display the elements as reals for the
702/proc interface. If you set the magnitude to 2, and supply 345 for
703SENSORS_PROC_REAL_READ, it would display 3.45; and if the user would
704write 45.6 to the /proc file, it would be returned as 4560 for
705SENSORS_PROC_REAL_WRITE. A magnitude may even be negative!
706
707An example function:
708
709 /* FOO_FROM_REG and FOO_TO_REG translate between scaled values and
710 register values. Note the use of the read cache. */
711 void foo_in(struct i2c_client *client, int operation, int ctl_name,
712 int *nrels_mag, long *results)
713 {
714 struct foo_data *data = client->data;
715 int nr = ctl_name - FOO_SYSCTL_FUNC1; /* reduce to 0 upwards */
716
717 if (operation == SENSORS_PROC_REAL_INFO)
718 *nrels_mag = 2;
719 else if (operation == SENSORS_PROC_REAL_READ) {
720 /* Update the readings cache (if necessary) */
721 foo_update_client(client);
722 /* Get the readings from the cache */
723 results[0] = FOO_FROM_REG(data->foo_func_base[nr]);
724 results[1] = FOO_FROM_REG(data->foo_func_more[nr]);
725 results[2] = FOO_FROM_REG(data->foo_func_readonly[nr]);
726 *nrels_mag = 2;
727 } else if (operation == SENSORS_PROC_REAL_WRITE) {
728 if (*nrels_mag >= 1) {
729 /* Update the cache */
730 data->foo_base[nr] = FOO_TO_REG(results[0]);
731 /* Update the chip */
732 foo_write_value(client,FOO_REG_FUNC_BASE(nr),data->foo_base[nr]);
733 }
734 if (*nrels_mag >= 2) {
735 /* Update the cache */
736 data->foo_more[nr] = FOO_TO_REG(results[1]);
737 /* Update the chip */
738 foo_write_value(client,FOO_REG_FUNC_MORE(nr),data->foo_more[nr]);
739 }
740 }
741 }
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index 38fe1f03fb14..6498666ea330 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -2,7 +2,7 @@
2 ---------------------------- 2 ----------------------------
3 3
4 H. Peter Anvin <hpa@zytor.com> 4 H. Peter Anvin <hpa@zytor.com>
5 Last update 2007-01-26 5 Last update 2007-03-06
6 6
7On the i386 platform, the Linux kernel uses a rather complicated boot 7On the i386 platform, the Linux kernel uses a rather complicated boot
8convention. This has evolved partially due to historical aspects, as 8convention. This has evolved partially due to historical aspects, as
@@ -35,9 +35,13 @@ Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible
35 initrd address available to the bootloader. 35 initrd address available to the bootloader.
36 36
37Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes. 37Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes.
38
38Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. 39Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
39 Introduce relocatable_kernel and kernel_alignment fields. 40 Introduce relocatable_kernel and kernel_alignment fields.
40 41
42Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
43 the boot command line
44
41 45
42**** MEMORY LAYOUT 46**** MEMORY LAYOUT
43 47
@@ -133,6 +137,8 @@ Offset Proto Name Meaning
133022C/4 2.03+ initrd_addr_max Highest legal initrd address 137022C/4 2.03+ initrd_addr_max Highest legal initrd address
1340230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 1380230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
1350234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not 1390234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
1400235/3 N/A pad2 Unused
1410238/4 2.06+ cmdline_size Maximum size of the kernel command line
136 142
137(1) For backwards compatibility, if the setup_sects field contains 0, the 143(1) For backwards compatibility, if the setup_sects field contains 0, the
138 real value is 4. 144 real value is 4.
@@ -233,6 +239,12 @@ filled out, however:
233 if your ramdisk is exactly 131072 bytes long and this field is 239 if your ramdisk is exactly 131072 bytes long and this field is
234 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.) 240 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
235 241
242 cmdline_size:
243 The maximum size of the command line without the terminating
244 zero. This means that the command line can contain at most
245 cmdline_size characters. With protocol version 2.05 and
246 earlier, the maximum size was 255.
247
236 248
237**** THE KERNEL COMMAND LINE 249**** THE KERNEL COMMAND LINE
238 250
@@ -241,11 +253,10 @@ loader to communicate with the kernel. Some of its options are also
241relevant to the boot loader itself, see "special command line options" 253relevant to the boot loader itself, see "special command line options"
242below. 254below.
243 255
244The kernel command line is a null-terminated string currently up to 256The kernel command line is a null-terminated string. The maximum
245255 characters long, plus the final null. A string that is too long 257length can be retrieved from the field cmdline_size. Before protocol
246will be automatically truncated by the kernel, a boot loader may allow 258version 2.06, the maximum was 255 characters. A string that is too
247a longer command line to be passed to permit future kernels to extend 259long will be automatically truncated by the kernel.
248this limit.
249 260
250If the boot protocol version is 2.02 or later, the address of the 261If the boot protocol version is 2.02 or later, the address of the
251kernel command line is given by the header field cmd_line_ptr (see 262kernel command line is given by the header field cmd_line_ptr (see
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c
new file mode 100644
index 000000000000..3153167b41c3
--- /dev/null
+++ b/Documentation/ia64/aliasing-test.c
@@ -0,0 +1,247 @@
1/*
2 * Exercise /dev/mem mmap cases that have been troublesome in the past
3 *
4 * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
5 * Bjorn Helgaas <bjorn.helgaas@hp.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <stdlib.h>
13#include <stdio.h>
14#include <sys/types.h>
15#include <dirent.h>
16#include <fcntl.h>
17#include <fnmatch.h>
18#include <string.h>
19#include <sys/mman.h>
20#include <sys/stat.h>
21#include <unistd.h>
22
/*
 * Running total of every int read through a mapping by map_mem().
 * NOTE(review): presumably global so that the reads have a visible effect
 * and cannot be optimized away -- confirm.
 */
int sum;

/*
 * Map `length' bytes of the file at `path', starting at file offset
 * `offset', read/write and shared.  If `touch' is non-zero, read every
 * whole int in the mapping and add it to `sum'.  The mapping is removed
 * again before returning.
 *
 * Returns 0 on success, 1 if mmap() refused the range, and -1 if the file
 * could not be opened or the mapping could not be removed.
 */
int map_mem(char *path, off_t offset, size_t length, int touch)
{
	int fd, rc;
	void *addr;
	int *c;
	size_t i, count;

	fd = open(path, O_RDWR);
	if (fd == -1) {
		perror(path);
		return -1;
	}

	addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
	if (addr == MAP_FAILED) {
		/* Not an error for the caller, but don't leak the descriptor. */
		close(fd);
		return 1;
	}

	if (touch) {
		/*
		 * Walk the mapping itself.  The bound is derived from the
		 * address mmap() returned, not from the file offset, which
		 * has no relation to where the mapping lives in memory.
		 */
		c = (int *) addr;
		count = length / sizeof(*c);
		for (i = 0; i < count; i++)
			sum += c[i];
	}

	rc = munmap(addr, length);
	if (rc == -1) {
		/* Report before close() gets a chance to disturb errno. */
		perror("munmap");
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}
56
57int scan_sysfs(char *path, char *file, off_t offset, size_t length, int touch)
58{
59 struct dirent **namelist;
60 char *name, *path2;
61 int i, n, r, rc, result = 0;
62 struct stat buf;
63
64 n = scandir(path, &namelist, 0, alphasort);
65 if (n < 0) {
66 perror("scandir");
67 return -1;
68 }
69
70 for (i = 0; i < n; i++) {
71 name = namelist[i]->d_name;
72
73 if (fnmatch(".", name, 0) == 0)
74 goto skip;
75 if (fnmatch("..", name, 0) == 0)
76 goto skip;
77
78 path2 = malloc(strlen(path) + strlen(name) + 3);
79 strcpy(path2, path);
80 strcat(path2, "/");
81 strcat(path2, name);
82
83 if (fnmatch(file, name, 0) == 0) {
84 rc = map_mem(path2, offset, length, touch);
85 if (rc == 0)
86 fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
87 else if (rc > 0)
88 fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
89 else {
90 fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
91 return rc;
92 }
93 } else {
94 r = lstat(path2, &buf);
95 if (r == 0 && S_ISDIR(buf.st_mode)) {
96 rc = scan_sysfs(path2, file, offset, length, touch);
97 if (rc < 0)
98 return rc;
99 }
100 }
101
102 result |= rc;
103 free(path2);
104
105skip:
106 free(namelist[i]);
107 }
108 free(namelist);
109 return rc;
110}
111
/* Scratch buffer that read_rom() reads each chunk of data into. */
char buf[1024];

/*
 * Open the file at `path', write "1" to it, then read it to the end,
 * discarding the data.
 * NOTE(review): the write presumably enables a sysfs PCI "rom" attribute
 * before it is read -- confirm against the sysfs PCI documentation.
 *
 * Returns the number of bytes read, or -1 if the file could not be opened
 * or the write failed.  A read error simply ends the loop, so it is
 * indistinguishable from end-of-file.
 */
int read_rom(char *path)
{
	int fd, rc;
	size_t size = 0;

	fd = open(path, O_RDWR);
	if (fd == -1) {
		perror(path);
		return -1;
	}

	/* Two bytes: the '1' and its terminating NUL. */
	rc = write(fd, "1", 2);
	if (rc <= 0) {
		perror("write");
		/* Don't leak the descriptor on the failure path. */
		close(fd);
		return -1;
	}

	do {
		rc = read(fd, buf, sizeof(buf));
		if (rc > 0)
			size += rc;
	} while (rc > 0);

	close(fd);
	return size;
}
140
141int scan_rom(char *path, char *file)
142{
143 struct dirent **namelist;
144 char *name, *path2;
145 int i, n, r, rc, result = 0;
146 struct stat buf;
147
148 n = scandir(path, &namelist, 0, alphasort);
149 if (n < 0) {
150 perror("scandir");
151 return -1;
152 }
153
154 for (i = 0; i < n; i++) {
155 name = namelist[i]->d_name;
156
157 if (fnmatch(".", name, 0) == 0)
158 goto skip;
159 if (fnmatch("..", name, 0) == 0)
160 goto skip;
161
162 path2 = malloc(strlen(path) + strlen(name) + 3);
163 strcpy(path2, path);
164 strcat(path2, "/");
165 strcat(path2, name);
166
167 if (fnmatch(file, name, 0) == 0) {
168 rc = read_rom(path2);
169
170 /*
171 * It's OK if the ROM is unreadable. Maybe there
172 * is no ROM, or some other error ocurred. The
173 * important thing is that no MCA happened.
174 */
175 if (rc > 0)
176 fprintf(stderr, "PASS: %s read %ld bytes\n", path2, rc);
177 else {
178 fprintf(stderr, "PASS: %s not readable\n", path2);
179 return rc;
180 }
181 } else {
182 r = lstat(path2, &buf);
183 if (r == 0 && S_ISDIR(buf.st_mode)) {
184 rc = scan_rom(path2, file);
185 if (rc < 0)
186 return rc;
187 }
188 }
189
190 result |= rc;
191 free(path2);
192
193skip:
194 free(namelist[i]);
195 }
196 free(namelist);
197 return rc;
198}
199
/*
 * Run the /dev/mem, legacy_mem and ROM checks in sequence.  Each check
 * prints its own PASS/FAIL line to stderr; the exit status is always 0.
 */
int main(void)
{
	int rc;

	if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
		fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
	else
		fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");

	/*
	 * It's not safe to blindly read the VGA frame buffer.  If you know
	 * how to poke the card the right way, it should respond, but it's
	 * not safe in general.  Many machines, e.g., Intel chipsets, cover
	 * up a non-responding card by just returning -1, but others will
	 * report the failure as a machine check.
	 */
	if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
		fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
	else
		fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");

	if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
		fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
	else
		fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");

	/*
	 * Often you can map all the individual pieces above (0-0xA0000,
	 * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
	 * thing at once.  This is because the individual pieces use different
	 * attributes, and there's no single attribute supported over the
	 * whole region.
	 */
	rc = map_mem("/dev/mem", 0, 1024*1024, 0);
	if (rc == 0)
		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
	else if (rc > 0)
		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
	else
		fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");

	/* The same four ranges again, through each PCI bus's legacy_mem file. */
	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
	scan_sysfs("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);

	scan_rom("/sys/devices", "rom");

	return 0;
}
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
index 38f9a52d1820..9a431a7d0f5d 100644
--- a/Documentation/ia64/aliasing.txt
+++ b/Documentation/ia64/aliasing.txt
@@ -112,16 +112,6 @@ POTENTIAL ATTRIBUTE ALIASING CASES
112 112
113 The /dev/mem mmap constraints apply. 113 The /dev/mem mmap constraints apply.
114 114
115 However, since this is for mapping legacy MMIO space, WB access
116 does not make sense. This matters on machines without legacy
117 VGA support: these machines may have WB memory for the entire
118 first megabyte (or even the entire first granule).
119
120 On these machines, we could mmap legacy_mem as WB, which would
121 be safe in terms of attribute aliasing, but X has no way of
122 knowing that it is accessing regular memory, not a frame buffer,
123 so the kernel should fail the mmap rather than doing it with WB.
124
125 read/write of /dev/mem 115 read/write of /dev/mem
126 116
127 This uses copy_from_user(), which implicitly uses a kernel 117 This uses copy_from_user(), which implicitly uses a kernel
@@ -138,14 +128,20 @@ POTENTIAL ATTRIBUTE ALIASING CASES
138 128
139 ioremap() 129 ioremap()
140 130
141 This returns a kernel identity mapping for use inside the 131 This returns a mapping for use inside the kernel.
142 kernel.
143 132
144 If the region is in kern_memmap, we should use the attribute 133 If the region is in kern_memmap, we should use the attribute
145 specified there. Otherwise, if the EFI memory map reports that 134 specified there.
146 the entire granule supports WB, we should use that (granules 135
147 that are partially reserved or occupied by firmware do not appear 136 If the EFI memory map reports that the entire granule supports
148 in kern_memmap). Otherwise, we should use a UC mapping. 137 WB, we should use that (granules that are partially reserved
138 or occupied by firmware do not appear in kern_memmap).
139
140 If the granule contains non-WB memory, but we can cover the
141 region safely with kernel page table mappings, we can use
142 ioremap_page_range() as most other architectures do.
143
144 Failing all of the above, we have to fall back to a UC mapping.
149 145
150PAST PROBLEM CASES 146PAST PROBLEM CASES
151 147
@@ -158,7 +154,7 @@ PAST PROBLEM CASES
158 succeed. It may create either WB or UC user mappings, depending 154 succeed. It may create either WB or UC user mappings, depending
159 on whether the region is in kern_memmap or the EFI memory map. 155 on whether the region is in kern_memmap or the EFI memory map.
160 156
161 mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled 157 mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
162 158
163 See https://bugzilla.novell.com/show_bug.cgi?id=140858. 159 See https://bugzilla.novell.com/show_bug.cgi?id=140858.
164 160
@@ -171,28 +167,25 @@ PAST PROBLEM CASES
171 so it is safe to use WB mappings. 167 so it is safe to use WB mappings.
172 168
173 The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, 169 The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
174 which will use a granule-sized UC mapping covering 0-0xFFFFF. This 170 which uses a granule-sized UC mapping. This granule will cover some
175 granule covers some WB-only memory, but since UC is non-speculative, 171 WB-only memory, but since UC is non-speculative, the processor will
176 the processor will never generate an uncacheable reference to the 172 never generate an uncacheable reference to the WB-only areas unless
177 WB-only areas unless the driver explicitly touches them. 173 the driver explicitly touches them.
178 174
179 mmap of 0x0-0xFFFFF legacy_mem by "X" 175 mmap of 0x0-0xFFFFF legacy_mem by "X"
180 176
181 If the EFI memory map reports this entire range as WB, there 177 If the EFI memory map reports that the entire range supports the
182 is no VGA MMIO hole, and the mmap should fail or be done with 178 same attributes, we can allow the mmap (and we will prefer WB if
183 a WB mapping. 179 supported, as is the case with HP sx[12]000 machines with VGA
180 disabled).
184 181
185 There's no easy way for X to determine whether the 0xA0000-0xBFFFF 182 If EFI reports the range as partly WB and partly UC (as on sx[12]000
186 region is a frame buffer or just memory, so I think it's best to 183 machines with VGA enabled), we must fail the mmap because there's no
187 just fail this mmap request rather than using a WB mapping. As 184 safe attribute to use.
188 far as I know, there's no need to map legacy_mem with WB
189 mappings.
190 185
191 Otherwise, a UC mapping of the entire region is probably safe. 186 If EFI reports some of the range but not all (as on Intel firmware
192 The VGA hole means the region will not be in kern_memmap. The 187 that doesn't report the VGA frame buffer at all), we should fail the
193 HP sx1000 chipset doesn't support UC access to the memory surrounding 188 mmap and force the user to map just the specific region of interest.
194 the VGA hole, but X doesn't need that area anyway and should not
195 reference it.
196 189
197 mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled 190 mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
198 191
@@ -202,6 +195,16 @@ PAST PROBLEM CASES
202 This is a special case of the previous case, and the mmap should 195 This is a special case of the previous case, and the mmap should
203 fail for the same reason as above. 196 fail for the same reason as above.
204 197
198 read of /sys/devices/.../rom
199
200 For VGA devices, this may cause an ioremap() of 0xC0000. This
201 used to be done with a UC mapping, because the VGA frame buffer
202 at 0xA0000 prevents use of a WB granule. The UC mapping causes
203 an MCA on HP sx[12]000 chipsets.
204
205 We should use WB page table mappings to avoid covering the VGA
206 frame buffer.
207
205NOTES 208NOTES
206 209
207 [1] SDM rev 2.2, vol 2, sec 4.4.1. 210 [1] SDM rev 2.2, vol 2, sec 4.4.1.
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
new file mode 100644
index 000000000000..6449a7090dbb
--- /dev/null
+++ b/Documentation/ia64/err_inject.txt
@@ -0,0 +1,1068 @@
1
2IPF Machine Check (MC) error inject tool
3========================================
4
5IPF Machine Check (MC) error inject tool is used to inject MC
6errors from Linux. The tool is a test bed for IPF MC work flow including
7hardware correctable error handling, OS recoverable error handling, MC
8event logging, etc.
9
10The tool includes two parts: a kernel driver and a user application
11sample. The driver provides interface to PAL to inject error
12and query error injection capabilities. The driver code is in
13arch/ia64/kernel/err_inject.c. The application sample (shown below)
14provides a combination of various errors and calls the driver's interface
15(sysfs interface) to inject errors or query error injection capabilities.
16
17The tool can be used to test Intel IPF machine MC handling capabilities.
18It's especially useful for people who can not access hardware MC injection
19tool to inject error. It's also very useful to integrate with other
20software test suites to do stressful testing on IPF.
21
22Below is a sample application as part of the whole tool. The sample
23can be used as a working test tool. Or it can be expanded to include
24more features. It can also be integrated into a library or other user
25application to allow more thorough testing.
26
27The sample application takes err.conf as error configuration input. Gcc
28compiles the code. After you install err_inject driver, you can run
29this sample application to inject errors.
30
31Errata: Itanium 2 Processors Specification Update lists some errata against
32the pal_mc_error_inject PAL procedure. The following err.conf has been tested
33on latest Montecito PAL.
34
35err.conf:
36
37#This is configuration file for err_inject_tool.
38#The format of each line is:
39#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
40#where
41# cpu: logical cpu number the error will be injected on.
42# loop: times the error will be injected.
43# interval: In second. every so often one error is injected.
44# err_type_info, err_struct_info: PAL parameters.
45#
46#Note: All values are hex w/o or w/ 0x prefix.
47
48
49#On cpu2, inject only total 0x10 errors, interval 5 seconds
50#corrected, data cache, hier-2, physical addr(assigned by tool code).
51#working on Montecito latest PAL.
522, 10, 5, 4101, 95
53
54#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
55#corrected, data cache, hier-2, physical addr(assigned by tool code).
56#working on Montecito latest PAL.
574, 10, 5, 4109, 95
58
59#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
60#recoverable, DTR0, hier-2.
61#working on Montecito latest PAL.
620xf, 0x10, 5, 4249, 15
63
64The sample application source code:
65
66err_injection_tool.c:
67
68/*
69 * This program is free software; you can redistribute it and/or modify
70 * it under the terms of the GNU General Public License as published by
71 * the Free Software Foundation; either version 2 of the License, or
72 * (at your option) any later version.
73 *
74 * This program is distributed in the hope that it will be useful, but
75 * WITHOUT ANY WARRANTY; without even the implied warranty of
76 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
77 * NON INFRINGEMENT. See the GNU General Public License for more
78 * details.
79 *
80 * You should have received a copy of the GNU General Public License
81 * along with this program; if not, write to the Free Software
82 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
83 *
84 * Copyright (C) 2006 Intel Co
85 * Fenghua Yu <fenghua.yu@intel.com>
86 *
87 */
88#include <sys/types.h>
89#include <sys/stat.h>
90#include <fcntl.h>
91#include <stdio.h>
92#include <sched.h>
93#include <unistd.h>
94#include <stdlib.h>
95#include <stdarg.h>
96#include <string.h>
97#include <errno.h>
98#include <time.h>
99#include <sys/ipc.h>
100#include <sys/sem.h>
101#include <sys/wait.h>
102#include <sys/mman.h>
103#include <sys/shm.h>
104
105#define MAX_FN_SIZE 256
106#define MAX_BUF_SIZE 256
107#define DATA_BUF_SIZE 256
108#define NR_CPUS 512
109#define MAX_TASK_NUM 2048
110#define MIN_INTERVAL 5 // seconds
111#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
112#define PARA_FIELD_NUM 5
113#define MASK_SIZE (NR_CPUS/64)
114#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
115
116int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
117
118int verbose;
119#define vbprintf if (verbose) printf
120
121int log_info(int cpu, const char *fmt, ...)
122{
123 FILE *log;
124 char fn[MAX_FN_SIZE];
125 char buf[MAX_BUF_SIZE];
126 va_list args;
127
128 sprintf(fn, "%d.log", cpu);
129 log=fopen(fn, "a+");
130 if (log==NULL) {
131 perror("Error open:");
132 return -1;
133 }
134
135 va_start(args, fmt);
136 vprintf(fmt, args);
137 memset(buf, 0, MAX_BUF_SIZE);
138 vsprintf(buf, fmt, args);
139 va_end(args);
140
141 fwrite(buf, sizeof(buf), 1, log);
142 fclose(log);
143
144 return 0;
145}
146
147typedef unsigned long u64;
148typedef unsigned int u32;
149
150typedef union err_type_info_u {
151 struct {
152 u64 mode : 3, /* 0-2 */
153 err_inj : 3, /* 3-5 */
154 err_sev : 2, /* 6-7 */
155 err_struct : 5, /* 8-12 */
156 struct_hier : 3, /* 13-15 */
157 reserved : 48; /* 16-63 */
158 } err_type_info_u;
159 u64 err_type_info;
160} err_type_info_t;
161
162typedef union err_struct_info_u {
163 struct {
164 u64 siv : 1, /* 0 */
165 c_t : 2, /* 1-2 */
166 cl_p : 3, /* 3-5 */
167 cl_id : 3, /* 6-8 */
168 cl_dp : 1, /* 9 */
169 reserved1 : 22, /* 10-31 */
170 tiv : 1, /* 32 */
171 trigger : 4, /* 33-36 */
172 trigger_pl : 3, /* 37-39 */
173 reserved2 : 24; /* 40-63 */
174 } err_struct_info_cache;
175 struct {
176 u64 siv : 1, /* 0 */
177 tt : 2, /* 1-2 */
178 tc_tr : 2, /* 3-4 */
179 tr_slot : 8, /* 5-12 */
180 reserved1 : 19, /* 13-31 */
181 tiv : 1, /* 32 */
182 trigger : 4, /* 33-36 */
183 trigger_pl : 3, /* 37-39 */
184 reserved2 : 24; /* 40-63 */
185 } err_struct_info_tlb;
186 struct {
187 u64 siv : 1, /* 0 */
188 regfile_id : 4, /* 1-4 */
189 reg_num : 7, /* 5-11 */
190 reserved1 : 20, /* 12-31 */
191 tiv : 1, /* 32 */
192 trigger : 4, /* 33-36 */
193 trigger_pl : 3, /* 37-39 */
194 reserved2 : 24; /* 40-63 */
195 } err_struct_info_register;
196 struct {
197 u64 reserved;
198 } err_struct_info_bus_processor_interconnect;
199 u64 err_struct_info;
200} err_struct_info_t;
201
202typedef union err_data_buffer_u {
203 struct {
204 u64 trigger_addr; /* 0-63 */
205 u64 inj_addr; /* 64-127 */
206 u64 way : 5, /* 128-132 */
207 index : 20, /* 133-152 */
208 : 39; /* 153-191 */
209 } err_data_buffer_cache;
210 struct {
211 u64 trigger_addr; /* 0-63 */
212 u64 inj_addr; /* 64-127 */
213 u64 way : 5, /* 128-132 */
214 index : 20, /* 133-152 */
215 reserved : 39; /* 153-191 */
216 } err_data_buffer_tlb;
217 struct {
218 u64 trigger_addr; /* 0-63 */
219 } err_data_buffer_register;
220 struct {
221 u64 reserved; /* 0-63 */
222 } err_data_buffer_bus_processor_interconnect;
223 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
224} err_data_buffer_t;
225
226typedef union capabilities_u {
227 struct {
228 u64 i : 1,
229 d : 1,
230 rv : 1,
231 tag : 1,
232 data : 1,
233 mesi : 1,
234 dp : 1,
235 reserved1 : 3,
236 pa : 1,
237 va : 1,
238 wi : 1,
239 reserved2 : 20,
240 trigger : 1,
241 trigger_pl : 1,
242 reserved3 : 30;
243 } capabilities_cache;
244 struct {
245 u64 d : 1,
246 i : 1,
247 rv : 1,
248 tc : 1,
249 tr : 1,
250 reserved1 : 27,
251 trigger : 1,
252 trigger_pl : 1,
253 reserved2 : 30;
254 } capabilities_tlb;
255 struct {
256 u64 gr_b0 : 1,
257 gr_b1 : 1,
258 fr : 1,
259 br : 1,
260 pr : 1,
261 ar : 1,
262 cr : 1,
263 rr : 1,
264 pkr : 1,
265 dbr : 1,
266 ibr : 1,
267 pmc : 1,
268 pmd : 1,
269 reserved1 : 3,
270 regnum : 1,
271 reserved2 : 15,
272 trigger : 1,
273 trigger_pl : 1,
274 reserved3 : 30;
275 } capabilities_register;
276 struct {
277 u64 reserved;
278 } capabilities_bus_processor_interconnect;
279} capabilities_t;
280
281typedef struct resources_s {
282 u64 ibr0 : 1,
283 ibr2 : 1,
284 ibr4 : 1,
285 ibr6 : 1,
286 dbr0 : 1,
287 dbr2 : 1,
288 dbr4 : 1,
289 dbr6 : 1,
290 reserved : 48;
291} resources_t;
292
293
/* Return the system page size as reported by sysconf(), or -1 on failure. */
long get_page_size(void)
{
	return sysconf(_SC_PAGESIZE);
}
299
300#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
301#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
302#define SHM_VA 0x2000000100000000
303
304int shmid;
305void *shmaddr;
306
307int create_shm(void)
308{
309 key_t key;
310 char fn[MAX_FN_SIZE];
311
312 /* cpu0 is always existing */
313 sprintf(fn, PATH_FORMAT, 0);
314 if ((key = ftok(fn, 's')) == -1) {
315 perror("ftok");
316 return -1;
317 }
318
319 shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
320 if (shmid == -1) {
321 if (errno==EEXIST) {
322 shmid = shmget(key, SHM_SIZE, 0);
323 if (shmid == -1) {
324 perror("shmget");
325 return -1;
326 }
327 }
328 else {
329 perror("shmget");
330 return -1;
331 }
332 }
333 vbprintf("shmid=%d", shmid);
334
335 /* connect to the segment: */
336 shmaddr = shmat(shmid, (void *)SHM_VA, 0);
337 if (shmaddr == (void*)-1) {
338 perror("shmat");
339 return -1;
340 }
341
342 memset(shmaddr, 0, SHM_SIZE);
343 mlock(shmaddr, SHM_SIZE);
344
345 return 0;
346}
347
348int free_shm()
349{
350 munlock(shmaddr, SHM_SIZE);
351 shmdt(shmaddr);
352 semctl(shmid, 0, IPC_RMID);
353
354 return 0;
355}
356
357#ifdef _SEM_SEMUN_UNDEFINED
358union semun
359{
360 int val;
361 struct semid_ds *buf;
362 unsigned short int *array;
363 struct seminfo *__buf;
364};
365#endif
366
367u32 mode=1; /* 1: physical mode; 2: virtual mode. */
368int one_lock=1;
369key_t key[NR_CPUS];
370int semid[NR_CPUS];
371
372int create_sem(int cpu)
373{
374 union semun arg;
375 char fn[MAX_FN_SIZE];
376 int sid;
377
378 sprintf(fn, PATH_FORMAT, cpu);
379 sprintf(fn, "%s/%s", fn, "err_type_info");
380 if ((key[cpu] = ftok(fn, 'e')) == -1) {
381 perror("ftok");
382 return -1;
383 }
384
385 if (semid[cpu]!=0)
386 return 0;
387
388 /* clear old semaphore */
389 if ((sid = semget(key[cpu], 1, 0)) != -1)
390 semctl(sid, 0, IPC_RMID);
391
392 /* get one semaphore */
393 if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
394 perror("semget");
395 printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
396 (u64)key[cpu]);
397 return -1;
398 }
399
400 vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
401 (u64)key[cpu]);
402 /* initialize the semaphore to 1: */
403 arg.val = 1;
404 if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
405 perror("semctl");
406 return -1;
407 }
408
409 return 0;
410}
411
412static int lock(int cpu)
413{
414 struct sembuf lock;
415
416 lock.sem_num = cpu;
417 lock.sem_op = 1;
418 semop(semid[cpu], &lock, 1);
419
420 return 0;
421}
422
423static int unlock(int cpu)
424{
425 struct sembuf unlock;
426
427 unlock.sem_num = cpu;
428 unlock.sem_op = -1;
429 semop(semid[cpu], &unlock, 1);
430
431 return 0;
432}
433
434void free_sem(int cpu)
435{
436 semctl(semid[cpu], 0, IPC_RMID);
437}
438
439int wr_multi(char *fn, unsigned long *data, int size)
440{
441 int fd;
442 char buf[MAX_BUF_SIZE];
443 int ret;
444
445 if (size==1)
446 sprintf(buf, "%lx", *data);
447 else if (size==3)
448 sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
449 else {
450 fprintf(stderr,"write to file with wrong size!\n");
451 return -1;
452 }
453
454 fd=open(fn, O_RDWR);
455 if (!fd) {
456 perror("Error:");
457 return -1;
458 }
459 ret=write(fd, buf, sizeof(buf));
460 close(fd);
461 return ret;
462}
463
/* Write the single value @data to @fn; thin wrapper around wr_multi(). */
int wr(char *fn, unsigned long data)
{
	unsigned long value = data;

	return wr_multi(fn, &value, 1);
}
468
469int rd(char *fn, unsigned long *data)
470{
471 int fd;
472 char buf[MAX_BUF_SIZE];
473
474 fd=open(fn, O_RDONLY);
475 if (fd<0) {
476 perror("Error:");
477 return -1;
478 }
479 read(fd, buf, MAX_BUF_SIZE);
480 *data=strtoul(buf, NULL, 16);
481 close(fd);
482 return 0;
483}
484
485int rd_status(char *path, int *status)
486{
487 char fn[MAX_FN_SIZE];
488 sprintf(fn, "%s/status", path);
489 if (rd(fn, (u64*)status)<0) {
490 perror("status reading error.\n");
491 return -1;
492 }
493
494 return 0;
495}
496
497int rd_capabilities(char *path, u64 *capabilities)
498{
499 char fn[MAX_FN_SIZE];
500 sprintf(fn, "%s/capabilities", path);
501 if (rd(fn, capabilities)<0) {
502 perror("capabilities reading error.\n");
503 return -1;
504 }
505
506 return 0;
507}
508
509int rd_all(char *path)
510{
511 unsigned long err_type_info, err_struct_info, err_data_buffer;
512 int status;
513 unsigned long capabilities, resources;
514 char fn[MAX_FN_SIZE];
515
516 sprintf(fn, "%s/err_type_info", path);
517 if (rd(fn, &err_type_info)<0) {
518 perror("err_type_info reading error.\n");
519 return -1;
520 }
521 printf("err_type_info=%lx\n", err_type_info);
522
523 sprintf(fn, "%s/err_struct_info", path);
524 if (rd(fn, &err_struct_info)<0) {
525 perror("err_struct_info reading error.\n");
526 return -1;
527 }
528 printf("err_struct_info=%lx\n", err_struct_info);
529
530 sprintf(fn, "%s/err_data_buffer", path);
531 if (rd(fn, &err_data_buffer)<0) {
532 perror("err_data_buffer reading error.\n");
533 return -1;
534 }
535 printf("err_data_buffer=%lx\n", err_data_buffer);
536
537 sprintf(fn, "%s/status", path);
538 if (rd("status", (u64*)&status)<0) {
539 perror("status reading error.\n");
540 return -1;
541 }
542 printf("status=%d\n", status);
543
544 sprintf(fn, "%s/capabilities", path);
545 if (rd(fn,&capabilities)<0) {
546 perror("capabilities reading error.\n");
547 return -1;
548 }
549 printf("capabilities=%lx\n", capabilities);
550
551 sprintf(fn, "%s/resources", path);
552 if (rd(fn, &resources)<0) {
553 perror("resources reading error.\n");
554 return -1;
555 }
556 printf("resources=%lx\n", resources);
557
558 return 0;
559}
560
561int query_capabilities(char *path, err_type_info_t err_type_info,
562 u64 *capabilities)
563{
564 char fn[MAX_FN_SIZE];
565 err_struct_info_t err_struct_info;
566 err_data_buffer_t err_data_buffer;
567
568 err_struct_info.err_struct_info=0;
569 memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
570
571 sprintf(fn, "%s/err_type_info", path);
572 wr(fn, err_type_info.err_type_info);
573 sprintf(fn, "%s/err_struct_info", path);
574 wr(fn, 0x0);
575 sprintf(fn, "%s/err_data_buffer", path);
576 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
577
578 // Fire pal_mc_error_inject procedure.
579 sprintf(fn, "%s/call_start", path);
580 wr(fn, mode);
581
582 if (rd_capabilities(path, capabilities)<0)
583 return -1;
584
585 return 0;
586}
587
588int query_all_capabilities()
589{
590 int status;
591 err_type_info_t err_type_info;
592 int err_sev, err_struct, struct_hier;
593 int cap=0;
594 u64 capabilities;
595 char path[MAX_FN_SIZE];
596
597 err_type_info.err_type_info=0; // Initial
598 err_type_info.err_type_info_u.mode=0; // Query mode;
599 err_type_info.err_type_info_u.err_inj=0;
600
601 printf("All capabilities implemented in pal_mc_error_inject:\n");
602 sprintf(path, PATH_FORMAT ,0);
603 for (err_sev=0;err_sev<3;err_sev++)
604 for (err_struct=0;err_struct<5;err_struct++)
605 for (struct_hier=0;struct_hier<5;struct_hier++)
606 {
607 status=-1;
608 capabilities=0;
609 err_type_info.err_type_info_u.err_sev=err_sev;
610 err_type_info.err_type_info_u.err_struct=err_struct;
611 err_type_info.err_type_info_u.struct_hier=struct_hier;
612
613 if (query_capabilities(path, err_type_info, &capabilities)<0)
614 continue;
615
616 if (rd_status(path, &status)<0)
617 continue;
618
619 if (status==0) {
620 cap=1;
621 printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
622 err_sev, err_struct, struct_hier);
623 printf("capabilities 0x%lx\n", capabilities);
624 }
625 }
626 if (!cap) {
627 printf("No capabilities supported.\n");
628 return 0;
629 }
630
631 return 0;
632}
633
634int err_inject(int cpu, char *path, err_type_info_t err_type_info,
635 err_struct_info_t err_struct_info,
636 err_data_buffer_t err_data_buffer)
637{
638 int status;
639 char fn[MAX_FN_SIZE];
640
641 log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
642 err_type_info.err_type_info,
643 err_struct_info.err_struct_info);
644 log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
645 err_data_buffer.err_data_buffer[0],
646 err_data_buffer.err_data_buffer[1],
647 err_data_buffer.err_data_buffer[2]);
648 sprintf(fn, "%s/err_type_info", path);
649 wr(fn, err_type_info.err_type_info);
650 sprintf(fn, "%s/err_struct_info", path);
651 wr(fn, err_struct_info.err_struct_info);
652 sprintf(fn, "%s/err_data_buffer", path);
653 wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
654
655 // Fire pal_mc_error_inject procedure.
656 sprintf(fn, "%s/call_start", path);
657 wr(fn,mode);
658
659 if (rd_status(path, &status)<0) {
660 vbprintf("fail: read status\n");
661 return -100;
662 }
663
664 if (status!=0) {
665 log_info(cpu, "fail: status=%d\n", status);
666 return status;
667 }
668
669 return status;
670}
671
672static int construct_data_buf(char *path, err_type_info_t err_type_info,
673 err_struct_info_t err_struct_info,
674 err_data_buffer_t *err_data_buffer,
675 void *va1)
676{
677 char fn[MAX_FN_SIZE];
678 u64 virt_addr=0, phys_addr=0;
679
680 vbprintf("va1=%lx\n", (u64)va1);
681 memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
682
683 switch (err_type_info.err_type_info_u.err_struct) {
684 case 1: // Cache
685 switch (err_struct_info.err_struct_info_cache.cl_id) {
686 case 1: //Virtual addr
687 err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
688 break;
689 case 2: //Phys addr
690 sprintf(fn, "%s/virtual_to_phys", path);
691 virt_addr=(u64)va1;
692 if (wr(fn,virt_addr)<0)
693 return -1;
694 rd(fn, &phys_addr);
695 err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
696 break;
697 default:
698 printf("Not supported cl_id\n");
699 break;
700 }
701 break;
702 case 2: // TLB
703 break;
704 case 3: // Register file
705 break;
706 case 4: // Bus/system interconnect
707 default:
708 printf("Not supported err_struct\n");
709 break;
710 }
711
712 return 0;
713}
714
715typedef struct {
716 u64 cpu;
717 u64 loop;
718 u64 interval;
719 u64 err_type_info;
720 u64 err_struct_info;
721 u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
722} parameters_t;
723
724parameters_t line_para;
725int para;
726
727static int empty_data_buffer(u64 *err_data_buffer)
728{
729 int empty=1;
730 int i;
731
732 for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
733 if (err_data_buffer[i]!=-1)
734 empty=0;
735
736 return empty;
737}
738
739int err_inj()
740{
741 err_type_info_t err_type_info;
742 err_struct_info_t err_struct_info;
743 err_data_buffer_t err_data_buffer;
744 int count;
745 FILE *fp;
746 unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
747 u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
748 int num;
749 int i;
750 char path[MAX_FN_SIZE];
751 parameters_t parameters[MAX_TASK_NUM]={};
752 pid_t child_pid[MAX_TASK_NUM];
753 time_t current_time;
754 int status;
755
756 if (!para) {
757 fp=fopen("err.conf", "r");
758 if (fp==NULL) {
759 perror("Error open err.conf");
760 return -1;
761 }
762
763 num=0;
764 while (!feof(fp)) {
765 char buf[256];
766 memset(buf,0,256);
767 fgets(buf, 256, fp);
768 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
769 &cpu, &loop, &interval,&err_type_info_conf,
770 &err_struct_info_conf,
771 &err_data_buffer_conf[0],
772 &err_data_buffer_conf[1],
773 &err_data_buffer_conf[2]);
774 if (count!=PARA_FIELD_NUM+3) {
775 err_data_buffer_conf[0]=-1;
776 err_data_buffer_conf[1]=-1;
777 err_data_buffer_conf[2]=-1;
778 count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
779 &cpu, &loop, &interval,&err_type_info_conf,
780 &err_struct_info_conf);
781 if (count!=PARA_FIELD_NUM)
782 continue;
783 }
784
785 parameters[num].cpu=cpu;
786 parameters[num].loop=loop;
787 parameters[num].interval= interval>MIN_INTERVAL
788 ?interval:MIN_INTERVAL;
789 parameters[num].err_type_info=err_type_info_conf;
790 parameters[num].err_struct_info=err_struct_info_conf;
791 memcpy(parameters[num++].err_data_buffer,
792 err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
793
794 if (num>=MAX_TASK_NUM)
795 break;
796 }
797 }
798 else {
799 parameters[0].cpu=line_para.cpu;
800 parameters[0].loop=line_para.loop;
801 parameters[0].interval= line_para.interval>MIN_INTERVAL
802 ?line_para.interval:MIN_INTERVAL;
803 parameters[0].err_type_info=line_para.err_type_info;
804 parameters[0].err_struct_info=line_para.err_struct_info;
805 memcpy(parameters[0].err_data_buffer,
806 line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
807
808 num=1;
809 }
810
811 /* Create semaphore: If one_lock, one semaphore for all processors.
812 Otherwise, one sempaphore for each processor. */
813 if (one_lock) {
814 if (create_sem(0)) {
815 printf("Can not create semaphore...exit\n");
816 free_sem(0);
817 return -1;
818 }
819 }
820 else {
821 for (i=0;i<num;i++) {
822 if (create_sem(parameters[i].cpu)) {
823 printf("Can not create semaphore for cpu%d...exit\n",i);
824 free_sem(parameters[num].cpu);
825 return -1;
826 }
827 }
828 }
829
830 /* Create a shm segment which will be used to inject/consume errors on.*/
831 if (create_shm()==-1) {
832 printf("Error to create shm...exit\n");
833 return -1;
834 }
835
836 for (i=0;i<num;i++) {
837 pid_t pid;
838
839 current_time=time(NULL);
840 log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
841 log_info(parameters[i].cpu, "Configurations:\n");
842 log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
843 parameters[i].cpu,
844 parameters[i].loop,
845 parameters[i].interval);
846 log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
847 parameters[i].err_type_info,
848 parameters[i].err_struct_info);
849
850 sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
851 err_type_info.err_type_info=parameters[i].err_type_info;
852 err_struct_info.err_struct_info=parameters[i].err_struct_info;
853 memcpy(err_data_buffer.err_data_buffer,
854 parameters[i].err_data_buffer,
855 ERR_DATA_BUFFER_SIZE*8);
856
857 pid=fork();
858 if (pid==0) {
859 unsigned long mask[MASK_SIZE];
860 int j, k;
861
862 void *va1, *va2;
863
864 /* Allocate two memory areas va1 and va2 in shm */
865 va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
866 va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
867
868 vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
869 memset(va1, 0x1, PAGE_SIZE);
870 memset(va2, 0x2, PAGE_SIZE);
871
872 if (empty_data_buffer(err_data_buffer.err_data_buffer))
873 /* If not specified yet, construct data buffer
874 * with va1
875 */
876 construct_data_buf(path, err_type_info,
877 err_struct_info, &err_data_buffer,va1);
878
879 for (j=0;j<MASK_SIZE;j++)
880 mask[j]=0;
881
882 cpu=parameters[i].cpu;
883 k = cpu%64;
884 j = cpu/64;
885 mask[j]=1<<k;
886
887 if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
888 perror("Error sched_setaffinity:");
889 return -1;
890 }
891
892 for (j=0; j<parameters[i].loop; j++) {
893 log_info(parameters[i].cpu,"Injection ");
894 log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
895
896 parameters[i].cpu,j+1, parameters[i].loop);
897
898 /* Hold the lock */
899 if (one_lock)
900 lock(0);
901 else
902 /* Hold lock on this cpu */
903 lock(parameters[i].cpu);
904
905 if ((status=err_inject(parameters[i].cpu,
906 path, err_type_info,
907 err_struct_info, err_data_buffer))
908 ==0) {
909 /* consume the error for "inject only"*/
910 memcpy(va2, va1, PAGE_SIZE);
911 memcpy(va1, va2, PAGE_SIZE);
912 log_info(parameters[i].cpu,
913 "successful\n");
914 }
915 else {
916 log_info(parameters[i].cpu,"fail:");
917 log_info(parameters[i].cpu,
918 "status=%d\n", status);
919 unlock(parameters[i].cpu);
920 break;
921 }
922 if (one_lock)
923 /* Release the lock */
924 unlock(0);
925 /* Release lock on this cpu */
926 else
927 unlock(parameters[i].cpu);
928
929 if (j < parameters[i].loop-1)
930 sleep(parameters[i].interval);
931 }
932 current_time=time(NULL);
933 log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
934 return 0;
935 }
936 else if (pid<0) {
937 perror("Error fork:");
938 continue;
939 }
940 child_pid[i]=pid;
941 }
942 for (i=0;i<num;i++)
943 waitpid(child_pid[i], NULL, 0);
944
945 if (one_lock)
946 free_sem(0);
947 else
948 for (i=0;i<num;i++)
949 free_sem(parameters[i].cpu);
950
951 printf("All done.\n");
952
953 return 0;
954}
955
/* Print the command line usage summary to stdout. */
void help()
{
	static const char *const usage[] = {
		"err_inject_tool:\n",
		"\t-q: query all capabilities. default: off\n",
		"\t-m: procedure mode. 1: physical 2: virtual. default: 1\n",
		"\t-i: inject errors. default: off\n",
		"\t-l: one lock per cpu. default: one lock for all\n",
		"\t-e: error parameters:\n",
		"\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n",
		"\t\t cpu: logical cpu number the error will be inject in.\n",
		"\t\t loop: times the error will be injected.\n",
		"\t\t interval: In second. every so often one error is injected.\n",
		"\t\t err_type_info, err_struct_info: PAL parameters.\n",
		"\t\t err_data_buffer: PAL parameter. Optional. If not present,\n",
		"\t\t it's constructed by tool automatically. Be\n",
		"\t\t careful to provide err_data_buffer and make\n",
		"\t\t sure it's working with the environment.\n",
		"\t Note:no space between error parameters.\n",
		"\t default: Take error parameters from err.conf instead of command line.\n",
		"\t-v: verbose. default: off\n",
		"\t-h: help\n\n",
		"The tool will take err.conf file as ",
		"input to inject single or multiple errors ",
		"on one or multiple cpus in parallel.\n",
	};
	unsigned int line;

	/* the text holds no conversion specifiers, so emit it verbatim */
	for (line = 0; line < sizeof(usage) / sizeof(usage[0]); line++)
		fputs(usage[line], stdout);
}
981
982int main(int argc, char **argv)
983{
984 char c;
985 int do_err_inj=0;
986 int do_query_all=0;
987 int count;
988 u32 m;
989
990 /* Default one lock for all cpu's */
991 one_lock=1;
992 while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
993 switch (c) {
994 case 'm': /* Procedure mode. 1: phys 2: virt */
995 count=sscanf(optarg, "%x", &m);
996 if (count!=1 || (m!=1 && m!=2)) {
997 printf("Wrong mode number.\n");
998 help();
999 return -1;
1000 }
1001 mode=m;
1002 break;
1003 case 'i': /* Inject errors */
1004 do_err_inj=1;
1005 break;
1006 case 'q': /* Query */
1007 do_query_all=1;
1008 break;
1009 case 'v': /* Verbose */
1010 verbose=1;
1011 break;
1012 case 'l': /* One lock per cpu */
1013 one_lock=0;
1014 break;
1015 case 'e': /* error arguments */
1016 /* Take parameters:
1017 * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
1018 * err_data_buffer is optional. Recommend not to specify
1019 * err_data_buffer. Better to use tool to generate it.
1020 */
1021 count=sscanf(optarg,
1022 "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
1023 &line_para.cpu,
1024 &line_para.loop,
1025 &line_para.interval,
1026 &line_para.err_type_info,
1027 &line_para.err_struct_info,
1028 &line_para.err_data_buffer[0],
1029 &line_para.err_data_buffer[1],
1030 &line_para.err_data_buffer[2]);
1031 if (count!=PARA_FIELD_NUM+3) {
1032 line_para.err_data_buffer[0]=-1,
1033 line_para.err_data_buffer[1]=-1,
1034 line_para.err_data_buffer[2]=-1;
1035 count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
1036 &line_para.cpu,
1037 &line_para.loop,
1038 &line_para.interval,
1039 &line_para.err_type_info,
1040 &line_para.err_struct_info);
1041 if (count!=PARA_FIELD_NUM) {
1042 printf("Wrong error arguments.\n");
1043 help();
1044 return -1;
1045 }
1046 }
1047 para=1;
1048 break;
1049 continue;
1050 break;
1051 case 'h':
1052 help();
1053 return 0;
1054 default:
1055 break;
1056 }
1057
1058 if (do_query_all)
1059 query_all_capabilities();
1060 if (do_err_inj)
1061 err_inj();
1062
1063 if (!do_query_all && !do_err_inj)
1064 help();
1065
1066 return 0;
1067}
1068
diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt
index 180e0689676c..d9d523099bb7 100644
--- a/Documentation/input/input-programming.txt
+++ b/Documentation/input/input-programming.txt
@@ -1,5 +1,3 @@
1$Id: input-programming.txt,v 1.4 2001/05/04 09:47:14 vojtech Exp $
2
3Programming input drivers 1Programming input drivers
4~~~~~~~~~~~~~~~~~~~~~~~~~ 2~~~~~~~~~~~~~~~~~~~~~~~~~
5 3
@@ -20,28 +18,51 @@ pressed or released a BUTTON_IRQ happens. The driver could look like:
20#include <asm/irq.h> 18#include <asm/irq.h>
21#include <asm/io.h> 19#include <asm/io.h>
22 20
21static struct input_dev *button_dev;
22
23static void button_interrupt(int irq, void *dummy, struct pt_regs *fp) 23static void button_interrupt(int irq, void *dummy, struct pt_regs *fp)
24{ 24{
25 input_report_key(&button_dev, BTN_1, inb(BUTTON_PORT) & 1); 25 input_report_key(button_dev, BTN_1, inb(BUTTON_PORT) & 1);
26 input_sync(&button_dev); 26 input_sync(button_dev);
27} 27}
28 28
29static int __init button_init(void) 29static int __init button_init(void)
30{ 30{
31 int error;
32
31 if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { 33 if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) {
32 printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); 34 printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq);
33 return -EBUSY; 35 return -EBUSY;
34 } 36 }
35 37
36 button_dev.evbit[0] = BIT(EV_KEY); 38 button_dev = input_allocate_device();
37 button_dev.keybit[LONG(BTN_0)] = BIT(BTN_0); 39 if (!button_dev) {
38 40 printk(KERN_ERR "button.c: Not enough memory\n");
39 input_register_device(&button_dev); 41 error = -ENOMEM;
42 goto err_free_irq;
43 }
44
45 button_dev->evbit[0] = BIT(EV_KEY);
46 button_dev->keybit[LONG(BTN_0)] = BIT(BTN_0);
47
48 error = input_register_device(button_dev);
49 if (error) {
50 printk(KERN_ERR "button.c: Failed to register device\n");
51 goto err_free_dev;
52 }
53
54 return 0;
55
56 err_free_dev:
57 input_free_device(button_dev);
58 err_free_irq:
59 free_irq(BUTTON_IRQ, button_interrupt);
60 return error;
40} 61}
41 62
42static void __exit button_exit(void) 63static void __exit button_exit(void)
43{ 64{
44 input_unregister_device(&button_dev); 65 input_unregister_device(button_dev);
45 free_irq(BUTTON_IRQ, button_interrupt); 66 free_irq(BUTTON_IRQ, button_interrupt);
46} 67}
47 68
@@ -58,17 +79,18 @@ In the _init function, which is called either upon module load or when
58booting the kernel, it grabs the required resources (it should also check 79booting the kernel, it grabs the required resources (it should also check
59for the presence of the device). 80for the presence of the device).
60 81
61Then it sets the input bitfields. This way the device driver tells the other 82Then it allocates a new input device structure with input_allocate_device()
83and sets up input bitfields. This way the device driver tells the other
62parts of the input systems what it is - what events can be generated or 84parts of the input systems what it is - what events can be generated or
63accepted by this input device. Our example device can only generate EV_KEY type 85accepted by this input device. Our example device can only generate EV_KEY
64events, and from those only BTN_0 event code. Thus we only set these two 86type events, and from those only BTN_0 event code. Thus we only set these
65bits. We could have used 87two bits. We could have used
66 88
67 set_bit(EV_KEY, button_dev.evbit); 89 set_bit(EV_KEY, button_dev.evbit);
68 set_bit(BTN_0, button_dev.keybit); 90 set_bit(BTN_0, button_dev.keybit);
69 91
70as well, but with more than single bits the first approach tends to be 92as well, but with more than single bits the first approach tends to be
71shorter. 93shorter.
72 94
73Then the example driver registers the input device structure by calling 95Then the example driver registers the input device structure by calling
74 96
@@ -76,16 +98,15 @@ Then the example driver registers the input device structure by calling
76 98
77This adds the button_dev structure to linked lists of the input driver and 99This adds the button_dev structure to linked lists of the input driver and
78calls device handler modules _connect functions to tell them a new input 100calls device handler modules _connect functions to tell them a new input
79device has appeared. Because the _connect functions may call kmalloc(, 101device has appeared. input_register_device() may sleep and therefore must
80GFP_KERNEL), which can sleep, input_register_device() must not be called 102not be called from an interrupt or with a spinlock held.
81from an interrupt or with a spinlock held.
82 103
83While in use, the only used function of the driver is 104While in use, the only used function of the driver is
84 105
85 button_interrupt() 106 button_interrupt()
86 107
87which upon every interrupt from the button checks its state and reports it 108which upon every interrupt from the button checks its state and reports it
88via the 109via the
89 110
90 input_report_key() 111 input_report_key()
91 112
@@ -113,16 +134,10 @@ can use the open and close callback to know when it can stop polling or
113release the interrupt and when it must resume polling or grab the interrupt 134release the interrupt and when it must resume polling or grab the interrupt
114again. To do that, we would add this to our example driver: 135again. To do that, we would add this to our example driver:
115 136
116int button_used = 0;
117
118static int button_open(struct input_dev *dev) 137static int button_open(struct input_dev *dev)
119{ 138{
120 if (button_used++)
121 return 0;
122
123 if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) { 139 if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) {
124 printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq); 140 printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq);
125 button_used--;
126 return -EBUSY; 141 return -EBUSY;
127 } 142 }
128 143
@@ -131,20 +146,21 @@ static int button_open(struct input_dev *dev)
131 146
132static void button_close(struct input_dev *dev) 147static void button_close(struct input_dev *dev)
133{ 148{
134 if (!--button_used) 149 free_irq(IRQ_AMIGA_VERTB, button_interrupt);
135 free_irq(IRQ_AMIGA_VERTB, button_interrupt);
136} 150}
137 151
138static int __init button_init(void) 152static int __init button_init(void)
139{ 153{
140 ... 154 ...
141 button_dev.open = button_open; 155 button_dev->open = button_open;
142 button_dev.close = button_close; 156 button_dev->close = button_close;
143 ... 157 ...
144} 158}
145 159
146Note the button_used variable - we have to track how many times the open 160Note that the input core keeps track of the number of users for the device and
147function was called to know when exactly our device stops being used. 161makes sure that dev->open() is called only when the first user connects
162to the device and that dev->close() is called when the very last user
163disconnects. Calls to both callbacks are serialized.
148 164
149The open() callback should return a 0 in case of success or any nonzero value 165The open() callback should return a 0 in case of success or any nonzero value
150in case of failure. The close() callback (which is void) must always succeed. 166in case of failure. The close() callback (which is void) must always succeed.
@@ -175,7 +191,7 @@ set the corresponding bits and call the
175 191
176 input_report_rel(struct input_dev *dev, int code, int value) 192 input_report_rel(struct input_dev *dev, int code, int value)
177 193
178function. Events are generated only for nonzero value. 194function. Events are generated only for nonzero value.
179 195
180However EV_ABS requires a little special care. Before calling 196However EV_ABS requires a little special care. Before calling
181input_register_device, you have to fill additional fields in the input_dev 197input_register_device, you have to fill additional fields in the input_dev
@@ -187,6 +203,10 @@ the ABS_X axis:
187 button_dev.absfuzz[ABS_X] = 4; 203 button_dev.absfuzz[ABS_X] = 4;
188 button_dev.absflat[ABS_X] = 8; 204 button_dev.absflat[ABS_X] = 8;
189 205
206Or, you can just say:
207
208 input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8);
209
190This setting would be appropriate for a joystick X axis, with the minimum of 210This setting would be appropriate for a joystick X axis, with the minimum of
1910, maximum of 255 (which the joystick *must* be able to reach, no problem if 2110, maximum of 255 (which the joystick *must* be able to reach, no problem if
192it sometimes reports more, but it must be able to always reach the min and 212it sometimes reports more, but it must be able to always reach the min and
@@ -197,14 +217,7 @@ If you don't need absfuzz and absflat, you can set them to zero, which mean
197that the thing is precise and always returns to exactly the center position 217that the thing is precise and always returns to exactly the center position
198(if it has any). 218(if it has any).
199 219
2001.4 The void *private field 2201.4 NBITS(), LONG(), BIT()
201~~~~~~~~~~~~~~~~~~~~~~~~~~~
202
203This field in the input structure can be used to point to any private data
204structures in the input device driver, in case the driver handles more than
205one device. You'll need it in the open and close callbacks.
206
2071.5 NBITS(), LONG(), BIT()
208~~~~~~~~~~~~~~~~~~~~~~~~~~ 221~~~~~~~~~~~~~~~~~~~~~~~~~~
209 222
210These three macros from input.h help some bitfield computations: 223These three macros from input.h help some bitfield computations:
@@ -213,13 +226,9 @@ These three macros from input.h help some bitfield computations:
213 LONG(x) - returns the index in the array in longs for bit x 226 LONG(x) - returns the index in the array in longs for bit x
214 BIT(x) - returns the index in a long for bit x 227 BIT(x) - returns the index in a long for bit x
215 228
2161.6 The number, id* and name fields 2291.5 The id* and name fields
217~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 230~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
218 231
219The dev->number is assigned by the input system to the input device when it
220is registered. It has no use except for identifying the device to the user
221in system messages.
222
223The dev->name should be set before registering the input device by the input 232The dev->name should be set before registering the input device by the input
224device driver. It's a string like 'Generic button device' containing a 233device driver. It's a string like 'Generic button device' containing a
225user friendly name of the device. 234user friendly name of the device.
@@ -234,15 +243,25 @@ driver.
234 243
235The id and name fields can be passed to userland via the evdev interface. 244The id and name fields can be passed to userland via the evdev interface.
236 245
2371.7 The keycode, keycodemax, keycodesize fields 2461.6 The keycode, keycodemax, keycodesize fields
238~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 247~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
239 248
240These two fields will be used for any input devices that report their data 249These three fields should be used by input devices that have dense keymaps.
241as scancodes. If not all scancodes can be known by autodetection, they may 250The keycode is an array used to map from scancodes to input system keycodes.
242need to be set by userland utilities. The keycode array then is an array 251The keycodemax should contain the size of the array and keycodesize the
243used to map from scancodes to input system keycodes. The keycode max will 252size of each entry in it (in bytes).
244contain the size of the array and keycodesize the size of each entry in it 253
245(in bytes). 254Userspace can query and alter current scancode to keycode mappings using
255EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface.
256When a device has all 3 aforementioned fields filled in, the driver may
257rely on kernel's default implementation of setting and querying keycode
258mappings.
259
2601.7 dev->getkeycode() and dev->setkeycode()
261~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
262getkeycode() and setkeycode() callbacks allow drivers to override default
263keycode/keycodesize/keycodemax mapping mechanism provided by input core
264and implement sparse keycode maps.
246 265
2471.8 Key autorepeat 2661.8 Key autorepeat
248~~~~~~~~~~~~~~~~~~ 267~~~~~~~~~~~~~~~~~~
@@ -266,7 +285,7 @@ direction - from the system to the input device driver. If your input device
266driver can handle these events, it has to set the respective bits in evbit, 285driver can handle these events, it has to set the respective bits in evbit,
267*and* also the callback routine: 286*and* also the callback routine:
268 287
269 button_dev.event = button_event; 288 button_dev->event = button_event;
270 289
271int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value); 290int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value);
272{ 291{
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
index 769ee05ee4d1..1d247d59ad56 100644
--- a/Documentation/kbuild/modules.txt
+++ b/Documentation/kbuild/modules.txt
@@ -249,7 +249,7 @@ following files:
249 --> filename: Makefile 249 --> filename: Makefile
250 KERNELDIR := /lib/modules/`uname -r`/build 250 KERNELDIR := /lib/modules/`uname -r`/build
251 all:: 251 all::
252 $(MAKE) -C $KERNELDIR M=`pwd` $@ 252 $(MAKE) -C $(KERNELDIR) M=`pwd` $@
253 253
254 # Module specific targets 254 # Module specific targets
255 genbin: 255 genbin:
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 84c3bd05c639..38d7db3262c7 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -64,6 +64,7 @@ parameter is applicable:
64 GENERIC_TIME The generic timeofday code is enabled. 64 GENERIC_TIME The generic timeofday code is enabled.
65 NFS Appropriate NFS support is enabled. 65 NFS Appropriate NFS support is enabled.
66 OSS OSS sound support is enabled. 66 OSS OSS sound support is enabled.
67 PV_OPS A paravirtualized kernel
67 PARIDE The ParIDE subsystem is enabled. 68 PARIDE The ParIDE subsystem is enabled.
68 PARISC The PA-RISC architecture is enabled. 69 PARISC The PA-RISC architecture is enabled.
69 PCI PCI bus support is enabled. 70 PCI PCI bus support is enabled.
@@ -695,8 +696,15 @@ and is between 256 and 4096 characters. It is defined in the file
695 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed 696 idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed
696 See Documentation/ide.txt. 697 See Documentation/ide.txt.
697 698
698 idle= [HW] 699 idle= [X86]
699 Format: idle=poll or idle=halt 700 Format: idle=poll or idle=mwait
701 Poll forces a polling idle loop that can slightly improve the performance
702 of waking up an idle CPU, but will use a lot of power and make the system
703 run hot. Not recommended.
704 idle=mwait: On systems which support MONITOR/MWAIT but where the kernel chose
705 not to use it because it doesn't save as much power as a normal idle
706 loop, use the MONITOR/MWAIT idle loop anyway. Performance should be the same
707 as idle=poll.
700 708
701 ignore_loglevel [KNL] 709 ignore_loglevel [KNL]
702 Ignore loglevel setting - this will print /all/ 710 Ignore loglevel setting - this will print /all/
@@ -1157,6 +1165,11 @@ and is between 256 and 4096 characters. It is defined in the file
1157 1165
1158 nomce [IA-32] Machine Check Exception 1166 nomce [IA-32] Machine Check Exception
1159 1167
1168 noreplace-paravirt [IA-32,PV_OPS] Don't patch paravirt_ops
1169
1170 noreplace-smp [IA-32,SMP] Don't replace SMP instructions
1171 with UP alternatives
1172
1160 noresidual [PPC] Don't use residual data on PReP machines. 1173 noresidual [PPC] Don't use residual data on PReP machines.
1161 1174
1162 noresume [SWSUSP] Disables resume and restores original swap 1175 noresume [SWSUSP] Disables resume and restores original swap
@@ -1562,6 +1575,9 @@ and is between 256 and 4096 characters. It is defined in the file
1562 smart2= [HW] 1575 smart2= [HW]
1563 Format: <io1>[,<io2>[,...,<io8>]] 1576 Format: <io1>[,<io2>[,...,<io8>]]
1564 1577
1578 smp-alt-once [IA-32,SMP] On a hotplug CPU system, only
1579 attempt to substitute SMP alternatives once at boot.
1580
1565 snd-ad1816a= [HW,ALSA] 1581 snd-ad1816a= [HW,ALSA]
1566 1582
1567 snd-ad1848= [HW,ALSA] 1583 snd-ad1848= [HW,ALSA]
@@ -1820,6 +1836,7 @@ and is between 256 and 4096 characters. It is defined in the file
1820 [USBHID] The interval which mice are to be polled at. 1836 [USBHID] The interval which mice are to be polled at.
1821 1837
1822 vdso= [IA-32,SH] 1838 vdso= [IA-32,SH]
1839 vdso=2: enable compat VDSO (default with COMPAT_VDSO)
1823 vdso=1: enable VDSO (default) 1840 vdso=1: enable VDSO (default)
1824 vdso=0: disable VDSO mapping 1841 vdso=0: disable VDSO mapping
1825 1842
diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index cdf2f3c0ab14..e2c9d0a0c43d 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -124,10 +124,6 @@ initialization with a pointer to a structure describing the driver
124 124
125 err_handler See Documentation/pci-error-recovery.txt 125 err_handler See Documentation/pci-error-recovery.txt
126 126
127 multithread_probe Enable multi-threaded probe/scan. Driver must
128 provide its own locking/syncronization for init
129 operations if this is enabled.
130
131 127
132The ID table is an array of struct pci_device_id entries ending with an 128The ID table is an array of struct pci_device_id entries ending with an
133all-zero entry. Each entry consists of: 129all-zero entry. Each entry consists of:
@@ -163,9 +159,9 @@ echo "vendor device subvendor subdevice class class_mask driver_data" > \
163/sys/bus/pci/drivers/{driver}/new_id 159/sys/bus/pci/drivers/{driver}/new_id
164 160
165All fields are passed in as hexadecimal values (no leading 0x). 161All fields are passed in as hexadecimal values (no leading 0x).
166Users need pass only as many fields as necessary: 162The vendor and device fields are mandatory, the others are optional. Users
167 o vendor, device, subvendor, and subdevice fields default 163need pass only as many optional fields as necessary:
168 to PCI_ANY_ID (FFFFFFFF), 164 o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
169 o class and classmask fields default to 0 165 o class and classmask fields default to 0
170 o driver_data defaults to 0UL. 166 o driver_data defaults to 0UL.
171 167
@@ -549,8 +545,6 @@ pci_find_slot() Find pci_dev corresponding to given bus and
549pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3) 545pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3)
550pci_find_capability() Find specified capability in device's capability 546pci_find_capability() Find specified capability in device's capability
551 list. 547 list.
552pci_module_init() Inline helper function for ensuring correct
553 pci_driver initialization and error handling.
554pci_resource_start() Returns bus start address for a given PCI region 548pci_resource_start() Returns bus start address for a given PCI region
555pci_resource_end() Returns bus end address for a given PCI region 549pci_resource_end() Returns bus end address for a given PCI region
556pci_resource_len() Returns the byte length of a PCI region 550pci_resource_len() Returns the byte length of a PCI region
diff --git a/Documentation/pcmcia/driver.txt b/Documentation/pcmcia/driver.txt
new file mode 100644
index 000000000000..0ac167920778
--- /dev/null
+++ b/Documentation/pcmcia/driver.txt
@@ -0,0 +1,30 @@
1PCMCIA Driver
2-------------
3
4
5sysfs
6-----
7
8New PCMCIA IDs may be added to a device driver pcmcia_device_id table at
9runtime as shown below:
10
11echo "match_flags manf_id card_id func_id function device_no \
12prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \
13/sys/bus/pcmcia/drivers/{driver}/new_id
14
15All fields are passed in as hexadecimal values (no leading 0x).
16The meaning is described in the PCMCIA specification, the match_flags is
17a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants
18defined in include/linux/mod_devicetable.h.
19
20Once added, the driver probe routine will be invoked for any unclaimed
21PCMCIA device listed in its (newly updated) pcmcia_device_id list.
22
23A common use-case is to add a new device according to the manufacturer ID
24and the card ID (from the manf_id and card_id file in the device tree).
25For this, just use:
26
27echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \
28 /sys/bus/pcmcia/drivers/{driver}/new_id
29
30after loading the driver.
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index 8c5b41bf3f36..fd5192a8fa8a 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -34,8 +34,12 @@ for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then,
34we are able to look in the log messages and work out, for example, which code 34we are able to look in the log messages and work out, for example, which code
35is being slow and which device drivers are misbehaving. 35is being slow and which device drivers are misbehaving.
36 36
37Reading from this file will display what the mode is currently set 37Reading from this file will display all supported modes and the currently
38to. Writing to this file will accept one of 38selected one in brackets, for example
39
40 [shutdown] reboot test testproc
41
42Writing to this file will accept one of
39 43
40 'platform' (only if the platform supports it) 44 'platform' (only if the platform supports it)
41 'shutdown' 45 'shutdown'
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
index b6a3cbf7e846..e00b099a4b86 100644
--- a/Documentation/power/pci.txt
+++ b/Documentation/power/pci.txt
@@ -203,7 +203,7 @@ resume
203 203
204Usage: 204Usage:
205 205
206if (dev->driver && dev->driver->suspend) 206if (dev->driver && dev->driver->resume)
207 dev->driver->resume(dev) 207 dev->driver->resume(dev)
208 208
209The resume callback may be called from any power state, and is always meant to 209The resume callback may be called from any power state, and is always meant to
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
index dc8e44fc650f..2368e7e4a8cf 100644
--- a/Documentation/scsi/aacraid.txt
+++ b/Documentation/scsi/aacraid.txt
@@ -37,7 +37,11 @@ Supported Cards/Chipsets
37 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release) 37 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release)
38 9005:0286:9005:02ac Adaptec 1800 (Typhoon44) 38 9005:0286:9005:02ac Adaptec 1800 (Typhoon44)
39 9005:0285:9005:02b5 Adaptec 5445 (Voodoo44) 39 9005:0285:9005:02b5 Adaptec 5445 (Voodoo44)
40 9005:0285:15d9:02b5 SMC AOC-USAS-S4i
41 9005:0285:15d9:02c9 SMC AOC-USAS-S4iR
40 9005:0285:9005:02b6 Adaptec 5805 (Voodoo80) 42 9005:0285:9005:02b6 Adaptec 5805 (Voodoo80)
43 9005:0285:15d9:02b6 SMC AOC-USAS-S8i
44 9005:0285:15d9:02ca SMC AOC-USAS-S8iR
41 9005:0285:9005:02b7 Adaptec 5085 (Voodoo08) 45 9005:0285:9005:02b7 Adaptec 5085 (Voodoo08)
42 9005:0285:9005:02bb Adaptec 3405 (Marauder40LP) 46 9005:0285:9005:02bb Adaptec 3405 (Marauder40LP)
43 9005:0285:9005:02bc Adaptec 3805 (Marauder80LP) 47 9005:0285:9005:02bc Adaptec 3805 (Marauder80LP)
@@ -93,6 +97,9 @@ Supported Cards/Chipsets
93 9005:0286:9005:02ae (Aurora Lite ARK) 97 9005:0286:9005:02ae (Aurora Lite ARK)
94 9005:0285:9005:02b0 (Sunrise Lake ARK) 98 9005:0285:9005:02b0 (Sunrise Lake ARK)
95 9005:0285:9005:02b1 Adaptec (Voodoo 8 internal 8 external) 99 9005:0285:9005:02b1 Adaptec (Voodoo 8 internal 8 external)
100 9005:0285:108e:7aac SUN STK RAID REM (Voodoo44 Coyote)
101 9005:0285:108e:0286 SUN SG-XPCIESAS-R-IN (Cougar)
102 9005:0285:108e:0287 SUN SG-XPCIESAS-R-EX (Prometheus)
96 103
97People 104People
98------------------------- 105-------------------------
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
index caf10b155185..88ef88b949f7 100644
--- a/Documentation/scsi/ncr53c8xx.txt
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -562,11 +562,6 @@ if only one has a flaw for some SCSI feature, you can disable the
562support by the driver of this feature at linux start-up and enable 562support by the driver of this feature at linux start-up and enable
563this feature after boot-up only for devices that support it safely. 563this feature after boot-up only for devices that support it safely.
564 564
565CONFIG_SCSI_NCR53C8XX_PROFILE_SUPPORT (default answer: n)
566 This option must be set for profiling information to be gathered
567 and printed out through the proc file system. This features may
568 impact performances.
569
570CONFIG_SCSI_NCR53C8XX_IOMAPPED (default answer: n) 565CONFIG_SCSI_NCR53C8XX_IOMAPPED (default answer: n)
571 Answer "y" if you suspect your mother board to not allow memory mapped I/O. 566 Answer "y" if you suspect your mother board to not allow memory mapped I/O.
572 May slow down performance a little. This option is required by 567 May slow down performance a little. This option is required by
diff --git a/Documentation/sh/clk.txt b/Documentation/sh/clk.txt
new file mode 100644
index 000000000000..9aef710e9a4b
--- /dev/null
+++ b/Documentation/sh/clk.txt
@@ -0,0 +1,32 @@
1Clock framework on SuperH architecture
2
3The framework on SH extends existing API by the function clk_set_rate_ex,
4whose prototype is as follows:
5
6 clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id)
7
8The algo_id parameter specifies the algorithm used to recalculate the clocks
9adjacent to the clock given as the first argument. It is assumed that algo_id==0
10means no changes to adjacent clocks.
11
12Internally, the clk_set_rate_ex forwards request to clk->ops->set_rate method,
13if it is present in ops structure. The method should set the clock rate and adjust
14all needed clocks according to the passed algo_id.
15Exact values for algo_id are machine-dependent. For the sh7722, the following
16values are defined:
17
18 NO_CHANGE = 0,
19 IUS_N1_N1, /* I:U = N:1, U:Sh = N:1 */
20 IUS_322, /* I:U:Sh = 3:2:2 */
21 IUS_522, /* I:U:Sh = 5:2:2 */
22 IUS_N11, /* I:U:Sh = N:1:1 */
23 SB_N1, /* Sh:B = N:1 */
24 SB3_N1, /* Sh:B3 = N:1 */
25 SB3_32, /* Sh:B3 = 3:2 */
26 SB3_43, /* Sh:B3 = 4:3 */
27 SB3_54, /* Sh:B3 = 5:4 */
28 BP_N1, /* B:P = N:1 */
29 IP_N1 /* I:P = N:1 */
30
31Each of these constants describes a relation between clocks that can be set via the FRQCR
32register.
diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
index f9717fe9bd85..215e3b8e7266 100644
--- a/Documentation/spi/pxa2xx
+++ b/Documentation/spi/pxa2xx
@@ -62,7 +62,7 @@ static struct resource pxa_spi_nssp_resources[] = {
62 62
63static struct pxa2xx_spi_master pxa_nssp_master_info = { 63static struct pxa2xx_spi_master pxa_nssp_master_info = {
64 .ssp_type = PXA25x_NSSP, /* Type of SSP */ 64 .ssp_type = PXA25x_NSSP, /* Type of SSP */
65 .clock_enable = CKEN9_NSSP, /* NSSP Peripheral clock */ 65 .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
66 .num_chipselect = 1, /* Matches the number of chips attached to NSSP */ 66 .num_chipselect = 1, /* Matches the number of chips attached to NSSP */
67 .enable_dma = 1, /* Enables NSSP DMA */ 67 .enable_dma = 1, /* Enables NSSP DMA */
68}; 68};
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e96a341eb7e4..1d192565e182 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -197,11 +197,22 @@ and may not be fast.
197 197
198panic_on_oom 198panic_on_oom
199 199
200This enables or disables panic on out-of-memory feature. If this is set to 1, 200This enables or disables panic on out-of-memory feature.
201the kernel panics when out-of-memory happens. If this is set to 0, the kernel
202will kill some rogue process, called oom_killer. Usually, oom_killer can kill
203rogue processes and system will survive. If you want to panic the system
204rather than killing rogue processes, set this to 1.
205 201
206The default value is 0. 202If this is set to 0, the kernel will kill some rogue process,
203called oom_killer. Usually, oom_killer can kill rogue processes and
204system will survive.
205
206If this is set to 1, the kernel panics when out-of-memory happens.
207However, if a process is restricted to certain nodes by mempolicy/cpusets
208and those nodes run out of memory, one process may be killed by the
209oom-killer instead. No panic occurs in this case, because other nodes'
210memory may still be free, which means the system as a whole may not be
211in a fatal state yet.
207 212
213If this is set to 2, the kernel always panics, even in the
214above-mentioned case.
215
216The default value is 0.
2171 and 2 are for failover of clustering. Please select either
218according to your policy of failover.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index d43aa9d3c105..ba328f255417 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -1,6 +1,6 @@
1Linux Magic System Request Key Hacks 1Linux Magic System Request Key Hacks
2Documentation for sysrq.c 2Documentation for sysrq.c
3Last update: 2007-JAN-06 3Last update: 2007-MAR-14
4 4
5* What is the magic SysRq key? 5* What is the magic SysRq key?
6~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -75,7 +75,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
75 75
76'f' - Will call oom_kill to kill a memory hog process. 76'f' - Will call oom_kill to kill a memory hog process.
77 77
78'g' - Used by kgdb on ppc platforms. 78'g' - Used by kgdb on ppc and sh platforms.
79 79
80'h' - Will display help (actually any other key than those listed 80'h' - Will display help (actually any other key than those listed
81 above will display help. but 'h' is easy to remember :-) 81 above will display help. but 'h' is easy to remember :-)
diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt
index d61f6e7865de..b18e86a22506 100644
--- a/Documentation/usb/usb-serial.txt
+++ b/Documentation/usb/usb-serial.txt
@@ -42,7 +42,7 @@ ConnectTech WhiteHEAT 4 port converter
42 http://www.connecttech.com 42 http://www.connecttech.com
43 43
44 For any questions or problems with this driver, please contact 44 For any questions or problems with this driver, please contact
45 Stuart MacDonald at stuartm@connecttech.com 45 Connect Tech's Support Department at support@connecttech.com
46 46
47 47
48HandSpring Visor, Palm USB, and Clié USB driver 48HandSpring Visor, Palm USB, and Clié USB driver
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
new file mode 100644
index 000000000000..41710ccf3a29
--- /dev/null
+++ b/Documentation/vm/slabinfo.c
@@ -0,0 +1,943 @@
1/*
2 * Slabinfo: Tool to get reports about slabs
3 *
4 * (C) 2007 sgi, Christoph Lameter <clameter@sgi.com>
5 *
6 * Compile by:
7 *
8 * gcc -o slabinfo slabinfo.c
9 */
10#include <stdio.h>
11#include <stdlib.h>
12#include <sys/types.h>
13#include <dirent.h>
14#include <string.h>
15#include <unistd.h>
16#include <stdarg.h>
17#include <getopt.h>
18#include <regex.h>
19
/* Capacity limits of the statically sized tables below. */
#define MAX_SLABS 500
#define MAX_ALIASES 500
#define MAX_NODES 1024

/*
 * One entry per slab cache directory. Most fields are loaded by
 * read_slab_dir() from the attribute file of the same name.
 */
struct slabinfo {
	char *name;
	int alias;			/* entries with this set are skipped on output */
	int refs;			/* aliases resolved to this cache (link_slabs) */
	int aliases;
	int align;
	int cache_dma;
	int cpu_slabs;
	int destroy_by_rcu;
	int hwcache_align;
	int object_size;
	int objs_per_slab;
	int sanity_checks;
	int slab_size;
	int store_user;
	int trace;
	int order;
	int poison;
	int reclaim_account;
	int red_zone;
	unsigned long partial;
	unsigned long objects;
	unsigned long slabs;
	int numa[MAX_NODES];		/* per-node list parsed from "slabs" */
	int numa_partial[MAX_NODES];	/* per-node list parsed from "partial" */
} slabinfo[MAX_SLABS];

/* One entry per symlink found in the slab directory. */
struct aliasinfo {
	char *name;			/* name of the symlink */
	char *ref;			/* last path component of the link target */
	struct slabinfo *slab;		/* resolved by link_slabs() */
} aliasinfo[MAX_ALIASES];

/* Table fill levels and NUMA extent discovered while scanning. */
int slabs = 0;
int aliases = 0;
int alias_targets = 0;
int highest_node = 0;

/* Scratch buffer shared by the attribute readers. */
char buffer[4096];

/* Command line switches. */
int show_alias = 0;
int show_slab = 0;
int skip_zero = 1;
int show_numa = 0;
int show_track = 0;
int show_first_alias = 0;
int validate = 0;
int shrink = 0;
int show_inverted = 0;
int show_single_ref = 0;
int show_totals = 0;
int sort_size = 0;

int page_size;

/* Compiled slab name filter. */
regex_t pattern;
66
/*
 * Print a printf-style message to stderr and terminate the program
 * with exit status 1. Never returns.
 */
void fatal(const char *x, ...)
{
	va_list args;

	va_start(args, x);
	vfprintf(stderr, x, args);
	va_end(args);

	exit(1);
}
76
/* Write the option summary to stdout. */
void usage(void)
{
	/* The text contains no conversion specifiers, so it is emitted verbatim. */
	fputs("slabinfo [-ahnpvtsz] [slab-regexp]\n"
		"-a|--aliases Show aliases\n"
		"-h|--help Show usage information\n"
		"-n|--numa Show NUMA information\n"
		"-s|--shrink Shrink slabs\n"
		"-v|--validate Validate slabs\n"
		"-t|--tracking Show alloc/free information\n"
		"-T|--Totals Show summary information\n"
		"-l|--slabs Show slabs\n"
		"-S|--Size Sort by size\n"
		"-z|--zero Include empty slabs\n"
		"-f|--first-alias Show first alias\n"
		"-i|--inverted Inverted list\n"
		"-1|--1ref Single reference\n",
		stdout);
}
95
96unsigned long read_obj(char *name)
97{
98 FILE *f = fopen(name, "r");
99
100 if (!f)
101 buffer[0] = 0;
102 else {
103 if (!fgets(buffer,sizeof(buffer), f))
104 buffer[0] = 0;
105 fclose(f);
106 if (buffer[strlen(buffer)] == '\n')
107 buffer[strlen(buffer)] = 0;
108 }
109 return strlen(buffer);
110}
111
112
113/*
114 * Get the contents of an attribute
115 */
116unsigned long get_obj(char *name)
117{
118 if (!read_obj(name))
119 return 0;
120
121 return atol(buffer);
122}
123
124unsigned long get_obj_and_str(char *name, char **x)
125{
126 unsigned long result = 0;
127 char *p;
128
129 *x = NULL;
130
131 if (!read_obj(name)) {
132 x = NULL;
133 return 0;
134 }
135 result = strtoul(buffer, &p, 10);
136 while (*p == ' ')
137 p++;
138 if (*p)
139 *x = strdup(p);
140 return result;
141}
142
143void set_obj(struct slabinfo *s, char *name, int n)
144{
145 char x[100];
146
147 sprintf(x, "%s/%s", s->name, name);
148
149 FILE *f = fopen(x, "w");
150
151 if (!f)
152 fatal("Cannot write to %s\n", x);
153
154 fprintf(f, "%d\n", n);
155 fclose(f);
156}
157
/*
 * Format @value into @buffer as a short human-readable size.
 *
 * Values up to 1000 are printed as plain integers. Larger values are
 * scaled to K (above 1000), M (above 1000000) or G (above 1000000000)
 * and printed with exactly one decimal digit, e.g. 12345 -> "12.3K".
 *
 * Returns the number of characters written, excluding the NUL.
 */
int store_size(char *buffer, unsigned long value)
{
	unsigned long divisor = 1;
	char trailer = 0;

	if (value > 1000000000UL) {
		divisor = 100000000UL;
		trailer = 'G';
	} else if (value > 1000000UL) {
		divisor = 100000UL;
		trailer = 'M';
	} else if (value > 1000UL) {
		divisor = 100;
		trailer = 'K';
	}

	if (!trailer)
		return sprintf(buffer, "%lu", value);

	/*
	 * The divisor is a tenth of the unit, so the quotient counts tenths:
	 * its last digit is the single decimal place.
	 */
	value /= divisor;
	return sprintf(buffer, "%lu.%lu%c", value / 10, value % 10, trailer);
}
192
193void decode_numa_list(int *numa, char *t)
194{
195 int node;
196 int nr;
197
198 memset(numa, 0, MAX_NODES * sizeof(int));
199
200 while (*t == 'N') {
201 t++;
202 node = strtoul(t, &t, 10);
203 if (*t == '=') {
204 t++;
205 nr = strtoul(t, &t, 10);
206 numa[node] = nr;
207 if (node > highest_node)
208 highest_node = node;
209 }
210 while (*t == ' ')
211 t++;
212 }
213}
214
/* Write 1 to the "validate" attribute of the cache. */
void slab_validate(struct slabinfo *s)
{
	const int trigger = 1;

	set_obj(s, "validate", trigger);
}
219
/* Write 1 to the "shrink" attribute of the cache. */
void slab_shrink(struct slabinfo *s)
{
	const int trigger = 1;

	set_obj(s, "shrink", trigger);
}
224
/* Number of report lines printed so far; 0 means the header is still due. */
int line = 0;

/* Print the column header of the per-cache report. */
void first_line(void)
{
	printf("Name Objects Objsize Space ");
	printf("Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
}
232
233/*
234 * Find the shortest alias of a slab
235 */
236struct aliasinfo *find_one_alias(struct slabinfo *find)
237{
238 struct aliasinfo *a;
239 struct aliasinfo *best = NULL;
240
241 for(a = aliasinfo;a < aliasinfo + aliases; a++) {
242 if (a->slab == find &&
243 (!best || strlen(best->name) < strlen(a->name))) {
244 best = a;
245 if (strncmp(a->name,"kmall", 5) == 0)
246 return best;
247 }
248 }
249 if (best)
250 return best;
251 fatal("Cannot find alias for %s\n", find->name);
252 return NULL;
253}
254
255unsigned long slab_size(struct slabinfo *s)
256{
257 return s->slabs * (page_size << s->order);
258}
259
260
261void slabcache(struct slabinfo *s)
262{
263 char size_str[20];
264 char dist_str[40];
265 char flags[20];
266 char *p = flags;
267
268 if (skip_zero && !s->slabs)
269 return;
270
271 store_size(size_str, slab_size(s));
272 sprintf(dist_str,"%lu/%lu/%d", s->slabs, s->partial, s->cpu_slabs);
273
274 if (!line++)
275 first_line();
276
277 if (s->aliases)
278 *p++ = '*';
279 if (s->cache_dma)
280 *p++ = 'd';
281 if (s->hwcache_align)
282 *p++ = 'A';
283 if (s->poison)
284 *p++ = 'P';
285 if (s->reclaim_account)
286 *p++ = 'a';
287 if (s->red_zone)
288 *p++ = 'Z';
289 if (s->sanity_checks)
290 *p++ = 'F';
291 if (s->store_user)
292 *p++ = 'U';
293 if (s->trace)
294 *p++ = 'T';
295
296 *p = 0;
297 printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
298 s->name, s->objects, s->object_size, size_str, dist_str,
299 s->objs_per_slab, s->order,
300 s->slabs ? (s->partial * 100) / s->slabs : 100,
301 s->slabs ? (s->objects * s->object_size * 100) /
302 (s->slabs * (page_size << s->order)) : 100,
303 flags);
304}
305
306void slab_numa(struct slabinfo *s)
307{
308 int node;
309
310 if (!highest_node)
311 fatal("No NUMA information available.\n");
312
313 if (skip_zero && !s->slabs)
314 return;
315
316 if (!line) {
317 printf("\nSlab Node ");
318 for(node = 0; node <= highest_node; node++)
319 printf(" %4d", node);
320 printf("\n----------------------");
321 for(node = 0; node <= highest_node; node++)
322 printf("-----");
323 printf("\n");
324 }
325 printf("%-21s ", s->name);
326 for(node = 0; node <= highest_node; node++) {
327 char b[20];
328
329 store_size(b, s->numa[node]);
330 printf(" %4s", b);
331 }
332 printf("\n");
333 line++;
334}
335
336void show_tracking(struct slabinfo *s)
337{
338 printf("\n%s: Calls to allocate a slab object\n", s->name);
339 printf("---------------------------------------------------\n");
340 if (read_obj("alloc_calls"))
341 printf(buffer);
342
343 printf("%s: Calls to free a slab object\n", s->name);
344 printf("-----------------------------------------------\n");
345 if (read_obj("free_calls"))
346 printf(buffer);
347
348}
349
350void totals(void)
351{
352 struct slabinfo *s;
353
354 int used_slabs = 0;
355 char b1[20], b2[20], b3[20], b4[20];
356 unsigned long long max = 1ULL << 63;
357
358 /* Object size */
359 unsigned long long min_objsize = max, max_objsize = 0, avg_objsize;
360
361 /* Number of partial slabs in a slabcache */
362 unsigned long long min_partial = max, max_partial = 0,
363 avg_partial, total_partial = 0;
364
365 /* Number of slabs in a slab cache */
366 unsigned long long min_slabs = max, max_slabs = 0,
367 avg_slabs, total_slabs = 0;
368
369 /* Size of the whole slab */
370 unsigned long long min_size = max, max_size = 0,
371 avg_size, total_size = 0;
372
373 /* Bytes used for object storage in a slab */
374 unsigned long long min_used = max, max_used = 0,
375 avg_used, total_used = 0;
376
377 /* Waste: Bytes used for alignment and padding */
378 unsigned long long min_waste = max, max_waste = 0,
379 avg_waste, total_waste = 0;
380 /* Number of objects in a slab */
381 unsigned long long min_objects = max, max_objects = 0,
382 avg_objects, total_objects = 0;
383 /* Waste per object */
384 unsigned long long min_objwaste = max,
385 max_objwaste = 0, avg_objwaste,
386 total_objwaste = 0;
387
388 /* Memory per object */
389 unsigned long long min_memobj = max,
390 max_memobj = 0, avg_memobj,
391 total_objsize = 0;
392
393 /* Percentage of partial slabs per slab */
394 unsigned long min_ppart = 100, max_ppart = 0,
395 avg_ppart, total_ppart = 0;
396
397 /* Number of objects in partial slabs */
398 unsigned long min_partobj = max, max_partobj = 0,
399 avg_partobj, total_partobj = 0;
400
401 /* Percentage of partial objects of all objects in a slab */
402 unsigned long min_ppartobj = 100, max_ppartobj = 0,
403 avg_ppartobj, total_ppartobj = 0;
404
405
406 for (s = slabinfo; s < slabinfo + slabs; s++) {
407 unsigned long long size;
408 unsigned long used;
409 unsigned long long wasted;
410 unsigned long long objwaste;
411 long long objects_in_partial_slabs;
412 unsigned long percentage_partial_slabs;
413 unsigned long percentage_partial_objs;
414
415 if (!s->slabs || !s->objects)
416 continue;
417
418 used_slabs++;
419
420 size = slab_size(s);
421 used = s->objects * s->object_size;
422 wasted = size - used;
423 objwaste = s->slab_size - s->object_size;
424
425 objects_in_partial_slabs = s->objects -
426 (s->slabs - s->partial - s ->cpu_slabs) *
427 s->objs_per_slab;
428
429 if (objects_in_partial_slabs < 0)
430 objects_in_partial_slabs = 0;
431
432 percentage_partial_slabs = s->partial * 100 / s->slabs;
433 if (percentage_partial_slabs > 100)
434 percentage_partial_slabs = 100;
435
436 percentage_partial_objs = objects_in_partial_slabs * 100
437 / s->objects;
438
439 if (percentage_partial_objs > 100)
440 percentage_partial_objs = 100;
441
442 if (s->object_size < min_objsize)
443 min_objsize = s->object_size;
444 if (s->partial < min_partial)
445 min_partial = s->partial;
446 if (s->slabs < min_slabs)
447 min_slabs = s->slabs;
448 if (size < min_size)
449 min_size = size;
450 if (wasted < min_waste)
451 min_waste = wasted;
452 if (objwaste < min_objwaste)
453 min_objwaste = objwaste;
454 if (s->objects < min_objects)
455 min_objects = s->objects;
456 if (used < min_used)
457 min_used = used;
458 if (objects_in_partial_slabs < min_partobj)
459 min_partobj = objects_in_partial_slabs;
460 if (percentage_partial_slabs < min_ppart)
461 min_ppart = percentage_partial_slabs;
462 if (percentage_partial_objs < min_ppartobj)
463 min_ppartobj = percentage_partial_objs;
464 if (s->slab_size < min_memobj)
465 min_memobj = s->slab_size;
466
467 if (s->object_size > max_objsize)
468 max_objsize = s->object_size;
469 if (s->partial > max_partial)
470 max_partial = s->partial;
471 if (s->slabs > max_slabs)
472 max_slabs = s->slabs;
473 if (size > max_size)
474 max_size = size;
475 if (wasted > max_waste)
476 max_waste = wasted;
477 if (objwaste > max_objwaste)
478 max_objwaste = objwaste;
479 if (s->objects > max_objects)
480 max_objects = s->objects;
481 if (used > max_used)
482 max_used = used;
483 if (objects_in_partial_slabs > max_partobj)
484 max_partobj = objects_in_partial_slabs;
485 if (percentage_partial_slabs > max_ppart)
486 max_ppart = percentage_partial_slabs;
487 if (percentage_partial_objs > max_ppartobj)
488 max_ppartobj = percentage_partial_objs;
489 if (s->slab_size > max_memobj)
490 max_memobj = s->slab_size;
491
492 total_partial += s->partial;
493 total_slabs += s->slabs;
494 total_size += size;
495 total_waste += wasted;
496
497 total_objects += s->objects;
498 total_used += used;
499 total_partobj += objects_in_partial_slabs;
500 total_ppart += percentage_partial_slabs;
501 total_ppartobj += percentage_partial_objs;
502
503 total_objwaste += s->objects * objwaste;
504 total_objsize += s->objects * s->slab_size;
505 }
506
507 if (!total_objects) {
508 printf("No objects\n");
509 return;
510 }
511 if (!used_slabs) {
512 printf("No slabs\n");
513 return;
514 }
515
516 /* Per slab averages */
517 avg_partial = total_partial / used_slabs;
518 avg_slabs = total_slabs / used_slabs;
519 avg_size = total_size / used_slabs;
520 avg_waste = total_waste / used_slabs;
521
522 avg_objects = total_objects / used_slabs;
523 avg_used = total_used / used_slabs;
524 avg_partobj = total_partobj / used_slabs;
525 avg_ppart = total_ppart / used_slabs;
526 avg_ppartobj = total_ppartobj / used_slabs;
527
528 /* Per object object sizes */
529 avg_objsize = total_used / total_objects;
530 avg_objwaste = total_objwaste / total_objects;
531 avg_partobj = total_partobj * 100 / total_objects;
532 avg_memobj = total_objsize / total_objects;
533
534 printf("Slabcache Totals\n");
535 printf("----------------\n");
536 printf("Slabcaches : %3d Aliases : %3d->%-3d Active: %3d\n",
537 slabs, aliases, alias_targets, used_slabs);
538
539 store_size(b1, total_size);store_size(b2, total_waste);
540 store_size(b3, total_waste * 100 / total_used);
541 printf("Memory used: %6s # Loss : %6s MRatio: %6s%%\n", b1, b2, b3);
542
543 store_size(b1, total_objects);store_size(b2, total_partobj);
544 store_size(b3, total_partobj * 100 / total_objects);
545 printf("# Objects : %6s # PartObj: %6s ORatio: %6s%%\n", b1, b2, b3);
546
547 printf("\n");
548 printf("Per Cache Average Min Max Total\n");
549 printf("---------------------------------------------------------\n");
550
551 store_size(b1, avg_objects);store_size(b2, min_objects);
552 store_size(b3, max_objects);store_size(b4, total_objects);
553 printf("#Objects %10s %10s %10s %10s\n",
554 b1, b2, b3, b4);
555
556 store_size(b1, avg_slabs);store_size(b2, min_slabs);
557 store_size(b3, max_slabs);store_size(b4, total_slabs);
558 printf("#Slabs %10s %10s %10s %10s\n",
559 b1, b2, b3, b4);
560
561 store_size(b1, avg_partial);store_size(b2, min_partial);
562 store_size(b3, max_partial);store_size(b4, total_partial);
563 printf("#PartSlab %10s %10s %10s %10s\n",
564 b1, b2, b3, b4);
565 store_size(b1, avg_ppart);store_size(b2, min_ppart);
566 store_size(b3, max_ppart);
567 store_size(b4, total_partial * 100 / total_slabs);
568 printf("%%PartSlab %10s%% %10s%% %10s%% %10s%%\n",
569 b1, b2, b3, b4);
570
571 store_size(b1, avg_partobj);store_size(b2, min_partobj);
572 store_size(b3, max_partobj);
573 store_size(b4, total_partobj);
574 printf("PartObjs %10s %10s %10s %10s\n",
575 b1, b2, b3, b4);
576
577 store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj);
578 store_size(b3, max_ppartobj);
579 store_size(b4, total_partobj * 100 / total_objects);
580 printf("%% PartObj %10s%% %10s%% %10s%% %10s%%\n",
581 b1, b2, b3, b4);
582
583 store_size(b1, avg_size);store_size(b2, min_size);
584 store_size(b3, max_size);store_size(b4, total_size);
585 printf("Memory %10s %10s %10s %10s\n",
586 b1, b2, b3, b4);
587
588 store_size(b1, avg_used);store_size(b2, min_used);
589 store_size(b3, max_used);store_size(b4, total_used);
590 printf("Used %10s %10s %10s %10s\n",
591 b1, b2, b3, b4);
592
593 store_size(b1, avg_waste);store_size(b2, min_waste);
594 store_size(b3, max_waste);store_size(b4, total_waste);
595 printf("Loss %10s %10s %10s %10s\n",
596 b1, b2, b3, b4);
597
598 printf("\n");
599 printf("Per Object Average Min Max\n");
600 printf("---------------------------------------------\n");
601
602 store_size(b1, avg_memobj);store_size(b2, min_memobj);
603 store_size(b3, max_memobj);
604 printf("Memory %10s %10s %10s\n",
605 b1, b2, b3);
606 store_size(b1, avg_objsize);store_size(b2, min_objsize);
607 store_size(b3, max_objsize);
608 printf("User %10s %10s %10s\n",
609 b1, b2, b3);
610
611 store_size(b1, avg_objwaste);store_size(b2, min_objwaste);
612 store_size(b3, max_objwaste);
613 printf("Loss %10s %10s %10s\n",
614 b1, b2, b3);
615}
616
617void sort_slabs(void)
618{
619 struct slabinfo *s1,*s2;
620
621 for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
622 for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
623 int result;
624
625 if (sort_size)
626 result = slab_size(s1) < slab_size(s2);
627 else
628 result = strcasecmp(s1->name, s2->name);
629
630 if (show_inverted)
631 result = -result;
632
633 if (result > 0) {
634 struct slabinfo t;
635
636 memcpy(&t, s1, sizeof(struct slabinfo));
637 memcpy(s1, s2, sizeof(struct slabinfo));
638 memcpy(s2, &t, sizeof(struct slabinfo));
639 }
640 }
641 }
642}
643
644void sort_aliases(void)
645{
646 struct aliasinfo *a1,*a2;
647
648 for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) {
649 for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) {
650 char *n1, *n2;
651
652 n1 = a1->name;
653 n2 = a2->name;
654 if (show_alias && !show_inverted) {
655 n1 = a1->ref;
656 n2 = a2->ref;
657 }
658 if (strcasecmp(n1, n2) > 0) {
659 struct aliasinfo t;
660
661 memcpy(&t, a1, sizeof(struct aliasinfo));
662 memcpy(a1, a2, sizeof(struct aliasinfo));
663 memcpy(a2, &t, sizeof(struct aliasinfo));
664 }
665 }
666 }
667}
668
669void link_slabs(void)
670{
671 struct aliasinfo *a;
672 struct slabinfo *s;
673
674 for (a = aliasinfo; a < aliasinfo + aliases; a++) {
675
676 for(s = slabinfo; s < slabinfo + slabs; s++)
677 if (strcmp(a->ref, s->name) == 0) {
678 a->slab = s;
679 s->refs++;
680 break;
681 }
682 if (s == slabinfo + slabs)
683 fatal("Unresolved alias %s\n", a->ref);
684 }
685}
686
687void alias(void)
688{
689 struct aliasinfo *a;
690 char *active = NULL;
691
692 sort_aliases();
693 link_slabs();
694
695 for(a = aliasinfo; a < aliasinfo + aliases; a++) {
696
697 if (!show_single_ref && a->slab->refs == 1)
698 continue;
699
700 if (!show_inverted) {
701 if (active) {
702 if (strcmp(a->slab->name, active) == 0) {
703 printf(" %s", a->name);
704 continue;
705 }
706 }
707 printf("\n%-20s <- %s", a->slab->name, a->name);
708 active = a->slab->name;
709 }
710 else
711 printf("%-20s -> %s\n", a->name, a->slab->name);
712 }
713 if (active)
714 printf("\n");
715}
716
717
718void rename_slabs(void)
719{
720 struct slabinfo *s;
721 struct aliasinfo *a;
722
723 for (s = slabinfo; s < slabinfo + slabs; s++) {
724 if (*s->name != ':')
725 continue;
726
727 if (s->refs > 1 && !show_first_alias)
728 continue;
729
730 a = find_one_alias(s);
731
732 s->name = a->name;
733 }
734}
735
736int slab_mismatch(char *slab)
737{
738 return regexec(&pattern, slab, 0, NULL, 0);
739}
740
741void read_slab_dir(void)
742{
743 DIR *dir;
744 struct dirent *de;
745 struct slabinfo *slab = slabinfo;
746 struct aliasinfo *alias = aliasinfo;
747 char *p;
748 char *t;
749 int count;
750
751 dir = opendir(".");
752 while ((de = readdir(dir))) {
753 if (de->d_name[0] == '.' ||
754 slab_mismatch(de->d_name))
755 continue;
756 switch (de->d_type) {
757 case DT_LNK:
758 alias->name = strdup(de->d_name);
759 count = readlink(de->d_name, buffer, sizeof(buffer));
760
761 if (count < 0)
762 fatal("Cannot read symlink %s\n", de->d_name);
763
764 buffer[count] = 0;
765 p = buffer + count;
766 while (p > buffer && p[-1] != '/')
767 p--;
768 alias->ref = strdup(p);
769 alias++;
770 break;
771 case DT_DIR:
772 if (chdir(de->d_name))
773 fatal("Unable to access slab %s\n", slab->name);
774 slab->name = strdup(de->d_name);
775 slab->alias = 0;
776 slab->refs = 0;
777 slab->aliases = get_obj("aliases");
778 slab->align = get_obj("align");
779 slab->cache_dma = get_obj("cache_dma");
780 slab->cpu_slabs = get_obj("cpu_slabs");
781 slab->destroy_by_rcu = get_obj("destroy_by_rcu");
782 slab->hwcache_align = get_obj("hwcache_align");
783 slab->object_size = get_obj("object_size");
784 slab->objects = get_obj("objects");
785 slab->objs_per_slab = get_obj("objs_per_slab");
786 slab->order = get_obj("order");
787 slab->partial = get_obj("partial");
788 slab->partial = get_obj_and_str("partial", &t);
789 decode_numa_list(slab->numa_partial, t);
790 slab->poison = get_obj("poison");
791 slab->reclaim_account = get_obj("reclaim_account");
792 slab->red_zone = get_obj("red_zone");
793 slab->sanity_checks = get_obj("sanity_checks");
794 slab->slab_size = get_obj("slab_size");
795 slab->slabs = get_obj_and_str("slabs", &t);
796 decode_numa_list(slab->numa, t);
797 slab->store_user = get_obj("store_user");
798 slab->trace = get_obj("trace");
799 chdir("..");
800 if (slab->name[0] == ':')
801 alias_targets++;
802 slab++;
803 break;
804 default :
805 fatal("Unknown file type %lx\n", de->d_type);
806 }
807 }
808 closedir(dir);
809 slabs = slab - slabinfo;
810 aliases = alias - aliasinfo;
811 if (slabs > MAX_SLABS)
812 fatal("Too many slabs\n");
813 if (aliases > MAX_ALIASES)
814 fatal("Too many aliases\n");
815}
816
817void output_slabs(void)
818{
819 struct slabinfo *slab;
820
821 for (slab = slabinfo; slab < slabinfo + slabs; slab++) {
822
823 if (slab->alias)
824 continue;
825
826
827 if (show_numa)
828 slab_numa(slab);
829 else
830 if (show_track)
831 show_tracking(slab);
832 else
833 if (validate)
834 slab_validate(slab);
835 else
836 if (shrink)
837 slab_shrink(slab);
838 else {
839 if (show_slab)
840 slabcache(slab);
841 }
842 }
843}
844
/*
 * Long options and the short option each one maps to. "tracking",
 * "Totals" and "Size" are the spellings printed by usage(); "track" is
 * kept so existing invocations continue to work.
 */
struct option opts[] = {
	{ "aliases", 0, NULL, 'a' },
	{ "slabs", 0, NULL, 'l' },
	{ "numa", 0, NULL, 'n' },
	{ "zero", 0, NULL, 'z' },
	{ "help", 0, NULL, 'h' },
	{ "validate", 0, NULL, 'v' },
	{ "first-alias", 0, NULL, 'f' },
	{ "shrink", 0, NULL, 's' },
	{ "track", 0, NULL, 't'},
	{ "tracking", 0, NULL, 't'},
	{ "inverted", 0, NULL, 'i'},
	{ "1ref", 0, NULL, '1'},
	{ "Totals", 0, NULL, 'T'},
	{ "Size", 0, NULL, 'S'},
	{ NULL, 0, NULL, 0 }
};
859
860int main(int argc, char *argv[])
861{
862 int c;
863 int err;
864 char *pattern_source;
865
866 page_size = getpagesize();
867 if (chdir("/sys/slab"))
868 fatal("This kernel does not have SLUB support.\n");
869
870 while ((c = getopt_long(argc, argv, "afhil1npstvzTS", opts, NULL)) != -1)
871 switch(c) {
872 case '1':
873 show_single_ref = 1;
874 break;
875 case 'a':
876 show_alias = 1;
877 break;
878 case 'f':
879 show_first_alias = 1;
880 break;
881 case 'h':
882 usage();
883 return 0;
884 case 'i':
885 show_inverted = 1;
886 break;
887 case 'n':
888 show_numa = 1;
889 break;
890 case 's':
891 shrink = 1;
892 break;
893 case 'l':
894 show_slab = 1;
895 break;
896 case 't':
897 show_track = 1;
898 break;
899 case 'v':
900 validate = 1;
901 break;
902 case 'z':
903 skip_zero = 0;
904 break;
905 case 'T':
906 show_totals = 1;
907 break;
908 case 'S':
909 sort_size = 1;
910 break;
911
912 default:
913 fatal("%s: Invalid option '%c'\n", argv[0], optopt);
914
915 }
916
917 if (!show_slab && !show_alias && !show_track
918 && !validate && !shrink)
919 show_slab = 1;
920
921 if (argc > optind)
922 pattern_source = argv[optind];
923 else
924 pattern_source = ".*";
925
926 err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
927 if (err)
928 fatal("%s: Invalid pattern '%s' code %d\n",
929 argv[0], pattern_source, err);
930 read_slab_dir();
931 if (show_alias)
932 alias();
933 else
934 if (show_totals)
935 totals();
936 else {
937 link_slabs();
938 rename_slabs();
939 sort_slabs();
940 output_slabs();
941 }
942 return 0;
943}
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
new file mode 100644
index 000000000000..727c8d81aeaf
--- /dev/null
+++ b/Documentation/vm/slub.txt
@@ -0,0 +1,113 @@
1Short users guide for SLUB
2--------------------------
3
4First of all slub should transparently replace SLAB. If you enable
5SLUB then everything should work the same (Note the word "should".
6There is likely not much value in that word at this point).
7
8The basic philosophy of SLUB is very different from SLAB. SLAB
9requires rebuilding the kernel to activate debug options for all
10SLABS. SLUB always includes full debugging but it is off by default.
11SLUB can enable debugging only for selected slabs in order to avoid
12an impact on overall system performance which may make a bug more
13difficult to find.
14
15In order to switch debugging on one can add an option "slub_debug"
16to the kernel command line. That will enable full debugging for
17all slabs.
18
19Typically one would then use the "slabinfo" command to get statistical
20data and perform operations on the slabs. By default slabinfo only lists
21slabs that have data in them. See "slabinfo -h" for more options when
22running the command. slabinfo can be compiled with
23
24gcc -o slabinfo Documentation/vm/slabinfo.c
25
26Some of the modes of operation of slabinfo require that slub debugging
27be enabled on the command line. F.e. no tracking information will be
28available without debugging on and validation can only partially
29be performed if debugging was not switched on.
30
31Some more sophisticated uses of slub_debug:
32-------------------------------------------
33
34Parameters may be given to slub_debug. If none is specified then full
35debugging is enabled. Format:
36
37slub_debug=<Debug-Options> Enable options for all slabs
38slub_debug=<Debug-Options>,<slab name>
39 Enable options only for select slabs
40
41Possible debug options are
42 F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry
43 SLAB legacy issues)
44 Z Red zoning
45 P Poisoning (object and padding)
46 U User tracking (free and alloc)
47 T Trace (please only use on single slabs)
48
49F.e. in order to boot just with sanity checks and red zoning one would specify:
50
51 slub_debug=FZ
52
53Trying to find an issue in the dentry cache? Try
54
55 slub_debug=,dentry_cache
56
57to only enable debugging on the dentry cache.
58
59Red zoning and tracking may realign the slab. We can just apply sanity checks
60to the dentry cache with
61
62 slub_debug=F,dentry_cache
63
64In case you forgot to enable debugging on the kernel command line: It is
65possible to enable debugging manually when the kernel is up. Look at the
66contents of:
67
68/sys/slab/<slab name>/
69
70Look at the writable files. Writing 1 to them will enable the
71corresponding debug option. All options can be set on a slab that does
72not contain objects. If the slab already contains objects then sanity checks
73and tracing may only be enabled. The other options may cause the realignment
74of objects.
75
76Careful with tracing: It may spew out lots of information and never stop if
77used on the wrong slab.
78
79SLAB Merging
80------------
81
82If no debugging is specified then SLUB may merge similar slabs together
83in order to reduce overhead and increase cache hotness of objects.
84slabinfo -a displays which slabs were merged together.
85
86Getting more performance
87------------------------
88
89To some degree SLUB's performance is limited by the need to take the
90list_lock once in a while to deal with partial slabs. That overhead is
91governed by the order of the allocation for each slab. The allocations
92can be influenced by kernel parameters:
93
94slub_min_objects=x (default 8)
95slub_min_order=x (default 0)
96slub_max_order=x (default 4)
97
98slub_min_objects allows to specify how many objects must at least fit
99into one slab in order for the allocation order to be acceptable.
100In general slub will be able to perform this number of allocations
101on a slab without consulting centralized resources (list_lock) where
102contention may occur.
103
104slub_min_order specifies a minimum order of slabs. It has a similar effect to
105slub_min_objects.
106
107slub_max_order specifies the order at which slub_min_objects should no
108longer be checked. This is useful to avoid SLUB trying to generate
109super large order pages to fit slub_min_objects of a slab cache with
110large object sizes into one high order page.
111
112
113Christoph Lameter, <clameter@sgi.com>, April 10, 2007
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 85f51e5a749f..6177d881983f 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -149,7 +149,19 @@ NUMA
149 149
150 numa=noacpi Don't parse the SRAT table for NUMA setup 150 numa=noacpi Don't parse the SRAT table for NUMA setup
151 151
152 numa=fake=X Fake X nodes and ignore NUMA setup of the actual machine. 152 numa=fake=CMDLINE
153 If a number, fakes CMDLINE nodes and ignores NUMA setup of the
154 actual machine. Otherwise, system memory is configured
155 depending on the sizes and coefficients listed. For example:
156 numa=fake=2*512,1024,4*256,*128
157 gives two 512M nodes, a 1024M node, four 256M nodes, and the
158 rest split into 128M chunks. If the last character of CMDLINE
159 is a *, the remaining memory is divided up equally among its
160 coefficient:
161 numa=fake=2*512,2*
162 gives two 512M nodes and the rest split into two nodes.
163 Otherwise, the remaining system RAM is allocated to an
164 additional node.
153 165
154 numa=hotadd=percent 166 numa=hotadd=percent
155 Only allow hotadd memory to preallocate page structures upto 167 Only allow hotadd memory to preallocate page structures upto
diff --git a/Documentation/x86_64/fake-numa-for-cpusets b/Documentation/x86_64/fake-numa-for-cpusets
new file mode 100644
index 000000000000..d1a985c5b00a
--- /dev/null
+++ b/Documentation/x86_64/fake-numa-for-cpusets
@@ -0,0 +1,66 @@
1Using numa=fake and CPUSets for Resource Management
2Written by David Rientjes <rientjes@cs.washington.edu>
3
4This document describes how the numa=fake x86_64 command-line option can be used
5in conjunction with cpusets for coarse memory management. Using this feature,
6you can create fake NUMA nodes that represent contiguous chunks of memory and
7assign them to cpusets and their attached tasks. This is a way of limiting the
8amount of system memory that is available to a certain class of tasks.
9
10For more information on the features of cpusets, see Documentation/cpusets.txt.
11There are a number of different configurations you can use for your needs. For
12more information on the numa=fake command line option and its various ways of
13configuring fake nodes, see Documentation/x86_64/boot-options.txt.
14
15For the purposes of this introduction, we'll assume a very primitive NUMA
16emulation setup of "numa=fake=4*512,". This will split our system memory into
17four equal chunks of 512M each that we can now use to assign to cpusets. As
18you become more familiar with using this combination for resource control,
19you'll determine a better setup to minimize the number of nodes you have to deal
20with.
21
22A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
23
24 Faking node 0 at 0000000000000000-0000000020000000 (512MB)
25 Faking node 1 at 0000000020000000-0000000040000000 (512MB)
26 Faking node 2 at 0000000040000000-0000000060000000 (512MB)
27 Faking node 3 at 0000000060000000-0000000080000000 (512MB)
28 ...
29 On node 0 totalpages: 130975
30 On node 1 totalpages: 131072
31 On node 2 totalpages: 131072
32 On node 3 totalpages: 131072
33
34Now following the instructions for mounting the cpusets filesystem from
35Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
36address spaces) to individual cpusets:
37
38 [root@xroads /]# mkdir exampleset
39 [root@xroads /]# mount -t cpuset none exampleset
40 [root@xroads /]# mkdir exampleset/ddset
41 [root@xroads /]# cd exampleset/ddset
42 [root@xroads /exampleset/ddset]# echo 0-1 > cpus
43 [root@xroads /exampleset/ddset]# echo 0-1 > mems
44
45Now this cpuset, 'ddset', will only be allowed access to fake nodes 0 and 1 for
46memory allocations (1G).
47
48You can now assign tasks to these cpusets to limit the memory resources
49available to them according to the fake nodes assigned as mems:
50
51 [root@xroads /exampleset/ddset]# echo $$ > tasks
52 [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
53 [1] 13425
54
55Notice the difference between the system memory usage as reported by
56/proc/meminfo between the restricted cpuset case above and the unrestricted
57case (i.e. running the same 'dd' command without assigning it to a fake NUMA
58cpuset):
59 Unrestricted Restricted
60 MemTotal: 3091900 kB 3091900 kB
61 MemFree: 42113 kB 1513236 kB
62
63This allows for coarse memory management for the tasks you assign to particular
64cpusets. Since cpusets can form a hierarchy, you can create some pretty
65interesting combinations of use-cases for various classes of tasks for your
66memory management needs.
diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck
index 068a6d9904b9..feaeaf6f6e4d 100644
--- a/Documentation/x86_64/machinecheck
+++ b/Documentation/x86_64/machinecheck
@@ -36,7 +36,12 @@ between all CPUs.
36 36
37check_interval 37check_interval
38 How often to poll for corrected machine check errors, in seconds 38 How often to poll for corrected machine check errors, in seconds
39 (Note output is hexademical). Default 5 minutes. 39 (Note output is hexademical). Default 5 minutes. When the poller
40 finds MCEs it triggers an exponential speedup (poll more often) on
41 the polling interval. When the poller stops finding MCEs, it
42 triggers an exponential backoff (poll less often) on the polling
43 interval. The check_interval variable is both the initial and
44 maximum polling interval.
40 45
41tolerant 46tolerant
42 Tolerance level. When a machine check exception occurs for a non 47 Tolerance level. When a machine check exception occurs for a non