aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/DMA-mapping.txt8
-rw-r--r--Documentation/DocBook/kernel-api.tmpl9
-rw-r--r--Documentation/RCU/whatisRCU.txt5
-rw-r--r--Documentation/SubmitChecklist76
-rw-r--r--Documentation/SubmittingPatches12
-rw-r--r--Documentation/accounting/delay-accounting.txt112
-rw-r--r--Documentation/accounting/getdelays.c396
-rw-r--r--Documentation/accounting/taskstats.txt181
-rw-r--r--Documentation/cciss.txt1
-rw-r--r--Documentation/connector/ucon.c206
-rw-r--r--Documentation/cpu-freq/user-guide.txt5
-rw-r--r--Documentation/cpu-hotplug.txt12
-rw-r--r--Documentation/cpusets.txt6
-rw-r--r--Documentation/devices.txt8
-rw-r--r--Documentation/drivers/edac/edac.txt152
-rw-r--r--Documentation/fb/imacfb.txt31
-rw-r--r--Documentation/feature-removal-schedule.txt66
-rw-r--r--Documentation/filesystems/00-INDEX4
-rw-r--r--Documentation/filesystems/Locking4
-rw-r--r--Documentation/filesystems/relay.txt479
-rw-r--r--Documentation/filesystems/relayfs.txt442
-rw-r--r--Documentation/filesystems/vfs.txt4
-rw-r--r--Documentation/hwmon/abituguru32
-rw-r--r--Documentation/i2c/busses/i2c-sis96x4
-rw-r--r--Documentation/i386/boot.txt1
-rw-r--r--Documentation/i386/zero-page.txt4
-rw-r--r--Documentation/infiniband/ipoib.txt2
-rw-r--r--Documentation/initrd.txt16
-rw-r--r--Documentation/input/joystick.txt1
-rw-r--r--Documentation/kbuild/makefiles.txt14
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--Documentation/kobject.txt2
-rw-r--r--Documentation/memory-barriers.txt5
-rw-r--r--Documentation/mips/time.README10
-rw-r--r--Documentation/networking/ip-sysctl.txt18
-rw-r--r--Documentation/nfsroot.txt275
-rw-r--r--Documentation/powerpc/booting-without-of.txt16
-rw-r--r--Documentation/ramdisk.txt12
-rw-r--r--Documentation/scsi/ChangeLog.megaraid123
-rw-r--r--Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl4
-rw-r--r--Documentation/sysctl/fs.txt20
-rw-r--r--Documentation/sysctl/kernel.txt25
-rw-r--r--Documentation/usb/proc_usb_info.txt2
-rw-r--r--Documentation/usb/usb-help.txt3
-rw-r--r--Documentation/usb/usb-serial.txt4
-rw-r--r--Documentation/x86_64/boot-options.txt7
46 files changed, 1997 insertions, 832 deletions
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index 7c717699032c..63392c9132b4 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -698,12 +698,12 @@ these interfaces. Remember that, as defined, consistent mappings are
698always going to be SAC addressable. 698always going to be SAC addressable.
699 699
700The first thing your driver needs to do is query the PCI platform 700The first thing your driver needs to do is query the PCI platform
701layer with your devices DAC addressing capabilities: 701layer if it is capable of handling your devices DAC addressing
702capabilities:
702 703
703 int pci_dac_set_dma_mask(struct pci_dev *pdev, u64 mask); 704 int pci_dac_dma_supported(struct pci_dev *hwdev, u64 mask);
704 705
705This routine behaves identically to pci_set_dma_mask. You may not 706You may not use the following interfaces if this routine fails.
706use the following interfaces if this routine fails.
707 707
708Next, DMA addresses using this API are kept track of using the 708Next, DMA addresses using this API are kept track of using the
709dma64_addr_t type. It is guaranteed to be big enough to hold any 709dma64_addr_t type. It is guaranteed to be big enough to hold any
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 1ae4dc0fd856..f8fe882e33dc 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -59,6 +59,9 @@
59!Iinclude/linux/hrtimer.h 59!Iinclude/linux/hrtimer.h
60!Ekernel/hrtimer.c 60!Ekernel/hrtimer.c
61 </sect1> 61 </sect1>
62 <sect1><title>Workqueues and Kevents</title>
63!Ekernel/workqueue.c
64 </sect1>
62 <sect1><title>Internal Functions</title> 65 <sect1><title>Internal Functions</title>
63!Ikernel/exit.c 66!Ikernel/exit.c
64!Ikernel/signal.c 67!Ikernel/signal.c
@@ -300,7 +303,7 @@ X!Ekernel/module.c
300 </sect1> 303 </sect1>
301 304
302 <sect1><title>Resources Management</title> 305 <sect1><title>Resources Management</title>
303!Ekernel/resource.c 306!Ikernel/resource.c
304 </sect1> 307 </sect1>
305 308
306 <sect1><title>MTRR Handling</title> 309 <sect1><title>MTRR Handling</title>
@@ -312,9 +315,7 @@ X!Ekernel/module.c
312!Edrivers/pci/pci-driver.c 315!Edrivers/pci/pci-driver.c
313!Edrivers/pci/remove.c 316!Edrivers/pci/remove.c
314!Edrivers/pci/pci-acpi.c 317!Edrivers/pci/pci-acpi.c
315<!-- kerneldoc does not understand __devinit 318!Edrivers/pci/search.c
316X!Edrivers/pci/search.c
317 -->
318!Edrivers/pci/msi.c 319!Edrivers/pci/msi.c
319!Edrivers/pci/bus.c 320!Edrivers/pci/bus.c
320<!-- FIXME: Removed for now since no structured comments in source 321<!-- FIXME: Removed for now since no structured comments in source
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 4f41a60e5111..318df44259b3 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -687,8 +687,9 @@ diff shows how closely related RCU and reader-writer locking can be.
687 + spin_lock(&listmutex); 687 + spin_lock(&listmutex);
688 list_for_each_entry(p, head, lp) { 688 list_for_each_entry(p, head, lp) {
689 if (p->key == key) { 689 if (p->key == key) {
690 list_del(&p->list); 690 - list_del(&p->list);
691 - write_unlock(&listmutex); 691 - write_unlock(&listmutex);
692 + list_del_rcu(&p->list);
692 + spin_unlock(&listmutex); 693 + spin_unlock(&listmutex);
693 + synchronize_rcu(); 694 + synchronize_rcu();
694 kfree(p); 695 kfree(p);
@@ -736,7 +737,7 @@ Or, for those who prefer a side-by-side listing:
736 5 write_lock(&listmutex); 5 spin_lock(&listmutex); 737 5 write_lock(&listmutex); 5 spin_lock(&listmutex);
737 6 list_for_each_entry(p, head, lp) { 6 list_for_each_entry(p, head, lp) { 738 6 list_for_each_entry(p, head, lp) { 6 list_for_each_entry(p, head, lp) {
738 7 if (p->key == key) { 7 if (p->key == key) { 739 7 if (p->key == key) { 7 if (p->key == key) {
739 8 list_del(&p->list); 8 list_del(&p->list); 740 8 list_del(&p->list); 8 list_del_rcu(&p->list);
740 9 write_unlock(&listmutex); 9 spin_unlock(&listmutex); 741 9 write_unlock(&listmutex); 9 spin_unlock(&listmutex);
741 10 synchronize_rcu(); 742 10 synchronize_rcu();
74210 kfree(p); 11 kfree(p); 74310 kfree(p); 11 kfree(p);
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index 8230098da529..a10bfb6ecd9f 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -1,57 +1,63 @@
1Linux Kernel patch sumbittal checklist 1Linux Kernel patch sumbittal checklist
2~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 3
4Here are some basic things that developers should do if they 4Here are some basic things that developers should do if they want to see their
5want to see their kernel patch submittals accepted quicker. 5kernel patch submissions accepted more quickly.
6 6
7These are all above and beyond the documentation that is provided 7These are all above and beyond the documentation that is provided in
8in Documentation/SubmittingPatches and elsewhere about submitting 8Documentation/SubmittingPatches and elsewhere regarding submitting Linux
9Linux kernel patches. 9kernel patches.
10 10
11 11
12 12
13- Builds cleanly with applicable or modified CONFIG options =y, =m, and =n. 131: Builds cleanly with applicable or modified CONFIG options =y, =m, and
14 No gcc warnings/errors, no linker warnings/errors. 14 =n. No gcc warnings/errors, no linker warnings/errors.
15 15
16- Passes allnoconfig, allmodconfig 162: Passes allnoconfig, allmodconfig
17 17
18- Builds on multiple CPU arch-es by using local cross-compile tools 183: Builds on multiple CPU architectures by using local cross-compile tools
19 or something like PLM at OSDL. 19 or something like PLM at OSDL.
20 20
21- ppc64 is a good architecture for cross-compilation checking because it 214: ppc64 is a good architecture for cross-compilation checking because it
22 tends to use `unsigned long' for 64-bit quantities. 22 tends to use `unsigned long' for 64-bit quantities.
23 23
24- Matches kernel coding style(!) 245: Matches kernel coding style(!)
25 25
26- Any new or modified CONFIG options don't muck up the config menu. 266: Any new or modified CONFIG options don't muck up the config menu.
27 27
28- All new Kconfig options have help text. 287: All new Kconfig options have help text.
29 29
30- Has been carefully reviewed with respect to relevant Kconfig 308: Has been carefully reviewed with respect to relevant Kconfig
31 combinations. This is very hard to get right with testing -- 31 combinations. This is very hard to get right with testing -- brainpower
32 brainpower pays off here. 32 pays off here.
33 33
34- Check cleanly with sparse. 349: Check cleanly with sparse.
35 35
36- Use 'make checkstack' and 'make namespacecheck' and fix any 3610: Use 'make checkstack' and 'make namespacecheck' and fix any problems
37 problems that they find. Note: checkstack does not point out 37 that they find. Note: checkstack does not point out problems explicitly,
38 problems explicitly, but any one function that uses more than 38 but any one function that uses more than 512 bytes on the stack is a
39 512 bytes on the stack is a candidate for change. 39 candidate for change.
40 40
41- Include kernel-doc to document global kernel APIs. (Not required 4111: Include kernel-doc to document global kernel APIs. (Not required for
42 for static functions, but OK there also.) Use 'make htmldocs' 42 static functions, but OK there also.) Use 'make htmldocs' or 'make
43 or 'make mandocs' to check the kernel-doc and fix any issues. 43 mandocs' to check the kernel-doc and fix any issues.
44 44
45- Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, 4512: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
46 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, 46 CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
47 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously 47 CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_SPINLOCK_SLEEP all simultaneously
48 enabled. 48 enabled.
49 49
50- Has been build- and runtime tested with and without CONFIG_SMP and 5013: Has been build- and runtime tested with and without CONFIG_SMP and
51 CONFIG_PREEMPT. 51 CONFIG_PREEMPT.
52 52
53- If the patch affects IO/Disk, etc: has been tested with and without 5314: If the patch affects IO/Disk, etc: has been tested with and without
54 CONFIG_LBD. 54 CONFIG_LBD.
55 55
5615: All codepaths have been exercised with all lockdep features enabled.
56 57
572006-APR-27 5816: All new /proc entries are documented under Documentation/
59
6017: All new kernel boot parameters are documented in
61 Documentation/kernel-parameters.txt.
62
6318: All new module parameters are documented with MODULE_PARM_DESC()
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index c2c85bcb3d43..d42ab4c9e893 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -10,7 +10,9 @@ kernel, the process can sometimes be daunting if you're not familiar
10with "the system." This text is a collection of suggestions which 10with "the system." This text is a collection of suggestions which
11can greatly increase the chances of your change being accepted. 11can greatly increase the chances of your change being accepted.
12 12
13If you are submitting a driver, also read Documentation/SubmittingDrivers. 13Read Documentation/SubmitChecklist for a list of items to check
14before submitting code. If you are submitting a driver, also read
15Documentation/SubmittingDrivers.
14 16
15 17
16 18
@@ -74,9 +76,6 @@ There are a number of scripts which can aid in this:
74Quilt: 76Quilt:
75http://savannah.nongnu.org/projects/quilt 77http://savannah.nongnu.org/projects/quilt
76 78
77Randy Dunlap's patch scripts:
78http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz
79
80Andrew Morton's patch scripts: 79Andrew Morton's patch scripts:
81http://www.zip.com.au/~akpm/linux/patches/ 80http://www.zip.com.au/~akpm/linux/patches/
82Instead of these scripts, quilt is the recommended patch management 81Instead of these scripts, quilt is the recommended patch management
@@ -309,6 +308,8 @@ then you just add a line saying
309 308
310 Signed-off-by: Random J Developer <random@developer.example.org> 309 Signed-off-by: Random J Developer <random@developer.example.org>
311 310
311using your real name (sorry, no pseudonyms or anonymous contributions.)
312
312Some people also put extra tags at the end. They'll just be ignored for 313Some people also put extra tags at the end. They'll just be ignored for
313now, but you can do this to mark internal company procedures or just 314now, but you can do this to mark internal company procedures or just
314point out some special detail about the sign-off. 315point out some special detail about the sign-off.
@@ -484,7 +485,7 @@ Greg Kroah-Hartman "How to piss off a kernel subsystem maintainer".
484 <http://www.kroah.com/log/2005/10/19/> 485 <http://www.kroah.com/log/2005/10/19/>
485 <http://www.kroah.com/log/2006/01/11/> 486 <http://www.kroah.com/log/2006/01/11/>
486 487
487NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!. 488NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
488 <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2> 489 <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
489 490
490Kernel Documentation/CodingStyle 491Kernel Documentation/CodingStyle
@@ -493,4 +494,3 @@ Kernel Documentation/CodingStyle
493Linus Torvald's mail on the canonical patch format: 494Linus Torvald's mail on the canonical patch format:
494 <http://lkml.org/lkml/2005/4/7/183> 495 <http://lkml.org/lkml/2005/4/7/183>
495-- 496--
496Last updated on 17 Nov 2005.
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
new file mode 100644
index 000000000000..1443cd71d263
--- /dev/null
+++ b/Documentation/accounting/delay-accounting.txt
@@ -0,0 +1,112 @@
1Delay accounting
2----------------
3
4Tasks encounter delays in execution when they wait
5for some kernel resource to become available e.g. a
6runnable task may wait for a free CPU to run on.
7
8The per-task delay accounting functionality measures
9the delays experienced by a task while
10
11a) waiting for a CPU (while being runnable)
12b) waiting for completion of synchronous block I/O initiated by the task
13c) swapping in pages
14
15and makes these statistics available to userspace through
16the taskstats interface.
17
18Such delays provide feedback for setting a task's cpu priority,
19io priority and rss limit values appropriately. Long delays for
20important tasks could be a trigger for raising their corresponding priorities.
21
22The functionality, through its use of the taskstats interface, also provides
23delay statistics aggregated for all tasks (or threads) belonging to a
24thread group (corresponding to a traditional Unix process). This is a commonly
25needed aggregation that is more efficiently done by the kernel.
26
27Userspace utilities, particularly resource management applications, can also
28aggregate delay statistics into arbitrary groups. To enable this, delay
29statistics of a task are available both during its lifetime and on its
30exit, ensuring continuous and complete monitoring can be done.
31
32
33Interface
34---------
35
36Delay accounting uses the taskstats interface which is described
37in detail in a separate document in this directory. Taskstats returns a
38generic data structure to userspace corresponding to per-pid and per-tgid
39statistics. The delay accounting functionality populates specific fields of
40this structure. See
41 include/linux/taskstats.h
42for a description of the fields pertaining to delay accounting.
43It will generally be in the form of counters returning the cumulative
44delay seen for cpu, sync block I/O, swapin etc.
45
46Taking the difference of two successive readings of a given
47counter (say cpu_delay_total) for a task will give the delay
48experienced by the task waiting for the corresponding resource
49in that interval.
50
51When a task exits, records containing the per-task statistics
52are sent to userspace without requiring a command. If it is the last exiting
53task of a thread group, the per-tgid statistics are also sent. More details
54are given in the taskstats interface description.
55
56The getdelays.c userspace utility in this directory allows simple commands to
57be run and the corresponding delay statistics to be displayed. It also serves
58as an example of using the taskstats interface.
59
60Usage
61-----
62
63Compile the kernel with
64 CONFIG_TASK_DELAY_ACCT=y
65 CONFIG_TASKSTATS=y
66
67Delay accounting is enabled by default at boot up.
68To disable, add
69 nodelayacct
70to the kernel boot options. The rest of the instructions
71below assume this has not been done.
72
73After the system has booted up, use a utility
74similar to getdelays.c to access the delays
75seen by a given task or a task group (tgid).
76The utility also allows a given command to be
77executed and the corresponding delays to be
78seen.
79
80General format of the getdelays command
81
82getdelays [-t tgid] [-p pid] [-c cmd...]
83
84
85Get delays, since system boot, for pid 10
86# ./getdelays -p 10
87(output similar to next case)
88
89Get sum of delays, since system boot, for all pids with tgid 5
90# ./getdelays -t 5
91
92
93CPU count real total virtual total delay total
94 7876 92005750 100000000 24001500
95IO count delay total
96 0 0
97MEM count delay total
98 0 0
99
100Get delays seen in executing a given simple command
101# ./getdelays -c ls /
102
103bin data1 data3 data5 dev home media opt root srv sys usr
104boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
105
106
107CPU count real total virtual total delay total
108 6 4000250 4000000 0
109IO count delay total
110 0 0
111MEM count delay total
112 0 0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
new file mode 100644
index 000000000000..795ca3911cc5
--- /dev/null
+++ b/Documentation/accounting/getdelays.c
@@ -0,0 +1,396 @@
1/* getdelays.c
2 *
3 * Utility to get per-pid and per-tgid delay accounting statistics
4 * Also illustrates usage of the taskstats interface
5 *
6 * Copyright (C) Shailabh Nagar, IBM Corp. 2005
7 * Copyright (C) Balbir Singh, IBM Corp. 2006
8 * Copyright (c) Jay Lan, SGI. 2006
9 *
10 */
11
12#include <stdio.h>
13#include <stdlib.h>
14#include <errno.h>
15#include <unistd.h>
16#include <poll.h>
17#include <string.h>
18#include <fcntl.h>
19#include <sys/types.h>
20#include <sys/stat.h>
21#include <sys/socket.h>
22#include <sys/types.h>
23#include <signal.h>
24
25#include <linux/genetlink.h>
26#include <linux/taskstats.h>
27
/*
 * Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library
 */
/* Start of the generic netlink payload: the bytes after the genlmsghdr. */
#define GENLMSG_DATA(glh)	((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
/* Number of payload bytes following the genlmsghdr. */
#define GENLMSG_PAYLOAD(glh)	(NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
/* Start of an attribute's payload: the bytes after the attribute header. */
#define NLA_DATA(na)		((void *)((char *)(na) + NLA_HDRLEN))
/* Payload length of an attribute whose total length (nla_len) is len. */
#define NLA_PAYLOAD(len)	((len) - NLA_HDRLEN)

/* Print a message and terminate the process with the given exit code. */
#define err(code, fmt, arg...) do { printf(fmt, ##arg); exit(code); } while (0)
int done = 0;		/* reset by main() whenever an aggregate attribute is seen */
int rcvbufsz = 0;	/* -r: SO_RCVBUF size applied by create_nl_socket(), 0 = leave alone */

char name[100];		/* scratch buffer for the family name sent by get_family_id() */
int dbg = 0, print_delays = 0;	/* -v enables PRINTF output; -d/-t/-p enable stats printing */
__u64 stime, utime;	/* not referenced by the code visible in this file */

/*
 * Debug-only printf, active when dbg is set.  Wrapped in do/while (0) so
 * that "PRINTF(...);" is a single statement and is safe in an unbraced
 * if/else.
 */
#define PRINTF(fmt, arg...) do {		\
		if (dbg) {			\
			printf(fmt, ##arg);	\
		}				\
	} while (0)

/* Maximum size of response requested or message sent */
#define MAX_MSG_SIZE	256
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS	32
/* Maximum length of pathname to log file */
#define MAX_FILENAME	256

/* One generic netlink message: both headers followed by the attribute area. */
struct msgtemplate {
	struct nlmsghdr n;
	struct genlmsghdr g;
	char buf[MAX_MSG_SIZE];
};

/* -m: cpumask string handed to the register/deregister commands */
char cpumask[100+6*MAX_CPUS];
65
66/*
67 * Create a raw netlink socket and bind
68 */
69static int create_nl_socket(int protocol)
70{
71 int fd;
72 struct sockaddr_nl local;
73
74 fd = socket(AF_NETLINK, SOCK_RAW, protocol);
75 if (fd < 0)
76 return -1;
77
78 if (rcvbufsz)
79 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
80 &rcvbufsz, sizeof(rcvbufsz)) < 0) {
81 printf("Unable to set socket rcv buf size to %d\n",
82 rcvbufsz);
83 return -1;
84 }
85
86 memset(&local, 0, sizeof(local));
87 local.nl_family = AF_NETLINK;
88
89 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
90 goto error;
91
92 return fd;
93error:
94 close(fd);
95 return -1;
96}
97
98
99int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
100 __u8 genl_cmd, __u16 nla_type,
101 void *nla_data, int nla_len)
102{
103 struct nlattr *na;
104 struct sockaddr_nl nladdr;
105 int r, buflen;
106 char *buf;
107
108 struct msgtemplate msg;
109
110 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
111 msg.n.nlmsg_type = nlmsg_type;
112 msg.n.nlmsg_flags = NLM_F_REQUEST;
113 msg.n.nlmsg_seq = 0;
114 msg.n.nlmsg_pid = nlmsg_pid;
115 msg.g.cmd = genl_cmd;
116 msg.g.version = 0x1;
117 na = (struct nlattr *) GENLMSG_DATA(&msg);
118 na->nla_type = nla_type;
119 na->nla_len = nla_len + 1 + NLA_HDRLEN;
120 memcpy(NLA_DATA(na), nla_data, nla_len);
121 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
122
123 buf = (char *) &msg;
124 buflen = msg.n.nlmsg_len ;
125 memset(&nladdr, 0, sizeof(nladdr));
126 nladdr.nl_family = AF_NETLINK;
127 while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
128 sizeof(nladdr))) < buflen) {
129 if (r > 0) {
130 buf += r;
131 buflen -= r;
132 } else if (errno != EAGAIN)
133 return -1;
134 }
135 return 0;
136}
137
138
139/*
140 * Probe the controller in genetlink to find the family id
141 * for the TASKSTATS family
142 */
143int get_family_id(int sd)
144{
145 struct {
146 struct nlmsghdr n;
147 struct genlmsghdr g;
148 char buf[256];
149 } ans;
150
151 int id, rc;
152 struct nlattr *na;
153 int rep_len;
154
155 strcpy(name, TASKSTATS_GENL_NAME);
156 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
157 CTRL_ATTR_FAMILY_NAME, (void *)name,
158 strlen(TASKSTATS_GENL_NAME)+1);
159
160 rep_len = recv(sd, &ans, sizeof(ans), 0);
161 if (ans.n.nlmsg_type == NLMSG_ERROR ||
162 (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
163 return 0;
164
165 na = (struct nlattr *) GENLMSG_DATA(&ans);
166 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
167 if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
168 id = *(__u16 *) NLA_DATA(na);
169 }
170 return id;
171}
172
/*
 * Print the CPU, block I/O and swap-in delay counters of one taskstats
 * record to stdout as three small tables.
 *
 * t: record to print; must not be NULL.
 *
 * The counters are __u64; they are cast to unsigned long long so that the
 * %llu conversions are correct whatever underlying type __u64 has.
 */
void print_delayacct(struct taskstats *t)
{
	printf("\n\nCPU %15s%15s%15s%15s\n"
	       " %15llu%15llu%15llu%15llu\n"
	       "IO %15s%15s\n"
	       " %15llu%15llu\n"
	       "MEM %15s%15s\n"
	       " %15llu%15llu\n\n",
	       "count", "real total", "virtual total", "delay total",
	       (unsigned long long) t->cpu_count,
	       (unsigned long long) t->cpu_run_real_total,
	       (unsigned long long) t->cpu_run_virtual_total,
	       (unsigned long long) t->cpu_delay_total,
	       "count", "delay total",
	       (unsigned long long) t->blkio_count,
	       (unsigned long long) t->blkio_delay_total,
	       "count", "delay total",
	       (unsigned long long) t->swapin_count,
	       (unsigned long long) t->swapin_delay_total);
}
188
189int main(int argc, char *argv[])
190{
191 int c, rc, rep_len, aggr_len, len2, cmd_type;
192 __u16 id;
193 __u32 mypid;
194
195 struct nlattr *na;
196 int nl_sd = -1;
197 int len = 0;
198 pid_t tid = 0;
199 pid_t rtid = 0;
200
201 int fd = 0;
202 int count = 0;
203 int write_file = 0;
204 int maskset = 0;
205 char logfile[128];
206 int loop = 0;
207
208 struct msgtemplate msg;
209
210 while (1) {
211 c = getopt(argc, argv, "dw:r:m:t:p:v:l");
212 if (c < 0)
213 break;
214
215 switch (c) {
216 case 'd':
217 printf("print delayacct stats ON\n");
218 print_delays = 1;
219 break;
220 case 'w':
221 strncpy(logfile, optarg, MAX_FILENAME);
222 printf("write to file %s\n", logfile);
223 write_file = 1;
224 break;
225 case 'r':
226 rcvbufsz = atoi(optarg);
227 printf("receive buf size %d\n", rcvbufsz);
228 if (rcvbufsz < 0)
229 err(1, "Invalid rcv buf size\n");
230 break;
231 case 'm':
232 strncpy(cpumask, optarg, sizeof(cpumask));
233 maskset = 1;
234 printf("cpumask %s maskset %d\n", cpumask, maskset);
235 break;
236 case 't':
237 tid = atoi(optarg);
238 if (!tid)
239 err(1, "Invalid tgid\n");
240 cmd_type = TASKSTATS_CMD_ATTR_TGID;
241 print_delays = 1;
242 break;
243 case 'p':
244 tid = atoi(optarg);
245 if (!tid)
246 err(1, "Invalid pid\n");
247 cmd_type = TASKSTATS_CMD_ATTR_PID;
248 print_delays = 1;
249 break;
250 case 'v':
251 printf("debug on\n");
252 dbg = 1;
253 break;
254 case 'l':
255 printf("listen forever\n");
256 loop = 1;
257 break;
258 default:
259 printf("Unknown option %d\n", c);
260 exit(-1);
261 }
262 }
263
264 if (write_file) {
265 fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
266 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
267 if (fd == -1) {
268 perror("Cannot open output file\n");
269 exit(1);
270 }
271 }
272
273 if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
274 err(1, "error creating Netlink socket\n");
275
276
277 mypid = getpid();
278 id = get_family_id(nl_sd);
279 if (!id) {
280 printf("Error getting family id, errno %d", errno);
281 goto err;
282 }
283 PRINTF("family id %d\n", id);
284
285 if (maskset) {
286 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
287 TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
288 &cpumask, sizeof(cpumask));
289 PRINTF("Sent register cpumask, retval %d\n", rc);
290 if (rc < 0) {
291 printf("error sending register cpumask\n");
292 goto err;
293 }
294 }
295
296 if (tid) {
297 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
298 cmd_type, &tid, sizeof(__u32));
299 PRINTF("Sent pid/tgid, retval %d\n", rc);
300 if (rc < 0) {
301 printf("error sending tid/tgid cmd\n");
302 goto done;
303 }
304 }
305
306 do {
307 int i;
308
309 rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
310 PRINTF("received %d bytes\n", rep_len);
311
312 if (rep_len < 0) {
313 printf("nonfatal reply error: errno %d\n", errno);
314 continue;
315 }
316 if (msg.n.nlmsg_type == NLMSG_ERROR ||
317 !NLMSG_OK((&msg.n), rep_len)) {
318 printf("fatal reply error, errno %d\n", errno);
319 goto done;
320 }
321
322 PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n",
323 sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
324
325
326 rep_len = GENLMSG_PAYLOAD(&msg.n);
327
328 na = (struct nlattr *) GENLMSG_DATA(&msg);
329 len = 0;
330 i = 0;
331 while (len < rep_len) {
332 len += NLA_ALIGN(na->nla_len);
333 switch (na->nla_type) {
334 case TASKSTATS_TYPE_AGGR_TGID:
335 /* Fall through */
336 case TASKSTATS_TYPE_AGGR_PID:
337 aggr_len = NLA_PAYLOAD(na->nla_len);
338 len2 = 0;
339 /* For nested attributes, na follows */
340 na = (struct nlattr *) NLA_DATA(na);
341 done = 0;
342 while (len2 < aggr_len) {
343 switch (na->nla_type) {
344 case TASKSTATS_TYPE_PID:
345 rtid = *(int *) NLA_DATA(na);
346 if (print_delays)
347 printf("PID\t%d\n", rtid);
348 break;
349 case TASKSTATS_TYPE_TGID:
350 rtid = *(int *) NLA_DATA(na);
351 if (print_delays)
352 printf("TGID\t%d\n", rtid);
353 break;
354 case TASKSTATS_TYPE_STATS:
355 count++;
356 if (print_delays)
357 print_delayacct((struct taskstats *) NLA_DATA(na));
358 if (fd) {
359 if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
360 err(1,"write error\n");
361 }
362 }
363 if (!loop)
364 goto done;
365 break;
366 default:
367 printf("Unknown nested nla_type %d\n", na->nla_type);
368 break;
369 }
370 len2 += NLA_ALIGN(na->nla_len);
371 na = (struct nlattr *) ((char *) na + len2);
372 }
373 break;
374
375 default:
376 printf("Unknown nla_type %d\n", na->nla_type);
377 break;
378 }
379 na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
380 }
381 } while (loop);
382done:
383 if (maskset) {
384 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
385 TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
386 &cpumask, sizeof(cpumask));
387 printf("Sent deregister mask, retval %d\n", rc);
388 if (rc < 0)
389 err(rc, "error sending deregister cpumask\n");
390 }
391err:
392 close(nl_sd);
393 if (fd)
394 close(fd);
395 return 0;
396}
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt
new file mode 100644
index 000000000000..92ebf29e9041
--- /dev/null
+++ b/Documentation/accounting/taskstats.txt
@@ -0,0 +1,181 @@
1Per-task statistics interface
2-----------------------------
3
4
5Taskstats is a netlink-based interface for sending per-task and
6per-process statistics from the kernel to userspace.
7
8Taskstats was designed for the following benefits:
9
10- efficiently provide statistics during lifetime of a task and on its exit
11- unified interface for multiple accounting subsystems
12- extensibility for use by future accounting patches
13
14Terminology
15-----------
16
17"pid", "tid" and "task" are used interchangeably and refer to the standard
18Linux task defined by struct task_struct. per-pid stats are the same as
19per-task stats.
20
21"tgid", "process" and "thread group" are used interchangeably and refer to the
22tasks that share an mm_struct i.e. the traditional Unix process. Despite the
23use of tgid, there is no special treatment for the task that is thread group
24leader - a process is deemed alive as long as it has any task belonging to it.
25
26Usage
27-----
28
29To get statistics during a task's lifetime, userspace opens a unicast netlink
30socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
31The response contains statistics for a task (if pid is specified) or the sum of
32statistics for all tasks of the process (if tgid is specified).
33
34To obtain statistics for tasks which are exiting, the userspace listener
35sends a register command and specifies a cpumask. Whenever a task exits on
36one of the cpus in the cpumask, its per-pid statistics are sent to the
37registered listener. Using cpumasks allows the data received by one listener
38to be limited and assists in flow control over the netlink interface and is
39explained in more detail below.
40
41If the exiting task is the last thread exiting its thread group,
42an additional record containing the per-tgid stats is also sent to userspace.
43The latter contains the sum of per-pid stats for all threads in the thread
44group, both past and present.
45
46getdelays.c is a simple utility demonstrating usage of the taskstats interface
47for reporting delay accounting statistics. Users can register cpumasks,
48send commands and process responses, listen for per-tid/tgid exit data,
49write the data received to a file and do basic flow control by increasing
50receive buffer sizes.
51
52Interface
53---------
54
55The user-kernel interface is encapsulated in include/linux/taskstats.h
56
57To avoid this documentation becoming obsolete as the interface evolves, only
58an outline of the current version is given. taskstats.h always overrides the
59description here.
60
61struct taskstats is the common accounting structure for both per-pid and
62per-tgid data. It is versioned and can be extended by each accounting subsystem
63that is added to the kernel. The fields and their semantics are defined in the
64taskstats.h file.
65
66The data exchanged between user and kernel space is a netlink message belonging
67to the NETLINK_GENERIC family and using the netlink attributes interface.
68The messages are in the format
69
70 +----------+- - -+-------------+-------------------+
71 | nlmsghdr | Pad | genlmsghdr | taskstats payload |
72 +----------+- - -+-------------+-------------------+
73
74
75The taskstats payload is one of the following three kinds:
76
771. Commands: Sent from user to kernel. Commands to get data on
78a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
79containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
80the task/process for which userspace wants statistics.
81
82Commands to register/deregister interest in exit data from a set of cpus
83consist of one attribute, of type
84TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
85attribute payload. The cpumask is specified as an ascii string of
86comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
87the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
88in cpus before closing the listening socket, the kernel cleans up its interest
89set over time. However, for the sake of efficiency, an explicit deregistration
90is advisable.
91
922. Response for a command: sent from the kernel in response to a userspace
93command. The payload is a series of three attributes of type:
94
95a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
96a pid/tgid will be followed by some stats.
97
98b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
99is being returned.
100
101c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
102same structure is used for both per-pid and per-tgid stats.
103
1043. New message sent by kernel whenever a task exits. The payload consists of a
105 series of attributes of the following type:
106
107a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
108b) TASKSTATS_TYPE_PID: contains exiting task's pid
109c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
110d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
111e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
112f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
113
114
115per-tgid stats
116--------------
117
118Taskstats provides per-process stats, in addition to per-task stats, since
119resource management is often done at a process granularity and aggregating task
120stats in userspace alone is inefficient and potentially inaccurate (due to lack
121of atomicity).
122
123However, maintaining per-process, in addition to per-task stats, within the
124kernel has space and time overheads. To address this, the taskstats code
125accumulates each exiting task's statistics into a process-wide data structure.
126When the last task of a process exits, the process level data accumulated also
127gets sent to userspace (along with the per-task data).
128
129When a user queries to get per-tgid data, the stats of all live threads in
130the group are summed and added to the accumulated total for previously exited
131threads of the same thread group.
132
133Extending taskstats
134-------------------
135
136There are two ways to extend the taskstats interface to export more
137per-task/process stats as patches to collect them get added to the kernel
138in future:
139
1401. Adding more fields to the end of the existing struct taskstats. Backward
141 compatibility is ensured by the version number within the
142 structure. Userspace will use only the fields of the struct that correspond
143 to the version it is using.
144
1452. Defining separate statistic structs and using the netlink attributes
146 interface to return them. Since userspace processes each netlink attribute
147 independently, it can always ignore attributes whose type it does not
148 understand (because it is using an older version of the interface).
149
150
151Choosing between 1. and 2. is a matter of trading off flexibility and
152overhead. If only a few fields need to be added, then 1. is the preferable
153path since the kernel and userspace don't need to incur the overhead of
154processing new netlink attributes. But if the new fields expand the existing
155struct too much, requiring disparate userspace accounting utilities to
156unnecessarily receive large structures whose fields are of no interest, then
157extending the attributes structure would be worthwhile.
158
159Flow control for taskstats
160--------------------------
161
162When the rate of task exits becomes large, a listener may not be able to keep
163up with the kernel's rate of sending per-tid/tgid exit data leading to data
164loss. This possibility gets compounded when the taskstats structure gets
165extended and the number of cpus grows large.
166
167To avoid losing statistics, userspace should do one or more of the following:
168
169- increase the receive buffer sizes for the netlink sockets opened by
170listeners to receive exit data.
171
172- create more listeners and reduce the number of cpus being listened to by
173each listener. In the extreme case, there could be one listener for each cpu.
174Users may also consider setting the cpu affinity of the listener to the subset
175of cpus to which it listens, especially if they are listening to just one cpu.
176
177Despite these measures, if the userspace receives ENOBUFS error messages
178indicating overflow of receive buffers, it should take measures to handle the
179loss of data.
180
181----
diff --git a/Documentation/cciss.txt b/Documentation/cciss.txt
index 15378422fc46..9c629ffa0e58 100644
--- a/Documentation/cciss.txt
+++ b/Documentation/cciss.txt
@@ -20,6 +20,7 @@ This driver is known to work with the following cards:
20 * SA P400i 20 * SA P400i
21 * SA E200 21 * SA E200
22 * SA E200i 22 * SA E200i
23 * SA E500
23 24
24If nodes are not already created in the /dev/cciss directory, run as root: 25If nodes are not already created in the /dev/cciss directory, run as root:
25 26
diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c
new file mode 100644
index 000000000000..d738cde2a8d5
--- /dev/null
+++ b/Documentation/connector/ucon.c
@@ -0,0 +1,206 @@
1/*
2 * ucon.c
3 *
4 * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru>
5 *
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <asm/types.h>
23
24#include <sys/types.h>
25#include <sys/socket.h>
26#include <sys/poll.h>
27
28#include <linux/netlink.h>
29#include <linux/rtnetlink.h>
30
31#include <arpa/inet.h>
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <unistd.h>
36#include <string.h>
37#include <errno.h>
38#include <time.h>
39
40#include <linux/connector.h>
41
42#define DEBUG
43#define NETLINK_CONNECTOR 11
44
45#ifdef DEBUG
46#define ulog(f, a...) fprintf(stdout, f, ##a)
47#else
48#define ulog(f, a...) do {} while (0)
49#endif
50
51static int need_exit;
52static __u32 seq;
53
54static int netlink_send(int s, struct cn_msg *msg)
55{
56 struct nlmsghdr *nlh;
57 unsigned int size;
58 int err;
59 char buf[128];
60 struct cn_msg *m;
61
62 size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
63
64 nlh = (struct nlmsghdr *)buf;
65 nlh->nlmsg_seq = seq++;
66 nlh->nlmsg_pid = getpid();
67 nlh->nlmsg_type = NLMSG_DONE;
68 nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh));
69 nlh->nlmsg_flags = 0;
70
71 m = NLMSG_DATA(nlh);
72#if 0
73 ulog("%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n",
74 __func__, msg->id.idx, msg->id.val, msg->len, msg->seq, msg->ack);
75#endif
76 memcpy(m, msg, sizeof(*m) + msg->len);
77
78 err = send(s, nlh, size, 0);
79 if (err == -1)
80 ulog("Failed to send: %s [%d].\n",
81 strerror(errno), errno);
82
83 return err;
84}
85
86int main(int argc, char *argv[])
87{
88 int s;
89 char buf[1024];
90 int len;
91 struct nlmsghdr *reply;
92 struct sockaddr_nl l_local;
93 struct cn_msg *data;
94 FILE *out;
95 time_t tm;
96 struct pollfd pfd;
97
98 if (argc < 2)
99 out = stdout;
100 else {
101 out = fopen(argv[1], "a+");
102 if (!out) {
103 ulog("Unable to open %s for writing: %s\n",
104 argv[1], strerror(errno));
105 out = stdout;
106 }
107 }
108
109 memset(buf, 0, sizeof(buf));
110
111 s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
112 if (s == -1) {
113 perror("socket");
114 return -1;
115 }
116
117 l_local.nl_family = AF_NETLINK;
118 l_local.nl_groups = 0x123; /* bitmask of requested groups */
119 l_local.nl_pid = 0;
120
121 if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
122 perror("bind");
123 close(s);
124 return -1;
125 }
126
127#if 0
128 {
129 int on = 0x57; /* Additional group number */
130 setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on));
131 }
132#endif
133 if (0) {
134 int i, j;
135
136 memset(buf, 0, sizeof(buf));
137
138 data = (struct cn_msg *)buf;
139
140 data->id.idx = 0x123;
141 data->id.val = 0x456;
142 data->seq = seq++;
143 data->ack = 0;
144 data->len = 0;
145
146 for (j=0; j<10; ++j) {
147 for (i=0; i<1000; ++i) {
148 len = netlink_send(s, data);
149 }
150
151 ulog("%d messages have been sent to %08x.%08x.\n", i, data->id.idx, data->id.val);
152 }
153
154 return 0;
155 }
156
157
158 pfd.fd = s;
159
160 while (!need_exit) {
161 pfd.events = POLLIN;
162 pfd.revents = 0;
163 switch (poll(&pfd, 1, -1)) {
164 case 0:
165 need_exit = 1;
166 break;
167 case -1:
168 if (errno != EINTR) {
169 need_exit = 1;
170 break;
171 }
172 continue;
173 }
174 if (need_exit)
175 break;
176
177 memset(buf, 0, sizeof(buf));
178 len = recv(s, buf, sizeof(buf), 0);
179 if (len == -1) {
180 perror("recv buf");
181 close(s);
182 return -1;
183 }
184 reply = (struct nlmsghdr *)buf;
185
186 switch (reply->nlmsg_type) {
187 case NLMSG_ERROR:
188 fprintf(out, "Error message received.\n");
189 fflush(out);
190 break;
191 case NLMSG_DONE:
192 data = (struct cn_msg *)NLMSG_DATA(reply);
193
194 time(&tm);
195 fprintf(out, "%.24s : [%x.%x] [%08u.%08u].\n",
196 ctime(&tm), data->id.idx, data->id.val, data->seq, data->ack);
197 fflush(out);
198 break;
199 default:
200 break;
201 }
202 }
203
204 close(s);
205 return 0;
206}
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
index 7fedc00c3d30..555c8cf3650a 100644
--- a/Documentation/cpu-freq/user-guide.txt
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -153,10 +153,13 @@ scaling_governor, and by "echoing" the name of another
153 that some governors won't load - they only 153 that some governors won't load - they only
154 work on some specific architectures or 154 work on some specific architectures or
155 processors. 155 processors.
156scaling_min_freq and 156scaling_min_freq and
157scaling_max_freq show the current "policy limits" (in 157scaling_max_freq show the current "policy limits" (in
158 kHz). By echoing new values into these 158 kHz). By echoing new values into these
159 files, you can change these limits. 159 files, you can change these limits.
160 NOTE: when setting a policy you need to
161 first set scaling_max_freq, then
162 scaling_min_freq.
160 163
161 164
162If you have selected the "userspace" governor which allows you to 165If you have selected the "userspace" governor which allows you to
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 1bcf69996c9d..bc107cb157a8 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -251,16 +251,24 @@ A: This is what you would need in your kernel code to receive notifications.
251 return NOTIFY_OK; 251 return NOTIFY_OK;
252 } 252 }
253 253
254 static struct notifier_block foobar_cpu_notifer = 254 static struct notifier_block __cpuinitdata foobar_cpu_notifer =
255 { 255 {
256 .notifier_call = foobar_cpu_callback, 256 .notifier_call = foobar_cpu_callback,
257 }; 257 };
258 258
259You need to call register_cpu_notifier() from your init function.
260Init functions could be of two types:
2611. early init (init function called when only the boot processor is online).
2622. late init (init function called _after_ all the CPUs are online).
259 263
260In your init function, 264For the first case, you should add the following to your init function
261 265
262 register_cpu_notifier(&foobar_cpu_notifier); 266 register_cpu_notifier(&foobar_cpu_notifier);
263 267
268For the second case, you should add the following to your init function
269
270 register_hotcpu_notifier(&foobar_cpu_notifier);
271
264You can fail PREPARE notifiers if something doesn't work to prepare resources. 272You can fail PREPARE notifiers if something doesn't work to prepare resources.
265This will stop the activity and send a following CANCELED event back. 273This will stop the activity and send a following CANCELED event back.
266 274
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index 159e2a0c3e80..76b44290c154 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -217,6 +217,12 @@ exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
217to represent the cpuset hierarchy provides for a familiar permission 217to represent the cpuset hierarchy provides for a familiar permission
218and name space for cpusets, with a minimum of additional kernel code. 218and name space for cpusets, with a minimum of additional kernel code.
219 219
220The cpus file in the root (top_cpuset) cpuset is read-only.
221It automatically tracks the value of cpu_online_map, using a CPU
222hotplug notifier. If and when memory nodes can be hotplugged,
223we expect to make the mems file in the root cpuset read-only
224as well, and have it track the value of node_online_map.
225
220 226
2211.4 What are exclusive cpusets ? 2271.4 What are exclusive cpusets ?
222-------------------------------- 228--------------------------------
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
index 4aaf68fafebe..66c725f530f3 100644
--- a/Documentation/devices.txt
+++ b/Documentation/devices.txt
@@ -2565,10 +2565,10 @@ Your cooperation is appreciated.
2565 243 = /dev/usb/dabusb3 Fourth dabusb device 2565 243 = /dev/usb/dabusb3 Fourth dabusb device
2566 2566
2567180 block USB block devices 2567180 block USB block devices
2568 0 = /dev/uba First USB block device 2568 0 = /dev/uba First USB block device
2569 8 = /dev/ubb Second USB block device 2569 8 = /dev/ubb Second USB block device
2570 16 = /dev/ubc Thrid USB block device 2570 16 = /dev/ubc Third USB block device
2571 ... 2571 ...
2572 2572
2573181 char Conrad Electronic parallel port radio clocks 2573181 char Conrad Electronic parallel port radio clocks
2574 0 = /dev/pcfclock0 First Conrad radio clock 2574 0 = /dev/pcfclock0 First Conrad radio clock
diff --git a/Documentation/drivers/edac/edac.txt b/Documentation/drivers/edac/edac.txt
index 70d96a62e5e1..7b3d969d2964 100644
--- a/Documentation/drivers/edac/edac.txt
+++ b/Documentation/drivers/edac/edac.txt
@@ -35,15 +35,14 @@ the vendor should tie the parity status bits to 0 if they do not intend
35to generate parity. Some vendors do not do this, and thus the parity bit 35to generate parity. Some vendors do not do this, and thus the parity bit
36can "float" giving false positives. 36can "float" giving false positives.
37 37
38The PCI Parity EDAC device has the ability to "skip" known flaky 38[There are patches in the kernel queue which will allow for storage of
39cards during the parity scan. These are set by the parity "blacklist" 39quirks of PCI devices reporting false parity positives. The 2.6.18
40interface in the sysfs for PCI Parity. (See the PCI section in the sysfs 40kernel should have those patches included. When that becomes available,
41section below.) There is also a parity "whitelist" which is used as 41then EDAC will be patched to utilize that information to "skip" such
42an explicit list of devices to scan, while the blacklist is a list 42devices.]
43of devices to skip.
44 43
45EDAC will have future error detectors that will be added or integrated 44EDAC will have future error detectors that will be integrated with
46into EDAC in the following list: 45EDAC or added to it, in the following list:
47 46
48 MCE Machine Check Exception 47 MCE Machine Check Exception
49 MCA Machine Check Architecture 48 MCA Machine Check Architecture
@@ -93,22 +92,24 @@ EDAC lives in the /sys/devices/system/edac directory. Within this directory
93there currently reside 2 'edac' components: 92there currently reside 2 'edac' components:
94 93
95 mc memory controller(s) system 94 mc memory controller(s) system
96 pci PCI status system 95 pci PCI control and status system
97 96
98 97
99============================================================================ 98============================================================================
100Memory Controller (mc) Model 99Memory Controller (mc) Model
101 100
102First a background on the memory controller's model abstracted in EDAC. 101First a background on the memory controller's model abstracted in EDAC.
103Each mc device controls a set of DIMM memory modules. These modules are 102Each 'mc' device controls a set of DIMM memory modules. These modules are
104laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can 103laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can
105be multiple csrows and two channels. 104be multiple csrows and multiple channels.
106 105
107Memory controllers allow for several csrows, with 8 csrows being a typical value. 106Memory controllers allow for several csrows, with 8 csrows being a typical value.
108Yet, the actual number of csrows depends on the electrical "loading" 107Yet, the actual number of csrows depends on the electrical "loading"
109of a given motherboard, memory controller and DIMM characteristics. 108of a given motherboard, memory controller and DIMM characteristics.
110 109
111Dual channels allows for 128 bit data transfers to the CPU from memory. 110Dual channels allows for 128 bit data transfers to the CPU from memory.
111Some newer chipsets allow for more than 2 channels, like Fully Buffered DIMMs
112(FB-DIMMs). The following example will assume 2 channels:
112 113
113 114
114 Channel 0 Channel 1 115 Channel 0 Channel 1
@@ -234,23 +235,15 @@ Polling period control file:
234 The time period, in milliseconds, for polling for error information. 235 The time period, in milliseconds, for polling for error information.
235 Too small a value wastes resources. Too large a value might delay 236 Too small a value wastes resources. Too large a value might delay
236 necessary handling of errors and might loose valuable information for 237 necessary handling of errors and might loose valuable information for
237 locating the error. 1000 milliseconds (once each second) is about 238 locating the error. 1000 milliseconds (once each second) is the current
238 right for most uses. 239 default. Systems which require all the bandwidth they can get, may
240 increase this.
239 241
240 LOAD TIME: module/kernel parameter: poll_msec=[0|1] 242 LOAD TIME: module/kernel parameter: poll_msec=[0|1]
241 243
242 RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec 244 RUN TIME: echo "1000" >/sys/devices/system/edac/mc/poll_msec
243 245
244 246
245Module Version read-only attribute file:
246
247 'mc_version'
248
249 The EDAC CORE module's version and compile date are shown here to
250 indicate what EDAC is running.
251
252
253
254============================================================================ 247============================================================================
255'mcX' DIRECTORIES 248'mcX' DIRECTORIES
256 249
@@ -284,35 +277,6 @@ Seconds since last counter reset control file:
284 277
285 278
286 279
287DIMM capability attribute file:
288
289 'edac_capability'
290
291 The EDAC (Error Detection and Correction) capabilities/modes of
292 the memory controller hardware.
293
294
295DIMM Current Capability attribute file:
296
297 'edac_current_capability'
298
299 The EDAC capabilities available with the hardware
300 configuration. This may not be the same as "EDAC capability"
301 if the correct memory is not used. If a memory controller is
302 capable of EDAC, but DIMMs without check bits are in use, then
303 Parity, SECDED, S4ECD4ED capabilities will not be available
304 even though the memory controller might be capable of those
305 modes with the proper memory loaded.
306
307
308Memory Type supported on this controller attribute file:
309
310 'supported_mem_type'
311
312 This attribute file displays the memory type, usually
313 buffered and unbuffered DIMMs.
314
315
316Memory Controller name attribute file: 280Memory Controller name attribute file:
317 281
318 'mc_name' 282 'mc_name'
@@ -321,16 +285,6 @@ Memory Controller name attribute file:
321 that is being utilized. 285 that is being utilized.
322 286
323 287
324Memory Controller Module name attribute file:
325
326 'module_name'
327
328 This attribute file displays the memory controller module name,
329 version and date built. The name of the memory controller
330 hardware - some drivers work with multiple controllers and
331 this field shows which hardware is present.
332
333
334Total memory managed by this memory controller attribute file: 288Total memory managed by this memory controller attribute file:
335 289
336 'size_mb' 290 'size_mb'
@@ -432,6 +386,9 @@ Memory Type attribute file:
432 386
433 This attribute file will display what type of memory is currently 387 This attribute file will display what type of memory is currently
434 on this csrow. Normally, either buffered or unbuffered memory. 388 on this csrow. Normally, either buffered or unbuffered memory.
389 Examples:
390 Registered-DDR
391 Unbuffered-DDR
435 392
436 393
437EDAC Mode of operation attribute file: 394EDAC Mode of operation attribute file:
@@ -446,8 +403,13 @@ Device type attribute file:
446 403
447 'dev_type' 404 'dev_type'
448 405
449 This attribute file will display what type of DIMM device is 406 This attribute file will display what type of DRAM device is
450 being utilized. Example: x4 407 being utilized on this DIMM.
408 Examples:
409 x1
410 x2
411 x4
412 x8
451 413
452 414
453Channel 0 CE Count attribute file: 415Channel 0 CE Count attribute file:
@@ -522,10 +484,10 @@ SYSTEM LOGGING
522If logging for UEs and CEs are enabled then system logs will have 484If logging for UEs and CEs are enabled then system logs will have
523error notices indicating errors that have been detected: 485error notices indicating errors that have been detected:
524 486
525MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0, 487EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0,
526channel 1 "DIMM_B1": amd76x_edac 488channel 1 "DIMM_B1": amd76x_edac
527 489
528MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0, 490EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0,
529channel 1 "DIMM_B1": amd76x_edac 491channel 1 "DIMM_B1": amd76x_edac
530 492
531 493
@@ -610,64 +572,4 @@ Parity Count:
610 572
611 573
612 574
613PCI Device Whitelist:
614
615 'pci_parity_whitelist'
616
617 This control file allows for an explicit list of PCI devices to be
618 scanned for parity errors. Only devices found on this list will
619 be examined. The list is a line of hexadecimal VENDOR and DEVICE
620 ID tuples:
621
622 1022:7450,1434:16a6
623
624 One or more can be inserted, separated by a comma.
625
626 To write the above list doing the following as one command line:
627
628 echo "1022:7450,1434:16a6"
629 > /sys/devices/system/edac/pci/pci_parity_whitelist
630
631
632
633 To display what the whitelist is, simply 'cat' the same file.
634
635
636PCI Device Blacklist:
637
638 'pci_parity_blacklist'
639
640 This control file allows for a list of PCI devices to be
641 skipped for scanning.
642 The list is a line of hexadecimal VENDOR and DEVICE ID tuples:
643
644 1022:7450,1434:16a6
645
646 One or more can be inserted, separated by a comma.
647
648 To write the above list doing the following as one command line:
649
650 echo "1022:7450,1434:16a6"
651 > /sys/devices/system/edac/pci/pci_parity_blacklist
652
653
654 To display what the whitelist currently contains,
655 simply 'cat' the same file.
656
657======================================================================= 575=======================================================================
658
659PCI Vendor and Devices IDs can be obtained with the lspci command. Using
660the -n option lspci will display the vendor and device IDs. The system
661administrator will have to determine which devices should be scanned or
662skipped.
663
664
665
666The two lists (white and black) are prioritized. blacklist is the lower
667priority and will NOT be utilized when a whitelist has been set.
668Turn OFF a whitelist by an empty echo command:
669
670 echo > /sys/devices/system/edac/pci/pci_parity_whitelist
671
672and any previous blacklist will be utilized.
673
diff --git a/Documentation/fb/imacfb.txt b/Documentation/fb/imacfb.txt
new file mode 100644
index 000000000000..759028545a7e
--- /dev/null
+++ b/Documentation/fb/imacfb.txt
@@ -0,0 +1,31 @@
1
2What is imacfb?
3===============
4
5This is a generic EFI platform driver for Intel based Apple computers.
6Imacfb is only for EFI booted Intel Macs.
7
8Supported Hardware
9==================
10
11iMac 17"/20"
12Macbook
13Macbook Pro 15"/17"
14MacMini
15
16How to use it?
17==============
18
19Imacfb does not have any kind of autodetection of your machine.
20You have to add the following kernel parameters in your elilo.conf:
21 Macbook :
22 video=imacfb:macbook
23 MacMini :
24 video=imacfb:mini
25 Macbook Pro 15", iMac 17" :
26 video=imacfb:i17
27 Macbook Pro 17", iMac 20" :
28 video=imacfb:i20
29
30--
31Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 99f219a01e0e..552507fe9a7e 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -55,14 +55,6 @@ Who: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
55 55
56--------------------------- 56---------------------------
57 57
58What: remove EXPORT_SYMBOL(insert_resource)
59When: April 2006
60Files: kernel/resource.c
61Why: No modular usage in the kernel.
62Who: Adrian Bunk <bunk@stusta.de>
63
64---------------------------
65
66What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl]) 58What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
67When: November 2005 59When: November 2005
68Files: drivers/pcmcia/: pcmcia_ioctl.c 60Files: drivers/pcmcia/: pcmcia_ioctl.c
@@ -128,6 +120,13 @@ Who: Adrian Bunk <bunk@stusta.de>
128 120
129--------------------------- 121---------------------------
130 122
123What: drivers depending on OSS_OBSOLETE_DRIVER
124When: options in 2.6.20, code in 2.6.22
125Why: OSS drivers with ALSA replacements
126Who: Adrian Bunk <bunk@stusta.de>
127
128---------------------------
129
131What: pci_module_init(driver) 130What: pci_module_init(driver)
132When: January 2007 131When: January 2007
133Why: Is replaced by pci_register_driver(pci_driver). 132Why: Is replaced by pci_register_driver(pci_driver).
@@ -166,17 +165,6 @@ Who: Arjan van de Ven <arjan@linux.intel.com>
166 165
167--------------------------- 166---------------------------
168 167
169What: remove EXPORT_SYMBOL(tasklist_lock)
170When: August 2006
171Files: kernel/fork.c
172Why: tasklist_lock protects the kernel internal task list. Modules have
173 no business looking at it, and all instances in drivers have been due
174 to use of too-lowlevel APIs. Having this symbol exported prevents
175 moving to more scalable locking schemes for the task list.
176Who: Christoph Hellwig <hch@lst.de>
177
178---------------------------
179
180What: mount/umount uevents 168What: mount/umount uevents
181When: February 2007 169When: February 2007
182Why: These events are not correct, and do not properly let userspace know 170Why: These events are not correct, and do not properly let userspace know
@@ -266,3 +254,43 @@ Why: The interrupt related SA_* flags are replaced by IRQF_* to move them
266Who: Thomas Gleixner <tglx@linutronix.de> 254Who: Thomas Gleixner <tglx@linutronix.de>
267 255
268--------------------------- 256---------------------------
257
258What: i2c-ite and i2c-algo-ite drivers
259When: September 2006
260Why: These drivers never compiled since they were added to the kernel
261 tree 5 years ago. This feature removal can be reevaluated if
262 someone shows interest in the drivers, fixes them and takes over
263 maintenance.
264 http://marc.theaimsgroup.com/?l=linux-mips&m=115040510817448
265Who: Jean Delvare <khali@linux-fr.org>
266
267---------------------------
268
269What: Bridge netfilter deferred IPv4/IPv6 output hook calling
270When: January 2007
271Why: The deferred output hooks are a layering violation causing unusual
272 and broken behaviour on bridge devices. Examples of things they
273 break include QoS classification using the MARK or CLASSIFY targets,
274 the IPsec policy match and connection tracking with VLANs on a
275 bridge. Their only use is to enable bridge output port filtering
276 within iptables with the physdev match, which can also be done by
277 combining iptables and ebtables using netfilter marks. Until it
278 will get removed the hook deferral is disabled by default and is
279 only enabled when needed.
280
281Who: Patrick McHardy <kaber@trash.net>
282
283---------------------------
284
285What: frame diverter
286When: November 2006
287Why: The frame diverter is included in most distribution kernels, but is
288 broken. It does not correctly handle many things:
289 - IPV6
290 - non-linear skb's
291 - network device RCU on removal
292 - input frames not correctly checked for protocol errors
293 It also adds allocation overhead even if not enabled.
294 It is not clear if anyone is still using it.
295Who: Stephen Hemminger <shemminger@osdl.org>
296
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 66fdc0744fe0..16dec61d7671 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -62,8 +62,8 @@ ramfs-rootfs-initramfs.txt
62 - info on the 'in memory' filesystems ramfs, rootfs and initramfs. 62 - info on the 'in memory' filesystems ramfs, rootfs and initramfs.
63reiser4.txt 63reiser4.txt
64 - info on the Reiser4 filesystem based on dancing tree algorithms. 64 - info on the Reiser4 filesystem based on dancing tree algorithms.
65relayfs.txt 65relay.txt
66 - info on relayfs, for efficient streaming from kernel to user space. 66 - info on relay, for efficient streaming from kernel to user space.
67romfs.txt 67romfs.txt
68 - description of the ROMFS filesystem. 68 - description of the ROMFS filesystem.
69smbfs.txt 69smbfs.txt
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d31efbbdfe50..247d7f619aa2 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -142,8 +142,8 @@ see also dquot_operations section.
142 142
143--------------------------- file_system_type --------------------------- 143--------------------------- file_system_type ---------------------------
144prototypes: 144prototypes:
145 struct int (*get_sb) (struct file_system_type *, int, 145 int (*get_sb) (struct file_system_type *, int,
146 const char *, void *, struct vfsmount *); 146 const char *, void *, struct vfsmount *);
147 void (*kill_sb) (struct super_block *); 147 void (*kill_sb) (struct super_block *);
148locking rules: 148locking rules:
149 may block BKL 149 may block BKL
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt
new file mode 100644
index 000000000000..d6788dae0349
--- /dev/null
+++ b/Documentation/filesystems/relay.txt
@@ -0,0 +1,479 @@
1relay interface (formerly relayfs)
2==================================
3
4The relay interface provides a means for kernel applications to
5efficiently log and transfer large quantities of data from the kernel
6to userspace via user-defined 'relay channels'.
7
8A 'relay channel' is a kernel->user data relay mechanism implemented
9as a set of per-cpu kernel buffers ('channel buffers'), each
10represented as a regular file ('relay file') in user space. Kernel
11clients write into the channel buffers using efficient write
12functions; these automatically log into the current cpu's channel
13buffer. User space applications mmap() or read() from the relay files
14and retrieve the data as it becomes available. The relay files
15themselves are files created in a host filesystem, e.g. debugfs, and
16are associated with the channel buffers using the API described below.
17
18The format of the data logged into the channel buffers is completely
19up to the kernel client; the relay interface does however provide
20hooks which allow kernel clients to impose some structure on the
21buffer data. The relay interface doesn't implement any form of data
22filtering - this also is left to the kernel client. The purpose is to
23keep things as simple as possible.
24
25This document provides an overview of the relay interface API. The
26details of the function parameters are documented along with the
27functions in the relay interface code - please see that for details.
28
29Semantics
30=========
31
32Each relay channel has one buffer per CPU, each buffer has one or more
33sub-buffers. Messages are written to the first sub-buffer until it is
34too full to contain a new message, in which case it is written to
35the next (if available). Messages are never split across sub-buffers.
36At this point, userspace can be notified so it empties the first
37sub-buffer, while the kernel continues writing to the next.
38
39When notified that a sub-buffer is full, the kernel knows how many
40bytes of it are padding i.e. unused space occurring because a complete
41message couldn't fit into a sub-buffer. Userspace can use this
42knowledge to copy only valid data.
43
44After copying it, userspace can notify the kernel that a sub-buffer
45has been consumed.
46
47A relay channel can operate in a mode where it will overwrite data not
48yet collected by userspace, and not wait for it to be consumed.
49
50The relay channel itself does not provide for communication of such
51data between userspace and kernel, allowing the kernel side to remain
52simple and not impose a single interface on userspace. It does
53provide a set of examples and a separate helper though, described
54below.
55
56The read() interface both removes padding and internally consumes the
57read sub-buffers; thus in cases where read(2) is being used to drain
58the channel buffers, special-purpose communication between kernel and
59user isn't necessary for basic operation.
60
61One of the major goals of the relay interface is to provide a low
62overhead mechanism for conveying kernel data to userspace. While the
63read() interface is easy to use, it's not as efficient as the mmap()
64approach; the example code attempts to make the tradeoff between the
65two approaches as small as possible.
66
67klog and relay-apps example code
68================================
69
70The relay interface itself is ready to use, but to make things easier,
71a couple of simple utility functions and a set of examples are provided.
72
73The relay-apps example tarball, available on the relay sourceforge
74site, contains a set of self-contained examples, each consisting of a
75pair of .c files containing boilerplate code for each of the user and
76kernel sides of a relay application. When combined these two sets of
77boilerplate code provide glue to easily stream data to disk, without
78having to bother with mundane housekeeping chores.
79
80The 'klog debugging functions' patch (klog.patch in the relay-apps
81tarball) provides a couple of high-level logging functions to the
82kernel which allow writing formatted text or raw data to a channel,
83regardless of whether a channel to write into exists or not, or even
84whether the relay interface is compiled into the kernel or not. These
85functions allow you to put unconditional 'trace' statements anywhere
86in the kernel or kernel modules; only when there is a 'klog handler'
87registered will data actually be logged (see the klog and kleak
88examples for details).
89
90It is of course possible to use the relay interface from scratch,
91i.e. without using any of the relay-apps example code or klog, but
92you'll have to implement communication between userspace and kernel,
93allowing both to convey the state of buffers (full, empty, amount of
94padding). The read() interface both removes padding and internally
95consumes the read sub-buffers; thus in cases where read(2) is being
96used to drain the channel buffers, special-purpose communication
97between kernel and user isn't necessary for basic operation. Things
98such as buffer-full conditions would still need to be communicated via
99some channel though.
100
101klog and the relay-apps examples can be found in the relay-apps
102tarball on http://relayfs.sourceforge.net
103
104The relay interface user space API
105==================================
106
107The relay interface implements basic file operations for user space
108access to relay channel buffer data. Here are the file operations
109that are available and some comments regarding their behavior:
110
111open() enables user to open an _existing_ channel buffer.
112
113mmap() results in channel buffer being mapped into the caller's
114 memory space. Note that you can't do a partial mmap - you
115 must map the entire file, which is NRBUF * SUBBUFSIZE.
116
117read() read the contents of a channel buffer. The bytes read are
118 'consumed' by the reader, i.e. they won't be available
119 again to subsequent reads. If the channel is being used
120 in no-overwrite mode (the default), it can be read at any
121 time even if there's an active kernel writer. If the
122 channel is being used in overwrite mode and there are
123 active channel writers, results may be unpredictable -
124 users should make sure that all logging to the channel has
125 ended before using read() with overwrite mode. Sub-buffer
126 padding is automatically removed and will not be seen by
127 the reader.
128
129sendfile() transfer data from a channel buffer to an output file
130 descriptor. Sub-buffer padding is automatically removed
131 and will not be seen by the reader.
132
133poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
134 notified when sub-buffer boundaries are crossed.
135
136close() decrements the channel buffer's refcount. When the refcount
137 reaches 0, i.e. when no process or kernel client has the
138 buffer open, the channel buffer is freed.
139
140In order for a user application to make use of relay files, the
141host filesystem must be mounted. For example,
142
143 mount -t debugfs debugfs /debug
144
145NOTE: the host filesystem doesn't need to be mounted for kernel
146 clients to create or use channels - it only needs to be
147 mounted when user space applications need access to the buffer
148 data.
149
150
151The relay interface kernel API
152==============================
153
154Here's a summary of the API the relay interface provides to in-kernel clients:
155
156TBD(curr. line MT:/API/)
157 channel management functions:
158
159 relay_open(base_filename, parent, subbuf_size, n_subbufs,
160 callbacks)
161 relay_close(chan)
162 relay_flush(chan)
163 relay_reset(chan)
164
165 channel management typically called on instigation of userspace:
166
167 relay_subbufs_consumed(chan, cpu, subbufs_consumed)
168
169 write functions:
170
171 relay_write(chan, data, length)
172 __relay_write(chan, data, length)
173 relay_reserve(chan, length)
174
175 callbacks:
176
177 subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
178 buf_mapped(buf, filp)
179 buf_unmapped(buf, filp)
180 create_buf_file(filename, parent, mode, buf, is_global)
181 remove_buf_file(dentry)
182
183 helper functions:
184
185 relay_buf_full(buf)
186 subbuf_start_reserve(buf, length)
187
188
189Creating a channel
190------------------
191
192relay_open() is used to create a channel, along with its per-cpu
193channel buffers. Each channel buffer will have an associated file
194created for it in the host filesystem, which can be mmapped or
195read from in user space. The files are named basename0...basenameN-1
196where N is the number of online cpus, and by default will be created
197in the root of the filesystem (if the parent param is NULL). If you
198want a directory structure to contain your relay files, you should
199create it using the host filesystem's directory creation function,
200e.g. debugfs_create_dir(), and pass the parent directory to
201relay_open(). Users are responsible for cleaning up any directory
202structure they create, when the channel is closed - again the host
203filesystem's directory removal functions should be used for that,
204e.g. debugfs_remove().
205
206In order for a channel to be created and the host filesystem's files
207associated with its channel buffers, the user must provide definitions
208for two callback functions, create_buf_file() and remove_buf_file().
209create_buf_file() is called once for each per-cpu buffer from
210relay_open() and allows the user to create the file which will be used
211to represent the corresponding channel buffer. The callback should
212return the dentry of the file created to represent the channel buffer.
213remove_buf_file() must also be defined; it's responsible for deleting
214the file(s) created in create_buf_file() and is called during
215relay_close().
216
217Here are some typical definitions for these callbacks, in this case
218using debugfs:
219
220/*
221 * create_buf_file() callback. Creates relay file in debugfs.
222 */
223static struct dentry *create_buf_file_handler(const char *filename,
224 struct dentry *parent,
225 int mode,
226 struct rchan_buf *buf,
227 int *is_global)
228{
229 return debugfs_create_file(filename, mode, parent, buf,
230 &relay_file_operations);
231}
232
233/*
234 * remove_buf_file() callback. Removes relay file from debugfs.
235 */
236static int remove_buf_file_handler(struct dentry *dentry)
237{
238 debugfs_remove(dentry);
239
240 return 0;
241}
242
243/*
244 * relay interface callbacks
245 */
246static struct rchan_callbacks relay_callbacks =
247{
248 .create_buf_file = create_buf_file_handler,
249 .remove_buf_file = remove_buf_file_handler,
250};
251
252And an example relay_open() invocation using them:
253
254 chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks);
255
256If the create_buf_file() callback fails, or isn't defined, channel
257creation and thus relay_open() will fail.
258
259The total size of each per-cpu buffer is calculated by multiplying the
260number of sub-buffers by the sub-buffer size passed into relay_open().
261The idea behind sub-buffers is that they're basically an extension of
262double-buffering to N buffers, and they also allow applications to
263easily implement random-access-on-buffer-boundary schemes, which can
264be important for some high-volume applications. The number and size
265of sub-buffers is completely dependent on the application and even for
266the same application, different conditions will warrant different
267values for these parameters at different times. Typically, the right
268values to use are best decided after some experimentation; in general,
269though, it's safe to assume that having only 1 sub-buffer is a bad
270idea - you're guaranteed to either overwrite data or lose events
271depending on the channel mode being used.
272
273The create_buf_file() implementation can also be defined in such a way
274as to allow the creation of a single 'global' buffer instead of the
275default per-cpu set. This can be useful for applications interested
276mainly in seeing the relative ordering of system-wide events without
277the need to bother with saving explicit timestamps for the purpose of
278merging/sorting per-cpu files in a postprocessing step.
279
280To have relay_open() create a global buffer, the create_buf_file()
281implementation should set the value of the is_global outparam to a
282non-zero value in addition to creating the file that will be used to
283represent the single buffer. In the case of a global buffer,
284create_buf_file() and remove_buf_file() will be called only once. The
285normal channel-writing functions, e.g. relay_write(), can still be
286used - writes from any cpu will transparently end up in the global
287buffer - but since it is a global buffer, callers should make sure
288they use the proper locking for such a buffer, either by wrapping
289writes in a spinlock, or by copying a write function from relay.h and
290creating a local version that internally does the proper locking.
291
292Channel 'modes'
293---------------
294
295relay channels can be used in either of two modes - 'overwrite' or
296'no-overwrite'. The mode is entirely determined by the implementation
297of the subbuf_start() callback, as described below. The default if no
298subbuf_start() callback is defined is 'no-overwrite' mode. If the
299default mode suits your needs, and you plan to use the read()
300interface to retrieve channel data, you can ignore the details of this
301section, as it pertains mainly to mmap() implementations.
302
303In 'overwrite' mode, also known as 'flight recorder' mode, writes
304continuously cycle around the buffer and will never fail, but will
305unconditionally overwrite old data regardless of whether it's actually
306been consumed. In no-overwrite mode, writes will fail, i.e. data will
307be lost, if the number of unconsumed sub-buffers equals the total
308number of sub-buffers in the channel. It should be clear that if
309there is no consumer or if the consumer can't consume sub-buffers fast
310enough, data will be lost in either case; the only difference is
311whether data is lost from the beginning or the end of a buffer.
312
313As explained above, a relay channel is made up of one or more
314per-cpu channel buffers, each implemented as a circular buffer
315subdivided into one or more sub-buffers. Messages are written into
316the current sub-buffer of the channel's current per-cpu buffer via the
317write functions described below. Whenever a message can't fit into
318the current sub-buffer, because there's no room left for it, the
319client is notified via the subbuf_start() callback that a switch to a
320new sub-buffer is about to occur. The client uses this callback to 1)
321initialize the next sub-buffer if appropriate 2) finalize the previous
322sub-buffer if appropriate and 3) return a boolean value indicating
323whether or not to actually move on to the next sub-buffer.
324
325To implement 'no-overwrite' mode, the kernel client would provide
326an implementation of the subbuf_start() callback something like the
327following:
328
329static int subbuf_start(struct rchan_buf *buf,
330 void *subbuf,
331 void *prev_subbuf,
332 unsigned int prev_padding)
333{
334 if (prev_subbuf)
335 *((unsigned *)prev_subbuf) = prev_padding;
336
337 if (relay_buf_full(buf))
338 return 0;
339
340 subbuf_start_reserve(buf, sizeof(unsigned int));
341
342 return 1;
343}
344
345If the current buffer is full, i.e. all sub-buffers remain unconsumed,
346the callback returns 0 to indicate that the buffer switch should not
347occur yet, i.e. until the consumer has had a chance to read the
348current set of ready sub-buffers. For the relay_buf_full() function
349to make sense, the consumer is responsible for notifying the relay
350interface when sub-buffers have been consumed via
351relay_subbufs_consumed(). Any subsequent attempts to write into the
352buffer will again invoke the subbuf_start() callback with the same
353parameters; only when the consumer has consumed one or more of the
354ready sub-buffers will relay_buf_full() return 0, in which case the
355buffer switch can continue.
356
357The implementation of the subbuf_start() callback for 'overwrite' mode
358would be very similar:
359
360static int subbuf_start(struct rchan_buf *buf,
361 void *subbuf,
362 void *prev_subbuf,
363 unsigned int prev_padding)
364{
365 if (prev_subbuf)
366 *((unsigned *)prev_subbuf) = prev_padding;
367
368 subbuf_start_reserve(buf, sizeof(unsigned int));
369
370 return 1;
371}
372
373In this case, the relay_buf_full() check is meaningless and the
374callback always returns 1, causing the buffer switch to occur
375unconditionally. It's also meaningless for the client to use the
376relay_subbufs_consumed() function in this mode, as it's never
377consulted.
378
379The default subbuf_start() implementation, used if the client doesn't
380define any callbacks, or doesn't define the subbuf_start() callback,
381implements the simplest possible 'no-overwrite' mode, i.e. it does
382nothing but return 0.
383
384Header information can be reserved at the beginning of each sub-buffer
385by calling the subbuf_start_reserve() helper function from within the
386subbuf_start() callback. This reserved area can be used to store
387whatever information the client wants. In the example above, room is
388reserved in each sub-buffer to store the padding count for that
389sub-buffer. This is filled in for the previous sub-buffer in the
390subbuf_start() implementation; the padding value for the previous
391sub-buffer is passed into the subbuf_start() callback along with a
392pointer to the previous sub-buffer, since the padding value isn't
393known until a sub-buffer is filled. The subbuf_start() callback is
394also called for the first sub-buffer when the channel is opened, to
395give the client a chance to reserve space in it. In this case the
396previous sub-buffer pointer passed into the callback will be NULL, so
397the client should check the value of the prev_subbuf pointer before
398writing into the previous sub-buffer.
399
400Writing to a channel
401--------------------
402
403Kernel clients write data into the current cpu's channel buffer using
404relay_write() or __relay_write(). relay_write() is the main logging
405function - it uses local_irq_save() to protect the buffer and should be
406used if you might be logging from interrupt context. If you know
407you'll never be logging from interrupt context, you can use
408__relay_write(), which only disables preemption. These functions
409don't return a value, so you can't determine whether or not they
410failed - the assumption is that you wouldn't want to check a return
411value in the fast logging path anyway, and that they'll always succeed
412unless the buffer is full and no-overwrite mode is being used, in
413which case you can detect a failed write in the subbuf_start()
414callback by calling the relay_buf_full() helper function.
415
416relay_reserve() is used to reserve a slot in a channel buffer which
417can be written to later. This would typically be used in applications
418that need to write directly into a channel buffer without having to
419stage data in a temporary buffer beforehand. Because the actual write
420may not happen immediately after the slot is reserved, applications
421using relay_reserve() can keep a count of the number of bytes actually
422written, either in space reserved in the sub-buffers themselves or as
423a separate array. See the 'reserve' example in the relay-apps tarball
424at http://relayfs.sourceforge.net for an example of how this can be
425done. Because the write is under control of the client and is
426separated from the reserve, relay_reserve() doesn't protect the buffer
427at all - it's up to the client to provide the appropriate
428synchronization when using relay_reserve().
429
430Closing a channel
431-----------------
432
433The client calls relay_close() when it's finished using the channel.
434The channel and its associated buffers are destroyed when there are no
435longer any references to any of the channel buffers. relay_flush()
436forces a sub-buffer switch on all the channel buffers, and can be used
437to finalize and process the last sub-buffers before the channel is
438closed.
439
440Misc
441----
442
443Some applications may want to keep a channel around and re-use it
444rather than open and close a new channel for each use. relay_reset()
445can be used for this purpose - it resets a channel to its initial
446state without reallocating channel buffer memory or destroying
447existing mappings. It should however only be called when it's safe to
448do so, i.e. when the channel isn't currently being written to.
449
450Finally, there are a couple of utility callbacks that can be used for
451different purposes. buf_mapped() is called whenever a channel buffer
452is mmapped from user space and buf_unmapped() is called when it's
453unmapped. The client can use this notification to trigger actions
454within the kernel application, such as enabling/disabling logging to
455the channel.
456
457
458Resources
459=========
460
461For news, example code, mailing list, etc. see the relay interface homepage:
462
463 http://relayfs.sourceforge.net
464
465
466Credits
467=======
468
469The ideas and specs for the relay interface came about as a result of
470discussions on tracing involving the following:
471
472Michel Dagenais <michel.dagenais@polymtl.ca>
473Richard Moore <richardj_moore@uk.ibm.com>
474Bob Wisniewski <bob@watson.ibm.com>
475Karim Yaghmour <karim@opersys.com>
476Tom Zanussi <zanussi@us.ibm.com>
477
478Also thanks to Hubertus Franke for a lot of useful suggestions and bug
479reports.
diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt
deleted file mode 100644
index 5832377b7340..000000000000
--- a/Documentation/filesystems/relayfs.txt
+++ /dev/null
@@ -1,442 +0,0 @@
1
2relayfs - a high-speed data relay filesystem
3============================================
4
5relayfs is a filesystem designed to provide an efficient mechanism for
6tools and facilities to relay large and potentially sustained streams
7of data from kernel space to user space.
8
9The main abstraction of relayfs is the 'channel'. A channel consists
10of a set of per-cpu kernel buffers each represented by a file in the
11relayfs filesystem. Kernel clients write into a channel using
12efficient write functions which automatically log to the current cpu's
13channel buffer. User space applications mmap() the per-cpu files and
14retrieve the data as it becomes available.
15
16The format of the data logged into the channel buffers is completely
17up to the relayfs client; relayfs does however provide hooks which
18allow clients to impose some structure on the buffer data. Nor does
19relayfs implement any form of data filtering - this also is left to
20the client. The purpose is to keep relayfs as simple as possible.
21
22This document provides an overview of the relayfs API. The details of
23the function parameters are documented along with the functions in the
24filesystem code - please see that for details.
25
26Semantics
27=========
28
29Each relayfs channel has one buffer per CPU, each buffer has one or
30more sub-buffers. Messages are written to the first sub-buffer until
31it is too full to contain a new message, in which case it it is
32written to the next (if available). Messages are never split across
33sub-buffers. At this point, userspace can be notified so it empties
34the first sub-buffer, while the kernel continues writing to the next.
35
36When notified that a sub-buffer is full, the kernel knows how many
37bytes of it are padding i.e. unused. Userspace can use this knowledge
38to copy only valid data.
39
40After copying it, userspace can notify the kernel that a sub-buffer
41has been consumed.
42
43relayfs can operate in a mode where it will overwrite data not yet
44collected by userspace, and not wait for it to consume it.
45
46relayfs itself does not provide for communication of such data between
47userspace and kernel, allowing the kernel side to remain simple and
48not impose a single interface on userspace. It does provide a set of
49examples and a separate helper though, described below.
50
51klog and relay-apps example code
52================================
53
54relayfs itself is ready to use, but to make things easier, a couple
55simple utility functions and a set of examples are provided.
56
57The relay-apps example tarball, available on the relayfs sourceforge
58site, contains a set of self-contained examples, each consisting of a
59pair of .c files containing boilerplate code for each of the user and
60kernel sides of a relayfs application; combined these two sets of
61boilerplate code provide glue to easily stream data to disk, without
62having to bother with mundane housekeeping chores.
63
64The 'klog debugging functions' patch (klog.patch in the relay-apps
65tarball) provides a couple of high-level logging functions to the
66kernel which allow writing formatted text or raw data to a channel,
67regardless of whether a channel to write into exists or not, or
68whether relayfs is compiled into the kernel or is configured as a
69module. These functions allow you to put unconditional 'trace'
70statements anywhere in the kernel or kernel modules; only when there
71is a 'klog handler' registered will data actually be logged (see the
72klog and kleak examples for details).
73
74It is of course possible to use relayfs from scratch i.e. without
75using any of the relay-apps example code or klog, but you'll have to
76implement communication between userspace and kernel, allowing both to
77convey the state of buffers (full, empty, amount of padding).
78
79klog and the relay-apps examples can be found in the relay-apps
80tarball on http://relayfs.sourceforge.net
81
82
83The relayfs user space API
84==========================
85
86relayfs implements basic file operations for user space access to
87relayfs channel buffer data. Here are the file operations that are
88available and some comments regarding their behavior:
89
90open() enables user to open an _existing_ buffer.
91
92mmap() results in channel buffer being mapped into the caller's
93 memory space. Note that you can't do a partial mmap - you must
94 map the entire file, which is NRBUF * SUBBUFSIZE.
95
96read() read the contents of a channel buffer. The bytes read are
97 'consumed' by the reader i.e. they won't be available again
98 to subsequent reads. If the channel is being used in
99 no-overwrite mode (the default), it can be read at any time
100 even if there's an active kernel writer. If the channel is
101 being used in overwrite mode and there are active channel
102 writers, results may be unpredictable - users should make
103 sure that all logging to the channel has ended before using
104 read() with overwrite mode.
105
106poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
107 notified when sub-buffer boundaries are crossed.
108
109close() decrements the channel buffer's refcount. When the refcount
110 reaches 0 i.e. when no process or kernel client has the buffer
111 open, the channel buffer is freed.
112
113
114In order for a user application to make use of relayfs files, the
115relayfs filesystem must be mounted. For example,
116
117 mount -t relayfs relayfs /mnt/relay
118
119NOTE: relayfs doesn't need to be mounted for kernel clients to create
120 or use channels - it only needs to be mounted when user space
121 applications need access to the buffer data.
122
123
124The relayfs kernel API
125======================
126
127Here's a summary of the API relayfs provides to in-kernel clients:
128
129
130 channel management functions:
131
132 relay_open(base_filename, parent, subbuf_size, n_subbufs,
133 callbacks)
134 relay_close(chan)
135 relay_flush(chan)
136 relay_reset(chan)
137 relayfs_create_dir(name, parent)
138 relayfs_remove_dir(dentry)
139 relayfs_create_file(name, parent, mode, fops, data)
140 relayfs_remove_file(dentry)
141
142 channel management typically called on instigation of userspace:
143
144 relay_subbufs_consumed(chan, cpu, subbufs_consumed)
145
146 write functions:
147
148 relay_write(chan, data, length)
149 __relay_write(chan, data, length)
150 relay_reserve(chan, length)
151
152 callbacks:
153
154 subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
155 buf_mapped(buf, filp)
156 buf_unmapped(buf, filp)
157 create_buf_file(filename, parent, mode, buf, is_global)
158 remove_buf_file(dentry)
159
160 helper functions:
161
162 relay_buf_full(buf)
163 subbuf_start_reserve(buf, length)
164
165
166Creating a channel
167------------------
168
169relay_open() is used to create a channel, along with its per-cpu
170channel buffers. Each channel buffer will have an associated file
171created for it in the relayfs filesystem, which can be opened and
172mmapped from user space if desired. The files are named
173basename0...basenameN-1 where N is the number of online cpus, and by
174default will be created in the root of the filesystem. If you want a
175directory structure to contain your relayfs files, you can create it
176with relayfs_create_dir() and pass the parent directory to
177relay_open(). Clients are responsible for cleaning up any directory
178structure they create when the channel is closed - use
179relayfs_remove_dir() for that.
180
181The total size of each per-cpu buffer is calculated by multiplying the
182number of sub-buffers by the sub-buffer size passed into relay_open().
183The idea behind sub-buffers is that they're basically an extension of
184double-buffering to N buffers, and they also allow applications to
185easily implement random-access-on-buffer-boundary schemes, which can
186be important for some high-volume applications. The number and size
187of sub-buffers is completely dependent on the application and even for
188the same application, different conditions will warrant different
189values for these parameters at different times. Typically, the right
190values to use are best decided after some experimentation; in general,
191though, it's safe to assume that having only 1 sub-buffer is a bad
192idea - you're guaranteed to either overwrite data or lose events
193depending on the channel mode being used.
194
195Channel 'modes'
196---------------
197
198relayfs channels can be used in either of two modes - 'overwrite' or
199'no-overwrite'. The mode is entirely determined by the implementation
200of the subbuf_start() callback, as described below. In 'overwrite'
201mode, also known as 'flight recorder' mode, writes continuously cycle
202around the buffer and will never fail, but will unconditionally
203overwrite old data regardless of whether it's actually been consumed.
204In no-overwrite mode, writes will fail i.e. data will be lost, if the
205number of unconsumed sub-buffers equals the total number of
206sub-buffers in the channel. It should be clear that if there is no
207consumer or if the consumer can't consume sub-buffers fast enough,
208data will be lost in either case; the only difference is whether data
209is lost from the beginning or the end of a buffer.
210
211As explained above, a relayfs channel is made up of one or more
212per-cpu channel buffers, each implemented as a circular buffer
213subdivided into one or more sub-buffers. Messages are written into
214the current sub-buffer of the channel's current per-cpu buffer via the
215write functions described below. Whenever a message can't fit into
216the current sub-buffer, because there's no room left for it, the
217client is notified via the subbuf_start() callback that a switch to a
218new sub-buffer is about to occur. The client uses this callback to 1)
219initialize the next sub-buffer if appropriate 2) finalize the previous
220sub-buffer if appropriate and 3) return a boolean value indicating
221whether or not to actually go ahead with the sub-buffer switch.
222
223To implement 'no-overwrite' mode, the userspace client would provide
224an implementation of the subbuf_start() callback something like the
225following:
226
227static int subbuf_start(struct rchan_buf *buf,
228 void *subbuf,
229 void *prev_subbuf,
230 unsigned int prev_padding)
231{
232 if (prev_subbuf)
233 *((unsigned *)prev_subbuf) = prev_padding;
234
235 if (relay_buf_full(buf))
236 return 0;
237
238 subbuf_start_reserve(buf, sizeof(unsigned int));
239
240 return 1;
241}
242
243If the current buffer is full i.e. all sub-buffers remain unconsumed,
244the callback returns 0 to indicate that the buffer switch should not
245occur yet i.e. until the consumer has had a chance to read the current
246set of ready sub-buffers. For the relay_buf_full() function to make
247sense, the consumer is responsible for notifying relayfs when
248sub-buffers have been consumed via relay_subbufs_consumed(). Any
249subsequent attempts to write into the buffer will again invoke the
250subbuf_start() callback with the same parameters; only when the
251consumer has consumed one or more of the ready sub-buffers will
252relay_buf_full() return 0, in which case the buffer switch can
253continue.
254
255The implementation of the subbuf_start() callback for 'overwrite' mode
256would be very similar:
257
258static int subbuf_start(struct rchan_buf *buf,
259 void *subbuf,
260 void *prev_subbuf,
261 unsigned int prev_padding)
262{
263 if (prev_subbuf)
264 *((unsigned *)prev_subbuf) = prev_padding;
265
266 subbuf_start_reserve(buf, sizeof(unsigned int));
267
268 return 1;
269}
270
271In this case, the relay_buf_full() check is meaningless and the
272callback always returns 1, causing the buffer switch to occur
273unconditionally. It's also meaningless for the client to use the
274relay_subbufs_consumed() function in this mode, as it's never
275consulted.
276
277The default subbuf_start() implementation, used if the client doesn't
278define any callbacks, or doesn't define the subbuf_start() callback,
279implements the simplest possible 'no-overwrite' mode i.e. it does
280nothing but return 0.
281
282Header information can be reserved at the beginning of each sub-buffer
283by calling the subbuf_start_reserve() helper function from within the
284subbuf_start() callback. This reserved area can be used to store
285whatever information the client wants. In the example above, room is
286reserved in each sub-buffer to store the padding count for that
287sub-buffer. This is filled in for the previous sub-buffer in the
288subbuf_start() implementation; the padding value for the previous
289sub-buffer is passed into the subbuf_start() callback along with a
290pointer to the previous sub-buffer, since the padding value isn't
291known until a sub-buffer is filled. The subbuf_start() callback is
292also called for the first sub-buffer when the channel is opened, to
293give the client a chance to reserve space in it. In this case the
294previous sub-buffer pointer passed into the callback will be NULL, so
295the client should check the value of the prev_subbuf pointer before
296writing into the previous sub-buffer.
297
298Writing to a channel
299--------------------
300
301kernel clients write data into the current cpu's channel buffer using
302relay_write() or __relay_write(). relay_write() is the main logging
303function - it uses local_irq_save() to protect the buffer and should be
304used if you might be logging from interrupt context. If you know
305you'll never be logging from interrupt context, you can use
306__relay_write(), which only disables preemption. These functions
307don't return a value, so you can't determine whether or not they
308failed - the assumption is that you wouldn't want to check a return
309value in the fast logging path anyway, and that they'll always succeed
310unless the buffer is full and no-overwrite mode is being used, in
311which case you can detect a failed write in the subbuf_start()
312callback by calling the relay_buf_full() helper function.
313
314relay_reserve() is used to reserve a slot in a channel buffer which
315can be written to later. This would typically be used in applications
316that need to write directly into a channel buffer without having to
317stage data in a temporary buffer beforehand. Because the actual write
318may not happen immediately after the slot is reserved, applications
319using relay_reserve() can keep a count of the number of bytes actually
320written, either in space reserved in the sub-buffers themselves or as
321a separate array. See the 'reserve' example in the relay-apps tarball
322at http://relayfs.sourceforge.net for an example of how this can be
323done. Because the write is under control of the client and is
324separated from the reserve, relay_reserve() doesn't protect the buffer
325at all - it's up to the client to provide the appropriate
326synchronization when using relay_reserve().
327
328Closing a channel
329-----------------
330
331The client calls relay_close() when it's finished using the channel.
332The channel and its associated buffers are destroyed when there are no
333longer any references to any of the channel buffers. relay_flush()
334forces a sub-buffer switch on all the channel buffers, and can be used
335to finalize and process the last sub-buffers before the channel is
336closed.
337
338Creating non-relay files
339------------------------
340
341relay_open() automatically creates files in the relayfs filesystem to
342represent the per-cpu kernel buffers; it's often useful for
343applications to be able to create their own files alongside the relay
344files in the relayfs filesystem as well e.g. 'control' files much like
345those created in /proc or debugfs for similar purposes, used to
346communicate control information between the kernel and user sides of a
347relayfs application. For this purpose the relayfs_create_file() and
348relayfs_remove_file() API functions exist. For relayfs_create_file(),
349the caller passes in a set of user-defined file operations to be used
350for the file and an optional void * to a user-specified data item,
351which will be accessible via inode->u.generic_ip (see the relay-apps
352tarball for examples). The file_operations are a required parameter
353to relayfs_create_file() and thus the semantics of these files are
354completely defined by the caller.
355
356See the relay-apps tarball at http://relayfs.sourceforge.net for
357examples of how these non-relay files are meant to be used.
358
359Creating relay files in other filesystems
360-----------------------------------------
361
362By default of course, relay_open() creates relay files in the relayfs
363filesystem. Because relay_file_operations is exported, however, it's
364also possible to create and use relay files in other pseudo-filesystems
365such as debugfs.
366
367For this purpose, two callback functions are provided,
368create_buf_file() and remove_buf_file(). create_buf_file() is called
369once for each per-cpu buffer from relay_open() to allow the client to
370create a file to be used to represent the corresponding buffer; if
371this callback is not defined, the default implementation will create
372and return a file in the relayfs filesystem to represent the buffer.
373The callback should return the dentry of the file created to represent
374the relay buffer. Note that the parent directory passed to
375relay_open() (and passed along to the callback), if specified, must
376exist in the same filesystem the new relay file is created in. If
377create_buf_file() is defined, remove_buf_file() must also be defined;
378it's responsible for deleting the file(s) created in create_buf_file()
379and is called during relay_close().
380
381The create_buf_file() implementation can also be defined in such a way
382as to allow the creation of a single 'global' buffer instead of the
383default per-cpu set. This can be useful for applications interested
384mainly in seeing the relative ordering of system-wide events without
385the need to bother with saving explicit timestamps for the purpose of
386merging/sorting per-cpu files in a postprocessing step.
387
388To have relay_open() create a global buffer, the create_buf_file()
389implementation should set the value of the is_global outparam to a
390non-zero value in addition to creating the file that will be used to
391represent the single buffer. In the case of a global buffer,
392create_buf_file() and remove_buf_file() will be called only once. The
393normal channel-writing functions e.g. relay_write() can still be used
394- writes from any cpu will transparently end up in the global buffer -
395but since it is a global buffer, callers should make sure they use the
396proper locking for such a buffer, either by wrapping writes in a
397spinlock, or by copying a write function from relayfs_fs.h and
398creating a local version that internally does the proper locking.
399
400See the 'exported-relayfile' examples in the relay-apps tarball for
401examples of creating and using relay files in debugfs.
402
403Misc
404----
405
406Some applications may want to keep a channel around and re-use it
407rather than open and close a new channel for each use. relay_reset()
408can be used for this purpose - it resets a channel to its initial
409state without reallocating channel buffer memory or destroying
410existing mappings. It should however only be called when it's safe to
411do so i.e. when the channel isn't currently being written to.
412
413Finally, there are a couple of utility callbacks that can be used for
414different purposes. buf_mapped() is called whenever a channel buffer
415is mmapped from user space and buf_unmapped() is called when it's
416unmapped. The client can use this notification to trigger actions
417within the kernel application, such as enabling/disabling logging to
418the channel.
419
420
421Resources
422=========
423
424For news, example code, mailing list, etc. see the relayfs homepage:
425
426 http://relayfs.sourceforge.net
427
428
429Credits
430=======
431
432The ideas and specs for relayfs came about as a result of discussions
433on tracing involving the following:
434
435Michel Dagenais <michel.dagenais@polymtl.ca>
436Richard Moore <richardj_moore@uk.ibm.com>
437Bob Wisniewski <bob@watson.ibm.com>
438Karim Yaghmour <karim@opersys.com>
439Tom Zanussi <zanussi@us.ibm.com>
440
441Also thanks to Hubertus Franke for a lot of useful suggestions and bug
442reports.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 9d3aed628bc1..1cb7e8be927a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -113,8 +113,8 @@ members are defined:
113struct file_system_type { 113struct file_system_type {
114 const char *name; 114 const char *name;
115 int fs_flags; 115 int fs_flags;
116 struct int (*get_sb) (struct file_system_type *, int, 116 int (*get_sb) (struct file_system_type *, int,
117 const char *, void *, struct vfsmount *); 117 const char *, void *, struct vfsmount *);
118 void (*kill_sb) (struct super_block *); 118 void (*kill_sb) (struct super_block *);
119 struct module *owner; 119 struct module *owner;
120 struct file_system_type * next; 120 struct file_system_type * next;
diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru
index 69cdb527d58f..b2c0d61b39a2 100644
--- a/Documentation/hwmon/abituguru
+++ b/Documentation/hwmon/abituguru
@@ -2,13 +2,36 @@ Kernel driver abituguru
2======================= 2=======================
3 3
4Supported chips: 4Supported chips:
5 * Abit uGuru (Hardware Monitor part only) 5 * Abit uGuru revision 1-3 (Hardware Monitor part only)
6 Prefix: 'abituguru' 6 Prefix: 'abituguru'
7 Addresses scanned: ISA 0x0E0 7 Addresses scanned: ISA 0x0E0
8 Datasheet: Not available, this driver is based on reverse engineering. 8 Datasheet: Not available, this driver is based on reverse engineering.
9 A "Datasheet" has been written based on the reverse engineering it 9 A "Datasheet" has been written based on the reverse engineering it
10 should be available in the same dir as this file under the name 10 should be available in the same dir as this file under the name
11 abituguru-datasheet. 11 abituguru-datasheet.
12 Note:
13 The uGuru is a microcontroller with onboard firmware which programs
14 it to behave as a hwmon IC. There are many different revisions of the
15 firmware and thus effectively many different revisions of the uGuru.
16 Below is an incomplete list with which revisions are used for which
17 Motherboards:
18 uGuru 1.00 ~ 1.24 (AI7, KV8-MAX3, AN7) (1)
19 uGuru 2.0.0.0 ~ 2.0.4.2 (KV8-PRO)
20 uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
21 uGuru 2.2.0.0 ~ 2.2.0.6 (AA8 Fatal1ty)
22 uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
23 uGuru 3.0.0.0 ~ 3.0.1.2 (AW8, AL8, NI8)
24 uGuru 4.xxxxx? (AT8 32X) (2)
25 1) For revisions 2 and 3 uGuru's the driver can autodetect the
26 sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's
27 this does not always work. For these uGuru's the autodetection can
28 be overridden with the bank1_types module param. For all 3 known
29 revision 1 motherboards the correct use of this param is:
30 bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
31 You may also need to specify the fan_sensors option for these boards
32 fan_sensors=5
33 2) The current version of the abituguru driver is known to NOT work
34 on these Motherboards
12 35
13Authors: 36Authors:
14 Hans de Goede <j.w.r.degoede@hhs.nl>, 37 Hans de Goede <j.w.r.degoede@hhs.nl>,
@@ -22,6 +45,11 @@ Module Parameters
22* force: bool Force detection. Note this parameter only causes the 45* force: bool Force detection. Note this parameter only causes the
23 detection to be skipped, if the uGuru can't be read 46 detection to be skipped, if the uGuru can't be read
24 the module initialization (insmod) will still fail. 47 the module initialization (insmod) will still fail.
48* bank1_types: int[] Bank1 sensortype autodetection override:
49 -1 autodetect (default)
50 0 volt sensor
51 1 temp sensor
52 2 not connected
25* fan_sensors: int Tell the driver how many fan speed sensors there are 53* fan_sensors: int Tell the driver how many fan speed sensors there are
26 on your motherboard. Default: 0 (autodetect). 54 on your motherboard. Default: 0 (autodetect).
27* pwms: int Tell the driver how many fan speed controls (fan 55* pwms: int Tell the driver how many fan speed controls (fan
@@ -29,7 +57,7 @@ Module Parameters
29* verbose: int How verbose should the driver be? (0-3): 57* verbose: int How verbose should the driver be? (0-3):
30 0 normal output 58 0 normal output
31 1 + verbose error reporting 59 1 + verbose error reporting
32 2 + sensors type probing info\n" 60 2 + sensors type probing info (default)
33 3 + retryable error reporting 61 3 + retryable error reporting
34 Default: 2 (the driver is still in the testing phase) 62 Default: 2 (the driver is still in the testing phase)
35 63
diff --git a/Documentation/i2c/busses/i2c-sis96x b/Documentation/i2c/busses/i2c-sis96x
index 00a009b977e9..08d7b2dac69a 100644
--- a/Documentation/i2c/busses/i2c-sis96x
+++ b/Documentation/i2c/busses/i2c-sis96x
@@ -42,8 +42,8 @@ I suspect that this driver could be made to work for the following SiS
42chipsets as well: 635, and 635T. If anyone owns a board with those chips 42chipsets as well: 635, and 635T. If anyone owns a board with those chips
43AND is willing to risk crashing & burning an otherwise well-behaved kernel 43AND is willing to risk crashing & burning an otherwise well-behaved kernel
44in the name of progress... please contact me at <mhoffman@lightlink.com> or 44in the name of progress... please contact me at <mhoffman@lightlink.com> or
45via the project's mailing list: <lm-sensors@lm-sensors.org>. Please 45via the project's mailing list: <i2c@lm-sensors.org>. Please send bug
46send bug reports and/or success stories as well. 46reports and/or success stories as well.
47 47
48 48
49TO DOs 49TO DOs
diff --git a/Documentation/i386/boot.txt b/Documentation/i386/boot.txt
index 10312bebe55d..c51314b1a463 100644
--- a/Documentation/i386/boot.txt
+++ b/Documentation/i386/boot.txt
@@ -181,6 +181,7 @@ filled out, however:
181 5 ELILO 181 5 ELILO
182 7 GRuB 182 7 GRuB
183 8 U-BOOT 183 8 U-BOOT
184 9 Xen
184 185
185 Please contact <hpa@zytor.com> if you need a bootloader ID 186 Please contact <hpa@zytor.com> if you need a bootloader ID
186 value assigned. 187 value assigned.
diff --git a/Documentation/i386/zero-page.txt b/Documentation/i386/zero-page.txt
index df28c7416781..c04a421f4a7c 100644
--- a/Documentation/i386/zero-page.txt
+++ b/Documentation/i386/zero-page.txt
@@ -63,6 +63,10 @@ Offset Type Description
63 2 for bootsect-loader 63 2 for bootsect-loader
64 3 for SYSLINUX 64 3 for SYSLINUX
65 4 for ETHERBOOT 65 4 for ETHERBOOT
66 5 for ELILO
67 7 for GRuB
68 8 for U-BOOT
69 9 for Xen
66 V = version 70 V = version
670x211 char loadflags: 710x211 char loadflags:
68 bit0 = 1: kernel is loaded high (bzImage) 72 bit0 = 1: kernel is loaded high (bzImage)
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 187035560d7f..864ff3283780 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -51,8 +51,6 @@ Debugging Information
51 51
52References 52References
53 53
54 IETF IP over InfiniBand (ipoib) Working Group
55 http://ietf.org/html.charters/ipoib-charter.html
56 Transmission of IP over InfiniBand (IPoIB) (RFC 4391) 54 Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
57 http://ietf.org/rfc/rfc4391.txt 55 http://ietf.org/rfc/rfc4391.txt
58 IP over InfiniBand (IPoIB) Architecture (RFC 4392) 56 IP over InfiniBand (IPoIB) Architecture (RFC 4392)
diff --git a/Documentation/initrd.txt b/Documentation/initrd.txt
index b1b6440237a6..15f1b35deb34 100644
--- a/Documentation/initrd.txt
+++ b/Documentation/initrd.txt
@@ -72,6 +72,22 @@ initrd adds the following new options:
72 initrd is mounted as root, and the normal boot procedure is followed, 72 initrd is mounted as root, and the normal boot procedure is followed,
73 with the RAM disk still mounted as root. 73 with the RAM disk still mounted as root.
74 74
75Compressed cpio images
76----------------------
77
78Recent kernels have support for populating a ramdisk from a compressed cpio
79archive. On such systems, the creation of a ramdisk image doesn't need to
80involve special block devices or loopbacks; you merely create a directory on
81disk with the desired initrd content, cd to that directory, and run (as an
82example):
83
84find . | cpio --quiet -c -o | gzip -9 -n > /boot/imagefile.img
85
86Examining the contents of an existing image file is just as simple:
87
88mkdir /tmp/imagefile
89cd /tmp/imagefile
90gzip -cd /boot/imagefile.img | cpio -imd --quiet
75 91
76Installation 92Installation
77------------ 93------------
diff --git a/Documentation/input/joystick.txt b/Documentation/input/joystick.txt
index d53b857a3710..841c353297e6 100644
--- a/Documentation/input/joystick.txt
+++ b/Documentation/input/joystick.txt
@@ -39,7 +39,6 @@ them. Bug reports and success stories are also welcome.
39 39
40 The input project website is at: 40 The input project website is at:
41 41
42 http://www.suse.cz/development/input/
43 http://atrey.karlin.mff.cuni.cz/~vojtech/input/ 42 http://atrey.karlin.mff.cuni.cz/~vojtech/input/
44 43
45 There is also a mailing list for the driver at: 44 There is also a mailing list for the driver at:
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 14ef3868a328..0706699c9da9 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -407,6 +407,20 @@ more details, with real examples.
407 The second argument is optional, and if supplied will be used 407 The second argument is optional, and if supplied will be used
408 if first argument is not supported. 408 if first argument is not supported.
409 409
410 ld-option
411 ld-option is used to check if $(CC) when used to link object files
412 supports the given option. An optional second option may be
413 specified if the first option is not supported.
414
415 Example:
416 #arch/i386/kernel/Makefile
417 vsyscall-flags += $(call ld-option, -Wl$(comma)--hash-style=sysv)
418
419 In the above example vsyscall-flags will be assigned the option
420 -Wl$(comma)--hash-style=sysv if it is supported by $(CC).
421 The second argument is optional, and if supplied will be used
422 if first argument is not supported.
423
410 cc-option 424 cc-option
411 cc-option is used to check if $(CC) support a given option, and not 425 cc-option is used to check if $(CC) support a given option, and not
412 supported to use an optional second option. 426 supported to use an optional second option.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 149f62ba14a5..87a17337c7f6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -697,6 +697,12 @@ running once the system is up.
697 ips= [HW,SCSI] Adaptec / IBM ServeRAID controller 697 ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
698 See header of drivers/scsi/ips.c. 698 See header of drivers/scsi/ips.c.
699 699
700 ports= [IP_VS_FTP] IPVS ftp helper module
701 Default is 21.
702 Up to 8 (IP_VS_APP_MAX_PORTS) ports
703 may be specified.
704 Format: <port>,<port>....
705
700 irqfixup [HW] 706 irqfixup [HW]
701 When an interrupt is not handled search all handlers 707 When an interrupt is not handled search all handlers
702 for it. Intended to get systems with badly broken 708 for it. Intended to get systems with badly broken
@@ -1029,6 +1035,8 @@ running once the system is up.
1029 1035
1030 nocache [ARM] 1036 nocache [ARM]
1031 1037
1038 nodelayacct [KNL] Disable per-task delay accounting
1039
1032 nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects. 1040 nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
1033 1041
1034 noexec [IA-64] 1042 noexec [IA-64]
@@ -1181,6 +1189,8 @@ running once the system is up.
1181 Mechanism 2. 1189 Mechanism 2.
1182 nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI 1190 nommconf [IA-32,X86_64] Disable use of MMCONFIG for PCI
1183 Configuration 1191 Configuration
1192 mmconf [IA-32,X86_64] Force MMCONFIG. This is useful
1193 to override the builtin blacklist.
1184 nomsi [MSI] If the PCI_MSI kernel config parameter is 1194 nomsi [MSI] If the PCI_MSI kernel config parameter is
1185 enabled, this kernel boot option can be used to 1195 enabled, this kernel boot option can be used to
1186 disable the use of MSI interrupts system-wide. 1196 disable the use of MSI interrupts system-wide.
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index 8d9bffbd192c..949f7b5a2053 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -247,7 +247,7 @@ the object-specific fields, which include:
247- default_attrs: Default attributes to be exported via sysfs when the 247- default_attrs: Default attributes to be exported via sysfs when the
248 object is registered.Note that the last attribute has to be 248 object is registered.Note that the last attribute has to be
249 initialized to NULL ! You can find a complete implementation 249 initialized to NULL ! You can find a complete implementation
250 in drivers/block/genhd.c 250 in block/genhd.c
251 251
252 252
253Instances of struct kobj_type are not registered; only referenced by 253Instances of struct kobj_type are not registered; only referenced by
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 28d1bc3edb1c..46b9b389df35 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1015,10 +1015,9 @@ CPU from reordering them.
1015There are some more advanced barrier functions: 1015There are some more advanced barrier functions:
1016 1016
1017 (*) set_mb(var, value) 1017 (*) set_mb(var, value)
1018 (*) set_wmb(var, value)
1019 1018
1020 These assign the value to the variable and then insert at least a write 1019 This assigns the value to the variable and then inserts at least a write
1021 barrier after it, depending on the function. They aren't guaranteed to 1020 barrier after it, depending on the function. It isn't guaranteed to
1022 insert anything more than a compiler barrier in a UP compilation. 1021 insert anything more than a compiler barrier in a UP compilation.
1023 1022
1024 1023
diff --git a/Documentation/mips/time.README b/Documentation/mips/time.README
index 70bc0dd43d6d..69ddc5c14b79 100644
--- a/Documentation/mips/time.README
+++ b/Documentation/mips/time.README
@@ -65,7 +65,7 @@ the following functions or values:
65 1. (optional) set up RTC routines 65 1. (optional) set up RTC routines
66 2. (optional) calibrate and set the mips_counter_frequency 66 2. (optional) calibrate and set the mips_counter_frequency
67 67
68 b) board_timer_setup - a function pointer. Invoked at the end of time_init() 68 b) plat_timer_setup - a function pointer. Invoked at the end of time_init()
69 1. (optional) over-ride any decisions made in time_init() 69 1. (optional) over-ride any decisions made in time_init()
70 2. set up the irqaction for timer interrupt. 70 2. set up the irqaction for timer interrupt.
71 3. enable the timer interrupt 71 3. enable the timer interrupt
@@ -116,19 +116,17 @@ Step 2: the machine setup() function
116 116
117 If you supply board_time_init(), set the function pointer. 117 If you supply board_time_init(), set the function pointer.
118 118
119 Set the function pointer board_timer_setup() (mandatory)
120 119
121 120Step 3: implement rtc routines, board_time_init() and plat_timer_setup()
122Step 3: implement rtc routines, board_time_init() and board_timer_setup()
123 if needed. 121 if needed.
124 122
125 board_time_init() - 123 board_time_init() -
126 a) (optional) set up RTC routines, 124 a) (optional) set up RTC routines,
127 b) (optional) calibrate and set the mips_counter_frequency 125 b) (optional) calibrate and set the mips_counter_frequency
128 (only needed if you intended to use fixed_rate_gettimeoffset 126 (only needed if you intended to use fixed_rate_gettimeoffset
129 or use cpu counter as timer interrupt source) 127 or use cpu counter as timer interrupt source)
130 128
131 board_timer_setup() - 129 plat_timer_setup() -
132 a) (optional) over-write any choices made above by time_init(). 130 a) (optional) over-write any choices made above by time_init().
133 b) machine specific code should setup the timer irqaction. 131 b) machine specific code should setup the timer irqaction.
134 c) enable the timer interrupt 132 c) enable the timer interrupt
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d46338af6002..90ed78110fd4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -102,9 +102,15 @@ inet_peer_gc_maxtime - INTEGER
102TCP variables: 102TCP variables:
103 103
104tcp_abc - INTEGER 104tcp_abc - INTEGER
105 Controls Appropriate Byte Count defined in RFC3465. If set to 105 Controls Appropriate Byte Count (ABC) defined in RFC3465.
106 0 then does congestion avoid once per ack. 1 is conservative 106 ABC is a way of increasing congestion window (cwnd) more slowly
107 value, and 2 is more agressive. 107 in response to partial acknowledgments.
108 Possible values are:
109 0 increase cwnd once per acknowledgment (no ABC)
110 1 increase cwnd once per acknowledgment of full sized segment
111 2 allow increase cwnd by two if acknowledgment is
112 of two segments to compensate for delayed acknowledgments.
113 Default: 0 (off)
108 114
109tcp_syn_retries - INTEGER 115tcp_syn_retries - INTEGER
110 Number of times initial SYNs for an active TCP connection attempt 116 Number of times initial SYNs for an active TCP connection attempt
@@ -294,15 +300,15 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
294 Default: 87380*2 bytes. 300 Default: 87380*2 bytes.
295 301
296tcp_mem - vector of 3 INTEGERs: min, pressure, max 302tcp_mem - vector of 3 INTEGERs: min, pressure, max
297 low: below this number of pages TCP is not bothered about its 303 min: below this number of pages TCP is not bothered about its
298 memory appetite. 304 memory appetite.
299 305
300 pressure: when amount of memory allocated by TCP exceeds this number 306 pressure: when amount of memory allocated by TCP exceeds this number
301 of pages, TCP moderates its memory consumption and enters memory 307 of pages, TCP moderates its memory consumption and enters memory
302 pressure mode, which is exited when memory consumption falls 308 pressure mode, which is exited when memory consumption falls
303 under "low". 309 under "min".
304 310
305 high: number of pages allowed for queueing by all TCP sockets. 311 max: number of pages allowed for queueing by all TCP sockets.
306 312
307 Defaults are calculated at boot time from amount of available 313 Defaults are calculated at boot time from amount of available
308 memory. 314 memory.
diff --git a/Documentation/nfsroot.txt b/Documentation/nfsroot.txt
index d56dc71d9430..3cc953cb288f 100644
--- a/Documentation/nfsroot.txt
+++ b/Documentation/nfsroot.txt
@@ -4,15 +4,16 @@ Mounting the root filesystem via NFS (nfsroot)
4Written 1996 by Gero Kuhlmann <gero@gkminix.han.de> 4Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
5Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz> 5Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org> 6Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
7Updated 2006 by Horms <horms@verge.net.au>
7 8
8 9
9 10
10If you want to use a diskless system, as an X-terminal or printer 11In order to use a diskless system, such as an X-terminal or printer server
11server for example, you have to put your root filesystem onto a 12for example, it is necessary for the root filesystem to be present on a
12non-disk device. This can either be a ramdisk (see initrd.txt in 13non-disk device. This may be an initramfs (see Documentation/filesystems/
13this directory for further information) or a filesystem mounted 14ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
14via NFS. The following text describes on how to use NFS for the 15filesystem mounted via NFS. The following text describes on how to use NFS
15root filesystem. For the rest of this text 'client' means the 16for the root filesystem. For the rest of this text 'client' means the
16diskless system, and 'server' means the NFS server. 17diskless system, and 'server' means the NFS server.
17 18
18 19
@@ -21,11 +22,13 @@ diskless system, and 'server' means the NFS server.
211.) Enabling nfsroot capabilities 221.) Enabling nfsroot capabilities
22 ----------------------------- 23 -----------------------------
23 24
24In order to use nfsroot you have to select support for NFS during 25In order to use nfsroot, NFS client support needs to be selected as
25kernel configuration. Note that NFS cannot be loaded as a module 26built-in during configuration. Once this has been selected, the nfsroot
26in this case. The configuration script will then ask you whether 27option will become available, which should also be selected.
27you want to use nfsroot, and if yes what kind of auto configuration 28
28system you want to use. Selecting both BOOTP and RARP is safe. 29In the networking options, kernel level autoconfiguration can be selected,
30along with the types of autoconfiguration to support. Selecting all of
31DHCP, BOOTP and RARP is safe.
29 32
30 33
31 34
@@ -33,11 +36,10 @@ system you want to use. Selecting both BOOTP and RARP is safe.
332.) Kernel command line 362.) Kernel command line
34 ------------------- 37 -------------------
35 38
36When the kernel has been loaded by a boot loader (either by loadlin, 39When the kernel has been loaded by a boot loader (see below) it needs to be
37LILO or a network boot program) it has to be told what root fs device 40told what root fs device to use. And in the case of nfsroot, where to find
38to use, and where to find the server and the name of the directory 41both the server and the name of the directory on the server to mount as root.
39on the server to mount as root. This can be established by a couple 42This can be established using the following kernel command line parameters:
40of kernel command line parameters:
41 43
42 44
43root=/dev/nfs 45root=/dev/nfs
@@ -49,23 +51,21 @@ root=/dev/nfs
49 51
50nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] 52nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
51 53
52 If the `nfsroot' parameter is NOT given on the command line, the default 54 If the `nfsroot' parameter is NOT given on the command line,
53 "/tftpboot/%s" will be used. 55 the default "/tftpboot/%s" will be used.
54 56
55 <server-ip> Specifies the IP address of the NFS server. If this field 57 <server-ip> Specifies the IP address of the NFS server.
56 is not given, the default address as determined by the 58 The default address is determined by the `ip' parameter
57 `ip' variable (see below) is used. One use of this 59 (see below). This parameter allows the use of different
58 parameter is for example to allow using different servers 60 servers for IP autoconfiguration and NFS.
59 for RARP and NFS. Usually you can leave this blank.
60 61
61 <root-dir> Name of the directory on the server to mount as root. If 62 <root-dir> Name of the directory on the server to mount as root.
62 there is a "%s" token in the string, the token will be 63 If there is a "%s" token in the string, it will be
63 replaced by the ASCII-representation of the client's IP 64 replaced by the ASCII-representation of the client's
64 address. 65 IP address.
65 66
66 <nfs-options> Standard NFS options. All options are separated by commas. 67 <nfs-options> Standard NFS options. All options are separated by commas.
67 If the options field is not given, the following defaults 68 The following defaults are used:
68 will be used:
69 port = as given by server portmap daemon 69 port = as given by server portmap daemon
70 rsize = 1024 70 rsize = 1024
71 wsize = 1024 71 wsize = 1024
@@ -81,129 +81,174 @@ nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
81ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf> 81ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
82 82
83 This parameter tells the kernel how to configure IP addresses of devices 83 This parameter tells the kernel how to configure IP addresses of devices
84 and also how to set up the IP routing table. It was originally called `nfsaddrs', 84 and also how to set up the IP routing table. It was originally called
85 but now the boot-time IP configuration works independently of NFS, so it 85 `nfsaddrs', but now the boot-time IP configuration works independently of
86 was renamed to `ip' and the old name remained as an alias for compatibility 86 NFS, so it was renamed to `ip' and the old name remained as an alias for
87 reasons. 87 compatibility reasons.
88 88
89 If this parameter is missing from the kernel command line, all fields are 89 If this parameter is missing from the kernel command line, all fields are
90 assumed to be empty, and the defaults mentioned below apply. In general 90 assumed to be empty, and the defaults mentioned below apply. In general
91 this means that the kernel tries to configure everything using both 91 this means that the kernel tries to configure everything using
92 RARP and BOOTP (depending on what has been enabled during kernel confi- 92 autoconfiguration.
93 guration, and if both what protocol answer got in first). 93
94 The <autoconf> parameter can appear alone as the value to the `ip'
95 parameter (without all the ':' characters before) in which case auto-
96 configuration is used.
97
98 <client-ip> IP address of the client.
94 99
95 <client-ip> IP address of the client. If empty, the address will either 100 Default: Determined using autoconfiguration.
96 be determined by RARP or BOOTP. What protocol is used de-
97 pends on what has been enabled during kernel configuration
98 and on the <autoconf> parameter. If this parameter is not
99 empty, neither RARP nor BOOTP will be used.
100 101
101 <server-ip> IP address of the NFS server. If RARP is used to determine 102 <server-ip> IP address of the NFS server. If RARP is used to determine
102 the client address and this parameter is NOT empty only 103 the client address and this parameter is NOT empty only
103 replies from the specified server are accepted. To use 104 replies from the specified server are accepted.
104 different RARP and NFS server, specify your RARP server 105
105 here (or leave it blank), and specify your NFS server in 106 Only required for NFS root. That is autoconfiguration
106 the `nfsroot' parameter (see above). If this entry is blank 107 will not be triggered if it is missing and NFS root is not
107 the address of the server is used which answered the RARP 108 in operation.
108 or BOOTP request. 109
109 110 Default: Determined using autoconfiguration.
110 <gw-ip> IP address of a gateway if the server is on a different 111 The address of the autoconfiguration server is used.
111 subnet. If this entry is empty no gateway is used and the 112
112 server is assumed to be on the local network, unless a 113 <gw-ip> IP address of a gateway if the server is on a different subnet.
113 value has been received by BOOTP. 114
114 115 Default: Determined using autoconfiguration.
115 <netmask> Netmask for local network interface. If this is empty, 116
117 <netmask> Netmask for local network interface. If unspecified
116 the netmask is derived from the client IP address assuming 118 the netmask is derived from the client IP address assuming
117 classful addressing, unless overridden in BOOTP reply. 119 classful addressing.
118 120
119 <hostname> Name of the client. If empty, the client IP address is 121 Default: Determined using autoconfiguration.
120 used in ASCII notation, or the value received by BOOTP.
121 122
122 <device> Name of network device to use. If this is empty, all 123 <hostname> Name of the client. May be supplied by autoconfiguration,
123 devices are used for RARP and BOOTP requests, and the 124 but its absence will not trigger autoconfiguration.
124 first one we receive a reply on is configured. If you have
125 only one device, you can safely leave this blank.
126 125
127 <autoconf> Method to use for autoconfiguration. If this is either 126 Default: Client IP address is used in ASCII notation.
128 'rarp' or 'bootp', the specified protocol is used.
129 If the value is 'both' or empty, both protocols are used
130 so far as they have been enabled during kernel configura-
131 tion. 'off' means no autoconfiguration.
132 127
133 The <autoconf> parameter can appear alone as the value to the `ip' 128 <device> Name of network device to use.
134 parameter (without all the ':' characters before) in which case auto- 129
135 configuration is used. 130 Default: If the host only has one device, it is used.
131 Otherwise the device is determined using
132 autoconfiguration. This is done by sending
133 autoconfiguration requests out of all devices,
134 and using the device that received the first reply.
136 135
136 <autoconf> Method to use for autoconfiguration. In the case of options
137 which specify multiple autoconfiguration protocols,
138 requests are sent using all protocols, and the first one
139 to reply is used.
137 140
141 Only autoconfiguration protocols that have been compiled
142 into the kernel will be used, regardless of the value of
143 this option.
138 144
145 off or none: don't use autoconfiguration (default)
146 on or any: use any protocol available in the kernel
147 dhcp: use DHCP
148 bootp: use BOOTP
149 rarp: use RARP
150 both: use both BOOTP and RARP but not DHCP
151 (old option kept for backwards compatibility)
139 152
1403.) Kernel loader 153 Default: any
141 -------------
142 154
143To get the kernel into memory different approaches can be used. They
144depend on what facilities are available:
145 155
146 156
1473.1) Writing the kernel onto a floppy using dd:
148 As always you can just write the kernel onto a floppy using dd,
149 but then it's not possible to use kernel command lines at all.
150 To substitute the 'root=' parameter, create a dummy device on any
151 linux system with major number 0 and minor number 255 using mknod:
152 157
153 mknod /dev/boot255 c 0 255 1583.) Boot Loader
159 ----------
154 160
155 Then copy the kernel zImage file onto a floppy using dd: 161To get the kernel into memory different approaches can be used.
162They depend on various facilities being available:
156 163
157 dd if=/usr/src/linux/arch/i386/boot/zImage of=/dev/fd0
158 164
159 And finally use rdev to set the root device: 1653.1) Booting from a floppy using syslinux
160 166
161 rdev /dev/fd0 /dev/boot255 167 When building kernels, an easy way to create a boot floppy that uses
168 syslinux is to use the zdisk or bzdisk make targets which use
169 and bzimage images respectively. Both targets accept the
170 FDARGS parameter which can be used to set the kernel command line.
162 171
163 You can then remove the dummy device /dev/boot255 again. There 172 e.g.
164 is no real device available for it. 173 make bzdisk FDARGS="root=/dev/nfs"
165 The other two kernel command line parameters cannot be substi- 174
166 tuted with rdev. Therefore, using this method the kernel will 175 Note that the user running this command will need to have
167 by default use RARP and/or BOOTP, and if it gets an answer via 176 access to the floppy drive device, /dev/fd0
168 RARP will mount the directory /tftpboot/<client-ip>/ as its 177
169 root. If it got a BOOTP answer the directory name in that answer 178 For more information on syslinux, including how to create bootdisks
170 is used. 179 for prebuilt kernels, see http://syslinux.zytor.com/
180
181 N.B: Previously it was possible to write a kernel directly to
182 a floppy using dd, configure the boot device using rdev, and
183 boot using the resulting floppy. Linux no longer supports this
184 method of booting.
185
1863.2) Booting from a cdrom using isolinux
187
188 When building kernels, an easy way to create a bootable cdrom that
189 uses isolinux is to use the isoimage target which uses a bzimage
190 image. Like zdisk and bzdisk, this target accepts the FDARGS
191 parameter which can be used to set the kernel command line.
192
193 e.g.
194 make isoimage FDARGS="root=/dev/nfs"
195
196 The resulting iso image will be arch/<ARCH>/boot/image.iso
197 This can be written to a cdrom using a variety of tools including
198 cdrecord.
199
200 e.g.
201 cdrecord dev=ATAPI:1,0,0 arch/i386/boot/image.iso
202
203 For more information on isolinux, including how to create bootdisks
204 for prebuilt kernels, see http://syslinux.zytor.com/
171 205
1723.2) Using LILO 2063.2) Using LILO
173 When using LILO you can specify all necessary command line 207 When using LILO all the necessary command line parameters may be
174 parameters with the 'append=' command in the LILO configuration 208 specified using the 'append=' directive in the LILO configuration
175 file. However, to use the 'root=' command you also need to 209 file.
176 set up a dummy device as described in 3.1 above. For how to use 210
177 LILO and its 'append=' command please refer to the LILO 211 However, to use the 'root=' directive you also need to create
178 documentation. 212 a dummy root device, which may be removed after LILO is run.
213
214 mknod /dev/boot255 c 0 255
215
216 For information on configuring LILO, please refer to its documentation.
179 217
1803.3) Using GRUB 2183.3) Using GRUB
181 When you use GRUB, you simply append the parameters after the kernel 219 When using GRUB, kernel parameters are simply appended after the kernel
182 specification: "kernel <kernel> <parameters>" (without the quotes). 220 specification: kernel <kernel> <parameters>
183 221
1843.4) Using loadlin 2223.4) Using loadlin
185 When you want to boot Linux from a DOS command prompt without 223 loadlin may be used to boot Linux from a DOS command prompt without
186 having a local hard disk to mount as root, you can use loadlin. 224 requiring a local hard disk to mount as root. This has not been
187 I was told that it works, but haven't used it myself yet. In 225 thoroughly tested by the authors of this document, but in general
188 general you should be able to create a kernel command line simi- 226 it should be possible to configure the kernel command line similarly
189 lar to how LILO is doing it. Please refer to the loadlin docu- 227 to the configuration of LILO.
190 mentation for further information. 228
229 Please refer to the loadlin documentation for further information.
191 230
1923.5) Using a boot ROM 2313.5) Using a boot ROM
193 This is probably the most elegant way of booting a diskless 232 This is probably the most elegant way of booting a diskless client.
194 client. With a boot ROM the kernel gets loaded using the TFTP 233 With a boot ROM the kernel is loaded using the TFTP protocol. The
195 protocol. As far as I know, no commercial boot ROMs yet 234 authors of this document are not aware of any commercial boot
196 support booting Linux over the network, but there are two 235 ROMs that support booting Linux over the network. However, there
197 free implementations of a boot ROM available on sunsite.unc.edu 236 are two free implementations of a boot ROM, netboot-nfs and
198 and its mirrors. They are called 'netboot-nfs' and 'etherboot'. 237 etherboot, both of which are available on sunsite.unc.edu, and both
199 Both contain everything you need to boot a diskless Linux client. 238 of which contain everything you need to boot a diskless Linux client.
200 239
2013.6) Using pxelinux 2403.6) Using pxelinux
202 Using pxelinux you specify the kernel you built with 241 Pxelinux may be used to boot linux using the PXE boot loader
242 which is present on many modern network cards.
243
244 When using pxelinux, the kernel image is specified using
203 "kernel <relative-path-below /tftpboot>". The nfsroot parameters 245 "kernel <relative-path-below /tftpboot>". The nfsroot parameters
204 are passed to the kernel by adding them to the "append" line. 246 are passed to the kernel by adding them to the "append" line.
205 You may perhaps also want to fine tune the console output, 247 It is common to use serial console in conjunction with pxelinux,
206 see Documentation/serial-console.txt for serial console help. 248 see Documentation/serial-console.txt for more information.
249
250 For more information on pxelinux, including how to create bootdisks
251 for prebuilt kernels, see http://syslinux.zytor.com/
207 252
208 253
209 254
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 3c62e66e1fcc..5c0ba235f5a5 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -1136,10 +1136,10 @@ Sense and level information should be encoded as follows:
1136 Devices connected to openPIC-compatible controllers should encode 1136 Devices connected to openPIC-compatible controllers should encode
1137 sense and polarity as follows: 1137 sense and polarity as follows:
1138 1138
1139 0 = high to low edge sensitive type enabled 1139 0 = low to high edge sensitive type enabled
1140 1 = active low level sensitive type enabled 1140 1 = active low level sensitive type enabled
1141 2 = low to high edge sensitive type enabled 1141 2 = active high level sensitive type enabled
1142 3 = active high level sensitive type enabled 1142 3 = high to low edge sensitive type enabled
1143 1143
1144 ISA PIC interrupt controllers should adhere to the ISA PIC 1144 ISA PIC interrupt controllers should adhere to the ISA PIC
1145 encodings listed below: 1145 encodings listed below:
@@ -1196,7 +1196,7 @@ platforms are moved over to use the flattened-device-tree model.
1196 - model : Model of the device. Can be "TSEC", "eTSEC", or "FEC" 1196 - model : Model of the device. Can be "TSEC", "eTSEC", or "FEC"
1197 - compatible : Should be "gianfar" 1197 - compatible : Should be "gianfar"
1198 - reg : Offset and length of the register set for the device 1198 - reg : Offset and length of the register set for the device
1199 - address : List of bytes representing the ethernet address of 1199 - mac-address : List of bytes representing the ethernet address of
1200 this controller 1200 this controller
1201 - interrupts : <a b> where a is the interrupt number and b is a 1201 - interrupts : <a b> where a is the interrupt number and b is a
1202 field that represents an encoding of the sense and level 1202 field that represents an encoding of the sense and level
@@ -1216,7 +1216,7 @@ platforms are moved over to use the flattened-device-tree model.
1216 model = "TSEC"; 1216 model = "TSEC";
1217 compatible = "gianfar"; 1217 compatible = "gianfar";
1218 reg = <24000 1000>; 1218 reg = <24000 1000>;
1219 address = [ 00 E0 0C 00 73 00 ]; 1219 mac-address = [ 00 E0 0C 00 73 00 ];
1220 interrupts = <d 3 e 3 12 3>; 1220 interrupts = <d 3 e 3 12 3>;
1221 interrupt-parent = <40000>; 1221 interrupt-parent = <40000>;
1222 phy-handle = <2452000> 1222 phy-handle = <2452000>
@@ -1498,7 +1498,7 @@ not necessary as they are usually the same as the root node.
1498 model = "TSEC"; 1498 model = "TSEC";
1499 compatible = "gianfar"; 1499 compatible = "gianfar";
1500 reg = <24000 1000>; 1500 reg = <24000 1000>;
1501 address = [ 00 E0 0C 00 73 00 ]; 1501 mac-address = [ 00 E0 0C 00 73 00 ];
1502 interrupts = <d 3 e 3 12 3>; 1502 interrupts = <d 3 e 3 12 3>;
1503 interrupt-parent = <40000>; 1503 interrupt-parent = <40000>;
1504 phy-handle = <2452000>; 1504 phy-handle = <2452000>;
@@ -1511,7 +1511,7 @@ not necessary as they are usually the same as the root node.
1511 model = "TSEC"; 1511 model = "TSEC";
1512 compatible = "gianfar"; 1512 compatible = "gianfar";
1513 reg = <25000 1000>; 1513 reg = <25000 1000>;
1514 address = [ 00 E0 0C 00 73 01 ]; 1514 mac-address = [ 00 E0 0C 00 73 01 ];
1515 interrupts = <13 3 14 3 18 3>; 1515 interrupts = <13 3 14 3 18 3>;
1516 interrupt-parent = <40000>; 1516 interrupt-parent = <40000>;
1517 phy-handle = <2452001>; 1517 phy-handle = <2452001>;
@@ -1524,7 +1524,7 @@ not necessary as they are usually the same as the root node.
1524 model = "FEC"; 1524 model = "FEC";
1525 compatible = "gianfar"; 1525 compatible = "gianfar";
1526 reg = <26000 1000>; 1526 reg = <26000 1000>;
1527 address = [ 00 E0 0C 00 73 02 ]; 1527 mac-address = [ 00 E0 0C 00 73 02 ];
1528 interrupts = <19 3>; 1528 interrupts = <19 3>;
1529 interrupt-parent = <40000>; 1529 interrupt-parent = <40000>;
1530 phy-handle = <2452002>; 1530 phy-handle = <2452002>;
diff --git a/Documentation/ramdisk.txt b/Documentation/ramdisk.txt
index 7c25584e082c..52f75b7d51c2 100644
--- a/Documentation/ramdisk.txt
+++ b/Documentation/ramdisk.txt
@@ -6,7 +6,7 @@ Contents:
6 1) Overview 6 1) Overview
7 2) Kernel Command Line Parameters 7 2) Kernel Command Line Parameters
8 3) Using "rdev -r" 8 3) Using "rdev -r"
9 4) An Example of Creating a Compressed RAM Disk 9 4) An Example of Creating a Compressed RAM Disk
10 10
11 11
121) Overview 121) Overview
@@ -34,7 +34,7 @@ make it clearer. The original "ramdisk=<ram_size>" has been kept around for
34compatibility reasons, but it may be removed in the future. 34compatibility reasons, but it may be removed in the future.
35 35
36The new RAM disk also has the ability to load compressed RAM disk images, 36The new RAM disk also has the ability to load compressed RAM disk images,
37allowing one to squeeze more programs onto an average installation or 37allowing one to squeeze more programs onto an average installation or
38rescue floppy disk. 38rescue floppy disk.
39 39
40 40
@@ -51,7 +51,7 @@ default is 4096 (4 MB) (8192 (8 MB) on S390).
51 =================== 51 ===================
52 52
53This parameter tells the RAM disk driver how many bytes to use per block. The 53This parameter tells the RAM disk driver how many bytes to use per block. The
54default is 512. 54default is 1024 (BLOCK_SIZE).
55 55
56 56
573) Using "rdev -r" 573) Using "rdev -r"
@@ -70,7 +70,7 @@ These numbers are no magical secrets, as seen below:
70./arch/i386/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 70./arch/i386/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
71./arch/i386/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 71./arch/i386/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
72 72
73Consider a typical two floppy disk setup, where you will have the 73Consider a typical two floppy disk setup, where you will have the
74kernel on disk one, and have already put a RAM disk image onto disk #2. 74kernel on disk one, and have already put a RAM disk image onto disk #2.
75 75
76Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk 76Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
@@ -97,12 +97,12 @@ Since the default start = 0 and the default prompt = 1, you could use:
97 append = "load_ramdisk=1" 97 append = "load_ramdisk=1"
98 98
99 99
1004) An Example of Creating a Compressed RAM Disk 1004) An Example of Creating a Compressed RAM Disk
101---------------------------------------------- 101----------------------------------------------
102 102
103To create a RAM disk image, you will need a spare block device to 103To create a RAM disk image, you will need a spare block device to
104construct it on. This can be the RAM disk device itself, or an 104construct it on. This can be the RAM disk device itself, or an
105unused disk partition (such as an unmounted swap partition). For this 105unused disk partition (such as an unmounted swap partition). For this
106example, we will use the RAM disk device, "/dev/ram0". 106example, we will use the RAM disk device, "/dev/ram0".
107 107
108Note: This technique should not be done on a machine with less than 8 MB 108Note: This technique should not be done on a machine with less than 8 MB
diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
index c173806c91fa..a056bbe67c7e 100644
--- a/Documentation/scsi/ChangeLog.megaraid
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -1,3 +1,126 @@
1Release Date : Fri May 19 09:31:45 EST 2006 - Seokmann Ju <sju@lsil.com>
2Current Version : 2.20.4.9 (scsi module), 2.20.2.6 (cmm module)
3Older Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
4
51. Fixed a bug in megaraid_init_mbox().
6 Customer reported "garbage in file on x86_64 platform".
7 Root Cause: the driver registered controllers as 64-bit DMA capable
8 for those which do not support it.
9 Fix: Made change in the function inserting identification mechanism
10 identifying 64-bit DMA capable controllers.
11
12 > -----Original Message-----
13 > From: Vasily Averin [mailto:vvs@sw.ru]
14 > Sent: Thursday, May 04, 2006 2:49 PM
15 > To: linux-scsi@vger.kernel.org; Kolli, Neela; Mukker, Atul;
16 > Ju, Seokmann; Bagalkote, Sreenivas;
17 > James.Bottomley@SteelEye.com; devel@openvz.org
18 > Subject: megaraid_mbox: garbage in file
19 >
20 > Hello all,
21 >
22 > I've investigated customers claim on the unstable work of
23 > their node and found a
24 > strange effect: reading from some files leads to the
25 > "attempt to access beyond end of device" messages.
26 >
27 > I've checked filesystem, memory on the node, motherboard BIOS
28 > version, but it
29 > does not help and issue still has been reproduced by simple
30 > file reading.
31 >
32 > Reproducer is simple:
33 >
34 > echo 0xffffffff >/proc/sys/dev/scsi/logging_level ;
35 > cat /vz/private/101/root/etc/ld.so.cache >/tmp/ttt ;
36 > echo 0 >/proc/sys/dev/scsi/logging
37 >
38 > It leads to the following messages in dmesg
39 >
40 > sd_init_command: disk=sda, block=871769260, count=26
41 > sda : block=871769260
42 > sda : reading 26/26 512 byte blocks.
43 > scsi_add_timer: scmd: f79ed980, time: 7500, (c02b1420)
44 > sd 0:1:0:0: send 0xf79ed980 sd 0:1:0:0:
45 > command: Read (10): 28 00 33 f6 24 ac 00 00 1a 00
46 > buffer = 0xf7cfb540, bufflen = 13312, done = 0xc0366b40,
47 > queuecommand 0xc0344010
48 > leaving scsi_dispatch_cmnd()
49 > scsi_delete_timer: scmd: f79ed980, rtn: 1
50 > sd 0:1:0:0: done 0xf79ed980 SUCCESS 0 sd 0:1:0:0:
51 > command: Read (10): 28 00 33 f6 24 ac 00 00 1a 00
52 > scsi host busy 1 failed 0
53 > sd 0:1:0:0: Notifying upper driver of completion (result 0)
54 > sd_rw_intr: sda: res=0x0
55 > 26 sectors total, 13312 bytes done.
56 > use_sg is 4
57 > attempt to access beyond end of device
58 > sda6: rw=0, want=1044134458, limit=951401367
59 > Buffer I/O error on device sda6, logical block 522067228
60 > attempt to access beyond end of device
61
622. When INQUIRY with EVPD bit set issued to the MegaRAID controller,
63 system memory gets corrupted.
64 Root Cause: MegaRAID F/W handle the INQUIRY with EVPD bit set
65 incorrectly.
66 Fix: MegaRAID F/W has fixed the problem and is in the process of being
67 released soon. Meanwhile, the driver will filter out the request.
68
693. One of the members in the data structure of the driver leads to an unaligned
70 issue on 64-bit platforms.
71 Customer reported "kernel unaligned access address" issue when
72 application communicates with MegaRAID HBA driver.
73 Root Cause: in uioc_t structure, one of the members was misaligned and it
74 led the system to display the error message.
75 Fix: A patch was submitted to the community by the following person.
76
77 > -----Original Message-----
78 > From: linux-scsi-owner@vger.kernel.org
79 > [mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of Sakurai Hiroomi
80 > Sent: Wednesday, July 12, 2006 4:20 AM
81 > To: linux-scsi@vger.kernel.org; linux-kernel@vger.kernel.org
82 > Subject: Re: Help: strange messages from kernel on IA64 platform
83 >
84 > Hi,
85 >
86 > I saw same message.
87 >
88 > When GAM(Global Array Manager) is started, The following
89 > message output.
90 > kernel: kernel unaligned access to 0xe0000001fe1080d4,
91 > ip=0xa000000200053371
92 >
93 > The uioc structure used by ioctl is defined by packed,
94 > the allignment of each member are disturbed.
95 > In a 64 bit structure, the allignment of member doesn't fit 64 bit
96 > boundary. this causes this messages.
97 > In a 32 bit structure, we don't see the message because the allinment
98 > of member fit 32 bit boundary even if packed is specified.
99 >
100 > patch
101 > I Add 32 bit dummy member to fit 64 bit boundary. I tested.
102 > We confirmed this patch fix the problem by IA64 server.
103 >
104 > **************************************************************
105 > ****************
106 > --- linux-2.6.9/drivers/scsi/megaraid/megaraid_ioctl.h.orig
107 > 2006-04-03 17:13:03.000000000 +0900
108 > +++ linux-2.6.9/drivers/scsi/megaraid/megaraid_ioctl.h
109 > 2006-04-03 17:14:09.000000000 +0900
110 > @@ -132,6 +132,10 @@
111 > /* Driver Data: */
112 > void __user * user_data;
113 > uint32_t user_data_len;
114 > +
115 > + /* 64bit alignment */
116 > + uint32_t pad_0xBC;
117 > +
118 > mraid_passthru_t __user *user_pthru;
119 >
120 > mraid_passthru_t *pthru32;
121 > **************************************************************
122 > ****************
123
1Release Date : Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com> 124Release Date : Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com>
2Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module) 125Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
3Older Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module) 126Older Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
diff --git a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
index 69866d5997a4..b8dc51ca776c 100644
--- a/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
+++ b/Documentation/sound/alsa/DocBook/writing-an-alsa-driver.tmpl
@@ -1172,7 +1172,7 @@
1172 } 1172 }
1173 1173
1174 /* PCI IDs */ 1174 /* PCI IDs */
1175 static struct pci_device_id snd_mychip_ids[] __devinitdata = { 1175 static struct pci_device_id snd_mychip_ids[] = {
1176 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR, 1176 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
1177 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, 1177 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
1178 .... 1178 ....
@@ -1565,7 +1565,7 @@
1565 <informalexample> 1565 <informalexample>
1566 <programlisting> 1566 <programlisting>
1567<![CDATA[ 1567<![CDATA[
1568 static struct pci_device_id snd_mychip_ids[] __devinitdata = { 1568 static struct pci_device_id snd_mychip_ids[] = {
1569 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR, 1569 { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
1570 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, }, 1570 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
1571 .... 1571 ....
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 0b62c62142cf..5c3a51905969 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -25,6 +25,7 @@ Currently, these files are in /proc/sys/fs:
25- inode-state 25- inode-state
26- overflowuid 26- overflowuid
27- overflowgid 27- overflowgid
28- suid_dumpable
28- super-max 29- super-max
29- super-nr 30- super-nr
30 31
@@ -131,6 +132,25 @@ The default is 65534.
131 132
132============================================================== 133==============================================================
133 134
135suid_dumpable:
136
137This value can be used to query and set the core dump mode for setuid
138or otherwise protected/tainted binaries. The modes are
139
1400 - (default) - traditional behaviour. Any process which has changed
141 privilege levels or is execute only will not be dumped
1421 - (debug) - all processes dump core when possible. The core dump is
143 owned by the current user and no security is applied. This is
144 intended for system debugging situations only. Ptrace is unchecked.
1452 - (suidsafe) - any binary which normally would not be dumped is dumped
146 readable by root only. This allows the end user to remove
147 such a dump but not access it directly. For security reasons
148 core dumps in this mode will not overwrite one another or
149 other files. This mode is appropriate when administrators are
150 attempting to debug problems in a normal environment.
151
152==============================================================
153
134super-max & super-nr: 154super-max & super-nr:
135 155
136These numbers control the maximum number of superblocks, and 156These numbers control the maximum number of superblocks, and
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index b0c7ab93dcb9..89bf8c20a586 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -50,7 +50,6 @@ show up in /proc/sys/kernel:
50- shmmax [ sysv ipc ] 50- shmmax [ sysv ipc ]
51- shmmni 51- shmmni
52- stop-a [ SPARC only ] 52- stop-a [ SPARC only ]
53- suid_dumpable
54- sysrq ==> Documentation/sysrq.txt 53- sysrq ==> Documentation/sysrq.txt
55- tainted 54- tainted
56- threads-max 55- threads-max
@@ -211,9 +210,8 @@ Controls the kernel's behaviour when an oops or BUG is encountered.
211 210
2120: try to continue operation 2110: try to continue operation
213 212
2141: delay a few seconds (to give klogd time to record the oops output) and 2131: panic immediately. If the `panic' sysctl is also non-zero then the
215 then panic. If the `panic' sysctl is also non-zero then the machine will 214 machine will be rebooted.
216 be rebooted.
217 215
218============================================================== 216==============================================================
219 217
@@ -311,25 +309,6 @@ kernel. This value defaults to SHMMAX.
311 309
312============================================================== 310==============================================================
313 311
314suid_dumpable:
315
316This value can be used to query and set the core dump mode for setuid
317or otherwise protected/tainted binaries. The modes are
318
3190 - (default) - traditional behaviour. Any process which has changed
320 privilege levels or is execute only will not be dumped
3211 - (debug) - all processes dump core when possible. The core dump is
322 owned by the current user and no security is applied. This is
323 intended for system debugging situations only. Ptrace is unchecked.
3242 - (suidsafe) - any binary which normally would not be dumped is dumped
325 readable by root only. This allows the end user to remove
326 such a dump but not access it directly. For security reasons
327 core dumps in this mode will not overwrite one another or
328 other files. This mode is appropriate when adminstrators are
329 attempting to debug problems in a normal environment.
330
331==============================================================
332
333tainted: 312tainted:
334 313
335Non-zero if the kernel has been tainted. Numeric values, which 314Non-zero if the kernel has been tainted. Numeric values, which
diff --git a/Documentation/usb/proc_usb_info.txt b/Documentation/usb/proc_usb_info.txt
index f86550fe38ee..22c5331260ca 100644
--- a/Documentation/usb/proc_usb_info.txt
+++ b/Documentation/usb/proc_usb_info.txt
@@ -59,7 +59,7 @@ bind to an interface (or perhaps several) using an ioctl call. You
59would issue more ioctls to the device to communicate to it using 59would issue more ioctls to the device to communicate to it using
60control, bulk, or other kinds of USB transfers. The IOCTLs are 60control, bulk, or other kinds of USB transfers. The IOCTLs are
61listed in the <linux/usbdevice_fs.h> file, and at this writing the 61listed in the <linux/usbdevice_fs.h> file, and at this writing the
62source code (linux/drivers/usb/devio.c) is the primary reference 62source code (linux/drivers/usb/core/devio.c) is the primary reference
63for how to access devices through those files. 63for how to access devices through those files.
64 64
65Note that since by default these BBB/DDD files are writable only by 65Note that since by default these BBB/DDD files are writable only by
diff --git a/Documentation/usb/usb-help.txt b/Documentation/usb/usb-help.txt
index b7c324973695..a7408593829f 100644
--- a/Documentation/usb/usb-help.txt
+++ b/Documentation/usb/usb-help.txt
@@ -5,8 +5,7 @@ For USB help other than the readme files that are located in
5Documentation/usb/*, see the following: 5Documentation/usb/*, see the following:
6 6
7Linux-USB project: http://www.linux-usb.org 7Linux-USB project: http://www.linux-usb.org
8 mirrors at http://www.suse.cz/development/linux-usb/ 8 mirrors at http://usb.in.tum.de/linux-usb/
9 and http://usb.in.tum.de/linux-usb/
10 and http://it.linux-usb.org 9 and http://it.linux-usb.org
11Linux USB Guide: http://linux-usb.sourceforge.net 10Linux USB Guide: http://linux-usb.sourceforge.net
12Linux-USB device overview (working devices and drivers): 11Linux-USB device overview (working devices and drivers):
diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt
index f001cd93b79b..02b0f7beb6d1 100644
--- a/Documentation/usb/usb-serial.txt
+++ b/Documentation/usb/usb-serial.txt
@@ -399,10 +399,10 @@ REINER SCT cyberJack pinpad/e-com USB chipcard reader
399 399
400Prolific PL2303 Driver 400Prolific PL2303 Driver
401 401
402 This driver support any device that has the PL2303 chip from Prolific 402 This driver supports any device that has the PL2303 chip from Prolific
403 in it. This includes a number of single port USB to serial 403 in it. This includes a number of single port USB to serial
404 converters and USB GPS devices. Devices from Aten (the UC-232) and 404 converters and USB GPS devices. Devices from Aten (the UC-232) and
405 IO-Data work with this driver. 405 IO-Data work with this driver, as does the DCU-11 mobile-phone cable.
406 406
407 For any questions or problems with this driver, please contact Greg 407 For any questions or problems with this driver, please contact Greg
408 Kroah-Hartman at greg@kroah.com 408 Kroah-Hartman at greg@kroah.com
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 6887d44d2661..6da24e7a56cb 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -238,6 +238,13 @@ Debugging
238 pagefaulttrace Dump all page faults. Only useful for extreme debugging 238 pagefaulttrace Dump all page faults. Only useful for extreme debugging
239 and will create a lot of output. 239 and will create a lot of output.
240 240
241 call_trace=[old|both|newfallback|new]
242 old: use old inexact backtracer
243 new: use new exact dwarf2 unwinder
244 both: print entries from both
245 newfallback: use new unwinder but fall back to old if it gets
246 stuck (default)
247
241Misc 248Misc
242 249
243 noreplacement Don't replace instructions with more appropriate ones 250 noreplacement Don't replace instructions with more appropriate ones