aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-power17
-rw-r--r--Documentation/DocBook/Makefile2
-rw-r--r--Documentation/DocBook/filesystems.tmpl300
-rw-r--r--Documentation/DocBook/journal-api.tmpl333
-rw-r--r--Documentation/accounting/getdelays.c2
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt2
-rw-r--r--Documentation/power/interface.txt13
-rw-r--r--arch/i386/kernel/acpi/boot.c2
-rw-r--r--arch/um/include/sysdep-i386/barrier.h9
-rw-r--r--arch/um/include/sysdep-x86_64/barrier.h7
-rw-r--r--arch/um/os-Linux/process.c1
-rw-r--r--arch/um/os-Linux/signal.c31
-rw-r--r--arch/um/os-Linux/skas/process.c2
-rw-r--r--arch/um/os-Linux/tls.c2
-rw-r--r--block/ll_rw_blk.c9
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c2
-rw-r--r--drivers/edac/edac_mc.c45
-rw-r--r--drivers/ide/pci/amd74xx.c7
-rw-r--r--drivers/isdn/hysdn/hysdn_sched.c19
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/misc/lkdtm.c24
-rw-r--r--drivers/spi/spi.c1
-rw-r--r--fs/cifs/file.c23
-rw-r--r--fs/compat.c20
-rw-r--r--fs/ecryptfs/crypto.c2
-rw-r--r--fs/fuse/file.c9
-rw-r--r--fs/gfs2/ops_address.c7
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/xattr.c13
-rw-r--r--include/asm-powerpc/systbl.h2
-rw-r--r--include/asm-powerpc/unistd.h2
-rw-r--r--include/linux/compat.h4
-rw-r--r--include/linux/kernel.h2
-rw-r--r--include/linux/pm.h4
-rw-r--r--include/linux/ufs_fs.h2
-rw-r--r--ipc/msg.c1
-rw-r--r--ipc/sem.c1
-rw-r--r--ipc/shm.c1
-rw-r--r--ipc/util.c2
-rw-r--r--ipc/util.h12
-rw-r--r--kernel/compat.c33
-rw-r--r--kernel/futex.c7
-rw-r--r--kernel/power/disk.c37
-rw-r--r--kernel/printk.c21
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/page_alloc.c2
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/slab.c2
-rw-r--r--scripts/basic/docproc.c2
51 files changed, 595 insertions, 457 deletions
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index d882f8093871..dcff4d0623ad 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -21,7 +21,7 @@ Description:
21 these states. 21 these states.
22 22
23What: /sys/power/disk 23What: /sys/power/disk
24Date: August 2006 24Date: September 2006
25Contact: Rafael J. Wysocki <rjw@sisk.pl> 25Contact: Rafael J. Wysocki <rjw@sisk.pl>
26Description: 26Description:
27 The /sys/power/disk file controls the operating mode of the 27 The /sys/power/disk file controls the operating mode of the
@@ -39,6 +39,19 @@ Description:
39 'reboot' - the memory image will be saved by the kernel and 39 'reboot' - the memory image will be saved by the kernel and
40 the system will be rebooted. 40 the system will be rebooted.
41 41
42 Additionally, /sys/power/disk can be used to turn on one of the
43 two testing modes of the suspend-to-disk mechanism: 'testproc'
44 or 'test'. If the suspend-to-disk mechanism is in the
45 'testproc' mode, writing 'disk' to /sys/power/state will cause
46 the kernel to disable nonboot CPUs and freeze tasks, wait for 5
47 seconds, unfreeze tasks and enable nonboot CPUs. If it is in
48 the 'test' mode, writing 'disk' to /sys/power/state will cause
49 the kernel to disable nonboot CPUs and freeze tasks, shrink
50 memory, suspend devices, wait for 5 seconds, resume devices,
51 unfreeze tasks and enable nonboot CPUs. Then, we are able to
52 look in the log messages and work out, for example, which code
53 is being slow and which device drivers are misbehaving.
54
42 The suspend-to-disk method may be chosen by writing to this 55 The suspend-to-disk method may be chosen by writing to this
43 file one of the accepted strings: 56 file one of the accepted strings:
44 57
@@ -46,6 +59,8 @@ Description:
46 'platform' 59 'platform'
47 'shutdown' 60 'shutdown'
48 'reboot' 61 'reboot'
62 'testproc'
63 'test'
49 64
50 It will only change to 'firmware' or 'platform' if the system 65 It will only change to 'firmware' or 'platform' if the system
51 supports that. 66 supports that.
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 3bf5086574bc..db9499adbed4 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -9,7 +9,7 @@
9DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ 9DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
10 kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ 10 kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
11 procfs-guide.xml writing_usb_driver.xml \ 11 procfs-guide.xml writing_usb_driver.xml \
12 kernel-api.xml filesystems.xml journal-api.xml lsm.xml usb.xml \ 12 kernel-api.xml filesystems.xml lsm.xml usb.xml \
13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ 13 gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
14 genericirq.xml 14 genericirq.xml
15 15
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 4785032fb6ea..39fa2aba7f9b 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -98,4 +98,304 @@
98 </sect1> 98 </sect1>
99 </chapter> 99 </chapter>
100 100
101 <chapter id="LinuxJDBAPI">
102 <chapterinfo>
103 <title>The Linux Journalling API</title>
104
105 <authorgroup>
106 <author>
107 <firstname>Roger</firstname>
108 <surname>Gammans</surname>
109 <affiliation>
110 <address>
111 <email>rgammans@computer-surgery.co.uk</email>
112 </address>
113 </affiliation>
114 </author>
115 </authorgroup>
116
117 <authorgroup>
118 <author>
119 <firstname>Stephen</firstname>
120 <surname>Tweedie</surname>
121 <affiliation>
122 <address>
123 <email>sct@redhat.com</email>
124 </address>
125 </affiliation>
126 </author>
127 </authorgroup>
128
129 <copyright>
130 <year>2002</year>
131 <holder>Roger Gammans</holder>
132 </copyright>
133 </chapterinfo>
134
135 <title>The Linux Journalling API</title>
136
137 <sect1>
138 <title>Overview</title>
139 <sect2>
140 <title>Details</title>
141<para>
142The journalling layer is easy to use. You need to
143first of all create a journal_t data structure. There are
144two calls to do this dependent on how you decide to allocate the physical
145media on which the journal resides. The journal_init_inode() call
146is for journals stored in filesystem inodes, or the journal_init_dev()
147call can be used for a journal stored on a raw device (in a continuous range
148of blocks). A journal_t is a typedef for a struct pointer, so when
149you are finally finished make sure you call journal_destroy() on it
150to free up any used kernel memory.
151</para>
152
153<para>
154Once you have got your journal_t object you need to 'mount' or load the journal
155file, unless of course you haven't initialised it yet - in which case you
156need to call journal_create().
157</para>
158
159<para>
160Most of the time however your journal file will already have been created, but
161before you load it you must call journal_wipe() to empty the journal file.
162Hang on, you say, what if the filesystem wasn't cleanly umount()'d? Well, it is the
163job of the client file system to detect this and skip the call to journal_wipe().
164</para>
165
166<para>
167In either case the next call should be to journal_load() which prepares the
168journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
169for you if it detects any outstanding transactions in the journal and similarly
170journal_load() will call journal_recover() if necessary.
171I would advise reading fs/ext3/super.c for examples on this stage.
172[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
173complicate the API? Or isn't it a good idea for the journal layer to hide
174dirty mounts from the client fs?]
175</para>
176
177<para>
178Now you can go ahead and start modifying the underlying
179filesystem. Almost.
180</para>
181
182<para>
183
184You still need to actually journal your filesystem changes, this
185is done by wrapping them into transactions. Additionally you
186also need to wrap the modification of each of the buffers
187with calls to the journal layer, so it knows what the modifications
188you are actually making are. To do this use journal_start() which
189returns a transaction handle.
190</para>
191
192<para>
193journal_start()
194and its counterpart journal_stop(), which indicates the end of a transaction
195are nestable calls, so you can reenter a transaction if necessary,
196but remember you must call journal_stop() the same number of times as
197journal_start() before the transaction is completed (or more accurately
198leaves the update phase). Ext3/VFS makes use of this feature to simplify
199quota support.
200</para>
201
202<para>
203Inside each transaction you need to wrap the modifications to the
204individual buffers (blocks). Before you start to modify a buffer you
205need to call journal_get_{create,write,undo}_access() as appropriate,
206this allows the journalling layer to copy the unmodified data if it
207needs to. After all the buffer may be part of a previously uncommitted
208transaction.
209At this point you are at last ready to modify a buffer, and once
210you are have done so you need to call journal_dirty_{meta,}data().
211Or if you've asked for access to a buffer you now know is now longer
212required to be pushed back on the device you can call journal_forget()
213in much the same way as you might have used bforget() in the past.
214</para>
215
216<para>
217A journal_flush() may be called at any time to commit and checkpoint
218all your transactions.
219</para>
220
221<para>
222Then at umount time, in your put_super() (2.4) or write_super() (2.5),
223you can call journal_destroy() to clean up your in-core journal object.
224</para>
225
226<para>
227Unfortunately there are a couple of ways the journal layer can cause a deadlock.
228The first thing to note is that each task can only have
229a single outstanding transaction at any one time, remember nothing
230commits until the outermost journal_stop(). This means
231you must complete the transaction at the end of each file/inode/address
232etc. operation you perform, so that the journalling system isn't re-entered
233on another journal, since transactions can't be nested/batched
234across differing journals, and another filesystem other than
235yours (say ext3) may be modified in a later syscall.
236</para>
237
238<para>
239The second case to bear in mind is that journal_start() can
240block if there isn't enough space in the journal for your transaction
241(based on the passed nblocks param) - when it blocks it merely(!) needs to
242wait for transactions to complete and be committed from other tasks,
243so essentially we are waiting for journal_stop(). So to avoid
244deadlocks you must treat journal_start/stop() as if they
245were semaphores and include them in your semaphore ordering rules to prevent
246deadlocks. Note that journal_extend() has similar blocking behaviour to
247journal_start() so you can deadlock here just as easily as on journal_start().
248</para>
249
250<para>
251Try to reserve the right number of blocks the first time. ;-). This will
252be the maximum number of blocks you are going to touch in this transaction.
253I advise having a look at at least ext3_jbd.h to see the basis on which
254ext3 makes these decisions.
255</para>
256
257<para>
258Another wriggle to watch out for is your on-disk block allocation strategy.
259why? Because, if you undo a delete, you need to ensure you haven't reused any
260of the freed blocks in a later transaction. One simple way of doing this
261is make sure any blocks you allocate only have checkpointed transactions
262listed against them. Ext3 does this in ext3_test_allocatable().
263</para>
264
265<para>
266Lock is also providing through journal_{un,}lock_updates(),
267ext3 uses this when it wants a window with a clean and stable fs for a moment.
268eg.
269</para>
270
271<programlisting>
272
273 journal_lock_updates() //stop new stuff happening..
274 journal_flush() // checkpoint everything.
275 ..do stuff on stable fs
276 journal_unlock_updates() // carry on with filesystem use.
277</programlisting>
278
279<para>
280The opportunities for abuse and DOS attacks with this should be obvious,
281if you allow unprivileged userspace to trigger codepaths containing these
282calls.
283</para>
284
285<para>
286A new feature of jbd since 2.5.25 is commit callbacks: with the new
287journal_callback_set() function you can now ask the journalling layer
288to call you back when the transaction is finally committed to disk, so that
289you can do some of your own management. The key to this is the journal_callback
290struct, this maintains the internal callback information but you can
291extend it like this:-
292</para>
293<programlisting>
294 struct myfs_callback_s {
295 //Data structure element required by jbd..
296 struct journal_callback for_jbd;
297 // Stuff for myfs allocated together.
298 myfs_inode* i_commited;
299
300 }
301</programlisting>
302
303<para>
304this would be useful if you needed to know when data was committed to a
305particular inode.
306</para>
307
308 </sect2>
309
310 <sect2>
311 <title>Summary</title>
312<para>
313Using the journal is a matter of wrapping the different context changes,
314being each mount, each modification (transaction) and each changed buffer
315to tell the journalling layer about them.
316</para>
317
318<para>
319Here is some pseudo code to give you an idea of how it works, as
320an example.
321</para>
322
323<programlisting>
324 journal_t* my_jrnl = journal_create();
325 journal_init_{dev,inode}(my_jrnl,...)
326 if (clean) journal_wipe();
327 journal_load();
328
329 foreach(transaction) { /*transactions must be
330 completed before
331 a syscall returns to
332 userspace*/
333
334 handle_t * xact=journal_start(my_jrnl);
335 foreach(bh) {
336 journal_get_{create,write,undo}_access(xact,bh);
337 if ( myfs_modify(bh) ) { /* returns true
338 if makes changes */
339 journal_dirty_{meta,}data(xact,bh);
340 } else {
341 journal_forget(bh);
342 }
343 }
344 journal_stop(xact);
345 }
346 journal_destroy(my_jrnl);
347</programlisting>
348 </sect2>
349
350 </sect1>
351
352 <sect1>
353 <title>Data Types</title>
354 <para>
355 The journalling layer uses typedefs to 'hide' the concrete definitions
356 of the structures used. As a client of the JBD layer you can
357 just rely on using the pointer as a magic cookie of some sort.
358
359 Obviously the hiding is not enforced as this is 'C'.
360 </para>
361 <sect2><title>Structures</title>
362!Iinclude/linux/jbd.h
363 </sect2>
364 </sect1>
365
366 <sect1>
367 <title>Functions</title>
368 <para>
369 The functions here are split into two groups: those that
370 affect a journal as a whole, and those which are used to
371 manage transactions.
372 </para>
373 <sect2><title>Journal Level</title>
374!Efs/jbd/journal.c
375!Ifs/jbd/recovery.c
376 </sect2>
377 <sect2><title>Transaction Level</title>
378!Efs/jbd/transaction.c
379 </sect2>
380 </sect1>
381 <sect1>
382 <title>See also</title>
383 <para>
384 <citation>
385 <ulink url="ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz">
386 Journaling the Linux ext2fs Filesystem, LinuxExpo 98, Stephen Tweedie
387 </ulink>
388 </citation>
389 </para>
390 <para>
391 <citation>
392 <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
393 Ext3 Journalling FileSystem, OLS 2000, Dr. Stephen Tweedie
394 </ulink>
395 </citation>
396 </para>
397 </sect1>
398
399 </chapter>
400
101</book> 401</book>
diff --git a/Documentation/DocBook/journal-api.tmpl b/Documentation/DocBook/journal-api.tmpl
deleted file mode 100644
index 2077f9a28c19..000000000000
--- a/Documentation/DocBook/journal-api.tmpl
+++ /dev/null
@@ -1,333 +0,0 @@
1<?xml version="1.0" encoding="UTF-8"?>
2<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
3 "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
4
5<book id="LinuxJBDAPI">
6 <bookinfo>
7 <title>The Linux Journalling API</title>
8 <authorgroup>
9 <author>
10 <firstname>Roger</firstname>
11 <surname>Gammans</surname>
12 <affiliation>
13 <address>
14 <email>rgammans@computer-surgery.co.uk</email>
15 </address>
16 </affiliation>
17 </author>
18 </authorgroup>
19
20 <authorgroup>
21 <author>
22 <firstname>Stephen</firstname>
23 <surname>Tweedie</surname>
24 <affiliation>
25 <address>
26 <email>sct@redhat.com</email>
27 </address>
28 </affiliation>
29 </author>
30 </authorgroup>
31
32 <copyright>
33 <year>2002</year>
34 <holder>Roger Gammans</holder>
35 </copyright>
36
37<legalnotice>
38 <para>
39 This documentation is free software; you can redistribute
40 it and/or modify it under the terms of the GNU General Public
41 License as published by the Free Software Foundation; either
42 version 2 of the License, or (at your option) any later
43 version.
44 </para>
45
46 <para>
47 This program is distributed in the hope that it will be
48 useful, but WITHOUT ANY WARRANTY; without even the implied
49 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
50 See the GNU General Public License for more details.
51 </para>
52
53 <para>
54 You should have received a copy of the GNU General Public
55 License along with this program; if not, write to the Free
56 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
57 MA 02111-1307 USA
58 </para>
59
60 <para>
61 For more details see the file COPYING in the source
62 distribution of Linux.
63 </para>
64 </legalnotice>
65 </bookinfo>
66
67<toc></toc>
68
69 <chapter id="Overview">
70 <title>Overview</title>
71 <sect1>
72 <title>Details</title>
73<para>
74The journalling layer is easy to use. You need to
75first of all create a journal_t data structure. There are
76two calls to do this dependent on how you decide to allocate the physical
77media on which the journal resides. The journal_init_inode() call
78is for journals stored in filesystem inodes, or the journal_init_dev()
79call can be use for journal stored on a raw device (in a continuous range
80of blocks). A journal_t is a typedef for a struct pointer, so when
81you are finally finished make sure you call journal_destroy() on it
82to free up any used kernel memory.
83</para>
84
85<para>
86Once you have got your journal_t object you need to 'mount' or load the journal
87file, unless of course you haven't initialised it yet - in which case you
88need to call journal_create().
89</para>
90
91<para>
92Most of the time however your journal file will already have been created, but
93before you load it you must call journal_wipe() to empty the journal file.
94Hang on, you say , what if the filesystem wasn't cleanly umount()'d . Well, it is the
95job of the client file system to detect this and skip the call to journal_wipe().
96</para>
97
98<para>
99In either case the next call should be to journal_load() which prepares the
100journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
101for you if it detects any outstanding transactions in the journal and similarly
102journal_load() will call journal_recover() if necessary.
103I would advise reading fs/ext3/super.c for examples on this stage.
104[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
105complicate the API. Or isn't a good idea for the journal layer to hide
106dirty mounts from the client fs]
107</para>
108
109<para>
110Now you can go ahead and start modifying the underlying
111filesystem. Almost.
112</para>
113
114
115<para>
116
117You still need to actually journal your filesystem changes, this
118is done by wrapping them into transactions. Additionally you
119also need to wrap the modification of each of the buffers
120with calls to the journal layer, so it knows what the modifications
121you are actually making are. To do this use journal_start() which
122returns a transaction handle.
123</para>
124
125<para>
126journal_start()
127and its counterpart journal_stop(), which indicates the end of a transaction
128are nestable calls, so you can reenter a transaction if necessary,
129but remember you must call journal_stop() the same number of times as
130journal_start() before the transaction is completed (or more accurately
131leaves the update phase). Ext3/VFS makes use of this feature to simplify
132quota support.
133</para>
134
135<para>
136Inside each transaction you need to wrap the modifications to the
137individual buffers (blocks). Before you start to modify a buffer you
138need to call journal_get_{create,write,undo}_access() as appropriate,
139this allows the journalling layer to copy the unmodified data if it
140needs to. After all the buffer may be part of a previously uncommitted
141transaction.
142At this point you are at last ready to modify a buffer, and once
143you are have done so you need to call journal_dirty_{meta,}data().
144Or if you've asked for access to a buffer you now know is now longer
145required to be pushed back on the device you can call journal_forget()
146in much the same way as you might have used bforget() in the past.
147</para>
148
149<para>
150A journal_flush() may be called at any time to commit and checkpoint
151all your transactions.
152</para>
153
154<para>
155Then at umount time , in your put_super() (2.4) or write_super() (2.5)
156you can then call journal_destroy() to clean up your in-core journal object.
157</para>
158
159
160<para>
161Unfortunately there a couple of ways the journal layer can cause a deadlock.
162The first thing to note is that each task can only have
163a single outstanding transaction at any one time, remember nothing
164commits until the outermost journal_stop(). This means
165you must complete the transaction at the end of each file/inode/address
166etc. operation you perform, so that the journalling system isn't re-entered
167on another journal. Since transactions can't be nested/batched
168across differing journals, and another filesystem other than
169yours (say ext3) may be modified in a later syscall.
170</para>
171
172<para>
173The second case to bear in mind is that journal_start() can
174block if there isn't enough space in the journal for your transaction
175(based on the passed nblocks param) - when it blocks it merely(!) needs to
176wait for transactions to complete and be committed from other tasks,
177so essentially we are waiting for journal_stop(). So to avoid
178deadlocks you must treat journal_start/stop() as if they
179were semaphores and include them in your semaphore ordering rules to prevent
180deadlocks. Note that journal_extend() has similar blocking behaviour to
181journal_start() so you can deadlock here just as easily as on journal_start().
182</para>
183
184<para>
185Try to reserve the right number of blocks the first time. ;-). This will
186be the maximum number of blocks you are going to touch in this transaction.
187I advise having a look at at least ext3_jbd.h to see the basis on which
188ext3 uses to make these decisions.
189</para>
190
191<para>
192Another wriggle to watch out for is your on-disk block allocation strategy.
193why? Because, if you undo a delete, you need to ensure you haven't reused any
194of the freed blocks in a later transaction. One simple way of doing this
195is make sure any blocks you allocate only have checkpointed transactions
196listed against them. Ext3 does this in ext3_test_allocatable().
197</para>
198
199<para>
200Lock is also providing through journal_{un,}lock_updates(),
201ext3 uses this when it wants a window with a clean and stable fs for a moment.
202eg.
203</para>
204
205<programlisting>
206
207 journal_lock_updates() //stop new stuff happening..
208 journal_flush() // checkpoint everything.
209 ..do stuff on stable fs
210 journal_unlock_updates() // carry on with filesystem use.
211</programlisting>
212
213<para>
214The opportunities for abuse and DOS attacks with this should be obvious,
215if you allow unprivileged userspace to trigger codepaths containing these
216calls.
217</para>
218
219<para>
220A new feature of jbd since 2.5.25 is commit callbacks with the new
221journal_callback_set() function you can now ask the journalling layer
222to call you back when the transaction is finally committed to disk, so that
223you can do some of your own management. The key to this is the journal_callback
224struct, this maintains the internal callback information but you can
225extend it like this:-
226</para>
227<programlisting>
228 struct myfs_callback_s {
229 //Data structure element required by jbd..
230 struct journal_callback for_jbd;
231 // Stuff for myfs allocated together.
232 myfs_inode* i_commited;
233
234 }
235</programlisting>
236
237<para>
238this would be useful if you needed to know when data was committed to a
239particular inode.
240</para>
241
242</sect1>
243
244<sect1>
245<title>Summary</title>
246<para>
247Using the journal is a matter of wrapping the different context changes,
248being each mount, each modification (transaction) and each changed buffer
249to tell the journalling layer about them.
250</para>
251
252<para>
253Here is a some pseudo code to give you an idea of how it works, as
254an example.
255</para>
256
257<programlisting>
258 journal_t* my_jnrl = journal_create();
259 journal_init_{dev,inode}(jnrl,...)
260 if (clean) journal_wipe();
261 journal_load();
262
263 foreach(transaction) { /*transactions must be
264 completed before
265 a syscall returns to
266 userspace*/
267
268 handle_t * xct=journal_start(my_jnrl);
269 foreach(bh) {
270 journal_get_{create,write,undo}_access(xact,bh);
271 if ( myfs_modify(bh) ) { /* returns true
272 if makes changes */
273 journal_dirty_{meta,}data(xact,bh);
274 } else {
275 journal_forget(bh);
276 }
277 }
278 journal_stop(xct);
279 }
280 journal_destroy(my_jrnl);
281</programlisting>
282</sect1>
283
284</chapter>
285
286 <chapter id="adt">
287 <title>Data Types</title>
288 <para>
289 The journalling layer uses typedefs to 'hide' the concrete definitions
290 of the structures used. As a client of the JBD layer you can
291 just rely on the using the pointer as a magic cookie of some sort.
292
293 Obviously the hiding is not enforced as this is 'C'.
294 </para>
295 <sect1><title>Structures</title>
296!Iinclude/linux/jbd.h
297 </sect1>
298</chapter>
299
300 <chapter id="calls">
301 <title>Functions</title>
302 <para>
303 The functions here are split into two groups those that
304 affect a journal as a whole, and those which are used to
305 manage transactions
306</para>
307 <sect1><title>Journal Level</title>
308!Efs/jbd/journal.c
309!Ifs/jbd/recovery.c
310 </sect1>
311 <sect1><title>Transasction Level</title>
312!Efs/jbd/transaction.c
313 </sect1>
314</chapter>
315<chapter>
316 <title>See also</title>
317 <para>
318 <citation>
319 <ulink url="ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz">
320 Journaling the Linux ext2fs Filesystem,LinuxExpo 98, Stephen Tweedie
321 </ulink>
322 </citation>
323 </para>
324 <para>
325 <citation>
326 <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
327 Ext3 Journalling FileSystem , OLS 2000, Dr. Stephen Tweedie
328 </ulink>
329 </citation>
330 </para>
331</chapter>
332
333</book>
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index b11792abd6b6..bf2b0e2f87e1 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -49,7 +49,7 @@ __u64 stime, utime;
49 } 49 }
50 50
51/* Maximum size of response requested or message sent */ 51/* Maximum size of response requested or message sent */
52#define MAX_MSG_SIZE 256 52#define MAX_MSG_SIZE 1024
53/* Maximum number of cpus expected to be specified in a cpumask */ 53/* Maximum number of cpus expected to be specified in a cpumask */
54#define MAX_CPUS 32 54#define MAX_CPUS 32
55/* Maximum length of pathname to log file */ 55/* Maximum length of pathname to log file */
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
index c65233d430f0..284e7e198e93 100644
--- a/Documentation/kernel-doc-nano-HOWTO.txt
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -17,7 +17,7 @@ are:
17 special place-holders for where the extracted documentation should 17 special place-holders for where the extracted documentation should
18 go. 18 go.
19 19
20- scripts/docproc.c 20- scripts/basic/docproc.c
21 21
22 This is a program for converting SGML template files into SGML 22 This is a program for converting SGML template files into SGML
23 files. When a file is referenced it is searched for symbols 23 files. When a file is referenced it is searched for symbols
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index a66bec222b16..74311d7e0f3c 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -30,6 +30,17 @@ testing). The system will support either 'firmware' or 'platform', and
30that is known a priori. But, the user may choose 'shutdown' or 30that is known a priori. But, the user may choose 'shutdown' or
31'reboot' as alternatives. 31'reboot' as alternatives.
32 32
33Additionally, /sys/power/disk can be used to turn on one of the two testing
34modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the
35suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
36/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
37tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs. If it is
38in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
39to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
40for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then,
41we are able to look in the log messages and work out, for example, which code
42is being slow and which device drivers are misbehaving.
43
33Reading from this file will display what the mode is currently set 44Reading from this file will display what the mode is currently set
34to. Writing to this file will accept one of 45to. Writing to this file will accept one of
35 46
@@ -37,6 +48,8 @@ to. Writing to this file will accept one of
37 'platform' 48 'platform'
38 'shutdown' 49 'shutdown'
39 'reboot' 50 'reboot'
51 'testproc'
52 'test'
40 53
41It will only change to 'firmware' or 'platform' if the system supports 54It will only change to 'firmware' or 'platform' if the system supports
42it. 55it.
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index ab974ff97073..22e4c466e5a3 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -70,7 +70,7 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return
70 70
71#define PREFIX "ACPI: " 71#define PREFIX "ACPI: "
72 72
73int acpi_noirq __initdata; /* skip ACPI IRQ initialization */ 73int acpi_noirq; /* skip ACPI IRQ initialization */
74int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ 74int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
75int acpi_ht __initdata = 1; /* enable HT */ 75int acpi_ht __initdata = 1; /* enable HT */
76 76
diff --git a/arch/um/include/sysdep-i386/barrier.h b/arch/um/include/sysdep-i386/barrier.h
new file mode 100644
index 000000000000..b58d52c5b2f4
--- /dev/null
+++ b/arch/um/include/sysdep-i386/barrier.h
@@ -0,0 +1,9 @@
1#ifndef __SYSDEP_I386_BARRIER_H
2#define __SYSDEP_I386_BARRIER_H
3
4/* Copied from include/asm-i386 for use by userspace. i386 has the option
5 * of using mfence, but I'm just using this, which works everywhere, for now.
6 */
7#define mb() asm volatile("lock; addl $0,0(%esp)")
8
9#endif
diff --git a/arch/um/include/sysdep-x86_64/barrier.h b/arch/um/include/sysdep-x86_64/barrier.h
new file mode 100644
index 000000000000..7b610befdc8f
--- /dev/null
+++ b/arch/um/include/sysdep-x86_64/barrier.h
@@ -0,0 +1,7 @@
1#ifndef __SYSDEP_X86_64_BARRIER_H
2#define __SYSDEP_X86_64_BARRIER_H
3
4/* Copied from include/asm-x86_64 for use by userspace. */
5#define mb() asm volatile("mfence":::"memory")
6
7#endif
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 51f0893640a6..c692a192957a 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -7,7 +7,6 @@
7#include <stdio.h> 7#include <stdio.h>
8#include <errno.h> 8#include <errno.h>
9#include <signal.h> 9#include <signal.h>
10#include <linux/unistd.h>
11#include <sys/mman.h> 10#include <sys/mman.h>
12#include <sys/wait.h> 11#include <sys/wait.h>
13#include <sys/mman.h> 12#include <sys/mman.h>
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6b81739279d1..b897e8592d77 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -15,6 +15,7 @@
15#include "user.h" 15#include "user.h"
16#include "signal_kern.h" 16#include "signal_kern.h"
17#include "sysdep/sigcontext.h" 17#include "sysdep/sigcontext.h"
18#include "sysdep/barrier.h"
18#include "sigcontext.h" 19#include "sigcontext.h"
19#include "mode.h" 20#include "mode.h"
20#include "os.h" 21#include "os.h"
@@ -34,8 +35,12 @@
34#define SIGALRM_BIT 2 35#define SIGALRM_BIT 2
35#define SIGALRM_MASK (1 << SIGALRM_BIT) 36#define SIGALRM_MASK (1 << SIGALRM_BIT)
36 37
37static int signals_enabled = 1; 38/* These are used by both the signal handlers and
38static int pending = 0; 39 * block/unblock_signals. I don't want modifications cached in a
40 * register - they must go straight to memory.
41 */
42static volatile int signals_enabled = 1;
43static volatile int pending = 0;
39 44
40void sig_handler(int sig, struct sigcontext *sc) 45void sig_handler(int sig, struct sigcontext *sc)
41{ 46{
@@ -152,6 +157,12 @@ int change_sig(int signal, int on)
152void block_signals(void) 157void block_signals(void)
153{ 158{
154 signals_enabled = 0; 159 signals_enabled = 0;
160 /* This must return with signals disabled, so this barrier
161 * ensures that writes are flushed out before the return.
162 * This might matter if gcc figures out how to inline this and
163 * decides to shuffle this code into the caller.
164 */
165 mb();
155} 166}
156 167
157void unblock_signals(void) 168void unblock_signals(void)
@@ -171,9 +182,23 @@ void unblock_signals(void)
171 */ 182 */
172 signals_enabled = 1; 183 signals_enabled = 1;
173 184
185 /* Setting signals_enabled and reading pending must
186 * happen in this order.
187 */
188 mb();
189
174 save_pending = pending; 190 save_pending = pending;
175 if(save_pending == 0) 191 if(save_pending == 0){
192 /* This must return with signals enabled, so
193 * this barrier ensures that writes are
194 * flushed out before the return. This might
195 * matter if gcc figures out how to inline
196 * this (unlikely, given its size) and decides
197 * to shuffle this code into the caller.
198 */
199 mb();
176 return; 200 return;
201 }
177 202
178 pending = 0; 203 pending = 0;
179 204
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index cb9ab54146cc..9b34fe65949a 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -14,7 +14,7 @@
14#include <sys/mman.h> 14#include <sys/mman.h>
15#include <sys/user.h> 15#include <sys/user.h>
16#include <sys/time.h> 16#include <sys/time.h>
17#include <asm/unistd.h> 17#include <sys/syscall.h>
18#include <asm/types.h> 18#include <asm/types.h>
19#include "user.h" 19#include "user.h"
20#include "sysdep/ptrace.h" 20#include "sysdep/ptrace.h"
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
index 9f7999f27c77..16215b990804 100644
--- a/arch/um/os-Linux/tls.c
+++ b/arch/um/os-Linux/tls.c
@@ -1,7 +1,7 @@
1#include <errno.h> 1#include <errno.h>
2#include <unistd.h>
2#include <sys/ptrace.h> 3#include <sys/ptrace.h>
3#include <sys/syscall.h> 4#include <sys/syscall.h>
4#include <unistd.h>
5#include <asm/ldt.h> 5#include <asm/ldt.h>
6#include "sysdep/tls.h" 6#include "sysdep/tls.h"
7#include "uml-config.h" 7#include "uml-config.h"
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c7b1dac8bee9..9eaee6640535 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3075,11 +3075,12 @@ end_io:
3075 if (maxsector) { 3075 if (maxsector) {
3076 sector_t sector = bio->bi_sector; 3076 sector_t sector = bio->bi_sector;
3077 3077
3078 if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { 3078 if (maxsector < nr_sectors ||
3079 maxsector - nr_sectors < sector) {
3079 /* 3080 /*
3080 * This may well happen - partitions are not checked 3081 * This may well happen - partitions are not
3081 * to make sure they are within the size of the 3082 * checked to make sure they are within the size
3082 * whole device. 3083 * of the whole device.
3083 */ 3084 */
3084 handle_bad_sector(bio); 3085 handle_bad_sector(bio);
3085 goto end_io; 3086 goto end_io;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index e5cfb1fa47d1..157fa81a264f 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1867,7 +1867,7 @@ static int ipmi_pci_resume(struct pci_dev *pdev)
1867 1867
1868static struct pci_device_id ipmi_pci_devices[] = { 1868static struct pci_device_id ipmi_pci_devices[] = {
1869 { PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) }, 1869 { PCI_DEVICE(PCI_HP_VENDOR_ID, PCI_MMC_DEVICE_ID) },
1870 { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE) } 1870 { PCI_DEVICE_CLASS(PCI_ERMC_CLASSCODE, PCI_ERMC_CLASSCODE_MASK) }
1871}; 1871};
1872MODULE_DEVICE_TABLE(pci, ipmi_pci_devices); 1872MODULE_DEVICE_TABLE(pci, ipmi_pci_devices);
1873 1873
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4bde30bb3be7..75e9e38330ff 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -230,34 +230,43 @@ static struct kobj_type ktype_memctrl = {
230 */ 230 */
231static int edac_sysfs_memctrl_setup(void) 231static int edac_sysfs_memctrl_setup(void)
232{ 232{
233 int err=0; 233 int err = 0;
234 234
235 debugf1("%s()\n", __func__); 235 debugf1("%s()\n", __func__);
236 236
237 /* create the /sys/devices/system/edac directory */ 237 /* create the /sys/devices/system/edac directory */
238 err = sysdev_class_register(&edac_class); 238 err = sysdev_class_register(&edac_class);
239 239
240 if (!err) { 240 if (err) {
241 /* Init the MC's kobject */ 241 debugf1("%s() error=%d\n", __func__, err);
242 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj)); 242 return err;
243 edac_memctrl_kobj.parent = &edac_class.kset.kobj; 243 }
244 edac_memctrl_kobj.ktype = &ktype_memctrl;
245 244
246 /* generate sysfs "..../edac/mc" */ 245 /* Init the MC's kobject */
247 err = kobject_set_name(&edac_memctrl_kobj,"mc"); 246 memset(&edac_memctrl_kobj, 0, sizeof (edac_memctrl_kobj));
247 edac_memctrl_kobj.parent = &edac_class.kset.kobj;
248 edac_memctrl_kobj.ktype = &ktype_memctrl;
248 249
249 if (!err) { 250 /* generate sysfs "..../edac/mc" */
250 /* FIXME: maybe new sysdev_create_subdir() */ 251 err = kobject_set_name(&edac_memctrl_kobj,"mc");
251 err = kobject_register(&edac_memctrl_kobj);
252 252
253 if (err) 253 if (err)
254 debugf1("Failed to register '.../edac/mc'\n"); 254 goto fail;
255 else 255
256 debugf1("Registered '.../edac/mc' kobject\n"); 256 /* FIXME: maybe new sysdev_create_subdir() */
257 } 257 err = kobject_register(&edac_memctrl_kobj);
258 } else 258
259 debugf1("%s() error=%d\n", __func__, err); 259 if (err) {
260 debugf1("Failed to register '.../edac/mc'\n");
261 goto fail;
262 }
260 263
264 debugf1("Registered '.../edac/mc' kobject\n");
265
266 return 0;
267
268fail:
269 sysdev_class_unregister(&edac_class);
261 return err; 270 return err;
262} 271}
263 272
diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c
index 2b0ea8b6608d..753fe0e21456 100644
--- a/drivers/ide/pci/amd74xx.c
+++ b/drivers/ide/pci/amd74xx.c
@@ -75,6 +75,7 @@ static struct amd_ide_chip {
75 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 }, 75 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, 0x50, AMD_UDMA_133 },
76 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, AMD_UDMA_133 }, 76 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, 0x50, AMD_UDMA_133 },
77 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, AMD_UDMA_133 }, 77 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, 0x50, AMD_UDMA_133 },
78 { PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, 0x50, AMD_UDMA_133 },
78 { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, AMD_UDMA_100 }, 79 { PCI_DEVICE_ID_AMD_CS5536_IDE, 0x40, AMD_UDMA_100 },
79 { 0 } 80 { 0 }
80}; 81};
@@ -491,7 +492,8 @@ static ide_pci_device_t amd74xx_chipsets[] __devinitdata = {
491 /* 16 */ DECLARE_NV_DEV("NFORCE-MCP55"), 492 /* 16 */ DECLARE_NV_DEV("NFORCE-MCP55"),
492 /* 17 */ DECLARE_NV_DEV("NFORCE-MCP61"), 493 /* 17 */ DECLARE_NV_DEV("NFORCE-MCP61"),
493 /* 18 */ DECLARE_NV_DEV("NFORCE-MCP65"), 494 /* 18 */ DECLARE_NV_DEV("NFORCE-MCP65"),
494 /* 19 */ DECLARE_AMD_DEV("AMD5536"), 495 /* 19 */ DECLARE_NV_DEV("NFORCE-MCP67"),
496 /* 20 */ DECLARE_AMD_DEV("AMD5536"),
495}; 497};
496 498
497static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id) 499static int __devinit amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id)
@@ -530,7 +532,8 @@ static struct pci_device_id amd74xx_pci_tbl[] = {
530 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 16 }, 532 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 16 },
531 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 17 }, 533 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 17 },
532 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 18 }, 534 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 18 },
533 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 19 }, 535 { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 19 },
536 { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 20 },
534 { 0, }, 537 { 0, },
535}; 538};
536MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl); 539MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl);
diff --git a/drivers/isdn/hysdn/hysdn_sched.c b/drivers/isdn/hysdn/hysdn_sched.c
index 1fadf0133e9b..18758772b744 100644
--- a/drivers/isdn/hysdn/hysdn_sched.c
+++ b/drivers/isdn/hysdn/hysdn_sched.c
@@ -155,21 +155,17 @@ hysdn_tx_cfgline(hysdn_card *card, unsigned char *line, unsigned short chan)
155 if (card->debug_flags & LOG_SCHED_ASYN) 155 if (card->debug_flags & LOG_SCHED_ASYN)
156 hysdn_addlog(card, "async tx-cfg chan=%d len=%d", chan, strlen(line) + 1); 156 hysdn_addlog(card, "async tx-cfg chan=%d len=%d", chan, strlen(line) + 1);
157 157
158 spin_lock_irqsave(&card->hysdn_lock, flags);
159 while (card->async_busy) { 158 while (card->async_busy) {
160 sti();
161 159
162 if (card->debug_flags & LOG_SCHED_ASYN) 160 if (card->debug_flags & LOG_SCHED_ASYN)
163 hysdn_addlog(card, "async tx-cfg delayed"); 161 hysdn_addlog(card, "async tx-cfg delayed");
164 162
165 msleep_interruptible(20); /* Timeout 20ms */ 163 msleep_interruptible(20); /* Timeout 20ms */
166 if (!--cnt) { 164 if (!--cnt)
167 spin_unlock_irqrestore(&card->hysdn_lock, flags);
168 return (-ERR_ASYNC_TIME); /* timed out */ 165 return (-ERR_ASYNC_TIME); /* timed out */
169 }
170 cli();
171 } /* wait for buffer to become free */ 166 } /* wait for buffer to become free */
172 167
168 spin_lock_irqsave(&card->hysdn_lock, flags);
173 strcpy(card->async_data, line); 169 strcpy(card->async_data, line);
174 card->async_len = strlen(line) + 1; 170 card->async_len = strlen(line) + 1;
175 card->async_channel = chan; 171 card->async_channel = chan;
@@ -177,30 +173,23 @@ hysdn_tx_cfgline(hysdn_card *card, unsigned char *line, unsigned short chan)
177 173
178 /* now queue the task */ 174 /* now queue the task */
179 schedule_work(&card->irq_queue); 175 schedule_work(&card->irq_queue);
180 sti(); 176 spin_unlock_irqrestore(&card->hysdn_lock, flags);
181 177
182 if (card->debug_flags & LOG_SCHED_ASYN) 178 if (card->debug_flags & LOG_SCHED_ASYN)
183 hysdn_addlog(card, "async tx-cfg data queued"); 179 hysdn_addlog(card, "async tx-cfg data queued");
184 180
185 cnt++; /* short delay */ 181 cnt++; /* short delay */
186 cli();
187 182
188 while (card->async_busy) { 183 while (card->async_busy) {
189 sti();
190 184
191 if (card->debug_flags & LOG_SCHED_ASYN) 185 if (card->debug_flags & LOG_SCHED_ASYN)
192 hysdn_addlog(card, "async tx-cfg waiting for tx-ready"); 186 hysdn_addlog(card, "async tx-cfg waiting for tx-ready");
193 187
194 msleep_interruptible(20); /* Timeout 20ms */ 188 msleep_interruptible(20); /* Timeout 20ms */
195 if (!--cnt) { 189 if (!--cnt)
196 spin_unlock_irqrestore(&card->hysdn_lock, flags);
197 return (-ERR_ASYNC_TIME); /* timed out */ 190 return (-ERR_ASYNC_TIME); /* timed out */
198 }
199 cli();
200 } /* wait for buffer to become free again */ 191 } /* wait for buffer to become free again */
201 192
202 spin_unlock_irqrestore(&card->hysdn_lock, flags);
203
204 if (card->debug_flags & LOG_SCHED_ASYN) 193 if (card->debug_flags & LOG_SCHED_ASYN)
205 hysdn_addlog(card, "async tx-cfg data send"); 194 hysdn_addlog(card, "async tx-cfg data send");
206 195
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 50ab4a936e30..d11135604403 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3200,6 +3200,7 @@ static int do_md_run(mddev_t * mddev)
3200 3200
3201 mddev->changed = 1; 3201 mddev->changed = 1;
3202 md_new_event(mddev); 3202 md_new_event(mddev);
3203 kobject_uevent(&mddev->gendisk->kobj, KOBJ_ONLINE);
3203 return 0; 3204 return 0;
3204} 3205}
3205 3206
@@ -3313,6 +3314,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
3313 3314
3314 module_put(mddev->pers->owner); 3315 module_put(mddev->pers->owner);
3315 mddev->pers = NULL; 3316 mddev->pers = NULL;
3317 kobject_uevent(&mddev->gendisk->kobj, KOBJ_OFFLINE);
3316 if (mddev->ro) 3318 if (mddev->ro)
3317 mddev->ro = 0; 3319 mddev->ro = 0;
3318 } 3320 }
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
index bbdba7b37e11..46a9c35943bd 100644
--- a/drivers/misc/lkdtm.c
+++ b/drivers/misc/lkdtm.c
@@ -44,12 +44,14 @@
44 */ 44 */
45 45
46#include <linux/kernel.h> 46#include <linux/kernel.h>
47#include <linux/fs.h>
47#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/buffer_head.h>
48#include <linux/kprobes.h> 50#include <linux/kprobes.h>
49#include <linux/kallsyms.h> 51#include <linux/list.h>
50#include <linux/init.h> 52#include <linux/init.h>
51#include <linux/irq.h>
52#include <linux/interrupt.h> 53#include <linux/interrupt.h>
54#include <linux/hrtimer.h>
53#include <scsi/scsi_cmnd.h> 55#include <scsi/scsi_cmnd.h>
54 56
55#ifdef CONFIG_IDE 57#ifdef CONFIG_IDE
@@ -116,16 +118,16 @@ static enum ctype cptype = NONE;
116static int count = DEFAULT_COUNT; 118static int count = DEFAULT_COUNT;
117 119
118module_param(recur_count, int, 0644); 120module_param(recur_count, int, 0644);
119MODULE_PARM_DESC(recur_count, "Recurcion level for the stack overflow test,\ 121MODULE_PARM_DESC(recur_count, " Recursion level for the stack overflow test, "\
120 default is 10"); 122 "default is 10");
121module_param(cpoint_name, charp, 0644); 123module_param(cpoint_name, charp, 0644);
122MODULE_PARM_DESC(cpoint_name, "Crash Point, where kernel is to be crashed"); 124MODULE_PARM_DESC(cpoint_name, " Crash Point, where kernel is to be crashed");
123module_param(cpoint_type, charp, 06444); 125module_param(cpoint_type, charp, 0644);
124MODULE_PARM_DESC(cpoint_type, "Crash Point Type, action to be taken on\ 126MODULE_PARM_DESC(cpoint_type, " Crash Point Type, action to be taken on "\
125 hitting the crash point"); 127 "hitting the crash point");
126module_param(cpoint_count, int, 06444); 128module_param(cpoint_count, int, 0644);
127MODULE_PARM_DESC(cpoint_count, "Crash Point Count, number of times the \ 129MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
128 crash point is to be hit to trigger action"); 130 "crash point is to be hit to trigger action");
129 131
130unsigned int jp_do_irq(unsigned int irq) 132unsigned int jp_do_irq(unsigned int irq)
131{ 133{
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 146298ad7371..c3c0626f550b 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -281,7 +281,6 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
281 up(&board_lock); 281 up(&board_lock);
282 return 0; 282 return 0;
283} 283}
284EXPORT_SYMBOL_GPL(spi_register_board_info);
285 284
286/* FIXME someone should add support for a __setup("spi", ...) that 285/* FIXME someone should add support for a __setup("spi", ...) that
287 * creates board info from kernel command lines 286 * creates board info from kernel command lines
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 976a691c5a68..7e056b9b49e8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1806,13 +1806,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1806 } 1806 }
1807 if ((rc < 0) || (smb_read_data == NULL)) { 1807 if ((rc < 0) || (smb_read_data == NULL)) {
1808 cFYI(1, ("Read error in readpages: %d", rc)); 1808 cFYI(1, ("Read error in readpages: %d", rc));
1809 /* clean up remaing pages off list */
1810 while (!list_empty(page_list) && (i < num_pages)) {
1811 page = list_entry(page_list->prev, struct page,
1812 lru);
1813 list_del(&page->lru);
1814 page_cache_release(page);
1815 }
1816 break; 1809 break;
1817 } else if (bytes_read > 0) { 1810 } else if (bytes_read > 0) {
1818 pSMBr = (struct smb_com_read_rsp *)smb_read_data; 1811 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
@@ -1831,13 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1831 this case is ok - if we are at server EOF 1824 this case is ok - if we are at server EOF
1832 we will hit it on next read */ 1825 we will hit it on next read */
1833 1826
1834 /* while (!list_empty(page_list) && (i < num_pages)) { 1827 /* break; */
1835 page = list_entry(page_list->prev,
1836 struct page, list);
1837 list_del(&page->list);
1838 page_cache_release(page);
1839 }
1840 break; */
1841 } 1828 }
1842 } else { 1829 } else {
1843 cFYI(1, ("No bytes read (%d) at offset %lld . " 1830 cFYI(1, ("No bytes read (%d) at offset %lld . "
@@ -1845,14 +1832,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1845 bytes_read, offset)); 1832 bytes_read, offset));
1846 /* BB turn off caching and do new lookup on 1833 /* BB turn off caching and do new lookup on
1847 file size at server? */ 1834 file size at server? */
1848 while (!list_empty(page_list) && (i < num_pages)) {
1849 page = list_entry(page_list->prev, struct page,
1850 lru);
1851 list_del(&page->lru);
1852
1853 /* BB removeme - replace with zero of page? */
1854 page_cache_release(page);
1855 }
1856 break; 1835 break;
1857 } 1836 }
1858 if (smb_read_data) { 1837 if (smb_read_data) {
diff --git a/fs/compat.c b/fs/compat.c
index 50624d4a70c6..8d0a0018a7d2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1835,9 +1835,12 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1835 1835
1836 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); 1836 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1837 1837
1838 if (ret == 0 && tsp && !(current->personality & STICKY_TIMEOUTS)) { 1838 if (tsp) {
1839 struct compat_timespec rts; 1839 struct compat_timespec rts;
1840 1840
1841 if (current->personality & STICKY_TIMEOUTS)
1842 goto sticky;
1843
1841 rts.tv_sec = timeout / HZ; 1844 rts.tv_sec = timeout / HZ;
1842 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); 1845 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1843 if (rts.tv_nsec >= NSEC_PER_SEC) { 1846 if (rts.tv_nsec >= NSEC_PER_SEC) {
@@ -1846,8 +1849,19 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1846 } 1849 }
1847 if (compat_timespec_compare(&rts, &ts) >= 0) 1850 if (compat_timespec_compare(&rts, &ts) >= 0)
1848 rts = ts; 1851 rts = ts;
1849 if (copy_to_user(tsp, &rts, sizeof(rts))) 1852 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1850 ret = -EFAULT; 1853sticky:
1854 /*
1855 * If an application puts its timeval in read-only
1856 * memory, we don't want the Linux-specific update to
1857 * the timeval to cause a fault after the select has
1858 * completed successfully. However, because we're not
1859 * updating the timeval, we can't restart the system
1860 * call.
1861 */
1862 if (ret == -ERESTARTNOHAND)
1863 ret = -EINTR;
1864 }
1851 } 1865 }
1852 1866
1853 if (ret == -ERESTARTNOHAND) { 1867 if (ret == -ERESTARTNOHAND) {
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f49f105394b7..136175a69332 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -134,7 +134,7 @@ int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
134 134
135 algified_name_len = (chaining_modifier_len + cipher_name_len + 3); 135 algified_name_len = (chaining_modifier_len + cipher_name_len + 3);
136 (*algified_name) = kmalloc(algified_name_len, GFP_KERNEL); 136 (*algified_name) = kmalloc(algified_name_len, GFP_KERNEL);
137 if (!(algified_name)) { 137 if (!(*algified_name)) {
138 rc = -ENOMEM; 138 rc = -ENOMEM;
139 goto out; 139 goto out;
140 } 140 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2bb5ace3882d..763a50daf1c0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -397,14 +397,14 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
397 397
398 err = -EIO; 398 err = -EIO;
399 if (is_bad_inode(inode)) 399 if (is_bad_inode(inode))
400 goto clean_pages_up; 400 goto out;
401 401
402 data.file = file; 402 data.file = file;
403 data.inode = inode; 403 data.inode = inode;
404 data.req = fuse_get_req(fc); 404 data.req = fuse_get_req(fc);
405 err = PTR_ERR(data.req); 405 err = PTR_ERR(data.req);
406 if (IS_ERR(data.req)) 406 if (IS_ERR(data.req))
407 goto clean_pages_up; 407 goto out;
408 408
409 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); 409 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
410 if (!err) { 410 if (!err) {
@@ -413,10 +413,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
413 else 413 else
414 fuse_put_request(fc, data.req); 414 fuse_put_request(fc, data.req);
415 } 415 }
416 return err; 416out:
417
418clean_pages_up:
419 put_pages_list(pages);
420 return err; 417 return err;
421} 418}
422 419
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 8d5963c7e123..015640b3f123 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -337,13 +337,6 @@ out:
337out_noerror: 337out_noerror:
338 ret = 0; 338 ret = 0;
339out_unlock: 339out_unlock:
340 /* unlock all pages, we can't do any I/O right now */
341 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
342 struct page *page = list_entry(pages->prev, struct page, lru);
343 list_del(&page->lru);
344 unlock_page(page);
345 page_cache_release(page);
346 }
347 if (do_unlock) 340 if (do_unlock)
348 gfs2_holder_uninit(&gh); 341 gfs2_holder_uninit(&gh);
349 goto out; 342 goto out;
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index e9d07704680e..81b8565d3837 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -274,7 +274,7 @@ nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
274 * any regular files anyway, just in case the directory was created by 274 * any regular files anyway, just in case the directory was created by
275 * a kernel from the future.... */ 275 * a kernel from the future.... */
276 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); 276 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
277 mutex_lock(&dir->d_inode->i_mutex); 277 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
278 status = vfs_rmdir(dir->d_inode, dentry); 278 status = vfs_rmdir(dir->d_inode, dentry);
279 mutex_unlock(&dir->d_inode->i_mutex); 279 mutex_unlock(&dir->d_inode->i_mutex);
280 return status; 280 return status;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9041802df832..17249994110f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1619,6 +1619,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1619 "jmacd-8: reiserfs_fill_super: unable to read bitmap"); 1619 "jmacd-8: reiserfs_fill_super: unable to read bitmap");
1620 goto error; 1620 goto error;
1621 } 1621 }
1622 errval = -EINVAL;
1622#ifdef CONFIG_REISERFS_CHECK 1623#ifdef CONFIG_REISERFS_CHECK
1623 SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON"); 1624 SWARN(silent, s, "CONFIG_REISERFS_CHECK is set ON");
1624 SWARN(silent, s, "- it is slow mode for debugging."); 1625 SWARN(silent, s, "- it is slow mode for debugging.");
diff --git a/fs/xattr.c b/fs/xattr.c
index 395635100f77..0901bdc2ce24 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -48,14 +48,21 @@ xattr_permission(struct inode *inode, const char *name, int mask)
48 return 0; 48 return 0;
49 49
50 /* 50 /*
51 * The trusted.* namespace can only accessed by a privilegued user. 51 * The trusted.* namespace can only be accessed by a privileged user.
52 */ 52 */
53 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) 53 if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
54 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); 54 return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM);
55 55
56 /* In user.* namespace, only regular files and directories can have
57 * extended attributes. For sticky directories, only the owner and
58 * privileged user can write attributes.
59 */
56 if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { 60 if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
57 if (!S_ISREG(inode->i_mode) && 61 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
58 (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) 62 return -EPERM;
63 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
64 (mask & MAY_WRITE) && (current->fsuid != inode->i_uid) &&
65 !capable(CAP_FOWNER))
59 return -EPERM; 66 return -EPERM;
60 } 67 }
61 68
diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h
index eac85ce101b6..c6a03187f932 100644
--- a/include/asm-powerpc/systbl.h
+++ b/include/asm-powerpc/systbl.h
@@ -261,7 +261,7 @@ SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
261PPC_SYS_SPU(rtas) 261PPC_SYS_SPU(rtas)
262OLDSYS(debug_setcontext) 262OLDSYS(debug_setcontext)
263SYSCALL(ni_syscall) 263SYSCALL(ni_syscall)
264SYSCALL(ni_syscall) 264COMPAT_SYS(migrate_pages)
265COMPAT_SYS(mbind) 265COMPAT_SYS(mbind)
266COMPAT_SYS(get_mempolicy) 266COMPAT_SYS(get_mempolicy)
267COMPAT_SYS(set_mempolicy) 267COMPAT_SYS(set_mempolicy)
diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h
index 464a48cce7f5..b5fe93291c96 100644
--- a/include/asm-powerpc/unistd.h
+++ b/include/asm-powerpc/unistd.h
@@ -276,7 +276,7 @@
276#define __NR_rtas 255 276#define __NR_rtas 255
277#define __NR_sys_debug_setcontext 256 277#define __NR_sys_debug_setcontext 256
278/* Number 257 is reserved for vserver */ 278/* Number 257 is reserved for vserver */
279/* 258 currently unused */ 279#define __NR_migrate_pages 258
280#define __NR_mbind 259 280#define __NR_mbind 259
281#define __NR_get_mempolicy 260 281#define __NR_get_mempolicy 260
282#define __NR_set_mempolicy 261 282#define __NR_set_mempolicy 261
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f1553196826f..80b17f440ec1 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -230,5 +230,9 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
230extern int compat_printk(const char *fmt, ...); 230extern int compat_printk(const char *fmt, ...);
231extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); 231extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
232 232
233asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
234 compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes,
235 const compat_ulong_t __user *new_nodes);
236
233#endif /* CONFIG_COMPAT */ 237#endif /* CONFIG_COMPAT */
234#endif /* _LINUX_COMPAT_H */ 238#endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 80f39cab470a..24b611147adb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -171,6 +171,8 @@ __attribute_const__ roundup_pow_of_two(unsigned long x)
171 171
172extern int printk_ratelimit(void); 172extern int printk_ratelimit(void);
173extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); 173extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst);
174extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
175 unsigned int interval_msec);
174 176
175static inline void console_silent(void) 177static inline void console_silent(void)
176{ 178{
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6b27e07aef19..070394e846d0 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -116,7 +116,9 @@ typedef int __bitwise suspend_disk_method_t;
116#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2) 116#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2)
117#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3) 117#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3)
118#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4) 118#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4)
119#define PM_DISK_MAX ((__force suspend_disk_method_t) 5) 119#define PM_DISK_TEST ((__force suspend_disk_method_t) 5)
120#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 6)
121#define PM_DISK_MAX ((__force suspend_disk_method_t) 7)
120 122
121struct pm_ops { 123struct pm_ops {
122 suspend_disk_method_t pm_disk_mode; 124 suspend_disk_method_t pm_disk_mode;
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index 61eef508b041..28967eda9d7b 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -908,7 +908,7 @@ struct ufs_super_block_third {
908 __fs64 fs_csaddr; /* blk addr of cyl grp summary area */ 908 __fs64 fs_csaddr; /* blk addr of cyl grp summary area */
909 __fs64 fs_pendingblocks;/* blocks in process of being freed */ 909 __fs64 fs_pendingblocks;/* blocks in process of being freed */
910 __fs32 fs_pendinginodes;/*inodes in process of being freed */ 910 __fs32 fs_pendinginodes;/*inodes in process of being freed */
911 } fs_u2; 911 } __attribute__ ((packed)) fs_u2;
912 } fs_un1; 912 } fs_un1;
913 union { 913 union {
914 struct { 914 struct {
diff --git a/ipc/msg.c b/ipc/msg.c
index 5b213d952545..600e06f943a6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -124,6 +124,7 @@ void msg_exit_ns(struct ipc_namespace *ns)
124 } 124 }
125 mutex_unlock(&msg_ids(ns).mutex); 125 mutex_unlock(&msg_ids(ns).mutex);
126 126
127 ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
127 kfree(ns->ids[IPC_MSG_IDS]); 128 kfree(ns->ids[IPC_MSG_IDS]);
128 ns->ids[IPC_MSG_IDS] = NULL; 129 ns->ids[IPC_MSG_IDS] = NULL;
129} 130}
diff --git a/ipc/sem.c b/ipc/sem.c
index 0dafcc455f92..21b3289d640c 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -161,6 +161,7 @@ void sem_exit_ns(struct ipc_namespace *ns)
161 } 161 }
162 mutex_unlock(&sem_ids(ns).mutex); 162 mutex_unlock(&sem_ids(ns).mutex);
163 163
164 ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
164 kfree(ns->ids[IPC_SEM_IDS]); 165 kfree(ns->ids[IPC_SEM_IDS]);
165 ns->ids[IPC_SEM_IDS] = NULL; 166 ns->ids[IPC_SEM_IDS] = NULL;
166} 167}
diff --git a/ipc/shm.c b/ipc/shm.c
index bfbd317ec11c..d1198dd07a1a 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -116,6 +116,7 @@ void shm_exit_ns(struct ipc_namespace *ns)
116 } 116 }
117 mutex_unlock(&shm_ids(ns).mutex); 117 mutex_unlock(&shm_ids(ns).mutex);
118 118
119 ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
119 kfree(ns->ids[IPC_SHM_IDS]); 120 kfree(ns->ids[IPC_SHM_IDS]);
120 ns->ids[IPC_SHM_IDS] = NULL; 121 ns->ids[IPC_SHM_IDS] = NULL;
121} 122}
diff --git a/ipc/util.c b/ipc/util.c
index 42479e4eec59..cd8bb14a431f 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -301,7 +301,7 @@ static int grow_ary(struct ipc_ids* ids, int newsize)
301 */ 301 */
302 rcu_assign_pointer(ids->entries, new); 302 rcu_assign_pointer(ids->entries, new);
303 303
304 ipc_rcu_putref(old); 304 __ipc_fini_ids(ids, old);
305 return newsize; 305 return newsize;
306} 306}
307 307
diff --git a/ipc/util.h b/ipc/util.h
index c8fd6b9d77b5..e3aa2c5c97dc 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -83,6 +83,18 @@ void* ipc_rcu_alloc(int size);
83void ipc_rcu_getref(void *ptr); 83void ipc_rcu_getref(void *ptr);
84void ipc_rcu_putref(void *ptr); 84void ipc_rcu_putref(void *ptr);
85 85
86static inline void __ipc_fini_ids(struct ipc_ids *ids,
87 struct ipc_id_ary *entries)
88{
89 if (entries != &ids->nullentry)
90 ipc_rcu_putref(entries);
91}
92
93static inline void ipc_fini_ids(struct ipc_ids *ids)
94{
95 __ipc_fini_ids(ids, ids->entries);
96}
97
86struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id); 98struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
87struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id); 99struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
88void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp); 100void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
diff --git a/kernel/compat.c b/kernel/compat.c
index d4898aad6cfa..6952dd057300 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages,
982 } 982 }
983 return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); 983 return sys_move_pages(pid, nr_pages, pages, nodes, status, flags);
984} 984}
985
986asmlinkage long compat_sys_migrate_pages(compat_pid_t pid,
987 compat_ulong_t maxnode,
988 const compat_ulong_t __user *old_nodes,
989 const compat_ulong_t __user *new_nodes)
990{
991 unsigned long __user *old = NULL;
992 unsigned long __user *new = NULL;
993 nodemask_t tmp_mask;
994 unsigned long nr_bits;
995 unsigned long size;
996
997 nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES);
998 size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
999 if (old_nodes) {
1000 if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits))
1001 return -EFAULT;
1002 old = compat_alloc_user_space(new_nodes ? size * 2 : size);
1003 if (new_nodes)
1004 new = old + size / sizeof(unsigned long);
1005 if (copy_to_user(old, nodes_addr(tmp_mask), size))
1006 return -EFAULT;
1007 }
1008 if (new_nodes) {
1009 if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits))
1010 return -EFAULT;
1011 if (new == NULL)
1012 new = compat_alloc_user_space(size);
1013 if (copy_to_user(new, nodes_addr(tmp_mask), size))
1014 return -EFAULT;
1015 }
1016 return sys_migrate_pages(pid, nr_bits + 1, old, new);
1017}
985#endif 1018#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index b364e0026191..93ef30ba209f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1507,6 +1507,13 @@ static int futex_fd(u32 __user *uaddr, int signal)
1507 struct futex_q *q; 1507 struct futex_q *q;
1508 struct file *filp; 1508 struct file *filp;
1509 int ret, err; 1509 int ret, err;
1510 static unsigned long printk_interval;
1511
1512 if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
1513 printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
1514 "will be removed from the kernel in June 2007\n",
1515 current->comm);
1516 }
1510 1517
1511 ret = -EINVAL; 1518 ret = -EINVAL;
1512 if (!valid_signal(signal)) 1519 if (!valid_signal(signal))
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index d3a158a60312..b1fb7866b0b3 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -71,7 +71,7 @@ static inline void platform_finish(void)
71 71
72static int prepare_processes(void) 72static int prepare_processes(void)
73{ 73{
74 int error; 74 int error = 0;
75 75
76 pm_prepare_console(); 76 pm_prepare_console();
77 77
@@ -84,6 +84,12 @@ static int prepare_processes(void)
84 goto thaw; 84 goto thaw;
85 } 85 }
86 86
87 if (pm_disk_mode == PM_DISK_TESTPROC) {
88 printk("swsusp debug: Waiting for 5 seconds.\n");
89 mdelay(5000);
90 goto thaw;
91 }
92
87 /* Free memory before shutting down devices. */ 93 /* Free memory before shutting down devices. */
88 if (!(error = swsusp_shrink_memory())) 94 if (!(error = swsusp_shrink_memory()))
89 return 0; 95 return 0;
@@ -120,13 +126,21 @@ int pm_suspend_disk(void)
120 if (error) 126 if (error)
121 return error; 127 return error;
122 128
129 if (pm_disk_mode == PM_DISK_TESTPROC)
130 goto Thaw;
131
123 suspend_console(); 132 suspend_console();
124 error = device_suspend(PMSG_FREEZE); 133 error = device_suspend(PMSG_FREEZE);
125 if (error) { 134 if (error) {
126 resume_console(); 135 resume_console();
127 printk("Some devices failed to suspend\n"); 136 printk("Some devices failed to suspend\n");
128 unprepare_processes(); 137 goto Thaw;
129 return error; 138 }
139
140 if (pm_disk_mode == PM_DISK_TEST) {
141 printk("swsusp debug: Waiting for 5 seconds.\n");
142 mdelay(5000);
143 goto Done;
130 } 144 }
131 145
132 pr_debug("PM: snapshotting memory.\n"); 146 pr_debug("PM: snapshotting memory.\n");
@@ -143,16 +157,17 @@ int pm_suspend_disk(void)
143 power_down(pm_disk_mode); 157 power_down(pm_disk_mode);
144 else { 158 else {
145 swsusp_free(); 159 swsusp_free();
146 unprepare_processes(); 160 goto Thaw;
147 return error;
148 } 161 }
149 } else 162 } else {
150 pr_debug("PM: Image restored successfully.\n"); 163 pr_debug("PM: Image restored successfully.\n");
164 }
151 165
152 swsusp_free(); 166 swsusp_free();
153 Done: 167 Done:
154 device_resume(); 168 device_resume();
155 resume_console(); 169 resume_console();
170 Thaw:
156 unprepare_processes(); 171 unprepare_processes();
157 return error; 172 return error;
158} 173}
@@ -249,6 +264,8 @@ static const char * const pm_disk_modes[] = {
249 [PM_DISK_PLATFORM] = "platform", 264 [PM_DISK_PLATFORM] = "platform",
250 [PM_DISK_SHUTDOWN] = "shutdown", 265 [PM_DISK_SHUTDOWN] = "shutdown",
251 [PM_DISK_REBOOT] = "reboot", 266 [PM_DISK_REBOOT] = "reboot",
267 [PM_DISK_TEST] = "test",
268 [PM_DISK_TESTPROC] = "testproc",
252}; 269};
253 270
254/** 271/**
@@ -303,17 +320,19 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n)
303 } 320 }
304 } 321 }
305 if (mode) { 322 if (mode) {
306 if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) 323 if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT ||
324 mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) {
307 pm_disk_mode = mode; 325 pm_disk_mode = mode;
308 else { 326 } else {
309 if (pm_ops && pm_ops->enter && 327 if (pm_ops && pm_ops->enter &&
310 (mode == pm_ops->pm_disk_mode)) 328 (mode == pm_ops->pm_disk_mode))
311 pm_disk_mode = mode; 329 pm_disk_mode = mode;
312 else 330 else
313 error = -EINVAL; 331 error = -EINVAL;
314 } 332 }
315 } else 333 } else {
316 error = -EINVAL; 334 error = -EINVAL;
335 }
317 336
318 pr_debug("PM: suspend-to-disk mode set to '%s'\n", 337 pr_debug("PM: suspend-to-disk mode set to '%s'\n",
319 pm_disk_modes[mode]); 338 pm_disk_modes[mode]);
diff --git a/kernel/printk.c b/kernel/printk.c
index f7d427ef5038..66426552fbfe 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,6 +31,7 @@
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/bootmem.h> 32#include <linux/bootmem.h>
33#include <linux/syscalls.h> 33#include <linux/syscalls.h>
34#include <linux/jiffies.h>
34 35
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36 37
@@ -1101,3 +1102,23 @@ int printk_ratelimit(void)
1101 printk_ratelimit_burst); 1102 printk_ratelimit_burst);
1102} 1103}
1103EXPORT_SYMBOL(printk_ratelimit); 1104EXPORT_SYMBOL(printk_ratelimit);
1105
1106/**
1107 * printk_timed_ratelimit - caller-controlled printk ratelimiting
1108 * @caller_jiffies: pointer to caller's state
1109 * @interval_msecs: minimum interval between prints
1110 *
1111 * printk_timed_ratelimit() returns true if more than @interval_msecs
1112 * milliseconds have elapsed since the last time printk_timed_ratelimit()
1113 * returned true.
1114 */
1115bool printk_timed_ratelimit(unsigned long *caller_jiffies,
1116 unsigned int interval_msecs)
1117{
1118 if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) {
1119 *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs);
1120 return true;
1121 }
1122 return false;
1123}
1124EXPORT_SYMBOL(printk_timed_ratelimit);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 0e53314b14de..d7306d0f3dfc 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -135,6 +135,7 @@ cond_syscall(sys_madvise);
135cond_syscall(sys_mremap); 135cond_syscall(sys_mremap);
136cond_syscall(sys_remap_file_pages); 136cond_syscall(sys_remap_file_pages);
137cond_syscall(compat_sys_move_pages); 137cond_syscall(compat_sys_move_pages);
138cond_syscall(compat_sys_migrate_pages);
138 139
139/* block-layer dependent */ 140/* block-layer dependent */
140cond_syscall(sys_bdflush); 141cond_syscall(sys_bdflush);
diff --git a/mm/migrate.c b/mm/migrate.c
index ba2453f9483d..b4979d423d2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -952,7 +952,8 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
952 goto out; 952 goto out;
953 953
954 pm[i].node = node; 954 pm[i].node = node;
955 } 955 } else
956 pm[i].node = 0; /* anything to not match MAX_NUMNODES */
956 } 957 }
957 /* End marker */ 958 /* End marker */
958 pm[nr_pages].node = MAX_NUMNODES; 959 pm[nr_pages].node = MAX_NUMNODES;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b55bb358b832..bf2f6cff1d6a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -853,7 +853,7 @@ again:
853 pcp = &zone_pcp(zone, cpu)->pcp[cold]; 853 pcp = &zone_pcp(zone, cpu)->pcp[cold];
854 local_irq_save(flags); 854 local_irq_save(flags);
855 if (!pcp->count) { 855 if (!pcp->count) {
856 pcp->count += rmqueue_bulk(zone, 0, 856 pcp->count = rmqueue_bulk(zone, 0,
857 pcp->batch, &pcp->list); 857 pcp->batch, &pcp->list);
858 if (unlikely(!pcp->count)) 858 if (unlikely(!pcp->count))
859 goto failed; 859 goto failed;
diff --git a/mm/readahead.c b/mm/readahead.c
index 1ba736ac0367..23cb61a01c6e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -173,6 +173,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
173 173
174 if (mapping->a_ops->readpages) { 174 if (mapping->a_ops->readpages) {
175 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); 175 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
176 /* Clean up the remaining pages */
177 put_pages_list(pages);
176 goto out; 178 goto out;
177 } 179 }
178 180
diff --git a/mm/slab.c b/mm/slab.c
index 84c631f30741..3c4a7e34eddc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -883,7 +883,7 @@ static void init_reap_node(int cpu)
883 if (node == MAX_NUMNODES) 883 if (node == MAX_NUMNODES)
884 node = first_node(node_online_map); 884 node = first_node(node_online_map);
885 885
886 __get_cpu_var(reap_node) = node; 886 per_cpu(reap_node, cpu) = node;
887} 887}
888 888
889static void next_reap_node(void) 889static void next_reap_node(void)
diff --git a/scripts/basic/docproc.c b/scripts/basic/docproc.c
index 4ab6cbf09225..d6071cbf13d7 100644
--- a/scripts/basic/docproc.c
+++ b/scripts/basic/docproc.c
@@ -250,7 +250,7 @@ void intfunc(char * filename) { docfunctions(filename, NOFUNCTION); }
250void extfunc(char * filename) { docfunctions(filename, FUNCTION); } 250void extfunc(char * filename) { docfunctions(filename, FUNCTION); }
251 251
252/* 252/*
253 * Document spåecific function(s) in a file. 253 * Document specific function(s) in a file.
254 * Call kernel-doc with the following parameters: 254 * Call kernel-doc with the following parameters:
255 * kernel-doc -docbook -function function1 [-function function2] 255 * kernel-doc -docbook -function function1 [-function function2]
256 */ 256 */