aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c1
-rw-r--r--fs/9p/vfs_dentry.c1
-rw-r--r--fs/9p/vfs_inode.c1
-rw-r--r--fs/9p/vfs_super.c1
-rw-r--r--fs/adfs/super.c3
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/affs/super.c9
-rw-r--r--fs/afs/callback.c1
-rw-r--r--fs/afs/cell.c1
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/inode.c1
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/main.c1
-rw-r--r--fs/afs/proc.c1
-rw-r--r--fs/afs/security.c1
-rw-r--r--fs/afs/super.c22
-rw-r--r--fs/afs/vlocation.c1
-rw-r--r--fs/afs/vnode.c1
-rw-r--r--fs/afs/volume.c1
-rw-r--r--fs/afs/write.c3
-rw-r--r--fs/befs/linuxvfs.c6
-rw-r--r--fs/bfs/inode.c3
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c16
-rw-r--r--fs/buffer.c27
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/coda/cache.c1
-rw-r--r--fs/coda/inode.c3
-rw-r--r--fs/coda/upcall.c2
-rw-r--r--fs/compat.c13
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/dquot.c2
-rw-r--r--fs/ecryptfs/main.c3
-rw-r--r--fs/ecryptfs/messaging.c2
-rw-r--r--fs/ecryptfs/mmap.c14
-rw-r--r--fs/efs/super.c3
-rw-r--r--fs/eventfd.c26
-rw-r--r--fs/eventpoll.c561
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext2/super.c8
-rw-r--r--fs/ext3/super.c10
-rw-r--r--fs/ext4/super.c10
-rw-r--r--fs/fat/cache.c3
-rw-r--r--fs/fat/inode.c14
-rw-r--r--fs/fifo.c1
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/glock.h1
-rw-r--r--fs/gfs2/main.c34
-rw-r--r--fs/hfs/inode.c1
-rw-r--r--fs/hfs/super.c3
-rw-r--r--fs/hfsplus/inode.c1
-rw-r--r--fs/hfsplus/super.c3
-rw-r--r--fs/hpfs/buffer.c2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c9
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/inode.c3
-rw-r--r--fs/isofs/inode.c3
-rw-r--r--fs/jffs2/super.c6
-rw-r--r--fs/jfs/jfs_metapage.c18
-rw-r--r--fs/jfs/super.c22
-rw-r--r--fs/lockd/clntlock.c2
-rw-r--r--fs/lockd/host.c2
-rw-r--r--fs/lockd/xdr.c4
-rw-r--r--fs/lockd/xdr4.c6
-rw-r--r--fs/locks.c3
-rw-r--r--fs/minix/bitmap.c1
-rw-r--r--fs/minix/inode.c3
-rw-r--r--fs/ncpfs/file.c1
-rw-r--r--fs/ncpfs/inode.c6
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/nfs/callback.h4
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/delegation.c2
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/inode.c30
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c96
-rw-r--r--fs/nfs/pagelist.c1
-rw-r--r--fs/nfs/read.c10
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsd/nfs4callback.c1
-rw-r--r--fs/nfsd/nfs4recover.c2
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/ntfs/file.c1
-rw-r--r--fs/ntfs/super.c3
-rw-r--r--fs/ocfs2/dlm/dlmfs.c8
-rw-r--r--fs/ocfs2/super.c38
-rw-r--r--fs/openpromfs/inode.c3
-rw-r--r--fs/partitions/Kconfig12
-rw-r--r--fs/partitions/ldm.c206
-rw-r--r--fs/partitions/ldm.h6
-rw-r--r--fs/proc/inode.c3
-rw-r--r--fs/qnx4/inode.c3
-rw-r--r--fs/quota.c23
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--fs/romfs/inode.c7
-rw-r--r--fs/smbfs/dir.c1
-rw-r--r--fs/smbfs/file.c1
-rw-r--r--fs/smbfs/inode.c4
-rw-r--r--fs/smbfs/request.c1
-rw-r--r--fs/sysfs/inode.c1
-rw-r--r--fs/sysv/inode.c3
-rw-r--r--fs/timerfd.c24
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/namei.c1
-rw-r--r--fs/udf/super.c6
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
113 files changed, 684 insertions, 798 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 3128aa948a4e..9ac4ffe9ac7d 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -32,6 +32,7 @@
32#include <linux/inet.h> 32#include <linux/inet.h>
33#include <linux/pagemap.h> 33#include <linux/pagemap.h>
34#include <linux/idr.h> 34#include <linux/idr.h>
35#include <linux/sched.h>
35 36
36#include "debug.h" 37#include "debug.h"
37#include "v9fs.h" 38#include "v9fs.h"
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 775e26e82cbc..d93960429c09 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -33,6 +33,7 @@
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h>
36 37
37#include "debug.h" 38#include "debug.h"
38#include "v9fs.h" 39#include "v9fs.h"
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7624821729a0..c76cd8fa3f6c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -33,6 +33,7 @@
33#include <linux/inet.h> 33#include <linux/inet.h>
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/idr.h> 35#include <linux/idr.h>
36#include <linux/sched.h>
36 37
37#include "debug.h" 38#include "debug.h"
38#include "v9fs.h" 39#include "v9fs.h"
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8eb9263a67b9..7bdf8b326841 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -36,6 +36,7 @@
36#include <linux/seq_file.h> 36#include <linux/seq_file.h>
37#include <linux/mount.h> 37#include <linux/mount.h>
38#include <linux/idr.h> 38#include <linux/idr.h>
39#include <linux/sched.h>
39 40
40#include "debug.h" 41#include "debug.h"
41#include "v9fs.h" 42#include "v9fs.h"
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 30c296508497..de2ed5ca3351 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -232,8 +232,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
232{ 232{
233 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; 233 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
234 234
235 if (flags & SLAB_CTOR_CONSTRUCTOR) 235 inode_init_once(&ei->vfs_inode);
236 inode_init_once(&ei->vfs_inode);
237} 236}
238 237
239static int init_inodecache(void) 238static int init_inodecache(void)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index c5b9d73c084a..4609a6c13fe9 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * (C) 1991 Linus Torvalds - minix filesystem 10 * (C) 1991 Linus Torvalds - minix filesystem
11 */ 11 */
12 12#include <linux/sched.h>
13#include "affs.h" 13#include "affs.h"
14 14
15extern const struct inode_operations affs_symlink_inode_operations; 15extern const struct inode_operations affs_symlink_inode_operations;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index beff7d21e6e2..6d0ebc321530 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -15,6 +15,7 @@
15#include <linux/statfs.h> 15#include <linux/statfs.h>
16#include <linux/parser.h> 16#include <linux/parser.h>
17#include <linux/magic.h> 17#include <linux/magic.h>
18#include <linux/sched.h>
18#include "affs.h" 19#include "affs.h"
19 20
20extern struct timezone sys_tz; 21extern struct timezone sys_tz;
@@ -87,11 +88,9 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
87{ 88{
88 struct affs_inode_info *ei = (struct affs_inode_info *) foo; 89 struct affs_inode_info *ei = (struct affs_inode_info *) foo;
89 90
90 if (flags & SLAB_CTOR_CONSTRUCTOR) { 91 init_MUTEX(&ei->i_link_lock);
91 init_MUTEX(&ei->i_link_lock); 92 init_MUTEX(&ei->i_ext_lock);
92 init_MUTEX(&ei->i_ext_lock); 93 inode_init_once(&ei->vfs_inode);
93 inode_init_once(&ei->vfs_inode);
94 }
95} 94}
96 95
97static int init_inodecache(void) 96static int init_inodecache(void)
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index f64e40fefc02..bacf518c6fa8 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/circ_buf.h> 19#include <linux/circ_buf.h>
20#include <linux/sched.h>
20#include "internal.h" 21#include "internal.h"
21 22
22unsigned afs_vnode_update_timeout = 10; 23unsigned afs_vnode_update_timeout = 10;
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 9b1311a1df51..175a567db78c 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/key.h> 14#include <linux/key.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/sched.h>
16#include <keys/rxrpc-type.h> 17#include <keys/rxrpc-type.h>
17#include "internal.h" 18#include "internal.h"
18 19
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 719af4fb15dc..546c59522eb1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -16,6 +16,7 @@
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/ctype.h> 18#include <linux/ctype.h>
19#include <linux/sched.h>
19#include "internal.h" 20#include "internal.h"
20 21
21static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, 22static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 47f5fed7195d..d196840127c6 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -19,6 +19,7 @@
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/pagemap.h> 21#include <linux/pagemap.h>
22#include <linux/sched.h>
22#include "internal.h" 23#include "internal.h"
23 24
24struct afs_iget_data { 25struct afs_iget_data {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 4953ba5a6f44..2dac3ad2c44b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -16,6 +16,7 @@
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/rxrpc.h> 17#include <linux/rxrpc.h>
18#include <linux/key.h> 18#include <linux/key.h>
19#include <linux/workqueue.h>
19#include "afs.h" 20#include "afs.h"
20#include "afs_vl.h" 21#include "afs_vl.h"
21 22
diff --git a/fs/afs/main.c b/fs/afs/main.c
index f1f71ff7d5c6..cd21195bbb24 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -13,6 +13,7 @@
13#include <linux/moduleparam.h> 13#include <linux/moduleparam.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/completion.h> 15#include <linux/completion.h>
16#include <linux/sched.h>
16#include "internal.h" 17#include "internal.h"
17 18
18MODULE_DESCRIPTION("AFS Client File System"); 19MODULE_DESCRIPTION("AFS Client File System");
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index d5601f617cdb..13df512aea9e 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -13,6 +13,7 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/sched.h>
16#include <asm/uaccess.h> 17#include <asm/uaccess.h>
17#include "internal.h" 18#include "internal.h"
18 19
diff --git a/fs/afs/security.c b/fs/afs/security.c
index e0ea88b63ebf..566fe712c682 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -13,6 +13,7 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/ctype.h> 15#include <linux/ctype.h>
16#include <linux/sched.h>
16#include <keys/rxrpc-type.h> 17#include <keys/rxrpc-type.h>
17#include "internal.h" 18#include "internal.h"
18 19
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 579af632c8e8..2e8496ba1205 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -22,6 +22,7 @@
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/parser.h> 23#include <linux/parser.h>
24#include <linux/statfs.h> 24#include <linux/statfs.h>
25#include <linux/sched.h>
25#include "internal.h" 26#include "internal.h"
26 27
27#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 28#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
@@ -47,7 +48,6 @@ struct file_system_type afs_fs_type = {
47static const struct super_operations afs_super_ops = { 48static const struct super_operations afs_super_ops = {
48 .statfs = afs_statfs, 49 .statfs = afs_statfs,
49 .alloc_inode = afs_alloc_inode, 50 .alloc_inode = afs_alloc_inode,
50 .drop_inode = generic_delete_inode,
51 .write_inode = afs_write_inode, 51 .write_inode = afs_write_inode,
52 .destroy_inode = afs_destroy_inode, 52 .destroy_inode = afs_destroy_inode,
53 .clear_inode = afs_clear_inode, 53 .clear_inode = afs_clear_inode,
@@ -452,17 +452,15 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
452{ 452{
453 struct afs_vnode *vnode = _vnode; 453 struct afs_vnode *vnode = _vnode;
454 454
455 if (flags & SLAB_CTOR_CONSTRUCTOR) { 455 memset(vnode, 0, sizeof(*vnode));
456 memset(vnode, 0, sizeof(*vnode)); 456 inode_init_once(&vnode->vfs_inode);
457 inode_init_once(&vnode->vfs_inode); 457 init_waitqueue_head(&vnode->update_waitq);
458 init_waitqueue_head(&vnode->update_waitq); 458 mutex_init(&vnode->permits_lock);
459 mutex_init(&vnode->permits_lock); 459 mutex_init(&vnode->validate_lock);
460 mutex_init(&vnode->validate_lock); 460 spin_lock_init(&vnode->writeback_lock);
461 spin_lock_init(&vnode->writeback_lock); 461 spin_lock_init(&vnode->lock);
462 spin_lock_init(&vnode->lock); 462 INIT_LIST_HEAD(&vnode->writebacks);
463 INIT_LIST_HEAD(&vnode->writebacks); 463 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
464 INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
465 }
466} 464}
467 465
468/* 466/*
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 3370cdb72566..09e3ad0fc7cc 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -12,6 +12,7 @@
12#include <linux/kernel.h> 12#include <linux/kernel.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/sched.h>
15#include "internal.h" 16#include "internal.h"
16 17
17unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */ 18unsigned afs_vlocation_timeout = 10; /* volume location timeout in seconds */
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index c36c98ce2c3c..232c55dc245d 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -14,6 +14,7 @@
14#include <linux/init.h> 14#include <linux/init.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/sched.h>
17#include "internal.h" 18#include "internal.h"
18 19
19#if 0 20#if 0
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index dd160cada45d..8bab0e3437f9 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/sched.h>
18#include "internal.h" 19#include "internal.h"
19 20
20static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" }; 21static const char *afs_voltypes[] = { "R/W", "R/O", "BAK" };
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 28f37516c126..a03b92a0fe1d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -206,7 +206,6 @@ int afs_prepare_write(struct file *file, struct page *page,
206 _leave(" = %d [prep]", ret); 206 _leave(" = %d [prep]", ret);
207 return ret; 207 return ret;
208 } 208 }
209 SetPageUptodate(page);
210 } 209 }
211 210
212try_again: 211try_again:
@@ -311,8 +310,8 @@ int afs_commit_write(struct file *file, struct page *page,
311 spin_unlock(&vnode->writeback_lock); 310 spin_unlock(&vnode->writeback_lock);
312 } 311 }
313 312
313 SetPageUptodate(page);
314 set_page_dirty(page); 314 set_page_dirty(page);
315
316 if (PageDirty(page)) 315 if (PageDirty(page))
317 _debug("dirtied"); 316 _debug("dirtied");
318 317
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index fe96108a788d..a5c5171c2828 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -292,10 +292,8 @@ befs_destroy_inode(struct inode *inode)
292static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) 292static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags)
293{ 293{
294 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 294 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
295 295
296 if (flags & SLAB_CTOR_CONSTRUCTOR) { 296 inode_init_once(&bi->vfs_inode);
297 inode_init_once(&bi->vfs_inode);
298 }
299} 297}
300 298
301static void 299static void
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index edc08d89aabc..58c7bd9f5301 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,8 +248,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
248{ 248{
249 struct bfs_inode_info *bi = foo; 249 struct bfs_inode_info *bi = foo;
250 250
251 if (flags & SLAB_CTOR_CONSTRUCTOR) 251 inode_init_once(&bi->vfs_inode);
252 inode_init_once(&bi->vfs_inode);
253} 252}
254 253
255static int init_inodecache(void) 254static int init_inodecache(void)
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 72d0b412c376..330fd3fe8546 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -18,7 +18,7 @@
18 18
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/init.h> 20#include <linux/init.h>
21 21#include <linux/sched.h>
22#include <linux/binfmts.h> 22#include <linux/binfmts.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/ctype.h> 24#include <linux/ctype.h>
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 742899240872..ea1480a16f51 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -458,17 +458,15 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
458 struct bdev_inode *ei = (struct bdev_inode *) foo; 458 struct bdev_inode *ei = (struct bdev_inode *) foo;
459 struct block_device *bdev = &ei->bdev; 459 struct block_device *bdev = &ei->bdev;
460 460
461 if (flags & SLAB_CTOR_CONSTRUCTOR) { 461 memset(bdev, 0, sizeof(*bdev));
462 memset(bdev, 0, sizeof(*bdev)); 462 mutex_init(&bdev->bd_mutex);
463 mutex_init(&bdev->bd_mutex); 463 sema_init(&bdev->bd_mount_sem, 1);
464 sema_init(&bdev->bd_mount_sem, 1); 464 INIT_LIST_HEAD(&bdev->bd_inodes);
465 INIT_LIST_HEAD(&bdev->bd_inodes); 465 INIT_LIST_HEAD(&bdev->bd_list);
466 INIT_LIST_HEAD(&bdev->bd_list);
467#ifdef CONFIG_SYSFS 466#ifdef CONFIG_SYSFS
468 INIT_LIST_HEAD(&bdev->bd_holder_list); 467 INIT_LIST_HEAD(&bdev->bd_holder_list);
469#endif 468#endif
470 inode_init_once(&ei->vfs_inode); 469 inode_init_once(&ei->vfs_inode);
471 }
472} 470}
473 471
474static inline void __bd_forget(struct inode *inode) 472static inline void __bd_forget(struct inode *inode)
diff --git a/fs/buffer.c b/fs/buffer.c
index aecd057cd0e0..aa68206bd517 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -981,7 +981,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
981 struct page *page; 981 struct page *page;
982 struct buffer_head *bh; 982 struct buffer_head *bh;
983 983
984 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 984 page = find_or_create_page(inode->i_mapping, index,
985 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
985 if (!page) 986 if (!page)
986 return NULL; 987 return NULL;
987 988
@@ -2100,7 +2101,7 @@ int cont_prepare_write(struct page *page, unsigned offset,
2100 PAGE_CACHE_SIZE, get_block); 2101 PAGE_CACHE_SIZE, get_block);
2101 if (status) 2102 if (status)
2102 goto out_unmap; 2103 goto out_unmap;
2103 zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom, 2104 zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
2104 KM_USER0); 2105 KM_USER0);
2105 generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE); 2106 generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
2106 unlock_page(new_page); 2107 unlock_page(new_page);
@@ -2898,8 +2899,9 @@ static void recalc_bh_state(void)
2898 2899
2899struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 2900struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
2900{ 2901{
2901 struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); 2902 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
2902 if (ret) { 2903 if (ret) {
2904 INIT_LIST_HEAD(&ret->b_assoc_buffers);
2903 get_cpu_var(bh_accounting).nr++; 2905 get_cpu_var(bh_accounting).nr++;
2904 recalc_bh_state(); 2906 recalc_bh_state();
2905 put_cpu_var(bh_accounting); 2907 put_cpu_var(bh_accounting);
@@ -2918,17 +2920,6 @@ void free_buffer_head(struct buffer_head *bh)
2918} 2920}
2919EXPORT_SYMBOL(free_buffer_head); 2921EXPORT_SYMBOL(free_buffer_head);
2920 2922
2921static void
2922init_buffer_head(void *data, struct kmem_cache *cachep, unsigned long flags)
2923{
2924 if (flags & SLAB_CTOR_CONSTRUCTOR) {
2925 struct buffer_head * bh = (struct buffer_head *)data;
2926
2927 memset(bh, 0, sizeof(*bh));
2928 INIT_LIST_HEAD(&bh->b_assoc_buffers);
2929 }
2930}
2931
2932static void buffer_exit_cpu(int cpu) 2923static void buffer_exit_cpu(int cpu)
2933{ 2924{
2934 int i; 2925 int i;
@@ -2955,12 +2946,8 @@ void __init buffer_init(void)
2955{ 2946{
2956 int nrpages; 2947 int nrpages;
2957 2948
2958 bh_cachep = kmem_cache_create("buffer_head", 2949 bh_cachep = KMEM_CACHE(buffer_head,
2959 sizeof(struct buffer_head), 0, 2950 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
2960 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
2961 SLAB_MEM_SPREAD),
2962 init_buffer_head,
2963 NULL);
2964 2951
2965 /* 2952 /*
2966 * Limit the bh occupancy to 10% of ZONE_NORMAL 2953 * Limit the bh occupancy to 10% of ZONE_NORMAL
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8568e100953c..d38c69b591cf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -701,10 +701,8 @@ cifs_init_once(void *inode, struct kmem_cache * cachep, unsigned long flags)
701{ 701{
702 struct cifsInodeInfo *cifsi = inode; 702 struct cifsInodeInfo *cifsi = inode;
703 703
704 if (flags & SLAB_CTOR_CONSTRUCTOR) { 704 inode_init_once(&cifsi->vfs_inode);
705 inode_init_once(&cifsi->vfs_inode); 705 INIT_LIST_HEAD(&cifsi->lockList);
706 INIT_LIST_HEAD(&cifsi->lockList);
707 }
708} 706}
709 707
710static int 708static int
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 5d0527133266..fcb88fa8d2f2 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -16,6 +16,7 @@
16#include <asm/uaccess.h> 16#include <asm/uaccess.h>
17#include <linux/string.h> 17#include <linux/string.h>
18#include <linux/list.h> 18#include <linux/list.h>
19#include <linux/sched.h>
19 20
20#include <linux/coda.h> 21#include <linux/coda.h>
21#include <linux/coda_linux.h> 22#include <linux/coda_linux.h>
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 0aaff3651d14..dbff1bd4fb96 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -62,8 +62,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
62{ 62{
63 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 63 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
64 64
65 if (flags & SLAB_CTOR_CONSTRUCTOR) 65 inode_init_once(&ei->vfs_inode);
66 inode_init_once(&ei->vfs_inode);
67} 66}
68 67
69int coda_init_inodecache(void) 68int coda_init_inodecache(void)
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index a5b5e631ba61..5faacdb1a479 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -16,7 +16,7 @@
16 16
17#include <asm/system.h> 17#include <asm/system.h>
18#include <linux/signal.h> 18#include <linux/signal.h>
19 19#include <linux/sched.h>
20#include <linux/types.h> 20#include <linux/types.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/mm.h> 22#include <linux/mm.h>
diff --git a/fs/compat.c b/fs/compat.c
index 7b21b0a82596..1de2331db844 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -2230,21 +2230,16 @@ asmlinkage long compat_sys_signalfd(int ufd,
2230asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags, 2230asmlinkage long compat_sys_timerfd(int ufd, int clockid, int flags,
2231 const struct compat_itimerspec __user *utmr) 2231 const struct compat_itimerspec __user *utmr)
2232{ 2232{
2233 long res;
2234 struct itimerspec t; 2233 struct itimerspec t;
2235 struct itimerspec __user *ut; 2234 struct itimerspec __user *ut;
2236 2235
2237 res = -EFAULT;
2238 if (get_compat_itimerspec(&t, utmr)) 2236 if (get_compat_itimerspec(&t, utmr))
2239 goto err_exit; 2237 return -EFAULT;
2240 ut = compat_alloc_user_space(sizeof(*ut)); 2238 ut = compat_alloc_user_space(sizeof(*ut));
2241 if (copy_to_user(ut, &t, sizeof(t)) ) 2239 if (copy_to_user(ut, &t, sizeof(t)))
2242 goto err_exit; 2240 return -EFAULT;
2243 2241
2244 res = sys_timerfd(ufd, clockid, flags, ut); 2242 return sys_timerfd(ufd, clockid, flags, ut);
2245err_exit:
2246 return res;
2247} 2243}
2248 2244
2249#endif /* CONFIG_TIMERFD */ 2245#endif /* CONFIG_TIMERFD */
2250
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 2ec9beac17cf..ddc003a9d214 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -32,6 +32,7 @@
32#include <linux/namei.h> 32#include <linux/namei.h>
33#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
34#include <linux/capability.h> 34#include <linux/capability.h>
35#include <linux/sched.h>
35 36
36#include <linux/configfs.h> 37#include <linux/configfs.h>
37#include "configfs_internal.h" 38#include "configfs_internal.h"
diff --git a/fs/dquot.c b/fs/dquot.c
index 3a995841de90..8819d281500c 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1421,7 +1421,7 @@ int vfs_quota_off(struct super_block *sb, int type)
1421 /* If quota was reenabled in the meantime, we have 1421 /* If quota was reenabled in the meantime, we have
1422 * nothing to do */ 1422 * nothing to do */
1423 if (!sb_has_quota_enabled(sb, cnt)) { 1423 if (!sb_has_quota_enabled(sb, cnt)) {
1424 mutex_lock(&toputinode[cnt]->i_mutex); 1424 mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA);
1425 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | 1425 toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
1426 S_NOATIME | S_NOQUOTA); 1426 S_NOATIME | S_NOQUOTA);
1427 truncate_inode_pages(&toputinode[cnt]->i_data, 0); 1427 truncate_inode_pages(&toputinode[cnt]->i_data, 0);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 8cbf3f69ebe5..606128f5c927 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -583,8 +583,7 @@ inode_info_init_once(void *vptr, struct kmem_cache *cachep, unsigned long flags)
583{ 583{
584 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; 584 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
585 585
586 if (flags & SLAB_CTOR_CONSTRUCTOR) 586 inode_init_once(&ei->vfs_inode);
587 inode_init_once(&ei->vfs_inode);
588} 587}
589 588
590static struct ecryptfs_cache_info { 589static struct ecryptfs_cache_info {
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 3baf253be95a..a9d87c47f72d 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -19,7 +19,7 @@
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA. 20 * 02111-1307, USA.
21 */ 21 */
22 22#include <linux/sched.h>
23#include "ecryptfs_kernel.h" 23#include "ecryptfs_kernel.h"
24 24
25static LIST_HEAD(ecryptfs_msg_ctx_free_list); 25static LIST_HEAD(ecryptfs_msg_ctx_free_list);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 0770c4b66f53..88ea6697908f 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -364,18 +364,14 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
364{ 364{
365 struct inode *inode = page->mapping->host; 365 struct inode *inode = page->mapping->host;
366 int end_byte_in_page; 366 int end_byte_in_page;
367 char *page_virt;
368 367
369 if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index) 368 if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index)
370 goto out; 369 goto out;
371 end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE; 370 end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
372 if (to > end_byte_in_page) 371 if (to > end_byte_in_page)
373 end_byte_in_page = to; 372 end_byte_in_page = to;
374 page_virt = kmap_atomic(page, KM_USER0); 373 zero_user_page(page, end_byte_in_page,
375 memset((page_virt + end_byte_in_page), 0, 374 PAGE_CACHE_SIZE - end_byte_in_page, KM_USER0);
376 (PAGE_CACHE_SIZE - end_byte_in_page));
377 kunmap_atomic(page_virt, KM_USER0);
378 flush_dcache_page(page);
379out: 375out:
380 return 0; 376 return 0;
381} 377}
@@ -740,7 +736,6 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
740{ 736{
741 int rc = 0; 737 int rc = 0;
742 struct page *tmp_page; 738 struct page *tmp_page;
743 char *tmp_page_virt;
744 739
745 tmp_page = ecryptfs_get1page(file, index); 740 tmp_page = ecryptfs_get1page(file, index);
746 if (IS_ERR(tmp_page)) { 741 if (IS_ERR(tmp_page)) {
@@ -757,10 +752,7 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
757 page_cache_release(tmp_page); 752 page_cache_release(tmp_page);
758 goto out; 753 goto out;
759 } 754 }
760 tmp_page_virt = kmap_atomic(tmp_page, KM_USER0); 755 zero_user_page(tmp_page, start, num_zeros, KM_USER0);
761 memset(((char *)tmp_page_virt + start), 0, num_zeros);
762 kunmap_atomic(tmp_page_virt, KM_USER0);
763 flush_dcache_page(tmp_page);
764 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros); 756 rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
765 if (rc < 0) { 757 if (rc < 0) {
766 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's " 758 ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
diff --git a/fs/efs/super.c b/fs/efs/super.c
index ba7a8b9da0c1..e0a6839e68ae 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -72,8 +72,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
72{ 72{
73 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 73 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
74 74
75 if (flags & SLAB_CTOR_CONSTRUCTOR) 75 inode_init_once(&ei->vfs_inode);
76 inode_init_once(&ei->vfs_inode);
77} 76}
78 77
79static int init_inodecache(void) 78static int init_inodecache(void)
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 480e2b3c4166..2ce19c000d2a 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -17,7 +17,6 @@
17#include <linux/eventfd.h> 17#include <linux/eventfd.h>
18 18
19struct eventfd_ctx { 19struct eventfd_ctx {
20 spinlock_t lock;
21 wait_queue_head_t wqh; 20 wait_queue_head_t wqh;
22 /* 21 /*
23 * Every time that a write(2) is performed on an eventfd, the 22 * Every time that a write(2) is performed on an eventfd, the
@@ -45,13 +44,13 @@ int eventfd_signal(struct file *file, int n)
45 44
46 if (n < 0) 45 if (n < 0)
47 return -EINVAL; 46 return -EINVAL;
48 spin_lock_irqsave(&ctx->lock, flags); 47 spin_lock_irqsave(&ctx->wqh.lock, flags);
49 if (ULLONG_MAX - ctx->count < n) 48 if (ULLONG_MAX - ctx->count < n)
50 n = (int) (ULLONG_MAX - ctx->count); 49 n = (int) (ULLONG_MAX - ctx->count);
51 ctx->count += n; 50 ctx->count += n;
52 if (waitqueue_active(&ctx->wqh)) 51 if (waitqueue_active(&ctx->wqh))
53 wake_up_locked(&ctx->wqh); 52 wake_up_locked(&ctx->wqh);
54 spin_unlock_irqrestore(&ctx->lock, flags); 53 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
55 54
56 return n; 55 return n;
57} 56}
@@ -70,14 +69,14 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
70 69
71 poll_wait(file, &ctx->wqh, wait); 70 poll_wait(file, &ctx->wqh, wait);
72 71
73 spin_lock_irqsave(&ctx->lock, flags); 72 spin_lock_irqsave(&ctx->wqh.lock, flags);
74 if (ctx->count > 0) 73 if (ctx->count > 0)
75 events |= POLLIN; 74 events |= POLLIN;
76 if (ctx->count == ULLONG_MAX) 75 if (ctx->count == ULLONG_MAX)
77 events |= POLLERR; 76 events |= POLLERR;
78 if (ULLONG_MAX - 1 > ctx->count) 77 if (ULLONG_MAX - 1 > ctx->count)
79 events |= POLLOUT; 78 events |= POLLOUT;
80 spin_unlock_irqrestore(&ctx->lock, flags); 79 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
81 80
82 return events; 81 return events;
83} 82}
@@ -92,7 +91,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
92 91
93 if (count < sizeof(ucnt)) 92 if (count < sizeof(ucnt))
94 return -EINVAL; 93 return -EINVAL;
95 spin_lock_irq(&ctx->lock); 94 spin_lock_irq(&ctx->wqh.lock);
96 res = -EAGAIN; 95 res = -EAGAIN;
97 ucnt = ctx->count; 96 ucnt = ctx->count;
98 if (ucnt > 0) 97 if (ucnt > 0)
@@ -110,9 +109,9 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
110 res = -ERESTARTSYS; 109 res = -ERESTARTSYS;
111 break; 110 break;
112 } 111 }
113 spin_unlock_irq(&ctx->lock); 112 spin_unlock_irq(&ctx->wqh.lock);
114 schedule(); 113 schedule();
115 spin_lock_irq(&ctx->lock); 114 spin_lock_irq(&ctx->wqh.lock);
116 } 115 }
117 __remove_wait_queue(&ctx->wqh, &wait); 116 __remove_wait_queue(&ctx->wqh, &wait);
118 __set_current_state(TASK_RUNNING); 117 __set_current_state(TASK_RUNNING);
@@ -122,7 +121,7 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
122 if (waitqueue_active(&ctx->wqh)) 121 if (waitqueue_active(&ctx->wqh))
123 wake_up_locked(&ctx->wqh); 122 wake_up_locked(&ctx->wqh);
124 } 123 }
125 spin_unlock_irq(&ctx->lock); 124 spin_unlock_irq(&ctx->wqh.lock);
126 if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) 125 if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
127 return -EFAULT; 126 return -EFAULT;
128 127
@@ -143,7 +142,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
143 return -EFAULT; 142 return -EFAULT;
144 if (ucnt == ULLONG_MAX) 143 if (ucnt == ULLONG_MAX)
145 return -EINVAL; 144 return -EINVAL;
146 spin_lock_irq(&ctx->lock); 145 spin_lock_irq(&ctx->wqh.lock);
147 res = -EAGAIN; 146 res = -EAGAIN;
148 if (ULLONG_MAX - ctx->count > ucnt) 147 if (ULLONG_MAX - ctx->count > ucnt)
149 res = sizeof(ucnt); 148 res = sizeof(ucnt);
@@ -159,9 +158,9 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
159 res = -ERESTARTSYS; 158 res = -ERESTARTSYS;
160 break; 159 break;
161 } 160 }
162 spin_unlock_irq(&ctx->lock); 161 spin_unlock_irq(&ctx->wqh.lock);
163 schedule(); 162 schedule();
164 spin_lock_irq(&ctx->lock); 163 spin_lock_irq(&ctx->wqh.lock);
165 } 164 }
166 __remove_wait_queue(&ctx->wqh, &wait); 165 __remove_wait_queue(&ctx->wqh, &wait);
167 __set_current_state(TASK_RUNNING); 166 __set_current_state(TASK_RUNNING);
@@ -171,7 +170,7 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
171 if (waitqueue_active(&ctx->wqh)) 170 if (waitqueue_active(&ctx->wqh))
172 wake_up_locked(&ctx->wqh); 171 wake_up_locked(&ctx->wqh);
173 } 172 }
174 spin_unlock_irq(&ctx->lock); 173 spin_unlock_irq(&ctx->wqh.lock);
175 174
176 return res; 175 return res;
177} 176}
@@ -210,7 +209,6 @@ asmlinkage long sys_eventfd(unsigned int count)
210 return -ENOMEM; 209 return -ENOMEM;
211 210
212 init_waitqueue_head(&ctx->wqh); 211 init_waitqueue_head(&ctx->wqh);
213 spin_lock_init(&ctx->lock);
214 ctx->count = count; 212 ctx->count = count;
215 213
216 /* 214 /*
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1aad34ea61a4..0b73cd45a06d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * fs/eventpoll.c ( Efficent event polling implementation ) 2 * fs/eventpoll.c (Efficent event polling implementation)
3 * Copyright (C) 2001,...,2006 Davide Libenzi 3 * Copyright (C) 2001,...,2007 Davide Libenzi
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -26,7 +26,6 @@
26#include <linux/hash.h> 26#include <linux/hash.h>
27#include <linux/spinlock.h> 27#include <linux/spinlock.h>
28#include <linux/syscalls.h> 28#include <linux/syscalls.h>
29#include <linux/rwsem.h>
30#include <linux/rbtree.h> 29#include <linux/rbtree.h>
31#include <linux/wait.h> 30#include <linux/wait.h>
32#include <linux/eventpoll.h> 31#include <linux/eventpoll.h>
@@ -39,15 +38,14 @@
39#include <asm/io.h> 38#include <asm/io.h>
40#include <asm/mman.h> 39#include <asm/mman.h>
41#include <asm/atomic.h> 40#include <asm/atomic.h>
42#include <asm/semaphore.h>
43 41
44/* 42/*
45 * LOCKING: 43 * LOCKING:
46 * There are three level of locking required by epoll : 44 * There are three level of locking required by epoll :
47 * 45 *
48 * 1) epmutex (mutex) 46 * 1) epmutex (mutex)
49 * 2) ep->sem (rw_semaphore) 47 * 2) ep->mtx (mutex)
50 * 3) ep->lock (rw_lock) 48 * 3) ep->lock (spinlock)
51 * 49 *
52 * The acquire order is the one listed above, from 1 to 3. 50 * The acquire order is the one listed above, from 1 to 3.
53 * We need a spinlock (ep->lock) because we manipulate objects 51 * We need a spinlock (ep->lock) because we manipulate objects
@@ -57,20 +55,20 @@
57 * a spinlock. During the event transfer loop (from kernel to 55 * a spinlock. During the event transfer loop (from kernel to
58 * user space) we could end up sleeping due a copy_to_user(), so 56 * user space) we could end up sleeping due a copy_to_user(), so
59 * we need a lock that will allow us to sleep. This lock is a 57 * we need a lock that will allow us to sleep. This lock is a
60 * read-write semaphore (ep->sem). It is acquired on read during 58 * mutex (ep->mtx). It is acquired during the event transfer loop,
61 * the event transfer loop and in write during epoll_ctl(EPOLL_CTL_DEL) 59 * during epoll_ctl(EPOLL_CTL_DEL) and during eventpoll_release_file().
62 * and during eventpoll_release_file(). Then we also need a global 60 * Then we also need a global mutex to serialize eventpoll_release_file()
63 * semaphore to serialize eventpoll_release_file() and ep_free(). 61 * and ep_free().
64 * This semaphore is acquired by ep_free() during the epoll file 62 * This mutex is acquired by ep_free() during the epoll file
65 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
66 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
67 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL).
68 * It is possible to drop the "ep->sem" and to use the global 66 * It is possible to drop the "ep->mtx" and to use the global
69 * semaphore "epmutex" (together with "ep->lock") to have it working, 67 * mutex "epmutex" (together with "ep->lock") to have it working,
70 * but having "ep->sem" will make the interface more scalable. 68 * but having "ep->mtx" will make the interface more scalable.
71 * Events that require holding "epmutex" are very rare, while for 69 * Events that require holding "epmutex" are very rare, while for
72 * normal operations the epoll private "ep->sem" will guarantee 70 * normal operations the epoll private "ep->mtx" will guarantee
73 * a greater scalability. 71 * a better scalability.
74 */ 72 */
75 73
76#define DEBUG_EPOLL 0 74#define DEBUG_EPOLL 0
@@ -102,6 +100,8 @@
102 100
103#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 101#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
104 102
103#define EP_UNACTIVE_PTR ((void *) -1L)
104
105struct epoll_filefd { 105struct epoll_filefd {
106 struct file *file; 106 struct file *file;
107 int fd; 107 int fd;
@@ -111,7 +111,7 @@ struct epoll_filefd {
111 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake". 111 * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
112 * It is used to keep track on all tasks that are currently inside the wake_up() code 112 * It is used to keep track on all tasks that are currently inside the wake_up() code
113 * to 1) short-circuit the one coming from the same task and same wait queue head 113 * to 1) short-circuit the one coming from the same task and same wait queue head
114 * ( loop ) 2) allow a maximum number of epoll descriptors inclusion nesting 114 * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting
115 * 3) let go the ones coming from other tasks. 115 * 3) let go the ones coming from other tasks.
116 */ 116 */
117struct wake_task_node { 117struct wake_task_node {
@@ -130,21 +130,57 @@ struct poll_safewake {
130}; 130};
131 131
132/* 132/*
133 * Each file descriptor added to the eventpoll interface will
134 * have an entry of this type linked to the "rbr" RB tree.
135 */
136struct epitem {
137 /* RB tree node used to link this structure to the eventpoll RB tree */
138 struct rb_node rbn;
139
140 /* List header used to link this structure to the eventpoll ready list */
141 struct list_head rdllink;
142
143 /*
144 * Works together "struct eventpoll"->ovflist in keeping the
145 * single linked chain of items.
146 */
147 struct epitem *next;
148
149 /* The file descriptor information this item refers to */
150 struct epoll_filefd ffd;
151
152 /* Number of active wait queue attached to poll operations */
153 int nwait;
154
155 /* List containing poll wait queues */
156 struct list_head pwqlist;
157
158 /* The "container" of this item */
159 struct eventpoll *ep;
160
161 /* List header used to link this item to the "struct file" items list */
162 struct list_head fllink;
163
164 /* The structure that describe the interested events and the source fd */
165 struct epoll_event event;
166};
167
168/*
133 * This structure is stored inside the "private_data" member of the file 169 * This structure is stored inside the "private_data" member of the file
134 * structure and rapresent the main data sructure for the eventpoll 170 * structure and rapresent the main data sructure for the eventpoll
135 * interface. 171 * interface.
136 */ 172 */
137struct eventpoll { 173struct eventpoll {
138 /* Protect the this structure access */ 174 /* Protect the this structure access */
139 rwlock_t lock; 175 spinlock_t lock;
140 176
141 /* 177 /*
142 * This semaphore is used to ensure that files are not removed 178 * This mutex is used to ensure that files are not removed
143 * while epoll is using them. This is read-held during the event 179 * while epoll is using them. This is held during the event
144 * collection loop and it is write-held during the file cleanup 180 * collection loop, the file cleanup path, the epoll file exit
145 * path, the epoll file exit code and the ctl operations. 181 * code and the ctl operations.
146 */ 182 */
147 struct rw_semaphore sem; 183 struct mutex mtx;
148 184
149 /* Wait queue used by sys_epoll_wait() */ 185 /* Wait queue used by sys_epoll_wait() */
150 wait_queue_head_t wq; 186 wait_queue_head_t wq;
@@ -155,8 +191,15 @@ struct eventpoll {
155 /* List of ready file descriptors */ 191 /* List of ready file descriptors */
156 struct list_head rdllist; 192 struct list_head rdllist;
157 193
158 /* RB-Tree root used to store monitored fd structs */ 194 /* RB tree root used to store monitored fd structs */
159 struct rb_root rbr; 195 struct rb_root rbr;
196
197 /*
198 * This is a single linked list that chains all the "struct epitem" that
199 * happened while transfering ready events to userspace w/out
200 * holding ->lock.
201 */
202 struct epitem *ovflist;
160}; 203};
161 204
162/* Wait structure used by the poll hooks */ 205/* Wait structure used by the poll hooks */
@@ -177,42 +220,6 @@ struct eppoll_entry {
177 wait_queue_head_t *whead; 220 wait_queue_head_t *whead;
178}; 221};
179 222
180/*
181 * Each file descriptor added to the eventpoll interface will
182 * have an entry of this type linked to the "rbr" RB tree.
183 */
184struct epitem {
185 /* RB-Tree node used to link this structure to the eventpoll rb-tree */
186 struct rb_node rbn;
187
188 /* List header used to link this structure to the eventpoll ready list */
189 struct list_head rdllink;
190
191 /* The file descriptor information this item refers to */
192 struct epoll_filefd ffd;
193
194 /* Number of active wait queue attached to poll operations */
195 int nwait;
196
197 /* List containing poll wait queues */
198 struct list_head pwqlist;
199
200 /* The "container" of this item */
201 struct eventpoll *ep;
202
203 /* The structure that describe the interested events and the source fd */
204 struct epoll_event event;
205
206 /*
207 * Used to keep track of the usage count of the structure. This avoids
208 * that the structure will desappear from underneath our processing.
209 */
210 atomic_t usecnt;
211
212 /* List header used to link this item to the "struct file" items list */
213 struct list_head fllink;
214};
215
216/* Wrapper struct used by poll queueing */ 223/* Wrapper struct used by poll queueing */
217struct ep_pqueue { 224struct ep_pqueue {
218 poll_table pt; 225 poll_table pt;
@@ -220,7 +227,7 @@ struct ep_pqueue {
220}; 227};
221 228
222/* 229/*
223 * This semaphore is used to serialize ep_free() and eventpoll_release_file(). 230 * This mutex is used to serialize ep_free() and eventpoll_release_file().
224 */ 231 */
225static struct mutex epmutex; 232static struct mutex epmutex;
226 233
@@ -234,7 +241,7 @@ static struct kmem_cache *epi_cache __read_mostly;
234static struct kmem_cache *pwq_cache __read_mostly; 241static struct kmem_cache *pwq_cache __read_mostly;
235 242
236 243
237/* Setup the structure that is used as key for the rb-tree */ 244/* Setup the structure that is used as key for the RB tree */
238static inline void ep_set_ffd(struct epoll_filefd *ffd, 245static inline void ep_set_ffd(struct epoll_filefd *ffd,
239 struct file *file, int fd) 246 struct file *file, int fd)
240{ 247{
@@ -242,7 +249,7 @@ static inline void ep_set_ffd(struct epoll_filefd *ffd,
242 ffd->fd = fd; 249 ffd->fd = fd;
243} 250}
244 251
245/* Compare rb-tree keys */ 252/* Compare RB tree keys */
246static inline int ep_cmp_ffd(struct epoll_filefd *p1, 253static inline int ep_cmp_ffd(struct epoll_filefd *p1,
247 struct epoll_filefd *p2) 254 struct epoll_filefd *p2)
248{ 255{
@@ -250,20 +257,20 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
250 (p1->file < p2->file ? -1 : p1->fd - p2->fd)); 257 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
251} 258}
252 259
253/* Special initialization for the rb-tree node to detect linkage */ 260/* Special initialization for the RB tree node to detect linkage */
254static inline void ep_rb_initnode(struct rb_node *n) 261static inline void ep_rb_initnode(struct rb_node *n)
255{ 262{
256 rb_set_parent(n, n); 263 rb_set_parent(n, n);
257} 264}
258 265
259/* Removes a node from the rb-tree and marks it for a fast is-linked check */ 266/* Removes a node from the RB tree and marks it for a fast is-linked check */
260static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r) 267static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
261{ 268{
262 rb_erase(n, r); 269 rb_erase(n, r);
263 rb_set_parent(n, n); 270 rb_set_parent(n, n);
264} 271}
265 272
266/* Fast check to verify that the item is linked to the main rb-tree */ 273/* Fast check to verify that the item is linked to the main RB tree */
267static inline int ep_rb_linked(struct rb_node *n) 274static inline int ep_rb_linked(struct rb_node *n)
268{ 275{
269 return rb_parent(n) != n; 276 return rb_parent(n) != n;
@@ -381,78 +388,11 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
381} 388}
382 389
383/* 390/*
384 * Unlink the "struct epitem" from all places it might have been hooked up.
385 * This function must be called with write IRQ lock on "ep->lock".
386 */
387static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
388{
389 int error;
390
391 /*
392 * It can happen that this one is called for an item already unlinked.
393 * The check protect us from doing a double unlink ( crash ).
394 */
395 error = -ENOENT;
396 if (!ep_rb_linked(&epi->rbn))
397 goto error_return;
398
399 /*
400 * Clear the event mask for the unlinked item. This will avoid item
401 * notifications to be sent after the unlink operation from inside
402 * the kernel->userspace event transfer loop.
403 */
404 epi->event.events = 0;
405
406 /*
407 * At this point is safe to do the job, unlink the item from our rb-tree.
408 * This operation togheter with the above check closes the door to
409 * double unlinks.
410 */
411 ep_rb_erase(&epi->rbn, &ep->rbr);
412
413 /*
414 * If the item we are going to remove is inside the ready file descriptors
415 * we want to remove it from this list to avoid stale events.
416 */
417 if (ep_is_linked(&epi->rdllink))
418 list_del_init(&epi->rdllink);
419
420 error = 0;
421error_return:
422
423 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
424 current, ep, epi->ffd.file, error));
425
426 return error;
427}
428
429/*
430 * Increment the usage count of the "struct epitem" making it sure
431 * that the user will have a valid pointer to reference.
432 */
433static void ep_use_epitem(struct epitem *epi)
434{
435 atomic_inc(&epi->usecnt);
436}
437
438/*
439 * Decrement ( release ) the usage count by signaling that the user
440 * has finished using the structure. It might lead to freeing the
441 * structure itself if the count goes to zero.
442 */
443static void ep_release_epitem(struct epitem *epi)
444{
445 if (atomic_dec_and_test(&epi->usecnt))
446 kmem_cache_free(epi_cache, epi);
447}
448
449/*
450 * Removes a "struct epitem" from the eventpoll RB tree and deallocates 391 * Removes a "struct epitem" from the eventpoll RB tree and deallocates
451 * all the associated resources. 392 * all the associated resources. Must be called with "mtx" held.
452 */ 393 */
453static int ep_remove(struct eventpoll *ep, struct epitem *epi) 394static int ep_remove(struct eventpoll *ep, struct epitem *epi)
454{ 395{
455 int error;
456 unsigned long flags; 396 unsigned long flags;
457 struct file *file = epi->ffd.file; 397 struct file *file = epi->ffd.file;
458 398
@@ -472,26 +412,21 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
472 list_del_init(&epi->fllink); 412 list_del_init(&epi->fllink);
473 spin_unlock(&file->f_ep_lock); 413 spin_unlock(&file->f_ep_lock);
474 414
475 /* We need to acquire the write IRQ lock before calling ep_unlink() */ 415 if (ep_rb_linked(&epi->rbn))
476 write_lock_irqsave(&ep->lock, flags); 416 ep_rb_erase(&epi->rbn, &ep->rbr);
477
478 /* Really unlink the item from the RB tree */
479 error = ep_unlink(ep, epi);
480
481 write_unlock_irqrestore(&ep->lock, flags);
482 417
483 if (error) 418 spin_lock_irqsave(&ep->lock, flags);
484 goto error_return; 419 if (ep_is_linked(&epi->rdllink))
420 list_del_init(&epi->rdllink);
421 spin_unlock_irqrestore(&ep->lock, flags);
485 422
486 /* At this point it is safe to free the eventpoll item */ 423 /* At this point it is safe to free the eventpoll item */
487 ep_release_epitem(epi); 424 kmem_cache_free(epi_cache, epi);
488 425
489 error = 0; 426 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
490error_return: 427 current, ep, file));
491 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p) = %d\n",
492 current, ep, file, error));
493 428
494 return error; 429 return 0;
495} 430}
496 431
497static void ep_free(struct eventpoll *ep) 432static void ep_free(struct eventpoll *ep)
@@ -506,7 +441,7 @@ static void ep_free(struct eventpoll *ep)
506 /* 441 /*
507 * We need to lock this because we could be hit by 442 * We need to lock this because we could be hit by
508 * eventpoll_release_file() while we're freeing the "struct eventpoll". 443 * eventpoll_release_file() while we're freeing the "struct eventpoll".
509 * We do not need to hold "ep->sem" here because the epoll file 444 * We do not need to hold "ep->mtx" here because the epoll file
510 * is on the way to be removed and no one has references to it 445 * is on the way to be removed and no one has references to it
511 * anymore. The only hit might come from eventpoll_release_file() but 446 * anymore. The only hit might come from eventpoll_release_file() but
512 * holding "epmutex" is sufficent here. 447 * holding "epmutex" is sufficent here.
@@ -525,7 +460,7 @@ static void ep_free(struct eventpoll *ep)
525 /* 460 /*
526 * Walks through the whole tree by freeing each "struct epitem". At this 461 * Walks through the whole tree by freeing each "struct epitem". At this
527 * point we are sure no poll callbacks will be lingering around, and also by 462 * point we are sure no poll callbacks will be lingering around, and also by
528 * write-holding "sem" we can be sure that no file cleanup code will hit 463 * holding "epmutex" we can be sure that no file cleanup code will hit
529 * us during this operation. So we can avoid the lock on "ep->lock". 464 * us during this operation. So we can avoid the lock on "ep->lock".
530 */ 465 */
531 while ((rbp = rb_first(&ep->rbr)) != 0) { 466 while ((rbp = rb_first(&ep->rbr)) != 0) {
@@ -534,16 +469,16 @@ static void ep_free(struct eventpoll *ep)
534 } 469 }
535 470
536 mutex_unlock(&epmutex); 471 mutex_unlock(&epmutex);
472 mutex_destroy(&ep->mtx);
473 kfree(ep);
537} 474}
538 475
539static int ep_eventpoll_release(struct inode *inode, struct file *file) 476static int ep_eventpoll_release(struct inode *inode, struct file *file)
540{ 477{
541 struct eventpoll *ep = file->private_data; 478 struct eventpoll *ep = file->private_data;
542 479
543 if (ep) { 480 if (ep)
544 ep_free(ep); 481 ep_free(ep);
545 kfree(ep);
546 }
547 482
548 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep)); 483 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
549 return 0; 484 return 0;
@@ -559,10 +494,10 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
559 poll_wait(file, &ep->poll_wait, wait); 494 poll_wait(file, &ep->poll_wait, wait);
560 495
561 /* Check our condition */ 496 /* Check our condition */
562 read_lock_irqsave(&ep->lock, flags); 497 spin_lock_irqsave(&ep->lock, flags);
563 if (!list_empty(&ep->rdllist)) 498 if (!list_empty(&ep->rdllist))
564 pollflags = POLLIN | POLLRDNORM; 499 pollflags = POLLIN | POLLRDNORM;
565 read_unlock_irqrestore(&ep->lock, flags); 500 spin_unlock_irqrestore(&ep->lock, flags);
566 501
567 return pollflags; 502 return pollflags;
568} 503}
@@ -594,9 +529,11 @@ void eventpoll_release_file(struct file *file)
594 * We don't want to get "file->f_ep_lock" because it is not 529 * We don't want to get "file->f_ep_lock" because it is not
595 * necessary. It is not necessary because we're in the "struct file" 530 * necessary. It is not necessary because we're in the "struct file"
596 * cleanup path, and this means that noone is using this file anymore. 531 * cleanup path, and this means that noone is using this file anymore.
597 * The only hit might come from ep_free() but by holding the semaphore 532 * So, for example, epoll_ctl() cannot hit here sicne if we reach this
533 * point, the file counter already went to zero and fget() would fail.
534 * The only hit might come from ep_free() but by holding the mutex
598 * will correctly serialize the operation. We do need to acquire 535 * will correctly serialize the operation. We do need to acquire
599 * "ep->sem" after "epmutex" because ep_remove() requires it when called 536 * "ep->mtx" after "epmutex" because ep_remove() requires it when called
600 * from anywhere but ep_free(). 537 * from anywhere but ep_free().
601 */ 538 */
602 mutex_lock(&epmutex); 539 mutex_lock(&epmutex);
@@ -606,9 +543,9 @@ void eventpoll_release_file(struct file *file)
606 543
607 ep = epi->ep; 544 ep = epi->ep;
608 list_del_init(&epi->fllink); 545 list_del_init(&epi->fllink);
609 down_write(&ep->sem); 546 mutex_lock(&ep->mtx);
610 ep_remove(ep, epi); 547 ep_remove(ep, epi);
611 up_write(&ep->sem); 548 mutex_unlock(&ep->mtx);
612 } 549 }
613 550
614 mutex_unlock(&epmutex); 551 mutex_unlock(&epmutex);
@@ -621,12 +558,13 @@ static int ep_alloc(struct eventpoll **pep)
621 if (!ep) 558 if (!ep)
622 return -ENOMEM; 559 return -ENOMEM;
623 560
624 rwlock_init(&ep->lock); 561 spin_lock_init(&ep->lock);
625 init_rwsem(&ep->sem); 562 mutex_init(&ep->mtx);
626 init_waitqueue_head(&ep->wq); 563 init_waitqueue_head(&ep->wq);
627 init_waitqueue_head(&ep->poll_wait); 564 init_waitqueue_head(&ep->poll_wait);
628 INIT_LIST_HEAD(&ep->rdllist); 565 INIT_LIST_HEAD(&ep->rdllist);
629 ep->rbr = RB_ROOT; 566 ep->rbr = RB_ROOT;
567 ep->ovflist = EP_UNACTIVE_PTR;
630 568
631 *pep = ep; 569 *pep = ep;
632 570
@@ -636,20 +574,18 @@ static int ep_alloc(struct eventpoll **pep)
636} 574}
637 575
638/* 576/*
639 * Search the file inside the eventpoll tree. It add usage count to 577 * Search the file inside the eventpoll tree. The RB tree operations
640 * the returned item, so the caller must call ep_release_epitem() 578 * are protected by the "mtx" mutex, and ep_find() must be called with
641 * after finished using the "struct epitem". 579 * "mtx" held.
642 */ 580 */
643static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) 581static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
644{ 582{
645 int kcmp; 583 int kcmp;
646 unsigned long flags;
647 struct rb_node *rbp; 584 struct rb_node *rbp;
648 struct epitem *epi, *epir = NULL; 585 struct epitem *epi, *epir = NULL;
649 struct epoll_filefd ffd; 586 struct epoll_filefd ffd;
650 587
651 ep_set_ffd(&ffd, file, fd); 588 ep_set_ffd(&ffd, file, fd);
652 read_lock_irqsave(&ep->lock, flags);
653 for (rbp = ep->rbr.rb_node; rbp; ) { 589 for (rbp = ep->rbr.rb_node; rbp; ) {
654 epi = rb_entry(rbp, struct epitem, rbn); 590 epi = rb_entry(rbp, struct epitem, rbn);
655 kcmp = ep_cmp_ffd(&ffd, &epi->ffd); 591 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
@@ -658,12 +594,10 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
658 else if (kcmp < 0) 594 else if (kcmp < 0)
659 rbp = rbp->rb_left; 595 rbp = rbp->rb_left;
660 else { 596 else {
661 ep_use_epitem(epi);
662 epir = epi; 597 epir = epi;
663 break; 598 break;
664 } 599 }
665 } 600 }
666 read_unlock_irqrestore(&ep->lock, flags);
667 601
668 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n", 602 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
669 current, file, epir)); 603 current, file, epir));
@@ -686,7 +620,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
686 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", 620 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
687 current, epi->ffd.file, epi, ep)); 621 current, epi->ffd.file, epi, ep));
688 622
689 write_lock_irqsave(&ep->lock, flags); 623 spin_lock_irqsave(&ep->lock, flags);
690 624
691 /* 625 /*
692 * If the event mask does not contain any poll(2) event, we consider the 626 * If the event mask does not contain any poll(2) event, we consider the
@@ -695,7 +629,21 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
695 * until the next EPOLL_CTL_MOD will be issued. 629 * until the next EPOLL_CTL_MOD will be issued.
696 */ 630 */
697 if (!(epi->event.events & ~EP_PRIVATE_BITS)) 631 if (!(epi->event.events & ~EP_PRIVATE_BITS))
698 goto is_disabled; 632 goto out_unlock;
633
634 /*
635 * If we are trasfering events to userspace, we can hold no locks
636 * (because we're accessing user memory, and because of linux f_op->poll()
637 * semantics). All the events that happens during that period of time are
638 * chained in ep->ovflist and requeued later on.
639 */
640 if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
641 if (epi->next == EP_UNACTIVE_PTR) {
642 epi->next = ep->ovflist;
643 ep->ovflist = epi;
644 }
645 goto out_unlock;
646 }
699 647
700 /* If this file is already in the ready list we exit soon */ 648 /* If this file is already in the ready list we exit soon */
701 if (ep_is_linked(&epi->rdllink)) 649 if (ep_is_linked(&epi->rdllink))
@@ -714,8 +662,8 @@ is_linked:
714 if (waitqueue_active(&ep->poll_wait)) 662 if (waitqueue_active(&ep->poll_wait))
715 pwake++; 663 pwake++;
716 664
717is_disabled: 665out_unlock:
718 write_unlock_irqrestore(&ep->lock, flags); 666 spin_unlock_irqrestore(&ep->lock, flags);
719 667
720 /* We have to call this outside the lock */ 668 /* We have to call this outside the lock */
721 if (pwake) 669 if (pwake)
@@ -766,6 +714,9 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
766 rb_insert_color(&epi->rbn, &ep->rbr); 714 rb_insert_color(&epi->rbn, &ep->rbr);
767} 715}
768 716
717/*
718 * Must be called with "mtx" held.
719 */
769static int ep_insert(struct eventpoll *ep, struct epoll_event *event, 720static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
770 struct file *tfile, int fd) 721 struct file *tfile, int fd)
771{ 722{
@@ -786,8 +737,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
786 epi->ep = ep; 737 epi->ep = ep;
787 ep_set_ffd(&epi->ffd, tfile, fd); 738 ep_set_ffd(&epi->ffd, tfile, fd);
788 epi->event = *event; 739 epi->event = *event;
789 atomic_set(&epi->usecnt, 1);
790 epi->nwait = 0; 740 epi->nwait = 0;
741 epi->next = EP_UNACTIVE_PTR;
791 742
792 /* Initialize the poll table using the queue callback */ 743 /* Initialize the poll table using the queue callback */
793 epq.epi = epi; 744 epq.epi = epi;
@@ -796,7 +747,9 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
796 /* 747 /*
797 * Attach the item to the poll hooks and get current event bits. 748 * Attach the item to the poll hooks and get current event bits.
798 * We can safely use the file* here because its usage count has 749 * We can safely use the file* here because its usage count has
799 * been increased by the caller of this function. 750 * been increased by the caller of this function. Note that after
751 * this operation completes, the poll callback can start hitting
752 * the new item.
800 */ 753 */
801 revents = tfile->f_op->poll(tfile, &epq.pt); 754 revents = tfile->f_op->poll(tfile, &epq.pt);
802 755
@@ -813,12 +766,15 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
813 list_add_tail(&epi->fllink, &tfile->f_ep_links); 766 list_add_tail(&epi->fllink, &tfile->f_ep_links);
814 spin_unlock(&tfile->f_ep_lock); 767 spin_unlock(&tfile->f_ep_lock);
815 768
816 /* We have to drop the new item inside our item list to keep track of it */ 769 /*
817 write_lock_irqsave(&ep->lock, flags); 770 * Add the current item to the RB tree. All RB tree operations are
818 771 * protected by "mtx", and ep_insert() is called with "mtx" held.
819 /* Add the current item to the rb-tree */ 772 */
820 ep_rbtree_insert(ep, epi); 773 ep_rbtree_insert(ep, epi);
821 774
775 /* We have to drop the new item inside our item list to keep track of it */
776 spin_lock_irqsave(&ep->lock, flags);
777
822 /* If the file is already "ready" we drop it inside the ready list */ 778 /* If the file is already "ready" we drop it inside the ready list */
823 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) { 779 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
824 list_add_tail(&epi->rdllink, &ep->rdllist); 780 list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -830,7 +786,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
830 pwake++; 786 pwake++;
831 } 787 }
832 788
833 write_unlock_irqrestore(&ep->lock, flags); 789 spin_unlock_irqrestore(&ep->lock, flags);
834 790
835 /* We have to call this outside the lock */ 791 /* We have to call this outside the lock */
836 if (pwake) 792 if (pwake)
@@ -846,12 +802,14 @@ error_unregister:
846 802
847 /* 803 /*
848 * We need to do this because an event could have been arrived on some 804 * We need to do this because an event could have been arrived on some
849 * allocated wait queue. 805 * allocated wait queue. Note that we don't care about the ep->ovflist
806 * list, since that is used/cleaned only inside a section bound by "mtx".
807 * And ep_insert() is called with "mtx" held.
850 */ 808 */
851 write_lock_irqsave(&ep->lock, flags); 809 spin_lock_irqsave(&ep->lock, flags);
852 if (ep_is_linked(&epi->rdllink)) 810 if (ep_is_linked(&epi->rdllink))
853 list_del_init(&epi->rdllink); 811 list_del_init(&epi->rdllink);
854 write_unlock_irqrestore(&ep->lock, flags); 812 spin_unlock_irqrestore(&ep->lock, flags);
855 813
856 kmem_cache_free(epi_cache, epi); 814 kmem_cache_free(epi_cache, epi);
857error_return: 815error_return:
@@ -860,7 +818,7 @@ error_return:
860 818
861/* 819/*
862 * Modify the interest event mask by dropping an event if the new mask 820 * Modify the interest event mask by dropping an event if the new mask
863 * has a match in the current file status. 821 * has a match in the current file status. Must be called with "mtx" held.
864 */ 822 */
865static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event) 823static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_event *event)
866{ 824{
@@ -882,36 +840,28 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
882 */ 840 */
883 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 841 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
884 842
885 write_lock_irqsave(&ep->lock, flags); 843 spin_lock_irqsave(&ep->lock, flags);
886 844
887 /* Copy the data member from inside the lock */ 845 /* Copy the data member from inside the lock */
888 epi->event.data = event->data; 846 epi->event.data = event->data;
889 847
890 /* 848 /*
891 * If the item is not linked to the RB tree it means that it's on its 849 * If the item is "hot" and it is not registered inside the ready
892 * way toward the removal. Do nothing in this case. 850 * list, push it inside.
893 */ 851 */
894 if (ep_rb_linked(&epi->rbn)) { 852 if (revents & event->events) {
895 /* 853 if (!ep_is_linked(&epi->rdllink)) {
896 * If the item is "hot" and it is not registered inside the ready 854 list_add_tail(&epi->rdllink, &ep->rdllist);
897 * list, push it inside. If the item is not "hot" and it is currently 855
898 * registered inside the ready list, unlink it. 856 /* Notify waiting tasks that events are available */
899 */ 857 if (waitqueue_active(&ep->wq))
900 if (revents & event->events) { 858 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
901 if (!ep_is_linked(&epi->rdllink)) { 859 TASK_INTERRUPTIBLE);
902 list_add_tail(&epi->rdllink, &ep->rdllist); 860 if (waitqueue_active(&ep->poll_wait))
903 861 pwake++;
904 /* Notify waiting tasks that events are available */
905 if (waitqueue_active(&ep->wq))
906 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
907 TASK_INTERRUPTIBLE);
908 if (waitqueue_active(&ep->poll_wait))
909 pwake++;
910 }
911 } 862 }
912 } 863 }
913 864 spin_unlock_irqrestore(&ep->lock, flags);
914 write_unlock_irqrestore(&ep->lock, flags);
915 865
916 /* We have to call this outside the lock */ 866 /* We have to call this outside the lock */
917 if (pwake) 867 if (pwake)
@@ -920,36 +870,50 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
920 return 0; 870 return 0;
921} 871}
922 872
923/* 873static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events,
924 * This function is called without holding the "ep->lock" since the call to 874 int maxevents)
925 * __copy_to_user() might sleep, and also f_op->poll() might reenable the IRQ
926 * because of the way poll() is traditionally implemented in Linux.
927 */
928static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
929 struct epoll_event __user *events, int maxevents)
930{ 875{
931 int eventcnt, error = -EFAULT, pwake = 0; 876 int eventcnt, error = -EFAULT, pwake = 0;
932 unsigned int revents; 877 unsigned int revents;
933 unsigned long flags; 878 unsigned long flags;
934 struct epitem *epi; 879 struct epitem *epi, *nepi;
935 struct list_head injlist; 880 struct list_head txlist;
881
882 INIT_LIST_HEAD(&txlist);
883
884 /*
885 * We need to lock this because we could be hit by
886 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
887 */
888 mutex_lock(&ep->mtx);
936 889
937 INIT_LIST_HEAD(&injlist); 890 /*
891 * Steal the ready list, and re-init the original one to the
892 * empty list. Also, set ep->ovflist to NULL so that events
893 * happening while looping w/out locks, are not lost. We cannot
894 * have the poll callback to queue directly on ep->rdllist,
895 * because we are doing it in the loop below, in a lockless way.
896 */
897 spin_lock_irqsave(&ep->lock, flags);
898 list_splice(&ep->rdllist, &txlist);
899 INIT_LIST_HEAD(&ep->rdllist);
900 ep->ovflist = NULL;
901 spin_unlock_irqrestore(&ep->lock, flags);
938 902
939 /* 903 /*
940 * We can loop without lock because this is a task private list. 904 * We can loop without lock because this is a task private list.
941 * We just splice'd out the ep->rdllist in ep_collect_ready_items(). 905 * We just splice'd out the ep->rdllist in ep_collect_ready_items().
942 * Items cannot vanish during the loop because we are holding "sem" in 906 * Items cannot vanish during the loop because we are holding "mtx".
943 * read.
944 */ 907 */
945 for (eventcnt = 0; !list_empty(txlist) && eventcnt < maxevents;) { 908 for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) {
946 epi = list_first_entry(txlist, struct epitem, rdllink); 909 epi = list_first_entry(&txlist, struct epitem, rdllink);
947 prefetch(epi->rdllink.next); 910
911 list_del_init(&epi->rdllink);
948 912
949 /* 913 /*
950 * Get the ready file event set. We can safely use the file 914 * Get the ready file event set. We can safely use the file
951 * because we are holding the "sem" in read and this will 915 * because we are holding the "mtx" and this will guarantee
952 * guarantee that both the file and the item will not vanish. 916 * that both the file and the item will not vanish.
953 */ 917 */
954 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL); 918 revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
955 revents &= epi->event.events; 919 revents &= epi->event.events;
@@ -957,8 +921,8 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
957 /* 921 /*
958 * Is the event mask intersect the caller-requested one, 922 * Is the event mask intersect the caller-requested one,
959 * deliver the event to userspace. Again, we are holding 923 * deliver the event to userspace. Again, we are holding
960 * "sem" in read, so no operations coming from userspace 924 * "mtx", so no operations coming from userspace can change
961 * can change the item. 925 * the item.
962 */ 926 */
963 if (revents) { 927 if (revents) {
964 if (__put_user(revents, 928 if (__put_user(revents,
@@ -970,59 +934,59 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
970 epi->event.events &= EP_PRIVATE_BITS; 934 epi->event.events &= EP_PRIVATE_BITS;
971 eventcnt++; 935 eventcnt++;
972 } 936 }
973
974 /* 937 /*
975 * This is tricky. We are holding the "sem" in read, and this 938 * At this point, no one can insert into ep->rdllist besides
976 * means that the operations that can change the "linked" status 939 * us. The epoll_ctl() callers are locked out by us holding
977 * of the epoll item (epi->rbn and epi->rdllink), cannot touch 940 * "mtx" and the poll callback will queue them in ep->ovflist.
978 * them. Also, since we are "linked" from a epi->rdllink POV
979 * (the item is linked to our transmission list we just
980 * spliced), the ep_poll_callback() cannot touch us either,
981 * because of the check present in there. Another parallel
982 * epoll_wait() will not get the same result set, since we
983 * spliced the ready list before. Note that list_del() still
984 * shows the item as linked to the test in ep_poll_callback().
985 */ 941 */
986 list_del(&epi->rdllink);
987 if (!(epi->event.events & EPOLLET) && 942 if (!(epi->event.events & EPOLLET) &&
988 (revents & epi->event.events)) 943 (revents & epi->event.events))
989 list_add_tail(&epi->rdllink, &injlist); 944 list_add_tail(&epi->rdllink, &ep->rdllist);
990 else {
991 /*
992 * Be sure the item is totally detached before re-init
993 * the list_head. After INIT_LIST_HEAD() is committed,
994 * the ep_poll_callback() can requeue the item again,
995 * but we don't care since we are already past it.
996 */
997 smp_mb();
998 INIT_LIST_HEAD(&epi->rdllink);
999 }
1000 } 945 }
1001 error = 0; 946 error = 0;
1002 947
1003 errxit: 948errxit:
1004 949
950 spin_lock_irqsave(&ep->lock, flags);
1005 /* 951 /*
1006 * If the re-injection list or the txlist are not empty, re-splice 952 * During the time we spent in the loop above, some other events
1007 * them to the ready list and do proper wakeups. 953 * might have been queued by the poll callback. We re-insert them
954 * here (in case they are not already queued, or they're one-shot).
1008 */ 955 */
1009 if (!list_empty(&injlist) || !list_empty(txlist)) { 956 for (nepi = ep->ovflist; (epi = nepi) != NULL;
1010 write_lock_irqsave(&ep->lock, flags); 957 nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
958 if (!ep_is_linked(&epi->rdllink) &&
959 (epi->event.events & ~EP_PRIVATE_BITS))
960 list_add_tail(&epi->rdllink, &ep->rdllist);
961 }
962 /*
963 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
964 * releasing the lock, events will be queued in the normal way inside
965 * ep->rdllist.
966 */
967 ep->ovflist = EP_UNACTIVE_PTR;
968
969 /*
970 * In case of error in the event-send loop, or in case the number of
971 * ready events exceeds the userspace limit, we need to splice the
972 * "txlist" back inside ep->rdllist.
973 */
974 list_splice(&txlist, &ep->rdllist);
1011 975
1012 list_splice(txlist, &ep->rdllist); 976 if (!list_empty(&ep->rdllist)) {
1013 list_splice(&injlist, &ep->rdllist);
1014 /* 977 /*
1015 * Wake up ( if active ) both the eventpoll wait list and the ->poll() 978 * Wake up (if active) both the eventpoll wait list and the ->poll()
1016 * wait list. 979 * wait list (delayed after we release the lock).
1017 */ 980 */
1018 if (waitqueue_active(&ep->wq)) 981 if (waitqueue_active(&ep->wq))
1019 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE | 982 __wake_up_locked(&ep->wq, TASK_UNINTERRUPTIBLE |
1020 TASK_INTERRUPTIBLE); 983 TASK_INTERRUPTIBLE);
1021 if (waitqueue_active(&ep->poll_wait)) 984 if (waitqueue_active(&ep->poll_wait))
1022 pwake++; 985 pwake++;
1023
1024 write_unlock_irqrestore(&ep->lock, flags);
1025 } 986 }
987 spin_unlock_irqrestore(&ep->lock, flags);
988
989 mutex_unlock(&ep->mtx);
1026 990
1027 /* We have to call this outside the lock */ 991 /* We have to call this outside the lock */
1028 if (pwake) 992 if (pwake)
@@ -1031,41 +995,6 @@ static int ep_send_events(struct eventpoll *ep, struct list_head *txlist,
1031 return eventcnt == 0 ? error: eventcnt; 995 return eventcnt == 0 ? error: eventcnt;
1032} 996}
1033 997
1034/*
1035 * Perform the transfer of events to user space.
1036 */
1037static int ep_events_transfer(struct eventpoll *ep,
1038 struct epoll_event __user *events, int maxevents)
1039{
1040 int eventcnt;
1041 unsigned long flags;
1042 struct list_head txlist;
1043
1044 INIT_LIST_HEAD(&txlist);
1045
1046 /*
1047 * We need to lock this because we could be hit by
1048 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
1049 */
1050 down_read(&ep->sem);
1051
1052 /*
1053 * Steal the ready list, and re-init the original one to the
1054 * empty list.
1055 */
1056 write_lock_irqsave(&ep->lock, flags);
1057 list_splice(&ep->rdllist, &txlist);
1058 INIT_LIST_HEAD(&ep->rdllist);
1059 write_unlock_irqrestore(&ep->lock, flags);
1060
1061 /* Build result set in userspace */
1062 eventcnt = ep_send_events(ep, &txlist, events, maxevents);
1063
1064 up_read(&ep->sem);
1065
1066 return eventcnt;
1067}
1068
1069static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 998static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1070 int maxevents, long timeout) 999 int maxevents, long timeout)
1071{ 1000{
@@ -1083,7 +1012,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1083 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; 1012 MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
1084 1013
1085retry: 1014retry:
1086 write_lock_irqsave(&ep->lock, flags); 1015 spin_lock_irqsave(&ep->lock, flags);
1087 1016
1088 res = 0; 1017 res = 0;
1089 if (list_empty(&ep->rdllist)) { 1018 if (list_empty(&ep->rdllist)) {
@@ -1093,6 +1022,7 @@ retry:
1093 * ep_poll_callback() when events will become available. 1022 * ep_poll_callback() when events will become available.
1094 */ 1023 */
1095 init_waitqueue_entry(&wait, current); 1024 init_waitqueue_entry(&wait, current);
1025 wait.flags |= WQ_FLAG_EXCLUSIVE;
1096 __add_wait_queue(&ep->wq, &wait); 1026 __add_wait_queue(&ep->wq, &wait);
1097 1027
1098 for (;;) { 1028 for (;;) {
@@ -1109,9 +1039,9 @@ retry:
1109 break; 1039 break;
1110 } 1040 }
1111 1041
1112 write_unlock_irqrestore(&ep->lock, flags); 1042 spin_unlock_irqrestore(&ep->lock, flags);
1113 jtimeout = schedule_timeout(jtimeout); 1043 jtimeout = schedule_timeout(jtimeout);
1114 write_lock_irqsave(&ep->lock, flags); 1044 spin_lock_irqsave(&ep->lock, flags);
1115 } 1045 }
1116 __remove_wait_queue(&ep->wq, &wait); 1046 __remove_wait_queue(&ep->wq, &wait);
1117 1047
@@ -1121,7 +1051,7 @@ retry:
1121 /* Is it worth to try to dig for events ? */ 1051 /* Is it worth to try to dig for events ? */
1122 eavail = !list_empty(&ep->rdllist); 1052 eavail = !list_empty(&ep->rdllist);
1123 1053
1124 write_unlock_irqrestore(&ep->lock, flags); 1054 spin_unlock_irqrestore(&ep->lock, flags);
1125 1055
1126 /* 1056 /*
1127 * Try to transfer events to user space. In case we get 0 events and 1057 * Try to transfer events to user space. In case we get 0 events and
@@ -1129,18 +1059,17 @@ retry:
1129 * more luck. 1059 * more luck.
1130 */ 1060 */
1131 if (!res && eavail && 1061 if (!res && eavail &&
1132 !(res = ep_events_transfer(ep, events, maxevents)) && jtimeout) 1062 !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
1133 goto retry; 1063 goto retry;
1134 1064
1135 return res; 1065 return res;
1136} 1066}
1137 1067
1138/* 1068/*
1139 * It opens an eventpoll file descriptor by suggesting a storage of "size" 1069 * It opens an eventpoll file descriptor. The "size" parameter is there
1140 * file descriptors. The size parameter is just an hint about how to size 1070 * for historical reasons, when epoll was using a hash instead of an
1141 * data structures. It won't prevent the user to store more than "size" 1071 * RB tree. With the current implementation, the "size" parameter is ignored
1142 * file descriptors inside the epoll interface. It is the kernel part of 1072 * (besides sanity checks).
1143 * the userspace epoll_create(2).
1144 */ 1073 */
1145asmlinkage long sys_epoll_create(int size) 1074asmlinkage long sys_epoll_create(int size)
1146{ 1075{
@@ -1176,7 +1105,6 @@ asmlinkage long sys_epoll_create(int size)
1176 1105
1177error_free: 1106error_free:
1178 ep_free(ep); 1107 ep_free(ep);
1179 kfree(ep);
1180error_return: 1108error_return:
1181 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1109 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1182 current, size, error)); 1110 current, size, error));
@@ -1186,8 +1114,7 @@ error_return:
1186/* 1114/*
1187 * The following function implements the controller interface for 1115 * The following function implements the controller interface for
1188 * the eventpoll file that enables the insertion/removal/change of 1116 * the eventpoll file that enables the insertion/removal/change of
1189 * file descriptors inside the interest set. It represents 1117 * file descriptors inside the interest set.
1190 * the kernel part of the user space epoll_ctl(2).
1191 */ 1118 */
1192asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, 1119asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1193 struct epoll_event __user *event) 1120 struct epoll_event __user *event)
@@ -1237,9 +1164,13 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1237 */ 1164 */
1238 ep = file->private_data; 1165 ep = file->private_data;
1239 1166
1240 down_write(&ep->sem); 1167 mutex_lock(&ep->mtx);
1241 1168
1242 /* Try to lookup the file inside our RB tree */ 1169 /*
1170 * Try to look up the file inside our RB tree. Since we grabbed "mtx"
1171 * above, we can be sure to be able to use the item looked up by
1172 * ep_find() till we release the mutex.
1173 */
1243 epi = ep_find(ep, tfile, fd); 1174 epi = ep_find(ep, tfile, fd);
1244 1175
1245 error = -EINVAL; 1176 error = -EINVAL;
@@ -1266,13 +1197,7 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd,
1266 error = -ENOENT; 1197 error = -ENOENT;
1267 break; 1198 break;
1268 } 1199 }
1269 /* 1200 mutex_unlock(&ep->mtx);
1270 * The function ep_find() increments the usage count of the structure
1271 * so, if this is not NULL, we need to release it.
1272 */
1273 if (epi)
1274 ep_release_epitem(epi);
1275 up_write(&ep->sem);
1276 1201
1277error_tgt_fput: 1202error_tgt_fput:
1278 fput(tfile); 1203 fput(tfile);
@@ -1378,7 +1303,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1378 if (sigmask) { 1303 if (sigmask) {
1379 if (error == -EINTR) { 1304 if (error == -EINTR) {
1380 memcpy(&current->saved_sigmask, &sigsaved, 1305 memcpy(&current->saved_sigmask, &sigsaved,
1381 sizeof(sigsaved)); 1306 sizeof(sigsaved));
1382 set_thread_flag(TIF_RESTORE_SIGMASK); 1307 set_thread_flag(TIF_RESTORE_SIGMASK);
1383 } else 1308 } else
1384 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1309 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
diff --git a/fs/exec.c b/fs/exec.c
index 70fa36554c14..0b685888ff6f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -60,7 +60,7 @@
60#endif 60#endif
61 61
62int core_uses_pid; 62int core_uses_pid;
63char core_pattern[128] = "core"; 63char core_pattern[CORENAME_MAX_SIZE] = "core";
64int suid_dumpable = 0; 64int suid_dumpable = 0;
65 65
66EXPORT_SYMBOL(suid_dumpable); 66EXPORT_SYMBOL(suid_dumpable);
@@ -1264,8 +1264,6 @@ int set_binfmt(struct linux_binfmt *new)
1264 1264
1265EXPORT_SYMBOL(set_binfmt); 1265EXPORT_SYMBOL(set_binfmt);
1266 1266
1267#define CORENAME_MAX_SIZE 64
1268
1269/* format_corename will inspect the pattern parameter, and output a 1267/* format_corename will inspect the pattern parameter, and output a
1270 * name into corename, which must have space for at least 1268 * name into corename, which must have space for at least
1271 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1269 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 685a1c287177..16337bff0272 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -160,13 +160,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
160{ 160{
161 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 161 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
162 162
163 if (flags & SLAB_CTOR_CONSTRUCTOR) { 163 rwlock_init(&ei->i_meta_lock);
164 rwlock_init(&ei->i_meta_lock);
165#ifdef CONFIG_EXT2_FS_XATTR 164#ifdef CONFIG_EXT2_FS_XATTR
166 init_rwsem(&ei->xattr_sem); 165 init_rwsem(&ei->xattr_sem);
167#endif 166#endif
168 inode_init_once(&ei->vfs_inode); 167 inode_init_once(&ei->vfs_inode);
169 }
170} 168}
171 169
172static int init_inodecache(void) 170static int init_inodecache(void)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 54d3c9041259..6e3062913a92 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -466,14 +466,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
466{ 466{
467 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 467 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
468 468
469 if (flags & SLAB_CTOR_CONSTRUCTOR) { 469 INIT_LIST_HEAD(&ei->i_orphan);
470 INIT_LIST_HEAD(&ei->i_orphan);
471#ifdef CONFIG_EXT3_FS_XATTR 470#ifdef CONFIG_EXT3_FS_XATTR
472 init_rwsem(&ei->xattr_sem); 471 init_rwsem(&ei->xattr_sem);
473#endif 472#endif
474 mutex_init(&ei->truncate_mutex); 473 mutex_init(&ei->truncate_mutex);
475 inode_init_once(&ei->vfs_inode); 474 inode_init_once(&ei->vfs_inode);
476 }
477} 475}
478 476
479static int init_inodecache(void) 477static int init_inodecache(void)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 719126932354..cb9afdd0e26e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -517,14 +517,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
517{ 517{
518 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 518 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
519 519
520 if (flags & SLAB_CTOR_CONSTRUCTOR) { 520 INIT_LIST_HEAD(&ei->i_orphan);
521 INIT_LIST_HEAD(&ei->i_orphan);
522#ifdef CONFIG_EXT4DEV_FS_XATTR 521#ifdef CONFIG_EXT4DEV_FS_XATTR
523 init_rwsem(&ei->xattr_sem); 522 init_rwsem(&ei->xattr_sem);
524#endif 523#endif
525 mutex_init(&ei->truncate_mutex); 524 mutex_init(&ei->truncate_mutex);
526 inode_init_once(&ei->vfs_inode); 525 inode_init_once(&ei->vfs_inode);
527 }
528} 526}
529 527
530static int init_inodecache(void) 528static int init_inodecache(void)
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 1959143c1d27..3c9c8a15ec73 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -40,8 +40,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
40{ 40{
41 struct fat_cache *cache = (struct fat_cache *)foo; 41 struct fat_cache *cache = (struct fat_cache *)foo;
42 42
43 if (flags & SLAB_CTOR_CONSTRUCTOR) 43 INIT_LIST_HEAD(&cache->cache_list);
44 INIT_LIST_HEAD(&cache->cache_list);
45} 44}
46 45
47int __init fat_cache_init(void) 46int __init fat_cache_init(void)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 2c55e8dce793..479722d89667 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -500,14 +500,12 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
500{ 500{
501 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 501 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
502 502
503 if (flags & SLAB_CTOR_CONSTRUCTOR) { 503 spin_lock_init(&ei->cache_lru_lock);
504 spin_lock_init(&ei->cache_lru_lock); 504 ei->nr_caches = 0;
505 ei->nr_caches = 0; 505 ei->cache_valid_id = FAT_CACHE_VALID + 1;
506 ei->cache_valid_id = FAT_CACHE_VALID + 1; 506 INIT_LIST_HEAD(&ei->cache_lru);
507 INIT_LIST_HEAD(&ei->cache_lru); 507 INIT_HLIST_NODE(&ei->i_fat_hash);
508 INIT_HLIST_NODE(&ei->i_fat_hash); 508 inode_init_once(&ei->vfs_inode);
509 inode_init_once(&ei->vfs_inode);
510 }
511} 509}
512 510
513static int __init fat_init_inodecache(void) 511static int __init fat_init_inodecache(void)
diff --git a/fs/fifo.c b/fs/fifo.c
index 6e7df7256782..9785e36f81e7 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -12,6 +12,7 @@
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/sched.h>
15#include <linux/pipe_fs_i.h> 16#include <linux/pipe_fs_i.h>
16 17
17static void wait_for_partner(struct inode* inode, unsigned int *cnt) 18static void wait_for_partner(struct inode* inode, unsigned int *cnt)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index acfad65a6e8e..d0ed60bc3188 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -11,6 +11,7 @@
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/sched.h>
14 15
15static const struct file_operations fuse_direct_io_file_operations; 16static const struct file_operations fuse_direct_io_file_operations;
16 17
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1397018ff476..78f7a1dc90dd 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -17,6 +17,7 @@
17#include <linux/parser.h> 17#include <linux/parser.h>
18#include <linux/statfs.h> 18#include <linux/statfs.h>
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h>
20 21
21MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 22MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
22MODULE_DESCRIPTION("Filesystem in Userspace"); 23MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -687,8 +688,7 @@ static void fuse_inode_init_once(void *foo, struct kmem_cache *cachep,
687{ 688{
688 struct inode * inode = foo; 689 struct inode * inode = foo;
689 690
690 if (flags & SLAB_CTOR_CONSTRUCTOR) 691 inode_init_once(inode);
691 inode_init_once(inode);
692} 692}
693 693
694static int __init fuse_fs_init(void) 694static int __init fuse_fs_init(void)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 11477ca3a3c0..b3e152db70c8 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -10,6 +10,7 @@
10#ifndef __GLOCK_DOT_H__ 10#ifndef __GLOCK_DOT_H__
11#define __GLOCK_DOT_H__ 11#define __GLOCK_DOT_H__
12 12
13#include <linux/sched.h>
13#include "incore.h" 14#include "incore.h"
14 15
15/* Flags for lock requests; used in gfs2_holder gh_flag field. 16/* Flags for lock requests; used in gfs2_holder gh_flag field.
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index e460487c0557..787a0edef100 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -27,29 +27,27 @@
27static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 27static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
28{ 28{
29 struct gfs2_inode *ip = foo; 29 struct gfs2_inode *ip = foo;
30 if (flags & SLAB_CTOR_CONSTRUCTOR) { 30
31 inode_init_once(&ip->i_inode); 31 inode_init_once(&ip->i_inode);
32 spin_lock_init(&ip->i_spin); 32 spin_lock_init(&ip->i_spin);
33 init_rwsem(&ip->i_rw_mutex); 33 init_rwsem(&ip->i_rw_mutex);
34 memset(ip->i_cache, 0, sizeof(ip->i_cache)); 34 memset(ip->i_cache, 0, sizeof(ip->i_cache));
35 }
36} 35}
37 36
38static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 37static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
39{ 38{
40 struct gfs2_glock *gl = foo; 39 struct gfs2_glock *gl = foo;
41 if (flags & SLAB_CTOR_CONSTRUCTOR) { 40
42 INIT_HLIST_NODE(&gl->gl_list); 41 INIT_HLIST_NODE(&gl->gl_list);
43 spin_lock_init(&gl->gl_spin); 42 spin_lock_init(&gl->gl_spin);
44 INIT_LIST_HEAD(&gl->gl_holders); 43 INIT_LIST_HEAD(&gl->gl_holders);
45 INIT_LIST_HEAD(&gl->gl_waiters1); 44 INIT_LIST_HEAD(&gl->gl_waiters1);
46 INIT_LIST_HEAD(&gl->gl_waiters3); 45 INIT_LIST_HEAD(&gl->gl_waiters3);
47 gl->gl_lvb = NULL; 46 gl->gl_lvb = NULL;
48 atomic_set(&gl->gl_lvb_count, 0); 47 atomic_set(&gl->gl_lvb_count, 0);
49 INIT_LIST_HEAD(&gl->gl_reclaim); 48 INIT_LIST_HEAD(&gl->gl_reclaim);
50 INIT_LIST_HEAD(&gl->gl_ail_list); 49 INIT_LIST_HEAD(&gl->gl_ail_list);
51 atomic_set(&gl->gl_ail_count, 0); 50 atomic_set(&gl->gl_ail_count, 0);
52 }
53} 51}
54 52
55/** 53/**
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index fafcba593871..9a934db0bd8a 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/mpage.h> 15#include <linux/mpage.h>
16#include <linux/sched.h>
16 17
17#include "hfs_fs.h" 18#include "hfs_fs.h"
18#include "btree.h" 19#include "btree.h"
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4f1888f16cf0..92cf8751e428 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -434,8 +434,7 @@ static void hfs_init_once(void *p, struct kmem_cache *cachep, unsigned long flag
434{ 434{
435 struct hfs_inode_info *i = p; 435 struct hfs_inode_info *i = p;
436 436
437 if (flags & SLAB_CTOR_CONSTRUCTOR) 437 inode_init_once(&i->vfs_inode);
438 inode_init_once(&i->vfs_inode);
439} 438}
440 439
441static int __init init_hfs_fs(void) 440static int __init init_hfs_fs(void)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 642012ac3370..45dab5d6cc10 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -12,6 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/mpage.h> 14#include <linux/mpage.h>
15#include <linux/sched.h>
15 16
16#include "hfsplus_fs.h" 17#include "hfsplus_fs.h"
17#include "hfsplus_raw.h" 18#include "hfsplus_raw.h"
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 37afbec8a761..ebd1b380cbbc 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -470,8 +470,7 @@ static void hfsplus_init_once(void *p, struct kmem_cache *cachep, unsigned long
470{ 470{
471 struct hfsplus_inode_info *i = p; 471 struct hfsplus_inode_info *i = p;
472 472
473 if (flags & SLAB_CTOR_CONSTRUCTOR) 473 inode_init_once(&i->vfs_inode);
474 inode_init_once(&i->vfs_inode);
475} 474}
476 475
477static int __init init_hfsplus_fs(void) 476static int __init init_hfsplus_fs(void)
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index b52b7381d10f..b6fca543544c 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * general buffer i/o 6 * general buffer i/o
7 */ 7 */
8 8#include <linux/sched.h>
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11void hpfs_lock_creation(struct super_block *s) 11void hpfs_lock_creation(struct super_block *s)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 9953cf9a2f16..d256559b4104 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -5,7 +5,7 @@
5 * 5 *
6 * adding & removing files & directories 6 * adding & removing files & directories
7 */ 7 */
8 8#include <linux/sched.h>
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 1b95f39fbc37..29cc34abb2ea 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -12,6 +12,7 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/statfs.h> 13#include <linux/statfs.h>
14#include <linux/magic.h> 14#include <linux/magic.h>
15#include <linux/sched.h>
15 16
16/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 17/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
17 18
@@ -176,11 +177,9 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
176{ 177{
177 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 178 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
178 179
179 if (flags & SLAB_CTOR_CONSTRUCTOR) { 180 mutex_init(&ei->i_mutex);
180 mutex_init(&ei->i_mutex); 181 mutex_init(&ei->i_parent_mutex);
181 mutex_init(&ei->i_parent_mutex); 182 inode_init_once(&ei->vfs_inode);
182 inode_init_once(&ei->vfs_inode);
183 }
184} 183}
185 184
186static int init_inodecache(void) 185static int init_inodecache(void)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 98959b87cdf8..aa083dd34e92 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -556,8 +556,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
556{ 556{
557 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 557 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
558 558
559 if (flags & SLAB_CTOR_CONSTRUCTOR) 559 inode_init_once(&ei->vfs_inode);
560 inode_init_once(&ei->vfs_inode);
561} 560}
562 561
563const struct file_operations hugetlbfs_file_operations = { 562const struct file_operations hugetlbfs_file_operations = {
diff --git a/fs/inode.c b/fs/inode.c
index df2ef15d03d2..9a012cc5b6cd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -213,8 +213,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
213{ 213{
214 struct inode * inode = (struct inode *) foo; 214 struct inode * inode = (struct inode *) foo;
215 215
216 if (flags & SLAB_CTOR_CONSTRUCTOR) 216 inode_init_once(inode);
217 inode_init_once(inode);
218} 217}
219 218
220/* 219/*
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index e99f7ff4ecb4..5c3eecf7542e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -77,8 +77,7 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags
77{ 77{
78 struct iso_inode_info *ei = foo; 78 struct iso_inode_info *ei = foo;
79 79
80 if (flags & SLAB_CTOR_CONSTRUCTOR) 80 inode_init_once(&ei->vfs_inode);
81 inode_init_once(&ei->vfs_inode);
82} 81}
83 82
84static int init_inodecache(void) 83static int init_inodecache(void)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 45368f8bbe72..6488af43bc9b 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -47,10 +47,8 @@ static void jffs2_i_init_once(void * foo, struct kmem_cache * cachep, unsigned l
47{ 47{
48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo; 48 struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo;
49 49
50 if (flags & SLAB_CTOR_CONSTRUCTOR) { 50 init_MUTEX(&ei->sem);
51 init_MUTEX(&ei->sem); 51 inode_init_once(&ei->vfs_inode);
52 inode_init_once(&ei->vfs_inode);
53 }
54} 52}
55 53
56static int jffs2_sync_fs(struct super_block *sb, int wait) 54static int jffs2_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 6b3acb0b5781..43d4f69afbec 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -184,16 +184,14 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
184{ 184{
185 struct metapage *mp = (struct metapage *)foo; 185 struct metapage *mp = (struct metapage *)foo;
186 186
187 if (flags & SLAB_CTOR_CONSTRUCTOR) { 187 mp->lid = 0;
188 mp->lid = 0; 188 mp->lsn = 0;
189 mp->lsn = 0; 189 mp->flag = 0;
190 mp->flag = 0; 190 mp->data = NULL;
191 mp->data = NULL; 191 mp->clsn = 0;
192 mp->clsn = 0; 192 mp->log = NULL;
193 mp->log = NULL; 193 set_bit(META_free, &mp->flag);
194 set_bit(META_free, &mp->flag); 194 init_waitqueue_head(&mp->wait);
195 init_waitqueue_head(&mp->wait);
196 }
197} 195}
198 196
199static inline struct metapage *alloc_metapage(gfp_t gfp_mask) 197static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index ea9dc3e65dcf..20e4ac1c79a3 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -752,20 +752,18 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags
752{ 752{
753 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; 753 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
754 754
755 if (flags & SLAB_CTOR_CONSTRUCTOR) { 755 memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
756 memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); 756 INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
757 INIT_LIST_HEAD(&jfs_ip->anon_inode_list); 757 init_rwsem(&jfs_ip->rdwrlock);
758 init_rwsem(&jfs_ip->rdwrlock); 758 mutex_init(&jfs_ip->commit_mutex);
759 mutex_init(&jfs_ip->commit_mutex); 759 init_rwsem(&jfs_ip->xattr_sem);
760 init_rwsem(&jfs_ip->xattr_sem); 760 spin_lock_init(&jfs_ip->ag_lock);
761 spin_lock_init(&jfs_ip->ag_lock); 761 jfs_ip->active_ag = -1;
762 jfs_ip->active_ag = -1;
763#ifdef CONFIG_JFS_POSIX_ACL 762#ifdef CONFIG_JFS_POSIX_ACL
764 jfs_ip->i_acl = JFS_ACL_NOT_CACHED; 763 jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
765 jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; 764 jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
766#endif 765#endif
767 inode_init_once(&jfs_ip->vfs_inode); 766 inode_init_once(&jfs_ip->vfs_inode);
768 }
769} 767}
770 768
771static int __init init_jfs_fs(void) 769static int __init init_jfs_fs(void)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index f4d45d4d835b..d070b18e539d 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -153,7 +153,7 @@ nlmclnt_recovery(struct nlm_host *host)
153 if (!host->h_reclaiming++) { 153 if (!host->h_reclaiming++) {
154 nlm_get_host(host); 154 nlm_get_host(host);
155 __module_get(THIS_MODULE); 155 __module_get(THIS_MODULE);
156 if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0) 156 if (kernel_thread(reclaimer, host, CLONE_FS | CLONE_FILES) < 0)
157 module_put(THIS_MODULE); 157 module_put(THIS_MODULE);
158 } 158 }
159} 159}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ad21c0713efa..96070bff93fc 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -221,7 +221,7 @@ nlm_bind_host(struct nlm_host *host)
221 host->h_nextrebind - jiffies); 221 host->h_nextrebind - jiffies);
222 } 222 }
223 } else { 223 } else {
224 unsigned long increment = nlmsvc_timeout * HZ; 224 unsigned long increment = nlmsvc_timeout;
225 struct rpc_timeout timeparms = { 225 struct rpc_timeout timeparms = {
226 .to_initval = increment, 226 .to_initval = increment,
227 .to_increment = increment, 227 .to_increment = increment,
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 9702956d206c..5316e307a49d 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -586,10 +586,6 @@ static struct rpc_version nlm_version3 = {
586 .procs = nlm_procedures, 586 .procs = nlm_procedures,
587}; 587};
588 588
589#ifdef CONFIG_LOCKD_V4
590extern struct rpc_version nlm_version4;
591#endif
592
593static struct rpc_version * nlm_versions[] = { 589static struct rpc_version * nlm_versions[] = {
594 [1] = &nlm_version1, 590 [1] = &nlm_version1,
595 [3] = &nlm_version3, 591 [3] = &nlm_version3,
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index ce1efdbe1b3a..846fc1d639dd 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -123,7 +123,8 @@ static __be32 *
123nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) 123nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
124{ 124{
125 struct file_lock *fl = &lock->fl; 125 struct file_lock *fl = &lock->fl;
126 __s64 len, start, end; 126 __u64 len, start;
127 __s64 end;
127 128
128 if (!(p = xdr_decode_string_inplace(p, &lock->caller, 129 if (!(p = xdr_decode_string_inplace(p, &lock->caller,
129 &lock->len, NLM_MAXSTRLEN)) 130 &lock->len, NLM_MAXSTRLEN))
@@ -417,7 +418,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp)
417 if (resp->status == nlm_lck_denied) { 418 if (resp->status == nlm_lck_denied) {
418 struct file_lock *fl = &resp->lock.fl; 419 struct file_lock *fl = &resp->lock.fl;
419 u32 excl; 420 u32 excl;
420 s64 start, end, len; 421 __u64 start, len;
422 __s64 end;
421 423
422 memset(&resp->lock, 0, sizeof(resp->lock)); 424 memset(&resp->lock, 0, sizeof(resp->lock));
423 locks_init_lock(fl); 425 locks_init_lock(fl);
diff --git a/fs/locks.c b/fs/locks.c
index 8ec16ab5ef74..431a8b871fce 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -203,9 +203,6 @@ static void init_once(void *foo, struct kmem_cache *cache, unsigned long flags)
203{ 203{
204 struct file_lock *lock = (struct file_lock *) foo; 204 struct file_lock *lock = (struct file_lock *) foo;
205 205
206 if (!(flags & SLAB_CTOR_CONSTRUCTOR))
207 return;
208
209 locks_init_lock(lock); 206 locks_init_lock(lock);
210} 207}
211 208
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index c4a554df7b7e..99a12f127769 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -15,6 +15,7 @@
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
17#include <linux/bitops.h> 17#include <linux/bitops.h>
18#include <linux/sched.h>
18 19
19static int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; 20static int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 };
20 21
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 2f4d43a2a310..be4044614ac8 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -73,8 +73,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
73{ 73{
74 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 74 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
75 75
76 if (flags & SLAB_CTOR_CONSTRUCTOR) 76 inode_init_once(&ei->vfs_inode);
77 inode_init_once(&ei->vfs_inode);
78} 77}
79 78
80static int init_inodecache(void) 79static int init_inodecache(void)
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index addfd3147ea7..d3152f8d95c6 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,6 +17,7 @@
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
20#include <linux/sched.h>
20 21
21#include <linux/ncp_fs.h> 22#include <linux/ncp_fs.h>
22#include "ncplib_kernel.h" 23#include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index c29f00ad495d..cf06eb9f050e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -60,10 +60,8 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
60{ 60{
61 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 61 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
62 62
63 if (flags & SLAB_CTOR_CONSTRUCTOR) { 63 mutex_init(&ei->open_mutex);
64 mutex_init(&ei->open_mutex); 64 inode_init_once(&ei->vfs_inode);
65 inode_init_once(&ei->vfs_inode);
66 }
67} 65}
68 66
69static int init_inodecache(void) 67static int init_inodecache(void)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 8843a83d4ef0..c67b4bdcf719 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -17,6 +17,7 @@
17#include <linux/highuid.h> 17#include <linux/highuid.h>
18#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
19#include <linux/vmalloc.h> 19#include <linux/vmalloc.h>
20#include <linux/sched.h>
20 21
21#include <linux/ncp_fs.h> 22#include <linux/ncp_fs.h>
22 23
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index db3d7919c601..c2bb14e053e1 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -24,7 +24,7 @@ enum nfs4_callback_opnum {
24}; 24};
25 25
26struct cb_compound_hdr_arg { 26struct cb_compound_hdr_arg {
27 int taglen; 27 unsigned int taglen;
28 const char *tag; 28 const char *tag;
29 unsigned int callback_ident; 29 unsigned int callback_ident;
30 unsigned nops; 30 unsigned nops;
@@ -32,7 +32,7 @@ struct cb_compound_hdr_arg {
32 32
33struct cb_compound_hdr_res { 33struct cb_compound_hdr_res {
34 __be32 *status; 34 __be32 *status;
35 int taglen; 35 unsigned int taglen;
36 const char *tag; 36 const char *tag;
37 __be32 *nops; 37 __be32 *nops;
38}; 38};
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 50c6821bad26..881fa4900923 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -12,7 +12,7 @@
12 12
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/init.h> 14#include <linux/init.h>
15 15#include <linux/sched.h>
16#include <linux/time.h> 16#include <linux/time.h>
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/mm.h> 18#include <linux/mm.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 841c99a9b11c..7f37d1bea83f 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -226,7 +226,7 @@ restart:
226 spin_unlock(&clp->cl_lock); 226 spin_unlock(&clp->cl_lock);
227} 227}
228 228
229int nfs_do_expire_all_delegations(void *ptr) 229static int nfs_do_expire_all_delegations(void *ptr)
230{ 230{
231 struct nfs_client *clp = ptr; 231 struct nfs_client *clp = ptr;
232 struct nfs_delegation *delegation; 232 struct nfs_delegation *delegation;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3df428816559..c27258b5d3e1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,6 +33,7 @@
33#include <linux/pagevec.h> 33#include <linux/pagevec.h>
34#include <linux/namei.h> 34#include <linux/namei.h>
35#include <linux/mount.h> 35#include <linux/mount.h>
36#include <linux/sched.h>
36 37
37#include "nfs4_fs.h" 38#include "nfs4_fs.h"
38#include "delegation.h" 39#include "delegation.h"
@@ -607,7 +608,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
607 return res; 608 return res;
608} 609}
609 610
610loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) 611static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
611{ 612{
612 mutex_lock(&filp->f_path.dentry->d_inode->i_mutex); 613 mutex_lock(&filp->f_path.dentry->d_inode->i_mutex);
613 switch (origin) { 614 switch (origin) {
@@ -633,7 +634,7 @@ out:
633 * All directory operations under NFS are synchronous, so fsync() 634 * All directory operations under NFS are synchronous, so fsync()
634 * is a dummy operation. 635 * is a dummy operation.
635 */ 636 */
636int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) 637static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
637{ 638{
638 dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n", 639 dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
639 dentry->d_parent->d_name.name, dentry->d_name.name, 640 dentry->d_parent->d_name.name, dentry->d_name.name,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5eaee6dd040b..9eb8eb4e4a08 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -27,6 +27,7 @@
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/pagemap.h> 28#include <linux/pagemap.h>
29#include <linux/smp_lock.h> 29#include <linux/smp_lock.h>
30#include <linux/aio.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32#include <asm/system.h> 33#include <asm/system.h>
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2a3fd9573207..bd9f5a836592 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -15,7 +15,7 @@
15 15
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/init.h> 17#include <linux/init.h>
18 18#include <linux/sched.h>
19#include <linux/time.h> 19#include <linux/time.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
@@ -1164,21 +1164,19 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1164{ 1164{
1165 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1165 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1166 1166
1167 if (flags & SLAB_CTOR_CONSTRUCTOR) { 1167 inode_init_once(&nfsi->vfs_inode);
1168 inode_init_once(&nfsi->vfs_inode); 1168 spin_lock_init(&nfsi->req_lock);
1169 spin_lock_init(&nfsi->req_lock); 1169 INIT_LIST_HEAD(&nfsi->dirty);
1170 INIT_LIST_HEAD(&nfsi->dirty); 1170 INIT_LIST_HEAD(&nfsi->commit);
1171 INIT_LIST_HEAD(&nfsi->commit); 1171 INIT_LIST_HEAD(&nfsi->open_files);
1172 INIT_LIST_HEAD(&nfsi->open_files); 1172 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1173 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1173 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1174 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1174 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
1175 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1175 atomic_set(&nfsi->data_updates, 0);
1176 atomic_set(&nfsi->data_updates, 0); 1176 nfsi->ndirty = 0;
1177 nfsi->ndirty = 0; 1177 nfsi->ncommit = 0;
1178 nfsi->ncommit = 0; 1178 nfsi->npages = 0;
1179 nfsi->npages = 0; 1179 nfs4_init_once(nfsi);
1180 nfs4_init_once(nfsi);
1181 }
1182} 1180}
1183 1181
1184static int __init nfs_init_inodecache(void) 1182static int __init nfs_init_inodecache(void)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d6a30e965787..648e0ac0f90e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -790,7 +790,7 @@ out:
790 return -EACCES; 790 return -EACCES;
791} 791}
792 792
793int nfs4_recover_expired_lease(struct nfs_server *server) 793static int nfs4_recover_expired_lease(struct nfs_server *server)
794{ 794{
795 struct nfs_client *clp = server->nfs_client; 795 struct nfs_client *clp = server->nfs_client;
796 int ret; 796 int ret;
@@ -2748,7 +2748,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
2748/* This is the error handling routine for processes that are allowed 2748/* This is the error handling routine for processes that are allowed
2749 * to sleep. 2749 * to sleep.
2750 */ 2750 */
2751int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 2751static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
2752{ 2752{
2753 struct nfs_client *clp = server->nfs_client; 2753 struct nfs_client *clp = server->nfs_client;
2754 int ret = errorcode; 2754 int ret = errorcode;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5fffbdfa971f..8ed79d5c54f9 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -104,7 +104,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
104 return cred; 104 return cred;
105} 105}
106 106
107struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) 107static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
108{ 108{
109 struct nfs4_state_owner *sp; 109 struct nfs4_state_owner *sp;
110 110
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 938f37166788..8003c91ccb9a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -646,10 +646,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
646{ 646{
647 __be32 *p; 647 __be32 *p;
648 648
649 RESERVE_SPACE(8+sizeof(arg->stateid->data)); 649 RESERVE_SPACE(8+NFS4_STATEID_SIZE);
650 WRITE32(OP_CLOSE); 650 WRITE32(OP_CLOSE);
651 WRITE32(arg->seqid->sequence->counter); 651 WRITE32(arg->seqid->sequence->counter);
652 WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); 652 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
653 653
654 return 0; 654 return 0;
655} 655}
@@ -793,17 +793,17 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
793 WRITE64(nfs4_lock_length(args->fl)); 793 WRITE64(nfs4_lock_length(args->fl));
794 WRITE32(args->new_lock_owner); 794 WRITE32(args->new_lock_owner);
795 if (args->new_lock_owner){ 795 if (args->new_lock_owner){
796 RESERVE_SPACE(40); 796 RESERVE_SPACE(4+NFS4_STATEID_SIZE+20);
797 WRITE32(args->open_seqid->sequence->counter); 797 WRITE32(args->open_seqid->sequence->counter);
798 WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data)); 798 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE);
799 WRITE32(args->lock_seqid->sequence->counter); 799 WRITE32(args->lock_seqid->sequence->counter);
800 WRITE64(args->lock_owner.clientid); 800 WRITE64(args->lock_owner.clientid);
801 WRITE32(4); 801 WRITE32(4);
802 WRITE32(args->lock_owner.id); 802 WRITE32(args->lock_owner.id);
803 } 803 }
804 else { 804 else {
805 RESERVE_SPACE(20); 805 RESERVE_SPACE(NFS4_STATEID_SIZE+4);
806 WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data)); 806 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE);
807 WRITE32(args->lock_seqid->sequence->counter); 807 WRITE32(args->lock_seqid->sequence->counter);
808 } 808 }
809 809
@@ -830,11 +830,11 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *arg
830{ 830{
831 __be32 *p; 831 __be32 *p;
832 832
833 RESERVE_SPACE(44); 833 RESERVE_SPACE(12+NFS4_STATEID_SIZE+16);
834 WRITE32(OP_LOCKU); 834 WRITE32(OP_LOCKU);
835 WRITE32(nfs4_lock_type(args->fl, 0)); 835 WRITE32(nfs4_lock_type(args->fl, 0));
836 WRITE32(args->seqid->sequence->counter); 836 WRITE32(args->seqid->sequence->counter);
837 WRITEMEM(args->stateid->data, sizeof(args->stateid->data)); 837 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE);
838 WRITE64(args->fl->fl_start); 838 WRITE64(args->fl->fl_start);
839 WRITE64(nfs4_lock_length(args->fl)); 839 WRITE64(nfs4_lock_length(args->fl));
840 840
@@ -966,9 +966,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
966{ 966{
967 __be32 *p; 967 __be32 *p;
968 968
969 RESERVE_SPACE(4+sizeof(stateid->data)); 969 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
970 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 970 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
971 WRITEMEM(stateid->data, sizeof(stateid->data)); 971 WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
972 encode_string(xdr, name->len, name->name); 972 encode_string(xdr, name->len, name->name);
973} 973}
974 974
@@ -996,9 +996,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
996{ 996{
997 __be32 *p; 997 __be32 *p;
998 998
999 RESERVE_SPACE(8+sizeof(arg->stateid->data)); 999 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4);
1000 WRITE32(OP_OPEN_CONFIRM); 1000 WRITE32(OP_OPEN_CONFIRM);
1001 WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); 1001 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1002 WRITE32(arg->seqid->sequence->counter); 1002 WRITE32(arg->seqid->sequence->counter);
1003 1003
1004 return 0; 1004 return 0;
@@ -1008,9 +1008,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
1008{ 1008{
1009 __be32 *p; 1009 __be32 *p;
1010 1010
1011 RESERVE_SPACE(8+sizeof(arg->stateid->data)); 1011 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4);
1012 WRITE32(OP_OPEN_DOWNGRADE); 1012 WRITE32(OP_OPEN_DOWNGRADE);
1013 WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data)); 1013 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE);
1014 WRITE32(arg->seqid->sequence->counter); 1014 WRITE32(arg->seqid->sequence->counter);
1015 encode_share_access(xdr, arg->open_flags); 1015 encode_share_access(xdr, arg->open_flags);
1016 return 0; 1016 return 0;
@@ -1045,12 +1045,12 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1045 nfs4_stateid stateid; 1045 nfs4_stateid stateid;
1046 __be32 *p; 1046 __be32 *p;
1047 1047
1048 RESERVE_SPACE(16); 1048 RESERVE_SPACE(NFS4_STATEID_SIZE);
1049 if (ctx->state != NULL) { 1049 if (ctx->state != NULL) {
1050 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1050 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
1051 WRITEMEM(stateid.data, sizeof(stateid.data)); 1051 WRITEMEM(stateid.data, NFS4_STATEID_SIZE);
1052 } else 1052 } else
1053 WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); 1053 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
1054} 1054}
1055 1055
1056static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args) 1056static int encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args)
@@ -1079,10 +1079,10 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1079 int replen; 1079 int replen;
1080 __be32 *p; 1080 __be32 *p;
1081 1081
1082 RESERVE_SPACE(32+sizeof(nfs4_verifier)); 1082 RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20);
1083 WRITE32(OP_READDIR); 1083 WRITE32(OP_READDIR);
1084 WRITE64(readdir->cookie); 1084 WRITE64(readdir->cookie);
1085 WRITEMEM(readdir->verifier.data, sizeof(readdir->verifier.data)); 1085 WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE);
1086 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ 1086 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */
1087 WRITE32(readdir->count); 1087 WRITE32(readdir->count);
1088 WRITE32(2); 1088 WRITE32(2);
@@ -1190,9 +1190,9 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
1190{ 1190{
1191 __be32 *p; 1191 __be32 *p;
1192 1192
1193 RESERVE_SPACE(4+sizeof(zero_stateid.data)); 1193 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1194 WRITE32(OP_SETATTR); 1194 WRITE32(OP_SETATTR);
1195 WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); 1195 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE);
1196 RESERVE_SPACE(2*4); 1196 RESERVE_SPACE(2*4);
1197 WRITE32(1); 1197 WRITE32(1);
1198 WRITE32(FATTR4_WORD0_ACL); 1198 WRITE32(FATTR4_WORD0_ACL);
@@ -1220,9 +1220,9 @@ static int encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *
1220 int status; 1220 int status;
1221 __be32 *p; 1221 __be32 *p;
1222 1222
1223 RESERVE_SPACE(4+sizeof(arg->stateid.data)); 1223 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1224 WRITE32(OP_SETATTR); 1224 WRITE32(OP_SETATTR);
1225 WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); 1225 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE);
1226 1226
1227 if ((status = encode_attrs(xdr, arg->iap, server))) 1227 if ((status = encode_attrs(xdr, arg->iap, server)))
1228 return status; 1228 return status;
@@ -1234,9 +1234,9 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
1234{ 1234{
1235 __be32 *p; 1235 __be32 *p;
1236 1236
1237 RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data)); 1237 RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE);
1238 WRITE32(OP_SETCLIENTID); 1238 WRITE32(OP_SETCLIENTID);
1239 WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data)); 1239 WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1240 1240
1241 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1241 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1242 RESERVE_SPACE(4); 1242 RESERVE_SPACE(4);
@@ -1253,10 +1253,10 @@ static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_c
1253{ 1253{
1254 __be32 *p; 1254 __be32 *p;
1255 1255
1256 RESERVE_SPACE(12 + sizeof(client_state->cl_confirm.data)); 1256 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE);
1257 WRITE32(OP_SETCLIENTID_CONFIRM); 1257 WRITE32(OP_SETCLIENTID_CONFIRM);
1258 WRITE64(client_state->cl_clientid); 1258 WRITE64(client_state->cl_clientid);
1259 WRITEMEM(client_state->cl_confirm.data, sizeof(client_state->cl_confirm.data)); 1259 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1260 1260
1261 return 0; 1261 return 0;
1262} 1262}
@@ -1284,10 +1284,10 @@ static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *statei
1284{ 1284{
1285 __be32 *p; 1285 __be32 *p;
1286 1286
1287 RESERVE_SPACE(20); 1287 RESERVE_SPACE(4+NFS4_STATEID_SIZE);
1288 1288
1289 WRITE32(OP_DELEGRETURN); 1289 WRITE32(OP_DELEGRETURN);
1290 WRITEMEM(stateid->data, sizeof(stateid->data)); 1290 WRITEMEM(stateid->data, NFS4_STATEID_SIZE);
1291 return 0; 1291 return 0;
1292 1292
1293} 1293}
@@ -2494,7 +2494,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2494 int i; 2494 int i;
2495 dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations); 2495 dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations);
2496 for (i = loc->nservers; i < m; i++) { 2496 for (i = loc->nservers; i < m; i++) {
2497 int len; 2497 unsigned int len;
2498 char *data; 2498 char *data;
2499 status = decode_opaque_inline(xdr, &len, &data); 2499 status = decode_opaque_inline(xdr, &len, &data);
2500 if (unlikely(status != 0)) 2500 if (unlikely(status != 0))
@@ -2642,7 +2642,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
2642 return 0; 2642 return 0;
2643} 2643}
2644 2644
2645static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid) 2645static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid)
2646{ 2646{
2647 uint32_t len; 2647 uint32_t len;
2648 __be32 *p; 2648 __be32 *p;
@@ -2667,7 +2667,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
2667 return 0; 2667 return 0;
2668} 2668}
2669 2669
2670static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid) 2670static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid)
2671{ 2671{
2672 uint32_t len; 2672 uint32_t len;
2673 __be32 *p; 2673 __be32 *p;
@@ -2897,8 +2897,8 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
2897 status = decode_op_hdr(xdr, OP_CLOSE); 2897 status = decode_op_hdr(xdr, OP_CLOSE);
2898 if (status) 2898 if (status)
2899 return status; 2899 return status;
2900 READ_BUF(sizeof(res->stateid.data)); 2900 READ_BUF(NFS4_STATEID_SIZE);
2901 COPYMEM(res->stateid.data, sizeof(res->stateid.data)); 2901 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
2902 return 0; 2902 return 0;
2903} 2903}
2904 2904
@@ -3186,8 +3186,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
3186 3186
3187 status = decode_op_hdr(xdr, OP_LOCK); 3187 status = decode_op_hdr(xdr, OP_LOCK);
3188 if (status == 0) { 3188 if (status == 0) {
3189 READ_BUF(sizeof(res->stateid.data)); 3189 READ_BUF(NFS4_STATEID_SIZE);
3190 COPYMEM(res->stateid.data, sizeof(res->stateid.data)); 3190 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3191 } else if (status == -NFS4ERR_DENIED) 3191 } else if (status == -NFS4ERR_DENIED)
3192 return decode_lock_denied(xdr, NULL); 3192 return decode_lock_denied(xdr, NULL);
3193 return status; 3193 return status;
@@ -3209,8 +3209,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3209 3209
3210 status = decode_op_hdr(xdr, OP_LOCKU); 3210 status = decode_op_hdr(xdr, OP_LOCKU);
3211 if (status == 0) { 3211 if (status == 0) {
3212 READ_BUF(sizeof(res->stateid.data)); 3212 READ_BUF(NFS4_STATEID_SIZE);
3213 COPYMEM(res->stateid.data, sizeof(res->stateid.data)); 3213 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3214 } 3214 }
3215 return status; 3215 return status;
3216} 3216}
@@ -3251,8 +3251,8 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3251 res->delegation_type = 0; 3251 res->delegation_type = 0;
3252 return 0; 3252 return 0;
3253 } 3253 }
3254 READ_BUF(20); 3254 READ_BUF(NFS4_STATEID_SIZE+4);
3255 COPYMEM(res->delegation.data, sizeof(res->delegation.data)); 3255 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE);
3256 READ32(res->do_recall); 3256 READ32(res->do_recall);
3257 switch (delegation_type) { 3257 switch (delegation_type) {
3258 case NFS4_OPEN_DELEGATE_READ: 3258 case NFS4_OPEN_DELEGATE_READ:
@@ -3275,8 +3275,8 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3275 status = decode_op_hdr(xdr, OP_OPEN); 3275 status = decode_op_hdr(xdr, OP_OPEN);
3276 if (status) 3276 if (status)
3277 return status; 3277 return status;
3278 READ_BUF(sizeof(res->stateid.data)); 3278 READ_BUF(NFS4_STATEID_SIZE);
3279 COPYMEM(res->stateid.data, sizeof(res->stateid.data)); 3279 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3280 3280
3281 decode_change_info(xdr, &res->cinfo); 3281 decode_change_info(xdr, &res->cinfo);
3282 3282
@@ -3302,8 +3302,8 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre
3302 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 3302 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3303 if (status) 3303 if (status)
3304 return status; 3304 return status;
3305 READ_BUF(sizeof(res->stateid.data)); 3305 READ_BUF(NFS4_STATEID_SIZE);
3306 COPYMEM(res->stateid.data, sizeof(res->stateid.data)); 3306 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3307 return 0; 3307 return 0;
3308} 3308}
3309 3309
@@ -3315,8 +3315,8 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re
3315 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); 3315 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
3316 if (status) 3316 if (status)
3317 return status; 3317 return status;
3318 READ_BUF(sizeof(res->stateid.data)); 3318 READ_BUF(NFS4_STATEID_SIZE);
3319 COPYMEM(res->stateid.data, sizeof(res->stateid.data)); 3319 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3320 return 0; 3320 return 0;
3321} 3321}
3322 3322
@@ -3590,9 +3590,9 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
3590 } 3590 }
3591 READ32(nfserr); 3591 READ32(nfserr);
3592 if (nfserr == NFS_OK) { 3592 if (nfserr == NFS_OK) {
3593 READ_BUF(8 + sizeof(clp->cl_confirm.data)); 3593 READ_BUF(8 + NFS4_VERIFIER_SIZE);
3594 READ64(clp->cl_clientid); 3594 READ64(clp->cl_clientid);
3595 COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); 3595 COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE);
3596 } else if (nfserr == NFSERR_CLID_INUSE) { 3596 } else if (nfserr == NFSERR_CLID_INUSE) {
3597 uint32_t len; 3597 uint32_t len;
3598 3598
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e12054c86d0d..cbdd1c6aaa94 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/sched.h>
14#include <linux/sunrpc/clnt.h> 15#include <linux/sunrpc/clnt.h>
15#include <linux/nfs3.h> 16#include <linux/nfs3.h>
16#include <linux/nfs4.h> 17#include <linux/nfs4.h>
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9a55807b2a70..7bd7cb95c034 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -79,7 +79,7 @@ void nfs_readdata_release(void *data)
79static 79static
80int nfs_return_empty_page(struct page *page) 80int nfs_return_empty_page(struct page *page)
81{ 81{
82 memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); 82 zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
83 SetPageUptodate(page); 83 SetPageUptodate(page);
84 unlock_page(page); 84 unlock_page(page);
85 return 0; 85 return 0;
@@ -103,10 +103,10 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
103 pglen = PAGE_CACHE_SIZE - base; 103 pglen = PAGE_CACHE_SIZE - base;
104 for (;;) { 104 for (;;) {
105 if (remainder <= pglen) { 105 if (remainder <= pglen) {
106 memclear_highpage_flush(*pages, base, remainder); 106 zero_user_page(*pages, base, remainder, KM_USER0);
107 break; 107 break;
108 } 108 }
109 memclear_highpage_flush(*pages, base, pglen); 109 zero_user_page(*pages, base, pglen, KM_USER0);
110 pages++; 110 pages++;
111 remainder -= pglen; 111 remainder -= pglen;
112 pglen = PAGE_CACHE_SIZE; 112 pglen = PAGE_CACHE_SIZE;
@@ -130,7 +130,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
130 return PTR_ERR(new); 130 return PTR_ERR(new);
131 } 131 }
132 if (len < PAGE_CACHE_SIZE) 132 if (len < PAGE_CACHE_SIZE)
133 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 133 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
134 134
135 nfs_list_add_request(new, &one_request); 135 nfs_list_add_request(new, &one_request);
136 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 136 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
@@ -532,7 +532,7 @@ readpage_async_filler(void *data, struct page *page)
532 return PTR_ERR(new); 532 return PTR_ERR(new);
533 } 533 }
534 if (len < PAGE_CACHE_SIZE) 534 if (len < PAGE_CACHE_SIZE)
535 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 535 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
536 nfs_pageio_add_request(desc->pgio, new); 536 nfs_pageio_add_request(desc->pgio, new);
537 return 0; 537 return 0;
538} 538}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de92b9509d94..b084c03ce493 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -58,7 +58,7 @@ struct nfs_write_data *nfs_commit_alloc(void)
58 return p; 58 return p;
59} 59}
60 60
61void nfs_commit_rcu_free(struct rcu_head *head) 61static void nfs_commit_rcu_free(struct rcu_head *head)
62{ 62{
63 struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); 63 struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
64 if (p && (p->pagevec != &p->page_array[0])) 64 if (p && (p->pagevec != &p->page_array[0]))
@@ -168,7 +168,7 @@ static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int
168 if (count != nfs_page_length(page)) 168 if (count != nfs_page_length(page))
169 return; 169 return;
170 if (count != PAGE_CACHE_SIZE) 170 if (count != PAGE_CACHE_SIZE)
171 memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); 171 zero_user_page(page, count, PAGE_CACHE_SIZE - count, KM_USER0);
172 SetPageUptodate(page); 172 SetPageUptodate(page);
173} 173}
174 174
@@ -922,7 +922,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
922 return 0; 922 return 0;
923 out_bad: 923 out_bad:
924 while (!list_empty(head)) { 924 while (!list_empty(head)) {
925 struct nfs_page *req = nfs_list_entry(head->next); 925 req = nfs_list_entry(head->next);
926 nfs_list_remove_request(req); 926 nfs_list_remove_request(req);
927 nfs_redirty_request(req); 927 nfs_redirty_request(req);
928 nfs_end_page_writeback(req->wb_page); 928 nfs_end_page_writeback(req->wb_page);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 32ffea033c7a..864090edc28b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -38,6 +38,7 @@
38#include <linux/inet.h> 38#include <linux/inet.h>
39#include <linux/errno.h> 39#include <linux/errno.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/sched.h>
41#include <linux/sunrpc/xdr.h> 42#include <linux/sunrpc/xdr.h>
42#include <linux/sunrpc/svc.h> 43#include <linux/sunrpc/svc.h>
43#include <linux/sunrpc/clnt.h> 44#include <linux/sunrpc/clnt.h>
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index c7774e3a9469..ebd03cc07479 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -45,7 +45,7 @@
45#include <asm/uaccess.h> 45#include <asm/uaccess.h>
46#include <asm/scatterlist.h> 46#include <asm/scatterlist.h>
47#include <linux/crypto.h> 47#include <linux/crypto.h>
48 48#include <linux/sched.h>
49 49
50#define NFSDDBG_FACILITY NFSDDBG_PROC 50#define NFSDDBG_FACILITY NFSDDBG_PROC
51 51
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index d7759ce6ed94..ff55950efb43 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -9,7 +9,7 @@
9 */ 9 */
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12 12#include <linux/sched.h>
13#include <linux/time.h> 13#include <linux/time.h>
14#include <linux/errno.h> 14#include <linux/errno.h>
15#include <linux/nfs.h> 15#include <linux/nfs.h>
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 39a1669506bd..7ed56390b582 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -26,6 +26,7 @@
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/uio.h> 27#include <linux/uio.h>
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/sched.h>
29 30
30#include <asm/page.h> 31#include <asm/page.h>
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 21d834e5ed73..4566b9182551 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3085,8 +3085,7 @@ static void ntfs_big_inode_init_once(void *foo, struct kmem_cache *cachep,
3085{ 3085{
3086 ntfs_inode *ni = (ntfs_inode *)foo; 3086 ntfs_inode *ni = (ntfs_inode *)foo;
3087 3087
3088 if (flags & SLAB_CTOR_CONSTRUCTOR) 3088 inode_init_once(VFS_I(ni));
3089 inode_init_once(VFS_I(ni));
3090} 3089}
3091 3090
3092/* 3091/*
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 5671cf9d6383..fd8cb1badc9b 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -262,12 +262,10 @@ static void dlmfs_init_once(void *foo,
262 struct dlmfs_inode_private *ip = 262 struct dlmfs_inode_private *ip =
263 (struct dlmfs_inode_private *) foo; 263 (struct dlmfs_inode_private *) foo;
264 264
265 if (flags & SLAB_CTOR_CONSTRUCTOR) { 265 ip->ip_dlm = NULL;
266 ip->ip_dlm = NULL; 266 ip->ip_parent = NULL;
267 ip->ip_parent = NULL;
268 267
269 inode_init_once(&ip->ip_vfs_inode); 268 inode_init_once(&ip->ip_vfs_inode);
270 }
271} 269}
272 270
273static struct inode *dlmfs_alloc_inode(struct super_block *sb) 271static struct inode *dlmfs_alloc_inode(struct super_block *sb)
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7c5e3f5d6634..86b559c7dce9 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -937,31 +937,29 @@ static void ocfs2_inode_init_once(void *data,
937{ 937{
938 struct ocfs2_inode_info *oi = data; 938 struct ocfs2_inode_info *oi = data;
939 939
940 if (flags & SLAB_CTOR_CONSTRUCTOR) { 940 oi->ip_flags = 0;
941 oi->ip_flags = 0; 941 oi->ip_open_count = 0;
942 oi->ip_open_count = 0; 942 spin_lock_init(&oi->ip_lock);
943 spin_lock_init(&oi->ip_lock); 943 ocfs2_extent_map_init(&oi->vfs_inode);
944 ocfs2_extent_map_init(&oi->vfs_inode); 944 INIT_LIST_HEAD(&oi->ip_io_markers);
945 INIT_LIST_HEAD(&oi->ip_io_markers); 945 oi->ip_created_trans = 0;
946 oi->ip_created_trans = 0; 946 oi->ip_last_trans = 0;
947 oi->ip_last_trans = 0; 947 oi->ip_dir_start_lookup = 0;
948 oi->ip_dir_start_lookup = 0;
949 948
950 init_rwsem(&oi->ip_alloc_sem); 949 init_rwsem(&oi->ip_alloc_sem);
951 mutex_init(&oi->ip_io_mutex); 950 mutex_init(&oi->ip_io_mutex);
952 951
953 oi->ip_blkno = 0ULL; 952 oi->ip_blkno = 0ULL;
954 oi->ip_clusters = 0; 953 oi->ip_clusters = 0;
955 954
956 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 955 ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
957 ocfs2_lock_res_init_once(&oi->ip_meta_lockres); 956 ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
958 ocfs2_lock_res_init_once(&oi->ip_data_lockres); 957 ocfs2_lock_res_init_once(&oi->ip_data_lockres);
959 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 958 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
960 959
961 ocfs2_metadata_cache_init(&oi->vfs_inode); 960 ocfs2_metadata_cache_init(&oi->vfs_inode);
962 961
963 inode_init_once(&oi->vfs_inode); 962 inode_init_once(&oi->vfs_inode);
964 }
965} 963}
966 964
967static int ocfs2_initialize_mem_caches(void) 965static int ocfs2_initialize_mem_caches(void)
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 731a90e9f0cd..e62397341c36 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -419,8 +419,7 @@ static void op_inode_init_once(void *data, struct kmem_cache * cachep, unsigned
419{ 419{
420 struct op_inode_info *oi = (struct op_inode_info *) data; 420 struct op_inode_info *oi = (struct op_inode_info *) data;
421 421
422 if (flags & SLAB_CTOR_CONSTRUCTOR) 422 inode_init_once(&oi->vfs_inode);
423 inode_init_once(&oi->vfs_inode);
424} 423}
425 424
426static int __init init_openprom_fs(void) 425static int __init init_openprom_fs(void)
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index 7638a1c42a7d..a99acd8de353 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -166,8 +166,12 @@ config LDM_PARTITION
166 depends on PARTITION_ADVANCED 166 depends on PARTITION_ADVANCED
167 ---help--- 167 ---help---
168 Say Y here if you would like to use hard disks under Linux which 168 Say Y here if you would like to use hard disks under Linux which
169 were partitioned using Windows 2000's or XP's Logical Disk Manager. 169 were partitioned using Windows 2000's/XP's or Vista's Logical Disk
170 They are also known as "Dynamic Disks". 170 Manager. They are also known as "Dynamic Disks".
171
172 Note this driver only supports Dynamic Disks with a protective MBR
173 label, i.e. DOS partition table. It does not support GPT labelled
174 Dynamic Disks yet as can be created with Vista.
171 175
172 Windows 2000 introduced the concept of Dynamic Disks to get around 176 Windows 2000 introduced the concept of Dynamic Disks to get around
173 the limitations of the PC's partitioning scheme. The Logical Disk 177 the limitations of the PC's partitioning scheme. The Logical Disk
@@ -175,8 +179,8 @@ config LDM_PARTITION
175 mirrored, striped or RAID volumes, all without the need for 179 mirrored, striped or RAID volumes, all without the need for
176 rebooting. 180 rebooting.
177 181
178 Normal partitions are now called Basic Disks under Windows 2000 and 182 Normal partitions are now called Basic Disks under Windows 2000, XP,
179 XP. 183 and Vista.
180 184
181 For a fuller description read <file:Documentation/ldm.txt>. 185 For a fuller description read <file:Documentation/ldm.txt>.
182 186
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 1a60926a4ccd..99873a2b4cbc 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -2,10 +2,10 @@
2 * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) 2 * ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
3 * 3 *
4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> 4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
5 * Copyright (c) 2001-2004 Anton Altaparmakov 5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> 6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
7 * 7 *
8 * Documentation is available at http://linux-ntfs.sf.net/ldm 8 * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify it under 10 * This program is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU General Public License as published by the Free Software 11 * the terms of the GNU General Public License as published by the Free Software
@@ -62,7 +62,6 @@ static void _ldm_printk (const char *level, const char *function,
62 printk ("%s%s(): %s\n", level, function, buf); 62 printk ("%s%s(): %s\n", level, function, buf);
63} 63}
64 64
65
66/** 65/**
67 * ldm_parse_hexbyte - Convert a ASCII hex number to a byte 66 * ldm_parse_hexbyte - Convert a ASCII hex number to a byte
68 * @src: Pointer to at least 2 characters to convert. 67 * @src: Pointer to at least 2 characters to convert.
@@ -118,7 +117,6 @@ static bool ldm_parse_guid (const u8 *src, u8 *dest)
118 return true; 117 return true;
119} 118}
120 119
121
122/** 120/**
123 * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure 121 * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure
124 * @data: Raw database PRIVHEAD structure loaded from the device 122 * @data: Raw database PRIVHEAD structure loaded from the device
@@ -130,46 +128,48 @@ static bool ldm_parse_guid (const u8 *src, u8 *dest)
130 * Return: 'true' @ph contains the PRIVHEAD data 128 * Return: 'true' @ph contains the PRIVHEAD data
131 * 'false' @ph contents are undefined 129 * 'false' @ph contents are undefined
132 */ 130 */
133static bool ldm_parse_privhead (const u8 *data, struct privhead *ph) 131static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
134{ 132{
135 BUG_ON (!data || !ph); 133 bool is_vista = false;
136 134
137 if (MAGIC_PRIVHEAD != BE64 (data)) { 135 BUG_ON(!data || !ph);
138 ldm_error ("Cannot find PRIVHEAD structure. LDM database is" 136 if (MAGIC_PRIVHEAD != BE64(data)) {
137 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
139 " corrupt. Aborting."); 138 " corrupt. Aborting.");
140 return false; 139 return false;
141 } 140 }
142 141 ph->ver_major = BE16(data + 0x000C);
143 ph->ver_major = BE16 (data + 0x000C); 142 ph->ver_minor = BE16(data + 0x000E);
144 ph->ver_minor = BE16 (data + 0x000E); 143 ph->logical_disk_start = BE64(data + 0x011B);
145 ph->logical_disk_start = BE64 (data + 0x011B); 144 ph->logical_disk_size = BE64(data + 0x0123);
146 ph->logical_disk_size = BE64 (data + 0x0123); 145 ph->config_start = BE64(data + 0x012B);
147 ph->config_start = BE64 (data + 0x012B); 146 ph->config_size = BE64(data + 0x0133);
148 ph->config_size = BE64 (data + 0x0133); 147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
149 148 if (ph->ver_major == 2 && ph->ver_minor == 12)
150 if ((ph->ver_major != 2) || (ph->ver_minor != 11)) { 149 is_vista = true;
151 ldm_error ("Expected PRIVHEAD version %d.%d, got %d.%d." 150 if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) {
152 " Aborting.", 2, 11, ph->ver_major, ph->ver_minor); 151 ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d."
152 " Aborting.", ph->ver_major, ph->ver_minor);
153 return false; 153 return false;
154 } 154 }
155 ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major,
156 ph->ver_minor, is_vista ? "Vista" : "2000/XP");
155 if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */ 157 if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */
156 /* Warn the user and continue, carefully */ 158 /* Warn the user and continue, carefully. */
157 ldm_info ("Database is normally %u bytes, it claims to " 159 ldm_info("Database is normally %u bytes, it claims to "
158 "be %llu bytes.", LDM_DB_SIZE, 160 "be %llu bytes.", LDM_DB_SIZE,
159 (unsigned long long)ph->config_size ); 161 (unsigned long long)ph->config_size);
160 } 162 }
161 if ((ph->logical_disk_size == 0) || 163 if ((ph->logical_disk_size == 0) || (ph->logical_disk_start +
162 (ph->logical_disk_start + ph->logical_disk_size > ph->config_start)) { 164 ph->logical_disk_size > ph->config_start)) {
163 ldm_error ("PRIVHEAD disk size doesn't match real disk size"); 165 ldm_error("PRIVHEAD disk size doesn't match real disk size");
164 return false; 166 return false;
165 } 167 }
166 168 if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) {
167 if (!ldm_parse_guid (data + 0x0030, ph->disk_id)) { 169 ldm_error("PRIVHEAD contains an invalid GUID.");
168 ldm_error ("PRIVHEAD contains an invalid GUID.");
169 return false; 170 return false;
170 } 171 }
171 172 ldm_debug("Parsed PRIVHEAD successfully.");
172 ldm_debug ("Parsed PRIVHEAD successfully.");
173 return true; 173 return true;
174} 174}
175 175
@@ -409,7 +409,7 @@ out:
409 * Return: 'true' @toc1 contains validated TOCBLOCK info 409 * Return: 'true' @toc1 contains validated TOCBLOCK info
410 * 'false' @toc1 contents are undefined 410 * 'false' @toc1 contents are undefined
411 */ 411 */
412static bool ldm_validate_tocblocks (struct block_device *bdev, 412static bool ldm_validate_tocblocks(struct block_device *bdev,
413 unsigned long base, struct ldmdb *ldb) 413 unsigned long base, struct ldmdb *ldb)
414{ 414{
415 static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; 415 static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4};
@@ -417,54 +417,57 @@ static bool ldm_validate_tocblocks (struct block_device *bdev,
417 struct privhead *ph; 417 struct privhead *ph;
418 Sector sect; 418 Sector sect;
419 u8 *data; 419 u8 *data;
420 int i, nr_tbs;
420 bool result = false; 421 bool result = false;
421 int i;
422 422
423 BUG_ON (!bdev || !ldb); 423 BUG_ON(!bdev || !ldb);
424 424 ph = &ldb->ph;
425 ph = &ldb->ph;
426 tb[0] = &ldb->toc; 425 tb[0] = &ldb->toc;
427 tb[1] = kmalloc (sizeof (*tb[1]), GFP_KERNEL); 426 tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL);
428 tb[2] = kmalloc (sizeof (*tb[2]), GFP_KERNEL); 427 if (!tb[1]) {
429 tb[3] = kmalloc (sizeof (*tb[3]), GFP_KERNEL); 428 ldm_crit("Out of memory.");
430 if (!tb[1] || !tb[2] || !tb[3]) { 429 goto err;
431 ldm_crit ("Out of memory.");
432 goto out;
433 } 430 }
434 431 tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1]));
435 for (i = 0; i < 4; i++) /* Read and parse all four toc's. */ 432 tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2]));
436 { 433 /*
437 data = read_dev_sector (bdev, base + off[i], &sect); 434 * Try to read and parse all four TOCBLOCKs.
435 *
436 * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so
437 * skip any that fail as long as we get at least one valid TOCBLOCK.
438 */
439 for (nr_tbs = i = 0; i < 4; i++) {
440 data = read_dev_sector(bdev, base + off[i], &sect);
438 if (!data) { 441 if (!data) {
439 ldm_crit ("Disk read failed."); 442 ldm_error("Disk read failed for TOCBLOCK %d.", i);
440 goto out; 443 continue;
441 } 444 }
442 result = ldm_parse_tocblock (data, tb[i]); 445 if (ldm_parse_tocblock(data, tb[nr_tbs]))
443 put_dev_sector (sect); 446 nr_tbs++;
444 if (!result) 447 put_dev_sector(sect);
445 goto out; /* Already logged */
446 } 448 }
447 449 if (!nr_tbs) {
448 /* Range check the toc against a privhead. */ 450 ldm_crit("Failed to find a valid TOCBLOCK.");
451 goto err;
452 }
453 /* Range check the TOCBLOCK against a privhead. */
449 if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) || 454 if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) ||
450 ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > ph->config_size)) { 455 ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) >
451 ldm_crit ("The bitmaps are out of range. Giving up."); 456 ph->config_size)) {
452 goto out; 457 ldm_crit("The bitmaps are out of range. Giving up.");
458 goto err;
453 } 459 }
454 460 /* Compare all loaded TOCBLOCKs. */
455 if (!ldm_compare_tocblocks (tb[0], tb[1]) || /* Compare all tocs. */ 461 for (i = 1; i < nr_tbs; i++) {
456 !ldm_compare_tocblocks (tb[0], tb[2]) || 462 if (!ldm_compare_tocblocks(tb[0], tb[i])) {
457 !ldm_compare_tocblocks (tb[0], tb[3])) { 463 ldm_crit("TOCBLOCKs 0 and %d do not match.", i);
458 ldm_crit ("The TOCBLOCKs don't match."); 464 goto err;
459 goto out; 465 }
460 } 466 }
461 467 ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs);
462 ldm_debug ("Validated TOCBLOCKs successfully.");
463 result = true; 468 result = true;
464out: 469err:
465 kfree (tb[1]); 470 kfree(tb[1]);
466 kfree (tb[2]);
467 kfree (tb[3]);
468 return result; 471 return result;
469} 472}
470 473
@@ -566,7 +569,7 @@ static bool ldm_validate_partition_table (struct block_device *bdev)
566 569
567 p = (struct partition*)(data + 0x01BE); 570 p = (struct partition*)(data + 0x01BE);
568 for (i = 0; i < 4; i++, p++) 571 for (i = 0; i < 4; i++, p++)
569 if (SYS_IND (p) == WIN2K_DYNAMIC_PARTITION) { 572 if (SYS_IND (p) == LDM_PARTITION) {
570 result = true; 573 result = true;
571 break; 574 break;
572 } 575 }
@@ -975,44 +978,68 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
975 * Return: 'true' @vb contains a Partition VBLK 978 * Return: 'true' @vb contains a Partition VBLK
976 * 'false' @vb contents are not defined 979 * 'false' @vb contents are not defined
977 */ 980 */
978static bool ldm_parse_prt3 (const u8 *buffer, int buflen, struct vblk *vb) 981static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
979{ 982{
980 int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len; 983 int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len;
981 struct vblk_part *part; 984 struct vblk_part *part;
982 985
983 BUG_ON (!buffer || !vb); 986 BUG_ON(!buffer || !vb);
984 987 r_objid = ldm_relative(buffer, buflen, 0x18, 0);
985 r_objid = ldm_relative (buffer, buflen, 0x18, 0); 988 if (r_objid < 0) {
986 r_name = ldm_relative (buffer, buflen, 0x18, r_objid); 989 ldm_error("r_objid %d < 0", r_objid);
987 r_size = ldm_relative (buffer, buflen, 0x34, r_name); 990 return false;
988 r_parent = ldm_relative (buffer, buflen, 0x34, r_size); 991 }
989 r_diskid = ldm_relative (buffer, buflen, 0x34, r_parent); 992 r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
990 993 if (r_name < 0) {
994 ldm_error("r_name %d < 0", r_name);
995 return false;
996 }
997 r_size = ldm_relative(buffer, buflen, 0x34, r_name);
998 if (r_size < 0) {
999 ldm_error("r_size %d < 0", r_size);
1000 return false;
1001 }
1002 r_parent = ldm_relative(buffer, buflen, 0x34, r_size);
1003 if (r_parent < 0) {
1004 ldm_error("r_parent %d < 0", r_parent);
1005 return false;
1006 }
1007 r_diskid = ldm_relative(buffer, buflen, 0x34, r_parent);
1008 if (r_diskid < 0) {
1009 ldm_error("r_diskid %d < 0", r_diskid);
1010 return false;
1011 }
991 if (buffer[0x12] & VBLK_FLAG_PART_INDEX) { 1012 if (buffer[0x12] & VBLK_FLAG_PART_INDEX) {
992 r_index = ldm_relative (buffer, buflen, 0x34, r_diskid); 1013 r_index = ldm_relative(buffer, buflen, 0x34, r_diskid);
1014 if (r_index < 0) {
1015 ldm_error("r_index %d < 0", r_index);
1016 return false;
1017 }
993 len = r_index; 1018 len = r_index;
994 } else { 1019 } else {
995 r_index = 0; 1020 r_index = 0;
996 len = r_diskid; 1021 len = r_diskid;
997 } 1022 }
998 if (len < 0) 1023 if (len < 0) {
1024 ldm_error("len %d < 0", len);
999 return false; 1025 return false;
1000 1026 }
1001 len += VBLK_SIZE_PRT3; 1027 len += VBLK_SIZE_PRT3;
1002 if (len != BE32 (buffer + 0x14)) 1028 if (len > BE32(buffer + 0x14)) {
1029 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1030 BE32(buffer + 0x14));
1003 return false; 1031 return false;
1004 1032 }
1005 part = &vb->vblk.part; 1033 part = &vb->vblk.part;
1006 part->start = BE64 (buffer + 0x24 + r_name); 1034 part->start = BE64(buffer + 0x24 + r_name);
1007 part->volume_offset = BE64 (buffer + 0x2C + r_name); 1035 part->volume_offset = BE64(buffer + 0x2C + r_name);
1008 part->size = ldm_get_vnum (buffer + 0x34 + r_name); 1036 part->size = ldm_get_vnum(buffer + 0x34 + r_name);
1009 part->parent_id = ldm_get_vnum (buffer + 0x34 + r_size); 1037 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
1010 part->disk_id = ldm_get_vnum (buffer + 0x34 + r_parent); 1038 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
1011 if (vb->flags & VBLK_FLAG_PART_INDEX) 1039 if (vb->flags & VBLK_FLAG_PART_INDEX)
1012 part->partnum = buffer[0x35 + r_diskid]; 1040 part->partnum = buffer[0x35 + r_diskid];
1013 else 1041 else
1014 part->partnum = 0; 1042 part->partnum = 0;
1015
1016 return true; 1043 return true;
1017} 1044}
1018 1045
@@ -1475,4 +1502,3 @@ out:
1475 kfree (ldb); 1502 kfree (ldb);
1476 return result; 1503 return result;
1477} 1504}
1478
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 6e8d7952b8b5..d2e6a3046939 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -2,10 +2,10 @@
2 * ldm - Part of the Linux-NTFS project. 2 * ldm - Part of the Linux-NTFS project.
3 * 3 *
4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> 4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
5 * Copyright (C) 2001 Anton Altaparmakov <aia21@cantab.net> 5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> 6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
7 * 7 *
8 * Documentation is available at http://linux-ntfs.sf.net/ldm 8 * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify it 10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free 11 * under the terms of the GNU General Public License as published by the Free
@@ -93,7 +93,7 @@ struct parsed_partitions;
93 93
94#define OFF_VMDB 17 /* List of partitions. */ 94#define OFF_VMDB 17 /* List of partitions. */
95 95
96#define WIN2K_DYNAMIC_PARTITION 0x42 /* Formerly SFS (Landis). */ 96#define LDM_PARTITION 0x42 /* Formerly SFS (Landis). */
97 97
98#define TOC_BITMAP1 "config" /* Names of the two defined */ 98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ 99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b8171907c83b..d5ce65c68d7b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -109,8 +109,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
109{ 109{
110 struct proc_inode *ei = (struct proc_inode *) foo; 110 struct proc_inode *ei = (struct proc_inode *) foo;
111 111
112 if (flags & SLAB_CTOR_CONSTRUCTOR) 112 inode_init_once(&ei->vfs_inode);
113 inode_init_once(&ei->vfs_inode);
114} 113}
115 114
116int __init proc_init_inodecache(void) 115int __init proc_init_inodecache(void)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 75fc8498f2e2..8d256eb11813 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -536,8 +536,7 @@ static void init_once(void *foo, struct kmem_cache * cachep,
536{ 536{
537 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 537 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
538 538
539 if (flags & SLAB_CTOR_CONSTRUCTOR) 539 inode_init_once(&ei->vfs_inode);
540 inode_init_once(&ei->vfs_inode);
541} 540}
542 541
543static int init_inodecache(void) 542static int init_inodecache(void)
diff --git a/fs/quota.c b/fs/quota.c
index e9d88fd0eca8..9f237d6182c9 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -157,7 +157,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
157static void quota_sync_sb(struct super_block *sb, int type) 157static void quota_sync_sb(struct super_block *sb, int type)
158{ 158{
159 int cnt; 159 int cnt;
160 struct inode *discard[MAXQUOTAS];
161 160
162 sb->s_qcop->quota_sync(sb, type); 161 sb->s_qcop->quota_sync(sb, type);
163 /* This is not very clever (and fast) but currently I don't know about 162 /* This is not very clever (and fast) but currently I don't know about
@@ -167,29 +166,21 @@ static void quota_sync_sb(struct super_block *sb, int type)
167 sb->s_op->sync_fs(sb, 1); 166 sb->s_op->sync_fs(sb, 1);
168 sync_blockdev(sb->s_bdev); 167 sync_blockdev(sb->s_bdev);
169 168
170 /* Now when everything is written we can discard the pagecache so 169 /*
171 * that userspace sees the changes. We need i_mutex and so we could 170 * Now when everything is written we can discard the pagecache so
172 * not do it inside dqonoff_mutex. Moreover we need to be carefull 171 * that userspace sees the changes.
173 * about races with quotaoff() (that is the reason why we have own 172 */
174 * reference to inode). */
175 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); 173 mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
176 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 174 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
177 discard[cnt] = NULL;
178 if (type != -1 && cnt != type) 175 if (type != -1 && cnt != type)
179 continue; 176 continue;
180 if (!sb_has_quota_enabled(sb, cnt)) 177 if (!sb_has_quota_enabled(sb, cnt))
181 continue; 178 continue;
182 discard[cnt] = igrab(sb_dqopt(sb)->files[cnt]); 179 mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA);
180 truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
181 mutex_unlock(&sb_dqopt(sb)->files[cnt]->i_mutex);
183 } 182 }
184 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); 183 mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
185 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
186 if (discard[cnt]) {
187 mutex_lock(&discard[cnt]->i_mutex);
188 truncate_inode_pages(&discard[cnt]->i_data, 0);
189 mutex_unlock(&discard[cnt]->i_mutex);
190 iput(discard[cnt]);
191 }
192 }
193} 184}
194 185
195void sync_dquots(struct super_block *sb, int type) 186void sync_dquots(struct super_block *sb, int type)
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 4ace5d72eae1..d40d22b347b7 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -32,7 +32,7 @@
32#include <linux/string.h> 32#include <linux/string.h>
33#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
34#include <linux/ramfs.h> 34#include <linux/ramfs.h>
35 35#include <linux/sched.h>
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37#include "internal.h" 37#include "internal.h"
38 38
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index c7762140c425..b4ac9119200e 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -511,14 +511,12 @@ static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags
511{ 511{
512 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 512 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
513 513
514 if (flags & SLAB_CTOR_CONSTRUCTOR) { 514 INIT_LIST_HEAD(&ei->i_prealloc_list);
515 INIT_LIST_HEAD(&ei->i_prealloc_list); 515 inode_init_once(&ei->vfs_inode);
516 inode_init_once(&ei->vfs_inode);
517#ifdef CONFIG_REISERFS_FS_POSIX_ACL 516#ifdef CONFIG_REISERFS_FS_POSIX_ACL
518 ei->i_acl_access = NULL; 517 ei->i_acl_access = NULL;
519 ei->i_acl_default = NULL; 518 ei->i_acl_default = NULL;
520#endif 519#endif
521 }
522} 520}
523 521
524static int init_inodecache(void) 522static int init_inodecache(void)
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 804285190271..2284e03342c6 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -566,12 +566,11 @@ static void romfs_destroy_inode(struct inode *inode)
566 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 566 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
567} 567}
568 568
569static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flags) 569static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
570{ 570{
571 struct romfs_inode_info *ei = (struct romfs_inode_info *) foo; 571 struct romfs_inode_info *ei = foo;
572 572
573 if (flags & SLAB_CTOR_CONSTRUCTOR) 573 inode_init_once(&ei->vfs_inode);
574 inode_init_once(&ei->vfs_inode);
575} 574}
576 575
577static int init_inodecache(void) 576static int init_inodecache(void)
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 50136b1a3eca..48da4fa6b7d4 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -13,6 +13,7 @@
13#include <linux/smp_lock.h> 13#include <linux/smp_lock.h>
14#include <linux/ctype.h> 14#include <linux/ctype.h>
15#include <linux/net.h> 15#include <linux/net.h>
16#include <linux/sched.h>
16 17
17#include <linux/smb_fs.h> 18#include <linux/smb_fs.h>
18#include <linux/smb_mount.h> 19#include <linux/smb_mount.h>
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index f161797160c4..aea3f8aa54c0 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -17,6 +17,7 @@
17#include <linux/pagemap.h> 17#include <linux/pagemap.h>
18#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
19#include <linux/net.h> 19#include <linux/net.h>
20#include <linux/aio.h>
20 21
21#include <asm/uaccess.h> 22#include <asm/uaccess.h>
22#include <asm/system.h> 23#include <asm/system.h>
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 424a3ddf86dd..6724a6cf01ff 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -25,6 +25,7 @@
25#include <linux/net.h> 25#include <linux/net.h>
26#include <linux/vfs.h> 26#include <linux/vfs.h>
27#include <linux/highuid.h> 27#include <linux/highuid.h>
28#include <linux/sched.h>
28#include <linux/smb_fs.h> 29#include <linux/smb_fs.h>
29#include <linux/smbno.h> 30#include <linux/smbno.h>
30#include <linux/smb_mount.h> 31#include <linux/smb_mount.h>
@@ -70,8 +71,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
70{ 71{
71 struct smb_inode_info *ei = (struct smb_inode_info *) foo; 72 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
72 73
73 if (flags & SLAB_CTOR_CONSTRUCTOR) 74 inode_init_once(&ei->vfs_inode);
74 inode_init_once(&ei->vfs_inode);
75} 75}
76 76
77static int init_inodecache(void) 77static int init_inodecache(void)
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index c288fbe7953d..3f54a0f80fae 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -11,6 +11,7 @@
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/net.h> 13#include <linux/net.h>
14#include <linux/sched.h>
14 15
15#include <linux/smb_fs.h> 16#include <linux/smb_fs.h>
16#include <linux/smbno.h> 17#include <linux/smbno.h>
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 4de5c6b89918..bdd30e74de6b 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -13,6 +13,7 @@
13#include <linux/backing-dev.h> 13#include <linux/backing-dev.h>
14#include <linux/capability.h> 14#include <linux/capability.h>
15#include <linux/errno.h> 15#include <linux/errno.h>
16#include <linux/sched.h>
16#include <asm/semaphore.h> 17#include <asm/semaphore.h>
17#include "sysfs.h" 18#include "sysfs.h"
18 19
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 3152d7415606..564411693394 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -322,8 +322,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
322{ 322{
323 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 323 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
324 324
325 if (flags & SLAB_CTOR_CONSTRUCTOR) 325 inode_init_once(&si->vfs_inode);
326 inode_init_once(&si->vfs_inode);
327} 326}
328 327
329const struct super_operations sysv_sops = { 328const struct super_operations sysv_sops = {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index e329e37f15a8..af9eca5c0230 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -24,7 +24,6 @@
24struct timerfd_ctx { 24struct timerfd_ctx {
25 struct hrtimer tmr; 25 struct hrtimer tmr;
26 ktime_t tintv; 26 ktime_t tintv;
27 spinlock_t lock;
28 wait_queue_head_t wqh; 27 wait_queue_head_t wqh;
29 int expired; 28 int expired;
30}; 29};
@@ -39,10 +38,10 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
39 struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr); 38 struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);
40 unsigned long flags; 39 unsigned long flags;
41 40
42 spin_lock_irqsave(&ctx->lock, flags); 41 spin_lock_irqsave(&ctx->wqh.lock, flags);
43 ctx->expired = 1; 42 ctx->expired = 1;
44 wake_up_locked(&ctx->wqh); 43 wake_up_locked(&ctx->wqh);
45 spin_unlock_irqrestore(&ctx->lock, flags); 44 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
46 45
47 return HRTIMER_NORESTART; 46 return HRTIMER_NORESTART;
48} 47}
@@ -83,10 +82,10 @@ static unsigned int timerfd_poll(struct file *file, poll_table *wait)
83 82
84 poll_wait(file, &ctx->wqh, wait); 83 poll_wait(file, &ctx->wqh, wait);
85 84
86 spin_lock_irqsave(&ctx->lock, flags); 85 spin_lock_irqsave(&ctx->wqh.lock, flags);
87 if (ctx->expired) 86 if (ctx->expired)
88 events |= POLLIN; 87 events |= POLLIN;
89 spin_unlock_irqrestore(&ctx->lock, flags); 88 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
90 89
91 return events; 90 return events;
92} 91}
@@ -101,7 +100,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
101 100
102 if (count < sizeof(ticks)) 101 if (count < sizeof(ticks))
103 return -EINVAL; 102 return -EINVAL;
104 spin_lock_irq(&ctx->lock); 103 spin_lock_irq(&ctx->wqh.lock);
105 res = -EAGAIN; 104 res = -EAGAIN;
106 if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) { 105 if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) {
107 __add_wait_queue(&ctx->wqh, &wait); 106 __add_wait_queue(&ctx->wqh, &wait);
@@ -115,9 +114,9 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
115 res = -ERESTARTSYS; 114 res = -ERESTARTSYS;
116 break; 115 break;
117 } 116 }
118 spin_unlock_irq(&ctx->lock); 117 spin_unlock_irq(&ctx->wqh.lock);
119 schedule(); 118 schedule();
120 spin_lock_irq(&ctx->lock); 119 spin_lock_irq(&ctx->wqh.lock);
121 } 120 }
122 __remove_wait_queue(&ctx->wqh, &wait); 121 __remove_wait_queue(&ctx->wqh, &wait);
123 __set_current_state(TASK_RUNNING); 122 __set_current_state(TASK_RUNNING);
@@ -139,7 +138,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
139 } else 138 } else
140 ticks = 1; 139 ticks = 1;
141 } 140 }
142 spin_unlock_irq(&ctx->lock); 141 spin_unlock_irq(&ctx->wqh.lock);
143 if (ticks) 142 if (ticks)
144 res = put_user(ticks, buf) ? -EFAULT: sizeof(ticks); 143 res = put_user(ticks, buf) ? -EFAULT: sizeof(ticks);
145 return res; 144 return res;
@@ -176,7 +175,6 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
176 return -ENOMEM; 175 return -ENOMEM;
177 176
178 init_waitqueue_head(&ctx->wqh); 177 init_waitqueue_head(&ctx->wqh);
179 spin_lock_init(&ctx->lock);
180 178
181 timerfd_setup(ctx, clockid, flags, &ktmr); 179 timerfd_setup(ctx, clockid, flags, &ktmr);
182 180
@@ -202,10 +200,10 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
202 * it to the new values. 200 * it to the new values.
203 */ 201 */
204 for (;;) { 202 for (;;) {
205 spin_lock_irq(&ctx->lock); 203 spin_lock_irq(&ctx->wqh.lock);
206 if (hrtimer_try_to_cancel(&ctx->tmr) >= 0) 204 if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
207 break; 205 break;
208 spin_unlock_irq(&ctx->lock); 206 spin_unlock_irq(&ctx->wqh.lock);
209 cpu_relax(); 207 cpu_relax();
210 } 208 }
211 /* 209 /*
@@ -213,7 +211,7 @@ asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
213 */ 211 */
214 timerfd_setup(ctx, clockid, flags, &ktmr); 212 timerfd_setup(ctx, clockid, flags, &ktmr);
215 213
216 spin_unlock_irq(&ctx->lock); 214 spin_unlock_irq(&ctx->wqh.lock);
217 fput(file); 215 fput(file);
218 } 216 }
219 217
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 40d5047defea..51b5764685e7 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -36,6 +36,7 @@
36#include <linux/smp_lock.h> 36#include <linux/smp_lock.h>
37#include <linux/pagemap.h> 37#include <linux/pagemap.h>
38#include <linux/buffer_head.h> 38#include <linux/buffer_head.h>
39#include <linux/aio.h>
39 40
40#include "udf_i.h" 41#include "udf_i.h"
41#include "udf_sb.h" 42#include "udf_sb.h"
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 91df4928651c..51fe307dc0ec 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -30,6 +30,7 @@
30#include <linux/quotaops.h> 30#include <linux/quotaops.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/buffer_head.h> 32#include <linux/buffer_head.h>
33#include <linux/sched.h>
33 34
34static inline int udf_match(int len1, const char *name1, int len2, const char *name2) 35static inline int udf_match(int len1, const char *name1, int len2, const char *name2)
35{ 36{
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 9b8644a06e53..3a743d854c17 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -134,10 +134,8 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
134{ 134{
135 struct udf_inode_info *ei = (struct udf_inode_info *) foo; 135 struct udf_inode_info *ei = (struct udf_inode_info *) foo;
136 136
137 if (flags & SLAB_CTOR_CONSTRUCTOR) { 137 ei->i_ext.i_data = NULL;
138 ei->i_ext.i_data = NULL; 138 inode_init_once(&ei->vfs_inode);
139 inode_init_once(&ei->vfs_inode);
140 }
141} 139}
142 140
143static int init_inodecache(void) 141static int init_inodecache(void)
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index be7c48c5f203..22ff6ed55ce9 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1237,8 +1237,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
1237{ 1237{
1238 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1238 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
1239 1239
1240 if (flags & SLAB_CTOR_CONSTRUCTOR) 1240 inode_init_once(&ei->vfs_inode);
1241 inode_init_once(&ei->vfs_inode);
1242} 1241}
1243 1242
1244static int init_inodecache(void) 1243static int init_inodecache(void)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 14e2cbe5a8d5..bf9a9d5909be 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -360,8 +360,7 @@ xfs_fs_inode_init_once(
360 kmem_zone_t *zonep, 360 kmem_zone_t *zonep,
361 unsigned long flags) 361 unsigned long flags)
362{ 362{
363 if (flags & SLAB_CTOR_CONSTRUCTOR) 363 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
364 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
365} 364}
366 365
367STATIC int 366STATIC int