aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/Locking31
-rw-r--r--Documentation/filesystems/sharedsubtree.txt4
-rw-r--r--drivers/char/mem.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c1
-rw-r--r--drivers/misc/ibmasm/ibmasmfs.c1
-rw-r--r--drivers/oprofile/oprofilefs.c1
-rw-r--r--drivers/staging/pohmelfs/inode.c6
-rw-r--r--drivers/usb/core/inode.c1
-rw-r--r--drivers/usb/gadget/f_fs.c1
-rw-r--r--drivers/usb/gadget/inode.c1
-rw-r--r--fs/9p/vfs_inode.c5
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/aio.c14
-rw-r--r--fs/anon_inodes.c6
-rw-r--r--fs/autofs4/inode.c1
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/binfmt_misc.c1
-rw-r--r--fs/block_dev.c34
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/buffer.c26
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/dcache.c277
-rw-r--r--fs/debugfs/inode.c1
-rw-r--r--fs/exofs/file.c6
-rw-r--r--fs/exofs/namei.c2
-rw-r--r--fs/exportfs/expfs.c17
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h1
-rw-r--r--fs/ext2/inode.c11
-rw-r--r--fs/ext2/namei.c2
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext3/namei.c2
-rw-r--r--fs/ext4/inode.c11
-rw-r--r--fs/ext4/mballoc.c1
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c1
-rw-r--r--fs/fs-writeback.c80
-rw-r--r--fs/fuse/control.c1
-rw-r--r--fs/gfs2/aops.c3
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/ops_inode.c8
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/hfs/hfs_fs.h13
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/hfs/super.c1
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/inode.c526
-rw-r--r--fs/internal.h7
-rw-r--r--fs/isofs/inode.c17
-rw-r--r--fs/jffs2/dir.c4
-rw-r--r--fs/jfs/jfs_imap.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/libfs.c8
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c16
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/getroot.c3
-rw-r--r--fs/nfsd/vfs.c16
-rw-r--r--fs/nilfs2/namei.c2
-rw-r--r--fs/notify/fsnotify.c33
-rw-r--r--fs/notify/inode_mark.c2
-rw-r--r--fs/ntfs/super.c19
-rw-r--r--fs/ocfs2/aops.c19
-rw-r--r--fs/ocfs2/aops.h3
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/proc_sysctl.c2
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/read_write.c28
-rw-r--r--fs/reiserfs/inode.c24
-rw-r--r--fs/reiserfs/ioctl.c6
-rw-r--r--fs/reiserfs/namei.c2
-rw-r--r--fs/reiserfs/xattr.c7
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/smbfs/dir.c16
-rw-r--r--fs/smbfs/inode.c1
-rw-r--r--fs/smbfs/proc.c10
-rw-r--r--fs/super.c8
-rw-r--r--fs/sysv/namei.c2
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c2
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--include/linux/buffer_head.h1
-rw-r--r--include/linux/fs.h39
-rw-r--r--include/linux/list.h6
-rw-r--r--include/linux/reiserfs_fs.h2
-rw-r--r--include/linux/writeback.h2
-rw-r--r--ipc/mqueue.c3
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/sysctl.c6
-rw-r--r--mm/backing-dev.c6
-rw-r--r--mm/shmem.c7
-rw-r--r--net/socket.c3
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--security/apparmor/path.c2
-rw-r--r--security/inode.c1
-rw-r--r--security/selinux/selinuxfs.c1
-rw-r--r--security/tomoyo/realpath.c2
118 files changed, 851 insertions, 689 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 2db4283efa8d..8a817f656f0a 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -349,21 +349,36 @@ call this method upon the IO completion.
349 349
350--------------------------- block_device_operations ----------------------- 350--------------------------- block_device_operations -----------------------
351prototypes: 351prototypes:
352 int (*open) (struct inode *, struct file *); 352 int (*open) (struct block_device *, fmode_t);
353 int (*release) (struct inode *, struct file *); 353 int (*release) (struct gendisk *, fmode_t);
354 int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); 354 int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
355 int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
356 int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
355 int (*media_changed) (struct gendisk *); 357 int (*media_changed) (struct gendisk *);
358 void (*unlock_native_capacity) (struct gendisk *);
356 int (*revalidate_disk) (struct gendisk *); 359 int (*revalidate_disk) (struct gendisk *);
360 int (*getgeo)(struct block_device *, struct hd_geometry *);
361 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
357 362
358locking rules: 363locking rules:
359 BKL bd_sem 364 BKL bd_mutex
360open: yes yes 365open: no yes
361release: yes yes 366release: no yes
362ioctl: yes no 367ioctl: no no
368compat_ioctl: no no
369direct_access: no no
363media_changed: no no 370media_changed: no no
371unlock_native_capacity: no no
364revalidate_disk: no no 372revalidate_disk: no no
373getgeo: no no
374swap_slot_free_notify: no no (see below)
375
376media_changed, unlock_native_capacity and revalidate_disk are called only from
377check_disk_change().
378
379swap_slot_free_notify is called with swap_lock and sometimes the page lock
380held.
365 381
366The last two are called only from check_disk_change().
367 382
368--------------------------- file_operations ------------------------------- 383--------------------------- file_operations -------------------------------
369prototypes: 384prototypes:
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index fc0e39af43c3..4ede421c9687 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -62,10 +62,10 @@ replicas continue to be exactly same.
62 # mount /dev/sd0 /tmp/a 62 # mount /dev/sd0 /tmp/a
63 63
64 #ls /tmp/a 64 #ls /tmp/a
65 t1 t2 t2 65 t1 t2 t3
66 66
67 #ls /mnt/a 67 #ls /mnt/a
68 t1 t2 t2 68 t1 t2 t3
69 69
70 Note that the mount has propagated to the mount at /mnt as well. 70 Note that the mount has propagated to the mount at /mnt as well.
71 71
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index e985b1c2730e..1256454b2d43 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -876,6 +876,10 @@ static int memory_open(struct inode *inode, struct file *filp)
876 if (dev->dev_info) 876 if (dev->dev_info)
877 filp->f_mapping->backing_dev_info = dev->dev_info; 877 filp->f_mapping->backing_dev_info = dev->dev_info;
878 878
879 /* Is /dev/mem or /dev/kmem ? */
880 if (dev->dev_info == &directly_mappable_cdev_bdi)
881 filp->f_mode |= FMODE_UNSIGNED_OFFSET;
882
879 if (dev->fops->open) 883 if (dev->fops->open)
880 return dev->fops->open(inode, filp); 884 return dev->fops->open(inode, filp);
881 885
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index d13e72685dcf..12d5bf76302c 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
57 goto bail; 57 goto bail;
58 } 58 }
59 59
60 inode->i_ino = get_next_ino();
60 inode->i_mode = mode; 61 inode->i_mode = mode;
61 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 62 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
62 inode->i_private = data; 63 inode->i_private = data;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index a0e6613e8be6..7e433d75c775 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
58 goto bail; 58 goto bail;
59 } 59 }
60 60
61 inode->i_ino = get_next_ino();
61 inode->i_mode = mode; 62 inode->i_mode = mode;
62 inode->i_uid = 0; 63 inode->i_uid = 0;
63 inode->i_gid = 0; 64 inode->i_gid = 0;
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index af2497ae5fe3..0a53500636c9 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -146,6 +146,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
146 struct inode *ret = new_inode(sb); 146 struct inode *ret = new_inode(sb);
147 147
148 if (ret) { 148 if (ret) {
149 ret->i_ino = get_next_ino();
149 ret->i_mode = mode; 150 ret->i_mode = mode;
150 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; 151 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
151 } 152 }
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index 95f711b251ad..449de59bf35b 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -28,6 +28,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode)
28 struct inode *inode = new_inode(sb); 28 struct inode *inode = new_inode(sb);
29 29
30 if (inode) { 30 if (inode) {
31 inode->i_ino = get_next_ino();
31 inode->i_mode = mode; 32 inode->i_mode = mode;
32 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 33 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
33 } 34 }
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index 97dae297ca3c..c62d30017c07 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -882,12 +882,8 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb)
882static int pohmelfs_fsync(struct file *file, int datasync) 882static int pohmelfs_fsync(struct file *file, int datasync)
883{ 883{
884 struct inode *inode = file->f_mapping->host; 884 struct inode *inode = file->f_mapping->host;
885 struct writeback_control wbc = {
886 .sync_mode = WB_SYNC_ALL,
887 .nr_to_write = 0, /* sys_fsync did this */
888 };
889 885
890 return sync_inode(inode, &wbc); 886 return sync_inode_metadata(inode, 1);
891} 887}
892 888
893ssize_t pohmelfs_write(struct file *file, const char __user *buf, 889ssize_t pohmelfs_write(struct file *file, const char __user *buf,
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 095fa5366690..e2f63c0ea09d 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -276,6 +276,7 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de
276 struct inode *inode = new_inode(sb); 276 struct inode *inode = new_inode(sb);
277 277
278 if (inode) { 278 if (inode) {
279 inode->i_ino = get_next_ino();
279 inode->i_mode = mode; 280 inode->i_mode = mode;
280 inode->i_uid = current_fsuid(); 281 inode->i_uid = current_fsuid();
281 inode->i_gid = current_fsgid(); 282 inode->i_gid = current_fsgid();
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
index e4f595055208..e093fd8d04d3 100644
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -980,6 +980,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
980 if (likely(inode)) { 980 if (likely(inode)) {
981 struct timespec current_time = CURRENT_TIME; 981 struct timespec current_time = CURRENT_TIME;
982 982
983 inode->i_ino = get_next_ino();
983 inode->i_mode = perms->mode; 984 inode->i_mode = perms->mode;
984 inode->i_uid = perms->uid; 985 inode->i_uid = perms->uid;
985 inode->i_gid = perms->gid; 986 inode->i_gid = perms->gid;
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index d1d72d946b04..ba145e7fbe03 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -1991,6 +1991,7 @@ gadgetfs_make_inode (struct super_block *sb,
1991 struct inode *inode = new_inode (sb); 1991 struct inode *inode = new_inode (sb);
1992 1992
1993 if (inode) { 1993 if (inode) {
1994 inode->i_ino = get_next_ino();
1994 inode->i_mode = mode; 1995 inode->i_mode = mode;
1995 inode->i_uid = default_uid; 1996 inode->i_uid = default_uid;
1996 inode->i_gid = default_gid; 1997 inode->i_gid = default_gid;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9e670d527646..ef5905f7c8a3 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1789,9 +1789,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
1789 kfree(st); 1789 kfree(st);
1790 } else { 1790 } else {
1791 /* Caching disabled. No need to get upto date stat info. 1791 /* Caching disabled. No need to get upto date stat info.
1792 * This dentry will be released immediately. So, just i_count++ 1792 * This dentry will be released immediately. So, just hold the
1793 * inode
1793 */ 1794 */
1794 atomic_inc(&old_dentry->d_inode->i_count); 1795 ihold(old_dentry->d_inode);
1795 } 1796 }
1796 1797
1797 dentry->d_op = old_dentry->d_op; 1798 dentry->d_op = old_dentry->d_op;
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c4a9875bd1a6..0a90dcd46de2 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -894,9 +894,9 @@ affs_truncate(struct inode *inode)
894 if (AFFS_SB(sb)->s_flags & SF_OFS) { 894 if (AFFS_SB(sb)->s_flags & SF_OFS) {
895 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0); 895 struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
896 u32 tmp; 896 u32 tmp;
897 if (IS_ERR(ext_bh)) { 897 if (IS_ERR(bh)) {
898 affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)", 898 affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
899 ext, PTR_ERR(ext_bh)); 899 ext, PTR_ERR(bh));
900 return; 900 return;
901 } 901 }
902 tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next); 902 tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3a0fdec175ba..5d828903ac69 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); 388 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
389 mark_buffer_dirty_inode(inode_bh, inode); 389 mark_buffer_dirty_inode(inode_bh, inode);
390 inode->i_nlink = 2; 390 inode->i_nlink = 2;
391 atomic_inc(&inode->i_count); 391 ihold(inode);
392 } 392 }
393 affs_fix_checksum(sb, bh); 393 affs_fix_checksum(sb, bh);
394 mark_buffer_dirty_inode(bh, inode); 394 mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 0d38c09bd55e..5439e1bc9a86 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
1045 if (ret < 0) 1045 if (ret < 0)
1046 goto link_error; 1046 goto link_error;
1047 1047
1048 atomic_inc(&vnode->vfs_inode.i_count); 1048 ihold(&vnode->vfs_inode);
1049 d_instantiate(dentry, &vnode->vfs_inode); 1049 d_instantiate(dentry, &vnode->vfs_inode);
1050 key_put(key); 1050 key_put(key);
1051 _leave(" = 0"); 1051 _leave(" = 0");
diff --git a/fs/aio.c b/fs/aio.c
index 250b0a73c8a8..8c8f6c5b6d79 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1543,7 +1543,19 @@ static void aio_batch_add(struct address_space *mapping,
1543 } 1543 }
1544 1544
1545 abe = mempool_alloc(abe_pool, GFP_KERNEL); 1545 abe = mempool_alloc(abe_pool, GFP_KERNEL);
1546 BUG_ON(!igrab(mapping->host)); 1546
1547 /*
1548 * we should be using igrab here, but
1549 * we don't want to hammer on the global
1550 * inode spinlock just to take an extra
1551 * reference on a file that we must already
1552 * have a reference to.
1553 *
1554 * When we're called, we always have a reference
1555 * on the file, so we must always have a reference
1556 * on the inode, so ihold() is safe here.
1557 */
1558 ihold(mapping->host);
1547 abe->mapping = mapping; 1559 abe->mapping = mapping;
1548 hlist_add_head(&abe->list, &batch_hash[bucket]); 1560 hlist_add_head(&abe->list, &batch_hash[bucket]);
1549 return; 1561 return;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e4b75d6eda83..5365527ca43f 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -111,10 +111,9 @@ struct file *anon_inode_getfile(const char *name,
111 path.mnt = mntget(anon_inode_mnt); 111 path.mnt = mntget(anon_inode_mnt);
112 /* 112 /*
113 * We know the anon_inode inode count is always greater than zero, 113 * We know the anon_inode inode count is always greater than zero,
114 * so we can avoid doing an igrab() and we can use an open-coded 114 * so ihold() is safe.
115 * atomic_inc().
116 */ 115 */
117 atomic_inc(&anon_inode_inode->i_count); 116 ihold(anon_inode_inode);
118 117
119 path.dentry->d_op = &anon_inodefs_dentry_operations; 118 path.dentry->d_op = &anon_inodefs_dentry_operations;
120 d_instantiate(path.dentry, anon_inode_inode); 119 d_instantiate(path.dentry, anon_inode_inode);
@@ -194,6 +193,7 @@ static struct inode *anon_inode_mkinode(void)
194 if (!inode) 193 if (!inode)
195 return ERR_PTR(-ENOMEM); 194 return ERR_PTR(-ENOMEM);
196 195
196 inode->i_ino = get_next_ino();
197 inode->i_fop = &anon_inode_fops; 197 inode->i_fop = &anon_inode_fops;
198 198
199 inode->i_mapping->a_ops = &anon_aops; 199 inode->i_mapping->a_ops = &anon_aops;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 821b2b955dac..ac87e49fa706 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb,
398 inode->i_gid = sb->s_root->d_inode->i_gid; 398 inode->i_gid = sb->s_root->d_inode->i_gid;
399 } 399 }
400 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 400 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
401 inode->i_ino = get_next_ino();
401 402
402 if (S_ISDIR(inf->mode)) { 403 if (S_ISDIR(inf->mode)) {
403 inode->i_nlink = 2; 404 inode->i_nlink = 2;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d967e052b779..685ecff3ab31 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
176 inc_nlink(inode); 176 inc_nlink(inode);
177 inode->i_ctime = CURRENT_TIME_SEC; 177 inode->i_ctime = CURRENT_TIME_SEC;
178 mark_inode_dirty(inode); 178 mark_inode_dirty(inode);
179 atomic_inc(&inode->i_count); 179 ihold(inode);
180 d_instantiate(new, inode); 180 d_instantiate(new, inode);
181 mutex_unlock(&info->bfs_lock); 181 mutex_unlock(&info->bfs_lock);
182 return 0; 182 return 0;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 139fc8083f53..29990f0eee0c 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
495 struct inode * inode = new_inode(sb); 495 struct inode * inode = new_inode(sb);
496 496
497 if (inode) { 497 if (inode) {
498 inode->i_ino = get_next_ino();
498 inode->i_mode = mode; 499 inode->i_mode = mode;
499 inode->i_atime = inode->i_mtime = inode->i_ctime = 500 inode->i_atime = inode->i_mtime = inode->i_ctime =
500 current_fs_time(inode->i_sb); 501 current_fs_time(inode->i_sb);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b737451e2e9d..dea3b628a6ce 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -48,6 +48,21 @@ inline struct block_device *I_BDEV(struct inode *inode)
48 48
49EXPORT_SYMBOL(I_BDEV); 49EXPORT_SYMBOL(I_BDEV);
50 50
51/*
52 * move the inode from its current bdi to a new bdi. if the inode is dirty
53 * we need to move it onto the dirty list of @dst so that the inode is always
54 * on the right list.
55 */
56static void bdev_inode_switch_bdi(struct inode *inode,
57 struct backing_dev_info *dst)
58{
59 spin_lock(&inode_lock);
60 inode->i_data.backing_dev_info = dst;
61 if (inode->i_state & I_DIRTY)
62 list_move(&inode->i_wb_list, &dst->wb.b_dirty);
63 spin_unlock(&inode_lock);
64}
65
51static sector_t max_block(struct block_device *bdev) 66static sector_t max_block(struct block_device *bdev)
52{ 67{
53 sector_t retval = ~((sector_t)0); 68 sector_t retval = ~((sector_t)0);
@@ -550,7 +565,7 @@ EXPORT_SYMBOL(bdget);
550 */ 565 */
551struct block_device *bdgrab(struct block_device *bdev) 566struct block_device *bdgrab(struct block_device *bdev)
552{ 567{
553 atomic_inc(&bdev->bd_inode->i_count); 568 ihold(bdev->bd_inode);
554 return bdev; 569 return bdev;
555} 570}
556 571
@@ -580,7 +595,7 @@ static struct block_device *bd_acquire(struct inode *inode)
580 spin_lock(&bdev_lock); 595 spin_lock(&bdev_lock);
581 bdev = inode->i_bdev; 596 bdev = inode->i_bdev;
582 if (bdev) { 597 if (bdev) {
583 atomic_inc(&bdev->bd_inode->i_count); 598 ihold(bdev->bd_inode);
584 spin_unlock(&bdev_lock); 599 spin_unlock(&bdev_lock);
585 return bdev; 600 return bdev;
586 } 601 }
@@ -591,12 +606,12 @@ static struct block_device *bd_acquire(struct inode *inode)
591 spin_lock(&bdev_lock); 606 spin_lock(&bdev_lock);
592 if (!inode->i_bdev) { 607 if (!inode->i_bdev) {
593 /* 608 /*
594 * We take an additional bd_inode->i_count for inode, 609 * We take an additional reference to bd_inode,
595 * and it's released in clear_inode() of inode. 610 * and it's released in clear_inode() of inode.
596 * So, we can access it via ->i_mapping always 611 * So, we can access it via ->i_mapping always
597 * without igrab(). 612 * without igrab().
598 */ 613 */
599 atomic_inc(&bdev->bd_inode->i_count); 614 ihold(bdev->bd_inode);
600 inode->i_bdev = bdev; 615 inode->i_bdev = bdev;
601 inode->i_mapping = bdev->bd_inode->i_mapping; 616 inode->i_mapping = bdev->bd_inode->i_mapping;
602 list_add(&inode->i_devices, &bdev->bd_inodes); 617 list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -1390,7 +1405,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1390 bdi = blk_get_backing_dev_info(bdev); 1405 bdi = blk_get_backing_dev_info(bdev);
1391 if (bdi == NULL) 1406 if (bdi == NULL)
1392 bdi = &default_backing_dev_info; 1407 bdi = &default_backing_dev_info;
1393 bdev->bd_inode->i_data.backing_dev_info = bdi; 1408 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1394 } 1409 }
1395 if (bdev->bd_invalidated) 1410 if (bdev->bd_invalidated)
1396 rescan_partitions(disk, bdev); 1411 rescan_partitions(disk, bdev);
@@ -1405,8 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1405 if (ret) 1420 if (ret)
1406 goto out_clear; 1421 goto out_clear;
1407 bdev->bd_contains = whole; 1422 bdev->bd_contains = whole;
1408 bdev->bd_inode->i_data.backing_dev_info = 1423 bdev_inode_switch_bdi(bdev->bd_inode,
1409 whole->bd_inode->i_data.backing_dev_info; 1424 whole->bd_inode->i_data.backing_dev_info);
1410 bdev->bd_part = disk_get_part(disk, partno); 1425 bdev->bd_part = disk_get_part(disk, partno);
1411 if (!(disk->flags & GENHD_FL_UP) || 1426 if (!(disk->flags & GENHD_FL_UP) ||
1412 !bdev->bd_part || !bdev->bd_part->nr_sects) { 1427 !bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1439,7 +1454,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1439 disk_put_part(bdev->bd_part); 1454 disk_put_part(bdev->bd_part);
1440 bdev->bd_disk = NULL; 1455 bdev->bd_disk = NULL;
1441 bdev->bd_part = NULL; 1456 bdev->bd_part = NULL;
1442 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1457 bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
1443 if (bdev != bdev->bd_contains) 1458 if (bdev != bdev->bd_contains)
1444 __blkdev_put(bdev->bd_contains, mode, 1); 1459 __blkdev_put(bdev->bd_contains, mode, 1);
1445 bdev->bd_contains = NULL; 1460 bdev->bd_contains = NULL;
@@ -1533,7 +1548,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1533 disk_put_part(bdev->bd_part); 1548 disk_put_part(bdev->bd_part);
1534 bdev->bd_part = NULL; 1549 bdev->bd_part = NULL;
1535 bdev->bd_disk = NULL; 1550 bdev->bd_disk = NULL;
1536 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1551 bdev_inode_switch_bdi(bdev->bd_inode,
1552 &default_backing_dev_info);
1537 if (bdev != bdev->bd_contains) 1553 if (bdev != bdev->bd_contains)
1538 victim = bdev->bd_contains; 1554 victim = bdev->bd_contains;
1539 bdev->bd_contains = NULL; 1555 bdev->bd_contains = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c03864406af3..64f99cf69ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3849,7 +3849,7 @@ again:
3849 p = &root->inode_tree.rb_node; 3849 p = &root->inode_tree.rb_node;
3850 parent = NULL; 3850 parent = NULL;
3851 3851
3852 if (hlist_unhashed(&inode->i_hash)) 3852 if (inode_unhashed(inode))
3853 return; 3853 return;
3854 3854
3855 spin_lock(&root->inode_lock); 3855 spin_lock(&root->inode_lock);
@@ -4758,7 +4758,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4758 } 4758 }
4759 4759
4760 btrfs_set_trans_block_group(trans, dir); 4760 btrfs_set_trans_block_group(trans, dir);
4761 atomic_inc(&inode->i_count); 4761 ihold(inode);
4762 4762
4763 err = btrfs_add_nondir(trans, dentry, inode, 1, index); 4763 err = btrfs_add_nondir(trans, dentry, inode, 1, index);
4764 4764
diff --git a/fs/buffer.c b/fs/buffer.c
index 8d595ab2aed1..5930e382959b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1833} 1833}
1834EXPORT_SYMBOL(page_zero_new_buffers); 1834EXPORT_SYMBOL(page_zero_new_buffers);
1835 1835
1836int block_prepare_write(struct page *page, unsigned from, unsigned to, 1836int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1837 get_block_t *get_block) 1837 get_block_t *get_block)
1838{ 1838{
1839 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1840 unsigned to = from + len;
1839 struct inode *inode = page->mapping->host; 1841 struct inode *inode = page->mapping->host;
1840 unsigned block_start, block_end; 1842 unsigned block_start, block_end;
1841 sector_t block; 1843 sector_t block;
@@ -1915,7 +1917,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
1915 } 1917 }
1916 return err; 1918 return err;
1917} 1919}
1918EXPORT_SYMBOL(block_prepare_write); 1920EXPORT_SYMBOL(__block_write_begin);
1919 1921
1920static int __block_commit_write(struct inode *inode, struct page *page, 1922static int __block_commit_write(struct inode *inode, struct page *page,
1921 unsigned from, unsigned to) 1923 unsigned from, unsigned to)
@@ -1952,15 +1954,6 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1952 return 0; 1954 return 0;
1953} 1955}
1954 1956
1955int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1956 get_block_t *get_block)
1957{
1958 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
1959
1960 return block_prepare_write(page, start, start + len, get_block);
1961}
1962EXPORT_SYMBOL(__block_write_begin);
1963
1964/* 1957/*
1965 * block_write_begin takes care of the basic task of block allocation and 1958 * block_write_begin takes care of the basic task of block allocation and
1966 * bringing partial write blocks uptodate first. 1959 * bringing partial write blocks uptodate first.
@@ -2378,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2378 else 2371 else
2379 end = PAGE_CACHE_SIZE; 2372 end = PAGE_CACHE_SIZE;
2380 2373
2381 ret = block_prepare_write(page, 0, end, get_block); 2374 ret = __block_write_begin(page, 0, end, get_block);
2382 if (!ret) 2375 if (!ret)
2383 ret = block_commit_write(page, 0, end); 2376 ret = block_commit_write(page, 0, end);
2384 2377
@@ -2465,11 +2458,10 @@ int nobh_write_begin(struct address_space *mapping,
2465 *fsdata = NULL; 2458 *fsdata = NULL;
2466 2459
2467 if (page_has_buffers(page)) { 2460 if (page_has_buffers(page)) {
2468 unlock_page(page); 2461 ret = __block_write_begin(page, pos, len, get_block);
2469 page_cache_release(page); 2462 if (unlikely(ret))
2470 *pagep = NULL; 2463 goto out_release;
2471 return block_write_begin(mapping, pos, len, flags, pagep, 2464 return ret;
2472 get_block);
2473 } 2465 }
2474 2466
2475 if (PageMappedToDisk(page)) 2467 if (PageMappedToDisk(page))
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 96fbeab77f2f..5d8b35539601 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -276,7 +276,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
276 } 276 }
277 277
278 coda_dir_update_mtime(dir_inode); 278 coda_dir_update_mtime(dir_inode);
279 atomic_inc(&inode->i_count); 279 ihold(inode);
280 d_instantiate(de, inode); 280 d_instantiate(de, inode);
281 inc_nlink(inode); 281 inc_nlink(inode);
282 return 0; 282 return 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cf78d44a8d6a..253476d78ed8 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
135{ 135{
136 struct inode * inode = new_inode(configfs_sb); 136 struct inode * inode = new_inode(configfs_sb);
137 if (inode) { 137 if (inode) {
138 inode->i_ino = get_next_ino();
138 inode->i_mapping->a_ops = &configfs_aops; 139 inode->i_mapping->a_ops = &configfs_aops;
139 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; 140 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
140 inode->i_op = &configfs_inode_operations; 141 inode->i_op = &configfs_inode_operations;
diff --git a/fs/dcache.c b/fs/dcache.c
index 83293be48149..23702a9d4e6d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 67 .age_limit = 45,
68}; 68};
69 69
70static void __d_free(struct dentry *dentry) 70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos)
76{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos);
80}
81#endif
82
83static void __d_free(struct rcu_head *head)
71{ 84{
85 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
86
72 WARN_ON(!list_empty(&dentry->d_alias)); 87 WARN_ON(!list_empty(&dentry->d_alias));
73 if (dname_external(dentry)) 88 if (dname_external(dentry))
74 kfree(dentry->d_name.name); 89 kfree(dentry->d_name.name);
75 kmem_cache_free(dentry_cache, dentry); 90 kmem_cache_free(dentry_cache, dentry);
76} 91}
77 92
78static void d_callback(struct rcu_head *head)
79{
80 struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
81 __d_free(dentry);
82}
83
84/* 93/*
85 * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry 94 * no dcache_lock, please.
86 * inside dcache_lock.
87 */ 95 */
88static void d_free(struct dentry *dentry) 96static void d_free(struct dentry *dentry)
89{ 97{
98 percpu_counter_dec(&nr_dentry);
90 if (dentry->d_op && dentry->d_op->d_release) 99 if (dentry->d_op && dentry->d_op->d_release)
91 dentry->d_op->d_release(dentry); 100 dentry->d_op->d_release(dentry);
101
92 /* if dentry was never inserted into hash, immediate free is OK */ 102 /* if dentry was never inserted into hash, immediate free is OK */
93 if (hlist_unhashed(&dentry->d_hash)) 103 if (hlist_unhashed(&dentry->d_hash))
94 __d_free(dentry); 104 __d_free(&dentry->d_u.d_rcu);
95 else 105 else
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 106 call_rcu(&dentry->d_u.d_rcu, __d_free);
97} 107}
98 108
99/* 109/*
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
123} 133}
124 134
125/* 135/*
126 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. 136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
127 */ 137 */
128static void dentry_lru_add(struct dentry *dentry) 138static void dentry_lru_add(struct dentry *dentry)
129{ 139{
130 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 140 if (list_empty(&dentry->d_lru)) {
131 dentry->d_sb->s_nr_dentry_unused++; 141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
132 dentry_stat.nr_unused++; 142 dentry->d_sb->s_nr_dentry_unused++;
133} 143 percpu_counter_inc(&nr_dentry_unused);
134 144 }
135static void dentry_lru_add_tail(struct dentry *dentry)
136{
137 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
138 dentry->d_sb->s_nr_dentry_unused++;
139 dentry_stat.nr_unused++;
140} 145}
141 146
142static void dentry_lru_del(struct dentry *dentry) 147static void dentry_lru_del(struct dentry *dentry)
143{ 148{
144 if (!list_empty(&dentry->d_lru)) { 149 if (!list_empty(&dentry->d_lru)) {
145 list_del(&dentry->d_lru); 150 list_del_init(&dentry->d_lru);
146 dentry->d_sb->s_nr_dentry_unused--; 151 dentry->d_sb->s_nr_dentry_unused--;
147 dentry_stat.nr_unused--; 152 percpu_counter_dec(&nr_dentry_unused);
148 } 153 }
149} 154}
150 155
151static void dentry_lru_del_init(struct dentry *dentry) 156static void dentry_lru_move_tail(struct dentry *dentry)
152{ 157{
153 if (likely(!list_empty(&dentry->d_lru))) { 158 if (list_empty(&dentry->d_lru)) {
154 list_del_init(&dentry->d_lru); 159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
155 dentry->d_sb->s_nr_dentry_unused--; 160 dentry->d_sb->s_nr_dentry_unused++;
156 dentry_stat.nr_unused--; 161 percpu_counter_inc(&nr_dentry_unused);
162 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
157 } 164 }
158} 165}
159 166
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry)
172 struct dentry *parent; 179 struct dentry *parent;
173 180
174 list_del(&dentry->d_u.d_child); 181 list_del(&dentry->d_u.d_child);
175 dentry_stat.nr_dentry--; /* For d_free, below */
176 /*drops the locks, at that point nobody can reach this dentry */ 182 /*drops the locks, at that point nobody can reach this dentry */
177 dentry_iput(dentry); 183 dentry_iput(dentry);
178 if (IS_ROOT(dentry)) 184 if (IS_ROOT(dentry))
@@ -237,13 +243,15 @@ repeat:
237 if (dentry->d_op->d_delete(dentry)) 243 if (dentry->d_op->d_delete(dentry))
238 goto unhash_it; 244 goto unhash_it;
239 } 245 }
246
240 /* Unreachable? Get rid of it */ 247 /* Unreachable? Get rid of it */
241 if (d_unhashed(dentry)) 248 if (d_unhashed(dentry))
242 goto kill_it; 249 goto kill_it;
243 if (list_empty(&dentry->d_lru)) { 250
244 dentry->d_flags |= DCACHE_REFERENCED; 251 /* Otherwise leave it cached and ensure it's on the LRU */
245 dentry_lru_add(dentry); 252 dentry->d_flags |= DCACHE_REFERENCED;
246 } 253 dentry_lru_add(dentry);
254
247 spin_unlock(&dentry->d_lock); 255 spin_unlock(&dentry->d_lock);
248 spin_unlock(&dcache_lock); 256 spin_unlock(&dcache_lock);
249 return; 257 return;
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry)
318EXPORT_SYMBOL(d_invalidate); 326EXPORT_SYMBOL(d_invalidate);
319 327
320/* This should be called _only_ with dcache_lock held */ 328/* This should be called _only_ with dcache_lock held */
321
322static inline struct dentry * __dget_locked(struct dentry *dentry) 329static inline struct dentry * __dget_locked(struct dentry *dentry)
323{ 330{
324 atomic_inc(&dentry->d_count); 331 atomic_inc(&dentry->d_count);
325 dentry_lru_del_init(dentry); 332 dentry_lru_del(dentry);
326 return dentry; 333 return dentry;
327} 334}
328 335
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry)
441 448
442 if (dentry->d_op && dentry->d_op->d_delete) 449 if (dentry->d_op && dentry->d_op->d_delete)
443 dentry->d_op->d_delete(dentry); 450 dentry->d_op->d_delete(dentry);
444 dentry_lru_del_init(dentry); 451 dentry_lru_del(dentry);
445 __d_drop(dentry); 452 __d_drop(dentry);
446 dentry = d_kill(dentry); 453 dentry = d_kill(dentry);
447 spin_lock(&dcache_lock); 454 spin_lock(&dcache_lock);
448 } 455 }
449} 456}
450 457
451/* 458static void shrink_dentry_list(struct list_head *list)
452 * Shrink the dentry LRU on a given superblock.
453 * @sb : superblock to shrink dentry LRU.
454 * @count: If count is NULL, we prune all dentries on superblock.
455 * @flags: If flags is non-zero, we need to do special processing based on
456 * which flags are set. This means we don't need to maintain multiple
457 * similar copies of this loop.
458 */
459static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
460{ 459{
461 LIST_HEAD(referenced);
462 LIST_HEAD(tmp);
463 struct dentry *dentry; 460 struct dentry *dentry;
464 int cnt = 0;
465 461
466 BUG_ON(!sb); 462 while (!list_empty(list)) {
467 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); 463 dentry = list_entry(list->prev, struct dentry, d_lru);
468 spin_lock(&dcache_lock); 464 dentry_lru_del(dentry);
469 if (count != NULL)
470 /* called from prune_dcache() and shrink_dcache_parent() */
471 cnt = *count;
472restart:
473 if (count == NULL)
474 list_splice_init(&sb->s_dentry_lru, &tmp);
475 else {
476 while (!list_empty(&sb->s_dentry_lru)) {
477 dentry = list_entry(sb->s_dentry_lru.prev,
478 struct dentry, d_lru);
479 BUG_ON(dentry->d_sb != sb);
480 465
481 spin_lock(&dentry->d_lock);
482 /*
483 * If we are honouring the DCACHE_REFERENCED flag and
484 * the dentry has this flag set, don't free it. Clear
485 * the flag and put it back on the LRU.
486 */
487 if ((flags & DCACHE_REFERENCED)
488 && (dentry->d_flags & DCACHE_REFERENCED)) {
489 dentry->d_flags &= ~DCACHE_REFERENCED;
490 list_move(&dentry->d_lru, &referenced);
491 spin_unlock(&dentry->d_lock);
492 } else {
493 list_move_tail(&dentry->d_lru, &tmp);
494 spin_unlock(&dentry->d_lock);
495 cnt--;
496 if (!cnt)
497 break;
498 }
499 cond_resched_lock(&dcache_lock);
500 }
501 }
502 while (!list_empty(&tmp)) {
503 dentry = list_entry(tmp.prev, struct dentry, d_lru);
504 dentry_lru_del_init(dentry);
505 spin_lock(&dentry->d_lock);
506 /* 466 /*
507 * We found an inuse dentry which was not removed from 467 * We found an inuse dentry which was not removed from
508 * the LRU because of laziness during lookup. Do not free 468 * the LRU because of laziness during lookup. Do not free
509 * it - just keep it off the LRU list. 469 * it - just keep it off the LRU list.
510 */ 470 */
471 spin_lock(&dentry->d_lock);
511 if (atomic_read(&dentry->d_count)) { 472 if (atomic_read(&dentry->d_count)) {
512 spin_unlock(&dentry->d_lock); 473 spin_unlock(&dentry->d_lock);
513 continue; 474 continue;
@@ -516,13 +477,60 @@ restart:
516 /* dentry->d_lock was dropped in prune_one_dentry() */ 477 /* dentry->d_lock was dropped in prune_one_dentry() */
517 cond_resched_lock(&dcache_lock); 478 cond_resched_lock(&dcache_lock);
518 } 479 }
519 if (count == NULL && !list_empty(&sb->s_dentry_lru)) 480}
520 goto restart; 481
521 if (count != NULL) 482/**
522 *count = cnt; 483 * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
484 * @sb: superblock to shrink dentry LRU.
485 * @count: number of entries to prune
486 * @flags: flags to control the dentry processing
487 *
488 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
489 */
490static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
491{
492 /* called from prune_dcache() and shrink_dcache_parent() */
493 struct dentry *dentry;
494 LIST_HEAD(referenced);
495 LIST_HEAD(tmp);
496 int cnt = *count;
497
498 spin_lock(&dcache_lock);
499 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb);
503
504 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU.
508 */
509 if (flags & DCACHE_REFERENCED) {
510 spin_lock(&dentry->d_lock);
511 if (dentry->d_flags & DCACHE_REFERENCED) {
512 dentry->d_flags &= ~DCACHE_REFERENCED;
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock);
519 }
520
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
523 if (!list_empty(&referenced)) 530 if (!list_empty(&referenced))
524 list_splice(&referenced, &sb->s_dentry_lru); 531 list_splice(&referenced, &sb->s_dentry_lru);
525 spin_unlock(&dcache_lock); 532 spin_unlock(&dcache_lock);
533
526} 534}
527 535
528/** 536/**
@@ -538,7 +546,7 @@ static void prune_dcache(int count)
538{ 546{
539 struct super_block *sb, *p = NULL; 547 struct super_block *sb, *p = NULL;
540 int w_count; 548 int w_count;
541 int unused = dentry_stat.nr_unused; 549 int unused = percpu_counter_sum_positive(&nr_dentry_unused);
542 int prune_ratio; 550 int prune_ratio;
543 int pruned; 551 int pruned;
544 552
@@ -608,13 +616,19 @@ static void prune_dcache(int count)
608 * shrink_dcache_sb - shrink dcache for a superblock 616 * shrink_dcache_sb - shrink dcache for a superblock
609 * @sb: superblock 617 * @sb: superblock
610 * 618 *
611 * Shrink the dcache for the specified super block. This 619 * Shrink the dcache for the specified super block. This is used to free
612 * is used to free the dcache before unmounting a file 620 * the dcache before unmounting a file system.
613 * system
614 */ 621 */
615void shrink_dcache_sb(struct super_block * sb) 622void shrink_dcache_sb(struct super_block *sb)
616{ 623{
617 __shrink_dcache_sb(sb, NULL, 0); 624 LIST_HEAD(tmp);
625
626 spin_lock(&dcache_lock);
627 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp);
629 shrink_dentry_list(&tmp);
630 }
631 spin_unlock(&dcache_lock);
618} 632}
619EXPORT_SYMBOL(shrink_dcache_sb); 633EXPORT_SYMBOL(shrink_dcache_sb);
620 634
@@ -632,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
632 646
633 /* detach this root from the system */ 647 /* detach this root from the system */
634 spin_lock(&dcache_lock); 648 spin_lock(&dcache_lock);
635 dentry_lru_del_init(dentry); 649 dentry_lru_del(dentry);
636 __d_drop(dentry); 650 __d_drop(dentry);
637 spin_unlock(&dcache_lock); 651 spin_unlock(&dcache_lock);
638 652
@@ -646,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
646 spin_lock(&dcache_lock); 660 spin_lock(&dcache_lock);
647 list_for_each_entry(loop, &dentry->d_subdirs, 661 list_for_each_entry(loop, &dentry->d_subdirs,
648 d_u.d_child) { 662 d_u.d_child) {
649 dentry_lru_del_init(loop); 663 dentry_lru_del(loop);
650 __d_drop(loop); 664 __d_drop(loop);
651 cond_resched_lock(&dcache_lock); 665 cond_resched_lock(&dcache_lock);
652 } 666 }
@@ -703,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
703 * otherwise we ascend to the parent and move to the 717 * otherwise we ascend to the parent and move to the
704 * next sibling if there is one */ 718 * next sibling if there is one */
705 if (!parent) 719 if (!parent)
706 goto out; 720 return;
707
708 dentry = parent; 721 dentry = parent;
709
710 } while (list_empty(&dentry->d_subdirs)); 722 } while (list_empty(&dentry->d_subdirs));
711 723
712 dentry = list_entry(dentry->d_subdirs.next, 724 dentry = list_entry(dentry->d_subdirs.next,
713 struct dentry, d_u.d_child); 725 struct dentry, d_u.d_child);
714 } 726 }
715out:
716 /* several dentries were freed, need to correct nr_dentry */
717 spin_lock(&dcache_lock);
718 dentry_stat.nr_dentry -= detached;
719 spin_unlock(&dcache_lock);
720} 727}
721 728
722/* 729/*
@@ -830,14 +837,15 @@ resume:
830 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
831 next = tmp->next; 838 next = tmp->next;
832 839
833 dentry_lru_del_init(dentry);
834 /* 840 /*
835 * move only zero ref count dentries to the end 841 * move only zero ref count dentries to the end
836 * of the unused list for prune_dcache 842 * of the unused list for prune_dcache
837 */ 843 */
838 if (!atomic_read(&dentry->d_count)) { 844 if (!atomic_read(&dentry->d_count)) {
839 dentry_lru_add_tail(dentry); 845 dentry_lru_move_tail(dentry);
840 found++; 846 found++;
847 } else {
848 dentry_lru_del(dentry);
841 } 849 }
842 850
843 /* 851 /*
@@ -900,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
900 */ 908 */
901static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
902{ 910{
911 int nr_unused;
912
903 if (nr) { 913 if (nr) {
904 if (!(gfp_mask & __GFP_FS)) 914 if (!(gfp_mask & __GFP_FS))
905 return -1; 915 return -1;
906 prune_dcache(nr); 916 prune_dcache(nr);
907 } 917 }
908 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 918
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
909} 921}
910 922
911static struct shrinker dcache_shrinker = { 923static struct shrinker dcache_shrinker = {
@@ -972,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
972 spin_lock(&dcache_lock); 984 spin_lock(&dcache_lock);
973 if (parent) 985 if (parent)
974 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 986 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
975 dentry_stat.nr_dentry++;
976 spin_unlock(&dcache_lock); 987 spin_unlock(&dcache_lock);
977 988
989 percpu_counter_inc(&nr_dentry);
990
978 return dentry; 991 return dentry;
979} 992}
980EXPORT_SYMBOL(d_alloc); 993EXPORT_SYMBOL(d_alloc);
@@ -1478,33 +1491,26 @@ out:
1478 * This is used by ncpfs in its readdir implementation. 1491 * This is used by ncpfs in its readdir implementation.
1479 * Zero is returned in the dentry is invalid. 1492 * Zero is returned in the dentry is invalid.
1480 */ 1493 */
1481 1494int d_validate(struct dentry *dentry, struct dentry *parent)
1482int d_validate(struct dentry *dentry, struct dentry *dparent)
1483{ 1495{
1484 struct hlist_head *base; 1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
1485 struct hlist_node *lhp; 1497 struct hlist_node *node;
1498 struct dentry *d;
1486 1499
1487 /* Check whether the ptr might be valid at all.. */ 1500 /* Check whether the ptr might be valid at all.. */
1488 if (!kmem_ptr_validate(dentry_cache, dentry)) 1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1489 goto out; 1502 return 0;
1490 1503 if (dentry->d_parent != parent)
1491 if (dentry->d_parent != dparent) 1504 return 0;
1492 goto out;
1493 1505
1494 spin_lock(&dcache_lock); 1506 rcu_read_lock();
1495 base = d_hash(dparent, dentry->d_name.hash); 1507 hlist_for_each_entry_rcu(d, node, head, d_hash) {
1496 hlist_for_each(lhp,base) { 1508 if (d == dentry) {
1497 /* hlist_for_each_entry_rcu() not required for d_hash list 1509 dget(dentry);
1498 * as it is parsed under dcache_lock
1499 */
1500 if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
1501 __dget_locked(dentry);
1502 spin_unlock(&dcache_lock);
1503 return 1; 1510 return 1;
1504 } 1511 }
1505 } 1512 }
1506 spin_unlock(&dcache_lock); 1513 rcu_read_unlock();
1507out:
1508 return 0; 1514 return 0;
1509} 1515}
1510EXPORT_SYMBOL(d_validate); 1516EXPORT_SYMBOL(d_validate);
@@ -1994,7 +2000,7 @@ global_root:
1994 * Returns a pointer into the buffer or an error code if the 2000 * Returns a pointer into the buffer or an error code if the
1995 * path was too long. 2001 * path was too long.
1996 * 2002 *
1997 * "buflen" should be positive. Caller holds the dcache_lock. 2003 * "buflen" should be positive.
1998 * 2004 *
1999 * If path is not reachable from the supplied root, then the value of 2005 * If path is not reachable from the supplied root, then the value of
2000 * root is changed (without modifying refcounts). 2006 * root is changed (without modifying refcounts).
@@ -2006,10 +2012,12 @@ char *__d_path(const struct path *path, struct path *root,
2006 int error; 2012 int error;
2007 2013
2008 prepend(&res, &buflen, "\0", 1); 2014 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock);
2009 error = prepend_path(path, root, &res, &buflen); 2016 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock);
2018
2010 if (error) 2019 if (error)
2011 return ERR_PTR(error); 2020 return ERR_PTR(error);
2012
2013 return res; 2021 return res;
2014} 2022}
2015 2023
@@ -2419,6 +2427,9 @@ static void __init dcache_init(void)
2419{ 2427{
2420 int loop; 2428 int loop;
2421 2429
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2422 /* 2433 /*
2423 * A constructor could be added for stable state like the lists, 2434 * A constructor could be added for stable state like the lists,
2424 * but it is probably not worth it because of the cache nature 2435 * but it is probably not worth it because of the cache nature
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 30a87b3dbcac..a4ed8380e98a 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
40 struct inode *inode = new_inode(sb); 40 struct inode *inode = new_inode(sb);
41 41
42 if (inode) { 42 if (inode) {
43 inode->i_ino = get_next_ino();
43 inode->i_mode = mode; 44 inode->i_mode = mode;
44 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 45 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
45 switch (mode & S_IFMT) { 46 switch (mode & S_IFMT) {
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 68cb23e3bb98..b905c79b4f0a 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -46,10 +46,6 @@ static int exofs_file_fsync(struct file *filp, int datasync)
46{ 46{
47 int ret; 47 int ret;
48 struct inode *inode = filp->f_mapping->host; 48 struct inode *inode = filp->f_mapping->host;
49 struct writeback_control wbc = {
50 .sync_mode = WB_SYNC_ALL,
51 .nr_to_write = 0, /* metadata-only; caller takes care of data */
52 };
53 struct super_block *sb; 49 struct super_block *sb;
54 50
55 if (!(inode->i_state & I_DIRTY)) 51 if (!(inode->i_state & I_DIRTY))
@@ -57,7 +53,7 @@ static int exofs_file_fsync(struct file *filp, int datasync)
57 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 53 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
58 return 0; 54 return 0;
59 55
60 ret = sync_inode(inode, &wbc); 56 ret = sync_inode_metadata(inode, 1);
61 57
62 /* This is a good place to write the sb */ 58 /* This is a good place to write the sb */
63 /* TODO: Sechedule an sb-sync on create */ 59 /* TODO: Sechedule an sb-sync on create */
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b7dd0c236863..264e95d02830 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
153 153
154 inode->i_ctime = CURRENT_TIME; 154 inode->i_ctime = CURRENT_TIME;
155 inode_inc_link_count(inode); 155 inode_inc_link_count(inode);
156 atomic_inc(&inode->i_count); 156 ihold(inode);
157 157
158 return exofs_add_nondir(dentry, inode); 158 return exofs_add_nondir(dentry, inode);
159} 159}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index e9e175949a63..51b304056f10 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -74,21 +74,20 @@ static struct dentry *
74find_disconnected_root(struct dentry *dentry) 74find_disconnected_root(struct dentry *dentry)
75{ 75{
76 dget(dentry); 76 dget(dentry);
77 spin_lock(&dentry->d_lock); 77 while (!IS_ROOT(dentry)) {
78 while (!IS_ROOT(dentry) && 78 struct dentry *parent = dget_parent(dentry);
79 (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { 79
80 struct dentry *parent = dentry->d_parent; 80 if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
81 dget(parent); 81 dput(parent);
82 spin_unlock(&dentry->d_lock); 82 break;
83 }
84
83 dput(dentry); 85 dput(dentry);
84 dentry = parent; 86 dentry = parent;
85 spin_lock(&dentry->d_lock);
86 } 87 }
87 spin_unlock(&dentry->d_lock);
88 return dentry; 88 return dentry;
89} 89}
90 90
91
92/* 91/*
93 * Make sure target_dir is fully connected to the dentry tree. 92 * Make sure target_dir is fully connected to the dentry tree.
94 * 93 *
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 764109886ec0..2709b34206ab 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
98 if (IS_DIRSYNC(dir)) { 98 if (IS_DIRSYNC(dir)) {
99 err = write_one_page(page, 1); 99 err = write_one_page(page, 1);
100 if (!err) 100 if (!err)
101 err = ext2_sync_inode(dir); 101 err = sync_inode_metadata(dir, 1);
102 } else { 102 } else {
103 unlock_page(page); 103 unlock_page(page);
104 } 104 }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 416daa62242c..6346a2acf326 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -120,7 +120,6 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
120extern struct inode *ext2_iget (struct super_block *, unsigned long); 120extern struct inode *ext2_iget (struct super_block *, unsigned long);
121extern int ext2_write_inode (struct inode *, struct writeback_control *); 121extern int ext2_write_inode (struct inode *, struct writeback_control *);
122extern void ext2_evict_inode(struct inode *); 122extern void ext2_evict_inode(struct inode *);
123extern int ext2_sync_inode (struct inode *);
124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); 123extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
125extern int ext2_setattr (struct dentry *, struct iattr *); 124extern int ext2_setattr (struct dentry *, struct iattr *);
126extern void ext2_set_inode_flags(struct inode *inode); 125extern void ext2_set_inode_flags(struct inode *inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 533699c16040..40ad210a5049 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
1203 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 1203 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
1204 if (inode_needs_sync(inode)) { 1204 if (inode_needs_sync(inode)) {
1205 sync_mapping_buffers(inode->i_mapping); 1205 sync_mapping_buffers(inode->i_mapping);
1206 ext2_sync_inode (inode); 1206 sync_inode_metadata(inode, 1);
1207 } else { 1207 } else {
1208 mark_inode_dirty(inode); 1208 mark_inode_dirty(inode);
1209 } 1209 }
@@ -1523,15 +1523,6 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
1523 return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1523 return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
1524} 1524}
1525 1525
1526int ext2_sync_inode(struct inode *inode)
1527{
1528 struct writeback_control wbc = {
1529 .sync_mode = WB_SYNC_ALL,
1530 .nr_to_write = 0, /* sys_fsync did this */
1531 };
1532 return sync_inode(inode, &wbc);
1533}
1534
1535int ext2_setattr(struct dentry *dentry, struct iattr *iattr) 1526int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1536{ 1527{
1537 struct inode *inode = dentry->d_inode; 1528 struct inode *inode = dentry->d_inode;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 71efb0e9a3f2..f8aecd2e3297 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
206 206
207 inode->i_ctime = CURRENT_TIME_SEC; 207 inode->i_ctime = CURRENT_TIME_SEC;
208 inode_inc_link_count(inode); 208 inode_inc_link_count(inode);
209 atomic_inc(&inode->i_count); 209 ihold(inode);
210 210
211 err = ext2_add_link(dentry, inode); 211 err = ext2_add_link(dentry, inode);
212 if (!err) { 212 if (!err) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85df87d0f7b7..0901320671da 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1221,9 +1221,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
1221 } 1221 }
1222 1222
1223 es = sbi->s_es; 1223 es = sbi->s_es;
1224 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != 1224 if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) {
1225 (old_mount_opt & EXT2_MOUNT_XIP)) &&
1226 invalidate_inodes(sb)) {
1227 ext2_msg(sb, KERN_WARNING, "warning: refusing change of " 1225 ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
1228 "xip flag with busy inodes while remounting"); 1226 "xip flag with busy inodes while remounting");
1229 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; 1227 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 8c29ae15129e..f84700be3274 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
699 EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; 699 EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
700 inode->i_ctime = CURRENT_TIME_SEC; 700 inode->i_ctime = CURRENT_TIME_SEC;
701 if (IS_SYNC(inode)) { 701 if (IS_SYNC(inode)) {
702 error = ext2_sync_inode (inode); 702 error = sync_inode_metadata(inode, 1);
703 /* In case sync failed due to ENOSPC the inode was actually 703 /* In case sync failed due to ENOSPC the inode was actually
704 * written (only some dirty data were not) so we just proceed 704 * written (only some dirty data were not) so we just proceed
705 * as if nothing happened and cleanup the unused block */ 705 * as if nothing happened and cleanup the unused block */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5e0faf4cda79..ad05353040a1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1696,8 +1696,8 @@ static int ext3_journalled_writepage(struct page *page,
1696 * doesn't seem much point in redirtying the page here. 1696 * doesn't seem much point in redirtying the page here.
1697 */ 1697 */
1698 ClearPageChecked(page); 1698 ClearPageChecked(page);
1699 ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, 1699 ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
1700 ext3_get_block); 1700 ext3_get_block);
1701 if (ret != 0) { 1701 if (ret != 0) {
1702 ext3_journal_stop(handle); 1702 ext3_journal_stop(handle);
1703 goto out_unlock; 1703 goto out_unlock;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2b35ddb70d65..bce9dce639b8 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2260,7 +2260,7 @@ retry:
2260 2260
2261 inode->i_ctime = CURRENT_TIME_SEC; 2261 inode->i_ctime = CURRENT_TIME_SEC;
2262 inc_nlink(inode); 2262 inc_nlink(inode);
2263 atomic_inc(&inode->i_count); 2263 ihold(inode);
2264 2264
2265 err = ext3_add_entry(handle, dentry, inode); 2265 err = ext3_add_entry(handle, dentry, inode);
2266 if (!err) { 2266 if (!err) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4b8debeb3965..49635ef236f8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1538,10 +1538,10 @@ static int do_journal_get_write_access(handle_t *handle,
1538 if (!buffer_mapped(bh) || buffer_freed(bh)) 1538 if (!buffer_mapped(bh) || buffer_freed(bh))
1539 return 0; 1539 return 0;
1540 /* 1540 /*
1541 * __block_prepare_write() could have dirtied some buffers. Clean 1541 * __block_write_begin() could have dirtied some buffers. Clean
1542 * the dirty bit as jbd2_journal_get_write_access() could complain 1542 * the dirty bit as jbd2_journal_get_write_access() could complain
1543 * otherwise about fs integrity issues. Setting of the dirty bit 1543 * otherwise about fs integrity issues. Setting of the dirty bit
1544 * by __block_prepare_write() isn't a real problem here as we clear 1544 * by __block_write_begin() isn't a real problem here as we clear
1545 * the bit before releasing a page lock and thus writeback cannot 1545 * the bit before releasing a page lock and thus writeback cannot
1546 * ever write the buffer. 1546 * ever write the buffer.
1547 */ 1547 */
@@ -2550,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2550 if (buffer_delay(bh)) 2550 if (buffer_delay(bh))
2551 return 0; /* Not sure this could or should happen */ 2551 return 0; /* Not sure this could or should happen */
2552 /* 2552 /*
2553 * XXX: __block_prepare_write() unmaps passed block, 2553 * XXX: __block_write_begin() unmaps passed block, is it OK?
2554 * is it OK?
2555 */ 2554 */
2556 ret = ext4_da_reserve_space(inode, iblock); 2555 ret = ext4_da_reserve_space(inode, iblock);
2557 if (ret) 2556 if (ret)
@@ -2583,7 +2582,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2583/* 2582/*
2584 * This function is used as a standard get_block_t calback function 2583 * This function is used as a standard get_block_t calback function
2585 * when there is no desire to allocate any blocks. It is used as a 2584 * when there is no desire to allocate any blocks. It is used as a
2586 * callback function for block_prepare_write() and block_write_full_page(). 2585 * callback function for block_write_begin() and block_write_full_page().
2587 * These functions should only try to map a single block at a time. 2586 * These functions should only try to map a single block at a time.
2588 * 2587 *
2589 * Since this function doesn't do block allocations even if the caller 2588 * Since this function doesn't do block allocations even if the caller
@@ -2743,7 +2742,7 @@ static int ext4_writepage(struct page *page,
2743 * all are mapped and non delay. We don't want to 2742 * all are mapped and non delay. We don't want to
2744 * do block allocation here. 2743 * do block allocation here.
2745 */ 2744 */
2746 ret = block_prepare_write(page, 0, len, 2745 ret = __block_write_begin(page, 0, len,
2747 noalloc_get_block_write); 2746 noalloc_get_block_write);
2748 if (!ret) { 2747 if (!ret) {
2749 page_bufs = page_buffers(page); 2748 page_bufs = page_buffers(page);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 19aa0d44d822..42f77b1dc72d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
2373 printk(KERN_ERR "EXT4-fs: can't get new inode\n"); 2373 printk(KERN_ERR "EXT4-fs: can't get new inode\n");
2374 goto err_freesgi; 2374 goto err_freesgi;
2375 } 2375 }
2376 sbi->s_buddy_cache->i_ino = get_next_ino();
2376 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; 2377 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2377 for (i = 0; i < ngroups; i++) { 2378 for (i = 0; i < ngroups; i++) {
2378 desc = ext4_get_group_desc(sb, i, NULL); 2379 desc = ext4_get_group_desc(sb, i, NULL);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 314c0d3b3fa9..bd39885b5998 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2312,7 +2312,7 @@ retry:
2312 2312
2313 inode->i_ctime = ext4_current_time(inode); 2313 inode->i_ctime = ext4_current_time(inode);
2314 ext4_inc_count(handle, inode); 2314 ext4_inc_count(handle, inode);
2315 atomic_inc(&inode->i_count); 2315 ihold(inode);
2316 2316
2317 err = ext4_add_entry(handle, dentry, inode); 2317 err = ext4_add_entry(handle, dentry, inode);
2318 if (!err) { 2318 if (!err) {
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 79d1b4ea13e7..8c04eac5079d 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
260 struct inode *ip = NULL; 260 struct inode *ip = NULL;
261 261
262 if ((ip = new_inode(sbp))) { 262 if ((ip = new_inode(sbp))) {
263 ip->i_ino = get_next_ino();
263 vxfs_iinit(ip, vip); 264 vxfs_iinit(ip, vip);
264 ip->i_mapping->a_ops = &vxfs_aops; 265 ip->i_mapping->a_ops = &vxfs_aops;
265 } 266 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9e46aec10d1a..aed881a76b22 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
79 return sb->s_bdi; 79 return sb->s_bdi;
80} 80}
81 81
82static inline struct inode *wb_inode(struct list_head *head)
83{
84 return list_entry(head, struct inode, i_wb_list);
85}
86
82static void bdi_queue_work(struct backing_dev_info *bdi, 87static void bdi_queue_work(struct backing_dev_info *bdi,
83 struct wb_writeback_work *work) 88 struct wb_writeback_work *work)
84{ 89{
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
172 if (!list_empty(&wb->b_dirty)) { 177 if (!list_empty(&wb->b_dirty)) {
173 struct inode *tail; 178 struct inode *tail;
174 179
175 tail = list_entry(wb->b_dirty.next, struct inode, i_list); 180 tail = wb_inode(wb->b_dirty.next);
176 if (time_before(inode->dirtied_when, tail->dirtied_when)) 181 if (time_before(inode->dirtied_when, tail->dirtied_when))
177 inode->dirtied_when = jiffies; 182 inode->dirtied_when = jiffies;
178 } 183 }
179 list_move(&inode->i_list, &wb->b_dirty); 184 list_move(&inode->i_wb_list, &wb->b_dirty);
180} 185}
181 186
182/* 187/*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
186{ 191{
187 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 192 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
188 193
189 list_move(&inode->i_list, &wb->b_more_io); 194 list_move(&inode->i_wb_list, &wb->b_more_io);
190} 195}
191 196
192static void inode_sync_complete(struct inode *inode) 197static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
227 int do_sb_sort = 0; 232 int do_sb_sort = 0;
228 233
229 while (!list_empty(delaying_queue)) { 234 while (!list_empty(delaying_queue)) {
230 inode = list_entry(delaying_queue->prev, struct inode, i_list); 235 inode = wb_inode(delaying_queue->prev);
231 if (older_than_this && 236 if (older_than_this &&
232 inode_dirtied_after(inode, *older_than_this)) 237 inode_dirtied_after(inode, *older_than_this))
233 break; 238 break;
234 if (sb && sb != inode->i_sb) 239 if (sb && sb != inode->i_sb)
235 do_sb_sort = 1; 240 do_sb_sort = 1;
236 sb = inode->i_sb; 241 sb = inode->i_sb;
237 list_move(&inode->i_list, &tmp); 242 list_move(&inode->i_wb_list, &tmp);
238 } 243 }
239 244
240 /* just one sb in list, splice to dispatch_queue and we're done */ 245 /* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
245 250
246 /* Move inodes from one superblock together */ 251 /* Move inodes from one superblock together */
247 while (!list_empty(&tmp)) { 252 while (!list_empty(&tmp)) {
248 inode = list_entry(tmp.prev, struct inode, i_list); 253 sb = wb_inode(tmp.prev)->i_sb;
249 sb = inode->i_sb;
250 list_for_each_prev_safe(pos, node, &tmp) { 254 list_for_each_prev_safe(pos, node, &tmp) {
251 inode = list_entry(pos, struct inode, i_list); 255 inode = wb_inode(pos);
252 if (inode->i_sb == sb) 256 if (inode->i_sb == sb)
253 list_move(&inode->i_list, dispatch_queue); 257 list_move(&inode->i_wb_list, dispatch_queue);
254 } 258 }
255 } 259 }
256} 260}
@@ -408,16 +412,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
408 * completion. 412 * completion.
409 */ 413 */
410 redirty_tail(inode); 414 redirty_tail(inode);
411 } else if (atomic_read(&inode->i_count)) {
412 /*
413 * The inode is clean, inuse
414 */
415 list_move(&inode->i_list, &inode_in_use);
416 } else { 415 } else {
417 /* 416 /*
418 * The inode is clean, unused 417 * The inode is clean. At this point we either have
418 * a reference to the inode or it's on its way out.
419 * No need to add it back to the LRU.
419 */ 420 */
420 list_move(&inode->i_list, &inode_unused); 421 list_del_init(&inode->i_wb_list);
421 } 422 }
422 } 423 }
423 inode_sync_complete(inode); 424 inode_sync_complete(inode);
@@ -465,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
465{ 466{
466 while (!list_empty(&wb->b_io)) { 467 while (!list_empty(&wb->b_io)) {
467 long pages_skipped; 468 long pages_skipped;
468 struct inode *inode = list_entry(wb->b_io.prev, 469 struct inode *inode = wb_inode(wb->b_io.prev);
469 struct inode, i_list);
470 470
471 if (inode->i_sb != sb) { 471 if (inode->i_sb != sb) {
472 if (only_this_sb) { 472 if (only_this_sb) {
@@ -487,10 +487,16 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
487 return 0; 487 return 0;
488 } 488 }
489 489
490 if (inode->i_state & (I_NEW | I_WILL_FREE)) { 490 /*
491 * Don't bother with new inodes or inodes being freed, first
492 * kind does not need periodic writeout yet, and for the latter
493 * kind writeout is handled by the freer.
494 */
495 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
491 requeue_io(inode); 496 requeue_io(inode);
492 continue; 497 continue;
493 } 498 }
499
494 /* 500 /*
495 * Was this inode dirtied after sync_sb_inodes was called? 501 * Was this inode dirtied after sync_sb_inodes was called?
496 * This keeps sync from extra jobs and livelock. 502 * This keeps sync from extra jobs and livelock.
@@ -498,7 +504,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
498 if (inode_dirtied_after(inode, wbc->wb_start)) 504 if (inode_dirtied_after(inode, wbc->wb_start))
499 return 1; 505 return 1;
500 506
501 BUG_ON(inode->i_state & I_FREEING);
502 __iget(inode); 507 __iget(inode);
503 pages_skipped = wbc->pages_skipped; 508 pages_skipped = wbc->pages_skipped;
504 writeback_single_inode(inode, wbc); 509 writeback_single_inode(inode, wbc);
@@ -536,8 +541,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
536 queue_io(wb, wbc->older_than_this); 541 queue_io(wb, wbc->older_than_this);
537 542
538 while (!list_empty(&wb->b_io)) { 543 while (!list_empty(&wb->b_io)) {
539 struct inode *inode = list_entry(wb->b_io.prev, 544 struct inode *inode = wb_inode(wb->b_io.prev);
540 struct inode, i_list);
541 struct super_block *sb = inode->i_sb; 545 struct super_block *sb = inode->i_sb;
542 546
543 if (!pin_sb_for_writeback(sb)) { 547 if (!pin_sb_for_writeback(sb)) {
@@ -675,8 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb,
675 */ 679 */
676 spin_lock(&inode_lock); 680 spin_lock(&inode_lock);
677 if (!list_empty(&wb->b_more_io)) { 681 if (!list_empty(&wb->b_more_io)) {
678 inode = list_entry(wb->b_more_io.prev, 682 inode = wb_inode(wb->b_more_io.prev);
679 struct inode, i_list);
680 trace_wbc_writeback_wait(&wbc, wb->bdi); 683 trace_wbc_writeback_wait(&wbc, wb->bdi);
681 inode_wait_for_writeback(inode); 684 inode_wait_for_writeback(inode);
682 } 685 }
@@ -727,7 +730,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
727 */ 730 */
728 nr_pages = global_page_state(NR_FILE_DIRTY) + 731 nr_pages = global_page_state(NR_FILE_DIRTY) +
729 global_page_state(NR_UNSTABLE_NFS) + 732 global_page_state(NR_UNSTABLE_NFS) +
730 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 733 get_nr_dirty_inodes();
731 734
732 if (nr_pages) { 735 if (nr_pages) {
733 struct wb_writeback_work work = { 736 struct wb_writeback_work work = {
@@ -966,7 +969,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
966 * dirty list. Add blockdev inodes as well. 969 * dirty list. Add blockdev inodes as well.
967 */ 970 */
968 if (!S_ISBLK(inode->i_mode)) { 971 if (!S_ISBLK(inode->i_mode)) {
969 if (hlist_unhashed(&inode->i_hash)) 972 if (inode_unhashed(inode))
970 goto out; 973 goto out;
971 } 974 }
972 if (inode->i_state & I_FREEING) 975 if (inode->i_state & I_FREEING)
@@ -994,7 +997,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
994 } 997 }
995 998
996 inode->dirtied_when = jiffies; 999 inode->dirtied_when = jiffies;
997 list_move(&inode->i_list, &bdi->wb.b_dirty); 1000 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
998 } 1001 }
999 } 1002 }
1000out: 1003out:
@@ -1094,8 +1097,7 @@ void writeback_inodes_sb(struct super_block *sb)
1094 1097
1095 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1098 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1096 1099
1097 work.nr_pages = nr_dirty + nr_unstable + 1100 work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
1098 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1099 1101
1100 bdi_queue_work(sb->s_bdi, &work); 1102 bdi_queue_work(sb->s_bdi, &work);
1101 wait_for_completion(&done); 1103 wait_for_completion(&done);
@@ -1202,3 +1204,23 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
1202 return ret; 1204 return ret;
1203} 1205}
1204EXPORT_SYMBOL(sync_inode); 1206EXPORT_SYMBOL(sync_inode);
1207
1208/**
1209 * sync_inode_metadata - write an inode to disk
1210 * @inode: the inode to sync
1211 * @wait: wait for I/O to complete.
1212 *
1213 * Write an inode to disk and adjust its dirty state after completion.
1214 *
1215 * Note: only writes the actual inode, no associated data or other metadata.
1216 */
1217int sync_inode_metadata(struct inode *inode, int wait)
1218{
1219 struct writeback_control wbc = {
1220 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
1221 .nr_to_write = 0, /* metadata-only */
1222 };
1223
1224 return sync_inode(inode, &wbc);
1225}
1226EXPORT_SYMBOL(sync_inode_metadata);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 7367e177186f..4eba07661e5c 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
222 if (!inode) 222 if (!inode)
223 return NULL; 223 return NULL;
224 224
225 inode->i_ino = get_next_ino();
225 inode->i_mode = mode; 226 inode->i_mode = mode;
226 inode->i_uid = fc->user_id; 227 inode->i_uid = fc->user_id;
227 inode->i_gid = fc->group_id; 228 inode->i_gid = fc->group_id;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 6b24afb96aae..4f36f8832b9b 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -618,7 +618,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
618 struct gfs2_alloc *al = NULL; 618 struct gfs2_alloc *al = NULL;
619 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 619 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
620 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 620 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
621 unsigned to = from + len;
622 struct page *page; 621 struct page *page;
623 622
624 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 623 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -691,7 +690,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
691 } 690 }
692 691
693prepare_write: 692prepare_write:
694 error = block_prepare_write(page, from, to, gfs2_block_map); 693 error = __block_write_begin(page, from, len, gfs2_block_map);
695out: 694out:
696 if (error == 0) 695 if (error == 0)
697 return 0; 696 return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index aeafc233dc89..cade1acbcea9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1219,7 +1219,6 @@ fail_sb:
1219fail_locking: 1219fail_locking:
1220 init_locking(sdp, &mount_gh, UNDO); 1220 init_locking(sdp, &mount_gh, UNDO);
1221fail_lm: 1221fail_lm:
1222 invalidate_inodes(sb);
1223 gfs2_gl_hash_clear(sdp); 1222 gfs2_gl_hash_clear(sdp);
1224 gfs2_lm_unmount(sdp); 1223 gfs2_lm_unmount(sdp);
1225fail_sys: 1224fail_sys:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 0534510200d5..12cbea7502c2 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -255,7 +255,7 @@ out_parent:
255 gfs2_holder_uninit(ghs); 255 gfs2_holder_uninit(ghs);
256 gfs2_holder_uninit(ghs + 1); 256 gfs2_holder_uninit(ghs + 1);
257 if (!error) { 257 if (!error) {
258 atomic_inc(&inode->i_count); 258 ihold(inode);
259 d_instantiate(dentry, inode); 259 d_instantiate(dentry, inode);
260 mark_inode_dirty(inode); 260 mark_inode_dirty(inode);
261 } 261 }
@@ -1294,7 +1294,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1294 int error; 1294 int error;
1295 1295
1296 if (!page_has_buffers(page)) { 1296 if (!page_has_buffers(page)) {
1297 error = block_prepare_write(page, from, to, gfs2_block_map); 1297 error = __block_write_begin(page, from, to - from, gfs2_block_map);
1298 if (unlikely(error)) 1298 if (unlikely(error))
1299 return error; 1299 return error;
1300 1300
@@ -1313,7 +1313,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1313 next += bh->b_size; 1313 next += bh->b_size;
1314 if (buffer_mapped(bh)) { 1314 if (buffer_mapped(bh)) {
1315 if (end) { 1315 if (end) {
1316 error = block_prepare_write(page, start, end, 1316 error = __block_write_begin(page, start, end - start,
1317 gfs2_block_map); 1317 gfs2_block_map);
1318 if (unlikely(error)) 1318 if (unlikely(error))
1319 return error; 1319 return error;
@@ -1328,7 +1328,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
1328 } while (next < to); 1328 } while (next < to);
1329 1329
1330 if (end) { 1330 if (end) {
1331 error = block_prepare_write(page, start, end, gfs2_block_map); 1331 error = __block_write_begin(page, start, end - start, gfs2_block_map);
1332 if (unlikely(error)) 1332 if (unlikely(error))
1333 return error; 1333 return error;
1334 empty_write_end(page, start, end); 1334 empty_write_end(page, start, end);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 047d1176096c..2b2c4997430b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -857,7 +857,6 @@ restart:
857 gfs2_clear_rgrpd(sdp); 857 gfs2_clear_rgrpd(sdp);
858 gfs2_jindex_free(sdp); 858 gfs2_jindex_free(sdp);
859 /* Take apart glock structures and buffer lists */ 859 /* Take apart glock structures and buffer lists */
860 invalidate_inodes(sdp->sd_vfs);
861 gfs2_gl_hash_clear(sdp); 860 gfs2_gl_hash_clear(sdp);
862 /* Unmount the locking protocol */ 861 /* Unmount the locking protocol */
863 gfs2_lm_unmount(sdp); 862 gfs2_lm_unmount(sdp);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 4f55651aaa51..c8cffb81e849 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -147,8 +147,6 @@ struct hfs_sb_info {
147 u16 blockoffset; 147 u16 blockoffset;
148 148
149 int fs_div; 149 int fs_div;
150
151 struct hlist_head rsrc_inodes;
152}; 150};
153 151
154#define HFS_FLG_BITMAP_DIRTY 0 152#define HFS_FLG_BITMAP_DIRTY 0
@@ -254,17 +252,6 @@ static inline void hfs_bitmap_dirty(struct super_block *sb)
254 sb->s_dirt = 1; 252 sb->s_dirt = 1;
255} 253}
256 254
257static inline void hfs_buffer_sync(struct buffer_head *bh)
258{
259 while (buffer_locked(bh)) {
260 wait_on_buffer(bh);
261 }
262 if (buffer_dirty(bh)) {
263 ll_rw_block(WRITE, 1, &bh);
264 wait_on_buffer(bh);
265 }
266}
267
268#define sb_bread512(sb, sec, data) ({ \ 255#define sb_bread512(sb, sec, data) ({ \
269 struct buffer_head *__bh; \ 256 struct buffer_head *__bh; \
270 sector_t __block; \ 257 sector_t __block; \
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 397b7adc7ce6..dffb4e996643 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
524 HFS_I(inode)->rsrc_inode = dir; 524 HFS_I(inode)->rsrc_inode = dir;
525 HFS_I(dir)->rsrc_inode = inode; 525 HFS_I(dir)->rsrc_inode = inode;
526 igrab(dir); 526 igrab(dir);
527 hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes); 527 hlist_add_fake(&inode->i_hash);
528 mark_inode_dirty(inode); 528 mark_inode_dirty(inode);
529out: 529out:
530 d_add(dentry, inode); 530 d_add(dentry, inode);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 86428f5ac991..1563d5ce5764 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -220,7 +220,7 @@ int hfs_mdb_get(struct super_block *sb)
220 mdb->drLsMod = hfs_mtime(); 220 mdb->drLsMod = hfs_mtime();
221 221
222 mark_buffer_dirty(HFS_SB(sb)->mdb_bh); 222 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
223 hfs_buffer_sync(HFS_SB(sb)->mdb_bh); 223 sync_dirty_buffer(HFS_SB(sb)->mdb_bh);
224 } 224 }
225 225
226 return 0; 226 return 0;
@@ -287,7 +287,7 @@ void hfs_mdb_commit(struct super_block *sb)
287 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); 287 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
288 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); 288 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
289 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); 289 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
290 hfs_buffer_sync(HFS_SB(sb)->alt_mdb_bh); 290 sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
291 } 291 }
292 292
293 if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) { 293 if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) {
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 33254160f650..6ee1586f2334 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -382,7 +382,6 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
382 return -ENOMEM; 382 return -ENOMEM;
383 383
384 sb->s_fs_info = sbi; 384 sb->s_fs_info = sbi;
385 INIT_HLIST_HEAD(&sbi->rsrc_inodes);
386 385
387 res = -EINVAL; 386 res = -EINVAL;
388 if (!parse_options((char *)data, sbi)) { 387 if (!parse_options((char *)data, sbi)) {
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d236d85ec9d7..e318bbc0daf6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -286,7 +286,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
286 286
287 inc_nlink(inode); 287 inc_nlink(inode);
288 hfsplus_instantiate(dst_dentry, inode, cnid); 288 hfsplus_instantiate(dst_dentry, inode, cnid);
289 atomic_inc(&inode->i_count); 289 ihold(inode);
290 inode->i_ctime = CURRENT_TIME_SEC; 290 inode->i_ctime = CURRENT_TIME_SEC;
291 mark_inode_dirty(inode); 291 mark_inode_dirty(inode);
292 sbi->file_count++; 292 sbi->file_count++;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 78449280dae0..8afd7e84f98d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -211,7 +211,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
211 * appear hashed, but do not put on any lists. hlist_del() 211 * appear hashed, but do not put on any lists. hlist_del()
212 * will work fine and require no locking. 212 * will work fine and require no locking.
213 */ 213 */
214 inode->i_hash.pprev = &inode->i_hash.next; 214 hlist_add_fake(&inode->i_hash);
215 215
216 mark_inode_dirty(inode); 216 mark_inode_dirty(inode);
217out: 217out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a14328d270e8..b14be3f781c7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -456,6 +456,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
456 inode = new_inode(sb); 456 inode = new_inode(sb);
457 if (inode) { 457 if (inode) {
458 struct hugetlbfs_inode_info *info; 458 struct hugetlbfs_inode_info *info;
459 inode->i_ino = get_next_ino();
459 inode->i_mode = mode; 460 inode->i_mode = mode;
460 inode->i_uid = uid; 461 inode->i_uid = uid;
461 inode->i_gid = gid; 462 inode->i_gid = gid;
diff --git a/fs/inode.c b/fs/inode.c
index 56d909d69bc8..ae2727ab0c3a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -29,7 +29,6 @@
29/* 29/*
30 * This is needed for the following functions: 30 * This is needed for the following functions:
31 * - inode_has_buffers 31 * - inode_has_buffers
32 * - invalidate_inode_buffers
33 * - invalidate_bdev 32 * - invalidate_bdev
34 * 33 *
35 * FIXME: remove all knowledge of the buffer layer from this file 34 * FIXME: remove all knowledge of the buffer layer from this file
@@ -73,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
73 * allowing for low-overhead inode sync() operations. 72 * allowing for low-overhead inode sync() operations.
74 */ 73 */
75 74
76LIST_HEAD(inode_in_use); 75static LIST_HEAD(inode_lru);
77LIST_HEAD(inode_unused);
78static struct hlist_head *inode_hashtable __read_mostly; 76static struct hlist_head *inode_hashtable __read_mostly;
79 77
80/* 78/*
@@ -104,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem);
104 */ 102 */
105struct inodes_stat_t inodes_stat; 103struct inodes_stat_t inodes_stat;
106 104
105static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
106static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
107
107static struct kmem_cache *inode_cachep __read_mostly; 108static struct kmem_cache *inode_cachep __read_mostly;
108 109
110static inline int get_nr_inodes(void)
111{
112 return percpu_counter_sum_positive(&nr_inodes);
113}
114
115static inline int get_nr_inodes_unused(void)
116{
117 return percpu_counter_sum_positive(&nr_inodes_unused);
118}
119
120int get_nr_dirty_inodes(void)
121{
122 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
123 return nr_dirty > 0 ? nr_dirty : 0;
124
125}
126
127/*
128 * Handle nr_inode sysctl
129 */
130#ifdef CONFIG_SYSCTL
131int proc_nr_inodes(ctl_table *table, int write,
132 void __user *buffer, size_t *lenp, loff_t *ppos)
133{
134 inodes_stat.nr_inodes = get_nr_inodes();
135 inodes_stat.nr_unused = get_nr_inodes_unused();
136 return proc_dointvec(table, write, buffer, lenp, ppos);
137}
138#endif
139
109static void wake_up_inode(struct inode *inode) 140static void wake_up_inode(struct inode *inode)
110{ 141{
111 /* 142 /*
@@ -193,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
193 inode->i_fsnotify_mask = 0; 224 inode->i_fsnotify_mask = 0;
194#endif 225#endif
195 226
227 percpu_counter_inc(&nr_inodes);
228
196 return 0; 229 return 0;
197out: 230out:
198 return -ENOMEM; 231 return -ENOMEM;
@@ -233,11 +266,13 @@ void __destroy_inode(struct inode *inode)
233 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 266 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
234 posix_acl_release(inode->i_default_acl); 267 posix_acl_release(inode->i_default_acl);
235#endif 268#endif
269 percpu_counter_dec(&nr_inodes);
236} 270}
237EXPORT_SYMBOL(__destroy_inode); 271EXPORT_SYMBOL(__destroy_inode);
238 272
239void destroy_inode(struct inode *inode) 273static void destroy_inode(struct inode *inode)
240{ 274{
275 BUG_ON(!list_empty(&inode->i_lru));
241 __destroy_inode(inode); 276 __destroy_inode(inode);
242 if (inode->i_sb->s_op->destroy_inode) 277 if (inode->i_sb->s_op->destroy_inode)
243 inode->i_sb->s_op->destroy_inode(inode); 278 inode->i_sb->s_op->destroy_inode(inode);
@@ -256,6 +291,8 @@ void inode_init_once(struct inode *inode)
256 INIT_HLIST_NODE(&inode->i_hash); 291 INIT_HLIST_NODE(&inode->i_hash);
257 INIT_LIST_HEAD(&inode->i_dentry); 292 INIT_LIST_HEAD(&inode->i_dentry);
258 INIT_LIST_HEAD(&inode->i_devices); 293 INIT_LIST_HEAD(&inode->i_devices);
294 INIT_LIST_HEAD(&inode->i_wb_list);
295 INIT_LIST_HEAD(&inode->i_lru);
259 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 296 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
260 spin_lock_init(&inode->i_data.tree_lock); 297 spin_lock_init(&inode->i_data.tree_lock);
261 spin_lock_init(&inode->i_data.i_mmap_lock); 298 spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -282,14 +319,109 @@ static void init_once(void *foo)
282 */ 319 */
283void __iget(struct inode *inode) 320void __iget(struct inode *inode)
284{ 321{
285 if (atomic_inc_return(&inode->i_count) != 1) 322 atomic_inc(&inode->i_count);
286 return; 323}
324
325/*
326 * get additional reference to inode; caller must already hold one.
327 */
328void ihold(struct inode *inode)
329{
330 WARN_ON(atomic_inc_return(&inode->i_count) < 2);
331}
332EXPORT_SYMBOL(ihold);
333
334static void inode_lru_list_add(struct inode *inode)
335{
336 if (list_empty(&inode->i_lru)) {
337 list_add(&inode->i_lru, &inode_lru);
338 percpu_counter_inc(&nr_inodes_unused);
339 }
340}
287 341
288 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 342static void inode_lru_list_del(struct inode *inode)
289 list_move(&inode->i_list, &inode_in_use); 343{
290 inodes_stat.nr_unused--; 344 if (!list_empty(&inode->i_lru)) {
345 list_del_init(&inode->i_lru);
346 percpu_counter_dec(&nr_inodes_unused);
347 }
348}
349
350static inline void __inode_sb_list_add(struct inode *inode)
351{
352 list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
291} 353}
292 354
355/**
356 * inode_sb_list_add - add inode to the superblock list of inodes
357 * @inode: inode to add
358 */
359void inode_sb_list_add(struct inode *inode)
360{
361 spin_lock(&inode_lock);
362 __inode_sb_list_add(inode);
363 spin_unlock(&inode_lock);
364}
365EXPORT_SYMBOL_GPL(inode_sb_list_add);
366
367static inline void __inode_sb_list_del(struct inode *inode)
368{
369 list_del_init(&inode->i_sb_list);
370}
371
372static unsigned long hash(struct super_block *sb, unsigned long hashval)
373{
374 unsigned long tmp;
375
376 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
377 L1_CACHE_BYTES;
378 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
379 return tmp & I_HASHMASK;
380}
381
382/**
383 * __insert_inode_hash - hash an inode
384 * @inode: unhashed inode
385 * @hashval: unsigned long value used to locate this object in the
386 * inode_hashtable.
387 *
388 * Add an inode to the inode hash for this superblock.
389 */
390void __insert_inode_hash(struct inode *inode, unsigned long hashval)
391{
392 struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
393
394 spin_lock(&inode_lock);
395 hlist_add_head(&inode->i_hash, b);
396 spin_unlock(&inode_lock);
397}
398EXPORT_SYMBOL(__insert_inode_hash);
399
400/**
401 * __remove_inode_hash - remove an inode from the hash
402 * @inode: inode to unhash
403 *
404 * Remove an inode from the superblock.
405 */
406static void __remove_inode_hash(struct inode *inode)
407{
408 hlist_del_init(&inode->i_hash);
409}
410
411/**
412 * remove_inode_hash - remove an inode from the hash
413 * @inode: inode to unhash
414 *
415 * Remove an inode from the inode hash.
416 */
417void remove_inode_hash(struct inode *inode)
418{
419 spin_lock(&inode_lock);
420 hlist_del_init(&inode->i_hash);
421 spin_unlock(&inode_lock);
422}
423EXPORT_SYMBOL(remove_inode_hash);
424
293void end_writeback(struct inode *inode) 425void end_writeback(struct inode *inode)
294{ 426{
295 might_sleep(); 427 might_sleep();
@@ -328,101 +460,113 @@ static void evict(struct inode *inode)
328 */ 460 */
329static void dispose_list(struct list_head *head) 461static void dispose_list(struct list_head *head)
330{ 462{
331 int nr_disposed = 0;
332
333 while (!list_empty(head)) { 463 while (!list_empty(head)) {
334 struct inode *inode; 464 struct inode *inode;
335 465
336 inode = list_first_entry(head, struct inode, i_list); 466 inode = list_first_entry(head, struct inode, i_lru);
337 list_del(&inode->i_list); 467 list_del_init(&inode->i_lru);
338 468
339 evict(inode); 469 evict(inode);
340 470
341 spin_lock(&inode_lock); 471 spin_lock(&inode_lock);
342 hlist_del_init(&inode->i_hash); 472 __remove_inode_hash(inode);
343 list_del_init(&inode->i_sb_list); 473 __inode_sb_list_del(inode);
344 spin_unlock(&inode_lock); 474 spin_unlock(&inode_lock);
345 475
346 wake_up_inode(inode); 476 wake_up_inode(inode);
347 destroy_inode(inode); 477 destroy_inode(inode);
348 nr_disposed++;
349 } 478 }
350 spin_lock(&inode_lock);
351 inodes_stat.nr_inodes -= nr_disposed;
352 spin_unlock(&inode_lock);
353} 479}
354 480
355/* 481/**
356 * Invalidate all inodes for a device. 482 * evict_inodes - evict all evictable inodes for a superblock
483 * @sb: superblock to operate on
484 *
485 * Make sure that no inodes with zero refcount are retained. This is
486 * called by superblock shutdown after having MS_ACTIVE flag removed,
487 * so any inode reaching zero refcount during or after that call will
488 * be immediately evicted.
357 */ 489 */
358static int invalidate_list(struct list_head *head, struct list_head *dispose) 490void evict_inodes(struct super_block *sb)
359{ 491{
360 struct list_head *next; 492 struct inode *inode, *next;
361 int busy = 0, count = 0; 493 LIST_HEAD(dispose);
362
363 next = head->next;
364 for (;;) {
365 struct list_head *tmp = next;
366 struct inode *inode;
367 494
368 /* 495 down_write(&iprune_sem);
369 * We can reschedule here without worrying about the list's
370 * consistency because the per-sb list of inodes must not
371 * change during umount anymore, and because iprune_sem keeps
372 * shrink_icache_memory() away.
373 */
374 cond_resched_lock(&inode_lock);
375 496
376 next = next->next; 497 spin_lock(&inode_lock);
377 if (tmp == head) 498 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
378 break; 499 if (atomic_read(&inode->i_count))
379 inode = list_entry(tmp, struct inode, i_sb_list);
380 if (inode->i_state & I_NEW)
381 continue; 500 continue;
382 invalidate_inode_buffers(inode); 501
383 if (!atomic_read(&inode->i_count)) { 502 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
384 list_move(&inode->i_list, dispose); 503 WARN_ON(1);
385 WARN_ON(inode->i_state & I_NEW);
386 inode->i_state |= I_FREEING;
387 count++;
388 continue; 504 continue;
389 } 505 }
390 busy = 1; 506
507 inode->i_state |= I_FREEING;
508
509 /*
510 * Move the inode off the IO lists and LRU once I_FREEING is
511 * set so that it won't get moved back on there if it is dirty.
512 */
513 list_move(&inode->i_lru, &dispose);
514 list_del_init(&inode->i_wb_list);
515 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
516 percpu_counter_dec(&nr_inodes_unused);
391 } 517 }
392 /* only unused inodes may be cached with i_count zero */ 518 spin_unlock(&inode_lock);
393 inodes_stat.nr_unused -= count; 519
394 return busy; 520 dispose_list(&dispose);
521 up_write(&iprune_sem);
395} 522}
396 523
397/** 524/**
398 * invalidate_inodes - discard the inodes on a device 525 * invalidate_inodes - attempt to free all inodes on a superblock
399 * @sb: superblock 526 * @sb: superblock to operate on
400 * 527 *
401 * Discard all of the inodes for a given superblock. If the discard 528 * Attempts to free all inodes for a given superblock. If there were any
402 * fails because there are busy inodes then a non zero value is returned. 529 * busy inodes return a non-zero value, else zero.
403 * If the discard is successful all the inodes have been discarded.
404 */ 530 */
405int invalidate_inodes(struct super_block *sb) 531int invalidate_inodes(struct super_block *sb)
406{ 532{
407 int busy; 533 int busy = 0;
408 LIST_HEAD(throw_away); 534 struct inode *inode, *next;
535 LIST_HEAD(dispose);
409 536
410 down_write(&iprune_sem); 537 down_write(&iprune_sem);
538
411 spin_lock(&inode_lock); 539 spin_lock(&inode_lock);
412 fsnotify_unmount_inodes(&sb->s_inodes); 540 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
413 busy = invalidate_list(&sb->s_inodes, &throw_away); 541 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
542 continue;
543 if (atomic_read(&inode->i_count)) {
544 busy = 1;
545 continue;
546 }
547
548 inode->i_state |= I_FREEING;
549
550 /*
551 * Move the inode off the IO lists and LRU once I_FREEING is
552 * set so that it won't get moved back on there if it is dirty.
553 */
554 list_move(&inode->i_lru, &dispose);
555 list_del_init(&inode->i_wb_list);
556 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
557 percpu_counter_dec(&nr_inodes_unused);
558 }
414 spin_unlock(&inode_lock); 559 spin_unlock(&inode_lock);
415 560
416 dispose_list(&throw_away); 561 dispose_list(&dispose);
417 up_write(&iprune_sem); 562 up_write(&iprune_sem);
418 563
419 return busy; 564 return busy;
420} 565}
421EXPORT_SYMBOL(invalidate_inodes);
422 566
423static int can_unuse(struct inode *inode) 567static int can_unuse(struct inode *inode)
424{ 568{
425 if (inode->i_state) 569 if (inode->i_state & ~I_REFERENCED)
426 return 0; 570 return 0;
427 if (inode_has_buffers(inode)) 571 if (inode_has_buffers(inode))
428 return 0; 572 return 0;
@@ -434,22 +578,24 @@ static int can_unuse(struct inode *inode)
434} 578}
435 579
436/* 580/*
437 * Scan `goal' inodes on the unused list for freeable ones. They are moved to 581 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
438 * a temporary list and then are freed outside inode_lock by dispose_list(). 582 * temporary list and then are freed outside inode_lock by dispose_list().
439 * 583 *
440 * Any inodes which are pinned purely because of attached pagecache have their 584 * Any inodes which are pinned purely because of attached pagecache have their
441 * pagecache removed. We expect the final iput() on that inode to add it to 585 * pagecache removed. If the inode has metadata buffers attached to
442 * the front of the inode_unused list. So look for it there and if the 586 * mapping->private_list then try to remove them.
443 * inode is still freeable, proceed. The right inode is found 99.9% of the
444 * time in testing on a 4-way.
445 * 587 *
446 * If the inode has metadata buffers attached to mapping->private_list then 588 * If the inode has the I_REFERENCED flag set, then it means that it has been
447 * try to remove them. 589 * used recently - the flag is set in iput_final(). When we encounter such an
590 * inode, clear the flag and move it to the back of the LRU so it gets another
591 * pass through the LRU before it gets reclaimed. This is necessary because of
592 * the fact we are doing lazy LRU updates to minimise lock contention so the
593 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
594 * with this flag set because they are the inodes that are out of order.
448 */ 595 */
449static void prune_icache(int nr_to_scan) 596static void prune_icache(int nr_to_scan)
450{ 597{
451 LIST_HEAD(freeable); 598 LIST_HEAD(freeable);
452 int nr_pruned = 0;
453 int nr_scanned; 599 int nr_scanned;
454 unsigned long reap = 0; 600 unsigned long reap = 0;
455 601
@@ -458,13 +604,26 @@ static void prune_icache(int nr_to_scan)
458 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { 604 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
459 struct inode *inode; 605 struct inode *inode;
460 606
461 if (list_empty(&inode_unused)) 607 if (list_empty(&inode_lru))
462 break; 608 break;
463 609
464 inode = list_entry(inode_unused.prev, struct inode, i_list); 610 inode = list_entry(inode_lru.prev, struct inode, i_lru);
465 611
466 if (inode->i_state || atomic_read(&inode->i_count)) { 612 /*
467 list_move(&inode->i_list, &inode_unused); 613 * Referenced or dirty inodes are still in use. Give them
614 * another pass through the LRU as we cannot reclaim them now.
615 */
616 if (atomic_read(&inode->i_count) ||
617 (inode->i_state & ~I_REFERENCED)) {
618 list_del_init(&inode->i_lru);
619 percpu_counter_dec(&nr_inodes_unused);
620 continue;
621 }
622
623 /* recently referenced inodes get one more pass */
624 if (inode->i_state & I_REFERENCED) {
625 list_move(&inode->i_lru, &inode_lru);
626 inode->i_state &= ~I_REFERENCED;
468 continue; 627 continue;
469 } 628 }
470 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 629 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -476,18 +635,23 @@ static void prune_icache(int nr_to_scan)
476 iput(inode); 635 iput(inode);
477 spin_lock(&inode_lock); 636 spin_lock(&inode_lock);
478 637
479 if (inode != list_entry(inode_unused.next, 638 if (inode != list_entry(inode_lru.next,
480 struct inode, i_list)) 639 struct inode, i_lru))
481 continue; /* wrong inode or list_empty */ 640 continue; /* wrong inode or list_empty */
482 if (!can_unuse(inode)) 641 if (!can_unuse(inode))
483 continue; 642 continue;
484 } 643 }
485 list_move(&inode->i_list, &freeable);
486 WARN_ON(inode->i_state & I_NEW); 644 WARN_ON(inode->i_state & I_NEW);
487 inode->i_state |= I_FREEING; 645 inode->i_state |= I_FREEING;
488 nr_pruned++; 646
647 /*
648 * Move the inode off the IO lists and LRU once I_FREEING is
649 * set so that it won't get moved back on there if it is dirty.
650 */
651 list_move(&inode->i_lru, &freeable);
652 list_del_init(&inode->i_wb_list);
653 percpu_counter_dec(&nr_inodes_unused);
489 } 654 }
490 inodes_stat.nr_unused -= nr_pruned;
491 if (current_is_kswapd()) 655 if (current_is_kswapd())
492 __count_vm_events(KSWAPD_INODESTEAL, reap); 656 __count_vm_events(KSWAPD_INODESTEAL, reap);
493 else 657 else
@@ -519,7 +683,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
519 return -1; 683 return -1;
520 prune_icache(nr); 684 prune_icache(nr);
521 } 685 }
522 return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 686 return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
523} 687}
524 688
525static struct shrinker icache_shrinker = { 689static struct shrinker icache_shrinker = {
@@ -530,9 +694,6 @@ static struct shrinker icache_shrinker = {
530static void __wait_on_freeing_inode(struct inode *inode); 694static void __wait_on_freeing_inode(struct inode *inode);
531/* 695/*
532 * Called with the inode lock held. 696 * Called with the inode lock held.
533 * NOTE: we are not increasing the inode-refcount, you must call __iget()
534 * by hand after calling find_inode now! This simplifies iunique and won't
535 * add any additional branch in the common code.
536 */ 697 */
537static struct inode *find_inode(struct super_block *sb, 698static struct inode *find_inode(struct super_block *sb,
538 struct hlist_head *head, 699 struct hlist_head *head,
@@ -552,9 +713,10 @@ repeat:
552 __wait_on_freeing_inode(inode); 713 __wait_on_freeing_inode(inode);
553 goto repeat; 714 goto repeat;
554 } 715 }
555 break; 716 __iget(inode);
717 return inode;
556 } 718 }
557 return node ? inode : NULL; 719 return NULL;
558} 720}
559 721
560/* 722/*
@@ -577,53 +739,49 @@ repeat:
577 __wait_on_freeing_inode(inode); 739 __wait_on_freeing_inode(inode);
578 goto repeat; 740 goto repeat;
579 } 741 }
580 break; 742 __iget(inode);
743 return inode;
581 } 744 }
582 return node ? inode : NULL; 745 return NULL;
583}
584
585static unsigned long hash(struct super_block *sb, unsigned long hashval)
586{
587 unsigned long tmp;
588
589 tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
590 L1_CACHE_BYTES;
591 tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
592 return tmp & I_HASHMASK;
593}
594
595static inline void
596__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
597 struct inode *inode)
598{
599 inodes_stat.nr_inodes++;
600 list_add(&inode->i_list, &inode_in_use);
601 list_add(&inode->i_sb_list, &sb->s_inodes);
602 if (head)
603 hlist_add_head(&inode->i_hash, head);
604} 746}
605 747
606/** 748/*
607 * inode_add_to_lists - add a new inode to relevant lists 749 * Each cpu owns a range of LAST_INO_BATCH numbers.
608 * @sb: superblock inode belongs to 750 * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
609 * @inode: inode to mark in use 751 * to renew the exhausted range.
610 * 752 *
611 * When an inode is allocated it needs to be accounted for, added to the in use 753 * This does not significantly increase overflow rate because every CPU can
612 * list, the owning superblock and the inode hash. This needs to be done under 754 * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
613 * the inode_lock, so export a function to do this rather than the inode lock 755 * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
614 * itself. We calculate the hash list to add to here so it is all internal 756 * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
615 * which requires the caller to have already set up the inode number in the 757 * overflow rate by 2x, which does not seem too significant.
616 * inode to add. 758 *
759 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
760 * error if st_ino won't fit in target struct field. Use 32bit counter
761 * here to attempt to avoid that.
617 */ 762 */
618void inode_add_to_lists(struct super_block *sb, struct inode *inode) 763#define LAST_INO_BATCH 1024
764static DEFINE_PER_CPU(unsigned int, last_ino);
765
766unsigned int get_next_ino(void)
619{ 767{
620 struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); 768 unsigned int *p = &get_cpu_var(last_ino);
769 unsigned int res = *p;
621 770
622 spin_lock(&inode_lock); 771#ifdef CONFIG_SMP
623 __inode_add_to_lists(sb, head, inode); 772 if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
624 spin_unlock(&inode_lock); 773 static atomic_t shared_last_ino;
774 int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
775
776 res = next - LAST_INO_BATCH;
777 }
778#endif
779
780 *p = ++res;
781 put_cpu_var(last_ino);
782 return res;
625} 783}
626EXPORT_SYMBOL_GPL(inode_add_to_lists); 784EXPORT_SYMBOL(get_next_ino);
627 785
628/** 786/**
629 * new_inode - obtain an inode 787 * new_inode - obtain an inode
@@ -639,12 +797,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
639 */ 797 */
640struct inode *new_inode(struct super_block *sb) 798struct inode *new_inode(struct super_block *sb)
641{ 799{
642 /*
643 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
644 * error if st_ino won't fit in target struct field. Use 32bit counter
645 * here to attempt to avoid that.
646 */
647 static unsigned int last_ino;
648 struct inode *inode; 800 struct inode *inode;
649 801
650 spin_lock_prefetch(&inode_lock); 802 spin_lock_prefetch(&inode_lock);
@@ -652,8 +804,7 @@ struct inode *new_inode(struct super_block *sb)
652 inode = alloc_inode(sb); 804 inode = alloc_inode(sb);
653 if (inode) { 805 if (inode) {
654 spin_lock(&inode_lock); 806 spin_lock(&inode_lock);
655 __inode_add_to_lists(sb, NULL, inode); 807 __inode_sb_list_add(inode);
656 inode->i_ino = ++last_ino;
657 inode->i_state = 0; 808 inode->i_state = 0;
658 spin_unlock(&inode_lock); 809 spin_unlock(&inode_lock);
659 } 810 }
@@ -664,7 +815,7 @@ EXPORT_SYMBOL(new_inode);
664void unlock_new_inode(struct inode *inode) 815void unlock_new_inode(struct inode *inode)
665{ 816{
666#ifdef CONFIG_DEBUG_LOCK_ALLOC 817#ifdef CONFIG_DEBUG_LOCK_ALLOC
667 if (inode->i_mode & S_IFDIR) { 818 if (S_ISDIR(inode->i_mode)) {
668 struct file_system_type *type = inode->i_sb->s_type; 819 struct file_system_type *type = inode->i_sb->s_type;
669 820
670 /* Set new key only if filesystem hasn't already changed it */ 821 /* Set new key only if filesystem hasn't already changed it */
@@ -721,7 +872,8 @@ static struct inode *get_new_inode(struct super_block *sb,
721 if (set(inode, data)) 872 if (set(inode, data))
722 goto set_failed; 873 goto set_failed;
723 874
724 __inode_add_to_lists(sb, head, inode); 875 hlist_add_head(&inode->i_hash, head);
876 __inode_sb_list_add(inode);
725 inode->i_state = I_NEW; 877 inode->i_state = I_NEW;
726 spin_unlock(&inode_lock); 878 spin_unlock(&inode_lock);
727 879
@@ -736,7 +888,6 @@ static struct inode *get_new_inode(struct super_block *sb,
736 * us. Use the old inode instead of the one we just 888 * us. Use the old inode instead of the one we just
737 * allocated. 889 * allocated.
738 */ 890 */
739 __iget(old);
740 spin_unlock(&inode_lock); 891 spin_unlock(&inode_lock);
741 destroy_inode(inode); 892 destroy_inode(inode);
742 inode = old; 893 inode = old;
@@ -768,7 +919,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
768 old = find_inode_fast(sb, head, ino); 919 old = find_inode_fast(sb, head, ino);
769 if (!old) { 920 if (!old) {
770 inode->i_ino = ino; 921 inode->i_ino = ino;
771 __inode_add_to_lists(sb, head, inode); 922 hlist_add_head(&inode->i_hash, head);
923 __inode_sb_list_add(inode);
772 inode->i_state = I_NEW; 924 inode->i_state = I_NEW;
773 spin_unlock(&inode_lock); 925 spin_unlock(&inode_lock);
774 926
@@ -783,7 +935,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
783 * us. Use the old inode instead of the one we just 935 * us. Use the old inode instead of the one we just
784 * allocated. 936 * allocated.
785 */ 937 */
786 __iget(old);
787 spin_unlock(&inode_lock); 938 spin_unlock(&inode_lock);
788 destroy_inode(inode); 939 destroy_inode(inode);
789 inode = old; 940 inode = old;
@@ -792,6 +943,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
792 return inode; 943 return inode;
793} 944}
794 945
946/*
947 * search the inode cache for a matching inode number.
948 * If we find one, then the inode number we are trying to
949 * allocate is not unique and so we should not use it.
950 *
951 * Returns 1 if the inode number is unique, 0 if it is not.
952 */
953static int test_inode_iunique(struct super_block *sb, unsigned long ino)
954{
955 struct hlist_head *b = inode_hashtable + hash(sb, ino);
956 struct hlist_node *node;
957 struct inode *inode;
958
959 hlist_for_each_entry(inode, node, b, i_hash) {
960 if (inode->i_ino == ino && inode->i_sb == sb)
961 return 0;
962 }
963
964 return 1;
965}
966
795/** 967/**
796 * iunique - get a unique inode number 968 * iunique - get a unique inode number
797 * @sb: superblock 969 * @sb: superblock
@@ -813,19 +985,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
813 * error if st_ino won't fit in target struct field. Use 32bit counter 985 * error if st_ino won't fit in target struct field. Use 32bit counter
814 * here to attempt to avoid that. 986 * here to attempt to avoid that.
815 */ 987 */
988 static DEFINE_SPINLOCK(iunique_lock);
816 static unsigned int counter; 989 static unsigned int counter;
817 struct inode *inode;
818 struct hlist_head *head;
819 ino_t res; 990 ino_t res;
820 991
821 spin_lock(&inode_lock); 992 spin_lock(&inode_lock);
993 spin_lock(&iunique_lock);
822 do { 994 do {
823 if (counter <= max_reserved) 995 if (counter <= max_reserved)
824 counter = max_reserved + 1; 996 counter = max_reserved + 1;
825 res = counter++; 997 res = counter++;
826 head = inode_hashtable + hash(sb, res); 998 } while (!test_inode_iunique(sb, res));
827 inode = find_inode_fast(sb, head, res); 999 spin_unlock(&iunique_lock);
828 } while (inode != NULL);
829 spin_unlock(&inode_lock); 1000 spin_unlock(&inode_lock);
830 1001
831 return res; 1002 return res;
@@ -877,7 +1048,6 @@ static struct inode *ifind(struct super_block *sb,
877 spin_lock(&inode_lock); 1048 spin_lock(&inode_lock);
878 inode = find_inode(sb, head, test, data); 1049 inode = find_inode(sb, head, test, data);
879 if (inode) { 1050 if (inode) {
880 __iget(inode);
881 spin_unlock(&inode_lock); 1051 spin_unlock(&inode_lock);
882 if (likely(wait)) 1052 if (likely(wait))
883 wait_on_inode(inode); 1053 wait_on_inode(inode);
@@ -910,7 +1080,6 @@ static struct inode *ifind_fast(struct super_block *sb,
910 spin_lock(&inode_lock); 1080 spin_lock(&inode_lock);
911 inode = find_inode_fast(sb, head, ino); 1081 inode = find_inode_fast(sb, head, ino);
912 if (inode) { 1082 if (inode) {
913 __iget(inode);
914 spin_unlock(&inode_lock); 1083 spin_unlock(&inode_lock);
915 wait_on_inode(inode); 1084 wait_on_inode(inode);
916 return inode; 1085 return inode;
@@ -1096,7 +1265,7 @@ int insert_inode_locked(struct inode *inode)
1096 __iget(old); 1265 __iget(old);
1097 spin_unlock(&inode_lock); 1266 spin_unlock(&inode_lock);
1098 wait_on_inode(old); 1267 wait_on_inode(old);
1099 if (unlikely(!hlist_unhashed(&old->i_hash))) { 1268 if (unlikely(!inode_unhashed(old))) {
1100 iput(old); 1269 iput(old);
1101 return -EBUSY; 1270 return -EBUSY;
1102 } 1271 }
@@ -1135,7 +1304,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1135 __iget(old); 1304 __iget(old);
1136 spin_unlock(&inode_lock); 1305 spin_unlock(&inode_lock);
1137 wait_on_inode(old); 1306 wait_on_inode(old);
1138 if (unlikely(!hlist_unhashed(&old->i_hash))) { 1307 if (unlikely(!inode_unhashed(old))) {
1139 iput(old); 1308 iput(old);
1140 return -EBUSY; 1309 return -EBUSY;
1141 } 1310 }
@@ -1144,36 +1313,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1144} 1313}
1145EXPORT_SYMBOL(insert_inode_locked4); 1314EXPORT_SYMBOL(insert_inode_locked4);
1146 1315
1147/**
1148 * __insert_inode_hash - hash an inode
1149 * @inode: unhashed inode
1150 * @hashval: unsigned long value used to locate this object in the
1151 * inode_hashtable.
1152 *
1153 * Add an inode to the inode hash for this superblock.
1154 */
1155void __insert_inode_hash(struct inode *inode, unsigned long hashval)
1156{
1157 struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
1158 spin_lock(&inode_lock);
1159 hlist_add_head(&inode->i_hash, head);
1160 spin_unlock(&inode_lock);
1161}
1162EXPORT_SYMBOL(__insert_inode_hash);
1163
1164/**
1165 * remove_inode_hash - remove an inode from the hash
1166 * @inode: inode to unhash
1167 *
1168 * Remove an inode from the superblock.
1169 */
1170void remove_inode_hash(struct inode *inode)
1171{
1172 spin_lock(&inode_lock);
1173 hlist_del_init(&inode->i_hash);
1174 spin_unlock(&inode_lock);
1175}
1176EXPORT_SYMBOL(remove_inode_hash);
1177 1316
1178int generic_delete_inode(struct inode *inode) 1317int generic_delete_inode(struct inode *inode)
1179{ 1318{
@@ -1188,7 +1327,7 @@ EXPORT_SYMBOL(generic_delete_inode);
1188 */ 1327 */
1189int generic_drop_inode(struct inode *inode) 1328int generic_drop_inode(struct inode *inode)
1190{ 1329{
1191 return !inode->i_nlink || hlist_unhashed(&inode->i_hash); 1330 return !inode->i_nlink || inode_unhashed(inode);
1192} 1331}
1193EXPORT_SYMBOL_GPL(generic_drop_inode); 1332EXPORT_SYMBOL_GPL(generic_drop_inode);
1194 1333
@@ -1214,10 +1353,11 @@ static void iput_final(struct inode *inode)
1214 drop = generic_drop_inode(inode); 1353 drop = generic_drop_inode(inode);
1215 1354
1216 if (!drop) { 1355 if (!drop) {
1217 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1218 list_move(&inode->i_list, &inode_unused);
1219 inodes_stat.nr_unused++;
1220 if (sb->s_flags & MS_ACTIVE) { 1356 if (sb->s_flags & MS_ACTIVE) {
1357 inode->i_state |= I_REFERENCED;
1358 if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
1359 inode_lru_list_add(inode);
1360 }
1221 spin_unlock(&inode_lock); 1361 spin_unlock(&inode_lock);
1222 return; 1362 return;
1223 } 1363 }
@@ -1228,19 +1368,23 @@ static void iput_final(struct inode *inode)
1228 spin_lock(&inode_lock); 1368 spin_lock(&inode_lock);
1229 WARN_ON(inode->i_state & I_NEW); 1369 WARN_ON(inode->i_state & I_NEW);
1230 inode->i_state &= ~I_WILL_FREE; 1370 inode->i_state &= ~I_WILL_FREE;
1231 inodes_stat.nr_unused--; 1371 __remove_inode_hash(inode);
1232 hlist_del_init(&inode->i_hash);
1233 } 1372 }
1234 list_del_init(&inode->i_list); 1373
1235 list_del_init(&inode->i_sb_list);
1236 WARN_ON(inode->i_state & I_NEW); 1374 WARN_ON(inode->i_state & I_NEW);
1237 inode->i_state |= I_FREEING; 1375 inode->i_state |= I_FREEING;
1238 inodes_stat.nr_inodes--; 1376
1377 /*
1378 * Move the inode off the IO lists and LRU once I_FREEING is
1379 * set so that it won't get moved back on there if it is dirty.
1380 */
1381 inode_lru_list_del(inode);
1382 list_del_init(&inode->i_wb_list);
1383
1384 __inode_sb_list_del(inode);
1239 spin_unlock(&inode_lock); 1385 spin_unlock(&inode_lock);
1240 evict(inode); 1386 evict(inode);
1241 spin_lock(&inode_lock); 1387 remove_inode_hash(inode);
1242 hlist_del_init(&inode->i_hash);
1243 spin_unlock(&inode_lock);
1244 wake_up_inode(inode); 1388 wake_up_inode(inode);
1245 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); 1389 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1246 destroy_inode(inode); 1390 destroy_inode(inode);
@@ -1504,6 +1648,8 @@ void __init inode_init(void)
1504 SLAB_MEM_SPREAD), 1648 SLAB_MEM_SPREAD),
1505 init_once); 1649 init_once);
1506 register_shrinker(&icache_shrinker); 1650 register_shrinker(&icache_shrinker);
1651 percpu_counter_init(&nr_inodes, 0);
1652 percpu_counter_init(&nr_inodes_unused, 0);
1507 1653
1508 /* Hash may have been set up in inode_init_early */ 1654 /* Hash may have been set up in inode_init_early */
1509 if (!hashdist) 1655 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index a6910e91cee8..ebad3b90752d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,3 +101,10 @@ extern void put_super(struct super_block *sb);
101struct nameidata; 101struct nameidata;
102extern struct file *nameidata_to_filp(struct nameidata *); 102extern struct file *nameidata_to_filp(struct nameidata *);
103extern void release_open_intent(struct nameidata *); 103extern void release_open_intent(struct nameidata *);
104
105/*
106 * inode.c
107 */
108extern int get_nr_dirty_inodes(void);
109extern int evict_inodes(struct super_block *);
110extern int invalidate_inodes(struct super_block *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 09ff41a752a0..60c2b944d762 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -962,25 +962,23 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
962 * or getblk() if they are not. Returns the number of blocks inserted 962 * or getblk() if they are not. Returns the number of blocks inserted
963 * (-ve == error.) 963 * (-ve == error.)
964 */ 964 */
965int isofs_get_blocks(struct inode *inode, sector_t iblock_s, 965int isofs_get_blocks(struct inode *inode, sector_t iblock,
966 struct buffer_head **bh, unsigned long nblocks) 966 struct buffer_head **bh, unsigned long nblocks)
967{ 967{
968 unsigned long b_off; 968 unsigned long b_off = iblock;
969 unsigned offset, sect_size; 969 unsigned offset, sect_size;
970 unsigned int firstext; 970 unsigned int firstext;
971 unsigned long nextblk, nextoff; 971 unsigned long nextblk, nextoff;
972 long iblock = (long)iblock_s;
973 int section, rv, error; 972 int section, rv, error;
974 struct iso_inode_info *ei = ISOFS_I(inode); 973 struct iso_inode_info *ei = ISOFS_I(inode);
975 974
976 error = -EIO; 975 error = -EIO;
977 rv = 0; 976 rv = 0;
978 if (iblock < 0 || iblock != iblock_s) { 977 if (iblock != b_off) {
979 printk(KERN_DEBUG "%s: block number too large\n", __func__); 978 printk(KERN_DEBUG "%s: block number too large\n", __func__);
980 goto abort; 979 goto abort;
981 } 980 }
982 981
983 b_off = iblock;
984 982
985 offset = 0; 983 offset = 0;
986 firstext = ei->i_first_extent; 984 firstext = ei->i_first_extent;
@@ -998,8 +996,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
998 * I/O errors. 996 * I/O errors.
999 */ 997 */
1000 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { 998 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
1001 printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n", 999 printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
1002 __func__, iblock, (unsigned long) inode->i_size); 1000 __func__, b_off,
1001 (unsigned long long)inode->i_size);
1003 goto abort; 1002 goto abort;
1004 } 1003 }
1005 1004
@@ -1025,9 +1024,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
1025 if (++section > 100) { 1024 if (++section > 100) {
1026 printk(KERN_DEBUG "%s: More than 100 file sections ?!?" 1025 printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
1027 " aborting...\n", __func__); 1026 " aborting...\n", __func__);
1028 printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u " 1027 printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
1029 "nextblk=%lu nextoff=%lu\n", __func__, 1028 "nextblk=%lu nextoff=%lu\n", __func__,
1030 iblock, firstext, (unsigned) sect_size, 1029 b_off, firstext, (unsigned) sect_size,
1031 nextblk, nextoff); 1030 nextblk, nextoff);
1032 goto abort; 1031 goto abort;
1033 } 1032 }
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ed78a3cf3cb0..79121aa5858b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
289 mutex_unlock(&f->sem); 289 mutex_unlock(&f->sem);
290 d_instantiate(dentry, old_dentry->d_inode); 290 d_instantiate(dentry, old_dentry->d_inode);
291 dir_i->i_mtime = dir_i->i_ctime = ITIME(now); 291 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
292 atomic_inc(&old_dentry->d_inode->i_count); 292 ihold(old_dentry->d_inode);
293 } 293 }
294 return ret; 294 return ret;
295} 295}
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
864 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); 864 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
865 /* Might as well let the VFS know */ 865 /* Might as well let the VFS know */
866 d_instantiate(new_dentry, old_dentry->d_inode); 866 d_instantiate(new_dentry, old_dentry->d_inode);
867 atomic_inc(&old_dentry->d_inode->i_count); 867 ihold(old_dentry->d_inode);
868 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); 868 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
869 return ret; 869 return ret;
870 } 870 }
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f8332dc8eeb2..3a09423b6c22 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -497,7 +497,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
497 * appear hashed, but do not put on any lists. hlist_del() 497 * appear hashed, but do not put on any lists. hlist_del()
498 * will work fine and require no locking. 498 * will work fine and require no locking.
499 */ 499 */
500 ip->i_hash.pprev = &ip->i_hash.next; 500 hlist_add_fake(&ip->i_hash);
501 501
502 return (ip); 502 return (ip);
503} 503}
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d945ea76b445..9466957ec841 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */
1279 * lazy commit thread finishes processing 1279 * lazy commit thread finishes processing
1280 */ 1280 */
1281 if (tblk->xflag & COMMIT_DELETE) { 1281 if (tblk->xflag & COMMIT_DELETE) {
1282 atomic_inc(&tblk->u.ip->i_count); 1282 ihold(tblk->u.ip);
1283 /* 1283 /*
1284 * Avoid a rare deadlock 1284 * Avoid a rare deadlock
1285 * 1285 *
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a9cf8e8675be..231ca4af9bce 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry,
839 ip->i_ctime = CURRENT_TIME; 839 ip->i_ctime = CURRENT_TIME;
840 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 840 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
841 mark_inode_dirty(dir); 841 mark_inode_dirty(dir);
842 atomic_inc(&ip->i_count); 842 ihold(ip);
843 843
844 iplist[0] = ip; 844 iplist[0] = ip;
845 iplist[1] = dir; 845 iplist[1] = dir;
diff --git a/fs/libfs.c b/fs/libfs.c
index 62baa0387d6e..304a5132ca27 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
255 255
256 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 256 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
257 inc_nlink(inode); 257 inc_nlink(inode);
258 atomic_inc(&inode->i_count); 258 ihold(inode);
259 dget(dentry); 259 dget(dentry);
260 d_instantiate(dentry, inode); 260 d_instantiate(dentry, inode);
261 return 0; 261 return 0;
@@ -892,10 +892,6 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
892 */ 892 */
893int generic_file_fsync(struct file *file, int datasync) 893int generic_file_fsync(struct file *file, int datasync)
894{ 894{
895 struct writeback_control wbc = {
896 .sync_mode = WB_SYNC_ALL,
897 .nr_to_write = 0, /* metadata-only; caller takes care of data */
898 };
899 struct inode *inode = file->f_mapping->host; 895 struct inode *inode = file->f_mapping->host;
900 int err; 896 int err;
901 int ret; 897 int ret;
@@ -906,7 +902,7 @@ int generic_file_fsync(struct file *file, int datasync)
906 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 902 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
907 return ret; 903 return ret;
908 904
909 err = sync_inode(inode, &wbc); 905 err = sync_inode_metadata(inode, 1);
910 if (ret == 0) 906 if (ret == 0)
911 ret = err; 907 ret = err;
912 return ret; 908 return ret;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1eb4e89e045b..409dfd65e9a1 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
569 return -EMLINK; 569 return -EMLINK;
570 570
571 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 571 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
572 atomic_inc(&inode->i_count); 572 ihold(inode);
573 inode->i_nlink++; 573 inode->i_nlink++;
574 mark_inode_dirty_sync(inode); 574 mark_inode_dirty_sync(inode);
575 575
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index f3f3578393a4..c0d35a3accef 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
101 101
102 inode->i_ctime = CURRENT_TIME_SEC; 102 inode->i_ctime = CURRENT_TIME_SEC;
103 inode_inc_link_count(inode); 103 inode_inc_link_count(inode);
104 atomic_inc(&inode->i_count); 104 ihold(inode);
105 return add_nondir(dentry, inode); 105 return add_nondir(dentry, inode);
106} 106}
107 107
diff --git a/fs/namei.c b/fs/namei.c
index 24896e833565..f7dbc06857ab 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1121,11 +1121,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1121static struct dentry *__lookup_hash(struct qstr *name, 1121static struct dentry *__lookup_hash(struct qstr *name,
1122 struct dentry *base, struct nameidata *nd) 1122 struct dentry *base, struct nameidata *nd)
1123{ 1123{
1124 struct inode *inode = base->d_inode;
1124 struct dentry *dentry; 1125 struct dentry *dentry;
1125 struct inode *inode;
1126 int err; 1126 int err;
1127 1127
1128 inode = base->d_inode; 1128 err = exec_permission(inode);
1129 if (err)
1130 return ERR_PTR(err);
1129 1131
1130 /* 1132 /*
1131 * See if the low-level filesystem might want 1133 * See if the low-level filesystem might want
@@ -1161,11 +1163,6 @@ out:
1161 */ 1163 */
1162static struct dentry *lookup_hash(struct nameidata *nd) 1164static struct dentry *lookup_hash(struct nameidata *nd)
1163{ 1165{
1164 int err;
1165
1166 err = exec_permission(nd->path.dentry->d_inode);
1167 if (err)
1168 return ERR_PTR(err);
1169 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1166 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1170} 1167}
1171 1168
@@ -1213,9 +1210,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1213 if (err) 1210 if (err)
1214 return ERR_PTR(err); 1211 return ERR_PTR(err);
1215 1212
1216 err = exec_permission(base->d_inode);
1217 if (err)
1218 return ERR_PTR(err);
1219 return __lookup_hash(&this, base, NULL); 1213 return __lookup_hash(&this, base, NULL);
1220} 1214}
1221 1215
@@ -2291,7 +2285,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2291 goto slashes; 2285 goto slashes;
2292 inode = dentry->d_inode; 2286 inode = dentry->d_inode;
2293 if (inode) 2287 if (inode)
2294 atomic_inc(&inode->i_count); 2288 ihold(inode);
2295 error = mnt_want_write(nd.path.mnt); 2289 error = mnt_want_write(nd.path.mnt);
2296 if (error) 2290 if (error)
2297 goto exit2; 2291 goto exit2;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7ca5182c0bed..8a415c9c5e55 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -595,7 +595,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
595 goto out_free; 595 goto out_free;
596 } 596 }
597 597
598 mnt->mnt_flags = old->mnt_flags; 598 mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
599 atomic_inc(&sb->s_active); 599 atomic_inc(&sb->s_active);
600 mnt->mnt_sb = sb; 600 mnt->mnt_sb = sb;
601 mnt->mnt_root = dget(root); 601 mnt->mnt_root = dget(root);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 257e4052492e..07ac3847e562 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1801,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1801 d_drop(dentry); 1801 d_drop(dentry);
1802 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); 1802 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
1803 if (error == 0) { 1803 if (error == 0) {
1804 atomic_inc(&inode->i_count); 1804 ihold(inode);
1805 d_add(dentry, inode); 1805 d_add(dentry, inode);
1806 } 1806 }
1807 return error; 1807 return error;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a70e446e1605..ac7b814ce162 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
54 iput(inode); 54 iput(inode);
55 return -ENOMEM; 55 return -ENOMEM;
56 } 56 }
57 /* Circumvent igrab(): we know the inode is not being freed */ 57 ihold(inode);
58 atomic_inc(&inode->i_count);
59 /* 58 /*
60 * Ensure that this dentry is invisible to d_find_alias(). 59 * Ensure that this dentry is invisible to d_find_alias().
61 * Otherwise, it may be spliced into the tree by 60 * Otherwise, it may be spliced into the tree by
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 661a6cf8e826..184938fcff04 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -281,23 +281,13 @@ commit_metadata(struct svc_fh *fhp)
281{ 281{
282 struct inode *inode = fhp->fh_dentry->d_inode; 282 struct inode *inode = fhp->fh_dentry->d_inode;
283 const struct export_operations *export_ops = inode->i_sb->s_export_op; 283 const struct export_operations *export_ops = inode->i_sb->s_export_op;
284 int error = 0;
285 284
286 if (!EX_ISSYNC(fhp->fh_export)) 285 if (!EX_ISSYNC(fhp->fh_export))
287 return 0; 286 return 0;
288 287
289 if (export_ops->commit_metadata) { 288 if (export_ops->commit_metadata)
290 error = export_ops->commit_metadata(inode); 289 return export_ops->commit_metadata(inode);
291 } else { 290 return sync_inode_metadata(inode, 1);
292 struct writeback_control wbc = {
293 .sync_mode = WB_SYNC_ALL,
294 .nr_to_write = 0, /* metadata only */
295 };
296
297 error = sync_inode(inode, &wbc);
298 }
299
300 return error;
301} 291}
302 292
303/* 293/*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 185d1607cb00..6e9557ecf161 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -207,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
207 207
208 inode->i_ctime = CURRENT_TIME; 208 inode->i_ctime = CURRENT_TIME;
209 inode_inc_link_count(inode); 209 inode_inc_link_count(inode);
210 atomic_inc(&inode->i_count); 210 ihold(inode);
211 211
212 err = nilfs_add_nondir(dentry, inode); 212 err = nilfs_add_nondir(dentry, inode);
213 if (!err) 213 if (!err)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 36802420d69a..4498a208df94 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -88,8 +88,6 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
88{ 88{
89 struct dentry *parent; 89 struct dentry *parent;
90 struct inode *p_inode; 90 struct inode *p_inode;
91 bool send = false;
92 bool should_update_children = false;
93 91
94 if (!dentry) 92 if (!dentry)
95 dentry = path->dentry; 93 dentry = path->dentry;
@@ -97,29 +95,12 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
97 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) 95 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
98 return; 96 return;
99 97
100 spin_lock(&dentry->d_lock); 98 parent = dget_parent(dentry);
101 parent = dentry->d_parent;
102 p_inode = parent->d_inode; 99 p_inode = parent->d_inode;
103 100
104 if (fsnotify_inode_watches_children(p_inode)) { 101 if (unlikely(!fsnotify_inode_watches_children(p_inode)))
105 if (p_inode->i_fsnotify_mask & mask) { 102 __fsnotify_update_child_dentry_flags(p_inode);
106 dget(parent); 103 else if (p_inode->i_fsnotify_mask & mask) {
107 send = true;
108 }
109 } else {
110 /*
111 * The parent doesn't care about events on it's children but
112 * at least one child thought it did. We need to run all the
113 * children and update their d_flags to let them know p_inode
114 * doesn't care about them any more.
115 */
116 dget(parent);
117 should_update_children = true;
118 }
119
120 spin_unlock(&dentry->d_lock);
121
122 if (send) {
123 /* we are notifying a parent so come up with the new mask which 104 /* we are notifying a parent so come up with the new mask which
124 * specifies these are events which came from a child. */ 105 * specifies these are events which came from a child. */
125 mask |= FS_EVENT_ON_CHILD; 106 mask |= FS_EVENT_ON_CHILD;
@@ -130,13 +111,9 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
130 else 111 else
131 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, 112 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
132 dentry->d_name.name, 0); 113 dentry->d_name.name, 0);
133 dput(parent);
134 } 114 }
135 115
136 if (unlikely(should_update_children)) { 116 dput(parent);
137 __fsnotify_update_child_dentry_flags(p_inode);
138 dput(parent);
139 }
140} 117}
141EXPORT_SYMBOL_GPL(__fsnotify_parent); 118EXPORT_SYMBOL_GPL(__fsnotify_parent);
142 119
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 33297c005060..21ed10660b80 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -240,6 +240,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
240{ 240{
241 struct inode *inode, *next_i, *need_iput = NULL; 241 struct inode *inode, *next_i, *need_iput = NULL;
242 242
243 spin_lock(&inode_lock);
243 list_for_each_entry_safe(inode, next_i, list, i_sb_list) { 244 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
244 struct inode *need_iput_tmp; 245 struct inode *need_iput_tmp;
245 246
@@ -297,4 +298,5 @@ void fsnotify_unmount_inodes(struct list_head *list)
297 298
298 spin_lock(&inode_lock); 299 spin_lock(&inode_lock);
299 } 300 }
301 spin_unlock(&inode_lock);
300} 302}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 19c5180f8a28..d3fbe5730bfc 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2911,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
2911 goto unl_upcase_iput_tmp_ino_err_out_now; 2911 goto unl_upcase_iput_tmp_ino_err_out_now;
2912 } 2912 }
2913 if ((sb->s_root = d_alloc_root(vol->root_ino))) { 2913 if ((sb->s_root = d_alloc_root(vol->root_ino))) {
2914 /* We increment i_count simulating an ntfs_iget(). */ 2914 /* We grab a reference, simulating an ntfs_iget(). */
2915 atomic_inc(&vol->root_ino->i_count); 2915 ihold(vol->root_ino);
2916 ntfs_debug("Exiting, status successful."); 2916 ntfs_debug("Exiting, status successful.");
2917 /* Release the default upcase if it has no users. */ 2917 /* Release the default upcase if it has no users. */
2918 mutex_lock(&ntfs_lock); 2918 mutex_lock(&ntfs_lock);
@@ -3021,21 +3021,6 @@ iput_tmp_ino_err_out_now:
3021 if (vol->mft_ino && vol->mft_ino != tmp_ino) 3021 if (vol->mft_ino && vol->mft_ino != tmp_ino)
3022 iput(vol->mft_ino); 3022 iput(vol->mft_ino);
3023 vol->mft_ino = NULL; 3023 vol->mft_ino = NULL;
3024 /*
3025 * This is needed to get ntfs_clear_extent_inode() called for each
3026 * inode we have ever called ntfs_iget()/iput() on, otherwise we A)
3027 * leak resources and B) a subsequent mount fails automatically due to
3028 * ntfs_iget() never calling down into our ntfs_read_locked_inode()
3029 * method again... FIXME: Do we need to do this twice now because of
3030 * attribute inodes? I think not, so leave as is for now... (AIA)
3031 */
3032 if (invalidate_inodes(sb)) {
3033 ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
3034 "driver bug.");
3035 /* Copied from fs/super.c. I just love this message. (-; */
3036 printk("NTFS: Busy inodes after umount. Self-destruct in 5 "
3037 "seconds. Have a nice day...\n");
3038 }
3039 /* Errors at this stage are irrelevant. */ 3024 /* Errors at this stage are irrelevant. */
3040err_out_now: 3025err_out_now:
3041 sb->s_fs_info = NULL; 3026 sb->s_fs_info = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5cfeee118158..f1e962cb3b73 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
165 * ocfs2 never allocates in this function - the only time we 165 * ocfs2 never allocates in this function - the only time we
166 * need to use BH_New is when we're extending i_size on a file 166 * need to use BH_New is when we're extending i_size on a file
167 * system which doesn't support holes, in which case BH_New 167 * system which doesn't support holes, in which case BH_New
168 * allows block_prepare_write() to zero. 168 * allows __block_write_begin() to zero.
169 * 169 *
170 * If we see this on a sparse file system, then a truncate has 170 * If we see this on a sparse file system, then a truncate has
171 * raced us and removed the cluster. In this case, we clear 171 * raced us and removed the cluster. In this case, we clear
@@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
407 return ret; 407 return ret;
408} 408}
409 409
410/*
411 * This is called from ocfs2_write_zero_page() which has handled it's
412 * own cluster locking and has ensured allocation exists for those
413 * blocks to be written.
414 */
415int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
416 unsigned from, unsigned to)
417{
418 int ret;
419
420 ret = block_prepare_write(page, from, to, ocfs2_get_block);
421
422 return ret;
423}
424
425/* Taken from ext3. We don't necessarily need the full blown 410/* Taken from ext3. We don't necessarily need the full blown
426 * functionality yet, but IMHO it's better to cut and paste the whole 411 * functionality yet, but IMHO it's better to cut and paste the whole
427 * thing so we can avoid introducing our own bugs (and easily pick up 412 * thing so we can avoid introducing our own bugs (and easily pick up
@@ -732,7 +717,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
732} 717}
733 718
734/* 719/*
735 * Some of this taken from block_prepare_write(). We already have our 720 * Some of this taken from __block_write_begin(). We already have our
736 * mapping by now though, and the entire write will be allocating or 721 * mapping by now though, and the entire write will be allocating or
737 * it won't, so not much need to use BH_New. 722 * it won't, so not much need to use BH_New.
738 * 723 *
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 7606f663da6d..76bfdfda691a 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,9 +22,6 @@
22#ifndef OCFS2_AOPS_H 22#ifndef OCFS2_AOPS_H
23#define OCFS2_AOPS_H 23#define OCFS2_AOPS_H
24 24
25int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
26 unsigned from, unsigned to);
27
28handle_t *ocfs2_start_walk_page_trans(struct inode *inode, 25handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
29 struct page *page, 26 struct page *page,
30 unsigned from, 27 unsigned from,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index a7ebd9d42dc8..75e115f1bd73 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
400 if (inode) { 400 if (inode) {
401 ip = DLMFS_I(inode); 401 ip = DLMFS_I(inode);
402 402
403 inode->i_ino = get_next_ino();
403 inode->i_mode = mode; 404 inode->i_mode = mode;
404 inode->i_uid = current_fsuid(); 405 inode->i_uid = current_fsuid();
405 inode->i_gid = current_fsgid(); 406 inode->i_gid = current_fsgid();
@@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
425 if (!inode) 426 if (!inode)
426 return NULL; 427 return NULL;
427 428
429 inode->i_ino = get_next_ino();
428 inode->i_mode = mode; 430 inode->i_mode = mode;
429 inode->i_uid = current_fsuid(); 431 inode->i_uid = current_fsuid();
430 inode->i_gid = current_fsgid(); 432 inode->i_gid = current_fsgid();
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1ca6867935bb..77b4c04a2809 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -796,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
796 block_end = block_start + (1 << inode->i_blkbits); 796 block_end = block_start + (1 << inode->i_blkbits);
797 797
798 /* 798 /*
799 * block_start is block-aligned. Bump it by one to 799 * block_start is block-aligned. Bump it by one to force
800 * force ocfs2_{prepare,commit}_write() to zero the 800 * __block_write_begin and block_commit_write to zero the
801 * whole block. 801 * whole block.
802 */ 802 */
803 ret = ocfs2_prepare_write_nolock(inode, page, 803 ret = __block_write_begin(page, block_start + 1, 0,
804 block_start + 1, 804 ocfs2_get_block);
805 block_start + 1);
806 if (ret < 0) { 805 if (ret < 0) {
807 mlog_errno(ret); 806 mlog_errno(ret);
808 goto out_unlock; 807 goto out_unlock;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e7bde21149ae..ff5744e1e36f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -742,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry,
742 goto out_commit; 742 goto out_commit;
743 } 743 }
744 744
745 atomic_inc(&inode->i_count); 745 ihold(inode);
746 dentry->d_op = &ocfs2_dentry_ops; 746 dentry->d_op = &ocfs2_dentry_ops;
747 d_instantiate(dentry, inode); 747 d_instantiate(dentry, inode);
748 748
diff --git a/fs/pipe.c b/fs/pipe.c
index 37eb1ebeaa90..d2d7566ce68e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void)
954 if (!inode) 954 if (!inode)
955 goto fail_inode; 955 goto fail_inode;
956 956
957 inode->i_ino = get_next_ino();
958
957 pipe = alloc_pipe_info(inode); 959 pipe = alloc_pipe_info(inode);
958 if (!pipe) 960 if (!pipe)
959 goto fail_iput; 961 goto fail_iput;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 53dc8ad40ae6..9b094c1c8465 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -771,6 +771,8 @@ static const struct file_operations proc_single_file_operations = {
771static int mem_open(struct inode* inode, struct file* file) 771static int mem_open(struct inode* inode, struct file* file)
772{ 772{
773 file->private_data = (void*)((long)current->self_exec_id); 773 file->private_data = (void*)((long)current->self_exec_id);
774 /* OK to pass negative loff_t, we can catch out-of-range */
775 file->f_mode |= FMODE_UNSIGNED_OFFSET;
774 return 0; 776 return 0;
775} 777}
776 778
@@ -1646,6 +1648,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
1646 1648
1647 /* Common stuff */ 1649 /* Common stuff */
1648 ei = PROC_I(inode); 1650 ei = PROC_I(inode);
1651 inode->i_ino = get_next_ino();
1649 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1652 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1650 inode->i_op = &proc_def_inode_operations; 1653 inode->i_op = &proc_def_inode_operations;
1651 1654
@@ -2592,6 +2595,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2592 2595
2593 /* Initialize the inode */ 2596 /* Initialize the inode */
2594 ei = PROC_I(inode); 2597 ei = PROC_I(inode);
2598 inode->i_ino = get_next_ino();
2595 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2599 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
2596 2600
2597 /* 2601 /*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 2fc52552271d..b652cb00906b 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
23 if (!inode) 23 if (!inode)
24 goto out; 24 goto out;
25 25
26 inode->i_ino = get_next_ino();
27
26 sysctl_head_get(head); 28 sysctl_head_get(head);
27 ei = PROC_I(inode); 29 ei = PROC_I(inode);
28 ei->sysctl = head; 30 ei->sysctl = head;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a5ebae70dc6d..67fadb1ad2c1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
58 struct inode * inode = new_inode(sb); 58 struct inode * inode = new_inode(sb);
59 59
60 if (inode) { 60 if (inode) {
61 inode->i_ino = get_next_ino();
61 inode_init_owner(inode, dir, mode); 62 inode_init_owner(inode, dir, mode);
62 inode->i_mapping->a_ops = &ramfs_aops; 63 inode->i_mapping->a_ops = &ramfs_aops;
63 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; 64 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/read_write.c b/fs/read_write.c
index e757ef26e4ce..9cd9d148105d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,6 +31,20 @@ const struct file_operations generic_ro_fops = {
31 31
32EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
33 33
34static int
35__negative_fpos_check(struct file *file, loff_t pos, size_t count)
36{
37 /*
38 * pos or pos+count is negative here, check overflow.
39 * too big "count" will be caught in rw_verify_area().
40 */
41 if ((pos < 0) && (pos + count < pos))
42 return -EOVERFLOW;
43 if (file->f_mode & FMODE_UNSIGNED_OFFSET)
44 return 0;
45 return -EINVAL;
46}
47
34/** 48/**
35 * generic_file_llseek_unlocked - lockless generic llseek implementation 49 * generic_file_llseek_unlocked - lockless generic llseek implementation
36 * @file: file structure to seek on 50 * @file: file structure to seek on
@@ -62,7 +76,9 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
62 break; 76 break;
63 } 77 }
64 78
65 if (offset < 0 || offset > inode->i_sb->s_maxbytes) 79 if (offset < 0 && __negative_fpos_check(file, offset, 0))
80 return -EINVAL;
81 if (offset > inode->i_sb->s_maxbytes)
66 return -EINVAL; 82 return -EINVAL;
67 83
68 /* Special lock needed here? */ 84 /* Special lock needed here? */
@@ -137,7 +153,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
137 offset += file->f_pos; 153 offset += file->f_pos;
138 } 154 }
139 retval = -EINVAL; 155 retval = -EINVAL;
140 if (offset >= 0) { 156 if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) {
141 if (offset != file->f_pos) { 157 if (offset != file->f_pos) {
142 file->f_pos = offset; 158 file->f_pos = offset;
143 file->f_version = 0; 159 file->f_version = 0;
@@ -221,6 +237,7 @@ bad:
221} 237}
222#endif 238#endif
223 239
240
224/* 241/*
225 * rw_verify_area doesn't like huge counts. We limit 242 * rw_verify_area doesn't like huge counts. We limit
226 * them to something that fits in "int" so that others 243 * them to something that fits in "int" so that others
@@ -238,8 +255,11 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
238 if (unlikely((ssize_t) count < 0)) 255 if (unlikely((ssize_t) count < 0))
239 return retval; 256 return retval;
240 pos = *ppos; 257 pos = *ppos;
241 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 258 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
242 return retval; 259 retval = __negative_fpos_check(file, pos, count);
260 if (retval)
261 return retval;
262 }
243 263
244 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 264 if (unlikely(inode->i_flock && mandatory_lock(inode))) {
245 retval = locks_mandatory_area( 265 retval = locks_mandatory_area(
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index c1f93896cb53..41656d40dc5c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -22,8 +22,6 @@
22 22
23int reiserfs_commit_write(struct file *f, struct page *page, 23int reiserfs_commit_write(struct file *f, struct page *page,
24 unsigned from, unsigned to); 24 unsigned from, unsigned to);
25int reiserfs_prepare_write(struct file *f, struct page *page,
26 unsigned from, unsigned to);
27 25
28void reiserfs_evict_inode(struct inode *inode) 26void reiserfs_evict_inode(struct inode *inode)
29{ 27{
@@ -165,7 +163,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
165** but tail is still sitting in a direct item, and we can't write to 163** but tail is still sitting in a direct item, and we can't write to
166** it. So, look through this page, and check all the mapped buffers 164** it. So, look through this page, and check all the mapped buffers
167** to make sure they have valid block numbers. Any that don't need 165** to make sure they have valid block numbers. Any that don't need
168** to be unmapped, so that block_prepare_write will correctly call 166** to be unmapped, so that __block_write_begin will correctly call
169** reiserfs_get_block to convert the tail into an unformatted node 167** reiserfs_get_block to convert the tail into an unformatted node
170*/ 168*/
171static inline void fix_tail_page_for_writing(struct page *page) 169static inline void fix_tail_page_for_writing(struct page *page)
@@ -439,13 +437,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
439} 437}
440 438
441/* special version of get_block that is only used by grab_tail_page right 439/* special version of get_block that is only used by grab_tail_page right
442** now. It is sent to block_prepare_write, and when you try to get a 440** now. It is sent to __block_write_begin, and when you try to get a
443** block past the end of the file (or a block from a hole) it returns 441** block past the end of the file (or a block from a hole) it returns
444** -ENOENT instead of a valid buffer. block_prepare_write expects to 442** -ENOENT instead of a valid buffer. __block_write_begin expects to
445** be able to do i/o on the buffers returned, unless an error value 443** be able to do i/o on the buffers returned, unless an error value
446** is also returned. 444** is also returned.
447** 445**
448** So, this allows block_prepare_write to be used for reading a single block 446** So, this allows __block_write_begin to be used for reading a single block
449** in a page. Where it does not produce a valid page for holes, or past the 447** in a page. Where it does not produce a valid page for holes, or past the
450** end of the file. This turns out to be exactly what we need for reading 448** end of the file. This turns out to be exactly what we need for reading
451** tails for conversion. 449** tails for conversion.
@@ -558,11 +556,12 @@ static int convert_tail_for_hole(struct inode *inode,
558 ** 556 **
559 ** We must fix the tail page for writing because it might have buffers 557 ** We must fix the tail page for writing because it might have buffers
560 ** that are mapped, but have a block number of 0. This indicates tail 558 ** that are mapped, but have a block number of 0. This indicates tail
561 ** data that has been read directly into the page, and block_prepare_write 559 ** data that has been read directly into the page, and
562 ** won't trigger a get_block in this case. 560 ** __block_write_begin won't trigger a get_block in this case.
563 */ 561 */
564 fix_tail_page_for_writing(tail_page); 562 fix_tail_page_for_writing(tail_page);
565 retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); 563 retval = __reiserfs_write_begin(tail_page, tail_start,
564 tail_end - tail_start);
566 if (retval) 565 if (retval)
567 goto unlock; 566 goto unlock;
568 567
@@ -2033,7 +2032,7 @@ static int grab_tail_page(struct inode *inode,
2033 /* start within the page of the last block in the file */ 2032 /* start within the page of the last block in the file */
2034 start = (offset / blocksize) * blocksize; 2033 start = (offset / blocksize) * blocksize;
2035 2034
2036 error = block_prepare_write(page, start, offset, 2035 error = __block_write_begin(page, start, offset - start,
2037 reiserfs_get_block_create_0); 2036 reiserfs_get_block_create_0);
2038 if (error) 2037 if (error)
2039 goto unlock; 2038 goto unlock;
@@ -2628,8 +2627,7 @@ static int reiserfs_write_begin(struct file *file,
2628 return ret; 2627 return ret;
2629} 2628}
2630 2629
2631int reiserfs_prepare_write(struct file *f, struct page *page, 2630int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
2632 unsigned from, unsigned to)
2633{ 2631{
2634 struct inode *inode = page->mapping->host; 2632 struct inode *inode = page->mapping->host;
2635 int ret; 2633 int ret;
@@ -2650,7 +2648,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
2650 th->t_refcount++; 2648 th->t_refcount++;
2651 } 2649 }
2652 2650
2653 ret = block_prepare_write(page, from, to, reiserfs_get_block); 2651 ret = __block_write_begin(page, from, len, reiserfs_get_block);
2654 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2652 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2655 struct reiserfs_transaction_handle *th = current->journal_info; 2653 struct reiserfs_transaction_handle *th = current->journal_info;
2656 /* this gets a little ugly. If reiserfs_get_block returned an 2654 /* this gets a little ugly. If reiserfs_get_block returned an
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 5cbb81e134ac..adf22b485cea 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -160,8 +160,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
160 160
161int reiserfs_commit_write(struct file *f, struct page *page, 161int reiserfs_commit_write(struct file *f, struct page *page,
162 unsigned from, unsigned to); 162 unsigned from, unsigned to);
163int reiserfs_prepare_write(struct file *f, struct page *page,
164 unsigned from, unsigned to);
165/* 163/*
166** reiserfs_unpack 164** reiserfs_unpack
167** Function try to convert tail from direct item into indirect. 165** Function try to convert tail from direct item into indirect.
@@ -200,7 +198,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
200 } 198 }
201 199
202 /* we unpack by finding the page with the tail, and calling 200 /* we unpack by finding the page with the tail, and calling
203 ** reiserfs_prepare_write on that page. This will force a 201 ** __reiserfs_write_begin on that page. This will force a
204 ** reiserfs_get_block to unpack the tail for us. 202 ** reiserfs_get_block to unpack the tail for us.
205 */ 203 */
206 index = inode->i_size >> PAGE_CACHE_SHIFT; 204 index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -210,7 +208,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
210 if (!page) { 208 if (!page) {
211 goto out; 209 goto out;
212 } 210 }
213 retval = reiserfs_prepare_write(NULL, page, write_from, write_from); 211 retval = __reiserfs_write_begin(page, write_from, 0);
214 if (retval) 212 if (retval)
215 goto out_unlock; 213 goto out_unlock;
216 214
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ee78d4a0086a..ba5f51ec3458 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1156 inode->i_ctime = CURRENT_TIME_SEC; 1156 inode->i_ctime = CURRENT_TIME_SEC;
1157 reiserfs_update_sd(&th, inode); 1157 reiserfs_update_sd(&th, inode);
1158 1158
1159 atomic_inc(&inode->i_count); 1159 ihold(inode);
1160 d_instantiate(dentry, inode); 1160 d_instantiate(dentry, inode);
1161 retval = journal_end(&th, dir->i_sb, jbegin_count); 1161 retval = journal_end(&th, dir->i_sb, jbegin_count);
1162 reiserfs_write_unlock(dir->i_sb); 1162 reiserfs_write_unlock(dir->i_sb);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c4cf273c672..5d04a7828e7a 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -418,13 +418,11 @@ static inline __u32 xattr_hash(const char *msg, int len)
418 418
419int reiserfs_commit_write(struct file *f, struct page *page, 419int reiserfs_commit_write(struct file *f, struct page *page,
420 unsigned from, unsigned to); 420 unsigned from, unsigned to);
421int reiserfs_prepare_write(struct file *f, struct page *page,
422 unsigned from, unsigned to);
423 421
424static void update_ctime(struct inode *inode) 422static void update_ctime(struct inode *inode)
425{ 423{
426 struct timespec now = current_fs_time(inode->i_sb); 424 struct timespec now = current_fs_time(inode->i_sb);
427 if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink || 425 if (inode_unhashed(inode) || !inode->i_nlink ||
428 timespec_equal(&inode->i_ctime, &now)) 426 timespec_equal(&inode->i_ctime, &now))
429 return; 427 return;
430 428
@@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
532 rxh->h_hash = cpu_to_le32(xahash); 530 rxh->h_hash = cpu_to_le32(xahash);
533 } 531 }
534 532
535 err = reiserfs_prepare_write(NULL, page, page_offset, 533 err = __reiserfs_write_begin(page, page_offset, chunk + skip);
536 page_offset + chunk + skip);
537 if (!err) { 534 if (!err) {
538 if (buffer) 535 if (buffer)
539 memcpy(data + skip, buffer + buffer_pos, chunk); 536 memcpy(data + skip, buffer + buffer_pos, chunk);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0e7cb1395a94..05d6b0e78c95 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,9 +462,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
462 if (size) { 462 if (size) {
463 char *p; 463 char *p;
464 464
465 spin_lock(&dcache_lock);
466 p = __d_path(path, root, buf, size); 465 p = __d_path(path, root, buf, size);
467 spin_unlock(&dcache_lock);
468 res = PTR_ERR(p); 466 res = PTR_ERR(p);
469 if (!IS_ERR(p)) { 467 if (!IS_ERR(p)) {
470 char *end = mangle_path(buf, p, esc); 468 char *end = mangle_path(buf, p, esc);
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index 00a70cab1f36..f678d421e541 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -406,21 +406,15 @@ void
406smb_renew_times(struct dentry * dentry) 406smb_renew_times(struct dentry * dentry)
407{ 407{
408 dget(dentry); 408 dget(dentry);
409 spin_lock(&dentry->d_lock); 409 dentry->d_time = jiffies;
410 for (;;) {
411 struct dentry *parent;
412 410
413 dentry->d_time = jiffies; 411 while (!IS_ROOT(dentry)) {
414 if (IS_ROOT(dentry)) 412 struct dentry *parent = dget_parent(dentry);
415 break;
416 parent = dentry->d_parent;
417 dget(parent);
418 spin_unlock(&dentry->d_lock);
419 dput(dentry); 413 dput(dentry);
420 dentry = parent; 414 dentry = parent;
421 spin_lock(&dentry->d_lock); 415
416 dentry->d_time = jiffies;
422 } 417 }
423 spin_unlock(&dentry->d_lock);
424 dput(dentry); 418 dput(dentry);
425} 419}
426 420
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 8fc5e50e142f..f6e9ee59757e 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -229,7 +229,6 @@ smb_invalidate_inodes(struct smb_sb_info *server)
229{ 229{
230 VERBOSE("\n"); 230 VERBOSE("\n");
231 shrink_dcache_sb(SB_of(server)); 231 shrink_dcache_sb(SB_of(server));
232 invalidate_inodes(SB_of(server));
233} 232}
234 233
235/* 234/*
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index 71c29b6670b4..3dcf638d4d3a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -332,16 +332,15 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
332 * and store it in reversed order [see reverse_string()] 332 * and store it in reversed order [see reverse_string()]
333 */ 333 */
334 dget(entry); 334 dget(entry);
335 spin_lock(&entry->d_lock);
336 while (!IS_ROOT(entry)) { 335 while (!IS_ROOT(entry)) {
337 struct dentry *parent; 336 struct dentry *parent;
338 337
339 if (maxlen < (3<<unicode)) { 338 if (maxlen < (3<<unicode)) {
340 spin_unlock(&entry->d_lock);
341 dput(entry); 339 dput(entry);
342 return -ENAMETOOLONG; 340 return -ENAMETOOLONG;
343 } 341 }
344 342
343 spin_lock(&entry->d_lock);
345 len = server->ops->convert(path, maxlen-2, 344 len = server->ops->convert(path, maxlen-2,
346 entry->d_name.name, entry->d_name.len, 345 entry->d_name.name, entry->d_name.len,
347 server->local_nls, server->remote_nls); 346 server->local_nls, server->remote_nls);
@@ -359,15 +358,12 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
359 } 358 }
360 *path++ = '\\'; 359 *path++ = '\\';
361 maxlen -= len+1; 360 maxlen -= len+1;
362
363 parent = entry->d_parent;
364 dget(parent);
365 spin_unlock(&entry->d_lock); 361 spin_unlock(&entry->d_lock);
362
363 parent = dget_parent(entry);
366 dput(entry); 364 dput(entry);
367 entry = parent; 365 entry = parent;
368 spin_lock(&entry->d_lock);
369 } 366 }
370 spin_unlock(&entry->d_lock);
371 dput(entry); 367 dput(entry);
372 reverse_string(buf, path-buf); 368 reverse_string(buf, path-buf);
373 369
diff --git a/fs/super.c b/fs/super.c
index 8819e3a7ff20..b9c9869165db 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb)
273 get_fs_excl(); 273 get_fs_excl();
274 sb->s_flags &= ~MS_ACTIVE; 274 sb->s_flags &= ~MS_ACTIVE;
275 275
276 /* bad name - it should be evict_inodes() */ 276 fsnotify_unmount_inodes(&sb->s_inodes);
277 invalidate_inodes(sb); 277
278 evict_inodes(sb);
278 279
279 if (sop->put_super) 280 if (sop->put_super)
280 sop->put_super(sb); 281 sop->put_super(sb);
281 282
282 /* Forget any remaining inodes */ 283 if (!list_empty(&sb->s_inodes)) {
283 if (invalidate_inodes(sb)) {
284 printk("VFS: Busy inodes after unmount of %s. " 284 printk("VFS: Busy inodes after unmount of %s. "
285 "Self-destruct in 5 seconds. Have a nice day...\n", 285 "Self-destruct in 5 seconds. Have a nice day...\n",
286 sb->s_id); 286 sb->s_id);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 33e047b59b8d..11e7f7d11cd0 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
126 126
127 inode->i_ctime = CURRENT_TIME_SEC; 127 inode->i_ctime = CURRENT_TIME_SEC;
128 inode_inc_link_count(inode); 128 inode_inc_link_count(inode);
129 atomic_inc(&inode->i_count); 129 ihold(inode);
130 130
131 return add_nondir(dentry, inode); 131 return add_nondir(dentry, inode);
132} 132}
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 87ebcce72213..14f64b689d7f 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
550 550
551 lock_2_inodes(dir, inode); 551 lock_2_inodes(dir, inode);
552 inc_nlink(inode); 552 inc_nlink(inode);
553 atomic_inc(&inode->i_count); 553 ihold(inode);
554 inode->i_ctime = ubifs_current_time(inode); 554 inode->i_ctime = ubifs_current_time(inode);
555 dir->i_size += sz_change; 555 dir->i_size += sz_change;
556 dir_ui->ui_size = dir->i_size; 556 dir_ui->ui_size = dir->i_size;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index bf5fc674193c..6d8dc02baebb 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1101 inc_nlink(inode); 1101 inc_nlink(inode);
1102 inode->i_ctime = current_fs_time(inode->i_sb); 1102 inode->i_ctime = current_fs_time(inode->i_sb);
1103 mark_inode_dirty(inode); 1103 mark_inode_dirty(inode);
1104 atomic_inc(&inode->i_count); 1104 ihold(inode);
1105 d_instantiate(dentry, inode); 1105 d_instantiate(dentry, inode);
1106 unlock_kernel(); 1106 unlock_kernel();
1107 1107
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index b056f02b1fb3..12f39b9e4437 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
180 180
181 inode->i_ctime = CURRENT_TIME_SEC; 181 inode->i_ctime = CURRENT_TIME_SEC;
182 inode_inc_link_count(inode); 182 inode_inc_link_count(inode);
183 atomic_inc(&inode->i_count); 183 ihold(inode);
184 184
185 error = ufs_add_nondir(dentry, inode); 185 error = ufs_add_nondir(dentry, inode);
186 unlock_kernel(); 186 unlock_kernel();
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ba5312802aa9..63fd2c07cb57 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1580,6 +1580,7 @@ xfs_mapping_buftarg(
1580 XFS_BUFTARG_NAME(btp)); 1580 XFS_BUFTARG_NAME(btp));
1581 return ENOMEM; 1581 return ENOMEM;
1582 } 1582 }
1583 inode->i_ino = get_next_ino();
1583 inode->i_mode = S_IFBLK; 1584 inode->i_mode = S_IFBLK;
1584 inode->i_bdev = bdev; 1585 inode->i_bdev = bdev;
1585 inode->i_rdev = bdev->bd_dev; 1586 inode->i_rdev = bdev->bd_dev;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index ec858e09d546..96107efc0c61 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -317,7 +317,7 @@ xfs_vn_link(
317 if (unlikely(error)) 317 if (unlikely(error))
318 return -error; 318 return -error;
319 319
320 atomic_inc(&inode->i_count); 320 ihold(inode);
321 d_instantiate(dentry, inode); 321 d_instantiate(dentry, inode);
322 return 0; 322 return 0;
323} 323}
@@ -760,7 +760,9 @@ xfs_setup_inode(
760 760
761 inode->i_ino = ip->i_ino; 761 inode->i_ino = ip->i_ino;
762 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
763 inode_add_to_lists(ip->i_mount->m_super, inode); 763
764 inode_sb_list_add(inode);
765 insert_inode_hash(inode);
764 766
765 inode->i_mode = ip->i_d.di_mode; 767 inode->i_mode = ip->i_d.di_mode;
766 inode->i_nlink = ip->i_d.di_nlink; 768 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index ab31ce5aeaf9..cf808782c065 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -576,7 +576,7 @@ xfs_max_file_offset(
576 576
577 /* Figure out maximum filesize, on Linux this can depend on 577 /* Figure out maximum filesize, on Linux this can depend on
578 * the filesystem blocksize (on 32 bit platforms). 578 * the filesystem blocksize (on 32 bit platforms).
579 * __block_prepare_write does this in an [unsigned] long... 579 * __block_write_begin does this in an [unsigned] long...
580 * page->index << (PAGE_CACHE_SHIFT - bbits) 580 * page->index << (PAGE_CACHE_SHIFT - bbits)
581 * So, for page sized blocks (4K on 32 bit platforms), 581 * So, for page sized blocks (4K on 32 bit platforms),
582 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is 582 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fac52290de90..fb2ca2e4cdc9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -500,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *);
500#define IHOLD(ip) \ 500#define IHOLD(ip) \
501do { \ 501do { \
502 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ 502 ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
503 atomic_inc(&(VFS_I(ip)->i_count)); \ 503 ihold(VFS_I(ip)); \
504 trace_xfs_ihold(ip, _THIS_IP_); \ 504 trace_xfs_ihold(ip, _THIS_IP_); \
505} while (0) 505} while (0)
506 506
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index dd1b25b2641c..68d1fe7b877c 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -212,7 +212,6 @@ int generic_write_end(struct file *, struct address_space *,
212 loff_t, unsigned, unsigned, 212 loff_t, unsigned, unsigned,
213 struct page *, void *); 213 struct page *, void *);
214void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); 214void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
215int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
216int cont_write_begin(struct file *, struct address_space *, loff_t, 215int cont_write_begin(struct file *, struct address_space *, loff_t,
217 unsigned, unsigned, struct page **, void **, 216 unsigned, unsigned, struct page **, void **,
218 get_block_t *, loff_t *); 217 get_block_t *, loff_t *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4658777b41cc..240eb1d4f876 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -92,6 +92,9 @@ struct inodes_stat_t {
92/* Expect random access pattern */ 92/* Expect random access pattern */
93#define FMODE_RANDOM ((__force fmode_t)0x1000) 93#define FMODE_RANDOM ((__force fmode_t)0x1000)
94 94
95/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
96#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
97
95/* File was opened by fanotify and shouldn't generate fanotify events */ 98/* File was opened by fanotify and shouldn't generate fanotify events */
96#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 99#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
97 100
@@ -722,7 +725,8 @@ struct posix_acl;
722 725
723struct inode { 726struct inode {
724 struct hlist_node i_hash; 727 struct hlist_node i_hash;
725 struct list_head i_list; /* backing dev IO list */ 728 struct list_head i_wb_list; /* backing dev IO list */
729 struct list_head i_lru; /* inode LRU list */
726 struct list_head i_sb_list; 730 struct list_head i_sb_list;
727 struct list_head i_dentry; 731 struct list_head i_dentry;
728 unsigned long i_ino; 732 unsigned long i_ino;
@@ -789,6 +793,11 @@ struct inode {
789 void *i_private; /* fs or device private pointer */ 793 void *i_private; /* fs or device private pointer */
790}; 794};
791 795
796static inline int inode_unhashed(struct inode *inode)
797{
798 return hlist_unhashed(&inode->i_hash);
799}
800
792/* 801/*
793 * inode->i_mutex nesting subclasses for the lock validator: 802 * inode->i_mutex nesting subclasses for the lock validator:
794 * 803 *
@@ -1639,16 +1648,17 @@ struct super_operations {
1639 * 1648 *
1640 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1649 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1641 */ 1650 */
1642#define I_DIRTY_SYNC 1 1651#define I_DIRTY_SYNC (1 << 0)
1643#define I_DIRTY_DATASYNC 2 1652#define I_DIRTY_DATASYNC (1 << 1)
1644#define I_DIRTY_PAGES 4 1653#define I_DIRTY_PAGES (1 << 2)
1645#define __I_NEW 3 1654#define __I_NEW 3
1646#define I_NEW (1 << __I_NEW) 1655#define I_NEW (1 << __I_NEW)
1647#define I_WILL_FREE 16 1656#define I_WILL_FREE (1 << 4)
1648#define I_FREEING 32 1657#define I_FREEING (1 << 5)
1649#define I_CLEAR 64 1658#define I_CLEAR (1 << 6)
1650#define __I_SYNC 7 1659#define __I_SYNC 7
1651#define I_SYNC (1 << __I_SYNC) 1660#define I_SYNC (1 << __I_SYNC)
1661#define I_REFERENCED (1 << 8)
1652 1662
1653#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1663#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1654 1664
@@ -1740,6 +1750,7 @@ static inline void file_accessed(struct file *file)
1740} 1750}
1741 1751
1742int sync_inode(struct inode *inode, struct writeback_control *wbc); 1752int sync_inode(struct inode *inode, struct writeback_control *wbc);
1753int sync_inode_metadata(struct inode *inode, int wait);
1743 1754
1744struct file_system_type { 1755struct file_system_type {
1745 const char *name; 1756 const char *name;
@@ -2084,7 +2095,6 @@ extern int check_disk_change(struct block_device *);
2084extern int __invalidate_device(struct block_device *); 2095extern int __invalidate_device(struct block_device *);
2085extern int invalidate_partition(struct gendisk *, int); 2096extern int invalidate_partition(struct gendisk *, int);
2086#endif 2097#endif
2087extern int invalidate_inodes(struct super_block *);
2088unsigned long invalidate_mapping_pages(struct address_space *mapping, 2098unsigned long invalidate_mapping_pages(struct address_space *mapping,
2089 pgoff_t start, pgoff_t end); 2099 pgoff_t start, pgoff_t end);
2090 2100
@@ -2168,7 +2178,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
2168 2178
2169extern int inode_init_always(struct super_block *, struct inode *); 2179extern int inode_init_always(struct super_block *, struct inode *);
2170extern void inode_init_once(struct inode *); 2180extern void inode_init_once(struct inode *);
2171extern void inode_add_to_lists(struct super_block *, struct inode *); 2181extern void ihold(struct inode * inode);
2172extern void iput(struct inode *); 2182extern void iput(struct inode *);
2173extern struct inode * igrab(struct inode *); 2183extern struct inode * igrab(struct inode *);
2174extern ino_t iunique(struct super_block *, ino_t); 2184extern ino_t iunique(struct super_block *, ino_t);
@@ -2188,11 +2198,11 @@ extern struct inode * iget_locked(struct super_block *, unsigned long);
2188extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); 2198extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
2189extern int insert_inode_locked(struct inode *); 2199extern int insert_inode_locked(struct inode *);
2190extern void unlock_new_inode(struct inode *); 2200extern void unlock_new_inode(struct inode *);
2201extern unsigned int get_next_ino(void);
2191 2202
2192extern void __iget(struct inode * inode); 2203extern void __iget(struct inode * inode);
2193extern void iget_failed(struct inode *); 2204extern void iget_failed(struct inode *);
2194extern void end_writeback(struct inode *); 2205extern void end_writeback(struct inode *);
2195extern void destroy_inode(struct inode *);
2196extern void __destroy_inode(struct inode *); 2206extern void __destroy_inode(struct inode *);
2197extern struct inode *new_inode(struct super_block *); 2207extern struct inode *new_inode(struct super_block *);
2198extern int should_remove_suid(struct dentry *); 2208extern int should_remove_suid(struct dentry *);
@@ -2200,9 +2210,11 @@ extern int file_remove_suid(struct file *);
2200 2210
2201extern void __insert_inode_hash(struct inode *, unsigned long hashval); 2211extern void __insert_inode_hash(struct inode *, unsigned long hashval);
2202extern void remove_inode_hash(struct inode *); 2212extern void remove_inode_hash(struct inode *);
2203static inline void insert_inode_hash(struct inode *inode) { 2213static inline void insert_inode_hash(struct inode *inode)
2214{
2204 __insert_inode_hash(inode, inode->i_ino); 2215 __insert_inode_hash(inode, inode->i_ino);
2205} 2216}
2217extern void inode_sb_list_add(struct inode *inode);
2206 2218
2207#ifdef CONFIG_BLOCK 2219#ifdef CONFIG_BLOCK
2208extern void submit_bio(int, struct bio *); 2220extern void submit_bio(int, struct bio *);
@@ -2485,7 +2497,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
2485struct ctl_table; 2497struct ctl_table;
2486int proc_nr_files(struct ctl_table *table, int write, 2498int proc_nr_files(struct ctl_table *table, int write,
2487 void __user *buffer, size_t *lenp, loff_t *ppos); 2499 void __user *buffer, size_t *lenp, loff_t *ppos);
2488 2500int proc_nr_dentry(struct ctl_table *table, int write,
2501 void __user *buffer, size_t *lenp, loff_t *ppos);
2502int proc_nr_inodes(struct ctl_table *table, int write,
2503 void __user *buffer, size_t *lenp, loff_t *ppos);
2489int __init get_filesystem_list(char *buf); 2504int __init get_filesystem_list(char *buf);
2490 2505
2491#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) 2506#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
diff --git a/include/linux/list.h b/include/linux/list.h
index 88a000617d77..9a5f8a71810c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -636,6 +636,12 @@ static inline void hlist_add_after(struct hlist_node *n,
636 next->next->pprev = &next->next; 636 next->next->pprev = &next->next;
637} 637}
638 638
639/* after that we'll appear to be on some hlist and hlist_del will work */
640static inline void hlist_add_fake(struct hlist_node *n)
641{
642 n->pprev = &n->next;
643}
644
639/* 645/*
640 * Move a list from one list head to another. Fixup the pprev 646 * Move a list from one list head to another. Fixup the pprev
641 * reference of the first entry if it exists. 647 * reference of the first entry if it exists.
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 91a4177e60ce..5ca47e59b727 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2072,6 +2072,8 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
2072void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); 2072void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
2073int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); 2073int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
2074 2074
2075int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
2076
2075/* namei.c */ 2077/* namei.c */
2076void set_de_name_and_namelen(struct reiserfs_dir_entry *de); 2078void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
2077int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, 2079int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c7299d2ace6b..d5c7aaadda59 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,8 +10,6 @@
10struct backing_dev_info; 10struct backing_dev_info;
11 11
12extern spinlock_t inode_lock; 12extern spinlock_t inode_lock;
13extern struct list_head inode_in_use;
14extern struct list_head inode_unused;
15 13
16/* 14/*
17 * fs/fs-writeback.c 15 * fs/fs-writeback.c
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e1e7b9635f5d..3a61ffefe884 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -116,6 +116,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
116 116
117 inode = new_inode(sb); 117 inode = new_inode(sb);
118 if (inode) { 118 if (inode) {
119 inode->i_ino = get_next_ino();
119 inode->i_mode = mode; 120 inode->i_mode = mode;
120 inode->i_uid = current_fsuid(); 121 inode->i_uid = current_fsuid();
121 inode->i_gid = current_fsgid(); 122 inode->i_gid = current_fsgid();
@@ -769,7 +770,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
769 770
770 inode = dentry->d_inode; 771 inode = dentry->d_inode;
771 if (inode) 772 if (inode)
772 atomic_inc(&inode->i_count); 773 ihold(inode);
773 err = mnt_want_write(ipc_ns->mq_mnt); 774 err = mnt_want_write(ipc_ns->mq_mnt);
774 if (err) 775 if (err)
775 goto out_err; 776 goto out_err;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7b69b8d0313d..9270d532ec3c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -777,6 +777,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
777 struct inode *inode = new_inode(sb); 777 struct inode *inode = new_inode(sb);
778 778
779 if (inode) { 779 if (inode) {
780 inode->i_ino = get_next_ino();
780 inode->i_mode = mode; 781 inode->i_mode = mode;
781 inode->i_uid = current_fsuid(); 782 inode->i_uid = current_fsuid();
782 inode->i_gid = current_fsgid(); 783 inode->i_gid = current_fsgid();
diff --git a/kernel/futex.c b/kernel/futex.c
index a118bf160e0b..6c683b37f2ce 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -169,7 +169,7 @@ static void get_futex_key_refs(union futex_key *key)
169 169
170 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { 170 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
171 case FUT_OFF_INODE: 171 case FUT_OFF_INODE:
172 atomic_inc(&key->shared.inode->i_count); 172 ihold(key->shared.inode);
173 break; 173 break;
174 case FUT_OFF_MMSHARED: 174 case FUT_OFF_MMSHARED:
175 atomic_inc(&key->private.mm->mm_count); 175 atomic_inc(&key->private.mm->mm_count);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 48d9d689498f..c33a1edb799f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1338,14 +1338,14 @@ static struct ctl_table fs_table[] = {
1338 .data = &inodes_stat, 1338 .data = &inodes_stat,
1339 .maxlen = 2*sizeof(int), 1339 .maxlen = 2*sizeof(int),
1340 .mode = 0444, 1340 .mode = 0444,
1341 .proc_handler = proc_dointvec, 1341 .proc_handler = proc_nr_inodes,
1342 }, 1342 },
1343 { 1343 {
1344 .procname = "inode-state", 1344 .procname = "inode-state",
1345 .data = &inodes_stat, 1345 .data = &inodes_stat,
1346 .maxlen = 7*sizeof(int), 1346 .maxlen = 7*sizeof(int),
1347 .mode = 0444, 1347 .mode = 0444,
1348 .proc_handler = proc_dointvec, 1348 .proc_handler = proc_nr_inodes,
1349 }, 1349 },
1350 { 1350 {
1351 .procname = "file-nr", 1351 .procname = "file-nr",
@@ -1375,7 +1375,7 @@ static struct ctl_table fs_table[] = {
1375 .data = &dentry_stat, 1375 .data = &dentry_stat,
1376 .maxlen = 6*sizeof(int), 1376 .maxlen = 6*sizeof(int),
1377 .mode = 0444, 1377 .mode = 0444,
1378 .proc_handler = proc_dointvec, 1378 .proc_handler = proc_nr_dentry,
1379 }, 1379 },
1380 { 1380 {
1381 .procname = "overflowuid", 1381 .procname = "overflowuid",
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f2eb27884ffa..027100d30227 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -74,11 +74,11 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
74 74
75 nr_wb = nr_dirty = nr_io = nr_more_io = 0; 75 nr_wb = nr_dirty = nr_io = nr_more_io = 0;
76 spin_lock(&inode_lock); 76 spin_lock(&inode_lock);
77 list_for_each_entry(inode, &wb->b_dirty, i_list) 77 list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
78 nr_dirty++; 78 nr_dirty++;
79 list_for_each_entry(inode, &wb->b_io, i_list) 79 list_for_each_entry(inode, &wb->b_io, i_wb_list)
80 nr_io++; 80 nr_io++;
81 list_for_each_entry(inode, &wb->b_more_io, i_list) 81 list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
82 nr_more_io++; 82 nr_more_io++;
83 spin_unlock(&inode_lock); 83 spin_unlock(&inode_lock);
84 84
diff --git a/mm/shmem.c b/mm/shmem.c
index 080b09a57a8f..f6d350e8adc5 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1586,6 +1586,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
1586 1586
1587 inode = new_inode(sb); 1587 inode = new_inode(sb);
1588 if (inode) { 1588 if (inode) {
1589 inode->i_ino = get_next_ino();
1589 inode_init_owner(inode, dir, mode); 1590 inode_init_owner(inode, dir, mode);
1590 inode->i_blocks = 0; 1591 inode->i_blocks = 0;
1591 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; 1592 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1903,7 +1904,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
1903 dir->i_size += BOGO_DIRENT_SIZE; 1904 dir->i_size += BOGO_DIRENT_SIZE;
1904 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1905 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1905 inc_nlink(inode); 1906 inc_nlink(inode);
1906 atomic_inc(&inode->i_count); /* New dentry reference */ 1907 ihold(inode); /* New dentry reference */
1907 dget(dentry); /* Extra pinning count for the created dentry */ 1908 dget(dentry); /* Extra pinning count for the created dentry */
1908 d_instantiate(dentry, inode); 1909 d_instantiate(dentry, inode);
1909out: 1910out:
@@ -2146,7 +2147,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2146 if (*len < 3) 2147 if (*len < 3)
2147 return 255; 2148 return 255;
2148 2149
2149 if (hlist_unhashed(&inode->i_hash)) { 2150 if (inode_unhashed(inode)) {
2150 /* Unfortunately insert_inode_hash is not idempotent, 2151 /* Unfortunately insert_inode_hash is not idempotent,
2151 * so as we hash inodes here rather than at creation 2152 * so as we hash inodes here rather than at creation
2152 * time, we need a lock to ensure we only try 2153 * time, we need a lock to ensure we only try
@@ -2154,7 +2155,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
2154 */ 2155 */
2155 static DEFINE_SPINLOCK(lock); 2156 static DEFINE_SPINLOCK(lock);
2156 spin_lock(&lock); 2157 spin_lock(&lock);
2157 if (hlist_unhashed(&inode->i_hash)) 2158 if (inode_unhashed(inode))
2158 __insert_inode_hash(inode, 2159 __insert_inode_hash(inode,
2159 inode->i_ino + inode->i_generation); 2160 inode->i_ino + inode->i_generation);
2160 spin_unlock(&lock); 2161 spin_unlock(&lock);
diff --git a/net/socket.c b/net/socket.c
index 7f67c072d496..ee3cd280c76e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -377,7 +377,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
377 &socket_file_ops); 377 &socket_file_ops);
378 if (unlikely(!file)) { 378 if (unlikely(!file)) {
379 /* drop dentry, keep inode */ 379 /* drop dentry, keep inode */
380 atomic_inc(&path.dentry->d_inode->i_count); 380 ihold(path.dentry->d_inode);
381 path_put(&path); 381 path_put(&path);
382 put_unused_fd(fd); 382 put_unused_fd(fd);
383 return -ENFILE; 383 return -ENFILE;
@@ -480,6 +480,7 @@ static struct socket *sock_alloc(void)
480 sock = SOCKET_I(inode); 480 sock = SOCKET_I(inode);
481 481
482 kmemcheck_annotate_bitfield(sock, type); 482 kmemcheck_annotate_bitfield(sock, type);
483 inode->i_ino = get_next_ino();
483 inode->i_mode = S_IFSOCK | S_IRWXUGO; 484 inode->i_mode = S_IFSOCK | S_IRWXUGO;
484 inode->i_uid = current_fsuid(); 485 inode->i_uid = current_fsuid();
485 inode->i_gid = current_fsgid(); 486 inode->i_gid = current_fsgid();
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 52f252432144..7df92d237cb8 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
445 struct inode *inode = new_inode(sb); 445 struct inode *inode = new_inode(sb);
446 if (!inode) 446 if (!inode)
447 return NULL; 447 return NULL;
448 inode->i_ino = get_next_ino();
448 inode->i_mode = mode; 449 inode->i_mode = mode;
449 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 450 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
450 switch(mode & S_IFMT) { 451 switch(mode & S_IFMT) {
diff --git a/security/apparmor/path.c b/security/apparmor/path.c
index 82396050f186..36cc0cc39e78 100644
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -72,10 +72,8 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
72 path_get(&root); 72 path_get(&root);
73 } 73 }
74 74
75 spin_lock(&dcache_lock);
76 tmp = root; 75 tmp = root;
77 res = __d_path(path, &tmp, buf, buflen); 76 res = __d_path(path, &tmp, buf, buflen);
78 spin_unlock(&dcache_lock);
79 77
80 *name = res; 78 *name = res;
81 /* handle error conditions - and still allow a partial path to 79 /* handle error conditions - and still allow a partial path to
diff --git a/security/inode.c b/security/inode.c
index 88839866cbcd..cb8f47c66a58 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -61,6 +61,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev)
61 struct inode *inode = new_inode(sb); 61 struct inode *inode = new_inode(sb);
62 62
63 if (inode) { 63 if (inode) {
64 inode->i_ino = get_next_ino();
64 inode->i_mode = mode; 65 inode->i_mode = mode;
65 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 66 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
66 switch (mode & S_IFMT) { 67 switch (mode & S_IFMT) {
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 87e0556bae70..55a755c1a1bd 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -978,6 +978,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
978 struct inode *ret = new_inode(sb); 978 struct inode *ret = new_inode(sb);
979 979
980 if (ret) { 980 if (ret) {
981 ret->i_ino = get_next_ino();
981 ret->i_mode = mode; 982 ret->i_mode = mode;
982 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; 983 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
983 } 984 }
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index ed8ccd680102..1d0bf8fa1922 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -127,10 +127,8 @@ char *tomoyo_realpath_from_path(struct path *path)
127 /* If we don't have a vfsmount, we can't calculate. */ 127 /* If we don't have a vfsmount, we can't calculate. */
128 if (!path->mnt) 128 if (!path->mnt)
129 break; 129 break;
130 spin_lock(&dcache_lock);
131 /* go to whatever namespace root we are under */ 130 /* go to whatever namespace root we are under */
132 pos = __d_path(path, &ns_root, buf, buf_len); 131 pos = __d_path(path, &ns_root, buf, buf_len);
133 spin_unlock(&dcache_lock);
134 /* Prepend "/proc" prefix if using internal proc vfs mount. */ 132 /* Prepend "/proc" prefix if using internal proc vfs mount. */
135 if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) && 133 if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
136 (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) { 134 (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {