aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2010-08-11 03:36:51 -0400
committerJiri Kosina <jkosina@suse.cz>2010-08-11 03:36:51 -0400
commit6396fc3b3ff3f6b942992b653a62df11dcef9bea (patch)
treedb3c7cbe833b43c653adc99f70941431c5ff7c4e /fs
parent4785879e4d340e24e54f6de2ccfc42728b912808 (diff)
parent3d30701b58970425e1d45994d6cb82f828924fdd (diff)
Merge branch 'master' into for-next
Conflicts: fs/exofs/inode.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_inode.c19
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/adfs/inode.c16
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/file.c11
-rw-r--r--fs/affs/inode.c38
-rw-r--r--fs/affs/super.c32
-rw-r--r--fs/afs/inode.c5
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/attr.c88
-rw-r--r--fs/autofs/root.c67
-rw-r--r--fs/autofs4/root.c49
-rw-r--r--fs/bfs/bfs.h1
-rw-r--r--fs/bfs/file.c17
-rw-r--r--fs/bfs/inode.c116
-rw-r--r--fs/binfmt_misc.c5
-rw-r--r--fs/bio.c5
-rw-r--r--fs/block_dev.c27
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/inode.c40
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/volumes.c18
-rw-r--r--fs/buffer.c180
-rw-r--r--fs/cachefiles/bind.c2
-rw-r--r--fs/cachefiles/daemon.c6
-rw-r--r--fs/cifs/cifsfs.c15
-rw-r--r--fs/cifs/inode.c86
-rw-r--r--fs/coda/inode.c8
-rw-r--r--fs/coda/psdev.c12
-rw-r--r--fs/coda/upcall.c12
-rw-r--r--fs/compat.c15
-rw-r--r--fs/compat_ioctl.c37
-rw-r--r--fs/cramfs/inode.c88
-rw-r--r--fs/dcache.c39
-rw-r--r--fs/direct-io.c74
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/file.c60
-rw-r--r--fs/ecryptfs/inode.c112
-rw-r--r--fs/ecryptfs/messaging.c2
-rw-r--r--fs/ecryptfs/super.c14
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/exofs.h3
-rw-r--r--fs/exofs/file.c1
-rw-r--r--fs/exofs/inode.c127
-rw-r--r--fs/exofs/ios.c2
-rw-r--r--fs/exofs/super.c2
-rw-r--r--fs/ext2/balloc.c11
-rw-r--r--fs/ext2/dir.c23
-rw-r--r--fs/ext2/ext2.h5
-rw-r--r--fs/ext2/ialloc.c13
-rw-r--r--fs/ext2/inode.c87
-rw-r--r--fs/ext2/super.c14
-rw-r--r--fs/ext2/xattr.c25
-rw-r--r--fs/ext3/ialloc.c12
-rw-r--r--fs/ext3/inode.c63
-rw-r--r--fs/ext3/super.c14
-rw-r--r--fs/ext3/xattr.c12
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inode.c53
-rw-r--r--fs/ext4/super.c10
-rw-r--r--fs/ext4/xattr.c12
-rw-r--r--fs/fat/fat.h1
-rw-r--r--fs/fat/file.c49
-rw-r--r--fs/fat/inode.c26
-rw-r--r--fs/file_table.c9
-rw-r--r--fs/freevxfs/vxfs_extern.h2
-rw-r--r--fs/freevxfs/vxfs_inode.c8
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fs-writeback.c169
-rw-r--r--fs/fuse/dir.c17
-rw-r--r--fs/fuse/inode.c6
-rw-r--r--fs/gfs2/aops.c10
-rw-r--r--fs/gfs2/inode.c27
-rw-r--r--fs/gfs2/log.c4
-rw-r--r--fs/gfs2/meta_io.c8
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/ops_inode.c18
-rw-r--r--fs/gfs2/super.c43
-rw-r--r--fs/gfs2/xattr.c24
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c70
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/inode.c77
-rw-r--r--fs/hfsplus/super.c10
-rw-r--r--fs/hostfs/hostfs.h22
-rw-r--r--fs/hostfs/hostfs_kern.c517
-rw-r--r--fs/hostfs/hostfs_user.c112
-rw-r--r--fs/hpfs/file.c11
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c24
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs.c8
-rw-r--r--fs/hugetlbfs/inode.c41
-rw-r--r--fs/inode.c185
-rw-r--r--fs/jffs2/background.c1
-rw-r--r--fs/jffs2/build.c1
-rw-r--r--fs/jffs2/compr.c5
-rw-r--r--fs/jffs2/compr.h1
-rw-r--r--fs/jffs2/compr_lzo.c1
-rw-r--r--fs/jffs2/compr_rtime.c1
-rw-r--r--fs/jffs2/compr_rubin.c1
-rw-r--r--fs/jffs2/compr_zlib.c1
-rw-r--r--fs/jffs2/debug.c1
-rw-r--r--fs/jffs2/debug.h1
-rw-r--r--fs/jffs2/dir.c17
-rw-r--r--fs/jffs2/erase.c1
-rw-r--r--fs/jffs2/file.c1
-rw-r--r--fs/jffs2/fs.c11
-rw-r--r--fs/jffs2/gc.c1
-rw-r--r--fs/jffs2/ioctl.c1
-rw-r--r--fs/jffs2/jffs2_fs_i.h1
-rw-r--r--fs/jffs2/jffs2_fs_sb.h1
-rw-r--r--fs/jffs2/nodelist.h1
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/xattr.c2
-rw-r--r--fs/jfs/file.c14
-rw-r--r--fs/jfs/inode.c63
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/super.c8
-rw-r--r--fs/jfs/xattr.c87
-rw-r--r--fs/libfs.c70
-rw-r--r--fs/logfs/dir.c5
-rw-r--r--fs/logfs/file.c18
-rw-r--r--fs/logfs/inode.c51
-rw-r--r--fs/logfs/journal.c2
-rw-r--r--fs/logfs/logfs.h4
-rw-r--r--fs/logfs/readwrite.c62
-rw-r--r--fs/logfs/segment.c1
-rw-r--r--fs/logfs/super.c23
-rw-r--r--fs/mbcache.c168
-rw-r--r--fs/minix/bitmap.c6
-rw-r--r--fs/minix/dir.c21
-rw-r--r--fs/minix/file.c22
-rw-r--r--fs/minix/inode.c35
-rw-r--r--fs/minix/minix.h4
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/ncpfs/inode.c36
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/nfs/inode.c13
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/super.c4
-rw-r--r--fs/nfsd/nfs4xdr.c6
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/nilfs2/dir.c25
-rw-r--r--fs/nilfs2/gcdat.c2
-rw-r--r--fs/nilfs2/inode.c78
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/recovery.c11
-rw-r--r--fs/nilfs2/segbuf.c2
-rw-r--r--fs/nilfs2/super.c20
-rw-r--r--fs/notify/Kconfig1
-rw-r--r--fs/notify/Makefile4
-rw-r--r--fs/notify/dnotify/dnotify.c213
-rw-r--r--fs/notify/fanotify/Kconfig26
-rw-r--r--fs/notify/fanotify/Makefile1
-rw-r--r--fs/notify/fanotify/fanotify.c212
-rw-r--r--fs/notify/fanotify/fanotify_user.c760
-rw-r--r--fs/notify/fsnotify.c201
-rw-r--r--fs/notify/fsnotify.h27
-rw-r--r--fs/notify/group.c182
-rw-r--r--fs/notify/inode_mark.c337
-rw-r--r--fs/notify/inotify/Kconfig15
-rw-r--r--fs/notify/inotify/Makefile1
-rw-r--r--fs/notify/inotify/inotify.c873
-rw-r--r--fs/notify/inotify/inotify.h7
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c151
-rw-r--r--fs/notify/inotify/inotify_user.c369
-rw-r--r--fs/notify/mark.c371
-rw-r--r--fs/notify/notification.c236
-rw-r--r--fs/notify/vfsmount_mark.c187
-rw-r--r--fs/ntfs/inode.c10
-rw-r--r--fs/ntfs/inode.h2
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/aops.c9
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c15
-rw-r--r--fs/ocfs2/file.c22
-rw-r--r--fs/ocfs2/inode.c29
-rw-r--r--fs/ocfs2/inode.h5
-rw-r--r--fs/ocfs2/super.c3
-rw-r--r--fs/omfs/dir.c22
-rw-r--r--fs/omfs/file.c46
-rw-r--r--fs/omfs/inode.c53
-rw-r--r--fs/omfs/omfs.h1
-rw-r--r--fs/omfs/omfs_fs.h1
-rw-r--r--fs/open.c3
-rw-r--r--fs/proc/base.c16
-rw-r--r--fs/proc/generic.c18
-rw-r--r--fs/proc/inode.c6
-rw-r--r--fs/proc/proc_sysctl.c15
-rw-r--r--fs/qnx4/inode.c11
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ramfs/file-nommu.c7
-rw-r--r--fs/read_write.c8
-rw-r--r--fs/reiserfs/file.c50
-rw-r--r--fs/reiserfs/inode.c134
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--fs/smbfs/inode.c12
-rw-r--r--fs/splice.c14
-rw-r--r--fs/statfs.c95
-rw-r--r--fs/super.c51
-rw-r--r--fs/sync.c25
-rw-r--r--fs/sysfs/inode.c8
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/sysv/dir.c21
-rw-r--r--fs/sysv/file.c22
-rw-r--r--fs/sysv/ialloc.c1
-rw-r--r--fs/sysv/inode.c19
-rw-r--r--fs/sysv/itree.c19
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/sysv/sysv.h4
-rw-r--r--fs/ubifs/file.c23
-rw-r--r--fs/ubifs/super.c12
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/udf/file.c22
-rw-r--r--fs/udf/ialloc.c2
-rw-r--r--fs/udf/inode.c61
-rw-r--r--fs/udf/super.c3
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/dir.c13
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c63
-rw-r--r--fs/ufs/super.c2
-rw-r--r--fs/ufs/truncate.c16
-rw-r--r--fs/ufs/ufs.h2
-rw-r--r--fs/ufs/util.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c62
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c38
239 files changed, 5297 insertions, 4337 deletions
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index f47c6bbb01b3..88418c419ea7 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -52,7 +52,7 @@ void v9fs_destroy_inode(struct inode *inode);
52#endif 52#endif
53 53
54struct inode *v9fs_get_inode(struct super_block *sb, int mode); 54struct inode *v9fs_get_inode(struct super_block *sb, int mode);
55void v9fs_clear_inode(struct inode *inode); 55void v9fs_evict_inode(struct inode *inode);
56ino_t v9fs_qid2ino(struct p9_qid *qid); 56ino_t v9fs_qid2ino(struct p9_qid *qid);
57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
58void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *); 58void v9fs_stat2inode_dotl(struct p9_stat_dotl *, struct inode *);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6e94f3247cec..d97c34a24f7a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -430,8 +430,10 @@ error:
430 * @inode: inode to release 430 * @inode: inode to release
431 * 431 *
432 */ 432 */
433void v9fs_clear_inode(struct inode *inode) 433void v9fs_evict_inode(struct inode *inode)
434{ 434{
435 truncate_inode_pages(inode->i_mapping, 0);
436 end_writeback(inode);
435 filemap_fdatawrite(inode->i_mapping); 437 filemap_fdatawrite(inode->i_mapping);
436 438
437#ifdef CONFIG_9P_FSCACHE 439#ifdef CONFIG_9P_FSCACHE
@@ -1209,10 +1211,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
1209 } 1211 }
1210 1212
1211 retval = p9_client_wstat(fid, &wstat); 1213 retval = p9_client_wstat(fid, &wstat);
1212 if (retval >= 0) 1214 if (retval < 0)
1213 retval = inode_setattr(dentry->d_inode, iattr); 1215 return retval;
1214 1216
1215 return retval; 1217 if ((iattr->ia_valid & ATTR_SIZE) &&
1218 iattr->ia_size != i_size_read(dentry->d_inode)) {
1219 retval = vmtruncate(dentry->d_inode, iattr->ia_size);
1220 if (retval)
1221 return retval;
1222 }
1223
1224 setattr_copy(dentry->d_inode, iattr);
1225 mark_inode_dirty(dentry->d_inode);
1226 return 0;
1216} 1227}
1217 1228
1218/** 1229/**
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 4b9ede0b41b7..f9311077de68 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -266,7 +266,7 @@ static const struct super_operations v9fs_super_ops = {
266 .destroy_inode = v9fs_destroy_inode, 266 .destroy_inode = v9fs_destroy_inode,
267#endif 267#endif
268 .statfs = simple_statfs, 268 .statfs = simple_statfs,
269 .clear_inode = v9fs_clear_inode, 269 .evict_inode = v9fs_evict_inode,
270 .show_options = generic_show_options, 270 .show_options = generic_show_options,
271 .umount_begin = v9fs_umount_begin, 271 .umount_begin = v9fs_umount_begin,
272}; 272};
@@ -277,7 +277,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
277 .destroy_inode = v9fs_destroy_inode, 277 .destroy_inode = v9fs_destroy_inode,
278#endif 278#endif
279 .statfs = v9fs_statfs, 279 .statfs = v9fs_statfs,
280 .clear_inode = v9fs_clear_inode, 280 .evict_inode = v9fs_evict_inode,
281 .show_options = generic_show_options, 281 .show_options = generic_show_options,
282 .umount_begin = v9fs_umount_begin, 282 .umount_begin = v9fs_umount_begin,
283}; 283};
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6f850b06ab62..65794b8fe79e 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping,
50 loff_t pos, unsigned len, unsigned flags, 50 loff_t pos, unsigned len, unsigned flags,
51 struct page **pagep, void **fsdata) 51 struct page **pagep, void **fsdata)
52{ 52{
53 int ret;
54
53 *pagep = NULL; 55 *pagep = NULL;
54 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 56 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
55 adfs_get_block, 57 adfs_get_block,
56 &ADFS_I(mapping->host)->mmu_private); 58 &ADFS_I(mapping->host)->mmu_private);
59 if (unlikely(ret)) {
60 loff_t isize = mapping->host->i_size;
61 if (pos + len > isize)
62 vmtruncate(mapping->host, isize);
63 }
64
65 return ret;
57} 66}
58 67
59static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) 68static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
@@ -324,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
324 333
325 /* XXX: this is missing some actual on-disk truncation.. */ 334 /* XXX: this is missing some actual on-disk truncation.. */
326 if (ia_valid & ATTR_SIZE) 335 if (ia_valid & ATTR_SIZE)
327 error = simple_setsize(inode, attr->ia_size); 336 truncate_setsize(inode, attr->ia_size);
328
329 if (error)
330 goto out;
331 337
332 if (ia_valid & ATTR_MTIME) { 338 if (ia_valid & ATTR_MTIME) {
333 inode->i_mtime = attr->ia_mtime; 339 inode->i_mtime = attr->ia_mtime;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index f05b6155ccc8..a8cbdeb34025 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -171,8 +171,7 @@ extern int affs_rename(struct inode *old_dir, struct dentry *old_dentry,
171extern unsigned long affs_parent_ino(struct inode *dir); 171extern unsigned long affs_parent_ino(struct inode *dir);
172extern struct inode *affs_new_inode(struct inode *dir); 172extern struct inode *affs_new_inode(struct inode *dir);
173extern int affs_notify_change(struct dentry *dentry, struct iattr *attr); 173extern int affs_notify_change(struct dentry *dentry, struct iattr *attr);
174extern void affs_delete_inode(struct inode *inode); 174extern void affs_evict_inode(struct inode *inode);
175extern void affs_clear_inode(struct inode *inode);
176extern struct inode *affs_iget(struct super_block *sb, 175extern struct inode *affs_iget(struct super_block *sb,
177 unsigned long ino); 176 unsigned long ino);
178extern int affs_write_inode(struct inode *inode, 177extern int affs_write_inode(struct inode *inode,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 322710c3eedf..c4a9875bd1a6 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
406 loff_t pos, unsigned len, unsigned flags, 406 loff_t pos, unsigned len, unsigned flags,
407 struct page **pagep, void **fsdata) 407 struct page **pagep, void **fsdata)
408{ 408{
409 int ret;
410
409 *pagep = NULL; 411 *pagep = NULL;
410 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 412 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
411 affs_get_block, 413 affs_get_block,
412 &AFFS_I(mapping->host)->mmu_private); 414 &AFFS_I(mapping->host)->mmu_private);
415 if (unlikely(ret)) {
416 loff_t isize = mapping->host->i_size;
417 if (pos + len > isize)
418 vmtruncate(mapping->host, isize);
419 }
420
421 return ret;
413} 422}
414 423
415static sector_t _affs_bmap(struct address_space *mapping, sector_t block) 424static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index f4b2a4ee4f91..3a0fdec175ba 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -235,31 +235,36 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
235 goto out; 235 goto out;
236 } 236 }
237 237
238 error = inode_setattr(inode, attr); 238 if ((attr->ia_valid & ATTR_SIZE) &&
239 if (!error && (attr->ia_valid & ATTR_MODE)) 239 attr->ia_size != i_size_read(inode)) {
240 error = vmtruncate(inode, attr->ia_size);
241 if (error)
242 return error;
243 }
244
245 setattr_copy(inode, attr);
246 mark_inode_dirty(inode);
247
248 if (attr->ia_valid & ATTR_MODE)
240 mode_to_prot(inode); 249 mode_to_prot(inode);
241out: 250out:
242 return error; 251 return error;
243} 252}
244 253
245void 254void
246affs_delete_inode(struct inode *inode) 255affs_evict_inode(struct inode *inode)
247{
248 pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
249 truncate_inode_pages(&inode->i_data, 0);
250 inode->i_size = 0;
251 affs_truncate(inode);
252 clear_inode(inode);
253 affs_free_block(inode->i_sb, inode->i_ino);
254}
255
256void
257affs_clear_inode(struct inode *inode)
258{ 256{
259 unsigned long cache_page; 257 unsigned long cache_page;
258 pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
259 truncate_inode_pages(&inode->i_data, 0);
260 260
261 pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); 261 if (!inode->i_nlink) {
262 inode->i_size = 0;
263 affs_truncate(inode);
264 }
262 265
266 invalidate_inode_buffers(inode);
267 end_writeback(inode);
263 affs_free_prealloc(inode); 268 affs_free_prealloc(inode);
264 cache_page = (unsigned long)AFFS_I(inode)->i_lc; 269 cache_page = (unsigned long)AFFS_I(inode)->i_lc;
265 if (cache_page) { 270 if (cache_page) {
@@ -271,6 +276,9 @@ affs_clear_inode(struct inode *inode)
271 affs_brelse(AFFS_I(inode)->i_ext_bh); 276 affs_brelse(AFFS_I(inode)->i_ext_bh);
272 AFFS_I(inode)->i_ext_last = ~1; 277 AFFS_I(inode)->i_ext_last = ~1;
273 AFFS_I(inode)->i_ext_bh = NULL; 278 AFFS_I(inode)->i_ext_bh = NULL;
279
280 if (!inode->i_nlink)
281 affs_free_block(inode->i_sb, inode->i_ino);
274} 282}
275 283
276struct inode * 284struct inode *
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 16a3e4765f68..33c4e7eef470 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
26static int affs_remount (struct super_block *sb, int *flags, char *data); 26static int affs_remount (struct super_block *sb, int *flags, char *data);
27 27
28static void 28static void
29affs_commit_super(struct super_block *sb, int clean) 29affs_commit_super(struct super_block *sb, int wait, int clean)
30{ 30{
31 struct affs_sb_info *sbi = AFFS_SB(sb); 31 struct affs_sb_info *sbi = AFFS_SB(sb);
32 struct buffer_head *bh = sbi->s_root_bh; 32 struct buffer_head *bh = sbi->s_root_bh;
@@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean)
36 secs_to_datestamp(get_seconds(), &tail->disk_change); 36 secs_to_datestamp(get_seconds(), &tail->disk_change);
37 affs_fix_checksum(sb, bh); 37 affs_fix_checksum(sb, bh);
38 mark_buffer_dirty(bh); 38 mark_buffer_dirty(bh);
39 if (wait)
40 sync_dirty_buffer(bh);
39} 41}
40 42
41static void 43static void
@@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb)
46 48
47 lock_kernel(); 49 lock_kernel();
48 50
49 if (!(sb->s_flags & MS_RDONLY)) 51 if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
50 affs_commit_super(sb, 1); 52 affs_commit_super(sb, 1, 1);
51 53
52 kfree(sbi->s_prefix); 54 kfree(sbi->s_prefix);
53 affs_free_bitmap(sb); 55 affs_free_bitmap(sb);
@@ -61,27 +63,20 @@ affs_put_super(struct super_block *sb)
61static void 63static void
62affs_write_super(struct super_block *sb) 64affs_write_super(struct super_block *sb)
63{ 65{
64 int clean = 2;
65
66 lock_super(sb); 66 lock_super(sb);
67 if (!(sb->s_flags & MS_RDONLY)) { 67 if (!(sb->s_flags & MS_RDONLY))
68 // if (sbi->s_bitmap[i].bm_bh) { 68 affs_commit_super(sb, 1, 2);
69 // if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) { 69 sb->s_dirt = 0;
70 // clean = 0;
71 affs_commit_super(sb, clean);
72 sb->s_dirt = !clean; /* redo until bitmap synced */
73 } else
74 sb->s_dirt = 0;
75 unlock_super(sb); 70 unlock_super(sb);
76 71
77 pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean); 72 pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
78} 73}
79 74
80static int 75static int
81affs_sync_fs(struct super_block *sb, int wait) 76affs_sync_fs(struct super_block *sb, int wait)
82{ 77{
83 lock_super(sb); 78 lock_super(sb);
84 affs_commit_super(sb, 2); 79 affs_commit_super(sb, wait, 2);
85 sb->s_dirt = 0; 80 sb->s_dirt = 0;
86 unlock_super(sb); 81 unlock_super(sb);
87 return 0; 82 return 0;
@@ -140,8 +135,7 @@ static const struct super_operations affs_sops = {
140 .alloc_inode = affs_alloc_inode, 135 .alloc_inode = affs_alloc_inode,
141 .destroy_inode = affs_destroy_inode, 136 .destroy_inode = affs_destroy_inode,
142 .write_inode = affs_write_inode, 137 .write_inode = affs_write_inode,
143 .delete_inode = affs_delete_inode, 138 .evict_inode = affs_evict_inode,
144 .clear_inode = affs_clear_inode,
145 .put_super = affs_put_super, 139 .put_super = affs_put_super,
146 .write_super = affs_write_super, 140 .write_super = affs_write_super,
147 .sync_fs = affs_sync_fs, 141 .sync_fs = affs_sync_fs,
@@ -554,9 +548,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
554 return 0; 548 return 0;
555 } 549 }
556 if (*flags & MS_RDONLY) { 550 if (*flags & MS_RDONLY) {
557 sb->s_dirt = 1; 551 affs_write_super(sb);
558 while (sb->s_dirt)
559 affs_write_super(sb);
560 affs_free_bitmap(sb); 552 affs_free_bitmap(sb);
561 } else 553 } else
562 res = affs_init_bitmap(sb, flags); 554 res = affs_init_bitmap(sb, flags);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d00b312e3110..320ffef11574 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -316,7 +316,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
316/* 316/*
317 * clear an AFS inode 317 * clear an AFS inode
318 */ 318 */
319void afs_clear_inode(struct inode *inode) 319void afs_evict_inode(struct inode *inode)
320{ 320{
321 struct afs_permits *permits; 321 struct afs_permits *permits;
322 struct afs_vnode *vnode; 322 struct afs_vnode *vnode;
@@ -335,6 +335,9 @@ void afs_clear_inode(struct inode *inode)
335 335
336 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); 336 ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
337 337
338 truncate_inode_pages(&inode->i_data, 0);
339 end_writeback(inode);
340
338 afs_give_up_callback(vnode); 341 afs_give_up_callback(vnode);
339 342
340 if (vnode->server) { 343 if (vnode->server) {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5f679b77ce24..8679089ce9a1 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -565,7 +565,7 @@ extern void afs_zap_data(struct afs_vnode *);
565extern int afs_validate(struct afs_vnode *, struct key *); 565extern int afs_validate(struct afs_vnode *, struct key *);
566extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 566extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
567extern int afs_setattr(struct dentry *, struct iattr *); 567extern int afs_setattr(struct dentry *, struct iattr *);
568extern void afs_clear_inode(struct inode *); 568extern void afs_evict_inode(struct inode *);
569 569
570/* 570/*
571 * main.c 571 * main.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e932e5a3a0c1..9cf80f02da16 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -49,7 +49,7 @@ static const struct super_operations afs_super_ops = {
49 .statfs = afs_statfs, 49 .statfs = afs_statfs,
50 .alloc_inode = afs_alloc_inode, 50 .alloc_inode = afs_alloc_inode,
51 .destroy_inode = afs_destroy_inode, 51 .destroy_inode = afs_destroy_inode,
52 .clear_inode = afs_clear_inode, 52 .evict_inode = afs_evict_inode,
53 .put_super = afs_put_super, 53 .put_super = afs_put_super,
54 .show_options = generic_show_options, 54 .show_options = generic_show_options,
55}; 55};
diff --git a/fs/attr.c b/fs/attr.c
index b4fa3b0aa596..7ca41811afa1 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,35 +14,53 @@
14#include <linux/fcntl.h> 14#include <linux/fcntl.h>
15#include <linux/security.h> 15#include <linux/security.h>
16 16
17/* Taken over from the old code... */ 17/**
18 18 * inode_change_ok - check if attribute changes to an inode are allowed
19/* POSIX UID/GID verification for setting inode attributes. */ 19 * @inode: inode to check
20 * @attr: attributes to change
21 *
22 * Check if we are allowed to change the attributes contained in @attr
23 * in the given inode. This includes the normal unix access permission
24 * checks, as well as checks for rlimits and others.
25 *
26 * Should be called as the first thing in ->setattr implementations,
27 * possibly after taking additional locks.
28 */
20int inode_change_ok(const struct inode *inode, struct iattr *attr) 29int inode_change_ok(const struct inode *inode, struct iattr *attr)
21{ 30{
22 int retval = -EPERM;
23 unsigned int ia_valid = attr->ia_valid; 31 unsigned int ia_valid = attr->ia_valid;
24 32
33 /*
34 * First check size constraints. These can't be overriden using
35 * ATTR_FORCE.
36 */
37 if (ia_valid & ATTR_SIZE) {
38 int error = inode_newsize_ok(inode, attr->ia_size);
39 if (error)
40 return error;
41 }
42
25 /* If force is set do it anyway. */ 43 /* If force is set do it anyway. */
26 if (ia_valid & ATTR_FORCE) 44 if (ia_valid & ATTR_FORCE)
27 goto fine; 45 return 0;
28 46
29 /* Make sure a caller can chown. */ 47 /* Make sure a caller can chown. */
30 if ((ia_valid & ATTR_UID) && 48 if ((ia_valid & ATTR_UID) &&
31 (current_fsuid() != inode->i_uid || 49 (current_fsuid() != inode->i_uid ||
32 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) 50 attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
33 goto error; 51 return -EPERM;
34 52
35 /* Make sure caller can chgrp. */ 53 /* Make sure caller can chgrp. */
36 if ((ia_valid & ATTR_GID) && 54 if ((ia_valid & ATTR_GID) &&
37 (current_fsuid() != inode->i_uid || 55 (current_fsuid() != inode->i_uid ||
38 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && 56 (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
39 !capable(CAP_CHOWN)) 57 !capable(CAP_CHOWN))
40 goto error; 58 return -EPERM;
41 59
42 /* Make sure a caller can chmod. */ 60 /* Make sure a caller can chmod. */
43 if (ia_valid & ATTR_MODE) { 61 if (ia_valid & ATTR_MODE) {
44 if (!is_owner_or_cap(inode)) 62 if (!is_owner_or_cap(inode))
45 goto error; 63 return -EPERM;
46 /* Also check the setgid bit! */ 64 /* Also check the setgid bit! */
47 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
48 inode->i_gid) && !capable(CAP_FSETID)) 66 inode->i_gid) && !capable(CAP_FSETID))
@@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
52 /* Check for setting the inode time. */ 70 /* Check for setting the inode time. */
53 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { 71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
54 if (!is_owner_or_cap(inode)) 72 if (!is_owner_or_cap(inode))
55 goto error; 73 return -EPERM;
56 } 74 }
57fine: 75
58 retval = 0; 76 return 0;
59error:
60 return retval;
61} 77}
62EXPORT_SYMBOL(inode_change_ok); 78EXPORT_SYMBOL(inode_change_ok);
63 79
@@ -105,21 +121,21 @@ out_big:
105EXPORT_SYMBOL(inode_newsize_ok); 121EXPORT_SYMBOL(inode_newsize_ok);
106 122
107/** 123/**
108 * generic_setattr - copy simple metadata updates into the generic inode 124 * setattr_copy - copy simple metadata updates into the generic inode
109 * @inode: the inode to be updated 125 * @inode: the inode to be updated
110 * @attr: the new attributes 126 * @attr: the new attributes
111 * 127 *
112 * generic_setattr must be called with i_mutex held. 128 * setattr_copy must be called with i_mutex held.
113 * 129 *
114 * generic_setattr updates the inode's metadata with that specified 130 * setattr_copy updates the inode's metadata with that specified
115 * in attr. Noticably missing is inode size update, which is more complex 131 * in attr. Noticably missing is inode size update, which is more complex
116 * as it requires pagecache updates. See simple_setsize. 132 * as it requires pagecache updates.
117 * 133 *
118 * The inode is not marked as dirty after this operation. The rationale is 134 * The inode is not marked as dirty after this operation. The rationale is
119 * that for "simple" filesystems, the struct inode is the inode storage. 135 * that for "simple" filesystems, the struct inode is the inode storage.
120 * The caller is free to mark the inode dirty afterwards if needed. 136 * The caller is free to mark the inode dirty afterwards if needed.
121 */ 137 */
122void generic_setattr(struct inode *inode, const struct iattr *attr) 138void setattr_copy(struct inode *inode, const struct iattr *attr)
123{ 139{
124 unsigned int ia_valid = attr->ia_valid; 140 unsigned int ia_valid = attr->ia_valid;
125 141
@@ -144,32 +160,7 @@ void generic_setattr(struct inode *inode, const struct iattr *attr)
144 inode->i_mode = mode; 160 inode->i_mode = mode;
145 } 161 }
146} 162}
147EXPORT_SYMBOL(generic_setattr); 163EXPORT_SYMBOL(setattr_copy);
148
149/*
150 * note this function is deprecated, the new truncate sequence should be
151 * used instead -- see eg. simple_setsize, generic_setattr.
152 */
153int inode_setattr(struct inode *inode, const struct iattr *attr)
154{
155 unsigned int ia_valid = attr->ia_valid;
156
157 if (ia_valid & ATTR_SIZE &&
158 attr->ia_size != i_size_read(inode)) {
159 int error;
160
161 error = vmtruncate(inode, attr->ia_size);
162 if (error)
163 return error;
164 }
165
166 generic_setattr(inode, attr);
167
168 mark_inode_dirty(inode);
169
170 return 0;
171}
172EXPORT_SYMBOL(inode_setattr);
173 164
174int notify_change(struct dentry * dentry, struct iattr * attr) 165int notify_change(struct dentry * dentry, struct iattr * attr)
175{ 166{
@@ -237,13 +228,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
237 if (ia_valid & ATTR_SIZE) 228 if (ia_valid & ATTR_SIZE)
238 down_write(&dentry->d_inode->i_alloc_sem); 229 down_write(&dentry->d_inode->i_alloc_sem);
239 230
240 if (inode->i_op && inode->i_op->setattr) { 231 if (inode->i_op->setattr)
241 error = inode->i_op->setattr(dentry, attr); 232 error = inode->i_op->setattr(dentry, attr);
242 } else { 233 else
243 error = inode_change_ok(inode, attr); 234 error = simple_setattr(dentry, attr);
244 if (!error)
245 error = inode_setattr(inode, attr);
246 }
247 235
248 if (ia_valid & ATTR_SIZE) 236 if (ia_valid & ATTR_SIZE)
249 up_write(&dentry->d_inode->i_alloc_sem); 237 up_write(&dentry->d_inode->i_alloc_sem);
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 9a0520b50663..11b1ea786d00 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -16,6 +16,7 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/param.h> 17#include <linux/param.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/compat.h>
19#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
20#include "autofs_i.h" 21#include "autofs_i.h"
21 22
@@ -25,13 +26,17 @@ static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
25static int autofs_root_unlink(struct inode *,struct dentry *); 26static int autofs_root_unlink(struct inode *,struct dentry *);
26static int autofs_root_rmdir(struct inode *,struct dentry *); 27static int autofs_root_rmdir(struct inode *,struct dentry *);
27static int autofs_root_mkdir(struct inode *,struct dentry *,int); 28static int autofs_root_mkdir(struct inode *,struct dentry *,int);
28static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); 29static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
30static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
29 31
30const struct file_operations autofs_root_operations = { 32const struct file_operations autofs_root_operations = {
31 .llseek = generic_file_llseek, 33 .llseek = generic_file_llseek,
32 .read = generic_read_dir, 34 .read = generic_read_dir,
33 .readdir = autofs_root_readdir, 35 .readdir = autofs_root_readdir,
34 .ioctl = autofs_root_ioctl, 36 .unlocked_ioctl = autofs_root_ioctl,
37#ifdef CONFIG_COMPAT
38 .compat_ioctl = autofs_root_compat_ioctl,
39#endif
35}; 40};
36 41
37const struct inode_operations autofs_root_inode_operations = { 42const struct inode_operations autofs_root_inode_operations = {
@@ -492,6 +497,25 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
492} 497}
493 498
494/* Get/set timeout ioctl() operation */ 499/* Get/set timeout ioctl() operation */
500#ifdef CONFIG_COMPAT
501static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
502 unsigned int __user *p)
503{
504 unsigned long ntimeout;
505
506 if (get_user(ntimeout, p) ||
507 put_user(sbi->exp_timeout / HZ, p))
508 return -EFAULT;
509
510 if (ntimeout > UINT_MAX/HZ)
511 sbi->exp_timeout = 0;
512 else
513 sbi->exp_timeout = ntimeout * HZ;
514
515 return 0;
516}
517#endif
518
495static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi, 519static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
496 unsigned long __user *p) 520 unsigned long __user *p)
497{ 521{
@@ -546,7 +570,7 @@ static inline int autofs_expire_run(struct super_block *sb,
546 * ioctl()'s on the root directory is the chief method for the daemon to 570 * ioctl()'s on the root directory is the chief method for the daemon to
547 * generate kernel reactions 571 * generate kernel reactions
548 */ 572 */
549static int autofs_root_ioctl(struct inode *inode, struct file *filp, 573static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
550 unsigned int cmd, unsigned long arg) 574 unsigned int cmd, unsigned long arg)
551{ 575{
552 struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); 576 struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
@@ -571,6 +595,10 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
571 return 0; 595 return 0;
572 case AUTOFS_IOC_PROTOVER: /* Get protocol version */ 596 case AUTOFS_IOC_PROTOVER: /* Get protocol version */
573 return autofs_get_protover(argp); 597 return autofs_get_protover(argp);
598#ifdef CONFIG_COMPAT
599 case AUTOFS_IOC_SETTIMEOUT32:
600 return autofs_compat_get_set_timeout(sbi, argp);
601#endif
574 case AUTOFS_IOC_SETTIMEOUT: 602 case AUTOFS_IOC_SETTIMEOUT:
575 return autofs_get_set_timeout(sbi, argp); 603 return autofs_get_set_timeout(sbi, argp);
576 case AUTOFS_IOC_EXPIRE: 604 case AUTOFS_IOC_EXPIRE:
@@ -579,4 +607,37 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
579 default: 607 default:
580 return -ENOSYS; 608 return -ENOSYS;
581 } 609 }
610
611}
612
613static long autofs_root_ioctl(struct file *filp,
614 unsigned int cmd, unsigned long arg)
615{
616 int ret;
617
618 lock_kernel();
619 ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
620 filp, cmd, arg);
621 unlock_kernel();
622
623 return ret;
624}
625
626#ifdef CONFIG_COMPAT
627static long autofs_root_compat_ioctl(struct file *filp,
628 unsigned int cmd, unsigned long arg)
629{
630 struct inode *inode = filp->f_path.dentry->d_inode;
631 int ret;
632
633 lock_kernel();
634 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
635 ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
636 else
637 ret = autofs_do_root_ioctl(inode, filp, cmd,
638 (unsigned long)compat_ptr(arg));
639 unlock_kernel();
640
641 return ret;
582} 642}
643#endif
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index db4117ed7803..48e056e70fd6 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,7 +18,9 @@
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/param.h> 19#include <linux/param.h>
20#include <linux/time.h> 20#include <linux/time.h>
21#include <linux/compat.h>
21#include <linux/smp_lock.h> 22#include <linux/smp_lock.h>
23
22#include "autofs_i.h" 24#include "autofs_i.h"
23 25
24static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
@@ -26,6 +28,7 @@ static int autofs4_dir_unlink(struct inode *,struct dentry *);
26static int autofs4_dir_rmdir(struct inode *,struct dentry *); 28static int autofs4_dir_rmdir(struct inode *,struct dentry *);
27static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 29static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
28static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); 30static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
31static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
29static int autofs4_dir_open(struct inode *inode, struct file *file); 32static int autofs4_dir_open(struct inode *inode, struct file *file);
30static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 33static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
31static void *autofs4_follow_link(struct dentry *, struct nameidata *); 34static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -40,6 +43,9 @@ const struct file_operations autofs4_root_operations = {
40 .readdir = dcache_readdir, 43 .readdir = dcache_readdir,
41 .llseek = dcache_dir_lseek, 44 .llseek = dcache_dir_lseek,
42 .unlocked_ioctl = autofs4_root_ioctl, 45 .unlocked_ioctl = autofs4_root_ioctl,
46#ifdef CONFIG_COMPAT
47 .compat_ioctl = autofs4_root_compat_ioctl,
48#endif
43}; 49};
44 50
45const struct file_operations autofs4_dir_operations = { 51const struct file_operations autofs4_dir_operations = {
@@ -840,6 +846,26 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
840} 846}
841 847
842/* Get/set timeout ioctl() operation */ 848/* Get/set timeout ioctl() operation */
849#ifdef CONFIG_COMPAT
850static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi,
851 compat_ulong_t __user *p)
852{
853 int rv;
854 unsigned long ntimeout;
855
856 if ((rv = get_user(ntimeout, p)) ||
857 (rv = put_user(sbi->exp_timeout/HZ, p)))
858 return rv;
859
860 if (ntimeout > UINT_MAX/HZ)
861 sbi->exp_timeout = 0;
862 else
863 sbi->exp_timeout = ntimeout * HZ;
864
865 return 0;
866}
867#endif
868
843static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi, 869static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi,
844 unsigned long __user *p) 870 unsigned long __user *p)
845{ 871{
@@ -933,6 +959,10 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
933 return autofs4_get_protosubver(sbi, p); 959 return autofs4_get_protosubver(sbi, p);
934 case AUTOFS_IOC_SETTIMEOUT: 960 case AUTOFS_IOC_SETTIMEOUT:
935 return autofs4_get_set_timeout(sbi, p); 961 return autofs4_get_set_timeout(sbi, p);
962#ifdef CONFIG_COMPAT
963 case AUTOFS_IOC_SETTIMEOUT32:
964 return autofs4_compat_get_set_timeout(sbi, p);
965#endif
936 966
937 case AUTOFS_IOC_ASKUMOUNT: 967 case AUTOFS_IOC_ASKUMOUNT:
938 return autofs4_ask_umount(filp->f_path.mnt, p); 968 return autofs4_ask_umount(filp->f_path.mnt, p);
@@ -961,3 +991,22 @@ static long autofs4_root_ioctl(struct file *filp,
961 991
962 return ret; 992 return ret;
963} 993}
994
995#ifdef CONFIG_COMPAT
996static long autofs4_root_compat_ioctl(struct file *filp,
997 unsigned int cmd, unsigned long arg)
998{
999 struct inode *inode = filp->f_path.dentry->d_inode;
1000 int ret;
1001
1002 lock_kernel();
1003 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
1004 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
1005 else
1006 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
1007 (unsigned long)compat_ptr(arg));
1008 unlock_kernel();
1009
1010 return ret;
1011}
1012#endif
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 7109e451abf7..f7f87e233dd9 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -17,7 +17,6 @@ struct bfs_sb_info {
17 unsigned long si_lf_eblk; 17 unsigned long si_lf_eblk;
18 unsigned long si_lasti; 18 unsigned long si_lasti;
19 unsigned long *si_imap; 19 unsigned long *si_imap;
20 struct buffer_head *si_sbh; /* buffer header w/superblock */
21 struct mutex bfs_lock; 20 struct mutex bfs_lock;
22}; 21};
23 22
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 88b9a3ff44e4..eb67edd0f8ea 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
70 struct super_block *sb = inode->i_sb; 70 struct super_block *sb = inode->i_sb;
71 struct bfs_sb_info *info = BFS_SB(sb); 71 struct bfs_sb_info *info = BFS_SB(sb);
72 struct bfs_inode_info *bi = BFS_I(inode); 72 struct bfs_inode_info *bi = BFS_I(inode);
73 struct buffer_head *sbh = info->si_sbh;
74 73
75 phys = bi->i_sblock + block; 74 phys = bi->i_sblock + block;
76 if (!create) { 75 if (!create) {
@@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
112 info->si_freeb -= phys - bi->i_eblock; 111 info->si_freeb -= phys - bi->i_eblock;
113 info->si_lf_eblk = bi->i_eblock = phys; 112 info->si_lf_eblk = bi->i_eblock = phys;
114 mark_inode_dirty(inode); 113 mark_inode_dirty(inode);
115 mark_buffer_dirty(sbh);
116 err = 0; 114 err = 0;
117 goto out; 115 goto out;
118 } 116 }
@@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
147 */ 145 */
148 info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks; 146 info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks;
149 mark_inode_dirty(inode); 147 mark_inode_dirty(inode);
150 mark_buffer_dirty(sbh);
151 map_bh(bh_result, sb, phys); 148 map_bh(bh_result, sb, phys);
152out: 149out:
153 mutex_unlock(&info->bfs_lock); 150 mutex_unlock(&info->bfs_lock);
@@ -168,9 +165,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping,
168 loff_t pos, unsigned len, unsigned flags, 165 loff_t pos, unsigned len, unsigned flags,
169 struct page **pagep, void **fsdata) 166 struct page **pagep, void **fsdata)
170{ 167{
171 *pagep = NULL; 168 int ret;
172 return block_write_begin(file, mapping, pos, len, flags, 169
173 pagep, fsdata, bfs_get_block); 170 ret = block_write_begin(mapping, pos, len, flags, pagep,
171 bfs_get_block);
172 if (unlikely(ret)) {
173 loff_t isize = mapping->host->i_size;
174 if (pos + len > isize)
175 vmtruncate(mapping->host, isize);
176 }
177
178 return ret;
174} 179}
175 180
176static sector_t bfs_bmap(struct address_space *mapping, sector_t block) 181static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f22a7d3dc362..c4daf0f5fc02 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -31,7 +31,6 @@ MODULE_LICENSE("GPL");
31#define dprintf(x...) 31#define dprintf(x...)
32#endif 32#endif
33 33
34static void bfs_write_super(struct super_block *s);
35void dump_imap(const char *prefix, struct super_block *s); 34void dump_imap(const char *prefix, struct super_block *s);
36 35
37struct inode *bfs_iget(struct super_block *sb, unsigned long ino) 36struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -99,6 +98,24 @@ error:
99 return ERR_PTR(-EIO); 98 return ERR_PTR(-EIO);
100} 99}
101 100
101static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p)
102{
103 if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) {
104 printf("Bad inode number %s:%08x\n", sb->s_id, ino);
105 return ERR_PTR(-EIO);
106 }
107
108 ino -= BFS_ROOT_INO;
109
110 *p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK);
111 if (!*p) {
112 printf("Unable to read inode %s:%08x\n", sb->s_id, ino);
113 return ERR_PTR(-EIO);
114 }
115
116 return (struct bfs_inode *)(*p)->b_data + ino % BFS_INODES_PER_BLOCK;
117}
118
102static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) 119static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
103{ 120{
104 struct bfs_sb_info *info = BFS_SB(inode->i_sb); 121 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -106,28 +123,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
106 unsigned long i_sblock; 123 unsigned long i_sblock;
107 struct bfs_inode *di; 124 struct bfs_inode *di;
108 struct buffer_head *bh; 125 struct buffer_head *bh;
109 int block, off;
110 int err = 0; 126 int err = 0;
111 127
112 dprintf("ino=%08x\n", ino); 128 dprintf("ino=%08x\n", ino);
113 129
114 if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) { 130 di = find_inode(inode->i_sb, ino, &bh);
115 printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); 131 if (IS_ERR(di))
116 return -EIO; 132 return PTR_ERR(di);
117 }
118 133
119 mutex_lock(&info->bfs_lock); 134 mutex_lock(&info->bfs_lock);
120 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
121 bh = sb_bread(inode->i_sb, block);
122 if (!bh) {
123 printf("Unable to read inode %s:%08x\n",
124 inode->i_sb->s_id, ino);
125 mutex_unlock(&info->bfs_lock);
126 return -EIO;
127 }
128
129 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
130 di = (struct bfs_inode *)bh->b_data + off;
131 135
132 if (ino == BFS_ROOT_INO) 136 if (ino == BFS_ROOT_INO)
133 di->i_vtype = cpu_to_le32(BFS_VDIR); 137 di->i_vtype = cpu_to_le32(BFS_VDIR);
@@ -158,12 +162,11 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
158 return err; 162 return err;
159} 163}
160 164
161static void bfs_delete_inode(struct inode *inode) 165static void bfs_evict_inode(struct inode *inode)
162{ 166{
163 unsigned long ino = inode->i_ino; 167 unsigned long ino = inode->i_ino;
164 struct bfs_inode *di; 168 struct bfs_inode *di;
165 struct buffer_head *bh; 169 struct buffer_head *bh;
166 int block, off;
167 struct super_block *s = inode->i_sb; 170 struct super_block *s = inode->i_sb;
168 struct bfs_sb_info *info = BFS_SB(s); 171 struct bfs_sb_info *info = BFS_SB(s);
169 struct bfs_inode_info *bi = BFS_I(inode); 172 struct bfs_inode_info *bi = BFS_I(inode);
@@ -171,28 +174,19 @@ static void bfs_delete_inode(struct inode *inode)
171 dprintf("ino=%08lx\n", ino); 174 dprintf("ino=%08lx\n", ino);
172 175
173 truncate_inode_pages(&inode->i_data, 0); 176 truncate_inode_pages(&inode->i_data, 0);
177 invalidate_inode_buffers(inode);
178 end_writeback(inode);
174 179
175 if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) { 180 if (inode->i_nlink)
176 printf("invalid ino=%08lx\n", ino);
177 return; 181 return;
178 }
179
180 inode->i_size = 0;
181 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
182 mutex_lock(&info->bfs_lock);
183 mark_inode_dirty(inode);
184 182
185 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 183 di = find_inode(s, inode->i_ino, &bh);
186 bh = sb_bread(s, block); 184 if (IS_ERR(di))
187 if (!bh) {
188 printf("Unable to read inode %s:%08lx\n",
189 inode->i_sb->s_id, ino);
190 mutex_unlock(&info->bfs_lock);
191 return; 185 return;
192 } 186
193 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 187 mutex_lock(&info->bfs_lock);
194 di = (struct bfs_inode *)bh->b_data + off; 188 /* clear on-disk inode */
195 memset((void *)di, 0, sizeof(struct bfs_inode)); 189 memset(di, 0, sizeof(struct bfs_inode));
196 mark_buffer_dirty(bh); 190 mark_buffer_dirty(bh);
197 brelse(bh); 191 brelse(bh);
198 192
@@ -209,32 +203,9 @@ static void bfs_delete_inode(struct inode *inode)
209 * "last block of the last file" even if there is no 203 * "last block of the last file" even if there is no
210 * real file there, saves us 1 gap. 204 * real file there, saves us 1 gap.
211 */ 205 */
212 if (info->si_lf_eblk == bi->i_eblock) { 206 if (info->si_lf_eblk == bi->i_eblock)
213 info->si_lf_eblk = bi->i_sblock - 1; 207 info->si_lf_eblk = bi->i_sblock - 1;
214 mark_buffer_dirty(info->si_sbh);
215 }
216 mutex_unlock(&info->bfs_lock); 208 mutex_unlock(&info->bfs_lock);
217 clear_inode(inode);
218}
219
220static int bfs_sync_fs(struct super_block *sb, int wait)
221{
222 struct bfs_sb_info *info = BFS_SB(sb);
223
224 mutex_lock(&info->bfs_lock);
225 mark_buffer_dirty(info->si_sbh);
226 sb->s_dirt = 0;
227 mutex_unlock(&info->bfs_lock);
228
229 return 0;
230}
231
232static void bfs_write_super(struct super_block *sb)
233{
234 if (!(sb->s_flags & MS_RDONLY))
235 bfs_sync_fs(sb, 1);
236 else
237 sb->s_dirt = 0;
238} 209}
239 210
240static void bfs_put_super(struct super_block *s) 211static void bfs_put_super(struct super_block *s)
@@ -246,10 +217,6 @@ static void bfs_put_super(struct super_block *s)
246 217
247 lock_kernel(); 218 lock_kernel();
248 219
249 if (s->s_dirt)
250 bfs_write_super(s);
251
252 brelse(info->si_sbh);
253 mutex_destroy(&info->bfs_lock); 220 mutex_destroy(&info->bfs_lock);
254 kfree(info->si_imap); 221 kfree(info->si_imap);
255 kfree(info); 222 kfree(info);
@@ -319,10 +286,8 @@ static const struct super_operations bfs_sops = {
319 .alloc_inode = bfs_alloc_inode, 286 .alloc_inode = bfs_alloc_inode,
320 .destroy_inode = bfs_destroy_inode, 287 .destroy_inode = bfs_destroy_inode,
321 .write_inode = bfs_write_inode, 288 .write_inode = bfs_write_inode,
322 .delete_inode = bfs_delete_inode, 289 .evict_inode = bfs_evict_inode,
323 .put_super = bfs_put_super, 290 .put_super = bfs_put_super,
324 .write_super = bfs_write_super,
325 .sync_fs = bfs_sync_fs,
326 .statfs = bfs_statfs, 291 .statfs = bfs_statfs,
327}; 292};
328 293
@@ -349,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s)
349 314
350static int bfs_fill_super(struct super_block *s, void *data, int silent) 315static int bfs_fill_super(struct super_block *s, void *data, int silent)
351{ 316{
352 struct buffer_head *bh; 317 struct buffer_head *bh, *sbh;
353 struct bfs_super_block *bfs_sb; 318 struct bfs_super_block *bfs_sb;
354 struct inode *inode; 319 struct inode *inode;
355 unsigned i, imap_len; 320 unsigned i, imap_len;
@@ -365,10 +330,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
365 330
366 sb_set_blocksize(s, BFS_BSIZE); 331 sb_set_blocksize(s, BFS_BSIZE);
367 332
368 info->si_sbh = sb_bread(s, 0); 333 sbh = sb_bread(s, 0);
369 if (!info->si_sbh) 334 if (!sbh)
370 goto out; 335 goto out;
371 bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data; 336 bfs_sb = (struct bfs_super_block *)sbh->b_data;
372 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { 337 if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
373 if (!silent) 338 if (!silent)
374 printf("No BFS filesystem on %s (magic=%08x)\n", 339 printf("No BFS filesystem on %s (magic=%08x)\n",
@@ -472,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
472 info->si_lf_eblk = eblock; 437 info->si_lf_eblk = eblock;
473 } 438 }
474 brelse(bh); 439 brelse(bh);
475 if (!(s->s_flags & MS_RDONLY)) { 440 brelse(sbh);
476 mark_buffer_dirty(info->si_sbh);
477 s->s_dirt = 1;
478 }
479 dump_imap("read_super", s); 441 dump_imap("read_super", s);
480 return 0; 442 return 0;
481 443
@@ -485,7 +447,7 @@ out3:
485out2: 447out2:
486 kfree(info->si_imap); 448 kfree(info->si_imap);
487out1: 449out1:
488 brelse(info->si_sbh); 450 brelse(sbh);
489out: 451out:
490 mutex_destroy(&info->bfs_lock); 452 mutex_destroy(&info->bfs_lock);
491 kfree(info); 453 kfree(info);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c4e83537ead7..9e60fd201716 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
502 return inode; 502 return inode;
503} 503}
504 504
505static void bm_clear_inode(struct inode *inode) 505static void bm_evict_inode(struct inode *inode)
506{ 506{
507 end_writeback(inode);
507 kfree(inode->i_private); 508 kfree(inode->i_private);
508} 509}
509 510
@@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = {
685 686
686static const struct super_operations s_ops = { 687static const struct super_operations s_ops = {
687 .statfs = simple_statfs, 688 .statfs = simple_statfs,
688 .clear_inode = bm_clear_inode, 689 .evict_inode = bm_evict_inode,
689}; 690};
690 691
691static int bm_fill_super(struct super_block * sb, void * data, int silent) 692static int bm_fill_super(struct super_block * sb, void * data, int silent)
diff --git a/fs/bio.c b/fs/bio.c
index e7bf6ca64dcf..8abb2dfb2e7c 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
843 if (!bio) 843 if (!bio)
844 goto out_bmd; 844 goto out_bmd;
845 845
846 bio->bi_rw |= (!write_to_vm << BIO_RW); 846 if (!write_to_vm)
847 bio->bi_rw |= REQ_WRITE;
847 848
848 ret = 0; 849 ret = 0;
849 850
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
1024 * set data direction, and check if mapped pages need bouncing 1025 * set data direction, and check if mapped pages need bouncing
1025 */ 1026 */
1026 if (!write_to_vm) 1027 if (!write_to_vm)
1027 bio->bi_rw |= (1 << BIO_RW); 1028 bio->bi_rw |= REQ_WRITE;
1028 1029
1029 bio->bi_bdev = bdev; 1030 bio->bi_bdev = bdev;
1030 bio->bi_flags |= (1 << BIO_USER_MAPPED); 1031 bio->bi_flags |= (1 << BIO_USER_MAPPED);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b3171fb0dc9a..66411463b734 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
172 struct file *file = iocb->ki_filp; 172 struct file *file = iocb->ki_filp;
173 struct inode *inode = file->f_mapping->host; 173 struct inode *inode = file->f_mapping->host;
174 174
175 return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode, 175 return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
176 I_BDEV(inode), iov, offset, nr_segs, 176 nr_segs, blkdev_get_blocks, NULL, NULL, 0);
177 blkdev_get_blocks, NULL);
178} 177}
179 178
180int __sync_blockdev(struct block_device *bdev, int wait) 179int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
309 loff_t pos, unsigned len, unsigned flags, 308 loff_t pos, unsigned len, unsigned flags,
310 struct page **pagep, void **fsdata) 309 struct page **pagep, void **fsdata)
311{ 310{
312 *pagep = NULL; 311 return block_write_begin(mapping, pos, len, flags, pagep,
313 return block_write_begin_newtrunc(file, mapping, pos, len, flags, 312 blkdev_get_block);
314 pagep, fsdata, blkdev_get_block);
315} 313}
316 314
317static int blkdev_write_end(struct file *file, struct address_space *mapping, 315static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -428,10 +426,13 @@ static inline void __bd_forget(struct inode *inode)
428 inode->i_mapping = &inode->i_data; 426 inode->i_mapping = &inode->i_data;
429} 427}
430 428
431static void bdev_clear_inode(struct inode *inode) 429static void bdev_evict_inode(struct inode *inode)
432{ 430{
433 struct block_device *bdev = &BDEV_I(inode)->bdev; 431 struct block_device *bdev = &BDEV_I(inode)->bdev;
434 struct list_head *p; 432 struct list_head *p;
433 truncate_inode_pages(&inode->i_data, 0);
434 invalidate_inode_buffers(inode); /* is it needed here? */
435 end_writeback(inode);
435 spin_lock(&bdev_lock); 436 spin_lock(&bdev_lock);
436 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) { 437 while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
437 __bd_forget(list_entry(p, struct inode, i_devices)); 438 __bd_forget(list_entry(p, struct inode, i_devices));
@@ -445,7 +446,7 @@ static const struct super_operations bdev_sops = {
445 .alloc_inode = bdev_alloc_inode, 446 .alloc_inode = bdev_alloc_inode,
446 .destroy_inode = bdev_destroy_inode, 447 .destroy_inode = bdev_destroy_inode,
447 .drop_inode = generic_delete_inode, 448 .drop_inode = generic_delete_inode,
448 .clear_inode = bdev_clear_inode, 449 .evict_inode = bdev_evict_inode,
449}; 450};
450 451
451static int bd_get_sb(struct file_system_type *fs_type, 452static int bd_get_sb(struct file_system_type *fs_type,
@@ -1345,13 +1346,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1345 return ret; 1346 return ret;
1346 } 1347 }
1347 1348
1348 lock_kernel();
1349 restart: 1349 restart:
1350 1350
1351 ret = -ENXIO; 1351 ret = -ENXIO;
1352 disk = get_gendisk(bdev->bd_dev, &partno); 1352 disk = get_gendisk(bdev->bd_dev, &partno);
1353 if (!disk) 1353 if (!disk)
1354 goto out_unlock_kernel; 1354 goto out;
1355 1355
1356 mutex_lock_nested(&bdev->bd_mutex, for_part); 1356 mutex_lock_nested(&bdev->bd_mutex, for_part);
1357 if (!bdev->bd_openers) { 1357 if (!bdev->bd_openers) {
@@ -1431,7 +1431,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1431 if (for_part) 1431 if (for_part)
1432 bdev->bd_part_count++; 1432 bdev->bd_part_count++;
1433 mutex_unlock(&bdev->bd_mutex); 1433 mutex_unlock(&bdev->bd_mutex);
1434 unlock_kernel();
1435 return 0; 1434 return 0;
1436 1435
1437 out_clear: 1436 out_clear:
@@ -1444,9 +1443,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1444 bdev->bd_contains = NULL; 1443 bdev->bd_contains = NULL;
1445 out_unlock_bdev: 1444 out_unlock_bdev:
1446 mutex_unlock(&bdev->bd_mutex); 1445 mutex_unlock(&bdev->bd_mutex);
1447 out_unlock_kernel: 1446 out:
1448 unlock_kernel();
1449
1450 if (disk) 1447 if (disk)
1451 module_put(disk->fops->owner); 1448 module_put(disk->fops->owner);
1452 put_disk(disk); 1449 put_disk(disk);
@@ -1515,7 +1512,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1515 struct block_device *victim = NULL; 1512 struct block_device *victim = NULL;
1516 1513
1517 mutex_lock_nested(&bdev->bd_mutex, for_part); 1514 mutex_lock_nested(&bdev->bd_mutex, for_part);
1518 lock_kernel();
1519 if (for_part) 1515 if (for_part)
1520 bdev->bd_part_count--; 1516 bdev->bd_part_count--;
1521 1517
@@ -1540,7 +1536,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1540 victim = bdev->bd_contains; 1536 victim = bdev->bd_contains;
1541 bdev->bd_contains = NULL; 1537 bdev->bd_contains = NULL;
1542 } 1538 }
1543 unlock_kernel();
1544 mutex_unlock(&bdev->bd_mutex); 1539 mutex_unlock(&bdev->bd_mutex);
1545 bdput(bdev); 1540 bdput(bdev);
1546 if (victim) 1541 if (victim)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29c20092847e..eaf286abad17 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2389,13 +2389,13 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
2389 pgoff_t offset, pgoff_t last_index); 2389 pgoff_t offset, pgoff_t last_index);
2390int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 2390int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2391int btrfs_readpage(struct file *file, struct page *page); 2391int btrfs_readpage(struct file *file, struct page *page);
2392void btrfs_delete_inode(struct inode *inode); 2392void btrfs_evict_inode(struct inode *inode);
2393void btrfs_put_inode(struct inode *inode); 2393void btrfs_put_inode(struct inode *inode);
2394int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2394int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2395void btrfs_dirty_inode(struct inode *inode); 2395void btrfs_dirty_inode(struct inode *inode);
2396struct inode *btrfs_alloc_inode(struct super_block *sb); 2396struct inode *btrfs_alloc_inode(struct super_block *sb);
2397void btrfs_destroy_inode(struct inode *inode); 2397void btrfs_destroy_inode(struct inode *inode);
2398void btrfs_drop_inode(struct inode *inode); 2398int btrfs_drop_inode(struct inode *inode);
2399int btrfs_init_cachep(void); 2399int btrfs_init_cachep(void);
2400void btrfs_destroy_cachep(void); 2400void btrfs_destroy_cachep(void);
2401long btrfs_ioctl_trans_end(struct file *file); 2401long btrfs_ioctl_trans_end(struct file *file);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 34f7c375567e..64f10082f048 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
480 end_io_wq->work.func = end_workqueue_fn; 480 end_io_wq->work.func = end_workqueue_fn;
481 end_io_wq->work.flags = 0; 481 end_io_wq->work.flags = 0;
482 482
483 if (bio->bi_rw & (1 << BIO_RW)) { 483 if (bio->bi_rw & REQ_WRITE) {
484 if (end_io_wq->metadata) 484 if (end_io_wq->metadata)
485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 485 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
486 &end_io_wq->work); 486 &end_io_wq->work);
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
604 604
605 atomic_inc(&fs_info->nr_async_submits); 605 atomic_inc(&fs_info->nr_async_submits);
606 606
607 if (rw & (1 << BIO_RW_SYNCIO)) 607 if (rw & REQ_SYNC)
608 btrfs_set_work_high_prio(&async->work); 608 btrfs_set_work_high_prio(&async->work);
609 609
610 btrfs_queue_worker(&fs_info->workers, &async->work); 610 btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
668 bio, 1); 668 bio, 1);
669 BUG_ON(ret); 669 BUG_ON(ret);
670 670
671 if (!(rw & (1 << BIO_RW))) { 671 if (!(rw & REQ_WRITE)) {
672 /* 672 /*
673 * called for a read, do the setup so that checksum validation 673 * called for a read, do the setup so that checksum validation
674 * can happen in the async kernel threads 674 * can happen in the async kernel threads
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
1427 * ram and up to date before trying to verify things. For 1427 * ram and up to date before trying to verify things. For
1428 * blocksize <= pagesize, it is basically a noop 1428 * blocksize <= pagesize, it is basically a noop
1429 */ 1429 */
1430 if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata && 1430 if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
1431 !bio_ready_for_csum(bio)) { 1431 !bio_ready_for_csum(bio)) {
1432 btrfs_queue_worker(&fs_info->endio_meta_workers, 1432 btrfs_queue_worker(&fs_info->endio_meta_workers,
1433 &end_io_wq->work); 1433 &end_io_wq->work);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1bff92ad4744..c03864406af3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1430 BUG_ON(ret); 1430 BUG_ON(ret);
1431 1431
1432 if (!(rw & (1 << BIO_RW))) { 1432 if (!(rw & REQ_WRITE)) {
1433 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1433 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1434 return btrfs_submit_compressed_read(inode, bio, 1434 return btrfs_submit_compressed_read(inode, bio,
1435 mirror_num, bio_flags); 1435 mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1841 bio->bi_size = 0; 1841 bio->bi_size = 0;
1842 1842
1843 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 1843 bio_add_page(bio, page, failrec->len, start - page_offset(page));
1844 if (failed_bio->bi_rw & (1 << BIO_RW)) 1844 if (failed_bio->bi_rw & REQ_WRITE)
1845 rw = WRITE; 1845 rw = WRITE;
1846 else 1846 else
1847 rw = READ; 1847 rw = READ;
@@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2938 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2938 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2939 ret = btrfs_update_inode(trans, root, dir); 2939 ret = btrfs_update_inode(trans, root, dir);
2940 BUG_ON(ret); 2940 BUG_ON(ret);
2941 dir->i_sb->s_dirt = 1;
2942 2941
2943 btrfs_free_path(path); 2942 btrfs_free_path(path);
2944 return 0; 2943 return 0;
@@ -3656,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3656 if (err) 3655 if (err)
3657 return err; 3656 return err;
3658 } 3657 }
3659 attr->ia_valid &= ~ATTR_SIZE;
3660 3658
3661 if (attr->ia_valid) 3659 if (attr->ia_valid) {
3662 err = inode_setattr(inode, attr); 3660 setattr_copy(inode, attr);
3661 mark_inode_dirty(inode);
3662
3663 if (attr->ia_valid & ATTR_MODE)
3664 err = btrfs_acl_chmod(inode);
3665 }
3663 3666
3664 if (!err && ((attr->ia_valid & ATTR_MODE)))
3665 err = btrfs_acl_chmod(inode);
3666 return err; 3667 return err;
3667} 3668}
3668 3669
3669void btrfs_delete_inode(struct inode *inode) 3670void btrfs_evict_inode(struct inode *inode)
3670{ 3671{
3671 struct btrfs_trans_handle *trans; 3672 struct btrfs_trans_handle *trans;
3672 struct btrfs_root *root = BTRFS_I(inode)->root; 3673 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3674,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode)
3674 int ret; 3675 int ret;
3675 3676
3676 truncate_inode_pages(&inode->i_data, 0); 3677 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
3679 goto no_delete;
3680
3677 if (is_bad_inode(inode)) { 3681 if (is_bad_inode(inode)) {
3678 btrfs_orphan_del(NULL, inode); 3682 btrfs_orphan_del(NULL, inode);
3679 goto no_delete; 3683 goto no_delete;
3680 } 3684 }
3685 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
3681 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3686 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3682 3687
3683 if (root->fs_info->log_root_recovering) { 3688 if (root->fs_info->log_root_recovering) {
@@ -3727,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode)
3727 btrfs_end_transaction(trans, root); 3732 btrfs_end_transaction(trans, root);
3728 btrfs_btree_balance_dirty(root, nr); 3733 btrfs_btree_balance_dirty(root, nr);
3729no_delete: 3734no_delete:
3730 clear_inode(inode); 3735 end_writeback(inode);
3731 return; 3736 return;
3732} 3737}
3733 3738
@@ -3858,7 +3863,7 @@ again:
3858 p = &parent->rb_right; 3863 p = &parent->rb_right;
3859 else { 3864 else {
3860 WARN_ON(!(entry->vfs_inode.i_state & 3865 WARN_ON(!(entry->vfs_inode.i_state &
3861 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3866 (I_WILL_FREE | I_FREEING)));
3862 rb_erase(parent, &root->inode_tree); 3867 rb_erase(parent, &root->inode_tree);
3863 RB_CLEAR_NODE(parent); 3868 RB_CLEAR_NODE(parent);
3864 spin_unlock(&root->inode_lock); 3869 spin_unlock(&root->inode_lock);
@@ -3937,7 +3942,7 @@ again:
3937 if (atomic_read(&inode->i_count) > 1) 3942 if (atomic_read(&inode->i_count) > 1)
3938 d_prune_aliases(inode); 3943 d_prune_aliases(inode);
3939 /* 3944 /*
3940 * btrfs_drop_inode will remove it from 3945 * btrfs_drop_inode will have it removed from
3941 * the inode cache when its usage count 3946 * the inode cache when its usage count
3942 * hits zero. 3947 * hits zero.
3943 */ 3948 */
@@ -5642,7 +5647,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5642 struct bio_vec *bvec = bio->bi_io_vec; 5647 struct bio_vec *bvec = bio->bi_io_vec;
5643 u64 start; 5648 u64 start;
5644 int skip_sum; 5649 int skip_sum;
5645 int write = rw & (1 << BIO_RW); 5650 int write = rw & REQ_WRITE;
5646 int ret = 0; 5651 int ret = 0;
5647 5652
5648 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 5653 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -6331,13 +6336,14 @@ free:
6331 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6336 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6332} 6337}
6333 6338
6334void btrfs_drop_inode(struct inode *inode) 6339int btrfs_drop_inode(struct inode *inode)
6335{ 6340{
6336 struct btrfs_root *root = BTRFS_I(inode)->root; 6341 struct btrfs_root *root = BTRFS_I(inode)->root;
6337 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 6342
6338 generic_delete_inode(inode); 6343 if (btrfs_root_refs(&root->root_item) == 0)
6344 return 1;
6339 else 6345 else
6340 generic_drop_inode(inode); 6346 return generic_drop_inode(inode);
6341} 6347}
6342 6348
6343static void init_once(void *foo) 6349static void init_once(void *foo)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2393b390318..1776dbd8dc98 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -797,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb)
797 797
798static const struct super_operations btrfs_super_ops = { 798static const struct super_operations btrfs_super_ops = {
799 .drop_inode = btrfs_drop_inode, 799 .drop_inode = btrfs_drop_inode,
800 .delete_inode = btrfs_delete_inode, 800 .evict_inode = btrfs_evict_inode,
801 .put_super = btrfs_put_super, 801 .put_super = btrfs_put_super,
802 .sync_fs = btrfs_sync_fs, 802 .sync_fs = btrfs_sync_fs,
803 .show_options = btrfs_show_options, 803 .show_options = btrfs_show_options,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d6e3af8be95b..dd318ff280b2 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -258,7 +258,7 @@ loop_lock:
258 258
259 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 259 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
260 260
261 if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) 261 if (cur->bi_rw & REQ_SYNC)
262 num_sync_run++; 262 num_sync_run++;
263 263
264 submit_bio(cur->bi_rw, cur); 264 submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
2651 int max_errors = 0; 2651 int max_errors = 0;
2652 struct btrfs_multi_bio *multi = NULL; 2652 struct btrfs_multi_bio *multi = NULL;
2653 2653
2654 if (multi_ret && !(rw & (1 << BIO_RW))) 2654 if (multi_ret && !(rw & REQ_WRITE))
2655 stripes_allocated = 1; 2655 stripes_allocated = 1;
2656again: 2656again:
2657 if (multi_ret) { 2657 if (multi_ret) {
@@ -2687,7 +2687,7 @@ again:
2687 mirror_num = 0; 2687 mirror_num = 0;
2688 2688
2689 /* if our multi bio struct is too small, back off and try again */ 2689 /* if our multi bio struct is too small, back off and try again */
2690 if (rw & (1 << BIO_RW)) { 2690 if (rw & REQ_WRITE) {
2691 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | 2691 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
2692 BTRFS_BLOCK_GROUP_DUP)) { 2692 BTRFS_BLOCK_GROUP_DUP)) {
2693 stripes_required = map->num_stripes; 2693 stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@ again:
2697 max_errors = 1; 2697 max_errors = 1;
2698 } 2698 }
2699 } 2699 }
2700 if (multi_ret && (rw & (1 << BIO_RW)) && 2700 if (multi_ret && (rw & REQ_WRITE) &&
2701 stripes_allocated < stripes_required) { 2701 stripes_allocated < stripes_required) {
2702 stripes_allocated = map->num_stripes; 2702 stripes_allocated = map->num_stripes;
2703 free_extent_map(em); 2703 free_extent_map(em);
@@ -2733,7 +2733,7 @@ again:
2733 num_stripes = 1; 2733 num_stripes = 1;
2734 stripe_index = 0; 2734 stripe_index = 0;
2735 if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 2735 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2736 if (unplug_page || (rw & (1 << BIO_RW))) 2736 if (unplug_page || (rw & REQ_WRITE))
2737 num_stripes = map->num_stripes; 2737 num_stripes = map->num_stripes;
2738 else if (mirror_num) 2738 else if (mirror_num)
2739 stripe_index = mirror_num - 1; 2739 stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@ again:
2744 } 2744 }
2745 2745
2746 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 2746 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2747 if (rw & (1 << BIO_RW)) 2747 if (rw & REQ_WRITE)
2748 num_stripes = map->num_stripes; 2748 num_stripes = map->num_stripes;
2749 else if (mirror_num) 2749 else if (mirror_num)
2750 stripe_index = mirror_num - 1; 2750 stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@ again:
2755 stripe_index = do_div(stripe_nr, factor); 2755 stripe_index = do_div(stripe_nr, factor);
2756 stripe_index *= map->sub_stripes; 2756 stripe_index *= map->sub_stripes;
2757 2757
2758 if (unplug_page || (rw & (1 << BIO_RW))) 2758 if (unplug_page || (rw & REQ_WRITE))
2759 num_stripes = map->sub_stripes; 2759 num_stripes = map->sub_stripes;
2760 else if (mirror_num) 2760 else if (mirror_num)
2761 stripe_index += mirror_num - 1; 2761 stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2945 struct btrfs_pending_bios *pending_bios; 2945 struct btrfs_pending_bios *pending_bios;
2946 2946
2947 /* don't bother with additional async steps for reads, right now */ 2947 /* don't bother with additional async steps for reads, right now */
2948 if (!(rw & (1 << BIO_RW))) { 2948 if (!(rw & REQ_WRITE)) {
2949 bio_get(bio); 2949 bio_get(bio);
2950 submit_bio(rw, bio); 2950 submit_bio(rw, bio);
2951 bio_put(bio); 2951 bio_put(bio);
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2964 bio->bi_rw |= rw; 2964 bio->bi_rw |= rw;
2965 2965
2966 spin_lock(&device->io_lock); 2966 spin_lock(&device->io_lock);
2967 if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) 2967 if (bio->bi_rw & REQ_SYNC)
2968 pending_bios = &device->pending_sync_bios; 2968 pending_bios = &device->pending_sync_bios;
2969 else 2969 else
2970 pending_bios = &device->pending_bios; 2970 pending_bios = &device->pending_bios;
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e9..50efa339e051 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
1833} 1833}
1834EXPORT_SYMBOL(page_zero_new_buffers); 1834EXPORT_SYMBOL(page_zero_new_buffers);
1835 1835
1836static int __block_prepare_write(struct inode *inode, struct page *page, 1836int block_prepare_write(struct page *page, unsigned from, unsigned to,
1837 unsigned from, unsigned to, get_block_t *get_block) 1837 get_block_t *get_block)
1838{ 1838{
1839 struct inode *inode = page->mapping->host;
1839 unsigned block_start, block_end; 1840 unsigned block_start, block_end;
1840 sector_t block; 1841 sector_t block;
1841 int err = 0; 1842 int err = 0;
@@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1908 if (!buffer_uptodate(*wait_bh)) 1909 if (!buffer_uptodate(*wait_bh))
1909 err = -EIO; 1910 err = -EIO;
1910 } 1911 }
1911 if (unlikely(err)) 1912 if (unlikely(err)) {
1912 page_zero_new_buffers(page, from, to); 1913 page_zero_new_buffers(page, from, to);
1914 ClearPageUptodate(page);
1915 }
1913 return err; 1916 return err;
1914} 1917}
1918EXPORT_SYMBOL(block_prepare_write);
1915 1919
1916static int __block_commit_write(struct inode *inode, struct page *page, 1920static int __block_commit_write(struct inode *inode, struct page *page,
1917 unsigned from, unsigned to) 1921 unsigned from, unsigned to)
@@ -1948,90 +1952,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
1948 return 0; 1952 return 0;
1949} 1953}
1950 1954
1951/* 1955int __block_write_begin(struct page *page, loff_t pos, unsigned len,
1952 * Filesystems implementing the new truncate sequence should use the 1956 get_block_t *get_block)
1953 * _newtrunc postfix variant which won't incorrectly call vmtruncate.
1954 * The filesystem needs to handle block truncation upon failure.
1955 */
1956int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
1957 loff_t pos, unsigned len, unsigned flags,
1958 struct page **pagep, void **fsdata,
1959 get_block_t *get_block)
1960{ 1957{
1961 struct inode *inode = mapping->host; 1958 unsigned start = pos & (PAGE_CACHE_SIZE - 1);
1962 int status = 0;
1963 struct page *page;
1964 pgoff_t index;
1965 unsigned start, end;
1966 int ownpage = 0;
1967
1968 index = pos >> PAGE_CACHE_SHIFT;
1969 start = pos & (PAGE_CACHE_SIZE - 1);
1970 end = start + len;
1971
1972 page = *pagep;
1973 if (page == NULL) {
1974 ownpage = 1;
1975 page = grab_cache_page_write_begin(mapping, index, flags);
1976 if (!page) {
1977 status = -ENOMEM;
1978 goto out;
1979 }
1980 *pagep = page;
1981 } else
1982 BUG_ON(!PageLocked(page));
1983
1984 status = __block_prepare_write(inode, page, start, end, get_block);
1985 if (unlikely(status)) {
1986 ClearPageUptodate(page);
1987 1959
1988 if (ownpage) { 1960 return block_prepare_write(page, start, start + len, get_block);
1989 unlock_page(page);
1990 page_cache_release(page);
1991 *pagep = NULL;
1992 }
1993 }
1994
1995out:
1996 return status;
1997} 1961}
1998EXPORT_SYMBOL(block_write_begin_newtrunc); 1962EXPORT_SYMBOL(__block_write_begin);
1999 1963
2000/* 1964/*
2001 * block_write_begin takes care of the basic task of block allocation and 1965 * block_write_begin takes care of the basic task of block allocation and
2002 * bringing partial write blocks uptodate first. 1966 * bringing partial write blocks uptodate first.
2003 * 1967 *
2004 * If *pagep is not NULL, then block_write_begin uses the locked page 1968 * The filesystem needs to handle block truncation upon failure.
2005 * at *pagep rather than allocating its own. In this case, the page will
2006 * not be unlocked or deallocated on failure.
2007 */ 1969 */
2008int block_write_begin(struct file *file, struct address_space *mapping, 1970int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
2009 loff_t pos, unsigned len, unsigned flags, 1971 unsigned flags, struct page **pagep, get_block_t *get_block)
2010 struct page **pagep, void **fsdata,
2011 get_block_t *get_block)
2012{ 1972{
2013 int ret; 1973 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
1974 struct page *page;
1975 int status;
2014 1976
2015 ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, 1977 page = grab_cache_page_write_begin(mapping, index, flags);
2016 pagep, fsdata, get_block); 1978 if (!page)
1979 return -ENOMEM;
2017 1980
2018 /* 1981 status = __block_write_begin(page, pos, len, get_block);
2019 * prepare_write() may have instantiated a few blocks 1982 if (unlikely(status)) {
2020 * outside i_size. Trim these off again. Don't need 1983 unlock_page(page);
2021 * i_size_read because we hold i_mutex. 1984 page_cache_release(page);
2022 * 1985 page = NULL;
2023 * Filesystems which pass down their own page also cannot
2024 * call into vmtruncate here because it would lead to lock
2025 * inversion problems (*pagep is locked). This is a further
2026 * example of where the old truncate sequence is inadequate.
2027 */
2028 if (unlikely(ret) && *pagep == NULL) {
2029 loff_t isize = mapping->host->i_size;
2030 if (pos + len > isize)
2031 vmtruncate(mapping->host, isize);
2032 } 1986 }
2033 1987
2034 return ret; 1988 *pagep = page;
1989 return status;
2035} 1990}
2036EXPORT_SYMBOL(block_write_begin); 1991EXPORT_SYMBOL(block_write_begin);
2037 1992
@@ -2351,7 +2306,7 @@ out:
2351 * For moronic filesystems that do not allow holes in file. 2306 * For moronic filesystems that do not allow holes in file.
2352 * We may have to extend the file. 2307 * We may have to extend the file.
2353 */ 2308 */
2354int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, 2309int cont_write_begin(struct file *file, struct address_space *mapping,
2355 loff_t pos, unsigned len, unsigned flags, 2310 loff_t pos, unsigned len, unsigned flags,
2356 struct page **pagep, void **fsdata, 2311 struct page **pagep, void **fsdata,
2357 get_block_t *get_block, loff_t *bytes) 2312 get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2318,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2363 2318
2364 err = cont_expand_zero(file, mapping, pos, bytes); 2319 err = cont_expand_zero(file, mapping, pos, bytes);
2365 if (err) 2320 if (err)
2366 goto out; 2321 return err;
2367 2322
2368 zerofrom = *bytes & ~PAGE_CACHE_MASK; 2323 zerofrom = *bytes & ~PAGE_CACHE_MASK;
2369 if (pos+len > *bytes && zerofrom & (blocksize-1)) { 2324 if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2326,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2371 (*bytes)++; 2326 (*bytes)++;
2372 } 2327 }
2373 2328
2374 *pagep = NULL; 2329 return block_write_begin(mapping, pos, len, flags, pagep, get_block);
2375 err = block_write_begin_newtrunc(file, mapping, pos, len,
2376 flags, pagep, fsdata, get_block);
2377out:
2378 return err;
2379}
2380EXPORT_SYMBOL(cont_write_begin_newtrunc);
2381
2382int cont_write_begin(struct file *file, struct address_space *mapping,
2383 loff_t pos, unsigned len, unsigned flags,
2384 struct page **pagep, void **fsdata,
2385 get_block_t *get_block, loff_t *bytes)
2386{
2387 int ret;
2388
2389 ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
2390 pagep, fsdata, get_block, bytes);
2391 if (unlikely(ret)) {
2392 loff_t isize = mapping->host->i_size;
2393 if (pos + len > isize)
2394 vmtruncate(mapping->host, isize);
2395 }
2396
2397 return ret;
2398} 2330}
2399EXPORT_SYMBOL(cont_write_begin); 2331EXPORT_SYMBOL(cont_write_begin);
2400 2332
2401int block_prepare_write(struct page *page, unsigned from, unsigned to,
2402 get_block_t *get_block)
2403{
2404 struct inode *inode = page->mapping->host;
2405 int err = __block_prepare_write(inode, page, from, to, get_block);
2406 if (err)
2407 ClearPageUptodate(page);
2408 return err;
2409}
2410EXPORT_SYMBOL(block_prepare_write);
2411
2412int block_commit_write(struct page *page, unsigned from, unsigned to) 2333int block_commit_write(struct page *page, unsigned from, unsigned to)
2413{ 2334{
2414 struct inode *inode = page->mapping->host; 2335 struct inode *inode = page->mapping->host;
@@ -2510,11 +2431,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2510} 2431}
2511 2432
2512/* 2433/*
2513 * Filesystems implementing the new truncate sequence should use the 2434 * On entry, the page is fully not uptodate.
2514 * _newtrunc postfix variant which won't incorrectly call vmtruncate. 2435 * On exit the page is fully uptodate in the areas outside (from,to)
2515 * The filesystem needs to handle block truncation upon failure. 2436 * The filesystem needs to handle block truncation upon failure.
2516 */ 2437 */
2517int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping, 2438int nobh_write_begin(struct address_space *mapping,
2518 loff_t pos, unsigned len, unsigned flags, 2439 loff_t pos, unsigned len, unsigned flags,
2519 struct page **pagep, void **fsdata, 2440 struct page **pagep, void **fsdata,
2520 get_block_t *get_block) 2441 get_block_t *get_block)
@@ -2547,8 +2468,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
2547 unlock_page(page); 2468 unlock_page(page);
2548 page_cache_release(page); 2469 page_cache_release(page);
2549 *pagep = NULL; 2470 *pagep = NULL;
2550 return block_write_begin_newtrunc(file, mapping, pos, len, 2471 return block_write_begin(mapping, pos, len, flags, pagep,
2551 flags, pagep, fsdata, get_block); 2472 get_block);
2552 } 2473 }
2553 2474
2554 if (PageMappedToDisk(page)) 2475 if (PageMappedToDisk(page))
@@ -2654,35 +2575,6 @@ out_release:
2654 2575
2655 return ret; 2576 return ret;
2656} 2577}
2657EXPORT_SYMBOL(nobh_write_begin_newtrunc);
2658
2659/*
2660 * On entry, the page is fully not uptodate.
2661 * On exit the page is fully uptodate in the areas outside (from,to)
2662 */
2663int nobh_write_begin(struct file *file, struct address_space *mapping,
2664 loff_t pos, unsigned len, unsigned flags,
2665 struct page **pagep, void **fsdata,
2666 get_block_t *get_block)
2667{
2668 int ret;
2669
2670 ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
2671 pagep, fsdata, get_block);
2672
2673 /*
2674 * prepare_write() may have instantiated a few blocks
2675 * outside i_size. Trim these off again. Don't need
2676 * i_size_read because we hold i_mutex.
2677 */
2678 if (unlikely(ret)) {
2679 loff_t isize = mapping->host->i_size;
2680 if (pos + len > isize)
2681 vmtruncate(mapping->host, isize);
2682 }
2683
2684 return ret;
2685}
2686EXPORT_SYMBOL(nobh_write_begin); 2578EXPORT_SYMBOL(nobh_write_begin);
2687 2579
2688int nobh_write_end(struct file *file, struct address_space *mapping, 2580int nobh_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 2906077ac798..a2603e7c0bb5 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
146 goto error_unsupported; 146 goto error_unsupported;
147 147
148 /* get the cache size and blocksize */ 148 /* get the cache size and blocksize */
149 ret = vfs_statfs(root, &stats); 149 ret = vfs_statfs(&path, &stats);
150 if (ret < 0) 150 if (ret < 0)
151 goto error_unsupported; 151 goto error_unsupported;
152 152
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index c2413561ea75..24eb0d37241a 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -683,6 +683,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
683 unsigned fnr, unsigned bnr) 683 unsigned fnr, unsigned bnr)
684{ 684{
685 struct kstatfs stats; 685 struct kstatfs stats;
686 struct path path = {
687 .mnt = cache->mnt,
688 .dentry = cache->mnt->mnt_root,
689 };
686 int ret; 690 int ret;
687 691
688 //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", 692 //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
@@ -697,7 +701,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
697 /* find out how many pages of blockdev are available */ 701 /* find out how many pages of blockdev are available */
698 memset(&stats, 0, sizeof(stats)); 702 memset(&stats, 0, sizeof(stats));
699 703
700 ret = vfs_statfs(cache->mnt->mnt_root, &stats); 704 ret = vfs_statfs(&path, &stats);
701 if (ret < 0) { 705 if (ret < 0) {
702 if (ret == -EIO) 706 if (ret == -EIO)
703 cachefiles_io_error(cache, "statfs failed"); 707 cachefiles_io_error(cache, "statfs failed");
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a5ed10c9afef..b7431afdd76d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -329,8 +329,10 @@ cifs_destroy_inode(struct inode *inode)
329} 329}
330 330
331static void 331static void
332cifs_clear_inode(struct inode *inode) 332cifs_evict_inode(struct inode *inode)
333{ 333{
334 truncate_inode_pages(&inode->i_data, 0);
335 end_writeback(inode);
334 cifs_fscache_release_inode_cookie(inode); 336 cifs_fscache_release_inode_cookie(inode);
335} 337}
336 338
@@ -479,14 +481,13 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
479 return 0; 481 return 0;
480} 482}
481 483
482void cifs_drop_inode(struct inode *inode) 484static int cifs_drop_inode(struct inode *inode)
483{ 485{
484 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 486 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
485 487
486 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) 488 /* no serverino => unconditional eviction */
487 return generic_drop_inode(inode); 489 return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) ||
488 490 generic_drop_inode(inode);
489 return generic_delete_inode(inode);
490} 491}
491 492
492static const struct super_operations cifs_super_ops = { 493static const struct super_operations cifs_super_ops = {
@@ -495,7 +496,7 @@ static const struct super_operations cifs_super_ops = {
495 .alloc_inode = cifs_alloc_inode, 496 .alloc_inode = cifs_alloc_inode,
496 .destroy_inode = cifs_destroy_inode, 497 .destroy_inode = cifs_destroy_inode,
497 .drop_inode = cifs_drop_inode, 498 .drop_inode = cifs_drop_inode,
498 .clear_inode = cifs_clear_inode, 499 .evict_inode = cifs_evict_inode,
499/* .delete_inode = cifs_delete_inode, */ /* Do not need above 500/* .delete_inode = cifs_delete_inode, */ /* Do not need above
500 function unless later we add lazy close of inodes or unless the 501 function unless later we add lazy close of inodes or unless the
501 kernel forgets to call us with the same number of releases (closes) 502 kernel forgets to call us with the same number of releases (closes)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index dc4c47ab9588..4bc47e5b5f29 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1698,26 +1698,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
1698 return rc; 1698 return rc;
1699} 1699}
1700 1700
1701static int cifs_vmtruncate(struct inode *inode, loff_t offset) 1701static void cifs_setsize(struct inode *inode, loff_t offset)
1702{ 1702{
1703 loff_t oldsize; 1703 loff_t oldsize;
1704 int err;
1705 1704
1706 spin_lock(&inode->i_lock); 1705 spin_lock(&inode->i_lock);
1707 err = inode_newsize_ok(inode, offset);
1708 if (err) {
1709 spin_unlock(&inode->i_lock);
1710 goto out;
1711 }
1712
1713 oldsize = inode->i_size; 1706 oldsize = inode->i_size;
1714 i_size_write(inode, offset); 1707 i_size_write(inode, offset);
1715 spin_unlock(&inode->i_lock); 1708 spin_unlock(&inode->i_lock);
1709
1716 truncate_pagecache(inode, oldsize, offset); 1710 truncate_pagecache(inode, oldsize, offset);
1717 if (inode->i_op->truncate)
1718 inode->i_op->truncate(inode);
1719out:
1720 return err;
1721} 1711}
1722 1712
1723static int 1713static int
@@ -1790,7 +1780,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1790 1780
1791 if (rc == 0) { 1781 if (rc == 0) {
1792 cifsInode->server_eof = attrs->ia_size; 1782 cifsInode->server_eof = attrs->ia_size;
1793 rc = cifs_vmtruncate(inode, attrs->ia_size); 1783 cifs_setsize(inode, attrs->ia_size);
1794 cifs_truncate_page(inode->i_mapping, inode->i_size); 1784 cifs_truncate_page(inode->i_mapping, inode->i_size);
1795 } 1785 }
1796 1786
@@ -1815,14 +1805,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1815 1805
1816 xid = GetXid(); 1806 xid = GetXid();
1817 1807
1818 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1808 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
1819 /* check if we have permission to change attrs */ 1809 attrs->ia_valid |= ATTR_FORCE;
1820 rc = inode_change_ok(inode, attrs); 1810
1821 if (rc < 0) 1811 rc = inode_change_ok(inode, attrs);
1822 goto out; 1812 if (rc < 0)
1823 else 1813 goto out;
1824 rc = 0;
1825 }
1826 1814
1827 full_path = build_path_from_dentry(direntry); 1815 full_path = build_path_from_dentry(direntry);
1828 if (full_path == NULL) { 1816 if (full_path == NULL) {
@@ -1908,18 +1896,24 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1908 CIFS_MOUNT_MAP_SPECIAL_CHR); 1896 CIFS_MOUNT_MAP_SPECIAL_CHR);
1909 } 1897 }
1910 1898
1911 if (!rc) { 1899 if (rc)
1912 rc = inode_setattr(inode, attrs); 1900 goto out;
1913 1901
1914 /* force revalidate when any of these times are set since some 1902 if ((attrs->ia_valid & ATTR_SIZE) &&
1915 of the fs types (eg ext3, fat) do not have fine enough 1903 attrs->ia_size != i_size_read(inode))
1916 time granularity to match protocol, and we do not have a 1904 truncate_setsize(inode, attrs->ia_size);
1917 a way (yet) to query the server fs's time granularity (and 1905
1918 whether it rounds times down). 1906 setattr_copy(inode, attrs);
1919 */ 1907 mark_inode_dirty(inode);
1920 if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))) 1908
1921 cifsInode->time = 0; 1909 /* force revalidate when any of these times are set since some
1922 } 1910 of the fs types (eg ext3, fat) do not have fine enough
1911 time granularity to match protocol, and we do not have a
1912 a way (yet) to query the server fs's time granularity (and
1913 whether it rounds times down).
1914 */
1915 if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))
1916 cifsInode->time = 0;
1923out: 1917out:
1924 kfree(args); 1918 kfree(args);
1925 kfree(full_path); 1919 kfree(full_path);
@@ -1944,14 +1938,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1944 cFYI(1, "setattr on file %s attrs->iavalid 0x%x", 1938 cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
1945 direntry->d_name.name, attrs->ia_valid); 1939 direntry->d_name.name, attrs->ia_valid);
1946 1940
1947 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1941 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
1948 /* check if we have permission to change attrs */ 1942 attrs->ia_valid |= ATTR_FORCE;
1949 rc = inode_change_ok(inode, attrs); 1943
1950 if (rc < 0) { 1944 rc = inode_change_ok(inode, attrs);
1951 FreeXid(xid); 1945 if (rc < 0) {
1952 return rc; 1946 FreeXid(xid);
1953 } else 1947 return rc;
1954 rc = 0;
1955 } 1948 }
1956 1949
1957 full_path = build_path_from_dentry(direntry); 1950 full_path = build_path_from_dentry(direntry);
@@ -2059,8 +2052,17 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2059 2052
2060 /* do not need local check to inode_check_ok since the server does 2053 /* do not need local check to inode_check_ok since the server does
2061 that */ 2054 that */
2062 if (!rc) 2055 if (rc)
2063 rc = inode_setattr(inode, attrs); 2056 goto cifs_setattr_exit;
2057
2058 if ((attrs->ia_valid & ATTR_SIZE) &&
2059 attrs->ia_size != i_size_read(inode))
2060 truncate_setsize(inode, attrs->ia_size);
2061
2062 setattr_copy(inode, attrs);
2063 mark_inode_dirty(inode);
2064 return 0;
2065
2064cifs_setattr_exit: 2066cifs_setattr_exit:
2065 kfree(full_path); 2067 kfree(full_path);
2066 FreeXid(xid); 2068 FreeXid(xid);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d97f9935a028..6526e6f21ecf 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -35,7 +35,7 @@
35#include "coda_int.h" 35#include "coda_int.h"
36 36
37/* VFS super_block ops */ 37/* VFS super_block ops */
38static void coda_clear_inode(struct inode *); 38static void coda_evict_inode(struct inode *);
39static void coda_put_super(struct super_block *); 39static void coda_put_super(struct super_block *);
40static int coda_statfs(struct dentry *dentry, struct kstatfs *buf); 40static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
41 41
@@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations =
93{ 93{
94 .alloc_inode = coda_alloc_inode, 94 .alloc_inode = coda_alloc_inode,
95 .destroy_inode = coda_destroy_inode, 95 .destroy_inode = coda_destroy_inode,
96 .clear_inode = coda_clear_inode, 96 .evict_inode = coda_evict_inode,
97 .put_super = coda_put_super, 97 .put_super = coda_put_super,
98 .statfs = coda_statfs, 98 .statfs = coda_statfs,
99 .remount_fs = coda_remount, 99 .remount_fs = coda_remount,
@@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb)
224 printk("Coda: Bye bye.\n"); 224 printk("Coda: Bye bye.\n");
225} 225}
226 226
227static void coda_clear_inode(struct inode *inode) 227static void coda_evict_inode(struct inode *inode)
228{ 228{
229 truncate_inode_pages(&inode->i_data, 0);
230 end_writeback(inode);
229 coda_cache_clear_inode(inode); 231 coda_cache_clear_inode(inode);
230} 232}
231 233
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 66b9cf79c5ba..de89645777c7 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
177 nbytes = req->uc_outSize; /* don't have more space! */ 177 nbytes = req->uc_outSize; /* don't have more space! */
178 } 178 }
179 if (copy_from_user(req->uc_data, buf, nbytes)) { 179 if (copy_from_user(req->uc_data, buf, nbytes)) {
180 req->uc_flags |= REQ_ABORT; 180 req->uc_flags |= CODA_REQ_ABORT;
181 wake_up(&req->uc_sleep); 181 wake_up(&req->uc_sleep);
182 retval = -EFAULT; 182 retval = -EFAULT;
183 goto out; 183 goto out;
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
254 retval = -EFAULT; 254 retval = -EFAULT;
255 255
256 /* If request was not a signal, enqueue and don't free */ 256 /* If request was not a signal, enqueue and don't free */
257 if (!(req->uc_flags & REQ_ASYNC)) { 257 if (!(req->uc_flags & CODA_REQ_ASYNC)) {
258 req->uc_flags |= REQ_READ; 258 req->uc_flags |= CODA_REQ_READ;
259 list_add_tail(&(req->uc_chain), &vcp->vc_processing); 259 list_add_tail(&(req->uc_chain), &vcp->vc_processing);
260 goto out; 260 goto out;
261 } 261 }
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
315 list_del(&req->uc_chain); 315 list_del(&req->uc_chain);
316 316
317 /* Async requests need to be freed here */ 317 /* Async requests need to be freed here */
318 if (req->uc_flags & REQ_ASYNC) { 318 if (req->uc_flags & CODA_REQ_ASYNC) {
319 CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); 319 CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
320 kfree(req); 320 kfree(req);
321 continue; 321 continue;
322 } 322 }
323 req->uc_flags |= REQ_ABORT; 323 req->uc_flags |= CODA_REQ_ABORT;
324 wake_up(&req->uc_sleep); 324 wake_up(&req->uc_sleep);
325 } 325 }
326 326
327 list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) { 327 list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
328 list_del(&req->uc_chain); 328 list_del(&req->uc_chain);
329 329
330 req->uc_flags |= REQ_ABORT; 330 req->uc_flags |= CODA_REQ_ABORT;
331 wake_up(&req->uc_sleep); 331 wake_up(&req->uc_sleep);
332 } 332 }
333 333
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f09c5ed76f6c..b8893ab6f9e6 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old)
604 (((r)->uc_opcode != CODA_CLOSE && \ 604 (((r)->uc_opcode != CODA_CLOSE && \
605 (r)->uc_opcode != CODA_STORE && \ 605 (r)->uc_opcode != CODA_STORE && \
606 (r)->uc_opcode != CODA_RELEASE) || \ 606 (r)->uc_opcode != CODA_RELEASE) || \
607 (r)->uc_flags & REQ_READ)) 607 (r)->uc_flags & CODA_REQ_READ))
608 608
609static inline void coda_waitfor_upcall(struct upc_req *req) 609static inline void coda_waitfor_upcall(struct upc_req *req)
610{ 610{
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
624 set_current_state(TASK_UNINTERRUPTIBLE); 624 set_current_state(TASK_UNINTERRUPTIBLE);
625 625
626 /* got a reply */ 626 /* got a reply */
627 if (req->uc_flags & (REQ_WRITE | REQ_ABORT)) 627 if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
628 break; 628 break;
629 629
630 if (blocked && time_after(jiffies, timeout) && 630 if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp,
708 coda_waitfor_upcall(req); 708 coda_waitfor_upcall(req);
709 709
710 /* Op went through, interrupt or not... */ 710 /* Op went through, interrupt or not... */
711 if (req->uc_flags & REQ_WRITE) { 711 if (req->uc_flags & CODA_REQ_WRITE) {
712 out = (union outputArgs *)req->uc_data; 712 out = (union outputArgs *)req->uc_data;
713 /* here we map positive Venus errors to kernel errors */ 713 /* here we map positive Venus errors to kernel errors */
714 error = -out->oh.result; 714 error = -out->oh.result;
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp,
717 } 717 }
718 718
719 error = -EINTR; 719 error = -EINTR;
720 if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) { 720 if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
721 printk(KERN_WARNING "coda: Unexpected interruption.\n"); 721 printk(KERN_WARNING "coda: Unexpected interruption.\n");
722 goto exit; 722 goto exit;
723 } 723 }
724 724
725 /* Interrupted before venus read it. */ 725 /* Interrupted before venus read it. */
726 if (!(req->uc_flags & REQ_READ)) 726 if (!(req->uc_flags & CODA_REQ_READ))
727 goto exit; 727 goto exit;
728 728
729 /* Venus saw the upcall, make sure we can send interrupt signal */ 729 /* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp,
747 sig_inputArgs->ih.opcode = CODA_SIGNAL; 747 sig_inputArgs->ih.opcode = CODA_SIGNAL;
748 sig_inputArgs->ih.unique = req->uc_unique; 748 sig_inputArgs->ih.unique = req->uc_unique;
749 749
750 sig_req->uc_flags = REQ_ASYNC; 750 sig_req->uc_flags = CODA_REQ_ASYNC;
751 sig_req->uc_opcode = sig_inputArgs->ih.opcode; 751 sig_req->uc_opcode = sig_inputArgs->ih.opcode;
752 sig_req->uc_unique = sig_inputArgs->ih.unique; 752 sig_req->uc_unique = sig_inputArgs->ih.unique;
753 sig_req->uc_inSize = sizeof(struct coda_in_hdr); 753 sig_req->uc_inSize = sizeof(struct coda_in_hdr);
diff --git a/fs/compat.c b/fs/compat.c
index 5976bad85f65..e6d5d70cf3cf 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -267,7 +267,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
267 error = user_path(pathname, &path); 267 error = user_path(pathname, &path);
268 if (!error) { 268 if (!error) {
269 struct kstatfs tmp; 269 struct kstatfs tmp;
270 error = vfs_statfs(path.dentry, &tmp); 270 error = vfs_statfs(&path, &tmp);
271 if (!error) 271 if (!error)
272 error = put_compat_statfs(buf, &tmp); 272 error = put_compat_statfs(buf, &tmp);
273 path_put(&path); 273 path_put(&path);
@@ -285,7 +285,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
285 file = fget(fd); 285 file = fget(fd);
286 if (!file) 286 if (!file)
287 goto out; 287 goto out;
288 error = vfs_statfs(file->f_path.dentry, &tmp); 288 error = vfs_statfs(&file->f_path, &tmp);
289 if (!error) 289 if (!error)
290 error = put_compat_statfs(buf, &tmp); 290 error = put_compat_statfs(buf, &tmp);
291 fput(file); 291 fput(file);
@@ -335,7 +335,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
335 error = user_path(pathname, &path); 335 error = user_path(pathname, &path);
336 if (!error) { 336 if (!error) {
337 struct kstatfs tmp; 337 struct kstatfs tmp;
338 error = vfs_statfs(path.dentry, &tmp); 338 error = vfs_statfs(&path, &tmp);
339 if (!error) 339 if (!error)
340 error = put_compat_statfs64(buf, &tmp); 340 error = put_compat_statfs64(buf, &tmp);
341 path_put(&path); 341 path_put(&path);
@@ -356,7 +356,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
356 file = fget(fd); 356 file = fget(fd);
357 if (!file) 357 if (!file)
358 goto out; 358 goto out;
359 error = vfs_statfs(file->f_path.dentry, &tmp); 359 error = vfs_statfs(&file->f_path, &tmp);
360 if (!error) 360 if (!error)
361 error = put_compat_statfs64(buf, &tmp); 361 error = put_compat_statfs64(buf, &tmp);
362 fput(file); 362 fput(file);
@@ -379,7 +379,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
379 sb = user_get_super(new_decode_dev(dev)); 379 sb = user_get_super(new_decode_dev(dev));
380 if (!sb) 380 if (!sb)
381 return -EINVAL; 381 return -EINVAL;
382 err = vfs_statfs(sb->s_root, &sbuf); 382 err = statfs_by_dentry(sb->s_root, &sbuf);
383 drop_super(sb); 383 drop_super(sb);
384 if (err) 384 if (err)
385 return err; 385 return err;
@@ -1193,11 +1193,10 @@ out:
1193 if (iov != iovstack) 1193 if (iov != iovstack)
1194 kfree(iov); 1194 kfree(iov);
1195 if ((ret + (type == READ)) > 0) { 1195 if ((ret + (type == READ)) > 0) {
1196 struct dentry *dentry = file->f_path.dentry;
1197 if (type == READ) 1196 if (type == READ)
1198 fsnotify_access(dentry); 1197 fsnotify_access(file);
1199 else 1198 else
1200 fsnotify_modify(dentry); 1199 fsnotify_modify(file);
1201 } 1200 }
1202 return ret; 1201 return ret;
1203} 1202}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 63ae85831464..70227e0dc01d 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -131,23 +131,6 @@ static int w_long(unsigned int fd, unsigned int cmd,
131 return err; 131 return err;
132} 132}
133 133
134static int rw_long(unsigned int fd, unsigned int cmd,
135 compat_ulong_t __user *argp)
136{
137 mm_segment_t old_fs = get_fs();
138 int err;
139 unsigned long val;
140
141 if(get_user(val, argp))
142 return -EFAULT;
143 set_fs (KERNEL_DS);
144 err = sys_ioctl(fd, cmd, (unsigned long)&val);
145 set_fs (old_fs);
146 if (!err && put_user(val, argp))
147 return -EFAULT;
148 return err;
149}
150
151struct compat_video_event { 134struct compat_video_event {
152 int32_t type; 135 int32_t type;
153 compat_time_t timestamp; 136 compat_time_t timestamp;
@@ -594,12 +577,6 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
594 return err; 577 return err;
595} 578}
596 579
597static int ioc_settimeout(unsigned int fd, unsigned int cmd,
598 compat_ulong_t __user *argp)
599{
600 return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
601}
602
603/* Bluetooth ioctls */ 580/* Bluetooth ioctls */
604#define HCIUARTSETPROTO _IOW('U', 200, int) 581#define HCIUARTSETPROTO _IOW('U', 200, int)
605#define HCIUARTGETPROTO _IOR('U', 201, int) 582#define HCIUARTGETPROTO _IOR('U', 201, int)
@@ -969,6 +946,7 @@ COMPATIBLE_IOCTL(TIOCGPGRP)
969COMPATIBLE_IOCTL(TIOCGPTN) 946COMPATIBLE_IOCTL(TIOCGPTN)
970COMPATIBLE_IOCTL(TIOCSPTLCK) 947COMPATIBLE_IOCTL(TIOCSPTLCK)
971COMPATIBLE_IOCTL(TIOCSERGETLSR) 948COMPATIBLE_IOCTL(TIOCSERGETLSR)
949COMPATIBLE_IOCTL(TIOCSIG)
972#ifdef TCGETS2 950#ifdef TCGETS2
973COMPATIBLE_IOCTL(TCGETS2) 951COMPATIBLE_IOCTL(TCGETS2)
974COMPATIBLE_IOCTL(TCSETS2) 952COMPATIBLE_IOCTL(TCSETS2)
@@ -1284,13 +1262,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
1284COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) 1262COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
1285COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) 1263COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
1286COMPATIBLE_IOCTL(OSS_GETVERSION) 1264COMPATIBLE_IOCTL(OSS_GETVERSION)
1287/* AUTOFS */
1288COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
1289COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
1290COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
1291COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
1292COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
1293COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
1294/* Raw devices */ 1265/* Raw devices */
1295COMPATIBLE_IOCTL(RAW_SETBIND) 1266COMPATIBLE_IOCTL(RAW_SETBIND)
1296COMPATIBLE_IOCTL(RAW_GETBIND) 1267COMPATIBLE_IOCTL(RAW_GETBIND)
@@ -1557,9 +1528,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1557 case RAW_GETBIND: 1528 case RAW_GETBIND:
1558 return raw_ioctl(fd, cmd, argp); 1529 return raw_ioctl(fd, cmd, argp);
1559#endif 1530#endif
1560#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
1561 case AUTOFS_IOC_SETTIMEOUT32:
1562 return ioc_settimeout(fd, cmd, argp);
1563 /* One SMB ioctl needs translations. */ 1531 /* One SMB ioctl needs translations. */
1564#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) 1532#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
1565 case SMB_IOC_GETMOUNTUID_32: 1533 case SMB_IOC_GETMOUNTUID_32:
@@ -1614,9 +1582,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1614 case KDSKBMETA: 1582 case KDSKBMETA:
1615 case KDSKBLED: 1583 case KDSKBLED:
1616 case KDSETLED: 1584 case KDSETLED:
1617 /* AUTOFS */
1618 case AUTOFS_IOC_READY:
1619 case AUTOFS_IOC_FAIL:
1620 /* NBD */ 1585 /* NBD */
1621 case NBD_SET_SOCK: 1586 case NBD_SET_SOCK:
1622 case NBD_SET_BLKSIZE: 1587 case NBD_SET_BLKSIZE:
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index dd3634e4c967..a53b130b366c 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex);
39#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1) 39#define CRAMINO(x) (((x)->offset && (x)->size)?(x)->offset<<2:1)
40#define OFFSET(x) ((x)->i_ino) 40#define OFFSET(x) ((x)->i_ino)
41 41
42 42static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode)
43static int cramfs_iget5_test(struct inode *inode, void *opaque)
44{
45 struct cramfs_inode *cramfs_inode = opaque;
46 return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
47}
48
49static int cramfs_iget5_set(struct inode *inode, void *opaque)
50{ 43{
51 struct cramfs_inode *cramfs_inode = opaque; 44 static struct timespec zerotime;
52 inode->i_ino = CRAMINO(cramfs_inode); 45 inode->i_mode = cramfs_inode->mode;
53 return 0; 46 inode->i_uid = cramfs_inode->uid;
47 inode->i_size = cramfs_inode->size;
48 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
49 inode->i_gid = cramfs_inode->gid;
50 /* Struct copy intentional */
51 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
52 /* inode->i_nlink is left 1 - arguably wrong for directories,
53 but it's the best we can do without reading the directory
54 contents. 1 yields the right result in GNU find, even
55 without -noleaf option. */
56 if (S_ISREG(inode->i_mode)) {
57 inode->i_fop = &generic_ro_fops;
58 inode->i_data.a_ops = &cramfs_aops;
59 } else if (S_ISDIR(inode->i_mode)) {
60 inode->i_op = &cramfs_dir_inode_operations;
61 inode->i_fop = &cramfs_directory_operations;
62 } else if (S_ISLNK(inode->i_mode)) {
63 inode->i_op = &page_symlink_inode_operations;
64 inode->i_data.a_ops = &cramfs_aops;
65 } else {
66 init_special_inode(inode, inode->i_mode,
67 old_decode_dev(cramfs_inode->size));
68 }
54} 69}
55 70
56static struct inode *get_cramfs_inode(struct super_block *sb, 71static struct inode *get_cramfs_inode(struct super_block *sb,
57 struct cramfs_inode * cramfs_inode) 72 struct cramfs_inode * cramfs_inode)
58{ 73{
59 struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), 74 struct inode *inode;
60 cramfs_iget5_test, cramfs_iget5_set, 75 if (CRAMINO(cramfs_inode) == 1) {
61 cramfs_inode); 76 inode = new_inode(sb);
62 static struct timespec zerotime; 77 if (inode) {
63 78 inode->i_ino = 1;
64 if (inode && (inode->i_state & I_NEW)) { 79 setup_inode(inode, cramfs_inode);
65 inode->i_mode = cramfs_inode->mode; 80 }
66 inode->i_uid = cramfs_inode->uid; 81 } else {
67 inode->i_size = cramfs_inode->size; 82 inode = iget_locked(sb, CRAMINO(cramfs_inode));
68 inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; 83 if (inode) {
69 inode->i_gid = cramfs_inode->gid; 84 setup_inode(inode, cramfs_inode);
70 /* Struct copy intentional */ 85 unlock_new_inode(inode);
71 inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
72 /* inode->i_nlink is left 1 - arguably wrong for directories,
73 but it's the best we can do without reading the directory
74 contents. 1 yields the right result in GNU find, even
75 without -noleaf option. */
76 if (S_ISREG(inode->i_mode)) {
77 inode->i_fop = &generic_ro_fops;
78 inode->i_data.a_ops = &cramfs_aops;
79 } else if (S_ISDIR(inode->i_mode)) {
80 inode->i_op = &cramfs_dir_inode_operations;
81 inode->i_fop = &cramfs_directory_operations;
82 } else if (S_ISLNK(inode->i_mode)) {
83 inode->i_op = &page_symlink_inode_operations;
84 inode->i_data.a_ops = &cramfs_aops;
85 } else {
86 init_special_inode(inode, inode->i_mode,
87 old_decode_dev(cramfs_inode->size));
88 } 86 }
89 unlock_new_inode(inode);
90 } 87 }
91 return inode; 88 return inode;
92} 89}
93 90
94static void cramfs_drop_inode(struct inode *inode)
95{
96 if (inode->i_ino == 1)
97 generic_delete_inode(inode);
98 else
99 generic_drop_inode(inode);
100}
101
102/* 91/*
103 * We have our own block cache: don't fill up the buffer cache 92 * We have our own block cache: don't fill up the buffer cache
104 * with the rom-image, because the way the filesystem is set 93 * with the rom-image, because the way the filesystem is set
@@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = {
542 .put_super = cramfs_put_super, 531 .put_super = cramfs_put_super,
543 .remount_fs = cramfs_remount, 532 .remount_fs = cramfs_remount,
544 .statfs = cramfs_statfs, 533 .statfs = cramfs_statfs,
545 .drop_inode = cramfs_drop_inode,
546}; 534};
547 535
548static int cramfs_get_sb(struct file_system_type *fs_type, 536static int cramfs_get_sb(struct file_system_type *fs_type,
diff --git a/fs/dcache.c b/fs/dcache.c
index 86d4db15473e..9f2c13417969 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
536 */ 536 */
537static void prune_dcache(int count) 537static void prune_dcache(int count)
538{ 538{
539 struct super_block *sb, *n; 539 struct super_block *sb, *p = NULL;
540 int w_count; 540 int w_count;
541 int unused = dentry_stat.nr_unused; 541 int unused = dentry_stat.nr_unused;
542 int prune_ratio; 542 int prune_ratio;
@@ -550,7 +550,7 @@ static void prune_dcache(int count)
550 else 550 else
551 prune_ratio = unused / count; 551 prune_ratio = unused / count;
552 spin_lock(&sb_lock); 552 spin_lock(&sb_lock);
553 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 553 list_for_each_entry(sb, &super_blocks, s_list) {
554 if (list_empty(&sb->s_instances)) 554 if (list_empty(&sb->s_instances))
555 continue; 555 continue;
556 if (sb->s_nr_dentry_unused == 0) 556 if (sb->s_nr_dentry_unused == 0)
@@ -590,14 +590,16 @@ static void prune_dcache(int count)
590 up_read(&sb->s_umount); 590 up_read(&sb->s_umount);
591 } 591 }
592 spin_lock(&sb_lock); 592 spin_lock(&sb_lock);
593 /* lock was dropped, must reset next */ 593 if (p)
594 list_safe_reset_next(sb, n, s_list); 594 __put_super(p);
595 count -= pruned; 595 count -= pruned;
596 __put_super(sb); 596 p = sb;
597 /* more work left to do? */ 597 /* more work left to do? */
598 if (count <= 0) 598 if (count <= 0)
599 break; 599 break;
600 } 600 }
601 if (p)
602 __put_super(p);
601 spin_unlock(&sb_lock); 603 spin_unlock(&sb_lock);
602 spin_unlock(&dcache_lock); 604 spin_unlock(&dcache_lock);
603} 605}
@@ -2049,16 +2051,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2049/* 2051/*
2050 * Write full pathname from the root of the filesystem into the buffer. 2052 * Write full pathname from the root of the filesystem into the buffer.
2051 */ 2053 */
2052char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2054char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2053{ 2055{
2054 char *end = buf + buflen; 2056 char *end = buf + buflen;
2055 char *retval; 2057 char *retval;
2056 2058
2057 spin_lock(&dcache_lock);
2058 prepend(&end, &buflen, "\0", 1); 2059 prepend(&end, &buflen, "\0", 1);
2059 if (d_unlinked(dentry) &&
2060 (prepend(&end, &buflen, "//deleted", 9) != 0))
2061 goto Elong;
2062 if (buflen < 1) 2060 if (buflen < 1)
2063 goto Elong; 2061 goto Elong;
2064 /* Get '/' right */ 2062 /* Get '/' right */
@@ -2076,7 +2074,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2076 retval = end; 2074 retval = end;
2077 dentry = parent; 2075 dentry = parent;
2078 } 2076 }
2077 return retval;
2078Elong:
2079 return ERR_PTR(-ENAMETOOLONG);
2080}
2081EXPORT_SYMBOL(__dentry_path);
2082
2083char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2084{
2085 char *p = NULL;
2086 char *retval;
2087
2088 spin_lock(&dcache_lock);
2089 if (d_unlinked(dentry)) {
2090 p = buf + buflen;
2091 if (prepend(&p, &buflen, "//deleted", 10) != 0)
2092 goto Elong;
2093 buflen++;
2094 }
2095 retval = __dentry_path(dentry, buf, buflen);
2079 spin_unlock(&dcache_lock); 2096 spin_unlock(&dcache_lock);
2097 if (!IS_ERR(retval) && p)
2098 *p = '/'; /* restore '/' overridden with '\0' */
2080 return retval; 2099 return retval;
2081Elong: 2100Elong:
2082 spin_unlock(&dcache_lock); 2101 spin_unlock(&dcache_lock);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a10cb91cadea..51f270b479b6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1136,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1136 return ret; 1136 return ret;
1137} 1137}
1138 1138
1139/*
1140 * This is a library function for use by filesystem drivers.
1141 *
1142 * The locking rules are governed by the flags parameter:
1143 * - if the flags value contains DIO_LOCKING we use a fancy locking
1144 * scheme for dumb filesystems.
1145 * For writes this function is called under i_mutex and returns with
1146 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1147 * taken and dropped again before returning.
1148 * For reads and writes i_alloc_sem is taken in shared mode and released
1149 * on I/O completion (which may happen asynchronously after returning to
1150 * the caller).
1151 *
1152 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1153 * internal locking but rather rely on the filesystem to synchronize
1154 * direct I/O reads/writes versus each other and truncate.
1155 * For reads and writes both i_mutex and i_alloc_sem are not held on
1156 * entry and are never taken.
1157 */
1139ssize_t 1158ssize_t
1140__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, 1159__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1141 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1160 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1142 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1161 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1143 dio_submit_t submit_io, int flags) 1162 dio_submit_t submit_io, int flags)
@@ -1233,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
1233out: 1252out:
1234 return retval; 1253 return retval;
1235} 1254}
1236EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
1237
1238/*
1239 * This is a library function for use by filesystem drivers.
1240 *
1241 * The locking rules are governed by the flags parameter:
1242 * - if the flags value contains DIO_LOCKING we use a fancy locking
1243 * scheme for dumb filesystems.
1244 * For writes this function is called under i_mutex and returns with
1245 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1246 * taken and dropped again before returning.
1247 * For reads and writes i_alloc_sem is taken in shared mode and released
1248 * on I/O completion (which may happen asynchronously after returning to
1249 * the caller).
1250 *
1251 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1252 * internal locking but rather rely on the filesystem to synchronize
1253 * direct I/O reads/writes versus each other and truncate.
1254 * For reads and writes both i_mutex and i_alloc_sem are not held on
1255 * entry and are never taken.
1256 */
1257ssize_t
1258__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1259 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1260 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1261 dio_submit_t submit_io, int flags)
1262{
1263 ssize_t retval;
1264
1265 retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
1266 offset, nr_segs, get_block, end_io, submit_io, flags);
1267 /*
1268 * In case of error extending write may have instantiated a few
1269 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1270 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
1271 * their own manner. This is a further example of where the old
1272 * truncate sequence is inadequate.
1273 *
1274 * NOTE: filesystems with their own locking have to handle this
1275 * on their own.
1276 */
1277 if (flags & DIO_LOCKING) {
1278 if (unlikely((rw & WRITE) && retval < 0)) {
1279 loff_t isize = i_size_read(inode);
1280 loff_t end = offset + iov_length(iov, nr_segs);
1281
1282 if (end > isize)
1283 vmtruncate(inode, isize);
1284 }
1285 }
1286
1287 return retval;
1288}
1289EXPORT_SYMBOL(__blockdev_direct_IO); 1255EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 83c4f600786a..2195c213ab2f 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
18 18
19 spin_lock(&inode_lock); 19 spin_lock(&inode_lock);
20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 20 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
21 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 21 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
22 continue; 22 continue;
23 if (inode->i_mapping->nrpages == 0) 23 if (inode->i_mapping->nrpages == 0)
24 continue; 24 continue;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e8fcf4e2ed7d..622c95140802 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,7 +199,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
199 "the persistent file for the dentry with name " 199 "the persistent file for the dentry with name "
200 "[%s]; rc = [%d]\n", __func__, 200 "[%s]; rc = [%d]\n", __func__,
201 ecryptfs_dentry->d_name.name, rc); 201 ecryptfs_dentry->d_name.name, rc);
202 goto out; 202 goto out_free;
203 } 203 }
204 } 204 }
205 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY) 205 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
@@ -207,7 +207,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
207 rc = -EPERM; 207 rc = -EPERM;
208 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs " 208 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
209 "file must hence be opened RO\n", __func__); 209 "file must hence be opened RO\n", __func__);
210 goto out; 210 goto out_free;
211 } 211 }
212 ecryptfs_set_file_lower( 212 ecryptfs_set_file_lower(
213 file, ecryptfs_inode_to_private(inode)->lower_file); 213 file, ecryptfs_inode_to_private(inode)->lower_file);
@@ -292,12 +292,40 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
292 return rc; 292 return rc;
293} 293}
294 294
295static int ecryptfs_ioctl(struct inode *inode, struct file *file, 295static long
296 unsigned int cmd, unsigned long arg); 296ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
297{
298 struct file *lower_file = NULL;
299 long rc = -ENOTTY;
300
301 if (ecryptfs_file_to_private(file))
302 lower_file = ecryptfs_file_to_lower(file);
303 if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
304 rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
305 return rc;
306}
307
308#ifdef CONFIG_COMPAT
309static long
310ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
311{
312 struct file *lower_file = NULL;
313 long rc = -ENOIOCTLCMD;
314
315 if (ecryptfs_file_to_private(file))
316 lower_file = ecryptfs_file_to_lower(file);
317 if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
318 rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
319 return rc;
320}
321#endif
297 322
298const struct file_operations ecryptfs_dir_fops = { 323const struct file_operations ecryptfs_dir_fops = {
299 .readdir = ecryptfs_readdir, 324 .readdir = ecryptfs_readdir,
300 .ioctl = ecryptfs_ioctl, 325 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
326#ifdef CONFIG_COMPAT
327 .compat_ioctl = ecryptfs_compat_ioctl,
328#endif
301 .open = ecryptfs_open, 329 .open = ecryptfs_open,
302 .flush = ecryptfs_flush, 330 .flush = ecryptfs_flush,
303 .release = ecryptfs_release, 331 .release = ecryptfs_release,
@@ -313,7 +341,10 @@ const struct file_operations ecryptfs_main_fops = {
313 .write = do_sync_write, 341 .write = do_sync_write,
314 .aio_write = generic_file_aio_write, 342 .aio_write = generic_file_aio_write,
315 .readdir = ecryptfs_readdir, 343 .readdir = ecryptfs_readdir,
316 .ioctl = ecryptfs_ioctl, 344 .unlocked_ioctl = ecryptfs_unlocked_ioctl,
345#ifdef CONFIG_COMPAT
346 .compat_ioctl = ecryptfs_compat_ioctl,
347#endif
317 .mmap = generic_file_mmap, 348 .mmap = generic_file_mmap,
318 .open = ecryptfs_open, 349 .open = ecryptfs_open,
319 .flush = ecryptfs_flush, 350 .flush = ecryptfs_flush,
@@ -322,20 +353,3 @@ const struct file_operations ecryptfs_main_fops = {
322 .fasync = ecryptfs_fasync, 353 .fasync = ecryptfs_fasync,
323 .splice_read = generic_file_splice_read, 354 .splice_read = generic_file_splice_read,
324}; 355};
325
326static int
327ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
328 unsigned long arg)
329{
330 int rc = 0;
331 struct file *lower_file = NULL;
332
333 if (ecryptfs_file_to_private(file))
334 lower_file = ecryptfs_file_to_lower(file);
335 if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
336 rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
337 lower_file, cmd, arg);
338 else
339 rc = -ENOTTY;
340 return rc;
341}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 31ef5252f0fe..6c55113e7222 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -264,7 +264,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
264 printk(KERN_ERR "%s: Out of memory whilst attempting " 264 printk(KERN_ERR "%s: Out of memory whilst attempting "
265 "to allocate ecryptfs_dentry_info struct\n", 265 "to allocate ecryptfs_dentry_info struct\n",
266 __func__); 266 __func__);
267 goto out_dput; 267 goto out_put;
268 } 268 }
269 ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); 269 ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
270 ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); 270 ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
@@ -339,14 +339,85 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
339out_free_kmem: 339out_free_kmem:
340 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 340 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
341 goto out; 341 goto out;
342out_dput: 342out_put:
343 dput(lower_dentry); 343 dput(lower_dentry);
344 mntput(lower_mnt);
344 d_drop(ecryptfs_dentry); 345 d_drop(ecryptfs_dentry);
345out: 346out:
346 return rc; 347 return rc;
347} 348}
348 349
349/** 350/**
351 * ecryptfs_new_lower_dentry
352 * @name: The name of the new dentry.
353 * @lower_dir_dentry: Parent directory of the new dentry.
354 * @nd: nameidata from last lookup.
355 *
356 * Create a new dentry or get it from lower parent dir.
357 */
358static struct dentry *
359ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
360 struct nameidata *nd)
361{
362 struct dentry *new_dentry;
363 struct dentry *tmp;
364 struct inode *lower_dir_inode;
365
366 lower_dir_inode = lower_dir_dentry->d_inode;
367
368 tmp = d_alloc(lower_dir_dentry, name);
369 if (!tmp)
370 return ERR_PTR(-ENOMEM);
371
372 mutex_lock(&lower_dir_inode->i_mutex);
373 new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
374 mutex_unlock(&lower_dir_inode->i_mutex);
375
376 if (!new_dentry)
377 new_dentry = tmp;
378 else
379 dput(tmp);
380
381 return new_dentry;
382}
383
384
385/**
386 * ecryptfs_lookup_one_lower
387 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
388 * @lower_dir_dentry: lower parent directory
389 *
390 * Get the lower dentry from vfs. If lower dentry does not exist yet,
391 * create it.
392 */
393static struct dentry *
394ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
395 struct dentry *lower_dir_dentry)
396{
397 struct nameidata nd;
398 struct vfsmount *lower_mnt;
399 struct qstr *name;
400 int err;
401
402 name = &ecryptfs_dentry->d_name;
403 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
404 ecryptfs_dentry->d_parent));
405 err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
406 mntput(lower_mnt);
407
408 if (!err) {
409 /* we dont need the mount */
410 mntput(nd.path.mnt);
411 return nd.path.dentry;
412 }
413 if (err != -ENOENT)
414 return ERR_PTR(err);
415
416 /* create a new lower dentry */
417 return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
418}
419
420/**
350 * ecryptfs_lookup 421 * ecryptfs_lookup
351 * @ecryptfs_dir_inode: The eCryptfs directory inode 422 * @ecryptfs_dir_inode: The eCryptfs directory inode
352 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up 423 * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -373,14 +444,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
373 goto out_d_drop; 444 goto out_d_drop;
374 } 445 }
375 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 446 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
376 mutex_lock(&lower_dir_dentry->d_inode->i_mutex); 447
377 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, 448 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
378 lower_dir_dentry, 449 lower_dir_dentry);
379 ecryptfs_dentry->d_name.len);
380 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
381 if (IS_ERR(lower_dentry)) { 450 if (IS_ERR(lower_dentry)) {
382 rc = PTR_ERR(lower_dentry); 451 rc = PTR_ERR(lower_dentry);
383 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 452 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
384 "[%d] on lower_dentry = [%s]\n", __func__, rc, 453 "[%d] on lower_dentry = [%s]\n", __func__, rc,
385 encrypted_and_encoded_name); 454 encrypted_and_encoded_name);
386 goto out_d_drop; 455 goto out_d_drop;
@@ -402,14 +471,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
402 "filename; rc = [%d]\n", __func__, rc); 471 "filename; rc = [%d]\n", __func__, rc);
403 goto out_d_drop; 472 goto out_d_drop;
404 } 473 }
405 mutex_lock(&lower_dir_dentry->d_inode->i_mutex); 474 lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
406 lower_dentry = lookup_one_len(encrypted_and_encoded_name, 475 lower_dir_dentry);
407 lower_dir_dentry,
408 encrypted_and_encoded_name_size - 1);
409 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
410 if (IS_ERR(lower_dentry)) { 476 if (IS_ERR(lower_dentry)) {
411 rc = PTR_ERR(lower_dentry); 477 rc = PTR_ERR(lower_dentry);
412 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 478 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
413 "[%d] on lower_dentry = [%s]\n", __func__, rc, 479 "[%d] on lower_dentry = [%s]\n", __func__, rc,
414 encrypted_and_encoded_name); 480 encrypted_and_encoded_name);
415 goto out_d_drop; 481 goto out_d_drop;
@@ -804,10 +870,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
804 size_t num_zeros = (PAGE_CACHE_SIZE 870 size_t num_zeros = (PAGE_CACHE_SIZE
805 - (ia->ia_size & ~PAGE_CACHE_MASK)); 871 - (ia->ia_size & ~PAGE_CACHE_MASK));
806 872
873
874 /*
875 * XXX(truncate) this should really happen at the beginning
876 * of ->setattr. But the code is too messy to do that as part
877 * of a larger patch. ecryptfs is also totally missing out
878 * on the inode_change_ok check at the beginning of
879 * ->setattr which would include this.
880 */
881 rc = inode_newsize_ok(inode, ia->ia_size);
882 if (rc)
883 goto out;
884
807 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 885 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
808 rc = simple_setsize(inode, ia->ia_size); 886 truncate_setsize(inode, ia->ia_size);
809 if (rc)
810 goto out;
811 lower_ia->ia_size = ia->ia_size; 887 lower_ia->ia_size = ia->ia_size;
812 lower_ia->ia_valid |= ATTR_SIZE; 888 lower_ia->ia_valid |= ATTR_SIZE;
813 goto out; 889 goto out;
@@ -830,7 +906,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
830 goto out; 906 goto out;
831 } 907 }
832 } 908 }
833 simple_setsize(inode, ia->ia_size); 909 truncate_setsize(inode, ia->ia_size);
834 rc = ecryptfs_write_inode_size_to_metadata(inode); 910 rc = ecryptfs_write_inode_size_to_metadata(inode);
835 if (rc) { 911 if (rc) {
836 printk(KERN_ERR "Problem with " 912 printk(KERN_ERR "Problem with "
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 46c4dd8dfcc3..bcb68c0cb1f0 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -274,7 +274,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
274 struct user_namespace *user_ns, struct pid *pid, 274 struct user_namespace *user_ns, struct pid *pid,
275 u32 seq) 275 u32 seq)
276{ 276{
277 struct ecryptfs_daemon *daemon; 277 struct ecryptfs_daemon *uninitialized_var(daemon);
278 struct ecryptfs_msg_ctx *msg_ctx; 278 struct ecryptfs_msg_ctx *msg_ctx;
279 size_t msg_size; 279 size_t msg_size;
280 struct nsproxy *nsproxy; 280 struct nsproxy *nsproxy;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 0435886e4a9f..f7fc286a3aa9 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -118,11 +118,15 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
118 */ 118 */
119static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) 119static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
120{ 120{
121 return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf); 121 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
122
123 if (!lower_dentry->d_sb->s_op->statfs)
124 return -ENOSYS;
125 return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
122} 126}
123 127
124/** 128/**
125 * ecryptfs_clear_inode 129 * ecryptfs_evict_inode
126 * @inode - The ecryptfs inode 130 * @inode - The ecryptfs inode
127 * 131 *
128 * Called by iput() when the inode reference count reached zero 132 * Called by iput() when the inode reference count reached zero
@@ -131,8 +135,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
131 * on the inode free list. We use this to drop our reference to the 135 * on the inode free list. We use this to drop our reference to the
132 * lower inode. 136 * lower inode.
133 */ 137 */
134static void ecryptfs_clear_inode(struct inode *inode) 138static void ecryptfs_evict_inode(struct inode *inode)
135{ 139{
140 truncate_inode_pages(&inode->i_data, 0);
141 end_writeback(inode);
136 iput(ecryptfs_inode_to_lower(inode)); 142 iput(ecryptfs_inode_to_lower(inode));
137} 143}
138 144
@@ -184,6 +190,6 @@ const struct super_operations ecryptfs_sops = {
184 .drop_inode = generic_delete_inode, 190 .drop_inode = generic_delete_inode,
185 .statfs = ecryptfs_statfs, 191 .statfs = ecryptfs_statfs,
186 .remount_fs = NULL, 192 .remount_fs = NULL,
187 .clear_inode = ecryptfs_clear_inode, 193 .evict_inode = ecryptfs_evict_inode,
188 .show_options = ecryptfs_show_options 194 .show_options = ecryptfs_show_options
189}; 195};
diff --git a/fs/exec.c b/fs/exec.c
index dab85ecad686..7761837e4500 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -128,7 +128,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
128 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) 128 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
129 goto exit; 129 goto exit;
130 130
131 fsnotify_open(file->f_path.dentry); 131 fsnotify_open(file);
132 132
133 error = -ENOEXEC; 133 error = -ENOEXEC;
134 if(file->f_op) { 134 if(file->f_op) {
@@ -683,7 +683,7 @@ struct file *open_exec(const char *name)
683 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) 683 if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
684 goto exit; 684 goto exit;
685 685
686 fsnotify_open(file->f_path.dentry); 686 fsnotify_open(file);
687 687
688 err = deny_write_access(file); 688 err = deny_write_access(file);
689 if (err) 689 if (err)
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 22721b2fd890..2dc925fa1010 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
256} 256}
257 257
258/* inode.c */ 258/* inode.c */
259void exofs_truncate(struct inode *inode);
260int exofs_setattr(struct dentry *, struct iattr *); 259int exofs_setattr(struct dentry *, struct iattr *);
261int exofs_write_begin(struct file *file, struct address_space *mapping, 260int exofs_write_begin(struct file *file, struct address_space *mapping,
262 loff_t pos, unsigned len, unsigned flags, 261 loff_t pos, unsigned len, unsigned flags,
@@ -264,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
264extern struct inode *exofs_iget(struct super_block *, unsigned long); 263extern struct inode *exofs_iget(struct super_block *, unsigned long);
265struct inode *exofs_new_inode(struct inode *, int); 264struct inode *exofs_new_inode(struct inode *, int);
266extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); 265extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
267extern void exofs_delete_inode(struct inode *); 266extern void exofs_evict_inode(struct inode *);
268 267
269/* dir.c: */ 268/* dir.c: */
270int exofs_add_link(struct dentry *, struct inode *); 269int exofs_add_link(struct dentry *, struct inode *);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index fef6899be397..f9bfe2b501d5 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -86,6 +86,5 @@ const struct file_operations exofs_file_operations = {
86}; 86};
87 87
88const struct inode_operations exofs_file_inode_operations = { 88const struct inode_operations exofs_file_inode_operations = {
89 .truncate = exofs_truncate,
90 .setattr = exofs_setattr, 89 .setattr = exofs_setattr,
91}; 90};
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5862ae87ed29..185ef1281e0c 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -697,6 +697,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
697 return write_exec(&pcol); 697 return write_exec(&pcol);
698} 698}
699 699
700 /* i_mutex held, using inode->i_size directly */
701static void _write_failed(struct inode *inode, loff_t to)
702{
703 if (to > inode->i_size)
704 truncate_pagecache(inode, to, inode->i_size);
705}
706
700int exofs_write_begin(struct file *file, struct address_space *mapping, 707int exofs_write_begin(struct file *file, struct address_space *mapping,
701 loff_t pos, unsigned len, unsigned flags, 708 loff_t pos, unsigned len, unsigned flags,
702 struct page **pagep, void **fsdata) 709 struct page **pagep, void **fsdata)
@@ -710,7 +717,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
710 fsdata); 717 fsdata);
711 if (ret) { 718 if (ret) {
712 EXOFS_DBGMSG("simple_write_begin failed\n"); 719 EXOFS_DBGMSG("simple_write_begin failed\n");
713 return ret; 720 goto out;
714 } 721 }
715 722
716 page = *pagep; 723 page = *pagep;
@@ -725,6 +732,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
725 EXOFS_DBGMSG("__readpage_filler failed\n"); 732 EXOFS_DBGMSG("__readpage_filler failed\n");
726 } 733 }
727 } 734 }
735out:
736 if (unlikely(ret))
737 _write_failed(mapping->host, pos + len);
728 738
729 return ret; 739 return ret;
730} 740}
@@ -750,6 +760,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping,
750 int ret; 760 int ret;
751 761
752 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata); 762 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
763 if (unlikely(ret))
764 _write_failed(inode, pos + len);
765
766 /* TODO: once simple_write_end marks inode dirty remove */
753 if (i_size != inode->i_size) 767 if (i_size != inode->i_size)
754 mark_inode_dirty(inode); 768 mark_inode_dirty(inode);
755 return ret; 769 return ret;
@@ -808,87 +822,55 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
808 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0); 822 return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
809} 823}
810 824
811/*
812 * get_block_t - Fill in a buffer_head
813 * An OSD takes care of block allocation so we just fake an allocation by
814 * putting in the inode's sector_t in the buffer_head.
815 * TODO: What about the case of create==0 and @iblock does not exist in the
816 * object?
817 */
818static int exofs_get_block(struct inode *inode, sector_t iblock,
819 struct buffer_head *bh_result, int create)
820{
821 map_bh(bh_result, inode->i_sb, iblock);
822 return 0;
823}
824
825const struct osd_attr g_attr_logical_length = ATTR_DEF( 825const struct osd_attr g_attr_logical_length = ATTR_DEF(
826 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); 826 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
827 827
828static int _do_truncate(struct inode *inode) 828static int _do_truncate(struct inode *inode, loff_t newsize)
829{ 829{
830 struct exofs_i_info *oi = exofs_i(inode); 830 struct exofs_i_info *oi = exofs_i(inode);
831 loff_t isize = i_size_read(inode);
832 int ret; 831 int ret;
833 832
834 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 833 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
835 834
836 nobh_truncate_page(inode->i_mapping, isize, exofs_get_block); 835 ret = exofs_oi_truncate(oi, (u64)newsize);
836 if (likely(!ret))
837 truncate_setsize(inode, newsize);
837 838
838 ret = exofs_oi_truncate(oi, (u64)isize); 839 EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n",
839 EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize); 840 inode->i_ino, newsize, ret);
840 return ret; 841 return ret;
841} 842}
842 843
843/* 844/*
844 * Truncate a file to the specified size - all we have to do is set the size 845 * Set inode attributes - update size attribute on OSD if needed,
845 * attribute. We make sure the object exists first. 846 * otherwise just call generic functions.
846 */
847void exofs_truncate(struct inode *inode)
848{
849 struct exofs_i_info *oi = exofs_i(inode);
850 int ret;
851
852 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
853 || S_ISLNK(inode->i_mode)))
854 return;
855 if (exofs_inode_is_fast_symlink(inode))
856 return;
857 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
858 return;
859
860 /* if we are about to truncate an object, and it hasn't been
861 * created yet, wait
862 */
863 if (unlikely(wait_obj_created(oi)))
864 goto fail;
865
866 ret = _do_truncate(inode);
867 if (ret)
868 goto fail;
869
870out:
871 mark_inode_dirty(inode);
872 return;
873fail:
874 make_bad_inode(inode);
875 goto out;
876}
877
878/*
879 * Set inode attributes - just call generic functions.
880 */ 847 */
881int exofs_setattr(struct dentry *dentry, struct iattr *iattr) 848int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
882{ 849{
883 struct inode *inode = dentry->d_inode; 850 struct inode *inode = dentry->d_inode;
884 int error; 851 int error;
885 852
853 /* if we are about to modify an object, and it hasn't been
854 * created yet, wait
855 */
856 error = wait_obj_created(exofs_i(inode));
857 if (unlikely(error))
858 return error;
859
886 error = inode_change_ok(inode, iattr); 860 error = inode_change_ok(inode, iattr);
887 if (error) 861 if (unlikely(error))
888 return error; 862 return error;
889 863
890 error = inode_setattr(inode, iattr); 864 if ((iattr->ia_valid & ATTR_SIZE) &&
891 return error; 865 iattr->ia_size != i_size_read(inode)) {
866 error = _do_truncate(inode, iattr->ia_size);
867 if (unlikely(error))
868 return error;
869 }
870
871 setattr_copy(inode, iattr);
872 mark_inode_dirty(inode);
873 return 0;
892} 874}
893 875
894static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF( 876static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
@@ -1325,7 +1307,7 @@ static void delete_done(struct exofs_io_state *ios, void *p)
1325 * from the OSD here. We make sure the object was created before we try and 1307 * from the OSD here. We make sure the object was created before we try and
1326 * delete it. 1308 * delete it.
1327 */ 1309 */
1328void exofs_delete_inode(struct inode *inode) 1310void exofs_evict_inode(struct inode *inode)
1329{ 1311{
1330 struct exofs_i_info *oi = exofs_i(inode); 1312 struct exofs_i_info *oi = exofs_i(inode);
1331 struct super_block *sb = inode->i_sb; 1313 struct super_block *sb = inode->i_sb;
@@ -1335,30 +1317,27 @@ void exofs_delete_inode(struct inode *inode)
1335 1317
1336 truncate_inode_pages(&inode->i_data, 0); 1318 truncate_inode_pages(&inode->i_data, 0);
1337 1319
1338 if (is_bad_inode(inode)) 1320 /* TODO: should do better here */
1321 if (inode->i_nlink || is_bad_inode(inode))
1339 goto no_delete; 1322 goto no_delete;
1340 1323
1341 mark_inode_dirty(inode);
1342 exofs_update_inode(inode, inode_needs_sync(inode));
1343
1344 inode->i_size = 0; 1324 inode->i_size = 0;
1345 if (inode->i_blocks) 1325 end_writeback(inode);
1346 exofs_truncate(inode);
1347 1326
1348 clear_inode(inode); 1327 /* if we are deleting an obj that hasn't been created yet, wait */
1328 if (!obj_created(oi)) {
1329 BUG_ON(!obj_2bcreated(oi));
1330 wait_event(oi->i_wq, obj_created(oi));
1331 /* ignore the error, attempt a remove anyway */
1332 }
1349 1333
1334 /* Now Remove the OSD objects */
1350 ret = exofs_get_io_state(&sbi->layout, &ios); 1335 ret = exofs_get_io_state(&sbi->layout, &ios);
1351 if (unlikely(ret)) { 1336 if (unlikely(ret)) {
1352 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__); 1337 EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
1353 return; 1338 return;
1354 } 1339 }
1355 1340
1356 /* if we are deleting an obj that hasn't been created yet, wait */
1357 if (!obj_created(oi)) {
1358 BUG_ON(!obj_2bcreated(oi));
1359 wait_event(oi->i_wq, obj_created(oi));
1360 }
1361
1362 ios->obj.id = exofs_oi_objno(oi); 1341 ios->obj.id = exofs_oi_objno(oi);
1363 ios->done = delete_done; 1342 ios->done = delete_done;
1364 ios->private = sbi; 1343 ios->private = sbi;
@@ -1374,5 +1353,5 @@ void exofs_delete_inode(struct inode *inode)
1374 return; 1353 return;
1375 1354
1376no_delete: 1355no_delete:
1377 clear_inode(inode); 1356 end_writeback(inode);
1378} 1357}
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 95921f501f2f..908cdbe4b99a 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -599,7 +599,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
599 } else { 599 } else {
600 bio = master_dev->bio; 600 bio = master_dev->bio;
601 /* FIXME: bio_set_dir() */ 601 /* FIXME: bio_set_dir() */
602 bio->bi_rw |= (1 << BIO_RW); 602 bio->bi_rw |= REQ_WRITE;
603 } 603 }
604 604
605 osd_req_write(or, &ios->obj, per_dev->offset, bio, 605 osd_req_write(or, &ios->obj, per_dev->offset, bio,
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 03149b9a5178..32cfd61def5f 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -743,7 +743,7 @@ static const struct super_operations exofs_sops = {
743 .alloc_inode = exofs_alloc_inode, 743 .alloc_inode = exofs_alloc_inode,
744 .destroy_inode = exofs_destroy_inode, 744 .destroy_inode = exofs_destroy_inode,
745 .write_inode = exofs_write_inode, 745 .write_inode = exofs_write_inode,
746 .delete_inode = exofs_delete_inode, 746 .evict_inode = exofs_evict_inode,
747 .put_super = exofs_put_super, 747 .put_super = exofs_put_super,
748 .write_super = exofs_write_super, 748 .write_super = exofs_write_super,
749 .sync_fs = exofs_sync_fs, 749 .sync_fs = exofs_sync_fs,
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e8766a396776..c6c684b44ea1 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -571,7 +571,7 @@ do_more:
571error_return: 571error_return:
572 brelse(bitmap_bh); 572 brelse(bitmap_bh);
573 release_blocks(sb, freed); 573 release_blocks(sb, freed);
574 dquot_free_block(inode, freed); 574 dquot_free_block_nodirty(inode, freed);
575} 575}
576 576
577/** 577/**
@@ -1418,7 +1418,8 @@ allocated:
1418 1418
1419 *errp = 0; 1419 *errp = 0;
1420 brelse(bitmap_bh); 1420 brelse(bitmap_bh);
1421 dquot_free_block(inode, *count-num); 1421 dquot_free_block_nodirty(inode, *count-num);
1422 mark_inode_dirty(inode);
1422 *count = num; 1423 *count = num;
1423 return ret_block; 1424 return ret_block;
1424 1425
@@ -1428,8 +1429,10 @@ out:
1428 /* 1429 /*
1429 * Undo the block allocation 1430 * Undo the block allocation
1430 */ 1431 */
1431 if (!performed_allocation) 1432 if (!performed_allocation) {
1432 dquot_free_block(inode, *count); 1433 dquot_free_block_nodirty(inode, *count);
1434 mark_inode_dirty(inode);
1435 }
1433 brelse(bitmap_bh); 1436 brelse(bitmap_bh);
1434 return 0; 1437 return 0;
1435} 1438}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 7516957273ed..764109886ec0 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -448,6 +448,11 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
448 return res; 448 return res;
449} 449}
450 450
451static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len)
452{
453 return __block_write_begin(page, pos, len, ext2_get_block);
454}
455
451/* Releases the page */ 456/* Releases the page */
452void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, 457void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
453 struct page *page, struct inode *inode, int update_times) 458 struct page *page, struct inode *inode, int update_times)
@@ -458,8 +463,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
458 int err; 463 int err;
459 464
460 lock_page(page); 465 lock_page(page);
461 err = __ext2_write_begin(NULL, page->mapping, pos, len, 466 err = ext2_prepare_chunk(page, pos, len);
462 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
463 BUG_ON(err); 467 BUG_ON(err);
464 de->inode = cpu_to_le32(inode->i_ino); 468 de->inode = cpu_to_le32(inode->i_ino);
465 ext2_set_de_type(de, inode); 469 ext2_set_de_type(de, inode);
@@ -542,8 +546,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
542got_it: 546got_it:
543 pos = page_offset(page) + 547 pos = page_offset(page) +
544 (char*)de - (char*)page_address(page); 548 (char*)de - (char*)page_address(page);
545 err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0, 549 err = ext2_prepare_chunk(page, pos, rec_len);
546 &page, NULL);
547 if (err) 550 if (err)
548 goto out_unlock; 551 goto out_unlock;
549 if (de->inode) { 552 if (de->inode) {
@@ -576,8 +579,7 @@ out_unlock:
576 */ 579 */
577int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) 580int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
578{ 581{
579 struct address_space *mapping = page->mapping; 582 struct inode *inode = page->mapping->host;
580 struct inode *inode = mapping->host;
581 char *kaddr = page_address(page); 583 char *kaddr = page_address(page);
582 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); 584 unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
583 unsigned to = ((char *)dir - kaddr) + 585 unsigned to = ((char *)dir - kaddr) +
@@ -601,8 +603,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
601 from = (char*)pde - (char*)page_address(page); 603 from = (char*)pde - (char*)page_address(page);
602 pos = page_offset(page) + from; 604 pos = page_offset(page) + from;
603 lock_page(page); 605 lock_page(page);
604 err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0, 606 err = ext2_prepare_chunk(page, pos, to - from);
605 &page, NULL);
606 BUG_ON(err); 607 BUG_ON(err);
607 if (pde) 608 if (pde)
608 pde->rec_len = ext2_rec_len_to_disk(to - from); 609 pde->rec_len = ext2_rec_len_to_disk(to - from);
@@ -621,8 +622,7 @@ out:
621 */ 622 */
622int ext2_make_empty(struct inode *inode, struct inode *parent) 623int ext2_make_empty(struct inode *inode, struct inode *parent)
623{ 624{
624 struct address_space *mapping = inode->i_mapping; 625 struct page *page = grab_cache_page(inode->i_mapping, 0);
625 struct page *page = grab_cache_page(mapping, 0);
626 unsigned chunk_size = ext2_chunk_size(inode); 626 unsigned chunk_size = ext2_chunk_size(inode);
627 struct ext2_dir_entry_2 * de; 627 struct ext2_dir_entry_2 * de;
628 int err; 628 int err;
@@ -631,8 +631,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
631 if (!page) 631 if (!page)
632 return -ENOMEM; 632 return -ENOMEM;
633 633
634 err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0, 634 err = ext2_prepare_chunk(page, 0, chunk_size);
635 &page, NULL);
636 if (err) { 635 if (err) {
637 unlock_page(page); 636 unlock_page(page);
638 goto fail; 637 goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 52b34f1d2738..416daa62242c 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -119,7 +119,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
119/* inode.c */ 119/* inode.c */
120extern struct inode *ext2_iget (struct super_block *, unsigned long); 120extern struct inode *ext2_iget (struct super_block *, unsigned long);
121extern int ext2_write_inode (struct inode *, struct writeback_control *); 121extern int ext2_write_inode (struct inode *, struct writeback_control *);
122extern void ext2_delete_inode (struct inode *); 122extern void ext2_evict_inode(struct inode *);
123extern int ext2_sync_inode (struct inode *); 123extern int ext2_sync_inode (struct inode *);
124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); 124extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
125extern int ext2_setattr (struct dentry *, struct iattr *); 125extern int ext2_setattr (struct dentry *, struct iattr *);
@@ -127,9 +127,6 @@ extern void ext2_set_inode_flags(struct inode *inode);
127extern void ext2_get_inode_flags(struct ext2_inode_info *); 127extern void ext2_get_inode_flags(struct ext2_inode_info *);
128extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 128extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
129 u64 start, u64 len); 129 u64 start, u64 len);
130int __ext2_write_begin(struct file *file, struct address_space *mapping,
131 loff_t pos, unsigned len, unsigned flags,
132 struct page **pagep, void **fsdata);
133 130
134/* ioctl.c */ 131/* ioctl.c */
135extern long ext2_ioctl(struct file *, unsigned int, unsigned long); 132extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 938dbc739d00..ad70479aabff 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -118,19 +118,14 @@ void ext2_free_inode (struct inode * inode)
118 * Note: we must free any quota before locking the superblock, 118 * Note: we must free any quota before locking the superblock,
119 * as writing the quota to disk may need the lock as well. 119 * as writing the quota to disk may need the lock as well.
120 */ 120 */
121 if (!is_bad_inode(inode)) { 121 /* Quota is already initialized in iput() */
122 /* Quota is already initialized in iput() */ 122 ext2_xattr_delete_inode(inode);
123 ext2_xattr_delete_inode(inode); 123 dquot_free_inode(inode);
124 dquot_free_inode(inode); 124 dquot_drop(inode);
125 dquot_drop(inode);
126 }
127 125
128 es = EXT2_SB(sb)->s_es; 126 es = EXT2_SB(sb)->s_es;
129 is_directory = S_ISDIR(inode->i_mode); 127 is_directory = S_ISDIR(inode->i_mode);
130 128
131 /* Do this BEFORE marking the inode not in use or returning an error */
132 clear_inode (inode);
133
134 if (ino < EXT2_FIRST_INO(sb) || 129 if (ino < EXT2_FIRST_INO(sb) ||
135 ino > le32_to_cpu(es->s_inodes_count)) { 130 ino > le32_to_cpu(es->s_inodes_count)) {
136 ext2_error (sb, "ext2_free_inode", 131 ext2_error (sb, "ext2_free_inode",
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 3675088cb88c..940c96168868 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -69,26 +69,42 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to)
69/* 69/*
70 * Called at the last iput() if i_nlink is zero. 70 * Called at the last iput() if i_nlink is zero.
71 */ 71 */
72void ext2_delete_inode (struct inode * inode) 72void ext2_evict_inode(struct inode * inode)
73{ 73{
74 if (!is_bad_inode(inode)) 74 struct ext2_block_alloc_info *rsv;
75 int want_delete = 0;
76
77 if (!inode->i_nlink && !is_bad_inode(inode)) {
78 want_delete = 1;
75 dquot_initialize(inode); 79 dquot_initialize(inode);
80 } else {
81 dquot_drop(inode);
82 }
83
76 truncate_inode_pages(&inode->i_data, 0); 84 truncate_inode_pages(&inode->i_data, 0);
77 85
78 if (is_bad_inode(inode)) 86 if (want_delete) {
79 goto no_delete; 87 /* set dtime */
80 EXT2_I(inode)->i_dtime = get_seconds(); 88 EXT2_I(inode)->i_dtime = get_seconds();
81 mark_inode_dirty(inode); 89 mark_inode_dirty(inode);
82 __ext2_write_inode(inode, inode_needs_sync(inode)); 90 __ext2_write_inode(inode, inode_needs_sync(inode));
91 /* truncate to 0 */
92 inode->i_size = 0;
93 if (inode->i_blocks)
94 ext2_truncate_blocks(inode, 0);
95 }
83 96
84 inode->i_size = 0; 97 invalidate_inode_buffers(inode);
85 if (inode->i_blocks) 98 end_writeback(inode);
86 ext2_truncate_blocks(inode, 0);
87 ext2_free_inode (inode);
88 99
89 return; 100 ext2_discard_reservation(inode);
90no_delete: 101 rsv = EXT2_I(inode)->i_block_alloc_info;
91 clear_inode(inode); /* We must guarantee clearing of inode... */ 102 EXT2_I(inode)->i_block_alloc_info = NULL;
103 if (unlikely(rsv))
104 kfree(rsv);
105
106 if (want_delete)
107 ext2_free_inode(inode);
92} 108}
93 109
94typedef struct { 110typedef struct {
@@ -423,6 +439,8 @@ static int ext2_alloc_blocks(struct inode *inode,
423failed_out: 439failed_out:
424 for (i = 0; i <index; i++) 440 for (i = 0; i <index; i++)
425 ext2_free_blocks(inode, new_blocks[i], 1); 441 ext2_free_blocks(inode, new_blocks[i], 1);
442 if (index)
443 mark_inode_dirty(inode);
426 return ret; 444 return ret;
427} 445}
428 446
@@ -765,14 +783,6 @@ ext2_readpages(struct file *file, struct address_space *mapping,
765 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); 783 return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
766} 784}
767 785
768int __ext2_write_begin(struct file *file, struct address_space *mapping,
769 loff_t pos, unsigned len, unsigned flags,
770 struct page **pagep, void **fsdata)
771{
772 return block_write_begin_newtrunc(file, mapping, pos, len, flags,
773 pagep, fsdata, ext2_get_block);
774}
775
776static int 786static int
777ext2_write_begin(struct file *file, struct address_space *mapping, 787ext2_write_begin(struct file *file, struct address_space *mapping,
778 loff_t pos, unsigned len, unsigned flags, 788 loff_t pos, unsigned len, unsigned flags,
@@ -780,8 +790,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
780{ 790{
781 int ret; 791 int ret;
782 792
783 *pagep = NULL; 793 ret = block_write_begin(mapping, pos, len, flags, pagep,
784 ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 794 ext2_get_block);
785 if (ret < 0) 795 if (ret < 0)
786 ext2_write_failed(mapping, pos + len); 796 ext2_write_failed(mapping, pos + len);
787 return ret; 797 return ret;
@@ -806,13 +816,8 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
806{ 816{
807 int ret; 817 int ret;
808 818
809 /* 819 ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
810 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework 820 ext2_get_block);
811 * directory handling code to pass around offsets rather than struct
812 * pages in order to make this work easily.
813 */
814 ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep,
815 fsdata, ext2_get_block);
816 if (ret < 0) 821 if (ret < 0)
817 ext2_write_failed(mapping, pos + len); 822 ext2_write_failed(mapping, pos + len);
818 return ret; 823 return ret;
@@ -838,7 +843,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
838 struct inode *inode = mapping->host; 843 struct inode *inode = mapping->host;
839 ssize_t ret; 844 ssize_t ret;
840 845
841 ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, 846 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
842 iov, offset, nr_segs, ext2_get_block, NULL); 847 iov, offset, nr_segs, ext2_get_block, NULL);
843 if (ret < 0 && (rw & WRITE)) 848 if (ret < 0 && (rw & WRITE))
844 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); 849 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
@@ -1006,8 +1011,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1006 else if (block_to_free == nr - count) 1011 else if (block_to_free == nr - count)
1007 count++; 1012 count++;
1008 else { 1013 else {
1009 mark_inode_dirty(inode);
1010 ext2_free_blocks (inode, block_to_free, count); 1014 ext2_free_blocks (inode, block_to_free, count);
1015 mark_inode_dirty(inode);
1011 free_this: 1016 free_this:
1012 block_to_free = nr; 1017 block_to_free = nr;
1013 count = 1; 1018 count = 1;
@@ -1015,8 +1020,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
1015 } 1020 }
1016 } 1021 }
1017 if (count > 0) { 1022 if (count > 0) {
1018 mark_inode_dirty(inode);
1019 ext2_free_blocks (inode, block_to_free, count); 1023 ext2_free_blocks (inode, block_to_free, count);
1024 mark_inode_dirty(inode);
1020 } 1025 }
1021} 1026}
1022 1027
@@ -1169,15 +1174,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
1169 __ext2_truncate_blocks(inode, offset); 1174 __ext2_truncate_blocks(inode, offset);
1170} 1175}
1171 1176
1172int ext2_setsize(struct inode *inode, loff_t newsize) 1177static int ext2_setsize(struct inode *inode, loff_t newsize)
1173{ 1178{
1174 loff_t oldsize;
1175 int error; 1179 int error;
1176 1180
1177 error = inode_newsize_ok(inode, newsize);
1178 if (error)
1179 return error;
1180
1181 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1181 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1182 S_ISLNK(inode->i_mode))) 1182 S_ISLNK(inode->i_mode)))
1183 return -EINVAL; 1183 return -EINVAL;
@@ -1197,10 +1197,7 @@ int ext2_setsize(struct inode *inode, loff_t newsize)
1197 if (error) 1197 if (error)
1198 return error; 1198 return error;
1199 1199
1200 oldsize = inode->i_size; 1200 truncate_setsize(inode, newsize);
1201 i_size_write(inode, newsize);
1202 truncate_pagecache(inode, oldsize, newsize);
1203
1204 __ext2_truncate_blocks(inode, newsize); 1201 __ext2_truncate_blocks(inode, newsize);
1205 1202
1206 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 1203 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
@@ -1557,7 +1554,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
1557 if (error) 1554 if (error)
1558 return error; 1555 return error;
1559 } 1556 }
1560 generic_setattr(inode, iattr); 1557 setattr_copy(inode, iattr);
1561 if (iattr->ia_valid & ATTR_MODE) 1558 if (iattr->ia_valid & ATTR_MODE)
1562 error = ext2_acl_chmod(inode); 1559 error = ext2_acl_chmod(inode);
1563 mark_inode_dirty(inode); 1560 mark_inode_dirty(inode);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7ff43f4a59cd..1ec602673ea8 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -195,17 +195,6 @@ static void destroy_inodecache(void)
195 kmem_cache_destroy(ext2_inode_cachep); 195 kmem_cache_destroy(ext2_inode_cachep);
196} 196}
197 197
198static void ext2_clear_inode(struct inode *inode)
199{
200 struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
201
202 dquot_drop(inode);
203 ext2_discard_reservation(inode);
204 EXT2_I(inode)->i_block_alloc_info = NULL;
205 if (unlikely(rsv))
206 kfree(rsv);
207}
208
209static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) 198static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
210{ 199{
211 struct super_block *sb = vfs->mnt_sb; 200 struct super_block *sb = vfs->mnt_sb;
@@ -299,13 +288,12 @@ static const struct super_operations ext2_sops = {
299 .alloc_inode = ext2_alloc_inode, 288 .alloc_inode = ext2_alloc_inode,
300 .destroy_inode = ext2_destroy_inode, 289 .destroy_inode = ext2_destroy_inode,
301 .write_inode = ext2_write_inode, 290 .write_inode = ext2_write_inode,
302 .delete_inode = ext2_delete_inode, 291 .evict_inode = ext2_evict_inode,
303 .put_super = ext2_put_super, 292 .put_super = ext2_put_super,
304 .write_super = ext2_write_super, 293 .write_super = ext2_write_super,
305 .sync_fs = ext2_sync_fs, 294 .sync_fs = ext2_sync_fs,
306 .statfs = ext2_statfs, 295 .statfs = ext2_statfs,
307 .remount_fs = ext2_remount, 296 .remount_fs = ext2_remount,
308 .clear_inode = ext2_clear_inode,
309 .show_options = ext2_show_options, 297 .show_options = ext2_show_options,
310#ifdef CONFIG_QUOTA 298#ifdef CONFIG_QUOTA
311 .quota_read = ext2_quota_read, 299 .quota_read = ext2_quota_read,
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7c3915780b19..8c29ae15129e 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -674,6 +674,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
674 new_bh = sb_getblk(sb, block); 674 new_bh = sb_getblk(sb, block);
675 if (!new_bh) { 675 if (!new_bh) {
676 ext2_free_blocks(inode, block, 1); 676 ext2_free_blocks(inode, block, 1);
677 mark_inode_dirty(inode);
677 error = -EIO; 678 error = -EIO;
678 goto cleanup; 679 goto cleanup;
679 } 680 }
@@ -703,8 +704,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
703 * written (only some dirty data were not) so we just proceed 704 * written (only some dirty data were not) so we just proceed
704 * as if nothing happened and cleanup the unused block */ 705 * as if nothing happened and cleanup the unused block */
705 if (error && error != -ENOSPC) { 706 if (error && error != -ENOSPC) {
706 if (new_bh && new_bh != old_bh) 707 if (new_bh && new_bh != old_bh) {
707 dquot_free_block(inode, 1); 708 dquot_free_block_nodirty(inode, 1);
709 mark_inode_dirty(inode);
710 }
708 goto cleanup; 711 goto cleanup;
709 } 712 }
710 } else 713 } else
@@ -727,6 +730,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
727 mb_cache_entry_free(ce); 730 mb_cache_entry_free(ce);
728 ea_bdebug(old_bh, "freeing"); 731 ea_bdebug(old_bh, "freeing");
729 ext2_free_blocks(inode, old_bh->b_blocknr, 1); 732 ext2_free_blocks(inode, old_bh->b_blocknr, 1);
733 mark_inode_dirty(inode);
730 /* We let our caller release old_bh, so we 734 /* We let our caller release old_bh, so we
731 * need to duplicate the buffer before. */ 735 * need to duplicate the buffer before. */
732 get_bh(old_bh); 736 get_bh(old_bh);
@@ -736,7 +740,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
736 le32_add_cpu(&HDR(old_bh)->h_refcount, -1); 740 le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
737 if (ce) 741 if (ce)
738 mb_cache_entry_release(ce); 742 mb_cache_entry_release(ce);
739 dquot_free_block(inode, 1); 743 dquot_free_block_nodirty(inode, 1);
744 mark_inode_dirty(inode);
740 mark_buffer_dirty(old_bh); 745 mark_buffer_dirty(old_bh);
741 ea_bdebug(old_bh, "refcount now=%d", 746 ea_bdebug(old_bh, "refcount now=%d",
742 le32_to_cpu(HDR(old_bh)->h_refcount)); 747 le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -799,7 +804,7 @@ ext2_xattr_delete_inode(struct inode *inode)
799 mark_buffer_dirty(bh); 804 mark_buffer_dirty(bh);
800 if (IS_SYNC(inode)) 805 if (IS_SYNC(inode))
801 sync_dirty_buffer(bh); 806 sync_dirty_buffer(bh);
802 dquot_free_block(inode, 1); 807 dquot_free_block_nodirty(inode, 1);
803 } 808 }
804 EXT2_I(inode)->i_file_acl = 0; 809 EXT2_I(inode)->i_file_acl = 0;
805 810
@@ -838,7 +843,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh)
838 ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); 843 ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
839 if (!ce) 844 if (!ce)
840 return -ENOMEM; 845 return -ENOMEM;
841 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 846 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
842 if (error) { 847 if (error) {
843 mb_cache_entry_free(ce); 848 mb_cache_entry_free(ce);
844 if (error == -EBUSY) { 849 if (error == -EBUSY) {
@@ -912,8 +917,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
912 return NULL; /* never share */ 917 return NULL; /* never share */
913 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 918 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
914again: 919again:
915 ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, 920 ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
916 inode->i_sb->s_bdev, hash); 921 hash);
917 while (ce) { 922 while (ce) {
918 struct buffer_head *bh; 923 struct buffer_head *bh;
919 924
@@ -945,7 +950,7 @@ again:
945 unlock_buffer(bh); 950 unlock_buffer(bh);
946 brelse(bh); 951 brelse(bh);
947 } 952 }
948 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); 953 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
949 } 954 }
950 return NULL; 955 return NULL;
951} 956}
@@ -1021,9 +1026,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
1021int __init 1026int __init
1022init_ext2_xattr(void) 1027init_ext2_xattr(void)
1023{ 1028{
1024 ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, 1029 ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
1025 sizeof(struct mb_cache_entry) +
1026 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1027 if (!ext2_xattr_cache) 1030 if (!ext2_xattr_cache)
1028 return -ENOMEM; 1031 return -ENOMEM;
1029 return 0; 1032 return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 498021eb88fb..4ab72db3559e 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -119,20 +119,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
119 ino = inode->i_ino; 119 ino = inode->i_ino;
120 ext3_debug ("freeing inode %lu\n", ino); 120 ext3_debug ("freeing inode %lu\n", ino);
121 121
122 /*
123 * Note: we must free any quota before locking the superblock,
124 * as writing the quota to disk may need the lock as well.
125 */
126 dquot_initialize(inode);
127 ext3_xattr_delete_inode(handle, inode);
128 dquot_free_inode(inode);
129 dquot_drop(inode);
130
131 is_directory = S_ISDIR(inode->i_mode); 122 is_directory = S_ISDIR(inode->i_mode);
132 123
133 /* Do this BEFORE marking the inode not in use or returning an error */
134 clear_inode (inode);
135
136 es = EXT3_SB(sb)->s_es; 124 es = EXT3_SB(sb)->s_es;
137 if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 125 if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
138 ext3_error (sb, "ext3_free_inode", 126 ext3_error (sb, "ext3_free_inode",
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 001eb0e2d48e..5e0faf4cda79 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -190,18 +190,28 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
190} 190}
191 191
192/* 192/*
193 * Called at the last iput() if i_nlink is zero. 193 * Called at inode eviction from icache
194 */ 194 */
195void ext3_delete_inode (struct inode * inode) 195void ext3_evict_inode (struct inode *inode)
196{ 196{
197 struct ext3_block_alloc_info *rsv;
197 handle_t *handle; 198 handle_t *handle;
199 int want_delete = 0;
198 200
199 if (!is_bad_inode(inode)) 201 if (!inode->i_nlink && !is_bad_inode(inode)) {
200 dquot_initialize(inode); 202 dquot_initialize(inode);
203 want_delete = 1;
204 }
201 205
202 truncate_inode_pages(&inode->i_data, 0); 206 truncate_inode_pages(&inode->i_data, 0);
203 207
204 if (is_bad_inode(inode)) 208 ext3_discard_reservation(inode);
209 rsv = EXT3_I(inode)->i_block_alloc_info;
210 EXT3_I(inode)->i_block_alloc_info = NULL;
211 if (unlikely(rsv))
212 kfree(rsv);
213
214 if (!want_delete)
205 goto no_delete; 215 goto no_delete;
206 216
207 handle = start_transaction(inode); 217 handle = start_transaction(inode);
@@ -238,15 +248,22 @@ void ext3_delete_inode (struct inode * inode)
238 * having errors), but we can't free the inode if the mark_dirty 248 * having errors), but we can't free the inode if the mark_dirty
239 * fails. 249 * fails.
240 */ 250 */
241 if (ext3_mark_inode_dirty(handle, inode)) 251 if (ext3_mark_inode_dirty(handle, inode)) {
242 /* If that failed, just do the required in-core inode clear. */ 252 /* If that failed, just dquot_drop() and be done with that */
243 clear_inode(inode); 253 dquot_drop(inode);
244 else 254 end_writeback(inode);
255 } else {
256 ext3_xattr_delete_inode(handle, inode);
257 dquot_free_inode(inode);
258 dquot_drop(inode);
259 end_writeback(inode);
245 ext3_free_inode(handle, inode); 260 ext3_free_inode(handle, inode);
261 }
246 ext3_journal_stop(handle); 262 ext3_journal_stop(handle);
247 return; 263 return;
248no_delete: 264no_delete:
249 clear_inode(inode); /* We must guarantee clearing of inode... */ 265 end_writeback(inode);
266 dquot_drop(inode);
250} 267}
251 268
252typedef struct { 269typedef struct {
@@ -1212,8 +1229,7 @@ retry:
1212 ret = PTR_ERR(handle); 1229 ret = PTR_ERR(handle);
1213 goto out; 1230 goto out;
1214 } 1231 }
1215 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 1232 ret = __block_write_begin(page, pos, len, ext3_get_block);
1216 ext3_get_block);
1217 if (ret) 1233 if (ret)
1218 goto write_begin_failed; 1234 goto write_begin_failed;
1219 1235
@@ -1798,6 +1814,17 @@ retry:
1798 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1814 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1799 offset, nr_segs, 1815 offset, nr_segs,
1800 ext3_get_block, NULL); 1816 ext3_get_block, NULL);
1817 /*
1818 * In case of error extending write may have instantiated a few
1819 * blocks outside i_size. Trim these off again.
1820 */
1821 if (unlikely((rw & WRITE) && ret < 0)) {
1822 loff_t isize = i_size_read(inode);
1823 loff_t end = offset + iov_length(iov, nr_segs);
1824
1825 if (end > isize)
1826 vmtruncate(inode, isize);
1827 }
1801 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) 1828 if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
1802 goto retry; 1829 goto retry;
1803 1830
@@ -2560,7 +2587,7 @@ out_stop:
2560 * If this was a simple ftruncate(), and the file will remain alive 2587 * If this was a simple ftruncate(), and the file will remain alive
2561 * then we need to clear up the orphan record which we created above. 2588 * then we need to clear up the orphan record which we created above.
2562 * However, if this was a real unlink then we were called by 2589 * However, if this was a real unlink then we were called by
2563 * ext3_delete_inode(), and we allow that function to clean up the 2590 * ext3_evict_inode(), and we allow that function to clean up the
2564 * orphan info for us. 2591 * orphan info for us.
2565 */ 2592 */
2566 if (inode->i_nlink) 2593 if (inode->i_nlink)
@@ -3204,9 +3231,17 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3204 ext3_journal_stop(handle); 3231 ext3_journal_stop(handle);
3205 } 3232 }
3206 3233
3207 rc = inode_setattr(inode, attr); 3234 if ((attr->ia_valid & ATTR_SIZE) &&
3235 attr->ia_size != i_size_read(inode)) {
3236 rc = vmtruncate(inode, attr->ia_size);
3237 if (rc)
3238 goto err_out;
3239 }
3240
3241 setattr_copy(inode, attr);
3242 mark_inode_dirty(inode);
3208 3243
3209 if (!rc && (ia_valid & ATTR_MODE)) 3244 if (ia_valid & ATTR_MODE)
3210 rc = ext3_acl_chmod(inode); 3245 rc = ext3_acl_chmod(inode);
3211 3246
3212err_out: 3247err_out:
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 9650a956fd0e..5dbf4dba03c4 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -527,17 +527,6 @@ static void destroy_inodecache(void)
527 kmem_cache_destroy(ext3_inode_cachep); 527 kmem_cache_destroy(ext3_inode_cachep);
528} 528}
529 529
530static void ext3_clear_inode(struct inode *inode)
531{
532 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
533
534 dquot_drop(inode);
535 ext3_discard_reservation(inode);
536 EXT3_I(inode)->i_block_alloc_info = NULL;
537 if (unlikely(rsv))
538 kfree(rsv);
539}
540
541static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) 530static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
542{ 531{
543#if defined(CONFIG_QUOTA) 532#if defined(CONFIG_QUOTA)
@@ -780,14 +769,13 @@ static const struct super_operations ext3_sops = {
780 .destroy_inode = ext3_destroy_inode, 769 .destroy_inode = ext3_destroy_inode,
781 .write_inode = ext3_write_inode, 770 .write_inode = ext3_write_inode,
782 .dirty_inode = ext3_dirty_inode, 771 .dirty_inode = ext3_dirty_inode,
783 .delete_inode = ext3_delete_inode, 772 .evict_inode = ext3_evict_inode,
784 .put_super = ext3_put_super, 773 .put_super = ext3_put_super,
785 .sync_fs = ext3_sync_fs, 774 .sync_fs = ext3_sync_fs,
786 .freeze_fs = ext3_freeze, 775 .freeze_fs = ext3_freeze,
787 .unfreeze_fs = ext3_unfreeze, 776 .unfreeze_fs = ext3_unfreeze,
788 .statfs = ext3_statfs, 777 .statfs = ext3_statfs,
789 .remount_fs = ext3_remount, 778 .remount_fs = ext3_remount,
790 .clear_inode = ext3_clear_inode,
791 .show_options = ext3_show_options, 779 .show_options = ext3_show_options,
792#ifdef CONFIG_QUOTA 780#ifdef CONFIG_QUOTA
793 .quota_read = ext3_quota_read, 781 .quota_read = ext3_quota_read,
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 71fb8d65e54c..e69dc6dfaa89 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1139,7 +1139,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
1139 ea_bdebug(bh, "out of memory"); 1139 ea_bdebug(bh, "out of memory");
1140 return; 1140 return;
1141 } 1141 }
1142 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 1142 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1143 if (error) { 1143 if (error) {
1144 mb_cache_entry_free(ce); 1144 mb_cache_entry_free(ce);
1145 if (error == -EBUSY) { 1145 if (error == -EBUSY) {
@@ -1211,8 +1211,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
1211 return NULL; /* never share */ 1211 return NULL; /* never share */
1212 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1212 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1213again: 1213again:
1214 ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, 1214 ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev,
1215 inode->i_sb->s_bdev, hash); 1215 hash);
1216 while (ce) { 1216 while (ce) {
1217 struct buffer_head *bh; 1217 struct buffer_head *bh;
1218 1218
@@ -1237,7 +1237,7 @@ again:
1237 return bh; 1237 return bh;
1238 } 1238 }
1239 brelse(bh); 1239 brelse(bh);
1240 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); 1240 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1241 } 1241 }
1242 return NULL; 1242 return NULL;
1243} 1243}
@@ -1313,9 +1313,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
1313int __init 1313int __init
1314init_ext3_xattr(void) 1314init_ext3_xattr(void)
1315{ 1315{
1316 ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, 1316 ext3_xattr_cache = mb_cache_create("ext3_xattr", 6);
1317 sizeof(struct mb_cache_entry) +
1318 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1319 if (!ext3_xattr_cache) 1317 if (!ext3_xattr_cache)
1320 return -ENOMEM; 1318 return -ENOMEM;
1321 return 0; 1319 return 0;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e03841d9f30b..889ec9d5e6ad 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1643,7 +1643,8 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *);
1643extern int ext4_setattr(struct dentry *, struct iattr *); 1643extern int ext4_setattr(struct dentry *, struct iattr *);
1644extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 1644extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
1645 struct kstat *stat); 1645 struct kstat *stat);
1646extern void ext4_delete_inode(struct inode *); 1646extern void ext4_evict_inode(struct inode *);
1647extern void ext4_clear_inode(struct inode *);
1647extern int ext4_sync_inode(handle_t *, struct inode *); 1648extern int ext4_sync_inode(handle_t *, struct inode *);
1648extern void ext4_dirty_inode(struct inode *); 1649extern void ext4_dirty_inode(struct inode *);
1649extern int ext4_change_inode_journal_flag(struct inode *, int); 1650extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ac377505ed57..45853e0d1f21 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -222,7 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
222 is_directory = S_ISDIR(inode->i_mode); 222 is_directory = S_ISDIR(inode->i_mode);
223 223
224 /* Do this BEFORE marking the inode not in use or returning an error */ 224 /* Do this BEFORE marking the inode not in use or returning an error */
225 clear_inode(inode); 225 ext4_clear_inode(inode);
226 226
227 es = EXT4_SB(sb)->s_es; 227 es = EXT4_SB(sb)->s_es;
228 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { 228 if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a0ab3754d0d6..4b8debeb3965 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
167/* 167/*
168 * Called at the last iput() if i_nlink is zero. 168 * Called at the last iput() if i_nlink is zero.
169 */ 169 */
170void ext4_delete_inode(struct inode *inode) 170void ext4_evict_inode(struct inode *inode)
171{ 171{
172 handle_t *handle; 172 handle_t *handle;
173 int err; 173 int err;
174 174
175 if (inode->i_nlink) {
176 truncate_inode_pages(&inode->i_data, 0);
177 goto no_delete;
178 }
179
175 if (!is_bad_inode(inode)) 180 if (!is_bad_inode(inode))
176 dquot_initialize(inode); 181 dquot_initialize(inode);
177 182
@@ -246,13 +251,13 @@ void ext4_delete_inode(struct inode *inode)
246 */ 251 */
247 if (ext4_mark_inode_dirty(handle, inode)) 252 if (ext4_mark_inode_dirty(handle, inode))
248 /* If that failed, just do the required in-core inode clear. */ 253 /* If that failed, just do the required in-core inode clear. */
249 clear_inode(inode); 254 ext4_clear_inode(inode);
250 else 255 else
251 ext4_free_inode(handle, inode); 256 ext4_free_inode(handle, inode);
252 ext4_journal_stop(handle); 257 ext4_journal_stop(handle);
253 return; 258 return;
254no_delete: 259no_delete:
255 clear_inode(inode); /* We must guarantee clearing of inode... */ 260 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
256} 261}
257 262
258typedef struct { 263typedef struct {
@@ -1602,11 +1607,9 @@ retry:
1602 *pagep = page; 1607 *pagep = page;
1603 1608
1604 if (ext4_should_dioread_nolock(inode)) 1609 if (ext4_should_dioread_nolock(inode))
1605 ret = block_write_begin(file, mapping, pos, len, flags, pagep, 1610 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
1606 fsdata, ext4_get_block_write);
1607 else 1611 else
1608 ret = block_write_begin(file, mapping, pos, len, flags, pagep, 1612 ret = __block_write_begin(page, pos, len, ext4_get_block);
1609 fsdata, ext4_get_block);
1610 1613
1611 if (!ret && ext4_should_journal_data(inode)) { 1614 if (!ret && ext4_should_journal_data(inode)) {
1612 ret = walk_page_buffers(handle, page_buffers(page), 1615 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1617,7 +1620,7 @@ retry:
1617 unlock_page(page); 1620 unlock_page(page);
1618 page_cache_release(page); 1621 page_cache_release(page);
1619 /* 1622 /*
1620 * block_write_begin may have instantiated a few blocks 1623 * __block_write_begin may have instantiated a few blocks
1621 * outside i_size. Trim these off again. Don't need 1624 * outside i_size. Trim these off again. Don't need
1622 * i_size_read because we hold i_mutex. 1625 * i_size_read because we hold i_mutex.
1623 * 1626 *
@@ -3205,8 +3208,7 @@ retry:
3205 } 3208 }
3206 *pagep = page; 3209 *pagep = page;
3207 3210
3208 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 3211 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
3209 ext4_da_get_block_prep);
3210 if (ret < 0) { 3212 if (ret < 0) {
3211 unlock_page(page); 3213 unlock_page(page);
3212 ext4_journal_stop(handle); 3214 ext4_journal_stop(handle);
@@ -3565,15 +3567,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3565 3567
3566retry: 3568retry:
3567 if (rw == READ && ext4_should_dioread_nolock(inode)) 3569 if (rw == READ && ext4_should_dioread_nolock(inode))
3568 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 3570 ret = __blockdev_direct_IO(rw, iocb, inode,
3569 inode->i_sb->s_bdev, iov, 3571 inode->i_sb->s_bdev, iov,
3570 offset, nr_segs, 3572 offset, nr_segs,
3571 ext4_get_block, NULL); 3573 ext4_get_block, NULL, NULL, 0);
3572 else 3574 else {
3573 ret = blockdev_direct_IO(rw, iocb, inode, 3575 ret = blockdev_direct_IO(rw, iocb, inode,
3574 inode->i_sb->s_bdev, iov, 3576 inode->i_sb->s_bdev, iov,
3575 offset, nr_segs, 3577 offset, nr_segs,
3576 ext4_get_block, NULL); 3578 ext4_get_block, NULL);
3579
3580 if (unlikely((rw & WRITE) && ret < 0)) {
3581 loff_t isize = i_size_read(inode);
3582 loff_t end = offset + iov_length(iov, nr_segs);
3583
3584 if (end > isize)
3585 vmtruncate(inode, isize);
3586 }
3587 }
3577 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3588 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3578 goto retry; 3589 goto retry;
3579 3590
@@ -5536,11 +5547,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5536 ext4_truncate(inode); 5547 ext4_truncate(inode);
5537 } 5548 }
5538 5549
5539 rc = inode_setattr(inode, attr); 5550 if ((attr->ia_valid & ATTR_SIZE) &&
5551 attr->ia_size != i_size_read(inode))
5552 rc = vmtruncate(inode, attr->ia_size);
5540 5553
5541 /* If inode_setattr's call to ext4_truncate failed to get a 5554 if (!rc) {
5542 * transaction handle at all, we need to clean up the in-core 5555 setattr_copy(inode, attr);
5543 * orphan list manually. */ 5556 mark_inode_dirty(inode);
5557 }
5558
5559 /*
5560 * If the call to ext4_truncate failed to get a transaction handle at
5561 * all, we need to clean up the in-core orphan list manually.
5562 */
5544 if (inode->i_nlink) 5563 if (inode->i_nlink)
5545 ext4_orphan_del(NULL, inode); 5564 ext4_orphan_del(NULL, inode);
5546 5565
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8d65575f8c8c..26147746c272 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -868,8 +868,10 @@ static void destroy_inodecache(void)
868 kmem_cache_destroy(ext4_inode_cachep); 868 kmem_cache_destroy(ext4_inode_cachep);
869} 869}
870 870
871static void ext4_clear_inode(struct inode *inode) 871void ext4_clear_inode(struct inode *inode)
872{ 872{
873 invalidate_inode_buffers(inode);
874 end_writeback(inode);
873 dquot_drop(inode); 875 dquot_drop(inode);
874 ext4_discard_preallocations(inode); 876 ext4_discard_preallocations(inode);
875 if (EXT4_JOURNAL(inode)) 877 if (EXT4_JOURNAL(inode))
@@ -1158,14 +1160,13 @@ static const struct super_operations ext4_sops = {
1158 .destroy_inode = ext4_destroy_inode, 1160 .destroy_inode = ext4_destroy_inode,
1159 .write_inode = ext4_write_inode, 1161 .write_inode = ext4_write_inode,
1160 .dirty_inode = ext4_dirty_inode, 1162 .dirty_inode = ext4_dirty_inode,
1161 .delete_inode = ext4_delete_inode, 1163 .evict_inode = ext4_evict_inode,
1162 .put_super = ext4_put_super, 1164 .put_super = ext4_put_super,
1163 .sync_fs = ext4_sync_fs, 1165 .sync_fs = ext4_sync_fs,
1164 .freeze_fs = ext4_freeze, 1166 .freeze_fs = ext4_freeze,
1165 .unfreeze_fs = ext4_unfreeze, 1167 .unfreeze_fs = ext4_unfreeze,
1166 .statfs = ext4_statfs, 1168 .statfs = ext4_statfs,
1167 .remount_fs = ext4_remount, 1169 .remount_fs = ext4_remount,
1168 .clear_inode = ext4_clear_inode,
1169 .show_options = ext4_show_options, 1170 .show_options = ext4_show_options,
1170#ifdef CONFIG_QUOTA 1171#ifdef CONFIG_QUOTA
1171 .quota_read = ext4_quota_read, 1172 .quota_read = ext4_quota_read,
@@ -1179,12 +1180,11 @@ static const struct super_operations ext4_nojournal_sops = {
1179 .destroy_inode = ext4_destroy_inode, 1180 .destroy_inode = ext4_destroy_inode,
1180 .write_inode = ext4_write_inode, 1181 .write_inode = ext4_write_inode,
1181 .dirty_inode = ext4_dirty_inode, 1182 .dirty_inode = ext4_dirty_inode,
1182 .delete_inode = ext4_delete_inode, 1183 .evict_inode = ext4_evict_inode,
1183 .write_super = ext4_write_super, 1184 .write_super = ext4_write_super,
1184 .put_super = ext4_put_super, 1185 .put_super = ext4_put_super,
1185 .statfs = ext4_statfs, 1186 .statfs = ext4_statfs,
1186 .remount_fs = ext4_remount, 1187 .remount_fs = ext4_remount,
1187 .clear_inode = ext4_clear_inode,
1188 .show_options = ext4_show_options, 1188 .show_options = ext4_show_options,
1189#ifdef CONFIG_QUOTA 1189#ifdef CONFIG_QUOTA
1190 .quota_read = ext4_quota_read, 1190 .quota_read = ext4_quota_read,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a6f314249574..3a8cd8dff1ad 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1417,7 +1417,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh)
1417 ea_bdebug(bh, "out of memory"); 1417 ea_bdebug(bh, "out of memory");
1418 return; 1418 return;
1419 } 1419 }
1420 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); 1420 error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1421 if (error) { 1421 if (error) {
1422 mb_cache_entry_free(ce); 1422 mb_cache_entry_free(ce);
1423 if (error == -EBUSY) { 1423 if (error == -EBUSY) {
@@ -1489,8 +1489,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1489 return NULL; /* never share */ 1489 return NULL; /* never share */
1490 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); 1490 ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1491again: 1491again:
1492 ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, 1492 ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
1493 inode->i_sb->s_bdev, hash); 1493 hash);
1494 while (ce) { 1494 while (ce) {
1495 struct buffer_head *bh; 1495 struct buffer_head *bh;
1496 1496
@@ -1514,7 +1514,7 @@ again:
1514 return bh; 1514 return bh;
1515 } 1515 }
1516 brelse(bh); 1516 brelse(bh);
1517 ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); 1517 ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1518 } 1518 }
1519 return NULL; 1519 return NULL;
1520} 1520}
@@ -1590,9 +1590,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1590int __init 1590int __init
1591init_ext4_xattr(void) 1591init_ext4_xattr(void)
1592{ 1592{
1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, 1593 ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
1594 sizeof(struct mb_cache_entry) +
1595 sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
1596 if (!ext4_xattr_cache) 1594 if (!ext4_xattr_cache)
1597 return -ENOMEM; 1595 return -ENOMEM;
1598 return 0; 1596 return 0;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 27ac25725954..d75a77f85c28 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -306,7 +306,6 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
306extern const struct file_operations fat_file_operations; 306extern const struct file_operations fat_file_operations;
307extern const struct inode_operations fat_file_inode_operations; 307extern const struct inode_operations fat_file_inode_operations;
308extern int fat_setattr(struct dentry * dentry, struct iattr * attr); 308extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
309extern int fat_setsize(struct inode *inode, loff_t offset);
310extern void fat_truncate_blocks(struct inode *inode, loff_t offset); 309extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
311extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, 310extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
312 struct kstat *stat); 311 struct kstat *stat);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 990dfae022e5..7257752b6d5d 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -364,18 +364,6 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
364 return 0; 364 return 0;
365} 365}
366 366
367int fat_setsize(struct inode *inode, loff_t offset)
368{
369 int error;
370
371 error = simple_setsize(inode, offset);
372 if (error)
373 return error;
374 fat_truncate_blocks(inode, offset);
375
376 return error;
377}
378
379#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) 367#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
380/* valid file mode bits */ 368/* valid file mode bits */
381#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) 369#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
@@ -387,21 +375,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
387 unsigned int ia_valid; 375 unsigned int ia_valid;
388 int error; 376 int error;
389 377
390 /*
391 * Expand the file. Since inode_setattr() updates ->i_size
392 * before calling the ->truncate(), but FAT needs to fill the
393 * hole before it. XXX: this is no longer true with new truncate
394 * sequence.
395 */
396 if (attr->ia_valid & ATTR_SIZE) {
397 if (attr->ia_size > inode->i_size) {
398 error = fat_cont_expand(inode, attr->ia_size);
399 if (error || attr->ia_valid == ATTR_SIZE)
400 goto out;
401 attr->ia_valid &= ~ATTR_SIZE;
402 }
403 }
404
405 /* Check for setting the inode time. */ 378 /* Check for setting the inode time. */
406 ia_valid = attr->ia_valid; 379 ia_valid = attr->ia_valid;
407 if (ia_valid & TIMES_SET_FLAGS) { 380 if (ia_valid & TIMES_SET_FLAGS) {
@@ -417,6 +390,21 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
417 goto out; 390 goto out;
418 } 391 }
419 392
393 /*
394 * Expand the file. Since inode_setattr() updates ->i_size
395 * before calling the ->truncate(), but FAT needs to fill the
396 * hole before it. XXX: this is no longer true with new truncate
397 * sequence.
398 */
399 if (attr->ia_valid & ATTR_SIZE) {
400 if (attr->ia_size > inode->i_size) {
401 error = fat_cont_expand(inode, attr->ia_size);
402 if (error || attr->ia_valid == ATTR_SIZE)
403 goto out;
404 attr->ia_valid &= ~ATTR_SIZE;
405 }
406 }
407
420 if (((attr->ia_valid & ATTR_UID) && 408 if (((attr->ia_valid & ATTR_UID) &&
421 (attr->ia_uid != sbi->options.fs_uid)) || 409 (attr->ia_uid != sbi->options.fs_uid)) ||
422 ((attr->ia_valid & ATTR_GID) && 410 ((attr->ia_valid & ATTR_GID) &&
@@ -441,12 +429,11 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
441 } 429 }
442 430
443 if (attr->ia_valid & ATTR_SIZE) { 431 if (attr->ia_valid & ATTR_SIZE) {
444 error = fat_setsize(inode, attr->ia_size); 432 truncate_setsize(inode, attr->ia_size);
445 if (error) 433 fat_truncate_blocks(inode, attr->ia_size);
446 goto out;
447 } 434 }
448 435
449 generic_setattr(inode, attr); 436 setattr_copy(inode, attr);
450 mark_inode_dirty(inode); 437 mark_inode_dirty(inode);
451out: 438out:
452 return error; 439 return error;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 7bf45aee56d7..830058057d33 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping,
159 int err; 159 int err;
160 160
161 *pagep = NULL; 161 *pagep = NULL;
162 err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, 162 err = cont_write_begin(file, mapping, pos, len, flags,
163 pagep, fsdata, fat_get_block, 163 pagep, fsdata, fat_get_block,
164 &MSDOS_I(mapping->host)->mmu_private); 164 &MSDOS_I(mapping->host)->mmu_private);
165 if (err < 0) 165 if (err < 0)
@@ -212,8 +212,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
212 * FAT need to use the DIO_LOCKING for avoiding the race 212 * FAT need to use the DIO_LOCKING for avoiding the race
213 * condition of fat_get_block() and ->truncate(). 213 * condition of fat_get_block() and ->truncate().
214 */ 214 */
215 ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev, 215 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
216 iov, offset, nr_segs, fat_get_block, NULL); 216 iov, offset, nr_segs, fat_get_block, NULL);
217 if (ret < 0 && (rw & WRITE)) 217 if (ret < 0 && (rw & WRITE))
218 fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); 218 fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
219 219
@@ -263,7 +263,7 @@ static const struct address_space_operations fat_aops = {
263 * check if the location is still valid and retry if it 263 * check if the location is still valid and retry if it
264 * isn't. Otherwise we do changes. 264 * isn't. Otherwise we do changes.
265 * 5. Spinlock is used to protect hash/unhash/location check/lookup 265 * 5. Spinlock is used to protect hash/unhash/location check/lookup
266 * 6. fat_clear_inode() unhashes the F-d-c entry. 266 * 6. fat_evict_inode() unhashes the F-d-c entry.
267 * 7. lookup() and readdir() do igrab() if they find a F-d-c entry 267 * 7. lookup() and readdir() do igrab() if they find a F-d-c entry
268 * and consider negative result as cache miss. 268 * and consider negative result as cache miss.
269 */ 269 */
@@ -448,16 +448,15 @@ out:
448 448
449EXPORT_SYMBOL_GPL(fat_build_inode); 449EXPORT_SYMBOL_GPL(fat_build_inode);
450 450
451static void fat_delete_inode(struct inode *inode) 451static void fat_evict_inode(struct inode *inode)
452{ 452{
453 truncate_inode_pages(&inode->i_data, 0); 453 truncate_inode_pages(&inode->i_data, 0);
454 inode->i_size = 0; 454 if (!inode->i_nlink) {
455 fat_truncate_blocks(inode, 0); 455 inode->i_size = 0;
456 clear_inode(inode); 456 fat_truncate_blocks(inode, 0);
457} 457 }
458 458 invalidate_inode_buffers(inode);
459static void fat_clear_inode(struct inode *inode) 459 end_writeback(inode);
460{
461 fat_cache_inval_inode(inode); 460 fat_cache_inval_inode(inode);
462 fat_detach(inode); 461 fat_detach(inode);
463} 462}
@@ -674,12 +673,11 @@ static const struct super_operations fat_sops = {
674 .alloc_inode = fat_alloc_inode, 673 .alloc_inode = fat_alloc_inode,
675 .destroy_inode = fat_destroy_inode, 674 .destroy_inode = fat_destroy_inode,
676 .write_inode = fat_write_inode, 675 .write_inode = fat_write_inode,
677 .delete_inode = fat_delete_inode, 676 .evict_inode = fat_evict_inode,
678 .put_super = fat_put_super, 677 .put_super = fat_put_super,
679 .write_super = fat_write_super, 678 .write_super = fat_write_super,
680 .sync_fs = fat_sync_fs, 679 .sync_fs = fat_sync_fs,
681 .statfs = fat_statfs, 680 .statfs = fat_statfs,
682 .clear_inode = fat_clear_inode,
683 .remount_fs = fat_remount, 681 .remount_fs = fat_remount,
684 682
685 .show_options = fat_show_options, 683 .show_options = fat_show_options,
diff --git a/fs/file_table.c b/fs/file_table.c
index 5c7d10ead4ad..b8a0bb63cbd7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -230,6 +230,15 @@ static void __fput(struct file *file)
230 might_sleep(); 230 might_sleep();
231 231
232 fsnotify_close(file); 232 fsnotify_close(file);
233
234 /*
235 * fsnotify_create_event may have taken one or more references on this
236 * file. If it did so it left one reference for us to drop to make sure
237 * its calls to fput could not prematurely destroy the file.
238 */
239 if (atomic_long_read(&file->f_count))
240 return fput(file);
241
233 /* 242 /*
234 * The function eventpoll_release() should be the first called 243 * The function eventpoll_release() should be the first called
235 * in the file cleanup chain. 244 * in the file cleanup chain.
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 50ab5eecb99b..881aa3d217f0 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -63,7 +63,7 @@ extern void vxfs_put_fake_inode(struct inode *);
63extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t); 63extern struct vxfs_inode_info * vxfs_blkiget(struct super_block *, u_long, ino_t);
64extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t); 64extern struct vxfs_inode_info * vxfs_stiget(struct super_block *, ino_t);
65extern struct inode * vxfs_iget(struct super_block *, ino_t); 65extern struct inode * vxfs_iget(struct super_block *, ino_t);
66extern void vxfs_clear_inode(struct inode *); 66extern void vxfs_evict_inode(struct inode *);
67 67
68/* vxfs_lookup.c */ 68/* vxfs_lookup.c */
69extern const struct inode_operations vxfs_dir_inode_ops; 69extern const struct inode_operations vxfs_dir_inode_ops;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 03a6ea5e99f7..79d1b4ea13e7 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,15 +337,17 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
337} 337}
338 338
339/** 339/**
340 * vxfs_clear_inode - remove inode from main memory 340 * vxfs_evict_inode - remove inode from main memory
341 * @ip: inode to discard. 341 * @ip: inode to discard.
342 * 342 *
343 * Description: 343 * Description:
344 * vxfs_clear_inode() is called on the final iput and frees the private 344 * vxfs_evict_inode() is called on the final iput and frees the private
345 * inode area. 345 * inode area.
346 */ 346 */
347void 347void
348vxfs_clear_inode(struct inode *ip) 348vxfs_evict_inode(struct inode *ip)
349{ 349{
350 truncate_inode_pages(&ip->i_data, 0);
351 end_writeback(ip);
350 kmem_cache_free(vxfs_inode_cachep, ip->i_private); 352 kmem_cache_free(vxfs_inode_cachep, ip->i_private);
351} 353}
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 5132c99b1ca2..dc0c041e85cb 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -61,7 +61,7 @@ static int vxfs_statfs(struct dentry *, struct kstatfs *);
61static int vxfs_remount(struct super_block *, int *, char *); 61static int vxfs_remount(struct super_block *, int *, char *);
62 62
63static const struct super_operations vxfs_super_ops = { 63static const struct super_operations vxfs_super_ops = {
64 .clear_inode = vxfs_clear_inode, 64 .evict_inode = vxfs_evict_inode,
65 .put_super = vxfs_put_super, 65 .put_super = vxfs_put_super,
66 .statfs = vxfs_statfs, 66 .statfs = vxfs_statfs,
67 .remount_fs = vxfs_remount, 67 .remount_fs = vxfs_remount,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 30ac305e8293..2f76c4a081a2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/tracepoint.h>
29#include "internal.h" 30#include "internal.h"
30 31
31#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
32
33/*
34 * We don't actually have pdflush, but this one is exported though /proc...
35 */
36int nr_pdflush_threads;
37
38/* 32/*
39 * Passed into wb_writeback(), essentially a subset of writeback_control 33 * Passed into wb_writeback(), essentially a subset of writeback_control
40 */ 34 */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
50 struct completion *done; /* set if the caller waits */ 44 struct completion *done; /* set if the caller waits */
51}; 45};
52 46
47/*
48 * Include the creation of the trace points after defining the
49 * wb_writeback_work structure so that the definition remains local to this
50 * file.
51 */
52#define CREATE_TRACE_POINTS
53#include <trace/events/writeback.h>
54
55#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
56
57/*
58 * We don't actually have pdflush, but this one is exported through /proc...
59 */
60int nr_pdflush_threads;
61
53/** 62/**
54 * writeback_in_progress - determine whether there is writeback in progress 63 * writeback_in_progress - determine whether there is writeback in progress
55 * @bdi: the device's backing_dev_info structure. 64 * @bdi: the device's backing_dev_info structure.
@@ -65,22 +74,21 @@ int writeback_in_progress(struct backing_dev_info *bdi)
65static void bdi_queue_work(struct backing_dev_info *bdi, 74static void bdi_queue_work(struct backing_dev_info *bdi,
66 struct wb_writeback_work *work) 75 struct wb_writeback_work *work)
67{ 76{
68 spin_lock(&bdi->wb_lock); 77 trace_writeback_queue(bdi, work);
69 list_add_tail(&work->list, &bdi->work_list);
70 spin_unlock(&bdi->wb_lock);
71 78
72 /* 79 spin_lock_bh(&bdi->wb_lock);
73 * If the default thread isn't there, make sure we add it. When 80 list_add_tail(&work->list, &bdi->work_list);
74 * it gets created and wakes up, we'll run this work. 81 if (bdi->wb.task) {
75 */ 82 wake_up_process(bdi->wb.task);
76 if (unlikely(list_empty_careful(&bdi->wb_list))) 83 } else {
84 /*
85 * The bdi thread isn't there, wake up the forker thread which
86 * will create and run it.
87 */
88 trace_writeback_nothread(bdi, work);
77 wake_up_process(default_backing_dev_info.wb.task); 89 wake_up_process(default_backing_dev_info.wb.task);
78 else {
79 struct bdi_writeback *wb = &bdi->wb;
80
81 if (wb->task)
82 wake_up_process(wb->task);
83 } 90 }
91 spin_unlock_bh(&bdi->wb_lock);
84} 92}
85 93
86static void 94static void
@@ -95,8 +103,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
95 */ 103 */
96 work = kzalloc(sizeof(*work), GFP_ATOMIC); 104 work = kzalloc(sizeof(*work), GFP_ATOMIC);
97 if (!work) { 105 if (!work) {
98 if (bdi->wb.task) 106 if (bdi->wb.task) {
107 trace_writeback_nowork(bdi);
99 wake_up_process(bdi->wb.task); 108 wake_up_process(bdi->wb.task);
109 }
100 return; 110 return;
101 } 111 }
102 112
@@ -352,7 +362,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
352 362
353 spin_lock(&inode_lock); 363 spin_lock(&inode_lock);
354 inode->i_state &= ~I_SYNC; 364 inode->i_state &= ~I_SYNC;
355 if (!(inode->i_state & (I_FREEING | I_CLEAR))) { 365 if (!(inode->i_state & I_FREEING)) {
356 if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { 366 if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
357 /* 367 /*
358 * More pages get dirtied by a fast dirtier. 368 * More pages get dirtied by a fast dirtier.
@@ -499,7 +509,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
499 if (inode_dirtied_after(inode, wbc->wb_start)) 509 if (inode_dirtied_after(inode, wbc->wb_start))
500 return 1; 510 return 1;
501 511
502 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 512 BUG_ON(inode->i_state & I_FREEING);
503 __iget(inode); 513 __iget(inode);
504 pages_skipped = wbc->pages_skipped; 514 pages_skipped = wbc->pages_skipped;
505 writeback_single_inode(inode, wbc); 515 writeback_single_inode(inode, wbc);
@@ -643,10 +653,14 @@ static long wb_writeback(struct bdi_writeback *wb,
643 wbc.more_io = 0; 653 wbc.more_io = 0;
644 wbc.nr_to_write = MAX_WRITEBACK_PAGES; 654 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
645 wbc.pages_skipped = 0; 655 wbc.pages_skipped = 0;
656
657 trace_wbc_writeback_start(&wbc, wb->bdi);
646 if (work->sb) 658 if (work->sb)
647 __writeback_inodes_sb(work->sb, wb, &wbc); 659 __writeback_inodes_sb(work->sb, wb, &wbc);
648 else 660 else
649 writeback_inodes_wb(wb, &wbc); 661 writeback_inodes_wb(wb, &wbc);
662 trace_wbc_writeback_written(&wbc, wb->bdi);
663
650 work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; 664 work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
651 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; 665 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
652 666
@@ -674,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb,
674 if (!list_empty(&wb->b_more_io)) { 688 if (!list_empty(&wb->b_more_io)) {
675 inode = list_entry(wb->b_more_io.prev, 689 inode = list_entry(wb->b_more_io.prev,
676 struct inode, i_list); 690 struct inode, i_list);
691 trace_wbc_writeback_wait(&wbc, wb->bdi);
677 inode_wait_for_writeback(inode); 692 inode_wait_for_writeback(inode);
678 } 693 }
679 spin_unlock(&inode_lock); 694 spin_unlock(&inode_lock);
@@ -686,17 +701,17 @@ static long wb_writeback(struct bdi_writeback *wb,
686 * Return the next wb_writeback_work struct that hasn't been processed yet. 701 * Return the next wb_writeback_work struct that hasn't been processed yet.
687 */ 702 */
688static struct wb_writeback_work * 703static struct wb_writeback_work *
689get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb) 704get_next_work_item(struct backing_dev_info *bdi)
690{ 705{
691 struct wb_writeback_work *work = NULL; 706 struct wb_writeback_work *work = NULL;
692 707
693 spin_lock(&bdi->wb_lock); 708 spin_lock_bh(&bdi->wb_lock);
694 if (!list_empty(&bdi->work_list)) { 709 if (!list_empty(&bdi->work_list)) {
695 work = list_entry(bdi->work_list.next, 710 work = list_entry(bdi->work_list.next,
696 struct wb_writeback_work, list); 711 struct wb_writeback_work, list);
697 list_del_init(&work->list); 712 list_del_init(&work->list);
698 } 713 }
699 spin_unlock(&bdi->wb_lock); 714 spin_unlock_bh(&bdi->wb_lock);
700 return work; 715 return work;
701} 716}
702 717
@@ -744,7 +759,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
744 struct wb_writeback_work *work; 759 struct wb_writeback_work *work;
745 long wrote = 0; 760 long wrote = 0;
746 761
747 while ((work = get_next_work_item(bdi, wb)) != NULL) { 762 while ((work = get_next_work_item(bdi)) != NULL) {
748 /* 763 /*
749 * Override sync mode, in case we must wait for completion 764 * Override sync mode, in case we must wait for completion
750 * because this thread is exiting now. 765 * because this thread is exiting now.
@@ -752,6 +767,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
752 if (force_wait) 767 if (force_wait)
753 work->sync_mode = WB_SYNC_ALL; 768 work->sync_mode = WB_SYNC_ALL;
754 769
770 trace_writeback_exec(bdi, work);
771
755 wrote += wb_writeback(wb, work); 772 wrote += wb_writeback(wb, work);
756 773
757 /* 774 /*
@@ -776,47 +793,66 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
776 * Handle writeback of dirty data for the device backed by this bdi. Also 793 * Handle writeback of dirty data for the device backed by this bdi. Also
777 * wakes up periodically and does kupdated style flushing. 794 * wakes up periodically and does kupdated style flushing.
778 */ 795 */
779int bdi_writeback_task(struct bdi_writeback *wb) 796int bdi_writeback_thread(void *data)
780{ 797{
781 unsigned long last_active = jiffies; 798 struct bdi_writeback *wb = data;
782 unsigned long wait_jiffies = -1UL; 799 struct backing_dev_info *bdi = wb->bdi;
783 long pages_written; 800 long pages_written;
784 801
802 current->flags |= PF_FLUSHER | PF_SWAPWRITE;
803 set_freezable();
804 wb->last_active = jiffies;
805
806 /*
807 * Our parent may run at a different priority, just set us to normal
808 */
809 set_user_nice(current, 0);
810
811 trace_writeback_thread_start(bdi);
812
785 while (!kthread_should_stop()) { 813 while (!kthread_should_stop()) {
814 /*
815 * Remove own delayed wake-up timer, since we are already awake
816 * and we'll take care of the preriodic write-back.
817 */
818 del_timer(&wb->wakeup_timer);
819
786 pages_written = wb_do_writeback(wb, 0); 820 pages_written = wb_do_writeback(wb, 0);
787 821
822 trace_writeback_pages_written(pages_written);
823
788 if (pages_written) 824 if (pages_written)
789 last_active = jiffies; 825 wb->last_active = jiffies;
790 else if (wait_jiffies != -1UL) {
791 unsigned long max_idle;
792 826
793 /* 827 set_current_state(TASK_INTERRUPTIBLE);
794 * Longest period of inactivity that we tolerate. If we 828 if (!list_empty(&bdi->work_list)) {
795 * see dirty data again later, the task will get 829 __set_current_state(TASK_RUNNING);
796 * recreated automatically. 830 continue;
797 */
798 max_idle = max(5UL * 60 * HZ, wait_jiffies);
799 if (time_after(jiffies, max_idle + last_active))
800 break;
801 } 831 }
802 832
803 if (dirty_writeback_interval) { 833 if (wb_has_dirty_io(wb) && dirty_writeback_interval)
804 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); 834 schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
805 schedule_timeout_interruptible(wait_jiffies); 835 else {
806 } else { 836 /*
807 set_current_state(TASK_INTERRUPTIBLE); 837 * We have nothing to do, so can go sleep without any
808 if (list_empty_careful(&wb->bdi->work_list) && 838 * timeout and save power. When a work is queued or
809 !kthread_should_stop()) 839 * something is made dirty - we will be woken up.
810 schedule(); 840 */
811 __set_current_state(TASK_RUNNING); 841 schedule();
812 } 842 }
813 843
814 try_to_freeze(); 844 try_to_freeze();
815 } 845 }
816 846
847 /* Flush any work that raced with us exiting */
848 if (!list_empty(&bdi->work_list))
849 wb_do_writeback(wb, 1);
850
851 trace_writeback_thread_stop(bdi);
817 return 0; 852 return 0;
818} 853}
819 854
855
820/* 856/*
821 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back 857 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
822 * the whole world. 858 * the whole world.
@@ -891,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
891void __mark_inode_dirty(struct inode *inode, int flags) 927void __mark_inode_dirty(struct inode *inode, int flags)
892{ 928{
893 struct super_block *sb = inode->i_sb; 929 struct super_block *sb = inode->i_sb;
930 struct backing_dev_info *bdi = NULL;
931 bool wakeup_bdi = false;
894 932
895 /* 933 /*
896 * Don't do this for I_DIRTY_PAGES - that doesn't actually 934 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -936,7 +974,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
936 if (hlist_unhashed(&inode->i_hash)) 974 if (hlist_unhashed(&inode->i_hash))
937 goto out; 975 goto out;
938 } 976 }
939 if (inode->i_state & (I_FREEING|I_CLEAR)) 977 if (inode->i_state & I_FREEING)
940 goto out; 978 goto out;
941 979
942 /* 980 /*
@@ -944,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
944 * reposition it (that would break b_dirty time-ordering). 982 * reposition it (that would break b_dirty time-ordering).
945 */ 983 */
946 if (!was_dirty) { 984 if (!was_dirty) {
947 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; 985 bdi = inode_to_bdi(inode);
948 struct backing_dev_info *bdi = wb->bdi; 986
949 987 if (bdi_cap_writeback_dirty(bdi)) {
950 if (bdi_cap_writeback_dirty(bdi) && 988 WARN(!test_bit(BDI_registered, &bdi->state),
951 !test_bit(BDI_registered, &bdi->state)) { 989 "bdi-%s not registered\n", bdi->name);
952 WARN_ON(1); 990
953 printk(KERN_ERR "bdi-%s not registered\n", 991 /*
954 bdi->name); 992 * If this is the first dirty inode for this
993 * bdi, we have to wake-up the corresponding
994 * bdi thread to make sure background
995 * write-back happens later.
996 */
997 if (!wb_has_dirty_io(&bdi->wb))
998 wakeup_bdi = true;
955 } 999 }
956 1000
957 inode->dirtied_when = jiffies; 1001 inode->dirtied_when = jiffies;
958 list_move(&inode->i_list, &wb->b_dirty); 1002 list_move(&inode->i_list, &bdi->wb.b_dirty);
959 } 1003 }
960 } 1004 }
961out: 1005out:
962 spin_unlock(&inode_lock); 1006 spin_unlock(&inode_lock);
1007
1008 if (wakeup_bdi)
1009 bdi_wakeup_thread_delayed(bdi);
963} 1010}
964EXPORT_SYMBOL(__mark_inode_dirty); 1011EXPORT_SYMBOL(__mark_inode_dirty);
965 1012
@@ -1002,7 +1049,7 @@ static void wait_sb_inodes(struct super_block *sb)
1002 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 1049 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
1003 struct address_space *mapping; 1050 struct address_space *mapping;
1004 1051
1005 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 1052 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
1006 continue; 1053 continue;
1007 mapping = inode->i_mapping; 1054 mapping = inode->i_mapping;
1008 if (mapping->nrpages == 0) 1055 if (mapping->nrpages == 0)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 431be0795b6b..c9627c95482d 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1270,21 +1270,18 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
1270 if (!fuse_allow_task(fc, current)) 1270 if (!fuse_allow_task(fc, current))
1271 return -EACCES; 1271 return -EACCES;
1272 1272
1273 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1273 if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
1274 err = inode_change_ok(inode, attr); 1274 attr->ia_valid |= ATTR_FORCE;
1275 if (err) 1275
1276 return err; 1276 err = inode_change_ok(inode, attr);
1277 } 1277 if (err)
1278 return err;
1278 1279
1279 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc) 1280 if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
1280 return 0; 1281 return 0;
1281 1282
1282 if (attr->ia_valid & ATTR_SIZE) { 1283 if (attr->ia_valid & ATTR_SIZE)
1283 err = inode_newsize_ok(inode, attr->ia_size);
1284 if (err)
1285 return err;
1286 is_truncate = true; 1284 is_truncate = true;
1287 }
1288 1285
1289 req = fuse_get_req(fc); 1286 req = fuse_get_req(fc);
1290 if (IS_ERR(req)) 1287 if (IS_ERR(req))
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ec14d19ce501..da9e6e11374c 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -122,8 +122,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
122 fuse_request_send_noreply(fc, req); 122 fuse_request_send_noreply(fc, req);
123} 123}
124 124
125static void fuse_clear_inode(struct inode *inode) 125static void fuse_evict_inode(struct inode *inode)
126{ 126{
127 truncate_inode_pages(&inode->i_data, 0);
128 end_writeback(inode);
127 if (inode->i_sb->s_flags & MS_ACTIVE) { 129 if (inode->i_sb->s_flags & MS_ACTIVE) {
128 struct fuse_conn *fc = get_fuse_conn(inode); 130 struct fuse_conn *fc = get_fuse_conn(inode);
129 struct fuse_inode *fi = get_fuse_inode(inode); 131 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -736,7 +738,7 @@ static const struct export_operations fuse_export_operations = {
736static const struct super_operations fuse_super_operations = { 738static const struct super_operations fuse_super_operations = {
737 .alloc_inode = fuse_alloc_inode, 739 .alloc_inode = fuse_alloc_inode,
738 .destroy_inode = fuse_destroy_inode, 740 .destroy_inode = fuse_destroy_inode,
739 .clear_inode = fuse_clear_inode, 741 .evict_inode = fuse_evict_inode,
740 .drop_inode = generic_delete_inode, 742 .drop_inode = generic_delete_inode,
741 .remount_fs = fuse_remount_fs, 743 .remount_fs = fuse_remount_fs,
742 .put_super = fuse_put_super, 744 .put_super = fuse_put_super,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 5e96cbd8a454..194fe16d8418 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -697,12 +697,12 @@ out:
697 page_cache_release(page); 697 page_cache_release(page);
698 698
699 /* 699 /*
700 * XXX(hch): the call below should probably be replaced with 700 * XXX(truncate): the call below should probably be replaced with
701 * a call to the gfs2-specific truncate blocks helper to actually 701 * a call to the gfs2-specific truncate blocks helper to actually
702 * release disk blocks.. 702 * release disk blocks..
703 */ 703 */
704 if (pos + len > ip->i_inode.i_size) 704 if (pos + len > ip->i_inode.i_size)
705 simple_setsize(&ip->i_inode, ip->i_inode.i_size); 705 truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
706out_endtrans: 706out_endtrans:
707 gfs2_trans_end(sdp); 707 gfs2_trans_end(sdp);
708out_trans_fail: 708out_trans_fail:
@@ -1042,9 +1042,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
1042 if (rv != 1) 1042 if (rv != 1)
1043 goto out; /* dio not valid, fall back to buffered i/o */ 1043 goto out; /* dio not valid, fall back to buffered i/o */
1044 1044
1045 rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev, 1045 rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
1046 iov, offset, nr_segs, 1046 offset, nr_segs, gfs2_get_block_direct,
1047 gfs2_get_block_direct, NULL); 1047 NULL, NULL, 0);
1048out: 1048out:
1049 gfs2_glock_dq_m(1, &gh); 1049 gfs2_glock_dq_m(1, &gh);
1050 gfs2_holder_uninit(&gh); 1050 gfs2_holder_uninit(&gh);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f03afd9c44bc..08140f185a37 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -84,7 +84,7 @@ static int iget_skip_test(struct inode *inode, void *opaque)
84 struct gfs2_skip_data *data = opaque; 84 struct gfs2_skip_data *data = opaque;
85 85
86 if (ip->i_no_addr == data->no_addr) { 86 if (ip->i_no_addr == data->no_addr) {
87 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ 87 if (inode->i_state & (I_FREEING|I_WILL_FREE)){
88 data->skipped = 1; 88 data->skipped = 1;
89 return 0; 89 return 0;
90 } 90 }
@@ -991,18 +991,29 @@ fail:
991 991
992static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 992static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
993{ 993{
994 struct inode *inode = &ip->i_inode;
994 struct buffer_head *dibh; 995 struct buffer_head *dibh;
995 int error; 996 int error;
996 997
997 error = gfs2_meta_inode_buffer(ip, &dibh); 998 error = gfs2_meta_inode_buffer(ip, &dibh);
998 if (!error) { 999 if (error)
999 error = inode_setattr(&ip->i_inode, attr); 1000 return error;
1000 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1001
1001 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1002 if ((attr->ia_valid & ATTR_SIZE) &&
1002 gfs2_dinode_out(ip, dibh->b_data); 1003 attr->ia_size != i_size_read(inode)) {
1003 brelse(dibh); 1004 error = vmtruncate(inode, attr->ia_size);
1005 if (error)
1006 return error;
1004 } 1007 }
1005 return error; 1008
1009 setattr_copy(inode, attr);
1010 mark_inode_dirty(inode);
1011
1012 gfs2_assert_warn(GFS2_SB(inode), !error);
1013 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1014 gfs2_dinode_out(ip, dibh->b_data);
1015 brelse(dibh);
1016 return 0;
1006} 1017}
1007 1018
1008/** 1019/**
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6a857e24f947..cde1248a6225 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
595 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 595 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
596 goto skip_barrier; 596 goto skip_barrier;
597 get_bh(bh); 597 get_bh(bh);
598 submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh); 598 submit_bh(WRITE_BARRIER | REQ_META, bh);
599 wait_on_buffer(bh); 599 wait_on_buffer(bh);
600 if (buffer_eopnotsupp(bh)) { 600 if (buffer_eopnotsupp(bh)) {
601 clear_buffer_eopnotsupp(bh); 601 clear_buffer_eopnotsupp(bh);
@@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
605 lock_buffer(bh); 605 lock_buffer(bh);
606skip_barrier: 606skip_barrier:
607 get_bh(bh); 607 get_bh(bh);
608 submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); 608 submit_bh(WRITE_SYNC | REQ_META, bh);
609 wait_on_buffer(bh); 609 wait_on_buffer(bh);
610 } 610 }
611 if (!buffer_uptodate(bh)) 611 if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 18176d0b75d7..f3b071f921aa 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
36{ 36{
37 struct buffer_head *bh, *head; 37 struct buffer_head *bh, *head;
38 int nr_underway = 0; 38 int nr_underway = 0;
39 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? 39 int write_op = REQ_META |
40 WRITE_SYNC_PLUG : WRITE)); 40 (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
41 41
42 BUG_ON(!PageLocked(page)); 42 BUG_ON(!PageLocked(page));
43 BUG_ON(!page_has_buffers(page)); 43 BUG_ON(!page_has_buffers(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
225 } 225 }
226 bh->b_end_io = end_buffer_read_sync; 226 bh->b_end_io = end_buffer_read_sync;
227 get_bh(bh); 227 get_bh(bh);
228 submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); 228 submit_bh(READ_SYNC | REQ_META, bh);
229 if (!(flags & DIO_WAIT)) 229 if (!(flags & DIO_WAIT))
230 return 0; 230 return 0;
231 231
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
432 if (buffer_uptodate(first_bh)) 432 if (buffer_uptodate(first_bh))
433 goto out; 433 goto out;
434 if (!buffer_locked(first_bh)) 434 if (!buffer_locked(first_bh))
435 ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); 435 ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
436 436
437 dblock++; 437 dblock++;
438 extlen--; 438 extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4f44bdeb2f03..4d4b1e8ac64c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -274,7 +274,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
274 274
275 bio->bi_end_io = end_bio_io_page; 275 bio->bi_end_io = end_bio_io_page;
276 bio->bi_private = page; 276 bio->bi_private = page;
277 submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); 277 submit_bio(READ_SYNC | REQ_META, bio);
278 wait_on_page_locked(page); 278 wait_on_page_locked(page);
279 bio_put(bio); 279 bio_put(bio);
280 if (!PageUptodate(page)) { 280 if (!PageUptodate(page)) {
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 98cdd05f3316..1009be2c9737 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1072,7 +1072,7 @@ int gfs2_permission(struct inode *inode, int mask)
1072} 1072}
1073 1073
1074/* 1074/*
1075 * XXX: should be changed to have proper ordering by opencoding simple_setsize 1075 * XXX(truncate): the truncate_setsize calls should be moved to the end.
1076 */ 1076 */
1077static int setattr_size(struct inode *inode, struct iattr *attr) 1077static int setattr_size(struct inode *inode, struct iattr *attr)
1078{ 1078{
@@ -1084,10 +1084,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
1084 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1084 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1085 if (error) 1085 if (error)
1086 return error; 1086 return error;
1087 error = simple_setsize(inode, attr->ia_size); 1087 truncate_setsize(inode, attr->ia_size);
1088 gfs2_trans_end(sdp); 1088 gfs2_trans_end(sdp);
1089 if (error)
1090 return error;
1091 } 1089 }
1092 1090
1093 error = gfs2_truncatei(ip, attr->ia_size); 1091 error = gfs2_truncatei(ip, attr->ia_size);
@@ -1136,8 +1134,16 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1136 if (error) 1134 if (error)
1137 goto out_end_trans; 1135 goto out_end_trans;
1138 1136
1139 error = inode_setattr(inode, attr); 1137 if ((attr->ia_valid & ATTR_SIZE) &&
1140 gfs2_assert_warn(sdp, !error); 1138 attr->ia_size != i_size_read(inode)) {
1139 int error;
1140
1141 error = vmtruncate(inode, attr->ia_size);
1142 gfs2_assert_warn(sdp, !error);
1143 }
1144
1145 setattr_copy(inode, attr);
1146 mark_inode_dirty(inode);
1141 1147
1142 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1148 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1143 gfs2_dinode_out(ip, dibh->b_data); 1149 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4140811a921c..77cb9f830ee4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1188,7 +1188,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1188 * node for later deallocation. 1188 * node for later deallocation.
1189 */ 1189 */
1190 1190
1191static void gfs2_drop_inode(struct inode *inode) 1191static int gfs2_drop_inode(struct inode *inode)
1192{ 1192{
1193 struct gfs2_inode *ip = GFS2_I(inode); 1193 struct gfs2_inode *ip = GFS2_I(inode);
1194 1194
@@ -1197,26 +1197,7 @@ static void gfs2_drop_inode(struct inode *inode)
1197 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) 1197 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
1198 clear_nlink(inode); 1198 clear_nlink(inode);
1199 } 1199 }
1200 generic_drop_inode(inode); 1200 return generic_drop_inode(inode);
1201}
1202
1203/**
1204 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
1205 * @inode: The VFS inode
1206 *
1207 */
1208
1209static void gfs2_clear_inode(struct inode *inode)
1210{
1211 struct gfs2_inode *ip = GFS2_I(inode);
1212
1213 ip->i_gl->gl_object = NULL;
1214 gfs2_glock_put(ip->i_gl);
1215 ip->i_gl = NULL;
1216 if (ip->i_iopen_gh.gh_gl) {
1217 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1218 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1219 }
1220} 1201}
1221 1202
1222static int is_ancestor(const struct dentry *d1, const struct dentry *d2) 1203static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
@@ -1344,13 +1325,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1344 * is safe, just less efficient. 1325 * is safe, just less efficient.
1345 */ 1326 */
1346 1327
1347static void gfs2_delete_inode(struct inode *inode) 1328static void gfs2_evict_inode(struct inode *inode)
1348{ 1329{
1349 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 1330 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
1350 struct gfs2_inode *ip = GFS2_I(inode); 1331 struct gfs2_inode *ip = GFS2_I(inode);
1351 struct gfs2_holder gh; 1332 struct gfs2_holder gh;
1352 int error; 1333 int error;
1353 1334
1335 if (inode->i_nlink)
1336 goto out;
1337
1354 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 1338 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1355 if (unlikely(error)) { 1339 if (unlikely(error)) {
1356 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1340 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
@@ -1404,10 +1388,18 @@ out_unlock:
1404 gfs2_holder_uninit(&ip->i_iopen_gh); 1388 gfs2_holder_uninit(&ip->i_iopen_gh);
1405 gfs2_glock_dq_uninit(&gh); 1389 gfs2_glock_dq_uninit(&gh);
1406 if (error && error != GLR_TRYFAILED && error != -EROFS) 1390 if (error && error != GLR_TRYFAILED && error != -EROFS)
1407 fs_warn(sdp, "gfs2_delete_inode: %d\n", error); 1391 fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
1408out: 1392out:
1409 truncate_inode_pages(&inode->i_data, 0); 1393 truncate_inode_pages(&inode->i_data, 0);
1410 clear_inode(inode); 1394 end_writeback(inode);
1395
1396 ip->i_gl->gl_object = NULL;
1397 gfs2_glock_put(ip->i_gl);
1398 ip->i_gl = NULL;
1399 if (ip->i_iopen_gh.gh_gl) {
1400 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1401 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1402 }
1411} 1403}
1412 1404
1413static struct inode *gfs2_alloc_inode(struct super_block *sb) 1405static struct inode *gfs2_alloc_inode(struct super_block *sb)
@@ -1431,14 +1423,13 @@ const struct super_operations gfs2_super_ops = {
1431 .alloc_inode = gfs2_alloc_inode, 1423 .alloc_inode = gfs2_alloc_inode,
1432 .destroy_inode = gfs2_destroy_inode, 1424 .destroy_inode = gfs2_destroy_inode,
1433 .write_inode = gfs2_write_inode, 1425 .write_inode = gfs2_write_inode,
1434 .delete_inode = gfs2_delete_inode, 1426 .evict_inode = gfs2_evict_inode,
1435 .put_super = gfs2_put_super, 1427 .put_super = gfs2_put_super,
1436 .sync_fs = gfs2_sync_fs, 1428 .sync_fs = gfs2_sync_fs,
1437 .freeze_fs = gfs2_freeze, 1429 .freeze_fs = gfs2_freeze,
1438 .unfreeze_fs = gfs2_unfreeze, 1430 .unfreeze_fs = gfs2_unfreeze,
1439 .statfs = gfs2_statfs, 1431 .statfs = gfs2_statfs,
1440 .remount_fs = gfs2_remount_fs, 1432 .remount_fs = gfs2_remount_fs,
1441 .clear_inode = gfs2_clear_inode,
1442 .drop_inode = gfs2_drop_inode, 1433 .drop_inode = gfs2_drop_inode,
1443 .show_options = gfs2_show_options, 1434 .show_options = gfs2_show_options,
1444}; 1435};
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 82f93da00d1b..776af6eb4bcb 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct inode *inode = &ip->i_inode;
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1300 struct gfs2_ea_location el; 1301 struct gfs2_ea_location el;
1301 struct buffer_head *dibh; 1302 struct buffer_head *dibh;
@@ -1321,14 +1322,25 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1321 return error; 1322 return error;
1322 1323
1323 error = gfs2_meta_inode_buffer(ip, &dibh); 1324 error = gfs2_meta_inode_buffer(ip, &dibh);
1324 if (!error) { 1325 if (error)
1325 error = inode_setattr(&ip->i_inode, attr); 1326 goto out_trans_end;
1326 gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error); 1327
1327 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1328 if ((attr->ia_valid & ATTR_SIZE) &&
1328 gfs2_dinode_out(ip, dibh->b_data); 1329 attr->ia_size != i_size_read(inode)) {
1329 brelse(dibh); 1330 int error;
1331
1332 error = vmtruncate(inode, attr->ia_size);
1333 gfs2_assert_warn(GFS2_SB(inode), !error);
1330 } 1334 }
1331 1335
1336 setattr_copy(inode, attr);
1337 mark_inode_dirty(inode);
1338
1339 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1340 gfs2_dinode_out(ip, dibh->b_data);
1341 brelse(dibh);
1342
1343out_trans_end:
1332 gfs2_trans_end(sdp); 1344 gfs2_trans_end(sdp);
1333 return error; 1345 return error;
1334} 1346}
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index fe35e3b626c4..4f55651aaa51 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -193,7 +193,7 @@ extern int hfs_inode_setattr(struct dentry *, struct iattr *);
193extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, 193extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
194 __be32 log_size, __be32 phys_size, u32 clump_size); 194 __be32 log_size, __be32 phys_size, u32 clump_size);
195extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *); 195extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *);
196extern void hfs_clear_inode(struct inode *); 196extern void hfs_evict_inode(struct inode *);
197extern void hfs_delete_inode(struct inode *); 197extern void hfs_delete_inode(struct inode *);
198 198
199/* attr.c */ 199/* attr.c */
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 14f5cb1b9fdc..397b7adc7ce6 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping,
39 loff_t pos, unsigned len, unsigned flags, 39 loff_t pos, unsigned len, unsigned flags,
40 struct page **pagep, void **fsdata) 40 struct page **pagep, void **fsdata)
41{ 41{
42 int ret;
43
42 *pagep = NULL; 44 *pagep = NULL;
43 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 45 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
44 hfs_get_block, 46 hfs_get_block,
45 &HFS_I(mapping->host)->phys_size); 47 &HFS_I(mapping->host)->phys_size);
48 if (unlikely(ret)) {
49 loff_t isize = mapping->host->i_size;
50 if (pos + len > isize)
51 vmtruncate(mapping->host, isize);
52 }
53
54 return ret;
46} 55}
47 56
48static sector_t hfs_bmap(struct address_space *mapping, sector_t block) 57static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
@@ -112,9 +121,24 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
112{ 121{
113 struct file *file = iocb->ki_filp; 122 struct file *file = iocb->ki_filp;
114 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 123 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
124 ssize_t ret;
115 125
116 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 126 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
117 offset, nr_segs, hfs_get_block, NULL); 127 offset, nr_segs, hfs_get_block, NULL);
128
129 /*
130 * In case of error extending write may have instantiated a few
131 * blocks outside i_size. Trim these off again.
132 */
133 if (unlikely((rw & WRITE) && ret < 0)) {
134 loff_t isize = i_size_read(inode);
135 loff_t end = offset + iov_length(iov, nr_segs);
136
137 if (end > isize)
138 vmtruncate(inode, isize);
139 }
140
141 return ret;
118} 142}
119 143
120static int hfs_writepages(struct address_space *mapping, 144static int hfs_writepages(struct address_space *mapping,
@@ -507,8 +531,10 @@ out:
507 return NULL; 531 return NULL;
508} 532}
509 533
510void hfs_clear_inode(struct inode *inode) 534void hfs_evict_inode(struct inode *inode)
511{ 535{
536 truncate_inode_pages(&inode->i_data, 0);
537 end_writeback(inode);
512 if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { 538 if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
513 HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; 539 HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
514 iput(HFS_I(inode)->rsrc_inode); 540 iput(HFS_I(inode)->rsrc_inode);
@@ -588,13 +614,43 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
588 attr->ia_mode = inode->i_mode & ~S_IWUGO; 614 attr->ia_mode = inode->i_mode & ~S_IWUGO;
589 attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask; 615 attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask;
590 } 616 }
591 error = inode_setattr(inode, attr);
592 if (error)
593 return error;
594 617
618 if ((attr->ia_valid & ATTR_SIZE) &&
619 attr->ia_size != i_size_read(inode)) {
620 error = vmtruncate(inode, attr->ia_size);
621 if (error)
622 return error;
623 }
624
625 setattr_copy(inode, attr);
626 mark_inode_dirty(inode);
595 return 0; 627 return 0;
596} 628}
597 629
630static int hfs_file_fsync(struct file *filp, int datasync)
631{
632 struct inode *inode = filp->f_mapping->host;
633 struct super_block * sb;
634 int ret, err;
635
636 /* sync the inode to buffers */
637 ret = write_inode_now(inode, 0);
638
639 /* sync the superblock to buffers */
640 sb = inode->i_sb;
641 if (sb->s_dirt) {
642 lock_super(sb);
643 sb->s_dirt = 0;
644 if (!(sb->s_flags & MS_RDONLY))
645 hfs_mdb_commit(sb);
646 unlock_super(sb);
647 }
648 /* .. finally sync the buffers to disk */
649 err = sync_blockdev(sb->s_bdev);
650 if (!ret)
651 ret = err;
652 return ret;
653}
598 654
599static const struct file_operations hfs_file_operations = { 655static const struct file_operations hfs_file_operations = {
600 .llseek = generic_file_llseek, 656 .llseek = generic_file_llseek,
@@ -604,7 +660,7 @@ static const struct file_operations hfs_file_operations = {
604 .aio_write = generic_file_aio_write, 660 .aio_write = generic_file_aio_write,
605 .mmap = generic_file_mmap, 661 .mmap = generic_file_mmap,
606 .splice_read = generic_file_splice_read, 662 .splice_read = generic_file_splice_read,
607 .fsync = file_fsync, 663 .fsync = hfs_file_fsync,
608 .open = hfs_file_open, 664 .open = hfs_file_open,
609 .release = hfs_file_release, 665 .release = hfs_file_release,
610}; 666};
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0a81eb7111f3..34235d4bf08b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -181,7 +181,7 @@ static const struct super_operations hfs_super_operations = {
181 .alloc_inode = hfs_alloc_inode, 181 .alloc_inode = hfs_alloc_inode,
182 .destroy_inode = hfs_destroy_inode, 182 .destroy_inode = hfs_destroy_inode,
183 .write_inode = hfs_write_inode, 183 .write_inode = hfs_write_inode,
184 .clear_inode = hfs_clear_inode, 184 .evict_inode = hfs_evict_inode,
185 .put_super = hfs_put_super, 185 .put_super = hfs_put_super,
186 .write_super = hfs_write_super, 186 .write_super = hfs_write_super,
187 .sync_fs = hfs_sync_fs, 187 .sync_fs = hfs_sync_fs,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 6505c30ad965..dc856be3c2b0 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *);
351 351
352/* super.c */ 352/* super.c */
353struct inode *hfsplus_iget(struct super_block *, unsigned long); 353struct inode *hfsplus_iget(struct super_block *, unsigned long);
354int hfsplus_sync_fs(struct super_block *sb, int wait);
354 355
355/* tables.c */ 356/* tables.c */
356extern u16 hfsplus_case_fold_table[]; 357extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9bbb82924a22..c5a979d62c65 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
31 loff_t pos, unsigned len, unsigned flags, 31 loff_t pos, unsigned len, unsigned flags,
32 struct page **pagep, void **fsdata) 32 struct page **pagep, void **fsdata)
33{ 33{
34 int ret;
35
34 *pagep = NULL; 36 *pagep = NULL;
35 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 37 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
36 hfsplus_get_block, 38 hfsplus_get_block,
37 &HFSPLUS_I(mapping->host).phys_size); 39 &HFSPLUS_I(mapping->host).phys_size);
40 if (unlikely(ret)) {
41 loff_t isize = mapping->host->i_size;
42 if (pos + len > isize)
43 vmtruncate(mapping->host, isize);
44 }
45
46 return ret;
38} 47}
39 48
40static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) 49static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
@@ -105,9 +114,24 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
105{ 114{
106 struct file *file = iocb->ki_filp; 115 struct file *file = iocb->ki_filp;
107 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 116 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
117 ssize_t ret;
108 118
109 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 119 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
110 offset, nr_segs, hfsplus_get_block, NULL); 120 offset, nr_segs, hfsplus_get_block, NULL);
121
122 /*
123 * In case of error extending write may have instantiated a few
124 * blocks outside i_size. Trim these off again.
125 */
126 if (unlikely((rw & WRITE) && ret < 0)) {
127 loff_t isize = i_size_read(inode);
128 loff_t end = offset + iov_length(iov, nr_segs);
129
130 if (end > isize)
131 vmtruncate(inode, isize);
132 }
133
134 return ret;
111} 135}
112 136
113static int hfsplus_writepages(struct address_space *mapping, 137static int hfsplus_writepages(struct address_space *mapping,
@@ -266,9 +290,56 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
266 return 0; 290 return 0;
267} 291}
268 292
293static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
294{
295 struct inode *inode = dentry->d_inode;
296 int error;
297
298 error = inode_change_ok(inode, attr);
299 if (error)
300 return error;
301
302 if ((attr->ia_valid & ATTR_SIZE) &&
303 attr->ia_size != i_size_read(inode)) {
304 error = vmtruncate(inode, attr->ia_size);
305 if (error)
306 return error;
307 }
308
309 setattr_copy(inode, attr);
310 mark_inode_dirty(inode);
311 return 0;
312}
313
314static int hfsplus_file_fsync(struct file *filp, int datasync)
315{
316 struct inode *inode = filp->f_mapping->host;
317 struct super_block * sb;
318 int ret, err;
319
320 /* sync the inode to buffers */
321 ret = write_inode_now(inode, 0);
322
323 /* sync the superblock to buffers */
324 sb = inode->i_sb;
325 if (sb->s_dirt) {
326 if (!(sb->s_flags & MS_RDONLY))
327 hfsplus_sync_fs(sb, 1);
328 else
329 sb->s_dirt = 0;
330 }
331
332 /* .. finally sync the buffers to disk */
333 err = sync_blockdev(sb->s_bdev);
334 if (!ret)
335 ret = err;
336 return ret;
337}
338
269static const struct inode_operations hfsplus_file_inode_operations = { 339static const struct inode_operations hfsplus_file_inode_operations = {
270 .lookup = hfsplus_file_lookup, 340 .lookup = hfsplus_file_lookup,
271 .truncate = hfsplus_file_truncate, 341 .truncate = hfsplus_file_truncate,
342 .setattr = hfsplus_setattr,
272 .setxattr = hfsplus_setxattr, 343 .setxattr = hfsplus_setxattr,
273 .getxattr = hfsplus_getxattr, 344 .getxattr = hfsplus_getxattr,
274 .listxattr = hfsplus_listxattr, 345 .listxattr = hfsplus_listxattr,
@@ -282,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = {
282 .aio_write = generic_file_aio_write, 353 .aio_write = generic_file_aio_write,
283 .mmap = generic_file_mmap, 354 .mmap = generic_file_mmap,
284 .splice_read = generic_file_splice_read, 355 .splice_read = generic_file_splice_read,
285 .fsync = file_fsync, 356 .fsync = hfsplus_file_fsync,
286 .open = hfsplus_file_open, 357 .open = hfsplus_file_open,
287 .release = hfsplus_file_release, 358 .release = hfsplus_file_release,
288 .unlocked_ioctl = hfsplus_ioctl, 359 .unlocked_ioctl = hfsplus_ioctl,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 74b473a8ef92..3b55c050c742 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -145,16 +145,18 @@ static int hfsplus_write_inode(struct inode *inode,
145 return ret; 145 return ret;
146} 146}
147 147
148static void hfsplus_clear_inode(struct inode *inode) 148static void hfsplus_evict_inode(struct inode *inode)
149{ 149{
150 dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino); 150 dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
151 truncate_inode_pages(&inode->i_data, 0);
152 end_writeback(inode);
151 if (HFSPLUS_IS_RSRC(inode)) { 153 if (HFSPLUS_IS_RSRC(inode)) {
152 HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; 154 HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
153 iput(HFSPLUS_I(inode).rsrc_inode); 155 iput(HFSPLUS_I(inode).rsrc_inode);
154 } 156 }
155} 157}
156 158
157static int hfsplus_sync_fs(struct super_block *sb, int wait) 159int hfsplus_sync_fs(struct super_block *sb, int wait)
158{ 160{
159 struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; 161 struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
160 162
@@ -293,7 +295,7 @@ static const struct super_operations hfsplus_sops = {
293 .alloc_inode = hfsplus_alloc_inode, 295 .alloc_inode = hfsplus_alloc_inode,
294 .destroy_inode = hfsplus_destroy_inode, 296 .destroy_inode = hfsplus_destroy_inode,
295 .write_inode = hfsplus_write_inode, 297 .write_inode = hfsplus_write_inode,
296 .clear_inode = hfsplus_clear_inode, 298 .evict_inode = hfsplus_evict_inode,
297 .put_super = hfsplus_put_super, 299 .put_super = hfsplus_put_super,
298 .write_super = hfsplus_write_super, 300 .write_super = hfsplus_write_super,
299 .sync_fs = hfsplus_sync_fs, 301 .sync_fs = hfsplus_sync_fs,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 2f34f8f2134b..6bbd75c5589b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -53,18 +53,28 @@ struct hostfs_iattr {
53 struct timespec ia_ctime; 53 struct timespec ia_ctime;
54}; 54};
55 55
56extern int stat_file(const char *path, unsigned long long *inode_out, 56struct hostfs_stat {
57 int *mode_out, int *nlink_out, int *uid_out, int *gid_out, 57 unsigned long long ino;
58 unsigned long long *size_out, struct timespec *atime_out, 58 unsigned int mode;
59 struct timespec *mtime_out, struct timespec *ctime_out, 59 unsigned int nlink;
60 int *blksize_out, unsigned long long *blocks_out, int fd); 60 unsigned int uid;
61 unsigned int gid;
62 unsigned long long size;
63 struct timespec atime, mtime, ctime;
64 unsigned int blksize;
65 unsigned long long blocks;
66 unsigned int maj;
67 unsigned int min;
68};
69
70extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
61extern int access_file(char *path, int r, int w, int x); 71extern int access_file(char *path, int r, int w, int x);
62extern int open_file(char *path, int r, int w, int append); 72extern int open_file(char *path, int r, int w, int append);
63extern int file_type(const char *path, int *maj, int *min);
64extern void *open_dir(char *path, int *err_out); 73extern void *open_dir(char *path, int *err_out);
65extern char *read_dir(void *stream, unsigned long long *pos, 74extern char *read_dir(void *stream, unsigned long long *pos,
66 unsigned long long *ino_out, int *len_out); 75 unsigned long long *ino_out, int *len_out);
67extern void close_file(void *stream); 76extern void close_file(void *stream);
77extern int replace_file(int oldfd, int fd);
68extern void close_dir(void *stream); 78extern void close_dir(void *stream);
69extern int read_file(int fd, unsigned long long *offset, char *buf, int len); 79extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
70extern int write_file(int fd, unsigned long long *offset, const char *buf, 80extern int write_file(int fd, unsigned long long *offset, const char *buf,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 87ac1891a185..dd1e55535a4e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -14,12 +14,12 @@
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/mount.h> 16#include <linux/mount.h>
17#include <linux/namei.h>
17#include "hostfs.h" 18#include "hostfs.h"
18#include "init.h" 19#include "init.h"
19#include "kern.h" 20#include "kern.h"
20 21
21struct hostfs_inode_info { 22struct hostfs_inode_info {
22 char *host_filename;
23 int fd; 23 int fd;
24 fmode_t mode; 24 fmode_t mode;
25 struct inode vfs_inode; 25 struct inode vfs_inode;
@@ -49,7 +49,7 @@ static int append = 0;
49 49
50static const struct inode_operations hostfs_iops; 50static const struct inode_operations hostfs_iops;
51static const struct inode_operations hostfs_dir_iops; 51static const struct inode_operations hostfs_dir_iops;
52static const struct address_space_operations hostfs_link_aops; 52static const struct inode_operations hostfs_link_iops;
53 53
54#ifndef MODULE 54#ifndef MODULE
55static int __init hostfs_args(char *options, int *add) 55static int __init hostfs_args(char *options, int *add)
@@ -90,71 +90,58 @@ __uml_setup("hostfs=", hostfs_args,
90); 90);
91#endif 91#endif
92 92
93static char *dentry_name(struct dentry *dentry, int extra) 93static char *__dentry_name(struct dentry *dentry, char *name)
94{ 94{
95 struct dentry *parent; 95 char *p = __dentry_path(dentry, name, PATH_MAX);
96 char *root, *name; 96 char *root;
97 int len; 97 size_t len;
98
99 len = 0;
100 parent = dentry;
101 while (parent->d_parent != parent) {
102 len += parent->d_name.len + 1;
103 parent = parent->d_parent;
104 }
105 98
106 root = HOSTFS_I(parent->d_inode)->host_filename; 99 spin_unlock(&dcache_lock);
107 len += strlen(root);
108 name = kmalloc(len + extra + 1, GFP_KERNEL);
109 if (name == NULL)
110 return NULL;
111 100
112 name[len] = '\0'; 101 root = dentry->d_sb->s_fs_info;
113 parent = dentry; 102 len = strlen(root);
114 while (parent->d_parent != parent) { 103 if (IS_ERR(p)) {
115 len -= parent->d_name.len + 1; 104 __putname(name);
116 name[len] = '/'; 105 return NULL;
117 strncpy(&name[len + 1], parent->d_name.name, 106 }
118 parent->d_name.len); 107 strncpy(name, root, PATH_MAX);
119 parent = parent->d_parent; 108 if (len > p - name) {
109 __putname(name);
110 return NULL;
111 }
112 if (p > name + len) {
113 char *s = name + len;
114 while ((*s++ = *p++) != '\0')
115 ;
120 } 116 }
121 strncpy(name, root, strlen(root));
122 return name; 117 return name;
123} 118}
124 119
125static char *inode_name(struct inode *ino, int extra) 120static char *dentry_name(struct dentry *dentry)
126{ 121{
127 struct dentry *dentry; 122 char *name = __getname();
123 if (!name)
124 return NULL;
128 125
129 dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); 126 spin_lock(&dcache_lock);
130 return dentry_name(dentry, extra); 127 return __dentry_name(dentry, name); /* will unlock */
131} 128}
132 129
133static int read_name(struct inode *ino, char *name) 130static char *inode_name(struct inode *ino)
134{ 131{
135 /* 132 struct dentry *dentry;
136 * The non-int inode fields are copied into ints by stat_file and 133 char *name = __getname();
137 * then copied into the inode because passing the actual pointers 134 if (!name)
138 * in and having them treated as int * breaks on big-endian machines 135 return NULL;
139 */
140 int err;
141 int i_mode, i_nlink, i_blksize;
142 unsigned long long i_size;
143 unsigned long long i_ino;
144 unsigned long long i_blocks;
145
146 err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
147 &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
148 &ino->i_ctime, &i_blksize, &i_blocks, -1);
149 if (err)
150 return err;
151 136
152 ino->i_ino = i_ino; 137 spin_lock(&dcache_lock);
153 ino->i_mode = i_mode; 138 if (list_empty(&ino->i_dentry)) {
154 ino->i_nlink = i_nlink; 139 spin_unlock(&dcache_lock);
155 ino->i_size = i_size; 140 __putname(name);
156 ino->i_blocks = i_blocks; 141 return NULL;
157 return 0; 142 }
143 dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
144 return __dentry_name(dentry, name); /* will unlock */
158} 145}
159 146
160static char *follow_link(char *link) 147static char *follow_link(char *link)
@@ -205,53 +192,11 @@ static char *follow_link(char *link)
205 return ERR_PTR(n); 192 return ERR_PTR(n);
206} 193}
207 194
208static int hostfs_read_inode(struct inode *ino)
209{
210 char *name;
211 int err = 0;
212
213 /*
214 * Unfortunately, we are called from iget() when we don't have a dentry
215 * allocated yet.
216 */
217 if (list_empty(&ino->i_dentry))
218 goto out;
219
220 err = -ENOMEM;
221 name = inode_name(ino, 0);
222 if (name == NULL)
223 goto out;
224
225 if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) {
226 name = follow_link(name);
227 if (IS_ERR(name)) {
228 err = PTR_ERR(name);
229 goto out;
230 }
231 }
232
233 err = read_name(ino, name);
234 kfree(name);
235 out:
236 return err;
237}
238
239static struct inode *hostfs_iget(struct super_block *sb) 195static struct inode *hostfs_iget(struct super_block *sb)
240{ 196{
241 struct inode *inode; 197 struct inode *inode = new_inode(sb);
242 long ret;
243
244 inode = iget_locked(sb, 0);
245 if (!inode) 198 if (!inode)
246 return ERR_PTR(-ENOMEM); 199 return ERR_PTR(-ENOMEM);
247 if (inode->i_state & I_NEW) {
248 ret = hostfs_read_inode(inode);
249 if (ret < 0) {
250 iget_failed(inode);
251 return ERR_PTR(ret);
252 }
253 unlock_new_inode(inode);
254 }
255 return inode; 200 return inode;
256} 201}
257 202
@@ -269,7 +214,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
269 long long f_files; 214 long long f_files;
270 long long f_ffree; 215 long long f_ffree;
271 216
272 err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename, 217 err = do_statfs(dentry->d_sb->s_fs_info,
273 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, 218 &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
274 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), 219 &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
275 &sf->f_namelen, sf->f_spare); 220 &sf->f_namelen, sf->f_spare);
@@ -288,47 +233,32 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
288{ 233{
289 struct hostfs_inode_info *hi; 234 struct hostfs_inode_info *hi;
290 235
291 hi = kmalloc(sizeof(*hi), GFP_KERNEL); 236 hi = kzalloc(sizeof(*hi), GFP_KERNEL);
292 if (hi == NULL) 237 if (hi == NULL)
293 return NULL; 238 return NULL;
294 239 hi->fd = -1;
295 *hi = ((struct hostfs_inode_info) { .host_filename = NULL,
296 .fd = -1,
297 .mode = 0 });
298 inode_init_once(&hi->vfs_inode); 240 inode_init_once(&hi->vfs_inode);
299 return &hi->vfs_inode; 241 return &hi->vfs_inode;
300} 242}
301 243
302static void hostfs_delete_inode(struct inode *inode) 244static void hostfs_evict_inode(struct inode *inode)
303{ 245{
304 truncate_inode_pages(&inode->i_data, 0); 246 truncate_inode_pages(&inode->i_data, 0);
247 end_writeback(inode);
305 if (HOSTFS_I(inode)->fd != -1) { 248 if (HOSTFS_I(inode)->fd != -1) {
306 close_file(&HOSTFS_I(inode)->fd); 249 close_file(&HOSTFS_I(inode)->fd);
307 HOSTFS_I(inode)->fd = -1; 250 HOSTFS_I(inode)->fd = -1;
308 } 251 }
309 clear_inode(inode);
310} 252}
311 253
312static void hostfs_destroy_inode(struct inode *inode) 254static void hostfs_destroy_inode(struct inode *inode)
313{ 255{
314 kfree(HOSTFS_I(inode)->host_filename);
315
316 /*
317 * XXX: This should not happen, probably. The check is here for
318 * additional safety.
319 */
320 if (HOSTFS_I(inode)->fd != -1) {
321 close_file(&HOSTFS_I(inode)->fd);
322 printk(KERN_DEBUG "Closing host fd in .destroy_inode\n");
323 }
324
325 kfree(HOSTFS_I(inode)); 256 kfree(HOSTFS_I(inode));
326} 257}
327 258
328static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 259static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
329{ 260{
330 struct inode *root = vfs->mnt_sb->s_root->d_inode; 261 const char *root_path = vfs->mnt_sb->s_fs_info;
331 const char *root_path = HOSTFS_I(root)->host_filename;
332 size_t offset = strlen(root_ino) + 1; 262 size_t offset = strlen(root_ino) + 1;
333 263
334 if (strlen(root_path) > offset) 264 if (strlen(root_path) > offset)
@@ -339,9 +269,8 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
339 269
340static const struct super_operations hostfs_sbops = { 270static const struct super_operations hostfs_sbops = {
341 .alloc_inode = hostfs_alloc_inode, 271 .alloc_inode = hostfs_alloc_inode,
342 .drop_inode = generic_delete_inode,
343 .delete_inode = hostfs_delete_inode,
344 .destroy_inode = hostfs_destroy_inode, 272 .destroy_inode = hostfs_destroy_inode,
273 .evict_inode = hostfs_evict_inode,
345 .statfs = hostfs_statfs, 274 .statfs = hostfs_statfs,
346 .show_options = hostfs_show_options, 275 .show_options = hostfs_show_options,
347}; 276};
@@ -353,11 +282,11 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
353 unsigned long long next, ino; 282 unsigned long long next, ino;
354 int error, len; 283 int error, len;
355 284
356 name = dentry_name(file->f_path.dentry, 0); 285 name = dentry_name(file->f_path.dentry);
357 if (name == NULL) 286 if (name == NULL)
358 return -ENOMEM; 287 return -ENOMEM;
359 dir = open_dir(name, &error); 288 dir = open_dir(name, &error);
360 kfree(name); 289 __putname(name);
361 if (dir == NULL) 290 if (dir == NULL)
362 return -error; 291 return -error;
363 next = file->f_pos; 292 next = file->f_pos;
@@ -373,40 +302,59 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
373 302
374int hostfs_file_open(struct inode *ino, struct file *file) 303int hostfs_file_open(struct inode *ino, struct file *file)
375{ 304{
305 static DEFINE_MUTEX(open_mutex);
376 char *name; 306 char *name;
377 fmode_t mode = 0; 307 fmode_t mode = 0;
308 int err;
378 int r = 0, w = 0, fd; 309 int r = 0, w = 0, fd;
379 310
380 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 311 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
381 if ((mode & HOSTFS_I(ino)->mode) == mode) 312 if ((mode & HOSTFS_I(ino)->mode) == mode)
382 return 0; 313 return 0;
383 314
384 /* 315 mode |= HOSTFS_I(ino)->mode;
385 * The file may already have been opened, but with the wrong access,
386 * so this resets things and reopens the file with the new access.
387 */
388 if (HOSTFS_I(ino)->fd != -1) {
389 close_file(&HOSTFS_I(ino)->fd);
390 HOSTFS_I(ino)->fd = -1;
391 }
392 316
393 HOSTFS_I(ino)->mode |= mode; 317retry:
394 if (HOSTFS_I(ino)->mode & FMODE_READ) 318 if (mode & FMODE_READ)
395 r = 1; 319 r = 1;
396 if (HOSTFS_I(ino)->mode & FMODE_WRITE) 320 if (mode & FMODE_WRITE)
397 w = 1; 321 w = 1;
398 if (w) 322 if (w)
399 r = 1; 323 r = 1;
400 324
401 name = dentry_name(file->f_path.dentry, 0); 325 name = dentry_name(file->f_path.dentry);
402 if (name == NULL) 326 if (name == NULL)
403 return -ENOMEM; 327 return -ENOMEM;
404 328
405 fd = open_file(name, r, w, append); 329 fd = open_file(name, r, w, append);
406 kfree(name); 330 __putname(name);
407 if (fd < 0) 331 if (fd < 0)
408 return fd; 332 return fd;
409 FILE_HOSTFS_I(file)->fd = fd; 333
334 mutex_lock(&open_mutex);
335 /* somebody else had handled it first? */
336 if ((mode & HOSTFS_I(ino)->mode) == mode) {
337 mutex_unlock(&open_mutex);
338 return 0;
339 }
340 if ((mode | HOSTFS_I(ino)->mode) != mode) {
341 mode |= HOSTFS_I(ino)->mode;
342 mutex_unlock(&open_mutex);
343 close_file(&fd);
344 goto retry;
345 }
346 if (HOSTFS_I(ino)->fd == -1) {
347 HOSTFS_I(ino)->fd = fd;
348 } else {
349 err = replace_file(fd, HOSTFS_I(ino)->fd);
350 close_file(&fd);
351 if (err < 0) {
352 mutex_unlock(&open_mutex);
353 return err;
354 }
355 }
356 HOSTFS_I(ino)->mode = mode;
357 mutex_unlock(&open_mutex);
410 358
411 return 0; 359 return 0;
412} 360}
@@ -544,54 +492,50 @@ static const struct address_space_operations hostfs_aops = {
544 .write_end = hostfs_write_end, 492 .write_end = hostfs_write_end,
545}; 493};
546 494
547static int init_inode(struct inode *inode, struct dentry *dentry) 495static int read_name(struct inode *ino, char *name)
548{ 496{
549 char *name; 497 dev_t rdev;
550 int type, err = -ENOMEM; 498 struct hostfs_stat st;
551 int maj, min; 499 int err = stat_file(name, &st, -1);
552 dev_t rdev = 0; 500 if (err)
501 return err;
553 502
554 if (dentry) { 503 /* Reencode maj and min with the kernel encoding.*/
555 name = dentry_name(dentry, 0); 504 rdev = MKDEV(st.maj, st.min);
556 if (name == NULL)
557 goto out;
558 type = file_type(name, &maj, &min);
559 /* Reencode maj and min with the kernel encoding.*/
560 rdev = MKDEV(maj, min);
561 kfree(name);
562 }
563 else type = OS_TYPE_DIR;
564 505
565 err = 0; 506 switch (st.mode & S_IFMT) {
566 if (type == OS_TYPE_SYMLINK) 507 case S_IFLNK:
567 inode->i_op = &page_symlink_inode_operations; 508 ino->i_op = &hostfs_link_iops;
568 else if (type == OS_TYPE_DIR)
569 inode->i_op = &hostfs_dir_iops;
570 else inode->i_op = &hostfs_iops;
571
572 if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
573 else inode->i_fop = &hostfs_file_fops;
574
575 if (type == OS_TYPE_SYMLINK)
576 inode->i_mapping->a_ops = &hostfs_link_aops;
577 else inode->i_mapping->a_ops = &hostfs_aops;
578
579 switch (type) {
580 case OS_TYPE_CHARDEV:
581 init_special_inode(inode, S_IFCHR, rdev);
582 break; 509 break;
583 case OS_TYPE_BLOCKDEV: 510 case S_IFDIR:
584 init_special_inode(inode, S_IFBLK, rdev); 511 ino->i_op = &hostfs_dir_iops;
512 ino->i_fop = &hostfs_dir_fops;
585 break; 513 break;
586 case OS_TYPE_FIFO: 514 case S_IFCHR:
587 init_special_inode(inode, S_IFIFO, 0); 515 case S_IFBLK:
516 case S_IFIFO:
517 case S_IFSOCK:
518 init_special_inode(ino, st.mode & S_IFMT, rdev);
519 ino->i_op = &hostfs_iops;
588 break; 520 break;
589 case OS_TYPE_SOCK: 521
590 init_special_inode(inode, S_IFSOCK, 0); 522 default:
591 break; 523 ino->i_op = &hostfs_iops;
592 } 524 ino->i_fop = &hostfs_file_fops;
593 out: 525 ino->i_mapping->a_ops = &hostfs_aops;
594 return err; 526 }
527
528 ino->i_ino = st.ino;
529 ino->i_mode = st.mode;
530 ino->i_nlink = st.nlink;
531 ino->i_uid = st.uid;
532 ino->i_gid = st.gid;
533 ino->i_atime = st.atime;
534 ino->i_mtime = st.mtime;
535 ino->i_ctime = st.ctime;
536 ino->i_size = st.size;
537 ino->i_blocks = st.blocks;
538 return 0;
595} 539}
596 540
597int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 541int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -607,12 +551,8 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
607 goto out; 551 goto out;
608 } 552 }
609 553
610 error = init_inode(inode, dentry);
611 if (error)
612 goto out_put;
613
614 error = -ENOMEM; 554 error = -ENOMEM;
615 name = dentry_name(dentry, 0); 555 name = dentry_name(dentry);
616 if (name == NULL) 556 if (name == NULL)
617 goto out_put; 557 goto out_put;
618 558
@@ -622,9 +562,10 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
622 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); 562 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
623 if (fd < 0) 563 if (fd < 0)
624 error = fd; 564 error = fd;
625 else error = read_name(inode, name); 565 else
566 error = read_name(inode, name);
626 567
627 kfree(name); 568 __putname(name);
628 if (error) 569 if (error)
629 goto out_put; 570 goto out_put;
630 571
@@ -652,17 +593,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
652 goto out; 593 goto out;
653 } 594 }
654 595
655 err = init_inode(inode, dentry);
656 if (err)
657 goto out_put;
658
659 err = -ENOMEM; 596 err = -ENOMEM;
660 name = dentry_name(dentry, 0); 597 name = dentry_name(dentry);
661 if (name == NULL) 598 if (name == NULL)
662 goto out_put; 599 goto out_put;
663 600
664 err = read_name(inode, name); 601 err = read_name(inode, name);
665 kfree(name); 602
603 __putname(name);
666 if (err == -ENOENT) { 604 if (err == -ENOENT) {
667 iput(inode); 605 iput(inode);
668 inode = NULL; 606 inode = NULL;
@@ -680,36 +618,21 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
680 return ERR_PTR(err); 618 return ERR_PTR(err);
681} 619}
682 620
683static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
684{
685 char *file;
686 int len;
687
688 file = inode_name(ino, dentry->d_name.len + 1);
689 if (file == NULL)
690 return NULL;
691 strcat(file, "/");
692 len = strlen(file);
693 strncat(file, dentry->d_name.name, dentry->d_name.len);
694 file[len + dentry->d_name.len] = '\0';
695 return file;
696}
697
698int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) 621int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
699{ 622{
700 char *from_name, *to_name; 623 char *from_name, *to_name;
701 int err; 624 int err;
702 625
703 if ((from_name = inode_dentry_name(ino, from)) == NULL) 626 if ((from_name = dentry_name(from)) == NULL)
704 return -ENOMEM; 627 return -ENOMEM;
705 to_name = dentry_name(to, 0); 628 to_name = dentry_name(to);
706 if (to_name == NULL) { 629 if (to_name == NULL) {
707 kfree(from_name); 630 __putname(from_name);
708 return -ENOMEM; 631 return -ENOMEM;
709 } 632 }
710 err = link_file(to_name, from_name); 633 err = link_file(to_name, from_name);
711 kfree(from_name); 634 __putname(from_name);
712 kfree(to_name); 635 __putname(to_name);
713 return err; 636 return err;
714} 637}
715 638
@@ -718,13 +641,14 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
718 char *file; 641 char *file;
719 int err; 642 int err;
720 643
721 if ((file = inode_dentry_name(ino, dentry)) == NULL)
722 return -ENOMEM;
723 if (append) 644 if (append)
724 return -EPERM; 645 return -EPERM;
725 646
647 if ((file = dentry_name(dentry)) == NULL)
648 return -ENOMEM;
649
726 err = unlink_file(file); 650 err = unlink_file(file);
727 kfree(file); 651 __putname(file);
728 return err; 652 return err;
729} 653}
730 654
@@ -733,10 +657,10 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
733 char *file; 657 char *file;
734 int err; 658 int err;
735 659
736 if ((file = inode_dentry_name(ino, dentry)) == NULL) 660 if ((file = dentry_name(dentry)) == NULL)
737 return -ENOMEM; 661 return -ENOMEM;
738 err = make_symlink(file, to); 662 err = make_symlink(file, to);
739 kfree(file); 663 __putname(file);
740 return err; 664 return err;
741} 665}
742 666
@@ -745,10 +669,10 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
745 char *file; 669 char *file;
746 int err; 670 int err;
747 671
748 if ((file = inode_dentry_name(ino, dentry)) == NULL) 672 if ((file = dentry_name(dentry)) == NULL)
749 return -ENOMEM; 673 return -ENOMEM;
750 err = do_mkdir(file, mode); 674 err = do_mkdir(file, mode);
751 kfree(file); 675 __putname(file);
752 return err; 676 return err;
753} 677}
754 678
@@ -757,10 +681,10 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
757 char *file; 681 char *file;
758 int err; 682 int err;
759 683
760 if ((file = inode_dentry_name(ino, dentry)) == NULL) 684 if ((file = dentry_name(dentry)) == NULL)
761 return -ENOMEM; 685 return -ENOMEM;
762 err = do_rmdir(file); 686 err = do_rmdir(file);
763 kfree(file); 687 __putname(file);
764 return err; 688 return err;
765} 689}
766 690
@@ -776,22 +700,20 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
776 goto out; 700 goto out;
777 } 701 }
778 702
779 err = init_inode(inode, dentry);
780 if (err)
781 goto out_put;
782
783 err = -ENOMEM; 703 err = -ENOMEM;
784 name = dentry_name(dentry, 0); 704 name = dentry_name(dentry);
785 if (name == NULL) 705 if (name == NULL)
786 goto out_put; 706 goto out_put;
787 707
788 init_special_inode(inode, mode, dev); 708 init_special_inode(inode, mode, dev);
789 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); 709 err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
790 if (err) 710 if (!err)
791 goto out_free; 711 goto out_free;
792 712
793 err = read_name(inode, name); 713 err = read_name(inode, name);
794 kfree(name); 714 __putname(name);
715 if (err)
716 goto out_put;
795 if (err) 717 if (err)
796 goto out_put; 718 goto out_put;
797 719
@@ -799,7 +721,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
799 return 0; 721 return 0;
800 722
801 out_free: 723 out_free:
802 kfree(name); 724 __putname(name);
803 out_put: 725 out_put:
804 iput(inode); 726 iput(inode);
805 out: 727 out:
@@ -812,15 +734,15 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
812 char *from_name, *to_name; 734 char *from_name, *to_name;
813 int err; 735 int err;
814 736
815 if ((from_name = inode_dentry_name(from_ino, from)) == NULL) 737 if ((from_name = dentry_name(from)) == NULL)
816 return -ENOMEM; 738 return -ENOMEM;
817 if ((to_name = inode_dentry_name(to_ino, to)) == NULL) { 739 if ((to_name = dentry_name(to)) == NULL) {
818 kfree(from_name); 740 __putname(from_name);
819 return -ENOMEM; 741 return -ENOMEM;
820 } 742 }
821 err = rename_file(from_name, to_name); 743 err = rename_file(from_name, to_name);
822 kfree(from_name); 744 __putname(from_name);
823 kfree(to_name); 745 __putname(to_name);
824 return err; 746 return err;
825} 747}
826 748
@@ -832,7 +754,7 @@ int hostfs_permission(struct inode *ino, int desired)
832 if (desired & MAY_READ) r = 1; 754 if (desired & MAY_READ) r = 1;
833 if (desired & MAY_WRITE) w = 1; 755 if (desired & MAY_WRITE) w = 1;
834 if (desired & MAY_EXEC) x = 1; 756 if (desired & MAY_EXEC) x = 1;
835 name = inode_name(ino, 0); 757 name = inode_name(ino);
836 if (name == NULL) 758 if (name == NULL)
837 return -ENOMEM; 759 return -ENOMEM;
838 760
@@ -841,7 +763,7 @@ int hostfs_permission(struct inode *ino, int desired)
841 err = 0; 763 err = 0;
842 else 764 else
843 err = access_file(name, r, w, x); 765 err = access_file(name, r, w, x);
844 kfree(name); 766 __putname(name);
845 if (!err) 767 if (!err)
846 err = generic_permission(ino, desired, NULL); 768 err = generic_permission(ino, desired, NULL);
847 return err; 769 return err;
@@ -849,13 +771,14 @@ int hostfs_permission(struct inode *ino, int desired)
849 771
850int hostfs_setattr(struct dentry *dentry, struct iattr *attr) 772int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
851{ 773{
774 struct inode *inode = dentry->d_inode;
852 struct hostfs_iattr attrs; 775 struct hostfs_iattr attrs;
853 char *name; 776 char *name;
854 int err; 777 int err;
855 778
856 int fd = HOSTFS_I(dentry->d_inode)->fd; 779 int fd = HOSTFS_I(inode)->fd;
857 780
858 err = inode_change_ok(dentry->d_inode, attr); 781 err = inode_change_ok(inode, attr);
859 if (err) 782 if (err)
860 return err; 783 return err;
861 784
@@ -897,15 +820,26 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
897 if (attr->ia_valid & ATTR_MTIME_SET) { 820 if (attr->ia_valid & ATTR_MTIME_SET) {
898 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; 821 attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
899 } 822 }
900 name = dentry_name(dentry, 0); 823 name = dentry_name(dentry);
901 if (name == NULL) 824 if (name == NULL)
902 return -ENOMEM; 825 return -ENOMEM;
903 err = set_attr(name, &attrs, fd); 826 err = set_attr(name, &attrs, fd);
904 kfree(name); 827 __putname(name);
905 if (err) 828 if (err)
906 return err; 829 return err;
907 830
908 return inode_setattr(dentry->d_inode, attr); 831 if ((attr->ia_valid & ATTR_SIZE) &&
832 attr->ia_size != i_size_read(inode)) {
833 int error;
834
835 error = vmtruncate(inode, attr->ia_size);
836 if (err)
837 return err;
838 }
839
840 setattr_copy(inode, attr);
841 mark_inode_dirty(inode);
842 return 0;
909} 843}
910 844
911static const struct inode_operations hostfs_iops = { 845static const struct inode_operations hostfs_iops = {
@@ -935,32 +869,41 @@ static const struct inode_operations hostfs_dir_iops = {
935 .setattr = hostfs_setattr, 869 .setattr = hostfs_setattr,
936}; 870};
937 871
938int hostfs_link_readpage(struct file *file, struct page *page) 872static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
939{ 873{
940 char *buffer, *name; 874 char *link = __getname();
941 int err; 875 if (link) {
942 876 char *path = dentry_name(dentry);
943 buffer = kmap(page); 877 int err = -ENOMEM;
944 name = inode_name(page->mapping->host, 0); 878 if (path) {
945 if (name == NULL) 879 int err = hostfs_do_readlink(path, link, PATH_MAX);
946 return -ENOMEM; 880 if (err == PATH_MAX)
947 err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE); 881 err = -E2BIG;
948 kfree(name); 882 __putname(path);
949 if (err == PAGE_CACHE_SIZE) 883 }
950 err = -E2BIG; 884 if (err < 0) {
951 else if (err > 0) { 885 __putname(link);
952 flush_dcache_page(page); 886 link = ERR_PTR(err);
953 SetPageUptodate(page); 887 }
954 if (PageError(page)) ClearPageError(page); 888 } else {
955 err = 0; 889 link = ERR_PTR(-ENOMEM);
956 } 890 }
957 kunmap(page); 891
958 unlock_page(page); 892 nd_set_link(nd, link);
959 return err; 893 return NULL;
960} 894}
961 895
962static const struct address_space_operations hostfs_link_aops = { 896static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
963 .readpage = hostfs_link_readpage, 897{
898 char *s = nd_get_link(nd);
899 if (!IS_ERR(s))
900 __putname(s);
901}
902
903static const struct inode_operations hostfs_link_iops = {
904 .readlink = generic_readlink,
905 .follow_link = hostfs_follow_link,
906 .put_link = hostfs_put_link,
964}; 907};
965 908
966static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) 909static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
@@ -980,49 +923,41 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
980 req_root = ""; 923 req_root = "";
981 924
982 err = -ENOMEM; 925 err = -ENOMEM;
983 host_root_path = kmalloc(strlen(root_ino) + 1 926 sb->s_fs_info = host_root_path =
984 + strlen(req_root) + 1, GFP_KERNEL); 927 kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL);
985 if (host_root_path == NULL) 928 if (host_root_path == NULL)
986 goto out; 929 goto out;
987 930
988 sprintf(host_root_path, "%s/%s", root_ino, req_root); 931 sprintf(host_root_path, "%s/%s", root_ino, req_root);
989 932
990 root_inode = hostfs_iget(sb); 933 root_inode = new_inode(sb);
991 if (IS_ERR(root_inode)) { 934 if (!root_inode)
992 err = PTR_ERR(root_inode); 935 goto out;
993 goto out_free;
994 }
995 936
996 err = init_inode(root_inode, NULL); 937 err = read_name(root_inode, host_root_path);
997 if (err) 938 if (err)
998 goto out_put; 939 goto out_put;
999 940
1000 HOSTFS_I(root_inode)->host_filename = host_root_path; 941 if (S_ISLNK(root_inode->i_mode)) {
1001 /* 942 char *name = follow_link(host_root_path);
1002 * Avoid that in the error path, iput(root_inode) frees again 943 if (IS_ERR(name))
1003 * host_root_path through hostfs_destroy_inode! 944 err = PTR_ERR(name);
1004 */ 945 else
1005 host_root_path = NULL; 946 err = read_name(root_inode, name);
947 kfree(name);
948 if (err)
949 goto out_put;
950 }
1006 951
1007 err = -ENOMEM; 952 err = -ENOMEM;
1008 sb->s_root = d_alloc_root(root_inode); 953 sb->s_root = d_alloc_root(root_inode);
1009 if (sb->s_root == NULL) 954 if (sb->s_root == NULL)
1010 goto out_put; 955 goto out_put;
1011 956
1012 err = hostfs_read_inode(root_inode);
1013 if (err) {
1014 /* No iput in this case because the dput does that for us */
1015 dput(sb->s_root);
1016 sb->s_root = NULL;
1017 goto out;
1018 }
1019
1020 return 0; 957 return 0;
1021 958
1022out_put: 959out_put:
1023 iput(root_inode); 960 iput(root_inode);
1024out_free:
1025 kfree(host_root_path);
1026out: 961out:
1027 return err; 962 return err;
1028} 963}
@@ -1034,11 +969,17 @@ static int hostfs_read_sb(struct file_system_type *type,
1034 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); 969 return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
1035} 970}
1036 971
972static void hostfs_kill_sb(struct super_block *s)
973{
974 kill_anon_super(s);
975 kfree(s->s_fs_info);
976}
977
1037static struct file_system_type hostfs_type = { 978static struct file_system_type hostfs_type = {
1038 .owner = THIS_MODULE, 979 .owner = THIS_MODULE,
1039 .name = "hostfs", 980 .name = "hostfs",
1040 .get_sb = hostfs_read_sb, 981 .get_sb = hostfs_read_sb,
1041 .kill_sb = kill_anon_super, 982 .kill_sb = hostfs_kill_sb,
1042 .fs_flags = 0, 983 .fs_flags = 0,
1043}; 984};
1044 985
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index b79424f93282..6777aa06ce2c 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -19,11 +19,27 @@
19#include "user.h" 19#include "user.h"
20#include <utime.h> 20#include <utime.h>
21 21
22int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, 22static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
23 int *nlink_out, int *uid_out, int *gid_out, 23{
24 unsigned long long *size_out, struct timespec *atime_out, 24 p->ino = buf->st_ino;
25 struct timespec *mtime_out, struct timespec *ctime_out, 25 p->mode = buf->st_mode;
26 int *blksize_out, unsigned long long *blocks_out, int fd) 26 p->nlink = buf->st_nlink;
27 p->uid = buf->st_uid;
28 p->gid = buf->st_gid;
29 p->size = buf->st_size;
30 p->atime.tv_sec = buf->st_atime;
31 p->atime.tv_nsec = 0;
32 p->ctime.tv_sec = buf->st_ctime;
33 p->ctime.tv_nsec = 0;
34 p->mtime.tv_sec = buf->st_mtime;
35 p->mtime.tv_nsec = 0;
36 p->blksize = buf->st_blksize;
37 p->blocks = buf->st_blocks;
38 p->maj = os_major(buf->st_rdev);
39 p->min = os_minor(buf->st_rdev);
40}
41
42int stat_file(const char *path, struct hostfs_stat *p, int fd)
27{ 43{
28 struct stat64 buf; 44 struct stat64 buf;
29 45
@@ -33,68 +49,10 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
33 } else if (lstat64(path, &buf) < 0) { 49 } else if (lstat64(path, &buf) < 0) {
34 return -errno; 50 return -errno;
35 } 51 }
36 52 stat64_to_hostfs(&buf, p);
37 if (inode_out != NULL)
38 *inode_out = buf.st_ino;
39 if (mode_out != NULL)
40 *mode_out = buf.st_mode;
41 if (nlink_out != NULL)
42 *nlink_out = buf.st_nlink;
43 if (uid_out != NULL)
44 *uid_out = buf.st_uid;
45 if (gid_out != NULL)
46 *gid_out = buf.st_gid;
47 if (size_out != NULL)
48 *size_out = buf.st_size;
49 if (atime_out != NULL) {
50 atime_out->tv_sec = buf.st_atime;
51 atime_out->tv_nsec = 0;
52 }
53 if (mtime_out != NULL) {
54 mtime_out->tv_sec = buf.st_mtime;
55 mtime_out->tv_nsec = 0;
56 }
57 if (ctime_out != NULL) {
58 ctime_out->tv_sec = buf.st_ctime;
59 ctime_out->tv_nsec = 0;
60 }
61 if (blksize_out != NULL)
62 *blksize_out = buf.st_blksize;
63 if (blocks_out != NULL)
64 *blocks_out = buf.st_blocks;
65 return 0; 53 return 0;
66} 54}
67 55
68int file_type(const char *path, int *maj, int *min)
69{
70 struct stat64 buf;
71
72 if (lstat64(path, &buf) < 0)
73 return -errno;
74 /*
75 * We cannot pass rdev as is because glibc and the kernel disagree
76 * about its definition.
77 */
78 if (maj != NULL)
79 *maj = major(buf.st_rdev);
80 if (min != NULL)
81 *min = minor(buf.st_rdev);
82
83 if (S_ISDIR(buf.st_mode))
84 return OS_TYPE_DIR;
85 else if (S_ISLNK(buf.st_mode))
86 return OS_TYPE_SYMLINK;
87 else if (S_ISCHR(buf.st_mode))
88 return OS_TYPE_CHARDEV;
89 else if (S_ISBLK(buf.st_mode))
90 return OS_TYPE_BLOCKDEV;
91 else if (S_ISFIFO(buf.st_mode))
92 return OS_TYPE_FIFO;
93 else if (S_ISSOCK(buf.st_mode))
94 return OS_TYPE_SOCK;
95 else return OS_TYPE_FILE;
96}
97
98int access_file(char *path, int r, int w, int x) 56int access_file(char *path, int r, int w, int x)
99{ 57{
100 int mode = 0; 58 int mode = 0;
@@ -202,6 +160,11 @@ int fsync_file(int fd, int datasync)
202 return 0; 160 return 0;
203} 161}
204 162
163int replace_file(int oldfd, int fd)
164{
165 return dup2(oldfd, fd);
166}
167
205void close_file(void *stream) 168void close_file(void *stream)
206{ 169{
207 close(*((int *) stream)); 170 close(*((int *) stream));
@@ -235,8 +198,8 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
235 198
236int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) 199int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
237{ 200{
201 struct hostfs_stat st;
238 struct timeval times[2]; 202 struct timeval times[2];
239 struct timespec atime_ts, mtime_ts;
240 int err, ma; 203 int err, ma;
241 204
242 if (attrs->ia_valid & HOSTFS_ATTR_MODE) { 205 if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
@@ -279,15 +242,14 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
279 */ 242 */
280 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET); 243 ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
281 if (attrs->ia_valid & ma) { 244 if (attrs->ia_valid & ma) {
282 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 245 err = stat_file(file, &st, fd);
283 &atime_ts, &mtime_ts, NULL, NULL, NULL, fd);
284 if (err != 0) 246 if (err != 0)
285 return err; 247 return err;
286 248
287 times[0].tv_sec = atime_ts.tv_sec; 249 times[0].tv_sec = st.atime.tv_sec;
288 times[0].tv_usec = atime_ts.tv_nsec / 1000; 250 times[0].tv_usec = st.atime.tv_nsec / 1000;
289 times[1].tv_sec = mtime_ts.tv_sec; 251 times[1].tv_sec = st.mtime.tv_sec;
290 times[1].tv_usec = mtime_ts.tv_nsec / 1000; 252 times[1].tv_usec = st.mtime.tv_nsec / 1000;
291 253
292 if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) { 254 if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
293 times[0].tv_sec = attrs->ia_atime.tv_sec; 255 times[0].tv_sec = attrs->ia_atime.tv_sec;
@@ -308,9 +270,9 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
308 270
309 /* Note: ctime is not handled */ 271 /* Note: ctime is not handled */
310 if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) { 272 if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) {
311 err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 273 err = stat_file(file, &st, fd);
312 &attrs->ia_atime, &attrs->ia_mtime, NULL, 274 attrs->ia_atime = st.atime;
313 NULL, NULL, fd); 275 attrs->ia_mtime = st.mtime;
314 if (err != 0) 276 if (err != 0)
315 return err; 277 return err;
316 } 278 }
@@ -361,7 +323,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
361{ 323{
362 int err; 324 int err;
363 325
364 err = mknod(file, mode, makedev(major, minor)); 326 err = mknod(file, mode, os_makedev(major, minor));
365 if (err) 327 if (err)
366 return -errno; 328 return -errno;
367 return 0; 329 return 0;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index a9ae9bfa752f..c0340887c7ea 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
97 loff_t pos, unsigned len, unsigned flags, 97 loff_t pos, unsigned len, unsigned flags,
98 struct page **pagep, void **fsdata) 98 struct page **pagep, void **fsdata)
99{ 99{
100 int ret;
101
100 *pagep = NULL; 102 *pagep = NULL;
101 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 103 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
102 hpfs_get_block, 104 hpfs_get_block,
103 &hpfs_i(mapping->host)->mmu_private); 105 &hpfs_i(mapping->host)->mmu_private);
106 if (unlikely(ret)) {
107 loff_t isize = mapping->host->i_size;
108 if (pos + len > isize)
109 vmtruncate(mapping->host, isize);
110 }
111
112 return ret;
104} 113}
105 114
106static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) 115static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 75f9d4324851..b59eac0232a0 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -281,7 +281,7 @@ void hpfs_write_inode(struct inode *);
281void hpfs_write_inode_nolock(struct inode *); 281void hpfs_write_inode_nolock(struct inode *);
282int hpfs_setattr(struct dentry *, struct iattr *); 282int hpfs_setattr(struct dentry *, struct iattr *);
283void hpfs_write_if_changed(struct inode *); 283void hpfs_write_if_changed(struct inode *);
284void hpfs_delete_inode(struct inode *); 284void hpfs_evict_inode(struct inode *);
285 285
286/* map.c */ 286/* map.c */
287 287
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1042a9bc97f3..56f0da1cfd10 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -277,9 +277,15 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
277 if (error) 277 if (error)
278 goto out_unlock; 278 goto out_unlock;
279 279
280 error = inode_setattr(inode, attr); 280 if ((attr->ia_valid & ATTR_SIZE) &&
281 if (error) 281 attr->ia_size != i_size_read(inode)) {
282 goto out_unlock; 282 error = vmtruncate(inode, attr->ia_size);
283 if (error)
284 return error;
285 }
286
287 setattr_copy(inode, attr);
288 mark_inode_dirty(inode);
283 289
284 hpfs_write_inode(inode); 290 hpfs_write_inode(inode);
285 291
@@ -296,11 +302,13 @@ void hpfs_write_if_changed(struct inode *inode)
296 hpfs_write_inode(inode); 302 hpfs_write_inode(inode);
297} 303}
298 304
299void hpfs_delete_inode(struct inode *inode) 305void hpfs_evict_inode(struct inode *inode)
300{ 306{
301 truncate_inode_pages(&inode->i_data, 0); 307 truncate_inode_pages(&inode->i_data, 0);
302 lock_kernel(); 308 end_writeback(inode);
303 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 309 if (!inode->i_nlink) {
304 unlock_kernel(); 310 lock_kernel();
305 clear_inode(inode); 311 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
312 unlock_kernel();
313 }
306} 314}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index aa53842c599c..2607010be2fe 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -450,7 +450,7 @@ static const struct super_operations hpfs_sops =
450{ 450{
451 .alloc_inode = hpfs_alloc_inode, 451 .alloc_inode = hpfs_alloc_inode,
452 .destroy_inode = hpfs_destroy_inode, 452 .destroy_inode = hpfs_destroy_inode,
453 .delete_inode = hpfs_delete_inode, 453 .evict_inode = hpfs_evict_inode,
454 .put_super = hpfs_put_super, 454 .put_super = hpfs_put_super,
455 .statfs = hpfs_statfs, 455 .statfs = hpfs_statfs,
456 .remount_fs = hpfs_remount_fs, 456 .remount_fs = hpfs_remount_fs,
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 826c3f9d29ac..7b027720d820 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/statfs.h> 16#include <linux/statfs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/pid_namespace.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19#include "os.h" 20#include "os.h"
20 21
@@ -623,12 +624,11 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
623 return &hi->vfs_inode; 624 return &hi->vfs_inode;
624} 625}
625 626
626void hppfs_delete_inode(struct inode *ino) 627void hppfs_evict_inode(struct inode *ino)
627{ 628{
629 end_writeback(ino);
628 dput(HPPFS_I(ino)->proc_dentry); 630 dput(HPPFS_I(ino)->proc_dentry);
629 mntput(ino->i_sb->s_fs_info); 631 mntput(ino->i_sb->s_fs_info);
630
631 clear_inode(ino);
632} 632}
633 633
634static void hppfs_destroy_inode(struct inode *inode) 634static void hppfs_destroy_inode(struct inode *inode)
@@ -639,7 +639,7 @@ static void hppfs_destroy_inode(struct inode *inode)
639static const struct super_operations hppfs_sbops = { 639static const struct super_operations hppfs_sbops = {
640 .alloc_inode = hppfs_alloc_inode, 640 .alloc_inode = hppfs_alloc_inode,
641 .destroy_inode = hppfs_destroy_inode, 641 .destroy_inode = hppfs_destroy_inode,
642 .delete_inode = hppfs_delete_inode, 642 .evict_inode = hppfs_evict_inode,
643 .statfs = hppfs_statfs, 643 .statfs = hppfs_statfs,
644}; 644};
645 645
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a4e9a7ec3691..6e5bd42f3860 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -371,27 +371,10 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
371 hugetlb_unreserve_pages(inode, start, freed); 371 hugetlb_unreserve_pages(inode, start, freed);
372} 372}
373 373
374static void hugetlbfs_delete_inode(struct inode *inode) 374static void hugetlbfs_evict_inode(struct inode *inode)
375{ 375{
376 truncate_hugepages(inode, 0); 376 truncate_hugepages(inode, 0);
377 clear_inode(inode); 377 end_writeback(inode);
378}
379
380static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
381{
382 if (generic_detach_inode(inode)) {
383 truncate_hugepages(inode, 0);
384 clear_inode(inode);
385 destroy_inode(inode);
386 }
387}
388
389static void hugetlbfs_drop_inode(struct inode *inode)
390{
391 if (!inode->i_nlink)
392 generic_delete_inode(inode);
393 else
394 hugetlbfs_forget_inode(inode);
395} 378}
396 379
397static inline void 380static inline void
@@ -448,19 +431,20 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
448 431
449 error = inode_change_ok(inode, attr); 432 error = inode_change_ok(inode, attr);
450 if (error) 433 if (error)
451 goto out; 434 return error;
452 435
453 if (ia_valid & ATTR_SIZE) { 436 if (ia_valid & ATTR_SIZE) {
454 error = -EINVAL; 437 error = -EINVAL;
455 if (!(attr->ia_size & ~huge_page_mask(h))) 438 if (attr->ia_size & ~huge_page_mask(h))
456 error = hugetlb_vmtruncate(inode, attr->ia_size); 439 return -EINVAL;
440 error = hugetlb_vmtruncate(inode, attr->ia_size);
457 if (error) 441 if (error)
458 goto out; 442 return error;
459 attr->ia_valid &= ~ATTR_SIZE;
460 } 443 }
461 error = inode_setattr(inode, attr); 444
462out: 445 setattr_copy(inode, attr);
463 return error; 446 mark_inode_dirty(inode);
447 return 0;
464} 448}
465 449
466static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 450static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
@@ -712,9 +696,8 @@ static const struct inode_operations hugetlbfs_inode_operations = {
712static const struct super_operations hugetlbfs_ops = { 696static const struct super_operations hugetlbfs_ops = {
713 .alloc_inode = hugetlbfs_alloc_inode, 697 .alloc_inode = hugetlbfs_alloc_inode,
714 .destroy_inode = hugetlbfs_destroy_inode, 698 .destroy_inode = hugetlbfs_destroy_inode,
699 .evict_inode = hugetlbfs_evict_inode,
715 .statfs = hugetlbfs_statfs, 700 .statfs = hugetlbfs_statfs,
716 .delete_inode = hugetlbfs_delete_inode,
717 .drop_inode = hugetlbfs_drop_inode,
718 .put_super = hugetlbfs_put_super, 701 .put_super = hugetlbfs_put_super,
719 .show_options = generic_show_options, 702 .show_options = generic_show_options,
720}; 703};
diff --git a/fs/inode.c b/fs/inode.c
index 722860b323a9..86464332e590 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -20,7 +20,6 @@
20#include <linux/pagemap.h> 20#include <linux/pagemap.h>
21#include <linux/cdev.h> 21#include <linux/cdev.h>
22#include <linux/bootmem.h> 22#include <linux/bootmem.h>
23#include <linux/inotify.h>
24#include <linux/fsnotify.h> 23#include <linux/fsnotify.h>
25#include <linux/mount.h> 24#include <linux/mount.h>
26#include <linux/async.h> 25#include <linux/async.h>
@@ -264,12 +263,8 @@ void inode_init_once(struct inode *inode)
264 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); 263 INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
265 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); 264 INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
266 i_size_ordered_init(inode); 265 i_size_ordered_init(inode);
267#ifdef CONFIG_INOTIFY
268 INIT_LIST_HEAD(&inode->inotify_watches);
269 mutex_init(&inode->inotify_mutex);
270#endif
271#ifdef CONFIG_FSNOTIFY 266#ifdef CONFIG_FSNOTIFY
272 INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries); 267 INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
273#endif 268#endif
274} 269}
275EXPORT_SYMBOL(inode_init_once); 270EXPORT_SYMBOL(inode_init_once);
@@ -294,32 +289,34 @@ void __iget(struct inode *inode)
294 inodes_stat.nr_unused--; 289 inodes_stat.nr_unused--;
295} 290}
296 291
297/** 292void end_writeback(struct inode *inode)
298 * clear_inode - clear an inode
299 * @inode: inode to clear
300 *
301 * This is called by the filesystem to tell us
302 * that the inode is no longer useful. We just
303 * terminate it with extreme prejudice.
304 */
305void clear_inode(struct inode *inode)
306{ 293{
307 might_sleep(); 294 might_sleep();
308 invalidate_inode_buffers(inode);
309
310 BUG_ON(inode->i_data.nrpages); 295 BUG_ON(inode->i_data.nrpages);
296 BUG_ON(!list_empty(&inode->i_data.private_list));
311 BUG_ON(!(inode->i_state & I_FREEING)); 297 BUG_ON(!(inode->i_state & I_FREEING));
312 BUG_ON(inode->i_state & I_CLEAR); 298 BUG_ON(inode->i_state & I_CLEAR);
313 inode_sync_wait(inode); 299 inode_sync_wait(inode);
314 if (inode->i_sb->s_op->clear_inode) 300 inode->i_state = I_FREEING | I_CLEAR;
315 inode->i_sb->s_op->clear_inode(inode); 301}
302EXPORT_SYMBOL(end_writeback);
303
304static void evict(struct inode *inode)
305{
306 const struct super_operations *op = inode->i_sb->s_op;
307
308 if (op->evict_inode) {
309 op->evict_inode(inode);
310 } else {
311 if (inode->i_data.nrpages)
312 truncate_inode_pages(&inode->i_data, 0);
313 end_writeback(inode);
314 }
316 if (S_ISBLK(inode->i_mode) && inode->i_bdev) 315 if (S_ISBLK(inode->i_mode) && inode->i_bdev)
317 bd_forget(inode); 316 bd_forget(inode);
318 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 317 if (S_ISCHR(inode->i_mode) && inode->i_cdev)
319 cd_forget(inode); 318 cd_forget(inode);
320 inode->i_state = I_CLEAR;
321} 319}
322EXPORT_SYMBOL(clear_inode);
323 320
324/* 321/*
325 * dispose_list - dispose of the contents of a local list 322 * dispose_list - dispose of the contents of a local list
@@ -338,9 +335,7 @@ static void dispose_list(struct list_head *head)
338 inode = list_first_entry(head, struct inode, i_list); 335 inode = list_first_entry(head, struct inode, i_list);
339 list_del(&inode->i_list); 336 list_del(&inode->i_list);
340 337
341 if (inode->i_data.nrpages) 338 evict(inode);
342 truncate_inode_pages(&inode->i_data, 0);
343 clear_inode(inode);
344 339
345 spin_lock(&inode_lock); 340 spin_lock(&inode_lock);
346 hlist_del_init(&inode->i_hash); 341 hlist_del_init(&inode->i_hash);
@@ -413,7 +408,6 @@ int invalidate_inodes(struct super_block *sb)
413 408
414 down_write(&iprune_sem); 409 down_write(&iprune_sem);
415 spin_lock(&inode_lock); 410 spin_lock(&inode_lock);
416 inotify_unmount_inodes(&sb->s_inodes);
417 fsnotify_unmount_inodes(&sb->s_inodes); 411 fsnotify_unmount_inodes(&sb->s_inodes);
418 busy = invalidate_list(&sb->s_inodes, &throw_away); 412 busy = invalidate_list(&sb->s_inodes, &throw_away);
419 spin_unlock(&inode_lock); 413 spin_unlock(&inode_lock);
@@ -553,7 +547,7 @@ repeat:
553 continue; 547 continue;
554 if (!test(inode, data)) 548 if (!test(inode, data))
555 continue; 549 continue;
556 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 550 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
557 __wait_on_freeing_inode(inode); 551 __wait_on_freeing_inode(inode);
558 goto repeat; 552 goto repeat;
559 } 553 }
@@ -578,7 +572,7 @@ repeat:
578 continue; 572 continue;
579 if (inode->i_sb != sb) 573 if (inode->i_sb != sb)
580 continue; 574 continue;
581 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) { 575 if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
582 __wait_on_freeing_inode(inode); 576 __wait_on_freeing_inode(inode);
583 goto repeat; 577 goto repeat;
584 } 578 }
@@ -840,7 +834,7 @@ EXPORT_SYMBOL(iunique);
840struct inode *igrab(struct inode *inode) 834struct inode *igrab(struct inode *inode)
841{ 835{
842 spin_lock(&inode_lock); 836 spin_lock(&inode_lock);
843 if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))) 837 if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
844 __iget(inode); 838 __iget(inode);
845 else 839 else
846 /* 840 /*
@@ -1089,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
1089 continue; 1083 continue;
1090 if (old->i_sb != sb) 1084 if (old->i_sb != sb)
1091 continue; 1085 continue;
1092 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) 1086 if (old->i_state & (I_FREEING|I_WILL_FREE))
1093 continue; 1087 continue;
1094 break; 1088 break;
1095 } 1089 }
@@ -1128,7 +1122,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
1128 continue; 1122 continue;
1129 if (!test(old, data)) 1123 if (!test(old, data))
1130 continue; 1124 continue;
1131 if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) 1125 if (old->i_state & (I_FREEING|I_WILL_FREE))
1132 continue; 1126 continue;
1133 break; 1127 break;
1134 } 1128 }
@@ -1180,69 +1174,51 @@ void remove_inode_hash(struct inode *inode)
1180} 1174}
1181EXPORT_SYMBOL(remove_inode_hash); 1175EXPORT_SYMBOL(remove_inode_hash);
1182 1176
1177int generic_delete_inode(struct inode *inode)
1178{
1179 return 1;
1180}
1181EXPORT_SYMBOL(generic_delete_inode);
1182
1183/* 1183/*
1184 * Tell the filesystem that this inode is no longer of any interest and should 1184 * Normal UNIX filesystem behaviour: delete the
1185 * be completely destroyed. 1185 * inode when the usage count drops to zero, and
1186 * 1186 * i_nlink is zero.
1187 * We leave the inode in the inode hash table until *after* the filesystem's
1188 * ->delete_inode completes. This ensures that an iget (such as nfsd might
1189 * instigate) will always find up-to-date information either in the hash or on
1190 * disk.
1191 *
1192 * I_FREEING is set so that no-one will take a new reference to the inode while
1193 * it is being deleted.
1194 */ 1187 */
1195void generic_delete_inode(struct inode *inode) 1188int generic_drop_inode(struct inode *inode)
1196{ 1189{
1197 const struct super_operations *op = inode->i_sb->s_op; 1190 return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
1198
1199 list_del_init(&inode->i_list);
1200 list_del_init(&inode->i_sb_list);
1201 WARN_ON(inode->i_state & I_NEW);
1202 inode->i_state |= I_FREEING;
1203 inodes_stat.nr_inodes--;
1204 spin_unlock(&inode_lock);
1205
1206 if (op->delete_inode) {
1207 void (*delete)(struct inode *) = op->delete_inode;
1208 /* Filesystems implementing their own
1209 * s_op->delete_inode are required to call
1210 * truncate_inode_pages and clear_inode()
1211 * internally */
1212 delete(inode);
1213 } else {
1214 truncate_inode_pages(&inode->i_data, 0);
1215 clear_inode(inode);
1216 }
1217 spin_lock(&inode_lock);
1218 hlist_del_init(&inode->i_hash);
1219 spin_unlock(&inode_lock);
1220 wake_up_inode(inode);
1221 BUG_ON(inode->i_state != I_CLEAR);
1222 destroy_inode(inode);
1223} 1191}
1224EXPORT_SYMBOL(generic_delete_inode); 1192EXPORT_SYMBOL_GPL(generic_drop_inode);
1225 1193
1226/** 1194/*
1227 * generic_detach_inode - remove inode from inode lists 1195 * Called when we're dropping the last reference
1228 * @inode: inode to remove 1196 * to an inode.
1229 *
1230 * Remove inode from inode lists, write it if it's dirty. This is just an
1231 * internal VFS helper exported for hugetlbfs. Do not use!
1232 * 1197 *
1233 * Returns 1 if inode should be completely destroyed. 1198 * Call the FS "drop_inode()" function, defaulting to
1199 * the legacy UNIX filesystem behaviour. If it tells
1200 * us to evict inode, do so. Otherwise, retain inode
1201 * in cache if fs is alive, sync and evict if fs is
1202 * shutting down.
1234 */ 1203 */
1235int generic_detach_inode(struct inode *inode) 1204static void iput_final(struct inode *inode)
1236{ 1205{
1237 struct super_block *sb = inode->i_sb; 1206 struct super_block *sb = inode->i_sb;
1207 const struct super_operations *op = inode->i_sb->s_op;
1208 int drop;
1238 1209
1239 if (!hlist_unhashed(&inode->i_hash)) { 1210 if (op && op->drop_inode)
1211 drop = op->drop_inode(inode);
1212 else
1213 drop = generic_drop_inode(inode);
1214
1215 if (!drop) {
1240 if (!(inode->i_state & (I_DIRTY|I_SYNC))) 1216 if (!(inode->i_state & (I_DIRTY|I_SYNC)))
1241 list_move(&inode->i_list, &inode_unused); 1217 list_move(&inode->i_list, &inode_unused);
1242 inodes_stat.nr_unused++; 1218 inodes_stat.nr_unused++;
1243 if (sb->s_flags & MS_ACTIVE) { 1219 if (sb->s_flags & MS_ACTIVE) {
1244 spin_unlock(&inode_lock); 1220 spin_unlock(&inode_lock);
1245 return 0; 1221 return;
1246 } 1222 }
1247 WARN_ON(inode->i_state & I_NEW); 1223 WARN_ON(inode->i_state & I_NEW);
1248 inode->i_state |= I_WILL_FREE; 1224 inode->i_state |= I_WILL_FREE;
@@ -1260,56 +1236,15 @@ int generic_detach_inode(struct inode *inode)
1260 inode->i_state |= I_FREEING; 1236 inode->i_state |= I_FREEING;
1261 inodes_stat.nr_inodes--; 1237 inodes_stat.nr_inodes--;
1262 spin_unlock(&inode_lock); 1238 spin_unlock(&inode_lock);
1263 return 1; 1239 evict(inode);
1264} 1240 spin_lock(&inode_lock);
1265EXPORT_SYMBOL_GPL(generic_detach_inode); 1241 hlist_del_init(&inode->i_hash);
1266 1242 spin_unlock(&inode_lock);
1267static void generic_forget_inode(struct inode *inode)
1268{
1269 if (!generic_detach_inode(inode))
1270 return;
1271 if (inode->i_data.nrpages)
1272 truncate_inode_pages(&inode->i_data, 0);
1273 clear_inode(inode);
1274 wake_up_inode(inode); 1243 wake_up_inode(inode);
1244 BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
1275 destroy_inode(inode); 1245 destroy_inode(inode);
1276} 1246}
1277 1247
1278/*
1279 * Normal UNIX filesystem behaviour: delete the
1280 * inode when the usage count drops to zero, and
1281 * i_nlink is zero.
1282 */
1283void generic_drop_inode(struct inode *inode)
1284{
1285 if (!inode->i_nlink)
1286 generic_delete_inode(inode);
1287 else
1288 generic_forget_inode(inode);
1289}
1290EXPORT_SYMBOL_GPL(generic_drop_inode);
1291
1292/*
1293 * Called when we're dropping the last reference
1294 * to an inode.
1295 *
1296 * Call the FS "drop()" function, defaulting to
1297 * the legacy UNIX filesystem behaviour..
1298 *
1299 * NOTE! NOTE! NOTE! We're called with the inode lock
1300 * held, and the drop function is supposed to release
1301 * the lock!
1302 */
1303static inline void iput_final(struct inode *inode)
1304{
1305 const struct super_operations *op = inode->i_sb->s_op;
1306 void (*drop)(struct inode *) = generic_drop_inode;
1307
1308 if (op && op->drop_inode)
1309 drop = op->drop_inode;
1310 drop(inode);
1311}
1312
1313/** 1248/**
1314 * iput - put an inode 1249 * iput - put an inode
1315 * @inode: inode to put 1250 * @inode: inode to put
@@ -1322,7 +1257,7 @@ static inline void iput_final(struct inode *inode)
1322void iput(struct inode *inode) 1257void iput(struct inode *inode)
1323{ 1258{
1324 if (inode) { 1259 if (inode) {
1325 BUG_ON(inode->i_state == I_CLEAR); 1260 BUG_ON(inode->i_state & I_CLEAR);
1326 1261
1327 if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) 1262 if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
1328 iput_final(inode); 1263 iput_final(inode);
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 55f1dde2fa8b..404111b016c9 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index c5e1450d79f9..a906f538d11c 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index f0294410868d..617a1e5694c1 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -2,11 +2,12 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Created by Arjan van de Ven <arjanv@redhat.com> 5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6 *
7 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, 6 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
8 * University of Szeged, Hungary 7 * University of Szeged, Hungary
9 * 8 *
9 * Created by Arjan van de Ven <arjan@infradead.org>
10 *
10 * For licensing information, see the file 'LICENCE' in this directory. 11 * For licensing information, see the file 'LICENCE' in this directory.
11 * 12 *
12 */ 13 */
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index 7d1d72faa774..e471a9106fd9 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>, 4 * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
5 * University of Szeged, Hungary 5 * University of Szeged, Hungary
6 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6 * 7 *
7 * For licensing information, see the file 'LICENCE' in this directory. 8 * For licensing information, see the file 'LICENCE' in this directory.
8 * 9 *
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index cd02acafde8a..ed25ae7c98eb 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2007 Nokia Corporation. All rights reserved. 4 * Copyright © 2007 Nokia Corporation. All rights reserved.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by Richard Purdie <rpurdie@openedhand.com> 7 * Created by Richard Purdie <rpurdie@openedhand.com>
7 * 8 *
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 546d1538d076..9696ad9ef5f7 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by Arjan van de Ven <arjanv@redhat.com> 7 * Created by Arjan van de Ven <arjanv@redhat.com>
7 * 8 *
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index 170d289ac785..a12b4f763373 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by Arjan van de Ven <arjanv@redhat.com> 7 * Created by Arjan van de Ven <arjanv@redhat.com>
7 * 8 *
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index b46661a42758..97fc45de6f81 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index ec3538413926..e0b76c87a91a 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index a113ecc3bafe..c4f8eef5ca68 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 166062a68230..ed78a3cf3cb0 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
@@ -232,9 +233,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
232 return 0; 233 return 0;
233 234
234 fail: 235 fail:
235 make_bad_inode(inode); 236 iget_failed(inode);
236 unlock_new_inode(inode);
237 iput(inode);
238 jffs2_free_raw_inode(ri); 237 jffs2_free_raw_inode(ri);
239 return ret; 238 return ret;
240} 239}
@@ -454,9 +453,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
454 return 0; 453 return 0;
455 454
456 fail: 455 fail:
457 make_bad_inode(inode); 456 iget_failed(inode);
458 unlock_new_inode(inode);
459 iput(inode);
460 return ret; 457 return ret;
461} 458}
462 459
@@ -601,9 +598,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
601 return 0; 598 return 0;
602 599
603 fail: 600 fail:
604 make_bad_inode(inode); 601 iget_failed(inode);
605 unlock_new_inode(inode);
606 iput(inode);
607 return ret; 602 return ret;
608} 603}
609 604
@@ -778,9 +773,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
778 return 0; 773 return 0;
779 774
780 fail: 775 fail:
781 make_bad_inode(inode); 776 iget_failed(inode);
782 unlock_new_inode(inode);
783 iput(inode);
784 return ret; 777 return ret;
785} 778}
786 779
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 6286ad9b00f7..abac961f617b 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 813497024437..1c0a08d711aa 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 459d39d1ea0b..6b2964a19850 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
@@ -169,13 +170,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
169 mutex_unlock(&f->sem); 170 mutex_unlock(&f->sem);
170 jffs2_complete_reservation(c); 171 jffs2_complete_reservation(c);
171 172
172 /* We have to do the simple_setsize() without f->sem held, since 173 /* We have to do the truncate_setsize() without f->sem held, since
173 some pages may be locked and waiting for it in readpage(). 174 some pages may be locked and waiting for it in readpage().
174 We are protected from a simultaneous write() extending i_size 175 We are protected from a simultaneous write() extending i_size
175 back past iattr->ia_size, because do_truncate() holds the 176 back past iattr->ia_size, because do_truncate() holds the
176 generic inode semaphore. */ 177 generic inode semaphore. */
177 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) { 178 if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
178 simple_setsize(inode, iattr->ia_size); 179 truncate_setsize(inode, iattr->ia_size);
179 inode->i_blocks = (inode->i_size + 511) >> 9; 180 inode->i_blocks = (inode->i_size + 511) >> 9;
180 } 181 }
181 182
@@ -225,7 +226,7 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
225} 226}
226 227
227 228
228void jffs2_clear_inode (struct inode *inode) 229void jffs2_evict_inode (struct inode *inode)
229{ 230{
230 /* We can forget about this inode for now - drop all 231 /* We can forget about this inode for now - drop all
231 * the nodelists associated with it, etc. 232 * the nodelists associated with it, etc.
@@ -233,7 +234,9 @@ void jffs2_clear_inode (struct inode *inode)
233 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 234 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
234 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); 235 struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
235 236
236 D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode)); 237 D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
238 truncate_inode_pages(&inode->i_data, 0);
239 end_writeback(inode);
237 jffs2_do_clear_inode(c, f); 240 jffs2_do_clear_inode(c, f);
238} 241}
239 242
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index f5e96bd656e8..846a79452497 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index 9d41f43e47bb..859a598af020 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index c6923da98263..2e4a86763c07 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 85ef6dbb1be7..6784bc89add1 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -2,6 +2,7 @@
2 * JFFS2 -- Journalling Flash File System, Version 2. 2 * JFFS2 -- Journalling Flash File System, Version 2.
3 * 3 *
4 * Copyright © 2001-2007 Red Hat, Inc. 4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
5 * 6 *
6 * Created by David Woodhouse <dwmw2@infradead.org> 7 * Created by David Woodhouse <dwmw2@infradead.org>
7 * 8 *
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index a881a42f19e3..523a91691052 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -24,7 +24,6 @@
24#ifdef __ECOS 24#ifdef __ECOS
25#include "os-ecos.h" 25#include "os-ecos.h"
26#else 26#else
27#include <linux/mtd/compatmac.h> /* For compatibility with older kernels */
28#include "os-linux.h" 27#include "os-linux.h"
29#endif 28#endif
30 29
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 4791aacf3084..00bae7cc2e48 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -171,7 +171,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations;
171int jffs2_setattr (struct dentry *, struct iattr *); 171int jffs2_setattr (struct dentry *, struct iattr *);
172int jffs2_do_setattr (struct inode *, struct iattr *); 172int jffs2_do_setattr (struct inode *, struct iattr *);
173struct inode *jffs2_iget(struct super_block *, unsigned long); 173struct inode *jffs2_iget(struct super_block *, unsigned long);
174void jffs2_clear_inode (struct inode *); 174void jffs2_evict_inode (struct inode *);
175void jffs2_dirty_inode(struct inode *inode); 175void jffs2_dirty_inode(struct inode *inode);
176struct inode *jffs2_new_inode (struct inode *dir_i, int mode, 176struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
177 struct jffs2_raw_inode *ri); 177 struct jffs2_raw_inode *ri);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 511e2d609d12..662bba099501 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -135,7 +135,7 @@ static const struct super_operations jffs2_super_operations =
135 .write_super = jffs2_write_super, 135 .write_super = jffs2_write_super,
136 .statfs = jffs2_statfs, 136 .statfs = jffs2_statfs,
137 .remount_fs = jffs2_remount_fs, 137 .remount_fs = jffs2_remount_fs,
138 .clear_inode = jffs2_clear_inode, 138 .evict_inode = jffs2_evict_inode,
139 .dirty_inode = jffs2_dirty_inode, 139 .dirty_inode = jffs2_dirty_inode,
140 .sync_fs = jffs2_sync_fs, 140 .sync_fs = jffs2_sync_fs,
141}; 141};
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d258e261bdc7..9b572ca40a49 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -588,7 +588,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
588 588
589void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic) 589void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
590{ 590{
591 /* It's called from jffs2_clear_inode() on inode removing. 591 /* It's called from jffs2_evict_inode() on inode removing.
592 When an inode with XATTR is removed, those XATTRs must be removed. */ 592 When an inode with XATTR is removed, those XATTRs must be removed. */
593 struct jffs2_xattr_ref *ref, *_ref; 593 struct jffs2_xattr_ref *ref, *_ref;
594 594
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 127263cc8657..c5ce6c1d1ff4 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -17,6 +17,7 @@
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19
20#include <linux/mm.h>
20#include <linux/fs.h> 21#include <linux/fs.h>
21#include <linux/quotaops.h> 22#include <linux/quotaops.h>
22#include "jfs_incore.h" 23#include "jfs_incore.h"
@@ -107,11 +108,18 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
107 return rc; 108 return rc;
108 } 109 }
109 110
110 rc = inode_setattr(inode, iattr); 111 if ((iattr->ia_valid & ATTR_SIZE) &&
112 iattr->ia_size != i_size_read(inode)) {
113 rc = vmtruncate(inode, iattr->ia_size);
114 if (rc)
115 return rc;
116 }
111 117
112 if (!rc && (iattr->ia_valid & ATTR_MODE)) 118 setattr_copy(inode, iattr);
113 rc = jfs_acl_chmod(inode); 119 mark_inode_dirty(inode);
114 120
121 if (iattr->ia_valid & ATTR_MODE)
122 rc = jfs_acl_chmod(inode);
115 return rc; 123 return rc;
116} 124}
117 125
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index ed9ba6fe04f5..9978803ceedc 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -145,31 +145,32 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
145 return 0; 145 return 0;
146} 146}
147 147
148void jfs_delete_inode(struct inode *inode) 148void jfs_evict_inode(struct inode *inode)
149{ 149{
150 jfs_info("In jfs_delete_inode, inode = 0x%p", inode); 150 jfs_info("In jfs_evict_inode, inode = 0x%p", inode);
151 151
152 if (!is_bad_inode(inode)) 152 if (!inode->i_nlink && !is_bad_inode(inode)) {
153 dquot_initialize(inode); 153 dquot_initialize(inode);
154 154
155 if (!is_bad_inode(inode) && 155 if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
156 (JFS_IP(inode)->fileset == FILESYSTEM_I)) { 156 truncate_inode_pages(&inode->i_data, 0);
157 truncate_inode_pages(&inode->i_data, 0);
158 157
159 if (test_cflag(COMMIT_Freewmap, inode)) 158 if (test_cflag(COMMIT_Freewmap, inode))
160 jfs_free_zero_link(inode); 159 jfs_free_zero_link(inode);
161 160
162 diFree(inode); 161 diFree(inode);
163 162
164 /* 163 /*
165 * Free the inode from the quota allocation. 164 * Free the inode from the quota allocation.
166 */ 165 */
167 dquot_initialize(inode); 166 dquot_initialize(inode);
168 dquot_free_inode(inode); 167 dquot_free_inode(inode);
169 dquot_drop(inode); 168 }
169 } else {
170 truncate_inode_pages(&inode->i_data, 0);
170 } 171 }
171 172 end_writeback(inode);
172 clear_inode(inode); 173 dquot_drop(inode);
173} 174}
174 175
175void jfs_dirty_inode(struct inode *inode) 176void jfs_dirty_inode(struct inode *inode)
@@ -303,8 +304,17 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
303 loff_t pos, unsigned len, unsigned flags, 304 loff_t pos, unsigned len, unsigned flags,
304 struct page **pagep, void **fsdata) 305 struct page **pagep, void **fsdata)
305{ 306{
306 return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 307 int ret;
308
309 ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
307 jfs_get_block); 310 jfs_get_block);
311 if (unlikely(ret)) {
312 loff_t isize = mapping->host->i_size;
313 if (pos + len > isize)
314 vmtruncate(mapping->host, isize);
315 }
316
317 return ret;
308} 318}
309 319
310static sector_t jfs_bmap(struct address_space *mapping, sector_t block) 320static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
@@ -317,9 +327,24 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
317{ 327{
318 struct file *file = iocb->ki_filp; 328 struct file *file = iocb->ki_filp;
319 struct inode *inode = file->f_mapping->host; 329 struct inode *inode = file->f_mapping->host;
330 ssize_t ret;
320 331
321 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 332 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
322 offset, nr_segs, jfs_get_block, NULL); 333 offset, nr_segs, jfs_get_block, NULL);
334
335 /*
336 * In case of error extending write may have instantiated a few
337 * blocks outside i_size. Trim these off again.
338 */
339 if (unlikely((rw & WRITE) && ret < 0)) {
340 loff_t isize = i_size_read(inode);
341 loff_t end = offset + iov_length(iov, nr_segs);
342
343 if (end > isize)
344 vmtruncate(inode, isize);
345 }
346
347 return ret;
323} 348}
324 349
325const struct address_space_operations jfs_aops = { 350const struct address_space_operations jfs_aops = {
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 11042b1f44b5..155e91eff07d 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -27,7 +27,7 @@ extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
27extern struct inode *jfs_iget(struct super_block *, unsigned long); 27extern struct inode *jfs_iget(struct super_block *, unsigned long);
28extern int jfs_commit_inode(struct inode *, int); 28extern int jfs_commit_inode(struct inode *, int);
29extern int jfs_write_inode(struct inode *, struct writeback_control *); 29extern int jfs_write_inode(struct inode *, struct writeback_control *);
30extern void jfs_delete_inode(struct inode *); 30extern void jfs_evict_inode(struct inode *);
31extern void jfs_dirty_inode(struct inode *); 31extern void jfs_dirty_inode(struct inode *);
32extern void jfs_truncate(struct inode *); 32extern void jfs_truncate(struct inode *);
33extern void jfs_truncate_nolock(struct inode *, loff_t); 33extern void jfs_truncate_nolock(struct inode *, loff_t);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b38f96bef829..ec8c3e4baca3 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -132,11 +132,6 @@ static void jfs_destroy_inode(struct inode *inode)
132 kmem_cache_free(jfs_inode_cachep, ji); 132 kmem_cache_free(jfs_inode_cachep, ji);
133} 133}
134 134
135static void jfs_clear_inode(struct inode *inode)
136{
137 dquot_drop(inode);
138}
139
140static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) 135static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
141{ 136{
142 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); 137 struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
@@ -765,8 +760,7 @@ static const struct super_operations jfs_super_operations = {
765 .destroy_inode = jfs_destroy_inode, 760 .destroy_inode = jfs_destroy_inode,
766 .dirty_inode = jfs_dirty_inode, 761 .dirty_inode = jfs_dirty_inode,
767 .write_inode = jfs_write_inode, 762 .write_inode = jfs_write_inode,
768 .delete_inode = jfs_delete_inode, 763 .evict_inode = jfs_evict_inode,
769 .clear_inode = jfs_clear_inode,
770 .put_super = jfs_put_super, 764 .put_super = jfs_put_super,
771 .sync_fs = jfs_sync_fs, 765 .sync_fs = jfs_sync_fs,
772 .freeze_fs = jfs_freeze, 766 .freeze_fs = jfs_freeze,
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fa96bbb26343..2d7f165d0f1d 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -86,46 +86,25 @@ struct ea_buffer {
86#define EA_MALLOC 0x0008 86#define EA_MALLOC 0x0008
87 87
88 88
89static int is_known_namespace(const char *name)
90{
91 if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
92 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
93 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
94 strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
95 return false;
96
97 return true;
98}
99
89/* 100/*
90 * These three routines are used to recognize on-disk extended attributes 101 * These three routines are used to recognize on-disk extended attributes
91 * that are in a recognized namespace. If the attribute is not recognized, 102 * that are in a recognized namespace. If the attribute is not recognized,
92 * "os2." is prepended to the name 103 * "os2." is prepended to the name
93 */ 104 */
94static inline int is_os2_xattr(struct jfs_ea *ea) 105static int is_os2_xattr(struct jfs_ea *ea)
95{ 106{
96 /* 107 return !is_known_namespace(ea->name);
97 * Check for "system."
98 */
99 if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) &&
100 !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
101 return false;
102 /*
103 * Check for "user."
104 */
105 if ((ea->namelen >= XATTR_USER_PREFIX_LEN) &&
106 !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
107 return false;
108 /*
109 * Check for "security."
110 */
111 if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) &&
112 !strncmp(ea->name, XATTR_SECURITY_PREFIX,
113 XATTR_SECURITY_PREFIX_LEN))
114 return false;
115 /*
116 * Check for "trusted."
117 */
118 if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) &&
119 !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
120 return false;
121 /*
122 * Add any other valid namespace prefixes here
123 */
124
125 /*
126 * We assume it's OS/2's flat namespace
127 */
128 return true;
129} 108}
130 109
131static inline int name_size(struct jfs_ea *ea) 110static inline int name_size(struct jfs_ea *ea)
@@ -764,13 +743,23 @@ static int can_set_xattr(struct inode *inode, const char *name,
764 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) 743 if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
765 return can_set_system_xattr(inode, name, value, value_len); 744 return can_set_system_xattr(inode, name, value, value_len);
766 745
746 if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) {
747 /*
748 * This makes sure that we aren't trying to set an
749 * attribute in a different namespace by prefixing it
750 * with "os2."
751 */
752 if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN))
753 return -EOPNOTSUPP;
754 return 0;
755 }
756
767 /* 757 /*
768 * Don't allow setting an attribute in an unknown namespace. 758 * Don't allow setting an attribute in an unknown namespace.
769 */ 759 */
770 if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && 760 if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
771 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && 761 strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
772 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && 762 strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
773 strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
774 return -EOPNOTSUPP; 763 return -EOPNOTSUPP;
775 764
776 return 0; 765 return 0;
@@ -952,19 +941,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
952 int xattr_size; 941 int xattr_size;
953 ssize_t size; 942 ssize_t size;
954 int namelen = strlen(name); 943 int namelen = strlen(name);
955 char *os2name = NULL;
956 char *value; 944 char *value;
957 945
958 if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
959 os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
960 GFP_KERNEL);
961 if (!os2name)
962 return -ENOMEM;
963 strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
964 name = os2name;
965 namelen -= XATTR_OS2_PREFIX_LEN;
966 }
967
968 down_read(&JFS_IP(inode)->xattr_sem); 946 down_read(&JFS_IP(inode)->xattr_sem);
969 947
970 xattr_size = ea_get(inode, &ea_buf, 0); 948 xattr_size = ea_get(inode, &ea_buf, 0);
@@ -1002,8 +980,6 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
1002 out: 980 out:
1003 up_read(&JFS_IP(inode)->xattr_sem); 981 up_read(&JFS_IP(inode)->xattr_sem);
1004 982
1005 kfree(os2name);
1006
1007 return size; 983 return size;
1008} 984}
1009 985
@@ -1012,6 +988,19 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
1012{ 988{
1013 int err; 989 int err;
1014 990
991 if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
992 /*
993 * skip past "os2." prefix
994 */
995 name += XATTR_OS2_PREFIX_LEN;
996 /*
997 * Don't allow retrieving properly prefixed attributes
998 * by prepending them with "os2."
999 */
1000 if (is_known_namespace(name))
1001 return -EOPNOTSUPP;
1002 }
1003
1015 err = __jfs_getxattr(dentry->d_inode, name, data, buf_size); 1004 err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
1016 1005
1017 return err; 1006 return err;
diff --git a/fs/libfs.c b/fs/libfs.c
index dcaf972cbf1b..0a9da95317f7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -327,77 +327,35 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
327} 327}
328 328
329/** 329/**
330 * simple_setsize - handle core mm and vfs requirements for file size change 330 * simple_setattr - setattr for simple filesystem
331 * @inode: inode
332 * @newsize: new file size
333 *
334 * Returns 0 on success, -error on failure.
335 *
336 * simple_setsize must be called with inode_mutex held.
337 *
338 * simple_setsize will check that the requested new size is OK (see
339 * inode_newsize_ok), and then will perform the necessary i_size update
340 * and pagecache truncation (if necessary). It will be typically be called
341 * from the filesystem's setattr function when ATTR_SIZE is passed in.
342 *
343 * The inode itself must have correct permissions and attributes to allow
344 * i_size to be changed, this function then just checks that the new size
345 * requested is valid.
346 *
347 * In the case of simple in-memory filesystems with inodes stored solely
348 * in the inode cache, and file data in the pagecache, nothing more needs
349 * to be done to satisfy a truncate request. Filesystems with on-disk
350 * blocks for example will need to free them in the case of truncate, in
351 * that case it may be easier not to use simple_setsize (but each of its
352 * components will likely be required at some point to update pagecache
353 * and inode etc).
354 */
355int simple_setsize(struct inode *inode, loff_t newsize)
356{
357 loff_t oldsize;
358 int error;
359
360 error = inode_newsize_ok(inode, newsize);
361 if (error)
362 return error;
363
364 oldsize = inode->i_size;
365 i_size_write(inode, newsize);
366 truncate_pagecache(inode, oldsize, newsize);
367
368 return error;
369}
370EXPORT_SYMBOL(simple_setsize);
371
372/**
373 * simple_setattr - setattr for simple in-memory filesystem
374 * @dentry: dentry 331 * @dentry: dentry
375 * @iattr: iattr structure 332 * @iattr: iattr structure
376 * 333 *
377 * Returns 0 on success, -error on failure. 334 * Returns 0 on success, -error on failure.
378 * 335 *
379 * simple_setattr implements setattr for an in-memory filesystem which 336 * simple_setattr is a simple ->setattr implementation without a proper
380 * does not store its own file data or metadata (eg. uses the page cache 337 * implementation of size changes.
381 * and inode cache as its data store). 338 *
339 * It can either be used for in-memory filesystems or special files
340 * on simple regular filesystems. Anything that needs to change on-disk
341 * or wire state on size changes needs its own setattr method.
382 */ 342 */
383int simple_setattr(struct dentry *dentry, struct iattr *iattr) 343int simple_setattr(struct dentry *dentry, struct iattr *iattr)
384{ 344{
385 struct inode *inode = dentry->d_inode; 345 struct inode *inode = dentry->d_inode;
386 int error; 346 int error;
387 347
348 WARN_ON_ONCE(inode->i_op->truncate);
349
388 error = inode_change_ok(inode, iattr); 350 error = inode_change_ok(inode, iattr);
389 if (error) 351 if (error)
390 return error; 352 return error;
391 353
392 if (iattr->ia_valid & ATTR_SIZE) { 354 if (iattr->ia_valid & ATTR_SIZE)
393 error = simple_setsize(inode, iattr->ia_size); 355 truncate_setsize(inode, iattr->ia_size);
394 if (error) 356 setattr_copy(inode, iattr);
395 return error; 357 mark_inode_dirty(inode);
396 } 358 return 0;
397
398 generic_setattr(inode, iattr);
399
400 return error;
401} 359}
402EXPORT_SYMBOL(simple_setattr); 360EXPORT_SYMBOL(simple_setattr);
403 361
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 72d1893ddd36..675cc49197fe 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -434,8 +434,11 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
434 int ret; 434 int ret;
435 435
436 ta = kzalloc(sizeof(*ta), GFP_KERNEL); 436 ta = kzalloc(sizeof(*ta), GFP_KERNEL);
437 if (!ta) 437 if (!ta) {
438 inode->i_nlink--;
439 iput(inode);
438 return -ENOMEM; 440 return -ENOMEM;
441 }
439 442
440 ta->state = CREATE_1; 443 ta->state = CREATE_1;
441 ta->ino = inode->i_ino; 444 ta->ino = inode->i_ino;
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index abe1cafbd4c2..4dd0f7c06e39 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -232,15 +232,19 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
232 struct inode *inode = dentry->d_inode; 232 struct inode *inode = dentry->d_inode;
233 int err = 0; 233 int err = 0;
234 234
235 if (attr->ia_valid & ATTR_SIZE) 235 err = inode_change_ok(inode, attr);
236 if (err)
237 return err;
238
239 if (attr->ia_valid & ATTR_SIZE) {
236 err = logfs_truncate(inode, attr->ia_size); 240 err = logfs_truncate(inode, attr->ia_size);
237 attr->ia_valid &= ~ATTR_SIZE; 241 if (err)
242 return err;
243 }
238 244
239 if (!err) 245 setattr_copy(inode, attr);
240 err = inode_change_ok(inode, attr); 246 mark_inode_dirty(inode);
241 if (!err) 247 return 0;
242 err = inode_setattr(inode, attr);
243 return err;
244} 248}
245 249
246const struct inode_operations logfs_reg_iops = { 250const struct inode_operations logfs_reg_iops = {
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index f602e230e162..d8c71ece098f 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -235,33 +235,21 @@ static struct inode *logfs_alloc_inode(struct super_block *sb)
235 * purpose is to create a new inode that will not trigger the warning if such 235 * purpose is to create a new inode that will not trigger the warning if such
236 * an inode is still in use. An ugly hack, no doubt. Suggections for 236 * an inode is still in use. An ugly hack, no doubt. Suggections for
237 * improvement are welcome. 237 * improvement are welcome.
238 *
239 * AV: that's what ->put_super() is for...
238 */ 240 */
239struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino) 241struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
240{ 242{
241 struct inode *inode; 243 struct inode *inode;
242 244
243 inode = logfs_alloc_inode(sb); 245 inode = new_inode(sb);
244 if (!inode) 246 if (!inode)
245 return ERR_PTR(-ENOMEM); 247 return ERR_PTR(-ENOMEM);
246 248
247 inode->i_mode = S_IFREG; 249 inode->i_mode = S_IFREG;
248 inode->i_ino = ino; 250 inode->i_ino = ino;
249 inode->i_sb = sb; 251 inode->i_data.a_ops = &logfs_reg_aops;
250 252 mapping_set_gfp_mask(&inode->i_data, GFP_NOFS);
251 /* This is a blatant copy of alloc_inode code. We'd need alloc_inode
252 * to be nonstatic, alas. */
253 {
254 struct address_space * const mapping = &inode->i_data;
255
256 mapping->a_ops = &logfs_reg_aops;
257 mapping->host = inode;
258 mapping->flags = 0;
259 mapping_set_gfp_mask(mapping, GFP_NOFS);
260 mapping->assoc_mapping = NULL;
261 mapping->backing_dev_info = &default_backing_dev_info;
262 inode->i_mapping = mapping;
263 inode->i_nlink = 1;
264 }
265 253
266 return inode; 254 return inode;
267} 255}
@@ -277,7 +265,7 @@ struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino)
277 265
278 err = logfs_read_inode(inode); 266 err = logfs_read_inode(inode);
279 if (err) { 267 if (err) {
280 destroy_meta_inode(inode); 268 iput(inode);
281 return ERR_PTR(err); 269 return ERR_PTR(err);
282 } 270 }
283 logfs_inode_setops(inode); 271 logfs_inode_setops(inode);
@@ -298,18 +286,8 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
298 return ret; 286 return ret;
299} 287}
300 288
301void destroy_meta_inode(struct inode *inode)
302{
303 if (inode) {
304 if (inode->i_data.nrpages)
305 truncate_inode_pages(&inode->i_data, 0);
306 logfs_clear_inode(inode);
307 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
308 }
309}
310
311/* called with inode_lock held */ 289/* called with inode_lock held */
312static void logfs_drop_inode(struct inode *inode) 290static int logfs_drop_inode(struct inode *inode)
313{ 291{
314 struct logfs_super *super = logfs_super(inode->i_sb); 292 struct logfs_super *super = logfs_super(inode->i_sb);
315 struct logfs_inode *li = logfs_inode(inode); 293 struct logfs_inode *li = logfs_inode(inode);
@@ -317,7 +295,7 @@ static void logfs_drop_inode(struct inode *inode)
317 spin_lock(&logfs_inode_lock); 295 spin_lock(&logfs_inode_lock);
318 list_move(&li->li_freeing_list, &super->s_freeing_list); 296 list_move(&li->li_freeing_list, &super->s_freeing_list);
319 spin_unlock(&logfs_inode_lock); 297 spin_unlock(&logfs_inode_lock);
320 generic_drop_inode(inode); 298 return generic_drop_inode(inode);
321} 299}
322 300
323static void logfs_set_ino_generation(struct super_block *sb, 301static void logfs_set_ino_generation(struct super_block *sb,
@@ -384,12 +362,21 @@ static int logfs_sync_fs(struct super_block *sb, int wait)
384 return 0; 362 return 0;
385} 363}
386 364
365static void logfs_put_super(struct super_block *sb)
366{
367 struct logfs_super *super = logfs_super(sb);
368 /* kill the meta-inodes */
369 iput(super->s_master_inode);
370 iput(super->s_segfile_inode);
371 iput(super->s_mapping_inode);
372}
373
387const struct super_operations logfs_super_operations = { 374const struct super_operations logfs_super_operations = {
388 .alloc_inode = logfs_alloc_inode, 375 .alloc_inode = logfs_alloc_inode,
389 .clear_inode = logfs_clear_inode,
390 .delete_inode = logfs_delete_inode,
391 .destroy_inode = logfs_destroy_inode, 376 .destroy_inode = logfs_destroy_inode,
377 .evict_inode = logfs_evict_inode,
392 .drop_inode = logfs_drop_inode, 378 .drop_inode = logfs_drop_inode,
379 .put_super = logfs_put_super,
393 .write_inode = logfs_write_inode, 380 .write_inode = logfs_write_inode,
394 .statfs = logfs_statfs, 381 .statfs = logfs_statfs,
395 .sync_fs = logfs_sync_fs, 382 .sync_fs = logfs_sync_fs,
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 4b0e0616b357..f46ee8b0e135 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -889,8 +889,6 @@ void logfs_cleanup_journal(struct super_block *sb)
889 struct logfs_super *super = logfs_super(sb); 889 struct logfs_super *super = logfs_super(sb);
890 890
891 btree_grim_visitor32(&super->s_reserved_segments, 0, NULL); 891 btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);
892 destroy_meta_inode(super->s_master_inode);
893 super->s_master_inode = NULL;
894 892
895 kfree(super->s_compressed_je); 893 kfree(super->s_compressed_je);
896 kfree(super->s_je); 894 kfree(super->s_je);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index c838c4d72111..5e3b72077951 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -525,13 +525,11 @@ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); 525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
526int logfs_init_inode_cache(void); 526int logfs_init_inode_cache(void);
527void logfs_destroy_inode_cache(void); 527void logfs_destroy_inode_cache(void);
528void destroy_meta_inode(struct inode *inode);
529void logfs_set_blocks(struct inode *inode, u64 no); 528void logfs_set_blocks(struct inode *inode, u64 no);
530/* these logically belong into inode.c but actually reside in readwrite.c */ 529/* these logically belong into inode.c but actually reside in readwrite.c */
531int logfs_read_inode(struct inode *inode); 530int logfs_read_inode(struct inode *inode);
532int __logfs_write_inode(struct inode *inode, long flags); 531int __logfs_write_inode(struct inode *inode, long flags);
533void logfs_delete_inode(struct inode *inode); 532void logfs_evict_inode(struct inode *inode);
534void logfs_clear_inode(struct inode *inode);
535 533
536/* journal.c */ 534/* journal.c */
537void logfs_write_anchor(struct super_block *sb); 535void logfs_write_anchor(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 0718d112a1a5..6127baf0e188 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1972,31 +1972,6 @@ static struct page *inode_to_page(struct inode *inode)
1972 return page; 1972 return page;
1973} 1973}
1974 1974
1975/* Cheaper version of write_inode. All changes are concealed in
1976 * aliases, which are moved back. No write to the medium happens.
1977 */
1978void logfs_clear_inode(struct inode *inode)
1979{
1980 struct super_block *sb = inode->i_sb;
1981 struct logfs_inode *li = logfs_inode(inode);
1982 struct logfs_block *block = li->li_block;
1983 struct page *page;
1984
1985 /* Only deleted files may be dirty at this point */
1986 BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
1987 if (!block)
1988 return;
1989 if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
1990 block->ops->free_block(inode->i_sb, block);
1991 return;
1992 }
1993
1994 BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
1995 page = inode_to_page(inode);
1996 BUG_ON(!page); /* FIXME: Use emergency page */
1997 logfs_put_write_page(page);
1998}
1999
2000static int do_write_inode(struct inode *inode) 1975static int do_write_inode(struct inode *inode)
2001{ 1976{
2002 struct super_block *sb = inode->i_sb; 1977 struct super_block *sb = inode->i_sb;
@@ -2164,18 +2139,40 @@ static int do_delete_inode(struct inode *inode)
2164 * ZOMBIE inodes have already been deleted before and should remain dead, 2139 * ZOMBIE inodes have already been deleted before and should remain dead,
2165 * if it weren't for valid checking. No need to kill them again here. 2140 * if it weren't for valid checking. No need to kill them again here.
2166 */ 2141 */
2167void logfs_delete_inode(struct inode *inode) 2142void logfs_evict_inode(struct inode *inode)
2168{ 2143{
2144 struct super_block *sb = inode->i_sb;
2169 struct logfs_inode *li = logfs_inode(inode); 2145 struct logfs_inode *li = logfs_inode(inode);
2146 struct logfs_block *block = li->li_block;
2147 struct page *page;
2170 2148
2171 if (!(li->li_flags & LOGFS_IF_ZOMBIE)) { 2149 if (!inode->i_nlink) {
2172 li->li_flags |= LOGFS_IF_ZOMBIE; 2150 if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
2173 if (i_size_read(inode) > 0) 2151 li->li_flags |= LOGFS_IF_ZOMBIE;
2174 logfs_truncate(inode, 0); 2152 if (i_size_read(inode) > 0)
2175 do_delete_inode(inode); 2153 logfs_truncate(inode, 0);
2154 do_delete_inode(inode);
2155 }
2176 } 2156 }
2177 truncate_inode_pages(&inode->i_data, 0); 2157 truncate_inode_pages(&inode->i_data, 0);
2178 clear_inode(inode); 2158 end_writeback(inode);
2159
2160 /* Cheaper version of write_inode. All changes are concealed in
2161 * aliases, which are moved back. No write to the medium happens.
2162 */
2163 /* Only deleted files may be dirty at this point */
2164 BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
2165 if (!block)
2166 return;
2167 if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
2168 block->ops->free_block(inode->i_sb, block);
2169 return;
2170 }
2171
2172 BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
2173 page = inode_to_page(inode);
2174 BUG_ON(!page); /* FIXME: Use emergency page */
2175 logfs_put_write_page(page);
2179} 2176}
2180 2177
2181void btree_write_block(struct logfs_block *block) 2178void btree_write_block(struct logfs_block *block)
@@ -2272,7 +2269,6 @@ void logfs_cleanup_rw(struct super_block *sb)
2272{ 2269{
2273 struct logfs_super *super = logfs_super(sb); 2270 struct logfs_super *super = logfs_super(sb);
2274 2271
2275 destroy_meta_inode(super->s_segfile_inode);
2276 logfs_mempool_destroy(super->s_block_pool); 2272 logfs_mempool_destroy(super->s_block_pool);
2277 logfs_mempool_destroy(super->s_shadow_pool); 2273 logfs_mempool_destroy(super->s_shadow_pool);
2278} 2274}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index a9657afb70ad..9d5187353255 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -929,5 +929,4 @@ void logfs_cleanup_areas(struct super_block *sb)
929 for_each_area(i) 929 for_each_area(i)
930 free_area(super->s_area[i]); 930 free_area(super->s_area[i]);
931 free_area(super->s_journal_area); 931 free_area(super->s_journal_area);
932 destroy_meta_inode(super->s_mapping_inode);
933} 932}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d651e10a1e9c..5336155c5d81 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -342,24 +342,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
342 goto fail; 342 goto fail;
343 } 343 }
344 344
345 /* at that point we know that ->put_super() will be called */
345 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 346 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
346 if (!super->s_erase_page) 347 if (!super->s_erase_page)
347 goto fail; 348 return -ENOMEM;
348 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); 349 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
349 350
350 /* FIXME: check for read-only mounts */ 351 /* FIXME: check for read-only mounts */
351 err = logfs_make_writeable(sb); 352 err = logfs_make_writeable(sb);
352 if (err) 353 if (err) {
353 goto fail1; 354 __free_page(super->s_erase_page);
355 return err;
356 }
354 357
355 log_super("LogFS: Finished mounting\n"); 358 log_super("LogFS: Finished mounting\n");
356 simple_set_mnt(mnt, sb); 359 simple_set_mnt(mnt, sb);
357 return 0; 360 return 0;
358 361
359fail1:
360 __free_page(super->s_erase_page);
361fail: 362fail:
362 iput(logfs_super(sb)->s_master_inode); 363 iput(super->s_master_inode);
364 iput(super->s_segfile_inode);
365 iput(super->s_mapping_inode);
363 return -EIO; 366 return -EIO;
364} 367}
365 368
@@ -580,10 +583,14 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
580 sb->s_flags |= MS_ACTIVE; 583 sb->s_flags |= MS_ACTIVE;
581 err = logfs_get_sb_final(sb, mnt); 584 err = logfs_get_sb_final(sb, mnt);
582 if (err) 585 if (err)
583 goto err1; 586 deactivate_locked_super(sb);
584 return 0; 587 return err;
585 588
586err1: 589err1:
590 /* no ->s_root, no ->put_super() */
591 iput(super->s_master_inode);
592 iput(super->s_segfile_inode);
593 iput(super->s_mapping_inode);
587 deactivate_locked_super(sb); 594 deactivate_locked_super(sb);
588 return err; 595 return err;
589err0: 596err0:
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e28f21b95344..cf4e6cdfd15b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -79,15 +79,11 @@ EXPORT_SYMBOL(mb_cache_entry_find_next);
79struct mb_cache { 79struct mb_cache {
80 struct list_head c_cache_list; 80 struct list_head c_cache_list;
81 const char *c_name; 81 const char *c_name;
82 struct mb_cache_op c_op;
83 atomic_t c_entry_count; 82 atomic_t c_entry_count;
84 int c_bucket_bits; 83 int c_bucket_bits;
85#ifndef MB_CACHE_INDEXES_COUNT 84 struct kmem_cache *c_entry_cache;
86 int c_indexes_count;
87#endif
88 struct kmem_cache *c_entry_cache;
89 struct list_head *c_block_hash; 85 struct list_head *c_block_hash;
90 struct list_head *c_indexes_hash[0]; 86 struct list_head *c_index_hash;
91}; 87};
92 88
93 89
@@ -101,16 +97,6 @@ static LIST_HEAD(mb_cache_list);
101static LIST_HEAD(mb_cache_lru_list); 97static LIST_HEAD(mb_cache_lru_list);
102static DEFINE_SPINLOCK(mb_cache_spinlock); 98static DEFINE_SPINLOCK(mb_cache_spinlock);
103 99
104static inline int
105mb_cache_indexes(struct mb_cache *cache)
106{
107#ifdef MB_CACHE_INDEXES_COUNT
108 return MB_CACHE_INDEXES_COUNT;
109#else
110 return cache->c_indexes_count;
111#endif
112}
113
114/* 100/*
115 * What the mbcache registers as to get shrunk dynamically. 101 * What the mbcache registers as to get shrunk dynamically.
116 */ 102 */
@@ -132,12 +118,9 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
132static void 118static void
133__mb_cache_entry_unhash(struct mb_cache_entry *ce) 119__mb_cache_entry_unhash(struct mb_cache_entry *ce)
134{ 120{
135 int n;
136
137 if (__mb_cache_entry_is_hashed(ce)) { 121 if (__mb_cache_entry_is_hashed(ce)) {
138 list_del_init(&ce->e_block_list); 122 list_del_init(&ce->e_block_list);
139 for (n=0; n<mb_cache_indexes(ce->e_cache); n++) 123 list_del(&ce->e_index.o_list);
140 list_del(&ce->e_indexes[n].o_list);
141 } 124 }
142} 125}
143 126
@@ -148,16 +131,8 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
148 struct mb_cache *cache = ce->e_cache; 131 struct mb_cache *cache = ce->e_cache;
149 132
150 mb_assert(!(ce->e_used || ce->e_queued)); 133 mb_assert(!(ce->e_used || ce->e_queued));
151 if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { 134 kmem_cache_free(cache->c_entry_cache, ce);
152 /* free failed -- put back on the lru list 135 atomic_dec(&cache->c_entry_count);
153 for freeing later. */
154 spin_lock(&mb_cache_spinlock);
155 list_add(&ce->e_lru_list, &mb_cache_lru_list);
156 spin_unlock(&mb_cache_spinlock);
157 } else {
158 kmem_cache_free(cache->c_entry_cache, ce);
159 atomic_dec(&cache->c_entry_count);
160 }
161} 136}
162 137
163 138
@@ -201,22 +176,12 @@ static int
201mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 176mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
202{ 177{
203 LIST_HEAD(free_list); 178 LIST_HEAD(free_list);
204 struct list_head *l, *ltmp; 179 struct mb_cache *cache;
180 struct mb_cache_entry *entry, *tmp;
205 int count = 0; 181 int count = 0;
206 182
207 spin_lock(&mb_cache_spinlock);
208 list_for_each(l, &mb_cache_list) {
209 struct mb_cache *cache =
210 list_entry(l, struct mb_cache, c_cache_list);
211 mb_debug("cache %s (%d)", cache->c_name,
212 atomic_read(&cache->c_entry_count));
213 count += atomic_read(&cache->c_entry_count);
214 }
215 mb_debug("trying to free %d entries", nr_to_scan); 183 mb_debug("trying to free %d entries", nr_to_scan);
216 if (nr_to_scan == 0) { 184 spin_lock(&mb_cache_spinlock);
217 spin_unlock(&mb_cache_spinlock);
218 goto out;
219 }
220 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { 185 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
221 struct mb_cache_entry *ce = 186 struct mb_cache_entry *ce =
222 list_entry(mb_cache_lru_list.next, 187 list_entry(mb_cache_lru_list.next,
@@ -224,12 +189,15 @@ mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
224 list_move_tail(&ce->e_lru_list, &free_list); 189 list_move_tail(&ce->e_lru_list, &free_list);
225 __mb_cache_entry_unhash(ce); 190 __mb_cache_entry_unhash(ce);
226 } 191 }
192 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
193 mb_debug("cache %s (%d)", cache->c_name,
194 atomic_read(&cache->c_entry_count));
195 count += atomic_read(&cache->c_entry_count);
196 }
227 spin_unlock(&mb_cache_spinlock); 197 spin_unlock(&mb_cache_spinlock);
228 list_for_each_safe(l, ltmp, &free_list) { 198 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
229 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, 199 __mb_cache_entry_forget(entry, gfp_mask);
230 e_lru_list), gfp_mask);
231 } 200 }
232out:
233 return (count / 100) * sysctl_vfs_cache_pressure; 201 return (count / 100) * sysctl_vfs_cache_pressure;
234} 202}
235 203
@@ -243,72 +211,49 @@ out:
243 * memory was available. 211 * memory was available.
244 * 212 *
245 * @name: name of the cache (informal) 213 * @name: name of the cache (informal)
246 * @cache_op: contains the callback called when freeing a cache entry
247 * @entry_size: The size of a cache entry, including
248 * struct mb_cache_entry
249 * @indexes_count: number of additional indexes in the cache. Must equal
250 * MB_CACHE_INDEXES_COUNT if the number of indexes is
251 * hardwired.
252 * @bucket_bits: log2(number of hash buckets) 214 * @bucket_bits: log2(number of hash buckets)
253 */ 215 */
254struct mb_cache * 216struct mb_cache *
255mb_cache_create(const char *name, struct mb_cache_op *cache_op, 217mb_cache_create(const char *name, int bucket_bits)
256 size_t entry_size, int indexes_count, int bucket_bits)
257{ 218{
258 int m=0, n, bucket_count = 1 << bucket_bits; 219 int n, bucket_count = 1 << bucket_bits;
259 struct mb_cache *cache = NULL; 220 struct mb_cache *cache = NULL;
260 221
261 if(entry_size < sizeof(struct mb_cache_entry) + 222 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
262 indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
263 return NULL;
264
265 cache = kmalloc(sizeof(struct mb_cache) +
266 indexes_count * sizeof(struct list_head), GFP_KERNEL);
267 if (!cache) 223 if (!cache)
268 goto fail; 224 return NULL;
269 cache->c_name = name; 225 cache->c_name = name;
270 cache->c_op.free = NULL;
271 if (cache_op)
272 cache->c_op.free = cache_op->free;
273 atomic_set(&cache->c_entry_count, 0); 226 atomic_set(&cache->c_entry_count, 0);
274 cache->c_bucket_bits = bucket_bits; 227 cache->c_bucket_bits = bucket_bits;
275#ifdef MB_CACHE_INDEXES_COUNT
276 mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT);
277#else
278 cache->c_indexes_count = indexes_count;
279#endif
280 cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), 228 cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
281 GFP_KERNEL); 229 GFP_KERNEL);
282 if (!cache->c_block_hash) 230 if (!cache->c_block_hash)
283 goto fail; 231 goto fail;
284 for (n=0; n<bucket_count; n++) 232 for (n=0; n<bucket_count; n++)
285 INIT_LIST_HEAD(&cache->c_block_hash[n]); 233 INIT_LIST_HEAD(&cache->c_block_hash[n]);
286 for (m=0; m<indexes_count; m++) { 234 cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head),
287 cache->c_indexes_hash[m] = kmalloc(bucket_count * 235 GFP_KERNEL);
288 sizeof(struct list_head), 236 if (!cache->c_index_hash)
289 GFP_KERNEL); 237 goto fail;
290 if (!cache->c_indexes_hash[m]) 238 for (n=0; n<bucket_count; n++)
291 goto fail; 239 INIT_LIST_HEAD(&cache->c_index_hash[n]);
292 for (n=0; n<bucket_count; n++) 240 cache->c_entry_cache = kmem_cache_create(name,
293 INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]); 241 sizeof(struct mb_cache_entry), 0,
294 }
295 cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
296 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); 242 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
297 if (!cache->c_entry_cache) 243 if (!cache->c_entry_cache)
298 goto fail; 244 goto fail2;
299 245
300 spin_lock(&mb_cache_spinlock); 246 spin_lock(&mb_cache_spinlock);
301 list_add(&cache->c_cache_list, &mb_cache_list); 247 list_add(&cache->c_cache_list, &mb_cache_list);
302 spin_unlock(&mb_cache_spinlock); 248 spin_unlock(&mb_cache_spinlock);
303 return cache; 249 return cache;
304 250
251fail2:
252 kfree(cache->c_index_hash);
253
305fail: 254fail:
306 if (cache) { 255 kfree(cache->c_block_hash);
307 while (--m >= 0) 256 kfree(cache);
308 kfree(cache->c_indexes_hash[m]);
309 kfree(cache->c_block_hash);
310 kfree(cache);
311 }
312 return NULL; 257 return NULL;
313} 258}
314 259
@@ -357,7 +302,6 @@ mb_cache_destroy(struct mb_cache *cache)
357{ 302{
358 LIST_HEAD(free_list); 303 LIST_HEAD(free_list);
359 struct list_head *l, *ltmp; 304 struct list_head *l, *ltmp;
360 int n;
361 305
362 spin_lock(&mb_cache_spinlock); 306 spin_lock(&mb_cache_spinlock);
363 list_for_each_safe(l, ltmp, &mb_cache_lru_list) { 307 list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
@@ -384,8 +328,7 @@ mb_cache_destroy(struct mb_cache *cache)
384 328
385 kmem_cache_destroy(cache->c_entry_cache); 329 kmem_cache_destroy(cache->c_entry_cache);
386 330
387 for (n=0; n < mb_cache_indexes(cache); n++) 331 kfree(cache->c_index_hash);
388 kfree(cache->c_indexes_hash[n]);
389 kfree(cache->c_block_hash); 332 kfree(cache->c_block_hash);
390 kfree(cache); 333 kfree(cache);
391} 334}
@@ -429,17 +372,16 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
429 * 372 *
430 * @bdev: device the cache entry belongs to 373 * @bdev: device the cache entry belongs to
431 * @block: block number 374 * @block: block number
432 * @keys: array of additional keys. There must be indexes_count entries 375 * @key: lookup key
433 * in the array (as specified when creating the cache).
434 */ 376 */
435int 377int
436mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, 378mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
437 sector_t block, unsigned int keys[]) 379 sector_t block, unsigned int key)
438{ 380{
439 struct mb_cache *cache = ce->e_cache; 381 struct mb_cache *cache = ce->e_cache;
440 unsigned int bucket; 382 unsigned int bucket;
441 struct list_head *l; 383 struct list_head *l;
442 int error = -EBUSY, n; 384 int error = -EBUSY;
443 385
444 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 386 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
445 cache->c_bucket_bits); 387 cache->c_bucket_bits);
@@ -454,12 +396,9 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
454 ce->e_bdev = bdev; 396 ce->e_bdev = bdev;
455 ce->e_block = block; 397 ce->e_block = block;
456 list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); 398 list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
457 for (n=0; n<mb_cache_indexes(cache); n++) { 399 ce->e_index.o_key = key;
458 ce->e_indexes[n].o_key = keys[n]; 400 bucket = hash_long(key, cache->c_bucket_bits);
459 bucket = hash_long(keys[n], cache->c_bucket_bits); 401 list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]);
460 list_add(&ce->e_indexes[n].o_list,
461 &cache->c_indexes_hash[n][bucket]);
462 }
463 error = 0; 402 error = 0;
464out: 403out:
465 spin_unlock(&mb_cache_spinlock); 404 spin_unlock(&mb_cache_spinlock);
@@ -555,13 +494,12 @@ cleanup:
555 494
556static struct mb_cache_entry * 495static struct mb_cache_entry *
557__mb_cache_entry_find(struct list_head *l, struct list_head *head, 496__mb_cache_entry_find(struct list_head *l, struct list_head *head,
558 int index, struct block_device *bdev, unsigned int key) 497 struct block_device *bdev, unsigned int key)
559{ 498{
560 while (l != head) { 499 while (l != head) {
561 struct mb_cache_entry *ce = 500 struct mb_cache_entry *ce =
562 list_entry(l, struct mb_cache_entry, 501 list_entry(l, struct mb_cache_entry, e_index.o_list);
563 e_indexes[index].o_list); 502 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
564 if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
565 DEFINE_WAIT(wait); 503 DEFINE_WAIT(wait);
566 504
567 if (!list_empty(&ce->e_lru_list)) 505 if (!list_empty(&ce->e_lru_list))
@@ -603,23 +541,20 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
603 * returned cache entry is locked for shared access ("multiple readers"). 541 * returned cache entry is locked for shared access ("multiple readers").
604 * 542 *
605 * @cache: the cache to search 543 * @cache: the cache to search
606 * @index: the number of the additonal index to search (0<=index<indexes_count)
607 * @bdev: the device the cache entry should belong to 544 * @bdev: the device the cache entry should belong to
608 * @key: the key in the index 545 * @key: the key in the index
609 */ 546 */
610struct mb_cache_entry * 547struct mb_cache_entry *
611mb_cache_entry_find_first(struct mb_cache *cache, int index, 548mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
612 struct block_device *bdev, unsigned int key) 549 unsigned int key)
613{ 550{
614 unsigned int bucket = hash_long(key, cache->c_bucket_bits); 551 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
615 struct list_head *l; 552 struct list_head *l;
616 struct mb_cache_entry *ce; 553 struct mb_cache_entry *ce;
617 554
618 mb_assert(index < mb_cache_indexes(cache));
619 spin_lock(&mb_cache_spinlock); 555 spin_lock(&mb_cache_spinlock);
620 l = cache->c_indexes_hash[index][bucket].next; 556 l = cache->c_index_hash[bucket].next;
621 ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], 557 ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
622 index, bdev, key);
623 spin_unlock(&mb_cache_spinlock); 558 spin_unlock(&mb_cache_spinlock);
624 return ce; 559 return ce;
625} 560}
@@ -640,12 +575,11 @@ mb_cache_entry_find_first(struct mb_cache *cache, int index,
640 * } 575 * }
641 * 576 *
642 * @prev: The previous match 577 * @prev: The previous match
643 * @index: the number of the additonal index to search (0<=index<indexes_count)
644 * @bdev: the device the cache entry should belong to 578 * @bdev: the device the cache entry should belong to
645 * @key: the key in the index 579 * @key: the key in the index
646 */ 580 */
647struct mb_cache_entry * 581struct mb_cache_entry *
648mb_cache_entry_find_next(struct mb_cache_entry *prev, int index, 582mb_cache_entry_find_next(struct mb_cache_entry *prev,
649 struct block_device *bdev, unsigned int key) 583 struct block_device *bdev, unsigned int key)
650{ 584{
651 struct mb_cache *cache = prev->e_cache; 585 struct mb_cache *cache = prev->e_cache;
@@ -653,11 +587,9 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
653 struct list_head *l; 587 struct list_head *l;
654 struct mb_cache_entry *ce; 588 struct mb_cache_entry *ce;
655 589
656 mb_assert(index < mb_cache_indexes(cache));
657 spin_lock(&mb_cache_spinlock); 590 spin_lock(&mb_cache_spinlock);
658 l = prev->e_indexes[index].o_list.next; 591 l = prev->e_index.o_list.next;
659 ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], 592 ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
660 index, bdev, key);
661 __mb_cache_entry_release_unlock(prev); 593 __mb_cache_entry_release_unlock(prev);
662 return ce; 594 return ce;
663} 595}
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 482779fe4e7c..3f32bcb0d9bd 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -200,13 +200,13 @@ void minix_free_inode(struct inode * inode)
200 ino = inode->i_ino; 200 ino = inode->i_ino;
201 if (ino < 1 || ino > sbi->s_ninodes) { 201 if (ino < 1 || ino > sbi->s_ninodes) {
202 printk("minix_free_inode: inode 0 or nonexistent inode\n"); 202 printk("minix_free_inode: inode 0 or nonexistent inode\n");
203 goto out; 203 return;
204 } 204 }
205 bit = ino & ((1<<k) - 1); 205 bit = ino & ((1<<k) - 1);
206 ino >>= k; 206 ino >>= k;
207 if (ino >= sbi->s_imap_blocks) { 207 if (ino >= sbi->s_imap_blocks) {
208 printk("minix_free_inode: nonexistent imap in superblock\n"); 208 printk("minix_free_inode: nonexistent imap in superblock\n");
209 goto out; 209 return;
210 } 210 }
211 211
212 minix_clear_inode(inode); /* clear on-disk copy */ 212 minix_clear_inode(inode); /* clear on-disk copy */
@@ -217,8 +217,6 @@ void minix_free_inode(struct inode * inode)
217 printk("minix_free_inode: bit %lu already cleared\n", bit); 217 printk("minix_free_inode: bit %lu already cleared\n", bit);
218 spin_unlock(&bitmap_lock); 218 spin_unlock(&bitmap_lock);
219 mark_buffer_dirty(bh); 219 mark_buffer_dirty(bh);
220 out:
221 clear_inode(inode); /* clear in-memory copy */
222} 220}
223 221
224struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) 222struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 1dbf921ca44b..085a9262c692 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -271,8 +271,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
271 271
272got_it: 272got_it:
273 pos = page_offset(page) + p - (char *)page_address(page); 273 pos = page_offset(page) + p - (char *)page_address(page);
274 err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize, 274 err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
275 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
276 if (err) 275 if (err)
277 goto out_unlock; 276 goto out_unlock;
278 memcpy (namx, name, namelen); 277 memcpy (namx, name, namelen);
@@ -297,8 +296,7 @@ out_unlock:
297 296
298int minix_delete_entry(struct minix_dir_entry *de, struct page *page) 297int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
299{ 298{
300 struct address_space *mapping = page->mapping; 299 struct inode *inode = page->mapping->host;
301 struct inode *inode = (struct inode*)mapping->host;
302 char *kaddr = page_address(page); 300 char *kaddr = page_address(page);
303 loff_t pos = page_offset(page) + (char*)de - kaddr; 301 loff_t pos = page_offset(page) + (char*)de - kaddr;
304 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 302 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
@@ -306,8 +304,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
306 int err; 304 int err;
307 305
308 lock_page(page); 306 lock_page(page);
309 err = __minix_write_begin(NULL, mapping, pos, len, 307 err = minix_prepare_chunk(page, pos, len);
310 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
311 if (err == 0) { 308 if (err == 0) {
312 if (sbi->s_version == MINIX_V3) 309 if (sbi->s_version == MINIX_V3)
313 ((minix3_dirent *) de)->inode = 0; 310 ((minix3_dirent *) de)->inode = 0;
@@ -325,16 +322,14 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
325 322
326int minix_make_empty(struct inode *inode, struct inode *dir) 323int minix_make_empty(struct inode *inode, struct inode *dir)
327{ 324{
328 struct address_space *mapping = inode->i_mapping; 325 struct page *page = grab_cache_page(inode->i_mapping, 0);
329 struct page *page = grab_cache_page(mapping, 0);
330 struct minix_sb_info *sbi = minix_sb(inode->i_sb); 326 struct minix_sb_info *sbi = minix_sb(inode->i_sb);
331 char *kaddr; 327 char *kaddr;
332 int err; 328 int err;
333 329
334 if (!page) 330 if (!page)
335 return -ENOMEM; 331 return -ENOMEM;
336 err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize, 332 err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize);
337 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
338 if (err) { 333 if (err) {
339 unlock_page(page); 334 unlock_page(page);
340 goto fail; 335 goto fail;
@@ -425,8 +420,7 @@ not_empty:
425void minix_set_link(struct minix_dir_entry *de, struct page *page, 420void minix_set_link(struct minix_dir_entry *de, struct page *page,
426 struct inode *inode) 421 struct inode *inode)
427{ 422{
428 struct address_space *mapping = page->mapping; 423 struct inode *dir = page->mapping->host;
429 struct inode *dir = mapping->host;
430 struct minix_sb_info *sbi = minix_sb(dir->i_sb); 424 struct minix_sb_info *sbi = minix_sb(dir->i_sb);
431 loff_t pos = page_offset(page) + 425 loff_t pos = page_offset(page) +
432 (char *)de-(char*)page_address(page); 426 (char *)de-(char*)page_address(page);
@@ -434,8 +428,7 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page,
434 428
435 lock_page(page); 429 lock_page(page);
436 430
437 err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize, 431 err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
438 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
439 if (err == 0) { 432 if (err == 0) {
440 if (sbi->s_version == MINIX_V3) 433 if (sbi->s_version == MINIX_V3)
441 ((minix3_dirent *) de)->inode = inode->i_ino; 434 ((minix3_dirent *) de)->inode = inode->i_ino;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index d5320ff23faf..4493ce695ab8 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,29 @@ const struct file_operations minix_file_operations = {
23 .splice_read = generic_file_splice_read, 23 .splice_read = generic_file_splice_read,
24}; 24};
25 25
26static int minix_setattr(struct dentry *dentry, struct iattr *attr)
27{
28 struct inode *inode = dentry->d_inode;
29 int error;
30
31 error = inode_change_ok(inode, attr);
32 if (error)
33 return error;
34
35 if ((attr->ia_valid & ATTR_SIZE) &&
36 attr->ia_size != i_size_read(inode)) {
37 error = vmtruncate(inode, attr->ia_size);
38 if (error)
39 return error;
40 }
41
42 setattr_copy(inode, attr);
43 mark_inode_dirty(inode);
44 return 0;
45}
46
26const struct inode_operations minix_file_inode_operations = { 47const struct inode_operations minix_file_inode_operations = {
27 .truncate = minix_truncate, 48 .truncate = minix_truncate,
49 .setattr = minix_setattr,
28 .getattr = minix_getattr, 50 .getattr = minix_getattr,
29}; 51};
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 756f8c93780c..e39d6bf2e8fb 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,12 +24,17 @@ static int minix_write_inode(struct inode *inode,
24static int minix_statfs(struct dentry *dentry, struct kstatfs *buf); 24static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
25static int minix_remount (struct super_block * sb, int * flags, char * data); 25static int minix_remount (struct super_block * sb, int * flags, char * data);
26 26
27static void minix_delete_inode(struct inode *inode) 27static void minix_evict_inode(struct inode *inode)
28{ 28{
29 truncate_inode_pages(&inode->i_data, 0); 29 truncate_inode_pages(&inode->i_data, 0);
30 inode->i_size = 0; 30 if (!inode->i_nlink) {
31 minix_truncate(inode); 31 inode->i_size = 0;
32 minix_free_inode(inode); 32 minix_truncate(inode);
33 }
34 invalidate_inode_buffers(inode);
35 end_writeback(inode);
36 if (!inode->i_nlink)
37 minix_free_inode(inode);
33} 38}
34 39
35static void minix_put_super(struct super_block *sb) 40static void minix_put_super(struct super_block *sb)
@@ -96,7 +101,7 @@ static const struct super_operations minix_sops = {
96 .alloc_inode = minix_alloc_inode, 101 .alloc_inode = minix_alloc_inode,
97 .destroy_inode = minix_destroy_inode, 102 .destroy_inode = minix_destroy_inode,
98 .write_inode = minix_write_inode, 103 .write_inode = minix_write_inode,
99 .delete_inode = minix_delete_inode, 104 .evict_inode = minix_evict_inode,
100 .put_super = minix_put_super, 105 .put_super = minix_put_super,
101 .statfs = minix_statfs, 106 .statfs = minix_statfs,
102 .remount_fs = minix_remount, 107 .remount_fs = minix_remount,
@@ -357,20 +362,26 @@ static int minix_readpage(struct file *file, struct page *page)
357 return block_read_full_page(page,minix_get_block); 362 return block_read_full_page(page,minix_get_block);
358} 363}
359 364
360int __minix_write_begin(struct file *file, struct address_space *mapping, 365int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)
361 loff_t pos, unsigned len, unsigned flags,
362 struct page **pagep, void **fsdata)
363{ 366{
364 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 367 return __block_write_begin(page, pos, len, minix_get_block);
365 minix_get_block);
366} 368}
367 369
368static int minix_write_begin(struct file *file, struct address_space *mapping, 370static int minix_write_begin(struct file *file, struct address_space *mapping,
369 loff_t pos, unsigned len, unsigned flags, 371 loff_t pos, unsigned len, unsigned flags,
370 struct page **pagep, void **fsdata) 372 struct page **pagep, void **fsdata)
371{ 373{
372 *pagep = NULL; 374 int ret;
373 return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 375
376 ret = block_write_begin(mapping, pos, len, flags, pagep,
377 minix_get_block);
378 if (unlikely(ret)) {
379 loff_t isize = mapping->host->i_size;
380 if (pos + len > isize)
381 vmtruncate(mapping->host, isize);
382 }
383
384 return ret;
374} 385}
375 386
376static sector_t minix_bmap(struct address_space *mapping, sector_t block) 387static sector_t minix_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 111f34ee9e3b..407b1c84911e 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -53,9 +53,7 @@ extern int minix_new_block(struct inode * inode);
53extern void minix_free_block(struct inode *inode, unsigned long block); 53extern void minix_free_block(struct inode *inode, unsigned long block);
54extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi); 54extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); 55extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
56extern int __minix_write_begin(struct file *file, struct address_space *mapping, 56extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
57 loff_t pos, unsigned len, unsigned flags,
58 struct page **pagep, void **fsdata);
59 57
60extern void V1_minix_truncate(struct inode *); 58extern void V1_minix_truncate(struct inode *);
61extern void V2_minix_truncate(struct inode *); 59extern void V2_minix_truncate(struct inode *);
diff --git a/fs/namei.c b/fs/namei.c
index 42d2d28fb827..13ff4abdbdca 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2633,7 +2633,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2633{ 2633{
2634 int error; 2634 int error;
2635 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); 2635 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
2636 const char *old_name; 2636 const unsigned char *old_name;
2637 2637
2638 if (old_dentry->d_inode == new_dentry->d_inode) 2638 if (old_dentry->d_inode == new_dentry->d_inode)
2639 return 0; 2639 return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
index 88058de59c7c..66c4f7e781cb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -29,6 +29,7 @@
29#include <linux/log2.h> 29#include <linux/log2.h>
30#include <linux/idr.h> 30#include <linux/idr.h>
31#include <linux/fs_struct.h> 31#include <linux/fs_struct.h>
32#include <linux/fsnotify.h>
32#include <asm/uaccess.h> 33#include <asm/uaccess.h>
33#include <asm/unistd.h> 34#include <asm/unistd.h>
34#include "pnode.h" 35#include "pnode.h"
@@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char *name)
150 INIT_LIST_HEAD(&mnt->mnt_share); 151 INIT_LIST_HEAD(&mnt->mnt_share);
151 INIT_LIST_HEAD(&mnt->mnt_slave_list); 152 INIT_LIST_HEAD(&mnt->mnt_slave_list);
152 INIT_LIST_HEAD(&mnt->mnt_slave); 153 INIT_LIST_HEAD(&mnt->mnt_slave);
154#ifdef CONFIG_FSNOTIFY
155 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
156#endif
153#ifdef CONFIG_SMP 157#ifdef CONFIG_SMP
154 mnt->mnt_writers = alloc_percpu(int); 158 mnt->mnt_writers = alloc_percpu(int);
155 if (!mnt->mnt_writers) 159 if (!mnt->mnt_writers)
@@ -610,6 +614,7 @@ static inline void __mntput(struct vfsmount *mnt)
610 * provides barriers, so count_mnt_writers() below is safe. AV 614 * provides barriers, so count_mnt_writers() below is safe. AV
611 */ 615 */
612 WARN_ON(count_mnt_writers(mnt)); 616 WARN_ON(count_mnt_writers(mnt));
617 fsnotify_vfsmount_delete(mnt);
613 dput(mnt->mnt_root); 618 dput(mnt->mnt_root);
614 free_vfsmnt(mnt); 619 free_vfsmnt(mnt);
615 deactivate_super(sb); 620 deactivate_super(sb);
@@ -1984,7 +1989,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1984 if (flags & MS_RDONLY) 1989 if (flags & MS_RDONLY)
1985 mnt_flags |= MNT_READONLY; 1990 mnt_flags |= MNT_READONLY;
1986 1991
1987 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1992 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
1988 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | 1993 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
1989 MS_STRICTATIME); 1994 MS_STRICTATIME);
1990 1995
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 1e634deff941..b4de38cf49f5 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -43,7 +43,7 @@
43#define NCP_DEFAULT_TIME_OUT 10 43#define NCP_DEFAULT_TIME_OUT 10
44#define NCP_DEFAULT_RETRY_COUNT 20 44#define NCP_DEFAULT_RETRY_COUNT 20
45 45
46static void ncp_delete_inode(struct inode *); 46static void ncp_evict_inode(struct inode *);
47static void ncp_put_super(struct super_block *); 47static void ncp_put_super(struct super_block *);
48static int ncp_statfs(struct dentry *, struct kstatfs *); 48static int ncp_statfs(struct dentry *, struct kstatfs *);
49static int ncp_show_options(struct seq_file *, struct vfsmount *); 49static int ncp_show_options(struct seq_file *, struct vfsmount *);
@@ -100,7 +100,7 @@ static const struct super_operations ncp_sops =
100 .alloc_inode = ncp_alloc_inode, 100 .alloc_inode = ncp_alloc_inode,
101 .destroy_inode = ncp_destroy_inode, 101 .destroy_inode = ncp_destroy_inode,
102 .drop_inode = generic_delete_inode, 102 .drop_inode = generic_delete_inode,
103 .delete_inode = ncp_delete_inode, 103 .evict_inode = ncp_evict_inode,
104 .put_super = ncp_put_super, 104 .put_super = ncp_put_super,
105 .statfs = ncp_statfs, 105 .statfs = ncp_statfs,
106 .remount_fs = ncp_remount, 106 .remount_fs = ncp_remount,
@@ -282,19 +282,19 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
282} 282}
283 283
284static void 284static void
285ncp_delete_inode(struct inode *inode) 285ncp_evict_inode(struct inode *inode)
286{ 286{
287 truncate_inode_pages(&inode->i_data, 0); 287 truncate_inode_pages(&inode->i_data, 0);
288 end_writeback(inode);
288 289
289 if (S_ISDIR(inode->i_mode)) { 290 if (S_ISDIR(inode->i_mode)) {
290 DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino); 291 DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
291 } 292 }
292 293
293 if (ncp_make_closed(inode) != 0) { 294 if (ncp_make_closed(inode) != 0) {
294 /* We can't do anything but complain. */ 295 /* We can't do anything but complain. */
295 printk(KERN_ERR "ncp_delete_inode: could not close\n"); 296 printk(KERN_ERR "ncp_evict_inode: could not close\n");
296 } 297 }
297 clear_inode(inode);
298} 298}
299 299
300static void ncp_stop_tasks(struct ncp_server *server) { 300static void ncp_stop_tasks(struct ncp_server *server) {
@@ -924,9 +924,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
924 tmpattr.ia_valid = ATTR_MODE; 924 tmpattr.ia_valid = ATTR_MODE;
925 tmpattr.ia_mode = attr->ia_mode; 925 tmpattr.ia_mode = attr->ia_mode;
926 926
927 result = inode_setattr(inode, &tmpattr); 927 setattr_copy(inode, &tmpattr);
928 if (result) 928 mark_inode_dirty(inode);
929 goto out;
930 } 929 }
931 } 930 }
932#endif 931#endif
@@ -954,15 +953,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
954 result = ncp_make_closed(inode); 953 result = ncp_make_closed(inode);
955 if (result) 954 if (result)
956 goto out; 955 goto out;
957 { 956
958 struct iattr tmpattr; 957 if (attr->ia_size != i_size_read(inode)) {
959 958 result = vmtruncate(inode, attr->ia_size);
960 tmpattr.ia_valid = ATTR_SIZE;
961 tmpattr.ia_size = attr->ia_size;
962
963 result = inode_setattr(inode, &tmpattr);
964 if (result) 959 if (result)
965 goto out; 960 goto out;
961 mark_inode_dirty(inode);
966 } 962 }
967 } 963 }
968 if ((attr->ia_valid & ATTR_CTIME) != 0) { 964 if ((attr->ia_valid & ATTR_CTIME) != 0) {
@@ -1002,8 +998,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
1002 NCP_FINFO(inode)->nwattr = info.attributes; 998 NCP_FINFO(inode)->nwattr = info.attributes;
1003#endif 999#endif
1004 } 1000 }
1005 if (!result) 1001 if (result)
1006 result = inode_setattr(inode, attr); 1002 goto out;
1003
1004 setattr_copy(inode, attr);
1005 mark_inode_dirty(inode);
1006
1007out: 1007out:
1008 unlock_kernel(); 1008 unlock_kernel();
1009 return result; 1009 return result;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 023c03d02070..84a8cfc4e38e 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -20,7 +20,6 @@
20#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
21#include <linux/vmalloc.h> 21#include <linux/vmalloc.h>
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/smp_lock.h>
24 23
25#include <linux/ncp_fs.h> 24#include <linux/ncp_fs.h>
26 25
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 581d8f081e68..7d2d6c72aa78 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -98,7 +98,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
98 return ino; 98 return ino;
99} 99}
100 100
101void nfs_clear_inode(struct inode *inode) 101static void nfs_clear_inode(struct inode *inode)
102{ 102{
103 /* 103 /*
104 * The following should never happen... 104 * The following should never happen...
@@ -110,6 +110,13 @@ void nfs_clear_inode(struct inode *inode)
110 nfs_fscache_release_inode_cookie(inode); 110 nfs_fscache_release_inode_cookie(inode);
111} 111}
112 112
113void nfs_evict_inode(struct inode *inode)
114{
115 truncate_inode_pages(&inode->i_data, 0);
116 end_writeback(inode);
117 nfs_clear_inode(inode);
118}
119
113/** 120/**
114 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk 121 * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
115 */ 122 */
@@ -1398,8 +1405,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1398 * to open() calls that passed nfs_atomic_lookup, but failed to call 1405 * to open() calls that passed nfs_atomic_lookup, but failed to call
1399 * nfs_open(). 1406 * nfs_open().
1400 */ 1407 */
1401void nfs4_clear_inode(struct inode *inode) 1408void nfs4_evict_inode(struct inode *inode)
1402{ 1409{
1410 truncate_inode_pages(&inode->i_data, 0);
1411 end_writeback(inode);
1403 /* If we are holding a delegation, return it! */ 1412 /* If we are holding a delegation, return it! */
1404 nfs_inode_return_delegation_noreclaim(inode); 1413 nfs_inode_return_delegation_noreclaim(inode);
1405 /* First call standard NFS clear_inode() code */ 1414 /* First call standard NFS clear_inode() code */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4c2150d86714..c961bc92c107 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -213,9 +213,9 @@ extern struct workqueue_struct *nfsiod_workqueue;
213extern struct inode *nfs_alloc_inode(struct super_block *sb); 213extern struct inode *nfs_alloc_inode(struct super_block *sb);
214extern void nfs_destroy_inode(struct inode *); 214extern void nfs_destroy_inode(struct inode *);
215extern int nfs_write_inode(struct inode *, struct writeback_control *); 215extern int nfs_write_inode(struct inode *, struct writeback_control *);
216extern void nfs_clear_inode(struct inode *); 216extern void nfs_evict_inode(struct inode *);
217#ifdef CONFIG_NFS_V4 217#ifdef CONFIG_NFS_V4
218extern void nfs4_clear_inode(struct inode *); 218extern void nfs4_evict_inode(struct inode *);
219#endif 219#endif
220void nfs_zap_acl_cache(struct inode *inode); 220void nfs_zap_acl_cache(struct inode *inode);
221extern int nfs_wait_bit_killable(void *word); 221extern int nfs_wait_bit_killable(void *word);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f1ae39f6cb02..ee26316ad1f4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -270,7 +270,7 @@ static const struct super_operations nfs_sops = {
270 .write_inode = nfs_write_inode, 270 .write_inode = nfs_write_inode,
271 .put_super = nfs_put_super, 271 .put_super = nfs_put_super,
272 .statfs = nfs_statfs, 272 .statfs = nfs_statfs,
273 .clear_inode = nfs_clear_inode, 273 .evict_inode = nfs_evict_inode,
274 .umount_begin = nfs_umount_begin, 274 .umount_begin = nfs_umount_begin,
275 .show_options = nfs_show_options, 275 .show_options = nfs_show_options,
276 .show_stats = nfs_show_stats, 276 .show_stats = nfs_show_stats,
@@ -340,7 +340,7 @@ static const struct super_operations nfs4_sops = {
340 .write_inode = nfs_write_inode, 340 .write_inode = nfs_write_inode,
341 .put_super = nfs_put_super, 341 .put_super = nfs_put_super,
342 .statfs = nfs_statfs, 342 .statfs = nfs_statfs,
343 .clear_inode = nfs4_clear_inode, 343 .evict_inode = nfs4_evict_inode,
344 .umount_begin = nfs_umount_begin, 344 .umount_begin = nfs_umount_begin,
345 .show_options = nfs_show_options, 345 .show_options = nfs_show_options,
346 .show_stats = nfs_show_stats, 346 .show_stats = nfs_show_stats,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index f8931acb05f3..1a468bbd330f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1756,6 +1756,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1756 struct nfs4_acl *acl = NULL; 1756 struct nfs4_acl *acl = NULL;
1757 struct nfsd4_compoundres *resp = rqstp->rq_resp; 1757 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1758 u32 minorversion = resp->cstate.minorversion; 1758 u32 minorversion = resp->cstate.minorversion;
1759 struct path path = {
1760 .mnt = exp->ex_path.mnt,
1761 .dentry = dentry,
1762 };
1759 1763
1760 BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); 1764 BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
1761 BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion)); 1765 BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
@@ -1776,7 +1780,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1776 FATTR4_WORD0_MAXNAME)) || 1780 FATTR4_WORD0_MAXNAME)) ||
1777 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | 1781 (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
1778 FATTR4_WORD1_SPACE_TOTAL))) { 1782 FATTR4_WORD1_SPACE_TOTAL))) {
1779 err = vfs_statfs(dentry, &statfs); 1783 err = vfs_statfs(&path, &statfs);
1780 if (err) 1784 if (err)
1781 goto out_nfserr; 1785 goto out_nfserr;
1782 } 1786 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9df85a13af28..96360a83cb91 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -934,7 +934,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
934 nfsdstats.io_read += host_err; 934 nfsdstats.io_read += host_err;
935 *count = host_err; 935 *count = host_err;
936 err = 0; 936 err = 0;
937 fsnotify_access(file->f_path.dentry); 937 fsnotify_access(file);
938 } else 938 } else
939 err = nfserrno(host_err); 939 err = nfserrno(host_err);
940out: 940out:
@@ -1045,7 +1045,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1045 goto out_nfserr; 1045 goto out_nfserr;
1046 *cnt = host_err; 1046 *cnt = host_err;
1047 nfsdstats.io_write += host_err; 1047 nfsdstats.io_write += host_err;
1048 fsnotify_modify(file->f_path.dentry); 1048 fsnotify_modify(file);
1049 1049
1050 /* clear setuid/setgid flag after write */ 1050 /* clear setuid/setgid flag after write */
1051 if (inode->i_mode & (S_ISUID | S_ISGID)) 1051 if (inode->i_mode & (S_ISUID | S_ISGID))
@@ -2033,8 +2033,14 @@ out:
2033__be32 2033__be32
2034nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) 2034nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
2035{ 2035{
2036 __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); 2036 struct path path = {
2037 if (!err && vfs_statfs(fhp->fh_dentry,stat)) 2037 .mnt = fhp->fh_export->ex_path.mnt,
2038 .dentry = fhp->fh_dentry,
2039 };
2040 __be32 err;
2041
2042 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
2043 if (!err && vfs_statfs(&path, stat))
2038 err = nfserr_io; 2044 err = nfserr_io;
2039 return err; 2045 return err;
2040} 2046}
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index b60277b44468..cb003c8ee1f6 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -80,23 +80,10 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
80 return last_byte; 80 return last_byte;
81} 81}
82 82
83static int nilfs_prepare_chunk_uninterruptible(struct page *page, 83static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
84 struct address_space *mapping,
85 unsigned from, unsigned to)
86{ 84{
87 loff_t pos = page_offset(page) + from; 85 loff_t pos = page_offset(page) + from;
88 return block_write_begin(NULL, mapping, pos, to - from, 86 return __block_write_begin(page, pos, to - from, nilfs_get_block);
89 AOP_FLAG_UNINTERRUPTIBLE, &page,
90 NULL, nilfs_get_block);
91}
92
93static int nilfs_prepare_chunk(struct page *page,
94 struct address_space *mapping,
95 unsigned from, unsigned to)
96{
97 loff_t pos = page_offset(page) + from;
98 return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
99 NULL, nilfs_get_block);
100} 87}
101 88
102static void nilfs_commit_chunk(struct page *page, 89static void nilfs_commit_chunk(struct page *page,
@@ -447,7 +434,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
447 int err; 434 int err;
448 435
449 lock_page(page); 436 lock_page(page);
450 err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); 437 err = nilfs_prepare_chunk(page, from, to);
451 BUG_ON(err); 438 BUG_ON(err);
452 de->inode = cpu_to_le64(inode->i_ino); 439 de->inode = cpu_to_le64(inode->i_ino);
453 nilfs_set_de_type(de, inode); 440 nilfs_set_de_type(de, inode);
@@ -528,7 +515,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
528got_it: 515got_it:
529 from = (char *)de - (char *)page_address(page); 516 from = (char *)de - (char *)page_address(page);
530 to = from + rec_len; 517 to = from + rec_len;
531 err = nilfs_prepare_chunk(page, page->mapping, from, to); 518 err = nilfs_prepare_chunk(page, from, to);
532 if (err) 519 if (err)
533 goto out_unlock; 520 goto out_unlock;
534 if (de->inode) { 521 if (de->inode) {
@@ -586,7 +573,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
586 if (pde) 573 if (pde)
587 from = (char *)pde - (char *)page_address(page); 574 from = (char *)pde - (char *)page_address(page);
588 lock_page(page); 575 lock_page(page);
589 err = nilfs_prepare_chunk(page, mapping, from, to); 576 err = nilfs_prepare_chunk(page, from, to);
590 BUG_ON(err); 577 BUG_ON(err);
591 if (pde) 578 if (pde)
592 pde->rec_len = nilfs_rec_len_to_disk(to - from); 579 pde->rec_len = nilfs_rec_len_to_disk(to - from);
@@ -614,7 +601,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
614 if (!page) 601 if (!page)
615 return -ENOMEM; 602 return -ENOMEM;
616 603
617 err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); 604 err = nilfs_prepare_chunk(page, 0, chunk_size);
618 if (unlikely(err)) { 605 if (unlikely(err)) {
619 unlock_page(page); 606 unlock_page(page);
620 goto fail; 607 goto fail;
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index dd5f7e0a95f6..84a45d1d5464 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -78,7 +78,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
78 struct inode *gcdat = nilfs->ns_gc_dat; 78 struct inode *gcdat = nilfs->ns_gc_dat;
79 struct nilfs_inode_info *gii = NILFS_I(gcdat); 79 struct nilfs_inode_info *gii = NILFS_I(gcdat);
80 80
81 gcdat->i_state = I_CLEAR; 81 gcdat->i_state = I_FREEING | I_CLEAR;
82 gii->i_flags = 0; 82 gii->i_flags = 0;
83 83
84 nilfs_palloc_clear_cache(gcdat); 84 nilfs_palloc_clear_cache(gcdat);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 39e038ac8fcb..eccb2f2e2315 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -27,6 +27,7 @@
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/uio.h> 28#include <linux/uio.h>
29#include "nilfs.h" 29#include "nilfs.h"
30#include "btnode.h"
30#include "segment.h" 31#include "segment.h"
31#include "page.h" 32#include "page.h"
32#include "mdt.h" 33#include "mdt.h"
@@ -197,11 +198,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
197 if (unlikely(err)) 198 if (unlikely(err))
198 return err; 199 return err;
199 200
200 *pagep = NULL; 201 err = block_write_begin(mapping, pos, len, flags, pagep,
201 err = block_write_begin(file, mapping, pos, len, flags, pagep, 202 nilfs_get_block);
202 fsdata, nilfs_get_block); 203 if (unlikely(err)) {
203 if (unlikely(err)) 204 loff_t isize = mapping->host->i_size;
205 if (pos + len > isize)
206 vmtruncate(mapping->host, isize);
207
204 nilfs_transaction_abort(inode->i_sb); 208 nilfs_transaction_abort(inode->i_sb);
209 }
205 return err; 210 return err;
206} 211}
207 212
@@ -237,6 +242,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
237 /* Needs synchronization with the cleaner */ 242 /* Needs synchronization with the cleaner */
238 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 243 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
239 offset, nr_segs, nilfs_get_block, NULL); 244 offset, nr_segs, nilfs_get_block, NULL);
245
246 /*
247 * In case of error extending write may have instantiated a few
248 * blocks outside i_size. Trim these off again.
249 */
250 if (unlikely((rw & WRITE) && size < 0)) {
251 loff_t isize = i_size_read(inode);
252 loff_t end = offset + iov_length(iov, nr_segs);
253
254 if (end > isize)
255 vmtruncate(inode, isize);
256 }
257
240 return size; 258 return size;
241} 259}
242 260
@@ -337,7 +355,6 @@ void nilfs_free_inode(struct inode *inode)
337 struct super_block *sb = inode->i_sb; 355 struct super_block *sb = inode->i_sb;
338 struct nilfs_sb_info *sbi = NILFS_SB(sb); 356 struct nilfs_sb_info *sbi = NILFS_SB(sb);
339 357
340 clear_inode(inode);
341 /* XXX: check error code? Is there any thing I can do? */ 358 /* XXX: check error code? Is there any thing I can do? */
342 (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); 359 (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino);
343 atomic_dec(&sbi->s_inodes_count); 360 atomic_dec(&sbi->s_inodes_count);
@@ -597,16 +614,34 @@ void nilfs_truncate(struct inode *inode)
597 But truncate has no return value. */ 614 But truncate has no return value. */
598} 615}
599 616
600void nilfs_delete_inode(struct inode *inode) 617static void nilfs_clear_inode(struct inode *inode)
618{
619 struct nilfs_inode_info *ii = NILFS_I(inode);
620
621 /*
622 * Free resources allocated in nilfs_read_inode(), here.
623 */
624 BUG_ON(!list_empty(&ii->i_dirty));
625 brelse(ii->i_bh);
626 ii->i_bh = NULL;
627
628 if (test_bit(NILFS_I_BMAP, &ii->i_state))
629 nilfs_bmap_clear(ii->i_bmap);
630
631 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
632}
633
634void nilfs_evict_inode(struct inode *inode)
601{ 635{
602 struct nilfs_transaction_info ti; 636 struct nilfs_transaction_info ti;
603 struct super_block *sb = inode->i_sb; 637 struct super_block *sb = inode->i_sb;
604 struct nilfs_inode_info *ii = NILFS_I(inode); 638 struct nilfs_inode_info *ii = NILFS_I(inode);
605 639
606 if (unlikely(is_bad_inode(inode))) { 640 if (inode->i_nlink || unlikely(is_bad_inode(inode))) {
607 if (inode->i_data.nrpages) 641 if (inode->i_data.nrpages)
608 truncate_inode_pages(&inode->i_data, 0); 642 truncate_inode_pages(&inode->i_data, 0);
609 clear_inode(inode); 643 end_writeback(inode);
644 nilfs_clear_inode(inode);
610 return; 645 return;
611 } 646 }
612 nilfs_transaction_begin(sb, &ti, 0); /* never fails */ 647 nilfs_transaction_begin(sb, &ti, 0); /* never fails */
@@ -616,6 +651,8 @@ void nilfs_delete_inode(struct inode *inode)
616 651
617 nilfs_truncate_bmap(ii, 0); 652 nilfs_truncate_bmap(ii, 0);
618 nilfs_mark_inode_dirty(inode); 653 nilfs_mark_inode_dirty(inode);
654 end_writeback(inode);
655 nilfs_clear_inode(inode);
619 nilfs_free_inode(inode); 656 nilfs_free_inode(inode);
620 /* nilfs_free_inode() marks inode buffer dirty */ 657 /* nilfs_free_inode() marks inode buffer dirty */
621 if (IS_SYNC(inode)) 658 if (IS_SYNC(inode))
@@ -639,14 +676,27 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
639 err = nilfs_transaction_begin(sb, &ti, 0); 676 err = nilfs_transaction_begin(sb, &ti, 0);
640 if (unlikely(err)) 677 if (unlikely(err))
641 return err; 678 return err;
642 err = inode_setattr(inode, iattr); 679
643 if (!err && (iattr->ia_valid & ATTR_MODE)) 680 if ((iattr->ia_valid & ATTR_SIZE) &&
681 iattr->ia_size != i_size_read(inode)) {
682 err = vmtruncate(inode, iattr->ia_size);
683 if (unlikely(err))
684 goto out_err;
685 }
686
687 setattr_copy(inode, iattr);
688 mark_inode_dirty(inode);
689
690 if (iattr->ia_valid & ATTR_MODE) {
644 err = nilfs_acl_chmod(inode); 691 err = nilfs_acl_chmod(inode);
645 if (likely(!err)) 692 if (unlikely(err))
646 err = nilfs_transaction_commit(sb); 693 goto out_err;
647 else 694 }
648 nilfs_transaction_abort(sb); 695
696 return nilfs_transaction_commit(sb);
649 697
698out_err:
699 nilfs_transaction_abort(sb);
650 return err; 700 return err;
651} 701}
652 702
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 0842d775b3e0..d3d54046e5f8 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -250,7 +250,7 @@ extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
250extern struct inode *nilfs_iget(struct super_block *, unsigned long); 250extern struct inode *nilfs_iget(struct super_block *, unsigned long);
251extern void nilfs_update_inode(struct inode *, struct buffer_head *); 251extern void nilfs_update_inode(struct inode *, struct buffer_head *);
252extern void nilfs_truncate(struct inode *); 252extern void nilfs_truncate(struct inode *);
253extern void nilfs_delete_inode(struct inode *); 253extern void nilfs_evict_inode(struct inode *);
254extern int nilfs_setattr(struct dentry *, struct iattr *); 254extern int nilfs_setattr(struct dentry *, struct iattr *);
255extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, 255extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
256 struct buffer_head **); 256 struct buffer_head **);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 83e3d8c61a01..d0c35ef39f6a 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -523,11 +523,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
523 } 523 }
524 524
525 pos = rb->blkoff << inode->i_blkbits; 525 pos = rb->blkoff << inode->i_blkbits;
526 page = NULL; 526 err = block_write_begin(inode->i_mapping, pos, blocksize,
527 err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, 527 0, &page, nilfs_get_block);
528 0, &page, NULL, nilfs_get_block); 528 if (unlikely(err)) {
529 if (unlikely(err)) 529 loff_t isize = inode->i_size;
530 if (pos + blocksize > isize)
531 vmtruncate(inode, isize);
530 goto failed_inode; 532 goto failed_inode;
533 }
531 534
532 err = nilfs_recovery_copy_block(nilfs, rb, page); 535 err = nilfs_recovery_copy_block(nilfs, rb, page);
533 if (unlikely(err)) 536 if (unlikely(err))
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2e6a2723b8fa..4588fb9e93df 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
508 * Last BIO is always sent through the following 508 * Last BIO is always sent through the following
509 * submission. 509 * submission.
510 */ 510 */
511 rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); 511 rw |= REQ_SYNC | REQ_UNPLUG;
512 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); 512 res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
513 } 513 }
514 514
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 26078b3407c9..1fa86b9df73b 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -171,23 +171,6 @@ void nilfs_destroy_inode(struct inode *inode)
171 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 171 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
172} 172}
173 173
174static void nilfs_clear_inode(struct inode *inode)
175{
176 struct nilfs_inode_info *ii = NILFS_I(inode);
177
178 /*
179 * Free resources allocated in nilfs_read_inode(), here.
180 */
181 BUG_ON(!list_empty(&ii->i_dirty));
182 brelse(ii->i_bh);
183 ii->i_bh = NULL;
184
185 if (test_bit(NILFS_I_BMAP, &ii->i_state))
186 nilfs_bmap_clear(ii->i_bmap);
187
188 nilfs_btnode_cache_clear(&ii->i_btnode_cache);
189}
190
191static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 174static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
192{ 175{
193 struct the_nilfs *nilfs = sbi->s_nilfs; 176 struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -548,7 +531,7 @@ static const struct super_operations nilfs_sops = {
548 /* .write_inode = nilfs_write_inode, */ 531 /* .write_inode = nilfs_write_inode, */
549 /* .put_inode = nilfs_put_inode, */ 532 /* .put_inode = nilfs_put_inode, */
550 /* .drop_inode = nilfs_drop_inode, */ 533 /* .drop_inode = nilfs_drop_inode, */
551 .delete_inode = nilfs_delete_inode, 534 .evict_inode = nilfs_evict_inode,
552 .put_super = nilfs_put_super, 535 .put_super = nilfs_put_super,
553 /* .write_super = nilfs_write_super, */ 536 /* .write_super = nilfs_write_super, */
554 .sync_fs = nilfs_sync_fs, 537 .sync_fs = nilfs_sync_fs,
@@ -556,7 +539,6 @@ static const struct super_operations nilfs_sops = {
556 /* .unlockfs */ 539 /* .unlockfs */
557 .statfs = nilfs_statfs, 540 .statfs = nilfs_statfs,
558 .remount_fs = nilfs_remount, 541 .remount_fs = nilfs_remount,
559 .clear_inode = nilfs_clear_inode,
560 /* .umount_begin */ 542 /* .umount_begin */
561 .show_options = nilfs_show_options 543 .show_options = nilfs_show_options
562}; 544};
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index dffbb0911d02..22c629eedd82 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,3 +3,4 @@ config FSNOTIFY
3 3
4source "fs/notify/dnotify/Kconfig" 4source "fs/notify/dnotify/Kconfig"
5source "fs/notify/inotify/Kconfig" 5source "fs/notify/inotify/Kconfig"
6source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 0922cc826c46..ae5f33a6d868 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,6 @@
1obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o 1obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
2 mark.o vfsmount_mark.o
2 3
3obj-y += dnotify/ 4obj-y += dnotify/
4obj-y += inotify/ 5obj-y += inotify/
6obj-y += fanotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 7e54e52964dd..3344bdd5506e 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -29,17 +29,17 @@
29int dir_notify_enable __read_mostly = 1; 29int dir_notify_enable __read_mostly = 1;
30 30
31static struct kmem_cache *dnotify_struct_cache __read_mostly; 31static struct kmem_cache *dnotify_struct_cache __read_mostly;
32static struct kmem_cache *dnotify_mark_entry_cache __read_mostly; 32static struct kmem_cache *dnotify_mark_cache __read_mostly;
33static struct fsnotify_group *dnotify_group __read_mostly; 33static struct fsnotify_group *dnotify_group __read_mostly;
34static DEFINE_MUTEX(dnotify_mark_mutex); 34static DEFINE_MUTEX(dnotify_mark_mutex);
35 35
36/* 36/*
37 * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which 37 * dnotify will attach one of these to each inode (i_fsnotify_marks) which
38 * is being watched by dnotify. If multiple userspace applications are watching 38 * is being watched by dnotify. If multiple userspace applications are watching
39 * the same directory with dnotify their information is chained in dn 39 * the same directory with dnotify their information is chained in dn
40 */ 40 */
41struct dnotify_mark_entry { 41struct dnotify_mark {
42 struct fsnotify_mark_entry fsn_entry; 42 struct fsnotify_mark fsn_mark;
43 struct dnotify_struct *dn; 43 struct dnotify_struct *dn;
44}; 44};
45 45
@@ -51,27 +51,27 @@ struct dnotify_mark_entry {
51 * it calls the fsnotify function so it can update the set of all events relevant 51 * it calls the fsnotify function so it can update the set of all events relevant
52 * to this inode. 52 * to this inode.
53 */ 53 */
54static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry) 54static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
55{ 55{
56 __u32 new_mask, old_mask; 56 __u32 new_mask, old_mask;
57 struct dnotify_struct *dn; 57 struct dnotify_struct *dn;
58 struct dnotify_mark_entry *dnentry = container_of(entry, 58 struct dnotify_mark *dn_mark = container_of(fsn_mark,
59 struct dnotify_mark_entry, 59 struct dnotify_mark,
60 fsn_entry); 60 fsn_mark);
61 61
62 assert_spin_locked(&entry->lock); 62 assert_spin_locked(&fsn_mark->lock);
63 63
64 old_mask = entry->mask; 64 old_mask = fsn_mark->mask;
65 new_mask = 0; 65 new_mask = 0;
66 for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next) 66 for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
67 new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); 67 new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
68 entry->mask = new_mask; 68 fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
69 69
70 if (old_mask == new_mask) 70 if (old_mask == new_mask)
71 return; 71 return;
72 72
73 if (entry->inode) 73 if (fsn_mark->i.inode)
74 fsnotify_recalc_inode_mask(entry->inode); 74 fsnotify_recalc_inode_mask(fsn_mark->i.inode);
75} 75}
76 76
77/* 77/*
@@ -83,29 +83,25 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
83 * events. 83 * events.
84 */ 84 */
85static int dnotify_handle_event(struct fsnotify_group *group, 85static int dnotify_handle_event(struct fsnotify_group *group,
86 struct fsnotify_mark *inode_mark,
87 struct fsnotify_mark *vfsmount_mark,
86 struct fsnotify_event *event) 88 struct fsnotify_event *event)
87{ 89{
88 struct fsnotify_mark_entry *entry = NULL; 90 struct dnotify_mark *dn_mark;
89 struct dnotify_mark_entry *dnentry;
90 struct inode *to_tell; 91 struct inode *to_tell;
91 struct dnotify_struct *dn; 92 struct dnotify_struct *dn;
92 struct dnotify_struct **prev; 93 struct dnotify_struct **prev;
93 struct fown_struct *fown; 94 struct fown_struct *fown;
94 __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD; 95 __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
95 96
96 to_tell = event->to_tell; 97 BUG_ON(vfsmount_mark);
97 98
98 spin_lock(&to_tell->i_lock); 99 to_tell = event->to_tell;
99 entry = fsnotify_find_mark_entry(group, to_tell);
100 spin_unlock(&to_tell->i_lock);
101 100
102 /* unlikely since we alreay passed dnotify_should_send_event() */ 101 dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
103 if (unlikely(!entry))
104 return 0;
105 dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
106 102
107 spin_lock(&entry->lock); 103 spin_lock(&inode_mark->lock);
108 prev = &dnentry->dn; 104 prev = &dn_mark->dn;
109 while ((dn = *prev) != NULL) { 105 while ((dn = *prev) != NULL) {
110 if ((dn->dn_mask & test_mask) == 0) { 106 if ((dn->dn_mask & test_mask) == 0) {
111 prev = &dn->dn_next; 107 prev = &dn->dn_next;
@@ -118,12 +114,11 @@ static int dnotify_handle_event(struct fsnotify_group *group,
118 else { 114 else {
119 *prev = dn->dn_next; 115 *prev = dn->dn_next;
120 kmem_cache_free(dnotify_struct_cache, dn); 116 kmem_cache_free(dnotify_struct_cache, dn);
121 dnotify_recalc_inode_mask(entry); 117 dnotify_recalc_inode_mask(inode_mark);
122 } 118 }
123 } 119 }
124 120
125 spin_unlock(&entry->lock); 121 spin_unlock(&inode_mark->lock);
126 fsnotify_put_mark(entry);
127 122
128 return 0; 123 return 0;
129} 124}
@@ -133,44 +128,27 @@ static int dnotify_handle_event(struct fsnotify_group *group,
133 * userspace notification for that pair. 128 * userspace notification for that pair.
134 */ 129 */
135static bool dnotify_should_send_event(struct fsnotify_group *group, 130static bool dnotify_should_send_event(struct fsnotify_group *group,
136 struct inode *inode, __u32 mask) 131 struct inode *inode,
132 struct fsnotify_mark *inode_mark,
133 struct fsnotify_mark *vfsmount_mark,
134 __u32 mask, void *data, int data_type)
137{ 135{
138 struct fsnotify_mark_entry *entry;
139 bool send;
140
141 /* !dir_notify_enable should never get here, don't waste time checking
142 if (!dir_notify_enable)
143 return 0; */
144
145 /* not a dir, dnotify doesn't care */ 136 /* not a dir, dnotify doesn't care */
146 if (!S_ISDIR(inode->i_mode)) 137 if (!S_ISDIR(inode->i_mode))
147 return false; 138 return false;
148 139
149 spin_lock(&inode->i_lock); 140 return true;
150 entry = fsnotify_find_mark_entry(group, inode);
151 spin_unlock(&inode->i_lock);
152
153 /* no mark means no dnotify watch */
154 if (!entry)
155 return false;
156
157 mask = (mask & ~FS_EVENT_ON_CHILD);
158 send = (mask & entry->mask);
159
160 fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
161
162 return send;
163} 141}
164 142
165static void dnotify_free_mark(struct fsnotify_mark_entry *entry) 143static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
166{ 144{
167 struct dnotify_mark_entry *dnentry = container_of(entry, 145 struct dnotify_mark *dn_mark = container_of(fsn_mark,
168 struct dnotify_mark_entry, 146 struct dnotify_mark,
169 fsn_entry); 147 fsn_mark);
170 148
171 BUG_ON(dnentry->dn); 149 BUG_ON(dn_mark->dn);
172 150
173 kmem_cache_free(dnotify_mark_entry_cache, dnentry); 151 kmem_cache_free(dnotify_mark_cache, dn_mark);
174} 152}
175 153
176static struct fsnotify_ops dnotify_fsnotify_ops = { 154static struct fsnotify_ops dnotify_fsnotify_ops = {
@@ -183,15 +161,15 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
183 161
184/* 162/*
185 * Called every time a file is closed. Looks first for a dnotify mark on the 163 * Called every time a file is closed. Looks first for a dnotify mark on the
186 * inode. If one is found run all of the ->dn entries attached to that 164 * inode. If one is found run all of the ->dn structures attached to that
187 * mark for one relevant to this process closing the file and remove that 165 * mark for one relevant to this process closing the file and remove that
188 * dnotify_struct. If that was the last dnotify_struct also remove the 166 * dnotify_struct. If that was the last dnotify_struct also remove the
189 * fsnotify_mark_entry. 167 * fsnotify_mark.
190 */ 168 */
191void dnotify_flush(struct file *filp, fl_owner_t id) 169void dnotify_flush(struct file *filp, fl_owner_t id)
192{ 170{
193 struct fsnotify_mark_entry *entry; 171 struct fsnotify_mark *fsn_mark;
194 struct dnotify_mark_entry *dnentry; 172 struct dnotify_mark *dn_mark;
195 struct dnotify_struct *dn; 173 struct dnotify_struct *dn;
196 struct dnotify_struct **prev; 174 struct dnotify_struct **prev;
197 struct inode *inode; 175 struct inode *inode;
@@ -200,38 +178,34 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
200 if (!S_ISDIR(inode->i_mode)) 178 if (!S_ISDIR(inode->i_mode))
201 return; 179 return;
202 180
203 spin_lock(&inode->i_lock); 181 fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
204 entry = fsnotify_find_mark_entry(dnotify_group, inode); 182 if (!fsn_mark)
205 spin_unlock(&inode->i_lock);
206 if (!entry)
207 return; 183 return;
208 dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry); 184 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
209 185
210 mutex_lock(&dnotify_mark_mutex); 186 mutex_lock(&dnotify_mark_mutex);
211 187
212 spin_lock(&entry->lock); 188 spin_lock(&fsn_mark->lock);
213 prev = &dnentry->dn; 189 prev = &dn_mark->dn;
214 while ((dn = *prev) != NULL) { 190 while ((dn = *prev) != NULL) {
215 if ((dn->dn_owner == id) && (dn->dn_filp == filp)) { 191 if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
216 *prev = dn->dn_next; 192 *prev = dn->dn_next;
217 kmem_cache_free(dnotify_struct_cache, dn); 193 kmem_cache_free(dnotify_struct_cache, dn);
218 dnotify_recalc_inode_mask(entry); 194 dnotify_recalc_inode_mask(fsn_mark);
219 break; 195 break;
220 } 196 }
221 prev = &dn->dn_next; 197 prev = &dn->dn_next;
222 } 198 }
223 199
224 spin_unlock(&entry->lock); 200 spin_unlock(&fsn_mark->lock);
225 201
226 /* nothing else could have found us thanks to the dnotify_mark_mutex */ 202 /* nothing else could have found us thanks to the dnotify_mark_mutex */
227 if (dnentry->dn == NULL) 203 if (dn_mark->dn == NULL)
228 fsnotify_destroy_mark_by_entry(entry); 204 fsnotify_destroy_mark(fsn_mark);
229
230 fsnotify_recalc_group_mask(dnotify_group);
231 205
232 mutex_unlock(&dnotify_mark_mutex); 206 mutex_unlock(&dnotify_mark_mutex);
233 207
234 fsnotify_put_mark(entry); 208 fsnotify_put_mark(fsn_mark);
235} 209}
236 210
237/* this conversion is done only at watch creation */ 211/* this conversion is done only at watch creation */
@@ -259,16 +233,16 @@ static __u32 convert_arg(unsigned long arg)
259 233
260/* 234/*
261 * If multiple processes watch the same inode with dnotify there is only one 235 * If multiple processes watch the same inode with dnotify there is only one
262 * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct 236 * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
263 * onto that mark. This function either attaches the new dnotify_struct onto 237 * onto that mark. This function either attaches the new dnotify_struct onto
264 * that list, or it |= the mask onto an existing dnofiy_struct. 238 * that list, or it |= the mask onto an existing dnofiy_struct.
265 */ 239 */
266static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry, 240static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
267 fl_owner_t id, int fd, struct file *filp, __u32 mask) 241 fl_owner_t id, int fd, struct file *filp, __u32 mask)
268{ 242{
269 struct dnotify_struct *odn; 243 struct dnotify_struct *odn;
270 244
271 odn = dnentry->dn; 245 odn = dn_mark->dn;
272 while (odn != NULL) { 246 while (odn != NULL) {
273 /* adding more events to existing dnofiy_struct? */ 247 /* adding more events to existing dnofiy_struct? */
274 if ((odn->dn_owner == id) && (odn->dn_filp == filp)) { 248 if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
@@ -283,8 +257,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
283 dn->dn_fd = fd; 257 dn->dn_fd = fd;
284 dn->dn_filp = filp; 258 dn->dn_filp = filp;
285 dn->dn_owner = id; 259 dn->dn_owner = id;
286 dn->dn_next = dnentry->dn; 260 dn->dn_next = dn_mark->dn;
287 dnentry->dn = dn; 261 dn_mark->dn = dn;
288 262
289 return 0; 263 return 0;
290} 264}
@@ -296,8 +270,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
296 */ 270 */
297int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) 271int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
298{ 272{
299 struct dnotify_mark_entry *new_dnentry, *dnentry; 273 struct dnotify_mark *new_dn_mark, *dn_mark;
300 struct fsnotify_mark_entry *new_entry, *entry; 274 struct fsnotify_mark *new_fsn_mark, *fsn_mark;
301 struct dnotify_struct *dn; 275 struct dnotify_struct *dn;
302 struct inode *inode; 276 struct inode *inode;
303 fl_owner_t id = current->files; 277 fl_owner_t id = current->files;
@@ -306,7 +280,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
306 __u32 mask; 280 __u32 mask;
307 281
308 /* we use these to tell if we need to kfree */ 282 /* we use these to tell if we need to kfree */
309 new_entry = NULL; 283 new_fsn_mark = NULL;
310 dn = NULL; 284 dn = NULL;
311 285
312 if (!dir_notify_enable) { 286 if (!dir_notify_enable) {
@@ -336,8 +310,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
336 } 310 }
337 311
338 /* new fsnotify mark, we expect most fcntl calls to add a new mark */ 312 /* new fsnotify mark, we expect most fcntl calls to add a new mark */
339 new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL); 313 new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
340 if (!new_dnentry) { 314 if (!new_dn_mark) {
341 error = -ENOMEM; 315 error = -ENOMEM;
342 goto out_err; 316 goto out_err;
343 } 317 }
@@ -345,29 +319,27 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
345 /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */ 319 /* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
346 mask = convert_arg(arg); 320 mask = convert_arg(arg);
347 321
348 /* set up the new_entry and new_dnentry */ 322 /* set up the new_fsn_mark and new_dn_mark */
349 new_entry = &new_dnentry->fsn_entry; 323 new_fsn_mark = &new_dn_mark->fsn_mark;
350 fsnotify_init_mark(new_entry, dnotify_free_mark); 324 fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
351 new_entry->mask = mask; 325 new_fsn_mark->mask = mask;
352 new_dnentry->dn = NULL; 326 new_dn_mark->dn = NULL;
353 327
354 /* this is needed to prevent the fcntl/close race described below */ 328 /* this is needed to prevent the fcntl/close race described below */
355 mutex_lock(&dnotify_mark_mutex); 329 mutex_lock(&dnotify_mark_mutex);
356 330
357 /* add the new_entry or find an old one. */ 331 /* add the new_fsn_mark or find an old one. */
358 spin_lock(&inode->i_lock); 332 fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
359 entry = fsnotify_find_mark_entry(dnotify_group, inode); 333 if (fsn_mark) {
360 spin_unlock(&inode->i_lock); 334 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
361 if (entry) { 335 spin_lock(&fsn_mark->lock);
362 dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
363 spin_lock(&entry->lock);
364 } else { 336 } else {
365 fsnotify_add_mark(new_entry, dnotify_group, inode); 337 fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
366 spin_lock(&new_entry->lock); 338 spin_lock(&new_fsn_mark->lock);
367 entry = new_entry; 339 fsn_mark = new_fsn_mark;
368 dnentry = new_dnentry; 340 dn_mark = new_dn_mark;
369 /* we used new_entry, so don't free it */ 341 /* we used new_fsn_mark, so don't free it */
370 new_entry = NULL; 342 new_fsn_mark = NULL;
371 } 343 }
372 344
373 rcu_read_lock(); 345 rcu_read_lock();
@@ -376,17 +348,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
376 348
377 /* if (f != filp) means that we lost a race and another task/thread 349 /* if (f != filp) means that we lost a race and another task/thread
378 * actually closed the fd we are still playing with before we grabbed 350 * actually closed the fd we are still playing with before we grabbed
379 * the dnotify_mark_mutex and entry->lock. Since closing the fd is the 351 * the dnotify_mark_mutex and fsn_mark->lock. Since closing the fd is the
380 * only time we clean up the mark entries we need to get our mark off 352 * only time we clean up the marks we need to get our mark off
381 * the list. */ 353 * the list. */
382 if (f != filp) { 354 if (f != filp) {
383 /* if we added ourselves, shoot ourselves, it's possible that 355 /* if we added ourselves, shoot ourselves, it's possible that
384 * the flush actually did shoot this entry. That's fine too 356 * the flush actually did shoot this fsn_mark. That's fine too
385 * since multiple calls to destroy_mark is perfectly safe, if 357 * since multiple calls to destroy_mark is perfectly safe, if
386 * we found a dnentry already attached to the inode, just sod 358 * we found a dn_mark already attached to the inode, just sod
387 * off silently as the flush at close time dealt with it. 359 * off silently as the flush at close time dealt with it.
388 */ 360 */
389 if (dnentry == new_dnentry) 361 if (dn_mark == new_dn_mark)
390 destroy = 1; 362 destroy = 1;
391 goto out; 363 goto out;
392 } 364 }
@@ -394,13 +366,13 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
394 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 366 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
395 if (error) { 367 if (error) {
396 /* if we added, we must shoot */ 368 /* if we added, we must shoot */
397 if (dnentry == new_dnentry) 369 if (dn_mark == new_dn_mark)
398 destroy = 1; 370 destroy = 1;
399 goto out; 371 goto out;
400 } 372 }
401 373
402 error = attach_dn(dn, dnentry, id, fd, filp, mask); 374 error = attach_dn(dn, dn_mark, id, fd, filp, mask);
403 /* !error means that we attached the dn to the dnentry, so don't free it */ 375 /* !error means that we attached the dn to the dn_mark, so don't free it */
404 if (!error) 376 if (!error)
405 dn = NULL; 377 dn = NULL;
406 /* -EEXIST means that we didn't add this new dn and used an old one. 378 /* -EEXIST means that we didn't add this new dn and used an old one.
@@ -408,20 +380,18 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
408 else if (error == -EEXIST) 380 else if (error == -EEXIST)
409 error = 0; 381 error = 0;
410 382
411 dnotify_recalc_inode_mask(entry); 383 dnotify_recalc_inode_mask(fsn_mark);
412out: 384out:
413 spin_unlock(&entry->lock); 385 spin_unlock(&fsn_mark->lock);
414 386
415 if (destroy) 387 if (destroy)
416 fsnotify_destroy_mark_by_entry(entry); 388 fsnotify_destroy_mark(fsn_mark);
417
418 fsnotify_recalc_group_mask(dnotify_group);
419 389
420 mutex_unlock(&dnotify_mark_mutex); 390 mutex_unlock(&dnotify_mark_mutex);
421 fsnotify_put_mark(entry); 391 fsnotify_put_mark(fsn_mark);
422out_err: 392out_err:
423 if (new_entry) 393 if (new_fsn_mark)
424 fsnotify_put_mark(new_entry); 394 fsnotify_put_mark(new_fsn_mark);
425 if (dn) 395 if (dn)
426 kmem_cache_free(dnotify_struct_cache, dn); 396 kmem_cache_free(dnotify_struct_cache, dn);
427 return error; 397 return error;
@@ -430,10 +400,9 @@ out_err:
430static int __init dnotify_init(void) 400static int __init dnotify_init(void)
431{ 401{
432 dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC); 402 dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
433 dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC); 403 dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
434 404
435 dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM, 405 dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
436 0, &dnotify_fsnotify_ops);
437 if (IS_ERR(dnotify_group)) 406 if (IS_ERR(dnotify_group))
438 panic("unable to allocate fsnotify group for dnotify\n"); 407 panic("unable to allocate fsnotify group for dnotify\n");
439 return 0; 408 return 0;
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
new file mode 100644
index 000000000000..3ac36b7bf6b9
--- /dev/null
+++ b/fs/notify/fanotify/Kconfig
@@ -0,0 +1,26 @@
1config FANOTIFY
2 bool "Filesystem wide access notification"
3 select FSNOTIFY
4 select ANON_INODES
5 default n
6 ---help---
7 Say Y here to enable fanotify support. fanotify is a file access
8 notification system which differs from inotify in that it sends
9 an open file descriptor to the userspace listener along with
10 the event.
11
12 If unsure, say Y.
13
14config FANOTIFY_ACCESS_PERMISSIONS
15 bool "fanotify permissions checking"
16 depends on FANOTIFY
17 depends on SECURITY
18 default n
19 ---help---
20 Say Y here if you want fanotify listeners to be able to make permissions
21 decisions concerning filesystem events. This is used by some fanotify
22 listeners which need to scan files before allowing the system access to
23 use those files. This is used by some anti-malware vendors and by some
24 hierarchical storage management systems.
25
26 If unsure, say N.
diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile
new file mode 100644
index 000000000000..0999213e7e6e
--- /dev/null
+++ b/fs/notify/fanotify/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_FANOTIFY) += fanotify.o fanotify_user.o
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
new file mode 100644
index 000000000000..eb8f73c9c131
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.c
@@ -0,0 +1,212 @@
1#include <linux/fanotify.h>
2#include <linux/fdtable.h>
3#include <linux/fsnotify_backend.h>
4#include <linux/init.h>
5#include <linux/jiffies.h>
6#include <linux/kernel.h> /* UINT_MAX */
7#include <linux/mount.h>
8#include <linux/sched.h>
9#include <linux/types.h>
10#include <linux/wait.h>
11
12static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
13{
14 pr_debug("%s: old=%p new=%p\n", __func__, old, new);
15
16 if (old->to_tell == new->to_tell &&
17 old->data_type == new->data_type &&
18 old->tgid == new->tgid) {
19 switch (old->data_type) {
20 case (FSNOTIFY_EVENT_FILE):
21 if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
22 (old->file->f_path.dentry == new->file->f_path.dentry))
23 return true;
24 case (FSNOTIFY_EVENT_NONE):
25 return true;
26 default:
27 BUG();
28 };
29 }
30 return false;
31}
32
33/* and the list better be locked by something too! */
34static struct fsnotify_event *fanotify_merge(struct list_head *list,
35 struct fsnotify_event *event)
36{
37 struct fsnotify_event_holder *test_holder;
38 struct fsnotify_event *test_event = NULL;
39 struct fsnotify_event *new_event;
40
41 pr_debug("%s: list=%p event=%p\n", __func__, list, event);
42
43
44 list_for_each_entry_reverse(test_holder, list, event_list) {
45 if (should_merge(test_holder->event, event)) {
46 test_event = test_holder->event;
47 break;
48 }
49 }
50
51 if (!test_event)
52 return NULL;
53
54 fsnotify_get_event(test_event);
55
56 /* if they are exactly the same we are done */
57 if (test_event->mask == event->mask)
58 return test_event;
59
60 /*
61 * if the refcnt == 2 this is the only queue
62 * for this event and so we can update the mask
63 * in place.
64 */
65 if (atomic_read(&test_event->refcnt) == 2) {
66 test_event->mask |= event->mask;
67 return test_event;
68 }
69
70 new_event = fsnotify_clone_event(test_event);
71
72 /* done with test_event */
73 fsnotify_put_event(test_event);
74
75 /* couldn't allocate memory, merge was not possible */
76 if (unlikely(!new_event))
77 return ERR_PTR(-ENOMEM);
78
79 /* build new event and replace it on the list */
80 new_event->mask = (test_event->mask | event->mask);
81 fsnotify_replace_event(test_holder, new_event);
82
83 /* we hold a reference on new_event from clone_event */
84 return new_event;
85}
86
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/*
 * Sleep on the group's access wait queue until event->response becomes
 * non-zero, then convert that response into a return code and reset it.
 *
 * Returns 0 for FAN_ALLOW and -EPERM for FAN_DENY or any other value.
 */
static int fanotify_get_response_from_access(struct fsnotify_group *group,
					     struct fsnotify_event *event)
{
	int verdict;

	pr_debug("%s: group=%p event=%p\n", __func__, group, event);

	wait_event(group->fanotify_data.access_waitq, event->response);

	/* userspace answered; translate and clear under the event lock */
	spin_lock(&event->lock);
	verdict = (event->response == FAN_ALLOW) ? 0 : -EPERM;
	event->response = 0;
	spin_unlock(&event->lock);

	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
		 group, event, verdict);

	return verdict;
}
#endif
116
117static int fanotify_handle_event(struct fsnotify_group *group,
118 struct fsnotify_mark *inode_mark,
119 struct fsnotify_mark *fanotify_mark,
120 struct fsnotify_event *event)
121{
122 int ret = 0;
123 struct fsnotify_event *notify_event = NULL;
124
125 BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
126 BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
127 BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
128 BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
129 BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
130 BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
131 BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
132 BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
133 BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
134
135 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
136
137 notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
138 if (IS_ERR(notify_event))
139 return PTR_ERR(notify_event);
140
141#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
142 if (event->mask & FAN_ALL_PERM_EVENTS) {
143 /* if we merged we need to wait on the new event */
144 if (notify_event)
145 event = notify_event;
146 ret = fanotify_get_response_from_access(group, event);
147 }
148#endif
149
150 if (notify_event)
151 fsnotify_put_event(notify_event);
152
153 return ret;
154}
155
156static bool fanotify_should_send_event(struct fsnotify_group *group,
157 struct inode *to_tell,
158 struct fsnotify_mark *inode_mark,
159 struct fsnotify_mark *vfsmnt_mark,
160 __u32 event_mask, void *data, int data_type)
161{
162 __u32 marks_mask, marks_ignored_mask;
163
164 pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
165 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
166 inode_mark, vfsmnt_mark, event_mask, data, data_type);
167
168 pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
169 __func__, group, vfsmnt_mark, inode_mark, event_mask);
170
171 /* sorry, fanotify only gives a damn about files and dirs */
172 if (!S_ISREG(to_tell->i_mode) &&
173 !S_ISDIR(to_tell->i_mode))
174 return false;
175
176 /* if we don't have enough info to send an event to userspace say no */
177 if (data_type != FSNOTIFY_EVENT_FILE)
178 return false;
179
180 if (inode_mark && vfsmnt_mark) {
181 marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
182 marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
183 } else if (inode_mark) {
184 /*
185 * if the event is for a child and this inode doesn't care about
186 * events on the child, don't send it!
187 */
188 if ((event_mask & FS_EVENT_ON_CHILD) &&
189 !(inode_mark->mask & FS_EVENT_ON_CHILD))
190 return false;
191 marks_mask = inode_mark->mask;
192 marks_ignored_mask = inode_mark->ignored_mask;
193 } else if (vfsmnt_mark) {
194 marks_mask = vfsmnt_mark->mask;
195 marks_ignored_mask = vfsmnt_mark->ignored_mask;
196 } else {
197 BUG();
198 }
199
200 if (event_mask & marks_mask & ~marks_ignored_mask)
201 return true;
202
203 return false;
204}
205
206const struct fsnotify_ops fanotify_fsnotify_ops = {
207 .handle_event = fanotify_handle_event,
208 .should_send_event = fanotify_should_send_event,
209 .free_group_priv = NULL,
210 .free_event_priv = NULL,
211 .freeing_mark = NULL,
212};
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
new file mode 100644
index 000000000000..25a3b4dfcf61
--- /dev/null
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -0,0 +1,760 @@
1#include <linux/fanotify.h>
2#include <linux/fcntl.h>
3#include <linux/file.h>
4#include <linux/fs.h>
5#include <linux/anon_inodes.h>
6#include <linux/fsnotify_backend.h>
7#include <linux/init.h>
8#include <linux/mount.h>
9#include <linux/namei.h>
10#include <linux/poll.h>
11#include <linux/security.h>
12#include <linux/syscalls.h>
13#include <linux/slab.h>
14#include <linux/types.h>
15#include <linux/uaccess.h>
16
17#include <asm/ioctls.h>
18
19extern const struct fsnotify_ops fanotify_fsnotify_ops;
20
21static struct kmem_cache *fanotify_mark_cache __read_mostly;
22static struct kmem_cache *fanotify_response_event_cache __read_mostly;
23
24struct fanotify_response_event {
25 struct list_head list;
26 __s32 fd;
27 struct fsnotify_event *event;
28};
29
30/*
31 * Get an fsnotify notification event if one exists and is small
32 * enough to fit in "count". Return an error pointer if the count
33 * is not large enough.
34 *
35 * Called with the group->notification_mutex held.
36 */
37static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
38 size_t count)
39{
40 BUG_ON(!mutex_is_locked(&group->notification_mutex));
41
42 pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
43
44 if (fsnotify_notify_queue_is_empty(group))
45 return NULL;
46
47 if (FAN_EVENT_METADATA_LEN > count)
48 return ERR_PTR(-EINVAL);
49
50 /* held the notification_mutex the whole time, so this is the
51 * same event we peeked above */
52 return fsnotify_remove_notify_event(group);
53}
54
55static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
56{
57 int client_fd;
58 struct dentry *dentry;
59 struct vfsmount *mnt;
60 struct file *new_file;
61
62 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
63
64 client_fd = get_unused_fd();
65 if (client_fd < 0)
66 return client_fd;
67
68 if (event->data_type != FSNOTIFY_EVENT_FILE) {
69 WARN_ON(1);
70 put_unused_fd(client_fd);
71 return -EINVAL;
72 }
73
74 /*
75 * we need a new file handle for the userspace program so it can read even if it was
76 * originally opened O_WRONLY.
77 */
78 dentry = dget(event->file->f_path.dentry);
79 mnt = mntget(event->file->f_path.mnt);
80 /* it's possible this event was an overflow event. in that case dentry and mnt
81 * are NULL; That's fine, just don't call dentry open */
82 if (dentry && mnt)
83 new_file = dentry_open(dentry, mnt,
84 group->fanotify_data.f_flags | FMODE_NONOTIFY,
85 current_cred());
86 else
87 new_file = ERR_PTR(-EOVERFLOW);
88 if (IS_ERR(new_file)) {
89 /*
90 * we still send an event even if we can't open the file. this
91 * can happen when say tasks are gone and we try to open their
92 * /proc files or we try to open a WRONLY file like in sysfs
93 * we just send the errno to userspace since there isn't much
94 * else we can do.
95 */
96 put_unused_fd(client_fd);
97 client_fd = PTR_ERR(new_file);
98 } else {
99 fd_install(client_fd, new_file);
100 }
101
102 return client_fd;
103}
104
105static ssize_t fill_event_metadata(struct fsnotify_group *group,
106 struct fanotify_event_metadata *metadata,
107 struct fsnotify_event *event)
108{
109 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
110 group, metadata, event);
111
112 metadata->event_len = FAN_EVENT_METADATA_LEN;
113 metadata->vers = FANOTIFY_METADATA_VERSION;
114 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
115 metadata->pid = pid_vnr(event->tgid);
116 metadata->fd = create_fd(group, event);
117
118 return metadata->fd;
119}
120
121#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
122static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
123 __s32 fd)
124{
125 struct fanotify_response_event *re, *return_re = NULL;
126
127 mutex_lock(&group->fanotify_data.access_mutex);
128 list_for_each_entry(re, &group->fanotify_data.access_list, list) {
129 if (re->fd != fd)
130 continue;
131
132 list_del_init(&re->list);
133 return_re = re;
134 break;
135 }
136 mutex_unlock(&group->fanotify_data.access_mutex);
137
138 pr_debug("%s: found return_re=%p\n", __func__, return_re);
139
140 return return_re;
141}
142
143static int process_access_response(struct fsnotify_group *group,
144 struct fanotify_response *response_struct)
145{
146 struct fanotify_response_event *re;
147 __s32 fd = response_struct->fd;
148 __u32 response = response_struct->response;
149
150 pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
151 fd, response);
152 /*
153 * make sure the response is valid, if invalid we do nothing and either
154 * userspace can send a valid responce or we will clean it up after the
155 * timeout
156 */
157 switch (response) {
158 case FAN_ALLOW:
159 case FAN_DENY:
160 break;
161 default:
162 return -EINVAL;
163 }
164
165 if (fd < 0)
166 return -EINVAL;
167
168 re = dequeue_re(group, fd);
169 if (!re)
170 return -ENOENT;
171
172 re->event->response = response;
173
174 wake_up(&group->fanotify_data.access_waitq);
175
176 kmem_cache_free(fanotify_response_event_cache, re);
177
178 return 0;
179}
180
181static int prepare_for_access_response(struct fsnotify_group *group,
182 struct fsnotify_event *event,
183 __s32 fd)
184{
185 struct fanotify_response_event *re;
186
187 if (!(event->mask & FAN_ALL_PERM_EVENTS))
188 return 0;
189
190 re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
191 if (!re)
192 return -ENOMEM;
193
194 re->event = event;
195 re->fd = fd;
196
197 mutex_lock(&group->fanotify_data.access_mutex);
198 list_add_tail(&re->list, &group->fanotify_data.access_list);
199 mutex_unlock(&group->fanotify_data.access_mutex);
200
201 return 0;
202}
203
204static void remove_access_response(struct fsnotify_group *group,
205 struct fsnotify_event *event,
206 __s32 fd)
207{
208 struct fanotify_response_event *re;
209
210 if (!(event->mask & FAN_ALL_PERM_EVENTS))
211 return;
212
213 re = dequeue_re(group, fd);
214 if (!re)
215 return;
216
217 BUG_ON(re->event != event);
218
219 kmem_cache_free(fanotify_response_event_cache, re);
220
221 return;
222}
223#else
224static int prepare_for_access_response(struct fsnotify_group *group,
225 struct fsnotify_event *event,
226 __s32 fd)
227{
228 return 0;
229}
230
231static void remove_access_response(struct fsnotify_group *group,
232 struct fsnotify_event *event,
233 __s32 fd)
234{
235 return;
236}
237#endif
238
239static ssize_t copy_event_to_user(struct fsnotify_group *group,
240 struct fsnotify_event *event,
241 char __user *buf)
242{
243 struct fanotify_event_metadata fanotify_event_metadata;
244 int fd, ret;
245
246 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
247
248 fd = fill_event_metadata(group, &fanotify_event_metadata, event);
249 if (fd < 0)
250 return fd;
251
252 ret = prepare_for_access_response(group, event, fd);
253 if (ret)
254 goto out_close_fd;
255
256 ret = -EFAULT;
257 if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
258 goto out_kill_access_response;
259
260 return FAN_EVENT_METADATA_LEN;
261
262out_kill_access_response:
263 remove_access_response(group, event, fd);
264out_close_fd:
265 sys_close(fd);
266 return ret;
267}
268
269/* fanotify userspace file descriptor functions */
270static unsigned int fanotify_poll(struct file *file, poll_table *wait)
271{
272 struct fsnotify_group *group = file->private_data;
273 int ret = 0;
274
275 poll_wait(file, &group->notification_waitq, wait);
276 mutex_lock(&group->notification_mutex);
277 if (!fsnotify_notify_queue_is_empty(group))
278 ret = POLLIN | POLLRDNORM;
279 mutex_unlock(&group->notification_mutex);
280
281 return ret;
282}
283
284static ssize_t fanotify_read(struct file *file, char __user *buf,
285 size_t count, loff_t *pos)
286{
287 struct fsnotify_group *group;
288 struct fsnotify_event *kevent;
289 char __user *start;
290 int ret;
291 DEFINE_WAIT(wait);
292
293 start = buf;
294 group = file->private_data;
295
296 pr_debug("%s: group=%p\n", __func__, group);
297
298 while (1) {
299 prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
300
301 mutex_lock(&group->notification_mutex);
302 kevent = get_one_event(group, count);
303 mutex_unlock(&group->notification_mutex);
304
305 if (kevent) {
306 ret = PTR_ERR(kevent);
307 if (IS_ERR(kevent))
308 break;
309 ret = copy_event_to_user(group, kevent, buf);
310 fsnotify_put_event(kevent);
311 if (ret < 0)
312 break;
313 buf += ret;
314 count -= ret;
315 continue;
316 }
317
318 ret = -EAGAIN;
319 if (file->f_flags & O_NONBLOCK)
320 break;
321 ret = -EINTR;
322 if (signal_pending(current))
323 break;
324
325 if (start != buf)
326 break;
327
328 schedule();
329 }
330
331 finish_wait(&group->notification_waitq, &wait);
332 if (start != buf && ret != -EFAULT)
333 ret = buf - start;
334 return ret;
335}
336
337static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
338{
339#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
340 struct fanotify_response response = { .fd = -1, .response = -1 };
341 struct fsnotify_group *group;
342 int ret;
343
344 group = file->private_data;
345
346 if (count > sizeof(response))
347 count = sizeof(response);
348
349 pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
350
351 if (copy_from_user(&response, buf, count))
352 return -EFAULT;
353
354 ret = process_access_response(group, &response);
355 if (ret < 0)
356 count = ret;
357
358 return count;
359#else
360 return -EINVAL;
361#endif
362}
363
364static int fanotify_release(struct inode *ignored, struct file *file)
365{
366 struct fsnotify_group *group = file->private_data;
367
368 pr_debug("%s: file=%p group=%p\n", __func__, file, group);
369
370 /* matches the fanotify_init->fsnotify_alloc_group */
371 fsnotify_put_group(group);
372
373 return 0;
374}
375
376static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
377{
378 struct fsnotify_group *group;
379 struct fsnotify_event_holder *holder;
380 void __user *p;
381 int ret = -ENOTTY;
382 size_t send_len = 0;
383
384 group = file->private_data;
385
386 p = (void __user *) arg;
387
388 switch (cmd) {
389 case FIONREAD:
390 mutex_lock(&group->notification_mutex);
391 list_for_each_entry(holder, &group->notification_list, event_list)
392 send_len += FAN_EVENT_METADATA_LEN;
393 mutex_unlock(&group->notification_mutex);
394 ret = put_user(send_len, (int __user *) p);
395 break;
396 }
397
398 return ret;
399}
400
401static const struct file_operations fanotify_fops = {
402 .poll = fanotify_poll,
403 .read = fanotify_read,
404 .write = fanotify_write,
405 .fasync = NULL,
406 .release = fanotify_release,
407 .unlocked_ioctl = fanotify_ioctl,
408 .compat_ioctl = fanotify_ioctl,
409};
410
411static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
412{
413 kmem_cache_free(fanotify_mark_cache, fsn_mark);
414}
415
416static int fanotify_find_path(int dfd, const char __user *filename,
417 struct path *path, unsigned int flags)
418{
419 int ret;
420
421 pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
422 dfd, filename, flags);
423
424 if (filename == NULL) {
425 struct file *file;
426 int fput_needed;
427
428 ret = -EBADF;
429 file = fget_light(dfd, &fput_needed);
430 if (!file)
431 goto out;
432
433 ret = -ENOTDIR;
434 if ((flags & FAN_MARK_ONLYDIR) &&
435 !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
436 fput_light(file, fput_needed);
437 goto out;
438 }
439
440 *path = file->f_path;
441 path_get(path);
442 fput_light(file, fput_needed);
443 } else {
444 unsigned int lookup_flags = 0;
445
446 if (!(flags & FAN_MARK_DONT_FOLLOW))
447 lookup_flags |= LOOKUP_FOLLOW;
448 if (flags & FAN_MARK_ONLYDIR)
449 lookup_flags |= LOOKUP_DIRECTORY;
450
451 ret = user_path_at(dfd, filename, lookup_flags, path);
452 if (ret)
453 goto out;
454 }
455
456 /* you can only watch an inode if you have read permissions on it */
457 ret = inode_permission(path->dentry->d_inode, MAY_READ);
458 if (ret)
459 path_put(path);
460out:
461 return ret;
462}
463
464static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
465 __u32 mask,
466 unsigned int flags)
467{
468 __u32 oldmask;
469
470 spin_lock(&fsn_mark->lock);
471 if (!(flags & FAN_MARK_IGNORED_MASK)) {
472 oldmask = fsn_mark->mask;
473 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
474 } else {
475 oldmask = fsn_mark->ignored_mask;
476 fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
477 }
478 spin_unlock(&fsn_mark->lock);
479
480 if (!(oldmask & ~mask))
481 fsnotify_destroy_mark(fsn_mark);
482
483 return mask & oldmask;
484}
485
486static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
487 struct vfsmount *mnt, __u32 mask,
488 unsigned int flags)
489{
490 struct fsnotify_mark *fsn_mark = NULL;
491 __u32 removed;
492
493 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
494 if (!fsn_mark)
495 return -ENOENT;
496
497 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
498 fsnotify_put_mark(fsn_mark);
499 if (removed & mnt->mnt_fsnotify_mask)
500 fsnotify_recalc_vfsmount_mask(mnt);
501
502 return 0;
503}
504
505static int fanotify_remove_inode_mark(struct fsnotify_group *group,
506 struct inode *inode, __u32 mask,
507 unsigned int flags)
508{
509 struct fsnotify_mark *fsn_mark = NULL;
510 __u32 removed;
511
512 fsn_mark = fsnotify_find_inode_mark(group, inode);
513 if (!fsn_mark)
514 return -ENOENT;
515
516 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
517 /* matches the fsnotify_find_inode_mark() */
518 fsnotify_put_mark(fsn_mark);
519 if (removed & inode->i_fsnotify_mask)
520 fsnotify_recalc_inode_mask(inode);
521
522 return 0;
523}
524
525static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
526 __u32 mask,
527 unsigned int flags)
528{
529 __u32 oldmask;
530
531 spin_lock(&fsn_mark->lock);
532 if (!(flags & FAN_MARK_IGNORED_MASK)) {
533 oldmask = fsn_mark->mask;
534 fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
535 } else {
536 oldmask = fsn_mark->ignored_mask;
537 fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
538 if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
539 fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
540 }
541 spin_unlock(&fsn_mark->lock);
542
543 return mask & ~oldmask;
544}
545
546static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
547 struct vfsmount *mnt, __u32 mask,
548 unsigned int flags)
549{
550 struct fsnotify_mark *fsn_mark;
551 __u32 added;
552
553 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
554 if (!fsn_mark) {
555 int ret;
556
557 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
558 if (!fsn_mark)
559 return -ENOMEM;
560
561 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
562 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
563 if (ret) {
564 fanotify_free_mark(fsn_mark);
565 return ret;
566 }
567 }
568 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
569 fsnotify_put_mark(fsn_mark);
570 if (added & ~mnt->mnt_fsnotify_mask)
571 fsnotify_recalc_vfsmount_mask(mnt);
572
573 return 0;
574}
575
576static int fanotify_add_inode_mark(struct fsnotify_group *group,
577 struct inode *inode, __u32 mask,
578 unsigned int flags)
579{
580 struct fsnotify_mark *fsn_mark;
581 __u32 added;
582
583 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
584
585 fsn_mark = fsnotify_find_inode_mark(group, inode);
586 if (!fsn_mark) {
587 int ret;
588
589 fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
590 if (!fsn_mark)
591 return -ENOMEM;
592
593 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
594 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
595 if (ret) {
596 fanotify_free_mark(fsn_mark);
597 return ret;
598 }
599 }
600 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
601 fsnotify_put_mark(fsn_mark);
602 if (added & ~inode->i_fsnotify_mask)
603 fsnotify_recalc_inode_mask(inode);
604 return 0;
605}
606
607/* fanotify syscalls */
608SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
609{
610 struct fsnotify_group *group;
611 int f_flags, fd;
612
613 pr_debug("%s: flags=%d event_f_flags=%d\n",
614 __func__, flags, event_f_flags);
615
616 if (!capable(CAP_SYS_ADMIN))
617 return -EACCES;
618
619 if (flags & ~FAN_ALL_INIT_FLAGS)
620 return -EINVAL;
621
622 f_flags = O_RDWR | FMODE_NONOTIFY;
623 if (flags & FAN_CLOEXEC)
624 f_flags |= O_CLOEXEC;
625 if (flags & FAN_NONBLOCK)
626 f_flags |= O_NONBLOCK;
627
628 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
629 group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
630 if (IS_ERR(group))
631 return PTR_ERR(group);
632
633 group->fanotify_data.f_flags = event_f_flags;
634#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
635 mutex_init(&group->fanotify_data.access_mutex);
636 init_waitqueue_head(&group->fanotify_data.access_waitq);
637 INIT_LIST_HEAD(&group->fanotify_data.access_list);
638#endif
639
640 fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
641 if (fd < 0)
642 goto out_put_group;
643
644 return fd;
645
646out_put_group:
647 fsnotify_put_group(group);
648 return fd;
649}
650
651SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
652 __u64 mask, int dfd,
653 const char __user * pathname)
654{
655 struct inode *inode = NULL;
656 struct vfsmount *mnt = NULL;
657 struct fsnotify_group *group;
658 struct file *filp;
659 struct path path;
660 int ret, fput_needed;
661
662 pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
663 __func__, fanotify_fd, flags, dfd, pathname, mask);
664
665 /* we only use the lower 32 bits as of right now. */
666 if (mask & ((__u64)0xffffffff << 32))
667 return -EINVAL;
668
669 if (flags & ~FAN_ALL_MARK_FLAGS)
670 return -EINVAL;
671 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
672 case FAN_MARK_ADD:
673 case FAN_MARK_REMOVE:
674 case FAN_MARK_FLUSH:
675 break;
676 default:
677 return -EINVAL;
678 }
679#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
680 if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
681#else
682 if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
683#endif
684 return -EINVAL;
685
686 filp = fget_light(fanotify_fd, &fput_needed);
687 if (unlikely(!filp))
688 return -EBADF;
689
690 /* verify that this is indeed an fanotify instance */
691 ret = -EINVAL;
692 if (unlikely(filp->f_op != &fanotify_fops))
693 goto fput_and_out;
694
695 ret = fanotify_find_path(dfd, pathname, &path, flags);
696 if (ret)
697 goto fput_and_out;
698
699 /* inode held in place by reference to path; group by fget on fd */
700 if (!(flags & FAN_MARK_MOUNT))
701 inode = path.dentry->d_inode;
702 else
703 mnt = path.mnt;
704 group = filp->private_data;
705
706 /* create/update an inode mark */
707 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
708 case FAN_MARK_ADD:
709 if (flags & FAN_MARK_MOUNT)
710 ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
711 else
712 ret = fanotify_add_inode_mark(group, inode, mask, flags);
713 break;
714 case FAN_MARK_REMOVE:
715 if (flags & FAN_MARK_MOUNT)
716 ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
717 else
718 ret = fanotify_remove_inode_mark(group, inode, mask, flags);
719 break;
720 case FAN_MARK_FLUSH:
721 if (flags & FAN_MARK_MOUNT)
722 fsnotify_clear_vfsmount_marks_by_group(group);
723 else
724 fsnotify_clear_inode_marks_by_group(group);
725 break;
726 default:
727 ret = -EINVAL;
728 }
729
730 path_put(&path);
731fput_and_out:
732 fput_light(filp, fput_needed);
733 return ret;
734}
735
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
/*
 * Syscall wrapper: takes the arguments as longs (except the 64-bit mask)
 * and casts them to the types SYSC_fanotify_mark() expects.
 */
asmlinkage long SyS_fanotify_mark(long fanotify_fd, long flags, __u64 mask,
				  long dfd, long pathname)
{
	return SYSC_fanotify_mark((int) fanotify_fd,
				  (unsigned int) flags,
				  mask,
				  (int) dfd,
				  (const char __user *) pathname);
}
SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
#endif
746
747/*
748 * fanotify_user_setup - Our initialization function. Note that we cannnot return
749 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
750 * must result in panic().
751 */
752static int __init fanotify_user_setup(void)
753{
754 fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
755 fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
756 SLAB_PANIC);
757
758 return 0;
759}
760device_initcall(fanotify_user_setup);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index fcc2f064af83..4d2a82c1ceb1 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -21,6 +21,7 @@
21#include <linux/gfp.h> 21#include <linux/gfp.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/module.h> 23#include <linux/module.h>
24#include <linux/mount.h>
24#include <linux/srcu.h> 25#include <linux/srcu.h>
25 26
26#include <linux/fsnotify_backend.h> 27#include <linux/fsnotify_backend.h>
@@ -35,6 +36,11 @@ void __fsnotify_inode_delete(struct inode *inode)
35} 36}
36EXPORT_SYMBOL_GPL(__fsnotify_inode_delete); 37EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
37 38
39void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
40{
41 fsnotify_clear_marks_by_mount(mnt);
42}
43
38/* 44/*
39 * Given an inode, first check if we care what happens to our children. Inotify 45 * Given an inode, first check if we care what happens to our children. Inotify
40 * and dnotify both tell their parents about events. If we care about any event 46 * and dnotify both tell their parents about events. If we care about any event
@@ -78,13 +84,16 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
78} 84}
79 85
80/* Notify this dentry's parent about a child's events. */ 86/* Notify this dentry's parent about a child's events. */
81void __fsnotify_parent(struct dentry *dentry, __u32 mask) 87void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
82{ 88{
83 struct dentry *parent; 89 struct dentry *parent;
84 struct inode *p_inode; 90 struct inode *p_inode;
85 bool send = false; 91 bool send = false;
86 bool should_update_children = false; 92 bool should_update_children = false;
87 93
94 if (!dentry)
95 dentry = file->f_path.dentry;
96
88 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) 97 if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
89 return; 98 return;
90 99
@@ -115,8 +124,12 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
115 * specifies these are events which came from a child. */ 124 * specifies these are events which came from a child. */
116 mask |= FS_EVENT_ON_CHILD; 125 mask |= FS_EVENT_ON_CHILD;
117 126
118 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, 127 if (file)
119 dentry->d_name.name, 0); 128 fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
129 dentry->d_name.name, 0);
130 else
131 fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
132 dentry->d_name.name, 0);
120 dput(parent); 133 dput(parent);
121 } 134 }
122 135
@@ -127,63 +140,181 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
127} 140}
128EXPORT_SYMBOL_GPL(__fsnotify_parent); 141EXPORT_SYMBOL_GPL(__fsnotify_parent);
129 142
143static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
144 struct fsnotify_mark *inode_mark,
145 struct fsnotify_mark *vfsmount_mark,
146 __u32 mask, void *data,
147 int data_is, u32 cookie,
148 const unsigned char *file_name,
149 struct fsnotify_event **event)
150{
151 struct fsnotify_group *group = inode_mark->group;
152 __u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
153 __u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
154
155 pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
156 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
157 mnt, inode_mark, mask, data, data_is, cookie, *event);
158
159 /* clear ignored on inode modification */
160 if (mask & FS_MODIFY) {
161 if (inode_mark &&
162 !(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
163 inode_mark->ignored_mask = 0;
164 if (vfsmount_mark &&
165 !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
166 vfsmount_mark->ignored_mask = 0;
167 }
168
169 /* does the inode mark tell us to do something? */
170 if (inode_mark) {
171 inode_test_mask &= inode_mark->mask;
172 inode_test_mask &= ~inode_mark->ignored_mask;
173 }
174
175 /* does the vfsmount_mark tell us to do something? */
176 if (vfsmount_mark) {
177 vfsmount_test_mask &= vfsmount_mark->mask;
178 vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
179 if (inode_mark)
180 vfsmount_test_mask &= ~inode_mark->ignored_mask;
181 }
182
183 if (!inode_test_mask && !vfsmount_test_mask)
184 return 0;
185
186 if (group->ops->should_send_event(group, to_tell, inode_mark,
187 vfsmount_mark, mask, data,
188 data_is) == false)
189 return 0;
190
191 if (!*event) {
192 *event = fsnotify_create_event(to_tell, mask, data,
193 data_is, file_name,
194 cookie, GFP_KERNEL);
195 if (!*event)
196 return -ENOMEM;
197 }
198 return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
199}
200
130/* 201/*
131 * This is the main call to fsnotify. The VFS calls into hook specific functions 202 * This is the main call to fsnotify. The VFS calls into hook specific functions
132 * in linux/fsnotify.h. Those functions then in turn call here. Here will call 203 * in linux/fsnotify.h. Those functions then in turn call here. Here will call
133 * out to all of the registered fsnotify_group. Those groups can then use the 204 * out to all of the registered fsnotify_group. Those groups can then use the
134 * notification event in whatever means they feel necessary. 205 * notification event in whatever means they feel necessary.
135 */ 206 */
136void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie) 207int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
208 const unsigned char *file_name, u32 cookie)
137{ 209{
138 struct fsnotify_group *group; 210 struct hlist_node *inode_node, *vfsmount_node;
211 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
212 struct fsnotify_group *inode_group, *vfsmount_group;
139 struct fsnotify_event *event = NULL; 213 struct fsnotify_event *event = NULL;
140 int idx; 214 struct vfsmount *mnt;
215 int idx, ret = 0;
216 bool used_inode = false, used_vfsmount = false;
141 /* global tests shouldn't care about events on child only the specific event */ 217 /* global tests shouldn't care about events on child only the specific event */
142 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); 218 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
143 219
144 if (list_empty(&fsnotify_groups)) 220 if (data_is == FSNOTIFY_EVENT_FILE)
145 return; 221 mnt = ((struct file *)data)->f_path.mnt;
222 else
223 mnt = NULL;
146 224
147 if (!(test_mask & fsnotify_mask))
148 return;
149
150 if (!(test_mask & to_tell->i_fsnotify_mask))
151 return;
152 /* 225 /*
153 * SRCU!! the groups list is very very much read only and the path is 226 * if this is a modify event we may need to clear the ignored masks
154 * very hot. The VAST majority of events are not going to need to do 227 * otherwise return if neither the inode nor the vfsmount care about
155 * anything other than walk the list so it's crazy to pre-allocate. 228 * this type of event.
156 */ 229 */
157 idx = srcu_read_lock(&fsnotify_grp_srcu); 230 if (!(mask & FS_MODIFY) &&
158 list_for_each_entry_rcu(group, &fsnotify_groups, group_list) { 231 !(test_mask & to_tell->i_fsnotify_mask) &&
159 if (test_mask & group->mask) { 232 !(mnt && test_mask & mnt->mnt_fsnotify_mask))
160 if (!group->ops->should_send_event(group, to_tell, mask)) 233 return 0;
161 continue; 234
162 if (!event) { 235 idx = srcu_read_lock(&fsnotify_mark_srcu);
163 event = fsnotify_create_event(to_tell, mask, data, 236
164 data_is, file_name, cookie, 237 if ((mask & FS_MODIFY) ||
165 GFP_KERNEL); 238 (test_mask & to_tell->i_fsnotify_mask))
166 /* shit, we OOM'd and now we can't tell, maybe 239 inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
167 * someday someone else will want to do something 240 &fsnotify_mark_srcu);
168 * here */ 241 else
169 if (!event) 242 inode_node = NULL;
170 break; 243
171 } 244 if (mnt) {
172 group->ops->handle_event(group, event); 245 if ((mask & FS_MODIFY) ||
246 (test_mask & mnt->mnt_fsnotify_mask))
247 vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
248 &fsnotify_mark_srcu);
249 else
250 vfsmount_node = NULL;
251 } else {
252 mnt = NULL;
253 vfsmount_node = NULL;
254 }
255
256 while (inode_node || vfsmount_node) {
257 if (inode_node) {
258 inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
259 struct fsnotify_mark, i.i_list);
260 inode_group = inode_mark->group;
261 } else
262 inode_group = (void *)-1;
263
264 if (vfsmount_node) {
265 vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
266 struct fsnotify_mark, m.m_list);
267 vfsmount_group = vfsmount_mark->group;
268 } else
269 vfsmount_group = (void *)-1;
270
271 if (inode_group < vfsmount_group) {
272 /* handle inode */
273 send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
274 data_is, cookie, file_name, &event);
275 used_inode = true;
276 } else if (vfsmount_group < inode_group) {
277 send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
278 data_is, cookie, file_name, &event);
279 used_vfsmount = true;
280 } else {
281 send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
282 mask, data, data_is, cookie, file_name,
283 &event);
284 used_vfsmount = true;
285 used_inode = true;
173 } 286 }
287
288 if (used_inode)
289 inode_node = srcu_dereference(inode_node->next,
290 &fsnotify_mark_srcu);
291 if (used_vfsmount)
292 vfsmount_node = srcu_dereference(vfsmount_node->next,
293 &fsnotify_mark_srcu);
174 } 294 }
175 srcu_read_unlock(&fsnotify_grp_srcu, idx); 295
296 srcu_read_unlock(&fsnotify_mark_srcu, idx);
176 /* 297 /*
177 * fsnotify_create_event() took a reference so the event can't be cleaned 298 * fsnotify_create_event() took a reference so the event can't be cleaned
178 * up while we are still trying to add it to lists, drop that one. 299 * up while we are still trying to add it to lists, drop that one.
179 */ 300 */
180 if (event) 301 if (event)
181 fsnotify_put_event(event); 302 fsnotify_put_event(event);
303
304 return ret;
182} 305}
183EXPORT_SYMBOL_GPL(fsnotify); 306EXPORT_SYMBOL_GPL(fsnotify);
184 307
185static __init int fsnotify_init(void) 308static __init int fsnotify_init(void)
186{ 309{
187 return init_srcu_struct(&fsnotify_grp_srcu); 310 int ret;
311
312 BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
313
314 ret = init_srcu_struct(&fsnotify_mark_srcu);
315 if (ret)
316 panic("initializing fsnotify_mark_srcu");
317
318 return 0;
188} 319}
189subsys_initcall(fsnotify_init); 320core_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 4dc240824b2d..85e7d2b431d9 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -6,21 +6,34 @@
6#include <linux/srcu.h> 6#include <linux/srcu.h>
7#include <linux/types.h> 7#include <linux/types.h>
8 8
9/* protects reads of fsnotify_groups */
10extern struct srcu_struct fsnotify_grp_srcu;
11/* all groups which receive fsnotify events */
12extern struct list_head fsnotify_groups;
13/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
14extern __u32 fsnotify_mask;
15
16/* destroy all events sitting in this groups notification queue */ 9/* destroy all events sitting in this groups notification queue */
17extern void fsnotify_flush_notify(struct fsnotify_group *group); 10extern void fsnotify_flush_notify(struct fsnotify_group *group);
18 11
12/* protects reads of inode and vfsmount marks list */
13extern struct srcu_struct fsnotify_mark_srcu;
14
15extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
16 __u32 mask);
17/* add a mark to an inode */
18extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
19 struct fsnotify_group *group, struct inode *inode,
20 int allow_dups);
21/* add a mark to a vfsmount */
22extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
23 struct fsnotify_group *group, struct vfsmount *mnt,
24 int allow_dups);
25
19/* final kfree of a group */ 26/* final kfree of a group */
20extern void fsnotify_final_destroy_group(struct fsnotify_group *group); 27extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
21 28
29/* vfsmount specific destruction of a mark */
30extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
31/* inode specific destruction of a mark */
32extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
22/* run the list of all marks associated with inode and flag them to be freed */ 33/* run the list of all marks associated with inode and flag them to be freed */
23extern void fsnotify_clear_marks_by_inode(struct inode *inode); 34extern void fsnotify_clear_marks_by_inode(struct inode *inode);
35/* run the list of all marks associated with vfsmount and flag them to be freed */
36extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
24/* 37/*
25 * update the dentry->d_flags of all of inode's children to indicate if inode cares 38 * update the dentry->d_flags of all of inode's children to indicate if inode cares
26 * about events that happen to its children. 39 * about events that happen to its children.
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0e1677144bc5..d309f38449cb 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -28,64 +28,6 @@
28 28
29#include <asm/atomic.h> 29#include <asm/atomic.h>
30 30
31/* protects writes to fsnotify_groups and fsnotify_mask */
32static DEFINE_MUTEX(fsnotify_grp_mutex);
33/* protects reads while running the fsnotify_groups list */
34struct srcu_struct fsnotify_grp_srcu;
35/* all groups registered to receive filesystem notifications */
36LIST_HEAD(fsnotify_groups);
37/* bitwise OR of all events (FS_*) interesting to some group on this system */
38__u32 fsnotify_mask;
39
40/*
41 * When a new group registers or changes it's set of interesting events
42 * this function updates the fsnotify_mask to contain all interesting events
43 */
44void fsnotify_recalc_global_mask(void)
45{
46 struct fsnotify_group *group;
47 __u32 mask = 0;
48 int idx;
49
50 idx = srcu_read_lock(&fsnotify_grp_srcu);
51 list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
52 mask |= group->mask;
53 srcu_read_unlock(&fsnotify_grp_srcu, idx);
54 fsnotify_mask = mask;
55}
56
57/*
58 * Update the group->mask by running all of the marks associated with this
59 * group and finding the bitwise | of all of the mark->mask. If we change
60 * the group->mask we need to update the global mask of events interesting
61 * to the system.
62 */
63void fsnotify_recalc_group_mask(struct fsnotify_group *group)
64{
65 __u32 mask = 0;
66 __u32 old_mask = group->mask;
67 struct fsnotify_mark_entry *entry;
68
69 spin_lock(&group->mark_lock);
70 list_for_each_entry(entry, &group->mark_entries, g_list)
71 mask |= entry->mask;
72 spin_unlock(&group->mark_lock);
73
74 group->mask = mask;
75
76 if (old_mask != mask)
77 fsnotify_recalc_global_mask();
78}
79
80/*
81 * Take a reference to a group so things found under the fsnotify_grp_mutex
82 * can't get freed under us
83 */
84static void fsnotify_get_group(struct fsnotify_group *group)
85{
86 atomic_inc(&group->refcnt);
87}
88
89/* 31/*
90 * Final freeing of a group 32 * Final freeing of a group
91 */ 33 */
@@ -110,145 +52,53 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
110 */ 52 */
111static void fsnotify_destroy_group(struct fsnotify_group *group) 53static void fsnotify_destroy_group(struct fsnotify_group *group)
112{ 54{
113 /* clear all inode mark entries for this group */ 55 /* clear all inode marks for this group */
114 fsnotify_clear_marks_by_group(group); 56 fsnotify_clear_marks_by_group(group);
115 57
58 synchronize_srcu(&fsnotify_mark_srcu);
59
116 /* past the point of no return, matches the initial value of 1 */ 60 /* past the point of no return, matches the initial value of 1 */
117 if (atomic_dec_and_test(&group->num_marks)) 61 if (atomic_dec_and_test(&group->num_marks))
118 fsnotify_final_destroy_group(group); 62 fsnotify_final_destroy_group(group);
119} 63}
120 64
121/* 65/*
122 * Remove this group from the global list of groups that will get events
123 * this can be done even if there are still references and things still using
124 * this group. This just stops the group from getting new events.
125 */
126static void __fsnotify_evict_group(struct fsnotify_group *group)
127{
128 BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
129
130 if (group->on_group_list)
131 list_del_rcu(&group->group_list);
132 group->on_group_list = 0;
133}
134
135/*
136 * Called when a group is no longer interested in getting events. This can be
137 * used if a group is misbehaving or if for some reason a group should no longer
138 * get any filesystem events.
139 */
140void fsnotify_evict_group(struct fsnotify_group *group)
141{
142 mutex_lock(&fsnotify_grp_mutex);
143 __fsnotify_evict_group(group);
144 mutex_unlock(&fsnotify_grp_mutex);
145}
146
147/*
148 * Drop a reference to a group. Free it if it's through. 66 * Drop a reference to a group. Free it if it's through.
149 */ 67 */
150void fsnotify_put_group(struct fsnotify_group *group) 68void fsnotify_put_group(struct fsnotify_group *group)
151{ 69{
152 if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex)) 70 if (atomic_dec_and_test(&group->refcnt))
153 return; 71 fsnotify_destroy_group(group);
154
155 /*
156 * OK, now we know that there's no other users *and* we hold mutex,
157 * so no new references will appear
158 */
159 __fsnotify_evict_group(group);
160
161 /*
162 * now it's off the list, so the only thing we might care about is
163 * srcu access....
164 */
165 mutex_unlock(&fsnotify_grp_mutex);
166 synchronize_srcu(&fsnotify_grp_srcu);
167
168 /* and now it is really dead. _Nothing_ could be seeing it */
169 fsnotify_recalc_global_mask();
170 fsnotify_destroy_group(group);
171}
172
173/*
174 * Simply run the fsnotify_groups list and find a group which matches
175 * the given parameters. If a group is found we take a reference to that
176 * group.
177 */
178static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
179 const struct fsnotify_ops *ops)
180{
181 struct fsnotify_group *group_iter;
182 struct fsnotify_group *group = NULL;
183
184 BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
185
186 list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
187 if (group_iter->group_num == group_num) {
188 if ((group_iter->mask == mask) &&
189 (group_iter->ops == ops)) {
190 fsnotify_get_group(group_iter);
191 group = group_iter;
192 } else
193 group = ERR_PTR(-EEXIST);
194 }
195 }
196 return group;
197} 72}
198 73
199/* 74/*
200 * Either finds an existing group which matches the group_num, mask, and ops or 75 * Create a new fsnotify_group and hold a reference for the group returned.
201 * creates a new group and adds it to the global group list. In either case we
202 * take a reference for the group returned.
203 */ 76 */
204struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, 77struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
205 const struct fsnotify_ops *ops)
206{ 78{
207 struct fsnotify_group *group, *tgroup; 79 struct fsnotify_group *group;
208 80
209 /* very low use, simpler locking if we just always alloc */ 81 group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
210 group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
211 if (!group) 82 if (!group)
212 return ERR_PTR(-ENOMEM); 83 return ERR_PTR(-ENOMEM);
213 84
85 /* set to 0 when there a no external references to this group */
214 atomic_set(&group->refcnt, 1); 86 atomic_set(&group->refcnt, 1);
215 87 /*
216 group->on_group_list = 0; 88 * hits 0 when there are no external references AND no marks for
217 group->group_num = group_num; 89 * this group
218 group->mask = mask; 90 */
91 atomic_set(&group->num_marks, 1);
219 92
220 mutex_init(&group->notification_mutex); 93 mutex_init(&group->notification_mutex);
221 INIT_LIST_HEAD(&group->notification_list); 94 INIT_LIST_HEAD(&group->notification_list);
222 init_waitqueue_head(&group->notification_waitq); 95 init_waitqueue_head(&group->notification_waitq);
223 group->q_len = 0;
224 group->max_events = UINT_MAX; 96 group->max_events = UINT_MAX;
225 97
226 spin_lock_init(&group->mark_lock); 98 spin_lock_init(&group->mark_lock);
227 atomic_set(&group->num_marks, 0); 99 INIT_LIST_HEAD(&group->marks_list);
228 INIT_LIST_HEAD(&group->mark_entries);
229 100
230 group->ops = ops; 101 group->ops = ops;
231 102
232 mutex_lock(&fsnotify_grp_mutex);
233 tgroup = fsnotify_find_group(group_num, mask, ops);
234 if (tgroup) {
235 /* group already exists */
236 mutex_unlock(&fsnotify_grp_mutex);
237 /* destroy the new one we made */
238 fsnotify_put_group(group);
239 return tgroup;
240 }
241
242 /* group not found, add a new one */
243 list_add_rcu(&group->group_list, &fsnotify_groups);
244 group->on_group_list = 1;
245 /* being on the fsnotify_groups list holds one num_marks */
246 atomic_inc(&group->num_marks);
247
248 mutex_unlock(&fsnotify_grp_mutex);
249
250 if (mask)
251 fsnotify_recalc_global_mask();
252
253 return group; 103 return group;
254} 104}
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 0399bcbe09c8..33297c005060 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -16,72 +16,6 @@
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */ 17 */
18 18
19/*
20 * fsnotify inode mark locking/lifetime/and refcnting
21 *
22 * REFCNT:
23 * The mark->refcnt tells how many "things" in the kernel currently are
24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriete locks, can take a reference
27 * and the object itself is guarenteed to survive until the reference is dropped.
28 *
29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
31 * be taken in order as follows:
32 *
33 * entry->lock
34 * group->mark_lock
35 * inode->i_lock
36 *
37 * entry->lock protects 2 things, entry->group and entry->inode. You must hold
38 * that lock to dereference either of these things (they could be NULL even with
39 * the lock)
40 *
41 * group->mark_lock protects the mark_entries list anchored inside a given group
42 * and each entry is hooked via the g_list. It also sorta protects the
43 * free_g_list, which when used is anchored by a private list on the stack of the
44 * task which held the group->mark_lock.
45 *
46 * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
47 * given inode and each entry is hooked via the i_list. (and sorta the
48 * free_i_list)
49 *
50 *
51 * LIFETIME:
52 * Inode marks survive between when they are added to an inode and when their
53 * refcnt==0.
54 *
55 * The inode mark can be cleared for a number of different reasons including:
56 * - The inode is unlinked for the last time. (fsnotify_inode_remove)
57 * - The inode is being evicted from cache. (fsnotify_inode_delete)
58 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
59 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark_by_entry)
60 * - The fsnotify_group associated with the mark is going away and all such marks
61 * need to be cleaned up. (fsnotify_clear_marks_by_group)
62 *
63 * Worst case we are given an inode and need to clean up all the marks on that
64 * inode. We take i_lock and walk the i_fsnotify_mark_entries safely. For each
65 * mark on the list we take a reference (so the mark can't disappear under us).
66 * We remove that mark form the inode's list of marks and we add this mark to a
67 * private list anchored on the stack using i_free_list; At this point we no
68 * longer fear anything finding the mark using the inode's list of marks.
69 *
70 * We can safely and locklessly run the private list on the stack of everything
71 * we just unattached from the original inode. For each mark on the private list
72 * we grab the mark-> and can thus dereference mark->group and mark->inode. If
73 * we see the group and inode are not NULL we take those locks. Now holding all
74 * 3 locks we can completely remove the mark from other tasks finding it in the
75 * future. Remember, 10 things might already be referencing this mark, but they
76 * better be holding a ref. We drop our reference we took before we unhooked it
77 * from the inode. When the ref hits 0 we can free the mark.
78 *
79 * Very similarly for freeing by group, except we use free_g_list.
80 *
81 * This has the very interesting property of being able to run concurrently with
82 * any (or all) other directions.
83 */
84
85#include <linux/fs.h> 19#include <linux/fs.h>
86#include <linux/init.h> 20#include <linux/init.h>
87#include <linux/kernel.h> 21#include <linux/kernel.h>
@@ -95,30 +29,19 @@
95#include <linux/fsnotify_backend.h> 29#include <linux/fsnotify_backend.h>
96#include "fsnotify.h" 30#include "fsnotify.h"
97 31
98void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
99{
100 atomic_inc(&entry->refcnt);
101}
102
103void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
104{
105 if (atomic_dec_and_test(&entry->refcnt))
106 entry->free_mark(entry);
107}
108
109/* 32/*
110 * Recalculate the mask of events relevant to a given inode locked. 33 * Recalculate the mask of events relevant to a given inode locked.
111 */ 34 */
112static void fsnotify_recalc_inode_mask_locked(struct inode *inode) 35static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
113{ 36{
114 struct fsnotify_mark_entry *entry; 37 struct fsnotify_mark *mark;
115 struct hlist_node *pos; 38 struct hlist_node *pos;
116 __u32 new_mask = 0; 39 __u32 new_mask = 0;
117 40
118 assert_spin_locked(&inode->i_lock); 41 assert_spin_locked(&inode->i_lock);
119 42
120 hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) 43 hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list)
121 new_mask |= entry->mask; 44 new_mask |= mark->mask;
122 inode->i_fsnotify_mask = new_mask; 45 inode->i_fsnotify_mask = new_mask;
123} 46}
124 47
@@ -135,107 +58,26 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
135 __fsnotify_update_child_dentry_flags(inode); 58 __fsnotify_update_child_dentry_flags(inode);
136} 59}
137 60
138/* 61void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
139 * Any time a mark is getting freed we end up here.
140 * The caller had better be holding a reference to this mark so we don't actually
141 * do the final put under the entry->lock
142 */
143void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
144{ 62{
145 struct fsnotify_group *group; 63 struct inode *inode = mark->i.inode;
146 struct inode *inode;
147 64
148 spin_lock(&entry->lock); 65 assert_spin_locked(&mark->lock);
66 assert_spin_locked(&mark->group->mark_lock);
149 67
150 group = entry->group;
151 inode = entry->inode;
152
153 BUG_ON(group && !inode);
154 BUG_ON(!group && inode);
155
156 /* if !group something else already marked this to die */
157 if (!group) {
158 spin_unlock(&entry->lock);
159 return;
160 }
161
162 /* 1 from caller and 1 for being on i_list/g_list */
163 BUG_ON(atomic_read(&entry->refcnt) < 2);
164
165 spin_lock(&group->mark_lock);
166 spin_lock(&inode->i_lock); 68 spin_lock(&inode->i_lock);
167 69
168 hlist_del_init(&entry->i_list); 70 hlist_del_init_rcu(&mark->i.i_list);
169 entry->inode = NULL; 71 mark->i.inode = NULL;
170
171 list_del_init(&entry->g_list);
172 entry->group = NULL;
173
174 fsnotify_put_mark(entry); /* for i_list and g_list */
175 72
176 /* 73 /*
177 * this mark is now off the inode->i_fsnotify_mark_entries list and we 74 * this mark is now off the inode->i_fsnotify_marks list and we
178 * hold the inode->i_lock, so this is the perfect time to update the 75 * hold the inode->i_lock, so this is the perfect time to update the
179 * inode->i_fsnotify_mask 76 * inode->i_fsnotify_mask
180 */ 77 */
181 fsnotify_recalc_inode_mask_locked(inode); 78 fsnotify_recalc_inode_mask_locked(inode);
182 79
183 spin_unlock(&inode->i_lock); 80 spin_unlock(&inode->i_lock);
184 spin_unlock(&group->mark_lock);
185 spin_unlock(&entry->lock);
186
187 /*
188 * Some groups like to know that marks are being freed. This is a
189 * callback to the group function to let it know that this entry
190 * is being freed.
191 */
192 if (group->ops->freeing_mark)
193 group->ops->freeing_mark(entry, group);
194
195 /*
196 * __fsnotify_update_child_dentry_flags(inode);
197 *
198 * I really want to call that, but we can't, we have no idea if the inode
199 * still exists the second we drop the entry->lock.
200 *
201 * The next time an event arrive to this inode from one of it's children
202 * __fsnotify_parent will see that the inode doesn't care about it's
203 * children and will update all of these flags then. So really this
204 * is just a lazy update (and could be a perf win...)
205 */
206
207
208 iput(inode);
209
210 /*
211 * it's possible that this group tried to destroy itself, but this
212 * this mark was simultaneously being freed by inode. If that's the
213 * case, we finish freeing the group here.
214 */
215 if (unlikely(atomic_dec_and_test(&group->num_marks)))
216 fsnotify_final_destroy_group(group);
217}
218
219/*
220 * Given a group, destroy all of the marks associated with that group.
221 */
222void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
223{
224 struct fsnotify_mark_entry *lentry, *entry;
225 LIST_HEAD(free_list);
226
227 spin_lock(&group->mark_lock);
228 list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
229 list_add(&entry->free_g_list, &free_list);
230 list_del_init(&entry->g_list);
231 fsnotify_get_mark(entry);
232 }
233 spin_unlock(&group->mark_lock);
234
235 list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
236 fsnotify_destroy_mark_by_entry(entry);
237 fsnotify_put_mark(entry);
238 }
239} 81}
240 82
241/* 83/*
@@ -243,112 +85,145 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
243 */ 85 */
244void fsnotify_clear_marks_by_inode(struct inode *inode) 86void fsnotify_clear_marks_by_inode(struct inode *inode)
245{ 87{
246 struct fsnotify_mark_entry *entry, *lentry; 88 struct fsnotify_mark *mark, *lmark;
247 struct hlist_node *pos, *n; 89 struct hlist_node *pos, *n;
248 LIST_HEAD(free_list); 90 LIST_HEAD(free_list);
249 91
250 spin_lock(&inode->i_lock); 92 spin_lock(&inode->i_lock);
251 hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) { 93 hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
252 list_add(&entry->free_i_list, &free_list); 94 list_add(&mark->i.free_i_list, &free_list);
253 hlist_del_init(&entry->i_list); 95 hlist_del_init_rcu(&mark->i.i_list);
254 fsnotify_get_mark(entry); 96 fsnotify_get_mark(mark);
255 } 97 }
256 spin_unlock(&inode->i_lock); 98 spin_unlock(&inode->i_lock);
257 99
258 list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) { 100 list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
259 fsnotify_destroy_mark_by_entry(entry); 101 fsnotify_destroy_mark(mark);
260 fsnotify_put_mark(entry); 102 fsnotify_put_mark(mark);
261 } 103 }
262} 104}
263 105
264/* 106/*
107 * Given a group clear all of the inode marks associated with that group.
108 */
109void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
110{
111 fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE);
112}
113
114/*
265 * given a group and inode, find the mark associated with that combination. 115 * given a group and inode, find the mark associated with that combination.
266 * if found take a reference to that mark and return it, else return NULL 116 * if found take a reference to that mark and return it, else return NULL
267 */ 117 */
268struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, 118struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group,
269 struct inode *inode) 119 struct inode *inode)
270{ 120{
271 struct fsnotify_mark_entry *entry; 121 struct fsnotify_mark *mark;
272 struct hlist_node *pos; 122 struct hlist_node *pos;
273 123
274 assert_spin_locked(&inode->i_lock); 124 assert_spin_locked(&inode->i_lock);
275 125
276 hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) { 126 hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) {
277 if (entry->group == group) { 127 if (mark->group == group) {
278 fsnotify_get_mark(entry); 128 fsnotify_get_mark(mark);
279 return entry; 129 return mark;
280 } 130 }
281 } 131 }
282 return NULL; 132 return NULL;
283} 133}
284 134
285/* 135/*
286 * Nothing fancy, just initialize lists and locks and counters. 136 * given a group and inode, find the mark associated with that combination.
137 * if found take a reference to that mark and return it, else return NULL
287 */ 138 */
288void fsnotify_init_mark(struct fsnotify_mark_entry *entry, 139struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
289 void (*free_mark)(struct fsnotify_mark_entry *entry)) 140 struct inode *inode)
141{
142 struct fsnotify_mark *mark;
143
144 spin_lock(&inode->i_lock);
145 mark = fsnotify_find_inode_mark_locked(group, inode);
146 spin_unlock(&inode->i_lock);
290 147
148 return mark;
149}
150
151/*
152 * If we are setting a mark mask on an inode mark we should pin the inode
153 * in memory.
154 */
155void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
156 __u32 mask)
291{ 157{
292 spin_lock_init(&entry->lock); 158 struct inode *inode;
293 atomic_set(&entry->refcnt, 1); 159
294 INIT_HLIST_NODE(&entry->i_list); 160 assert_spin_locked(&mark->lock);
295 entry->group = NULL; 161
296 entry->mask = 0; 162 if (mask &&
297 entry->inode = NULL; 163 mark->i.inode &&
298 entry->free_mark = free_mark; 164 !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
165 mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
166 inode = igrab(mark->i.inode);
167 /*
168 * we shouldn't be able to get here if the inode wasn't
169 * already safely held in memory. But bug in case it
170 * ever is wrong.
171 */
172 BUG_ON(!inode);
173 }
299} 174}
300 175
301/* 176/*
302 * Attach an initialized mark entry to a given group and inode. 177 * Attach an initialized mark to a given inode.
303 * These marks may be used for the fsnotify backend to determine which 178 * These marks may be used for the fsnotify backend to determine which
304 * event types should be delivered to which group and for which inodes. 179 * event types should be delivered to which group and for which inodes. These
180 * marks are ordered according to the group's location in memory.
305 */ 181 */
306int fsnotify_add_mark(struct fsnotify_mark_entry *entry, 182int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
307 struct fsnotify_group *group, struct inode *inode) 183 struct fsnotify_group *group, struct inode *inode,
184 int allow_dups)
308{ 185{
309 struct fsnotify_mark_entry *lentry; 186 struct fsnotify_mark *lmark;
187 struct hlist_node *node, *last = NULL;
310 int ret = 0; 188 int ret = 0;
311 189
312 inode = igrab(inode); 190 mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
313 if (unlikely(!inode)) 191
314 return -EINVAL; 192 assert_spin_locked(&mark->lock);
193 assert_spin_locked(&group->mark_lock);
315 194
316 /*
317 * LOCKING ORDER!!!!
318 * entry->lock
319 * group->mark_lock
320 * inode->i_lock
321 */
322 spin_lock(&entry->lock);
323 spin_lock(&group->mark_lock);
324 spin_lock(&inode->i_lock); 195 spin_lock(&inode->i_lock);
325 196
326 lentry = fsnotify_find_mark_entry(group, inode); 197 mark->i.inode = inode;
327 if (!lentry) {
328 entry->group = group;
329 entry->inode = inode;
330 198
331 hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries); 199 /* is mark the first mark? */
332 list_add(&entry->g_list, &group->mark_entries); 200 if (hlist_empty(&inode->i_fsnotify_marks)) {
201 hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks);
202 goto out;
203 }
333 204
334 fsnotify_get_mark(entry); /* for i_list and g_list */ 205 /* should mark be in the middle of the current list? */
206 hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) {
207 last = node;
208
209 if ((lmark->group == group) && !allow_dups) {
210 ret = -EEXIST;
211 goto out;
212 }
335 213
336 atomic_inc(&group->num_marks); 214 if (mark->group < lmark->group)
215 continue;
337 216
338 fsnotify_recalc_inode_mask_locked(inode); 217 hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
218 goto out;
339 } 219 }
340 220
221 BUG_ON(last == NULL);
222 /* mark should be the last entry. last is the current last entry */
223 hlist_add_after_rcu(last, &mark->i.i_list);
224out:
225 fsnotify_recalc_inode_mask_locked(inode);
341 spin_unlock(&inode->i_lock); 226 spin_unlock(&inode->i_lock);
342 spin_unlock(&group->mark_lock);
343 spin_unlock(&entry->lock);
344
345 if (lentry) {
346 ret = -EEXIST;
347 iput(inode);
348 fsnotify_put_mark(lentry);
349 } else {
350 __fsnotify_update_child_dentry_flags(inode);
351 }
352 227
353 return ret; 228 return ret;
354} 229}
@@ -369,11 +244,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
369 struct inode *need_iput_tmp; 244 struct inode *need_iput_tmp;
370 245
371 /* 246 /*
372 * We cannot __iget() an inode in state I_CLEAR, I_FREEING, 247 * We cannot __iget() an inode in state I_FREEING,
373 * I_WILL_FREE, or I_NEW which is fine because by that point 248 * I_WILL_FREE, or I_NEW which is fine because by that point
374 * the inode cannot have any associated watches. 249 * the inode cannot have any associated watches.
375 */ 250 */
376 if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW)) 251 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
377 continue; 252 continue;
378 253
379 /* 254 /*
@@ -397,7 +272,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
397 /* In case the dropping of a reference would nuke next_i. */ 272 /* In case the dropping of a reference would nuke next_i. */
398 if ((&next_i->i_sb_list != list) && 273 if ((&next_i->i_sb_list != list) &&
399 atomic_read(&next_i->i_count) && 274 atomic_read(&next_i->i_count) &&
400 !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) { 275 !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
401 __iget(next_i); 276 __iget(next_i);
402 need_iput = next_i; 277 need_iput = next_i;
403 } 278 }
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b3a159b21cfd..b981fc0c8379 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,18 +1,3 @@
1config INOTIFY
2 bool "Inotify file change notification support"
3 default n
4 ---help---
5 Say Y here to enable legacy in kernel inotify support. Inotify is a
6 file change notification system. It is a replacement for dnotify.
7 This option only provides the legacy inotify in kernel API. There
8 are no in tree kernel users of this interface since it is deprecated.
9 You only need this if you are loading an out of tree kernel module
10 that uses inotify.
11
12 For more information, see <file:Documentation/filesystems/inotify.txt>
13
14 If unsure, say N.
15
16config INOTIFY_USER 1config INOTIFY_USER
17 bool "Inotify support for userspace" 2 bool "Inotify support for userspace"
18 select ANON_INODES 3 select ANON_INODES
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index 943828171362..a380dabe09de 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1 @@
1obj-$(CONFIG_INOTIFY) += inotify.o
2obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
deleted file mode 100644
index 27b75ebc7460..000000000000
--- a/fs/notify/inotify/inotify.c
+++ /dev/null
@@ -1,873 +0,0 @@
1/*
2 * fs/inotify.c - inode-based file event notifications
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
9 *
10 * Copyright (C) 2005 John McCutchan
11 * Copyright 2006 Hewlett-Packard Development Company, L.P.
12 *
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2, or (at your option) any
16 * later version.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 */
23
24#include <linux/module.h>
25#include <linux/kernel.h>
26#include <linux/spinlock.h>
27#include <linux/idr.h>
28#include <linux/slab.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/init.h>
32#include <linux/list.h>
33#include <linux/writeback.h>
34#include <linux/inotify.h>
35#include <linux/fsnotify_backend.h>
36
37static atomic_t inotify_cookie;
38
39/*
40 * Lock ordering:
41 *
42 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
43 * iprune_mutex (synchronize shrink_icache_memory())
44 * inode_lock (protects the super_block->s_inodes list)
45 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
46 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
47 *
48 * The inode->inotify_mutex and inotify_handle->mutex are held during execution
49 * of a caller's event handler. Thus, the caller must not hold any locks
50 * taken in their event handler while calling any of the published inotify
51 * interfaces.
52 */
53
54/*
55 * Lifetimes of the three main data structures--inotify_handle, inode, and
56 * inotify_watch--are managed by reference count.
57 *
58 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
59 * Additional references can bump the count via get_inotify_handle() and drop
60 * the count via put_inotify_handle().
61 *
62 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
63 * to remove_watch_no_event(). Additional references can bump the count via
64 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
65 * is responsible for the final put after receiving IN_IGNORED, or when using
66 * IN_ONESHOT after receiving the first event. Inotify does the final put if
67 * inotify_destroy() is called.
68 *
69 * inode: Pinned so long as the inode is associated with a watch, from
70 * inotify_add_watch() to the final put_inotify_watch().
71 */
72
73/*
74 * struct inotify_handle - represents an inotify instance
75 *
76 * This structure is protected by the mutex 'mutex'.
77 */
78struct inotify_handle {
79 struct idr idr; /* idr mapping wd -> watch */
80 struct mutex mutex; /* protects this bad boy */
81 struct list_head watches; /* list of watches */
82 atomic_t count; /* reference count */
83 u32 last_wd; /* the last wd allocated */
84 const struct inotify_operations *in_ops; /* inotify caller operations */
85};
86
87static inline void get_inotify_handle(struct inotify_handle *ih)
88{
89 atomic_inc(&ih->count);
90}
91
92static inline void put_inotify_handle(struct inotify_handle *ih)
93{
94 if (atomic_dec_and_test(&ih->count)) {
95 idr_destroy(&ih->idr);
96 kfree(ih);
97 }
98}
99
100/**
101 * get_inotify_watch - grab a reference to an inotify_watch
102 * @watch: watch to grab
103 */
104void get_inotify_watch(struct inotify_watch *watch)
105{
106 atomic_inc(&watch->count);
107}
108EXPORT_SYMBOL_GPL(get_inotify_watch);
109
110int pin_inotify_watch(struct inotify_watch *watch)
111{
112 struct super_block *sb = watch->inode->i_sb;
113 if (atomic_inc_not_zero(&sb->s_active)) {
114 atomic_inc(&watch->count);
115 return 1;
116 }
117 return 0;
118}
119
120/**
121 * put_inotify_watch - decrements the ref count on a given watch. cleans up
122 * watch references if the count reaches zero. inotify_watch is freed by
123 * inotify callers via the destroy_watch() op.
124 * @watch: watch to release
125 */
126void put_inotify_watch(struct inotify_watch *watch)
127{
128 if (atomic_dec_and_test(&watch->count)) {
129 struct inotify_handle *ih = watch->ih;
130
131 iput(watch->inode);
132 ih->in_ops->destroy_watch(watch);
133 put_inotify_handle(ih);
134 }
135}
136EXPORT_SYMBOL_GPL(put_inotify_watch);
137
138void unpin_inotify_watch(struct inotify_watch *watch)
139{
140 struct super_block *sb = watch->inode->i_sb;
141 put_inotify_watch(watch);
142 deactivate_super(sb);
143}
144
145/*
146 * inotify_handle_get_wd - returns the next WD for use by the given handle
147 *
148 * Callers must hold ih->mutex. This function can sleep.
149 */
150static int inotify_handle_get_wd(struct inotify_handle *ih,
151 struct inotify_watch *watch)
152{
153 int ret;
154
155 do {
156 if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
157 return -ENOSPC;
158 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
159 } while (ret == -EAGAIN);
160
161 if (likely(!ret))
162 ih->last_wd = watch->wd;
163
164 return ret;
165}
166
167/*
168 * inotify_inode_watched - returns nonzero if there are watches on this inode
169 * and zero otherwise. We call this lockless, we do not care if we race.
170 */
171static inline int inotify_inode_watched(struct inode *inode)
172{
173 return !list_empty(&inode->inotify_watches);
174}
175
176/*
177 * Get child dentry flag into synch with parent inode.
178 * Flag should always be clear for negative dentries.
179 */
180static void set_dentry_child_flags(struct inode *inode, int watched)
181{
182 struct dentry *alias;
183
184 spin_lock(&dcache_lock);
185 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
186 struct dentry *child;
187
188 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
189 if (!child->d_inode)
190 continue;
191
192 spin_lock(&child->d_lock);
193 if (watched)
194 child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
195 else
196 child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
197 spin_unlock(&child->d_lock);
198 }
199 }
200 spin_unlock(&dcache_lock);
201}
202
203/*
204 * inode_find_handle - find the watch associated with the given inode and
205 * handle
206 *
207 * Callers must hold inode->inotify_mutex.
208 */
209static struct inotify_watch *inode_find_handle(struct inode *inode,
210 struct inotify_handle *ih)
211{
212 struct inotify_watch *watch;
213
214 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
215 if (watch->ih == ih)
216 return watch;
217 }
218
219 return NULL;
220}
221
222/*
223 * remove_watch_no_event - remove watch without the IN_IGNORED event.
224 *
225 * Callers must hold both inode->inotify_mutex and ih->mutex.
226 */
227static void remove_watch_no_event(struct inotify_watch *watch,
228 struct inotify_handle *ih)
229{
230 list_del(&watch->i_list);
231 list_del(&watch->h_list);
232
233 if (!inotify_inode_watched(watch->inode))
234 set_dentry_child_flags(watch->inode, 0);
235
236 idr_remove(&ih->idr, watch->wd);
237}
238
239/**
240 * inotify_remove_watch_locked - Remove a watch from both the handle and the
241 * inode. Sends the IN_IGNORED event signifying that the inode is no longer
242 * watched. May be invoked from a caller's event handler.
243 * @ih: inotify handle associated with watch
244 * @watch: watch to remove
245 *
246 * Callers must hold both inode->inotify_mutex and ih->mutex.
247 */
248void inotify_remove_watch_locked(struct inotify_handle *ih,
249 struct inotify_watch *watch)
250{
251 remove_watch_no_event(watch, ih);
252 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
253}
254EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
255
256/* Kernel API for producing events */
257
258/*
259 * inotify_d_instantiate - instantiate dcache entry for inode
260 */
261void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
262{
263 struct dentry *parent;
264
265 if (!inode)
266 return;
267
268 spin_lock(&entry->d_lock);
269 parent = entry->d_parent;
270 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
271 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
272 spin_unlock(&entry->d_lock);
273}
274
275/*
276 * inotify_d_move - dcache entry has been moved
277 */
278void inotify_d_move(struct dentry *entry)
279{
280 struct dentry *parent;
281
282 parent = entry->d_parent;
283 if (inotify_inode_watched(parent->d_inode))
284 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
285 else
286 entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
287}
288
289/**
290 * inotify_inode_queue_event - queue an event to all watches on this inode
291 * @inode: inode event is originating from
292 * @mask: event mask describing this event
293 * @cookie: cookie for synchronization, or zero
294 * @name: filename, if any
295 * @n_inode: inode associated with name
296 */
297void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
298 const char *name, struct inode *n_inode)
299{
300 struct inotify_watch *watch, *next;
301
302 if (!inotify_inode_watched(inode))
303 return;
304
305 mutex_lock(&inode->inotify_mutex);
306 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
307 u32 watch_mask = watch->mask;
308 if (watch_mask & mask) {
309 struct inotify_handle *ih= watch->ih;
310 mutex_lock(&ih->mutex);
311 if (watch_mask & IN_ONESHOT)
312 remove_watch_no_event(watch, ih);
313 ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
314 name, n_inode);
315 mutex_unlock(&ih->mutex);
316 }
317 }
318 mutex_unlock(&inode->inotify_mutex);
319}
320EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
321
322/**
323 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
324 * @dentry: the dentry in question, we queue against this dentry's parent
325 * @mask: event mask describing this event
326 * @cookie: cookie for synchronization, or zero
327 * @name: filename, if any
328 */
329void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
330 u32 cookie, const char *name)
331{
332 struct dentry *parent;
333 struct inode *inode;
334
335 if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
336 return;
337
338 spin_lock(&dentry->d_lock);
339 parent = dentry->d_parent;
340 inode = parent->d_inode;
341
342 if (inotify_inode_watched(inode)) {
343 dget(parent);
344 spin_unlock(&dentry->d_lock);
345 inotify_inode_queue_event(inode, mask, cookie, name,
346 dentry->d_inode);
347 dput(parent);
348 } else
349 spin_unlock(&dentry->d_lock);
350}
351EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
352
353/**
354 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
355 */
356u32 inotify_get_cookie(void)
357{
358 return atomic_inc_return(&inotify_cookie);
359}
360EXPORT_SYMBOL_GPL(inotify_get_cookie);
361
362/**
363 * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
364 * @list: list of inodes being unmounted (sb->s_inodes)
365 *
366 * Called with inode_lock held, protecting the unmounting super block's list
367 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
368 * We temporarily drop inode_lock, however, and CAN block.
369 */
370void inotify_unmount_inodes(struct list_head *list)
371{
372 struct inode *inode, *next_i, *need_iput = NULL;
373
374 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
375 struct inotify_watch *watch, *next_w;
376 struct inode *need_iput_tmp;
377 struct list_head *watches;
378
379 /*
380 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
381 * I_WILL_FREE, or I_NEW which is fine because by that point
382 * the inode cannot have any associated watches.
383 */
384 if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
385 continue;
386
387 /*
388 * If i_count is zero, the inode cannot have any watches and
389 * doing an __iget/iput with MS_ACTIVE clear would actually
390 * evict all inodes with zero i_count from icache which is
391 * unnecessarily violent and may in fact be illegal to do.
392 */
393 if (!atomic_read(&inode->i_count))
394 continue;
395
396 need_iput_tmp = need_iput;
397 need_iput = NULL;
398 /* In case inotify_remove_watch_locked() drops a reference. */
399 if (inode != need_iput_tmp)
400 __iget(inode);
401 else
402 need_iput_tmp = NULL;
403 /* In case the dropping of a reference would nuke next_i. */
404 if ((&next_i->i_sb_list != list) &&
405 atomic_read(&next_i->i_count) &&
406 !(next_i->i_state & (I_CLEAR | I_FREEING |
407 I_WILL_FREE))) {
408 __iget(next_i);
409 need_iput = next_i;
410 }
411
412 /*
413 * We can safely drop inode_lock here because we hold
414 * references on both inode and next_i. Also no new inodes
415 * will be added since the umount has begun. Finally,
416 * iprune_mutex keeps shrink_icache_memory() away.
417 */
418 spin_unlock(&inode_lock);
419
420 if (need_iput_tmp)
421 iput(need_iput_tmp);
422
423 /* for each watch, send IN_UNMOUNT and then remove it */
424 mutex_lock(&inode->inotify_mutex);
425 watches = &inode->inotify_watches;
426 list_for_each_entry_safe(watch, next_w, watches, i_list) {
427 struct inotify_handle *ih= watch->ih;
428 get_inotify_watch(watch);
429 mutex_lock(&ih->mutex);
430 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
431 NULL, NULL);
432 inotify_remove_watch_locked(ih, watch);
433 mutex_unlock(&ih->mutex);
434 put_inotify_watch(watch);
435 }
436 mutex_unlock(&inode->inotify_mutex);
437 iput(inode);
438
439 spin_lock(&inode_lock);
440 }
441}
442EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
443
444/**
445 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
446 * @inode: inode that is about to be removed
447 */
448void inotify_inode_is_dead(struct inode *inode)
449{
450 struct inotify_watch *watch, *next;
451
452 mutex_lock(&inode->inotify_mutex);
453 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
454 struct inotify_handle *ih = watch->ih;
455 mutex_lock(&ih->mutex);
456 inotify_remove_watch_locked(ih, watch);
457 mutex_unlock(&ih->mutex);
458 }
459 mutex_unlock(&inode->inotify_mutex);
460}
461EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
462
463/* Kernel Consumer API */
464
465/**
466 * inotify_init - allocate and initialize an inotify instance
467 * @ops: caller's inotify operations
468 */
469struct inotify_handle *inotify_init(const struct inotify_operations *ops)
470{
471 struct inotify_handle *ih;
472
473 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
474 if (unlikely(!ih))
475 return ERR_PTR(-ENOMEM);
476
477 idr_init(&ih->idr);
478 INIT_LIST_HEAD(&ih->watches);
479 mutex_init(&ih->mutex);
480 ih->last_wd = 0;
481 ih->in_ops = ops;
482 atomic_set(&ih->count, 0);
483 get_inotify_handle(ih);
484
485 return ih;
486}
487EXPORT_SYMBOL_GPL(inotify_init);
488
489/**
490 * inotify_init_watch - initialize an inotify watch
491 * @watch: watch to initialize
492 */
493void inotify_init_watch(struct inotify_watch *watch)
494{
495 INIT_LIST_HEAD(&watch->h_list);
496 INIT_LIST_HEAD(&watch->i_list);
497 atomic_set(&watch->count, 0);
498 get_inotify_watch(watch); /* initial get */
499}
500EXPORT_SYMBOL_GPL(inotify_init_watch);
501
502/*
503 * Watch removals suck violently. To kick the watch out we need (in this
504 * order) inode->inotify_mutex and ih->mutex. That's fine if we have
505 * a hold on inode; however, for all other cases we need to make damn sure
506 * we don't race with umount. We can *NOT* just grab a reference to a
507 * watch - inotify_unmount_inodes() will happily sail past it and we'll end
508 * with reference to inode potentially outliving its superblock. Ideally
509 * we just want to grab an active reference to superblock if we can; that
510 * will make sure we won't go into inotify_unmount_inodes() until we are
511 * done. Cleanup is just deactivate_super(). However, that leaves a messy
512 * case - what if we *are* racing with umount() and active references to
513 * superblock can't be acquired anymore? We can bump ->s_count, grab
514 * ->s_umount, which will wait until the superblock is shut down and the
515 * watch in question is pining for fjords.
516 *
517 * And yes, this is far beyond mere "not very pretty"; so's the entire
518 * concept of inotify to start with.
519 */
520
521/**
522 * pin_to_kill - pin the watch down for removal
523 * @ih: inotify handle
524 * @watch: watch to kill
525 *
526 * Called with ih->mutex held, drops it. Possible return values:
527 * 0 - nothing to do, it has died
528 * 1 - remove it, drop the reference and deactivate_super()
529 */
530static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
531{
532 struct super_block *sb = watch->inode->i_sb;
533
534 if (atomic_inc_not_zero(&sb->s_active)) {
535 get_inotify_watch(watch);
536 mutex_unlock(&ih->mutex);
537 return 1; /* the best outcome */
538 }
539 spin_lock(&sb_lock);
540 sb->s_count++;
541 spin_unlock(&sb_lock);
542 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
543 down_read(&sb->s_umount);
544 /* fs is already shut down; the watch is dead */
545 drop_super(sb);
546 return 0;
547}
548
549static void unpin_and_kill(struct inotify_watch *watch)
550{
551 struct super_block *sb = watch->inode->i_sb;
552 put_inotify_watch(watch);
553 deactivate_super(sb);
554}
555
556/**
557 * inotify_destroy - clean up and destroy an inotify instance
558 * @ih: inotify handle
559 */
560void inotify_destroy(struct inotify_handle *ih)
561{
562 /*
563 * Destroy all of the watches for this handle. Unfortunately, not very
564 * pretty. We cannot do a simple iteration over the list, because we
565 * do not know the inode until we iterate to the watch. But we need to
566 * hold inode->inotify_mutex before ih->mutex. The following works.
567 *
568 * AV: it had to become even uglier to start working ;-/
569 */
570 while (1) {
571 struct inotify_watch *watch;
572 struct list_head *watches;
573 struct super_block *sb;
574 struct inode *inode;
575
576 mutex_lock(&ih->mutex);
577 watches = &ih->watches;
578 if (list_empty(watches)) {
579 mutex_unlock(&ih->mutex);
580 break;
581 }
582 watch = list_first_entry(watches, struct inotify_watch, h_list);
583 sb = watch->inode->i_sb;
584 if (!pin_to_kill(ih, watch))
585 continue;
586
587 inode = watch->inode;
588 mutex_lock(&inode->inotify_mutex);
589 mutex_lock(&ih->mutex);
590
591 /* make sure we didn't race with another list removal */
592 if (likely(idr_find(&ih->idr, watch->wd))) {
593 remove_watch_no_event(watch, ih);
594 put_inotify_watch(watch);
595 }
596
597 mutex_unlock(&ih->mutex);
598 mutex_unlock(&inode->inotify_mutex);
599 unpin_and_kill(watch);
600 }
601
602 /* free this handle: the put matching the get in inotify_init() */
603 put_inotify_handle(ih);
604}
605EXPORT_SYMBOL_GPL(inotify_destroy);
606
607/**
608 * inotify_find_watch - find an existing watch for an (ih,inode) pair
609 * @ih: inotify handle
610 * @inode: inode to watch
611 * @watchp: pointer to existing inotify_watch
612 *
613 * Caller must pin given inode (via nameidata).
614 */
615s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
616 struct inotify_watch **watchp)
617{
618 struct inotify_watch *old;
619 int ret = -ENOENT;
620
621 mutex_lock(&inode->inotify_mutex);
622 mutex_lock(&ih->mutex);
623
624 old = inode_find_handle(inode, ih);
625 if (unlikely(old)) {
626 get_inotify_watch(old); /* caller must put watch */
627 *watchp = old;
628 ret = old->wd;
629 }
630
631 mutex_unlock(&ih->mutex);
632 mutex_unlock(&inode->inotify_mutex);
633
634 return ret;
635}
636EXPORT_SYMBOL_GPL(inotify_find_watch);
637
638/**
639 * inotify_find_update_watch - find and update the mask of an existing watch
640 * @ih: inotify handle
641 * @inode: inode's watch to update
642 * @mask: mask of events to watch
643 *
644 * Caller must pin given inode (via nameidata).
645 */
646s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
647 u32 mask)
648{
649 struct inotify_watch *old;
650 int mask_add = 0;
651 int ret;
652
653 if (mask & IN_MASK_ADD)
654 mask_add = 1;
655
656 /* don't allow invalid bits: we don't want flags set */
657 mask &= IN_ALL_EVENTS | IN_ONESHOT;
658 if (unlikely(!mask))
659 return -EINVAL;
660
661 mutex_lock(&inode->inotify_mutex);
662 mutex_lock(&ih->mutex);
663
664 /*
665 * Handle the case of re-adding a watch on an (inode,ih) pair that we
666 * are already watching. We just update the mask and return its wd.
667 */
668 old = inode_find_handle(inode, ih);
669 if (unlikely(!old)) {
670 ret = -ENOENT;
671 goto out;
672 }
673
674 if (mask_add)
675 old->mask |= mask;
676 else
677 old->mask = mask;
678 ret = old->wd;
679out:
680 mutex_unlock(&ih->mutex);
681 mutex_unlock(&inode->inotify_mutex);
682 return ret;
683}
684EXPORT_SYMBOL_GPL(inotify_find_update_watch);
685
686/**
687 * inotify_add_watch - add a watch to an inotify instance
688 * @ih: inotify handle
689 * @watch: caller allocated watch structure
690 * @inode: inode to watch
691 * @mask: mask of events to watch
692 *
693 * Caller must pin given inode (via nameidata).
694 * Caller must ensure it only calls inotify_add_watch() once per watch.
695 * Calls inotify_handle_get_wd() so may sleep.
 *
 * Locking: takes inode->inotify_mutex and then ih->mutex, and releases them
 * in the reverse order on every path past the mask check.
 *
 * Returns the new watch descriptor (watch->wd) on success, -EINVAL when @mask
 * has no bits left after masking with IN_ALL_EVENTS | IN_ONESHOT, or the
 * non-zero value returned by inotify_handle_get_wd().
696 */
697s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
698 struct inode *inode, u32 mask)
699{
700 int ret = 0;
701 int newly_watched;
702
703 /* don't allow invalid bits: we don't want flags set */
704 mask &= IN_ALL_EVENTS | IN_ONESHOT;
705 if (unlikely(!mask))
706 return -EINVAL;
707 watch->mask = mask;
708
709 mutex_lock(&inode->inotify_mutex);
710 mutex_lock(&ih->mutex);
711
712 /* Initialize a new watch; a non-zero result is returned unchanged */
713 ret = inotify_handle_get_wd(ih, watch);
714 if (unlikely(ret))
715 goto out;
716 ret = watch->wd;
717
718 /* save a reference to handle and bump the count to make it official */
719 get_inotify_handle(ih);
720 watch->ih = ih;
721
722 /*
723 * Save a reference to the inode and bump the ref count to make it
724 * official. We hold a reference to nameidata, which makes this safe.
     * NOTE(review): the igrab() result is stored without a NULL check;
     * this relies on the caller's pin described above.
725 */
726 watch->inode = igrab(inode);
727
728 /* Add the watch to the handle's and the inode's list */
    /* newly_watched is sampled before the list_add() calls below */
729 newly_watched = !inotify_inode_watched(inode);
730 list_add(&watch->h_list, &ih->watches);
731 list_add(&watch->i_list, &inode->inotify_watches);
732 /*
733 * Set child flags _after_ adding the watch, so there is no race
734 * window where newly instantiated children could miss their parent's
735 * watched flag.
736 */
737 if (newly_watched)
738 set_dentry_child_flags(inode, 1);
739
740out:
741 mutex_unlock(&ih->mutex);
742 mutex_unlock(&inode->inotify_mutex);
743 return ret;
744}
745EXPORT_SYMBOL_GPL(inotify_add_watch);
746
747/**
748 * inotify_clone_watch - put the watch next to existing one
749 * @old: already installed watch
750 * @new: new watch
751 *
752 * Caller must hold the inotify_mutex of inode we are dealing with;
753 * it is expected to remove the old watch before unlocking the inode.
 *
 * Only old->ih->mutex is taken here. @new inherits @old's mask and handle,
 * and is linked onto the same handle and inode lists as @old.
 *
 * Returns the descriptor assigned to @new (new->wd) on success, or the
 * non-zero value returned by inotify_handle_get_wd(). On that failure path
 * new->mask and new->ih have already been written, but no handle or inode
 * reference has been taken and @new is on no list.
754 */
755s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
756{
757 struct inotify_handle *ih = old->ih;
758 int ret = 0;
759
760 new->mask = old->mask;
761 new->ih = ih;
762
763 mutex_lock(&ih->mutex);
764
765 /* Initialize a new watch */
766 ret = inotify_handle_get_wd(ih, new);
767 if (unlikely(ret))
768 goto out;
769 ret = new->wd;
770
    /* the new watch holds its own reference on the handle */
771 get_inotify_handle(ih);
772
    /* NOTE(review): igrab() result is stored without a NULL check */
773 new->inode = igrab(old->inode);
774
775 list_add(&new->h_list, &ih->watches);
776 list_add(&new->i_list, &old->inode->inotify_watches);
777out:
778 mutex_unlock(&ih->mutex);
779 return ret;
780}
781
/*
 * inotify_evict_watch - remove @watch from its handle.
 *
 * Takes a reference on @watch with get_inotify_watch(), then calls
 * inotify_remove_watch_locked() with watch->ih->mutex held.
 *
 * NOTE(review): neither the matching put for that reference nor the inode's
 * inotify_mutex is visible in this function; presumably the caller already
 * holds the inode mutex and the reference is dropped during removal --
 * confirm against inotify_remove_watch_locked() and the callers.
 */
782void inotify_evict_watch(struct inotify_watch *watch)
783{
784 get_inotify_watch(watch);
785 mutex_lock(&watch->ih->mutex);
786 inotify_remove_watch_locked(watch->ih, watch);
787 mutex_unlock(&watch->ih->mutex);
788}
789
790/**
791 * inotify_rm_wd - remove a watch from an inotify instance
792 * @ih: inotify handle
793 * @wd: watch descriptor to remove
794 *
795 * Can sleep.
 *
 * Returns -EINVAL when @wd is not present in ih->idr. Every other path
 * returns 0, including the case where pin_to_kill() returns zero and the
 * case where the watch was replaced or removed before the locks were
 * re-taken.
796 */
797int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
798{
799 struct inotify_watch *watch;
800 struct super_block *sb;
801 struct inode *inode;
802
803 mutex_lock(&ih->mutex);
804 watch = idr_find(&ih->idr, wd);
805 if (unlikely(!watch)) {
806 mutex_unlock(&ih->mutex);
807 return -EINVAL;
808 }
    /* NOTE(review): sb is assigned here but not read again in this function */
809 sb = watch->inode->i_sb;
    /*
     * NOTE(review): ih->mutex is not unlocked in this function before the
     * early return or before it is locked again below, so pin_to_kill()
     * presumably drops it on both outcomes -- confirm in pin_to_kill().
     */
810 if (!pin_to_kill(ih, watch))
811 return 0;
812
813 inode = watch->inode;
814
    /* same lock order as inotify_add_watch(): inode mutex, then handle mutex */
815 mutex_lock(&inode->inotify_mutex);
816 mutex_lock(&ih->mutex);
817
818 /* make sure that we did not race */
819 if (likely(idr_find(&ih->idr, wd) == watch))
820 inotify_remove_watch_locked(ih, watch);
821
822 mutex_unlock(&ih->mutex);
823 mutex_unlock(&inode->inotify_mutex);
    /* counterpart of the successful pin_to_kill() above */
824 unpin_and_kill(watch);
825
826 return 0;
827}
828EXPORT_SYMBOL_GPL(inotify_rm_wd);
829
830/**
831 * inotify_rm_watch - remove a watch from an inotify instance
832 * @ih: inotify handle
833 * @watch: watch to remove
834 *
835 * Can sleep.
 *
 * Thin wrapper: passes watch->wd to inotify_rm_wd() and returns its result,
 * so the watch is looked up again by descriptor in @ih.
836 */
837int inotify_rm_watch(struct inotify_handle *ih,
838 struct inotify_watch *watch)
839{
840 return inotify_rm_wd(ih, watch->wd);
841}
842EXPORT_SYMBOL_GPL(inotify_rm_watch);
843
844/*
845 * inotify_setup - core initialization function
 *
 * Asserts at build time that each IN_* constant listed below has the same
 * value as its FS_* counterpart, resets inotify_cookie to 0 and always
 * returns 0. Registered through module_init() below.
846 */
847static int __init inotify_setup(void)
848{
    /* event bits */
849 BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
850 BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
851 BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
852 BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
853 BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
854 BUILD_BUG_ON(IN_OPEN != FS_OPEN);
855 BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
856 BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
857 BUILD_BUG_ON(IN_CREATE != FS_CREATE);
858 BUILD_BUG_ON(IN_DELETE != FS_DELETE);
859 BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
860 BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
861 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
862
    /* remaining flag bits */
863 BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
864 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
865 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
866 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
867
868 atomic_set(&inotify_cookie, 0);
869
870 return 0;
871}
872
873module_init(inotify_setup);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index f234f3a4c8ca..b6642e4de4bf 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -9,13 +9,12 @@ struct inotify_event_private_data {
9 int wd; 9 int wd;
10}; 10};
11 11
12struct inotify_inode_mark_entry { 12struct inotify_inode_mark {
13 /* fsnotify_mark_entry MUST be the first thing */ 13 struct fsnotify_mark fsn_mark;
14 struct fsnotify_mark_entry fsn_entry;
15 int wd; 14 int wd;
16}; 15};
17 16
18extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, 17extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
19 struct fsnotify_group *group); 18 struct fsnotify_group *group);
20extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); 19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
21 20
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index e27960cd76ab..5e73eeb2c697 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -22,6 +22,7 @@
22 * General Public License for more details. 22 * General Public License for more details.
23 */ 23 */
24 24
25#include <linux/dcache.h> /* d_unlinked */
25#include <linux/fs.h> /* struct inode */ 26#include <linux/fs.h> /* struct inode */
26#include <linux/fsnotify_backend.h> 27#include <linux/fsnotify_backend.h>
27#include <linux/inotify.h> 28#include <linux/inotify.h>
@@ -32,26 +33,84 @@
32 33
33#include "inotify.h" 34#include "inotify.h"
34 35
35static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event) 36/*
37 * Check if 2 events contain the same information. We do not compare private data
38 * but at this moment that isn't a problem for any known fsnotify listeners.
39 */
40static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
41{
   /*
    * Returns true when @new carries the same information as @old. The
    * mask, to_tell, data_type and name_len must all match before the
    * per-type checks below are tried; anything else compares unequal.
    */
42 if ((old->mask == new->mask) &&
43 (old->to_tell == new->to_tell) &&
44 (old->data_type == new->data_type) &&
45 (old->name_len == new->name_len)) {
46 switch (old->data_type) {
47 case (FSNOTIFY_EVENT_INODE):
48 /* remember, after old was put on the wait_q we aren't
49 * allowed to look at the inode any more, only thing
50 * left to check was if the file_name is the same */
51 if (!old->name_len ||
52 !strcmp(old->file_name, new->file_name))
53 return true;
54 break;
   /* file events match when both mount and dentry are identical */
55 case (FSNOTIFY_EVENT_FILE):
56 if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
57 (old->file->f_path.dentry == new->file->f_path.dentry))
58 return true;
59 break;
   /*
    * data-less events: queue overflow always matches, IN_IGNORED never
    * matches, everything else matches
    */
60 case (FSNOTIFY_EVENT_NONE):
61 if (old->mask & FS_Q_OVERFLOW)
62 return true;
63 else if (old->mask & FS_IN_IGNORED)
64 return false;
65 return true;
   /* no default: any other data_type falls through to "return false" */
66 };
67 }
68 return false;
69}
70
71static struct fsnotify_event *inotify_merge(struct list_head *list,
72 struct fsnotify_event *event)
36{ 73{
37 struct fsnotify_mark_entry *entry; 74 struct fsnotify_event_holder *last_holder;
38 struct inotify_inode_mark_entry *ientry; 75 struct fsnotify_event *last_event;
76
77 /* and the list better be locked by something too */
78 spin_lock(&event->lock);
79
80 last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
81 last_event = last_holder->event;
82 if (event_compare(last_event, event))
83 fsnotify_get_event(last_event);
84 else
85 last_event = NULL;
86
87 spin_unlock(&event->lock);
88
89 return last_event;
90}
91
92static int inotify_handle_event(struct fsnotify_group *group,
93 struct fsnotify_mark *inode_mark,
94 struct fsnotify_mark *vfsmount_mark,
95 struct fsnotify_event *event)
96{
97 struct inotify_inode_mark *i_mark;
39 struct inode *to_tell; 98 struct inode *to_tell;
40 struct inotify_event_private_data *event_priv; 99 struct inotify_event_private_data *event_priv;
41 struct fsnotify_event_private_data *fsn_event_priv; 100 struct fsnotify_event_private_data *fsn_event_priv;
42 int wd, ret; 101 struct fsnotify_event *added_event;
102 int wd, ret = 0;
103
104 BUG_ON(vfsmount_mark);
105
106 pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
107 event, event->to_tell, event->mask);
43 108
44 to_tell = event->to_tell; 109 to_tell = event->to_tell;
45 110
46 spin_lock(&to_tell->i_lock); 111 i_mark = container_of(inode_mark, struct inotify_inode_mark,
47 entry = fsnotify_find_mark_entry(group, to_tell); 112 fsn_mark);
48 spin_unlock(&to_tell->i_lock); 113 wd = i_mark->wd;
49 /* race with watch removal? We already passes should_send */
50 if (unlikely(!entry))
51 return 0;
52 ientry = container_of(entry, struct inotify_inode_mark_entry,
53 fsn_entry);
54 wd = ientry->wd;
55 114
56 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 115 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
57 if (unlikely(!event_priv)) 116 if (unlikely(!event_priv))
@@ -62,48 +121,40 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
62 fsn_event_priv->group = group; 121 fsn_event_priv->group = group;
63 event_priv->wd = wd; 122 event_priv->wd = wd;
64 123
65 ret = fsnotify_add_notify_event(group, event, fsn_event_priv); 124 added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
66 if (ret) { 125 if (added_event) {
67 inotify_free_event_priv(fsn_event_priv); 126 inotify_free_event_priv(fsn_event_priv);
68 /* EEXIST says we tail matched, EOVERFLOW isn't something 127 if (!IS_ERR(added_event))
69 * to report up the stack. */ 128 fsnotify_put_event(added_event);
70 if ((ret == -EEXIST) || 129 else
71 (ret == -EOVERFLOW)) 130 ret = PTR_ERR(added_event);
72 ret = 0;
73 } 131 }
74 132
75 /* 133 if (inode_mark->mask & IN_ONESHOT)
76 * If we hold the entry until after the event is on the queue 134 fsnotify_destroy_mark(inode_mark);
77 * IN_IGNORED won't be able to pass this event in the queue
78 */
79 fsnotify_put_mark(entry);
80 135
81 return ret; 136 return ret;
82} 137}
83 138
84static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 139static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
85{ 140{
86 inotify_ignored_and_remove_idr(entry, group); 141 inotify_ignored_and_remove_idr(fsn_mark, group);
87} 142}
88 143
89static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) 144static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
145 struct fsnotify_mark *inode_mark,
146 struct fsnotify_mark *vfsmount_mark,
147 __u32 mask, void *data, int data_type)
90{ 148{
91 struct fsnotify_mark_entry *entry; 149 if ((inode_mark->mask & FS_EXCL_UNLINK) &&
92 bool send; 150 (data_type == FSNOTIFY_EVENT_FILE)) {
93 151 struct file *file = data;
94 spin_lock(&inode->i_lock);
95 entry = fsnotify_find_mark_entry(group, inode);
96 spin_unlock(&inode->i_lock);
97 if (!entry)
98 return false;
99 152
100 mask = (mask & ~FS_EVENT_ON_CHILD); 153 if (d_unlinked(file->f_path.dentry))
101 send = (entry->mask & mask); 154 return false;
102 155 }
103 /* find took a reference */
104 fsnotify_put_mark(entry);
105 156
106 return send; 157 return true;
107} 158}
108 159
109/* 160/*
@@ -115,18 +166,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
115 */ 166 */
116static int idr_callback(int id, void *p, void *data) 167static int idr_callback(int id, void *p, void *data)
117{ 168{
118 struct fsnotify_mark_entry *entry; 169 struct fsnotify_mark *fsn_mark;
119 struct inotify_inode_mark_entry *ientry; 170 struct inotify_inode_mark *i_mark;
120 static bool warned = false; 171 static bool warned = false;
121 172
122 if (warned) 173 if (warned)
123 return 0; 174 return 0;
124 175
125 warned = true; 176 warned = true;
126 entry = p; 177 fsn_mark = p;
127 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 178 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
128 179
129 WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in " 180 WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
130 "idr. Probably leaking memory\n", id, p, data); 181 "idr. Probably leaking memory\n", id, p, data);
131 182
132 /* 183 /*
@@ -135,9 +186,9 @@ static int idr_callback(int id, void *p, void *data)
135 * out why we got here and the panic is no worse than the original 186 * out why we got here and the panic is no worse than the original
136 * BUG() that was here. 187 * BUG() that was here.
137 */ 188 */
138 if (entry) 189 if (fsn_mark)
139 printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n", 190 printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
140 entry->group, entry->inode, ientry->wd); 191 fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
141 return 0; 192 return 0;
142} 193}
143 194
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e46ca685b9be..bf7f6d776c31 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -46,17 +46,11 @@
46/* these are configurable via /proc/sys/fs/inotify/ */ 46/* these are configurable via /proc/sys/fs/inotify/ */
47static int inotify_max_user_instances __read_mostly; 47static int inotify_max_user_instances __read_mostly;
48static int inotify_max_queued_events __read_mostly; 48static int inotify_max_queued_events __read_mostly;
49int inotify_max_user_watches __read_mostly; 49static int inotify_max_user_watches __read_mostly;
50 50
51static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; 51static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
52struct kmem_cache *event_priv_cachep __read_mostly; 52struct kmem_cache *event_priv_cachep __read_mostly;
53 53
54/*
55 * When inotify registers a new group it increments this and uses that
56 * value as an offset to set the fsnotify group "name" and priority.
57 */
58static atomic_t inotify_grp_num;
59
60#ifdef CONFIG_SYSCTL 54#ifdef CONFIG_SYSCTL
61 55
62#include <linux/sysctl.h> 56#include <linux/sysctl.h>
@@ -96,11 +90,14 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
96{ 90{
97 __u32 mask; 91 __u32 mask;
98 92
99 /* everything should accept their own ignored and cares about children */ 93 /*
100 mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); 94 * everything should accept their own ignored, cares about children,
95 * and should receive events when the inode is unmounted
96 */
97 mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
101 98
102 /* mask off the flags used to open the fd */ 99 /* mask off the flags used to open the fd */
103 mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); 100 mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
104 101
105 return mask; 102 return mask;
106} 103}
@@ -144,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
144 141
145 event = fsnotify_peek_notify_event(group); 142 event = fsnotify_peek_notify_event(group);
146 143
144 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
145
147 if (event->name_len) 146 if (event->name_len)
148 event_size += roundup(event->name_len + 1, event_size); 147 event_size += roundup(event->name_len + 1, event_size);
149 148
@@ -173,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
173 size_t event_size = sizeof(struct inotify_event); 172 size_t event_size = sizeof(struct inotify_event);
174 size_t name_len = 0; 173 size_t name_len = 0;
175 174
175 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
176
176 /* we get the inotify watch descriptor from the event private data */ 177 /* we get the inotify watch descriptor from the event private data */
177 spin_lock(&event->lock); 178 spin_lock(&event->lock);
178 fsn_priv = fsnotify_remove_priv_from_event(group, event); 179 fsn_priv = fsnotify_remove_priv_from_event(group, event);
@@ -245,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
245 kevent = get_one_event(group, count); 246 kevent = get_one_event(group, count);
246 mutex_unlock(&group->notification_mutex); 247 mutex_unlock(&group->notification_mutex);
247 248
249 pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
250
248 if (kevent) { 251 if (kevent) {
249 ret = PTR_ERR(kevent); 252 ret = PTR_ERR(kevent);
250 if (IS_ERR(kevent)) 253 if (IS_ERR(kevent))
@@ -289,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
289 struct fsnotify_group *group = file->private_data; 292 struct fsnotify_group *group = file->private_data;
290 struct user_struct *user = group->inotify_data.user; 293 struct user_struct *user = group->inotify_data.user;
291 294
295 pr_debug("%s: group=%p\n", __func__, group);
296
292 fsnotify_clear_marks_by_group(group); 297 fsnotify_clear_marks_by_group(group);
293 298
294 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 299 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
@@ -312,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
312 group = file->private_data; 317 group = file->private_data;
313 p = (void __user *) arg; 318 p = (void __user *) arg;
314 319
320 pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);
321
315 switch (cmd) { 322 switch (cmd) {
316 case FIONREAD: 323 case FIONREAD:
317 mutex_lock(&group->notification_mutex); 324 mutex_lock(&group->notification_mutex);
@@ -357,59 +364,159 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
357 return error; 364 return error;
358} 365}
359 366
/*
 * Insert @i_mark into @idr and store the id that was handed out in
 * i_mark->wd. The search starts at *last_wd + 1.
 *
 * idr_pre_get() is called with GFP_KERNEL before @idr_lock is taken, and the
 * whole sequence is repeated for as long as idr_get_new_above() returns
 * -EAGAIN. On success *last_wd is updated to the new id and a reference is
 * taken on the mark on behalf of the idr, both under @idr_lock.
 *
 * Returns 0 on success, -ENOMEM when idr_pre_get() fails, or any other
 * non-zero value returned by idr_get_new_above().
 */
367static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
368 int *last_wd,
369 struct inotify_inode_mark *i_mark)
370{
371 int ret;
372
373 do {
374 if (unlikely(!idr_pre_get(idr, GFP_KERNEL)))
375 return -ENOMEM;
376
377 spin_lock(idr_lock);
378 ret = idr_get_new_above(idr, i_mark, *last_wd + 1,
379 &i_mark->wd);
380 /* we added the mark to the idr, take a reference */
381 if (!ret) {
382 *last_wd = i_mark->wd;
383 fsnotify_get_mark(&i_mark->fsn_mark);
384 }
385 spin_unlock(idr_lock);
386 } while (ret == -EAGAIN);
387
388 return ret;
389}
390
/*
 * Look up @wd in the group's inotify idr. The caller must hold
 * group->inotify_data.idr_lock (checked with assert_spin_locked()).
 *
 * Returns the mark with an extra reference taken, which the caller must drop
 * with fsnotify_put_mark(), or NULL when nothing is registered under @wd.
 */
391static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
392 int wd)
393{
394 struct idr *idr = &group->inotify_data.idr;
395 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
396 struct inotify_inode_mark *i_mark;
397
398 assert_spin_locked(idr_lock);
399
400 i_mark = idr_find(idr, wd);
401 if (i_mark) {
402 struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;
403
404 fsnotify_get_mark(fsn_mark);
405 /* One ref for being in the idr, one ref we just took */
406 BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
407 }
408
409 return i_mark;
410}
411
/*
 * Locking wrapper around inotify_idr_find_locked(): takes and releases
 * group->inotify_data.idr_lock around the lookup and returns its result
 * (a referenced mark, or NULL).
 */
412static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
413 int wd)
414{
415 struct inotify_inode_mark *i_mark;
416 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
417
418 spin_lock(idr_lock);
419 i_mark = inotify_idr_find_locked(group, wd);
420 spin_unlock(idr_lock);
421
422 return i_mark;
423}
424
/*
 * Remove i_mark->wd from the group's inotify idr and drop the reference that
 * was held for being in the idr. The caller must hold
 * group->inotify_data.idr_lock (checked with assert_spin_locked()).
 * i_mark->wd itself is left unchanged here.
 */
425static void do_inotify_remove_from_idr(struct fsnotify_group *group,
426 struct inotify_inode_mark *i_mark)
427{
428 struct idr *idr = &group->inotify_data.idr;
429 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
430 int wd = i_mark->wd;
431
432 assert_spin_locked(idr_lock);
433
434 idr_remove(idr, wd);
435
436 /* removed from the idr, drop that ref */
437 fsnotify_put_mark(&i_mark->fsn_mark);
438}
439
360/* 440/*
361 * Remove the mark from the idr (if present) and drop the reference 441 * Remove the mark from the idr (if present) and drop the reference
362 * on the mark because it was in the idr. 442 * on the mark because it was in the idr.
363 */ 443 */
364static void inotify_remove_from_idr(struct fsnotify_group *group, 444static void inotify_remove_from_idr(struct fsnotify_group *group,
365 struct inotify_inode_mark_entry *ientry) 445 struct inotify_inode_mark *i_mark)
366{ 446{
367 struct idr *idr; 447 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
368 struct fsnotify_mark_entry *entry; 448 struct inotify_inode_mark *found_i_mark = NULL;
369 struct inotify_inode_mark_entry *found_ientry;
370 int wd; 449 int wd;
371 450
372 spin_lock(&group->inotify_data.idr_lock); 451 spin_lock(idr_lock);
373 idr = &group->inotify_data.idr; 452 wd = i_mark->wd;
374 wd = ientry->wd;
375 453
376 if (wd == -1) 454 /*
455 * does this i_mark think it is in the idr? we shouldn't get called
456 * if it wasn't....
457 */
458 if (wd == -1) {
459 WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
460 " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
461 i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
377 goto out; 462 goto out;
463 }
378 464
379 entry = idr_find(&group->inotify_data.idr, wd); 465 /* Lets look in the idr to see if we find it */
380 if (unlikely(!entry)) 466 found_i_mark = inotify_idr_find_locked(group, wd);
467 if (unlikely(!found_i_mark)) {
468 WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
469 " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
470 i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
381 goto out; 471 goto out;
472 }
382 473
383 found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 474 /*
384 if (unlikely(found_ientry != ientry)) { 475 * We found an mark in the idr at the right wd, but it's
385 /* We found an entry in the idr with the right wd, but it's 476 * not the mark we were told to remove. eparis seriously
386 * not the entry we were told to remove. eparis seriously 477 * fucked up somewhere.
387 * fucked up somewhere. */ 478 */
388 WARN_ON(1); 479 if (unlikely(found_i_mark != i_mark)) {
389 ientry->wd = -1; 480 WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
481 "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
482 "found_i_mark->group=%p found_i_mark->inode=%p\n",
483 __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
484 i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd,
485 found_i_mark->fsn_mark.group,
486 found_i_mark->fsn_mark.i.inode);
390 goto out; 487 goto out;
391 } 488 }
392 489
393 /* One ref for being in the idr, one ref held by the caller */ 490 /*
394 BUG_ON(atomic_read(&entry->refcnt) < 2); 491 * One ref for being in the idr
395 492 * one ref held by the caller trying to kill us
396 idr_remove(idr, wd); 493 * one ref grabbed by inotify_idr_find
397 ientry->wd = -1; 494 */
495 if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
496 printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
497 " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
498 i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
499 /* we can't really recover with bad ref cnting.. */
500 BUG();
501 }
398 502
399 /* removed from the idr, drop that ref */ 503 do_inotify_remove_from_idr(group, i_mark);
400 fsnotify_put_mark(entry);
401out: 504out:
402 spin_unlock(&group->inotify_data.idr_lock); 505 /* match the ref taken by inotify_idr_find_locked() */
506 if (found_i_mark)
507 fsnotify_put_mark(&found_i_mark->fsn_mark);
508 i_mark->wd = -1;
509 spin_unlock(idr_lock);
403} 510}
404 511
405/* 512/*
406 * Send IN_IGNORED for this wd, remove this wd from the idr. 513 * Send IN_IGNORED for this wd, remove this wd from the idr.
407 */ 514 */
408void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, 515void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
409 struct fsnotify_group *group) 516 struct fsnotify_group *group)
410{ 517{
411 struct inotify_inode_mark_entry *ientry; 518 struct inotify_inode_mark *i_mark;
412 struct fsnotify_event *ignored_event; 519 struct fsnotify_event *ignored_event, *notify_event;
413 struct inotify_event_private_data *event_priv; 520 struct inotify_event_private_data *event_priv;
414 struct fsnotify_event_private_data *fsn_event_priv; 521 struct fsnotify_event_private_data *fsn_event_priv;
415 int ret; 522 int ret;
@@ -420,7 +527,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
420 if (!ignored_event) 527 if (!ignored_event)
421 return; 528 return;
422 529
423 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 530 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
424 531
425 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); 532 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
426 if (unlikely(!event_priv)) 533 if (unlikely(!event_priv))
@@ -429,37 +536,44 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
429 fsn_event_priv = &event_priv->fsnotify_event_priv_data; 536 fsn_event_priv = &event_priv->fsnotify_event_priv_data;
430 537
431 fsn_event_priv->group = group; 538 fsn_event_priv->group = group;
432 event_priv->wd = ientry->wd; 539 event_priv->wd = i_mark->wd;
433 540
434 ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); 541 notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
435 if (ret) 542 if (notify_event) {
543 if (IS_ERR(notify_event))
544 ret = PTR_ERR(notify_event);
545 else
546 fsnotify_put_event(notify_event);
436 inotify_free_event_priv(fsn_event_priv); 547 inotify_free_event_priv(fsn_event_priv);
548 }
437 549
438skip_send_ignore: 550skip_send_ignore:
439 551
440 /* matches the reference taken when the event was created */ 552 /* matches the reference taken when the event was created */
441 fsnotify_put_event(ignored_event); 553 fsnotify_put_event(ignored_event);
442 554
443 /* remove this entry from the idr */ 555 /* remove this mark from the idr */
444 inotify_remove_from_idr(group, ientry); 556 inotify_remove_from_idr(group, i_mark);
445 557
446 atomic_dec(&group->inotify_data.user->inotify_watches); 558 atomic_dec(&group->inotify_data.user->inotify_watches);
447} 559}
448 560
449/* ding dong the mark is dead */ 561/* ding dong the mark is dead */
450static void inotify_free_mark(struct fsnotify_mark_entry *entry) 562static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
451{ 563{
452 struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry; 564 struct inotify_inode_mark *i_mark;
565
566 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
453 567
454 kmem_cache_free(inotify_inode_mark_cachep, ientry); 568 kmem_cache_free(inotify_inode_mark_cachep, i_mark);
455} 569}
456 570
457static int inotify_update_existing_watch(struct fsnotify_group *group, 571static int inotify_update_existing_watch(struct fsnotify_group *group,
458 struct inode *inode, 572 struct inode *inode,
459 u32 arg) 573 u32 arg)
460{ 574{
461 struct fsnotify_mark_entry *entry; 575 struct fsnotify_mark *fsn_mark;
462 struct inotify_inode_mark_entry *ientry; 576 struct inotify_inode_mark *i_mark;
463 __u32 old_mask, new_mask; 577 __u32 old_mask, new_mask;
464 __u32 mask; 578 __u32 mask;
465 int add = (arg & IN_MASK_ADD); 579 int add = (arg & IN_MASK_ADD);
@@ -467,52 +581,43 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
467 581
468 /* don't allow invalid bits: we don't want flags set */ 582 /* don't allow invalid bits: we don't want flags set */
469 mask = inotify_arg_to_mask(arg); 583 mask = inotify_arg_to_mask(arg);
470 if (unlikely(!mask)) 584 if (unlikely(!(mask & IN_ALL_EVENTS)))
471 return -EINVAL; 585 return -EINVAL;
472 586
473 spin_lock(&inode->i_lock); 587 fsn_mark = fsnotify_find_inode_mark(group, inode);
474 entry = fsnotify_find_mark_entry(group, inode); 588 if (!fsn_mark)
475 spin_unlock(&inode->i_lock);
476 if (!entry)
477 return -ENOENT; 589 return -ENOENT;
478 590
479 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 591 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
480 592
481 spin_lock(&entry->lock); 593 spin_lock(&fsn_mark->lock);
482 594
483 old_mask = entry->mask; 595 old_mask = fsn_mark->mask;
484 if (add) { 596 if (add)
485 entry->mask |= mask; 597 fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
486 new_mask = entry->mask; 598 else
487 } else { 599 fsnotify_set_mark_mask_locked(fsn_mark, mask);
488 entry->mask = mask; 600 new_mask = fsn_mark->mask;
489 new_mask = entry->mask;
490 }
491 601
492 spin_unlock(&entry->lock); 602 spin_unlock(&fsn_mark->lock);
493 603
494 if (old_mask != new_mask) { 604 if (old_mask != new_mask) {
495 /* more bits in old than in new? */ 605 /* more bits in old than in new? */
496 int dropped = (old_mask & ~new_mask); 606 int dropped = (old_mask & ~new_mask);
497 /* more bits in this entry than the inode's mask? */ 607 /* more bits in this fsn_mark than the inode's mask? */
498 int do_inode = (new_mask & ~inode->i_fsnotify_mask); 608 int do_inode = (new_mask & ~inode->i_fsnotify_mask);
499 /* more bits in this entry than the group? */
500 int do_group = (new_mask & ~group->mask);
501 609
502 /* update the inode with this new entry */ 610 /* update the inode with this new fsn_mark */
503 if (dropped || do_inode) 611 if (dropped || do_inode)
504 fsnotify_recalc_inode_mask(inode); 612 fsnotify_recalc_inode_mask(inode);
505 613
506 /* update the group mask with the new mask */
507 if (dropped || do_group)
508 fsnotify_recalc_group_mask(group);
509 } 614 }
510 615
511 /* return the wd */ 616 /* return the wd */
512 ret = ientry->wd; 617 ret = i_mark->wd;
513 618
514 /* match the get from fsnotify_find_mark_entry() */ 619 /* match the get from fsnotify_find_mark() */
515 fsnotify_put_mark(entry); 620 fsnotify_put_mark(fsn_mark);
516 621
517 return ret; 622 return ret;
518} 623}
@@ -521,73 +626,51 @@ static int inotify_new_watch(struct fsnotify_group *group,
521 struct inode *inode, 626 struct inode *inode,
522 u32 arg) 627 u32 arg)
523{ 628{
524 struct inotify_inode_mark_entry *tmp_ientry; 629 struct inotify_inode_mark *tmp_i_mark;
525 __u32 mask; 630 __u32 mask;
526 int ret; 631 int ret;
632 struct idr *idr = &group->inotify_data.idr;
633 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
527 634
528 /* don't allow invalid bits: we don't want flags set */ 635 /* don't allow invalid bits: we don't want flags set */
529 mask = inotify_arg_to_mask(arg); 636 mask = inotify_arg_to_mask(arg);
530 if (unlikely(!mask)) 637 if (unlikely(!(mask & IN_ALL_EVENTS)))
531 return -EINVAL; 638 return -EINVAL;
532 639
533 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 640 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
534 if (unlikely(!tmp_ientry)) 641 if (unlikely(!tmp_i_mark))
535 return -ENOMEM; 642 return -ENOMEM;
536 643
537 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); 644 fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
538 tmp_ientry->fsn_entry.mask = mask; 645 tmp_i_mark->fsn_mark.mask = mask;
539 tmp_ientry->wd = -1; 646 tmp_i_mark->wd = -1;
540 647
541 ret = -ENOSPC; 648 ret = -ENOSPC;
542 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) 649 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
543 goto out_err; 650 goto out_err;
544retry:
545 ret = -ENOMEM;
546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
547 goto out_err;
548 651
549 /* we are putting the mark on the idr, take a reference */ 652 ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
550 fsnotify_get_mark(&tmp_ientry->fsn_entry); 653 tmp_i_mark);
551 654 if (ret)
552 spin_lock(&group->inotify_data.idr_lock);
553 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
554 group->inotify_data.last_wd+1,
555 &tmp_ientry->wd);
556 spin_unlock(&group->inotify_data.idr_lock);
557 if (ret) {
558 /* we didn't get on the idr, drop the idr reference */
559 fsnotify_put_mark(&tmp_ientry->fsn_entry);
560
561 /* idr was out of memory allocate and try again */
562 if (ret == -EAGAIN)
563 goto retry;
564 goto out_err; 655 goto out_err;
565 }
566 656
567 /* we are on the idr, now get on the inode */ 657 /* we are on the idr, now get on the inode */
568 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 658 ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
569 if (ret) { 659 if (ret) {
570 /* we failed to get on the inode, get off the idr */ 660 /* we failed to get on the inode, get off the idr */
571 inotify_remove_from_idr(group, tmp_ientry); 661 inotify_remove_from_idr(group, tmp_i_mark);
572 goto out_err; 662 goto out_err;
573 } 663 }
574 664
575 /* update the idr hint, who cares about races, it's just a hint */
576 group->inotify_data.last_wd = tmp_ientry->wd;
577
578 /* increment the number of watches the user has */ 665 /* increment the number of watches the user has */
579 atomic_inc(&group->inotify_data.user->inotify_watches); 666 atomic_inc(&group->inotify_data.user->inotify_watches);
580 667
581 /* return the watch descriptor for this new entry */ 668 /* return the watch descriptor for this new mark */
582 ret = tmp_ientry->wd; 669 ret = tmp_i_mark->wd;
583
584 /* if this mark added a new event update the group mask */
585 if (mask & ~group->mask)
586 fsnotify_recalc_group_mask(group);
587 670
588out_err: 671out_err:
589 /* match the ref from fsnotify_init_markentry() */ 672 /* match the ref from fsnotify_init_mark() */
590 fsnotify_put_mark(&tmp_ientry->fsn_entry); 673 fsnotify_put_mark(&tmp_i_mark->fsn_mark);
591 674
592 return ret; 675 return ret;
593} 676}
@@ -616,11 +699,8 @@ retry:
616static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) 699static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
617{ 700{
618 struct fsnotify_group *group; 701 struct fsnotify_group *group;
619 unsigned int grp_num;
620 702
621 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ 703 group = fsnotify_alloc_group(&inotify_fsnotify_ops);
622 grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
623 group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
624 if (IS_ERR(group)) 704 if (IS_ERR(group))
625 return group; 705 return group;
626 706
@@ -726,7 +806,7 @@ fput_and_out:
726SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) 806SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
727{ 807{
728 struct fsnotify_group *group; 808 struct fsnotify_group *group;
729 struct fsnotify_mark_entry *entry; 809 struct inotify_inode_mark *i_mark;
730 struct file *filp; 810 struct file *filp;
731 int ret = 0, fput_needed; 811 int ret = 0, fput_needed;
732 812
@@ -735,25 +815,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
735 return -EBADF; 815 return -EBADF;
736 816
737 /* verify that this is indeed an inotify instance */ 817 /* verify that this is indeed an inotify instance */
738 if (unlikely(filp->f_op != &inotify_fops)) { 818 ret = -EINVAL;
739 ret = -EINVAL; 819 if (unlikely(filp->f_op != &inotify_fops))
740 goto out; 820 goto out;
741 }
742 821
743 group = filp->private_data; 822 group = filp->private_data;
744 823
745 spin_lock(&group->inotify_data.idr_lock); 824 ret = -EINVAL;
746 entry = idr_find(&group->inotify_data.idr, wd); 825 i_mark = inotify_idr_find(group, wd);
747 if (unlikely(!entry)) { 826 if (unlikely(!i_mark))
748 spin_unlock(&group->inotify_data.idr_lock);
749 ret = -EINVAL;
750 goto out; 827 goto out;
751 }
752 fsnotify_get_mark(entry);
753 spin_unlock(&group->inotify_data.idr_lock);
754 828
755 fsnotify_destroy_mark_by_entry(entry); 829 ret = 0;
756 fsnotify_put_mark(entry); 830
831 fsnotify_destroy_mark(&i_mark->fsn_mark);
832
833 /* match ref taken by inotify_idr_find */
834 fsnotify_put_mark(&i_mark->fsn_mark);
757 835
758out: 836out:
759 fput_light(filp, fput_needed); 837 fput_light(filp, fput_needed);
@@ -767,7 +845,28 @@ out:
767 */ 845 */
768static int __init inotify_user_setup(void) 846static int __init inotify_user_setup(void)
769{ 847{
770 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); 848 BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
849 BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
850 BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
851 BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
852 BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
853 BUILD_BUG_ON(IN_OPEN != FS_OPEN);
854 BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
855 BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
856 BUILD_BUG_ON(IN_CREATE != FS_CREATE);
857 BUILD_BUG_ON(IN_DELETE != FS_DELETE);
858 BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
859 BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
860 BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
861 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
862 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
863 BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
864 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
865 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
866
867 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
868
869 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
771 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); 870 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
772 871
773 inotify_max_queued_events = 16384; 872 inotify_max_queued_events = 16384;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
new file mode 100644
index 000000000000..325185e514bb
--- /dev/null
+++ b/fs/notify/mark.c
@@ -0,0 +1,371 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19/*
20 * fsnotify inode mark locking/lifetime/and refcnting
21 *
22 * REFCNT:
23 * The mark->refcnt tells how many "things" in the kernel currently are
24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriate locks, can take a reference
27 * and the object itself is guaranteed to survive until the reference is dropped.
28 *
29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
31 * be taken in order as follows:
32 *
33 * mark->lock
34 * group->mark_lock
35 * inode->i_lock
36 *
37 * mark->lock protects 2 things, mark->group and mark->inode. You must hold
38 * that lock to dereference either of these things (they could be NULL even with
39 * the lock)
40 *
41 * group->mark_lock protects the marks_list anchored inside a given group
42 * and each mark is hooked via the g_list. It also sorta protects the
43 * free_g_list, which when used is anchored by a private list on the stack of the
44 * task which held the group->mark_lock.
45 *
46 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
47 * given inode and each mark is hooked via the i_list. (and sorta the
48 * free_i_list)
49 *
50 *
51 * LIFETIME:
52 * Inode marks survive between when they are added to an inode and when their
53 * refcnt==0.
54 *
55 * The inode mark can be cleared for a number of different reasons including:
56 * - The inode is unlinked for the last time. (fsnotify_inode_remove)
57 * - The inode is being evicted from cache. (fsnotify_inode_delete)
58 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
59 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
60 * - The fsnotify_group associated with the mark is going away and all such marks
61 * need to be cleaned up. (fsnotify_clear_marks_by_group)
62 *
63 * Worst case we are given an inode and need to clean up all the marks on that
64 * inode. We take i_lock and walk the i_fsnotify_marks safely. For each
65 * mark on the list we take a reference (so the mark can't disappear under us).
66 * We remove that mark from the inode's list of marks and we add this mark to a
67 * private list anchored on the stack using free_i_list; At this point we no
68 * longer fear anything finding the mark using the inode's list of marks.
69 *
70 * We can safely and locklessly run the private list on the stack of everything
71 * we just unattached from the original inode. For each mark on the private list
72 * we grab the mark->lock and can thus dereference mark->group and mark->inode. If
73 * we see the group and inode are not NULL we take those locks. Now holding all
74 * 3 locks we can completely remove the mark from other tasks finding it in the
75 * future. Remember, 10 things might already be referencing this mark, but they
76 * better be holding a ref. We drop our reference we took before we unhooked it
77 * from the inode. When the ref hits 0 we can free the mark.
78 *
79 * Very similarly for freeing by group, except we use free_g_list.
80 *
81 * This has the very interesting property of being able to run concurrently with
82 * any (or all) other directions.
83 */
84
85#include <linux/fs.h>
86#include <linux/init.h>
87#include <linux/kernel.h>
88#include <linux/kthread.h>
89#include <linux/module.h>
90#include <linux/mutex.h>
91#include <linux/slab.h>
92#include <linux/spinlock.h>
93#include <linux/srcu.h>
94#include <linux/writeback.h> /* for inode_lock */
95
96#include <asm/atomic.h>
97
98#include <linux/fsnotify_backend.h>
99#include "fsnotify.h"
100
101struct srcu_struct fsnotify_mark_srcu;
102static DEFINE_SPINLOCK(destroy_lock);
103static LIST_HEAD(destroy_list);
104static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
105
106void fsnotify_get_mark(struct fsnotify_mark *mark)
107{
108 atomic_inc(&mark->refcnt);
109}
110
111void fsnotify_put_mark(struct fsnotify_mark *mark)
112{
113 if (atomic_dec_and_test(&mark->refcnt))
114 mark->free_mark(mark);
115}
116
117/*
118 * Any time a mark is getting freed we end up here.
119 * The caller had better be holding a reference to this mark so we don't actually
120 * do the final put under the mark->lock
121 */
122void fsnotify_destroy_mark(struct fsnotify_mark *mark)
123{
124 struct fsnotify_group *group;
125 struct inode *inode = NULL;
126
127 spin_lock(&mark->lock);
128
129 group = mark->group;
130
131 /* something else already called this function on this mark */
132 if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
133 spin_unlock(&mark->lock);
134 return;
135 }
136
137 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
138
139 /* 1 from caller and 1 for being on i_list/g_list */
140 BUG_ON(atomic_read(&mark->refcnt) < 2);
141
142 spin_lock(&group->mark_lock);
143
144 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
145 inode = mark->i.inode;
146 fsnotify_destroy_inode_mark(mark);
147 } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
148 fsnotify_destroy_vfsmount_mark(mark);
149 else
150 BUG();
151
152 list_del_init(&mark->g_list);
153
154 spin_unlock(&group->mark_lock);
155 spin_unlock(&mark->lock);
156
157 spin_lock(&destroy_lock);
158 list_add(&mark->destroy_list, &destroy_list);
159 spin_unlock(&destroy_lock);
160 wake_up(&destroy_waitq);
161
162 /*
163 * Some groups like to know that marks are being freed. This is a
164 * callback to the group function to let it know that this mark
165 * is being freed.
166 */
167 if (group->ops->freeing_mark)
168 group->ops->freeing_mark(mark, group);
169
170 /*
171 * __fsnotify_update_child_dentry_flags(inode);
172 *
173 * I really want to call that, but we can't, we have no idea if the inode
174 * still exists the second we drop the mark->lock.
175 *
176 * The next time an event arrives at this inode from one of its children
177 * __fsnotify_parent will see that the inode doesn't care about its
178 * children and will update all of these flags then. So really this
179 * is just a lazy update (and could be a perf win...)
180 */
181
182 if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
183 iput(inode);
184
185 /*
186 * it's possible that this group tried to destroy itself, but
187 * this mark was simultaneously being freed by inode. If that's the
188 * case, we finish freeing the group here.
189 */
190 if (unlikely(atomic_dec_and_test(&group->num_marks)))
191 fsnotify_final_destroy_group(group);
192}
193
194void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
195{
196 assert_spin_locked(&mark->lock);
197
198 mark->mask = mask;
199
200 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
201 fsnotify_set_inode_mark_mask_locked(mark, mask);
202}
203
204void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
205{
206 assert_spin_locked(&mark->lock);
207
208 mark->ignored_mask = mask;
209}
210
211/*
212 * Attach an initialized mark to a given group and fs object.
213 * These marks may be used for the fsnotify backend to determine which
214 * event types should be delivered to which group.
215 */
216int fsnotify_add_mark(struct fsnotify_mark *mark,
217 struct fsnotify_group *group, struct inode *inode,
218 struct vfsmount *mnt, int allow_dups)
219{
220 int ret = 0;
221
222 BUG_ON(inode && mnt);
223 BUG_ON(!inode && !mnt);
224
225 /*
226 * LOCKING ORDER!!!!
227 * mark->lock
228 * group->mark_lock
229 * inode->i_lock
230 */
231 spin_lock(&mark->lock);
232 spin_lock(&group->mark_lock);
233
234 mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
235
236 mark->group = group;
237 list_add(&mark->g_list, &group->marks_list);
238 atomic_inc(&group->num_marks);
239 fsnotify_get_mark(mark); /* for i_list and g_list */
240
241 if (inode) {
242 ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
243 if (ret)
244 goto err;
245 } else if (mnt) {
246 ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
247 if (ret)
248 goto err;
249 } else {
250 BUG();
251 }
252
253 spin_unlock(&group->mark_lock);
254
255 /* this will pin the object if appropriate */
256 fsnotify_set_mark_mask_locked(mark, mark->mask);
257
258 spin_unlock(&mark->lock);
259
260 if (inode)
261 __fsnotify_update_child_dentry_flags(inode);
262
263 return ret;
264err:
265 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
266 list_del_init(&mark->g_list);
267 mark->group = NULL;
268 atomic_dec(&group->num_marks);
269
270 spin_unlock(&group->mark_lock);
271 spin_unlock(&mark->lock);
272
273 spin_lock(&destroy_lock);
274 list_add(&mark->destroy_list, &destroy_list);
275 spin_unlock(&destroy_lock);
276 wake_up(&destroy_waitq);
277
278 return ret;
279}
280
281/*
282 * clear any marks in a group in which mark->flags & flags is true
283 */
284void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
285 unsigned int flags)
286{
287 struct fsnotify_mark *lmark, *mark;
288 LIST_HEAD(free_list);
289
290 spin_lock(&group->mark_lock);
291 list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
292 if (mark->flags & flags) {
293 list_add(&mark->free_g_list, &free_list);
294 list_del_init(&mark->g_list);
295 fsnotify_get_mark(mark);
296 }
297 }
298 spin_unlock(&group->mark_lock);
299
300 list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
301 fsnotify_destroy_mark(mark);
302 fsnotify_put_mark(mark);
303 }
304}
305
306/*
307 * Given a group, destroy all of the marks associated with that group.
308 */
309void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
310{
311 fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
312}
313
314void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
315{
316 assert_spin_locked(&old->lock);
317 new->i.inode = old->i.inode;
318 new->m.mnt = old->m.mnt;
319 new->group = old->group;
320 new->mask = old->mask;
321 new->free_mark = old->free_mark;
322}
323
324/*
325 * Nothing fancy, just initialize lists and locks and counters.
326 */
327void fsnotify_init_mark(struct fsnotify_mark *mark,
328 void (*free_mark)(struct fsnotify_mark *mark))
329{
330 memset(mark, 0, sizeof(*mark));
331 spin_lock_init(&mark->lock);
332 atomic_set(&mark->refcnt, 1);
333 mark->free_mark = free_mark;
334}
335
336static int fsnotify_mark_destroy(void *ignored)
337{
338 struct fsnotify_mark *mark, *next;
339 LIST_HEAD(private_destroy_list);
340
341 for (;;) {
342 spin_lock(&destroy_lock);
343 /* exchange the list head */
344 list_replace_init(&destroy_list, &private_destroy_list);
345 spin_unlock(&destroy_lock);
346
347 synchronize_srcu(&fsnotify_mark_srcu);
348
349 list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
350 list_del_init(&mark->destroy_list);
351 fsnotify_put_mark(mark);
352 }
353
354 wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
355 }
356
357 return 0;
358}
359
360static int __init fsnotify_mark_init(void)
361{
362 struct task_struct *thread;
363
364 thread = kthread_run(fsnotify_mark_destroy, NULL,
365 "fsnotify_mark");
366 if (IS_ERR(thread))
367 panic("unable to start fsnotify mark destruction thread.");
368
369 return 0;
370}
371device_initcall(fsnotify_mark_init);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b8bf53b4c108..d6c435adc7a2 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -31,6 +31,7 @@
31 * allocated and used. 31 * allocated and used.
32 */ 32 */
33 33
34#include <linux/file.h>
34#include <linux/fs.h> 35#include <linux/fs.h>
35#include <linux/init.h> 36#include <linux/init.h>
36#include <linux/kernel.h> 37#include <linux/kernel.h>
@@ -56,7 +57,7 @@ static struct kmem_cache *fsnotify_event_holder_cachep;
56 * it is needed. It's refcnt is set 1 at kernel init time and will never 57 * it is needed. It's refcnt is set 1 at kernel init time and will never
57 * get set to 0 so it will never get 'freed' 58 * get set to 0 so it will never get 'freed'
58 */ 59 */
59static struct fsnotify_event q_overflow_event; 60static struct fsnotify_event *q_overflow_event;
60static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); 61static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
61 62
62/** 63/**
@@ -87,12 +88,15 @@ void fsnotify_put_event(struct fsnotify_event *event)
87 return; 88 return;
88 89
89 if (atomic_dec_and_test(&event->refcnt)) { 90 if (atomic_dec_and_test(&event->refcnt)) {
90 if (event->data_type == FSNOTIFY_EVENT_PATH) 91 pr_debug("%s: event=%p\n", __func__, event);
91 path_put(&event->path); 92
93 if (event->data_type == FSNOTIFY_EVENT_FILE)
94 fput(event->file);
92 95
93 BUG_ON(!list_empty(&event->private_data_list)); 96 BUG_ON(!list_empty(&event->private_data_list));
94 97
95 kfree(event->file_name); 98 kfree(event->file_name);
99 put_pid(event->tgid);
96 kmem_cache_free(fsnotify_event_cachep, event); 100 kmem_cache_free(fsnotify_event_cachep, event);
97 } 101 }
98} 102}
@@ -104,7 +108,8 @@ struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
104 108
105void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder) 109void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
106{ 110{
107 kmem_cache_free(fsnotify_event_holder_cachep, holder); 111 if (holder)
112 kmem_cache_free(fsnotify_event_holder_cachep, holder);
108} 113}
109 114
110/* 115/*
@@ -129,53 +134,20 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
129} 134}
130 135
131/* 136/*
132 * Check if 2 events contain the same information. We do not compare private data
133 * but at this moment that isn't a problem for any know fsnotify listeners.
134 */
135static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
136{
137 if ((old->mask == new->mask) &&
138 (old->to_tell == new->to_tell) &&
139 (old->data_type == new->data_type) &&
140 (old->name_len == new->name_len)) {
141 switch (old->data_type) {
142 case (FSNOTIFY_EVENT_INODE):
143 /* remember, after old was put on the wait_q we aren't
144 * allowed to look at the inode any more, only thing
145 * left to check was if the file_name is the same */
146 if (!old->name_len ||
147 !strcmp(old->file_name, new->file_name))
148 return true;
149 break;
150 case (FSNOTIFY_EVENT_PATH):
151 if ((old->path.mnt == new->path.mnt) &&
152 (old->path.dentry == new->path.dentry))
153 return true;
154 break;
155 case (FSNOTIFY_EVENT_NONE):
156 if (old->mask & FS_Q_OVERFLOW)
157 return true;
158 else if (old->mask & FS_IN_IGNORED)
159 return false;
160 return false;
161 };
162 }
163 return false;
164}
165
166/*
167 * Add an event to the group notification queue. The group can later pull this 137 * Add an event to the group notification queue. The group can later pull this
168 * event off the queue to deal with. If the event is successfully added to the 138 * event off the queue to deal with. If the event is successfully added to the
169 * group's notification queue, a reference is taken on event. 139 * group's notification queue, a reference is taken on event.
170 */ 140 */
171int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, 141struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
172 struct fsnotify_event_private_data *priv) 142 struct fsnotify_event_private_data *priv,
143 struct fsnotify_event *(*merge)(struct list_head *,
144 struct fsnotify_event *))
173{ 145{
146 struct fsnotify_event *return_event = NULL;
174 struct fsnotify_event_holder *holder = NULL; 147 struct fsnotify_event_holder *holder = NULL;
175 struct list_head *list = &group->notification_list; 148 struct list_head *list = &group->notification_list;
176 struct fsnotify_event_holder *last_holder; 149
177 struct fsnotify_event *last_event; 150 pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
178 int ret = 0;
179 151
180 /* 152 /*
181 * There is one fsnotify_event_holder embedded inside each fsnotify_event. 153 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -189,18 +161,40 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
189alloc_holder: 161alloc_holder:
190 holder = fsnotify_alloc_event_holder(); 162 holder = fsnotify_alloc_event_holder();
191 if (!holder) 163 if (!holder)
192 return -ENOMEM; 164 return ERR_PTR(-ENOMEM);
193 } 165 }
194 166
195 mutex_lock(&group->notification_mutex); 167 mutex_lock(&group->notification_mutex);
196 168
197 if (group->q_len >= group->max_events) { 169 if (group->q_len >= group->max_events) {
198 event = &q_overflow_event; 170 event = q_overflow_event;
199 ret = -EOVERFLOW; 171
172 /*
173 * we need to return the overflow event
174 * which means we need a ref
175 */
176 fsnotify_get_event(event);
177 return_event = event;
178
200 /* sorry, no private data on the overflow event */ 179 /* sorry, no private data on the overflow event */
201 priv = NULL; 180 priv = NULL;
202 } 181 }
203 182
183 if (!list_empty(list) && merge) {
184 struct fsnotify_event *tmp;
185
186 tmp = merge(list, event);
187 if (tmp) {
188 mutex_unlock(&group->notification_mutex);
189
190 if (return_event)
191 fsnotify_put_event(return_event);
192 if (holder != &event->holder)
193 fsnotify_destroy_event_holder(holder);
194 return tmp;
195 }
196 }
197
204 spin_lock(&event->lock); 198 spin_lock(&event->lock);
205 199
206 if (list_empty(&event->holder.event_list)) { 200 if (list_empty(&event->holder.event_list)) {
@@ -212,19 +206,13 @@ alloc_holder:
212 * event holder was used, go back and get a new one */ 206 * event holder was used, go back and get a new one */
213 spin_unlock(&event->lock); 207 spin_unlock(&event->lock);
214 mutex_unlock(&group->notification_mutex); 208 mutex_unlock(&group->notification_mutex);
215 goto alloc_holder;
216 }
217 209
218 if (!list_empty(list)) { 210 if (return_event) {
219 last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list); 211 fsnotify_put_event(return_event);
220 last_event = last_holder->event; 212 return_event = NULL;
221 if (event_compare(last_event, event)) {
222 spin_unlock(&event->lock);
223 mutex_unlock(&group->notification_mutex);
224 if (holder != &event->holder)
225 fsnotify_destroy_event_holder(holder);
226 return -EEXIST;
227 } 213 }
214
215 goto alloc_holder;
228 } 216 }
229 217
230 group->q_len++; 218 group->q_len++;
@@ -238,7 +226,7 @@ alloc_holder:
238 mutex_unlock(&group->notification_mutex); 226 mutex_unlock(&group->notification_mutex);
239 227
240 wake_up(&group->notification_waitq); 228 wake_up(&group->notification_waitq);
241 return ret; 229 return return_event;
242} 230}
243 231
244/* 232/*
@@ -253,6 +241,8 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
253 241
254 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 242 BUG_ON(!mutex_is_locked(&group->notification_mutex));
255 243
244 pr_debug("%s: group=%p\n", __func__, group);
245
256 holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); 246 holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
257 247
258 event = holder->event; 248 event = holder->event;
@@ -314,25 +304,82 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
314 304
315static void initialize_event(struct fsnotify_event *event) 305static void initialize_event(struct fsnotify_event *event)
316{ 306{
317 event->holder.event = NULL;
318 INIT_LIST_HEAD(&event->holder.event_list); 307 INIT_LIST_HEAD(&event->holder.event_list);
319 atomic_set(&event->refcnt, 1); 308 atomic_set(&event->refcnt, 1);
320 309
321 spin_lock_init(&event->lock); 310 spin_lock_init(&event->lock);
322 311
323 event->path.dentry = NULL;
324 event->path.mnt = NULL;
325 event->inode = NULL;
326 event->data_type = FSNOTIFY_EVENT_NONE;
327
328 INIT_LIST_HEAD(&event->private_data_list); 312 INIT_LIST_HEAD(&event->private_data_list);
313}
329 314
330 event->to_tell = NULL; 315/*
316 * Caller damn well better be holding whatever mutex is protecting the
317 * old_holder->event_list and the new_event must be a clean event which
318 * cannot be found anywhere else in the kernel.
319 */
320int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
321 struct fsnotify_event *new_event)
322{
323 struct fsnotify_event *old_event = old_holder->event;
324 struct fsnotify_event_holder *new_holder = &new_event->holder;
331 325
332 event->file_name = NULL; 326 enum event_spinlock_class {
333 event->name_len = 0; 327 SPINLOCK_OLD,
328 SPINLOCK_NEW,
329 };
334 330
335 event->sync_cookie = 0; 331 pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
332
333 /*
334 * if the new_event's embedded holder is in use someone
335 * screwed up and didn't give us a clean new event.
336 */
337 BUG_ON(!list_empty(&new_holder->event_list));
338
339 spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
340 spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
341
342 new_holder->event = new_event;
343 list_replace_init(&old_holder->event_list, &new_holder->event_list);
344
345 spin_unlock(&new_event->lock);
346 spin_unlock(&old_event->lock);
347
348 /* event == holder means we are referenced through the in event holder */
349 if (old_holder != &old_event->holder)
350 fsnotify_destroy_event_holder(old_holder);
351
352 fsnotify_get_event(new_event); /* on the list take reference */
353 fsnotify_put_event(old_event); /* off the list, drop reference */
354
355 return 0;
356}
357
358struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
359{
360 struct fsnotify_event *event;
361
362 event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
363 if (!event)
364 return NULL;
365
366 pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
367
368 memcpy(event, old_event, sizeof(*event));
369 initialize_event(event);
370
371 if (event->name_len) {
372 event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
373 if (!event->file_name) {
374 kmem_cache_free(fsnotify_event_cachep, event);
375 return NULL;
376 }
377 }
378 event->tgid = get_pid(old_event->tgid);
379 if (event->data_type == FSNOTIFY_EVENT_FILE)
380 get_file(event->file);
381
382 return event;
336} 383}
337 384
338/* 385/*
@@ -348,15 +395,18 @@ static void initialize_event(struct fsnotify_event *event)
348 * @name the filename, if available 395 * @name the filename, if available
349 */ 396 */
350struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, 397struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
351 int data_type, const char *name, u32 cookie, 398 int data_type, const unsigned char *name,
352 gfp_t gfp) 399 u32 cookie, gfp_t gfp)
353{ 400{
354 struct fsnotify_event *event; 401 struct fsnotify_event *event;
355 402
356 event = kmem_cache_alloc(fsnotify_event_cachep, gfp); 403 event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
357 if (!event) 404 if (!event)
358 return NULL; 405 return NULL;
359 406
407 pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
408 __func__, event, to_tell, mask, data, data_type);
409
360 initialize_event(event); 410 initialize_event(event);
361 411
362 if (name) { 412 if (name) {
@@ -368,35 +418,36 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
368 event->name_len = strlen(event->file_name); 418 event->name_len = strlen(event->file_name);
369 } 419 }
370 420
421 event->tgid = get_pid(task_tgid(current));
371 event->sync_cookie = cookie; 422 event->sync_cookie = cookie;
372 event->to_tell = to_tell; 423 event->to_tell = to_tell;
424 event->data_type = data_type;
373 425
374 switch (data_type) { 426 switch (data_type) {
375 case FSNOTIFY_EVENT_FILE: { 427 case FSNOTIFY_EVENT_FILE: {
376 struct file *file = data; 428 event->file = data;
377 struct path *path = &file->f_path; 429 /*
378 event->path.dentry = path->dentry; 430 * if this file is about to disappear hold an extra reference
379 event->path.mnt = path->mnt; 431 * until we return to __fput so we don't have to worry about
380 path_get(&event->path); 432 * future get/put destroying the file under us or generating
381 event->data_type = FSNOTIFY_EVENT_PATH; 433 * additional events. Notice that we change f_mode without
382 break; 434 * holding f_lock. This is safe since this is the only possible
383 } 435 * reference to this object in the kernel (it was about to be
384 case FSNOTIFY_EVENT_PATH: { 436 * freed, remember?)
385 struct path *path = data; 437 */
386 event->path.dentry = path->dentry; 438 if (!atomic_long_read(&event->file->f_count)) {
387 event->path.mnt = path->mnt; 439 event->file->f_mode |= FMODE_NONOTIFY;
388 path_get(&event->path); 440 get_file(event->file);
389 event->data_type = FSNOTIFY_EVENT_PATH; 441 }
442 get_file(event->file);
390 break; 443 break;
391 } 444 }
392 case FSNOTIFY_EVENT_INODE: 445 case FSNOTIFY_EVENT_INODE:
393 event->inode = data; 446 event->inode = data;
394 event->data_type = FSNOTIFY_EVENT_INODE;
395 break; 447 break;
396 case FSNOTIFY_EVENT_NONE: 448 case FSNOTIFY_EVENT_NONE:
397 event->inode = NULL; 449 event->inode = NULL;
398 event->path.dentry = NULL; 450 event->file = NULL;
399 event->path.mnt = NULL;
400 break; 451 break;
401 default: 452 default:
402 BUG(); 453 BUG();
@@ -412,8 +463,11 @@ __init int fsnotify_notification_init(void)
412 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC); 463 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
413 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC); 464 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
414 465
415 initialize_event(&q_overflow_event); 466 q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
416 q_overflow_event.mask = FS_Q_OVERFLOW; 467 FSNOTIFY_EVENT_NONE, NULL, 0,
468 GFP_KERNEL);
469 if (!q_overflow_event)
470 panic("unable to allocate fsnotify q_overflow_event\n");
417 471
418 return 0; 472 return 0;
419} 473}
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
new file mode 100644
index 000000000000..56772b578fbd
--- /dev/null
+++ b/fs/notify/vfsmount_mark.c
@@ -0,0 +1,187 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19#include <linux/fs.h>
20#include <linux/init.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/mount.h>
24#include <linux/mutex.h>
25#include <linux/spinlock.h>
26#include <linux/writeback.h> /* for inode_lock */
27
28#include <asm/atomic.h>
29
30#include <linux/fsnotify_backend.h>
31#include "fsnotify.h"
32
33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
34{
35 struct fsnotify_mark *mark, *lmark;
36 struct hlist_node *pos, *n;
37 LIST_HEAD(free_list);
38
39 spin_lock(&mnt->mnt_root->d_lock);
40 hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
41 list_add(&mark->m.free_m_list, &free_list);
42 hlist_del_init_rcu(&mark->m.m_list);
43 fsnotify_get_mark(mark);
44 }
45 spin_unlock(&mnt->mnt_root->d_lock);
46
47 list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
48 fsnotify_destroy_mark(mark);
49 fsnotify_put_mark(mark);
50 }
51}
52
53void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
54{
55 fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT);
56}
57
58/*
59 * Recalculate the mask of events relevant to a given vfsmount locked.
60 */
61static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
62{
63 struct fsnotify_mark *mark;
64 struct hlist_node *pos;
65 __u32 new_mask = 0;
66
67 assert_spin_locked(&mnt->mnt_root->d_lock);
68
69 hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list)
70 new_mask |= mark->mask;
71 mnt->mnt_fsnotify_mask = new_mask;
72}
73
74/*
75 * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types
76 * any notifier is interested in hearing for this mount point
77 */
78void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt)
79{
80 spin_lock(&mnt->mnt_root->d_lock);
81 fsnotify_recalc_vfsmount_mask_locked(mnt);
82 spin_unlock(&mnt->mnt_root->d_lock);
83}
84
85void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
86{
87 struct vfsmount *mnt = mark->m.mnt;
88
89 assert_spin_locked(&mark->lock);
90 assert_spin_locked(&mark->group->mark_lock);
91
92 spin_lock(&mnt->mnt_root->d_lock);
93
94 hlist_del_init_rcu(&mark->m.m_list);
95 mark->m.mnt = NULL;
96
97 fsnotify_recalc_vfsmount_mask_locked(mnt);
98
99 spin_unlock(&mnt->mnt_root->d_lock);
100}
101
102static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
103 struct vfsmount *mnt)
104{
105 struct fsnotify_mark *mark;
106 struct hlist_node *pos;
107
108 assert_spin_locked(&mnt->mnt_root->d_lock);
109
110 hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) {
111 if (mark->group == group) {
112 fsnotify_get_mark(mark);
113 return mark;
114 }
115 }
116 return NULL;
117}
118
119/*
120 * given a group and vfsmount, find the mark associated with that combination.
121 * if found take a reference to that mark and return it, else return NULL
122 */
123struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
124 struct vfsmount *mnt)
125{
126 struct fsnotify_mark *mark;
127
128 spin_lock(&mnt->mnt_root->d_lock);
129 mark = fsnotify_find_vfsmount_mark_locked(group, mnt);
130 spin_unlock(&mnt->mnt_root->d_lock);
131
132 return mark;
133}
134
135/*
136 * Attach an initialized mark to a given group and vfsmount.
137 * These marks may be used for the fsnotify backend to determine which
138 * event types should be delivered to which groups.
139 */
140int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
141 struct fsnotify_group *group, struct vfsmount *mnt,
142 int allow_dups)
143{
144 struct fsnotify_mark *lmark;
145 struct hlist_node *node, *last = NULL;
146 int ret = 0;
147
148 mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
149
150 assert_spin_locked(&mark->lock);
151 assert_spin_locked(&group->mark_lock);
152
153 spin_lock(&mnt->mnt_root->d_lock);
154
155 mark->m.mnt = mnt;
156
157 /* is mark the first mark? */
158 if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
159 hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
160 goto out;
161 }
162
163 /* should mark be in the middle of the current list? */
164 hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
165 last = node;
166
167 if ((lmark->group == group) && !allow_dups) {
168 ret = -EEXIST;
169 goto out;
170 }
171
172 if (mark->group < lmark->group)
173 continue;
174
175 hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
176 goto out;
177 }
178
179 BUG_ON(last == NULL);
180 /* mark should be the last entry. last is the current last entry */
181 hlist_add_after_rcu(last, &mark->m.m_list);
182out:
183 fsnotify_recalc_vfsmount_mask_locked(mnt);
184 spin_unlock(&mnt->mnt_root->d_lock);
185
186 return ret;
187}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4b57fb1eac2a..93622b175fc7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2238,7 +2238,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
2238} 2238}
2239 2239
2240/** 2240/**
2241 * ntfs_clear_big_inode - clean up the ntfs specific part of an inode 2241 * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2242 * @vi: vfs inode pending annihilation 2242 * @vi: vfs inode pending annihilation
2243 * 2243 *
2244 * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() 2244 * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
@@ -2247,10 +2247,13 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
2247 * 2247 *
2248 * If the MFT record is dirty, we commit it before doing anything else. 2248 * If the MFT record is dirty, we commit it before doing anything else.
2249 */ 2249 */
2250void ntfs_clear_big_inode(struct inode *vi) 2250void ntfs_evict_big_inode(struct inode *vi)
2251{ 2251{
2252 ntfs_inode *ni = NTFS_I(vi); 2252 ntfs_inode *ni = NTFS_I(vi);
2253 2253
2254 truncate_inode_pages(&vi->i_data, 0);
2255 end_writeback(vi);
2256
2254#ifdef NTFS_RW 2257#ifdef NTFS_RW
2255 if (NInoDirty(ni)) { 2258 if (NInoDirty(ni)) {
2256 bool was_bad = (is_bad_inode(vi)); 2259 bool was_bad = (is_bad_inode(vi));
@@ -2879,9 +2882,6 @@ void ntfs_truncate_vfs(struct inode *vi) {
2879 * 2882 *
2880 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also 2883 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also
2881 * called with ->i_alloc_sem held for writing. 2884 * called with ->i_alloc_sem held for writing.
2882 *
2883 * Basically this is a copy of generic notify_change() and inode_setattr()
2884 * functionality, except we intercept and abort changes in i_size.
2885 */ 2885 */
2886int ntfs_setattr(struct dentry *dentry, struct iattr *attr) 2886int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2887{ 2887{
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 9a113544605d..2dabf813456c 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -279,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
279 279
280extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); 280extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
281extern void ntfs_destroy_big_inode(struct inode *inode); 281extern void ntfs_destroy_big_inode(struct inode *inode);
282extern void ntfs_clear_big_inode(struct inode *vi); 282extern void ntfs_evict_big_inode(struct inode *vi);
283 283
284extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni); 284extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
285 285
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0de1db6cddbf..512806171bfa 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2700,7 +2700,7 @@ static const struct super_operations ntfs_sops = {
2700 .put_super = ntfs_put_super, /* Syscall: umount. */ 2700 .put_super = ntfs_put_super, /* Syscall: umount. */
2701 .statfs = ntfs_statfs, /* Syscall: statfs */ 2701 .statfs = ntfs_statfs, /* Syscall: statfs */
2702 .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ 2702 .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */
2703 .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is 2703 .evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is
2704 removed from memory. */ 2704 removed from memory. */
2705 //.umount_begin = NULL, /* Forced umount. */ 2705 //.umount_begin = NULL, /* Forced umount. */
2706 .show_options = ntfs_show_options, /* Show mount options in 2706 .show_options = ntfs_show_options, /* Show mount options in
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 96337a4fbbdf..0de69c9a08be 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -643,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw,
643 if (i_size_read(inode) <= offset) 643 if (i_size_read(inode) <= offset)
644 return 0; 644 return 0;
645 645
646 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 646 ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
647 inode->i_sb->s_bdev, iov, offset, 647 iov, offset, nr_segs,
648 nr_segs, 648 ocfs2_direct_IO_get_blocks,
649 ocfs2_direct_IO_get_blocks, 649 ocfs2_dio_end_io, NULL, 0);
650 ocfs2_dio_end_io);
651 650
652 mlog_exit(ret); 651 mlog_exit(ret);
653 return ret; 652 return ret;
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index bef34d0528d5..c2903b84bb7a 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -213,10 +213,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
213 213
214 attr->ia_valid &= ~ATTR_SIZE; 214 attr->ia_valid &= ~ATTR_SIZE;
215 error = inode_change_ok(inode, attr); 215 error = inode_change_ok(inode, attr);
216 if (!error) 216 if (error)
217 error = inode_setattr(inode, attr); 217 return error;
218 218
219 return error; 219 setattr_copy(inode, attr);
220 mark_inode_dirty(inode);
221 return 0;
220} 222}
221 223
222static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) 224static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
@@ -354,13 +356,12 @@ static void dlmfs_destroy_inode(struct inode *inode)
354 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 356 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
355} 357}
356 358
357static void dlmfs_clear_inode(struct inode *inode) 359static void dlmfs_evict_inode(struct inode *inode)
358{ 360{
359 int status; 361 int status;
360 struct dlmfs_inode_private *ip; 362 struct dlmfs_inode_private *ip;
361 363
362 if (!inode) 364 end_writeback(inode);
363 return;
364 365
365 mlog(0, "inode %lu\n", inode->i_ino); 366 mlog(0, "inode %lu\n", inode->i_ino);
366 367
@@ -630,7 +631,7 @@ static const struct super_operations dlmfs_ops = {
630 .statfs = simple_statfs, 631 .statfs = simple_statfs,
631 .alloc_inode = dlmfs_alloc_inode, 632 .alloc_inode = dlmfs_alloc_inode,
632 .destroy_inode = dlmfs_destroy_inode, 633 .destroy_inode = dlmfs_destroy_inode,
633 .clear_inode = dlmfs_clear_inode, 634 .evict_inode = dlmfs_evict_inode,
634 .drop_inode = generic_delete_inode, 635 .drop_inode = generic_delete_inode,
635}; 636};
636 637
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2b10b36d1577..81296b4e3646 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1233,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1233 } 1233 }
1234 1234
1235 /* 1235 /*
1236 * This will intentionally not wind up calling simple_setsize(), 1236 * This will intentionally not wind up calling truncate_setsize(),
1237 * since all the work for a size change has been done above. 1237 * since all the work for a size change has been done above.
1238 * Otherwise, we could get into problems with truncate as 1238 * Otherwise, we could get into problems with truncate as
1239 * ip_alloc_sem is used there to protect against i_size 1239 * ip_alloc_sem is used there to protect against i_size
1240 * changes. 1240 * changes.
1241 *
1242 * XXX: this means the conditional below can probably be removed.
1241 */ 1243 */
1242 status = inode_setattr(inode, attr); 1244 if ((attr->ia_valid & ATTR_SIZE) &&
1243 if (status < 0) { 1245 attr->ia_size != i_size_read(inode)) {
1244 mlog_errno(status); 1246 status = vmtruncate(inode, attr->ia_size);
1245 goto bail_commit; 1247 if (status) {
1248 mlog_errno(status);
1249 goto bail_commit;
1250 }
1246 } 1251 }
1247 1252
1253 setattr_copy(inode, attr);
1254 mark_inode_dirty(inode);
1255
1248 status = ocfs2_mark_inode_dirty(handle, inode, bh); 1256 status = ocfs2_mark_inode_dirty(handle, inode, bh);
1249 if (status < 0) 1257 if (status < 0)
1250 mlog_errno(status); 1258 mlog_errno(status);
@@ -2300,12 +2308,12 @@ relock:
2300 * blocks outside i_size. Trim these off again. 2308 * blocks outside i_size. Trim these off again.
2301 * Don't need i_size_read because we hold i_mutex. 2309 * Don't need i_size_read because we hold i_mutex.
2302 * 2310 *
2303 * XXX(hch): this looks buggy because ocfs2 did not 2311 * XXX(truncate): this looks buggy because ocfs2 did not
2304 * actually implement ->truncate. Take a look at 2312 * actually implement ->truncate. Take a look at
2305 * the new truncate sequence and update this accordingly 2313 * the new truncate sequence and update this accordingly
2306 */ 2314 */
2307 if (*ppos + count > inode->i_size) 2315 if (*ppos + count > inode->i_size)
2308 simple_setsize(inode, inode->i_size); 2316 truncate_setsize(inode, inode->i_size);
2309 ret = written; 2317 ret = written;
2310 goto out_dio; 2318 goto out_dio;
2311 } 2319 }
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index abb0a95cc717..0492464916b1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
969 truncate_inode_pages(&inode->i_data, 0); 969 truncate_inode_pages(&inode->i_data, 0);
970} 970}
971 971
972void ocfs2_delete_inode(struct inode *inode) 972static void ocfs2_delete_inode(struct inode *inode)
973{ 973{
974 int wipe, status; 974 int wipe, status;
975 sigset_t oldset; 975 sigset_t oldset;
@@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync:
1075bail_unblock: 1075bail_unblock:
1076 ocfs2_unblock_signals(&oldset); 1076 ocfs2_unblock_signals(&oldset);
1077bail: 1077bail:
1078 clear_inode(inode);
1079 mlog_exit_void(); 1078 mlog_exit_void();
1080} 1079}
1081 1080
1082void ocfs2_clear_inode(struct inode *inode) 1081static void ocfs2_clear_inode(struct inode *inode)
1083{ 1082{
1084 int status; 1083 int status;
1085 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1084 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1086 1085
1087 mlog_entry_void(); 1086 mlog_entry_void();
1088 1087
1089 if (!inode) 1088 end_writeback(inode);
1090 goto bail;
1091
1092 mlog(0, "Clearing inode: %llu, nlink = %u\n", 1089 mlog(0, "Clearing inode: %llu, nlink = %u\n",
1093 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink); 1090 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);
1094 1091
@@ -1180,16 +1177,27 @@ void ocfs2_clear_inode(struct inode *inode)
1180 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1177 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1181 &oi->ip_jinode); 1178 &oi->ip_jinode);
1182 1179
1183bail:
1184 mlog_exit_void(); 1180 mlog_exit_void();
1185} 1181}
1186 1182
1183void ocfs2_evict_inode(struct inode *inode)
1184{
1185 if (!inode->i_nlink ||
1186 (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
1187 ocfs2_delete_inode(inode);
1188 } else {
1189 truncate_inode_pages(&inode->i_data, 0);
1190 }
1191 ocfs2_clear_inode(inode);
1192}
1193
1187/* Called under inode_lock, with no more references on the 1194/* Called under inode_lock, with no more references on the
1188 * struct inode, so it's safe here to check the flags field 1195 * struct inode, so it's safe here to check the flags field
1189 * and to manipulate i_nlink without any other locks. */ 1196 * and to manipulate i_nlink without any other locks. */
1190void ocfs2_drop_inode(struct inode *inode) 1197int ocfs2_drop_inode(struct inode *inode)
1191{ 1198{
1192 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1199 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1200 int res;
1193 1201
1194 mlog_entry_void(); 1202 mlog_entry_void();
1195 1203
@@ -1197,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode)
1197 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); 1205 (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
1198 1206
1199 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) 1207 if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
1200 generic_delete_inode(inode); 1208 res = 1;
1201 else 1209 else
1202 generic_drop_inode(inode); 1210 res = generic_drop_inode(inode);
1203 1211
1204 mlog_exit_void(); 1212 mlog_exit_void();
1213 return res;
1205} 1214}
1206 1215
1207/* 1216/*
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9f5f5fcadc45..6de5a869db30 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -123,9 +123,8 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
123 return &OCFS2_I(inode)->ip_metadata_cache; 123 return &OCFS2_I(inode)->ip_metadata_cache;
124} 124}
125 125
126void ocfs2_clear_inode(struct inode *inode); 126void ocfs2_evict_inode(struct inode *inode);
127void ocfs2_delete_inode(struct inode *inode); 127int ocfs2_drop_inode(struct inode *inode);
128void ocfs2_drop_inode(struct inode *inode);
129 128
130/* Flags for ocfs2_iget() */ 129/* Flags for ocfs2_iget() */
131#define OCFS2_FI_FLAG_SYSFILE 0x1 130#define OCFS2_FI_FLAG_SYSFILE 0x1
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 03a799fdd740..fa1be1b304d1 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = {
145 .alloc_inode = ocfs2_alloc_inode, 145 .alloc_inode = ocfs2_alloc_inode,
146 .destroy_inode = ocfs2_destroy_inode, 146 .destroy_inode = ocfs2_destroy_inode,
147 .drop_inode = ocfs2_drop_inode, 147 .drop_inode = ocfs2_drop_inode,
148 .clear_inode = ocfs2_clear_inode, 148 .evict_inode = ocfs2_evict_inode,
149 .delete_inode = ocfs2_delete_inode,
150 .sync_fs = ocfs2_sync_fs, 149 .sync_fs = ocfs2_sync_fs,
151 .put_super = ocfs2_put_super, 150 .put_super = ocfs2_put_super,
152 .remount_fs = ocfs2_remount, 151 .remount_fs = ocfs2_remount,
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index b42d62419034..393f3f659da7 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -25,11 +25,10 @@ static struct buffer_head *omfs_get_bucket(struct inode *dir,
25 const char *name, int namelen, int *ofs) 25 const char *name, int namelen, int *ofs)
26{ 26{
27 int nbuckets = (dir->i_size - OMFS_DIR_START)/8; 27 int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
28 int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
29 int bucket = omfs_hash(name, namelen, nbuckets); 28 int bucket = omfs_hash(name, namelen, nbuckets);
30 29
31 *ofs = OMFS_DIR_START + bucket * 8; 30 *ofs = OMFS_DIR_START + bucket * 8;
32 return sb_bread(dir->i_sb, block); 31 return omfs_bread(dir->i_sb, dir->i_ino);
33} 32}
34 33
35static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block, 34static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
@@ -42,8 +41,7 @@ static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
42 *prev_block = ~0; 41 *prev_block = ~0;
43 42
44 while (block != ~0) { 43 while (block != ~0) {
45 bh = sb_bread(dir->i_sb, 44 bh = omfs_bread(dir->i_sb, block);
46 clus_to_blk(OMFS_SB(dir->i_sb), block));
47 if (!bh) { 45 if (!bh) {
48 err = -EIO; 46 err = -EIO;
49 goto err; 47 goto err;
@@ -86,11 +84,10 @@ static struct buffer_head *omfs_find_entry(struct inode *dir,
86int omfs_make_empty(struct inode *inode, struct super_block *sb) 84int omfs_make_empty(struct inode *inode, struct super_block *sb)
87{ 85{
88 struct omfs_sb_info *sbi = OMFS_SB(sb); 86 struct omfs_sb_info *sbi = OMFS_SB(sb);
89 int block = clus_to_blk(sbi, inode->i_ino);
90 struct buffer_head *bh; 87 struct buffer_head *bh;
91 struct omfs_inode *oi; 88 struct omfs_inode *oi;
92 89
93 bh = sb_bread(sb, block); 90 bh = omfs_bread(sb, inode->i_ino);
94 if (!bh) 91 if (!bh)
95 return -ENOMEM; 92 return -ENOMEM;
96 93
@@ -134,7 +131,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode)
134 brelse(bh); 131 brelse(bh);
135 132
136 /* now set the sibling and parent pointers on the new inode */ 133 /* now set the sibling and parent pointers on the new inode */
137 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino)); 134 bh = omfs_bread(dir->i_sb, inode->i_ino);
138 if (!bh) 135 if (!bh)
139 goto out; 136 goto out;
140 137
@@ -190,8 +187,7 @@ static int omfs_delete_entry(struct dentry *dentry)
190 if (prev != ~0) { 187 if (prev != ~0) {
191 /* found in middle of list, get list ptr */ 188 /* found in middle of list, get list ptr */
192 brelse(bh); 189 brelse(bh);
193 bh = sb_bread(dir->i_sb, 190 bh = omfs_bread(dir->i_sb, prev);
194 clus_to_blk(OMFS_SB(dir->i_sb), prev));
195 if (!bh) 191 if (!bh)
196 goto out; 192 goto out;
197 193
@@ -224,8 +220,7 @@ static int omfs_dir_is_empty(struct inode *inode)
224 u64 *ptr; 220 u64 *ptr;
225 int i; 221 int i;
226 222
227 bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb), 223 bh = omfs_bread(inode->i_sb, inode->i_ino);
228 inode->i_ino));
229 224
230 if (!bh) 225 if (!bh)
231 return 0; 226 return 0;
@@ -353,8 +348,7 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
353 348
354 /* follow chain in this bucket */ 349 /* follow chain in this bucket */
355 while (fsblock != ~0) { 350 while (fsblock != ~0) {
356 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), 351 bh = omfs_bread(dir->i_sb, fsblock);
357 fsblock));
358 if (!bh) 352 if (!bh)
359 goto out; 353 goto out;
360 354
@@ -466,7 +460,7 @@ static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
466 hchain = (filp->f_pos >> 20) - 1; 460 hchain = (filp->f_pos >> 20) - 1;
467 hindex = filp->f_pos & 0xfffff; 461 hindex = filp->f_pos & 0xfffff;
468 462
469 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino)); 463 bh = omfs_bread(dir->i_sb, dir->i_ino);
470 if (!bh) 464 if (!bh)
471 goto out; 465 goto out;
472 466
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 6e7a3291bbe8..8a6d34fa668a 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -50,7 +50,7 @@ int omfs_shrink_inode(struct inode *inode)
50 if (inode->i_size != 0) 50 if (inode->i_size != 0)
51 goto out; 51 goto out;
52 52
53 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); 53 bh = omfs_bread(inode->i_sb, next);
54 if (!bh) 54 if (!bh)
55 goto out; 55 goto out;
56 56
@@ -90,7 +90,7 @@ int omfs_shrink_inode(struct inode *inode)
90 if (next == ~0) 90 if (next == ~0)
91 break; 91 break;
92 92
93 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); 93 bh = omfs_bread(inode->i_sb, next);
94 if (!bh) 94 if (!bh)
95 goto out; 95 goto out;
96 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); 96 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
@@ -222,7 +222,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
222 struct buffer_head *bh; 222 struct buffer_head *bh;
223 sector_t next, offset; 223 sector_t next, offset;
224 int ret; 224 int ret;
225 u64 new_block; 225 u64 uninitialized_var(new_block);
226 u32 max_extents; 226 u32 max_extents;
227 int extent_count; 227 int extent_count;
228 struct omfs_extent *oe; 228 struct omfs_extent *oe;
@@ -232,7 +232,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
232 int remain; 232 int remain;
233 233
234 ret = -EIO; 234 ret = -EIO;
235 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino)); 235 bh = omfs_bread(inode->i_sb, inode->i_ino);
236 if (!bh) 236 if (!bh)
237 goto out; 237 goto out;
238 238
@@ -265,7 +265,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
265 break; 265 break;
266 266
267 brelse(bh); 267 brelse(bh);
268 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); 268 bh = omfs_bread(inode->i_sb, next);
269 if (!bh) 269 if (!bh)
270 goto out; 270 goto out;
271 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); 271 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
@@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping,
312 loff_t pos, unsigned len, unsigned flags, 312 loff_t pos, unsigned len, unsigned flags,
313 struct page **pagep, void **fsdata) 313 struct page **pagep, void **fsdata)
314{ 314{
315 *pagep = NULL; 315 int ret;
316 return block_write_begin(file, mapping, pos, len, flags, 316
317 pagep, fsdata, omfs_get_block); 317 ret = block_write_begin(mapping, pos, len, flags, pagep,
318 omfs_get_block);
319 if (unlikely(ret)) {
320 loff_t isize = mapping->host->i_size;
321 if (pos + len > isize)
322 vmtruncate(mapping->host, isize);
323 }
324
325 return ret;
318} 326}
319 327
320static sector_t omfs_bmap(struct address_space *mapping, sector_t block) 328static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
@@ -333,7 +341,29 @@ const struct file_operations omfs_file_operations = {
333 .splice_read = generic_file_splice_read, 341 .splice_read = generic_file_splice_read,
334}; 342};
335 343
344static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
345{
346 struct inode *inode = dentry->d_inode;
347 int error;
348
349 error = inode_change_ok(inode, attr);
350 if (error)
351 return error;
352
353 if ((attr->ia_valid & ATTR_SIZE) &&
354 attr->ia_size != i_size_read(inode)) {
355 error = vmtruncate(inode, attr->ia_size);
356 if (error)
357 return error;
358 }
359
360 setattr_copy(inode, attr);
361 mark_inode_dirty(inode);
362 return 0;
363}
364
336const struct inode_operations omfs_file_inops = { 365const struct inode_operations omfs_file_inops = {
366 .setattr = omfs_setattr,
337 .truncate = omfs_truncate 367 .truncate = omfs_truncate
338}; 368};
339 369
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 089839a6cc64..14a22863291a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -19,6 +19,15 @@ MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
19MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux"); 19MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
20MODULE_LICENSE("GPL"); 20MODULE_LICENSE("GPL");
21 21
22struct buffer_head *omfs_bread(struct super_block *sb, sector_t block)
23{
24 struct omfs_sb_info *sbi = OMFS_SB(sb);
25 if (block >= sbi->s_num_blocks)
26 return NULL;
27
28 return sb_bread(sb, clus_to_blk(sbi, block));
29}
30
22struct inode *omfs_new_inode(struct inode *dir, int mode) 31struct inode *omfs_new_inode(struct inode *dir, int mode)
23{ 32{
24 struct inode *inode; 33 struct inode *inode;
@@ -93,15 +102,13 @@ static int __omfs_write_inode(struct inode *inode, int wait)
93 struct omfs_inode *oi; 102 struct omfs_inode *oi;
94 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); 103 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
95 struct buffer_head *bh, *bh2; 104 struct buffer_head *bh, *bh2;
96 unsigned int block;
97 u64 ctime; 105 u64 ctime;
98 int i; 106 int i;
99 int ret = -EIO; 107 int ret = -EIO;
100 int sync_failed = 0; 108 int sync_failed = 0;
101 109
102 /* get current inode since we may have written sibling ptrs etc. */ 110 /* get current inode since we may have written sibling ptrs etc. */
103 block = clus_to_blk(sbi, inode->i_ino); 111 bh = omfs_bread(inode->i_sb, inode->i_ino);
104 bh = sb_bread(inode->i_sb, block);
105 if (!bh) 112 if (!bh)
106 goto out; 113 goto out;
107 114
@@ -140,8 +147,7 @@ static int __omfs_write_inode(struct inode *inode, int wait)
140 147
141 /* if mirroring writes, copy to next fsblock */ 148 /* if mirroring writes, copy to next fsblock */
142 for (i = 1; i < sbi->s_mirrors; i++) { 149 for (i = 1; i < sbi->s_mirrors; i++) {
143 bh2 = sb_bread(inode->i_sb, block + i * 150 bh2 = omfs_bread(inode->i_sb, inode->i_ino + i);
144 (sbi->s_blocksize / sbi->s_sys_blocksize));
145 if (!bh2) 151 if (!bh2)
146 goto out_brelse; 152 goto out_brelse;
147 153
@@ -175,9 +181,13 @@ int omfs_sync_inode(struct inode *inode)
175 * called when an entry is deleted, need to clear the bits in the 181 * called when an entry is deleted, need to clear the bits in the
176 * bitmaps. 182 * bitmaps.
177 */ 183 */
178static void omfs_delete_inode(struct inode *inode) 184static void omfs_evict_inode(struct inode *inode)
179{ 185{
180 truncate_inode_pages(&inode->i_data, 0); 186 truncate_inode_pages(&inode->i_data, 0);
187 end_writeback(inode);
188
189 if (inode->i_nlink)
190 return;
181 191
182 if (S_ISREG(inode->i_mode)) { 192 if (S_ISREG(inode->i_mode)) {
183 inode->i_size = 0; 193 inode->i_size = 0;
@@ -185,7 +195,6 @@ static void omfs_delete_inode(struct inode *inode)
185 } 195 }
186 196
187 omfs_clear_range(inode->i_sb, inode->i_ino, 2); 197 omfs_clear_range(inode->i_sb, inode->i_ino, 2);
188 clear_inode(inode);
189} 198}
190 199
191struct inode *omfs_iget(struct super_block *sb, ino_t ino) 200struct inode *omfs_iget(struct super_block *sb, ino_t ino)
@@ -193,7 +202,6 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
193 struct omfs_sb_info *sbi = OMFS_SB(sb); 202 struct omfs_sb_info *sbi = OMFS_SB(sb);
194 struct omfs_inode *oi; 203 struct omfs_inode *oi;
195 struct buffer_head *bh; 204 struct buffer_head *bh;
196 unsigned int block;
197 u64 ctime; 205 u64 ctime;
198 unsigned long nsecs; 206 unsigned long nsecs;
199 struct inode *inode; 207 struct inode *inode;
@@ -204,8 +212,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
204 if (!(inode->i_state & I_NEW)) 212 if (!(inode->i_state & I_NEW))
205 return inode; 213 return inode;
206 214
207 block = clus_to_blk(sbi, ino); 215 bh = omfs_bread(inode->i_sb, ino);
208 bh = sb_bread(inode->i_sb, block);
209 if (!bh) 216 if (!bh)
210 goto iget_failed; 217 goto iget_failed;
211 218
@@ -284,7 +291,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
284 291
285static const struct super_operations omfs_sops = { 292static const struct super_operations omfs_sops = {
286 .write_inode = omfs_write_inode, 293 .write_inode = omfs_write_inode,
287 .delete_inode = omfs_delete_inode, 294 .evict_inode = omfs_evict_inode,
288 .put_super = omfs_put_super, 295 .put_super = omfs_put_super,
289 .statfs = omfs_statfs, 296 .statfs = omfs_statfs,
290 .show_options = generic_show_options, 297 .show_options = generic_show_options,
@@ -319,6 +326,9 @@ static int omfs_get_imap(struct super_block *sb)
319 goto nomem; 326 goto nomem;
320 327
321 block = clus_to_blk(sbi, sbi->s_bitmap_ino); 328 block = clus_to_blk(sbi, sbi->s_bitmap_ino);
329 if (block >= sbi->s_num_blocks)
330 goto nomem;
331
322 ptr = sbi->s_imap; 332 ptr = sbi->s_imap;
323 for (count = bitmap_size; count > 0; count -= sb->s_blocksize) { 333 for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {
324 bh = sb_bread(sb, block++); 334 bh = sb_bread(sb, block++);
@@ -417,7 +427,6 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
417 struct omfs_root_block *omfs_rb; 427 struct omfs_root_block *omfs_rb;
418 struct omfs_sb_info *sbi; 428 struct omfs_sb_info *sbi;
419 struct inode *root; 429 struct inode *root;
420 sector_t start;
421 int ret = -EINVAL; 430 int ret = -EINVAL;
422 431
423 save_mount_options(sb, (char *) data); 432 save_mount_options(sb, (char *) data);
@@ -486,8 +495,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
486 sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) - 495 sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
487 get_bitmask_order(sbi->s_sys_blocksize); 496 get_bitmask_order(sbi->s_sys_blocksize);
488 497
489 start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block)); 498 bh2 = omfs_bread(sb, be64_to_cpu(omfs_sb->s_root_block));
490 bh2 = sb_bread(sb, start);
491 if (!bh2) 499 if (!bh2)
492 goto out_brelse_bh; 500 goto out_brelse_bh;
493 501
@@ -504,6 +512,21 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
504 goto out_brelse_bh2; 512 goto out_brelse_bh2;
505 } 513 }
506 514
515 if (sbi->s_bitmap_ino != ~0ULL &&
516 sbi->s_bitmap_ino > sbi->s_num_blocks) {
517 printk(KERN_ERR "omfs: free space bitmap location is corrupt "
518 "(%llx, total blocks %llx)\n",
519 (unsigned long long) sbi->s_bitmap_ino,
520 (unsigned long long) sbi->s_num_blocks);
521 goto out_brelse_bh2;
522 }
523 if (sbi->s_clustersize < 1 ||
524 sbi->s_clustersize > OMFS_MAX_CLUSTER_SIZE) {
525 printk(KERN_ERR "omfs: cluster size out of range (%d)",
526 sbi->s_clustersize);
527 goto out_brelse_bh2;
528 }
529
507 ret = omfs_get_imap(sb); 530 ret = omfs_get_imap(sb);
508 if (ret) 531 if (ret)
509 goto out_brelse_bh2; 532 goto out_brelse_bh2;
@@ -529,6 +552,8 @@ out_brelse_bh2:
529out_brelse_bh: 552out_brelse_bh:
530 brelse(bh); 553 brelse(bh);
531end: 554end:
555 if (ret)
556 kfree(sbi);
532 return ret; 557 return ret;
533} 558}
534 559
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index ebe2fdbe535e..7d414fef501a 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -58,6 +58,7 @@ extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
58extern int omfs_shrink_inode(struct inode *inode); 58extern int omfs_shrink_inode(struct inode *inode);
59 59
60/* inode.c */ 60/* inode.c */
61extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block);
61extern struct inode *omfs_iget(struct super_block *sb, ino_t inode); 62extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
62extern struct inode *omfs_new_inode(struct inode *dir, int mode); 63extern struct inode *omfs_new_inode(struct inode *dir, int mode);
63extern int omfs_reserve_block(struct super_block *sb, sector_t block); 64extern int omfs_reserve_block(struct super_block *sb, sector_t block);
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
index 12cca245d6e8..ee5e4327de92 100644
--- a/fs/omfs/omfs_fs.h
+++ b/fs/omfs/omfs_fs.h
@@ -17,6 +17,7 @@
17#define OMFS_EXTENT_CONT 0x40 17#define OMFS_EXTENT_CONT 0x40
18#define OMFS_XOR_COUNT 19 18#define OMFS_XOR_COUNT 19
19#define OMFS_MAX_BLOCK_SIZE 8192 19#define OMFS_MAX_BLOCK_SIZE 8192
20#define OMFS_MAX_CLUSTER_SIZE 8
20 21
21struct omfs_super_block { 22struct omfs_super_block {
22 char s_fill1[256]; 23 char s_fill1[256];
diff --git a/fs/open.c b/fs/open.c
index 0d1fa3dc0efb..b715d06fbe36 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -29,6 +29,7 @@
29#include <linux/falloc.h> 29#include <linux/falloc.h>
30#include <linux/fs_struct.h> 30#include <linux/fs_struct.h>
31#include <linux/ima.h> 31#include <linux/ima.h>
32#include <linux/dnotify.h>
32 33
33#include "internal.h" 34#include "internal.h"
34 35
@@ -887,7 +888,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
887 put_unused_fd(fd); 888 put_unused_fd(fd);
888 fd = PTR_ERR(f); 889 fd = PTR_ERR(f);
889 } else { 890 } else {
890 fsnotify_open(f->f_path.dentry); 891 fsnotify_open(f);
891 fd_install(fd, f); 892 fd_install(fd, f);
892 } 893 }
893 } 894 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 69254a365ce2..c806dfb24e08 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -559,9 +559,19 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
559 return -EPERM; 559 return -EPERM;
560 560
561 error = inode_change_ok(inode, attr); 561 error = inode_change_ok(inode, attr);
562 if (!error) 562 if (error)
563 error = inode_setattr(inode, attr); 563 return error;
564 return error; 564
565 if ((attr->ia_valid & ATTR_SIZE) &&
566 attr->ia_size != i_size_read(inode)) {
567 error = vmtruncate(inode, attr->ia_size);
568 if (error)
569 return error;
570 }
571
572 setattr_copy(inode, attr);
573 mark_inode_dirty(inode);
574 return 0;
565} 575}
566 576
567static const struct inode_operations proc_def_inode_operations = { 577static const struct inode_operations proc_def_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2791907744ed..dd29f0337661 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/stat.h> 14#include <linux/stat.h>
15#include <linux/mm.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/mount.h> 18#include <linux/mount.h>
@@ -258,17 +259,22 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
258 259
259 error = inode_change_ok(inode, iattr); 260 error = inode_change_ok(inode, iattr);
260 if (error) 261 if (error)
261 goto out; 262 return error;
262 263
263 error = inode_setattr(inode, iattr); 264 if ((iattr->ia_valid & ATTR_SIZE) &&
264 if (error) 265 iattr->ia_size != i_size_read(inode)) {
265 goto out; 266 error = vmtruncate(inode, iattr->ia_size);
267 if (error)
268 return error;
269 }
270
271 setattr_copy(inode, iattr);
272 mark_inode_dirty(inode);
266 273
267 de->uid = inode->i_uid; 274 de->uid = inode->i_uid;
268 de->gid = inode->i_gid; 275 de->gid = inode->i_gid;
269 de->mode = inode->i_mode; 276 de->mode = inode->i_mode;
270out: 277 return 0;
271 return error;
272} 278}
273 279
274static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 280static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index aea8502e58a3..23561cda7245 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,11 +25,12 @@
25 25
26#include "internal.h" 26#include "internal.h"
27 27
28static void proc_delete_inode(struct inode *inode) 28static void proc_evict_inode(struct inode *inode)
29{ 29{
30 struct proc_dir_entry *de; 30 struct proc_dir_entry *de;
31 31
32 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
33 end_writeback(inode);
33 34
34 /* Stop tracking associated processes */ 35 /* Stop tracking associated processes */
35 put_pid(PROC_I(inode)->pid); 36 put_pid(PROC_I(inode)->pid);
@@ -40,7 +41,6 @@ static void proc_delete_inode(struct inode *inode)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 if (PROC_I(inode)->sysctl)
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 sysctl_head_put(PROC_I(inode)->sysctl);
43 clear_inode(inode);
44} 44}
45 45
46struct vfsmount *proc_mnt; 46struct vfsmount *proc_mnt;
@@ -91,7 +91,7 @@ static const struct super_operations proc_sops = {
91 .alloc_inode = proc_alloc_inode, 91 .alloc_inode = proc_alloc_inode,
92 .destroy_inode = proc_destroy_inode, 92 .destroy_inode = proc_destroy_inode,
93 .drop_inode = generic_delete_inode, 93 .drop_inode = generic_delete_inode,
94 .delete_inode = proc_delete_inode, 94 .evict_inode = proc_evict_inode,
95 .statfs = simple_statfs, 95 .statfs = simple_statfs,
96}; 96};
97 97
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6ff9981f0a18..5be436ea088e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -329,10 +329,19 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
329 return -EPERM; 329 return -EPERM;
330 330
331 error = inode_change_ok(inode, attr); 331 error = inode_change_ok(inode, attr);
332 if (!error) 332 if (error)
333 error = inode_setattr(inode, attr); 333 return error;
334
335 if ((attr->ia_valid & ATTR_SIZE) &&
336 attr->ia_size != i_size_read(inode)) {
337 error = vmtruncate(inode, attr->ia_size);
338 if (error)
339 return error;
340 }
334 341
335 return error; 342 setattr_copy(inode, attr);
343 mark_inode_dirty(inode);
344 return 0;
336} 345}
337 346
338static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 347static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 277575ddc05c..16829722be93 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping,
320 struct page **pagep, void **fsdata) 320 struct page **pagep, void **fsdata)
321{ 321{
322 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); 322 struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
323 int ret;
324
323 *pagep = NULL; 325 *pagep = NULL;
324 return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 326 ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
325 qnx4_get_block, 327 qnx4_get_block,
326 &qnx4_inode->mmu_private); 328 &qnx4_inode->mmu_private);
329 if (unlikely(ret)) {
330 loff_t isize = mapping->host->i_size;
331 if (pos + len > isize)
332 vmtruncate(mapping->host, isize);
333 }
334
335 return ret;
327} 336}
328static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) 337static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
329{ 338{
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ef72b1699429..aad1316a977f 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -898,7 +898,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
898 898
899 spin_lock(&inode_lock); 899 spin_lock(&inode_lock);
900 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 900 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
901 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 901 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
902 continue; 902 continue;
903#ifdef CONFIG_QUOTA_DEBUG 903#ifdef CONFIG_QUOTA_DEBUG
904 if (unlikely(inode_get_rsv_space(inode) > 0)) 904 if (unlikely(inode_get_rsv_space(inode) > 0))
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d532c20fc179..9eead2c796b7 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -146,9 +146,8 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
146 return ret; 146 return ret;
147 } 147 }
148 148
149 ret = simple_setsize(inode, newsize); 149 truncate_setsize(inode, newsize);
150 150 return 0;
151 return ret;
152} 151}
153 152
154/*****************************************************************************/ 153/*****************************************************************************/
@@ -183,7 +182,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
183 } 182 }
184 } 183 }
185 184
186 generic_setattr(inode, ia); 185 setattr_copy(inode, ia);
187 out: 186 out:
188 ia->ia_valid = old_ia_valid; 187 ia->ia_valid = old_ia_valid;
189 return ret; 188 return ret;
diff --git a/fs/read_write.c b/fs/read_write.c
index 9c0485236e68..74e36586e4d3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -311,7 +311,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
311 else 311 else
312 ret = do_sync_read(file, buf, count, pos); 312 ret = do_sync_read(file, buf, count, pos);
313 if (ret > 0) { 313 if (ret > 0) {
314 fsnotify_access(file->f_path.dentry); 314 fsnotify_access(file);
315 add_rchar(current, ret); 315 add_rchar(current, ret);
316 } 316 }
317 inc_syscr(current); 317 inc_syscr(current);
@@ -367,7 +367,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
367 else 367 else
368 ret = do_sync_write(file, buf, count, pos); 368 ret = do_sync_write(file, buf, count, pos);
369 if (ret > 0) { 369 if (ret > 0) {
370 fsnotify_modify(file->f_path.dentry); 370 fsnotify_modify(file);
371 add_wchar(current, ret); 371 add_wchar(current, ret);
372 } 372 }
373 inc_syscw(current); 373 inc_syscw(current);
@@ -675,9 +675,9 @@ out:
675 kfree(iov); 675 kfree(iov);
676 if ((ret + (type == READ)) > 0) { 676 if ((ret + (type == READ)) > 0) {
677 if (type == READ) 677 if (type == READ)
678 fsnotify_access(file->f_path.dentry); 678 fsnotify_access(file);
679 else 679 else
680 fsnotify_modify(file->f_path.dentry); 680 fsnotify_modify(file);
681 } 681 }
682 return ret; 682 return ret;
683} 683}
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index b82cdd8a45dd..6846371498b6 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -38,20 +38,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
38 38
39 BUG_ON(!S_ISREG(inode->i_mode)); 39 BUG_ON(!S_ISREG(inode->i_mode));
40 40
41 if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
42 return 0;
43
44 mutex_lock(&(REISERFS_I(inode)->tailpack));
45
46 if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
47 mutex_unlock(&(REISERFS_I(inode)->tailpack));
48 return 0;
49 }
50
41 /* fast out for when nothing needs to be done */ 51 /* fast out for when nothing needs to be done */
42 if ((atomic_read(&inode->i_count) > 1 || 52 if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
43 !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
44 !tail_has_to_be_packed(inode)) && 53 !tail_has_to_be_packed(inode)) &&
45 REISERFS_I(inode)->i_prealloc_count <= 0) { 54 REISERFS_I(inode)->i_prealloc_count <= 0) {
55 mutex_unlock(&(REISERFS_I(inode)->tailpack));
46 return 0; 56 return 0;
47 } 57 }
48 58
49 mutex_lock(&inode->i_mutex);
50
51 mutex_lock(&(REISERFS_I(inode)->i_mmap));
52 if (REISERFS_I(inode)->i_flags & i_ever_mapped)
53 REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
54
55 reiserfs_write_lock(inode->i_sb); 59 reiserfs_write_lock(inode->i_sb);
56 /* freeing preallocation only involves relogging blocks that 60 /* freeing preallocation only involves relogging blocks that
57 * are already in the current transaction. preallocation gets 61 * are already in the current transaction. preallocation gets
@@ -94,9 +98,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
94 if (!err) 98 if (!err)
95 err = jbegin_failure; 99 err = jbegin_failure;
96 100
97 if (!err && atomic_read(&inode->i_count) <= 1 && 101 if (!err &&
98 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) && 102 (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
99 tail_has_to_be_packed(inode)) { 103 tail_has_to_be_packed(inode)) {
104
100 /* if regular file is released by last holder and it has been 105 /* if regular file is released by last holder and it has been
101 appended (we append by unformatted node only) or its direct 106 appended (we append by unformatted node only) or its direct
102 item(s) had to be converted, then it may have to be 107 item(s) had to be converted, then it may have to be
@@ -104,27 +109,28 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
104 err = reiserfs_truncate_file(inode, 0); 109 err = reiserfs_truncate_file(inode, 0);
105 } 110 }
106 out: 111 out:
107 mutex_unlock(&(REISERFS_I(inode)->i_mmap));
108 mutex_unlock(&inode->i_mutex);
109 reiserfs_write_unlock(inode->i_sb); 112 reiserfs_write_unlock(inode->i_sb);
113 mutex_unlock(&(REISERFS_I(inode)->tailpack));
110 return err; 114 return err;
111} 115}
112 116
113static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma) 117static int reiserfs_file_open(struct inode *inode, struct file *file)
114{ 118{
115 struct inode *inode; 119 int err = dquot_file_open(inode, file);
116 120 if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
117 inode = file->f_path.dentry->d_inode; 121 /* somebody might be tailpacking on final close; wait for it */
118 mutex_lock(&(REISERFS_I(inode)->i_mmap)); 122 mutex_lock(&(REISERFS_I(inode)->tailpack));
119 REISERFS_I(inode)->i_flags |= i_ever_mapped; 123 atomic_inc(&REISERFS_I(inode)->openers);
120 mutex_unlock(&(REISERFS_I(inode)->i_mmap)); 124 mutex_unlock(&(REISERFS_I(inode)->tailpack));
121 125 }
122 return generic_file_mmap(file, vma); 126 return err;
123} 127}
124 128
125static void reiserfs_vfs_truncate_file(struct inode *inode) 129static void reiserfs_vfs_truncate_file(struct inode *inode)
126{ 130{
131 mutex_lock(&(REISERFS_I(inode)->tailpack));
127 reiserfs_truncate_file(inode, 1); 132 reiserfs_truncate_file(inode, 1);
133 mutex_unlock(&(REISERFS_I(inode)->tailpack));
128} 134}
129 135
130/* Sync a reiserfs file. */ 136/* Sync a reiserfs file. */
@@ -288,8 +294,8 @@ const struct file_operations reiserfs_file_operations = {
288#ifdef CONFIG_COMPAT 294#ifdef CONFIG_COMPAT
289 .compat_ioctl = reiserfs_compat_ioctl, 295 .compat_ioctl = reiserfs_compat_ioctl,
290#endif 296#endif
291 .mmap = reiserfs_file_mmap, 297 .mmap = generic_file_mmap,
292 .open = dquot_file_open, 298 .open = reiserfs_file_open,
293 .release = reiserfs_file_release, 299 .release = reiserfs_file_release,
294 .fsync = reiserfs_sync_file, 300 .fsync = reiserfs_sync_file,
295 .aio_read = generic_file_aio_read, 301 .aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 29db72203bde..ae35413dcbe1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -25,7 +25,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
25int reiserfs_prepare_write(struct file *f, struct page *page, 25int reiserfs_prepare_write(struct file *f, struct page *page,
26 unsigned from, unsigned to); 26 unsigned from, unsigned to);
27 27
28void reiserfs_delete_inode(struct inode *inode) 28void reiserfs_evict_inode(struct inode *inode)
29{ 29{
30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */
31 int jbegin_count = 31 int jbegin_count =
@@ -35,10 +35,12 @@ void reiserfs_delete_inode(struct inode *inode)
35 int depth; 35 int depth;
36 int err; 36 int err;
37 37
38 if (!is_bad_inode(inode)) 38 if (!inode->i_nlink && !is_bad_inode(inode))
39 dquot_initialize(inode); 39 dquot_initialize(inode);
40 40
41 truncate_inode_pages(&inode->i_data, 0); 41 truncate_inode_pages(&inode->i_data, 0);
42 if (inode->i_nlink)
43 goto no_delete;
42 44
43 depth = reiserfs_write_lock_once(inode->i_sb); 45 depth = reiserfs_write_lock_once(inode->i_sb);
44 46
@@ -77,9 +79,14 @@ void reiserfs_delete_inode(struct inode *inode)
77 ; 79 ;
78 } 80 }
79 out: 81 out:
80 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 82 end_writeback(inode); /* note this must go after the journal_end to prevent deadlock */
83 dquot_drop(inode);
81 inode->i_blocks = 0; 84 inode->i_blocks = 0;
82 reiserfs_write_unlock_once(inode->i_sb, depth); 85 reiserfs_write_unlock_once(inode->i_sb, depth);
86
87no_delete:
88 end_writeback(inode);
89 dquot_drop(inode);
83} 90}
84 91
85static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 92static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -1138,7 +1145,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
1138 REISERFS_I(inode)->i_prealloc_count = 0; 1145 REISERFS_I(inode)->i_prealloc_count = 0;
1139 REISERFS_I(inode)->i_trans_id = 0; 1146 REISERFS_I(inode)->i_trans_id = 0;
1140 REISERFS_I(inode)->i_jl = NULL; 1147 REISERFS_I(inode)->i_jl = NULL;
1141 mutex_init(&(REISERFS_I(inode)->i_mmap));
1142 reiserfs_init_xattr_rwsem(inode); 1148 reiserfs_init_xattr_rwsem(inode);
1143 1149
1144 if (stat_data_v1(ih)) { 1150 if (stat_data_v1(ih)) {
@@ -1841,7 +1847,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1841 REISERFS_I(inode)->i_attrs = 1847 REISERFS_I(inode)->i_attrs =
1842 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; 1848 REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
1843 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); 1849 sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
1844 mutex_init(&(REISERFS_I(inode)->i_mmap));
1845 reiserfs_init_xattr_rwsem(inode); 1850 reiserfs_init_xattr_rwsem(inode);
1846 1851
1847 /* key to search for correct place for new stat data */ 1852 /* key to search for correct place for new stat data */
@@ -2587,8 +2592,7 @@ static int reiserfs_write_begin(struct file *file,
2587 old_ref = th->t_refcount; 2592 old_ref = th->t_refcount;
2588 th->t_refcount++; 2593 th->t_refcount++;
2589 } 2594 }
2590 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2595 ret = __block_write_begin(page, pos, len, reiserfs_get_block);
2591 reiserfs_get_block);
2592 if (ret && reiserfs_transaction_running(inode->i_sb)) { 2596 if (ret && reiserfs_transaction_running(inode->i_sb)) {
2593 struct reiserfs_transaction_handle *th = current->journal_info; 2597 struct reiserfs_transaction_handle *th = current->journal_info;
2594 /* this gets a little ugly. If reiserfs_get_block returned an 2598 /* this gets a little ugly. If reiserfs_get_block returned an
@@ -3059,10 +3063,25 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3059{ 3063{
3060 struct file *file = iocb->ki_filp; 3064 struct file *file = iocb->ki_filp;
3061 struct inode *inode = file->f_mapping->host; 3065 struct inode *inode = file->f_mapping->host;
3066 ssize_t ret;
3062 3067
3063 return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3068 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
3064 offset, nr_segs, 3069 offset, nr_segs,
3065 reiserfs_get_blocks_direct_io, NULL); 3070 reiserfs_get_blocks_direct_io, NULL);
3071
3072 /*
3073 * In case of error extending write may have instantiated a few
3074 * blocks outside i_size. Trim these off again.
3075 */
3076 if (unlikely((rw & WRITE) && ret < 0)) {
3077 loff_t isize = i_size_read(inode);
3078 loff_t end = offset + iov_length(iov, nr_segs);
3079
3080 if (end > isize)
3081 vmtruncate(inode, isize);
3082 }
3083
3084 return ret;
3066} 3085}
3067 3086
3068int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3087int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3072,6 +3091,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3072 int depth; 3091 int depth;
3073 int error; 3092 int error;
3074 3093
3094 error = inode_change_ok(inode, attr);
3095 if (error)
3096 return error;
3097
3075 /* must be turned off for recursive notify_change calls */ 3098 /* must be turned off for recursive notify_change calls */
3076 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3099 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3077 3100
@@ -3121,55 +3144,58 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3121 goto out; 3144 goto out;
3122 } 3145 }
3123 3146
3124 error = inode_change_ok(inode, attr); 3147 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
3125 if (!error) { 3148 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
3126 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 3149 struct reiserfs_transaction_handle th;
3127 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { 3150 int jbegin_count =
3128 error = reiserfs_chown_xattrs(inode, attr); 3151 2 *
3152 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
3153 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
3154 2;
3129 3155
3130 if (!error) { 3156 error = reiserfs_chown_xattrs(inode, attr);
3131 struct reiserfs_transaction_handle th; 3157
3132 int jbegin_count = 3158 if (error)
3133 2 * 3159 return error;
3134 (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) + 3160
3135 REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) + 3161 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
3136 2; 3162 error = journal_begin(&th, inode->i_sb, jbegin_count);
3137 3163 if (error)
3138 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ 3164 goto out;
3139 error = 3165 error = dquot_transfer(inode, attr);
3140 journal_begin(&th, inode->i_sb, 3166 if (error) {
3141 jbegin_count); 3167 journal_end(&th, inode->i_sb, jbegin_count);
3142 if (error) 3168 goto out;
3143 goto out;
3144 error = dquot_transfer(inode, attr);
3145 if (error) {
3146 journal_end(&th, inode->i_sb,
3147 jbegin_count);
3148 goto out;
3149 }
3150 /* Update corresponding info in inode so that everything is in
3151 * one transaction */
3152 if (attr->ia_valid & ATTR_UID)
3153 inode->i_uid = attr->ia_uid;
3154 if (attr->ia_valid & ATTR_GID)
3155 inode->i_gid = attr->ia_gid;
3156 mark_inode_dirty(inode);
3157 error =
3158 journal_end(&th, inode->i_sb, jbegin_count);
3159 }
3160 }
3161 if (!error) {
3162 /*
3163 * Relax the lock here, as it might truncate the
3164 * inode pages and wait for inode pages locks.
3165 * To release such page lock, the owner needs the
3166 * reiserfs lock
3167 */
3168 reiserfs_write_unlock_once(inode->i_sb, depth);
3169 error = inode_setattr(inode, attr);
3170 depth = reiserfs_write_lock_once(inode->i_sb);
3171 } 3169 }
3170
3171 /* Update corresponding info in inode so that everything is in
3172 * one transaction */
3173 if (attr->ia_valid & ATTR_UID)
3174 inode->i_uid = attr->ia_uid;
3175 if (attr->ia_valid & ATTR_GID)
3176 inode->i_gid = attr->ia_gid;
3177 mark_inode_dirty(inode);
3178 error = journal_end(&th, inode->i_sb, jbegin_count);
3179 if (error)
3180 goto out;
3181 }
3182
3183 /*
3184 * Relax the lock here, as it might truncate the
3185 * inode pages and wait for inode pages locks.
3186 * To release such page lock, the owner needs the
3187 * reiserfs lock
3188 */
3189 reiserfs_write_unlock_once(inode->i_sb, depth);
3190 if ((attr->ia_valid & ATTR_SIZE) &&
3191 attr->ia_size != i_size_read(inode))
3192 error = vmtruncate(inode, attr->ia_size);
3193
3194 if (!error) {
3195 setattr_copy(inode, attr);
3196 mark_inode_dirty(inode);
3172 } 3197 }
3198 depth = reiserfs_write_lock_once(inode->i_sb);
3173 3199
3174 if (!error && reiserfs_posixacl(inode->i_sb)) { 3200 if (!error && reiserfs_posixacl(inode->i_sb)) {
3175 if (attr->ia_valid & ATTR_MODE) 3201 if (attr->ia_valid & ATTR_MODE)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9822fa15118b..e15ff612002d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -525,6 +525,8 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
525 kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL); 525 kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
526 if (!ei) 526 if (!ei)
527 return NULL; 527 return NULL;
528 atomic_set(&ei->openers, 0);
529 mutex_init(&ei->tailpack);
528 return &ei->vfs_inode; 530 return &ei->vfs_inode;
529} 531}
530 532
@@ -589,11 +591,6 @@ out:
589 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 591 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
590} 592}
591 593
592static void reiserfs_clear_inode(struct inode *inode)
593{
594 dquot_drop(inode);
595}
596
597#ifdef CONFIG_QUOTA 594#ifdef CONFIG_QUOTA
598static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 595static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
599 size_t, loff_t); 596 size_t, loff_t);
@@ -606,8 +603,7 @@ static const struct super_operations reiserfs_sops = {
606 .destroy_inode = reiserfs_destroy_inode, 603 .destroy_inode = reiserfs_destroy_inode,
607 .write_inode = reiserfs_write_inode, 604 .write_inode = reiserfs_write_inode,
608 .dirty_inode = reiserfs_dirty_inode, 605 .dirty_inode = reiserfs_dirty_inode,
609 .clear_inode = reiserfs_clear_inode, 606 .evict_inode = reiserfs_evict_inode,
610 .delete_inode = reiserfs_delete_inode,
611 .put_super = reiserfs_put_super, 607 .put_super = reiserfs_put_super,
612 .write_super = reiserfs_write_super, 608 .write_super = reiserfs_write_super,
613 .sync_fs = reiserfs_sync_fs, 609 .sync_fs = reiserfs_sync_fs,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9551cb6f7fe4..450c91941988 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -46,7 +46,7 @@
46 46
47#define SMB_TTL_DEFAULT 1000 47#define SMB_TTL_DEFAULT 1000
48 48
49static void smb_delete_inode(struct inode *); 49static void smb_evict_inode(struct inode *);
50static void smb_put_super(struct super_block *); 50static void smb_put_super(struct super_block *);
51static int smb_statfs(struct dentry *, struct kstatfs *); 51static int smb_statfs(struct dentry *, struct kstatfs *);
52static int smb_show_options(struct seq_file *, struct vfsmount *); 52static int smb_show_options(struct seq_file *, struct vfsmount *);
@@ -102,7 +102,7 @@ static const struct super_operations smb_sops =
102 .alloc_inode = smb_alloc_inode, 102 .alloc_inode = smb_alloc_inode,
103 .destroy_inode = smb_destroy_inode, 103 .destroy_inode = smb_destroy_inode,
104 .drop_inode = generic_delete_inode, 104 .drop_inode = generic_delete_inode,
105 .delete_inode = smb_delete_inode, 105 .evict_inode = smb_evict_inode,
106 .put_super = smb_put_super, 106 .put_super = smb_put_super,
107 .statfs = smb_statfs, 107 .statfs = smb_statfs,
108 .show_options = smb_show_options, 108 .show_options = smb_show_options,
@@ -324,15 +324,15 @@ out:
324 * All blocking cleanup operations need to go here to avoid races. 324 * All blocking cleanup operations need to go here to avoid races.
325 */ 325 */
326static void 326static void
327smb_delete_inode(struct inode *ino) 327smb_evict_inode(struct inode *ino)
328{ 328{
329 DEBUG1("ino=%ld\n", ino->i_ino); 329 DEBUG1("ino=%ld\n", ino->i_ino);
330 truncate_inode_pages(&ino->i_data, 0); 330 truncate_inode_pages(&ino->i_data, 0);
331 end_writeback(ino);
331 lock_kernel(); 332 lock_kernel();
332 if (smb_close(ino)) 333 if (smb_close(ino))
333 PARANOIA("could not close inode %ld\n", ino->i_ino); 334 PARANOIA("could not close inode %ld\n", ino->i_ino);
334 unlock_kernel(); 335 unlock_kernel();
335 clear_inode(ino);
336} 336}
337 337
338static struct option opts[] = { 338static struct option opts[] = {
@@ -714,9 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
714 error = server->ops->truncate(inode, attr->ia_size); 714 error = server->ops->truncate(inode, attr->ia_size);
715 if (error) 715 if (error)
716 goto out; 716 goto out;
717 error = simple_setsize(inode, attr->ia_size); 717 truncate_setsize(inode, attr->ia_size);
718 if (error)
719 goto out;
720 refresh = 1; 718 refresh = 1;
721 } 719 }
722 720
diff --git a/fs/splice.c b/fs/splice.c
index efdbfece9932..8f1dfaecc8f0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -399,17 +399,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
399 * If the page isn't uptodate, we may need to start io on it 399 * If the page isn't uptodate, we may need to start io on it
400 */ 400 */
401 if (!PageUptodate(page)) { 401 if (!PageUptodate(page)) {
402 /* 402 lock_page(page);
403 * If in nonblock mode then dont block on waiting
404 * for an in-flight io page
405 */
406 if (flags & SPLICE_F_NONBLOCK) {
407 if (!trylock_page(page)) {
408 error = -EAGAIN;
409 break;
410 }
411 } else
412 lock_page(page);
413 403
414 /* 404 /*
415 * Page was truncated, or invalidated by the 405 * Page was truncated, or invalidated by the
@@ -597,7 +587,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
597 struct page *pages[PIPE_DEF_BUFFERS]; 587 struct page *pages[PIPE_DEF_BUFFERS];
598 struct partial_page partial[PIPE_DEF_BUFFERS]; 588 struct partial_page partial[PIPE_DEF_BUFFERS];
599 struct iovec *vec, __vec[PIPE_DEF_BUFFERS]; 589 struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
600 pgoff_t index;
601 ssize_t res; 590 ssize_t res;
602 size_t this_len; 591 size_t this_len;
603 int error; 592 int error;
@@ -621,7 +610,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
621 goto shrink_ret; 610 goto shrink_ret;
622 } 611 }
623 612
624 index = *ppos >> PAGE_CACHE_SHIFT;
625 offset = *ppos & ~PAGE_CACHE_MASK; 613 offset = *ppos & ~PAGE_CACHE_MASK;
626 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 614 nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
627 615
diff --git a/fs/statfs.c b/fs/statfs.c
index 4ef021f3b612..30ea8c8a996b 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -2,38 +2,83 @@
2#include <linux/module.h> 2#include <linux/module.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/file.h> 4#include <linux/file.h>
5#include <linux/mount.h>
5#include <linux/namei.h> 6#include <linux/namei.h>
6#include <linux/statfs.h> 7#include <linux/statfs.h>
7#include <linux/security.h> 8#include <linux/security.h>
8#include <linux/uaccess.h> 9#include <linux/uaccess.h>
9 10
10int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) 11static int flags_by_mnt(int mnt_flags)
11{ 12{
12 int retval = -ENODEV; 13 int flags = 0;
13 14
14 if (dentry) { 15 if (mnt_flags & MNT_READONLY)
15 retval = -ENOSYS; 16 flags |= ST_RDONLY;
16 if (dentry->d_sb->s_op->statfs) { 17 if (mnt_flags & MNT_NOSUID)
17 memset(buf, 0, sizeof(*buf)); 18 flags |= ST_NOSUID;
18 retval = security_sb_statfs(dentry); 19 if (mnt_flags & MNT_NODEV)
19 if (retval) 20 flags |= ST_NODEV;
20 return retval; 21 if (mnt_flags & MNT_NOEXEC)
21 retval = dentry->d_sb->s_op->statfs(dentry, buf); 22 flags |= ST_NOEXEC;
22 if (retval == 0 && buf->f_frsize == 0) 23 if (mnt_flags & MNT_NOATIME)
23 buf->f_frsize = buf->f_bsize; 24 flags |= ST_NOATIME;
24 } 25 if (mnt_flags & MNT_NODIRATIME)
25 } 26 flags |= ST_NODIRATIME;
27 if (mnt_flags & MNT_RELATIME)
28 flags |= ST_RELATIME;
29 return flags;
30}
31
32static int flags_by_sb(int s_flags)
33{
34 int flags = 0;
35 if (s_flags & MS_SYNCHRONOUS)
36 flags |= ST_SYNCHRONOUS;
37 if (s_flags & MS_MANDLOCK)
38 flags |= ST_MANDLOCK;
39 return flags;
40}
41
42static int calculate_f_flags(struct vfsmount *mnt)
43{
44 return ST_VALID | flags_by_mnt(mnt->mnt_flags) |
45 flags_by_sb(mnt->mnt_sb->s_flags);
46}
47
48int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
49{
50 int retval;
51
52 if (!dentry->d_sb->s_op->statfs)
53 return -ENOSYS;
54
55 memset(buf, 0, sizeof(*buf));
56 retval = security_sb_statfs(dentry);
57 if (retval)
58 return retval;
59 retval = dentry->d_sb->s_op->statfs(dentry, buf);
60 if (retval == 0 && buf->f_frsize == 0)
61 buf->f_frsize = buf->f_bsize;
26 return retval; 62 return retval;
27} 63}
28 64
65int vfs_statfs(struct path *path, struct kstatfs *buf)
66{
67 int error;
68
69 error = statfs_by_dentry(path->dentry, buf);
70 if (!error)
71 buf->f_flags = calculate_f_flags(path->mnt);
72 return error;
73}
29EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
30 75
31static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf) 76static int do_statfs_native(struct path *path, struct statfs *buf)
32{ 77{
33 struct kstatfs st; 78 struct kstatfs st;
34 int retval; 79 int retval;
35 80
36 retval = vfs_statfs(dentry, &st); 81 retval = vfs_statfs(path, &st);
37 if (retval) 82 if (retval)
38 return retval; 83 return retval;
39 84
@@ -67,17 +112,18 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
67 buf->f_fsid = st.f_fsid; 112 buf->f_fsid = st.f_fsid;
68 buf->f_namelen = st.f_namelen; 113 buf->f_namelen = st.f_namelen;
69 buf->f_frsize = st.f_frsize; 114 buf->f_frsize = st.f_frsize;
115 buf->f_flags = st.f_flags;
70 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 116 memset(buf->f_spare, 0, sizeof(buf->f_spare));
71 } 117 }
72 return 0; 118 return 0;
73} 119}
74 120
75static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) 121static int do_statfs64(struct path *path, struct statfs64 *buf)
76{ 122{
77 struct kstatfs st; 123 struct kstatfs st;
78 int retval; 124 int retval;
79 125
80 retval = vfs_statfs(dentry, &st); 126 retval = vfs_statfs(path, &st);
81 if (retval) 127 if (retval)
82 return retval; 128 return retval;
83 129
@@ -94,6 +140,7 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
94 buf->f_fsid = st.f_fsid; 140 buf->f_fsid = st.f_fsid;
95 buf->f_namelen = st.f_namelen; 141 buf->f_namelen = st.f_namelen;
96 buf->f_frsize = st.f_frsize; 142 buf->f_frsize = st.f_frsize;
143 buf->f_flags = st.f_flags;
97 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 144 memset(buf->f_spare, 0, sizeof(buf->f_spare));
98 } 145 }
99 return 0; 146 return 0;
@@ -107,7 +154,7 @@ SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, b
107 error = user_path(pathname, &path); 154 error = user_path(pathname, &path);
108 if (!error) { 155 if (!error) {
109 struct statfs tmp; 156 struct statfs tmp;
110 error = vfs_statfs_native(path.dentry, &tmp); 157 error = do_statfs_native(&path, &tmp);
111 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
112 error = -EFAULT; 159 error = -EFAULT;
113 path_put(&path); 160 path_put(&path);
@@ -125,7 +172,7 @@ SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct stat
125 error = user_path(pathname, &path); 172 error = user_path(pathname, &path);
126 if (!error) { 173 if (!error) {
127 struct statfs64 tmp; 174 struct statfs64 tmp;
128 error = vfs_statfs64(path.dentry, &tmp); 175 error = do_statfs64(&path, &tmp);
129 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
130 error = -EFAULT; 177 error = -EFAULT;
131 path_put(&path); 178 path_put(&path);
@@ -143,7 +190,7 @@ SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
143 file = fget(fd); 190 file = fget(fd);
144 if (!file) 191 if (!file)
145 goto out; 192 goto out;
146 error = vfs_statfs_native(file->f_path.dentry, &tmp); 193 error = do_statfs_native(&file->f_path, &tmp);
147 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
148 error = -EFAULT; 195 error = -EFAULT;
149 fput(file); 196 fput(file);
@@ -164,7 +211,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
164 file = fget(fd); 211 file = fget(fd);
165 if (!file) 212 if (!file)
166 goto out; 213 goto out;
167 error = vfs_statfs64(file->f_path.dentry, &tmp); 214 error = do_statfs64(&file->f_path, &tmp);
168 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
169 error = -EFAULT; 216 error = -EFAULT;
170 fput(file); 217 fput(file);
@@ -183,7 +230,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
183 if (!s) 230 if (!s)
184 return -EINVAL; 231 return -EINVAL;
185 232
186 err = vfs_statfs(s->s_root, &sbuf); 233 err = statfs_by_dentry(s->s_root, &sbuf);
187 drop_super(s); 234 drop_super(s);
188 if (err) 235 if (err)
189 return err; 236 return err;
diff --git a/fs/super.c b/fs/super.c
index 938119ab8dcb..9674ab2c8718 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -305,8 +305,13 @@ retry:
305 if (s) { 305 if (s) {
306 up_write(&s->s_umount); 306 up_write(&s->s_umount);
307 destroy_super(s); 307 destroy_super(s);
308 s = NULL;
308 } 309 }
309 down_write(&old->s_umount); 310 down_write(&old->s_umount);
311 if (unlikely(!(old->s_flags & MS_BORN))) {
312 deactivate_locked_super(old);
313 goto retry;
314 }
310 return old; 315 return old;
311 } 316 }
312 } 317 }
@@ -358,10 +363,10 @@ EXPORT_SYMBOL(drop_super);
358 */ 363 */
359void sync_supers(void) 364void sync_supers(void)
360{ 365{
361 struct super_block *sb, *n; 366 struct super_block *sb, *p = NULL;
362 367
363 spin_lock(&sb_lock); 368 spin_lock(&sb_lock);
364 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 369 list_for_each_entry(sb, &super_blocks, s_list) {
365 if (list_empty(&sb->s_instances)) 370 if (list_empty(&sb->s_instances))
366 continue; 371 continue;
367 if (sb->s_op->write_super && sb->s_dirt) { 372 if (sb->s_op->write_super && sb->s_dirt) {
@@ -374,11 +379,13 @@ void sync_supers(void)
374 up_read(&sb->s_umount); 379 up_read(&sb->s_umount);
375 380
376 spin_lock(&sb_lock); 381 spin_lock(&sb_lock);
377 /* lock was dropped, must reset next */ 382 if (p)
378 list_safe_reset_next(sb, n, s_list); 383 __put_super(p);
379 __put_super(sb); 384 p = sb;
380 } 385 }
381 } 386 }
387 if (p)
388 __put_super(p);
382 spin_unlock(&sb_lock); 389 spin_unlock(&sb_lock);
383} 390}
384 391
@@ -392,10 +399,10 @@ void sync_supers(void)
392 */ 399 */
393void iterate_supers(void (*f)(struct super_block *, void *), void *arg) 400void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
394{ 401{
395 struct super_block *sb, *n; 402 struct super_block *sb, *p = NULL;
396 403
397 spin_lock(&sb_lock); 404 spin_lock(&sb_lock);
398 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 405 list_for_each_entry(sb, &super_blocks, s_list) {
399 if (list_empty(&sb->s_instances)) 406 if (list_empty(&sb->s_instances))
400 continue; 407 continue;
401 sb->s_count++; 408 sb->s_count++;
@@ -407,10 +414,12 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
407 up_read(&sb->s_umount); 414 up_read(&sb->s_umount);
408 415
409 spin_lock(&sb_lock); 416 spin_lock(&sb_lock);
410 /* lock was dropped, must reset next */ 417 if (p)
411 list_safe_reset_next(sb, n, s_list); 418 __put_super(p);
412 __put_super(sb); 419 p = sb;
413 } 420 }
421 if (p)
422 __put_super(p);
414 spin_unlock(&sb_lock); 423 spin_unlock(&sb_lock);
415} 424}
416 425
@@ -572,10 +581,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
572 581
573static void do_emergency_remount(struct work_struct *work) 582static void do_emergency_remount(struct work_struct *work)
574{ 583{
575 struct super_block *sb, *n; 584 struct super_block *sb, *p = NULL;
576 585
577 spin_lock(&sb_lock); 586 spin_lock(&sb_lock);
578 list_for_each_entry_safe(sb, n, &super_blocks, s_list) { 587 list_for_each_entry(sb, &super_blocks, s_list) {
579 if (list_empty(&sb->s_instances)) 588 if (list_empty(&sb->s_instances))
580 continue; 589 continue;
581 sb->s_count++; 590 sb->s_count++;
@@ -589,10 +598,12 @@ static void do_emergency_remount(struct work_struct *work)
589 } 598 }
590 up_write(&sb->s_umount); 599 up_write(&sb->s_umount);
591 spin_lock(&sb_lock); 600 spin_lock(&sb_lock);
592 /* lock was dropped, must reset next */ 601 if (p)
593 list_safe_reset_next(sb, n, s_list); 602 __put_super(p);
594 __put_super(sb); 603 p = sb;
595 } 604 }
605 if (p)
606 __put_super(p);
596 spin_unlock(&sb_lock); 607 spin_unlock(&sb_lock);
597 kfree(work); 608 kfree(work);
598 printk("Emergency Remount complete\n"); 609 printk("Emergency Remount complete\n");
@@ -773,7 +784,16 @@ int get_sb_bdev(struct file_system_type *fs_type,
773 goto error_bdev; 784 goto error_bdev;
774 } 785 }
775 786
787 /*
788 * s_umount nests inside bd_mutex during
789 * __invalidate_device(). close_bdev_exclusive()
790 * acquires bd_mutex and can't be called under
791 * s_umount. Drop s_umount temporarily. This is safe
792 * as we're holding an active reference.
793 */
794 up_write(&s->s_umount);
776 close_bdev_exclusive(bdev, mode); 795 close_bdev_exclusive(bdev, mode);
796 down_write(&s->s_umount);
777 } else { 797 } else {
778 char b[BDEVNAME_SIZE]; 798 char b[BDEVNAME_SIZE];
779 799
@@ -909,6 +929,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
909 goto out_free_secdata; 929 goto out_free_secdata;
910 BUG_ON(!mnt->mnt_sb); 930 BUG_ON(!mnt->mnt_sb);
911 WARN_ON(!mnt->mnt_sb->s_bdi); 931 WARN_ON(!mnt->mnt_sb->s_bdi);
932 mnt->mnt_sb->s_flags |= MS_BORN;
912 933
913 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 934 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
914 if (error) 935 if (error)
diff --git a/fs/sync.c b/fs/sync.c
index 15aa6f03b2da..ba76b9623e7e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -128,31 +128,6 @@ void emergency_sync(void)
128 } 128 }
129} 129}
130 130
131/*
132 * Generic function to fsync a file.
133 */
134int file_fsync(struct file *filp, int datasync)
135{
136 struct inode *inode = filp->f_mapping->host;
137 struct super_block * sb;
138 int ret, err;
139
140 /* sync the inode to buffers */
141 ret = write_inode_now(inode, 0);
142
143 /* sync the superblock to buffers */
144 sb = inode->i_sb;
145 if (sb->s_dirt && sb->s_op->write_super)
146 sb->s_op->write_super(sb);
147
148 /* .. finally sync the buffers to disk */
149 err = sync_blockdev(sb->s_bdev);
150 if (!ret)
151 ret = err;
152 return ret;
153}
154EXPORT_SYMBOL(file_fsync);
155
156/** 131/**
157 * vfs_fsync_range - helper to sync a range of data & metadata to disk 132 * vfs_fsync_range - helper to sync a range of data & metadata to disk
158 * @file: file to sync 133 * @file: file to sync
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0835a3b70e03..cffb1fd8ba33 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -122,7 +122,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
122 goto out; 122 goto out;
123 123
124 /* this ignores size changes */ 124 /* this ignores size changes */
125 generic_setattr(inode, iattr); 125 setattr_copy(inode, iattr);
126 126
127out: 127out:
128 mutex_unlock(&sysfs_mutex); 128 mutex_unlock(&sysfs_mutex);
@@ -312,15 +312,15 @@ struct inode * sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
312 * The sysfs_dirent serves as both an inode and a directory entry for sysfs. 312 * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
313 * To prevent the sysfs inode numbers from being freed prematurely we take a 313 * To prevent the sysfs inode numbers from being freed prematurely we take a
314 * reference to sysfs_dirent from the sysfs inode. A 314 * reference to sysfs_dirent from the sysfs inode. A
315 * super_operations.delete_inode() implementation is needed to drop that 315 * super_operations.evict_inode() implementation is needed to drop that
316 * reference upon inode destruction. 316 * reference upon inode destruction.
317 */ 317 */
318void sysfs_delete_inode(struct inode *inode) 318void sysfs_evict_inode(struct inode *inode)
319{ 319{
320 struct sysfs_dirent *sd = inode->i_private; 320 struct sysfs_dirent *sd = inode->i_private;
321 321
322 truncate_inode_pages(&inode->i_data, 0); 322 truncate_inode_pages(&inode->i_data, 0);
323 clear_inode(inode); 323 end_writeback(inode);
324 sysfs_put(sd); 324 sysfs_put(sd);
325} 325}
326 326
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 281c0c9bc39f..f2af22574c50 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -29,7 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
29static const struct super_operations sysfs_ops = { 29static const struct super_operations sysfs_ops = {
30 .statfs = simple_statfs, 30 .statfs = simple_statfs,
31 .drop_inode = generic_delete_inode, 31 .drop_inode = generic_delete_inode,
32 .delete_inode = sysfs_delete_inode, 32 .evict_inode = sysfs_evict_inode,
33}; 33};
34 34
35struct sysfs_dirent sysfs_root = { 35struct sysfs_dirent sysfs_root = {
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6a13105b5594..d9be60a2e956 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -198,7 +198,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
198 * inode.c 198 * inode.c
199 */ 199 */
200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
201void sysfs_delete_inode(struct inode *inode); 201void sysfs_evict_inode(struct inode *inode);
202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
203int sysfs_permission(struct inode *inode, int mask); 203int sysfs_permission(struct inode *inode, int mask);
204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 79941e4964a4..a77c42157620 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -218,8 +218,7 @@ got_it:
218 pos = page_offset(page) + 218 pos = page_offset(page) +
219 (char*)de - (char*)page_address(page); 219 (char*)de - (char*)page_address(page);
220 lock_page(page); 220 lock_page(page);
221 err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE, 221 err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
222 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
223 if (err) 222 if (err)
224 goto out_unlock; 223 goto out_unlock;
225 memcpy (de->name, name, namelen); 224 memcpy (de->name, name, namelen);
@@ -239,15 +238,13 @@ out_unlock:
239 238
240int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page) 239int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
241{ 240{
242 struct address_space *mapping = page->mapping; 241 struct inode *inode = page->mapping->host;
243 struct inode *inode = (struct inode*)mapping->host;
244 char *kaddr = (char*)page_address(page); 242 char *kaddr = (char*)page_address(page);
245 loff_t pos = page_offset(page) + (char *)de - kaddr; 243 loff_t pos = page_offset(page) + (char *)de - kaddr;
246 int err; 244 int err;
247 245
248 lock_page(page); 246 lock_page(page);
249 err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, 247 err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
250 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
251 BUG_ON(err); 248 BUG_ON(err);
252 de->inode = 0; 249 de->inode = 0;
253 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); 250 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
@@ -259,16 +256,14 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
259 256
260int sysv_make_empty(struct inode *inode, struct inode *dir) 257int sysv_make_empty(struct inode *inode, struct inode *dir)
261{ 258{
262 struct address_space *mapping = inode->i_mapping; 259 struct page *page = grab_cache_page(inode->i_mapping, 0);
263 struct page *page = grab_cache_page(mapping, 0);
264 struct sysv_dir_entry * de; 260 struct sysv_dir_entry * de;
265 char *base; 261 char *base;
266 int err; 262 int err;
267 263
268 if (!page) 264 if (!page)
269 return -ENOMEM; 265 return -ENOMEM;
270 err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE, 266 err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE);
271 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
272 if (err) { 267 if (err) {
273 unlock_page(page); 268 unlock_page(page);
274 goto fail; 269 goto fail;
@@ -341,15 +336,13 @@ not_empty:
341void sysv_set_link(struct sysv_dir_entry *de, struct page *page, 336void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
342 struct inode *inode) 337 struct inode *inode)
343{ 338{
344 struct address_space *mapping = page->mapping; 339 struct inode *dir = page->mapping->host;
345 struct inode *dir = mapping->host;
346 loff_t pos = page_offset(page) + 340 loff_t pos = page_offset(page) +
347 (char *)de-(char*)page_address(page); 341 (char *)de-(char*)page_address(page);
348 int err; 342 int err;
349 343
350 lock_page(page); 344 lock_page(page);
351 err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE, 345 err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
352 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
353 BUG_ON(err); 346 BUG_ON(err);
354 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino); 347 de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
355 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE); 348 err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 750cc22349bd..0a65939508e9 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,29 @@ const struct file_operations sysv_file_operations = {
30 .splice_read = generic_file_splice_read, 30 .splice_read = generic_file_splice_read,
31}; 31};
32 32
33static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
34{
35 struct inode *inode = dentry->d_inode;
36 int error;
37
38 error = inode_change_ok(inode, attr);
39 if (error)
40 return error;
41
42 if ((attr->ia_valid & ATTR_SIZE) &&
43 attr->ia_size != i_size_read(inode)) {
44 error = vmtruncate(inode, attr->ia_size);
45 if (error)
46 return error;
47 }
48
49 setattr_copy(inode, attr);
50 mark_inode_dirty(inode);
51 return 0;
52}
53
33const struct inode_operations sysv_file_inode_operations = { 54const struct inode_operations sysv_file_inode_operations = {
34 .truncate = sysv_truncate, 55 .truncate = sysv_truncate,
56 .setattr = sysv_setattr,
35 .getattr = sysv_getattr, 57 .getattr = sysv_getattr,
36}; 58};
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index fcc498ec9b33..0c96c98bd1db 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -113,7 +113,6 @@ void sysv_free_inode(struct inode * inode)
113 return; 113 return;
114 } 114 }
115 raw_inode = sysv_raw_inode(sb, ino, &bh); 115 raw_inode = sysv_raw_inode(sb, ino, &bh);
116 clear_inode(inode);
117 if (!raw_inode) { 116 if (!raw_inode) {
118 printk("sysv_free_inode: unable to read inode block on device " 117 printk("sysv_free_inode: unable to read inode block on device "
119 "%s\n", inode->i_sb->s_id); 118 "%s\n", inode->i_sb->s_id);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d4a5380b5669..de44d067b9e6 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -71,8 +71,8 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
71 lock_super(sb); 71 lock_super(sb);
72 if (sbi->s_forced_ro) 72 if (sbi->s_forced_ro)
73 *flags |= MS_RDONLY; 73 *flags |= MS_RDONLY;
74 if (!(*flags & MS_RDONLY)) 74 if (*flags & MS_RDONLY)
75 sb->s_dirt = 1; 75 sysv_write_super(sb);
76 unlock_super(sb); 76 unlock_super(sb);
77 return 0; 77 return 0;
78} 78}
@@ -308,12 +308,17 @@ int sysv_sync_inode(struct inode *inode)
308 return __sysv_write_inode(inode, 1); 308 return __sysv_write_inode(inode, 1);
309} 309}
310 310
311static void sysv_delete_inode(struct inode *inode) 311static void sysv_evict_inode(struct inode *inode)
312{ 312{
313 truncate_inode_pages(&inode->i_data, 0); 313 truncate_inode_pages(&inode->i_data, 0);
314 inode->i_size = 0; 314 if (!inode->i_nlink) {
315 sysv_truncate(inode); 315 inode->i_size = 0;
316 sysv_free_inode(inode); 316 sysv_truncate(inode);
317 }
318 invalidate_inode_buffers(inode);
319 end_writeback(inode);
320 if (!inode->i_nlink)
321 sysv_free_inode(inode);
317} 322}
318 323
319static struct kmem_cache *sysv_inode_cachep; 324static struct kmem_cache *sysv_inode_cachep;
@@ -344,7 +349,7 @@ const struct super_operations sysv_sops = {
344 .alloc_inode = sysv_alloc_inode, 349 .alloc_inode = sysv_alloc_inode,
345 .destroy_inode = sysv_destroy_inode, 350 .destroy_inode = sysv_destroy_inode,
346 .write_inode = sysv_write_inode, 351 .write_inode = sysv_write_inode,
347 .delete_inode = sysv_delete_inode, 352 .evict_inode = sysv_evict_inode,
348 .put_super = sysv_put_super, 353 .put_super = sysv_put_super,
349 .write_super = sysv_write_super, 354 .write_super = sysv_write_super,
350 .sync_fs = sysv_sync_fs, 355 .sync_fs = sysv_sync_fs,
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index f042eec464c2..9ca66276315e 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -459,20 +459,25 @@ static int sysv_readpage(struct file *file, struct page *page)
459 return block_read_full_page(page,get_block); 459 return block_read_full_page(page,get_block);
460} 460}
461 461
462int __sysv_write_begin(struct file *file, struct address_space *mapping, 462int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)
463 loff_t pos, unsigned len, unsigned flags,
464 struct page **pagep, void **fsdata)
465{ 463{
466 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 464 return __block_write_begin(page, pos, len, get_block);
467 get_block);
468} 465}
469 466
470static int sysv_write_begin(struct file *file, struct address_space *mapping, 467static int sysv_write_begin(struct file *file, struct address_space *mapping,
471 loff_t pos, unsigned len, unsigned flags, 468 loff_t pos, unsigned len, unsigned flags,
472 struct page **pagep, void **fsdata) 469 struct page **pagep, void **fsdata)
473{ 470{
474 *pagep = NULL; 471 int ret;
475 return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 472
473 ret = block_write_begin(mapping, pos, len, flags, pagep, get_block);
474 if (unlikely(ret)) {
475 loff_t isize = mapping->host->i_size;
476 if (pos + len > isize)
477 vmtruncate(mapping->host, isize);
478 }
479
480 return ret;
476} 481}
477 482
478static sector_t sysv_bmap(struct address_space *mapping, sector_t block) 483static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 5a903da54551..0e44a6253352 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -347,7 +347,6 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
347 sb->s_flags |= MS_RDONLY; 347 sb->s_flags |= MS_RDONLY;
348 if (sbi->s_truncate) 348 if (sbi->s_truncate)
349 sb->s_root->d_op = &sysv_dentry_operations; 349 sb->s_root->d_op = &sysv_dentry_operations;
350 sb->s_dirt = 1;
351 return 1; 350 return 1;
352} 351}
353 352
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 94cb9b4d76c2..bb55cdb394bf 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -136,9 +136,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *);
136 136
137/* itree.c */ 137/* itree.c */
138extern void sysv_truncate(struct inode *); 138extern void sysv_truncate(struct inode *);
139extern int __sysv_write_begin(struct file *file, struct address_space *mapping, 139extern int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len);
140 loff_t pos, unsigned len, unsigned flags,
141 struct page **pagep, void **fsdata);
142 140
143/* inode.c */ 141/* inode.c */
144extern struct inode *sysv_iget(struct super_block *, unsigned int); 142extern struct inode *sysv_iget(struct super_block *, unsigned int);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 12f445cee9f7..03ae894c45de 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -967,14 +967,15 @@ static int do_writepage(struct page *page, int len)
967 * the page locked, and it locks @ui_mutex. However, write-back does take inode 967 * the page locked, and it locks @ui_mutex. However, write-back does take inode
968 * @i_mutex, which means other VFS operations may be run on this inode at the 968 * @i_mutex, which means other VFS operations may be run on this inode at the
969 * same time. And the problematic one is truncation to smaller size, from where 969 * same time. And the problematic one is truncation to smaller size, from where
970 * we have to call 'simple_setsize()', which first changes @inode->i_size, then 970 * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
971 * drops the truncated pages. And while dropping the pages, it takes the page 971 * drops the truncated pages. And while dropping the pages, it takes the page
972 * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with 972 * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
973 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 973 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
974 * means that @inode->i_size is changed while @ui_mutex is unlocked. 974 * means that @inode->i_size is changed while @ui_mutex is unlocked.
975 * 975 *
976 * XXX: with the new truncate the above is not true anymore, the simple_setsize 976 * XXX(truncate): with the new truncate sequence this is not true anymore,
977 * calls can be replaced with the individual components. 977 * and the calls to truncate_setsize can be move around freely. They should
978 * be moved to the very end of the truncate sequence.
978 * 979 *
979 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond 980 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
980 * inode size. How do we do this if @inode->i_size may became smaller while we 981 * inode size. How do we do this if @inode->i_size may became smaller while we
@@ -1128,9 +1129,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
1128 budgeted = 0; 1129 budgeted = 0;
1129 } 1130 }
1130 1131
1131 err = simple_setsize(inode, new_size); 1132 truncate_setsize(inode, new_size);
1132 if (err)
1133 goto out_budg;
1134 1133
1135 if (offset) { 1134 if (offset) {
1136 pgoff_t index = new_size >> PAGE_CACHE_SHIFT; 1135 pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
@@ -1217,16 +1216,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
1217 1216
1218 if (attr->ia_valid & ATTR_SIZE) { 1217 if (attr->ia_valid & ATTR_SIZE) {
1219 dbg_gen("size %lld -> %lld", inode->i_size, new_size); 1218 dbg_gen("size %lld -> %lld", inode->i_size, new_size);
1220 err = simple_setsize(inode, new_size); 1219 truncate_setsize(inode, new_size);
1221 if (err)
1222 goto out;
1223 } 1220 }
1224 1221
1225 mutex_lock(&ui->ui_mutex); 1222 mutex_lock(&ui->ui_mutex);
1226 if (attr->ia_valid & ATTR_SIZE) { 1223 if (attr->ia_valid & ATTR_SIZE) {
1227 /* Truncation changes inode [mc]time */ 1224 /* Truncation changes inode [mc]time */
1228 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); 1225 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1229 /* 'simple_setsize()' changed @i_size, update @ui_size */ 1226 /* 'truncate_setsize()' changed @i_size, update @ui_size */
1230 ui->ui_size = inode->i_size; 1227 ui->ui_size = inode->i_size;
1231 } 1228 }
1232 1229
@@ -1248,10 +1245,6 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
1248 if (IS_SYNC(inode)) 1245 if (IS_SYNC(inode))
1249 err = inode->i_sb->s_op->write_inode(inode, NULL); 1246 err = inode->i_sb->s_op->write_inode(inode, NULL);
1250 return err; 1247 return err;
1251
1252out:
1253 ubifs_release_budget(c, &req);
1254 return err;
1255} 1248}
1256 1249
1257int ubifs_setattr(struct dentry *dentry, struct iattr *attr) 1250int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5fc5a0988970..cd5900b85d38 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -327,7 +327,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
327 return err; 327 return err;
328} 328}
329 329
330static void ubifs_delete_inode(struct inode *inode) 330static void ubifs_evict_inode(struct inode *inode)
331{ 331{
332 int err; 332 int err;
333 struct ubifs_info *c = inode->i_sb->s_fs_info; 333 struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -343,9 +343,12 @@ static void ubifs_delete_inode(struct inode *inode)
343 343
344 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); 344 dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
345 ubifs_assert(!atomic_read(&inode->i_count)); 345 ubifs_assert(!atomic_read(&inode->i_count));
346 ubifs_assert(inode->i_nlink == 0);
347 346
348 truncate_inode_pages(&inode->i_data, 0); 347 truncate_inode_pages(&inode->i_data, 0);
348
349 if (inode->i_nlink)
350 goto done;
351
349 if (is_bad_inode(inode)) 352 if (is_bad_inode(inode))
350 goto out; 353 goto out;
351 354
@@ -367,7 +370,8 @@ out:
367 c->nospace = c->nospace_rp = 0; 370 c->nospace = c->nospace_rp = 0;
368 smp_wmb(); 371 smp_wmb();
369 } 372 }
370 clear_inode(inode); 373done:
374 end_writeback(inode);
371} 375}
372 376
373static void ubifs_dirty_inode(struct inode *inode) 377static void ubifs_dirty_inode(struct inode *inode)
@@ -1826,7 +1830,7 @@ const struct super_operations ubifs_super_operations = {
1826 .destroy_inode = ubifs_destroy_inode, 1830 .destroy_inode = ubifs_destroy_inode,
1827 .put_super = ubifs_put_super, 1831 .put_super = ubifs_put_super,
1828 .write_inode = ubifs_write_inode, 1832 .write_inode = ubifs_write_inode,
1829 .delete_inode = ubifs_delete_inode, 1833 .evict_inode = ubifs_evict_inode,
1830 .statfs = ubifs_statfs, 1834 .statfs = ubifs_statfs,
1831 .dirty_inode = ubifs_dirty_inode, 1835 .dirty_inode = ubifs_dirty_inode,
1832 .remount_fs = ubifs_remount_fs, 1836 .remount_fs = ubifs_remount_fs,
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 04310878f449..0c9876b396dd 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb {
379 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 379 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
380 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 380 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
381 * make sure @inode->i_size is always changed under @ui_mutex, because it 381 * make sure @inode->i_size is always changed under @ui_mutex, because it
382 * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock 382 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
383 * with 'ubifs_writepage()' (see file.c). All the other inode fields are 383 * with 'ubifs_writepage()' (see file.c). All the other inode fields are
384 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 384 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
385 * could consider to rework locking and base it on "shadow" fields. 385 * could consider to rework locking and base it on "shadow" fields.
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 6e450e01a1bb..66b9e7e7e4c5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -227,6 +227,28 @@ const struct file_operations udf_file_operations = {
227 .llseek = generic_file_llseek, 227 .llseek = generic_file_llseek,
228}; 228};
229 229
230static int udf_setattr(struct dentry *dentry, struct iattr *attr)
231{
232 struct inode *inode = dentry->d_inode;
233 int error;
234
235 error = inode_change_ok(inode, attr);
236 if (error)
237 return error;
238
239 if ((attr->ia_valid & ATTR_SIZE) &&
240 attr->ia_size != i_size_read(inode)) {
241 error = vmtruncate(inode, attr->ia_size);
242 if (error)
243 return error;
244 }
245
246 setattr_copy(inode, attr);
247 mark_inode_dirty(inode);
248 return 0;
249}
250
230const struct inode_operations udf_file_inode_operations = { 251const struct inode_operations udf_file_inode_operations = {
252 .setattr = udf_setattr,
231 .truncate = udf_truncate, 253 .truncate = udf_truncate,
232}; 254};
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 18cd7111185d..75d9304d0dc3 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -31,8 +31,6 @@ void udf_free_inode(struct inode *inode)
31 struct super_block *sb = inode->i_sb; 31 struct super_block *sb = inode->i_sb;
32 struct udf_sb_info *sbi = UDF_SB(sb); 32 struct udf_sb_info *sbi = UDF_SB(sb);
33 33
34 clear_inode(inode);
35
36 mutex_lock(&sbi->s_alloc_mutex); 34 mutex_lock(&sbi->s_alloc_mutex);
37 if (sbi->s_lvid_bh) { 35 if (sbi->s_lvid_bh) {
38 struct logicalVolIntegrityDescImpUse *lvidiu = 36 struct logicalVolIntegrityDescImpUse *lvidiu =
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 124852bcf6fe..fc48f37aa2dd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -68,37 +68,23 @@ static void udf_update_extents(struct inode *,
68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int); 68static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
69 69
70 70
71void udf_delete_inode(struct inode *inode) 71void udf_evict_inode(struct inode *inode)
72{
73 truncate_inode_pages(&inode->i_data, 0);
74
75 if (is_bad_inode(inode))
76 goto no_delete;
77
78 inode->i_size = 0;
79 udf_truncate(inode);
80 lock_kernel();
81
82 udf_update_inode(inode, IS_SYNC(inode));
83 udf_free_inode(inode);
84
85 unlock_kernel();
86 return;
87
88no_delete:
89 clear_inode(inode);
90}
91
92/*
93 * If we are going to release inode from memory, we truncate last inode extent
94 * to proper length. We could use drop_inode() but it's called under inode_lock
95 * and thus we cannot mark inode dirty there. We use clear_inode() but we have
96 * to make sure to write inode as it's not written automatically.
97 */
98void udf_clear_inode(struct inode *inode)
99{ 72{
100 struct udf_inode_info *iinfo = UDF_I(inode); 73 struct udf_inode_info *iinfo = UDF_I(inode);
74 int want_delete = 0;
75
76 truncate_inode_pages(&inode->i_data, 0);
101 77
78 if (!inode->i_nlink && !is_bad_inode(inode)) {
79 want_delete = 1;
80 inode->i_size = 0;
81 udf_truncate(inode);
82 lock_kernel();
83 udf_update_inode(inode, IS_SYNC(inode));
84 unlock_kernel();
85 }
86 invalidate_inode_buffers(inode);
87 end_writeback(inode);
102 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 88 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
103 inode->i_size != iinfo->i_lenExtents) { 89 inode->i_size != iinfo->i_lenExtents) {
104 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has " 90 printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
@@ -108,9 +94,13 @@ void udf_clear_inode(struct inode *inode)
108 (unsigned long long)inode->i_size, 94 (unsigned long long)inode->i_size,
109 (unsigned long long)iinfo->i_lenExtents); 95 (unsigned long long)iinfo->i_lenExtents);
110 } 96 }
111
112 kfree(iinfo->i_ext.i_data); 97 kfree(iinfo->i_ext.i_data);
113 iinfo->i_ext.i_data = NULL; 98 iinfo->i_ext.i_data = NULL;
99 if (want_delete) {
100 lock_kernel();
101 udf_free_inode(inode);
102 unlock_kernel();
103 }
114} 104}
115 105
116static int udf_writepage(struct page *page, struct writeback_control *wbc) 106static int udf_writepage(struct page *page, struct writeback_control *wbc)
@@ -127,9 +117,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
127 loff_t pos, unsigned len, unsigned flags, 117 loff_t pos, unsigned len, unsigned flags,
128 struct page **pagep, void **fsdata) 118 struct page **pagep, void **fsdata)
129{ 119{
130 *pagep = NULL; 120 int ret;
131 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 121
132 udf_get_block); 122 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
123 if (unlikely(ret)) {
124 loff_t isize = mapping->host->i_size;
125 if (pos + len > isize)
126 vmtruncate(mapping->host, isize);
127 }
128
129 return ret;
133} 130}
134 131
135static sector_t udf_bmap(struct address_space *mapping, sector_t block) 132static sector_t udf_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 12bb651e5400..65412d84a45d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -175,8 +175,7 @@ static const struct super_operations udf_sb_ops = {
175 .alloc_inode = udf_alloc_inode, 175 .alloc_inode = udf_alloc_inode,
176 .destroy_inode = udf_destroy_inode, 176 .destroy_inode = udf_destroy_inode,
177 .write_inode = udf_write_inode, 177 .write_inode = udf_write_inode,
178 .delete_inode = udf_delete_inode, 178 .evict_inode = udf_evict_inode,
179 .clear_inode = udf_clear_inode,
180 .put_super = udf_put_super, 179 .put_super = udf_put_super,
181 .sync_fs = udf_sync_fs, 180 .sync_fs = udf_sync_fs,
182 .statfs = udf_statfs, 181 .statfs = udf_statfs,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 2bac0354891f..6995ab1f4305 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -139,8 +139,7 @@ extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
139extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 139extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
140extern void udf_truncate(struct inode *); 140extern void udf_truncate(struct inode *);
141extern void udf_read_inode(struct inode *); 141extern void udf_read_inode(struct inode *);
142extern void udf_delete_inode(struct inode *); 142extern void udf_evict_inode(struct inode *);
143extern void udf_clear_inode(struct inode *);
144extern int udf_write_inode(struct inode *, struct writeback_control *wbc); 143extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
145extern long udf_block_map(struct inode *, sector_t); 144extern long udf_block_map(struct inode *, sector_t);
146extern int udf_extend_file(struct inode *, struct extent_position *, 145extern int udf_extend_file(struct inode *, struct extent_position *,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index ec784756dc65..dbc90994715a 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -95,8 +95,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
95 int err; 95 int err;
96 96
97 lock_page(page); 97 lock_page(page);
98 err = __ufs_write_begin(NULL, page->mapping, pos, len, 98 err = ufs_prepare_chunk(page, pos, len);
99 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
100 BUG_ON(err); 99 BUG_ON(err);
101 100
102 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino); 101 de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
@@ -381,8 +380,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
381got_it: 380got_it:
382 pos = page_offset(page) + 381 pos = page_offset(page) +
383 (char*)de - (char*)page_address(page); 382 (char*)de - (char*)page_address(page);
384 err = __ufs_write_begin(NULL, page->mapping, pos, rec_len, 383 err = ufs_prepare_chunk(page, pos, rec_len);
385 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
386 if (err) 384 if (err)
387 goto out_unlock; 385 goto out_unlock;
388 if (de->d_ino) { 386 if (de->d_ino) {
@@ -518,7 +516,6 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
518 struct page * page) 516 struct page * page)
519{ 517{
520 struct super_block *sb = inode->i_sb; 518 struct super_block *sb = inode->i_sb;
521 struct address_space *mapping = page->mapping;
522 char *kaddr = page_address(page); 519 char *kaddr = page_address(page);
523 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); 520 unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
524 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen); 521 unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
@@ -549,8 +546,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
549 546
550 pos = page_offset(page) + from; 547 pos = page_offset(page) + from;
551 lock_page(page); 548 lock_page(page);
552 err = __ufs_write_begin(NULL, mapping, pos, to - from, 549 err = ufs_prepare_chunk(page, pos, to - from);
553 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
554 BUG_ON(err); 550 BUG_ON(err);
555 if (pde) 551 if (pde)
556 pde->d_reclen = cpu_to_fs16(sb, to - from); 552 pde->d_reclen = cpu_to_fs16(sb, to - from);
@@ -577,8 +573,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
577 if (!page) 573 if (!page)
578 return -ENOMEM; 574 return -ENOMEM;
579 575
580 err = __ufs_write_begin(NULL, mapping, 0, chunk_size, 576 err = ufs_prepare_chunk(page, 0, chunk_size);
581 AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
582 if (err) { 577 if (err) {
583 unlock_page(page); 578 unlock_page(page);
584 goto fail; 579 goto fail;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 594480e537d2..428017e018fe 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -94,8 +94,6 @@ void ufs_free_inode (struct inode * inode)
94 94
95 is_directory = S_ISDIR(inode->i_mode); 95 is_directory = S_ISDIR(inode->i_mode);
96 96
97 clear_inode (inode);
98
99 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit)) 97 if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
100 ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino); 98 ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino);
101 else { 99 else {
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 73fe773aa034..2b251f2093af 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -558,20 +558,26 @@ static int ufs_readpage(struct file *file, struct page *page)
558 return block_read_full_page(page,ufs_getfrag_block); 558 return block_read_full_page(page,ufs_getfrag_block);
559} 559}
560 560
561int __ufs_write_begin(struct file *file, struct address_space *mapping, 561int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
562 loff_t pos, unsigned len, unsigned flags,
563 struct page **pagep, void **fsdata)
564{ 562{
565 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 563 return __block_write_begin(page, pos, len, ufs_getfrag_block);
566 ufs_getfrag_block);
567} 564}
568 565
569static int ufs_write_begin(struct file *file, struct address_space *mapping, 566static int ufs_write_begin(struct file *file, struct address_space *mapping,
570 loff_t pos, unsigned len, unsigned flags, 567 loff_t pos, unsigned len, unsigned flags,
571 struct page **pagep, void **fsdata) 568 struct page **pagep, void **fsdata)
572{ 569{
573 *pagep = NULL; 570 int ret;
574 return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata); 571
572 ret = block_write_begin(mapping, pos, len, flags, pagep,
573 ufs_getfrag_block);
574 if (unlikely(ret)) {
575 loff_t isize = mapping->host->i_size;
576 if (pos + len > isize)
577 vmtruncate(mapping->host, isize);
578 }
579
580 return ret;
575} 581}
576 582
577static sector_t ufs_bmap(struct address_space *mapping, sector_t block) 583static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
@@ -905,24 +911,33 @@ int ufs_sync_inode (struct inode *inode)
905 return ufs_update_inode (inode, 1); 911 return ufs_update_inode (inode, 1);
906} 912}
907 913
908void ufs_delete_inode (struct inode * inode) 914void ufs_evict_inode(struct inode * inode)
909{ 915{
910 loff_t old_i_size; 916 int want_delete = 0;
917
918 if (!inode->i_nlink && !is_bad_inode(inode))
919 want_delete = 1;
911 920
912 truncate_inode_pages(&inode->i_data, 0); 921 truncate_inode_pages(&inode->i_data, 0);
913 if (is_bad_inode(inode)) 922 if (want_delete) {
914 goto no_delete; 923 loff_t old_i_size;
915 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 924 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
916 lock_kernel(); 925 lock_kernel();
917 mark_inode_dirty(inode); 926 mark_inode_dirty(inode);
918 ufs_update_inode(inode, IS_SYNC(inode)); 927 ufs_update_inode(inode, IS_SYNC(inode));
919 old_i_size = inode->i_size; 928 old_i_size = inode->i_size;
920 inode->i_size = 0; 929 inode->i_size = 0;
921 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 930 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
922 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); 931 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
923 ufs_free_inode (inode); 932 unlock_kernel();
924 unlock_kernel(); 933 }
925 return; 934
926no_delete: 935 invalidate_inode_buffers(inode);
927 clear_inode(inode); /* We must guarantee clearing of inode... */ 936 end_writeback(inode);
937
938 if (want_delete) {
939 lock_kernel();
940 ufs_free_inode (inode);
941 unlock_kernel();
942 }
928} 943}
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3ec5a9eb6efb..d510c1b91817 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1440,7 +1440,7 @@ static const struct super_operations ufs_super_ops = {
1440 .alloc_inode = ufs_alloc_inode, 1440 .alloc_inode = ufs_alloc_inode,
1441 .destroy_inode = ufs_destroy_inode, 1441 .destroy_inode = ufs_destroy_inode,
1442 .write_inode = ufs_write_inode, 1442 .write_inode = ufs_write_inode,
1443 .delete_inode = ufs_delete_inode, 1443 .evict_inode = ufs_evict_inode,
1444 .put_super = ufs_put_super, 1444 .put_super = ufs_put_super,
1445 .write_super = ufs_write_super, 1445 .write_super = ufs_write_super,
1446 .sync_fs = ufs_sync_fs, 1446 .sync_fs = ufs_sync_fs,
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 589e01a465ba..34d5cb135320 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -500,11 +500,6 @@ out:
500 return err; 500 return err;
501} 501}
502 502
503/*
504 * TODO:
505 * - truncate case should use proper ordering instead of using
506 * simple_setsize
507 */
508int ufs_setattr(struct dentry *dentry, struct iattr *attr) 503int ufs_setattr(struct dentry *dentry, struct iattr *attr)
509{ 504{
510 struct inode *inode = dentry->d_inode; 505 struct inode *inode = dentry->d_inode;
@@ -518,14 +513,17 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
518 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { 513 if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
519 loff_t old_i_size = inode->i_size; 514 loff_t old_i_size = inode->i_size;
520 515
521 error = simple_setsize(inode, attr->ia_size); 516 /* XXX(truncate): truncate_setsize should be called last */
522 if (error) 517 truncate_setsize(inode, attr->ia_size);
523 return error; 518
524 error = ufs_truncate(inode, old_i_size); 519 error = ufs_truncate(inode, old_i_size);
525 if (error) 520 if (error)
526 return error; 521 return error;
527 } 522 }
528 return inode_setattr(inode, attr); 523
524 setattr_copy(inode, attr);
525 mark_inode_dirty(inode);
526 return 0;
529} 527}
530 528
531const struct inode_operations ufs_file_inode_operations = { 529const struct inode_operations ufs_file_inode_operations = {
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 179ae6b3180a..c08782e1b48a 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -108,7 +108,7 @@ extern struct inode * ufs_new_inode (struct inode *, int);
108extern struct inode *ufs_iget(struct super_block *, unsigned long); 108extern struct inode *ufs_iget(struct super_block *, unsigned long);
109extern int ufs_write_inode (struct inode *, struct writeback_control *); 109extern int ufs_write_inode (struct inode *, struct writeback_control *);
110extern int ufs_sync_inode (struct inode *); 110extern int ufs_sync_inode (struct inode *);
111extern void ufs_delete_inode (struct inode *); 111extern void ufs_evict_inode (struct inode *);
112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); 112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
113extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); 113extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
114 114
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 23ceed8c8fb9..0466036912f1 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -257,9 +257,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
257 257
258extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *); 258extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
259extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t); 259extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
260extern int __ufs_write_begin(struct file *file, struct address_space *mapping, 260extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len);
261 loff_t pos, unsigned len, unsigned flags,
262 struct page **pagep, void **fsdata);
263 261
264/* 262/*
265 * These functions manipulate ufs buffers 263 * These functions manipulate ufs buffers
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d24e78f32f3e..15412fe15c3a 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1478,22 +1478,38 @@ xfs_vm_direct_IO(
1478 if (rw & WRITE) { 1478 if (rw & WRITE) {
1479 iocb->private = xfs_alloc_ioend(inode, IO_NEW); 1479 iocb->private = xfs_alloc_ioend(inode, IO_NEW);
1480 1480
1481 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, 1481 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1482 offset, nr_segs, 1482 offset, nr_segs,
1483 xfs_get_blocks_direct, 1483 xfs_get_blocks_direct,
1484 xfs_end_io_direct_write); 1484 xfs_end_io_direct_write, NULL, 0);
1485 if (ret != -EIOCBQUEUED && iocb->private) 1485 if (ret != -EIOCBQUEUED && iocb->private)
1486 xfs_destroy_ioend(iocb->private); 1486 xfs_destroy_ioend(iocb->private);
1487 } else { 1487 } else {
1488 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov, 1488 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1489 offset, nr_segs, 1489 offset, nr_segs,
1490 xfs_get_blocks_direct, 1490 xfs_get_blocks_direct,
1491 NULL); 1491 NULL, NULL, 0);
1492 } 1492 }
1493 1493
1494 return ret; 1494 return ret;
1495} 1495}
1496 1496
1497STATIC void
1498xfs_vm_write_failed(
1499 struct address_space *mapping,
1500 loff_t to)
1501{
1502 struct inode *inode = mapping->host;
1503
1504 if (to > inode->i_size) {
1505 struct iattr ia = {
1506 .ia_valid = ATTR_SIZE | ATTR_FORCE,
1507 .ia_size = inode->i_size,
1508 };
1509 xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
1510 }
1511}
1512
1497STATIC int 1513STATIC int
1498xfs_vm_write_begin( 1514xfs_vm_write_begin(
1499 struct file *file, 1515 struct file *file,
@@ -1504,9 +1520,31 @@ xfs_vm_write_begin(
1504 struct page **pagep, 1520 struct page **pagep,
1505 void **fsdata) 1521 void **fsdata)
1506{ 1522{
1507 *pagep = NULL; 1523 int ret;
1508 return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, 1524
1509 pagep, fsdata, xfs_get_blocks); 1525 ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
1526 pagep, xfs_get_blocks);
1527 if (unlikely(ret))
1528 xfs_vm_write_failed(mapping, pos + len);
1529 return ret;
1530}
1531
1532STATIC int
1533xfs_vm_write_end(
1534 struct file *file,
1535 struct address_space *mapping,
1536 loff_t pos,
1537 unsigned len,
1538 unsigned copied,
1539 struct page *page,
1540 void *fsdata)
1541{
1542 int ret;
1543
1544 ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
1545 if (unlikely(ret < len))
1546 xfs_vm_write_failed(mapping, pos + len);
1547 return ret;
1510} 1548}
1511 1549
1512STATIC sector_t 1550STATIC sector_t
@@ -1551,7 +1589,7 @@ const struct address_space_operations xfs_address_space_operations = {
1551 .releasepage = xfs_vm_releasepage, 1589 .releasepage = xfs_vm_releasepage,
1552 .invalidatepage = xfs_vm_invalidatepage, 1590 .invalidatepage = xfs_vm_invalidatepage,
1553 .write_begin = xfs_vm_write_begin, 1591 .write_begin = xfs_vm_write_begin,
1554 .write_end = generic_write_end, 1592 .write_end = xfs_vm_write_end,
1555 .bmap = xfs_vm_bmap, 1593 .bmap = xfs_vm_bmap,
1556 .direct_IO = xfs_vm_direct_IO, 1594 .direct_IO = xfs_vm_direct_IO,
1557 .migratepage = buffer_migrate_page, 1595 .migratepage = buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 536b81e63a3d..68be25dcd301 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -80,7 +80,7 @@ xfs_mark_inode_dirty_sync(
80{ 80{
81 struct inode *inode = VFS_I(ip); 81 struct inode *inode = VFS_I(ip);
82 82
83 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 83 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
84 mark_inode_dirty_sync(inode); 84 mark_inode_dirty_sync(inode);
85} 85}
86 86
@@ -90,7 +90,7 @@ xfs_mark_inode_dirty(
90{ 90{
91 struct inode *inode = VFS_I(ip); 91 struct inode *inode = VFS_I(ip);
92 92
93 if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) 93 if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
94 mark_inode_dirty(inode); 94 mark_inode_dirty(inode);
95} 95}
96 96
@@ -540,21 +540,6 @@ xfs_vn_setattr(
540 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); 540 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
541} 541}
542 542
543/*
544 * block_truncate_page can return an error, but we can't propagate it
545 * at all here. Leave a complaint + stack trace in the syslog because
546 * this could be bad. If it is bad, we need to propagate the error further.
547 */
548STATIC void
549xfs_vn_truncate(
550 struct inode *inode)
551{
552 int error;
553 error = block_truncate_page(inode->i_mapping, inode->i_size,
554 xfs_get_blocks);
555 WARN_ON(error);
556}
557
558STATIC long 543STATIC long
559xfs_vn_fallocate( 544xfs_vn_fallocate(
560 struct inode *inode, 545 struct inode *inode,
@@ -694,7 +679,6 @@ xfs_vn_fiemap(
694 679
695static const struct inode_operations xfs_inode_operations = { 680static const struct inode_operations xfs_inode_operations = {
696 .check_acl = xfs_check_acl, 681 .check_acl = xfs_check_acl,
697 .truncate = xfs_vn_truncate,
698 .getattr = xfs_vn_getattr, 682 .getattr = xfs_vn_getattr,
699 .setattr = xfs_vn_setattr, 683 .setattr = xfs_vn_setattr,
700 .setxattr = generic_setxattr, 684 .setxattr = generic_setxattr,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 998a9d7fb9c8..2fa0bd9ebc7f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -156,8 +156,6 @@
156 */ 156 */
157#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) 157#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
158#define xfs_stack_trace() dump_stack() 158#define xfs_stack_trace() dump_stack()
159#define xfs_itruncate_data(ip, off) \
160 (-vmtruncate(VFS_I(ip), (off)))
161 159
162 160
163/* Move the kernel do_div definition off to one side */ 161/* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 758df94690ed..15c35b62ff14 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1100,13 +1100,15 @@ xfs_fs_write_inode(
1100} 1100}
1101 1101
1102STATIC void 1102STATIC void
1103xfs_fs_clear_inode( 1103xfs_fs_evict_inode(
1104 struct inode *inode) 1104 struct inode *inode)
1105{ 1105{
1106 xfs_inode_t *ip = XFS_I(inode); 1106 xfs_inode_t *ip = XFS_I(inode);
1107 1107
1108 trace_xfs_clear_inode(ip); 1108 trace_xfs_evict_inode(ip);
1109 1109
1110 truncate_inode_pages(&inode->i_data, 0);
1111 end_writeback(inode);
1110 XFS_STATS_INC(vn_rele); 1112 XFS_STATS_INC(vn_rele);
1111 XFS_STATS_INC(vn_remove); 1113 XFS_STATS_INC(vn_remove);
1112 XFS_STATS_DEC(vn_active); 1114 XFS_STATS_DEC(vn_active);
@@ -1622,7 +1624,7 @@ static const struct super_operations xfs_super_operations = {
1622 .destroy_inode = xfs_fs_destroy_inode, 1624 .destroy_inode = xfs_fs_destroy_inode,
1623 .dirty_inode = xfs_fs_dirty_inode, 1625 .dirty_inode = xfs_fs_dirty_inode,
1624 .write_inode = xfs_fs_write_inode, 1626 .write_inode = xfs_fs_write_inode,
1625 .clear_inode = xfs_fs_clear_inode, 1627 .evict_inode = xfs_fs_evict_inode,
1626 .put_super = xfs_fs_put_super, 1628 .put_super = xfs_fs_put_super,
1627 .sync_fs = xfs_fs_sync_fs, 1629 .sync_fs = xfs_fs_sync_fs,
1628 .freeze_fs = xfs_fs_freeze, 1630 .freeze_fs = xfs_fs_freeze,
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c657cdca2cd2..be5dffd282a1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -581,7 +581,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
581DEFINE_INODE_EVENT(xfs_file_fsync); 581DEFINE_INODE_EVENT(xfs_file_fsync);
582DEFINE_INODE_EVENT(xfs_destroy_inode); 582DEFINE_INODE_EVENT(xfs_destroy_inode);
583DEFINE_INODE_EVENT(xfs_write_inode); 583DEFINE_INODE_EVENT(xfs_write_inode);
584DEFINE_INODE_EVENT(xfs_clear_inode); 584DEFINE_INODE_EVENT(xfs_evict_inode);
585 585
586DEFINE_INODE_EVENT(xfs_dquot_dqalloc); 586DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
587DEFINE_INODE_EVENT(xfs_dquot_dqdetach); 587DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 3ac137dd531b..66d585c6917c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -221,8 +221,11 @@ xfs_setattr(
221 * transaction to modify the i_size. 221 * transaction to modify the i_size.
222 */ 222 */
223 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); 223 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
224 if (code)
225 goto error_return;
224 } 226 }
225 xfs_iunlock(ip, XFS_ILOCK_EXCL); 227 xfs_iunlock(ip, XFS_ILOCK_EXCL);
228 lock_flags &= ~XFS_ILOCK_EXCL;
226 229
227 /* 230 /*
228 * We are going to log the inode size change in this 231 * We are going to log the inode size change in this
@@ -236,36 +239,35 @@ xfs_setattr(
236 * really care about here and prevents waiting for other data 239 * really care about here and prevents waiting for other data
237 * not within the range we care about here. 240 * not within the range we care about here.
238 */ 241 */
239 if (!code && 242 if (ip->i_size != ip->i_d.di_size &&
240 ip->i_size != ip->i_d.di_size &&
241 iattr->ia_size > ip->i_d.di_size) { 243 iattr->ia_size > ip->i_d.di_size) {
242 code = xfs_flush_pages(ip, 244 code = xfs_flush_pages(ip,
243 ip->i_d.di_size, iattr->ia_size, 245 ip->i_d.di_size, iattr->ia_size,
244 XBF_ASYNC, FI_NONE); 246 XBF_ASYNC, FI_NONE);
247 if (code)
248 goto error_return;
245 } 249 }
246 250
247 /* wait for all I/O to complete */ 251 /* wait for all I/O to complete */
248 xfs_ioend_wait(ip); 252 xfs_ioend_wait(ip);
249 253
250 if (!code) 254 code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
251 code = xfs_itruncate_data(ip, iattr->ia_size); 255 xfs_get_blocks);
252 if (code) { 256 if (code)
253 ASSERT(tp == NULL);
254 lock_flags &= ~XFS_ILOCK_EXCL;
255 ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
256 goto error_return; 257 goto error_return;
257 } 258
258 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 259 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
259 if ((code = xfs_trans_reserve(tp, 0, 260 code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
260 XFS_ITRUNCATE_LOG_RES(mp), 0, 261 XFS_TRANS_PERM_LOG_RES,
261 XFS_TRANS_PERM_LOG_RES, 262 XFS_ITRUNCATE_LOG_COUNT);
262 XFS_ITRUNCATE_LOG_COUNT))) { 263 if (code)
263 xfs_trans_cancel(tp, 0); 264 goto error_return;
264 if (need_iolock) 265
265 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 266 truncate_setsize(inode, iattr->ia_size);
266 return code; 267
267 }
268 commit_flags = XFS_TRANS_RELEASE_LOG_RES; 268 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
269 lock_flags |= XFS_ILOCK_EXCL;
270
269 xfs_ilock(ip, XFS_ILOCK_EXCL); 271 xfs_ilock(ip, XFS_ILOCK_EXCL);
270 272
271 xfs_trans_ijoin(tp, ip); 273 xfs_trans_ijoin(tp, ip);