aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c3
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/btrfs/ioctl.c15
-rw-r--r--fs/btrfs/transaction.c7
-rw-r--r--fs/buffer.c28
-rw-r--r--fs/ceph/addr.c3
-rw-r--r--fs/ecryptfs/inode.c30
-rw-r--r--fs/exec.c19
-rw-r--r--fs/ext2/inode.c5
-rw-r--r--fs/ext2/super.c33
-rw-r--r--fs/ext4/inode.c15
-rw-r--r--fs/ext4/mmp.c6
-rw-r--r--fs/ext4/super.c31
-rw-r--r--fs/fat/file.c15
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/file.c18
-rw-r--r--fs/gfs2/trans.c4
-rw-r--r--fs/inode.c12
-rw-r--r--fs/internal.h4
-rw-r--r--fs/lockd/clntproc.c14
-rw-r--r--fs/lockd/svc4proc.c1
-rw-r--r--fs/lockd/svclock.c1
-rw-r--r--fs/lockd/svcproc.c1
-rw-r--r--fs/namei.c313
-rw-r--r--fs/namespace.c97
-rw-r--r--fs/nfsd/nfs4recover.c9
-rw-r--r--fs/nfsd/nfsfh.c1
-rw-r--r--fs/nfsd/nfsproc.c9
-rw-r--r--fs/nfsd/vfs.c79
-rw-r--r--fs/nfsd/vfs.h11
-rw-r--r--fs/nilfs2/file.c18
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/segment.c5
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ocfs2/file.c11
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c7
-rw-r--r--fs/ocfs2/mmap.c2
-rw-r--r--fs/ocfs2/refcounttree.c11
-rw-r--r--fs/open.c15
-rw-r--r--fs/pipe.c75
-rw-r--r--fs/splice.c3
-rw-r--r--fs/super.c252
-rw-r--r--fs/sysfs/bin.c2
-rw-r--r--fs/xfs/xfs_aops.c18
-rw-r--r--fs/xfs/xfs_file.c10
-rw-r--r--fs/xfs/xfs_ioctl.c55
-rw-r--r--fs/xfs/xfs_ioctl32.c12
-rw-r--r--fs/xfs/xfs_iomap.c4
-rw-r--r--fs/xfs/xfs_mount.c2
-rw-r--r--fs/xfs/xfs_mount.h3
-rw-r--r--fs/xfs/xfs_sync.c2
-rw-r--r--fs/xfs/xfs_trans.c17
-rw-r--r--fs/xfs/xfs_trans.h2
57 files changed, 937 insertions, 412 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index fc06fd27065e..dd6f7ee1e312 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n", 610 p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
611 page, (unsigned long)filp->private_data); 611 page, (unsigned long)filp->private_data);
612 612
613 /* Update file times before taking page lock */
614 file_update_time(filp);
615
613 v9inode = V9FS_I(inode); 616 v9inode = V9FS_I(inode);
614 /* make sure the cache has finished storing the page */ 617 /* make sure the cache has finished storing the page */
615 v9fs_fscache_wait_on_page_write(inode, page); 618 v9fs_fscache_wait_on_page_write(inode, page);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fadeba6a5db9..62e0cafd6e25 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg)
1614 struct btrfs_root *root = arg; 1614 struct btrfs_root *root = arg;
1615 1615
1616 do { 1616 do {
1617 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1618
1619 if (!(root->fs_info->sb->s_flags & MS_RDONLY) && 1617 if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
1620 mutex_trylock(&root->fs_info->cleaner_mutex)) { 1618 mutex_trylock(&root->fs_info->cleaner_mutex)) {
1621 btrfs_run_delayed_iputs(root); 1619 btrfs_run_delayed_iputs(root);
@@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg)
1647 do { 1645 do {
1648 cannot_commit = false; 1646 cannot_commit = false;
1649 delay = HZ * 30; 1647 delay = HZ * 30;
1650 vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
1651 mutex_lock(&root->fs_info->transaction_kthread_mutex); 1648 mutex_lock(&root->fs_info->transaction_kthread_mutex);
1652 1649
1653 spin_lock(&root->fs_info->trans_lock); 1650 spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9aa01ec2138d..5caf285c6e4d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1379 ssize_t err = 0; 1379 ssize_t err = 0;
1380 size_t count, ocount; 1380 size_t count, ocount;
1381 1381
1382 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 1382 sb_start_write(inode->i_sb);
1383 1383
1384 mutex_lock(&inode->i_mutex); 1384 mutex_lock(&inode->i_mutex);
1385 1385
@@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1469 num_written = err; 1469 num_written = err;
1470 } 1470 }
1471out: 1471out:
1472 sb_end_write(inode->i_sb);
1472 current->backing_dev_info = NULL; 1473 current->backing_dev_info = NULL;
1473 return num_written ? num_written : err; 1474 return num_written ? num_written : err;
1474} 1475}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48bdfd2591c2..83baec24946d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6629,6 +6629,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6629 u64 page_start; 6629 u64 page_start;
6630 u64 page_end; 6630 u64 page_end;
6631 6631
6632 sb_start_pagefault(inode->i_sb);
6632 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 6633 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
6633 if (!ret) { 6634 if (!ret) {
6634 ret = file_update_time(vma->vm_file); 6635 ret = file_update_time(vma->vm_file);
@@ -6718,12 +6719,15 @@ again:
6718 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); 6719 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
6719 6720
6720out_unlock: 6721out_unlock:
6721 if (!ret) 6722 if (!ret) {
6723 sb_end_pagefault(inode->i_sb);
6722 return VM_FAULT_LOCKED; 6724 return VM_FAULT_LOCKED;
6725 }
6723 unlock_page(page); 6726 unlock_page(page);
6724out: 6727out:
6725 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); 6728 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
6726out_noreserve: 6729out_noreserve:
6730 sb_end_pagefault(inode->i_sb);
6727 return ret; 6731 return ret;
6728} 6732}
6729 6733
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 43f0012016e3..bc2f6ffff3cf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
195 if (!inode_owner_or_capable(inode)) 195 if (!inode_owner_or_capable(inode))
196 return -EACCES; 196 return -EACCES;
197 197
198 ret = mnt_want_write_file(file);
199 if (ret)
200 return ret;
201
198 mutex_lock(&inode->i_mutex); 202 mutex_lock(&inode->i_mutex);
199 203
200 ip_oldflags = ip->flags; 204 ip_oldflags = ip->flags;
@@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
209 } 213 }
210 } 214 }
211 215
212 ret = mnt_want_write_file(file);
213 if (ret)
214 goto out_unlock;
215
216 if (flags & FS_SYNC_FL) 216 if (flags & FS_SYNC_FL)
217 ip->flags |= BTRFS_INODE_SYNC; 217 ip->flags |= BTRFS_INODE_SYNC;
218 else 218 else
@@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
275 inode->i_flags = i_oldflags; 275 inode->i_flags = i_oldflags;
276 } 276 }
277 277
278 mnt_drop_write_file(file);
279 out_unlock: 278 out_unlock:
280 mutex_unlock(&inode->i_mutex); 279 mutex_unlock(&inode->i_mutex);
280 mnt_drop_write_file(file);
281 return ret; 281 return ret;
282} 282}
283 283
@@ -664,6 +664,10 @@ static noinline int btrfs_mksubvol(struct path *parent,
664 struct dentry *dentry; 664 struct dentry *dentry;
665 int error; 665 int error;
666 666
667 error = mnt_want_write(parent->mnt);
668 if (error)
669 return error;
670
667 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); 671 mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
668 672
669 dentry = lookup_one_len(name, parent->dentry, namelen); 673 dentry = lookup_one_len(name, parent->dentry, namelen);
@@ -699,6 +703,7 @@ out_dput:
699 dput(dentry); 703 dput(dentry);
700out_unlock: 704out_unlock:
701 mutex_unlock(&dir->i_mutex); 705 mutex_unlock(&dir->i_mutex);
706 mnt_drop_write(parent->mnt);
702 return error; 707 return error;
703} 708}
704 709
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7ac7cdcc294e..17be3dedacba 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -335,6 +335,8 @@ again:
335 if (!h) 335 if (!h)
336 return ERR_PTR(-ENOMEM); 336 return ERR_PTR(-ENOMEM);
337 337
338 sb_start_intwrite(root->fs_info->sb);
339
338 if (may_wait_transaction(root, type)) 340 if (may_wait_transaction(root, type))
339 wait_current_trans(root); 341 wait_current_trans(root);
340 342
@@ -345,6 +347,7 @@ again:
345 } while (ret == -EBUSY); 347 } while (ret == -EBUSY);
346 348
347 if (ret < 0) { 349 if (ret < 0) {
350 sb_end_intwrite(root->fs_info->sb);
348 kmem_cache_free(btrfs_trans_handle_cachep, h); 351 kmem_cache_free(btrfs_trans_handle_cachep, h);
349 return ERR_PTR(ret); 352 return ERR_PTR(ret);
350 } 353 }
@@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
548 btrfs_trans_release_metadata(trans, root); 551 btrfs_trans_release_metadata(trans, root);
549 trans->block_rsv = NULL; 552 trans->block_rsv = NULL;
550 553
554 sb_end_intwrite(root->fs_info->sb);
555
551 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) && 556 if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
552 should_end_transaction(trans, root)) { 557 should_end_transaction(trans, root)) {
553 trans->transaction->blocked = 1; 558 trans->transaction->blocked = 1;
@@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1578 put_transaction(cur_trans); 1583 put_transaction(cur_trans);
1579 put_transaction(cur_trans); 1584 put_transaction(cur_trans);
1580 1585
1586 sb_end_intwrite(root->fs_info->sb);
1587
1581 trace_btrfs_transaction_commit(root); 1588 trace_btrfs_transaction_commit(root);
1582 1589
1583 btrfs_scrub_continue(root); 1590 btrfs_scrub_continue(root);
diff --git a/fs/buffer.c b/fs/buffer.c
index c7062c896d7c..9f6d2e41281d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
2306 * beyond EOF, then the page is guaranteed safe against truncation until we 2306 * beyond EOF, then the page is guaranteed safe against truncation until we
2307 * unlock the page. 2307 * unlock the page.
2308 * 2308 *
2309 * Direct callers of this function should call vfs_check_frozen() so that page 2309 * Direct callers of this function should protect against filesystem freezing
2310 * fault does not busyloop until the fs is thawed. 2310 * using sb_start_write() - sb_end_write() functions.
2311 */ 2311 */
2312int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, 2312int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2313 get_block_t get_block) 2313 get_block_t get_block)
@@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2318 loff_t size; 2318 loff_t size;
2319 int ret; 2319 int ret;
2320 2320
2321 /*
2322 * Update file times before taking page lock. We may end up failing the
2323 * fault so this update may be superfluous but who really cares...
2324 */
2325 file_update_time(vma->vm_file);
2326
2321 lock_page(page); 2327 lock_page(page);
2322 size = i_size_read(inode); 2328 size = i_size_read(inode);
2323 if ((page->mapping != inode->i_mapping) || 2329 if ((page->mapping != inode->i_mapping) ||
@@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2339 2345
2340 if (unlikely(ret < 0)) 2346 if (unlikely(ret < 0))
2341 goto out_unlock; 2347 goto out_unlock;
2342 /*
2343 * Freezing in progress? We check after the page is marked dirty and
2344 * with page lock held so if the test here fails, we are sure freezing
2345 * code will wait during syncing until the page fault is done - at that
2346 * point page will be dirty and unlocked so freezing code will write it
2347 * and writeprotect it again.
2348 */
2349 set_page_dirty(page); 2348 set_page_dirty(page);
2350 if (inode->i_sb->s_frozen != SB_UNFROZEN) {
2351 ret = -EAGAIN;
2352 goto out_unlock;
2353 }
2354 wait_on_page_writeback(page); 2349 wait_on_page_writeback(page);
2355 return 0; 2350 return 0;
2356out_unlock: 2351out_unlock:
@@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2365 int ret; 2360 int ret;
2366 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb; 2361 struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
2367 2362
2368 /* 2363 sb_start_pagefault(sb);
2369 * This check is racy but catches the common case. The check in
2370 * __block_page_mkwrite() is reliable.
2371 */
2372 vfs_check_frozen(sb, SB_FREEZE_WRITE);
2373 ret = __block_page_mkwrite(vma, vmf, get_block); 2364 ret = __block_page_mkwrite(vma, vmf, get_block);
2365 sb_end_pagefault(sb);
2374 return block_page_mkwrite_return(ret); 2366 return block_page_mkwrite_return(ret);
2375} 2367}
2376EXPORT_SYMBOL(block_page_mkwrite); 2368EXPORT_SYMBOL(block_page_mkwrite);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8b67304e4b80..452e71a1b753 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1184 loff_t size, len; 1184 loff_t size, len;
1185 int ret; 1185 int ret;
1186 1186
1187 /* Update time before taking page lock */
1188 file_update_time(vma->vm_file);
1189
1187 size = i_size_read(inode); 1190 size = i_size_read(inode);
1188 if (off + PAGE_CACHE_SIZE <= size) 1191 if (off + PAGE_CACHE_SIZE <= size)
1189 len = PAGE_CACHE_SIZE; 1192 len = PAGE_CACHE_SIZE;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ffa2be57804d..c3ca12c33ca2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -318,21 +318,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
318 struct vfsmount *lower_mnt; 318 struct vfsmount *lower_mnt;
319 int rc = 0; 319 int rc = 0;
320 320
321 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
322 fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
323 BUG_ON(!lower_dentry->d_count);
324
325 dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); 321 dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
326 ecryptfs_set_dentry_private(dentry, dentry_info);
327 if (!dentry_info) { 322 if (!dentry_info) {
328 printk(KERN_ERR "%s: Out of memory whilst attempting " 323 printk(KERN_ERR "%s: Out of memory whilst attempting "
329 "to allocate ecryptfs_dentry_info struct\n", 324 "to allocate ecryptfs_dentry_info struct\n",
330 __func__); 325 __func__);
331 dput(lower_dentry); 326 dput(lower_dentry);
332 mntput(lower_mnt);
333 d_drop(dentry);
334 return -ENOMEM; 327 return -ENOMEM;
335 } 328 }
329
330 lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
331 fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
332 BUG_ON(!lower_dentry->d_count);
333
334 ecryptfs_set_dentry_private(dentry, dentry_info);
336 ecryptfs_set_dentry_lower(dentry, lower_dentry); 335 ecryptfs_set_dentry_lower(dentry, lower_dentry);
337 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); 336 ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
338 337
@@ -381,12 +380,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
381 struct dentry *lower_dir_dentry, *lower_dentry; 380 struct dentry *lower_dir_dentry, *lower_dentry;
382 int rc = 0; 381 int rc = 0;
383 382
384 if ((ecryptfs_dentry->d_name.len == 1
385 && !strcmp(ecryptfs_dentry->d_name.name, "."))
386 || (ecryptfs_dentry->d_name.len == 2
387 && !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
388 goto out_d_drop;
389 }
390 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 383 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
391 mutex_lock(&lower_dir_dentry->d_inode->i_mutex); 384 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
392 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, 385 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
@@ -397,8 +390,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
397 rc = PTR_ERR(lower_dentry); 390 rc = PTR_ERR(lower_dentry);
398 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 391 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
399 "[%d] on lower_dentry = [%s]\n", __func__, rc, 392 "[%d] on lower_dentry = [%s]\n", __func__, rc,
400 encrypted_and_encoded_name); 393 ecryptfs_dentry->d_name.name);
401 goto out_d_drop; 394 goto out;
402 } 395 }
403 if (lower_dentry->d_inode) 396 if (lower_dentry->d_inode)
404 goto interpose; 397 goto interpose;
@@ -415,7 +408,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
415 if (rc) { 408 if (rc) {
416 printk(KERN_ERR "%s: Error attempting to encrypt and encode " 409 printk(KERN_ERR "%s: Error attempting to encrypt and encode "
417 "filename; rc = [%d]\n", __func__, rc); 410 "filename; rc = [%d]\n", __func__, rc);
418 goto out_d_drop; 411 goto out;
419 } 412 }
420 mutex_lock(&lower_dir_dentry->d_inode->i_mutex); 413 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
421 lower_dentry = lookup_one_len(encrypted_and_encoded_name, 414 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
@@ -427,14 +420,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
427 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 420 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
428 "[%d] on lower_dentry = [%s]\n", __func__, rc, 421 "[%d] on lower_dentry = [%s]\n", __func__, rc,
429 encrypted_and_encoded_name); 422 encrypted_and_encoded_name);
430 goto out_d_drop; 423 goto out;
431 } 424 }
432interpose: 425interpose:
433 rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, 426 rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry,
434 ecryptfs_dir_inode); 427 ecryptfs_dir_inode);
435 goto out;
436out_d_drop:
437 d_drop(ecryptfs_dentry);
438out: 428out:
439 kfree(encrypted_and_encoded_name); 429 kfree(encrypted_and_encoded_name);
440 return ERR_PTR(rc); 430 return ERR_PTR(rc);
diff --git a/fs/exec.c b/fs/exec.c
index 3684353ebd5f..574cf4de4ec3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file)
2069 */ 2069 */
2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) 2070static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
2071{ 2071{
2072 struct file *rp, *wp; 2072 struct file *files[2];
2073 struct fdtable *fdt; 2073 struct fdtable *fdt;
2074 struct coredump_params *cp = (struct coredump_params *)info->data; 2074 struct coredump_params *cp = (struct coredump_params *)info->data;
2075 struct files_struct *cf = current->files; 2075 struct files_struct *cf = current->files;
2076 int err = create_pipe_files(files, 0);
2077 if (err)
2078 return err;
2076 2079
2077 wp = create_write_pipe(0); 2080 cp->file = files[1];
2078 if (IS_ERR(wp))
2079 return PTR_ERR(wp);
2080
2081 rp = create_read_pipe(wp, 0);
2082 if (IS_ERR(rp)) {
2083 free_write_pipe(wp);
2084 return PTR_ERR(rp);
2085 }
2086
2087 cp->file = wp;
2088 2081
2089 sys_close(0); 2082 sys_close(0);
2090 fd_install(0, rp); 2083 fd_install(0, files[0]);
2091 spin_lock(&cf->file_lock); 2084 spin_lock(&cf->file_lock);
2092 fdt = files_fdtable(cf); 2085 fdt = files_fdtable(cf);
2093 __set_open_fd(0, fdt); 2086 __set_open_fd(0, fdt);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 264d315f6c47..6363ac66fafa 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode)
79 truncate_inode_pages(&inode->i_data, 0); 79 truncate_inode_pages(&inode->i_data, 0);
80 80
81 if (want_delete) { 81 if (want_delete) {
82 sb_start_intwrite(inode->i_sb);
82 /* set dtime */ 83 /* set dtime */
83 EXT2_I(inode)->i_dtime = get_seconds(); 84 EXT2_I(inode)->i_dtime = get_seconds();
84 mark_inode_dirty(inode); 85 mark_inode_dirty(inode);
@@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode)
98 if (unlikely(rsv)) 99 if (unlikely(rsv))
99 kfree(rsv); 100 kfree(rsv);
100 101
101 if (want_delete) 102 if (want_delete) {
102 ext2_free_inode(inode); 103 ext2_free_inode(inode);
104 sb_end_intwrite(inode->i_sb);
105 }
103} 106}
104 107
105typedef struct { 108typedef struct {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f311d27b16f..af74d9e27b71 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb,
42static int ext2_remount (struct super_block * sb, int * flags, char * data); 42static int ext2_remount (struct super_block * sb, int * flags, char * data);
43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); 43static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
44static int ext2_sync_fs(struct super_block *sb, int wait); 44static int ext2_sync_fs(struct super_block *sb, int wait);
45static int ext2_freeze(struct super_block *sb);
46static int ext2_unfreeze(struct super_block *sb);
45 47
46void ext2_error(struct super_block *sb, const char *function, 48void ext2_error(struct super_block *sb, const char *function,
47 const char *fmt, ...) 49 const char *fmt, ...)
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = {
305 .evict_inode = ext2_evict_inode, 307 .evict_inode = ext2_evict_inode,
306 .put_super = ext2_put_super, 308 .put_super = ext2_put_super,
307 .sync_fs = ext2_sync_fs, 309 .sync_fs = ext2_sync_fs,
310 .freeze_fs = ext2_freeze,
311 .unfreeze_fs = ext2_unfreeze,
308 .statfs = ext2_statfs, 312 .statfs = ext2_statfs,
309 .remount_fs = ext2_remount, 313 .remount_fs = ext2_remount,
310 .show_options = ext2_show_options, 314 .show_options = ext2_show_options,
@@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
1200 return 0; 1204 return 0;
1201} 1205}
1202 1206
1207static int ext2_freeze(struct super_block *sb)
1208{
1209 struct ext2_sb_info *sbi = EXT2_SB(sb);
1210
1211 /*
1212 * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared
1213 * because we have unattached inodes and thus filesystem is not fully
1214 * consistent.
1215 */
1216 if (atomic_long_read(&sb->s_remove_count)) {
1217 ext2_sync_fs(sb, 1);
1218 return 0;
1219 }
1220 /* Set EXT2_FS_VALID flag */
1221 spin_lock(&sbi->s_lock);
1222 sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state);
1223 spin_unlock(&sbi->s_lock);
1224 ext2_sync_super(sb, sbi->s_es, 1);
1225
1226 return 0;
1227}
1228
1229static int ext2_unfreeze(struct super_block *sb)
1230{
1231 /* Just write sb to clear EXT2_VALID_FS flag */
1232 ext2_write_super(sb);
1233
1234 return 0;
1235}
1203 1236
1204void ext2_write_super(struct super_block *sb) 1237void ext2_write_super(struct super_block *sb)
1205{ 1238{
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89b59cb7f9b8..6324f74e0342 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode)
233 if (is_bad_inode(inode)) 233 if (is_bad_inode(inode))
234 goto no_delete; 234 goto no_delete;
235 235
236 /*
237 * Protect us against freezing - iput() caller didn't have to have any
238 * protection against it
239 */
240 sb_start_intwrite(inode->i_sb);
236 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3); 241 handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
237 if (IS_ERR(handle)) { 242 if (IS_ERR(handle)) {
238 ext4_std_error(inode->i_sb, PTR_ERR(handle)); 243 ext4_std_error(inode->i_sb, PTR_ERR(handle));
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode)
242 * cleaned up. 247 * cleaned up.
243 */ 248 */
244 ext4_orphan_del(NULL, inode); 249 ext4_orphan_del(NULL, inode);
250 sb_end_intwrite(inode->i_sb);
245 goto no_delete; 251 goto no_delete;
246 } 252 }
247 253
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode)
273 stop_handle: 279 stop_handle:
274 ext4_journal_stop(handle); 280 ext4_journal_stop(handle);
275 ext4_orphan_del(NULL, inode); 281 ext4_orphan_del(NULL, inode);
282 sb_end_intwrite(inode->i_sb);
276 goto no_delete; 283 goto no_delete;
277 } 284 }
278 } 285 }
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode)
301 else 308 else
302 ext4_free_inode(handle, inode); 309 ext4_free_inode(handle, inode);
303 ext4_journal_stop(handle); 310 ext4_journal_stop(handle);
311 sb_end_intwrite(inode->i_sb);
304 return; 312 return;
305no_delete: 313no_delete:
306 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ 314 ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -4779,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4779 get_block_t *get_block; 4787 get_block_t *get_block;
4780 int retries = 0; 4788 int retries = 0;
4781 4789
4782 /* 4790 sb_start_pagefault(inode->i_sb);
4783 * This check is racy but catches the common case. We rely on
4784 * __block_page_mkwrite() to do a reliable check.
4785 */
4786 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
4787 /* Delalloc case is easy... */ 4791 /* Delalloc case is easy... */
4788 if (test_opt(inode->i_sb, DELALLOC) && 4792 if (test_opt(inode->i_sb, DELALLOC) &&
4789 !ext4_should_journal_data(inode) && 4793 !ext4_should_journal_data(inode) &&
@@ -4851,5 +4855,6 @@ retry_alloc:
4851out_ret: 4855out_ret:
4852 ret = block_page_mkwrite_return(ret); 4856 ret = block_page_mkwrite_return(ret);
4853out: 4857out:
4858 sb_end_pagefault(inode->i_sb);
4854 return ret; 4859 return ret;
4855} 4860}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index f99a1311e847..fe7c63f4717e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
44{ 44{
45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data); 45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
46 46
47 /*
48 * We protect against freezing so that we don't create dirty buffers
49 * on frozen filesystem.
50 */
51 sb_start_write(sb);
47 ext4_mmp_csum_set(sb, mmp); 52 ext4_mmp_csum_set(sb, mmp);
48 mark_buffer_dirty(bh); 53 mark_buffer_dirty(bh);
49 lock_buffer(bh); 54 lock_buffer(bh);
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
51 get_bh(bh); 56 get_bh(bh);
52 submit_bh(WRITE_SYNC, bh); 57 submit_bh(WRITE_SYNC, bh);
53 wait_on_buffer(bh); 58 wait_on_buffer(bh);
59 sb_end_write(sb);
54 if (unlikely(!buffer_uptodate(bh))) 60 if (unlikely(!buffer_uptodate(bh)))
55 return 1; 61 return 1;
56 62
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2d51cd9af225..d76ec8277d3f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -331,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle)
331 * journal_end calls result in the superblock being marked dirty, so 331 * journal_end calls result in the superblock being marked dirty, so
332 * that sync() will call the filesystem's write_super callback if 332 * that sync() will call the filesystem's write_super callback if
333 * appropriate. 333 * appropriate.
334 *
335 * To avoid j_barrier hold in userspace when a user calls freeze(),
336 * ext4 prevents a new handle from being started by s_frozen, which
337 * is in an upper layer.
338 */ 334 */
339handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) 335handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
340{ 336{
341 journal_t *journal; 337 journal_t *journal;
342 handle_t *handle;
343 338
344 trace_ext4_journal_start(sb, nblocks, _RET_IP_); 339 trace_ext4_journal_start(sb, nblocks, _RET_IP_);
345 if (sb->s_flags & MS_RDONLY) 340 if (sb->s_flags & MS_RDONLY)
346 return ERR_PTR(-EROFS); 341 return ERR_PTR(-EROFS);
347 342
343 WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
348 journal = EXT4_SB(sb)->s_journal; 344 journal = EXT4_SB(sb)->s_journal;
349 handle = ext4_journal_current_handle();
350
351 /*
352 * If a handle has been started, it should be allowed to
353 * finish, otherwise deadlock could happen between freeze
354 * and others(e.g. truncate) due to the restart of the
355 * journal handle if the filesystem is forzen and active
356 * handles are not stopped.
357 */
358 if (!handle)
359 vfs_check_frozen(sb, SB_FREEZE_TRANS);
360
361 if (!journal) 345 if (!journal)
362 return ext4_get_nojournal(); 346 return ext4_get_nojournal();
363 /* 347 /*
@@ -2747,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2747 sb = elr->lr_super; 2731 sb = elr->lr_super;
2748 ngroups = EXT4_SB(sb)->s_groups_count; 2732 ngroups = EXT4_SB(sb)->s_groups_count;
2749 2733
2734 sb_start_write(sb);
2750 for (group = elr->lr_next_group; group < ngroups; group++) { 2735 for (group = elr->lr_next_group; group < ngroups; group++) {
2751 gdp = ext4_get_group_desc(sb, group, NULL); 2736 gdp = ext4_get_group_desc(sb, group, NULL);
2752 if (!gdp) { 2737 if (!gdp) {
@@ -2773,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
2773 elr->lr_next_sched = jiffies + elr->lr_timeout; 2758 elr->lr_next_sched = jiffies + elr->lr_timeout;
2774 elr->lr_next_group = group + 1; 2759 elr->lr_next_group = group + 1;
2775 } 2760 }
2761 sb_end_write(sb);
2776 2762
2777 return ret; 2763 return ret;
2778} 2764}
@@ -4460,10 +4446,8 @@ int ext4_force_commit(struct super_block *sb)
4460 return 0; 4446 return 0;
4461 4447
4462 journal = EXT4_SB(sb)->s_journal; 4448 journal = EXT4_SB(sb)->s_journal;
4463 if (journal) { 4449 if (journal)
4464 vfs_check_frozen(sb, SB_FREEZE_TRANS);
4465 ret = ext4_journal_force_commit(journal); 4450 ret = ext4_journal_force_commit(journal);
4466 }
4467 4451
4468 return ret; 4452 return ret;
4469} 4453}
@@ -4493,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4493 * gives us a chance to flush the journal completely and mark the fs clean. 4477 * gives us a chance to flush the journal completely and mark the fs clean.
4494 * 4478 *
4495 * Note that only this function cannot bring a filesystem to be in a clean 4479 * Note that only this function cannot bring a filesystem to be in a clean
4496 * state independently, because ext4 prevents a new handle from being started 4480 * state independently. It relies on upper layer to stop all data & metadata
4497 * by @sb->s_frozen, which stays in an upper layer. It thus needs help from 4481 * modifications.
4498 * the upper layer.
4499 */ 4482 */
4500static int ext4_freeze(struct super_block *sb) 4483static int ext4_freeze(struct super_block *sb)
4501{ 4484{
@@ -4522,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb)
4522 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4505 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4523 error = ext4_commit_super(sb, 1); 4506 error = ext4_commit_super(sb, 1);
4524out: 4507out:
4525 /* we rely on s_frozen to stop further updates */ 4508 /* we rely on upper layer to stop further updates */
4526 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4509 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
4527 return error; 4510 return error;
4528} 4511}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a71fe3715ee8..e007b8bd8e5e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
43 if (err) 43 if (err)
44 goto out; 44 goto out;
45 45
46 mutex_lock(&inode->i_mutex);
47 err = mnt_want_write_file(file); 46 err = mnt_want_write_file(file);
48 if (err) 47 if (err)
49 goto out_unlock_inode; 48 goto out;
49 mutex_lock(&inode->i_mutex);
50 50
51 /* 51 /*
52 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also 52 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
73 /* The root directory has no attributes */ 73 /* The root directory has no attributes */
74 if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) { 74 if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
75 err = -EINVAL; 75 err = -EINVAL;
76 goto out_drop_write; 76 goto out_unlock_inode;
77 } 77 }
78 78
79 if (sbi->options.sys_immutable && 79 if (sbi->options.sys_immutable &&
80 ((attr | oldattr) & ATTR_SYS) && 80 ((attr | oldattr) & ATTR_SYS) &&
81 !capable(CAP_LINUX_IMMUTABLE)) { 81 !capable(CAP_LINUX_IMMUTABLE)) {
82 err = -EPERM; 82 err = -EPERM;
83 goto out_drop_write; 83 goto out_unlock_inode;
84 } 84 }
85 85
86 /* 86 /*
@@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
90 */ 90 */
91 err = security_inode_setattr(file->f_path.dentry, &ia); 91 err = security_inode_setattr(file->f_path.dentry, &ia);
92 if (err) 92 if (err)
93 goto out_drop_write; 93 goto out_unlock_inode;
94 94
95 /* This MUST be done before doing anything irreversible... */ 95 /* This MUST be done before doing anything irreversible... */
96 err = fat_setattr(file->f_path.dentry, &ia); 96 err = fat_setattr(file->f_path.dentry, &ia);
97 if (err) 97 if (err)
98 goto out_drop_write; 98 goto out_unlock_inode;
99 99
100 fsnotify_change(file->f_path.dentry, ia.ia_valid); 100 fsnotify_change(file->f_path.dentry, ia.ia_valid);
101 if (sbi->options.sys_immutable) { 101 if (sbi->options.sys_immutable) {
@@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
107 107
108 fat_save_attrs(inode, attr); 108 fat_save_attrs(inode, attr);
109 mark_inode_dirty(inode); 109 mark_inode_dirty(inode);
110out_drop_write:
111 mnt_drop_write_file(file);
112out_unlock_inode: 110out_unlock_inode:
113 mutex_unlock(&inode->i_mutex); 111 mutex_unlock(&inode->i_mutex);
112 mnt_drop_write_file(file);
114out: 113out:
115 return err; 114 return err;
116} 115}
diff --git a/fs/file_table.c b/fs/file_table.c
index b3fc4d67a26b..701985e4ccda 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly;
43 43
44static struct percpu_counter nr_files __cacheline_aligned_in_smp; 44static struct percpu_counter nr_files __cacheline_aligned_in_smp;
45 45
46static inline void file_free_rcu(struct rcu_head *head) 46static void file_free_rcu(struct rcu_head *head)
47{ 47{
48 struct file *f = container_of(head, struct file, f_u.fu_rcuhead); 48 struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
49 49
@@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file)
217 return; 217 return;
218 if (file_check_writeable(file) != 0) 218 if (file_check_writeable(file) != 0)
219 return; 219 return;
220 mnt_drop_write(mnt); 220 __mnt_drop_write(mnt);
221 file_release_write(file); 221 file_release_write(file);
222} 222}
223 223
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a688cde7..93d8d6c9494d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
944 return err; 944 return err;
945 945
946 count = ocount; 946 count = ocount;
947 947 sb_start_write(inode->i_sb);
948 mutex_lock(&inode->i_mutex); 948 mutex_lock(&inode->i_mutex);
949 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
950 949
951 /* We can write back this queue in page reclaim */ 950 /* We can write back this queue in page reclaim */
952 current->backing_dev_info = mapping->backing_dev_info; 951 current->backing_dev_info = mapping->backing_dev_info;
@@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1004out: 1003out:
1005 current->backing_dev_info = NULL; 1004 current->backing_dev_info = NULL;
1006 mutex_unlock(&inode->i_mutex); 1005 mutex_unlock(&inode->i_mutex);
1006 sb_end_write(inode->i_sb);
1007 1007
1008 return written ? written : err; 1008 return written ? written : err;
1009} 1009}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9aa6af13823c..d1d791ef38de 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
373 loff_t size; 373 loff_t size;
374 int ret; 374 int ret;
375 375
376 /* Wait if fs is frozen. This is racy so we check again later on 376 sb_start_pagefault(inode->i_sb);
377 * and retry if the fs has been frozen after the page lock has 377
378 * been acquired 378 /* Update file times before taking page lock */
379 */ 379 file_update_time(vma->vm_file);
380 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
381 380
382 ret = gfs2_rs_alloc(ip); 381 ret = gfs2_rs_alloc(ip);
383 if (ret) 382 if (ret)
@@ -462,14 +461,9 @@ out:
462 gfs2_holder_uninit(&gh); 461 gfs2_holder_uninit(&gh);
463 if (ret == 0) { 462 if (ret == 0) {
464 set_page_dirty(page); 463 set_page_dirty(page);
465 /* This check must be post dropping of transaction lock */ 464 wait_on_page_writeback(page);
466 if (inode->i_sb->s_frozen == SB_UNFROZEN) {
467 wait_on_page_writeback(page);
468 } else {
469 ret = -EAGAIN;
470 unlock_page(page);
471 }
472 } 465 }
466 sb_end_pagefault(inode->i_sb);
473 return block_page_mkwrite_return(ret); 467 return block_page_mkwrite_return(ret);
474} 468}
475 469
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ad3e2fb763d7..adbd27875ef9 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
50 if (revokes) 50 if (revokes)
51 tr->tr_reserved += gfs2_struct2blk(sdp, revokes, 51 tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
52 sizeof(u64)); 52 sizeof(u64));
53 sb_start_intwrite(sdp->sd_vfs);
53 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); 54 gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
54 55
55 error = gfs2_glock_nq(&tr->tr_t_gh); 56 error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -68,6 +69,7 @@ fail_gunlock:
68 gfs2_glock_dq(&tr->tr_t_gh); 69 gfs2_glock_dq(&tr->tr_t_gh);
69 70
70fail_holder_uninit: 71fail_holder_uninit:
72 sb_end_intwrite(sdp->sd_vfs);
71 gfs2_holder_uninit(&tr->tr_t_gh); 73 gfs2_holder_uninit(&tr->tr_t_gh);
72 kfree(tr); 74 kfree(tr);
73 75
@@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
116 gfs2_holder_uninit(&tr->tr_t_gh); 118 gfs2_holder_uninit(&tr->tr_t_gh);
117 kfree(tr); 119 kfree(tr);
118 } 120 }
121 sb_end_intwrite(sdp->sd_vfs);
119 return; 122 return;
120 } 123 }
121 124
@@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
136 139
137 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) 140 if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
138 gfs2_log_flush(sdp, NULL); 141 gfs2_log_flush(sdp, NULL);
142 sb_end_intwrite(sdp->sd_vfs);
139} 143}
140 144
141/** 145/**
diff --git a/fs/inode.c b/fs/inode.c
index 3cc504320467..ac8d904b3f16 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1542,9 +1542,11 @@ void touch_atime(struct path *path)
1542 if (timespec_equal(&inode->i_atime, &now)) 1542 if (timespec_equal(&inode->i_atime, &now))
1543 return; 1543 return;
1544 1544
1545 if (mnt_want_write(mnt)) 1545 if (!sb_start_write_trylock(inode->i_sb))
1546 return; 1546 return;
1547 1547
1548 if (__mnt_want_write(mnt))
1549 goto skip_update;
1548 /* 1550 /*
1549 * File systems can error out when updating inodes if they need to 1551 * File systems can error out when updating inodes if they need to
1550 * allocate new space to modify an inode (such is the case for 1552 * allocate new space to modify an inode (such is the case for
@@ -1555,7 +1557,9 @@ void touch_atime(struct path *path)
1555 * of the fs read only, e.g. subvolumes in Btrfs. 1557 * of the fs read only, e.g. subvolumes in Btrfs.
1556 */ 1558 */
1557 update_time(inode, &now, S_ATIME); 1559 update_time(inode, &now, S_ATIME);
1558 mnt_drop_write(mnt); 1560 __mnt_drop_write(mnt);
1561skip_update:
1562 sb_end_write(inode->i_sb);
1559} 1563}
1560EXPORT_SYMBOL(touch_atime); 1564EXPORT_SYMBOL(touch_atime);
1561 1565
@@ -1662,11 +1666,11 @@ int file_update_time(struct file *file)
1662 return 0; 1666 return 0;
1663 1667
1664 /* Finally allowed to write? Takes lock. */ 1668 /* Finally allowed to write? Takes lock. */
1665 if (mnt_want_write_file(file)) 1669 if (__mnt_want_write_file(file))
1666 return 0; 1670 return 0;
1667 1671
1668 ret = update_time(inode, &now, sync_it); 1672 ret = update_time(inode, &now, sync_it);
1669 mnt_drop_write_file(file); 1673 __mnt_drop_write_file(file);
1670 1674
1671 return ret; 1675 return ret;
1672} 1676}
diff --git a/fs/internal.h b/fs/internal.h
index a6fd56c68b11..371bcc4b1697 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -61,6 +61,10 @@ extern void __init mnt_init(void);
61 61
62extern struct lglock vfsmount_lock; 62extern struct lglock vfsmount_lock;
63 63
64extern int __mnt_want_write(struct vfsmount *);
65extern int __mnt_want_write_file(struct file *);
66extern void __mnt_drop_write(struct vfsmount *);
67extern void __mnt_drop_write_file(struct file *);
64 68
65/* 69/*
66 * fs_struct.c 70 * fs_struct.c
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 8392cb85bd54..05d29124c6ab 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
156 struct nlm_rqst *call; 156 struct nlm_rqst *call;
157 int status; 157 int status;
158 158
159 nlm_get_host(host);
160 call = nlm_alloc_call(host); 159 call = nlm_alloc_call(host);
161 if (call == NULL) 160 if (call == NULL)
162 return -ENOMEM; 161 return -ENOMEM;
163 162
164 nlmclnt_locks_init_private(fl, host); 163 nlmclnt_locks_init_private(fl, host);
164 if (!fl->fl_u.nfs_fl.owner) {
165 /* lockowner allocation has failed */
166 nlmclnt_release_call(call);
167 return -ENOMEM;
168 }
165 /* Set up the argument struct */ 169 /* Set up the argument struct */
166 nlmclnt_setlockargs(call, fl); 170 nlmclnt_setlockargs(call, fl);
167 171
@@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc);
185 189
186/* 190/*
187 * Allocate an NLM RPC call struct 191 * Allocate an NLM RPC call struct
188 *
189 * Note: the caller must hold a reference to host. In case of failure,
190 * this reference will be released.
191 */ 192 */
192struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) 193struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
193{ 194{
@@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
199 atomic_set(&call->a_count, 1); 200 atomic_set(&call->a_count, 1);
200 locks_init_lock(&call->a_args.lock.fl); 201 locks_init_lock(&call->a_args.lock.fl);
201 locks_init_lock(&call->a_res.lock.fl); 202 locks_init_lock(&call->a_res.lock.fl);
202 call->a_host = host; 203 call->a_host = nlm_get_host(host);
203 return call; 204 return call;
204 } 205 }
205 if (signalled()) 206 if (signalled())
@@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
207 printk("nlm_alloc_call: failed, waiting for memory\n"); 208 printk("nlm_alloc_call: failed, waiting for memory\n");
208 schedule_timeout_interruptible(5*HZ); 209 schedule_timeout_interruptible(5*HZ);
209 } 210 }
210 nlmclnt_release_host(host);
211 return NULL; 211 return NULL;
212} 212}
213 213
@@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
750 dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" 750 dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
751 " Attempting to cancel lock.\n"); 751 " Attempting to cancel lock.\n");
752 752
753 req = nlm_alloc_call(nlm_get_host(host)); 753 req = nlm_alloc_call(host);
754 if (!req) 754 if (!req)
755 return -ENOMEM; 755 return -ENOMEM;
756 req->a_flags = RPC_TASK_ASYNC; 756 req->a_flags = RPC_TASK_ASYNC;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4a43d253c045..b147d1ae71fd 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -257,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
257 return rpc_system_err; 257 return rpc_system_err;
258 258
259 call = nlm_alloc_call(host); 259 call = nlm_alloc_call(host);
260 nlmsvc_release_host(host);
260 if (call == NULL) 261 if (call == NULL)
261 return rpc_system_err; 262 return rpc_system_err;
262 263
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index afe4488c33d8..fb1a2bedbe97 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
219 struct nlm_block *block; 219 struct nlm_block *block;
220 struct nlm_rqst *call = NULL; 220 struct nlm_rqst *call = NULL;
221 221
222 nlm_get_host(host);
223 call = nlm_alloc_call(host); 222 call = nlm_alloc_call(host);
224 if (call == NULL) 223 if (call == NULL)
225 return NULL; 224 return NULL;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index de8f2caa2235..3009a365e082 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -297,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
297 return rpc_system_err; 297 return rpc_system_err;
298 298
299 call = nlm_alloc_call(host); 299 call = nlm_alloc_call(host);
300 nlmsvc_release_host(host);
300 if (call == NULL) 301 if (call == NULL)
301 return rpc_system_err; 302 return rpc_system_err;
302 303
diff --git a/fs/namei.c b/fs/namei.c
index 2ccc35c4dc24..1b464390dde8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
650 path_put(link); 650 path_put(link);
651} 651}
652 652
653int sysctl_protected_symlinks __read_mostly = 1;
654int sysctl_protected_hardlinks __read_mostly = 1;
655
656/**
657 * may_follow_link - Check symlink following for unsafe situations
658 * @link: The path of the symlink
659 *
660 * In the case of the sysctl_protected_symlinks sysctl being enabled,
661 * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
662 * in a sticky world-writable directory. This is to protect privileged
663 * processes from failing races against path names that may change out
664 * from under them by way of other users creating malicious symlinks.
665 * It will permit symlinks to be followed only when outside a sticky
666 * world-writable directory, or when the uid of the symlink and follower
667 * match, or when the directory owner matches the symlink's owner.
668 *
669 * Returns 0 if following the symlink is allowed, -ve on error.
670 */
671static inline int may_follow_link(struct path *link, struct nameidata *nd)
672{
673 const struct inode *inode;
674 const struct inode *parent;
675
676 if (!sysctl_protected_symlinks)
677 return 0;
678
679 /* Allowed if owner and follower match. */
680 inode = link->dentry->d_inode;
681 if (current_cred()->fsuid == inode->i_uid)
682 return 0;
683
684 /* Allowed if parent directory not sticky and world-writable. */
685 parent = nd->path.dentry->d_inode;
686 if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
687 return 0;
688
689 /* Allowed if parent directory and link owner match. */
690 if (parent->i_uid == inode->i_uid)
691 return 0;
692
693 path_put_conditional(link, nd);
694 path_put(&nd->path);
695 audit_log_link_denied("follow_link", link);
696 return -EACCES;
697}
698
699/**
700 * safe_hardlink_source - Check for safe hardlink conditions
701 * @inode: the source inode to hardlink from
702 *
703 * Return false if at least one of the following conditions:
704 * - inode is not a regular file
705 * - inode is setuid
706 * - inode is setgid and group-exec
707 * - access failure for read and write
708 *
709 * Otherwise returns true.
710 */
711static bool safe_hardlink_source(struct inode *inode)
712{
713 umode_t mode = inode->i_mode;
714
715 /* Special files should not get pinned to the filesystem. */
716 if (!S_ISREG(mode))
717 return false;
718
719 /* Setuid files should not get pinned to the filesystem. */
720 if (mode & S_ISUID)
721 return false;
722
723 /* Executable setgid files should not get pinned to the filesystem. */
724 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
725 return false;
726
727 /* Hardlinking to unreadable or unwritable sources is dangerous. */
728 if (inode_permission(inode, MAY_READ | MAY_WRITE))
729 return false;
730
731 return true;
732}
733
734/**
735 * may_linkat - Check permissions for creating a hardlink
736 * @link: the source to hardlink from
737 *
738 * Block hardlink when all of:
739 * - sysctl_protected_hardlinks enabled
740 * - fsuid does not match inode
741 * - hardlink source is unsafe (see safe_hardlink_source() above)
742 * - not CAP_FOWNER
743 *
744 * Returns 0 if successful, -ve on error.
745 */
746static int may_linkat(struct path *link)
747{
748 const struct cred *cred;
749 struct inode *inode;
750
751 if (!sysctl_protected_hardlinks)
752 return 0;
753
754 cred = current_cred();
755 inode = link->dentry->d_inode;
756
757 /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
758 * otherwise, it must be a safe source.
759 */
760 if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) ||
761 capable(CAP_FOWNER))
762 return 0;
763
764 audit_log_link_denied("linkat", link);
765 return -EPERM;
766}
767
653static __always_inline int 768static __always_inline int
654follow_link(struct path *link, struct nameidata *nd, void **p) 769follow_link(struct path *link, struct nameidata *nd, void **p)
655{ 770{
@@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name,
1818 while (err > 0) { 1933 while (err > 0) {
1819 void *cookie; 1934 void *cookie;
1820 struct path link = path; 1935 struct path link = path;
1936 err = may_follow_link(&link, nd);
1937 if (unlikely(err))
1938 break;
1821 nd->flags |= LOOKUP_PARENT; 1939 nd->flags |= LOOKUP_PARENT;
1822 err = follow_link(&link, nd, &cookie); 1940 err = follow_link(&link, nd, &cookie);
1823 if (err) 1941 if (err)
@@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2277static int atomic_open(struct nameidata *nd, struct dentry *dentry, 2395static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2278 struct path *path, struct file *file, 2396 struct path *path, struct file *file,
2279 const struct open_flags *op, 2397 const struct open_flags *op,
2280 bool *want_write, bool need_lookup, 2398 bool got_write, bool need_lookup,
2281 int *opened) 2399 int *opened)
2282{ 2400{
2283 struct inode *dir = nd->path.dentry->d_inode; 2401 struct inode *dir = nd->path.dentry->d_inode;
@@ -2300,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2300 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) 2418 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2301 mode &= ~current_umask(); 2419 mode &= ~current_umask();
2302 2420
2303 if (open_flag & O_EXCL) { 2421 if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
2304 open_flag &= ~O_TRUNC; 2422 open_flag &= ~O_TRUNC;
2305 *opened |= FILE_CREATED; 2423 *opened |= FILE_CREATED;
2306 } 2424 }
@@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2314 * Another problem is returing the "right" error value (e.g. for an 2432 * Another problem is returing the "right" error value (e.g. for an
2315 * O_EXCL open we want to return EEXIST not EROFS). 2433 * O_EXCL open we want to return EEXIST not EROFS).
2316 */ 2434 */
2317 if ((open_flag & (O_CREAT | O_TRUNC)) || 2435 if (((open_flag & (O_CREAT | O_TRUNC)) ||
2318 (open_flag & O_ACCMODE) != O_RDONLY) { 2436 (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
2319 error = mnt_want_write(nd->path.mnt); 2437 if (!(open_flag & O_CREAT)) {
2320 if (!error) {
2321 *want_write = true;
2322 } else if (!(open_flag & O_CREAT)) {
2323 /* 2438 /*
2324 * No O_CREATE -> atomicity not a requirement -> fall 2439 * No O_CREATE -> atomicity not a requirement -> fall
2325 * back to lookup + open 2440 * back to lookup + open
@@ -2327,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2327 goto no_open; 2442 goto no_open;
2328 } else if (open_flag & (O_EXCL | O_TRUNC)) { 2443 } else if (open_flag & (O_EXCL | O_TRUNC)) {
2329 /* Fall back and fail with the right error */ 2444 /* Fall back and fail with the right error */
2330 create_error = error; 2445 create_error = -EROFS;
2331 goto no_open; 2446 goto no_open;
2332 } else { 2447 } else {
2333 /* No side effects, safe to clear O_CREAT */ 2448 /* No side effects, safe to clear O_CREAT */
2334 create_error = error; 2449 create_error = -EROFS;
2335 open_flag &= ~O_CREAT; 2450 open_flag &= ~O_CREAT;
2336 } 2451 }
2337 } 2452 }
@@ -2438,7 +2553,7 @@ looked_up:
2438static int lookup_open(struct nameidata *nd, struct path *path, 2553static int lookup_open(struct nameidata *nd, struct path *path,
2439 struct file *file, 2554 struct file *file,
2440 const struct open_flags *op, 2555 const struct open_flags *op,
2441 bool *want_write, int *opened) 2556 bool got_write, int *opened)
2442{ 2557{
2443 struct dentry *dir = nd->path.dentry; 2558 struct dentry *dir = nd->path.dentry;
2444 struct inode *dir_inode = dir->d_inode; 2559 struct inode *dir_inode = dir->d_inode;
@@ -2456,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
2456 goto out_no_open; 2571 goto out_no_open;
2457 2572
2458 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { 2573 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
2459 return atomic_open(nd, dentry, path, file, op, want_write, 2574 return atomic_open(nd, dentry, path, file, op, got_write,
2460 need_lookup, opened); 2575 need_lookup, opened);
2461 } 2576 }
2462 2577
@@ -2480,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path,
2480 * a permanent write count is taken through 2595 * a permanent write count is taken through
2481 * the 'struct file' in finish_open(). 2596 * the 'struct file' in finish_open().
2482 */ 2597 */
2483 error = mnt_want_write(nd->path.mnt); 2598 if (!got_write) {
2484 if (error) 2599 error = -EROFS;
2485 goto out_dput; 2600 goto out_dput;
2486 *want_write = true; 2601 }
2487 *opened |= FILE_CREATED; 2602 *opened |= FILE_CREATED;
2488 error = security_path_mknod(&nd->path, dentry, mode, 0); 2603 error = security_path_mknod(&nd->path, dentry, mode, 0);
2489 if (error) 2604 if (error)
@@ -2513,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path,
2513 struct dentry *dir = nd->path.dentry; 2628 struct dentry *dir = nd->path.dentry;
2514 int open_flag = op->open_flag; 2629 int open_flag = op->open_flag;
2515 bool will_truncate = (open_flag & O_TRUNC) != 0; 2630 bool will_truncate = (open_flag & O_TRUNC) != 0;
2516 bool want_write = false; 2631 bool got_write = false;
2517 int acc_mode = op->acc_mode; 2632 int acc_mode = op->acc_mode;
2518 struct inode *inode; 2633 struct inode *inode;
2519 bool symlink_ok = false; 2634 bool symlink_ok = false;
@@ -2582,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path,
2582 } 2697 }
2583 2698
2584retry_lookup: 2699retry_lookup:
2700 if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
2701 error = mnt_want_write(nd->path.mnt);
2702 if (!error)
2703 got_write = true;
2704 /*
2705 * do _not_ fail yet - we might not need that or fail with
2706 * a different error; let lookup_open() decide; we'll be
2707 * dropping this one anyway.
2708 */
2709 }
2585 mutex_lock(&dir->d_inode->i_mutex); 2710 mutex_lock(&dir->d_inode->i_mutex);
2586 error = lookup_open(nd, path, file, op, &want_write, opened); 2711 error = lookup_open(nd, path, file, op, got_write, opened);
2587 mutex_unlock(&dir->d_inode->i_mutex); 2712 mutex_unlock(&dir->d_inode->i_mutex);
2588 2713
2589 if (error <= 0) { 2714 if (error <= 0) {
@@ -2608,22 +2733,23 @@ retry_lookup:
2608 } 2733 }
2609 2734
2610 /* 2735 /*
2611 * It already exists. 2736 * create/update audit record if it already exists.
2612 */ 2737 */
2613 audit_inode(pathname, path->dentry); 2738 if (path->dentry->d_inode)
2739 audit_inode(pathname, path->dentry);
2614 2740
2615 /* 2741 /*
2616 * If atomic_open() acquired write access it is dropped now due to 2742 * If atomic_open() acquired write access it is dropped now due to
2617 * possible mount and symlink following (this might be optimized away if 2743 * possible mount and symlink following (this might be optimized away if
2618 * necessary...) 2744 * necessary...)
2619 */ 2745 */
2620 if (want_write) { 2746 if (got_write) {
2621 mnt_drop_write(nd->path.mnt); 2747 mnt_drop_write(nd->path.mnt);
2622 want_write = false; 2748 got_write = false;
2623 } 2749 }
2624 2750
2625 error = -EEXIST; 2751 error = -EEXIST;
2626 if (open_flag & O_EXCL) 2752 if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
2627 goto exit_dput; 2753 goto exit_dput;
2628 2754
2629 error = follow_managed(path, nd->flags); 2755 error = follow_managed(path, nd->flags);
@@ -2684,7 +2810,7 @@ finish_open:
2684 error = mnt_want_write(nd->path.mnt); 2810 error = mnt_want_write(nd->path.mnt);
2685 if (error) 2811 if (error)
2686 goto out; 2812 goto out;
2687 want_write = true; 2813 got_write = true;
2688 } 2814 }
2689finish_open_created: 2815finish_open_created:
2690 error = may_open(&nd->path, acc_mode, open_flag); 2816 error = may_open(&nd->path, acc_mode, open_flag);
@@ -2711,7 +2837,7 @@ opened:
2711 goto exit_fput; 2837 goto exit_fput;
2712 } 2838 }
2713out: 2839out:
2714 if (want_write) 2840 if (got_write)
2715 mnt_drop_write(nd->path.mnt); 2841 mnt_drop_write(nd->path.mnt);
2716 path_put(&save_parent); 2842 path_put(&save_parent);
2717 terminate_walk(nd); 2843 terminate_walk(nd);
@@ -2735,9 +2861,9 @@ stale_open:
2735 nd->inode = dir->d_inode; 2861 nd->inode = dir->d_inode;
2736 save_parent.mnt = NULL; 2862 save_parent.mnt = NULL;
2737 save_parent.dentry = NULL; 2863 save_parent.dentry = NULL;
2738 if (want_write) { 2864 if (got_write) {
2739 mnt_drop_write(nd->path.mnt); 2865 mnt_drop_write(nd->path.mnt);
2740 want_write = false; 2866 got_write = false;
2741 } 2867 }
2742 retried = true; 2868 retried = true;
2743 goto retry_lookup; 2869 goto retry_lookup;
@@ -2777,6 +2903,9 @@ static struct file *path_openat(int dfd, const char *pathname,
2777 error = -ELOOP; 2903 error = -ELOOP;
2778 break; 2904 break;
2779 } 2905 }
2906 error = may_follow_link(&link, nd);
2907 if (unlikely(error))
2908 break;
2780 nd->flags |= LOOKUP_PARENT; 2909 nd->flags |= LOOKUP_PARENT;
2781 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); 2910 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2782 error = follow_link(&link, nd, &cookie); 2911 error = follow_link(&link, nd, &cookie);
@@ -2846,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2846{ 2975{
2847 struct dentry *dentry = ERR_PTR(-EEXIST); 2976 struct dentry *dentry = ERR_PTR(-EEXIST);
2848 struct nameidata nd; 2977 struct nameidata nd;
2978 int err2;
2849 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); 2979 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
2850 if (error) 2980 if (error)
2851 return ERR_PTR(error); 2981 return ERR_PTR(error);
@@ -2859,16 +2989,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2859 nd.flags &= ~LOOKUP_PARENT; 2989 nd.flags &= ~LOOKUP_PARENT;
2860 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2990 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2861 2991
2992 /* don't fail immediately if it's r/o, at least try to report other errors */
2993 err2 = mnt_want_write(nd.path.mnt);
2862 /* 2994 /*
2863 * Do the final lookup. 2995 * Do the final lookup.
2864 */ 2996 */
2865 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2997 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2866 dentry = lookup_hash(&nd); 2998 dentry = lookup_hash(&nd);
2867 if (IS_ERR(dentry)) 2999 if (IS_ERR(dentry))
2868 goto fail; 3000 goto unlock;
2869 3001
3002 error = -EEXIST;
2870 if (dentry->d_inode) 3003 if (dentry->d_inode)
2871 goto eexist; 3004 goto fail;
2872 /* 3005 /*
2873 * Special case - lookup gave negative, but... we had foo/bar/ 3006 * Special case - lookup gave negative, but... we had foo/bar/
2874 * From the vfs_mknod() POV we just have a negative dentry - 3007 * From the vfs_mknod() POV we just have a negative dentry -
@@ -2876,23 +3009,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2876 * been asking for (non-existent) directory. -ENOENT for you. 3009 * been asking for (non-existent) directory. -ENOENT for you.
2877 */ 3010 */
2878 if (unlikely(!is_dir && nd.last.name[nd.last.len])) { 3011 if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
2879 dput(dentry); 3012 error = -ENOENT;
2880 dentry = ERR_PTR(-ENOENT); 3013 goto fail;
3014 }
3015 if (unlikely(err2)) {
3016 error = err2;
2881 goto fail; 3017 goto fail;
2882 } 3018 }
2883 *path = nd.path; 3019 *path = nd.path;
2884 return dentry; 3020 return dentry;
2885eexist:
2886 dput(dentry);
2887 dentry = ERR_PTR(-EEXIST);
2888fail: 3021fail:
3022 dput(dentry);
3023 dentry = ERR_PTR(error);
3024unlock:
2889 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3025 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3026 if (!err2)
3027 mnt_drop_write(nd.path.mnt);
2890out: 3028out:
2891 path_put(&nd.path); 3029 path_put(&nd.path);
2892 return dentry; 3030 return dentry;
2893} 3031}
2894EXPORT_SYMBOL(kern_path_create); 3032EXPORT_SYMBOL(kern_path_create);
2895 3033
3034void done_path_create(struct path *path, struct dentry *dentry)
3035{
3036 dput(dentry);
3037 mutex_unlock(&path->dentry->d_inode->i_mutex);
3038 mnt_drop_write(path->mnt);
3039 path_put(path);
3040}
3041EXPORT_SYMBOL(done_path_create);
3042
2896struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir) 3043struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
2897{ 3044{
2898 char *tmp = getname(pathname); 3045 char *tmp = getname(pathname);
@@ -2956,8 +3103,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2956 struct path path; 3103 struct path path;
2957 int error; 3104 int error;
2958 3105
2959 if (S_ISDIR(mode)) 3106 error = may_mknod(mode);
2960 return -EPERM; 3107 if (error)
3108 return error;
2961 3109
2962 dentry = user_path_create(dfd, filename, &path, 0); 3110 dentry = user_path_create(dfd, filename, &path, 0);
2963 if (IS_ERR(dentry)) 3111 if (IS_ERR(dentry))
@@ -2965,15 +3113,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2965 3113
2966 if (!IS_POSIXACL(path.dentry->d_inode)) 3114 if (!IS_POSIXACL(path.dentry->d_inode))
2967 mode &= ~current_umask(); 3115 mode &= ~current_umask();
2968 error = may_mknod(mode);
2969 if (error)
2970 goto out_dput;
2971 error = mnt_want_write(path.mnt);
2972 if (error)
2973 goto out_dput;
2974 error = security_path_mknod(&path, dentry, mode, dev); 3116 error = security_path_mknod(&path, dentry, mode, dev);
2975 if (error) 3117 if (error)
2976 goto out_drop_write; 3118 goto out;
2977 switch (mode & S_IFMT) { 3119 switch (mode & S_IFMT) {
2978 case 0: case S_IFREG: 3120 case 0: case S_IFREG:
2979 error = vfs_create(path.dentry->d_inode,dentry,mode,true); 3121 error = vfs_create(path.dentry->d_inode,dentry,mode,true);
@@ -2986,13 +3128,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2986 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0); 3128 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
2987 break; 3129 break;
2988 } 3130 }
2989out_drop_write: 3131out:
2990 mnt_drop_write(path.mnt); 3132 done_path_create(&path, dentry);
2991out_dput:
2992 dput(dentry);
2993 mutex_unlock(&path.dentry->d_inode->i_mutex);
2994 path_put(&path);
2995
2996 return error; 3133 return error;
2997} 3134}
2998 3135
@@ -3038,19 +3175,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
3038 3175
3039 if (!IS_POSIXACL(path.dentry->d_inode)) 3176 if (!IS_POSIXACL(path.dentry->d_inode))
3040 mode &= ~current_umask(); 3177 mode &= ~current_umask();
3041 error = mnt_want_write(path.mnt);
3042 if (error)
3043 goto out_dput;
3044 error = security_path_mkdir(&path, dentry, mode); 3178 error = security_path_mkdir(&path, dentry, mode);
3045 if (error) 3179 if (!error)
3046 goto out_drop_write; 3180 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
3047 error = vfs_mkdir(path.dentry->d_inode, dentry, mode); 3181 done_path_create(&path, dentry);
3048out_drop_write:
3049 mnt_drop_write(path.mnt);
3050out_dput:
3051 dput(dentry);
3052 mutex_unlock(&path.dentry->d_inode->i_mutex);
3053 path_put(&path);
3054 return error; 3182 return error;
3055} 3183}
3056 3184
@@ -3144,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
3144 } 3272 }
3145 3273
3146 nd.flags &= ~LOOKUP_PARENT; 3274 nd.flags &= ~LOOKUP_PARENT;
3275 error = mnt_want_write(nd.path.mnt);
3276 if (error)
3277 goto exit1;
3147 3278
3148 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3279 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3149 dentry = lookup_hash(&nd); 3280 dentry = lookup_hash(&nd);
@@ -3154,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname)
3154 error = -ENOENT; 3285 error = -ENOENT;
3155 goto exit3; 3286 goto exit3;
3156 } 3287 }
3157 error = mnt_want_write(nd.path.mnt);
3158 if (error)
3159 goto exit3;
3160 error = security_path_rmdir(&nd.path, dentry); 3288 error = security_path_rmdir(&nd.path, dentry);
3161 if (error) 3289 if (error)
3162 goto exit4; 3290 goto exit3;
3163 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 3291 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
3164exit4:
3165 mnt_drop_write(nd.path.mnt);
3166exit3: 3292exit3:
3167 dput(dentry); 3293 dput(dentry);
3168exit2: 3294exit2:
3169 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3295 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3296 mnt_drop_write(nd.path.mnt);
3170exit1: 3297exit1:
3171 path_put(&nd.path); 3298 path_put(&nd.path);
3172 putname(name); 3299 putname(name);
@@ -3233,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3233 goto exit1; 3360 goto exit1;
3234 3361
3235 nd.flags &= ~LOOKUP_PARENT; 3362 nd.flags &= ~LOOKUP_PARENT;
3363 error = mnt_want_write(nd.path.mnt);
3364 if (error)
3365 goto exit1;
3236 3366
3237 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 3367 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
3238 dentry = lookup_hash(&nd); 3368 dentry = lookup_hash(&nd);
@@ -3245,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname)
3245 if (!inode) 3375 if (!inode)
3246 goto slashes; 3376 goto slashes;
3247 ihold(inode); 3377 ihold(inode);
3248 error = mnt_want_write(nd.path.mnt);
3249 if (error)
3250 goto exit2;
3251 error = security_path_unlink(&nd.path, dentry); 3378 error = security_path_unlink(&nd.path, dentry);
3252 if (error) 3379 if (error)
3253 goto exit3; 3380 goto exit2;
3254 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 3381 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
3255exit3: 3382exit2:
3256 mnt_drop_write(nd.path.mnt);
3257 exit2:
3258 dput(dentry); 3383 dput(dentry);
3259 } 3384 }
3260 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 3385 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
3261 if (inode) 3386 if (inode)
3262 iput(inode); /* truncate the inode here */ 3387 iput(inode); /* truncate the inode here */
3388 mnt_drop_write(nd.path.mnt);
3263exit1: 3389exit1:
3264 path_put(&nd.path); 3390 path_put(&nd.path);
3265 putname(name); 3391 putname(name);
@@ -3324,19 +3450,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
3324 if (IS_ERR(dentry)) 3450 if (IS_ERR(dentry))
3325 goto out_putname; 3451 goto out_putname;
3326 3452
3327 error = mnt_want_write(path.mnt);
3328 if (error)
3329 goto out_dput;
3330 error = security_path_symlink(&path, dentry, from); 3453 error = security_path_symlink(&path, dentry, from);
3331 if (error) 3454 if (!error)
3332 goto out_drop_write; 3455 error = vfs_symlink(path.dentry->d_inode, dentry, from);
3333 error = vfs_symlink(path.dentry->d_inode, dentry, from); 3456 done_path_create(&path, dentry);
3334out_drop_write:
3335 mnt_drop_write(path.mnt);
3336out_dput:
3337 dput(dentry);
3338 mutex_unlock(&path.dentry->d_inode->i_mutex);
3339 path_put(&path);
3340out_putname: 3457out_putname:
3341 putname(from); 3458 putname(from);
3342 return error; 3459 return error;
@@ -3436,19 +3553,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3436 error = -EXDEV; 3553 error = -EXDEV;
3437 if (old_path.mnt != new_path.mnt) 3554 if (old_path.mnt != new_path.mnt)
3438 goto out_dput; 3555 goto out_dput;
3439 error = mnt_want_write(new_path.mnt); 3556 error = may_linkat(&old_path);
3440 if (error) 3557 if (unlikely(error))
3441 goto out_dput; 3558 goto out_dput;
3442 error = security_path_link(old_path.dentry, &new_path, new_dentry); 3559 error = security_path_link(old_path.dentry, &new_path, new_dentry);
3443 if (error) 3560 if (error)
3444 goto out_drop_write; 3561 goto out_dput;
3445 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry); 3562 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
3446out_drop_write:
3447 mnt_drop_write(new_path.mnt);
3448out_dput: 3563out_dput:
3449 dput(new_dentry); 3564 done_path_create(&new_path, new_dentry);
3450 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
3451 path_put(&new_path);
3452out: 3565out:
3453 path_put(&old_path); 3566 path_put(&old_path);
3454 3567
@@ -3644,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
3644 if (newnd.last_type != LAST_NORM) 3757 if (newnd.last_type != LAST_NORM)
3645 goto exit2; 3758 goto exit2;
3646 3759
3760 error = mnt_want_write(oldnd.path.mnt);
3761 if (error)
3762 goto exit2;
3763
3647 oldnd.flags &= ~LOOKUP_PARENT; 3764 oldnd.flags &= ~LOOKUP_PARENT;
3648 newnd.flags &= ~LOOKUP_PARENT; 3765 newnd.flags &= ~LOOKUP_PARENT;
3649 newnd.flags |= LOOKUP_RENAME_TARGET; 3766 newnd.flags |= LOOKUP_RENAME_TARGET;
@@ -3679,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
3679 if (new_dentry == trap) 3796 if (new_dentry == trap)
3680 goto exit5; 3797 goto exit5;
3681 3798
3682 error = mnt_want_write(oldnd.path.mnt);
3683 if (error)
3684 goto exit5;
3685 error = security_path_rename(&oldnd.path, old_dentry, 3799 error = security_path_rename(&oldnd.path, old_dentry,
3686 &newnd.path, new_dentry); 3800 &newnd.path, new_dentry);
3687 if (error) 3801 if (error)
3688 goto exit6; 3802 goto exit5;
3689 error = vfs_rename(old_dir->d_inode, old_dentry, 3803 error = vfs_rename(old_dir->d_inode, old_dentry,
3690 new_dir->d_inode, new_dentry); 3804 new_dir->d_inode, new_dentry);
3691exit6:
3692 mnt_drop_write(oldnd.path.mnt);
3693exit5: 3805exit5:
3694 dput(new_dentry); 3806 dput(new_dentry);
3695exit4: 3807exit4:
3696 dput(old_dentry); 3808 dput(old_dentry);
3697exit3: 3809exit3:
3698 unlock_rename(new_dir, old_dir); 3810 unlock_rename(new_dir, old_dir);
3811 mnt_drop_write(oldnd.path.mnt);
3699exit2: 3812exit2:
3700 path_put(&newnd.path); 3813 path_put(&newnd.path);
3701 putname(to); 3814 putname(to);
diff --git a/fs/namespace.c b/fs/namespace.c
index c53d3381b0d0..4d31f73e2561 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt)
283} 283}
284 284
285/* 285/*
286 * Most r/o checks on a fs are for operations that take 286 * Most r/o & frozen checks on a fs are for operations that take discrete
287 * discrete amounts of time, like a write() or unlink(). 287 * amounts of time, like a write() or unlink(). We must keep track of when
288 * We must keep track of when those operations start 288 * those operations start (for permission checks) and when they end, so that we
289 * (for permission checks) and when they end, so that 289 * can determine when writes are able to occur to a filesystem.
290 * we can determine when writes are able to occur to
291 * a filesystem.
292 */ 290 */
293/** 291/**
294 * mnt_want_write - get write access to a mount 292 * __mnt_want_write - get write access to a mount without freeze protection
295 * @m: the mount on which to take a write 293 * @m: the mount on which to take a write
296 * 294 *
297 * This tells the low-level filesystem that a write is 295 * This tells the low-level filesystem that a write is about to be performed to
298 * about to be performed to it, and makes sure that 296 * it, and makes sure that writes are allowed (mount is read-write) before
299 * writes are allowed before returning success. When 297 * returning success. This operation does not protect against filesystem being
300 * the write operation is finished, mnt_drop_write() 298 * frozen. When the write operation is finished, __mnt_drop_write() must be
301 * must be called. This is effectively a refcount. 299 * called. This is effectively a refcount.
302 */ 300 */
303int mnt_want_write(struct vfsmount *m) 301int __mnt_want_write(struct vfsmount *m)
304{ 302{
305 struct mount *mnt = real_mount(m); 303 struct mount *mnt = real_mount(m);
306 int ret = 0; 304 int ret = 0;
@@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m)
326 ret = -EROFS; 324 ret = -EROFS;
327 } 325 }
328 preempt_enable(); 326 preempt_enable();
327
328 return ret;
329}
330
331/**
332 * mnt_want_write - get write access to a mount
333 * @m: the mount on which to take a write
334 *
335 * This tells the low-level filesystem that a write is about to be performed to
336 * it, and makes sure that writes are allowed (mount is read-write, filesystem
337 * is not frozen) before returning success. When the write operation is
338 * finished, mnt_drop_write() must be called. This is effectively a refcount.
339 */
340int mnt_want_write(struct vfsmount *m)
341{
342 int ret;
343
344 sb_start_write(m->mnt_sb);
345 ret = __mnt_want_write(m);
346 if (ret)
347 sb_end_write(m->mnt_sb);
329 return ret; 348 return ret;
330} 349}
331EXPORT_SYMBOL_GPL(mnt_want_write); 350EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt)
355EXPORT_SYMBOL_GPL(mnt_clone_write); 374EXPORT_SYMBOL_GPL(mnt_clone_write);
356 375
357/** 376/**
358 * mnt_want_write_file - get write access to a file's mount 377 * __mnt_want_write_file - get write access to a file's mount
359 * @file: the file whose mount on which to take a write 378 * @file: the file whose mount on which to take a write
360 * 379 *
361 * This is like mnt_want_write, but it takes a file and can 380 * This is like __mnt_want_write, but it takes a file and can
362 * do some optimisations if the file is open for write already 381 * do some optimisations if the file is open for write already
363 */ 382 */
364int mnt_want_write_file(struct file *file) 383int __mnt_want_write_file(struct file *file)
365{ 384{
366 struct inode *inode = file->f_dentry->d_inode; 385 struct inode *inode = file->f_dentry->d_inode;
386
367 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode)) 387 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
368 return mnt_want_write(file->f_path.mnt); 388 return __mnt_want_write(file->f_path.mnt);
369 else 389 else
370 return mnt_clone_write(file->f_path.mnt); 390 return mnt_clone_write(file->f_path.mnt);
371} 391}
392
393/**
394 * mnt_want_write_file - get write access to a file's mount
395 * @file: the file whose mount on which to take a write
396 *
397 * This is like mnt_want_write, but it takes a file and can
398 * do some optimisations if the file is open for write already
399 */
400int mnt_want_write_file(struct file *file)
401{
402 int ret;
403
404 sb_start_write(file->f_path.mnt->mnt_sb);
405 ret = __mnt_want_write_file(file);
406 if (ret)
407 sb_end_write(file->f_path.mnt->mnt_sb);
408 return ret;
409}
372EXPORT_SYMBOL_GPL(mnt_want_write_file); 410EXPORT_SYMBOL_GPL(mnt_want_write_file);
373 411
374/** 412/**
375 * mnt_drop_write - give up write access to a mount 413 * __mnt_drop_write - give up write access to a mount
376 * @mnt: the mount on which to give up write access 414 * @mnt: the mount on which to give up write access
377 * 415 *
378 * Tells the low-level filesystem that we are done 416 * Tells the low-level filesystem that we are done
379 * performing writes to it. Must be matched with 417 * performing writes to it. Must be matched with
380 * mnt_want_write() call above. 418 * __mnt_want_write() call above.
381 */ 419 */
382void mnt_drop_write(struct vfsmount *mnt) 420void __mnt_drop_write(struct vfsmount *mnt)
383{ 421{
384 preempt_disable(); 422 preempt_disable();
385 mnt_dec_writers(real_mount(mnt)); 423 mnt_dec_writers(real_mount(mnt));
386 preempt_enable(); 424 preempt_enable();
387} 425}
426
427/**
428 * mnt_drop_write - give up write access to a mount
429 * @mnt: the mount on which to give up write access
430 *
431 * Tells the low-level filesystem that we are done performing writes to it and
432 * also allows filesystem to be frozen again. Must be matched with
433 * mnt_want_write() call above.
434 */
435void mnt_drop_write(struct vfsmount *mnt)
436{
437 __mnt_drop_write(mnt);
438 sb_end_write(mnt->mnt_sb);
439}
388EXPORT_SYMBOL_GPL(mnt_drop_write); 440EXPORT_SYMBOL_GPL(mnt_drop_write);
389 441
442void __mnt_drop_write_file(struct file *file)
443{
444 __mnt_drop_write(file->f_path.mnt);
445}
446
390void mnt_drop_write_file(struct file *file) 447void mnt_drop_write_file(struct file *file)
391{ 448{
392 mnt_drop_write(file->f_path.mnt); 449 mnt_drop_write(file->f_path.mnt);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5ff0b7b9fc08..43295d45cc2b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
154 if (status < 0) 154 if (status < 0)
155 return; 155 return;
156 156
157 status = mnt_want_write_file(rec_file);
158 if (status)
159 return;
160
157 dir = rec_file->f_path.dentry; 161 dir = rec_file->f_path.dentry;
158 /* lock the parent */ 162 /* lock the parent */
159 mutex_lock(&dir->d_inode->i_mutex); 163 mutex_lock(&dir->d_inode->i_mutex);
@@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
173 * as well be forgiving and just succeed silently. 177 * as well be forgiving and just succeed silently.
174 */ 178 */
175 goto out_put; 179 goto out_put;
176 status = mnt_want_write_file(rec_file);
177 if (status)
178 goto out_put;
179 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); 180 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
180 mnt_drop_write_file(rec_file);
181out_put: 181out_put:
182 dput(dentry); 182 dput(dentry);
183out_unlock: 183out_unlock:
@@ -189,6 +189,7 @@ out_unlock:
189 " (err %d); please check that %s exists" 189 " (err %d); please check that %s exists"
190 " and is writeable", status, 190 " and is writeable", status,
191 user_recovery_dirname); 191 user_recovery_dirname);
192 mnt_drop_write_file(rec_file);
192 nfs4_reset_creds(original_cred); 193 nfs4_reset_creds(original_cred);
193} 194}
194 195
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cc793005a87c..032af381b3aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp)
635 fhp->fh_post_saved = 0; 635 fhp->fh_post_saved = 0;
636#endif 636#endif
637 } 637 }
638 fh_drop_write(fhp);
638 if (exp) { 639 if (exp) {
639 exp_put(exp); 640 exp_put(exp);
640 fhp->fh_export = NULL; 641 fhp->fh_export = NULL;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e15dc45fc5ec..aad6d457b9e8 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
196 struct dentry *dchild; 196 struct dentry *dchild;
197 int type, mode; 197 int type, mode;
198 __be32 nfserr; 198 __be32 nfserr;
199 int hosterr;
199 dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); 200 dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
200 201
201 dprintk("nfsd: CREATE %s %.*s\n", 202 dprintk("nfsd: CREATE %s %.*s\n",
@@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
214 nfserr = nfserr_exist; 215 nfserr = nfserr_exist;
215 if (isdotent(argp->name, argp->len)) 216 if (isdotent(argp->name, argp->len))
216 goto done; 217 goto done;
218 hosterr = fh_want_write(dirfhp);
219 if (hosterr) {
220 nfserr = nfserrno(hosterr);
221 goto done;
222 }
223
217 fh_lock_nested(dirfhp, I_MUTEX_PARENT); 224 fh_lock_nested(dirfhp, I_MUTEX_PARENT);
218 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); 225 dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
219 if (IS_ERR(dchild)) { 226 if (IS_ERR(dchild)) {
@@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
330out_unlock: 337out_unlock:
331 /* We don't really need to unlock, as fh_put does it. */ 338 /* We don't really need to unlock, as fh_put does it. */
332 fh_unlock(dirfhp); 339 fh_unlock(dirfhp);
333 340 fh_drop_write(dirfhp);
334done: 341done:
335 fh_put(dirfhp); 342 fh_put(dirfhp);
336 return nfsd_return_dirop(nfserr, resp); 343 return nfsd_return_dirop(nfserr, resp);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 702f64e820c3..a9269f142cc4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1284,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1284 * If it has, the parent directory should already be locked. 1284 * If it has, the parent directory should already be locked.
1285 */ 1285 */
1286 if (!resfhp->fh_dentry) { 1286 if (!resfhp->fh_dentry) {
1287 host_err = fh_want_write(fhp);
1288 if (host_err)
1289 goto out_nfserr;
1290
1287 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ 1291 /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
1288 fh_lock_nested(fhp, I_MUTEX_PARENT); 1292 fh_lock_nested(fhp, I_MUTEX_PARENT);
1289 dchild = lookup_one_len(fname, dentry, flen); 1293 dchild = lookup_one_len(fname, dentry, flen);
@@ -1327,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1327 goto out; 1331 goto out;
1328 } 1332 }
1329 1333
1330 host_err = fh_want_write(fhp);
1331 if (host_err)
1332 goto out_nfserr;
1333
1334 /* 1334 /*
1335 * Get the dir op function pointer. 1335 * Get the dir op function pointer.
1336 */ 1336 */
1337 err = 0; 1337 err = 0;
1338 host_err = 0;
1338 switch (type) { 1339 switch (type) {
1339 case S_IFREG: 1340 case S_IFREG:
1340 host_err = vfs_create(dirp, dchild, iap->ia_mode, true); 1341 host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
@@ -1351,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1351 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1352 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1352 break; 1353 break;
1353 } 1354 }
1354 if (host_err < 0) { 1355 if (host_err < 0)
1355 fh_drop_write(fhp);
1356 goto out_nfserr; 1356 goto out_nfserr;
1357 }
1358 1357
1359 err = nfsd_create_setattr(rqstp, resfhp, iap); 1358 err = nfsd_create_setattr(rqstp, resfhp, iap);
1360 1359
@@ -1366,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1366 err2 = nfserrno(commit_metadata(fhp)); 1365 err2 = nfserrno(commit_metadata(fhp));
1367 if (err2) 1366 if (err2)
1368 err = err2; 1367 err = err2;
1369 fh_drop_write(fhp);
1370 /* 1368 /*
1371 * Update the file handle to get the new inode info. 1369 * Update the file handle to get the new inode info.
1372 */ 1370 */
@@ -1425,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1425 err = nfserr_notdir; 1423 err = nfserr_notdir;
1426 if (!dirp->i_op->lookup) 1424 if (!dirp->i_op->lookup)
1427 goto out; 1425 goto out;
1426
1427 host_err = fh_want_write(fhp);
1428 if (host_err)
1429 goto out_nfserr;
1430
1428 fh_lock_nested(fhp, I_MUTEX_PARENT); 1431 fh_lock_nested(fhp, I_MUTEX_PARENT);
1429 1432
1430 /* 1433 /*
@@ -1457,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1457 v_atime = verifier[1]&0x7fffffff; 1460 v_atime = verifier[1]&0x7fffffff;
1458 } 1461 }
1459 1462
1460 host_err = fh_want_write(fhp);
1461 if (host_err)
1462 goto out_nfserr;
1463 if (dchild->d_inode) { 1463 if (dchild->d_inode) {
1464 err = 0; 1464 err = 0;
1465 1465
@@ -1530,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1530 if (!err) 1530 if (!err)
1531 err = nfserrno(commit_metadata(fhp)); 1531 err = nfserrno(commit_metadata(fhp));
1532 1532
1533 fh_drop_write(fhp);
1534 /* 1533 /*
1535 * Update the filehandle to get the new inode info. 1534 * Update the filehandle to get the new inode info.
1536 */ 1535 */
@@ -1541,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1541 fh_unlock(fhp); 1540 fh_unlock(fhp);
1542 if (dchild && !IS_ERR(dchild)) 1541 if (dchild && !IS_ERR(dchild))
1543 dput(dchild); 1542 dput(dchild);
1543 fh_drop_write(fhp);
1544 return err; 1544 return err;
1545 1545
1546 out_nfserr: 1546 out_nfserr:
@@ -1621,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1621 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE); 1621 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1622 if (err) 1622 if (err)
1623 goto out; 1623 goto out;
1624
1625 host_err = fh_want_write(fhp);
1626 if (host_err)
1627 goto out_nfserr;
1628
1624 fh_lock(fhp); 1629 fh_lock(fhp);
1625 dentry = fhp->fh_dentry; 1630 dentry = fhp->fh_dentry;
1626 dnew = lookup_one_len(fname, dentry, flen); 1631 dnew = lookup_one_len(fname, dentry, flen);
@@ -1628,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1628 if (IS_ERR(dnew)) 1633 if (IS_ERR(dnew))
1629 goto out_nfserr; 1634 goto out_nfserr;
1630 1635
1631 host_err = fh_want_write(fhp);
1632 if (host_err)
1633 goto out_nfserr;
1634
1635 if (unlikely(path[plen] != 0)) { 1636 if (unlikely(path[plen] != 0)) {
1636 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1637 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1637 if (path_alloced == NULL) 1638 if (path_alloced == NULL)
@@ -1691,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1691 if (isdotent(name, len)) 1692 if (isdotent(name, len))
1692 goto out; 1693 goto out;
1693 1694
1695 host_err = fh_want_write(tfhp);
1696 if (host_err) {
1697 err = nfserrno(host_err);
1698 goto out;
1699 }
1700
1694 fh_lock_nested(ffhp, I_MUTEX_PARENT); 1701 fh_lock_nested(ffhp, I_MUTEX_PARENT);
1695 ddir = ffhp->fh_dentry; 1702 ddir = ffhp->fh_dentry;
1696 dirp = ddir->d_inode; 1703 dirp = ddir->d_inode;
@@ -1702,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1702 1709
1703 dold = tfhp->fh_dentry; 1710 dold = tfhp->fh_dentry;
1704 1711
1705 host_err = fh_want_write(tfhp);
1706 if (host_err) {
1707 err = nfserrno(host_err);
1708 goto out_dput;
1709 }
1710 err = nfserr_noent; 1712 err = nfserr_noent;
1711 if (!dold->d_inode) 1713 if (!dold->d_inode)
1712 goto out_drop_write; 1714 goto out_dput;
1713 host_err = nfsd_break_lease(dold->d_inode); 1715 host_err = nfsd_break_lease(dold->d_inode);
1714 if (host_err) { 1716 if (host_err) {
1715 err = nfserrno(host_err); 1717 err = nfserrno(host_err);
1716 goto out_drop_write; 1718 goto out_dput;
1717 } 1719 }
1718 host_err = vfs_link(dold, dirp, dnew); 1720 host_err = vfs_link(dold, dirp, dnew);
1719 if (!host_err) { 1721 if (!host_err) {
@@ -1726,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1726 else 1728 else
1727 err = nfserrno(host_err); 1729 err = nfserrno(host_err);
1728 } 1730 }
1729out_drop_write:
1730 fh_drop_write(tfhp);
1731out_dput: 1731out_dput:
1732 dput(dnew); 1732 dput(dnew);
1733out_unlock: 1733out_unlock:
1734 fh_unlock(ffhp); 1734 fh_unlock(ffhp);
1735 fh_drop_write(tfhp);
1735out: 1736out:
1736 return err; 1737 return err;
1737 1738
@@ -1774,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1774 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) 1775 if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
1775 goto out; 1776 goto out;
1776 1777
1778 host_err = fh_want_write(ffhp);
1779 if (host_err) {
1780 err = nfserrno(host_err);
1781 goto out;
1782 }
1783
1777 /* cannot use fh_lock as we need deadlock protective ordering 1784 /* cannot use fh_lock as we need deadlock protective ordering
1778 * so do it by hand */ 1785 * so do it by hand */
1779 trap = lock_rename(tdentry, fdentry); 1786 trap = lock_rename(tdentry, fdentry);
@@ -1804,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1804 host_err = -EXDEV; 1811 host_err = -EXDEV;
1805 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) 1812 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1806 goto out_dput_new; 1813 goto out_dput_new;
1807 host_err = fh_want_write(ffhp);
1808 if (host_err)
1809 goto out_dput_new;
1810 1814
1811 host_err = nfsd_break_lease(odentry->d_inode); 1815 host_err = nfsd_break_lease(odentry->d_inode);
1812 if (host_err) 1816 if (host_err)
1813 goto out_drop_write; 1817 goto out_dput_new;
1814 if (ndentry->d_inode) { 1818 if (ndentry->d_inode) {
1815 host_err = nfsd_break_lease(ndentry->d_inode); 1819 host_err = nfsd_break_lease(ndentry->d_inode);
1816 if (host_err) 1820 if (host_err)
1817 goto out_drop_write; 1821 goto out_dput_new;
1818 } 1822 }
1819 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1823 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1820 if (!host_err) { 1824 if (!host_err) {
@@ -1822,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1822 if (!host_err) 1826 if (!host_err)
1823 host_err = commit_metadata(ffhp); 1827 host_err = commit_metadata(ffhp);
1824 } 1828 }
1825out_drop_write:
1826 fh_drop_write(ffhp);
1827 out_dput_new: 1829 out_dput_new:
1828 dput(ndentry); 1830 dput(ndentry);
1829 out_dput_old: 1831 out_dput_old:
@@ -1839,6 +1841,7 @@ out_drop_write:
1839 fill_post_wcc(tfhp); 1841 fill_post_wcc(tfhp);
1840 unlock_rename(tdentry, fdentry); 1842 unlock_rename(tdentry, fdentry);
1841 ffhp->fh_locked = tfhp->fh_locked = 0; 1843 ffhp->fh_locked = tfhp->fh_locked = 0;
1844 fh_drop_write(ffhp);
1842 1845
1843out: 1846out:
1844 return err; 1847 return err;
@@ -1864,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1864 if (err) 1867 if (err)
1865 goto out; 1868 goto out;
1866 1869
1870 host_err = fh_want_write(fhp);
1871 if (host_err)
1872 goto out_nfserr;
1873
1867 fh_lock_nested(fhp, I_MUTEX_PARENT); 1874 fh_lock_nested(fhp, I_MUTEX_PARENT);
1868 dentry = fhp->fh_dentry; 1875 dentry = fhp->fh_dentry;
1869 dirp = dentry->d_inode; 1876 dirp = dentry->d_inode;
@@ -1882,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1882 if (!type) 1889 if (!type)
1883 type = rdentry->d_inode->i_mode & S_IFMT; 1890 type = rdentry->d_inode->i_mode & S_IFMT;
1884 1891
1885 host_err = fh_want_write(fhp);
1886 if (host_err)
1887 goto out_put;
1888
1889 host_err = nfsd_break_lease(rdentry->d_inode); 1892 host_err = nfsd_break_lease(rdentry->d_inode);
1890 if (host_err) 1893 if (host_err)
1891 goto out_drop_write; 1894 goto out_put;
1892 if (type != S_IFDIR) 1895 if (type != S_IFDIR)
1893 host_err = vfs_unlink(dirp, rdentry); 1896 host_err = vfs_unlink(dirp, rdentry);
1894 else 1897 else
1895 host_err = vfs_rmdir(dirp, rdentry); 1898 host_err = vfs_rmdir(dirp, rdentry);
1896 if (!host_err) 1899 if (!host_err)
1897 host_err = commit_metadata(fhp); 1900 host_err = commit_metadata(fhp);
1898out_drop_write:
1899 fh_drop_write(fhp);
1900out_put: 1901out_put:
1901 dput(rdentry); 1902 dput(rdentry);
1902 1903
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index ec0611b2b738..359594c393d2 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
110 110
111static inline int fh_want_write(struct svc_fh *fh) 111static inline int fh_want_write(struct svc_fh *fh)
112{ 112{
113 return mnt_want_write(fh->fh_export->ex_path.mnt); 113 int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
114
115 if (!ret)
116 fh->fh_want_write = 1;
117 return ret;
114} 118}
115 119
116static inline void fh_drop_write(struct svc_fh *fh) 120static inline void fh_drop_write(struct svc_fh *fh)
117{ 121{
118 mnt_drop_write(fh->fh_export->ex_path.mnt); 122 if (fh->fh_want_write) {
123 fh->fh_want_write = 0;
124 mnt_drop_write(fh->fh_export->ex_path.mnt);
125 }
119} 126}
120 127
121#endif /* LINUX_NFSD_VFS_H */ 128#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 62cebc8e1a1f..a4d56ac02e6c 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
69 struct page *page = vmf->page; 69 struct page *page = vmf->page;
70 struct inode *inode = vma->vm_file->f_dentry->d_inode; 70 struct inode *inode = vma->vm_file->f_dentry->d_inode;
71 struct nilfs_transaction_info ti; 71 struct nilfs_transaction_info ti;
72 int ret; 72 int ret = 0;
73 73
74 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) 74 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
75 return VM_FAULT_SIGBUS; /* -ENOSPC */ 75 return VM_FAULT_SIGBUS; /* -ENOSPC */
76 76
77 sb_start_pagefault(inode->i_sb);
77 lock_page(page); 78 lock_page(page);
78 if (page->mapping != inode->i_mapping || 79 if (page->mapping != inode->i_mapping ||
79 page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { 80 page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
80 unlock_page(page); 81 unlock_page(page);
81 return VM_FAULT_NOPAGE; /* make the VM retry the fault */ 82 ret = -EFAULT; /* make the VM retry the fault */
83 goto out;
82 } 84 }
83 85
84 /* 86 /*
@@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
112 ret = nilfs_transaction_begin(inode->i_sb, &ti, 1); 114 ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
113 /* never returns -ENOMEM, but may return -ENOSPC */ 115 /* never returns -ENOMEM, but may return -ENOSPC */
114 if (unlikely(ret)) 116 if (unlikely(ret))
115 return VM_FAULT_SIGBUS; 117 goto out;
116 118
117 ret = block_page_mkwrite(vma, vmf, nilfs_get_block); 119 ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
118 if (ret != VM_FAULT_LOCKED) { 120 if (ret) {
119 nilfs_transaction_abort(inode->i_sb); 121 nilfs_transaction_abort(inode->i_sb);
120 return ret; 122 goto out;
121 } 123 }
122 nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits)); 124 nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
123 nilfs_transaction_commit(inode->i_sb); 125 nilfs_transaction_commit(inode->i_sb);
124 126
125 mapped: 127 mapped:
126 wait_on_page_writeback(page); 128 wait_on_page_writeback(page);
127 return VM_FAULT_LOCKED; 129 out:
130 sb_end_pagefault(inode->i_sb);
131 return block_page_mkwrite_return(ret);
128} 132}
129 133
130static const struct vm_operations_struct nilfs_file_vm_ops = { 134static const struct vm_operations_struct nilfs_file_vm_ops = {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 0b6387c67e6c..fdb180769485 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
660 goto out_free; 660 goto out_free;
661 } 661 }
662 662
663 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
664
665 ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); 663 ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]);
666 if (ret < 0) 664 if (ret < 0)
667 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 88e11fb346b6..a5752a589932 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb,
189 if (ret > 0) 189 if (ret > 0)
190 return 0; 190 return 0;
191 191
192 vfs_check_frozen(sb, SB_FREEZE_WRITE); 192 sb_start_intwrite(sb);
193 193
194 nilfs = sb->s_fs_info; 194 nilfs = sb->s_fs_info;
195 down_read(&nilfs->ns_segctor_sem); 195 down_read(&nilfs->ns_segctor_sem);
@@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb,
205 current->journal_info = ti->ti_save; 205 current->journal_info = ti->ti_save;
206 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 206 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
207 kmem_cache_free(nilfs_transaction_cachep, ti); 207 kmem_cache_free(nilfs_transaction_cachep, ti);
208 sb_end_intwrite(sb);
208 return ret; 209 return ret;
209} 210}
210 211
@@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb)
246 err = nilfs_construct_segment(sb); 247 err = nilfs_construct_segment(sb);
247 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 248 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
248 kmem_cache_free(nilfs_transaction_cachep, ti); 249 kmem_cache_free(nilfs_transaction_cachep, ti);
250 sb_end_intwrite(sb);
249 return err; 251 return err;
250} 252}
251 253
@@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb)
264 current->journal_info = ti->ti_save; 266 current->journal_info = ti->ti_save;
265 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 267 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
266 kmem_cache_free(nilfs_transaction_cachep, ti); 268 kmem_cache_free(nilfs_transaction_cachep, ti);
269 sb_end_intwrite(sb);
267} 270}
268 271
269void nilfs_relax_pressure_in_lock(struct super_block *sb) 272void nilfs_relax_pressure_in_lock(struct super_block *sb)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7389d2d5e51d..1ecf46448f85 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2084 if (err) 2084 if (err)
2085 return err; 2085 return err;
2086 pos = *ppos; 2086 pos = *ppos;
2087 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2088 /* We can write back this queue in page reclaim. */ 2087 /* We can write back this queue in page reclaim. */
2089 current->backing_dev_info = mapping->backing_dev_info; 2088 current->backing_dev_info = mapping->backing_dev_info;
2090 written = 0; 2089 written = 0;
@@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2119 2118
2120 BUG_ON(iocb->ki_pos != pos); 2119 BUG_ON(iocb->ki_pos != pos);
2121 2120
2121 sb_start_write(inode->i_sb);
2122 mutex_lock(&inode->i_mutex); 2122 mutex_lock(&inode->i_mutex);
2123 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2123 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2124 mutex_unlock(&inode->i_mutex); 2124 mutex_unlock(&inode->i_mutex);
@@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2127 if (err < 0) 2127 if (err < 0)
2128 ret = err; 2128 ret = err;
2129 } 2129 }
2130 sb_end_write(inode->i_sb);
2130 return ret; 2131 return ret;
2131} 2132}
2132 2133
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7602783d7f41..46a1f6d75104 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1971{ 1971{
1972 struct inode *inode = file->f_path.dentry->d_inode; 1972 struct inode *inode = file->f_path.dentry->d_inode;
1973 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1973 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1974 int ret;
1974 1975
1975 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && 1976 if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
1976 !ocfs2_writes_unwritten_extents(osb)) 1977 !ocfs2_writes_unwritten_extents(osb))
@@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
1985 if (!(file->f_mode & FMODE_WRITE)) 1986 if (!(file->f_mode & FMODE_WRITE))
1986 return -EBADF; 1987 return -EBADF;
1987 1988
1988 return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); 1989 ret = mnt_want_write_file(file);
1990 if (ret)
1991 return ret;
1992 ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
1993 mnt_drop_write_file(file);
1994 return ret;
1989} 1995}
1990 1996
1991static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, 1997static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
@@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2261 if (iocb->ki_left == 0) 2267 if (iocb->ki_left == 0)
2262 return 0; 2268 return 0;
2263 2269
2264 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 2270 sb_start_write(inode->i_sb);
2265 2271
2266 appending = file->f_flags & O_APPEND ? 1 : 0; 2272 appending = file->f_flags & O_APPEND ? 1 : 0;
2267 direct_io = file->f_flags & O_DIRECT ? 1 : 0; 2273 direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2436,6 +2442,7 @@ out_sems:
2436 ocfs2_iocb_clear_sem_locked(iocb); 2442 ocfs2_iocb_clear_sem_locked(iocb);
2437 2443
2438 mutex_unlock(&inode->i_mutex); 2444 mutex_unlock(&inode->i_mutex);
2445 sb_end_write(inode->i_sb);
2439 2446
2440 if (written) 2447 if (written)
2441 ret = written; 2448 ret = written;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index d96f7f81d8dd..f20edcbfe700 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
928 if (get_user(new_clusters, (int __user *)arg)) 928 if (get_user(new_clusters, (int __user *)arg))
929 return -EFAULT; 929 return -EFAULT;
930 930
931 return ocfs2_group_extend(inode, new_clusters); 931 status = mnt_want_write_file(filp);
932 if (status)
933 return status;
934 status = ocfs2_group_extend(inode, new_clusters);
935 mnt_drop_write_file(filp);
936 return status;
932 case OCFS2_IOC_GROUP_ADD: 937 case OCFS2_IOC_GROUP_ADD:
933 case OCFS2_IOC_GROUP_ADD64: 938 case OCFS2_IOC_GROUP_ADD64:
934 if (!capable(CAP_SYS_RESOURCE)) 939 if (!capable(CAP_SYS_RESOURCE))
@@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
937 if (copy_from_user(&input, (int __user *) arg, sizeof(input))) 942 if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
938 return -EFAULT; 943 return -EFAULT;
939 944
940 return ocfs2_group_add(inode, &input); 945 status = mnt_want_write_file(filp);
946 if (status)
947 return status;
948 status = ocfs2_group_add(inode, &input);
949 mnt_drop_write_file(filp);
950 return status;
941 case OCFS2_IOC_REFLINK: 951 case OCFS2_IOC_REFLINK:
942 if (copy_from_user(&args, argp, sizeof(args))) 952 if (copy_from_user(&args, argp, sizeof(args)))
943 return -EFAULT; 953 return -EFAULT;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 0a42ae96dca7..2dd36af79e26 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
355 if (journal_current_handle()) 355 if (journal_current_handle())
356 return jbd2_journal_start(journal, max_buffs); 356 return jbd2_journal_start(journal, max_buffs);
357 357
358 sb_start_intwrite(osb->sb);
359
358 down_read(&osb->journal->j_trans_barrier); 360 down_read(&osb->journal->j_trans_barrier);
359 361
360 handle = jbd2_journal_start(journal, max_buffs); 362 handle = jbd2_journal_start(journal, max_buffs);
361 if (IS_ERR(handle)) { 363 if (IS_ERR(handle)) {
362 up_read(&osb->journal->j_trans_barrier); 364 up_read(&osb->journal->j_trans_barrier);
365 sb_end_intwrite(osb->sb);
363 366
364 mlog_errno(PTR_ERR(handle)); 367 mlog_errno(PTR_ERR(handle));
365 368
@@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
388 if (ret < 0) 391 if (ret < 0)
389 mlog_errno(ret); 392 mlog_errno(ret);
390 393
391 if (!nested) 394 if (!nested) {
392 up_read(&journal->j_trans_barrier); 395 up_read(&journal->j_trans_barrier);
396 sb_end_intwrite(osb->sb);
397 }
393 398
394 return ret; 399 return ret;
395} 400}
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9cd41083e991..d150372fd81d 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
136 sigset_t oldset; 136 sigset_t oldset;
137 int ret; 137 int ret;
138 138
139 sb_start_pagefault(inode->i_sb);
139 ocfs2_block_signals(&oldset); 140 ocfs2_block_signals(&oldset);
140 141
141 /* 142 /*
@@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
165 166
166out: 167out:
167 ocfs2_unblock_signals(&oldset); 168 ocfs2_unblock_signals(&oldset);
169 sb_end_pagefault(inode->i_sb);
168 return ret; 170 return ret;
169} 171}
170 172
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9f32d7cbb7a3..30a055049e16 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
4466 goto out_dput; 4466 goto out_dput;
4467 } 4467 }
4468 4468
4469 error = mnt_want_write(new_path.mnt);
4470 if (error) {
4471 mlog_errno(error);
4472 goto out_dput;
4473 }
4474
4475 error = ocfs2_vfs_reflink(old_path.dentry, 4469 error = ocfs2_vfs_reflink(old_path.dentry,
4476 new_path.dentry->d_inode, 4470 new_path.dentry->d_inode,
4477 new_dentry, preserve); 4471 new_dentry, preserve);
4478 mnt_drop_write(new_path.mnt);
4479out_dput: 4472out_dput:
4480 dput(new_dentry); 4473 done_path_create(&new_path, new_dentry);
4481 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
4482 path_put(&new_path);
4483out: 4474out:
4484 path_put(&old_path); 4475 path_put(&old_path);
4485 4476
diff --git a/fs/open.c b/fs/open.c
index 1e914b397e12..f3d96e7e7b19 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
164 if (IS_APPEND(inode)) 164 if (IS_APPEND(inode))
165 goto out_putf; 165 goto out_putf;
166 166
167 sb_start_write(inode->i_sb);
167 error = locks_verify_truncate(inode, file, length); 168 error = locks_verify_truncate(inode, file, length);
168 if (!error) 169 if (!error)
169 error = security_path_truncate(&file->f_path); 170 error = security_path_truncate(&file->f_path);
170 if (!error) 171 if (!error)
171 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file); 172 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
173 sb_end_write(inode->i_sb);
172out_putf: 174out_putf:
173 fput(file); 175 fput(file);
174out: 176out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
266 if (!file->f_op->fallocate) 268 if (!file->f_op->fallocate)
267 return -EOPNOTSUPP; 269 return -EOPNOTSUPP;
268 270
269 return file->f_op->fallocate(file, mode, offset, len); 271 sb_start_write(inode->i_sb);
272 ret = file->f_op->fallocate(file, mode, offset, len);
273 sb_end_write(inode->i_sb);
274 return ret;
270} 275}
271 276
272SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) 277SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
@@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode,
620 /* 625 /*
621 * Balanced in __fput() 626 * Balanced in __fput()
622 */ 627 */
623 error = mnt_want_write(mnt); 628 error = __mnt_want_write(mnt);
624 if (error) 629 if (error)
625 put_write_access(inode); 630 put_write_access(inode);
626 } 631 }
@@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f,
654 if (unlikely(f->f_flags & O_PATH)) 659 if (unlikely(f->f_flags & O_PATH))
655 f->f_mode = FMODE_PATH; 660 f->f_mode = FMODE_PATH;
656 661
662 path_get(&f->f_path);
657 inode = f->f_path.dentry->d_inode; 663 inode = f->f_path.dentry->d_inode;
658 if (f->f_mode & FMODE_WRITE) { 664 if (f->f_mode & FMODE_WRITE) {
659 error = __get_file_write_access(inode, f->f_path.mnt); 665 error = __get_file_write_access(inode, f->f_path.mnt);
@@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry,
739 int error; 745 int error;
740 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ 746 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
741 747
742 mntget(file->f_path.mnt); 748 file->f_path.dentry = dentry;
743 file->f_path.dentry = dget(dentry);
744
745 error = do_dentry_open(file, open, current_cred()); 749 error = do_dentry_open(file, open, current_cred());
746 if (!error) 750 if (!error)
747 *opened |= FILE_OPENED; 751 *opened |= FILE_OPENED;
@@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags,
784 788
785 f->f_flags = flags; 789 f->f_flags = flags;
786 f->f_path = *path; 790 f->f_path = *path;
787 path_get(&f->f_path);
788 error = do_dentry_open(f, NULL, cred); 791 error = do_dentry_open(f, NULL, cred);
789 if (!error) { 792 if (!error) {
790 error = open_check_o_direct(f); 793 error = open_check_o_direct(f);
diff --git a/fs/pipe.c b/fs/pipe.c
index 95cbd6b227e6..8d85d7068c1e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,18 +1016,16 @@ fail_inode:
1016 return NULL; 1016 return NULL;
1017} 1017}
1018 1018
1019struct file *create_write_pipe(int flags) 1019int create_pipe_files(struct file **res, int flags)
1020{ 1020{
1021 int err; 1021 int err;
1022 struct inode *inode; 1022 struct inode *inode = get_pipe_inode();
1023 struct file *f; 1023 struct file *f;
1024 struct path path; 1024 struct path path;
1025 struct qstr name = { .name = "" }; 1025 static struct qstr name = { .name = "" };
1026 1026
1027 err = -ENFILE;
1028 inode = get_pipe_inode();
1029 if (!inode) 1027 if (!inode)
1030 goto err; 1028 return -ENFILE;
1031 1029
1032 err = -ENOMEM; 1030 err = -ENOMEM;
1033 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); 1031 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
@@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags)
1041 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); 1039 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
1042 if (!f) 1040 if (!f)
1043 goto err_dentry; 1041 goto err_dentry;
1044 f->f_mapping = inode->i_mapping;
1045 1042
1046 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); 1043 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
1047 f->f_version = 0;
1048 1044
1049 return f; 1045 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
1046 if (!res[0])
1047 goto err_file;
1048
1049 path_get(&path);
1050 res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1051 res[1] = f;
1052 return 0;
1050 1053
1051 err_dentry: 1054err_file:
1055 put_filp(f);
1056err_dentry:
1052 free_pipe_info(inode); 1057 free_pipe_info(inode);
1053 path_put(&path); 1058 path_put(&path);
1054 return ERR_PTR(err); 1059 return err;
1055 1060
1056 err_inode: 1061err_inode:
1057 free_pipe_info(inode); 1062 free_pipe_info(inode);
1058 iput(inode); 1063 iput(inode);
1059 err: 1064 return err;
1060 return ERR_PTR(err);
1061}
1062
1063void free_write_pipe(struct file *f)
1064{
1065 free_pipe_info(f->f_dentry->d_inode);
1066 path_put(&f->f_path);
1067 put_filp(f);
1068}
1069
1070struct file *create_read_pipe(struct file *wrf, int flags)
1071{
1072 /* Grab pipe from the writer */
1073 struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
1074 &read_pipefifo_fops);
1075 if (!f)
1076 return ERR_PTR(-ENFILE);
1077
1078 path_get(&wrf->f_path);
1079 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1080
1081 return f;
1082} 1065}
1083 1066
1084int do_pipe_flags(int *fd, int flags) 1067int do_pipe_flags(int *fd, int flags)
1085{ 1068{
1086 struct file *fw, *fr; 1069 struct file *files[2];
1087 int error; 1070 int error;
1088 int fdw, fdr; 1071 int fdw, fdr;
1089 1072
1090 if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) 1073 if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
1091 return -EINVAL; 1074 return -EINVAL;
1092 1075
1093 fw = create_write_pipe(flags); 1076 error = create_pipe_files(files, flags);
1094 if (IS_ERR(fw)) 1077 if (error)
1095 return PTR_ERR(fw); 1078 return error;
1096 fr = create_read_pipe(fw, flags);
1097 error = PTR_ERR(fr);
1098 if (IS_ERR(fr))
1099 goto err_write_pipe;
1100 1079
1101 error = get_unused_fd_flags(flags); 1080 error = get_unused_fd_flags(flags);
1102 if (error < 0) 1081 if (error < 0)
@@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags)
1109 fdw = error; 1088 fdw = error;
1110 1089
1111 audit_fd_pair(fdr, fdw); 1090 audit_fd_pair(fdr, fdw);
1112 fd_install(fdr, fr); 1091 fd_install(fdr, files[0]);
1113 fd_install(fdw, fw); 1092 fd_install(fdw, files[1]);
1114 fd[0] = fdr; 1093 fd[0] = fdr;
1115 fd[1] = fdw; 1094 fd[1] = fdw;
1116 1095
@@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags)
1119 err_fdr: 1098 err_fdr:
1120 put_unused_fd(fdr); 1099 put_unused_fd(fdr);
1121 err_read_pipe: 1100 err_read_pipe:
1122 path_put(&fr->f_path); 1101 fput(files[0]);
1123 put_filp(fr); 1102 fput(files[1]);
1124 err_write_pipe:
1125 free_write_pipe(fw);
1126 return error; 1103 return error;
1127} 1104}
1128 1105
diff --git a/fs/splice.c b/fs/splice.c
index 7bf08fa22ec9..41514dd89462 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
996 }; 996 };
997 ssize_t ret; 997 ssize_t ret;
998 998
999 sb_start_write(inode->i_sb);
1000
999 pipe_lock(pipe); 1001 pipe_lock(pipe);
1000 1002
1001 splice_from_pipe_begin(&sd); 1003 splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
1034 *ppos += ret; 1036 *ppos += ret;
1035 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 1037 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
1036 } 1038 }
1039 sb_end_write(inode->i_sb);
1037 1040
1038 return ret; 1041 return ret;
1039} 1042}
diff --git a/fs/super.c b/fs/super.c
index 4bf714459a4b..b05cf47463d0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,12 +33,19 @@
33#include <linux/rculist_bl.h> 33#include <linux/rculist_bl.h>
34#include <linux/cleancache.h> 34#include <linux/cleancache.h>
35#include <linux/fsnotify.h> 35#include <linux/fsnotify.h>
36#include <linux/lockdep.h>
36#include "internal.h" 37#include "internal.h"
37 38
38 39
39LIST_HEAD(super_blocks); 40LIST_HEAD(super_blocks);
40DEFINE_SPINLOCK(sb_lock); 41DEFINE_SPINLOCK(sb_lock);
41 42
43static char *sb_writers_name[SB_FREEZE_LEVELS] = {
44 "sb_writers",
45 "sb_pagefaults",
46 "sb_internal",
47};
48
42/* 49/*
43 * One thing we have to be careful of with a per-sb shrinker is that we don't 50 * One thing we have to be careful of with a per-sb shrinker is that we don't
44 * drop the last active reference to the superblock from within the shrinker. 51 * drop the last active reference to the superblock from within the shrinker.
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
102 return total_objects; 109 return total_objects;
103} 110}
104 111
112static int init_sb_writers(struct super_block *s, struct file_system_type *type)
113{
114 int err;
115 int i;
116
117 for (i = 0; i < SB_FREEZE_LEVELS; i++) {
118 err = percpu_counter_init(&s->s_writers.counter[i], 0);
119 if (err < 0)
120 goto err_out;
121 lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
122 &type->s_writers_key[i], 0);
123 }
124 init_waitqueue_head(&s->s_writers.wait);
125 init_waitqueue_head(&s->s_writers.wait_unfrozen);
126 return 0;
127err_out:
128 while (--i >= 0)
129 percpu_counter_destroy(&s->s_writers.counter[i]);
130 return err;
131}
132
133static void destroy_sb_writers(struct super_block *s)
134{
135 int i;
136
137 for (i = 0; i < SB_FREEZE_LEVELS; i++)
138 percpu_counter_destroy(&s->s_writers.counter[i]);
139}
140
105/** 141/**
106 * alloc_super - create new superblock 142 * alloc_super - create new superblock
107 * @type: filesystem type superblock should belong to 143 * @type: filesystem type superblock should belong to
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
117 153
118 if (s) { 154 if (s) {
119 if (security_sb_alloc(s)) { 155 if (security_sb_alloc(s)) {
156 /*
157 * We cannot call security_sb_free() without
158 * security_sb_alloc() succeeding. So bail out manually
159 */
120 kfree(s); 160 kfree(s);
121 s = NULL; 161 s = NULL;
122 goto out; 162 goto out;
123 } 163 }
124#ifdef CONFIG_SMP 164#ifdef CONFIG_SMP
125 s->s_files = alloc_percpu(struct list_head); 165 s->s_files = alloc_percpu(struct list_head);
126 if (!s->s_files) { 166 if (!s->s_files)
127 security_sb_free(s); 167 goto err_out;
128 kfree(s); 168 else {
129 s = NULL;
130 goto out;
131 } else {
132 int i; 169 int i;
133 170
134 for_each_possible_cpu(i) 171 for_each_possible_cpu(i)
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
137#else 174#else
138 INIT_LIST_HEAD(&s->s_files); 175 INIT_LIST_HEAD(&s->s_files);
139#endif 176#endif
177 if (init_sb_writers(s, type))
178 goto err_out;
140 s->s_flags = flags; 179 s->s_flags = flags;
141 s->s_bdi = &default_backing_dev_info; 180 s->s_bdi = &default_backing_dev_info;
142 INIT_HLIST_NODE(&s->s_instances); 181 INIT_HLIST_NODE(&s->s_instances);
@@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
178 mutex_init(&s->s_dquot.dqio_mutex); 217 mutex_init(&s->s_dquot.dqio_mutex);
179 mutex_init(&s->s_dquot.dqonoff_mutex); 218 mutex_init(&s->s_dquot.dqonoff_mutex);
180 init_rwsem(&s->s_dquot.dqptr_sem); 219 init_rwsem(&s->s_dquot.dqptr_sem);
181 init_waitqueue_head(&s->s_wait_unfrozen);
182 s->s_maxbytes = MAX_NON_LFS; 220 s->s_maxbytes = MAX_NON_LFS;
183 s->s_op = &default_op; 221 s->s_op = &default_op;
184 s->s_time_gran = 1000000000; 222 s->s_time_gran = 1000000000;
@@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
190 } 228 }
191out: 229out:
192 return s; 230 return s;
231err_out:
232 security_sb_free(s);
233#ifdef CONFIG_SMP
234 if (s->s_files)
235 free_percpu(s->s_files);
236#endif
237 destroy_sb_writers(s);
238 kfree(s);
239 s = NULL;
240 goto out;
193} 241}
194 242
195/** 243/**
@@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s)
203#ifdef CONFIG_SMP 251#ifdef CONFIG_SMP
204 free_percpu(s->s_files); 252 free_percpu(s->s_files);
205#endif 253#endif
254 destroy_sb_writers(s);
206 security_sb_free(s); 255 security_sb_free(s);
207 WARN_ON(!list_empty(&s->s_mounts)); 256 WARN_ON(!list_empty(&s->s_mounts));
208 kfree(s->s_subtype); 257 kfree(s->s_subtype);
@@ -651,10 +700,11 @@ struct super_block *get_super_thawed(struct block_device *bdev)
651{ 700{
652 while (1) { 701 while (1) {
653 struct super_block *s = get_super(bdev); 702 struct super_block *s = get_super(bdev);
654 if (!s || s->s_frozen == SB_UNFROZEN) 703 if (!s || s->s_writers.frozen == SB_UNFROZEN)
655 return s; 704 return s;
656 up_read(&s->s_umount); 705 up_read(&s->s_umount);
657 vfs_check_frozen(s, SB_FREEZE_WRITE); 706 wait_event(s->s_writers.wait_unfrozen,
707 s->s_writers.frozen == SB_UNFROZEN);
658 put_super(s); 708 put_super(s);
659 } 709 }
660} 710}
@@ -732,7 +782,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
732 int retval; 782 int retval;
733 int remount_ro; 783 int remount_ro;
734 784
735 if (sb->s_frozen != SB_UNFROZEN) 785 if (sb->s_writers.frozen != SB_UNFROZEN)
736 return -EBUSY; 786 return -EBUSY;
737 787
738#ifdef CONFIG_BLOCK 788#ifdef CONFIG_BLOCK
@@ -1163,6 +1213,120 @@ out:
1163 return ERR_PTR(error); 1213 return ERR_PTR(error);
1164} 1214}
1165 1215
1216/*
1217 * This is an internal function, please use sb_end_{write,pagefault,intwrite}
1218 * instead.
1219 */
1220void __sb_end_write(struct super_block *sb, int level)
1221{
1222 percpu_counter_dec(&sb->s_writers.counter[level-1]);
1223 /*
1224 * Make sure s_writers are updated before we wake up waiters in
1225 * freeze_super().
1226 */
1227 smp_mb();
1228 if (waitqueue_active(&sb->s_writers.wait))
1229 wake_up(&sb->s_writers.wait);
1230 rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
1231}
1232EXPORT_SYMBOL(__sb_end_write);
1233
1234#ifdef CONFIG_LOCKDEP
1235/*
1236 * We want lockdep to tell us about possible deadlocks with freezing but
1237 * it's a bit tricky to properly instrument it. Getting a freeze protection
1238 * works as getting a read lock but there are subtle problems. XFS for example
1239 * gets freeze protection on internal level twice in some cases, which is OK
1240 * only because we already hold a freeze protection also on higher level. Due
1241 * to these cases we have to tell lockdep we are doing trylock when we
1242 * already hold a freeze protection for a higher freeze level.
1243 */
1244static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
1245 unsigned long ip)
1246{
1247 int i;
1248
1249 if (!trylock) {
1250 for (i = 0; i < level - 1; i++)
1251 if (lock_is_held(&sb->s_writers.lock_map[i])) {
1252 trylock = true;
1253 break;
1254 }
1255 }
1256 rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
1257}
1258#endif
1259
1260/*
1261 * This is an internal function, please use sb_start_{write,pagefault,intwrite}
1262 * instead.
1263 */
1264int __sb_start_write(struct super_block *sb, int level, bool wait)
1265{
1266retry:
1267 if (unlikely(sb->s_writers.frozen >= level)) {
1268 if (!wait)
1269 return 0;
1270 wait_event(sb->s_writers.wait_unfrozen,
1271 sb->s_writers.frozen < level);
1272 }
1273
1274#ifdef CONFIG_LOCKDEP
1275 acquire_freeze_lock(sb, level, !wait, _RET_IP_);
1276#endif
1277 percpu_counter_inc(&sb->s_writers.counter[level-1]);
1278 /*
1279 * Make sure counter is updated before we check for frozen.
1280 * freeze_super() first sets frozen and then checks the counter.
1281 */
1282 smp_mb();
1283 if (unlikely(sb->s_writers.frozen >= level)) {
1284 __sb_end_write(sb, level);
1285 goto retry;
1286 }
1287 return 1;
1288}
1289EXPORT_SYMBOL(__sb_start_write);
1290
1291/**
1292 * sb_wait_write - wait until all writers to given file system finish
1293 * @sb: the super for which we wait
1294 * @level: type of writers we wait for (normal vs page fault)
1295 *
1296 * This function waits until there are no writers of given type to given file
1297 * system. Caller of this function should make sure there can be no new writers
1298 * of type @level before calling this function. Otherwise this function can
1299 * livelock.
1300 */
1301static void sb_wait_write(struct super_block *sb, int level)
1302{
1303 s64 writers;
1304
1305 /*
1306 * We just cycle-through lockdep here so that it does not complain
1307 * about returning with lock to userspace
1308 */
1309 rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
1310 rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
1311
1312 do {
1313 DEFINE_WAIT(wait);
1314
1315 /*
1316 * We use a barrier in prepare_to_wait() to separate setting
1317 * of frozen and checking of the counter
1318 */
1319 prepare_to_wait(&sb->s_writers.wait, &wait,
1320 TASK_UNINTERRUPTIBLE);
1321
1322 writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
1323 if (writers)
1324 schedule();
1325
1326 finish_wait(&sb->s_writers.wait, &wait);
1327 } while (writers);
1328}
1329
1166/** 1330/**
1167 * freeze_super - lock the filesystem and force it into a consistent state 1331 * freeze_super - lock the filesystem and force it into a consistent state
1168 * @sb: the super to lock 1332 * @sb: the super to lock
@@ -1170,6 +1334,31 @@ out:
1170 * Syncs the super to make sure the filesystem is consistent and calls the fs's 1334 * Syncs the super to make sure the filesystem is consistent and calls the fs's
1171 * freeze_fs. Subsequent calls to this without first thawing the fs will return 1335 * freeze_fs. Subsequent calls to this without first thawing the fs will return
1172 * -EBUSY. 1336 * -EBUSY.
1337 *
1338 * During this function, sb->s_writers.frozen goes through these values:
1339 *
1340 * SB_UNFROZEN: File system is normal, all writes progress as usual.
1341 *
1342 * SB_FREEZE_WRITE: The file system is in the process of being frozen. New
1343 * writes should be blocked, though page faults are still allowed. We wait for
1344 * all writes to complete and then proceed to the next stage.
1345 *
1346 * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
1347 * but internal fs threads can still modify the filesystem (although they
1348 * should not dirty new pages or inodes), writeback can run etc. After waiting
1349 * for all running page faults we sync the filesystem which will clean all
1350 * dirty pages and inodes (no new dirty pages or inodes can be created when
1351 * sync is running).
1352 *
1353 * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
1354 * modification are blocked (e.g. XFS preallocation truncation on inode
1355 * reclaim). This is usually implemented by blocking new transactions for
1356 * filesystems that have them and need this additional guard. After all
1357 * internal writers are finished we call ->freeze_fs() to finish filesystem
1358 * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
1359 * mostly auxiliary for filesystems to verify they do not modify frozen fs.
1360 *
1361 * sb->s_writers.frozen is protected by sb->s_umount.
1173 */ 1362 */
1174int freeze_super(struct super_block *sb) 1363int freeze_super(struct super_block *sb)
1175{ 1364{
@@ -1177,7 +1366,7 @@ int freeze_super(struct super_block *sb)
1177 1366
1178 atomic_inc(&sb->s_active); 1367 atomic_inc(&sb->s_active);
1179 down_write(&sb->s_umount); 1368 down_write(&sb->s_umount);
1180 if (sb->s_frozen) { 1369 if (sb->s_writers.frozen != SB_UNFROZEN) {
1181 deactivate_locked_super(sb); 1370 deactivate_locked_super(sb);
1182 return -EBUSY; 1371 return -EBUSY;
1183 } 1372 }
@@ -1188,33 +1377,53 @@ int freeze_super(struct super_block *sb)
1188 } 1377 }
1189 1378
1190 if (sb->s_flags & MS_RDONLY) { 1379 if (sb->s_flags & MS_RDONLY) {
1191 sb->s_frozen = SB_FREEZE_TRANS; 1380 /* Nothing to do really... */
1192 smp_wmb(); 1381 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1193 up_write(&sb->s_umount); 1382 up_write(&sb->s_umount);
1194 return 0; 1383 return 0;
1195 } 1384 }
1196 1385
1197 sb->s_frozen = SB_FREEZE_WRITE; 1386 /* From now on, no new normal writers can start */
1387 sb->s_writers.frozen = SB_FREEZE_WRITE;
1388 smp_wmb();
1389
1390 /* Release s_umount to preserve sb_start_write -> s_umount ordering */
1391 up_write(&sb->s_umount);
1392
1393 sb_wait_write(sb, SB_FREEZE_WRITE);
1394
1395 /* Now we go and block page faults... */
1396 down_write(&sb->s_umount);
1397 sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
1198 smp_wmb(); 1398 smp_wmb();
1199 1399
1400 sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
1401
1402 /* All writers are done so after syncing there won't be dirty data */
1200 sync_filesystem(sb); 1403 sync_filesystem(sb);
1201 1404
1202 sb->s_frozen = SB_FREEZE_TRANS; 1405 /* Now wait for internal filesystem counter */
1406 sb->s_writers.frozen = SB_FREEZE_FS;
1203 smp_wmb(); 1407 smp_wmb();
1408 sb_wait_write(sb, SB_FREEZE_FS);
1204 1409
1205 sync_blockdev(sb->s_bdev);
1206 if (sb->s_op->freeze_fs) { 1410 if (sb->s_op->freeze_fs) {
1207 ret = sb->s_op->freeze_fs(sb); 1411 ret = sb->s_op->freeze_fs(sb);
1208 if (ret) { 1412 if (ret) {
1209 printk(KERN_ERR 1413 printk(KERN_ERR
1210 "VFS:Filesystem freeze failed\n"); 1414 "VFS:Filesystem freeze failed\n");
1211 sb->s_frozen = SB_UNFROZEN; 1415 sb->s_writers.frozen = SB_UNFROZEN;
1212 smp_wmb(); 1416 smp_wmb();
1213 wake_up(&sb->s_wait_unfrozen); 1417 wake_up(&sb->s_writers.wait_unfrozen);
1214 deactivate_locked_super(sb); 1418 deactivate_locked_super(sb);
1215 return ret; 1419 return ret;
1216 } 1420 }
1217 } 1421 }
1422 /*
1423 * This is just for debugging purposes so that fs can warn if it
1424 * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
1425 */
1426 sb->s_writers.frozen = SB_FREEZE_COMPLETE;
1218 up_write(&sb->s_umount); 1427 up_write(&sb->s_umount);
1219 return 0; 1428 return 0;
1220} 1429}
@@ -1231,7 +1440,7 @@ int thaw_super(struct super_block *sb)
1231 int error; 1440 int error;
1232 1441
1233 down_write(&sb->s_umount); 1442 down_write(&sb->s_umount);
1234 if (sb->s_frozen == SB_UNFROZEN) { 1443 if (sb->s_writers.frozen == SB_UNFROZEN) {
1235 up_write(&sb->s_umount); 1444 up_write(&sb->s_umount);
1236 return -EINVAL; 1445 return -EINVAL;
1237 } 1446 }
@@ -1244,16 +1453,15 @@ int thaw_super(struct super_block *sb)
1244 if (error) { 1453 if (error) {
1245 printk(KERN_ERR 1454 printk(KERN_ERR
1246 "VFS:Filesystem thaw failed\n"); 1455 "VFS:Filesystem thaw failed\n");
1247 sb->s_frozen = SB_FREEZE_TRANS;
1248 up_write(&sb->s_umount); 1456 up_write(&sb->s_umount);
1249 return error; 1457 return error;
1250 } 1458 }
1251 } 1459 }
1252 1460
1253out: 1461out:
1254 sb->s_frozen = SB_UNFROZEN; 1462 sb->s_writers.frozen = SB_UNFROZEN;
1255 smp_wmb(); 1463 smp_wmb();
1256 wake_up(&sb->s_wait_unfrozen); 1464 wake_up(&sb->s_writers.wait_unfrozen);
1257 deactivate_locked_super(sb); 1465 deactivate_locked_super(sb);
1258 1466
1259 return 0; 1467 return 0;
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index a4759833d62d..614b2b544880 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
228 ret = 0; 228 ret = 0;
229 if (bb->vm_ops->page_mkwrite) 229 if (bb->vm_ops->page_mkwrite)
230 ret = bb->vm_ops->page_mkwrite(vma, vmf); 230 ret = bb->vm_ops->page_mkwrite(vma, vmf);
231 else
232 file_update_time(file);
231 233
232 sysfs_put_active(attr_sd); 234 sysfs_put_active(attr_sd);
233 return ret; 235 return ret;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 15052ff916ec..e562dd43f41f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -124,6 +124,12 @@ xfs_setfilesize_trans_alloc(
124 ioend->io_append_trans = tp; 124 ioend->io_append_trans = tp;
125 125
126 /* 126 /*
127 * We will pass freeze protection with a transaction. So tell lockdep
128 * we released it.
129 */
130 rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
131 1, _THIS_IP_);
132 /*
127 * We hand off the transaction to the completion thread now, so 133 * We hand off the transaction to the completion thread now, so
128 * clear the flag here. 134 * clear the flag here.
129 */ 135 */
@@ -199,6 +205,15 @@ xfs_end_io(
199 struct xfs_inode *ip = XFS_I(ioend->io_inode); 205 struct xfs_inode *ip = XFS_I(ioend->io_inode);
200 int error = 0; 206 int error = 0;
201 207
208 if (ioend->io_append_trans) {
209 /*
210 * We've got freeze protection passed with the transaction.
211 * Tell lockdep about it.
212 */
213 rwsem_acquire_read(
214 &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
215 0, 1, _THIS_IP_);
216 }
202 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 217 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
203 ioend->io_error = -EIO; 218 ioend->io_error = -EIO;
204 goto done; 219 goto done;
@@ -1425,6 +1440,9 @@ out_trans_cancel:
1425 if (ioend->io_append_trans) { 1440 if (ioend->io_append_trans) {
1426 current_set_flags_nested(&ioend->io_append_trans->t_pflags, 1441 current_set_flags_nested(&ioend->io_append_trans->t_pflags,
1427 PF_FSTRANS); 1442 PF_FSTRANS);
1443 rwsem_acquire_read(
1444 &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
1445 0, 1, _THIS_IP_);
1428 xfs_trans_cancel(ioend->io_append_trans, 0); 1446 xfs_trans_cancel(ioend->io_append_trans, 0);
1429 } 1447 }
1430out_destroy_ioend: 1448out_destroy_ioend:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c4559c6e6f2c..56afcdb2377d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -770,10 +770,12 @@ xfs_file_aio_write(
770 if (ocount == 0) 770 if (ocount == 0)
771 return 0; 771 return 0;
772 772
773 xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); 773 sb_start_write(inode->i_sb);
774 774
775 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 775 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
776 return -EIO; 776 ret = -EIO;
777 goto out;
778 }
777 779
778 if (unlikely(file->f_flags & O_DIRECT)) 780 if (unlikely(file->f_flags & O_DIRECT))
779 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); 781 ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
@@ -792,6 +794,8 @@ xfs_file_aio_write(
792 ret = err; 794 ret = err;
793 } 795 }
794 796
797out:
798 sb_end_write(inode->i_sb);
795 return ret; 799 return ret;
796} 800}
797 801
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f1535d25a9b..0e0232c3b6d9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -364,9 +364,15 @@ xfs_fssetdm_by_handle(
364 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) 364 if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
365 return -XFS_ERROR(EFAULT); 365 return -XFS_ERROR(EFAULT);
366 366
367 error = mnt_want_write_file(parfilp);
368 if (error)
369 return error;
370
367 dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); 371 dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
368 if (IS_ERR(dentry)) 372 if (IS_ERR(dentry)) {
373 mnt_drop_write_file(parfilp);
369 return PTR_ERR(dentry); 374 return PTR_ERR(dentry);
375 }
370 376
371 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { 377 if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
372 error = -XFS_ERROR(EPERM); 378 error = -XFS_ERROR(EPERM);
@@ -382,6 +388,7 @@ xfs_fssetdm_by_handle(
382 fsd.fsd_dmstate); 388 fsd.fsd_dmstate);
383 389
384 out: 390 out:
391 mnt_drop_write_file(parfilp);
385 dput(dentry); 392 dput(dentry);
386 return error; 393 return error;
387} 394}
@@ -634,7 +641,11 @@ xfs_ioc_space(
634 if (ioflags & IO_INVIS) 641 if (ioflags & IO_INVIS)
635 attr_flags |= XFS_ATTR_DMI; 642 attr_flags |= XFS_ATTR_DMI;
636 643
644 error = mnt_want_write_file(filp);
645 if (error)
646 return error;
637 error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); 647 error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
648 mnt_drop_write_file(filp);
638 return -error; 649 return -error;
639} 650}
640 651
@@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr(
1163{ 1174{
1164 struct fsxattr fa; 1175 struct fsxattr fa;
1165 unsigned int mask; 1176 unsigned int mask;
1177 int error;
1166 1178
1167 if (copy_from_user(&fa, arg, sizeof(fa))) 1179 if (copy_from_user(&fa, arg, sizeof(fa)))
1168 return -EFAULT; 1180 return -EFAULT;
@@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr(
1171 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1183 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
1172 mask |= FSX_NONBLOCK; 1184 mask |= FSX_NONBLOCK;
1173 1185
1174 return -xfs_ioctl_setattr(ip, &fa, mask); 1186 error = mnt_want_write_file(filp);
1187 if (error)
1188 return error;
1189 error = xfs_ioctl_setattr(ip, &fa, mask);
1190 mnt_drop_write_file(filp);
1191 return -error;
1175} 1192}
1176 1193
1177STATIC int 1194STATIC int
@@ -1196,6 +1213,7 @@ xfs_ioc_setxflags(
1196 struct fsxattr fa; 1213 struct fsxattr fa;
1197 unsigned int flags; 1214 unsigned int flags;
1198 unsigned int mask; 1215 unsigned int mask;
1216 int error;
1199 1217
1200 if (copy_from_user(&flags, arg, sizeof(flags))) 1218 if (copy_from_user(&flags, arg, sizeof(flags)))
1201 return -EFAULT; 1219 return -EFAULT;
@@ -1210,7 +1228,12 @@ xfs_ioc_setxflags(
1210 mask |= FSX_NONBLOCK; 1228 mask |= FSX_NONBLOCK;
1211 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); 1229 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
1212 1230
1213 return -xfs_ioctl_setattr(ip, &fa, mask); 1231 error = mnt_want_write_file(filp);
1232 if (error)
1233 return error;
1234 error = xfs_ioctl_setattr(ip, &fa, mask);
1235 mnt_drop_write_file(filp);
1236 return -error;
1214} 1237}
1215 1238
1216STATIC int 1239STATIC int
@@ -1385,8 +1408,13 @@ xfs_file_ioctl(
1385 if (copy_from_user(&dmi, arg, sizeof(dmi))) 1408 if (copy_from_user(&dmi, arg, sizeof(dmi)))
1386 return -XFS_ERROR(EFAULT); 1409 return -XFS_ERROR(EFAULT);
1387 1410
1411 error = mnt_want_write_file(filp);
1412 if (error)
1413 return error;
1414
1388 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, 1415 error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
1389 dmi.fsd_dmstate); 1416 dmi.fsd_dmstate);
1417 mnt_drop_write_file(filp);
1390 return -error; 1418 return -error;
1391 } 1419 }
1392 1420
@@ -1434,7 +1462,11 @@ xfs_file_ioctl(
1434 1462
1435 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) 1463 if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
1436 return -XFS_ERROR(EFAULT); 1464 return -XFS_ERROR(EFAULT);
1465 error = mnt_want_write_file(filp);
1466 if (error)
1467 return error;
1437 error = xfs_swapext(&sxp); 1468 error = xfs_swapext(&sxp);
1469 mnt_drop_write_file(filp);
1438 return -error; 1470 return -error;
1439 } 1471 }
1440 1472
@@ -1463,9 +1495,14 @@ xfs_file_ioctl(
1463 if (copy_from_user(&inout, arg, sizeof(inout))) 1495 if (copy_from_user(&inout, arg, sizeof(inout)))
1464 return -XFS_ERROR(EFAULT); 1496 return -XFS_ERROR(EFAULT);
1465 1497
1498 error = mnt_want_write_file(filp);
1499 if (error)
1500 return error;
1501
1466 /* input parameter is passed in resblks field of structure */ 1502 /* input parameter is passed in resblks field of structure */
1467 in = inout.resblks; 1503 in = inout.resblks;
1468 error = xfs_reserve_blocks(mp, &in, &inout); 1504 error = xfs_reserve_blocks(mp, &in, &inout);
1505 mnt_drop_write_file(filp);
1469 if (error) 1506 if (error)
1470 return -error; 1507 return -error;
1471 1508
@@ -1496,7 +1533,11 @@ xfs_file_ioctl(
1496 if (copy_from_user(&in, arg, sizeof(in))) 1533 if (copy_from_user(&in, arg, sizeof(in)))
1497 return -XFS_ERROR(EFAULT); 1534 return -XFS_ERROR(EFAULT);
1498 1535
1536 error = mnt_want_write_file(filp);
1537 if (error)
1538 return error;
1499 error = xfs_growfs_data(mp, &in); 1539 error = xfs_growfs_data(mp, &in);
1540 mnt_drop_write_file(filp);
1500 return -error; 1541 return -error;
1501 } 1542 }
1502 1543
@@ -1506,7 +1547,11 @@ xfs_file_ioctl(
1506 if (copy_from_user(&in, arg, sizeof(in))) 1547 if (copy_from_user(&in, arg, sizeof(in)))
1507 return -XFS_ERROR(EFAULT); 1548 return -XFS_ERROR(EFAULT);
1508 1549
1550 error = mnt_want_write_file(filp);
1551 if (error)
1552 return error;
1509 error = xfs_growfs_log(mp, &in); 1553 error = xfs_growfs_log(mp, &in);
1554 mnt_drop_write_file(filp);
1510 return -error; 1555 return -error;
1511 } 1556 }
1512 1557
@@ -1516,7 +1561,11 @@ xfs_file_ioctl(
1516 if (copy_from_user(&in, arg, sizeof(in))) 1561 if (copy_from_user(&in, arg, sizeof(in)))
1517 return -XFS_ERROR(EFAULT); 1562 return -XFS_ERROR(EFAULT);
1518 1563
1564 error = mnt_want_write_file(filp);
1565 if (error)
1566 return error;
1519 error = xfs_growfs_rt(mp, &in); 1567 error = xfs_growfs_rt(mp, &in);
1568 mnt_drop_write_file(filp);
1520 return -error; 1569 return -error;
1521 } 1570 }
1522 1571
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c4f2da0d2bf5..1244274a5674 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -600,7 +600,11 @@ xfs_file_compat_ioctl(
600 600
601 if (xfs_compat_growfs_data_copyin(&in, arg)) 601 if (xfs_compat_growfs_data_copyin(&in, arg))
602 return -XFS_ERROR(EFAULT); 602 return -XFS_ERROR(EFAULT);
603 error = mnt_want_write_file(filp);
604 if (error)
605 return error;
603 error = xfs_growfs_data(mp, &in); 606 error = xfs_growfs_data(mp, &in);
607 mnt_drop_write_file(filp);
604 return -error; 608 return -error;
605 } 609 }
606 case XFS_IOC_FSGROWFSRT_32: { 610 case XFS_IOC_FSGROWFSRT_32: {
@@ -608,7 +612,11 @@ xfs_file_compat_ioctl(
608 612
609 if (xfs_compat_growfs_rt_copyin(&in, arg)) 613 if (xfs_compat_growfs_rt_copyin(&in, arg))
610 return -XFS_ERROR(EFAULT); 614 return -XFS_ERROR(EFAULT);
615 error = mnt_want_write_file(filp);
616 if (error)
617 return error;
611 error = xfs_growfs_rt(mp, &in); 618 error = xfs_growfs_rt(mp, &in);
619 mnt_drop_write_file(filp);
612 return -error; 620 return -error;
613 } 621 }
614#endif 622#endif
@@ -627,7 +635,11 @@ xfs_file_compat_ioctl(
627 offsetof(struct xfs_swapext, sx_stat)) || 635 offsetof(struct xfs_swapext, sx_stat)) ||
628 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) 636 xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
629 return -XFS_ERROR(EFAULT); 637 return -XFS_ERROR(EFAULT);
638 error = mnt_want_write_file(filp);
639 if (error)
640 return error;
630 error = xfs_swapext(&sxp); 641 error = xfs_swapext(&sxp);
642 mnt_drop_write_file(filp);
631 return -error; 643 return -error;
632 } 644 }
633 case XFS_IOC_FSBULKSTAT_32: 645 case XFS_IOC_FSBULKSTAT_32:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 915edf6639f0..973dff6ad935 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten(
680 * the same inode that we complete here and might deadlock 680 * the same inode that we complete here and might deadlock
681 * on the iolock. 681 * on the iolock.
682 */ 682 */
683 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 683 sb_start_intwrite(mp->m_super);
684 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); 684 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
685 tp->t_flags |= XFS_TRANS_RESERVE; 685 tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
686 error = xfs_trans_reserve(tp, resblks, 686 error = xfs_trans_reserve(tp, resblks,
687 XFS_WRITE_LOG_RES(mp), 0, 687 XFS_WRITE_LOG_RES(mp), 0,
688 XFS_TRANS_PERM_LOG_RES, 688 XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 711ca51ca3d7..29c2f83d4147 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1551,7 +1551,7 @@ xfs_unmountfs(
1551int 1551int
1552xfs_fs_writable(xfs_mount_t *mp) 1552xfs_fs_writable(xfs_mount_t *mp)
1553{ 1553{
1554 return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) || 1554 return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) ||
1555 (mp->m_flags & XFS_MOUNT_RDONLY)); 1555 (mp->m_flags & XFS_MOUNT_RDONLY));
1556} 1556}
1557 1557
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8724336a9a08..05a05a7b6119 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -311,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
311#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ 311#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
312#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ 312#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
313 313
314#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
315#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
316
317/* 314/*
318 * Flags for xfs_mountfs 315 * Flags for xfs_mountfs
319 */ 316 */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 97304f10e78a..96548176db80 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -403,7 +403,7 @@ xfs_sync_worker(
403 if (!(mp->m_super->s_flags & MS_ACTIVE) && 403 if (!(mp->m_super->s_flags & MS_ACTIVE) &&
404 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 404 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
405 /* dgc: errors ignored here */ 405 /* dgc: errors ignored here */
406 if (mp->m_super->s_frozen == SB_UNFROZEN && 406 if (mp->m_super->s_writers.frozen == SB_UNFROZEN &&
407 xfs_log_need_covered(mp)) 407 xfs_log_need_covered(mp))
408 error = xfs_fs_log_dummy(mp); 408 error = xfs_fs_log_dummy(mp);
409 else 409 else
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdf324508c5e..06ed520a767f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -576,8 +576,12 @@ xfs_trans_alloc(
576 xfs_mount_t *mp, 576 xfs_mount_t *mp,
577 uint type) 577 uint type)
578{ 578{
579 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); 579 xfs_trans_t *tp;
580 return _xfs_trans_alloc(mp, type, KM_SLEEP); 580
581 sb_start_intwrite(mp->m_super);
582 tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
583 tp->t_flags |= XFS_TRANS_FREEZE_PROT;
584 return tp;
581} 585}
582 586
583xfs_trans_t * 587xfs_trans_t *
@@ -588,6 +592,7 @@ _xfs_trans_alloc(
588{ 592{
589 xfs_trans_t *tp; 593 xfs_trans_t *tp;
590 594
595 WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
591 atomic_inc(&mp->m_active_trans); 596 atomic_inc(&mp->m_active_trans);
592 597
593 tp = kmem_zone_zalloc(xfs_trans_zone, memflags); 598 tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
@@ -611,6 +616,8 @@ xfs_trans_free(
611 xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); 616 xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
612 617
613 atomic_dec(&tp->t_mountp->m_active_trans); 618 atomic_dec(&tp->t_mountp->m_active_trans);
619 if (tp->t_flags & XFS_TRANS_FREEZE_PROT)
620 sb_end_intwrite(tp->t_mountp->m_super);
614 xfs_trans_free_dqinfo(tp); 621 xfs_trans_free_dqinfo(tp);
615 kmem_zone_free(xfs_trans_zone, tp); 622 kmem_zone_free(xfs_trans_zone, tp);
616} 623}
@@ -643,7 +650,11 @@ xfs_trans_dup(
643 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 650 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
644 ASSERT(tp->t_ticket != NULL); 651 ASSERT(tp->t_ticket != NULL);
645 652
646 ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE); 653 ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
654 (tp->t_flags & XFS_TRANS_RESERVE) |
655 (tp->t_flags & XFS_TRANS_FREEZE_PROT);
656 /* We gave our writer reference to the new transaction */
657 tp->t_flags &= ~XFS_TRANS_FREEZE_PROT;
647 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); 658 ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
648 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; 659 ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
649 tp->t_blk_res = tp->t_blk_res_used; 660 tp->t_blk_res = tp->t_blk_res_used;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bc2afd52a0b7..db056544cbb5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -179,6 +179,8 @@ struct xfs_log_item_desc {
179#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ 179#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
180#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ 180#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
181#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ 181#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
182#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
183 count in superblock */
182 184
183/* 185/*
184 * Values for call flags parameter. 186 * Values for call flags parameter.