57 files changed, 937 insertions, 412 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index fc06fd27065e..dd6f7ee1e312 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
                 page, (unsigned long)filp->private_data);
+        /* Update file times before taking page lock */
+        file_update_time(filp);
        v9inode = V9FS_I(inode);
        /* make sure the cache has finished storing the page */
        v9fs_fscache_wait_on_page_write(inode, page);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fadeba6a5db9..62e0cafd6e25 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg)
        struct btrfs_root *root = arg;
        do {
-                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
                if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
                    mutex_trylock(&root->fs_info->cleaner_mutex)) {
                        btrfs_run_delayed_iputs(root);
@@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg)
        do {
                cannot_commit = false;
                delay = HZ * 30;
-                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
                mutex_lock(&root->fs_info->transaction_kthread_mutex);
                spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9aa01ec2138d..5caf285c6e4d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        ssize_t err = 0;
        size_t count, ocount;
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+        sb_start_write(inode->i_sb);
        mutex_lock(&inode->i_mutex);
@@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                        num_written = err;
        }
 out:
+        sb_end_write(inode->i_sb);
        current->backing_dev_info = NULL;
        return num_written ? num_written : err;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48bdfd2591c2..83baec24946d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6629,6 +6629,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        u64 page_start;
        u64 page_end;
+        sb_start_pagefault(inode->i_sb);
        ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
        if (!ret) {
                ret = file_update_time(vma->vm_file);
@@ -6718,12 +6719,15 @@ again:
        unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
 out_unlock:
-        if (!ret)
+        if (!ret) {
+                sb_end_pagefault(inode->i_sb);
                return VM_FAULT_LOCKED;
+        }
        unlock_page(page);
 out:
        btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
 out_noreserve:
+        sb_end_pagefault(inode->i_sb);
        return ret;
 }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 43f0012016e3..bc2f6ffff3cf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (!inode_owner_or_capable(inode))
                return -EACCES;
+        ret = mnt_want_write_file(file);
+        if (ret)
+                return ret;
        mutex_lock(&inode->i_mutex);
        ip_oldflags = ip->flags;
@@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                }
        }
-        ret = mnt_want_write_file(file);
-        if (ret)
-                goto out_unlock;
        if (flags & FS_SYNC_FL)
                ip->flags |= BTRFS_INODE_SYNC;
        else
@@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                inode->i_flags = i_oldflags;
        }
-        mnt_drop_write_file(file);
 out_unlock:
        mutex_unlock(&inode->i_mutex);
+        mnt_drop_write_file(file);
        return ret;
 }
@@ -664,6 +664,10 @@ static noinline int btrfs_mksubvol(struct path *parent,
        struct dentry *dentry;
        int error;
+        error = mnt_want_write(parent->mnt);
+        if (error)
+                return error;
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_one_len(name, parent->dentry, namelen);
@@ -699,6 +703,7 @@ out_dput:
        dput(dentry);
 out_unlock:
        mutex_unlock(&dir->i_mutex);
+        mnt_drop_write(parent->mnt);
        return error;
 }
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7ac7cdcc294e..17be3dedacba 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -335,6 +335,8 @@ again:
        if (!h)
                return ERR_PTR(-ENOMEM);
+        sb_start_intwrite(root->fs_info->sb);
        if (may_wait_transaction(root, type))
                wait_current_trans(root);
@@ -345,6 +347,7 @@ again:
        } while (ret == -EBUSY);
        if (ret < 0) {
+                sb_end_intwrite(root->fs_info->sb);
                kmem_cache_free(btrfs_trans_handle_cachep, h);
                return ERR_PTR(ret);
        }
@@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        btrfs_trans_release_metadata(trans, root);
        trans->block_rsv = NULL;
+        sb_end_intwrite(root->fs_info->sb);
        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root)) {
                trans->transaction->blocked = 1;
@@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        put_transaction(cur_trans);
        put_transaction(cur_trans);
+        sb_end_intwrite(root->fs_info->sb);
        trace_btrfs_transaction_commit(root);
        btrfs_scrub_continue(root);
diff --git a/fs/buffer.c b/fs/buffer.c
index c7062c896d7c..9f6d2e41281d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
 * beyond EOF, then the page is guaranteed safe against truncation until we
 * unlock the page.
 *
- * Direct callers of this function should call vfs_check_frozen() so that page
+ * Direct callers of this function should protect against filesystem freezing
- * fault does not busyloop until the fs is thawed.
+ * using sb_start_write() - sb_end_write() functions.
 */
 int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                         get_block_t get_block)
@@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        loff_t size;
        int ret;
+        /*
+         * Update file times before taking page lock. We may end up failing the
+         * fault so this update may be superfluous but who really cares...
+         */
+        file_update_time(vma->vm_file);
        lock_page(page);
        size = i_size_read(inode);
        if ((page->mapping != inode->i_mapping) ||
@@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        if (unlikely(ret < 0))
                goto out_unlock;
-        /*
-         * Freezing in progress? We check after the page is marked dirty and
-         * with page lock held so if the test here fails, we are sure freezing
-         * code will wait during syncing until the page fault is done - at that
-         * point page will be dirty and unlocked so freezing code will write it
-         * and writeprotect it again.
-         */
        set_page_dirty(page);
-        if (inode->i_sb->s_frozen != SB_UNFROZEN) {
-                ret = -EAGAIN;
-                goto out_unlock;
-        }
        wait_on_page_writeback(page);
        return 0;
 out_unlock:
@@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
        int ret;
        struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
-        /*
+        sb_start_pagefault(sb);
-         * This check is racy but catches the common case. The check in
-         * __block_page_mkwrite() is reliable.
-         */
-        vfs_check_frozen(sb, SB_FREEZE_WRITE);
        ret = __block_page_mkwrite(vma, vmf, get_block);
+        sb_end_pagefault(sb);
        return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL(block_page_mkwrite);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8b67304e4b80..452e71a1b753 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        loff_t size, len;
        int ret;
+        /* Update time before taking page lock */
+        file_update_time(vma->vm_file);
        size = i_size_read(inode);
        if (off + PAGE_CACHE_SIZE <= size)
                len = PAGE_CACHE_SIZE;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ffa2be57804d..c3ca12c33ca2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -318,21 +318,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
        struct vfsmount *lower_mnt;
        int rc = 0;
-        lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
-        fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
-        BUG_ON(!lower_dentry->d_count);
        dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
-        ecryptfs_set_dentry_private(dentry, dentry_info);
        if (!dentry_info) {
                printk(KERN_ERR "%s: Out of memory whilst attempting "
                       "to allocate ecryptfs_dentry_info struct\n",
                        __func__);
                dput(lower_dentry);
-                mntput(lower_mnt);
-                d_drop(dentry);
                return -ENOMEM;
        }
+        lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
+        fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
+        BUG_ON(!lower_dentry->d_count);
+        ecryptfs_set_dentry_private(dentry, dentry_info);
        ecryptfs_set_dentry_lower(dentry, lower_dentry);
        ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
@@ -381,12 +380,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        struct dentry *lower_dir_dentry, *lower_dentry;
        int rc = 0;
-        if ((ecryptfs_dentry->d_name.len == 1
-             && !strcmp(ecryptfs_dentry->d_name.name, "."))
-            || (ecryptfs_dentry->d_name.len == 2
-                && !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
-                goto out_d_drop;
-        }
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
        mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
@@ -397,8 +390,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
                                "[%d] on lower_dentry = [%s]\n", __func__, rc,
-                                encrypted_and_encoded_name);
+                                ecryptfs_dentry->d_name.name);
-                goto out_d_drop;
+                goto out;
        }
        if (lower_dentry->d_inode)
                goto interpose;
@@ -415,7 +408,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        if (rc) {
                printk(KERN_ERR "%s: Error attempting to encrypt and encode "
                       "filename; rc = [%d]\n", __func__, rc);
-                goto out_d_drop;
+                goto out;
        }
        mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
@@ -427,14 +420,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
                                "[%d] on lower_dentry = [%s]\n", __func__, rc,
                                encrypted_and_encoded_name);
-                goto out_d_drop;
+                goto out;
        }
 interpose:
        rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry,
                                       ecryptfs_dir_inode);
-        goto out;
-out_d_drop:
-        d_drop(ecryptfs_dentry);
 out:
        kfree(encrypted_and_encoded_name);
        return ERR_PTR(rc);
diff --git a/fs/exec.c b/fs/exec.c
index 3684353ebd5f..574cf4de4ec3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file)
 */
 static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 {
-        struct file *rp, *wp;
+        struct file *files[2];
        struct fdtable *fdt;
        struct coredump_params *cp = (struct coredump_params *)info->data;
        struct files_struct *cf = current->files;
+        int err = create_pipe_files(files, 0);
+        if (err)
+                return err;
-        wp = create_write_pipe(0);
+        cp->file = files[1];
-        if (IS_ERR(wp))
-                return PTR_ERR(wp);
-        rp = create_read_pipe(wp, 0);
-        if (IS_ERR(rp)) {
-                free_write_pipe(wp);
-                return PTR_ERR(rp);
-        }
-        cp->file = wp;
        sys_close(0);
-        fd_install(0, rp);
+        fd_install(0, files[0]);
        spin_lock(&cf->file_lock);
        fdt = files_fdtable(cf);
        __set_open_fd(0, fdt);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 264d315f6c47..6363ac66fafa 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode)
        truncate_inode_pages(&inode->i_data, 0);
        if (want_delete) {
+                sb_start_intwrite(inode->i_sb);
                /* set dtime */
                EXT2_I(inode)->i_dtime  = get_seconds();
                mark_inode_dirty(inode);
@@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode)
        if (unlikely(rsv))
                kfree(rsv);
-        if (want_delete)
+        if (want_delete) {
                ext2_free_inode(inode);
+                sb_end_intwrite(inode->i_sb);
+        }
 }
 typedef struct {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f311d27b16f..af74d9e27b71 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb,
 static int ext2_remount (struct super_block * sb, int * flags, char * data);
 static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
 static int ext2_sync_fs(struct super_block *sb, int wait);
+static int ext2_freeze(struct super_block *sb);
+static int ext2_unfreeze(struct super_block *sb);
 void ext2_error(struct super_block *sb, const char *function,
                const char *fmt, ...)
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = {
        .evict_inode    = ext2_evict_inode,
        .put_super      = ext2_put_super,
        .sync_fs        = ext2_sync_fs,
+        .freeze_fs      = ext2_freeze,
+        .unfreeze_fs    = ext2_unfreeze,
        .statfs         = ext2_statfs,
        .remount_fs     = ext2_remount,
        .show_options   = ext2_show_options,
@@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
        return 0;
 }
+static int ext2_freeze(struct super_block *sb)
+{
+        struct ext2_sb_info *sbi = EXT2_SB(sb);
+        /*
+         * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared
+         * because we have unattached inodes and thus filesystem is not fully
+         * consistent.
+         */
+        if (atomic_long_read(&sb->s_remove_count)) {
+                ext2_sync_fs(sb, 1);
+                return 0;
+        }
+        /* Set EXT2_VALID_FS flag */
+        spin_lock(&sbi->s_lock);
+        sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state);
+        spin_unlock(&sbi->s_lock);
+        ext2_sync_super(sb, sbi->s_es, 1);
+        return 0;
+}
+static int ext2_unfreeze(struct super_block *sb)
+{
+        /* Just write sb to clear EXT2_VALID_FS flag */
+        ext2_write_super(sb);
+        return 0;
+}
 void ext2_write_super(struct super_block *sb)
 {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89b59cb7f9b8..6324f74e0342 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode)
        if (is_bad_inode(inode))
                goto no_delete;
+        /*
+         * Protect us against freezing - iput() caller didn't have to have any
+         * protection against it
+         */
+        sb_start_intwrite(inode->i_sb);
        handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
        if (IS_ERR(handle)) {
                ext4_std_error(inode->i_sb, PTR_ERR(handle));
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode)
                 * cleaned up.
                 */
                ext4_orphan_del(NULL, inode);
+                sb_end_intwrite(inode->i_sb);
                goto no_delete;
        }
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode)
                stop_handle:
                        ext4_journal_stop(handle);
                        ext4_orphan_del(NULL, inode);
+                        sb_end_intwrite(inode->i_sb);
                        goto no_delete;
                }
        }
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode)
        else
                ext4_free_inode(handle, inode);
        ext4_journal_stop(handle);
+        sb_end_intwrite(inode->i_sb);
        return;
 no_delete:
        ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
@@ -4779,11 +4787,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        get_block_t *get_block;
        int retries = 0;
-        /*
+        sb_start_pagefault(inode->i_sb);
-         * This check is racy but catches the common case. We rely on
-         * __block_page_mkwrite() to do a reliable check.
-         */
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
        /* Delalloc case is easy... */
        if (test_opt(inode->i_sb, DELALLOC) &&
            !ext4_should_journal_data(inode) &&
@@ -4851,5 +4855,6 @@ retry_alloc:
 out_ret:
        ret = block_page_mkwrite_return(ret);
 out:
+        sb_end_pagefault(inode->i_sb);
        return ret;
 }
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index f99a1311e847..fe7c63f4717e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
 {
        struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
+        /*
+         * We protect against freezing so that we don't create dirty buffers
+         * on frozen filesystem.
+         */
+        sb_start_write(sb);
        ext4_mmp_csum_set(sb, mmp);
        mark_buffer_dirty(bh);
        lock_buffer(bh);
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
        get_bh(bh);
        submit_bh(WRITE_SYNC, bh);
        wait_on_buffer(bh);
+        sb_end_write(sb);
        if (unlikely(!buffer_uptodate(bh)))
                return 1;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2d51cd9af225..d76ec8277d3f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -331,33 +331,17 @@ static void ext4_put_nojournal(handle_t *handle)
 * journal_end calls result in the superblock being marked dirty, so
 * that sync() will call the filesystem's write_super callback if
 * appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
 */
 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 {
        journal_t *journal;
-        handle_t  *handle;
        trace_ext4_journal_start(sb, nblocks, _RET_IP_);
        if (sb->s_flags & MS_RDONLY)
                return ERR_PTR(-EROFS);
+        WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
        journal = EXT4_SB(sb)->s_journal;
-        handle = ext4_journal_current_handle();
-        /*
-         * If a handle has been started, it should be allowed to
-         * finish, otherwise deadlock could happen between freeze
-         * and others(e.g. truncate) due to the restart of the
-         * journal handle if the filesystem is forzen and active
-         * handles are not stopped.
-         */
-        if (!handle)
-                vfs_check_frozen(sb, SB_FREEZE_TRANS);
        if (!journal)
                return ext4_get_nojournal();
        /*
@@ -2747,6 +2731,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
        sb = elr->lr_super;
        ngroups = EXT4_SB(sb)->s_groups_count;
+        sb_start_write(sb);
        for (group = elr->lr_next_group; group < ngroups; group++) {
                gdp = ext4_get_group_desc(sb, group, NULL);
                if (!gdp) {
@@ -2773,6 +2758,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
                elr->lr_next_sched = jiffies + elr->lr_timeout;
                elr->lr_next_group = group + 1;
        }
+        sb_end_write(sb);
        return ret;
 }
@@ -4460,10 +4446,8 @@ int ext4_force_commit(struct super_block *sb)
                return 0;
        journal = EXT4_SB(sb)->s_journal;
-        if (journal) {
+        if (journal)
-                vfs_check_frozen(sb, SB_FREEZE_TRANS);
                ret = ext4_journal_force_commit(journal);
-        }
        return ret;
 }
@@ -4493,9 +4477,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * Note that only this function cannot bring a filesystem to be in a clean
- * state independently, because ext4 prevents a new handle from being started
+ * state independently. It relies on upper layer to stop all data & metadata
- * by @sb->s_frozen, which stays in an upper layer.  It thus needs help from
+ * modifications.
- * the upper layer.
 */
 static int ext4_freeze(struct super_block *sb)
 {
@@ -4522,7 +4505,7 @@ static int ext4_freeze(struct super_block *sb)
        EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
 out:
-        /* we rely on s_frozen to stop further updates */
+        /* we rely on upper layer to stop further updates */
        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
        return error;
 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a71fe3715ee8..e007b8bd8e5e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        if (err)
                goto out;
-        mutex_lock(&inode->i_mutex);
        err = mnt_want_write_file(file);
        if (err)
-                goto out_unlock_inode;
+                goto out;
+        mutex_lock(&inode->i_mutex);
        /*
         * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        /* The root directory has no attributes */
        if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
                err = -EINVAL;
-                goto out_drop_write;
+                goto out_unlock_inode;
        }
        if (sbi->options.sys_immutable &&
            ((attr | oldattr) & ATTR_SYS) &&
            !capable(CAP_LINUX_IMMUTABLE)) {
                err = -EPERM;
-                goto out_drop_write;
+                goto out_unlock_inode;
        }
        /*
@@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
         */
        err = security_inode_setattr(file->f_path.dentry, &ia);
        if (err)
-                goto out_drop_write;
+                goto out_unlock_inode;
        /* This MUST be done before doing anything irreversible... */
        err = fat_setattr(file->f_path.dentry, &ia);
        if (err)
-                goto out_drop_write;
+                goto out_unlock_inode;
        fsnotify_change(file->f_path.dentry, ia.ia_valid);
        if (sbi->options.sys_immutable) {
@@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
        fat_save_attrs(inode, attr);
        mark_inode_dirty(inode);
-out_drop_write:
-        mnt_drop_write_file(file);
 out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
+        mnt_drop_write_file(file);
 out:
        return err;
 }
diff --git a/fs/file_table.c b/fs/file_table.c
index b3fc4d67a26b..701985e4ccda 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly;
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
-static inline void file_free_rcu(struct rcu_head *head)
+static void file_free_rcu(struct rcu_head *head)
 {
        struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
@@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file)
                return;
        if (file_check_writeable(file) != 0)
                return;
-        mnt_drop_write(mnt);
+        __mnt_drop_write(mnt);
        file_release_write(file);
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a688cde7..93d8d6c9494d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -944,9 +944,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                return err;
        count = ocount;
+        sb_start_write(inode->i_sb);
        mutex_lock(&inode->i_mutex);
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
@@ -1004,6 +1003,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 out:
        current->backing_dev_info = NULL;
        mutex_unlock(&inode->i_mutex);
+        sb_end_write(inode->i_sb);
        return written ? written : err;
 }
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9aa6af13823c..d1d791ef38de 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        loff_t size;
        int ret;
-        /* Wait if fs is frozen. This is racy so we check again later on
+        sb_start_pagefault(inode->i_sb);
-         * and retry if the fs has been frozen after the page lock has
-         * been acquired
+        /* Update file times before taking page lock */
-         */
+        file_update_time(vma->vm_file);
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
        ret = gfs2_rs_alloc(ip);
        if (ret)
@@ -462,14 +461,9 @@ out:
        gfs2_holder_uninit(&gh);
        if (ret == 0) {
                set_page_dirty(page);
-                /* This check must be post dropping of transaction lock */
+                wait_on_page_writeback(page);
-                if (inode->i_sb->s_frozen == SB_UNFROZEN) {
-                        wait_on_page_writeback(page);
-                } else {
-                        ret = -EAGAIN;
-                        unlock_page(page);
-                }
        }
+        sb_end_pagefault(inode->i_sb);
        return block_page_mkwrite_return(ret);
 }
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ad3e2fb763d7..adbd27875ef9 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
        if (revokes)
                tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
                                                   sizeof(u64));
+        sb_start_intwrite(sdp->sd_vfs);
        gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
        error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -68,6 +69,7 @@ fail_gunlock:
        gfs2_glock_dq(&tr->tr_t_gh);
 fail_holder_uninit:
+        sb_end_intwrite(sdp->sd_vfs);
        gfs2_holder_uninit(&tr->tr_t_gh);
        kfree(tr);
@@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
                        gfs2_holder_uninit(&tr->tr_t_gh);
                        kfree(tr);
                }
+                sb_end_intwrite(sdp->sd_vfs);
                return;
        }
@@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
        if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
                gfs2_log_flush(sdp, NULL);
+        sb_end_intwrite(sdp->sd_vfs);
 }
 /**
diff --git a/fs/inode.c b/fs/inode.c
index 3cc504320467..ac8d904b3f16 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1542,9 +1542,11 @@ void touch_atime(struct path *path)
        if (timespec_equal(&inode->i_atime, &now))
                return;
-        if (mnt_want_write(mnt))
+        if (!sb_start_write_trylock(inode->i_sb))
                return;
+        if (__mnt_want_write(mnt))
+                goto skip_update;
        /*
         * File systems can error out when updating inodes if they need to
         * allocate new space to modify an inode (such is the case for
@@ -1555,7 +1557,9 @@ void touch_atime(struct path *path)
         * of the fs read only, e.g. subvolumes in Btrfs.
         */
        update_time(inode, &now, S_ATIME);
-        mnt_drop_write(mnt);
+        __mnt_drop_write(mnt);
+skip_update:
+        sb_end_write(inode->i_sb);
 }
 EXPORT_SYMBOL(touch_atime);
@@ -1662,11 +1666,11 @@ int file_update_time(struct file *file)
                return 0;
        /* Finally allowed to write? Takes lock. */
-        if (mnt_want_write_file(file))
+        if (__mnt_want_write_file(file))
                return 0;
        ret = update_time(inode, &now, sync_it);
-        mnt_drop_write_file(file);
+        __mnt_drop_write_file(file);
        return ret;
 }
diff --git a/fs/internal.h b/fs/internal.h
index a6fd56c68b11..371bcc4b1697 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -61,6 +61,10 @@ extern void __init mnt_init(void);
 extern struct lglock vfsmount_lock;
+extern int __mnt_want_write(struct vfsmount *);
+extern int __mnt_want_write_file(struct file *);
+extern void __mnt_drop_write(struct vfsmount *);
+extern void __mnt_drop_write_file(struct file *);
 /*
 * fs_struct.c
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 8392cb85bd54..05d29124c6ab 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
        struct nlm_rqst         *call;
        int                     status;
-        nlm_get_host(host);
        call = nlm_alloc_call(host);
        if (call == NULL)
                return -ENOMEM;
        nlmclnt_locks_init_private(fl, host);
+        if (!fl->fl_u.nfs_fl.owner) {
+                /* lockowner allocation has failed */
+                nlmclnt_release_call(call);
+                return -ENOMEM;
+        }
        /* Set up the argument struct */
        nlmclnt_setlockargs(call, fl);
@@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc);
 /*
 * Allocate an NLM RPC call struct
- *
- * Note: the caller must hold a reference to host. In case of failure,
- * this reference will be released.
 */
 struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 {
@@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
                        atomic_set(&call->a_count, 1);
                        locks_init_lock(&call->a_args.lock.fl);
                        locks_init_lock(&call->a_res.lock.fl);
-                        call->a_host = host;
+                        call->a_host = nlm_get_host(host);
                        return call;
                }
                if (signalled())
@@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
                printk("nlm_alloc_call: failed, waiting for memory\n");
                schedule_timeout_interruptible(5*HZ);
        }
-        nlmclnt_release_host(host);
        return NULL;
 }
@@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
        dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
                "       Attempting to cancel lock.\n");
-        req = nlm_alloc_call(nlm_get_host(host));
+        req = nlm_alloc_call(host);
        if (!req)
                return -ENOMEM;
        req->a_flags = RPC_TASK_ASYNC;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4a43d253c045..b147d1ae71fd 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -257,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
                return rpc_system_err;
        call = nlm_alloc_call(host);
+        nlmsvc_release_host(host);
        if (call == NULL)
                return rpc_system_err;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index afe4488c33d8..fb1a2bedbe97 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
        struct nlm_block        *block;
        struct nlm_rqst         *call = NULL;
-        nlm_get_host(host);
        call = nlm_alloc_call(host);
        if (call == NULL)
                return NULL;
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index de8f2caa2235..3009a365e082 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -297,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
                return rpc_system_err;
        call = nlm_alloc_call(host);
+        nlmsvc_release_host(host);
        if (call == NULL)
                return rpc_system_err;
diff --git a/fs/namei.c b/fs/namei.c
index 2ccc35c4dc24..1b464390dde8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
        path_put(link);
 }
+int sysctl_protected_symlinks __read_mostly = 1;
+int sysctl_protected_hardlinks __read_mostly = 1;
+/**
+ * may_follow_link - Check symlink following for unsafe situations
+ * @link: The path of the symlink
+ * @nd: nameidata of the walk; @nd->path is treated as the directory that
+ *      contains the symlink
+ *
+ * In the case of the sysctl_protected_symlinks sysctl being enabled,
+ * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
+ * in a sticky world-writable directory. This is to protect privileged
+ * processes from failing races against path names that may change out
+ * from under them by way of other users creating malicious symlinks.
+ * It will permit symlinks to be followed only when outside a sticky
+ * world-writable directory, or when the uid of the symlink and follower
+ * match, or when the directory owner matches the symlink's owner.
+ *
+ * Returns 0 if following the symlink is allowed, -ve on error.  On denial
+ * the attempt is audit-logged, the references on @link and @nd->path are
+ * dropped, and -EACCES is returned.
+ */
+static inline int may_follow_link(struct path *link, struct nameidata *nd)
+{
+        const struct inode *inode;
+        const struct inode *parent;
+        if (!sysctl_protected_symlinks)
+                return 0;
+        /* Allowed if owner and follower match. */
+        inode = link->dentry->d_inode;
+        if (current_cred()->fsuid == inode->i_uid)
+                return 0;
+        /* Allowed if parent directory not sticky and world-writable. */
+        parent = nd->path.dentry->d_inode;
+        if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
+                return 0;
+        /* Allowed if parent directory and link owner match. */
+        if (parent->i_uid == inode->i_uid)
+                return 0;
+        /*
+         * Log the denial before dropping the references: once
+         * path_put_conditional() has run, @link is no longer guaranteed
+         * to be pinned and must not be handed to the audit code.
+         */
+        audit_log_link_denied("follow_link", link);
+        path_put_conditional(link, nd);
+        path_put(&nd->path);
+        return -EACCES;
+}
+/**
+ * safe_hardlink_source - Check for safe hardlink conditions
+ * @inode: the source inode to hardlink from
+ *
+ * Return false if at least one of the following conditions holds:
+ *    - inode is not a regular file
+ *    - inode is setuid
+ *    - inode is setgid and group-exec
+ *    - access failure for read and write, i.e. inode_permission() with
+ *      MAY_READ | MAY_WRITE returns non-zero
+ *
+ * The mode-bit tests run first; inode_permission() is only consulted for
+ * inodes that pass all of them.
+ *
+ * Otherwise returns true.
+ */
+static bool safe_hardlink_source(struct inode *inode)
+{
+        umode_t mode = inode->i_mode;
+        /* Special files should not get pinned to the filesystem. */
+        if (!S_ISREG(mode))
+                return false;
+        /* Setuid files should not get pinned to the filesystem. */
+        if (mode & S_ISUID)
+                return false;
+        /* Executable setgid files should not get pinned to the filesystem. */
+        if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
+                return false;
+        /* Hardlinking to unreadable or unwritable sources is dangerous. */
+        if (inode_permission(inode, MAY_READ | MAY_WRITE))
+                return false;
+        return true;
+}
+/**
+ * may_linkat - Check permissions for creating a hardlink
+ * @link: the source to hardlink from
+ *
+ * Block hardlink when all of:
+ *  - sysctl_protected_hardlinks enabled
+ *  - fsuid does not match inode
+ *  - hardlink source is unsafe (see safe_hardlink_source() above)
+ *  - not CAP_FOWNER
+ *
+ * The tests are short-circuited in that order, so capable(CAP_FOWNER) is
+ * only evaluated when the cheaper ownership and safe-source tests have
+ * both failed to allow the link.
+ *
+ * Returns 0 if successful, -ve on error.  A refused link is reported
+ * through audit_log_link_denied() and fails with -EPERM.
+ */
+static int may_linkat(struct path *link)
+{
+        const struct cred *cred;
+        struct inode *inode;
+        if (!sysctl_protected_hardlinks)
+                return 0;
+        cred = current_cred();
+        inode = link->dentry->d_inode;
+        /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
+         * otherwise, it must be a safe source.
+         */
+        if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) ||
+            capable(CAP_FOWNER))
+                return 0;
+        audit_log_link_denied("linkat", link);
+        return -EPERM;
+}
 static __always_inline int
 follow_link(struct path *link, struct nameidata *nd, void **p)
 {
@@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name,
                while (err > 0) {
                        void *cookie;
                        struct path link = path;
+                        err = may_follow_link(&link, nd);
+                        if (unlikely(err))
+                                break;
                        nd->flags |= LOOKUP_PARENT;
                        err = follow_link(&link, nd, &cookie);
                        if (err)
@@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
 static int atomic_open(struct nameidata *nd, struct dentry *dentry,
                        struct path *path, struct file *file,
                        const struct open_flags *op,
-                        bool *want_write, bool need_lookup,
+                        bool got_write, bool need_lookup,
                        int *opened)
 {
        struct inode *dir =  nd->path.dentry->d_inode;
@@ -2300,7 +2418,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
        if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
                mode &= ~current_umask();
-        if (open_flag & O_EXCL) {
+        if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
                open_flag &= ~O_TRUNC;
                *opened |= FILE_CREATED;
        }
@@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
         * Another problem is returing the "right" error value (e.g. for an
         * O_EXCL open we want to return EEXIST not EROFS).
         */
-        if ((open_flag & (O_CREAT | O_TRUNC)) ||
+        if (((open_flag & (O_CREAT | O_TRUNC)) ||
-            (open_flag & O_ACCMODE) != O_RDONLY) {
+            (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
-                error = mnt_want_write(nd->path.mnt);
+                if (!(open_flag & O_CREAT)) {
-                if (!error) {
-                        *want_write = true;
-                } else if (!(open_flag & O_CREAT)) {
                        /*
                         * No O_CREATE -> atomicity not a requirement -> fall
                         * back to lookup + open
@@ -2327,11 +2442,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
                        goto no_open;
                } else if (open_flag & (O_EXCL | O_TRUNC)) {
                        /* Fall back and fail with the right error */
-                        create_error = error;
+                        create_error = -EROFS;
                        goto no_open;
                } else {
                        /* No side effects, safe to clear O_CREAT */
-                        create_error = error;
+                        create_error = -EROFS;
                        open_flag &= ~O_CREAT;
                }
        }
@@ -2438,7 +2553,7 @@ looked_up:
 static int lookup_open(struct nameidata *nd, struct path *path,
                        struct file *file,
                        const struct open_flags *op,
-                        bool *want_write, int *opened)
+                        bool got_write, int *opened)
 {
        struct dentry *dir = nd->path.dentry;
        struct inode *dir_inode = dir->d_inode;
@@ -2456,7 +2571,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
                goto out_no_open;
        if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
-                return atomic_open(nd, dentry, path, file, op, want_write,
+                return atomic_open(nd, dentry, path, file, op, got_write,
                                   need_lookup, opened);
        }
@@ -2480,10 +2595,10 @@ static int lookup_open(struct nameidata *nd, struct path *path,
                 * a permanent write count is taken through
                 * the 'struct file' in finish_open().
                 */
-                error = mnt_want_write(nd->path.mnt);
+                if (!got_write) {
-                if (error)
+                        error = -EROFS;
                        goto out_dput;
-                *want_write = true;
+                }
                *opened |= FILE_CREATED;
                error = security_path_mknod(&nd->path, dentry, mode, 0);
                if (error)
@@ -2513,7 +2628,7 @@ static int do_last(struct nameidata *nd, struct path *path,
        struct dentry *dir = nd->path.dentry;
        int open_flag = op->open_flag;
        bool will_truncate = (open_flag & O_TRUNC) != 0;
-        bool want_write = false;
+        bool got_write = false;
        int acc_mode = op->acc_mode;
        struct inode *inode;
        bool symlink_ok = false;
@@ -2582,8 +2697,18 @@ static int do_last(struct nameidata *nd, struct path *path,
        }
 retry_lookup:
+        if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
+                error = mnt_want_write(nd->path.mnt);
+                if (!error)
+                        got_write = true;
+                /*
+                 * do _not_ fail yet - we might not need that or fail with
+                 * a different error; let lookup_open() decide; we'll be
+                 * dropping this one anyway.
+                 */
+        }
        mutex_lock(&dir->d_inode->i_mutex);
-        error = lookup_open(nd, path, file, op, &want_write, opened);
+        error = lookup_open(nd, path, file, op, got_write, opened);
        mutex_unlock(&dir->d_inode->i_mutex);
        if (error <= 0) {
@@ -2608,22 +2733,23 @@ retry_lookup:
        }
        /*
-         * It already exists.
+         * create/update audit record if it already exists.
         */
-        audit_inode(pathname, path->dentry);
+        if (path->dentry->d_inode)
+                audit_inode(pathname, path->dentry);
        /*
         * If atomic_open() acquired write access it is dropped now due to
         * possible mount and symlink following (this might be optimized away if
         * necessary...)
         */
-        if (want_write) {
+        if (got_write) {
                mnt_drop_write(nd->path.mnt);
-                want_write = false;
+                got_write = false;
        }
        error = -EEXIST;
-        if (open_flag & O_EXCL)
+        if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
                goto exit_dput;
        error = follow_managed(path, nd->flags);
@@ -2684,7 +2810,7 @@ finish_open:
                error = mnt_want_write(nd->path.mnt);
                if (error)
                        goto out;
-                want_write = true;
+                got_write = true;
        }
 finish_open_created:
        error = may_open(&nd->path, acc_mode, open_flag);
@@ -2711,7 +2837,7 @@ opened:
                        goto exit_fput;
        }
 out:
-        if (want_write)
+        if (got_write)
                mnt_drop_write(nd->path.mnt);
        path_put(&save_parent);
        terminate_walk(nd);
@@ -2735,9 +2861,9 @@ stale_open:
        nd->inode = dir->d_inode;
        save_parent.mnt = NULL;
        save_parent.dentry = NULL;
-        if (want_write) {
+        if (got_write) {
                mnt_drop_write(nd->path.mnt);
-                want_write = false;
+                got_write = false;
        }
        retried = true;
        goto retry_lookup;
@@ -2777,6 +2903,9 @@ static struct file *path_openat(int dfd, const char *pathname,
                        error = -ELOOP;
                        break;
                }
+                error = may_follow_link(&link, nd);
+                if (unlikely(error))
+                        break;
                nd->flags |= LOOKUP_PARENT;
                nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
                error = follow_link(&link, nd, &cookie);
@@ -2846,6 +2975,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
 {
        struct dentry *dentry = ERR_PTR(-EEXIST);
        struct nameidata nd;
+        int err2;
        int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
        if (error)
                return ERR_PTR(error);
@@ -2859,16 +2989,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
        nd.flags &= ~LOOKUP_PARENT;
        nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+        /* don't fail immediately if it's r/o, at least try to report other errors */
+        err2 = mnt_want_write(nd.path.mnt);
        /*
         * Do the final lookup.
         */
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
        if (IS_ERR(dentry))
-                goto fail;
+                goto unlock;
+        error = -EEXIST;
        if (dentry->d_inode)
-                goto eexist;
+                goto fail;
        /*
         * Special case - lookup gave negative, but... we had foo/bar/
         * From the vfs_mknod() POV we just have a negative dentry -
@@ -2876,23 +3009,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
         * been asking for (non-existent) directory. -ENOENT for you.
         */
        if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
-                dput(dentry);
+                error = -ENOENT;
-                dentry = ERR_PTR(-ENOENT);
+                goto fail;
+        }
+        if (unlikely(err2)) {
+                error = err2;
                goto fail;
        }
        *path = nd.path;
        return dentry;
-eexist:
-        dput(dentry);
-        dentry = ERR_PTR(-EEXIST);
 fail:
+        dput(dentry);
+        dentry = ERR_PTR(error);
+unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+        if (!err2)
+                mnt_drop_write(nd.path.mnt);
 out:
        path_put(&nd.path);
        return dentry;
 }
 EXPORT_SYMBOL(kern_path_create);
+/**
+ * done_path_create - release everything a successful path-create lookup holds
+ * @path: parent directory path filled in by kern_path_create()
+ * @dentry: dentry returned by kern_path_create()
+ *
+ * kern_path_create() returns on success with the parent's i_mutex locked,
+ * write access to the mount taken and a reference held on both @path and
+ * @dentry.  This helper undoes all of that in one place: it drops @dentry,
+ * unlocks the parent directory, gives up the mount write access and
+ * finally puts @path.  The callers in this file also use it after
+ * user_path_create().
+ */
+void done_path_create(struct path *path, struct dentry *dentry)
+{
+        dput(dentry);
+        mutex_unlock(&path->dentry->d_inode->i_mutex);
+        mnt_drop_write(path->mnt);
+        path_put(path);
+}
+EXPORT_SYMBOL(done_path_create);
 struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
 {
        char *tmp = getname(pathname);
@@ -2956,8 +3103,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
        struct path path;
        int error;
-        if (S_ISDIR(mode))
+        error = may_mknod(mode);
-                return -EPERM;
+        if (error)
+                return error;
        dentry = user_path_create(dfd, filename, &path, 0);
        if (IS_ERR(dentry))
@@ -2965,15 +3113,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
        if (!IS_POSIXACL(path.dentry->d_inode))
                mode &= ~current_umask();
-        error = may_mknod(mode);
-        if (error)
-                goto out_dput;
-        error = mnt_want_write(path.mnt);
-        if (error)
-                goto out_dput;
        error = security_path_mknod(&path, dentry, mode, dev);
        if (error)
-                goto out_drop_write;
+                goto out;
        switch (mode & S_IFMT) {
                case 0: case S_IFREG:
                        error = vfs_create(path.dentry->d_inode,dentry,mode,true);
@@ -2986,13 +3128,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
                        error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
                        break;
        }
-out_drop_write:
+out:
-        mnt_drop_write(path.mnt);
+        done_path_create(&path, dentry);
-out_dput:
-        dput(dentry);
-        mutex_unlock(&path.dentry->d_inode->i_mutex);
-        path_put(&path);
        return error;
 }
@@ -3038,19 +3175,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
        if (!IS_POSIXACL(path.dentry->d_inode))
                mode &= ~current_umask();
-        error = mnt_want_write(path.mnt);
-        if (error)
-                goto out_dput;
        error = security_path_mkdir(&path, dentry, mode);
-        if (error)
+        if (!error)
-                goto out_drop_write;
+                error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
-        error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+        done_path_create(&path, dentry);
-out_drop_write:
-        mnt_drop_write(path.mnt);
-out_dput:
-        dput(dentry);
-        mutex_unlock(&path.dentry->d_inode->i_mutex);
-        path_put(&path);
        return error;
 }
@@ -3144,6 +3272,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
        }
        nd.flags &= ~LOOKUP_PARENT;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto exit1;
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
@@ -3154,19 +3285,15 @@ static long do_rmdir(int dfd, const char __user *pathname)
                error = -ENOENT;
                goto exit3;
        }
-        error = mnt_want_write(nd.path.mnt);
-        if (error)
-                goto exit3;
        error = security_path_rmdir(&nd.path, dentry);
        if (error)
-                goto exit4;
+                goto exit3;
        error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
-exit4:
-        mnt_drop_write(nd.path.mnt);
 exit3:
        dput(dentry);
 exit2:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+        mnt_drop_write(nd.path.mnt);
 exit1:
        path_put(&nd.path);
        putname(name);
@@ -3233,6 +3360,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                goto exit1;
        nd.flags &= ~LOOKUP_PARENT;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto exit1;
        mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_hash(&nd);
@@ -3245,21 +3375,17 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                if (!inode)
                        goto slashes;
                ihold(inode);
-                error = mnt_want_write(nd.path.mnt);
-                if (error)
-                        goto exit2;
                error = security_path_unlink(&nd.path, dentry);
                if (error)
-                        goto exit3;
+                        goto exit2;
                error = vfs_unlink(nd.path.dentry->d_inode, dentry);
-exit3:
+exit2:
-                mnt_drop_write(nd.path.mnt);
-        exit2:
                dput(dentry);
        }
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        if (inode)
                iput(inode);    /* truncate the inode here */
+        mnt_drop_write(nd.path.mnt);
 exit1:
        path_put(&nd.path);
        putname(name);
@@ -3324,19 +3450,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
        if (IS_ERR(dentry))
                goto out_putname;
-        error = mnt_want_write(path.mnt);
-        if (error)
-                goto out_dput;
        error = security_path_symlink(&path, dentry, from);
-        if (error)
+        if (!error)
-                goto out_drop_write;
+                error = vfs_symlink(path.dentry->d_inode, dentry, from);
-        error = vfs_symlink(path.dentry->d_inode, dentry, from);
+        done_path_create(&path, dentry);
-out_drop_write:
-        mnt_drop_write(path.mnt);
-out_dput:
-        dput(dentry);
-        mutex_unlock(&path.dentry->d_inode->i_mutex);
-        path_put(&path);
 out_putname:
        putname(from);
        return error;
@@ -3436,19 +3553,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
        error = -EXDEV;
        if (old_path.mnt != new_path.mnt)
                goto out_dput;
-        error = mnt_want_write(new_path.mnt);
+        error = may_linkat(&old_path);
-        if (error)
+        if (unlikely(error))
                goto out_dput;
        error = security_path_link(old_path.dentry, &new_path, new_dentry);
        if (error)
-                goto out_drop_write;
+                goto out_dput;
        error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
-out_drop_write:
-        mnt_drop_write(new_path.mnt);
 out_dput:
-        dput(new_dentry);
+        done_path_create(&new_path, new_dentry);
-        mutex_unlock(&new_path.dentry->d_inode->i_mutex);
-        path_put(&new_path);
 out:
        path_put(&old_path);
@@ -3644,6 +3757,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
        if (newnd.last_type != LAST_NORM)
                goto exit2;
+        error = mnt_want_write(oldnd.path.mnt);
+        if (error)
+                goto exit2;
        oldnd.flags &= ~LOOKUP_PARENT;
        newnd.flags &= ~LOOKUP_PARENT;
        newnd.flags |= LOOKUP_RENAME_TARGET;
@@ -3679,23 +3796,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
        if (new_dentry == trap)
                goto exit5;
-        error = mnt_want_write(oldnd.path.mnt);
-        if (error)
-                goto exit5;
        error = security_path_rename(&oldnd.path, old_dentry,
                                     &newnd.path, new_dentry);
        if (error)
-                goto exit6;
+                goto exit5;
        error = vfs_rename(old_dir->d_inode, old_dentry,
                                   new_dir->d_inode, new_dentry);
-exit6:
-        mnt_drop_write(oldnd.path.mnt);
 exit5:
        dput(new_dentry);
 exit4:
        dput(old_dentry);
 exit3:
        unlock_rename(new_dir, old_dir);
+        mnt_drop_write(oldnd.path.mnt);
 exit2:
        path_put(&newnd.path);
        putname(to);
diff --git a/fs/namespace.c b/fs/namespace.c
index c53d3381b0d0..4d31f73e2561 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt)
 }
 /*
- * Most r/o checks on a fs are for operations that take
+ * Most r/o & frozen checks on a fs are for operations that take discrete
- * discrete amounts of time, like a write() or unlink().
+ * amounts of time, like a write() or unlink().  We must keep track of when
- * We must keep track of when those operations start
+ * those operations start (for permission checks) and when they end, so that we
- * (for permission checks) and when they end, so that
+ * can determine when writes are able to occur to a filesystem.
- * we can determine when writes are able to occur to
- * a filesystem.
 */
 /**
- * mnt_want_write - get write access to a mount
+ * __mnt_want_write - get write access to a mount without freeze protection
 * @m: the mount on which to take a write
 *
- * This tells the low-level filesystem that a write is
+ * This tells the low-level filesystem that a write is about to be performed to
- * about to be performed to it, and makes sure that
+ * it, and makes sure that writes are allowed (mount is read-write) before
- * writes are allowed before returning success.  When
+ * returning success. This operation does not protect against filesystem being
- * the write operation is finished, mnt_drop_write()
+ * frozen. When the write operation is finished, __mnt_drop_write() must be
- * must be called.  This is effectively a refcount.
+ * called. This is effectively a refcount.
 */
-int mnt_want_write(struct vfsmount *m)
+int __mnt_want_write(struct vfsmount *m)
 {
        struct mount *mnt = real_mount(m);
        int ret = 0;
@@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m)
                ret = -EROFS;
        }
        preempt_enable();
+        return ret;
+}
+/**
+ * mnt_want_write - get write access to a mount
+ * @m: the mount on which to take a write
+ *
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mount is read-write, filesystem
+ * is not frozen) before returning success.  When the write operation is
+ * finished, mnt_drop_write() must be called.  This is effectively a refcount.
+ *
+ * sb_start_write() is called first; if __mnt_want_write() then fails, the
+ * matching sb_end_write() is issued before the error is returned, so a
+ * failed call leaves nothing held.
+ */
+int mnt_want_write(struct vfsmount *m)
+{
+        int ret;
+        sb_start_write(m->mnt_sb);
+        ret = __mnt_want_write(m);
+        if (ret)
+                sb_end_write(m->mnt_sb);
        return ret;
 }
 EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt)
 EXPORT_SYMBOL_GPL(mnt_clone_write);
 /**
- * mnt_want_write_file - get write access to a file's mount
+ * __mnt_want_write_file - get write access to a file's mount
 * @file: the file who's mount on which to take a write
 *
- * This is like mnt_want_write, but it takes a file and can
+ * This is like __mnt_want_write, but it takes a file and can
 * do some optimisations if the file is open for write already
 */
-int mnt_want_write_file(struct file *file)
+int __mnt_want_write_file(struct file *file)
 {
        struct inode *inode = file->f_dentry->d_inode;
        if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
-                return mnt_want_write(file->f_path.mnt);
+                return __mnt_want_write(file->f_path.mnt);
        else
                return mnt_clone_write(file->f_path.mnt);
 }
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file whose mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ *
+ * sb_start_write() is called on the mount's superblock first; if
+ * __mnt_want_write_file() fails, the matching sb_end_write() is issued
+ * before the error is returned.
+ */
+int mnt_want_write_file(struct file *file)
+{
+        int ret;
+        sb_start_write(file->f_path.mnt->mnt_sb);
+        ret = __mnt_want_write_file(file);
+        if (ret)
+                sb_end_write(file->f_path.mnt->mnt_sb);
+        return ret;
+}
 EXPORT_SYMBOL_GPL(mnt_want_write_file);
 /**
- * mnt_drop_write - give up write access to a mount
+ * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
- * mnt_want_write() call above.
+ * __mnt_want_write() call above.
 */
-void mnt_drop_write(struct vfsmount *mnt)
+void __mnt_drop_write(struct vfsmount *mnt)
 {
        preempt_disable();
        mnt_dec_writers(real_mount(mnt));
        preempt_enable();
 }
+/**
+ * mnt_drop_write - give up write access to a mount
+ * @mnt: the mount on which to give up write access
+ *
+ * Tells the low-level filesystem that we are done performing writes to it and
+ * also allows the filesystem to be frozen again.  Must be matched with
+ * mnt_want_write() call above.
+ *
+ * The mount's writer count is dropped first and sb_end_write() is called
+ * last, the reverse of the order in which mnt_want_write() acquires them.
+ */
+void mnt_drop_write(struct vfsmount *mnt)
+{
+        __mnt_drop_write(mnt);
+        sb_end_write(mnt->mnt_sb);
+}
 EXPORT_SYMBOL_GPL(mnt_drop_write);
+/**
+ * __mnt_drop_write_file - give up write access to a file's mount
+ * @file: the file whose mount's write access is given up
+ *
+ * Calls __mnt_drop_write() on @file->f_path.mnt.  Unlike
+ * mnt_drop_write_file(), which goes through mnt_drop_write(), this does
+ * not call sb_end_write().
+ */
+void __mnt_drop_write_file(struct file *file)
+{
+        __mnt_drop_write(file->f_path.mnt);
+}
 void mnt_drop_write_file(struct file *file)
 {
        mnt_drop_write(file->f_path.mnt);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5ff0b7b9fc08..43295d45cc2b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
        if (status < 0)
                return;
+        status = mnt_want_write_file(rec_file);
+        if (status)
+                return;
        dir = rec_file->f_path.dentry;
        /* lock the parent */
        mutex_lock(&dir->d_inode->i_mutex);
@@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                 * as well be forgiving and just succeed silently.
                 */
                goto out_put;
-        status = mnt_want_write_file(rec_file);
-        if (status)
-                goto out_put;
        status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
-        mnt_drop_write_file(rec_file);
 out_put:
        dput(dentry);
 out_unlock:
@@ -189,6 +189,7 @@ out_unlock:
                                " (err %d); please check that %s exists"
                                " and is writeable", status,
                                user_recovery_dirname);
+        mnt_drop_write_file(rec_file);
        nfs4_reset_creds(original_cred);
 }
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cc793005a87c..032af381b3aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp)
                fhp->fh_post_saved = 0;
 #endif
        }
+        fh_drop_write(fhp);
        if (exp) {
                exp_put(exp);
                fhp->fh_export = NULL;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e15dc45fc5ec..aad6d457b9e8 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
        struct dentry   *dchild;
        int             type, mode;
        __be32          nfserr;
+        int             hosterr;
        dev_t           rdev = 0, wanted = new_decode_dev(attr->ia_size);
        dprintk("nfsd: CREATE   %s %.*s\n",
@@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
        nfserr = nfserr_exist;
        if (isdotent(argp->name, argp->len))
                goto done;
+        hosterr = fh_want_write(dirfhp);
+        if (hosterr) {
+                nfserr = nfserrno(hosterr);
+                goto done;
+        }
        fh_lock_nested(dirfhp, I_MUTEX_PARENT);
        dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
        if (IS_ERR(dchild)) {
@@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
 out_unlock:
        /* We don't really need to unlock, as fh_put does it. */
        fh_unlock(dirfhp);
+        fh_drop_write(dirfhp);
 done:
        fh_put(dirfhp);
        return nfsd_return_dirop(nfserr, resp);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 702f64e820c3..a9269f142cc4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1284,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
         * If it has, the parent directory should already be locked.
         */
        if (!resfhp->fh_dentry) {
+                host_err = fh_want_write(fhp);
+                if (host_err)
+                        goto out_nfserr;
                /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
                fh_lock_nested(fhp, I_MUTEX_PARENT);
                dchild = lookup_one_len(fname, dentry, flen);
@@ -1327,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
        }
-        host_err = fh_want_write(fhp);
-        if (host_err)
-                goto out_nfserr;
        /*
         * Get the dir op function pointer.
         */
        err = 0;
+        host_err = 0;
        switch (type) {
        case S_IFREG:
                host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
@@ -1351,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
                break;
        }
-        if (host_err < 0) {
+        if (host_err < 0)
-                fh_drop_write(fhp);
                goto out_nfserr;
-        }
        err = nfsd_create_setattr(rqstp, resfhp, iap);
@@ -1366,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err2 = nfserrno(commit_metadata(fhp));
        if (err2)
                err = err2;
-        fh_drop_write(fhp);
        /*
         * Update the file handle to get the new inode info.
         */
@@ -1425,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = nfserr_notdir;
        if (!dirp->i_op->lookup)
                goto out;
+        host_err = fh_want_write(fhp);
+        if (host_err)
+                goto out_nfserr;
        fh_lock_nested(fhp, I_MUTEX_PARENT);
        /*
@@ -1457,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                v_atime = verifier[1]&0x7fffffff;
        }
        
-        host_err = fh_want_write(fhp);
-        if (host_err)
-                goto out_nfserr;
        if (dchild->d_inode) {
                err = 0;
@@ -1530,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (!err)
                err = nfserrno(commit_metadata(fhp));
-        fh_drop_write(fhp);
        /*
         * Update the filehandle to get the new inode info.
         */
@@ -1541,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        fh_unlock(fhp);
        if (dchild && !IS_ERR(dchild))
                dput(dchild);
+        fh_drop_write(fhp);
        return err;
 
 out_nfserr:
@@ -1621,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
        if (err)
                goto out;
+        host_err = fh_want_write(fhp);
+        if (host_err)
+                goto out_nfserr;
        fh_lock(fhp);
        dentry = fhp->fh_dentry;
        dnew = lookup_one_len(fname, dentry, flen);
@@ -1628,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dnew))
                goto out_nfserr;
-        host_err = fh_want_write(fhp);
-        if (host_err)
-                goto out_nfserr;
        if (unlikely(path[plen] != 0)) {
                char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
                if (path_alloced == NULL)
@@ -1691,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        if (isdotent(name, len))
                goto out;
+        host_err = fh_want_write(tfhp);
+        if (host_err) {
+                err = nfserrno(host_err);
+                goto out;
+        }
        fh_lock_nested(ffhp, I_MUTEX_PARENT);
        ddir = ffhp->fh_dentry;
        dirp = ddir->d_inode;
@@ -1702,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        dold = tfhp->fh_dentry;
-        host_err = fh_want_write(tfhp);
-        if (host_err) {
-                err = nfserrno(host_err);
-                goto out_dput;
-        }
        err = nfserr_noent;
        if (!dold->d_inode)
-                goto out_drop_write;
+                goto out_dput;
        host_err = nfsd_break_lease(dold->d_inode);
        if (host_err) {
                err = nfserrno(host_err);
-                goto out_drop_write;
+                goto out_dput;
        }
        host_err = vfs_link(dold, dirp, dnew);
        if (!host_err) {
@@ -1726,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                else
                        err = nfserrno(host_err);
        }
-out_drop_write:
-        fh_drop_write(tfhp);
 out_dput:
        dput(dnew);
 out_unlock:
        fh_unlock(ffhp);
+        fh_drop_write(tfhp);
 out:
        return err;
@@ -1774,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
                goto out;
+        host_err = fh_want_write(ffhp);
+        if (host_err) {
+                err = nfserrno(host_err);
+                goto out;
+        }
        /* cannot use fh_lock as we need deadlock protective ordering
         * so do it by hand */
        trap = lock_rename(tdentry, fdentry);
@@ -1804,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        host_err = -EXDEV;
        if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
                goto out_dput_new;
-        host_err = fh_want_write(ffhp);
-        if (host_err)
-                goto out_dput_new;
        host_err = nfsd_break_lease(odentry->d_inode);
        if (host_err)
-                goto out_drop_write;
+                goto out_dput_new;
        if (ndentry->d_inode) {
                host_err = nfsd_break_lease(ndentry->d_inode);
                if (host_err)
-                        goto out_drop_write;
+                        goto out_dput_new;
        }
        host_err = vfs_rename(fdir, odentry, tdir, ndentry);
        if (!host_err) {
@@ -1822,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                if (!host_err)
                        host_err = commit_metadata(ffhp);
        }
-out_drop_write:
-        fh_drop_write(ffhp);
 out_dput_new:
        dput(ndentry);
 out_dput_old:
@@ -1839,6 +1841,7 @@ out_drop_write:
        fill_post_wcc(tfhp);
        unlock_rename(tdentry, fdentry);
        ffhp->fh_locked = tfhp->fh_locked = 0;
+        fh_drop_write(ffhp);
 out:
        return err;
@@ -1864,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (err)
                goto out;
+        host_err = fh_want_write(fhp);
+        if (host_err)
+                goto out_nfserr;
        fh_lock_nested(fhp, I_MUTEX_PARENT);
        dentry = fhp->fh_dentry;
        dirp = dentry->d_inode;
@@ -1882,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = rdentry->d_inode->i_mode & S_IFMT;
-        host_err = fh_want_write(fhp);
-        if (host_err)
-                goto out_put;
        host_err = nfsd_break_lease(rdentry->d_inode);
        if (host_err)
-                goto out_drop_write;
+                goto out_put;
        if (type != S_IFDIR)
                host_err = vfs_unlink(dirp, rdentry);
        else
                host_err = vfs_rmdir(dirp, rdentry);
        if (!host_err)
                host_err = commit_metadata(fhp);
-out_drop_write:
-        fh_drop_write(fhp);
 out_put:
        dput(rdentry);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index ec0611b2b738..359594c393d2 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
 static inline int fh_want_write(struct svc_fh *fh)
 {
-        return mnt_want_write(fh->fh_export->ex_path.mnt);
+        int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
+        if (!ret)
+                fh->fh_want_write = 1;
+        return ret;
 }
 static inline void fh_drop_write(struct svc_fh *fh)
 {
-        mnt_drop_write(fh->fh_export->ex_path.mnt);
+        if (fh->fh_want_write) {
+                fh->fh_want_write = 0;
+                mnt_drop_write(fh->fh_export->ex_path.mnt);
+        }
 }
 #endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 62cebc8e1a1f..a4d56ac02e6c 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct page *page = vmf->page;
        struct inode *inode = vma->vm_file->f_dentry->d_inode;
        struct nilfs_transaction_info ti;
-        int ret;
+        int ret = 0;
        if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
                return VM_FAULT_SIGBUS; /* -ENOSPC */
+        sb_start_pagefault(inode->i_sb);
        lock_page(page);
        if (page->mapping != inode->i_mapping ||
            page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
                unlock_page(page);
-                return VM_FAULT_NOPAGE; /* make the VM retry the fault */
+                ret = -EFAULT;  /* make the VM retry the fault */
+                goto out;
        }
        /*
@@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
        /* never returns -ENOMEM, but may return -ENOSPC */
        if (unlikely(ret))
-                return VM_FAULT_SIGBUS;
+                goto out;
-        ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
+        ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
-        if (ret != VM_FAULT_LOCKED) {
+        if (ret) {
                nilfs_transaction_abort(inode->i_sb);
-                return ret;
+                goto out;
        }
        nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
        nilfs_transaction_commit(inode->i_sb);
 mapped:
        wait_on_page_writeback(page);
-        return VM_FAULT_LOCKED;
+ out:
+        sb_end_pagefault(inode->i_sb);
+        return block_page_mkwrite_return(ret);
 }
 static const struct vm_operations_struct nilfs_file_vm_ops = {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 0b6387c67e6c..fdb180769485 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
                goto out_free;
        }
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
        ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]);
        if (ret < 0)
                printk(KERN_ERR "NILFS: GC failed during preparation: "
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 88e11fb346b6..a5752a589932 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb,
        if (ret > 0)
                return 0;
-        vfs_check_frozen(sb, SB_FREEZE_WRITE);
+        sb_start_intwrite(sb);
        nilfs = sb->s_fs_info;
        down_read(&nilfs->ns_segctor_sem);
@@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb,
        current->journal_info = ti->ti_save;
        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
                kmem_cache_free(nilfs_transaction_cachep, ti);
+        sb_end_intwrite(sb);
        return ret;
 }
@@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb)
                err = nilfs_construct_segment(sb);
        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
                kmem_cache_free(nilfs_transaction_cachep, ti);
+        sb_end_intwrite(sb);
        return err;
 }
@@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb)
        current->journal_info = ti->ti_save;
        if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
                kmem_cache_free(nilfs_transaction_cachep, ti);
+        sb_end_intwrite(sb);
 }
 void nilfs_relax_pressure_in_lock(struct super_block *sb)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7389d2d5e51d..1ecf46448f85 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
        if (err)
                return err;
        pos = *ppos;
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
        /* We can write back this queue in page reclaim. */
        current->backing_dev_info = mapping->backing_dev_info;
        written = 0;
@@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        BUG_ON(iocb->ki_pos != pos);
+        sb_start_write(inode->i_sb);
        mutex_lock(&inode->i_mutex);
        ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
        mutex_unlock(&inode->i_mutex);
@@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                if (err < 0)
                        ret = err;
        }
+        sb_end_write(inode->i_sb);
        return ret;
 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7602783d7f41..46a1f6d75104 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+        int ret;
        if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
            !ocfs2_writes_unwritten_extents(osb))
@@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
-        return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
+        ret = mnt_want_write_file(file);
+        if (ret)
+                return ret;
+        ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
+        mnt_drop_write_file(file);
+        return ret;
 }
 static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
@@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
        if (iocb->ki_left == 0)
                return 0;
-        vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+        sb_start_write(inode->i_sb);
        appending = file->f_flags & O_APPEND ? 1 : 0;
        direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2436,6 +2442,7 @@ out_sems:
                ocfs2_iocb_clear_sem_locked(iocb);
        mutex_unlock(&inode->i_mutex);
+        sb_end_write(inode->i_sb);
        if (written)
                ret = written;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index d96f7f81d8dd..f20edcbfe700 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (get_user(new_clusters, (int __user *)arg))
                        return -EFAULT;
-                return ocfs2_group_extend(inode, new_clusters);
+                status = mnt_want_write_file(filp);
+                if (status)
+                        return status;
+                status = ocfs2_group_extend(inode, new_clusters);
+                mnt_drop_write_file(filp);
+                return status;
        case OCFS2_IOC_GROUP_ADD:
        case OCFS2_IOC_GROUP_ADD64:
                if (!capable(CAP_SYS_RESOURCE))
@@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
                        return -EFAULT;
-                return ocfs2_group_add(inode, &input);
+                status = mnt_want_write_file(filp);
+                if (status)
+                        return status;
+                status = ocfs2_group_add(inode, &input);
+                mnt_drop_write_file(filp);
+                return status;
        case OCFS2_IOC_REFLINK:
                if (copy_from_user(&args, argp, sizeof(args)))
                        return -EFAULT;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 0a42ae96dca7..2dd36af79e26 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
        if (journal_current_handle())
                return jbd2_journal_start(journal, max_buffs);
+        sb_start_intwrite(osb->sb);
        down_read(&osb->journal->j_trans_barrier);
        handle = jbd2_journal_start(journal, max_buffs);
        if (IS_ERR(handle)) {
                up_read(&osb->journal->j_trans_barrier);
+                sb_end_intwrite(osb->sb);
                mlog_errno(PTR_ERR(handle));
@@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
        if (ret < 0)
                mlog_errno(ret);
-        if (!nested)
+        if (!nested) {
                up_read(&journal->j_trans_barrier);
+                sb_end_intwrite(osb->sb);
+        }
        return ret;
 }
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9cd41083e991..d150372fd81d 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        sigset_t oldset;
        int ret;
+        sb_start_pagefault(inode->i_sb);
        ocfs2_block_signals(&oldset);
        /*
@@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 out:
        ocfs2_unblock_signals(&oldset);
+        sb_end_pagefault(inode->i_sb);
        return ret;
 }
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9f32d7cbb7a3..30a055049e16 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
                goto out_dput;
        }
-        error = mnt_want_write(new_path.mnt);
-        if (error) {
-                mlog_errno(error);
-                goto out_dput;
-        }
        error = ocfs2_vfs_reflink(old_path.dentry,
                                  new_path.dentry->d_inode,
                                  new_dentry, preserve);
-        mnt_drop_write(new_path.mnt);
 out_dput:
-        dput(new_dentry);
+        done_path_create(&new_path, new_dentry);
-        mutex_unlock(&new_path.dentry->d_inode->i_mutex);
-        path_put(&new_path);
 out:
        path_put(&old_path);
diff --git a/fs/open.c b/fs/open.c
index 1e914b397e12..f3d96e7e7b19 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
        if (IS_APPEND(inode))
                goto out_putf;
+        sb_start_write(inode->i_sb);
        error = locks_verify_truncate(inode, file, length);
        if (!error)
                error = security_path_truncate(&file->f_path);
        if (!error)
                error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+        sb_end_write(inode->i_sb);
 out_putf:
        fput(file);
 out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
-        return file->f_op->fallocate(file, mode, offset, len);
+        sb_start_write(inode->i_sb);
+        ret = file->f_op->fallocate(file, mode, offset, len);
+        sb_end_write(inode->i_sb);
+        return ret;
 }
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
@@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode,
                /*
                 * Balanced in __fput()
                 */
-                error = mnt_want_write(mnt);
+                error = __mnt_want_write(mnt);
                if (error)
                        put_write_access(inode);
        }
@@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f,
        if (unlikely(f->f_flags & O_PATH))
                f->f_mode = FMODE_PATH;
+        path_get(&f->f_path);
        inode = f->f_path.dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
                error = __get_file_write_access(inode, f->f_path.mnt);
@@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry,
        int error;
        BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
-        mntget(file->f_path.mnt);
+        file->f_path.dentry = dentry;
-        file->f_path.dentry = dget(dentry);
        error = do_dentry_open(file, open, current_cred());
        if (!error)
                *opened |= FILE_OPENED;
@@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags,
        f->f_flags = flags;
        f->f_path = *path;
-        path_get(&f->f_path);
        error = do_dentry_open(f, NULL, cred);
        if (!error) {
                error = open_check_o_direct(f);
diff --git a/fs/pipe.c b/fs/pipe.c
index 95cbd6b227e6..8d85d7068c1e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,18 +1016,16 @@ fail_inode:
        return NULL;
 }
-struct file *create_write_pipe(int flags)
+int create_pipe_files(struct file **res, int flags)
 {
        int err;
-        struct inode *inode;
+        struct inode *inode = get_pipe_inode();
        struct file *f;
        struct path path;
-        struct qstr name = { .name = "" };
+        static struct qstr name = { .name = "" };
-        err = -ENFILE;
-        inode = get_pipe_inode();
        if (!inode)
-                goto err;
+                return -ENFILE;
        err = -ENOMEM;
        path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
@@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags)
        f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
        if (!f)
                goto err_dentry;
-        f->f_mapping = inode->i_mapping;
        f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
-        f->f_version = 0;
-        return f;
+        res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
+        if (!res[0])
+                goto err_file;
+        path_get(&path);
+        res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+        res[1] = f;
+        return 0;
- err_dentry:
+err_file:
+        put_filp(f);
+err_dentry:
        free_pipe_info(inode);
        path_put(&path);
-        return ERR_PTR(err);
+        return err;
- err_inode:
+err_inode:
        free_pipe_info(inode);
        iput(inode);
- err:
+        return err;
-        return ERR_PTR(err);
-}
-void free_write_pipe(struct file *f)
-{
-        free_pipe_info(f->f_dentry->d_inode);
-        path_put(&f->f_path);
-        put_filp(f);
-}
-struct file *create_read_pipe(struct file *wrf, int flags)
-{
-        /* Grab pipe from the writer */
-        struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
-                                    &read_pipefifo_fops);
-        if (!f)
-                return ERR_PTR(-ENFILE);
-        path_get(&wrf->f_path);
-        f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-        return f;
 }
 int do_pipe_flags(int *fd, int flags)
 {
-        struct file *fw, *fr;
+        struct file *files[2];
        int error;
        int fdw, fdr;
        if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
                return -EINVAL;
-        fw = create_write_pipe(flags);
+        error = create_pipe_files(files, flags);
-        if (IS_ERR(fw))
+        if (error)
-                return PTR_ERR(fw);
+                return error;
-        fr = create_read_pipe(fw, flags);
-        error = PTR_ERR(fr);
-        if (IS_ERR(fr))
-                goto err_write_pipe;
        error = get_unused_fd_flags(flags);
        if (error < 0)
@@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags)
        fdw = error;
        audit_fd_pair(fdr, fdw);
-        fd_install(fdr, fr);
+        fd_install(fdr, files[0]);
-        fd_install(fdw, fw);
+        fd_install(fdw, files[1]);
        fd[0] = fdr;
        fd[1] = fdw;
@@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags)
 err_fdr:
        put_unused_fd(fdr);
 err_read_pipe:
-        path_put(&fr->f_path);
+        fput(files[0]);
-        put_filp(fr);
+        fput(files[1]);
- err_write_pipe:
-        free_write_pipe(fw);
        return error;
 }
diff --git a/fs/splice.c b/fs/splice.c
index 7bf08fa22ec9..41514dd89462 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
        };
        ssize_t ret;
+        sb_start_write(inode->i_sb);
        pipe_lock(pipe);
        splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                        *ppos += ret;
                balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
        }
+        sb_end_write(inode->i_sb);
        return ret;
 }
diff --git a/fs/super.c b/fs/super.c
index 4bf714459a4b..b05cf47463d0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,12 +33,19 @@
 #include <linux/rculist_bl.h>
 #include <linux/cleancache.h>
 #include <linux/fsnotify.h>
+#include <linux/lockdep.h>
 #include "internal.h"
 LIST_HEAD(super_blocks);
 DEFINE_SPINLOCK(sb_lock);
+static char *sb_writers_name[SB_FREEZE_LEVELS] = {
+        "sb_writers",
+        "sb_pagefaults",
+        "sb_internal",
+};
 /*
 * One thing we have to be careful of with a per-sb shrinker is that we don't
 * drop the last active reference to the superblock from within the shrinker.
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
        return total_objects;
 }
+static int init_sb_writers(struct super_block *s, struct file_system_type *type)
+{
+        int err;
+        int i;
+        for (i = 0; i < SB_FREEZE_LEVELS; i++) {
+                err = percpu_counter_init(&s->s_writers.counter[i], 0);
+                if (err < 0)
+                        goto err_out;
+                lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
+                                 &type->s_writers_key[i], 0);
+        }
+        init_waitqueue_head(&s->s_writers.wait);
+        init_waitqueue_head(&s->s_writers.wait_unfrozen);
+        return 0;
+err_out:
+        while (--i >= 0)
+                percpu_counter_destroy(&s->s_writers.counter[i]);
+        return err;
+}
+static void destroy_sb_writers(struct super_block *s)
+{
+        int i;
+        for (i = 0; i < SB_FREEZE_LEVELS; i++)
+                percpu_counter_destroy(&s->s_writers.counter[i]);
+}
 /**
 *      alloc_super     -       create new superblock
 *      @type:  filesystem type superblock should belong to
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
        if (s) {
                if (security_sb_alloc(s)) {
+                        /*
+                         * We cannot call security_sb_free() without
+                         * security_sb_alloc() succeeding. So bail out manually
+                         */
                        kfree(s);
                        s = NULL;
                        goto out;
                }
 #ifdef CONFIG_SMP
                s->s_files = alloc_percpu(struct list_head);
-                if (!s->s_files) {
+                if (!s->s_files)
-                        security_sb_free(s);
+                        goto err_out;
-                        kfree(s);
+                else {
-                        s = NULL;
-                        goto out;
-                } else {
                        int i;
                        for_each_possible_cpu(i)
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 #else
                INIT_LIST_HEAD(&s->s_files);
 #endif
+                if (init_sb_writers(s, type))
+                        goto err_out;
                s->s_flags = flags;
                s->s_bdi = &default_backing_dev_info;
                INIT_HLIST_NODE(&s->s_instances);
@@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                mutex_init(&s->s_dquot.dqio_mutex);
                mutex_init(&s->s_dquot.dqonoff_mutex);
                init_rwsem(&s->s_dquot.dqptr_sem);
-                init_waitqueue_head(&s->s_wait_unfrozen);
                s->s_maxbytes = MAX_NON_LFS;
                s->s_op = &default_op;
                s->s_time_gran = 1000000000;
@@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
        }
 out:
        return s;
+err_out:
+        security_sb_free(s);
+#ifdef CONFIG_SMP
+        if (s->s_files)
+                free_percpu(s->s_files);
+#endif
+        destroy_sb_writers(s);
+        kfree(s);
+        s = NULL;
+        goto out;
 }
 /**
@@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s)
 #ifdef CONFIG_SMP
        free_percpu(s->s_files);
 #endif
+        destroy_sb_writers(s);
        security_sb_free(s);
        WARN_ON(!list_empty(&s->s_mounts));
        kfree(s->s_subtype);
@@ -651,10 +700,11 @@ struct super_block *get_super_thawed(struct block_device *bdev)
 {
        while (1) {
                struct super_block *s = get_super(bdev);
-                if (!s || s->s_frozen == SB_UNFROZEN)
+                if (!s || s->s_writers.frozen == SB_UNFROZEN)
                        return s;
                up_read(&s->s_umount);
-                vfs_check_frozen(s, SB_FREEZE_WRITE);
+                wait_event(s->s_writers.wait_unfrozen,
+                           s->s_writers.frozen == SB_UNFROZEN);
                put_super(s);
        }
 }
@@ -732,7 +782,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
        int retval;
        int remount_ro;
-        if (sb->s_frozen != SB_UNFROZEN)
+        if (sb->s_writers.frozen != SB_UNFROZEN)
                return -EBUSY;
 #ifdef CONFIG_BLOCK
@@ -1163,6 +1213,120 @@ out:
        return ERR_PTR(error);
 }
+/*
+ * This is an internal function, please use sb_end_{write,pagefault,intwrite}
+ * instead.
+ *
+ * Drops one writer reference of type @level on @sb: the per-cpu counter
+ * s_writers.counter[level-1] is decremented, anybody sleeping on
+ * s_writers.wait is woken up and the lockdep map s_writers.lock_map[level-1]
+ * is released. @level is 1-based, it is always used as index level-1.
+ */
+void __sb_end_write(struct super_block *sb, int level)
+{
+        percpu_counter_dec(&sb->s_writers.counter[level-1]);
+        /*
+         * Make sure s_writers are updated before we wake up waiters in
+         * freeze_super().
+         *
+         * The barrier also orders the counter update against the
+         * waitqueue_active() check below, which only issues a wake_up()
+         * when somebody is queued on s_writers.wait.
+         */
+        smp_mb();
+        if (waitqueue_active(&sb->s_writers.wait))
+                wake_up(&sb->s_writers.wait);
+        rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+}
+EXPORT_SYMBOL(__sb_end_write);
+#ifdef CONFIG_LOCKDEP
+/*
+ * We want lockdep to tell us about possible deadlocks with freezing but
+ * it's a bit tricky to properly instrument it. Getting a freeze protection
+ * works as getting a read lock but there are subtle problems. XFS for example
+ * gets freeze protection on internal level twice in some cases, which is OK
+ * only because we already hold a freeze protection also on higher level. Due
+ * to these cases we have to tell lockdep we are doing trylock when we
+ * already hold a freeze protection for a higher freeze level.
+ *
+ * @sb:      superblock whose s_writers.lock_map[] entries are annotated
+ * @level:   1-based freeze level; lock_map[level-1] is the map acquired
+ * @trylock: true if the caller is not going to block; it is forced to true
+ *           when any of lock_map[0] .. lock_map[level-2] is already held
+ * @ip:      caller address handed through to rwsem_acquire_read()
+ *
+ * Note that the "higher" levels mentioned above are the maps with a smaller
+ * index than level-1, since those are the ones the loop below scans.
+ */
+static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
+                                unsigned long ip)
+{
+        int i;
+        if (!trylock) {
+                for (i = 0; i < level - 1; i++)
+                        if (lock_is_held(&sb->s_writers.lock_map[i])) {
+                                trylock = true;
+                                break;
+                        }
+        }
+        rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
+}
+#endif
+/*
+ * This is an internal function, please use sb_start_{write,pagefault,intwrite}
+ * instead.
+ *
+ * Takes one writer reference of type @level on @sb by incrementing the
+ * per-cpu counter s_writers.counter[level-1]. @level is 1-based.
+ *
+ * While s_writers.frozen >= @level the function either sleeps on
+ * s_writers.wait_unfrozen (@wait true) or gives up (@wait false).
+ *
+ * Returns 1 when the reference was taken, 0 when @wait is false and the
+ * superblock is frozen to at least @level. A reference obtained here is
+ * dropped again with __sb_end_write() using the same @level.
+ */
+int __sb_start_write(struct super_block *sb, int level, bool wait)
+{
+retry:
+        if (unlikely(sb->s_writers.frozen >= level)) {
+                if (!wait)
+                        return 0;
+                wait_event(sb->s_writers.wait_unfrozen,
+                           sb->s_writers.frozen < level);
+        }
+#ifdef CONFIG_LOCKDEP
+        acquire_freeze_lock(sb, level, !wait, _RET_IP_);
+#endif
+        percpu_counter_inc(&sb->s_writers.counter[level-1]);
+        /*
+         * Make sure counter is updated before we check for frozen.
+         * freeze_super() first sets frozen and then checks the counter.
+         *
+         * If frozen reached our level after the first check above, the
+         * reference just taken is dropped again and the whole sequence is
+         * retried from the start.
+         */
+        smp_mb();
+        if (unlikely(sb->s_writers.frozen >= level)) {
+                __sb_end_write(sb, level);
+                goto retry;
+        }
+        return 1;
+}
+EXPORT_SYMBOL(__sb_start_write);
+/**
+ * sb_wait_write - wait until all writers to given file system finish
+ * @sb: the super for which we wait
+ * @level: type of writers we wait for (normal, page fault or internal fs);
+ *         1-based, s_writers.counter[level-1] is the counter that is summed
+ *
+ * This function waits until there are no writers of given type to given file
+ * system. Caller of this function should make sure there can be no new writers
+ * of type @level before calling this function. Otherwise this function can
+ * livelock.
+ */
+static void sb_wait_write(struct super_block *sb, int level)
+{
+        s64 writers;
+        /*
+         * We just cycle-through lockdep here so that it does not complain
+         * about returning with lock to userspace
+         */
+        rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
+        rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
+        do {
+                DEFINE_WAIT(wait);
+                /*
+                 * We use a barrier in prepare_to_wait() to separate setting
+                 * of frozen and checking of the counter
+                 *
+                 * We queue ourselves on s_writers.wait before summing the
+                 * counter and only sleep when the sum is non-zero; the
+                 * loop repeats until the sum reads zero.
+                 */
+                prepare_to_wait(&sb->s_writers.wait, &wait,
+                                TASK_UNINTERRUPTIBLE);
+                writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
+                if (writers)
+                        schedule();
+                finish_wait(&sb->s_writers.wait, &wait);
+        } while (writers);
+}
 /**
 * freeze_super - lock the filesystem and force it into a consistent state
 * @sb: the super to lock
@@ -1170,6 +1334,31 @@ out:
 * Syncs the super to make sure the filesystem is consistent and calls the fs's
 * freeze_fs.  Subsequent calls to this without first thawing the fs will return
 * -EBUSY.
+ *
+ * During this function, sb->s_writers.frozen goes through these values:
+ *
+ * SB_UNFROZEN: File system is normal, all writes progress as usual.
+ *
+ * SB_FREEZE_WRITE: The file system is in the process of being frozen.  New
+ * writes should be blocked, though page faults are still allowed. We wait for
+ * all writes to complete and then proceed to the next stage.
+ *
+ * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
+ * but internal fs threads can still modify the filesystem (although they
+ * should not dirty new pages or inodes), writeback can run etc. After waiting
+ * for all running page faults we sync the filesystem which will clean all
+ * dirty pages and inodes (no new dirty pages or inodes can be created when
+ * sync is running).
+ *
+ * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
+ * modification are blocked (e.g. XFS preallocation truncation on inode
+ * reclaim). This is usually implemented by blocking new transactions for
+ * filesystems that have them and need this additional guard. After all
+ * internal writers are finished we call ->freeze_fs() to finish filesystem
+ * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
+ * mostly auxiliary for filesystems to verify they do not modify frozen fs.
+ *
+ * sb->s_writers.frozen is protected by sb->s_umount.
 */
 int freeze_super(struct super_block *sb)
 {
@@ -1177,7 +1366,7 @@ int freeze_super(struct super_block *sb)
        atomic_inc(&sb->s_active);
        down_write(&sb->s_umount);
-        if (sb->s_frozen) {
+        if (sb->s_writers.frozen != SB_UNFROZEN) {
                deactivate_locked_super(sb);
                return -EBUSY;
        }
@@ -1188,33 +1377,53 @@ int freeze_super(struct super_block *sb)
        }
        if (sb->s_flags & MS_RDONLY) {
-                sb->s_frozen = SB_FREEZE_TRANS;
+                /* Nothing to do really... */
-                smp_wmb();
+                sb->s_writers.frozen = SB_FREEZE_COMPLETE;
                up_write(&sb->s_umount);
                return 0;
        }
-        sb->s_frozen = SB_FREEZE_WRITE;
+        /* From now on, no new normal writers can start */
+        sb->s_writers.frozen = SB_FREEZE_WRITE;
+        smp_wmb();
+        /* Release s_umount to preserve sb_start_write -> s_umount ordering */
+        up_write(&sb->s_umount);
+        sb_wait_write(sb, SB_FREEZE_WRITE);
+        /* Now we go and block page faults... */
+        down_write(&sb->s_umount);
+        sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
        smp_wmb();
+        sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
+        /* All writers are done so after syncing there won't be dirty data */
        sync_filesystem(sb);
-        sb->s_frozen = SB_FREEZE_TRANS;
+        /* Now wait for internal filesystem counter */
+        sb->s_writers.frozen = SB_FREEZE_FS;
        smp_wmb();
+        sb_wait_write(sb, SB_FREEZE_FS);
-        sync_blockdev(sb->s_bdev);
        if (sb->s_op->freeze_fs) {
                ret = sb->s_op->freeze_fs(sb);
                if (ret) {
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
-                        sb->s_frozen = SB_UNFROZEN;
+                        sb->s_writers.frozen = SB_UNFROZEN;
                        smp_wmb();
-                        wake_up(&sb->s_wait_unfrozen);
+                        wake_up(&sb->s_writers.wait_unfrozen);
                        deactivate_locked_super(sb);
                        return ret;
                }
        }
+        /*
+         * This is just for debugging purposes so that fs can warn if it
+         * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+         */
+        sb->s_writers.frozen = SB_FREEZE_COMPLETE;
        up_write(&sb->s_umount);
        return 0;
 }
@@ -1231,7 +1440,7 @@ int thaw_super(struct super_block *sb)
        int error;
        down_write(&sb->s_umount);
-        if (sb->s_frozen == SB_UNFROZEN) {
+        if (sb->s_writers.frozen == SB_UNFROZEN) {
                up_write(&sb->s_umount);
                return -EINVAL;
        }
@@ -1244,16 +1453,15 @@ int thaw_super(struct super_block *sb)
                if (error) {
                        printk(KERN_ERR
                                "VFS:Filesystem thaw failed\n");
-                        sb->s_frozen = SB_FREEZE_TRANS;
                        up_write(&sb->s_umount);
                        return error;
                }
        }
 out:
-        sb->s_frozen = SB_UNFROZEN;
+        sb->s_writers.frozen = SB_UNFROZEN;
        smp_wmb();
-        wake_up(&sb->s_wait_unfrozen);
+        wake_up(&sb->s_writers.wait_unfrozen);
        deactivate_locked_super(sb);
        return 0;
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index a4759833d62d..614b2b544880 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        ret = 0;
        if (bb->vm_ops->page_mkwrite)
                ret = bb->vm_ops->page_mkwrite(vma, vmf);
+        else
+                file_update_time(file);
        sysfs_put_active(attr_sd);
        return ret;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 15052ff916ec..e562dd43f41f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -124,6 +124,12 @@ xfs_setfilesize_trans_alloc(
        ioend->io_append_trans = tp;
        /*
+         * We will pass freeze protection with a transaction.  So tell lockdep
+         * we released it.
+         */
+        rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                      1, _THIS_IP_);
+        /*
         * We hand off the transaction to the completion thread now, so
         * clear the flag here.
         */
@@ -199,6 +205,15 @@ xfs_end_io(
        struct xfs_inode *ip = XFS_I(ioend->io_inode);
        int             error = 0;
+        if (ioend->io_append_trans) {
+                /*
+                 * We've got freeze protection passed with the transaction.
+                 * Tell lockdep about it.
+                 */
+                rwsem_acquire_read(
+                        &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                        0, 1, _THIS_IP_);
+        }
        if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
                ioend->io_error = -EIO;
                goto done;
@@ -1425,6 +1440,9 @@ out_trans_cancel:
        if (ioend->io_append_trans) {
                current_set_flags_nested(&ioend->io_append_trans->t_pflags,
                                         PF_FSTRANS);
+                rwsem_acquire_read(
+                        &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+                        0, 1, _THIS_IP_);
                xfs_trans_cancel(ioend->io_append_trans, 0);
        }
 out_destroy_ioend:
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c4559c6e6f2c..56afcdb2377d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -770,10 +770,12 @@ xfs_file_aio_write(
        if (ocount == 0)
                return 0;
-        xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+        sb_start_write(inode->i_sb);
-        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+        if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                return -EIO;
+                ret = -EIO;
+                goto out;
+        }
        if (unlikely(file->f_flags & O_DIRECT))
                ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
@@ -792,6 +794,8 @@ xfs_file_aio_write(
                        ret = err;
        }
+out:
+        sb_end_write(inode->i_sb);
        return ret;
 }
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f1535d25a9b..0e0232c3b6d9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -364,9 +364,15 @@ xfs_fssetdm_by_handle(
        if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
                return -XFS_ERROR(EFAULT);
+        error = mnt_want_write_file(parfilp);
+        if (error)
+                return error;
        dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
-        if (IS_ERR(dentry))
+        if (IS_ERR(dentry)) {
+                mnt_drop_write_file(parfilp);
                return PTR_ERR(dentry);
+        }
        if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
                error = -XFS_ERROR(EPERM);
@@ -382,6 +388,7 @@ xfs_fssetdm_by_handle(
                                 fsd.fsd_dmstate);
 out:
+        mnt_drop_write_file(parfilp);
        dput(dentry);
        return error;
 }
@@ -634,7 +641,11 @@ xfs_ioc_space(
        if (ioflags & IO_INVIS)
                attr_flags |= XFS_ATTR_DMI;
+        error = mnt_want_write_file(filp);
+        if (error)
+                return error;
        error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+        mnt_drop_write_file(filp);
        return -error;
 }
@@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr(
 {
        struct fsxattr          fa;
        unsigned int            mask;
+        int error;
        if (copy_from_user(&fa, arg, sizeof(fa)))
                return -EFAULT;
@@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr(
        if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
                mask |= FSX_NONBLOCK;
-        return -xfs_ioctl_setattr(ip, &fa, mask);
+        error = mnt_want_write_file(filp);
+        if (error)
+                return error;
+        error = xfs_ioctl_setattr(ip, &fa, mask);
+        mnt_drop_write_file(filp);
+        return -error;
 }
 STATIC int
@@ -1196,6 +1213,7 @@ xfs_ioc_setxflags(
        struct fsxattr          fa;
        unsigned int            flags;
        unsigned int            mask;
+        int error;
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
@@ -1210,7 +1228,12 @@ xfs_ioc_setxflags(
                mask |= FSX_NONBLOCK;
        fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-        return -xfs_ioctl_setattr(ip, &fa, mask);
+        error = mnt_want_write_file(filp);
+        if (error)
+                return error;
+        error = xfs_ioctl_setattr(ip, &fa, mask);
+        mnt_drop_write_file(filp);
+        return -error;
 }
 STATIC int
@@ -1385,8 +1408,13 @@ xfs_file_ioctl(
                if (copy_from_user(&dmi, arg, sizeof(dmi)))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
                                dmi.fsd_dmstate);
+                mnt_drop_write_file(filp);
                return -error;
        }
@@ -1434,7 +1462,11 @@ xfs_file_ioctl(
                if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_swapext(&sxp);
+                mnt_drop_write_file(filp);
                return -error;
        }
@@ -1463,9 +1495,14 @@ xfs_file_ioctl(
                if (copy_from_user(&inout, arg, sizeof(inout)))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                /* input parameter is passed in resblks field of structure */
                in = inout.resblks;
                error = xfs_reserve_blocks(mp, &in, &inout);
+                mnt_drop_write_file(filp);
                if (error)
                        return -error;
@@ -1496,7 +1533,11 @@ xfs_file_ioctl(
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_growfs_data(mp, &in);
+                mnt_drop_write_file(filp);
                return -error;
        }
@@ -1506,7 +1547,11 @@ xfs_file_ioctl(
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_growfs_log(mp, &in);
+                mnt_drop_write_file(filp);
                return -error;
        }
@@ -1516,7 +1561,11 @@ xfs_file_ioctl(
                if (copy_from_user(&in, arg, sizeof(in)))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_growfs_rt(mp, &in);
+                mnt_drop_write_file(filp);
                return -error;
        }
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c4f2da0d2bf5..1244274a5674 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -600,7 +600,11 @@ xfs_file_compat_ioctl(
                if (xfs_compat_growfs_data_copyin(&in, arg))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_growfs_data(mp, &in);
+                mnt_drop_write_file(filp);
                return -error;
        }
        case XFS_IOC_FSGROWFSRT_32: {
@@ -608,7 +612,11 @@ xfs_file_compat_ioctl(
                if (xfs_compat_growfs_rt_copyin(&in, arg))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_growfs_rt(mp, &in);
+                mnt_drop_write_file(filp);
                return -error;
        }
 #endif
@@ -627,7 +635,11 @@ xfs_file_compat_ioctl(
                                   offsetof(struct xfs_swapext, sx_stat)) ||
                    xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
                        return -XFS_ERROR(EFAULT);
+                error = mnt_want_write_file(filp);
+                if (error)
+                        return error;
                error = xfs_swapext(&sxp);
+                mnt_drop_write_file(filp);
                return -error;
        }
        case XFS_IOC_FSBULKSTAT_32:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 915edf6639f0..973dff6ad935 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten(
                 * the same inode that we complete here and might deadlock
                 * on the iolock.
                 */
-                xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+                sb_start_intwrite(mp->m_super);
                tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
-                tp->t_flags |= XFS_TRANS_RESERVE;
+                tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
                error = xfs_trans_reserve(tp, resblks,
                                XFS_WRITE_LOG_RES(mp), 0,
                                XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 711ca51ca3d7..29c2f83d4147 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1551,7 +1551,7 @@ xfs_unmountfs(
 int
 xfs_fs_writable(xfs_mount_t *mp)
 {
-        return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) ||
+        return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) ||
                (mp->m_flags & XFS_MOUNT_RDONLY));
 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 8724336a9a08..05a05a7b6119 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -311,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
 #define SHUTDOWN_REMOTE_REQ     0x0010  /* shutdown came from remote cell */
 #define SHUTDOWN_DEVICE_REQ     0x0020  /* failed all paths to the device */
-#define xfs_test_for_freeze(mp)         ((mp)->m_super->s_frozen)
-#define xfs_wait_for_freeze(mp,l)       vfs_check_frozen((mp)->m_super, (l))
 /*
 * Flags for xfs_mountfs
 */
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 97304f10e78a..96548176db80 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -403,7 +403,7 @@ xfs_sync_worker(
        if (!(mp->m_super->s_flags & MS_ACTIVE) &&
            !(mp->m_flags & XFS_MOUNT_RDONLY)) {
                /* dgc: errors ignored here */
-                if (mp->m_super->s_frozen == SB_UNFROZEN &&
+                if (mp->m_super->s_writers.frozen == SB_UNFROZEN &&
                    xfs_log_need_covered(mp))
                        error = xfs_fs_log_dummy(mp);
                else
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdf324508c5e..06ed520a767f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -576,8 +576,12 @@ xfs_trans_alloc(
        xfs_mount_t     *mp,
        uint            type)
 {
-        xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+        xfs_trans_t     *tp;
-        return _xfs_trans_alloc(mp, type, KM_SLEEP);
+        sb_start_intwrite(mp->m_super);
+        tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
+        tp->t_flags |= XFS_TRANS_FREEZE_PROT;
+        return tp;
 }
 xfs_trans_t *
@@ -588,6 +592,7 @@ _xfs_trans_alloc(
 {
        xfs_trans_t     *tp;
+        WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
        atomic_inc(&mp->m_active_trans);
        tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
@@ -611,6 +616,8 @@ xfs_trans_free(
        xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
        atomic_dec(&tp->t_mountp->m_active_trans);
+        if (tp->t_flags & XFS_TRANS_FREEZE_PROT)
+                sb_end_intwrite(tp->t_mountp->m_super);
        xfs_trans_free_dqinfo(tp);
        kmem_zone_free(xfs_trans_zone, tp);
 }
@@ -643,7 +650,11 @@ xfs_trans_dup(
        ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
        ASSERT(tp->t_ticket != NULL);
-        ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
+        ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
+                       (tp->t_flags & XFS_TRANS_RESERVE) |
+                       (tp->t_flags & XFS_TRANS_FREEZE_PROT);
+        /* We gave our writer reference to the new transaction */
+        tp->t_flags &= ~XFS_TRANS_FREEZE_PROT;
        ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
        ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
        tp->t_blk_res = tp->t_blk_res_used;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index bc2afd52a0b7..db056544cbb5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -179,6 +179,8 @@ struct xfs_log_item_desc {
 #define XFS_TRANS_SYNC          0x08    /* make commit synchronous */
 #define XFS_TRANS_DQ_DIRTY      0x10    /* at least one dquot in trx dirty */
 #define XFS_TRANS_RESERVE       0x20    /* OK to use reserved data blocks */
+#define XFS_TRANS_FREEZE_PROT   0x40    /* Transaction has elevated writer
+                                           count in superblock */
 /*
 * Values for call flags parameter.