31 files changed, 1257 insertions, 352 deletions
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index b8ea11fee5c6..de876fa793e1 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/compat.h>
+#include <linux/mount.h>
 #include <linux/smp_lock.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
@@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        struct ext2_inode_info *ei = EXT2_I(inode);
        unsigned int flags;
        unsigned short rsv_window_size;
+        int ret;
        ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
@@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case EXT2_IOC_SETFLAGS: {
                unsigned int oldflags;
-                if (IS_RDONLY(inode))
+                ret = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (ret)
+                        return ret;
-                if (!is_owner_or_cap(inode))
+                if (!is_owner_or_cap(inode)) {
-                        return -EACCES;
+                        ret = -EACCES;
+                        goto setflags_out;
+                }
-                if (get_user(flags, (int __user *) arg))
+                if (get_user(flags, (int __user *) arg)) {
-                        return -EFAULT;
+                        ret = -EFAULT;
+                        goto setflags_out;
+                }
                if (!S_ISDIR(inode->i_mode))
                        flags &= ~EXT2_DIRSYNC_FL;
@@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                /* Is it quota file? Do not allow user to mess with it */
                if (IS_NOQUOTA(inode)) {
                        mutex_unlock(&inode->i_mutex);
-                        return -EPERM;
+                        ret = -EPERM;
+                        goto setflags_out;
                }
                oldflags = ei->i_flags;
@@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
                                mutex_unlock(&inode->i_mutex);
-                                return -EPERM;
+                                ret = -EPERM;
+                                goto setflags_out;
                        }
                }
@@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                ext2_set_inode_flags(inode);
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
-                return 0;
+setflags_out:
+                mnt_drop_write(filp->f_path.mnt);
+                return ret;
        }
        case EXT2_IOC_GETVERSION:
                return put_user(inode->i_generation, (int __user *) arg);
        case EXT2_IOC_SETVERSION:
                if (!is_owner_or_cap(inode))
                        return -EPERM;
-                if (IS_RDONLY(inode))
+                ret = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (ret)
-                if (get_user(inode->i_generation, (int __user *) arg))
+                        return ret;
-                        return -EFAULT; 
+                if (get_user(inode->i_generation, (int __user *) arg)) {
-                inode->i_ctime = CURRENT_TIME_SEC;
+                        ret = -EFAULT;
-                mark_inode_dirty(inode);
+                } else {
-                return 0;
+                        inode->i_ctime = CURRENT_TIME_SEC;
+                        mark_inode_dirty(inode);
+                }
+                mnt_drop_write(filp->f_path.mnt);
+                return ret;
        case EXT2_IOC_GETRSVSZ:
                if (test_opt(inode->i_sb, RESERVATION)
                        && S_ISREG(inode->i_mode)
@@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
                        return -ENOTTY;
-                if (IS_RDONLY(inode))
+                if (!is_owner_or_cap(inode))
-                        return -EROFS;
-                if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
                        return -EACCES;
                if (get_user(rsv_window_size, (int __user *)arg))
                        return -EFAULT;
+                ret = mnt_want_write(filp->f_path.mnt);
+                if (ret)
+                        return ret;
                if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS)
                        rsv_window_size = EXT2_MAX_RESERVE_BLOCKS;
@@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        rsv->rsv_goal_size = rsv_window_size;
                }
                mutex_unlock(&ei->truncate_mutex);
+                mnt_drop_write(filp->f_path.mnt);
                return 0;
        }
        default:
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 023a070f55f1..0d0c70151642 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -12,6 +12,7 @@
 #include <linux/capability.h>
 #include <linux/ext3_fs.h>
 #include <linux/ext3_jbd.h>
+#include <linux/mount.h>
 #include <linux/time.h>
 #include <linux/compat.h>
 #include <linux/smp_lock.h>
@@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                unsigned int oldflags;
                unsigned int jflag;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
+                        return err;
-                if (!is_owner_or_cap(inode))
+                if (!is_owner_or_cap(inode)) {
-                        return -EACCES;
+                        err = -EACCES;
+                        goto flags_out;
+                }
-                if (get_user(flags, (int __user *) arg))
+                if (get_user(flags, (int __user *) arg)) {
-                        return -EFAULT;
+                        err = -EFAULT;
+                        goto flags_out;
+                }
                if (!S_ISDIR(inode->i_mode))
                        flags &= ~EXT3_DIRSYNC_FL;
@@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                /* Is it quota file? Do not allow user to mess with it */
                if (IS_NOQUOTA(inode)) {
                        mutex_unlock(&inode->i_mutex);
-                        return -EPERM;
+                        err = -EPERM;
+                        goto flags_out;
                }
                oldflags = ei->i_flags;
@@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
                                mutex_unlock(&inode->i_mutex);
-                                return -EPERM;
+                                err = -EPERM;
+                                goto flags_out;
                        }
                }
@@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
                        if (!capable(CAP_SYS_RESOURCE)) {
                                mutex_unlock(&inode->i_mutex);
-                                return -EPERM;
+                                err = -EPERM;
+                                goto flags_out;
                        }
                }
@@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                handle = ext3_journal_start(inode, 1);
                if (IS_ERR(handle)) {
                        mutex_unlock(&inode->i_mutex);
-                        return PTR_ERR(handle);
+                        err = PTR_ERR(handle);
+                        goto flags_out;
                }
                if (IS_SYNC(inode))
                        handle->h_sync = 1;
@@ -115,6 +125,8 @@ flags_err:
                if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
                        err = ext3_change_inode_journal_flag(inode, jflag);
                mutex_unlock(&inode->i_mutex);
+flags_out:
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
        case EXT3_IOC_GETVERSION:
@@ -129,14 +141,18 @@ flags_err:
                if (!is_owner_or_cap(inode))
                        return -EPERM;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
-                if (get_user(generation, (int __user *) arg))
+                        return err;
-                        return -EFAULT;
+                if (get_user(generation, (int __user *) arg)) {
+                        err = -EFAULT;
+                        goto setversion_out;
+                }
                handle = ext3_journal_start(inode, 1);
-                if (IS_ERR(handle))
+                if (IS_ERR(handle)) {
-                        return PTR_ERR(handle);
+                        err = PTR_ERR(handle);
+                        goto setversion_out;
+                }
                err = ext3_reserve_inode_write(handle, inode, &iloc);
                if (err == 0) {
                        inode->i_ctime = CURRENT_TIME_SEC;
@@ -144,6 +160,8 @@ flags_err:
                        err = ext3_mark_iloc_dirty(handle, inode, &iloc);
                }
                ext3_journal_stop(handle);
+setversion_out:
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
 #ifdef CONFIG_JBD_DEBUG
@@ -179,18 +197,24 @@ flags_err:
                }
                return -ENOTTY;
        case EXT3_IOC_SETRSVSZ: {
+                int err;
                if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
                        return -ENOTTY;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
+                        return err;
-                if (!is_owner_or_cap(inode))
+                if (!is_owner_or_cap(inode)) {
-                        return -EACCES;
+                        err = -EACCES;
+                        goto setrsvsz_out;
+                }
-                if (get_user(rsv_window_size, (int __user *)arg))
+                if (get_user(rsv_window_size, (int __user *)arg)) {
-                        return -EFAULT;
+                        err = -EFAULT;
+                        goto setrsvsz_out;
+                }
                if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
                        rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
@@ -208,7 +232,9 @@ flags_err:
                        rsv->rsv_goal_size = rsv_window_size;
                }
                mutex_unlock(&ei->truncate_mutex);
-                return 0;
+setrsvsz_out:
+                mnt_drop_write(filp->f_path.mnt);
+                return err;
        }
        case EXT3_IOC_GROUP_EXTEND: {
                ext3_fsblk_t n_blocks_count;
@@ -218,17 +244,20 @@ flags_err:
                if (!capable(CAP_SYS_RESOURCE))
                        return -EPERM;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
+                        return err;
-                if (get_user(n_blocks_count, (__u32 __user *)arg))
-                        return -EFAULT;
+                if (get_user(n_blocks_count, (__u32 __user *)arg)) {
+                        err = -EFAULT;
+                        goto group_extend_out;
+                }
                err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
                journal_lock_updates(EXT3_SB(sb)->s_journal);
                journal_flush(EXT3_SB(sb)->s_journal);
                journal_unlock_updates(EXT3_SB(sb)->s_journal);
+group_extend_out:
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
        case EXT3_IOC_GROUP_ADD: {
@@ -239,18 +268,22 @@ flags_err:
                if (!capable(CAP_SYS_RESOURCE))
                        return -EPERM;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
+                        return err;
                if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
-                                sizeof(input)))
+                                sizeof(input))) {
-                        return -EFAULT;
+                        err = -EFAULT;
+                        goto group_add_out;
+                }
                err = ext3_group_add(sb, &input);
                journal_lock_updates(EXT3_SB(sb)->s_journal);
                journal_flush(EXT3_SB(sb)->s_journal);
                journal_unlock_updates(EXT3_SB(sb)->s_journal);
+group_add_out:
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2ed7c37f897e..25b13ede8086 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -15,6 +15,7 @@
 #include <linux/time.h>
 #include <linux/compat.h>
 #include <linux/smp_lock.h>
+#include <linux/mount.h>
 #include <asm/uaccess.h>
 int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
@@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                unsigned int oldflags;
                unsigned int jflag;
-                if (IS_RDONLY(inode))
-                        return -EROFS;
                if (!is_owner_or_cap(inode))
                        return -EACCES;
                if (get_user(flags, (int __user *) arg))
                        return -EFAULT;
+                err = mnt_want_write(filp->f_path.mnt);
+                if (err)
+                        return err;
                if (!S_ISDIR(inode->i_mode))
                        flags &= ~EXT4_DIRSYNC_FL;
+                err = -EPERM;
                mutex_lock(&inode->i_mutex);
                /* Is it quota file? Do not allow user to mess with it */
-                if (IS_NOQUOTA(inode)) {
+                if (IS_NOQUOTA(inode))
-                        mutex_unlock(&inode->i_mutex);
+                        goto flags_out;
-                        return -EPERM;
-                }
                oldflags = ei->i_flags;
                /* The JOURNAL_DATA flag is modifiable only by root */
@@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
                if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-                        if (!capable(CAP_LINUX_IMMUTABLE)) {
+                        if (!capable(CAP_LINUX_IMMUTABLE))
-                                mutex_unlock(&inode->i_mutex);
+                                goto flags_out;
-                                return -EPERM;
-                        }
                }
                /*
@@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 * the relevant capability.
                 */
                if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-                        if (!capable(CAP_SYS_RESOURCE)) {
+                        if (!capable(CAP_SYS_RESOURCE))
-                                mutex_unlock(&inode->i_mutex);
+                                goto flags_out;
-                                return -EPERM;
-                        }
                }
                handle = ext4_journal_start(inode, 1);
                if (IS_ERR(handle)) {
-                        mutex_unlock(&inode->i_mutex);
+                        err = PTR_ERR(handle);
-                        return PTR_ERR(handle);
+                        goto flags_out;
                }
                if (IS_SYNC(inode))
                        handle->h_sync = 1;
@@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                err = ext4_mark_iloc_dirty(handle, inode, &iloc);
 flags_err:
                ext4_journal_stop(handle);
-                if (err) {
+                if (err)
-                        mutex_unlock(&inode->i_mutex);
+                        goto flags_out;
-                        return err;
-                }
                if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
                        err = ext4_change_inode_journal_flag(inode, jflag);
+flags_out:
                mutex_unlock(&inode->i_mutex);
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
        case EXT4_IOC_GETVERSION:
@@ -129,14 +126,20 @@ flags_err:
                if (!is_owner_or_cap(inode))
                        return -EPERM;
-                if (IS_RDONLY(inode))
-                        return -EROFS;
+                err = mnt_want_write(filp->f_path.mnt);
-                if (get_user(generation, (int __user *) arg))
+                if (err)
-                        return -EFAULT;
+                        return err;
+                if (get_user(generation, (int __user *) arg)) {
+                        err = -EFAULT;
+                        goto setversion_out;
+                }
                handle = ext4_journal_start(inode, 1);
-                if (IS_ERR(handle))
+                if (IS_ERR(handle)) {
-                        return PTR_ERR(handle);
+                        err = PTR_ERR(handle);
+                        goto setversion_out;
+                }
                err = ext4_reserve_inode_write(handle, inode, &iloc);
                if (err == 0) {
                        inode->i_ctime = ext4_current_time(inode);
@@ -144,6 +147,8 @@ flags_err:
                        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
                }
                ext4_journal_stop(handle);
+setversion_out:
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
 #ifdef CONFIG_JBD2_DEBUG
@@ -179,19 +184,21 @@ flags_err:
                }
                return -ENOTTY;
        case EXT4_IOC_SETRSVSZ: {
+                int err;
                if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
                        return -ENOTTY;
-                if (IS_RDONLY(inode))
-                        return -EROFS;
                if (!is_owner_or_cap(inode))
                        return -EACCES;
                if (get_user(rsv_window_size, (int __user *)arg))
                        return -EFAULT;
+                err = mnt_want_write(filp->f_path.mnt);
+                if (err)
+                        return err;
                if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
                        rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
@@ -208,6 +215,7 @@ flags_err:
                        rsv->rsv_goal_size = rsv_window_size;
                }
                up_write(&ei->i_data_sem);
+                mnt_drop_write(filp->f_path.mnt);
                return 0;
        }
        case EXT4_IOC_GROUP_EXTEND: {
@@ -218,16 +226,18 @@ flags_err:
                if (!capable(CAP_SYS_RESOURCE))
                        return -EPERM;
-                if (IS_RDONLY(inode))
-                        return -EROFS;
                if (get_user(n_blocks_count, (__u32 __user *)arg))
                        return -EFAULT;
+                err = mnt_want_write(filp->f_path.mnt);
+                if (err)
+                        return err;
                err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
                jbd2_journal_flush(EXT4_SB(sb)->s_journal);
                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
@@ -239,17 +249,19 @@ flags_err:
                if (!capable(CAP_SYS_RESOURCE))
                        return -EPERM;
-                if (IS_RDONLY(inode))
-                        return -EROFS;
                if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
                                sizeof(input)))
                        return -EFAULT;
+                err = mnt_want_write(filp->f_path.mnt);
+                if (err)
+                        return err;
                err = ext4_group_add(sb, &input);
                jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
                jbd2_journal_flush(EXT4_SB(sb)->s_journal);
                jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+                mnt_drop_write(filp->f_path.mnt);
                return err;
        }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c614175876e0..2a3bed967041 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -8,6 +8,7 @@
 #include <linux/capability.h>
 #include <linux/module.h>
+#include <linux/mount.h>
 #include <linux/time.h>
 #include <linux/msdos_fs.h>
 #include <linux/smp_lock.h>
@@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
                mutex_lock(&inode->i_mutex);
-                if (IS_RDONLY(inode)) {
+                err = mnt_want_write(filp->f_path.mnt);
-                        err = -EROFS;
+                if (err)
-                        goto up;
+                        goto up_no_drop_write;
-                }
                /*
                 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
                MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
                mark_inode_dirty(inode);
-        up:
+up:
+                mnt_drop_write(filp->f_path.mnt);
+up_no_drop_write:
                mutex_unlock(&inode->i_mutex);
                return err;
        }
diff --git a/fs/file_table.c b/fs/file_table.c
index 986ff4ed0a7c..7a0a9b872251 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head)
 static inline void file_free(struct file *f)
 {
        percpu_counter_dec(&nr_files);
+        file_check_state(f);	/* NOTE(review): helper defined outside this excerpt; presumably a debug check of the file's write-tracking state before the struct is freed -- confirm */
        call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
@@ -199,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
        file->f_mapping = dentry->d_inode->i_mapping;
        file->f_mode = mode;
        file->f_op = fop;
+        /*
+         * These mounts don't really matter in practice
+         * for r/o bind mounts.  They aren't userspace-
+         * visible.  We do this for consistency, and so
+         * that we can do debugging checks at __fput()
+         */
+        if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+                file_take_write(file);
+                error = mnt_want_write(mnt);
+                WARN_ON(error);
+        }
        return error;
 }
 EXPORT_SYMBOL(init_file);
@@ -211,6 +224,31 @@ void fput(struct file *file)
 EXPORT_SYMBOL(fput);
+/**
+ * drop_file_write_access - give up ability to write to a file
+ * @file: the file to which we will stop writing
+ *
+ * This is a central place which will give up the ability
+ * to write to @file, along with access to write through
+ * its vfsmount.
+ *
+ * The inode's write access is always released with
+ * put_write_access().  The vfsmount write reference is
+ * released with mnt_drop_write() only when the inode is not
+ * a special file and file_check_writeable() returns 0; that
+ * matches init_file(), which calls mnt_want_write() only for
+ * FMODE_WRITE opens of non-special files.
+ *
+ * NOTE(review): file_check_writeable() and file_release_write()
+ * are defined outside this file; presumably they track whether
+ * @file still holds a mount write reference -- confirm.
+ *
+ * __fput() calls this for files whose f_mode has FMODE_WRITE set.
+ */
+void drop_file_write_access(struct file *file)
+{
+        struct vfsmount *mnt = file->f_path.mnt;
+        struct dentry *dentry = file->f_path.dentry;
+        struct inode *inode = dentry->d_inode;
+        put_write_access(inode);
+        if (special_file(inode->i_mode))
+                return;
+        if (file_check_writeable(file) != 0)
+                return;
+        mnt_drop_write(mnt);
+        file_release_write(file);
+}
+EXPORT_SYMBOL_GPL(drop_file_write_access);
 /* __fput is called from task context when aio completion releases the last
 * last use of a struct file *.  Do not use otherwise.
 */
@@ -236,10 +274,10 @@ void __fput(struct file *file)
        if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
                cdev_put(inode->i_cdev);
        fops_put(file->f_op);
-        if (file->f_mode & FMODE_WRITE)
-                put_write_access(inode);
        put_pid(file->f_owner.pid);
        file_kill(file);
+        if (file->f_mode & FMODE_WRITE)
+                drop_file_write_access(file);
        file->f_path.dentry = NULL;
        file->f_path.mnt = NULL;
        file_free(file);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index b60c0affbec5..f457d2ca51ab 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -14,6 +14,7 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
+#include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/xattr.h>
 #include <asm/uaccess.h>
@@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                        flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
                return put_user(flags, (int __user *)arg);
        case HFSPLUS_IOC_EXT2_SETFLAGS: {
-                if (IS_RDONLY(inode))
+                int err = 0;
-                        return -EROFS;
+                err = mnt_want_write(filp->f_path.mnt);
+                if (err)
-                if (!is_owner_or_cap(inode))
+                        return err;
-                        return -EACCES;
+                if (!is_owner_or_cap(inode)) {
-                if (get_user(flags, (int __user *)arg))
+                        err = -EACCES;
-                        return -EFAULT;
+                        goto setflags_out;
+                }
+                if (get_user(flags, (int __user *)arg)) {
+                        err = -EFAULT;
+                        goto setflags_out;
+                }
                if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
                    HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
-                        if (!capable(CAP_LINUX_IMMUTABLE))
+                        if (!capable(CAP_LINUX_IMMUTABLE)) {
-                                return -EPERM;
+                                err = -EPERM;
+                                goto setflags_out;
+                        }
                }
                /* don't silently ignore unsupported ext2 flags */
-                if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL))
+                if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
-                        return -EOPNOTSUPP;
+                        err = -EOPNOTSUPP;
+                        goto setflags_out;
+                }
                if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
                        inode->i_flags |= S_IMMUTABLE;
                        HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
@@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
-                return 0;
+setflags_out:
+                mnt_drop_write(filp->f_path.mnt);
+                return err;
        }
        default:
                return -ENOTTY;
diff --git a/fs/inode.c b/fs/inode.c
index 53245ffcf93d..27ee1af50d02 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
        struct inode *inode = dentry->d_inode;
        struct timespec now;
-        if (inode->i_flags & S_NOATIME)
+        if (mnt_want_write(mnt))
                return;
+        if (inode->i_flags & S_NOATIME)
+                goto out;
        if (IS_NOATIME(inode))
-                return;
+                goto out;
        if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
-                return;
+                goto out;
-        /*
+        if (mnt->mnt_flags & MNT_NOATIME)
-         * We may have a NULL vfsmount when coming from NFSD
+                goto out;
-         */
+        if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
-        if (mnt) {
+                goto out;
-                if (mnt->mnt_flags & MNT_NOATIME)
+        if (mnt->mnt_flags & MNT_RELATIME) {
-                        return;
+                /*
-                if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
+                 * With relative atime, only update atime if the previous
-                        return;
+                 * atime is earlier than either the ctime or mtime.
+                 */
-                if (mnt->mnt_flags & MNT_RELATIME) {
+                if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
-                        /*
+                    timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
-                         * With relative atime, only update atime if the
+                        goto out;
-                         * previous atime is earlier than either the ctime or
-                         * mtime.
-                         */
-                        if (timespec_compare(&inode->i_mtime,
-                                                &inode->i_atime) < 0 &&
-                            timespec_compare(&inode->i_ctime,
-                                                &inode->i_atime) < 0)
-                                return;
-                }
        }
        now = current_fs_time(inode->i_sb);
        if (timespec_equal(&inode->i_atime, &now))
-                return;
+                goto out;
        inode->i_atime = now;
        mark_inode_dirty_sync(inode);
+out:
+        mnt_drop_write(mnt);
 }
 EXPORT_SYMBOL(touch_atime);
@@ -1255,10 +1250,13 @@ void file_update_time(struct file *file)
        struct inode *inode = file->f_path.dentry->d_inode;
        struct timespec now;
        int sync_it = 0;
+        int err;
        if (IS_NOCMTIME(inode))
                return;
-        if (IS_RDONLY(inode))
+        err = mnt_want_write(file->f_path.mnt);
+        if (err)
                return;
        now = current_fs_time(inode->i_sb);
@@ -1279,6 +1277,7 @@ void file_update_time(struct file *file)
        if (sync_it)
                mark_inode_dirty_sync(inode);
+        mnt_drop_write(file->f_path.mnt);
 }
 EXPORT_SYMBOL(file_update_time);
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index a1f8e375ad21..afe222bf300f 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/ctype.h>
 #include <linux/capability.h>
+#include <linux/mount.h>
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <asm/current.h>
@@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                return put_user(flags, (int __user *) arg);
        case JFS_IOC_SETFLAGS: {
                unsigned int oldflags;
+                int err;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
+                        return err;
-                if (!is_owner_or_cap(inode))
+                if (!is_owner_or_cap(inode)) {
-                        return -EACCES;
+                        err = -EACCES;
+                        goto setflags_out;
-                if (get_user(flags, (int __user *) arg))
+                }
-                        return -EFAULT;
+                if (get_user(flags, (int __user *) arg)) {
+                        err = -EFAULT;
+                        goto setflags_out;
+                }
                flags = jfs_map_ext2(flags, 1);
                if (!S_ISDIR(inode->i_mode))
                        flags &= ~JFS_DIRSYNC_FL;
                /* Is it quota file? Do not allow user to mess with it */
-                if (IS_NOQUOTA(inode))
+                if (IS_NOQUOTA(inode)) {
-                        return -EPERM;
+                        err = -EPERM;
+                        goto setflags_out;
+                }
                /* Lock against other parallel changes of flags */
                mutex_lock(&inode->i_mutex);
@@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
                        if (!capable(CAP_LINUX_IMMUTABLE)) {
                                mutex_unlock(&inode->i_mutex);
-                                return -EPERM;
+                                err = -EPERM;
+                                goto setflags_out;
                        }
                }
@@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                mutex_unlock(&inode->i_mutex);
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
-                return 0;
+setflags_out:
+                mnt_drop_write(filp->f_path.mnt);
+                return err;
        }
        default:
                return -ENOTTY;
diff --git a/fs/namei.c b/fs/namei.c
index 8cf9bb9c2fc0..e179f71bfcb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                        return -EACCES;
                flag &= ~O_TRUNC;
-        } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
+        }
-                return -EROFS;
        error = vfs_permission(nd, acc_mode);
        if (error)
@@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
        return 0;
 }
-static int open_namei_create(struct nameidata *nd, struct path *path,
+/*
+ * Be careful about ever adding any more callers of this
+ * function.  Its flags must be in the namei format, not
+ * what gets passed to sys_open().
+ */
+static int __open_namei_create(struct nameidata *nd, struct path *path,
                                int flag, int mode)
 {
        int error;
@@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
 }
 /*
- *      open_namei()
+ * Note that while the flag value (low two bits) for sys_open means:
+ *      00 - read-only
+ *      01 - write-only
+ *      10 - read-write
+ *      11 - special
+ * it is changed into
+ *      00 - no permissions needed
+ *      01 - read-permission
+ *      10 - write-permission
+ *      11 - read-write
+ * for the internal routines (ie open_namei()/follow_link() etc)
+ * This is more logical, and also allows the 00 "no perm needed"
+ * to be used for symlinks (where the permissions are checked
+ * later).
 *
- * namei for open - this is in fact almost the whole open-routine.
+*/
- *
+static inline int open_to_namei_flags(int flag)
- * Note that the low bits of "flag" aren't the same as in the open
+{
- * system call - they are 00 - no permissions needed
+        if ((flag+1) & O_ACCMODE)
- *                        01 - read permission needed
+                flag++;
- *                        10 - write permission needed
+        return flag;
- *                        11 - read/write permissions needed
+}
- * which is a lot more logical, and also allows the "no perm" needed
- * for symlinks (where the permissions are checked later).
+static int open_will_write_to_fs(int flag, struct inode *inode)
- * SMP-safe
+{
+        /*
+         * We'll never write to the fs underlying
+         * a device file.
+         */
+        if (special_file(inode->i_mode))
+                return 0;
+        return (flag & O_TRUNC);
+}
+/*
+ * Note that the low bits of the passed in "open_flag"
+ * are not the same as in the local variable "flag". See
+ * open_to_namei_flags() for more details.
 */
-int open_namei(int dfd, const char *pathname, int flag,
+struct file *do_filp_open(int dfd, const char *pathname,
-                int mode, struct nameidata *nd)
+                int open_flag, int mode)
 {
+        struct file *filp;
+        struct nameidata nd;
        int acc_mode, error;
        struct path path;
        struct dentry *dir;
        int count = 0;
+        int will_write;
+        int flag = open_to_namei_flags(open_flag);
        acc_mode = ACC_MODE(flag);
@@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag,
         */
        if (!(flag & O_CREAT)) {
                error = path_lookup_open(dfd, pathname, lookup_flags(flag),
-                                         nd, flag);
+                                         &nd, flag);
                if (error)
-                        return error;
+                        return ERR_PTR(error);
                goto ok;
        }
        /*
         * Create - we need to know the parent.
         */
-        error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode);
+        error = path_lookup_create(dfd, pathname, LOOKUP_PARENT,
+                                   &nd, flag, mode);
        if (error)
-                return error;
+                return ERR_PTR(error);
        /*
         * We have the parent and last component. First of all, check
@@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag,
         * will not do.
         */
        error = -EISDIR;
-        if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
+        if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
                goto exit;
-        dir = nd->path.dentry;
+        dir = nd.path.dentry;
-        nd->flags &= ~LOOKUP_PARENT;
+        nd.flags &= ~LOOKUP_PARENT;
        mutex_lock(&dir->d_inode->i_mutex);
-        path.dentry = lookup_hash(nd);
+        path.dentry = lookup_hash(&nd);
-        path.mnt = nd->path.mnt;
+        path.mnt = nd.path.mnt;
 do_last:
        error = PTR_ERR(path.dentry);
@@ -1768,18 +1803,31 @@ do_last:
                goto exit;
        }
-        if (IS_ERR(nd->intent.open.file)) {
+        if (IS_ERR(nd.intent.open.file)) {
-                mutex_unlock(&dir->d_inode->i_mutex);
+                error = PTR_ERR(nd.intent.open.file);
-                error = PTR_ERR(nd->intent.open.file);
+                goto exit_mutex_unlock;
-                goto exit_dput;
        }
        /* Negative dentry, just create the file */
        if (!path.dentry->d_inode) {
-                error = open_namei_create(nd, &path, flag, mode);
+                /*
+                 * This write is needed to ensure that a
+                 * ro->rw transition does not occur between
+                 * the time when the file is created and when
+                 * a permanent write count is taken through
+                 * the 'struct file' in nameidata_to_filp().
+                 */
+                error = mnt_want_write(nd.path.mnt);
                if (error)
+                        goto exit_mutex_unlock;
+                error = __open_namei_create(&nd, &path, flag, mode);
+                if (error) {
+                        mnt_drop_write(nd.path.mnt);
                        goto exit;
-                return 0;
+                }
+                filp = nameidata_to_filp(&nd, open_flag);
+                mnt_drop_write(nd.path.mnt);
+                return filp;
        }
        /*
@@ -1804,23 +1852,52 @@ do_last:
        if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
                goto do_link;
-        path_to_nameidata(&path, nd);
+        path_to_nameidata(&path, &nd);
        error = -EISDIR;
        if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
                goto exit;
 ok:
-        error = may_open(nd, acc_mode, flag);
+        /*
-        if (error)
+         * Consider:
+         * 1. may_open() truncates a file
+         * 2. a rw->ro mount transition occurs
+         * 3. nameidata_to_filp() fails due to
+         *    the ro mount.
+         * That would be inconsistent, and should
+         * be avoided. Taking this mnt write here
+         * ensures that (2) can not occur.
+         */
+        will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
+        if (will_write) {
+                error = mnt_want_write(nd.path.mnt);
+                if (error)
+                        goto exit;
+        }
+        error = may_open(&nd, acc_mode, flag);
+        if (error) {
+                if (will_write)
+                        mnt_drop_write(nd.path.mnt);
                goto exit;
-        return 0;
+        }
+        filp = nameidata_to_filp(&nd, open_flag);
+        /*
+         * It is now safe to drop the mnt write
+         * because the filp has had a write taken
+         * on its behalf.
+         */
+        if (will_write)
+                mnt_drop_write(nd.path.mnt);
+        return filp;
+exit_mutex_unlock:
+        mutex_unlock(&dir->d_inode->i_mutex);
 exit_dput:
-        path_put_conditional(&path, nd);
+        path_put_conditional(&path, &nd);
 exit:
-        if (!IS_ERR(nd->intent.open.file))
+        if (!IS_ERR(nd.intent.open.file))
-                release_open_intent(nd);
+                release_open_intent(&nd);
-        path_put(&nd->path);
+        path_put(&nd.path);
-        return error;
+        return ERR_PTR(error);
 do_link:
        error = -ELOOP;
@@ -1836,43 +1913,60 @@ do_link:
         * stored in nd->last.name and we will have to putname() it when we
         * are done. Procfs-like symlinks just set LAST_BIND.
         */
-        nd->flags |= LOOKUP_PARENT;
+        nd.flags |= LOOKUP_PARENT;
-        error = security_inode_follow_link(path.dentry, nd);
+        error = security_inode_follow_link(path.dentry, &nd);
        if (error)
                goto exit_dput;
-        error = __do_follow_link(&path, nd);
+        error = __do_follow_link(&path, &nd);
        if (error) {
                /* Does someone understand code flow here? Or it is only
                 * me so stupid? Anathema to whoever designed this non-sense
                 * with "intent.open".
                 */
-                release_open_intent(nd);
+                release_open_intent(&nd);
-                return error;
+                return ERR_PTR(error);
        }
-        nd->flags &= ~LOOKUP_PARENT;
+        nd.flags &= ~LOOKUP_PARENT;
-        if (nd->last_type == LAST_BIND)
+        if (nd.last_type == LAST_BIND)
                goto ok;
        error = -EISDIR;
-        if (nd->last_type != LAST_NORM)
+        if (nd.last_type != LAST_NORM)
                goto exit;
-        if (nd->last.name[nd->last.len]) {
+        if (nd.last.name[nd.last.len]) {
-                __putname(nd->last.name);
+                __putname(nd.last.name);
                goto exit;
        }
        error = -ELOOP;
        if (count++==32) {
-                __putname(nd->last.name);
+                __putname(nd.last.name);
                goto exit;
        }
-        dir = nd->path.dentry;
+        dir = nd.path.dentry;
        mutex_lock(&dir->d_inode->i_mutex);
-        path.dentry = lookup_hash(nd);
+        path.dentry = lookup_hash(&nd);
-        path.mnt = nd->path.mnt;
+        path.mnt = nd.path.mnt;
-        __putname(nd->last.name);
+        __putname(nd.last.name);
        goto do_last;
 }
 /**
+ * filp_open - open file and return file pointer
+ *
+ * @filename:   path to open
+ * @flags:      open flags as per the open(2) second argument
+ * @mode:       mode for the new file if O_CREAT is set, else ignored
+ *
+ * This is the helper to open a file from kernelspace if you really
+ * have to.  But in general you should not do this, so please move
+ * along, nothing to see here..
+ */
+struct file *filp_open(const char *filename, int flags, int mode)
+{
+        return do_filp_open(AT_FDCWD, filename, flags, mode);
+}
+EXPORT_SYMBOL(filp_open);
+/**
 * lookup_create - lookup a dentry, creating it if it doesn't exist
 * @nd: nameidata info
 * @is_dir: directory flag
@@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
        return error;
 }
+static int may_mknod(mode_t mode)
+{
+        switch (mode & S_IFMT) {
+        case S_IFREG:
+        case S_IFCHR:
+        case S_IFBLK:
+        case S_IFIFO:
+        case S_IFSOCK:
+        case 0: /* zero mode translates to S_IFREG */
+                return 0;
+        case S_IFDIR:
+                return -EPERM;
+        default:
+                return -EINVAL;
+        }
+}
 asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
                                unsigned dev)
 {
@@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
        if (error)
                goto out;
        dentry = lookup_create(&nd, 0);
-        error = PTR_ERR(dentry);
+        if (IS_ERR(dentry)) {
+                error = PTR_ERR(dentry);
+                goto out_unlock;
+        }
        if (!IS_POSIXACL(nd.path.dentry->d_inode))
                mode &= ~current->fs->umask;
-        if (!IS_ERR(dentry)) {
+        error = may_mknod(mode);
-                switch (mode & S_IFMT) {
+        if (error)
+                goto out_dput;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_dput;
+        switch (mode & S_IFMT) {
                case 0: case S_IFREG:
                        error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
                        break;
@@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
                case S_IFIFO: case S_IFSOCK:
                        error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
                        break;
-                case S_IFDIR:
-                        error = -EPERM;
-                        break;
-                default:
-                        error = -EINVAL;
-                }
-                dput(dentry);
        }
+        mnt_drop_write(nd.path.mnt);
+out_dput:
+        dput(dentry);
+out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
        path_put(&nd.path);
 out:
@@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
        if (!IS_POSIXACL(nd.path.dentry->d_inode))
                mode &= ~current->fs->umask;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_dput;
        error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
+        mnt_drop_write(nd.path.mnt);
+out_dput:
        dput(dentry);
 out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname)
        error = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                goto exit2;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto exit3;
        error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
+        mnt_drop_write(nd.path.mnt);
+exit3:
        dput(dentry);
 exit2:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                inode = dentry->d_inode;
                if (inode)
                        atomic_inc(&inode->i_count);
+                error = mnt_want_write(nd.path.mnt);
+                if (error)
+                        goto exit2;
                error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+                mnt_drop_write(nd.path.mnt);
        exit2:
                dput(dentry);
        }
@@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
        if (IS_ERR(dentry))
                goto out_unlock;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_dput;
        error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
+        mnt_drop_write(nd.path.mnt);
+out_dput:
        dput(dentry);
 out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
        error = PTR_ERR(new_dentry);
        if (IS_ERR(new_dentry))
                goto out_unlock;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_dput;
        error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
+        mnt_drop_write(nd.path.mnt);
+out_dput:
        dput(new_dentry);
 out_unlock:
        mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname,
        if (new_dentry == trap)
                goto exit5;
+        error = mnt_want_write(oldnd.path.mnt);
+        if (error)
+                goto exit5;
        error = vfs_rename(old_dir->d_inode, old_dentry,
                                   new_dir->d_inode, new_dentry);
+        mnt_drop_write(oldnd.path.mnt);
 exit5:
        dput(new_dentry);
 exit4:
diff --git a/fs/namespace.c b/fs/namespace.c
index 94f026ec990a..678f7ce060f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,6 +17,7 @@
 #include <linux/quotaops.h>
 #include <linux/acct.h>
 #include <linux/capability.h>
+#include <linux/cpumask.h>
 #include <linux/module.h>
 #include <linux/sysfs.h>
 #include <linux/seq_file.h>
@@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
        return tmp & (HASH_SIZE - 1);
 }
+#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
        struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
                INIT_LIST_HEAD(&mnt->mnt_share);
                INIT_LIST_HEAD(&mnt->mnt_slave_list);
                INIT_LIST_HEAD(&mnt->mnt_slave);
+                atomic_set(&mnt->__mnt_writers, 0);
                if (name) {
                        int size = strlen(name) + 1;
                        char *newname = kmalloc(size, GFP_KERNEL);
@@ -80,6 +84,263 @@ struct vfsmount *alloc_vfsmnt(const char *name)
        return mnt;
 }
+/*
+ * Most r/o checks on a fs are for operations that take
+ * discrete amounts of time, like a write() or unlink().
+ * We must keep track of when those operations start
+ * (for permission checks) and when they end, so that
+ * we can determine when writes are able to occur to
+ * a filesystem.
+ */
+/*
+ * __mnt_is_readonly: check whether a mount is read-only
+ * @mnt: the mount to check for its write status
+ *
+ * This shouldn't be used directly outside of the VFS.
+ * It does not guarantee that the filesystem will stay
+ * r/w, just that it is right *now*.  This can not and
+ * should not be used in place of IS_RDONLY(inode).
+ * mnt_want/drop_write() will _keep_ the filesystem
+ * r/w.
+ */
+int __mnt_is_readonly(struct vfsmount *mnt)
+{
+        if (mnt->mnt_flags & MNT_READONLY)
+                return 1;
+        if (mnt->mnt_sb->s_flags & MS_RDONLY)
+                return 1;
+        return 0;
+}
+EXPORT_SYMBOL_GPL(__mnt_is_readonly);
+struct mnt_writer {
+        /*
+         * If holding multiple instances of this lock, they
+         * must be ordered by cpu number.
+         */
+        spinlock_t lock;
+        struct lock_class_key lock_class; /* compiles out with !lockdep */
+        unsigned long count;
+        struct vfsmount *mnt;
+} ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
+static int __init init_mnt_writers(void)
+{
+        int cpu;
+        for_each_possible_cpu(cpu) {
+                struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
+                spin_lock_init(&writer->lock);
+                lockdep_set_class(&writer->lock, &writer->lock_class);
+                writer->count = 0;
+        }
+        return 0;
+}
+fs_initcall(init_mnt_writers);
+static void unlock_mnt_writers(void)
+{
+        int cpu;
+        struct mnt_writer *cpu_writer;
+        for_each_possible_cpu(cpu) {
+                cpu_writer = &per_cpu(mnt_writers, cpu);
+                spin_unlock(&cpu_writer->lock);
+        }
+}
+static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
+{
+        if (!cpu_writer->mnt)
+                return;
+        /*
+         * This is in case anyone ever leaves an invalid,
+         * old ->mnt and a count of 0.
+         */
+        if (!cpu_writer->count)
+                return;
+        atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
+        cpu_writer->count = 0;
+}
+ /*
+ * must hold cpu_writer->lock
+ */
+static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
+                                          struct vfsmount *mnt)
+{
+        if (cpu_writer->mnt == mnt)
+                return;
+        __clear_mnt_count(cpu_writer);
+        cpu_writer->mnt = mnt;
+}
+/*
+ * Most r/o checks on a fs are for operations that take
+ * discrete amounts of time, like a write() or unlink().
+ * We must keep track of when those operations start
+ * (for permission checks) and when they end, so that
+ * we can determine when writes are able to occur to
+ * a filesystem.
+ */
+/**
+ * mnt_want_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This tells the low-level filesystem that a write is
+ * about to be performed to it, and makes sure that
+ * writes are allowed before returning success.  When
+ * the write operation is finished, mnt_drop_write()
+ * must be called.  This is effectively a refcount.
+ */
+int mnt_want_write(struct vfsmount *mnt)
+{
+        int ret = 0;
+        struct mnt_writer *cpu_writer;
+        cpu_writer = &get_cpu_var(mnt_writers);
+        spin_lock(&cpu_writer->lock);
+        if (__mnt_is_readonly(mnt)) {
+                ret = -EROFS;
+                goto out;
+        }
+        use_cpu_writer_for_mount(cpu_writer, mnt);
+        cpu_writer->count++;
+out:
+        spin_unlock(&cpu_writer->lock);
+        put_cpu_var(mnt_writers);
+        return ret;
+}
+EXPORT_SYMBOL_GPL(mnt_want_write);
+static void lock_mnt_writers(void)
+{
+        int cpu;
+        struct mnt_writer *cpu_writer;
+        for_each_possible_cpu(cpu) {
+                cpu_writer = &per_cpu(mnt_writers, cpu);
+                spin_lock(&cpu_writer->lock);
+                __clear_mnt_count(cpu_writer);
+                cpu_writer->mnt = NULL;
+        }
+}
+/*
+ * These per-cpu write counts are not guaranteed to have
+ * matched increments and decrements on any given cpu.
+ * A file open()ed for write on one cpu and close()d on
+ * another cpu will imbalance this count.  Make sure it
+ * does not get too far out of whack.
+ */
+static void handle_write_count_underflow(struct vfsmount *mnt)
+{
+        if (atomic_read(&mnt->__mnt_writers) >=
+            MNT_WRITER_UNDERFLOW_LIMIT)
+                return;
+        /*
+         * It isn't necessary to hold all of the locks
+         * at the same time, but doing it this way makes
+         * us share a lot more code.
+         */
+        lock_mnt_writers();
+        /*
+         * vfsmount_lock is for mnt_flags.
+         */
+        spin_lock(&vfsmount_lock);
+        /*
+         * If coalescing the per-cpu writer counts did not
+         * get us back to a positive writer count, we have
+         * a bug.
+         */
+        if ((atomic_read(&mnt->__mnt_writers) < 0) &&
+            !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
+                printk(KERN_DEBUG "leak detected on mount(%p) writers "
+                                "count: %d\n",
+                        mnt, atomic_read(&mnt->__mnt_writers));
+                WARN_ON(1);
+                /* use the flag to keep the dmesg spam down */
+                mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
+        }
+        spin_unlock(&vfsmount_lock);
+        unlock_mnt_writers();
+}
+/**
+ * mnt_drop_write - give up write access to a mount
+ * @mnt: the mount on which to give up write access
+ *
+ * Tells the low-level filesystem that we are done
+ * performing writes to it.  Must be matched with
+ * mnt_want_write() call above.
+ */
+void mnt_drop_write(struct vfsmount *mnt)
+{
+        int must_check_underflow = 0;
+        struct mnt_writer *cpu_writer;
+        cpu_writer = &get_cpu_var(mnt_writers);
+        spin_lock(&cpu_writer->lock);
+        use_cpu_writer_for_mount(cpu_writer, mnt);
+        if (cpu_writer->count > 0) {
+                cpu_writer->count--;
+        } else {
+                must_check_underflow = 1;
+                atomic_dec(&mnt->__mnt_writers);
+        }
+        spin_unlock(&cpu_writer->lock);
+        /*
+         * Logically, we could call this each time,
+         * but the __mnt_writers cacheline tends to
+         * be cold, and makes this expensive.
+         */
+        if (must_check_underflow)
+                handle_write_count_underflow(mnt);
+        /*
+         * This could be done right after the spinlock
+         * is taken because the spinlock keeps us on
+         * the cpu, and disables preemption.  However,
+         * putting it here bounds the amount that
+         * __mnt_writers can underflow.  Without it,
+         * we could theoretically wrap __mnt_writers.
+         */
+        put_cpu_var(mnt_writers);
+}
+EXPORT_SYMBOL_GPL(mnt_drop_write);
+static int mnt_make_readonly(struct vfsmount *mnt)
+{
+        int ret = 0;
+        lock_mnt_writers();
+        /*
+         * With all the locks held, this value is stable
+         */
+        if (atomic_read(&mnt->__mnt_writers) > 0) {
+                ret = -EBUSY;
+                goto out;
+        }
+        /*
+         * nobody can do a successful mnt_want_write() with all
+         * of the counts in MNT_DENIED_WRITE and the locks held.
+         */
+        spin_lock(&vfsmount_lock);
+        if (!ret)
+                mnt->mnt_flags |= MNT_READONLY;
+        spin_unlock(&vfsmount_lock);
+out:
+        unlock_mnt_writers();
+        return ret;
+}
+static void __mnt_unmake_readonly(struct vfsmount *mnt)
+{
+        spin_lock(&vfsmount_lock);
+        mnt->mnt_flags &= ~MNT_READONLY;
+        spin_unlock(&vfsmount_lock);
+}
 int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
 {
        mnt->mnt_sb = sb;
@@ -271,7 +532,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 static inline void __mntput(struct vfsmount *mnt)
 {
+        int cpu;
        struct super_block *sb = mnt->mnt_sb;
+        /*
+         * We don't have to hold all of the locks at the
+         * same time here because we know that we're the
+         * last reference to mnt and that no new writers
+         * can come in.
+         */
+        for_each_possible_cpu(cpu) {
+                struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
+                if (cpu_writer->mnt != mnt)
+                        continue;
+                spin_lock(&cpu_writer->lock);
+                atomic_add(cpu_writer->count, &mnt->__mnt_writers);
+                cpu_writer->count = 0;
+                /*
+                 * Might as well do this so that no one
+                 * ever sees the pointer and expects
+                 * it to be valid.
+                 */
+                cpu_writer->mnt = NULL;
+                spin_unlock(&cpu_writer->lock);
+        }
+        /*
+         * This probably indicates that somebody messed
+         * up a mnt_want/drop_write() pair.  If this
+         * happens, the filesystem was probably unable
+         * to make r/w->r/o transitions.
+         */
+        WARN_ON(atomic_read(&mnt->__mnt_writers));
        dput(mnt->mnt_root);
        free_vfsmnt(mnt);
        deactivate_super(sb);
@@ -417,7 +707,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
                seq_putc(m, '.');
                mangle(m, mnt->mnt_sb->s_subtype);
        }
-        seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
+        seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
        for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
                if (mnt->mnt_sb->s_flags & fs_infop->flag)
                        seq_puts(m, fs_infop->str);
@@ -1019,6 +1309,23 @@ out:
        return err;
 }
+static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
+{
+        int error = 0;
+        int readonly_request = 0;
+        if (ms_flags & MS_RDONLY)
+                readonly_request = 1;
+        if (readonly_request == __mnt_is_readonly(mnt))
+                return 0;
+        if (readonly_request)
+                error = mnt_make_readonly(mnt);
+        else
+                __mnt_unmake_readonly(mnt);
+        return error;
+}
 /*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
@@ -1041,7 +1348,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
                return -EINVAL;
        down_write(&sb->s_umount);
-        err = do_remount_sb(sb, flags, data, 0);
+        if (flags & MS_BIND)
+                err = change_mount_flags(nd->path.mnt, flags);
+        else
+                err = do_remount_sb(sb, flags, data, 0);
        if (!err)
                nd->path.mnt->mnt_flags = mnt_flags;
        up_write(&sb->s_umount);
@@ -1425,6 +1735,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
                mnt_flags |= MNT_NODIRATIME;
        if (flags & MS_RELATIME)
                mnt_flags |= MNT_RELATIME;
+        if (flags & MS_RDONLY)
+                mnt_flags |= MNT_READONLY;
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
                   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c67b4bdcf719..ad8f167e54bc 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -14,6 +14,7 @@
 #include <linux/ioctl.h>
 #include <linux/time.h>
 #include <linux/mm.h>
+#include <linux/mount.h>
 #include <linux/highuid.h>
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
@@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
 }
 #endif /* CONFIG_NCPFS_NLS */
-int ncp_ioctl(struct inode *inode, struct file *filp,
+static int __ncp_ioctl(struct inode *inode, struct file *filp,
              unsigned int cmd, unsigned long arg)
 {
        struct ncp_server *server = NCP_SERVER(inode);
@@ -822,6 +823,57 @@ outrel:
        return -EINVAL;
 }
+static int ncp_ioctl_need_write(unsigned int cmd)
+{
+        switch (cmd) {
+        case NCP_IOC_GET_FS_INFO:
+        case NCP_IOC_GET_FS_INFO_V2:
+        case NCP_IOC_NCPREQUEST:
+        case NCP_IOC_SETDENTRYTTL:
+        case NCP_IOC_SIGN_INIT:
+        case NCP_IOC_LOCKUNLOCK:
+        case NCP_IOC_SET_SIGN_WANTED:
+                return 1;
+        case NCP_IOC_GETOBJECTNAME:
+        case NCP_IOC_SETOBJECTNAME:
+        case NCP_IOC_GETPRIVATEDATA:
+        case NCP_IOC_SETPRIVATEDATA:
+        case NCP_IOC_SETCHARSETS:
+        case NCP_IOC_GETCHARSETS:
+        case NCP_IOC_CONN_LOGGED_IN:
+        case NCP_IOC_GETDENTRYTTL:
+        case NCP_IOC_GETMOUNTUID2:
+        case NCP_IOC_SIGN_WANTED:
+        case NCP_IOC_GETROOT:
+        case NCP_IOC_SETROOT:
+                return 0;
+        default:
+                /* unknown IOCTL command, assume write */
+                return 1;
+        }
+}
+int ncp_ioctl(struct inode *inode, struct file *filp,
+              unsigned int cmd, unsigned long arg)
+{
+        int ret;
+        if (ncp_ioctl_need_write(cmd)) {
+                /*
+                 * inside the ioctl(), any failures which
+                 * are because of file_permission() are
+                 * -EACCES, so it seems consistent to keep
+                 * that here.
+                 */
+                if (mnt_want_write(filp->f_path.mnt))
+                        return -EACCES;
+        }
+        ret = __ncp_ioctl(inode, filp, cmd, arg);
+        if (ncp_ioctl_need_write(cmd))
+                mnt_drop_write(filp->f_path.mnt);
+        return ret;
+}
 #ifdef CONFIG_COMPAT
 long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6cea7479c5b4..d9e30ac2798d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -967,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
        if (nd->flags & LOOKUP_DIRECTORY)
                return 0;
        /* Are we trying to write to a read only partition? */
-        if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+        if (__mnt_is_readonly(nd->path.mnt) &&
+            (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
                return 0;
        return 1;
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c593db047d8b..c309c881bd4e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        return status;
                }
        }
+        status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
+        if (status)
+                return status;
        status = nfs_ok;
        if (setattr->sa_acl != NULL)
                status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
                                            setattr->sa_acl);
        if (status)
-                return status;
+                goto out;
        status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
                                0, (time_t)0);
+out:
+        mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
        return status;
 }
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 1ff90625860f..145b3c877a27 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -46,6 +46,7 @@
 #include <linux/scatterlist.h>
 #include <linux/crypto.h>
 #include <linux/sched.h>
+#include <linux/mount.h>
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
@@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
                goto out_put;
        }
+        status = mnt_want_write(rec_dir.path.mnt);
+        if (status)
+                goto out_put;
        status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU);
+        mnt_drop_write(rec_dir.path.mnt);
 out_put:
        dput(dentry);
 out_unlock:
@@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
        if (!rec_dir_init || !clp->cl_firststate)
                return;
+        status = mnt_want_write(rec_dir.path.mnt);
+        if (status)
+                goto out;
        clp->cl_firststate = 0;
        nfs4_save_user(&uid, &gid);
        status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
        nfs4_reset_user(uid, gid);
        if (status == 0)
                nfsd4_sync_rec_dir();
+        mnt_drop_write(rec_dir.path.mnt);
+out:
        if (status)
                printk("NFSD: Failed to remove expired client state directory"
                                " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
@@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) {
        if (!rec_dir_init)
                return;
+        status = mnt_want_write(rec_dir.path.mnt);
+        if (status)
+                goto out;
        status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old);
        if (status == 0)
                nfsd4_sync_rec_dir();
+        mnt_drop_write(rec_dir.path.mnt);
+out:
        if (status)
                printk("nfsd4: failed to purge old clients from recovery"
                        " directory %s\n", rec_dir.path.dentry->d_name.name);
-        return;
 }
 static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bcb97d8e8b8b..81a75f3081f4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -41,6 +41,7 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/cache.h>
+#include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/workqueue.h>
 #include <linux/smp_lock.h>
@@ -1239,7 +1240,7 @@ static inline void
 nfs4_file_downgrade(struct file *filp, unsigned int share_access)
 {
        if (share_access & NFS4_SHARE_ACCESS_WRITE) {
-                put_write_access(filp->f_path.dentry->d_inode);
+                drop_file_write_access(filp);
                filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
        }
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46f59d5365a0..304bf5f643c9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = 0;
        switch (type) {
        case S_IFREG:
+                host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+                if (host_err)
+                        goto out_nfserr;
                host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
                break;
        case S_IFDIR:
+                host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+                if (host_err)
+                        goto out_nfserr;
                host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
                break;
        case S_IFCHR:
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:
+                host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+                if (host_err)
+                        goto out_nfserr;
                host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
                break;
        default:
                printk("nfsd: bad file type %o in nfsd_create\n", type);
                host_err = -EINVAL;
+                goto out_nfserr;
        }
-        if (host_err < 0)
+        if (host_err < 0) {
+                mnt_drop_write(fhp->fh_export->ex_path.mnt);
                goto out_nfserr;
+        }
        if (EX_ISSYNC(fhp->fh_export)) {
                err = nfserrno(nfsd_sync_dir(dentry));
@@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err2 = nfsd_create_setattr(rqstp, resfhp, iap);
        if (err2)
                err = err2;
+        mnt_drop_write(fhp->fh_export->ex_path.mnt);
        /*
         * Update the file handle to get the new inode info.
         */
@@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                v_atime = verifier[1]&0x7fffffff;
        }
        
+        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        if (host_err)
+                goto out_nfserr;
        if (dchild->d_inode) {
                err = 0;
@@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                case NFS3_CREATE_GUARDED:
                        err = nfserr_exist;
                }
+                mnt_drop_write(fhp->fh_export->ex_path.mnt);
                goto out;
        }
        host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
-        if (host_err < 0)
+        if (host_err < 0) {
+                mnt_drop_write(fhp->fh_export->ex_path.mnt);
                goto out_nfserr;
+        }
        if (created)
                *created = 1;
@@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (err2)
                err = err2;
+        mnt_drop_write(fhp->fh_export->ex_path.mnt);
        /*
         * Update the filehandle to get the new inode info.
         */
@@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (iap && (iap->ia_valid & ATTR_MODE))
                mode = iap->ia_mode & S_IALLUGO;
+        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        if (host_err)
+                goto out_nfserr;
        if (unlikely(path[plen] != 0)) {
                char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
                if (path_alloced == NULL)
@@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = nfserrno(host_err);
        fh_unlock(fhp);
+        mnt_drop_write(fhp->fh_export->ex_path.mnt);
        cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
        dput(dnew);
        if (err==0) err = cerr;
@@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        dold = tfhp->fh_dentry;
        dest = dold->d_inode;
+        host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
+        if (host_err) {
+                err = nfserrno(host_err);
+                goto out_dput;
+        }
        host_err = vfs_link(dold, dirp, dnew);
        if (!host_err) {
                if (EX_ISSYNC(ffhp->fh_export)) {
@@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                else
                        err = nfserrno(host_err);
        }
+        mnt_drop_write(tfhp->fh_export->ex_path.mnt);
+out_dput:
        dput(dnew);
 out_unlock:
        fh_unlock(ffhp);
@@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        if (ndentry == trap)
                goto out_dput_new;
-#ifdef MSNFS
+        if (svc_msnfs(ffhp) &&
-        if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
                ((atomic_read(&odentry->d_count) > 1)
                 || (atomic_read(&ndentry->d_count) > 1))) {
                        host_err = -EPERM;
-        } else
+                        goto out_dput_new;
-#endif
+        }
+        host_err = -EXDEV;
+        if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
+                goto out_dput_new;
+        host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
+        if (host_err)
+                goto out_dput_new;
        host_err = vfs_rename(fdir, odentry, tdir, ndentry);
        if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
                host_err = nfsd_sync_dir(tdentry);
@@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                        host_err = nfsd_sync_dir(fdentry);
        }
+        mnt_drop_write(ffhp->fh_export->ex_path.mnt);
 out_dput_new:
        dput(ndentry);
 out_dput_old:
@@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (!type)
                type = rdentry->d_inode->i_mode & S_IFMT;
+        host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        if (host_err)
+                goto out_nfserr;
        if (type != S_IFDIR) { /* It's UNLINK */
 #ifdef MSNFS
                if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
@@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        dput(rdentry);
        if (host_err)
-                goto out_nfserr;
+                goto out_drop;
        if (EX_ISSYNC(fhp->fh_export))
                host_err = nfsd_sync_dir(dentry);
+out_drop:
+        mnt_drop_write(fhp->fh_export->ex_path.mnt);
 out_nfserr:
        err = nfserrno(host_err);
 out:
@@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                inode->i_mode,
                IS_IMMUTABLE(inode)?    " immut" : "",
                IS_APPEND(inode)?       " append" : "",
-                IS_RDONLY(inode)?       " ro" : "");
+                __mnt_is_readonly(exp->ex_path.mnt)?    " ro" : "");
        dprintk("      owner %d/%d user %d/%d\n",
                inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
 #endif
@@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
         */
        if (!(acc & MAY_LOCAL_ACCESS))
                if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
-                        if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode))
+                        if (exp_rdonly(rqstp, exp) ||
+                            __mnt_is_readonly(exp->ex_path.mnt))
                                return nfserr_rofs;
                        if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
                                return nfserr_perm;
@@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
        } else
                size = 0;
+        error = mnt_want_write(fhp->fh_export->ex_path.mnt);
+        if (error)
+                goto getout;
        if (size)
                error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
        else {
@@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
                                error = 0;
                }
        }
+        mnt_drop_write(fhp->fh_export->ex_path.mnt);
 getout:
        kfree(value);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b413166dd163..7b142f0ce995 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -60,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
                goto bail;
        }
-        status = -EROFS;
-        if (IS_RDONLY(inode))
-                goto bail_unlock;
        status = -EACCES;
        if (!is_owner_or_cap(inode))
                goto bail_unlock;
@@ -134,8 +130,13 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (get_user(flags, (int __user *) arg))
                        return -EFAULT;
-                return ocfs2_set_inode_attr(inode, flags,
+                status = mnt_want_write(filp->f_path.mnt);
+                if (status)
+                        return status;
+                status = ocfs2_set_inode_attr(inode, flags,
                        OCFS2_FL_MODIFIABLE);
+                mnt_drop_write(filp->f_path.mnt);
+                return status;
        case OCFS2_IOC_RESVSP:
        case OCFS2_IOC_RESVSP64:
        case OCFS2_IOC_UNRESVSP:
diff --git a/fs/open.c b/fs/open.c
index 3fa4e4ffce4c..b70e7666bb2c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length)
        if (!S_ISREG(inode->i_mode))
                goto dput_and_out;
-        error = vfs_permission(&nd, MAY_WRITE);
+        error = mnt_want_write(nd.path.mnt);
        if (error)
                goto dput_and_out;
-        error = -EROFS;
+        error = vfs_permission(&nd, MAY_WRITE);
-        if (IS_RDONLY(inode))
+        if (error)
-                goto dput_and_out;
+                goto mnt_drop_write_and_out;
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto dput_and_out;
+                goto mnt_drop_write_and_out;
        error = get_write_access(inode);
        if (error)
-                goto dput_and_out;
+                goto mnt_drop_write_and_out;
        /*
         * Make sure that there are no leases.  get_write_access() protects
@@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length)
 put_write_and_out:
        put_write_access(inode);
+mnt_drop_write_and_out:
+        mnt_drop_write(nd.path.mnt);
 dput_and_out:
        path_put(&nd.path);
 out:
@@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
        if(res || !(mode & S_IWOTH) ||
           special_file(nd.path.dentry->d_inode->i_mode))
                goto out_path_release;
+        /*
-        if(IS_RDONLY(nd.path.dentry->d_inode))
+         * This is a rare case where using __mnt_is_readonly()
+         * is OK without a mnt_want/drop_write() pair.  Since
+         * no actual write to the fs is performed here, we do
+         * not need to telegraph that to anyone.
+         *
+         * By doing this, we accept that this access is
+         * inherently racy and know that the fs may change
+         * state before we even see this result.
+         */
+        if (__mnt_is_readonly(nd.path.mnt))
                res = -EROFS;
 out_path_release:
@@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
        audit_inode(NULL, dentry);
-        err = -EROFS;
+        err = mnt_want_write(file->f_path.mnt);
-        if (IS_RDONLY(inode))
+        if (err)
                goto out_putf;
        err = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto out_putf;
+                goto out_drop_write;
        mutex_lock(&inode->i_mutex);
        if (mode == (mode_t) -1)
                mode = inode->i_mode;
@@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
        err = notify_change(dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
+out_drop_write:
+        mnt_drop_write(file->f_path.mnt);
 out_putf:
        fput(file);
 out:
@@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
                goto out;
        inode = nd.path.dentry->d_inode;
-        error = -EROFS;
+        error = mnt_want_write(nd.path.mnt);
-        if (IS_RDONLY(inode))
+        if (error)
                goto dput_and_out;
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                goto dput_and_out;
+                goto out_drop_write;
        mutex_lock(&inode->i_mutex);
        if (mode == (mode_t) -1)
@@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
        error = notify_change(nd.path.dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
+out_drop_write:
+        mnt_drop_write(nd.path.mnt);
 dput_and_out:
        path_put(&nd.path);
 out:
@@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
                printk(KERN_ERR "chown_common: NULL inode\n");
                goto out;
        }
-        error = -EROFS;
-        if (IS_RDONLY(inode))
-                goto out;
        error = -EPERM;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
@@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
        error = user_path_walk(filename, &nd);
        if (error)
                goto out;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_release;
        error = chown_common(nd.path.dentry, user, group);
+        mnt_drop_write(nd.path.mnt);
+out_release:
        path_put(&nd.path);
 out:
        return error;
@@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
        error = __user_walk_fd(dfd, filename, follow, &nd);
        if (error)
                goto out;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_release;
        error = chown_common(nd.path.dentry, user, group);
+        mnt_drop_write(nd.path.mnt);
+out_release:
        path_put(&nd.path);
 out:
        return error;
@@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
        error = user_path_walk_link(filename, &nd);
        if (error)
                goto out;
+        error = mnt_want_write(nd.path.mnt);
+        if (error)
+                goto out_release;
        error = chown_common(nd.path.dentry, user, group);
+        mnt_drop_write(nd.path.mnt);
+out_release:
        path_put(&nd.path);
 out:
        return error;
@@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
        if (!file)
                goto out;
+        error = mnt_want_write(file->f_path.mnt);
+        if (error)
+                goto out_fput;
        dentry = file->f_path.dentry;
        audit_inode(NULL, dentry);
        error = chown_common(dentry, user, group);
+        mnt_drop_write(file->f_path.mnt);
+out_fput:
        fput(file);
 out:
        return error;
 }
+/*
+ * You have to be very careful that these write
+ * counts get cleaned up in error cases and
+ * upon __fput().  This should probably never
+ * be called outside of __dentry_open().
+ */
+static inline int __get_file_write_access(struct inode *inode,
+                                          struct vfsmount *mnt)
+{
+        int error;
+        error = get_write_access(inode);
+        if (error)
+                return error;
+        /*
+         * Do not take mount writer counts on
+         * special files since no writes to
+         * the mount itself will occur.
+         */
+        if (!special_file(inode->i_mode)) {
+                /*
+                 * Balanced in __fput()
+                 */
+                error = mnt_want_write(mnt);
+                if (error)
+                        put_write_access(inode);
+        }
+        return error;
+}
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                                        int flags, struct file *f,
                                        int (*open)(struct inode *, struct file *))
@@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
                                FMODE_PREAD | FMODE_PWRITE;
        inode = dentry->d_inode;
        if (f->f_mode & FMODE_WRITE) {
-                error = get_write_access(inode);
+                error = __get_file_write_access(inode, mnt);
                if (error)
                        goto cleanup_file;
+                if (!special_file(inode->i_mode))
+                        file_take_write(f);
        }
        f->f_mapping = inode->i_mapping;
@@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 cleanup_all:
        fops_put(f->f_op);
-        if (f->f_mode & FMODE_WRITE)
+        if (f->f_mode & FMODE_WRITE) {
                put_write_access(inode);
+                if (!special_file(inode->i_mode)) {
+                        /*
+                         * We don't consider this a real
+                         * mnt_want/drop_write() pair
+                         * because it all happened right
+                         * here, so just reset the state.
+                         */
+                        file_reset_write(f);
+                        mnt_drop_write(mnt);
+                }
+        }
        file_kill(f);
        f->f_path.dentry = NULL;
        f->f_path.mnt = NULL;
@@ -796,43 +870,6 @@ cleanup_file:
        return ERR_PTR(error);
 }
-/*
- * Note that while the flag value (low two bits) for sys_open means:
- *      00 - read-only
- *      01 - write-only
- *      10 - read-write
- *      11 - special
- * it is changed into
- *      00 - no permissions needed
- *      01 - read-permission
- *      10 - write-permission
- *      11 - read-write
- * for the internal routines (ie open_namei()/follow_link() etc). 00 is
- * used by symlinks.
- */
-static struct file *do_filp_open(int dfd, const char *filename, int flags,
-                                 int mode)
-{
-        int namei_flags, error;
-        struct nameidata nd;
-        namei_flags = flags;
-        if ((namei_flags+1) & O_ACCMODE)
-                namei_flags++;
-        error = open_namei(dfd, filename, namei_flags, mode, &nd);
-        if (!error)
-                return nameidata_to_filp(&nd, flags);
-        return ERR_PTR(error);
-}
-struct file *filp_open(const char *filename, int flags, int mode)
-{
-        return do_filp_open(AT_FDCWD, filename, flags, mode);
-}
-EXPORT_SYMBOL(filp_open);
 /**
 * lookup_instantiate_filp - instantiates the open intent filp
 * @nd: pointer to nameidata
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index e0f0f098a523..74363a7aacbc 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -4,6 +4,7 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
+#include <linux/mount.h>
 #include <linux/reiserfs_fs.h>
 #include <linux/time.h>
 #include <asm/uaccess.h>
@@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                   unsigned long arg)
 {
        unsigned int flags;
+        int err = 0;
        switch (cmd) {
        case REISERFS_IOC_UNPACK:
@@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                        if (!reiserfs_attrs(inode->i_sb))
                                return -ENOTTY;
-                        if (IS_RDONLY(inode))
+                        err = mnt_want_write(filp->f_path.mnt);
-                                return -EROFS;
+                        if (err)
+                                return err;
-                        if (!is_owner_or_cap(inode))
+                        if (!is_owner_or_cap(inode)) {
-                                return -EPERM;
+                                err = -EPERM;
+                                goto setflags_out;
-                        if (get_user(flags, (int __user *)arg))
+                        }
-                                return -EFAULT;
+                        if (get_user(flags, (int __user *)arg)) {
+                                err = -EFAULT;
-                        /* Is it quota file? Do not allow user to mess with it. */
+                                goto setflags_out;
-                        if (IS_NOQUOTA(inode))
+                        }
-                                return -EPERM;
+                        /*
+                         * Is it a quota file? Do not allow user to mess with it
+                         */
+                        if (IS_NOQUOTA(inode)) {
+                                err = -EPERM;
+                                goto setflags_out;
+                        }
                        if (((flags ^ REISERFS_I(inode)->
                              i_attrs) & (REISERFS_IMMUTABLE_FL |
                                          REISERFS_APPEND_FL))
-                            && !capable(CAP_LINUX_IMMUTABLE))
+                            && !capable(CAP_LINUX_IMMUTABLE)) {
-                                return -EPERM;
+                                err = -EPERM;
+                                goto setflags_out;
+                        }
                        if ((flags & REISERFS_NOTAIL_FL) &&
                            S_ISREG(inode->i_mode)) {
                                int result;
                                result = reiserfs_unpack(inode, filp);
-                                if (result)
+                                if (result) {
-                                        return result;
+                                        err = result;
+                                        goto setflags_out;
+                                }
                        }
                        sd_attrs_to_i_attrs(flags, inode);
                        REISERFS_I(inode)->i_attrs = flags;
                        inode->i_ctime = CURRENT_TIME_SEC;
                        mark_inode_dirty(inode);
-                        return 0;
+setflags_out:
+                        mnt_drop_write(filp->f_path.mnt);
+                        return err;
                }
        case REISERFS_IOC_GETVERSION:
                return put_user(inode->i_generation, (int __user *)arg);
        case REISERFS_IOC_SETVERSION:
                if (!is_owner_or_cap(inode))
                        return -EPERM;
-                if (IS_RDONLY(inode))
+                err = mnt_want_write(filp->f_path.mnt);
-                        return -EROFS;
+                if (err)
-                if (get_user(inode->i_generation, (int __user *)arg))
+                        return err;
-                        return -EFAULT;
+                if (get_user(inode->i_generation, (int __user *)arg)) {
+                        err = -EFAULT;
+                        goto setversion_out;
+                }
                inode->i_ctime = CURRENT_TIME_SEC;
                mark_inode_dirty(inode);
-                return 0;
+setversion_out:
+                mnt_drop_write(filp->f_path.mnt);
+                return err;
        default:
                return -ENOTTY;
        }
diff --git a/fs/super.c b/fs/super.c
index 09008dbd264e..1f8f05ede437 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
 #include <linux/idr.h>
 #include <linux/kobject.h>
 #include <linux/mutex.h>
+#include <linux/file.h>
 #include <asm/uaccess.h>
@@ -567,10 +568,29 @@ static void mark_files_ro(struct super_block *sb)
 {
        struct file *f;
+retry:
        file_list_lock();
        list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
-                if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f))
+                struct vfsmount *mnt;
-                        f->f_mode &= ~FMODE_WRITE;
+                if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+                       continue;
+                if (!file_count(f))
+                        continue;
+                if (!(f->f_mode & FMODE_WRITE))
+                        continue;
+                f->f_mode &= ~FMODE_WRITE;
+                if (file_check_writeable(f) != 0)
+                        continue;
+                file_release_write(f);
+                mnt = mntget(f->f_path.mnt);
+                file_list_unlock();
+                /*
+                 * This can sleep, so we can't hold
+                 * the file_list_lock() spinlock.
+                 */
+                mnt_drop_write(mnt);
+                mntput(mnt);
+                goto retry;
        }
        file_list_unlock();
 }
diff --git a/fs/utimes.c b/fs/utimes.c
index b18da9c0b97f..a2bef77dc9c9 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -2,6 +2,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/linkage.h>
+#include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/sched.h>
 #include <linux/stat.h>
@@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
        struct inode *inode;
        struct iattr newattrs;
        struct file *f = NULL;
+        struct vfsmount *mnt;
        error = -EINVAL;
        if (times && (!nsec_valid(times[0].tv_nsec) ||
@@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
                if (!f)
                        goto out;
                dentry = f->f_path.dentry;
+                mnt = f->f_path.mnt;
        } else {
                error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
                if (error)
                        goto out;
                dentry = nd.path.dentry;
+                mnt = nd.path.mnt;
        }
        inode = dentry->d_inode;
-        error = -EROFS;
+        error = mnt_want_write(mnt);
-        if (IS_RDONLY(inode))
+        if (error)
                goto dput_and_out;
        /* Don't worry, the checks are done in inode_change_ok() */
@@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
        if (times) {
                error = -EPERM;
                if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-                        goto dput_and_out;
+                        goto mnt_drop_write_and_out;
                if (times[0].tv_nsec == UTIME_OMIT)
                        newattrs.ia_valid &= ~ATTR_ATIME;
@@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
        } else {
                error = -EACCES;
                if (IS_IMMUTABLE(inode))
-                        goto dput_and_out;
+                        goto mnt_drop_write_and_out;
                if (!is_owner_or_cap(inode)) {
                        if (f) {
                                if (!(f->f_mode & FMODE_WRITE))
-                                        goto dput_and_out;
+                                        goto mnt_drop_write_and_out;
                        } else {
                                error = vfs_permission(&nd, MAY_WRITE);
                                if (error)
-                                        goto dput_and_out;
+                                        goto mnt_drop_write_and_out;
                        }
                }
        }
        mutex_lock(&inode->i_mutex);
        error = notify_change(dentry, &newattrs);
        mutex_unlock(&inode->i_mutex);
+mnt_drop_write_and_out:
+        mnt_drop_write(mnt);
 dput_and_out:
        if (f)
                fput(f);
diff --git a/fs/xattr.c b/fs/xattr.c
index 3acab1615460..f7062da505d4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/xattr.h>
+#include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
@@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask)
         * filesystem  or on an immutable / append-only inode.
         */
        if (mask & MAY_WRITE) {
-                if (IS_RDONLY(inode))
-                        return -EROFS;
                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                        return -EPERM;
        }
@@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
        error = user_path_walk(path, &nd);
        if (error)
                return error;
-        error = setxattr(nd.path.dentry, name, value, size, flags);
+        error = mnt_want_write(nd.path.mnt);
+        if (!error) {
+                error = setxattr(nd.path.dentry, name, value, size, flags);
+                mnt_drop_write(nd.path.mnt);
+        }
        path_put(&nd.path);
        return error;
 }
@@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
        error = user_path_walk_link(path, &nd);
        if (error)
                return error;
-        error = setxattr(nd.path.dentry, name, value, size, flags);
+        error = mnt_want_write(nd.path.mnt);
+        if (!error) {
+                error = setxattr(nd.path.dentry, name, value, size, flags);
+                mnt_drop_write(nd.path.mnt);
+        }
        path_put(&nd.path);
        return error;
 }
@@ -295,7 +302,12 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
                return error;
        dentry = f->f_path.dentry;
        audit_inode(NULL, dentry);
-        error = setxattr(dentry, name, value, size, flags);
+        error = mnt_want_write(f->f_path.mnt);
+        if (error)
+                goto out_fput;  /* no write access; skip setxattr() */
+        error = setxattr(dentry, name, value, size, flags);
+        mnt_drop_write(f->f_path.mnt);
+out_fput:
        fput(f);
        return error;
 }
@@ -482,7 +494,11 @@ sys_removexattr(char __user *path, char __user *name)
        error = user_path_walk(path, &nd);
        if (error)
                return error;
-        error = removexattr(nd.path.dentry, name);
+        error = mnt_want_write(nd.path.mnt);
+        if (!error) {
+                error = removexattr(nd.path.dentry, name);
+                mnt_drop_write(nd.path.mnt);
+        }
        path_put(&nd.path);
        return error;
 }
@@ -496,7 +512,11 @@ sys_lremovexattr(char __user *path, char __user *name)
        error = user_path_walk_link(path, &nd);
        if (error)
                return error;
-        error = removexattr(nd.path.dentry, name);
+        error = mnt_want_write(nd.path.mnt);
+        if (!error) {
+                error = removexattr(nd.path.dentry, name);
+                mnt_drop_write(nd.path.mnt);
+        }
        path_put(&nd.path);
        return error;
 }
@@ -513,7 +533,11 @@ sys_fremovexattr(int fd, char __user *name)
                return error;
        dentry = f->f_path.dentry;
        audit_inode(NULL, dentry);
-        error = removexattr(dentry, name);
+        error = mnt_want_write(f->f_path.mnt);
+        if (!error) {
+                error = removexattr(dentry, name);
+                mnt_drop_write(f->f_path.mnt);
+        }
        fput(f);
        return error;
 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index bf7759793856..4ddb86b73c6b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -535,8 +535,6 @@ xfs_attrmulti_attr_set(
        char                    *kbuf;
        int                     error = EFAULT;
-        if (IS_RDONLY(inode))
-                return -EROFS;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                return EPERM;
        if (len > XATTR_SIZE_MAX)
@@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove(
        char                    *name,
        __uint32_t              flags)
 {
-        if (IS_RDONLY(inode))
-                return -EROFS;
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                return EPERM;
        return xfs_attr_remove(XFS_I(inode), name, flags);
@@ -573,6 +569,7 @@ STATIC int
 xfs_attrmulti_by_handle(
        xfs_mount_t             *mp,
        void                    __user *arg,
+        struct file             *parfilp,
        struct inode            *parinode)
 {
        int                     error;
@@ -626,13 +623,21 @@ xfs_attrmulti_by_handle(
                                        &ops[i].am_length, ops[i].am_flags);
                        break;
                case ATTR_OP_SET:
+                        ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                        if (ops[i].am_error)
+                                break;
                        ops[i].am_error = xfs_attrmulti_attr_set(inode,
                                        attr_name, ops[i].am_attrvalue,
                                        ops[i].am_length, ops[i].am_flags);
+                        mnt_drop_write(parfilp->f_path.mnt);
                        break;
                case ATTR_OP_REMOVE:
+                        ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
+                        if (ops[i].am_error)
+                                break;
                        ops[i].am_error = xfs_attrmulti_attr_remove(inode,
                                        attr_name, ops[i].am_flags);
+                        mnt_drop_write(parfilp->f_path.mnt);
                        break;
                default:
                        ops[i].am_error = EINVAL;
@@ -1133,7 +1138,7 @@ xfs_ioctl(
                return xfs_attrlist_by_handle(mp, arg, inode);
        case XFS_IOC_ATTRMULTI_BY_HANDLE:
-                return xfs_attrmulti_by_handle(mp, arg, inode);
+                return xfs_attrmulti_by_handle(mp, arg, filp, inode);
        case XFS_IOC_SWAPEXT: {
                error = xfs_swapext((struct xfs_swapext __user *)arg);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 0c958cf77758..a1237dad6430 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -155,13 +155,6 @@ xfs_ichgtime_fast(
         */
        ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
-        /*
-         * We're not supposed to change timestamps in readonly-mounted
-         * filesystems.  Throw it away if anyone asks us.
-         */
-        if (unlikely(IS_RDONLY(inode)))
-                return;
        if (flags & XFS_ICHGTIME_MOD) {
                tvp = &inode->i_mtime;
                ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 21c0dbc74093..1ebd8004469c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,6 +51,7 @@
 #include "xfs_vnodeops.h"
 #include <linux/capability.h>
+#include <linux/mount.h>
 #include <linux/writeback.h>
@@ -670,10 +671,16 @@ start:
        if (new_size > xip->i_size)
                xip->i_new_size = new_size;
-        if (likely(!(ioflags & IO_INVIS))) {
+        /*
+         * We're not supposed to change timestamps in readonly-mounted
+         * filesystems.  Throw it away if anyone asks us.
+         */
+        if (likely(!(ioflags & IO_INVIS) &&
+                   !mnt_want_write(file->f_path.mnt))) {
                file_update_time(file);
                xfs_ichgtime_fast(xip, inode,
                                  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                mnt_drop_write(file->f_path.mnt);
        }
        /*
diff --git a/include/linux/file.h b/include/linux/file.h
index 7239baac81a9..653477021e4c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -61,6 +61,7 @@ extern struct kmem_cache *filp_cachep;
 extern void __fput(struct file *);
 extern void fput(struct file *);
+extern void drop_file_write_access(struct file *file);
 struct file_operations;
 struct vfsmount;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b84b848431f2..d1eeea669d2c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -776,6 +776,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
                index <  ra->start + ra->size);
 }
+#define FILE_MNT_WRITE_TAKEN    1
+#define FILE_MNT_WRITE_RELEASED 2
 struct file {
        /*
         * fu_list becomes invalid after file_free is called and queued via
@@ -810,6 +813,9 @@ struct file {
        spinlock_t              f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
+#ifdef CONFIG_DEBUG_WRITECOUNT
+        unsigned long f_mnt_write_state;
+#endif
 };
 extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
@@ -818,6 +824,49 @@ extern spinlock_t files_lock;
 #define get_file(x)     atomic_inc(&(x)->f_count)
 #define file_count(x)   atomic_read(&(x)->f_count)
+#ifdef CONFIG_DEBUG_WRITECOUNT
+static inline void file_take_write(struct file *f)
+{
+        WARN_ON(f->f_mnt_write_state != 0);
+        f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
+}
+static inline void file_release_write(struct file *f)
+{
+        f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
+}
+static inline void file_reset_write(struct file *f)
+{
+        f->f_mnt_write_state = 0;
+}
+static inline void file_check_state(struct file *f)
+{
+        /*
+         * At this point, either both or neither of these bits
+         * should be set.
+         */
+        WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
+        WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
+}
+static inline int file_check_writeable(struct file *f)
+{
+        if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
+                return 0;
+        printk(KERN_WARNING "writeable file with no "
+                            "mnt_want_write()\n");
+        WARN_ON(1);
+        return -EINVAL;
+}
+#else /* !CONFIG_DEBUG_WRITECOUNT */
+static inline void file_take_write(struct file *filp) {}
+static inline void file_release_write(struct file *filp) {}
+static inline void file_reset_write(struct file *filp) {}
+static inline void file_check_state(struct file *filp) {}
+static inline int file_check_writeable(struct file *filp)
+{
+        return 0;
+}
+#endif /* CONFIG_DEBUG_WRITECOUNT */
 #define MAX_NON_LFS     ((1UL<<31) - 1)
 /* Page cache limit. The filesystems should put that into their s_maxbytes 
@@ -1735,7 +1784,8 @@ extern struct file *create_read_pipe(struct file *f);
 extern struct file *create_write_pipe(void);
 extern void free_write_pipe(struct file *);
-extern int open_namei(int dfd, const char *, int, int, struct nameidata *);
+extern struct file *do_filp_open(int dfd, const char *pathname,
+                int open_flag, int mode);
 extern int may_open(struct nameidata *, int, int);
 extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5ee2df217cdf..d6600e3f7e45 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/nodemask.h>
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
@@ -28,8 +29,10 @@ struct mnt_namespace;
 #define MNT_NOATIME     0x08
 #define MNT_NODIRATIME  0x10
 #define MNT_RELATIME    0x20
+#define MNT_READONLY    0x40    /* does the user want this to be r/o? */
 #define MNT_SHRINKABLE  0x100
+#define MNT_IMBALANCED_WRITE_COUNT      0x200 /* just for debugging */
 #define MNT_SHARED      0x1000  /* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE  0x2000  /* if the vfsmount is a unbindable mount */
@@ -62,6 +65,11 @@ struct vfsmount {
        int mnt_expiry_mark;            /* true if marked for expiry */
        int mnt_pinned;
        int mnt_ghosts;
+        /*
+         * This value is not stable unless all of the mnt_writers[] spinlocks
+         * are held, and all mnt_writer[]s on this mount have 0 as their ->count
+         */
+        atomic_t __mnt_writers;
 };
 static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -71,9 +79,12 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
        return mnt;
 }
+extern int mnt_want_write(struct vfsmount *mnt);
+extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mntput_no_expire(struct vfsmount *mnt);
 extern void mnt_pin(struct vfsmount *mnt);
 extern void mnt_unpin(struct vfsmount *mnt);
+extern int __mnt_is_readonly(struct vfsmount *mnt);
 static inline void mntput(struct vfsmount *mnt)
 {
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 60f7a27f7a9e..94fd3b08fb77 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -598,6 +598,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
                        int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
        struct mq_attr attr;
+        struct file *result;
        int ret;
        if (u_attr) {
@@ -612,13 +613,24 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
        }
        mode &= ~current->fs->umask;
+        ret = mnt_want_write(mqueue_mnt);
+        if (ret)
+                goto out;
        ret = vfs_create(dir->d_inode, dentry, mode, NULL);
        dentry->d_fsdata = NULL;
        if (ret)
-                goto out;
+                goto out_drop_write;
-        return dentry_open(dentry, mqueue_mnt, oflag);
+        result = dentry_open(dentry, mqueue_mnt, oflag);
+        /*
+         * dentry_open() took a persistent mnt_want_write(),
+         * so we can now drop this one.
+         */
+        mnt_drop_write(mqueue_mnt);
+        return result;
+out_drop_write:
+        mnt_drop_write(mqueue_mnt);
 out:
        dput(dentry);
        mntput(mqueue_mnt);
@@ -742,8 +754,11 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
        inode = dentry->d_inode;
        if (inode)
                atomic_inc(&inode->i_count);
+        err = mnt_want_write(mqueue_mnt);
+        if (err)
+                goto out_err;
        err = vfs_unlink(dentry->d_parent->d_inode, dentry);
+        mnt_drop_write(mqueue_mnt);
 out_err:
        dput(dentry);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 95de3102bc87..623ef24c2381 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -427,6 +427,16 @@ config DEBUG_VM
          If unsure, say N.
+config DEBUG_WRITECOUNT
+        bool "Debug filesystem writers count"
+        depends on DEBUG_KERNEL
+        help
+          Enable this to catch wrong use of the writers count in struct
+          vfsmount.  This will increase the size of each file struct by
+          one unsigned long (32 or 64 bits, depending on the architecture).
+          If unsure, say N.
 config DEBUG_LIST
        bool "Debug linked list manipulation"
        depends on DEBUG_KERNEL
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2851d0d15048..1454afcc06c4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -819,7 +819,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                 */
                mode = S_IFSOCK |
                       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
+                err = mnt_want_write(nd.path.mnt);
+                if (err)
+                        goto out_mknod_dput;
                err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
+                mnt_drop_write(nd.path.mnt);
                if (err)
                        goto out_mknod_dput;
                mutex_unlock(&nd.path.dentry->d_inode->i_mutex);