aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext2/ioctl.c57
-rw-r--r--fs/ext3/ioctl.c103
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/fat/file.c12
-rw-r--r--fs/file_table.c42
-rw-r--r--fs/hfsplus/ioctl.c40
-rw-r--r--fs/inode.c51
-rw-r--r--fs/jfs/ioctl.c33
-rw-r--r--fs/namei.c275
-rw-r--r--fs/namespace.c316
-rw-r--r--fs/ncpfs/ioctl.c54
-rw-r--r--fs/nfs/dir.c3
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c3
-rw-r--r--fs/nfsd/vfs.c72
-rw-r--r--fs/ocfs2/ioctl.c11
-rw-r--r--fs/open.c149
-rw-r--r--fs/reiserfs/ioctl.c63
-rw-r--r--fs/super.c24
-rw-r--r--fs/utimes.c18
-rw-r--r--fs/xattr.c40
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c9
-rw-r--r--include/linux/file.h1
-rw-r--r--include/linux/fs.h52
-rw-r--r--include/linux/mount.h11
-rw-r--r--ipc/mqueue.c25
-rw-r--r--lib/Kconfig.debug10
-rw-r--r--net/unix/af_unix.c4
31 files changed, 1257 insertions, 352 deletions
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index b8ea11fee5c6..de876fa793e1 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/compat.h> 14#include <linux/compat.h>
15#include <linux/mount.h>
15#include <linux/smp_lock.h> 16#include <linux/smp_lock.h>
16#include <asm/current.h> 17#include <asm/current.h>
17#include <asm/uaccess.h> 18#include <asm/uaccess.h>
@@ -23,6 +24,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
23 struct ext2_inode_info *ei = EXT2_I(inode); 24 struct ext2_inode_info *ei = EXT2_I(inode);
24 unsigned int flags; 25 unsigned int flags;
25 unsigned short rsv_window_size; 26 unsigned short rsv_window_size;
27 int ret;
26 28
27 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); 29 ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
28 30
@@ -34,14 +36,19 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
34 case EXT2_IOC_SETFLAGS: { 36 case EXT2_IOC_SETFLAGS: {
35 unsigned int oldflags; 37 unsigned int oldflags;
36 38
37 if (IS_RDONLY(inode)) 39 ret = mnt_want_write(filp->f_path.mnt);
38 return -EROFS; 40 if (ret)
41 return ret;
39 42
40 if (!is_owner_or_cap(inode)) 43 if (!is_owner_or_cap(inode)) {
41 return -EACCES; 44 ret = -EACCES;
45 goto setflags_out;
46 }
42 47
43 if (get_user(flags, (int __user *) arg)) 48 if (get_user(flags, (int __user *) arg)) {
44 return -EFAULT; 49 ret = -EFAULT;
50 goto setflags_out;
51 }
45 52
46 if (!S_ISDIR(inode->i_mode)) 53 if (!S_ISDIR(inode->i_mode))
47 flags &= ~EXT2_DIRSYNC_FL; 54 flags &= ~EXT2_DIRSYNC_FL;
@@ -50,7 +57,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
50 /* Is it quota file? Do not allow user to mess with it */ 57 /* Is it quota file? Do not allow user to mess with it */
51 if (IS_NOQUOTA(inode)) { 58 if (IS_NOQUOTA(inode)) {
52 mutex_unlock(&inode->i_mutex); 59 mutex_unlock(&inode->i_mutex);
53 return -EPERM; 60 ret = -EPERM;
61 goto setflags_out;
54 } 62 }
55 oldflags = ei->i_flags; 63 oldflags = ei->i_flags;
56 64
@@ -63,7 +71,8 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
63 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { 71 if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
64 if (!capable(CAP_LINUX_IMMUTABLE)) { 72 if (!capable(CAP_LINUX_IMMUTABLE)) {
65 mutex_unlock(&inode->i_mutex); 73 mutex_unlock(&inode->i_mutex);
66 return -EPERM; 74 ret = -EPERM;
75 goto setflags_out;
67 } 76 }
68 } 77 }
69 78
@@ -75,20 +84,26 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
75 ext2_set_inode_flags(inode); 84 ext2_set_inode_flags(inode);
76 inode->i_ctime = CURRENT_TIME_SEC; 85 inode->i_ctime = CURRENT_TIME_SEC;
77 mark_inode_dirty(inode); 86 mark_inode_dirty(inode);
78 return 0; 87setflags_out:
88 mnt_drop_write(filp->f_path.mnt);
89 return ret;
79 } 90 }
80 case EXT2_IOC_GETVERSION: 91 case EXT2_IOC_GETVERSION:
81 return put_user(inode->i_generation, (int __user *) arg); 92 return put_user(inode->i_generation, (int __user *) arg);
82 case EXT2_IOC_SETVERSION: 93 case EXT2_IOC_SETVERSION:
83 if (!is_owner_or_cap(inode)) 94 if (!is_owner_or_cap(inode))
84 return -EPERM; 95 return -EPERM;
85 if (IS_RDONLY(inode)) 96 ret = mnt_want_write(filp->f_path.mnt);
86 return -EROFS; 97 if (ret)
87 if (get_user(inode->i_generation, (int __user *) arg)) 98 return ret;
88 return -EFAULT; 99 if (get_user(inode->i_generation, (int __user *) arg)) {
89 inode->i_ctime = CURRENT_TIME_SEC; 100 ret = -EFAULT;
90 mark_inode_dirty(inode); 101 } else {
91 return 0; 102 inode->i_ctime = CURRENT_TIME_SEC;
103 mark_inode_dirty(inode);
104 }
105 mnt_drop_write(filp->f_path.mnt);
106 return ret;
92 case EXT2_IOC_GETRSVSZ: 107 case EXT2_IOC_GETRSVSZ:
93 if (test_opt(inode->i_sb, RESERVATION) 108 if (test_opt(inode->i_sb, RESERVATION)
94 && S_ISREG(inode->i_mode) 109 && S_ISREG(inode->i_mode)
@@ -102,15 +117,16 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
102 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 117 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
103 return -ENOTTY; 118 return -ENOTTY;
104 119
105 if (IS_RDONLY(inode)) 120 if (!is_owner_or_cap(inode))
106 return -EROFS;
107
108 if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
109 return -EACCES; 121 return -EACCES;
110 122
111 if (get_user(rsv_window_size, (int __user *)arg)) 123 if (get_user(rsv_window_size, (int __user *)arg))
112 return -EFAULT; 124 return -EFAULT;
113 125
126 ret = mnt_want_write(filp->f_path.mnt);
127 if (ret)
128 return ret;
129
114 if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) 130 if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS)
115 rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; 131 rsv_window_size = EXT2_MAX_RESERVE_BLOCKS;
116 132
@@ -131,6 +147,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
131 rsv->rsv_goal_size = rsv_window_size; 147 rsv->rsv_goal_size = rsv_window_size;
132 } 148 }
133 mutex_unlock(&ei->truncate_mutex); 149 mutex_unlock(&ei->truncate_mutex);
150 mnt_drop_write(filp->f_path.mnt);
134 return 0; 151 return 0;
135 } 152 }
136 default: 153 default:
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 023a070f55f1..0d0c70151642 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -12,6 +12,7 @@
12#include <linux/capability.h> 12#include <linux/capability.h>
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include <linux/ext3_jbd.h> 14#include <linux/ext3_jbd.h>
15#include <linux/mount.h>
15#include <linux/time.h> 16#include <linux/time.h>
16#include <linux/compat.h> 17#include <linux/compat.h>
17#include <linux/smp_lock.h> 18#include <linux/smp_lock.h>
@@ -38,14 +39,19 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
38 unsigned int oldflags; 39 unsigned int oldflags;
39 unsigned int jflag; 40 unsigned int jflag;
40 41
41 if (IS_RDONLY(inode)) 42 err = mnt_want_write(filp->f_path.mnt);
42 return -EROFS; 43 if (err)
44 return err;
43 45
44 if (!is_owner_or_cap(inode)) 46 if (!is_owner_or_cap(inode)) {
45 return -EACCES; 47 err = -EACCES;
48 goto flags_out;
49 }
46 50
47 if (get_user(flags, (int __user *) arg)) 51 if (get_user(flags, (int __user *) arg)) {
48 return -EFAULT; 52 err = -EFAULT;
53 goto flags_out;
54 }
49 55
50 if (!S_ISDIR(inode->i_mode)) 56 if (!S_ISDIR(inode->i_mode))
51 flags &= ~EXT3_DIRSYNC_FL; 57 flags &= ~EXT3_DIRSYNC_FL;
@@ -54,7 +60,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
54 /* Is it quota file? Do not allow user to mess with it */ 60 /* Is it quota file? Do not allow user to mess with it */
55 if (IS_NOQUOTA(inode)) { 61 if (IS_NOQUOTA(inode)) {
56 mutex_unlock(&inode->i_mutex); 62 mutex_unlock(&inode->i_mutex);
57 return -EPERM; 63 err = -EPERM;
64 goto flags_out;
58 } 65 }
59 oldflags = ei->i_flags; 66 oldflags = ei->i_flags;
60 67
@@ -70,7 +77,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
70 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { 77 if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
71 if (!capable(CAP_LINUX_IMMUTABLE)) { 78 if (!capable(CAP_LINUX_IMMUTABLE)) {
72 mutex_unlock(&inode->i_mutex); 79 mutex_unlock(&inode->i_mutex);
73 return -EPERM; 80 err = -EPERM;
81 goto flags_out;
74 } 82 }
75 } 83 }
76 84
@@ -81,7 +89,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
81 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { 89 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) {
82 if (!capable(CAP_SYS_RESOURCE)) { 90 if (!capable(CAP_SYS_RESOURCE)) {
83 mutex_unlock(&inode->i_mutex); 91 mutex_unlock(&inode->i_mutex);
84 return -EPERM; 92 err = -EPERM;
93 goto flags_out;
85 } 94 }
86 } 95 }
87 96
@@ -89,7 +98,8 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
89 handle = ext3_journal_start(inode, 1); 98 handle = ext3_journal_start(inode, 1);
90 if (IS_ERR(handle)) { 99 if (IS_ERR(handle)) {
91 mutex_unlock(&inode->i_mutex); 100 mutex_unlock(&inode->i_mutex);
92 return PTR_ERR(handle); 101 err = PTR_ERR(handle);
102 goto flags_out;
93 } 103 }
94 if (IS_SYNC(inode)) 104 if (IS_SYNC(inode))
95 handle->h_sync = 1; 105 handle->h_sync = 1;
@@ -115,6 +125,8 @@ flags_err:
115 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) 125 if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL))
116 err = ext3_change_inode_journal_flag(inode, jflag); 126 err = ext3_change_inode_journal_flag(inode, jflag);
117 mutex_unlock(&inode->i_mutex); 127 mutex_unlock(&inode->i_mutex);
128flags_out:
129 mnt_drop_write(filp->f_path.mnt);
118 return err; 130 return err;
119 } 131 }
120 case EXT3_IOC_GETVERSION: 132 case EXT3_IOC_GETVERSION:
@@ -129,14 +141,18 @@ flags_err:
129 141
130 if (!is_owner_or_cap(inode)) 142 if (!is_owner_or_cap(inode))
131 return -EPERM; 143 return -EPERM;
132 if (IS_RDONLY(inode)) 144 err = mnt_want_write(filp->f_path.mnt);
133 return -EROFS; 145 if (err)
134 if (get_user(generation, (int __user *) arg)) 146 return err;
135 return -EFAULT; 147 if (get_user(generation, (int __user *) arg)) {
136 148 err = -EFAULT;
149 goto setversion_out;
150 }
137 handle = ext3_journal_start(inode, 1); 151 handle = ext3_journal_start(inode, 1);
138 if (IS_ERR(handle)) 152 if (IS_ERR(handle)) {
139 return PTR_ERR(handle); 153 err = PTR_ERR(handle);
154 goto setversion_out;
155 }
140 err = ext3_reserve_inode_write(handle, inode, &iloc); 156 err = ext3_reserve_inode_write(handle, inode, &iloc);
141 if (err == 0) { 157 if (err == 0) {
142 inode->i_ctime = CURRENT_TIME_SEC; 158 inode->i_ctime = CURRENT_TIME_SEC;
@@ -144,6 +160,8 @@ flags_err:
144 err = ext3_mark_iloc_dirty(handle, inode, &iloc); 160 err = ext3_mark_iloc_dirty(handle, inode, &iloc);
145 } 161 }
146 ext3_journal_stop(handle); 162 ext3_journal_stop(handle);
163setversion_out:
164 mnt_drop_write(filp->f_path.mnt);
147 return err; 165 return err;
148 } 166 }
149#ifdef CONFIG_JBD_DEBUG 167#ifdef CONFIG_JBD_DEBUG
@@ -179,18 +197,24 @@ flags_err:
179 } 197 }
180 return -ENOTTY; 198 return -ENOTTY;
181 case EXT3_IOC_SETRSVSZ: { 199 case EXT3_IOC_SETRSVSZ: {
200 int err;
182 201
183 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 202 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
184 return -ENOTTY; 203 return -ENOTTY;
185 204
186 if (IS_RDONLY(inode)) 205 err = mnt_want_write(filp->f_path.mnt);
187 return -EROFS; 206 if (err)
207 return err;
188 208
189 if (!is_owner_or_cap(inode)) 209 if (!is_owner_or_cap(inode)) {
190 return -EACCES; 210 err = -EACCES;
211 goto setrsvsz_out;
212 }
191 213
192 if (get_user(rsv_window_size, (int __user *)arg)) 214 if (get_user(rsv_window_size, (int __user *)arg)) {
193 return -EFAULT; 215 err = -EFAULT;
216 goto setrsvsz_out;
217 }
194 218
195 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) 219 if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS)
196 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; 220 rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
@@ -208,7 +232,9 @@ flags_err:
208 rsv->rsv_goal_size = rsv_window_size; 232 rsv->rsv_goal_size = rsv_window_size;
209 } 233 }
210 mutex_unlock(&ei->truncate_mutex); 234 mutex_unlock(&ei->truncate_mutex);
211 return 0; 235setrsvsz_out:
236 mnt_drop_write(filp->f_path.mnt);
237 return err;
212 } 238 }
213 case EXT3_IOC_GROUP_EXTEND: { 239 case EXT3_IOC_GROUP_EXTEND: {
214 ext3_fsblk_t n_blocks_count; 240 ext3_fsblk_t n_blocks_count;
@@ -218,17 +244,20 @@ flags_err:
218 if (!capable(CAP_SYS_RESOURCE)) 244 if (!capable(CAP_SYS_RESOURCE))
219 return -EPERM; 245 return -EPERM;
220 246
221 if (IS_RDONLY(inode)) 247 err = mnt_want_write(filp->f_path.mnt);
222 return -EROFS; 248 if (err)
223 249 return err;
224 if (get_user(n_blocks_count, (__u32 __user *)arg))
225 return -EFAULT;
226 250
251 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
252 err = -EFAULT;
253 goto group_extend_out;
254 }
227 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); 255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
228 journal_lock_updates(EXT3_SB(sb)->s_journal); 256 journal_lock_updates(EXT3_SB(sb)->s_journal);
229 journal_flush(EXT3_SB(sb)->s_journal); 257 journal_flush(EXT3_SB(sb)->s_journal);
230 journal_unlock_updates(EXT3_SB(sb)->s_journal); 258 journal_unlock_updates(EXT3_SB(sb)->s_journal);
231 259group_extend_out:
260 mnt_drop_write(filp->f_path.mnt);
232 return err; 261 return err;
233 } 262 }
234 case EXT3_IOC_GROUP_ADD: { 263 case EXT3_IOC_GROUP_ADD: {
@@ -239,18 +268,22 @@ flags_err:
239 if (!capable(CAP_SYS_RESOURCE)) 268 if (!capable(CAP_SYS_RESOURCE))
240 return -EPERM; 269 return -EPERM;
241 270
242 if (IS_RDONLY(inode)) 271 err = mnt_want_write(filp->f_path.mnt);
243 return -EROFS; 272 if (err)
273 return err;
244 274
245 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, 275 if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg,
246 sizeof(input))) 276 sizeof(input))) {
247 return -EFAULT; 277 err = -EFAULT;
278 goto group_add_out;
279 }
248 280
249 err = ext3_group_add(sb, &input); 281 err = ext3_group_add(sb, &input);
250 journal_lock_updates(EXT3_SB(sb)->s_journal); 282 journal_lock_updates(EXT3_SB(sb)->s_journal);
251 journal_flush(EXT3_SB(sb)->s_journal); 283 journal_flush(EXT3_SB(sb)->s_journal);
252 journal_unlock_updates(EXT3_SB(sb)->s_journal); 284 journal_unlock_updates(EXT3_SB(sb)->s_journal);
253 285group_add_out:
286 mnt_drop_write(filp->f_path.mnt);
254 return err; 287 return err;
255 } 288 }
256 289
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2ed7c37f897e..25b13ede8086 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -15,6 +15,7 @@
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/smp_lock.h> 17#include <linux/smp_lock.h>
18#include <linux/mount.h>
18#include <asm/uaccess.h> 19#include <asm/uaccess.h>
19 20
20int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, 21int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
@@ -38,24 +39,25 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
38 unsigned int oldflags; 39 unsigned int oldflags;
39 unsigned int jflag; 40 unsigned int jflag;
40 41
41 if (IS_RDONLY(inode))
42 return -EROFS;
43
44 if (!is_owner_or_cap(inode)) 42 if (!is_owner_or_cap(inode))
45 return -EACCES; 43 return -EACCES;
46 44
47 if (get_user(flags, (int __user *) arg)) 45 if (get_user(flags, (int __user *) arg))
48 return -EFAULT; 46 return -EFAULT;
49 47
48 err = mnt_want_write(filp->f_path.mnt);
49 if (err)
50 return err;
51
50 if (!S_ISDIR(inode->i_mode)) 52 if (!S_ISDIR(inode->i_mode))
51 flags &= ~EXT4_DIRSYNC_FL; 53 flags &= ~EXT4_DIRSYNC_FL;
52 54
55 err = -EPERM;
53 mutex_lock(&inode->i_mutex); 56 mutex_lock(&inode->i_mutex);
54 /* Is it quota file? Do not allow user to mess with it */ 57 /* Is it quota file? Do not allow user to mess with it */
55 if (IS_NOQUOTA(inode)) { 58 if (IS_NOQUOTA(inode))
56 mutex_unlock(&inode->i_mutex); 59 goto flags_out;
57 return -EPERM; 60
58 }
59 oldflags = ei->i_flags; 61 oldflags = ei->i_flags;
60 62
61 /* The JOURNAL_DATA flag is modifiable only by root */ 63 /* The JOURNAL_DATA flag is modifiable only by root */
@@ -68,10 +70,8 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
68 * This test looks nicer. Thanks to Pauline Middelink 70 * This test looks nicer. Thanks to Pauline Middelink
69 */ 71 */
70 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { 72 if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
71 if (!capable(CAP_LINUX_IMMUTABLE)) { 73 if (!capable(CAP_LINUX_IMMUTABLE))
72 mutex_unlock(&inode->i_mutex); 74 goto flags_out;
73 return -EPERM;
74 }
75 } 75 }
76 76
77 /* 77 /*
@@ -79,17 +79,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
79 * the relevant capability. 79 * the relevant capability.
80 */ 80 */
81 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { 81 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
82 if (!capable(CAP_SYS_RESOURCE)) { 82 if (!capable(CAP_SYS_RESOURCE))
83 mutex_unlock(&inode->i_mutex); 83 goto flags_out;
84 return -EPERM;
85 }
86 } 84 }
87 85
88
89 handle = ext4_journal_start(inode, 1); 86 handle = ext4_journal_start(inode, 1);
90 if (IS_ERR(handle)) { 87 if (IS_ERR(handle)) {
91 mutex_unlock(&inode->i_mutex); 88 err = PTR_ERR(handle);
92 return PTR_ERR(handle); 89 goto flags_out;
93 } 90 }
94 if (IS_SYNC(inode)) 91 if (IS_SYNC(inode))
95 handle->h_sync = 1; 92 handle->h_sync = 1;
@@ -107,14 +104,14 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
107 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 104 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
108flags_err: 105flags_err:
109 ext4_journal_stop(handle); 106 ext4_journal_stop(handle);
110 if (err) { 107 if (err)
111 mutex_unlock(&inode->i_mutex); 108 goto flags_out;
112 return err;
113 }
114 109
115 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) 110 if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
116 err = ext4_change_inode_journal_flag(inode, jflag); 111 err = ext4_change_inode_journal_flag(inode, jflag);
112flags_out:
117 mutex_unlock(&inode->i_mutex); 113 mutex_unlock(&inode->i_mutex);
114 mnt_drop_write(filp->f_path.mnt);
118 return err; 115 return err;
119 } 116 }
120 case EXT4_IOC_GETVERSION: 117 case EXT4_IOC_GETVERSION:
@@ -129,14 +126,20 @@ flags_err:
129 126
130 if (!is_owner_or_cap(inode)) 127 if (!is_owner_or_cap(inode))
131 return -EPERM; 128 return -EPERM;
132 if (IS_RDONLY(inode)) 129
133 return -EROFS; 130 err = mnt_want_write(filp->f_path.mnt);
134 if (get_user(generation, (int __user *) arg)) 131 if (err)
135 return -EFAULT; 132 return err;
133 if (get_user(generation, (int __user *) arg)) {
134 err = -EFAULT;
135 goto setversion_out;
136 }
136 137
137 handle = ext4_journal_start(inode, 1); 138 handle = ext4_journal_start(inode, 1);
138 if (IS_ERR(handle)) 139 if (IS_ERR(handle)) {
139 return PTR_ERR(handle); 140 err = PTR_ERR(handle);
141 goto setversion_out;
142 }
140 err = ext4_reserve_inode_write(handle, inode, &iloc); 143 err = ext4_reserve_inode_write(handle, inode, &iloc);
141 if (err == 0) { 144 if (err == 0) {
142 inode->i_ctime = ext4_current_time(inode); 145 inode->i_ctime = ext4_current_time(inode);
@@ -144,6 +147,8 @@ flags_err:
144 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 147 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
145 } 148 }
146 ext4_journal_stop(handle); 149 ext4_journal_stop(handle);
150setversion_out:
151 mnt_drop_write(filp->f_path.mnt);
147 return err; 152 return err;
148 } 153 }
149#ifdef CONFIG_JBD2_DEBUG 154#ifdef CONFIG_JBD2_DEBUG
@@ -179,19 +184,21 @@ flags_err:
179 } 184 }
180 return -ENOTTY; 185 return -ENOTTY;
181 case EXT4_IOC_SETRSVSZ: { 186 case EXT4_IOC_SETRSVSZ: {
187 int err;
182 188
183 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 189 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
184 return -ENOTTY; 190 return -ENOTTY;
185 191
186 if (IS_RDONLY(inode))
187 return -EROFS;
188
189 if (!is_owner_or_cap(inode)) 192 if (!is_owner_or_cap(inode))
190 return -EACCES; 193 return -EACCES;
191 194
192 if (get_user(rsv_window_size, (int __user *)arg)) 195 if (get_user(rsv_window_size, (int __user *)arg))
193 return -EFAULT; 196 return -EFAULT;
194 197
198 err = mnt_want_write(filp->f_path.mnt);
199 if (err)
200 return err;
201
195 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) 202 if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS)
196 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; 203 rsv_window_size = EXT4_MAX_RESERVE_BLOCKS;
197 204
@@ -208,6 +215,7 @@ flags_err:
208 rsv->rsv_goal_size = rsv_window_size; 215 rsv->rsv_goal_size = rsv_window_size;
209 } 216 }
210 up_write(&ei->i_data_sem); 217 up_write(&ei->i_data_sem);
218 mnt_drop_write(filp->f_path.mnt);
211 return 0; 219 return 0;
212 } 220 }
213 case EXT4_IOC_GROUP_EXTEND: { 221 case EXT4_IOC_GROUP_EXTEND: {
@@ -218,16 +226,18 @@ flags_err:
218 if (!capable(CAP_SYS_RESOURCE)) 226 if (!capable(CAP_SYS_RESOURCE))
219 return -EPERM; 227 return -EPERM;
220 228
221 if (IS_RDONLY(inode))
222 return -EROFS;
223
224 if (get_user(n_blocks_count, (__u32 __user *)arg)) 229 if (get_user(n_blocks_count, (__u32 __user *)arg))
225 return -EFAULT; 230 return -EFAULT;
226 231
232 err = mnt_want_write(filp->f_path.mnt);
233 if (err)
234 return err;
235
227 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 236 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
228 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 237 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
229 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 238 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
230 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 239 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
240 mnt_drop_write(filp->f_path.mnt);
231 241
232 return err; 242 return err;
233 } 243 }
@@ -239,17 +249,19 @@ flags_err:
239 if (!capable(CAP_SYS_RESOURCE)) 249 if (!capable(CAP_SYS_RESOURCE))
240 return -EPERM; 250 return -EPERM;
241 251
242 if (IS_RDONLY(inode))
243 return -EROFS;
244
245 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 252 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
246 sizeof(input))) 253 sizeof(input)))
247 return -EFAULT; 254 return -EFAULT;
248 255
256 err = mnt_want_write(filp->f_path.mnt);
257 if (err)
258 return err;
259
249 err = ext4_group_add(sb, &input); 260 err = ext4_group_add(sb, &input);
250 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 261 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
251 jbd2_journal_flush(EXT4_SB(sb)->s_journal); 262 jbd2_journal_flush(EXT4_SB(sb)->s_journal);
252 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 263 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
264 mnt_drop_write(filp->f_path.mnt);
253 265
254 return err; 266 return err;
255 } 267 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c614175876e0..2a3bed967041 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -8,6 +8,7 @@
8 8
9#include <linux/capability.h> 9#include <linux/capability.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/msdos_fs.h> 13#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 14#include <linux/smp_lock.h>
@@ -46,10 +47,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
46 47
47 mutex_lock(&inode->i_mutex); 48 mutex_lock(&inode->i_mutex);
48 49
49 if (IS_RDONLY(inode)) { 50 err = mnt_want_write(filp->f_path.mnt);
50 err = -EROFS; 51 if (err)
51 goto up; 52 goto up_no_drop_write;
52 }
53 53
54 /* 54 /*
55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also 55 * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -105,7 +105,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
105 105
106 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 106 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED;
107 mark_inode_dirty(inode); 107 mark_inode_dirty(inode);
108 up: 108up:
109 mnt_drop_write(filp->f_path.mnt);
110up_no_drop_write:
109 mutex_unlock(&inode->i_mutex); 111 mutex_unlock(&inode->i_mutex);
110 return err; 112 return err;
111 } 113 }
diff --git a/fs/file_table.c b/fs/file_table.c
index 986ff4ed0a7c..7a0a9b872251 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -42,6 +42,7 @@ static inline void file_free_rcu(struct rcu_head *head)
42static inline void file_free(struct file *f) 42static inline void file_free(struct file *f)
43{ 43{
44 percpu_counter_dec(&nr_files); 44 percpu_counter_dec(&nr_files);
45 file_check_state(f);
45 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 46 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
46} 47}
47 48
@@ -199,6 +200,18 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
199 file->f_mapping = dentry->d_inode->i_mapping; 200 file->f_mapping = dentry->d_inode->i_mapping;
200 file->f_mode = mode; 201 file->f_mode = mode;
201 file->f_op = fop; 202 file->f_op = fop;
203
204 /*
205 * These mounts don't really matter in practice
206 * for r/o bind mounts. They aren't userspace-
207 * visible. We do this for consistency, and so
208 * that we can do debugging checks at __fput()
209 */
210 if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
211 file_take_write(file);
212 error = mnt_want_write(mnt);
213 WARN_ON(error);
214 }
202 return error; 215 return error;
203} 216}
204EXPORT_SYMBOL(init_file); 217EXPORT_SYMBOL(init_file);
@@ -211,6 +224,31 @@ void fput(struct file *file)
211 224
212EXPORT_SYMBOL(fput); 225EXPORT_SYMBOL(fput);
213 226
227/**
228 * drop_file_write_access - give up ability to write to a file
229 * @file: the file to which we will stop writing
230 *
231 * This is a central place which will give up the ability
232 * to write to @file, along with access to write through
233 * its vfsmount.
234 */
235void drop_file_write_access(struct file *file)
236{
237 struct vfsmount *mnt = file->f_path.mnt;
238 struct dentry *dentry = file->f_path.dentry;
239 struct inode *inode = dentry->d_inode;
240
241 put_write_access(inode);
242
243 if (special_file(inode->i_mode))
244 return;
245 if (file_check_writeable(file) != 0)
246 return;
247 mnt_drop_write(mnt);
248 file_release_write(file);
249}
250EXPORT_SYMBOL_GPL(drop_file_write_access);
251
214/* __fput is called from task context when aio completion releases the last 252/* __fput is called from task context when aio completion releases the last
215 * last use of a struct file *. Do not use otherwise. 253 * last use of a struct file *. Do not use otherwise.
216 */ 254 */
@@ -236,10 +274,10 @@ void __fput(struct file *file)
236 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 274 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
237 cdev_put(inode->i_cdev); 275 cdev_put(inode->i_cdev);
238 fops_put(file->f_op); 276 fops_put(file->f_op);
239 if (file->f_mode & FMODE_WRITE)
240 put_write_access(inode);
241 put_pid(file->f_owner.pid); 277 put_pid(file->f_owner.pid);
242 file_kill(file); 278 file_kill(file);
279 if (file->f_mode & FMODE_WRITE)
280 drop_file_write_access(file);
243 file->f_path.dentry = NULL; 281 file->f_path.dentry = NULL;
244 file->f_path.mnt = NULL; 282 file->f_path.mnt = NULL;
245 file_free(file); 283 file_free(file);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index b60c0affbec5..f457d2ca51ab 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -14,6 +14,7 @@
14 14
15#include <linux/capability.h> 15#include <linux/capability.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/mount.h>
17#include <linux/sched.h> 18#include <linux/sched.h>
18#include <linux/xattr.h> 19#include <linux/xattr.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
@@ -35,25 +36,32 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
35 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ 36 flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */
36 return put_user(flags, (int __user *)arg); 37 return put_user(flags, (int __user *)arg);
37 case HFSPLUS_IOC_EXT2_SETFLAGS: { 38 case HFSPLUS_IOC_EXT2_SETFLAGS: {
38 if (IS_RDONLY(inode)) 39 int err = 0;
39 return -EROFS; 40 err = mnt_want_write(filp->f_path.mnt);
40 41 if (err)
41 if (!is_owner_or_cap(inode)) 42 return err;
42 return -EACCES; 43
43 44 if (!is_owner_or_cap(inode)) {
44 if (get_user(flags, (int __user *)arg)) 45 err = -EACCES;
45 return -EFAULT; 46 goto setflags_out;
46 47 }
48 if (get_user(flags, (int __user *)arg)) {
49 err = -EFAULT;
50 goto setflags_out;
51 }
47 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || 52 if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) ||
48 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { 53 HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) {
49 if (!capable(CAP_LINUX_IMMUTABLE)) 54 if (!capable(CAP_LINUX_IMMUTABLE)) {
50 return -EPERM; 55 err = -EPERM;
56 goto setflags_out;
57 }
51 } 58 }
52 59
53 /* don't silently ignore unsupported ext2 flags */ 60 /* don't silently ignore unsupported ext2 flags */
54 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) 61 if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
55 return -EOPNOTSUPP; 62 err = -EOPNOTSUPP;
56 63 goto setflags_out;
64 }
57 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ 65 if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */
58 inode->i_flags |= S_IMMUTABLE; 66 inode->i_flags |= S_IMMUTABLE;
59 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; 67 HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE;
@@ -75,7 +83,9 @@ int hfsplus_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
75 83
76 inode->i_ctime = CURRENT_TIME_SEC; 84 inode->i_ctime = CURRENT_TIME_SEC;
77 mark_inode_dirty(inode); 85 mark_inode_dirty(inode);
78 return 0; 86setflags_out:
87 mnt_drop_write(filp->f_path.mnt);
88 return err;
79 } 89 }
80 default: 90 default:
81 return -ENOTTY; 91 return -ENOTTY;
diff --git a/fs/inode.c b/fs/inode.c
index 53245ffcf93d..27ee1af50d02 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1199,42 +1199,37 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
1199 struct inode *inode = dentry->d_inode; 1199 struct inode *inode = dentry->d_inode;
1200 struct timespec now; 1200 struct timespec now;
1201 1201
1202 if (inode->i_flags & S_NOATIME) 1202 if (mnt_want_write(mnt))
1203 return; 1203 return;
1204 if (inode->i_flags & S_NOATIME)
1205 goto out;
1204 if (IS_NOATIME(inode)) 1206 if (IS_NOATIME(inode))
1205 return; 1207 goto out;
1206 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) 1208 if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
1207 return; 1209 goto out;
1208 1210
1209 /* 1211 if (mnt->mnt_flags & MNT_NOATIME)
1210 * We may have a NULL vfsmount when coming from NFSD 1212 goto out;
1211 */ 1213 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
1212 if (mnt) { 1214 goto out;
1213 if (mnt->mnt_flags & MNT_NOATIME) 1215 if (mnt->mnt_flags & MNT_RELATIME) {
1214 return; 1216 /*
1215 if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) 1217 * With relative atime, only update atime if the previous
1216 return; 1218 * atime is earlier than either the ctime or mtime.
1217 1219 */
1218 if (mnt->mnt_flags & MNT_RELATIME) { 1220 if (timespec_compare(&inode->i_mtime, &inode->i_atime) < 0 &&
1219 /* 1221 timespec_compare(&inode->i_ctime, &inode->i_atime) < 0)
1220 * With relative atime, only update atime if the 1222 goto out;
1221 * previous atime is earlier than either the ctime or
1222 * mtime.
1223 */
1224 if (timespec_compare(&inode->i_mtime,
1225 &inode->i_atime) < 0 &&
1226 timespec_compare(&inode->i_ctime,
1227 &inode->i_atime) < 0)
1228 return;
1229 }
1230 } 1223 }
1231 1224
1232 now = current_fs_time(inode->i_sb); 1225 now = current_fs_time(inode->i_sb);
1233 if (timespec_equal(&inode->i_atime, &now)) 1226 if (timespec_equal(&inode->i_atime, &now))
1234 return; 1227 goto out;
1235 1228
1236 inode->i_atime = now; 1229 inode->i_atime = now;
1237 mark_inode_dirty_sync(inode); 1230 mark_inode_dirty_sync(inode);
1231out:
1232 mnt_drop_write(mnt);
1238} 1233}
1239EXPORT_SYMBOL(touch_atime); 1234EXPORT_SYMBOL(touch_atime);
1240 1235
@@ -1255,10 +1250,13 @@ void file_update_time(struct file *file)
1255 struct inode *inode = file->f_path.dentry->d_inode; 1250 struct inode *inode = file->f_path.dentry->d_inode;
1256 struct timespec now; 1251 struct timespec now;
1257 int sync_it = 0; 1252 int sync_it = 0;
1253 int err;
1258 1254
1259 if (IS_NOCMTIME(inode)) 1255 if (IS_NOCMTIME(inode))
1260 return; 1256 return;
1261 if (IS_RDONLY(inode)) 1257
1258 err = mnt_want_write(file->f_path.mnt);
1259 if (err)
1262 return; 1260 return;
1263 1261
1264 now = current_fs_time(inode->i_sb); 1262 now = current_fs_time(inode->i_sb);
@@ -1279,6 +1277,7 @@ void file_update_time(struct file *file)
1279 1277
1280 if (sync_it) 1278 if (sync_it)
1281 mark_inode_dirty_sync(inode); 1279 mark_inode_dirty_sync(inode);
1280 mnt_drop_write(file->f_path.mnt);
1282} 1281}
1283 1282
1284EXPORT_SYMBOL(file_update_time); 1283EXPORT_SYMBOL(file_update_time);
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index a1f8e375ad21..afe222bf300f 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -8,6 +8,7 @@
8#include <linux/fs.h> 8#include <linux/fs.h>
9#include <linux/ctype.h> 9#include <linux/ctype.h>
10#include <linux/capability.h> 10#include <linux/capability.h>
11#include <linux/mount.h>
11#include <linux/time.h> 12#include <linux/time.h>
12#include <linux/sched.h> 13#include <linux/sched.h>
13#include <asm/current.h> 14#include <asm/current.h>
@@ -65,23 +66,30 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
65 return put_user(flags, (int __user *) arg); 66 return put_user(flags, (int __user *) arg);
66 case JFS_IOC_SETFLAGS: { 67 case JFS_IOC_SETFLAGS: {
67 unsigned int oldflags; 68 unsigned int oldflags;
69 int err;
68 70
69 if (IS_RDONLY(inode)) 71 err = mnt_want_write(filp->f_path.mnt);
70 return -EROFS; 72 if (err)
73 return err;
71 74
72 if (!is_owner_or_cap(inode)) 75 if (!is_owner_or_cap(inode)) {
73 return -EACCES; 76 err = -EACCES;
74 77 goto setflags_out;
75 if (get_user(flags, (int __user *) arg)) 78 }
76 return -EFAULT; 79 if (get_user(flags, (int __user *) arg)) {
80 err = -EFAULT;
81 goto setflags_out;
82 }
77 83
78 flags = jfs_map_ext2(flags, 1); 84 flags = jfs_map_ext2(flags, 1);
79 if (!S_ISDIR(inode->i_mode)) 85 if (!S_ISDIR(inode->i_mode))
80 flags &= ~JFS_DIRSYNC_FL; 86 flags &= ~JFS_DIRSYNC_FL;
81 87
82 /* Is it quota file? Do not allow user to mess with it */ 88 /* Is it quota file? Do not allow user to mess with it */
83 if (IS_NOQUOTA(inode)) 89 if (IS_NOQUOTA(inode)) {
84 return -EPERM; 90 err = -EPERM;
91 goto setflags_out;
92 }
85 93
86 /* Lock against other parallel changes of flags */ 94 /* Lock against other parallel changes of flags */
87 mutex_lock(&inode->i_mutex); 95 mutex_lock(&inode->i_mutex);
@@ -98,7 +106,8 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
98 (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { 106 (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
99 if (!capable(CAP_LINUX_IMMUTABLE)) { 107 if (!capable(CAP_LINUX_IMMUTABLE)) {
100 mutex_unlock(&inode->i_mutex); 108 mutex_unlock(&inode->i_mutex);
101 return -EPERM; 109 err = -EPERM;
110 goto setflags_out;
102 } 111 }
103 } 112 }
104 113
@@ -110,7 +119,9 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
110 mutex_unlock(&inode->i_mutex); 119 mutex_unlock(&inode->i_mutex);
111 inode->i_ctime = CURRENT_TIME_SEC; 120 inode->i_ctime = CURRENT_TIME_SEC;
112 mark_inode_dirty(inode); 121 mark_inode_dirty(inode);
113 return 0; 122setflags_out:
123 mnt_drop_write(filp->f_path.mnt);
124 return err;
114 } 125 }
115 default: 126 default:
116 return -ENOTTY; 127 return -ENOTTY;
diff --git a/fs/namei.c b/fs/namei.c
index 8cf9bb9c2fc0..e179f71bfcb0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1623 return -EACCES; 1623 return -EACCES;
1624 1624
1625 flag &= ~O_TRUNC; 1625 flag &= ~O_TRUNC;
1626 } else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE)) 1626 }
1627 return -EROFS;
1628 1627
1629 error = vfs_permission(nd, acc_mode); 1628 error = vfs_permission(nd, acc_mode);
1630 if (error) 1629 if (error)
@@ -1677,7 +1676,12 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
1677 return 0; 1676 return 0;
1678} 1677}
1679 1678
1680static int open_namei_create(struct nameidata *nd, struct path *path, 1679/*
1680 * Be careful about ever adding any more callers of this
1681 * function. Its flags must be in the namei format, not
1682 * what get passed to sys_open().
1683 */
1684static int __open_namei_create(struct nameidata *nd, struct path *path,
1681 int flag, int mode) 1685 int flag, int mode)
1682{ 1686{
1683 int error; 1687 int error;
@@ -1696,26 +1700,56 @@ static int open_namei_create(struct nameidata *nd, struct path *path,
1696} 1700}
1697 1701
1698/* 1702/*
1699 * open_namei() 1703 * Note that while the flag value (low two bits) for sys_open means:
1704 * 00 - read-only
1705 * 01 - write-only
1706 * 10 - read-write
1707 * 11 - special
1708 * it is changed into
1709 * 00 - no permissions needed
1710 * 01 - read-permission
1711 * 10 - write-permission
1712 * 11 - read-write
1713 * for the internal routines (ie open_namei()/follow_link() etc)
1714 * This is more logical, and also allows the 00 "no perm needed"
1715 * to be used for symlinks (where the permissions are checked
1716 * later).
1700 * 1717 *
1701 * namei for open - this is in fact almost the whole open-routine. 1718*/
1702 * 1719static inline int open_to_namei_flags(int flag)
1703 * Note that the low bits of "flag" aren't the same as in the open 1720{
1704 * system call - they are 00 - no permissions needed 1721 if ((flag+1) & O_ACCMODE)
1705 * 01 - read permission needed 1722 flag++;
1706 * 10 - write permission needed 1723 return flag;
1707 * 11 - read/write permissions needed 1724}
1708 * which is a lot more logical, and also allows the "no perm" needed 1725
1709 * for symlinks (where the permissions are checked later). 1726static int open_will_write_to_fs(int flag, struct inode *inode)
1710 * SMP-safe 1727{
1728 /*
1729 * We'll never write to the fs underlying
1730 * a device file.
1731 */
1732 if (special_file(inode->i_mode))
1733 return 0;
1734 return (flag & O_TRUNC);
1735}
1736
1737/*
1738 * Note that the low bits of the passed in "open_flag"
1739 * are not the same as in the local variable "flag". See
1740 * open_to_namei_flags() for more details.
1711 */ 1741 */
1712int open_namei(int dfd, const char *pathname, int flag, 1742struct file *do_filp_open(int dfd, const char *pathname,
1713 int mode, struct nameidata *nd) 1743 int open_flag, int mode)
1714{ 1744{
1745 struct file *filp;
1746 struct nameidata nd;
1715 int acc_mode, error; 1747 int acc_mode, error;
1716 struct path path; 1748 struct path path;
1717 struct dentry *dir; 1749 struct dentry *dir;
1718 int count = 0; 1750 int count = 0;
1751 int will_write;
1752 int flag = open_to_namei_flags(open_flag);
1719 1753
1720 acc_mode = ACC_MODE(flag); 1754 acc_mode = ACC_MODE(flag);
1721 1755
@@ -1733,18 +1767,19 @@ int open_namei(int dfd, const char *pathname, int flag,
1733 */ 1767 */
1734 if (!(flag & O_CREAT)) { 1768 if (!(flag & O_CREAT)) {
1735 error = path_lookup_open(dfd, pathname, lookup_flags(flag), 1769 error = path_lookup_open(dfd, pathname, lookup_flags(flag),
1736 nd, flag); 1770 &nd, flag);
1737 if (error) 1771 if (error)
1738 return error; 1772 return ERR_PTR(error);
1739 goto ok; 1773 goto ok;
1740 } 1774 }
1741 1775
1742 /* 1776 /*
1743 * Create - we need to know the parent. 1777 * Create - we need to know the parent.
1744 */ 1778 */
1745 error = path_lookup_create(dfd,pathname,LOOKUP_PARENT,nd,flag,mode); 1779 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT,
1780 &nd, flag, mode);
1746 if (error) 1781 if (error)
1747 return error; 1782 return ERR_PTR(error);
1748 1783
1749 /* 1784 /*
1750 * We have the parent and last component. First of all, check 1785 * We have the parent and last component. First of all, check
@@ -1752,14 +1787,14 @@ int open_namei(int dfd, const char *pathname, int flag,
1752 * will not do. 1787 * will not do.
1753 */ 1788 */
1754 error = -EISDIR; 1789 error = -EISDIR;
1755 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) 1790 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
1756 goto exit; 1791 goto exit;
1757 1792
1758 dir = nd->path.dentry; 1793 dir = nd.path.dentry;
1759 nd->flags &= ~LOOKUP_PARENT; 1794 nd.flags &= ~LOOKUP_PARENT;
1760 mutex_lock(&dir->d_inode->i_mutex); 1795 mutex_lock(&dir->d_inode->i_mutex);
1761 path.dentry = lookup_hash(nd); 1796 path.dentry = lookup_hash(&nd);
1762 path.mnt = nd->path.mnt; 1797 path.mnt = nd.path.mnt;
1763 1798
1764do_last: 1799do_last:
1765 error = PTR_ERR(path.dentry); 1800 error = PTR_ERR(path.dentry);
@@ -1768,18 +1803,31 @@ do_last:
1768 goto exit; 1803 goto exit;
1769 } 1804 }
1770 1805
1771 if (IS_ERR(nd->intent.open.file)) { 1806 if (IS_ERR(nd.intent.open.file)) {
1772 mutex_unlock(&dir->d_inode->i_mutex); 1807 error = PTR_ERR(nd.intent.open.file);
1773 error = PTR_ERR(nd->intent.open.file); 1808 goto exit_mutex_unlock;
1774 goto exit_dput;
1775 } 1809 }
1776 1810
1777 /* Negative dentry, just create the file */ 1811 /* Negative dentry, just create the file */
1778 if (!path.dentry->d_inode) { 1812 if (!path.dentry->d_inode) {
1779 error = open_namei_create(nd, &path, flag, mode); 1813 /*
1814 * This write is needed to ensure that a
1815 * ro->rw transition does not occur between
1816 * the time when the file is created and when
1817 * a permanent write count is taken through
1818 * the 'struct file' in nameidata_to_filp().
1819 */
1820 error = mnt_want_write(nd.path.mnt);
1780 if (error) 1821 if (error)
1822 goto exit_mutex_unlock;
1823 error = __open_namei_create(&nd, &path, flag, mode);
1824 if (error) {
1825 mnt_drop_write(nd.path.mnt);
1781 goto exit; 1826 goto exit;
1782 return 0; 1827 }
1828 filp = nameidata_to_filp(&nd, open_flag);
1829 mnt_drop_write(nd.path.mnt);
1830 return filp;
1783 } 1831 }
1784 1832
1785 /* 1833 /*
@@ -1804,23 +1852,52 @@ do_last:
1804 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) 1852 if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link)
1805 goto do_link; 1853 goto do_link;
1806 1854
1807 path_to_nameidata(&path, nd); 1855 path_to_nameidata(&path, &nd);
1808 error = -EISDIR; 1856 error = -EISDIR;
1809 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode)) 1857 if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
1810 goto exit; 1858 goto exit;
1811ok: 1859ok:
1812 error = may_open(nd, acc_mode, flag); 1860 /*
1813 if (error) 1861 * Consider:
1862 * 1. may_open() truncates a file
1863 * 2. a rw->ro mount transition occurs
1864 * 3. nameidata_to_filp() fails due to
1865 * the ro mount.
1866 * That would be inconsistent, and should
1867 * be avoided. Taking this mnt write here
1868 * ensures that (2) can not occur.
1869 */
1870 will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
1871 if (will_write) {
1872 error = mnt_want_write(nd.path.mnt);
1873 if (error)
1874 goto exit;
1875 }
1876 error = may_open(&nd, acc_mode, flag);
1877 if (error) {
1878 if (will_write)
1879 mnt_drop_write(nd.path.mnt);
1814 goto exit; 1880 goto exit;
1815 return 0; 1881 }
1882 filp = nameidata_to_filp(&nd, open_flag);
1883 /*
1884 * It is now safe to drop the mnt write
1885 * because the filp has had a write taken
1886 * on its behalf.
1887 */
1888 if (will_write)
1889 mnt_drop_write(nd.path.mnt);
1890 return filp;
1816 1891
1892exit_mutex_unlock:
1893 mutex_unlock(&dir->d_inode->i_mutex);
1817exit_dput: 1894exit_dput:
1818 path_put_conditional(&path, nd); 1895 path_put_conditional(&path, &nd);
1819exit: 1896exit:
1820 if (!IS_ERR(nd->intent.open.file)) 1897 if (!IS_ERR(nd.intent.open.file))
1821 release_open_intent(nd); 1898 release_open_intent(&nd);
1822 path_put(&nd->path); 1899 path_put(&nd.path);
1823 return error; 1900 return ERR_PTR(error);
1824 1901
1825do_link: 1902do_link:
1826 error = -ELOOP; 1903 error = -ELOOP;
@@ -1836,43 +1913,60 @@ do_link:
1836 * stored in nd->last.name and we will have to putname() it when we 1913 * stored in nd->last.name and we will have to putname() it when we
1837 * are done. Procfs-like symlinks just set LAST_BIND. 1914 * are done. Procfs-like symlinks just set LAST_BIND.
1838 */ 1915 */
1839 nd->flags |= LOOKUP_PARENT; 1916 nd.flags |= LOOKUP_PARENT;
1840 error = security_inode_follow_link(path.dentry, nd); 1917 error = security_inode_follow_link(path.dentry, &nd);
1841 if (error) 1918 if (error)
1842 goto exit_dput; 1919 goto exit_dput;
1843 error = __do_follow_link(&path, nd); 1920 error = __do_follow_link(&path, &nd);
1844 if (error) { 1921 if (error) {
1845 /* Does someone understand code flow here? Or it is only 1922 /* Does someone understand code flow here? Or it is only
1846 * me so stupid? Anathema to whoever designed this non-sense 1923 * me so stupid? Anathema to whoever designed this non-sense
1847 * with "intent.open". 1924 * with "intent.open".
1848 */ 1925 */
1849 release_open_intent(nd); 1926 release_open_intent(&nd);
1850 return error; 1927 return ERR_PTR(error);
1851 } 1928 }
1852 nd->flags &= ~LOOKUP_PARENT; 1929 nd.flags &= ~LOOKUP_PARENT;
1853 if (nd->last_type == LAST_BIND) 1930 if (nd.last_type == LAST_BIND)
1854 goto ok; 1931 goto ok;
1855 error = -EISDIR; 1932 error = -EISDIR;
1856 if (nd->last_type != LAST_NORM) 1933 if (nd.last_type != LAST_NORM)
1857 goto exit; 1934 goto exit;
1858 if (nd->last.name[nd->last.len]) { 1935 if (nd.last.name[nd.last.len]) {
1859 __putname(nd->last.name); 1936 __putname(nd.last.name);
1860 goto exit; 1937 goto exit;
1861 } 1938 }
1862 error = -ELOOP; 1939 error = -ELOOP;
1863 if (count++==32) { 1940 if (count++==32) {
1864 __putname(nd->last.name); 1941 __putname(nd.last.name);
1865 goto exit; 1942 goto exit;
1866 } 1943 }
1867 dir = nd->path.dentry; 1944 dir = nd.path.dentry;
1868 mutex_lock(&dir->d_inode->i_mutex); 1945 mutex_lock(&dir->d_inode->i_mutex);
1869 path.dentry = lookup_hash(nd); 1946 path.dentry = lookup_hash(&nd);
1870 path.mnt = nd->path.mnt; 1947 path.mnt = nd.path.mnt;
1871 __putname(nd->last.name); 1948 __putname(nd.last.name);
1872 goto do_last; 1949 goto do_last;
1873} 1950}
1874 1951
1875/** 1952/**
1953 * filp_open - open file and return file pointer
1954 *
1955 * @filename: path to open
1956 * @flags: open flags as per the open(2) second argument
1957 * @mode: mode for the new file if O_CREAT is set, else ignored
1958 *
1959 * This is the helper to open a file from kernelspace if you really
1960 * have to. But in generally you should not do this, so please move
1961 * along, nothing to see here..
1962 */
1963struct file *filp_open(const char *filename, int flags, int mode)
1964{
1965 return do_filp_open(AT_FDCWD, filename, flags, mode);
1966}
1967EXPORT_SYMBOL(filp_open);
1968
1969/**
1876 * lookup_create - lookup a dentry, creating it if it doesn't exist 1970 * lookup_create - lookup a dentry, creating it if it doesn't exist
1877 * @nd: nameidata info 1971 * @nd: nameidata info
1878 * @is_dir: directory flag 1972 * @is_dir: directory flag
@@ -1945,6 +2039,23 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1945 return error; 2039 return error;
1946} 2040}
1947 2041
2042static int may_mknod(mode_t mode)
2043{
2044 switch (mode & S_IFMT) {
2045 case S_IFREG:
2046 case S_IFCHR:
2047 case S_IFBLK:
2048 case S_IFIFO:
2049 case S_IFSOCK:
2050 case 0: /* zero mode translates to S_IFREG */
2051 return 0;
2052 case S_IFDIR:
2053 return -EPERM;
2054 default:
2055 return -EINVAL;
2056 }
2057}
2058
1948asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, 2059asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1949 unsigned dev) 2060 unsigned dev)
1950{ 2061{
@@ -1963,12 +2074,19 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1963 if (error) 2074 if (error)
1964 goto out; 2075 goto out;
1965 dentry = lookup_create(&nd, 0); 2076 dentry = lookup_create(&nd, 0);
1966 error = PTR_ERR(dentry); 2077 if (IS_ERR(dentry)) {
1967 2078 error = PTR_ERR(dentry);
2079 goto out_unlock;
2080 }
1968 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2081 if (!IS_POSIXACL(nd.path.dentry->d_inode))
1969 mode &= ~current->fs->umask; 2082 mode &= ~current->fs->umask;
1970 if (!IS_ERR(dentry)) { 2083 error = may_mknod(mode);
1971 switch (mode & S_IFMT) { 2084 if (error)
2085 goto out_dput;
2086 error = mnt_want_write(nd.path.mnt);
2087 if (error)
2088 goto out_dput;
2089 switch (mode & S_IFMT) {
1972 case 0: case S_IFREG: 2090 case 0: case S_IFREG:
1973 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2091 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd);
1974 break; 2092 break;
@@ -1979,14 +2097,11 @@ asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
1979 case S_IFIFO: case S_IFSOCK: 2097 case S_IFIFO: case S_IFSOCK:
1980 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2098 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
1981 break; 2099 break;
1982 case S_IFDIR:
1983 error = -EPERM;
1984 break;
1985 default:
1986 error = -EINVAL;
1987 }
1988 dput(dentry);
1989 } 2100 }
2101 mnt_drop_write(nd.path.mnt);
2102out_dput:
2103 dput(dentry);
2104out_unlock:
1990 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2105 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1991 path_put(&nd.path); 2106 path_put(&nd.path);
1992out: 2107out:
@@ -2044,7 +2159,12 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2044 2159
2045 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2160 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2046 mode &= ~current->fs->umask; 2161 mode &= ~current->fs->umask;
2162 error = mnt_want_write(nd.path.mnt);
2163 if (error)
2164 goto out_dput;
2047 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2165 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
2166 mnt_drop_write(nd.path.mnt);
2167out_dput:
2048 dput(dentry); 2168 dput(dentry);
2049out_unlock: 2169out_unlock:
2050 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2170 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2151,7 +2271,12 @@ static long do_rmdir(int dfd, const char __user *pathname)
2151 error = PTR_ERR(dentry); 2271 error = PTR_ERR(dentry);
2152 if (IS_ERR(dentry)) 2272 if (IS_ERR(dentry))
2153 goto exit2; 2273 goto exit2;
2274 error = mnt_want_write(nd.path.mnt);
2275 if (error)
2276 goto exit3;
2154 error = vfs_rmdir(nd.path.dentry->d_inode, dentry); 2277 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
2278 mnt_drop_write(nd.path.mnt);
2279exit3:
2155 dput(dentry); 2280 dput(dentry);
2156exit2: 2281exit2:
2157 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2282 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2232,7 +2357,11 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2232 inode = dentry->d_inode; 2357 inode = dentry->d_inode;
2233 if (inode) 2358 if (inode)
2234 atomic_inc(&inode->i_count); 2359 atomic_inc(&inode->i_count);
2360 error = mnt_want_write(nd.path.mnt);
2361 if (error)
2362 goto exit2;
2235 error = vfs_unlink(nd.path.dentry->d_inode, dentry); 2363 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
2364 mnt_drop_write(nd.path.mnt);
2236 exit2: 2365 exit2:
2237 dput(dentry); 2366 dput(dentry);
2238 } 2367 }
@@ -2313,7 +2442,12 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2313 if (IS_ERR(dentry)) 2442 if (IS_ERR(dentry))
2314 goto out_unlock; 2443 goto out_unlock;
2315 2444
2445 error = mnt_want_write(nd.path.mnt);
2446 if (error)
2447 goto out_dput;
2316 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2448 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO);
2449 mnt_drop_write(nd.path.mnt);
2450out_dput:
2317 dput(dentry); 2451 dput(dentry);
2318out_unlock: 2452out_unlock:
2319 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2453 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2408,7 +2542,12 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2408 error = PTR_ERR(new_dentry); 2542 error = PTR_ERR(new_dentry);
2409 if (IS_ERR(new_dentry)) 2543 if (IS_ERR(new_dentry))
2410 goto out_unlock; 2544 goto out_unlock;
2545 error = mnt_want_write(nd.path.mnt);
2546 if (error)
2547 goto out_dput;
2411 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2548 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry);
2549 mnt_drop_write(nd.path.mnt);
2550out_dput:
2412 dput(new_dentry); 2551 dput(new_dentry);
2413out_unlock: 2552out_unlock:
2414 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2553 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
@@ -2634,8 +2773,12 @@ static int do_rename(int olddfd, const char *oldname,
2634 if (new_dentry == trap) 2773 if (new_dentry == trap)
2635 goto exit5; 2774 goto exit5;
2636 2775
2776 error = mnt_want_write(oldnd.path.mnt);
2777 if (error)
2778 goto exit5;
2637 error = vfs_rename(old_dir->d_inode, old_dentry, 2779 error = vfs_rename(old_dir->d_inode, old_dentry,
2638 new_dir->d_inode, new_dentry); 2780 new_dir->d_inode, new_dentry);
2781 mnt_drop_write(oldnd.path.mnt);
2639exit5: 2782exit5:
2640 dput(new_dentry); 2783 dput(new_dentry);
2641exit4: 2784exit4:
diff --git a/fs/namespace.c b/fs/namespace.c
index 94f026ec990a..678f7ce060f2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -17,6 +17,7 @@
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/acct.h> 18#include <linux/acct.h>
19#include <linux/capability.h> 19#include <linux/capability.h>
20#include <linux/cpumask.h>
20#include <linux/module.h> 21#include <linux/module.h>
21#include <linux/sysfs.h> 22#include <linux/sysfs.h>
22#include <linux/seq_file.h> 23#include <linux/seq_file.h>
@@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
55 return tmp & (HASH_SIZE - 1); 56 return tmp & (HASH_SIZE - 1);
56} 57}
57 58
59#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
60
58struct vfsmount *alloc_vfsmnt(const char *name) 61struct vfsmount *alloc_vfsmnt(const char *name)
59{ 62{
60 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 63 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
68 INIT_LIST_HEAD(&mnt->mnt_share); 71 INIT_LIST_HEAD(&mnt->mnt_share);
69 INIT_LIST_HEAD(&mnt->mnt_slave_list); 72 INIT_LIST_HEAD(&mnt->mnt_slave_list);
70 INIT_LIST_HEAD(&mnt->mnt_slave); 73 INIT_LIST_HEAD(&mnt->mnt_slave);
74 atomic_set(&mnt->__mnt_writers, 0);
71 if (name) { 75 if (name) {
72 int size = strlen(name) + 1; 76 int size = strlen(name) + 1;
73 char *newname = kmalloc(size, GFP_KERNEL); 77 char *newname = kmalloc(size, GFP_KERNEL);
@@ -80,6 +84,263 @@ struct vfsmount *alloc_vfsmnt(const char *name)
80 return mnt; 84 return mnt;
81} 85}
82 86
87/*
88 * Most r/o checks on a fs are for operations that take
89 * discrete amounts of time, like a write() or unlink().
90 * We must keep track of when those operations start
91 * (for permission checks) and when they end, so that
92 * we can determine when writes are able to occur to
93 * a filesystem.
94 */
95/*
96 * __mnt_is_readonly: check whether a mount is read-only
97 * @mnt: the mount to check for its write status
98 *
99 * This shouldn't be used directly ouside of the VFS.
100 * It does not guarantee that the filesystem will stay
101 * r/w, just that it is right *now*. This can not and
102 * should not be used in place of IS_RDONLY(inode).
103 * mnt_want/drop_write() will _keep_ the filesystem
104 * r/w.
105 */
106int __mnt_is_readonly(struct vfsmount *mnt)
107{
108 if (mnt->mnt_flags & MNT_READONLY)
109 return 1;
110 if (mnt->mnt_sb->s_flags & MS_RDONLY)
111 return 1;
112 return 0;
113}
114EXPORT_SYMBOL_GPL(__mnt_is_readonly);
115
116struct mnt_writer {
117 /*
118 * If holding multiple instances of this lock, they
119 * must be ordered by cpu number.
120 */
121 spinlock_t lock;
122 struct lock_class_key lock_class; /* compiles out with !lockdep */
123 unsigned long count;
124 struct vfsmount *mnt;
125} ____cacheline_aligned_in_smp;
126static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
127
128static int __init init_mnt_writers(void)
129{
130 int cpu;
131 for_each_possible_cpu(cpu) {
132 struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
133 spin_lock_init(&writer->lock);
134 lockdep_set_class(&writer->lock, &writer->lock_class);
135 writer->count = 0;
136 }
137 return 0;
138}
139fs_initcall(init_mnt_writers);
140
141static void unlock_mnt_writers(void)
142{
143 int cpu;
144 struct mnt_writer *cpu_writer;
145
146 for_each_possible_cpu(cpu) {
147 cpu_writer = &per_cpu(mnt_writers, cpu);
148 spin_unlock(&cpu_writer->lock);
149 }
150}
151
152static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
153{
154 if (!cpu_writer->mnt)
155 return;
156 /*
157 * This is in case anyone ever leaves an invalid,
158 * old ->mnt and a count of 0.
159 */
160 if (!cpu_writer->count)
161 return;
162 atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
163 cpu_writer->count = 0;
164}
165 /*
166 * must hold cpu_writer->lock
167 */
168static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
169 struct vfsmount *mnt)
170{
171 if (cpu_writer->mnt == mnt)
172 return;
173 __clear_mnt_count(cpu_writer);
174 cpu_writer->mnt = mnt;
175}
176
177/*
178 * Most r/o checks on a fs are for operations that take
179 * discrete amounts of time, like a write() or unlink().
180 * We must keep track of when those operations start
181 * (for permission checks) and when they end, so that
182 * we can determine when writes are able to occur to
183 * a filesystem.
184 */
185/**
186 * mnt_want_write - get write access to a mount
187 * @mnt: the mount on which to take a write
188 *
189 * This tells the low-level filesystem that a write is
190 * about to be performed to it, and makes sure that
191 * writes are allowed before returning success. When
192 * the write operation is finished, mnt_drop_write()
193 * must be called. This is effectively a refcount.
194 */
195int mnt_want_write(struct vfsmount *mnt)
196{
197 int ret = 0;
198 struct mnt_writer *cpu_writer;
199
200 cpu_writer = &get_cpu_var(mnt_writers);
201 spin_lock(&cpu_writer->lock);
202 if (__mnt_is_readonly(mnt)) {
203 ret = -EROFS;
204 goto out;
205 }
206 use_cpu_writer_for_mount(cpu_writer, mnt);
207 cpu_writer->count++;
208out:
209 spin_unlock(&cpu_writer->lock);
210 put_cpu_var(mnt_writers);
211 return ret;
212}
213EXPORT_SYMBOL_GPL(mnt_want_write);
214
215static void lock_mnt_writers(void)
216{
217 int cpu;
218 struct mnt_writer *cpu_writer;
219
220 for_each_possible_cpu(cpu) {
221 cpu_writer = &per_cpu(mnt_writers, cpu);
222 spin_lock(&cpu_writer->lock);
223 __clear_mnt_count(cpu_writer);
224 cpu_writer->mnt = NULL;
225 }
226}
227
228/*
229 * These per-cpu write counts are not guaranteed to have
230 * matched increments and decrements on any given cpu.
231 * A file open()ed for write on one cpu and close()d on
232 * another cpu will imbalance this count. Make sure it
233 * does not get too far out of whack.
234 */
235static void handle_write_count_underflow(struct vfsmount *mnt)
236{
237 if (atomic_read(&mnt->__mnt_writers) >=
238 MNT_WRITER_UNDERFLOW_LIMIT)
239 return;
240 /*
241 * It isn't necessary to hold all of the locks
242 * at the same time, but doing it this way makes
243 * us share a lot more code.
244 */
245 lock_mnt_writers();
246 /*
247 * vfsmount_lock is for mnt_flags.
248 */
249 spin_lock(&vfsmount_lock);
250 /*
251 * If coalescing the per-cpu writer counts did not
252 * get us back to a positive writer count, we have
253 * a bug.
254 */
255 if ((atomic_read(&mnt->__mnt_writers) < 0) &&
256 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
257 printk(KERN_DEBUG "leak detected on mount(%p) writers "
258 "count: %d\n",
259 mnt, atomic_read(&mnt->__mnt_writers));
260 WARN_ON(1);
261 /* use the flag to keep the dmesg spam down */
262 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
263 }
264 spin_unlock(&vfsmount_lock);
265 unlock_mnt_writers();
266}
267
268/**
269 * mnt_drop_write - give up write access to a mount
270 * @mnt: the mount on which to give up write access
271 *
272 * Tells the low-level filesystem that we are done
273 * performing writes to it. Must be matched with
274 * mnt_want_write() call above.
275 */
276void mnt_drop_write(struct vfsmount *mnt)
277{
278 int must_check_underflow = 0;
279 struct mnt_writer *cpu_writer;
280
281 cpu_writer = &get_cpu_var(mnt_writers);
282 spin_lock(&cpu_writer->lock);
283
284 use_cpu_writer_for_mount(cpu_writer, mnt);
285 if (cpu_writer->count > 0) {
286 cpu_writer->count--;
287 } else {
288 must_check_underflow = 1;
289 atomic_dec(&mnt->__mnt_writers);
290 }
291
292 spin_unlock(&cpu_writer->lock);
293 /*
294 * Logically, we could call this each time,
295 * but the __mnt_writers cacheline tends to
296 * be cold, and makes this expensive.
297 */
298 if (must_check_underflow)
299 handle_write_count_underflow(mnt);
300 /*
301 * This could be done right after the spinlock
302 * is taken because the spinlock keeps us on
303 * the cpu, and disables preemption. However,
304 * putting it here bounds the amount that
305 * __mnt_writers can underflow. Without it,
306 * we could theoretically wrap __mnt_writers.
307 */
308 put_cpu_var(mnt_writers);
309}
310EXPORT_SYMBOL_GPL(mnt_drop_write);
311
312static int mnt_make_readonly(struct vfsmount *mnt)
313{
314 int ret = 0;
315
316 lock_mnt_writers();
317 /*
318 * With all the locks held, this value is stable
319 */
320 if (atomic_read(&mnt->__mnt_writers) > 0) {
321 ret = -EBUSY;
322 goto out;
323 }
324 /*
325 * nobody can do a successful mnt_want_write() with all
326 * of the counts in MNT_DENIED_WRITE and the locks held.
327 */
328 spin_lock(&vfsmount_lock);
329 if (!ret)
330 mnt->mnt_flags |= MNT_READONLY;
331 spin_unlock(&vfsmount_lock);
332out:
333 unlock_mnt_writers();
334 return ret;
335}
336
337static void __mnt_unmake_readonly(struct vfsmount *mnt)
338{
339 spin_lock(&vfsmount_lock);
340 mnt->mnt_flags &= ~MNT_READONLY;
341 spin_unlock(&vfsmount_lock);
342}
343
83int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 344int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
84{ 345{
85 mnt->mnt_sb = sb; 346 mnt->mnt_sb = sb;
@@ -271,7 +532,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
271 532
272static inline void __mntput(struct vfsmount *mnt) 533static inline void __mntput(struct vfsmount *mnt)
273{ 534{
535 int cpu;
274 struct super_block *sb = mnt->mnt_sb; 536 struct super_block *sb = mnt->mnt_sb;
537 /*
538 * We don't have to hold all of the locks at the
539 * same time here because we know that we're the
540 * last reference to mnt and that no new writers
541 * can come in.
542 */
543 for_each_possible_cpu(cpu) {
544 struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
545 if (cpu_writer->mnt != mnt)
546 continue;
547 spin_lock(&cpu_writer->lock);
548 atomic_add(cpu_writer->count, &mnt->__mnt_writers);
549 cpu_writer->count = 0;
550 /*
551 * Might as well do this so that no one
552 * ever sees the pointer and expects
553 * it to be valid.
554 */
555 cpu_writer->mnt = NULL;
556 spin_unlock(&cpu_writer->lock);
557 }
558 /*
559 * This probably indicates that somebody messed
560 * up a mnt_want/drop_write() pair. If this
561 * happens, the filesystem was probably unable
562 * to make r/w->r/o transitions.
563 */
564 WARN_ON(atomic_read(&mnt->__mnt_writers));
275 dput(mnt->mnt_root); 565 dput(mnt->mnt_root);
276 free_vfsmnt(mnt); 566 free_vfsmnt(mnt);
277 deactivate_super(sb); 567 deactivate_super(sb);
@@ -417,7 +707,7 @@ static int show_vfsmnt(struct seq_file *m, void *v)
417 seq_putc(m, '.'); 707 seq_putc(m, '.');
418 mangle(m, mnt->mnt_sb->s_subtype); 708 mangle(m, mnt->mnt_sb->s_subtype);
419 } 709 }
420 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); 710 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
421 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) { 711 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
422 if (mnt->mnt_sb->s_flags & fs_infop->flag) 712 if (mnt->mnt_sb->s_flags & fs_infop->flag)
423 seq_puts(m, fs_infop->str); 713 seq_puts(m, fs_infop->str);
@@ -1019,6 +1309,23 @@ out:
1019 return err; 1309 return err;
1020} 1310}
1021 1311
1312static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1313{
1314 int error = 0;
1315 int readonly_request = 0;
1316
1317 if (ms_flags & MS_RDONLY)
1318 readonly_request = 1;
1319 if (readonly_request == __mnt_is_readonly(mnt))
1320 return 0;
1321
1322 if (readonly_request)
1323 error = mnt_make_readonly(mnt);
1324 else
1325 __mnt_unmake_readonly(mnt);
1326 return error;
1327}
1328
1022/* 1329/*
1023 * change filesystem flags. dir should be a physical root of filesystem. 1330 * change filesystem flags. dir should be a physical root of filesystem.
1024 * If you've mounted a non-root directory somewhere and want to do remount 1331 * If you've mounted a non-root directory somewhere and want to do remount
@@ -1041,7 +1348,10 @@ static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags,
1041 return -EINVAL; 1348 return -EINVAL;
1042 1349
1043 down_write(&sb->s_umount); 1350 down_write(&sb->s_umount);
1044 err = do_remount_sb(sb, flags, data, 0); 1351 if (flags & MS_BIND)
1352 err = change_mount_flags(nd->path.mnt, flags);
1353 else
1354 err = do_remount_sb(sb, flags, data, 0);
1045 if (!err) 1355 if (!err)
1046 nd->path.mnt->mnt_flags = mnt_flags; 1356 nd->path.mnt->mnt_flags = mnt_flags;
1047 up_write(&sb->s_umount); 1357 up_write(&sb->s_umount);
@@ -1425,6 +1735,8 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1425 mnt_flags |= MNT_NODIRATIME; 1735 mnt_flags |= MNT_NODIRATIME;
1426 if (flags & MS_RELATIME) 1736 if (flags & MS_RELATIME)
1427 mnt_flags |= MNT_RELATIME; 1737 mnt_flags |= MNT_RELATIME;
1738 if (flags & MS_RDONLY)
1739 mnt_flags |= MNT_READONLY;
1428 1740
1429 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | 1741 flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
1430 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1742 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c67b4bdcf719..ad8f167e54bc 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -14,6 +14,7 @@
14#include <linux/ioctl.h> 14#include <linux/ioctl.h>
15#include <linux/time.h> 15#include <linux/time.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/mount.h>
17#include <linux/highuid.h> 18#include <linux/highuid.h>
18#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
19#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
@@ -261,7 +262,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
261} 262}
262#endif /* CONFIG_NCPFS_NLS */ 263#endif /* CONFIG_NCPFS_NLS */
263 264
264int ncp_ioctl(struct inode *inode, struct file *filp, 265static int __ncp_ioctl(struct inode *inode, struct file *filp,
265 unsigned int cmd, unsigned long arg) 266 unsigned int cmd, unsigned long arg)
266{ 267{
267 struct ncp_server *server = NCP_SERVER(inode); 268 struct ncp_server *server = NCP_SERVER(inode);
@@ -822,6 +823,57 @@ outrel:
822 return -EINVAL; 823 return -EINVAL;
823} 824}
824 825
826static int ncp_ioctl_need_write(unsigned int cmd)
827{
828 switch (cmd) {
829 case NCP_IOC_GET_FS_INFO:
830 case NCP_IOC_GET_FS_INFO_V2:
831 case NCP_IOC_NCPREQUEST:
832 case NCP_IOC_SETDENTRYTTL:
833 case NCP_IOC_SIGN_INIT:
834 case NCP_IOC_LOCKUNLOCK:
835 case NCP_IOC_SET_SIGN_WANTED:
836 return 1;
837 case NCP_IOC_GETOBJECTNAME:
838 case NCP_IOC_SETOBJECTNAME:
839 case NCP_IOC_GETPRIVATEDATA:
840 case NCP_IOC_SETPRIVATEDATA:
841 case NCP_IOC_SETCHARSETS:
842 case NCP_IOC_GETCHARSETS:
843 case NCP_IOC_CONN_LOGGED_IN:
844 case NCP_IOC_GETDENTRYTTL:
845 case NCP_IOC_GETMOUNTUID2:
846 case NCP_IOC_SIGN_WANTED:
847 case NCP_IOC_GETROOT:
848 case NCP_IOC_SETROOT:
849 return 0;
850 default:
851 /* unkown IOCTL command, assume write */
852 return 1;
853 }
854}
855
856int ncp_ioctl(struct inode *inode, struct file *filp,
857 unsigned int cmd, unsigned long arg)
858{
859 int ret;
860
861 if (ncp_ioctl_need_write(cmd)) {
862 /*
863 * inside the ioctl(), any failures which
864 * are because of file_permission() are
865 * -EACCESS, so it seems consistent to keep
866 * that here.
867 */
868 if (mnt_want_write(filp->f_path.mnt))
869 return -EACCES;
870 }
871 ret = __ncp_ioctl(inode, filp, cmd, arg);
872 if (ncp_ioctl_need_write(cmd))
873 mnt_drop_write(filp->f_path.mnt);
874 return ret;
875}
876
825#ifdef CONFIG_COMPAT 877#ifdef CONFIG_COMPAT
826long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 878long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
827{ 879{
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6cea7479c5b4..d9e30ac2798d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -967,7 +967,8 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
967 if (nd->flags & LOOKUP_DIRECTORY) 967 if (nd->flags & LOOKUP_DIRECTORY)
968 return 0; 968 return 0;
969 /* Are we trying to write to a read only partition? */ 969 /* Are we trying to write to a read only partition? */
970 if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) 970 if (__mnt_is_readonly(nd->path.mnt) &&
971 (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
971 return 0; 972 return 0;
972 return 1; 973 return 1;
973} 974}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c593db047d8b..c309c881bd4e 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -658,14 +658,19 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
658 return status; 658 return status;
659 } 659 }
660 } 660 }
661 status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
662 if (status)
663 return status;
661 status = nfs_ok; 664 status = nfs_ok;
662 if (setattr->sa_acl != NULL) 665 if (setattr->sa_acl != NULL)
663 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, 666 status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh,
664 setattr->sa_acl); 667 setattr->sa_acl);
665 if (status) 668 if (status)
666 return status; 669 goto out;
667 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 670 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
668 0, (time_t)0); 671 0, (time_t)0);
672out:
673 mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt);
669 return status; 674 return status;
670} 675}
671 676
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 1ff90625860f..145b3c877a27 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -46,6 +46,7 @@
46#include <linux/scatterlist.h> 46#include <linux/scatterlist.h>
47#include <linux/crypto.h> 47#include <linux/crypto.h>
48#include <linux/sched.h> 48#include <linux/sched.h>
49#include <linux/mount.h>
49 50
50#define NFSDDBG_FACILITY NFSDDBG_PROC 51#define NFSDDBG_FACILITY NFSDDBG_PROC
51 52
@@ -154,7 +155,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
154 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); 155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
155 goto out_put; 156 goto out_put;
156 } 157 }
158 status = mnt_want_write(rec_dir.path.mnt);
159 if (status)
160 goto out_put;
157 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); 161 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU);
162 mnt_drop_write(rec_dir.path.mnt);
158out_put: 163out_put:
159 dput(dentry); 164 dput(dentry);
160out_unlock: 165out_unlock:
@@ -313,12 +318,17 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
313 if (!rec_dir_init || !clp->cl_firststate) 318 if (!rec_dir_init || !clp->cl_firststate)
314 return; 319 return;
315 320
321 status = mnt_want_write(rec_dir.path.mnt);
322 if (status)
323 goto out;
316 clp->cl_firststate = 0; 324 clp->cl_firststate = 0;
317 nfs4_save_user(&uid, &gid); 325 nfs4_save_user(&uid, &gid);
318 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); 326 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
319 nfs4_reset_user(uid, gid); 327 nfs4_reset_user(uid, gid);
320 if (status == 0) 328 if (status == 0)
321 nfsd4_sync_rec_dir(); 329 nfsd4_sync_rec_dir();
330 mnt_drop_write(rec_dir.path.mnt);
331out:
322 if (status) 332 if (status)
323 printk("NFSD: Failed to remove expired client state directory" 333 printk("NFSD: Failed to remove expired client state directory"
324 " %.*s\n", HEXDIR_LEN, clp->cl_recdir); 334 " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
@@ -347,13 +357,17 @@ nfsd4_recdir_purge_old(void) {
347 357
348 if (!rec_dir_init) 358 if (!rec_dir_init)
349 return; 359 return;
360 status = mnt_want_write(rec_dir.path.mnt);
361 if (status)
362 goto out;
350 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); 363 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old);
351 if (status == 0) 364 if (status == 0)
352 nfsd4_sync_rec_dir(); 365 nfsd4_sync_rec_dir();
366 mnt_drop_write(rec_dir.path.mnt);
367out:
353 if (status) 368 if (status)
354 printk("nfsd4: failed to purge old clients from recovery" 369 printk("nfsd4: failed to purge old clients from recovery"
355 " directory %s\n", rec_dir.path.dentry->d_name.name); 370 " directory %s\n", rec_dir.path.dentry->d_name.name);
356 return;
357} 371}
358 372
359static int 373static int
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bcb97d8e8b8b..81a75f3081f4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -41,6 +41,7 @@
41#include <linux/sunrpc/svc.h> 41#include <linux/sunrpc/svc.h>
42#include <linux/nfsd/nfsd.h> 42#include <linux/nfsd/nfsd.h>
43#include <linux/nfsd/cache.h> 43#include <linux/nfsd/cache.h>
44#include <linux/file.h>
44#include <linux/mount.h> 45#include <linux/mount.h>
45#include <linux/workqueue.h> 46#include <linux/workqueue.h>
46#include <linux/smp_lock.h> 47#include <linux/smp_lock.h>
@@ -1239,7 +1240,7 @@ static inline void
1239nfs4_file_downgrade(struct file *filp, unsigned int share_access) 1240nfs4_file_downgrade(struct file *filp, unsigned int share_access)
1240{ 1241{
1241 if (share_access & NFS4_SHARE_ACCESS_WRITE) { 1242 if (share_access & NFS4_SHARE_ACCESS_WRITE) {
1242 put_write_access(filp->f_path.dentry->d_inode); 1243 drop_file_write_access(filp);
1243 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; 1244 filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
1244 } 1245 }
1245} 1246}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 46f59d5365a0..304bf5f643c9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1255,23 +1255,35 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1255 err = 0; 1255 err = 0;
1256 switch (type) { 1256 switch (type) {
1257 case S_IFREG: 1257 case S_IFREG:
1258 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1259 if (host_err)
1260 goto out_nfserr;
1258 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1261 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1259 break; 1262 break;
1260 case S_IFDIR: 1263 case S_IFDIR:
1264 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1265 if (host_err)
1266 goto out_nfserr;
1261 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1267 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1262 break; 1268 break;
1263 case S_IFCHR: 1269 case S_IFCHR:
1264 case S_IFBLK: 1270 case S_IFBLK:
1265 case S_IFIFO: 1271 case S_IFIFO:
1266 case S_IFSOCK: 1272 case S_IFSOCK:
1273 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1274 if (host_err)
1275 goto out_nfserr;
1267 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1276 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1268 break; 1277 break;
1269 default: 1278 default:
1270 printk("nfsd: bad file type %o in nfsd_create\n", type); 1279 printk("nfsd: bad file type %o in nfsd_create\n", type);
1271 host_err = -EINVAL; 1280 host_err = -EINVAL;
1281 goto out_nfserr;
1272 } 1282 }
1273 if (host_err < 0) 1283 if (host_err < 0) {
1284 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1274 goto out_nfserr; 1285 goto out_nfserr;
1286 }
1275 1287
1276 if (EX_ISSYNC(fhp->fh_export)) { 1288 if (EX_ISSYNC(fhp->fh_export)) {
1277 err = nfserrno(nfsd_sync_dir(dentry)); 1289 err = nfserrno(nfsd_sync_dir(dentry));
@@ -1282,6 +1294,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1282 err2 = nfsd_create_setattr(rqstp, resfhp, iap); 1294 err2 = nfsd_create_setattr(rqstp, resfhp, iap);
1283 if (err2) 1295 if (err2)
1284 err = err2; 1296 err = err2;
1297 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1285 /* 1298 /*
1286 * Update the file handle to get the new inode info. 1299 * Update the file handle to get the new inode info.
1287 */ 1300 */
@@ -1359,6 +1372,9 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1359 v_atime = verifier[1]&0x7fffffff; 1372 v_atime = verifier[1]&0x7fffffff;
1360 } 1373 }
1361 1374
1375 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1376 if (host_err)
1377 goto out_nfserr;
1362 if (dchild->d_inode) { 1378 if (dchild->d_inode) {
1363 err = 0; 1379 err = 0;
1364 1380
@@ -1390,12 +1406,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1390 case NFS3_CREATE_GUARDED: 1406 case NFS3_CREATE_GUARDED:
1391 err = nfserr_exist; 1407 err = nfserr_exist;
1392 } 1408 }
1409 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1393 goto out; 1410 goto out;
1394 } 1411 }
1395 1412
1396 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1413 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1397 if (host_err < 0) 1414 if (host_err < 0) {
1415 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1398 goto out_nfserr; 1416 goto out_nfserr;
1417 }
1399 if (created) 1418 if (created)
1400 *created = 1; 1419 *created = 1;
1401 1420
@@ -1420,6 +1439,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1420 if (err2) 1439 if (err2)
1421 err = err2; 1440 err = err2;
1422 1441
1442 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1423 /* 1443 /*
1424 * Update the filehandle to get the new inode info. 1444 * Update the filehandle to get the new inode info.
1425 */ 1445 */
@@ -1522,6 +1542,10 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1522 if (iap && (iap->ia_valid & ATTR_MODE)) 1542 if (iap && (iap->ia_valid & ATTR_MODE))
1523 mode = iap->ia_mode & S_IALLUGO; 1543 mode = iap->ia_mode & S_IALLUGO;
1524 1544
1545 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1546 if (host_err)
1547 goto out_nfserr;
1548
1525 if (unlikely(path[plen] != 0)) { 1549 if (unlikely(path[plen] != 0)) {
1526 char *path_alloced = kmalloc(plen+1, GFP_KERNEL); 1550 char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
1527 if (path_alloced == NULL) 1551 if (path_alloced == NULL)
@@ -1542,6 +1566,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1542 err = nfserrno(host_err); 1566 err = nfserrno(host_err);
1543 fh_unlock(fhp); 1567 fh_unlock(fhp);
1544 1568
1569 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1570
1545 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1571 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1546 dput(dnew); 1572 dput(dnew);
1547 if (err==0) err = cerr; 1573 if (err==0) err = cerr;
@@ -1592,6 +1618,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1592 dold = tfhp->fh_dentry; 1618 dold = tfhp->fh_dentry;
1593 dest = dold->d_inode; 1619 dest = dold->d_inode;
1594 1620
1621 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
1622 if (host_err) {
1623 err = nfserrno(host_err);
1624 goto out_dput;
1625 }
1595 host_err = vfs_link(dold, dirp, dnew); 1626 host_err = vfs_link(dold, dirp, dnew);
1596 if (!host_err) { 1627 if (!host_err) {
1597 if (EX_ISSYNC(ffhp->fh_export)) { 1628 if (EX_ISSYNC(ffhp->fh_export)) {
@@ -1605,7 +1636,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1605 else 1636 else
1606 err = nfserrno(host_err); 1637 err = nfserrno(host_err);
1607 } 1638 }
1608 1639 mnt_drop_write(tfhp->fh_export->ex_path.mnt);
1640out_dput:
1609 dput(dnew); 1641 dput(dnew);
1610out_unlock: 1642out_unlock:
1611 fh_unlock(ffhp); 1643 fh_unlock(ffhp);
@@ -1678,13 +1710,20 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1678 if (ndentry == trap) 1710 if (ndentry == trap)
1679 goto out_dput_new; 1711 goto out_dput_new;
1680 1712
1681#ifdef MSNFS 1713 if (svc_msnfs(ffhp) &&
1682 if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1683 ((atomic_read(&odentry->d_count) > 1) 1714 ((atomic_read(&odentry->d_count) > 1)
1684 || (atomic_read(&ndentry->d_count) > 1))) { 1715 || (atomic_read(&ndentry->d_count) > 1))) {
1685 host_err = -EPERM; 1716 host_err = -EPERM;
1686 } else 1717 goto out_dput_new;
1687#endif 1718 }
1719
1720 host_err = -EXDEV;
1721 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1722 goto out_dput_new;
1723 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
1724 if (host_err)
1725 goto out_dput_new;
1726
1688 host_err = vfs_rename(fdir, odentry, tdir, ndentry); 1727 host_err = vfs_rename(fdir, odentry, tdir, ndentry);
1689 if (!host_err && EX_ISSYNC(tfhp->fh_export)) { 1728 if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
1690 host_err = nfsd_sync_dir(tdentry); 1729 host_err = nfsd_sync_dir(tdentry);
@@ -1692,6 +1731,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1692 host_err = nfsd_sync_dir(fdentry); 1731 host_err = nfsd_sync_dir(fdentry);
1693 } 1732 }
1694 1733
1734 mnt_drop_write(ffhp->fh_export->ex_path.mnt);
1735
1695 out_dput_new: 1736 out_dput_new:
1696 dput(ndentry); 1737 dput(ndentry);
1697 out_dput_old: 1738 out_dput_old:
@@ -1750,6 +1791,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1750 if (!type) 1791 if (!type)
1751 type = rdentry->d_inode->i_mode & S_IFMT; 1792 type = rdentry->d_inode->i_mode & S_IFMT;
1752 1793
1794 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1795 if (host_err)
1796 goto out_nfserr;
1797
1753 if (type != S_IFDIR) { /* It's UNLINK */ 1798 if (type != S_IFDIR) { /* It's UNLINK */
1754#ifdef MSNFS 1799#ifdef MSNFS
1755 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1800 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
@@ -1765,10 +1810,12 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1765 dput(rdentry); 1810 dput(rdentry);
1766 1811
1767 if (host_err) 1812 if (host_err)
1768 goto out_nfserr; 1813 goto out_drop;
1769 if (EX_ISSYNC(fhp->fh_export)) 1814 if (EX_ISSYNC(fhp->fh_export))
1770 host_err = nfsd_sync_dir(dentry); 1815 host_err = nfsd_sync_dir(dentry);
1771 1816
1817out_drop:
1818 mnt_drop_write(fhp->fh_export->ex_path.mnt);
1772out_nfserr: 1819out_nfserr:
1773 err = nfserrno(host_err); 1820 err = nfserrno(host_err);
1774out: 1821out:
@@ -1865,7 +1912,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1865 inode->i_mode, 1912 inode->i_mode,
1866 IS_IMMUTABLE(inode)? " immut" : "", 1913 IS_IMMUTABLE(inode)? " immut" : "",
1867 IS_APPEND(inode)? " append" : "", 1914 IS_APPEND(inode)? " append" : "",
1868 IS_RDONLY(inode)? " ro" : ""); 1915 __mnt_is_readonly(exp->ex_path.mnt)? " ro" : "");
1869 dprintk(" owner %d/%d user %d/%d\n", 1916 dprintk(" owner %d/%d user %d/%d\n",
1870 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); 1917 inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
1871#endif 1918#endif
@@ -1876,7 +1923,8 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1876 */ 1923 */
1877 if (!(acc & MAY_LOCAL_ACCESS)) 1924 if (!(acc & MAY_LOCAL_ACCESS))
1878 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { 1925 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
1879 if (exp_rdonly(rqstp, exp) || IS_RDONLY(inode)) 1926 if (exp_rdonly(rqstp, exp) ||
1927 __mnt_is_readonly(exp->ex_path.mnt))
1880 return nfserr_rofs; 1928 return nfserr_rofs;
1881 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) 1929 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
1882 return nfserr_perm; 1930 return nfserr_perm;
@@ -2039,6 +2087,9 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2039 } else 2087 } else
2040 size = 0; 2088 size = 0;
2041 2089
2090 error = mnt_want_write(fhp->fh_export->ex_path.mnt);
2091 if (error)
2092 goto getout;
2042 if (size) 2093 if (size)
2043 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0); 2094 error = vfs_setxattr(fhp->fh_dentry, name, value, size, 0);
2044 else { 2095 else {
@@ -2050,6 +2101,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2050 error = 0; 2101 error = 0;
2051 } 2102 }
2052 } 2103 }
2104 mnt_drop_write(fhp->fh_export->ex_path.mnt);
2053 2105
2054getout: 2106getout:
2055 kfree(value); 2107 kfree(value);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b413166dd163..7b142f0ce995 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -60,10 +60,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
60 goto bail; 60 goto bail;
61 } 61 }
62 62
63 status = -EROFS;
64 if (IS_RDONLY(inode))
65 goto bail_unlock;
66
67 status = -EACCES; 63 status = -EACCES;
68 if (!is_owner_or_cap(inode)) 64 if (!is_owner_or_cap(inode))
69 goto bail_unlock; 65 goto bail_unlock;
@@ -134,8 +130,13 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
134 if (get_user(flags, (int __user *) arg)) 130 if (get_user(flags, (int __user *) arg))
135 return -EFAULT; 131 return -EFAULT;
136 132
137 return ocfs2_set_inode_attr(inode, flags, 133 status = mnt_want_write(filp->f_path.mnt);
134 if (status)
135 return status;
136 status = ocfs2_set_inode_attr(inode, flags,
138 OCFS2_FL_MODIFIABLE); 137 OCFS2_FL_MODIFIABLE);
138 mnt_drop_write(filp->f_path.mnt);
139 return status;
139 case OCFS2_IOC_RESVSP: 140 case OCFS2_IOC_RESVSP:
140 case OCFS2_IOC_RESVSP64: 141 case OCFS2_IOC_RESVSP64:
141 case OCFS2_IOC_UNRESVSP: 142 case OCFS2_IOC_UNRESVSP:
diff --git a/fs/open.c b/fs/open.c
index 3fa4e4ffce4c..b70e7666bb2c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -244,21 +244,21 @@ static long do_sys_truncate(const char __user * path, loff_t length)
244 if (!S_ISREG(inode->i_mode)) 244 if (!S_ISREG(inode->i_mode))
245 goto dput_and_out; 245 goto dput_and_out;
246 246
247 error = vfs_permission(&nd, MAY_WRITE); 247 error = mnt_want_write(nd.path.mnt);
248 if (error) 248 if (error)
249 goto dput_and_out; 249 goto dput_and_out;
250 250
251 error = -EROFS; 251 error = vfs_permission(&nd, MAY_WRITE);
252 if (IS_RDONLY(inode)) 252 if (error)
253 goto dput_and_out; 253 goto mnt_drop_write_and_out;
254 254
255 error = -EPERM; 255 error = -EPERM;
256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 256 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
257 goto dput_and_out; 257 goto mnt_drop_write_and_out;
258 258
259 error = get_write_access(inode); 259 error = get_write_access(inode);
260 if (error) 260 if (error)
261 goto dput_and_out; 261 goto mnt_drop_write_and_out;
262 262
263 /* 263 /*
264 * Make sure that there are no leases. get_write_access() protects 264 * Make sure that there are no leases. get_write_access() protects
@@ -276,6 +276,8 @@ static long do_sys_truncate(const char __user * path, loff_t length)
276 276
277put_write_and_out: 277put_write_and_out:
278 put_write_access(inode); 278 put_write_access(inode);
279mnt_drop_write_and_out:
280 mnt_drop_write(nd.path.mnt);
279dput_and_out: 281dput_and_out:
280 path_put(&nd.path); 282 path_put(&nd.path);
281out: 283out:
@@ -457,8 +459,17 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
457 if(res || !(mode & S_IWOTH) || 459 if(res || !(mode & S_IWOTH) ||
458 special_file(nd.path.dentry->d_inode->i_mode)) 460 special_file(nd.path.dentry->d_inode->i_mode))
459 goto out_path_release; 461 goto out_path_release;
460 462 /*
461 if(IS_RDONLY(nd.path.dentry->d_inode)) 463 * This is a rare case where using __mnt_is_readonly()
464 * is OK without a mnt_want/drop_write() pair. Since
465 * no actual write to the fs is performed here, we do
466 * not need to telegraph to that to anyone.
467 *
468 * By doing this, we accept that this access is
469 * inherently racy and know that the fs may change
470 * state before we even see this result.
471 */
472 if (__mnt_is_readonly(nd.path.mnt))
462 res = -EROFS; 473 res = -EROFS;
463 474
464out_path_release: 475out_path_release:
@@ -567,12 +578,12 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
567 578
568 audit_inode(NULL, dentry); 579 audit_inode(NULL, dentry);
569 580
570 err = -EROFS; 581 err = mnt_want_write(file->f_path.mnt);
571 if (IS_RDONLY(inode)) 582 if (err)
572 goto out_putf; 583 goto out_putf;
573 err = -EPERM; 584 err = -EPERM;
574 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 585 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
575 goto out_putf; 586 goto out_drop_write;
576 mutex_lock(&inode->i_mutex); 587 mutex_lock(&inode->i_mutex);
577 if (mode == (mode_t) -1) 588 if (mode == (mode_t) -1)
578 mode = inode->i_mode; 589 mode = inode->i_mode;
@@ -581,6 +592,8 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
581 err = notify_change(dentry, &newattrs); 592 err = notify_change(dentry, &newattrs);
582 mutex_unlock(&inode->i_mutex); 593 mutex_unlock(&inode->i_mutex);
583 594
595out_drop_write:
596 mnt_drop_write(file->f_path.mnt);
584out_putf: 597out_putf:
585 fput(file); 598 fput(file);
586out: 599out:
@@ -600,13 +613,13 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
600 goto out; 613 goto out;
601 inode = nd.path.dentry->d_inode; 614 inode = nd.path.dentry->d_inode;
602 615
603 error = -EROFS; 616 error = mnt_want_write(nd.path.mnt);
604 if (IS_RDONLY(inode)) 617 if (error)
605 goto dput_and_out; 618 goto dput_and_out;
606 619
607 error = -EPERM; 620 error = -EPERM;
608 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 621 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
609 goto dput_and_out; 622 goto out_drop_write;
610 623
611 mutex_lock(&inode->i_mutex); 624 mutex_lock(&inode->i_mutex);
612 if (mode == (mode_t) -1) 625 if (mode == (mode_t) -1)
@@ -616,6 +629,8 @@ asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
616 error = notify_change(nd.path.dentry, &newattrs); 629 error = notify_change(nd.path.dentry, &newattrs);
617 mutex_unlock(&inode->i_mutex); 630 mutex_unlock(&inode->i_mutex);
618 631
632out_drop_write:
633 mnt_drop_write(nd.path.mnt);
619dput_and_out: 634dput_and_out:
620 path_put(&nd.path); 635 path_put(&nd.path);
621out: 636out:
@@ -638,9 +653,6 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
638 printk(KERN_ERR "chown_common: NULL inode\n"); 653 printk(KERN_ERR "chown_common: NULL inode\n");
639 goto out; 654 goto out;
640 } 655 }
641 error = -EROFS;
642 if (IS_RDONLY(inode))
643 goto out;
644 error = -EPERM; 656 error = -EPERM;
645 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 657 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
646 goto out; 658 goto out;
@@ -671,7 +683,12 @@ asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
671 error = user_path_walk(filename, &nd); 683 error = user_path_walk(filename, &nd);
672 if (error) 684 if (error)
673 goto out; 685 goto out;
686 error = mnt_want_write(nd.path.mnt);
687 if (error)
688 goto out_release;
674 error = chown_common(nd.path.dentry, user, group); 689 error = chown_common(nd.path.dentry, user, group);
690 mnt_drop_write(nd.path.mnt);
691out_release:
675 path_put(&nd.path); 692 path_put(&nd.path);
676out: 693out:
677 return error; 694 return error;
@@ -691,7 +708,12 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
691 error = __user_walk_fd(dfd, filename, follow, &nd); 708 error = __user_walk_fd(dfd, filename, follow, &nd);
692 if (error) 709 if (error)
693 goto out; 710 goto out;
711 error = mnt_want_write(nd.path.mnt);
712 if (error)
713 goto out_release;
694 error = chown_common(nd.path.dentry, user, group); 714 error = chown_common(nd.path.dentry, user, group);
715 mnt_drop_write(nd.path.mnt);
716out_release:
695 path_put(&nd.path); 717 path_put(&nd.path);
696out: 718out:
697 return error; 719 return error;
@@ -705,7 +727,12 @@ asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group
705 error = user_path_walk_link(filename, &nd); 727 error = user_path_walk_link(filename, &nd);
706 if (error) 728 if (error)
707 goto out; 729 goto out;
730 error = mnt_want_write(nd.path.mnt);
731 if (error)
732 goto out_release;
708 error = chown_common(nd.path.dentry, user, group); 733 error = chown_common(nd.path.dentry, user, group);
734 mnt_drop_write(nd.path.mnt);
735out_release:
709 path_put(&nd.path); 736 path_put(&nd.path);
710out: 737out:
711 return error; 738 return error;
@@ -722,14 +749,48 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
722 if (!file) 749 if (!file)
723 goto out; 750 goto out;
724 751
752 error = mnt_want_write(file->f_path.mnt);
753 if (error)
754 goto out_fput;
725 dentry = file->f_path.dentry; 755 dentry = file->f_path.dentry;
726 audit_inode(NULL, dentry); 756 audit_inode(NULL, dentry);
727 error = chown_common(dentry, user, group); 757 error = chown_common(dentry, user, group);
758 mnt_drop_write(file->f_path.mnt);
759out_fput:
728 fput(file); 760 fput(file);
729out: 761out:
730 return error; 762 return error;
731} 763}
732 764
765/*
766 * You have to be very careful that these write
767 * counts get cleaned up in error cases and
768 * upon __fput(). This should probably never
769 * be called outside of __dentry_open().
770 */
771static inline int __get_file_write_access(struct inode *inode,
772 struct vfsmount *mnt)
773{
774 int error;
775 error = get_write_access(inode);
776 if (error)
777 return error;
778 /*
779 * Do not take mount writer counts on
780 * special files since no writes to
781 * the mount itself will occur.
782 */
783 if (!special_file(inode->i_mode)) {
784 /*
785 * Balanced in __fput()
786 */
787 error = mnt_want_write(mnt);
788 if (error)
789 put_write_access(inode);
790 }
791 return error;
792}
793
733static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, 794static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
734 int flags, struct file *f, 795 int flags, struct file *f,
735 int (*open)(struct inode *, struct file *)) 796 int (*open)(struct inode *, struct file *))
@@ -742,9 +803,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
742 FMODE_PREAD | FMODE_PWRITE; 803 FMODE_PREAD | FMODE_PWRITE;
743 inode = dentry->d_inode; 804 inode = dentry->d_inode;
744 if (f->f_mode & FMODE_WRITE) { 805 if (f->f_mode & FMODE_WRITE) {
745 error = get_write_access(inode); 806 error = __get_file_write_access(inode, mnt);
746 if (error) 807 if (error)
747 goto cleanup_file; 808 goto cleanup_file;
809 if (!special_file(inode->i_mode))
810 file_take_write(f);
748 } 811 }
749 812
750 f->f_mapping = inode->i_mapping; 813 f->f_mapping = inode->i_mapping;
@@ -784,8 +847,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
784 847
785cleanup_all: 848cleanup_all:
786 fops_put(f->f_op); 849 fops_put(f->f_op);
787 if (f->f_mode & FMODE_WRITE) 850 if (f->f_mode & FMODE_WRITE) {
788 put_write_access(inode); 851 put_write_access(inode);
852 if (!special_file(inode->i_mode)) {
853 /*
854 * We don't consider this a real
855 * mnt_want/drop_write() pair
856 * because it all happenend right
857 * here, so just reset the state.
858 */
859 file_reset_write(f);
860 mnt_drop_write(mnt);
861 }
862 }
789 file_kill(f); 863 file_kill(f);
790 f->f_path.dentry = NULL; 864 f->f_path.dentry = NULL;
791 f->f_path.mnt = NULL; 865 f->f_path.mnt = NULL;
@@ -796,43 +870,6 @@ cleanup_file:
796 return ERR_PTR(error); 870 return ERR_PTR(error);
797} 871}
798 872
799/*
800 * Note that while the flag value (low two bits) for sys_open means:
801 * 00 - read-only
802 * 01 - write-only
803 * 10 - read-write
804 * 11 - special
805 * it is changed into
806 * 00 - no permissions needed
807 * 01 - read-permission
808 * 10 - write-permission
809 * 11 - read-write
810 * for the internal routines (ie open_namei()/follow_link() etc). 00 is
811 * used by symlinks.
812 */
813static struct file *do_filp_open(int dfd, const char *filename, int flags,
814 int mode)
815{
816 int namei_flags, error;
817 struct nameidata nd;
818
819 namei_flags = flags;
820 if ((namei_flags+1) & O_ACCMODE)
821 namei_flags++;
822
823 error = open_namei(dfd, filename, namei_flags, mode, &nd);
824 if (!error)
825 return nameidata_to_filp(&nd, flags);
826
827 return ERR_PTR(error);
828}
829
830struct file *filp_open(const char *filename, int flags, int mode)
831{
832 return do_filp_open(AT_FDCWD, filename, flags, mode);
833}
834EXPORT_SYMBOL(filp_open);
835
836/** 873/**
837 * lookup_instantiate_filp - instantiates the open intent filp 874 * lookup_instantiate_filp - instantiates the open intent filp
838 * @nd: pointer to nameidata 875 * @nd: pointer to nameidata
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index e0f0f098a523..74363a7aacbc 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -4,6 +4,7 @@
4 4
5#include <linux/capability.h> 5#include <linux/capability.h>
6#include <linux/fs.h> 6#include <linux/fs.h>
7#include <linux/mount.h>
7#include <linux/reiserfs_fs.h> 8#include <linux/reiserfs_fs.h>
8#include <linux/time.h> 9#include <linux/time.h>
9#include <asm/uaccess.h> 10#include <asm/uaccess.h>
@@ -25,6 +26,7 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
25 unsigned long arg) 26 unsigned long arg)
26{ 27{
27 unsigned int flags; 28 unsigned int flags;
29 int err = 0;
28 30
29 switch (cmd) { 31 switch (cmd) {
30 case REISERFS_IOC_UNPACK: 32 case REISERFS_IOC_UNPACK:
@@ -48,50 +50,67 @@ int reiserfs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
48 if (!reiserfs_attrs(inode->i_sb)) 50 if (!reiserfs_attrs(inode->i_sb))
49 return -ENOTTY; 51 return -ENOTTY;
50 52
51 if (IS_RDONLY(inode)) 53 err = mnt_want_write(filp->f_path.mnt);
52 return -EROFS; 54 if (err)
55 return err;
53 56
54 if (!is_owner_or_cap(inode)) 57 if (!is_owner_or_cap(inode)) {
55 return -EPERM; 58 err = -EPERM;
56 59 goto setflags_out;
57 if (get_user(flags, (int __user *)arg)) 60 }
58 return -EFAULT; 61 if (get_user(flags, (int __user *)arg)) {
59 62 err = -EFAULT;
60 /* Is it quota file? Do not allow user to mess with it. */ 63 goto setflags_out;
61 if (IS_NOQUOTA(inode)) 64 }
62 return -EPERM; 65 /*
66 * Is it quota file? Do not allow user to mess with it
67 */
68 if (IS_NOQUOTA(inode)) {
69 err = -EPERM;
70 goto setflags_out;
71 }
63 if (((flags ^ REISERFS_I(inode)-> 72 if (((flags ^ REISERFS_I(inode)->
64 i_attrs) & (REISERFS_IMMUTABLE_FL | 73 i_attrs) & (REISERFS_IMMUTABLE_FL |
65 REISERFS_APPEND_FL)) 74 REISERFS_APPEND_FL))
66 && !capable(CAP_LINUX_IMMUTABLE)) 75 && !capable(CAP_LINUX_IMMUTABLE)) {
67 return -EPERM; 76 err = -EPERM;
68 77 goto setflags_out;
78 }
69 if ((flags & REISERFS_NOTAIL_FL) && 79 if ((flags & REISERFS_NOTAIL_FL) &&
70 S_ISREG(inode->i_mode)) { 80 S_ISREG(inode->i_mode)) {
71 int result; 81 int result;
72 82
73 result = reiserfs_unpack(inode, filp); 83 result = reiserfs_unpack(inode, filp);
74 if (result) 84 if (result) {
75 return result; 85 err = result;
86 goto setflags_out;
87 }
76 } 88 }
77 sd_attrs_to_i_attrs(flags, inode); 89 sd_attrs_to_i_attrs(flags, inode);
78 REISERFS_I(inode)->i_attrs = flags; 90 REISERFS_I(inode)->i_attrs = flags;
79 inode->i_ctime = CURRENT_TIME_SEC; 91 inode->i_ctime = CURRENT_TIME_SEC;
80 mark_inode_dirty(inode); 92 mark_inode_dirty(inode);
81 return 0; 93setflags_out:
94 mnt_drop_write(filp->f_path.mnt);
95 return err;
82 } 96 }
83 case REISERFS_IOC_GETVERSION: 97 case REISERFS_IOC_GETVERSION:
84 return put_user(inode->i_generation, (int __user *)arg); 98 return put_user(inode->i_generation, (int __user *)arg);
85 case REISERFS_IOC_SETVERSION: 99 case REISERFS_IOC_SETVERSION:
86 if (!is_owner_or_cap(inode)) 100 if (!is_owner_or_cap(inode))
87 return -EPERM; 101 return -EPERM;
88 if (IS_RDONLY(inode)) 102 err = mnt_want_write(filp->f_path.mnt);
89 return -EROFS; 103 if (err)
90 if (get_user(inode->i_generation, (int __user *)arg)) 104 return err;
91 return -EFAULT; 105 if (get_user(inode->i_generation, (int __user *)arg)) {
106 err = -EFAULT;
107 goto setversion_out;
108 }
92 inode->i_ctime = CURRENT_TIME_SEC; 109 inode->i_ctime = CURRENT_TIME_SEC;
93 mark_inode_dirty(inode); 110 mark_inode_dirty(inode);
94 return 0; 111setversion_out:
112 mnt_drop_write(filp->f_path.mnt);
113 return err;
95 default: 114 default:
96 return -ENOTTY; 115 return -ENOTTY;
97 } 116 }
diff --git a/fs/super.c b/fs/super.c
index 09008dbd264e..1f8f05ede437 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
37#include <linux/idr.h> 37#include <linux/idr.h>
38#include <linux/kobject.h> 38#include <linux/kobject.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/file.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42 43
@@ -567,10 +568,29 @@ static void mark_files_ro(struct super_block *sb)
567{ 568{
568 struct file *f; 569 struct file *f;
569 570
571retry:
570 file_list_lock(); 572 file_list_lock();
571 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 573 list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
572 if (S_ISREG(f->f_path.dentry->d_inode->i_mode) && file_count(f)) 574 struct vfsmount *mnt;
573 f->f_mode &= ~FMODE_WRITE; 575 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
576 continue;
577 if (!file_count(f))
578 continue;
579 if (!(f->f_mode & FMODE_WRITE))
580 continue;
581 f->f_mode &= ~FMODE_WRITE;
582 if (file_check_writeable(f) != 0)
583 continue;
584 file_release_write(f);
585 mnt = mntget(f->f_path.mnt);
586 file_list_unlock();
587 /*
588 * This can sleep, so we can't hold
589 * the file_list_lock() spinlock.
590 */
591 mnt_drop_write(mnt);
592 mntput(mnt);
593 goto retry;
574 } 594 }
575 file_list_unlock(); 595 file_list_unlock();
576} 596}
diff --git a/fs/utimes.c b/fs/utimes.c
index b18da9c0b97f..a2bef77dc9c9 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -2,6 +2,7 @@
2#include <linux/file.h> 2#include <linux/file.h>
3#include <linux/fs.h> 3#include <linux/fs.h>
4#include <linux/linkage.h> 4#include <linux/linkage.h>
5#include <linux/mount.h>
5#include <linux/namei.h> 6#include <linux/namei.h>
6#include <linux/sched.h> 7#include <linux/sched.h>
7#include <linux/stat.h> 8#include <linux/stat.h>
@@ -59,6 +60,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
59 struct inode *inode; 60 struct inode *inode;
60 struct iattr newattrs; 61 struct iattr newattrs;
61 struct file *f = NULL; 62 struct file *f = NULL;
63 struct vfsmount *mnt;
62 64
63 error = -EINVAL; 65 error = -EINVAL;
64 if (times && (!nsec_valid(times[0].tv_nsec) || 66 if (times && (!nsec_valid(times[0].tv_nsec) ||
@@ -79,18 +81,20 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
79 if (!f) 81 if (!f)
80 goto out; 82 goto out;
81 dentry = f->f_path.dentry; 83 dentry = f->f_path.dentry;
84 mnt = f->f_path.mnt;
82 } else { 85 } else {
83 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); 86 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
84 if (error) 87 if (error)
85 goto out; 88 goto out;
86 89
87 dentry = nd.path.dentry; 90 dentry = nd.path.dentry;
91 mnt = nd.path.mnt;
88 } 92 }
89 93
90 inode = dentry->d_inode; 94 inode = dentry->d_inode;
91 95
92 error = -EROFS; 96 error = mnt_want_write(mnt);
93 if (IS_RDONLY(inode)) 97 if (error)
94 goto dput_and_out; 98 goto dput_and_out;
95 99
96 /* Don't worry, the checks are done in inode_change_ok() */ 100 /* Don't worry, the checks are done in inode_change_ok() */
@@ -98,7 +102,7 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
98 if (times) { 102 if (times) {
99 error = -EPERM; 103 error = -EPERM;
100 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 104 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
101 goto dput_and_out; 105 goto mnt_drop_write_and_out;
102 106
103 if (times[0].tv_nsec == UTIME_OMIT) 107 if (times[0].tv_nsec == UTIME_OMIT)
104 newattrs.ia_valid &= ~ATTR_ATIME; 108 newattrs.ia_valid &= ~ATTR_ATIME;
@@ -118,22 +122,24 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
118 } else { 122 } else {
119 error = -EACCES; 123 error = -EACCES;
120 if (IS_IMMUTABLE(inode)) 124 if (IS_IMMUTABLE(inode))
121 goto dput_and_out; 125 goto mnt_drop_write_and_out;
122 126
123 if (!is_owner_or_cap(inode)) { 127 if (!is_owner_or_cap(inode)) {
124 if (f) { 128 if (f) {
125 if (!(f->f_mode & FMODE_WRITE)) 129 if (!(f->f_mode & FMODE_WRITE))
126 goto dput_and_out; 130 goto mnt_drop_write_and_out;
127 } else { 131 } else {
128 error = vfs_permission(&nd, MAY_WRITE); 132 error = vfs_permission(&nd, MAY_WRITE);
129 if (error) 133 if (error)
130 goto dput_and_out; 134 goto mnt_drop_write_and_out;
131 } 135 }
132 } 136 }
133 } 137 }
134 mutex_lock(&inode->i_mutex); 138 mutex_lock(&inode->i_mutex);
135 error = notify_change(dentry, &newattrs); 139 error = notify_change(dentry, &newattrs);
136 mutex_unlock(&inode->i_mutex); 140 mutex_unlock(&inode->i_mutex);
141mnt_drop_write_and_out:
142 mnt_drop_write(mnt);
137dput_and_out: 143dput_and_out:
138 if (f) 144 if (f)
139 fput(f); 145 fput(f);
diff --git a/fs/xattr.c b/fs/xattr.c
index 3acab1615460..f7062da505d4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -11,6 +11,7 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/file.h> 12#include <linux/file.h>
13#include <linux/xattr.h> 13#include <linux/xattr.h>
14#include <linux/mount.h>
14#include <linux/namei.h> 15#include <linux/namei.h>
15#include <linux/security.h> 16#include <linux/security.h>
16#include <linux/syscalls.h> 17#include <linux/syscalls.h>
@@ -32,8 +33,6 @@ xattr_permission(struct inode *inode, const char *name, int mask)
32 * filesystem or on an immutable / append-only inode. 33 * filesystem or on an immutable / append-only inode.
33 */ 34 */
34 if (mask & MAY_WRITE) { 35 if (mask & MAY_WRITE) {
35 if (IS_RDONLY(inode))
36 return -EROFS;
37 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 36 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
38 return -EPERM; 37 return -EPERM;
39 } 38 }
@@ -262,7 +261,11 @@ sys_setxattr(char __user *path, char __user *name, void __user *value,
262 error = user_path_walk(path, &nd); 261 error = user_path_walk(path, &nd);
263 if (error) 262 if (error)
264 return error; 263 return error;
265 error = setxattr(nd.path.dentry, name, value, size, flags); 264 error = mnt_want_write(nd.path.mnt);
265 if (!error) {
266 error = setxattr(nd.path.dentry, name, value, size, flags);
267 mnt_drop_write(nd.path.mnt);
268 }
266 path_put(&nd.path); 269 path_put(&nd.path);
267 return error; 270 return error;
268} 271}
@@ -277,7 +280,11 @@ sys_lsetxattr(char __user *path, char __user *name, void __user *value,
277 error = user_path_walk_link(path, &nd); 280 error = user_path_walk_link(path, &nd);
278 if (error) 281 if (error)
279 return error; 282 return error;
280 error = setxattr(nd.path.dentry, name, value, size, flags); 283 error = mnt_want_write(nd.path.mnt);
284 if (!error) {
285 error = setxattr(nd.path.dentry, name, value, size, flags);
286 mnt_drop_write(nd.path.mnt);
287 }
281 path_put(&nd.path); 288 path_put(&nd.path);
282 return error; 289 return error;
283} 290}
@@ -295,7 +302,12 @@ sys_fsetxattr(int fd, char __user *name, void __user *value,
295 return error; 302 return error;
296 dentry = f->f_path.dentry; 303 dentry = f->f_path.dentry;
297 audit_inode(NULL, dentry); 304 audit_inode(NULL, dentry);
298 error = setxattr(dentry, name, value, size, flags); 305 error = mnt_want_write(f->f_path.mnt);
306 if (!error) {
307 error = setxattr(dentry, name, value, size, flags);
308 mnt_drop_write(f->f_path.mnt);
309 }
310out_fput:
299 fput(f); 311 fput(f);
300 return error; 312 return error;
301} 313}
@@ -482,7 +494,11 @@ sys_removexattr(char __user *path, char __user *name)
482 error = user_path_walk(path, &nd); 494 error = user_path_walk(path, &nd);
483 if (error) 495 if (error)
484 return error; 496 return error;
485 error = removexattr(nd.path.dentry, name); 497 error = mnt_want_write(nd.path.mnt);
498 if (!error) {
499 error = removexattr(nd.path.dentry, name);
500 mnt_drop_write(nd.path.mnt);
501 }
486 path_put(&nd.path); 502 path_put(&nd.path);
487 return error; 503 return error;
488} 504}
@@ -496,7 +512,11 @@ sys_lremovexattr(char __user *path, char __user *name)
496 error = user_path_walk_link(path, &nd); 512 error = user_path_walk_link(path, &nd);
497 if (error) 513 if (error)
498 return error; 514 return error;
499 error = removexattr(nd.path.dentry, name); 515 error = mnt_want_write(nd.path.mnt);
516 if (!error) {
517 error = removexattr(nd.path.dentry, name);
518 mnt_drop_write(nd.path.mnt);
519 }
500 path_put(&nd.path); 520 path_put(&nd.path);
501 return error; 521 return error;
502} 522}
@@ -513,7 +533,11 @@ sys_fremovexattr(int fd, char __user *name)
513 return error; 533 return error;
514 dentry = f->f_path.dentry; 534 dentry = f->f_path.dentry;
515 audit_inode(NULL, dentry); 535 audit_inode(NULL, dentry);
516 error = removexattr(dentry, name); 536 error = mnt_want_write(f->f_path.mnt);
537 if (!error) {
538 error = removexattr(dentry, name);
539 mnt_drop_write(f->f_path.mnt);
540 }
517 fput(f); 541 fput(f);
518 return error; 542 return error;
519} 543}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index bf7759793856..4ddb86b73c6b 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -535,8 +535,6 @@ xfs_attrmulti_attr_set(
535 char *kbuf; 535 char *kbuf;
536 int error = EFAULT; 536 int error = EFAULT;
537 537
538 if (IS_RDONLY(inode))
539 return -EROFS;
540 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 538 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
541 return EPERM; 539 return EPERM;
542 if (len > XATTR_SIZE_MAX) 540 if (len > XATTR_SIZE_MAX)
@@ -562,8 +560,6 @@ xfs_attrmulti_attr_remove(
562 char *name, 560 char *name,
563 __uint32_t flags) 561 __uint32_t flags)
564{ 562{
565 if (IS_RDONLY(inode))
566 return -EROFS;
567 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 563 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
568 return EPERM; 564 return EPERM;
569 return xfs_attr_remove(XFS_I(inode), name, flags); 565 return xfs_attr_remove(XFS_I(inode), name, flags);
@@ -573,6 +569,7 @@ STATIC int
573xfs_attrmulti_by_handle( 569xfs_attrmulti_by_handle(
574 xfs_mount_t *mp, 570 xfs_mount_t *mp,
575 void __user *arg, 571 void __user *arg,
572 struct file *parfilp,
576 struct inode *parinode) 573 struct inode *parinode)
577{ 574{
578 int error; 575 int error;
@@ -626,13 +623,21 @@ xfs_attrmulti_by_handle(
626 &ops[i].am_length, ops[i].am_flags); 623 &ops[i].am_length, ops[i].am_flags);
627 break; 624 break;
628 case ATTR_OP_SET: 625 case ATTR_OP_SET:
626 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
627 if (ops[i].am_error)
628 break;
629 ops[i].am_error = xfs_attrmulti_attr_set(inode, 629 ops[i].am_error = xfs_attrmulti_attr_set(inode,
630 attr_name, ops[i].am_attrvalue, 630 attr_name, ops[i].am_attrvalue,
631 ops[i].am_length, ops[i].am_flags); 631 ops[i].am_length, ops[i].am_flags);
632 mnt_drop_write(parfilp->f_path.mnt);
632 break; 633 break;
633 case ATTR_OP_REMOVE: 634 case ATTR_OP_REMOVE:
635 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
636 if (ops[i].am_error)
637 break;
634 ops[i].am_error = xfs_attrmulti_attr_remove(inode, 638 ops[i].am_error = xfs_attrmulti_attr_remove(inode,
635 attr_name, ops[i].am_flags); 639 attr_name, ops[i].am_flags);
640 mnt_drop_write(parfilp->f_path.mnt);
636 break; 641 break;
637 default: 642 default:
638 ops[i].am_error = EINVAL; 643 ops[i].am_error = EINVAL;
@@ -1133,7 +1138,7 @@ xfs_ioctl(
1133 return xfs_attrlist_by_handle(mp, arg, inode); 1138 return xfs_attrlist_by_handle(mp, arg, inode);
1134 1139
1135 case XFS_IOC_ATTRMULTI_BY_HANDLE: 1140 case XFS_IOC_ATTRMULTI_BY_HANDLE:
1136 return xfs_attrmulti_by_handle(mp, arg, inode); 1141 return xfs_attrmulti_by_handle(mp, arg, filp, inode);
1137 1142
1138 case XFS_IOC_SWAPEXT: { 1143 case XFS_IOC_SWAPEXT: {
1139 error = xfs_swapext((struct xfs_swapext __user *)arg); 1144 error = xfs_swapext((struct xfs_swapext __user *)arg);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 0c958cf77758..a1237dad6430 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -155,13 +155,6 @@ xfs_ichgtime_fast(
155 */ 155 */
156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0); 156 ASSERT((flags & XFS_ICHGTIME_ACC) == 0);
157 157
158 /*
159 * We're not supposed to change timestamps in readonly-mounted
160 * filesystems. Throw it away if anyone asks us.
161 */
162 if (unlikely(IS_RDONLY(inode)))
163 return;
164
165 if (flags & XFS_ICHGTIME_MOD) { 158 if (flags & XFS_ICHGTIME_MOD) {
166 tvp = &inode->i_mtime; 159 tvp = &inode->i_mtime;
167 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; 160 ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 21c0dbc74093..1ebd8004469c 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -51,6 +51,7 @@
51#include "xfs_vnodeops.h" 51#include "xfs_vnodeops.h"
52 52
53#include <linux/capability.h> 53#include <linux/capability.h>
54#include <linux/mount.h>
54#include <linux/writeback.h> 55#include <linux/writeback.h>
55 56
56 57
@@ -670,10 +671,16 @@ start:
670 if (new_size > xip->i_size) 671 if (new_size > xip->i_size)
671 xip->i_new_size = new_size; 672 xip->i_new_size = new_size;
672 673
673 if (likely(!(ioflags & IO_INVIS))) { 674 /*
675 * We're not supposed to change timestamps in readonly-mounted
676 * filesystems. Throw it away if anyone asks us.
677 */
678 if (likely(!(ioflags & IO_INVIS) &&
679 !mnt_want_write(file->f_path.mnt))) {
674 file_update_time(file); 680 file_update_time(file);
675 xfs_ichgtime_fast(xip, inode, 681 xfs_ichgtime_fast(xip, inode,
676 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 682 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
683 mnt_drop_write(file->f_path.mnt);
677 } 684 }
678 685
679 /* 686 /*
diff --git a/include/linux/file.h b/include/linux/file.h
index 7239baac81a9..653477021e4c 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -61,6 +61,7 @@ extern struct kmem_cache *filp_cachep;
61 61
62extern void __fput(struct file *); 62extern void __fput(struct file *);
63extern void fput(struct file *); 63extern void fput(struct file *);
64extern void drop_file_write_access(struct file *file);
64 65
65struct file_operations; 66struct file_operations;
66struct vfsmount; 67struct vfsmount;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b84b848431f2..d1eeea669d2c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -776,6 +776,9 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
776 index < ra->start + ra->size); 776 index < ra->start + ra->size);
777} 777}
778 778
779#define FILE_MNT_WRITE_TAKEN 1
780#define FILE_MNT_WRITE_RELEASED 2
781
779struct file { 782struct file {
780 /* 783 /*
781 * fu_list becomes invalid after file_free is called and queued via 784 * fu_list becomes invalid after file_free is called and queued via
@@ -810,6 +813,9 @@ struct file {
810 spinlock_t f_ep_lock; 813 spinlock_t f_ep_lock;
811#endif /* #ifdef CONFIG_EPOLL */ 814#endif /* #ifdef CONFIG_EPOLL */
812 struct address_space *f_mapping; 815 struct address_space *f_mapping;
816#ifdef CONFIG_DEBUG_WRITECOUNT
817 unsigned long f_mnt_write_state;
818#endif
813}; 819};
814extern spinlock_t files_lock; 820extern spinlock_t files_lock;
815#define file_list_lock() spin_lock(&files_lock); 821#define file_list_lock() spin_lock(&files_lock);
@@ -818,6 +824,49 @@ extern spinlock_t files_lock;
818#define get_file(x) atomic_inc(&(x)->f_count) 824#define get_file(x) atomic_inc(&(x)->f_count)
819#define file_count(x) atomic_read(&(x)->f_count) 825#define file_count(x) atomic_read(&(x)->f_count)
820 826
827#ifdef CONFIG_DEBUG_WRITECOUNT
828static inline void file_take_write(struct file *f)
829{
830 WARN_ON(f->f_mnt_write_state != 0);
831 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
832}
833static inline void file_release_write(struct file *f)
834{
835 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
836}
837static inline void file_reset_write(struct file *f)
838{
839 f->f_mnt_write_state = 0;
840}
841static inline void file_check_state(struct file *f)
842{
843 /*
844 * At this point, either both or neither of these bits
845 * should be set.
846 */
847 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
848 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
849}
850static inline int file_check_writeable(struct file *f)
851{
852 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
853 return 0;
854 printk(KERN_WARNING "writeable file with no "
855 "mnt_want_write()\n");
856 WARN_ON(1);
857 return -EINVAL;
858}
859#else /* !CONFIG_DEBUG_WRITECOUNT */
860static inline void file_take_write(struct file *filp) {}
861static inline void file_release_write(struct file *filp) {}
862static inline void file_reset_write(struct file *filp) {}
863static inline void file_check_state(struct file *filp) {}
864static inline int file_check_writeable(struct file *filp)
865{
866 return 0;
867}
868#endif /* CONFIG_DEBUG_WRITECOUNT */
869
821#define MAX_NON_LFS ((1UL<<31) - 1) 870#define MAX_NON_LFS ((1UL<<31) - 1)
822 871
823/* Page cache limit. The filesystems should put that into their s_maxbytes 872/* Page cache limit. The filesystems should put that into their s_maxbytes
@@ -1735,7 +1784,8 @@ extern struct file *create_read_pipe(struct file *f);
1735extern struct file *create_write_pipe(void); 1784extern struct file *create_write_pipe(void);
1736extern void free_write_pipe(struct file *); 1785extern void free_write_pipe(struct file *);
1737 1786
1738extern int open_namei(int dfd, const char *, int, int, struct nameidata *); 1787extern struct file *do_filp_open(int dfd, const char *pathname,
1788 int open_flag, int mode);
1739extern int may_open(struct nameidata *, int, int); 1789extern int may_open(struct nameidata *, int, int);
1740 1790
1741extern int kernel_read(struct file *, unsigned long, char *, unsigned long); 1791extern int kernel_read(struct file *, unsigned long, char *, unsigned long);
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5ee2df217cdf..d6600e3f7e45 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -14,6 +14,7 @@
14 14
15#include <linux/types.h> 15#include <linux/types.h>
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/nodemask.h>
17#include <linux/spinlock.h> 18#include <linux/spinlock.h>
18#include <asm/atomic.h> 19#include <asm/atomic.h>
19 20
@@ -28,8 +29,10 @@ struct mnt_namespace;
28#define MNT_NOATIME 0x08 29#define MNT_NOATIME 0x08
29#define MNT_NODIRATIME 0x10 30#define MNT_NODIRATIME 0x10
30#define MNT_RELATIME 0x20 31#define MNT_RELATIME 0x20
32#define MNT_READONLY 0x40 /* does the user want this to be r/o? */
31 33
32#define MNT_SHRINKABLE 0x100 34#define MNT_SHRINKABLE 0x100
35#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */
33 36
34#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ 37#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
35#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ 38#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
@@ -62,6 +65,11 @@ struct vfsmount {
62 int mnt_expiry_mark; /* true if marked for expiry */ 65 int mnt_expiry_mark; /* true if marked for expiry */
63 int mnt_pinned; 66 int mnt_pinned;
64 int mnt_ghosts; 67 int mnt_ghosts;
68 /*
69 * This value is not stable unless all of the mnt_writers[] spinlocks
70 * are held, and all mnt_writer[]s on this mount have 0 as their ->count
71 */
72 atomic_t __mnt_writers;
65}; 73};
66 74
67static inline struct vfsmount *mntget(struct vfsmount *mnt) 75static inline struct vfsmount *mntget(struct vfsmount *mnt)
@@ -71,9 +79,12 @@ static inline struct vfsmount *mntget(struct vfsmount *mnt)
71 return mnt; 79 return mnt;
72} 80}
73 81
82extern int mnt_want_write(struct vfsmount *mnt);
83extern void mnt_drop_write(struct vfsmount *mnt);
74extern void mntput_no_expire(struct vfsmount *mnt); 84extern void mntput_no_expire(struct vfsmount *mnt);
75extern void mnt_pin(struct vfsmount *mnt); 85extern void mnt_pin(struct vfsmount *mnt);
76extern void mnt_unpin(struct vfsmount *mnt); 86extern void mnt_unpin(struct vfsmount *mnt);
87extern int __mnt_is_readonly(struct vfsmount *mnt);
77 88
78static inline void mntput(struct vfsmount *mnt) 89static inline void mntput(struct vfsmount *mnt)
79{ 90{
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 60f7a27f7a9e..94fd3b08fb77 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -598,6 +598,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
598 int oflag, mode_t mode, struct mq_attr __user *u_attr) 598 int oflag, mode_t mode, struct mq_attr __user *u_attr)
599{ 599{
600 struct mq_attr attr; 600 struct mq_attr attr;
601 struct file *result;
601 int ret; 602 int ret;
602 603
603 if (u_attr) { 604 if (u_attr) {
@@ -612,13 +613,24 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
612 } 613 }
613 614
614 mode &= ~current->fs->umask; 615 mode &= ~current->fs->umask;
616 ret = mnt_want_write(mqueue_mnt);
617 if (ret)
618 goto out;
615 ret = vfs_create(dir->d_inode, dentry, mode, NULL); 619 ret = vfs_create(dir->d_inode, dentry, mode, NULL);
616 dentry->d_fsdata = NULL; 620 dentry->d_fsdata = NULL;
617 if (ret) 621 if (ret)
618 goto out; 622 goto out_drop_write;
619 623
620 return dentry_open(dentry, mqueue_mnt, oflag); 624 result = dentry_open(dentry, mqueue_mnt, oflag);
621 625 /*
626 * dentry_open() took a persistent mnt_want_write(),
627 * so we can now drop this one.
628 */
629 mnt_drop_write(mqueue_mnt);
630 return result;
631
632out_drop_write:
633 mnt_drop_write(mqueue_mnt);
622out: 634out:
623 dput(dentry); 635 dput(dentry);
624 mntput(mqueue_mnt); 636 mntput(mqueue_mnt);
@@ -742,8 +754,11 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
742 inode = dentry->d_inode; 754 inode = dentry->d_inode;
743 if (inode) 755 if (inode)
744 atomic_inc(&inode->i_count); 756 atomic_inc(&inode->i_count);
745 757 err = mnt_want_write(mqueue_mnt);
758 if (err)
759 goto out_err;
746 err = vfs_unlink(dentry->d_parent->d_inode, dentry); 760 err = vfs_unlink(dentry->d_parent->d_inode, dentry);
761 mnt_drop_write(mqueue_mnt);
747out_err: 762out_err:
748 dput(dentry); 763 dput(dentry);
749 764
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 95de3102bc87..623ef24c2381 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -427,6 +427,16 @@ config DEBUG_VM
427 427
428 If unsure, say N. 428 If unsure, say N.
429 429
430config DEBUG_WRITECOUNT
431 bool "Debug filesystem writers count"
432 depends on DEBUG_KERNEL
433 help
434 Enable this to catch wrong use of the writers count in struct
435 vfsmount. This will increase the size of each file struct by
436 32 bits.
437
438 If unsure, say N.
439
430config DEBUG_LIST 440config DEBUG_LIST
431 bool "Debug linked list manipulation" 441 bool "Debug linked list manipulation"
432 depends on DEBUG_KERNEL 442 depends on DEBUG_KERNEL
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2851d0d15048..1454afcc06c4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -819,7 +819,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
819 */ 819 */
820 mode = S_IFSOCK | 820 mode = S_IFSOCK |
821 (SOCK_INODE(sock)->i_mode & ~current->fs->umask); 821 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
822 err = mnt_want_write(nd.path.mnt);
823 if (err)
824 goto out_mknod_dput;
822 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); 825 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
826 mnt_drop_write(nd.path.mnt);
823 if (err) 827 if (err)
824 goto out_mknod_dput; 828 goto out_mknod_dput;
825 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 829 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);