diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-09-02 15:42:56 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-02 15:42:59 -0400 |
commit | f76bd108e5031202bb40849306f98c4afebe4ef6 (patch) | |
tree | 455e14a23642a82513eebd4e4f7d3b76842869cd /fs | |
parent | cd6feeeafddbef6abfe4d90fb26e42fd844d34ed (diff) | |
parent | eced1dfcfcf6b0a35e925d73916a9d8e36ab5457 (diff) |
Merge branch 'perfcounters/urgent' into perfcounters/core
Merge reason: We are going to modify a place modified by
perfcounters/urgent.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/9p/v9fs.c | 21 | ||||
-rw-r--r-- | fs/9p/v9fs.h | 1 | ||||
-rw-r--r-- | fs/9p/vfs_inode.c | 126 | ||||
-rw-r--r-- | fs/9p/vfs_super.c | 39 | ||||
-rw-r--r-- | fs/afs/file.c | 18 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 21 | ||||
-rw-r--r-- | fs/buffer.c | 7 | ||||
-rw-r--r-- | fs/exec.c | 4 | ||||
-rw-r--r-- | fs/ext3/Kconfig | 32 | ||||
-rw-r--r-- | fs/ext3/super.c | 40 | ||||
-rw-r--r-- | fs/gfs2/sys.c | 20 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 20 | ||||
-rw-r--r-- | fs/libfs.c | 2 | ||||
-rw-r--r-- | fs/nfs/nfs4state.c | 4 | ||||
-rw-r--r-- | fs/nilfs2/super.c | 2 | ||||
-rw-r--r-- | fs/nilfs2/the_nilfs.h | 2 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_fsnotify.c | 46 | ||||
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 238 | ||||
-rw-r--r-- | fs/notify/notification.c | 11 | ||||
-rw-r--r-- | fs/ocfs2/alloc.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmunlock.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2_lockid.h | 1 | ||||
-rw-r--r-- | fs/ocfs2/quota_global.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 6 | ||||
-rw-r--r-- | fs/proc/base.c | 19 | ||||
-rw-r--r-- | fs/select.c | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 13 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 113 |
29 files changed, 478 insertions, 346 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 332b5ff02fec..f7003cfac63d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -76,7 +76,7 @@ static const match_table_t tokens = { | |||
76 | * Return 0 upon success, -ERRNO upon failure. | 76 | * Return 0 upon success, -ERRNO upon failure. |
77 | */ | 77 | */ |
78 | 78 | ||
79 | static int v9fs_parse_options(struct v9fs_session_info *v9ses) | 79 | static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) |
80 | { | 80 | { |
81 | char *options; | 81 | char *options; |
82 | substring_t args[MAX_OPT_ARGS]; | 82 | substring_t args[MAX_OPT_ARGS]; |
@@ -90,10 +90,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses) | |||
90 | v9ses->debug = 0; | 90 | v9ses->debug = 0; |
91 | v9ses->cache = 0; | 91 | v9ses->cache = 0; |
92 | 92 | ||
93 | if (!v9ses->options) | 93 | if (!opts) |
94 | return 0; | 94 | return 0; |
95 | 95 | ||
96 | options = kstrdup(v9ses->options, GFP_KERNEL); | 96 | options = kstrdup(opts, GFP_KERNEL); |
97 | if (!options) { | 97 | if (!options) { |
98 | P9_DPRINTK(P9_DEBUG_ERROR, | 98 | P9_DPRINTK(P9_DEBUG_ERROR, |
99 | "failed to allocate copy of option string\n"); | 99 | "failed to allocate copy of option string\n"); |
@@ -206,24 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
206 | v9ses->uid = ~0; | 206 | v9ses->uid = ~0; |
207 | v9ses->dfltuid = V9FS_DEFUID; | 207 | v9ses->dfltuid = V9FS_DEFUID; |
208 | v9ses->dfltgid = V9FS_DEFGID; | 208 | v9ses->dfltgid = V9FS_DEFGID; |
209 | if (data) { | ||
210 | v9ses->options = kstrdup(data, GFP_KERNEL); | ||
211 | if (!v9ses->options) { | ||
212 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
213 | "failed to allocate copy of option string\n"); | ||
214 | retval = -ENOMEM; | ||
215 | goto error; | ||
216 | } | ||
217 | } | ||
218 | 209 | ||
219 | rc = v9fs_parse_options(v9ses); | 210 | rc = v9fs_parse_options(v9ses, data); |
220 | if (rc < 0) { | 211 | if (rc < 0) { |
221 | retval = rc; | 212 | retval = rc; |
222 | goto error; | 213 | goto error; |
223 | } | 214 | } |
224 | 215 | ||
225 | v9ses->clnt = p9_client_create(dev_name, v9ses->options); | 216 | v9ses->clnt = p9_client_create(dev_name, data); |
226 | |||
227 | if (IS_ERR(v9ses->clnt)) { | 217 | if (IS_ERR(v9ses->clnt)) { |
228 | retval = PTR_ERR(v9ses->clnt); | 218 | retval = PTR_ERR(v9ses->clnt); |
229 | v9ses->clnt = NULL; | 219 | v9ses->clnt = NULL; |
@@ -280,7 +270,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) | |||
280 | 270 | ||
281 | __putname(v9ses->uname); | 271 | __putname(v9ses->uname); |
282 | __putname(v9ses->aname); | 272 | __putname(v9ses->aname); |
283 | kfree(v9ses->options); | ||
284 | } | 273 | } |
285 | 274 | ||
286 | /** | 275 | /** |
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index a7d567192998..38762bf102a9 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h | |||
@@ -85,7 +85,6 @@ struct v9fs_session_info { | |||
85 | unsigned int afid; | 85 | unsigned int afid; |
86 | unsigned int cache; | 86 | unsigned int cache; |
87 | 87 | ||
88 | char *options; /* copy of mount options */ | ||
89 | char *uname; /* user name to mount as */ | 88 | char *uname; /* user name to mount as */ |
90 | char *aname; /* name of remote hierarchy being mounted */ | 89 | char *aname; /* name of remote hierarchy being mounted */ |
91 | unsigned int maxdata; /* max data for client interface */ | 90 | unsigned int maxdata; /* max data for client interface */ |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 81f8bbf12f9f..06a223d50a81 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended) | |||
171 | 171 | ||
172 | /** | 172 | /** |
173 | * v9fs_blank_wstat - helper function to setup a 9P stat structure | 173 | * v9fs_blank_wstat - helper function to setup a 9P stat structure |
174 | * @v9ses: 9P session info (for determining extended mode) | ||
175 | * @wstat: structure to initialize | 174 | * @wstat: structure to initialize |
176 | * | 175 | * |
177 | */ | 176 | */ |
@@ -207,65 +206,72 @@ v9fs_blank_wstat(struct p9_wstat *wstat) | |||
207 | 206 | ||
208 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) | 207 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) |
209 | { | 208 | { |
209 | int err; | ||
210 | struct inode *inode; | 210 | struct inode *inode; |
211 | struct v9fs_session_info *v9ses = sb->s_fs_info; | 211 | struct v9fs_session_info *v9ses = sb->s_fs_info; |
212 | 212 | ||
213 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); | 213 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); |
214 | 214 | ||
215 | inode = new_inode(sb); | 215 | inode = new_inode(sb); |
216 | if (inode) { | 216 | if (!inode) { |
217 | inode->i_mode = mode; | ||
218 | inode->i_uid = current_fsuid(); | ||
219 | inode->i_gid = current_fsgid(); | ||
220 | inode->i_blocks = 0; | ||
221 | inode->i_rdev = 0; | ||
222 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
223 | inode->i_mapping->a_ops = &v9fs_addr_operations; | ||
224 | |||
225 | switch (mode & S_IFMT) { | ||
226 | case S_IFIFO: | ||
227 | case S_IFBLK: | ||
228 | case S_IFCHR: | ||
229 | case S_IFSOCK: | ||
230 | if (!v9fs_extended(v9ses)) { | ||
231 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
232 | "special files without extended mode\n"); | ||
233 | return ERR_PTR(-EINVAL); | ||
234 | } | ||
235 | init_special_inode(inode, inode->i_mode, | ||
236 | inode->i_rdev); | ||
237 | break; | ||
238 | case S_IFREG: | ||
239 | inode->i_op = &v9fs_file_inode_operations; | ||
240 | inode->i_fop = &v9fs_file_operations; | ||
241 | break; | ||
242 | case S_IFLNK: | ||
243 | if (!v9fs_extended(v9ses)) { | ||
244 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
245 | "extended modes used w/o 9P2000.u\n"); | ||
246 | return ERR_PTR(-EINVAL); | ||
247 | } | ||
248 | inode->i_op = &v9fs_symlink_inode_operations; | ||
249 | break; | ||
250 | case S_IFDIR: | ||
251 | inc_nlink(inode); | ||
252 | if (v9fs_extended(v9ses)) | ||
253 | inode->i_op = &v9fs_dir_inode_operations_ext; | ||
254 | else | ||
255 | inode->i_op = &v9fs_dir_inode_operations; | ||
256 | inode->i_fop = &v9fs_dir_operations; | ||
257 | break; | ||
258 | default: | ||
259 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
260 | "BAD mode 0x%x S_IFMT 0x%x\n", | ||
261 | mode, mode & S_IFMT); | ||
262 | return ERR_PTR(-EINVAL); | ||
263 | } | ||
264 | } else { | ||
265 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); | 217 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); |
266 | return ERR_PTR(-ENOMEM); | 218 | return ERR_PTR(-ENOMEM); |
267 | } | 219 | } |
220 | |||
221 | inode->i_mode = mode; | ||
222 | inode->i_uid = current_fsuid(); | ||
223 | inode->i_gid = current_fsgid(); | ||
224 | inode->i_blocks = 0; | ||
225 | inode->i_rdev = 0; | ||
226 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
227 | inode->i_mapping->a_ops = &v9fs_addr_operations; | ||
228 | |||
229 | switch (mode & S_IFMT) { | ||
230 | case S_IFIFO: | ||
231 | case S_IFBLK: | ||
232 | case S_IFCHR: | ||
233 | case S_IFSOCK: | ||
234 | if (!v9fs_extended(v9ses)) { | ||
235 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
236 | "special files without extended mode\n"); | ||
237 | err = -EINVAL; | ||
238 | goto error; | ||
239 | } | ||
240 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
241 | break; | ||
242 | case S_IFREG: | ||
243 | inode->i_op = &v9fs_file_inode_operations; | ||
244 | inode->i_fop = &v9fs_file_operations; | ||
245 | break; | ||
246 | case S_IFLNK: | ||
247 | if (!v9fs_extended(v9ses)) { | ||
248 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
249 | "extended modes used w/o 9P2000.u\n"); | ||
250 | err = -EINVAL; | ||
251 | goto error; | ||
252 | } | ||
253 | inode->i_op = &v9fs_symlink_inode_operations; | ||
254 | break; | ||
255 | case S_IFDIR: | ||
256 | inc_nlink(inode); | ||
257 | if (v9fs_extended(v9ses)) | ||
258 | inode->i_op = &v9fs_dir_inode_operations_ext; | ||
259 | else | ||
260 | inode->i_op = &v9fs_dir_inode_operations; | ||
261 | inode->i_fop = &v9fs_dir_operations; | ||
262 | break; | ||
263 | default: | ||
264 | P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", | ||
265 | mode, mode & S_IFMT); | ||
266 | err = -EINVAL; | ||
267 | goto error; | ||
268 | } | ||
269 | |||
268 | return inode; | 270 | return inode; |
271 | |||
272 | error: | ||
273 | iput(inode); | ||
274 | return ERR_PTR(err); | ||
269 | } | 275 | } |
270 | 276 | ||
271 | /* | 277 | /* |
@@ -338,30 +344,25 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, | |||
338 | 344 | ||
339 | ret = NULL; | 345 | ret = NULL; |
340 | st = p9_client_stat(fid); | 346 | st = p9_client_stat(fid); |
341 | if (IS_ERR(st)) { | 347 | if (IS_ERR(st)) |
342 | err = PTR_ERR(st); | 348 | return ERR_CAST(st); |
343 | st = NULL; | ||
344 | goto error; | ||
345 | } | ||
346 | 349 | ||
347 | umode = p9mode2unixmode(v9ses, st->mode); | 350 | umode = p9mode2unixmode(v9ses, st->mode); |
348 | ret = v9fs_get_inode(sb, umode); | 351 | ret = v9fs_get_inode(sb, umode); |
349 | if (IS_ERR(ret)) { | 352 | if (IS_ERR(ret)) { |
350 | err = PTR_ERR(ret); | 353 | err = PTR_ERR(ret); |
351 | ret = NULL; | ||
352 | goto error; | 354 | goto error; |
353 | } | 355 | } |
354 | 356 | ||
355 | v9fs_stat2inode(st, ret, sb); | 357 | v9fs_stat2inode(st, ret, sb); |
356 | ret->i_ino = v9fs_qid2ino(&st->qid); | 358 | ret->i_ino = v9fs_qid2ino(&st->qid); |
359 | p9stat_free(st); | ||
357 | kfree(st); | 360 | kfree(st); |
358 | return ret; | 361 | return ret; |
359 | 362 | ||
360 | error: | 363 | error: |
364 | p9stat_free(st); | ||
361 | kfree(st); | 365 | kfree(st); |
362 | if (ret) | ||
363 | iput(ret); | ||
364 | |||
365 | return ERR_PTR(err); | 366 | return ERR_PTR(err); |
366 | } | 367 | } |
367 | 368 | ||
@@ -403,9 +404,9 @@ v9fs_open_created(struct inode *inode, struct file *file) | |||
403 | * @v9ses: session information | 404 | * @v9ses: session information |
404 | * @dir: directory that dentry is being created in | 405 | * @dir: directory that dentry is being created in |
405 | * @dentry: dentry that is being created | 406 | * @dentry: dentry that is being created |
407 | * @extension: 9p2000.u extension string to support devices, etc. | ||
406 | * @perm: create permissions | 408 | * @perm: create permissions |
407 | * @mode: open mode | 409 | * @mode: open mode |
408 | * @extension: 9p2000.u extension string to support devices, etc. | ||
409 | * | 410 | * |
410 | */ | 411 | */ |
411 | static struct p9_fid * | 412 | static struct p9_fid * |
@@ -470,7 +471,10 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
470 | dentry->d_op = &v9fs_dentry_operations; | 471 | dentry->d_op = &v9fs_dentry_operations; |
471 | 472 | ||
472 | d_instantiate(dentry, inode); | 473 | d_instantiate(dentry, inode); |
473 | v9fs_fid_add(dentry, fid); | 474 | err = v9fs_fid_add(dentry, fid); |
475 | if (err < 0) | ||
476 | goto error; | ||
477 | |||
474 | return ofid; | 478 | return ofid; |
475 | 479 | ||
476 | error: | 480 | error: |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 38d695d66a0b..8961f1a8f668 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -81,7 +81,7 @@ static int v9fs_set_super(struct super_block *s, void *data) | |||
81 | 81 | ||
82 | static void | 82 | static void |
83 | v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | 83 | v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, |
84 | int flags) | 84 | int flags, void *data) |
85 | { | 85 | { |
86 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 86 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
87 | sb->s_blocksize_bits = fls(v9ses->maxdata - 1); | 87 | sb->s_blocksize_bits = fls(v9ses->maxdata - 1); |
@@ -91,6 +91,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | |||
91 | 91 | ||
92 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | | 92 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | |
93 | MS_NOATIME; | 93 | MS_NOATIME; |
94 | |||
95 | save_mount_options(sb, data); | ||
94 | } | 96 | } |
95 | 97 | ||
96 | /** | 98 | /** |
@@ -113,14 +115,11 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
113 | struct v9fs_session_info *v9ses = NULL; | 115 | struct v9fs_session_info *v9ses = NULL; |
114 | struct p9_wstat *st = NULL; | 116 | struct p9_wstat *st = NULL; |
115 | int mode = S_IRWXUGO | S_ISVTX; | 117 | int mode = S_IRWXUGO | S_ISVTX; |
116 | uid_t uid = current_fsuid(); | ||
117 | gid_t gid = current_fsgid(); | ||
118 | struct p9_fid *fid; | 118 | struct p9_fid *fid; |
119 | int retval = 0; | 119 | int retval = 0; |
120 | 120 | ||
121 | P9_DPRINTK(P9_DEBUG_VFS, " \n"); | 121 | P9_DPRINTK(P9_DEBUG_VFS, " \n"); |
122 | 122 | ||
123 | st = NULL; | ||
124 | v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); | 123 | v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); |
125 | if (!v9ses) | 124 | if (!v9ses) |
126 | return -ENOMEM; | 125 | return -ENOMEM; |
@@ -142,7 +141,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
142 | retval = PTR_ERR(sb); | 141 | retval = PTR_ERR(sb); |
143 | goto free_stat; | 142 | goto free_stat; |
144 | } | 143 | } |
145 | v9fs_fill_super(sb, v9ses, flags); | 144 | v9fs_fill_super(sb, v9ses, flags, data); |
146 | 145 | ||
147 | inode = v9fs_get_inode(sb, S_IFDIR | mode); | 146 | inode = v9fs_get_inode(sb, S_IFDIR | mode); |
148 | if (IS_ERR(inode)) { | 147 | if (IS_ERR(inode)) { |
@@ -150,9 +149,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
150 | goto release_sb; | 149 | goto release_sb; |
151 | } | 150 | } |
152 | 151 | ||
153 | inode->i_uid = uid; | ||
154 | inode->i_gid = gid; | ||
155 | |||
156 | root = d_alloc_root(inode); | 152 | root = d_alloc_root(inode); |
157 | if (!root) { | 153 | if (!root) { |
158 | iput(inode); | 154 | iput(inode); |
@@ -173,10 +169,8 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); | |||
173 | simple_set_mnt(mnt, sb); | 169 | simple_set_mnt(mnt, sb); |
174 | return 0; | 170 | return 0; |
175 | 171 | ||
176 | release_sb: | ||
177 | deactivate_locked_super(sb); | ||
178 | |||
179 | free_stat: | 172 | free_stat: |
173 | p9stat_free(st); | ||
180 | kfree(st); | 174 | kfree(st); |
181 | 175 | ||
182 | clunk_fid: | 176 | clunk_fid: |
@@ -185,7 +179,12 @@ clunk_fid: | |||
185 | close_session: | 179 | close_session: |
186 | v9fs_session_close(v9ses); | 180 | v9fs_session_close(v9ses); |
187 | kfree(v9ses); | 181 | kfree(v9ses); |
182 | return retval; | ||
188 | 183 | ||
184 | release_sb: | ||
185 | p9stat_free(st); | ||
186 | kfree(st); | ||
187 | deactivate_locked_super(sb); | ||
189 | return retval; | 188 | return retval; |
190 | } | 189 | } |
191 | 190 | ||
@@ -207,24 +206,10 @@ static void v9fs_kill_super(struct super_block *s) | |||
207 | 206 | ||
208 | v9fs_session_close(v9ses); | 207 | v9fs_session_close(v9ses); |
209 | kfree(v9ses); | 208 | kfree(v9ses); |
209 | s->s_fs_info = NULL; | ||
210 | P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); | 210 | P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); |
211 | } | 211 | } |
212 | 212 | ||
213 | /** | ||
214 | * v9fs_show_options - Show mount options in /proc/mounts | ||
215 | * @m: seq_file to write to | ||
216 | * @mnt: mount descriptor | ||
217 | * | ||
218 | */ | ||
219 | |||
220 | static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
221 | { | ||
222 | struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; | ||
223 | |||
224 | seq_printf(m, "%s", v9ses->options); | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static void | 213 | static void |
229 | v9fs_umount_begin(struct super_block *sb) | 214 | v9fs_umount_begin(struct super_block *sb) |
230 | { | 215 | { |
@@ -237,7 +222,7 @@ v9fs_umount_begin(struct super_block *sb) | |||
237 | static const struct super_operations v9fs_super_ops = { | 222 | static const struct super_operations v9fs_super_ops = { |
238 | .statfs = simple_statfs, | 223 | .statfs = simple_statfs, |
239 | .clear_inode = v9fs_clear_inode, | 224 | .clear_inode = v9fs_clear_inode, |
240 | .show_options = v9fs_show_options, | 225 | .show_options = generic_show_options, |
241 | .umount_begin = v9fs_umount_begin, | 226 | .umount_begin = v9fs_umount_begin, |
242 | }; | 227 | }; |
243 | 228 | ||
diff --git a/fs/afs/file.c b/fs/afs/file.c index 0149dab365e7..681c2a7b013f 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -134,9 +134,16 @@ static int afs_readpage(struct file *file, struct page *page) | |||
134 | 134 | ||
135 | inode = page->mapping->host; | 135 | inode = page->mapping->host; |
136 | 136 | ||
137 | ASSERT(file != NULL); | 137 | if (file) { |
138 | key = file->private_data; | 138 | key = file->private_data; |
139 | ASSERT(key != NULL); | 139 | ASSERT(key != NULL); |
140 | } else { | ||
141 | key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); | ||
142 | if (IS_ERR(key)) { | ||
143 | ret = PTR_ERR(key); | ||
144 | goto error_nokey; | ||
145 | } | ||
146 | } | ||
140 | 147 | ||
141 | _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); | 148 | _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); |
142 | 149 | ||
@@ -207,12 +214,17 @@ static int afs_readpage(struct file *file, struct page *page) | |||
207 | unlock_page(page); | 214 | unlock_page(page); |
208 | } | 215 | } |
209 | 216 | ||
217 | if (!file) | ||
218 | key_put(key); | ||
210 | _leave(" = 0"); | 219 | _leave(" = 0"); |
211 | return 0; | 220 | return 0; |
212 | 221 | ||
213 | error: | 222 | error: |
214 | SetPageError(page); | 223 | SetPageError(page); |
215 | unlock_page(page); | 224 | unlock_page(page); |
225 | if (!file) | ||
226 | key_put(key); | ||
227 | error_nokey: | ||
216 | _leave(" = %d", ret); | 228 | _leave(" = %d", ret); |
217 | return ret; | 229 | return ret; |
218 | } | 230 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 272b9b2bea86..59cba180fe83 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3099,8 +3099,12 @@ static void inode_tree_add(struct inode *inode) | |||
3099 | { | 3099 | { |
3100 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3100 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3101 | struct btrfs_inode *entry; | 3101 | struct btrfs_inode *entry; |
3102 | struct rb_node **p = &root->inode_tree.rb_node; | 3102 | struct rb_node **p; |
3103 | struct rb_node *parent = NULL; | 3103 | struct rb_node *parent; |
3104 | |||
3105 | again: | ||
3106 | p = &root->inode_tree.rb_node; | ||
3107 | parent = NULL; | ||
3104 | 3108 | ||
3105 | spin_lock(&root->inode_lock); | 3109 | spin_lock(&root->inode_lock); |
3106 | while (*p) { | 3110 | while (*p) { |
@@ -3108,13 +3112,16 @@ static void inode_tree_add(struct inode *inode) | |||
3108 | entry = rb_entry(parent, struct btrfs_inode, rb_node); | 3112 | entry = rb_entry(parent, struct btrfs_inode, rb_node); |
3109 | 3113 | ||
3110 | if (inode->i_ino < entry->vfs_inode.i_ino) | 3114 | if (inode->i_ino < entry->vfs_inode.i_ino) |
3111 | p = &(*p)->rb_left; | 3115 | p = &parent->rb_left; |
3112 | else if (inode->i_ino > entry->vfs_inode.i_ino) | 3116 | else if (inode->i_ino > entry->vfs_inode.i_ino) |
3113 | p = &(*p)->rb_right; | 3117 | p = &parent->rb_right; |
3114 | else { | 3118 | else { |
3115 | WARN_ON(!(entry->vfs_inode.i_state & | 3119 | WARN_ON(!(entry->vfs_inode.i_state & |
3116 | (I_WILL_FREE | I_FREEING | I_CLEAR))); | 3120 | (I_WILL_FREE | I_FREEING | I_CLEAR))); |
3117 | break; | 3121 | rb_erase(parent, &root->inode_tree); |
3122 | RB_CLEAR_NODE(parent); | ||
3123 | spin_unlock(&root->inode_lock); | ||
3124 | goto again; | ||
3118 | } | 3125 | } |
3119 | } | 3126 | } |
3120 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); | 3127 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); |
@@ -3126,12 +3133,12 @@ static void inode_tree_del(struct inode *inode) | |||
3126 | { | 3133 | { |
3127 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3134 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3128 | 3135 | ||
3136 | spin_lock(&root->inode_lock); | ||
3129 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
3130 | spin_lock(&root->inode_lock); | ||
3131 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
3132 | spin_unlock(&root->inode_lock); | ||
3133 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
3134 | } | 3140 | } |
3141 | spin_unlock(&root->inode_lock); | ||
3135 | } | 3142 | } |
3136 | 3143 | ||
3137 | static noinline void init_btrfs_i(struct inode *inode) | 3144 | static noinline void init_btrfs_i(struct inode *inode) |
diff --git a/fs/buffer.c b/fs/buffer.c index a3ef091a45bd..28f320fac4d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1165,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
1165 | 1165 | ||
1166 | if (!test_set_buffer_dirty(bh)) { | 1166 | if (!test_set_buffer_dirty(bh)) { |
1167 | struct page *page = bh->b_page; | 1167 | struct page *page = bh->b_page; |
1168 | if (!TestSetPageDirty(page)) | 1168 | if (!TestSetPageDirty(page)) { |
1169 | __set_page_dirty(page, page_mapping(page), 0); | 1169 | struct address_space *mapping = page_mapping(page); |
1170 | if (mapping) | ||
1171 | __set_page_dirty(page, mapping, 0); | ||
1172 | } | ||
1170 | } | 1173 | } |
1171 | } | 1174 | } |
1172 | 1175 | ||
@@ -678,8 +678,8 @@ exit: | |||
678 | } | 678 | } |
679 | EXPORT_SYMBOL(open_exec); | 679 | EXPORT_SYMBOL(open_exec); |
680 | 680 | ||
681 | int kernel_read(struct file *file, unsigned long offset, | 681 | int kernel_read(struct file *file, loff_t offset, |
682 | char *addr, unsigned long count) | 682 | char *addr, unsigned long count) |
683 | { | 683 | { |
684 | mm_segment_t old_fs; | 684 | mm_segment_t old_fs; |
685 | loff_t pos = offset; | 685 | loff_t pos = offset; |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index fb3c1a21b135..522b15498f45 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -29,23 +29,25 @@ config EXT3_FS | |||
29 | module will be called ext3. | 29 | module will be called ext3. |
30 | 30 | ||
31 | config EXT3_DEFAULTS_TO_ORDERED | 31 | config EXT3_DEFAULTS_TO_ORDERED |
32 | bool "Default to 'data=ordered' in ext3 (legacy option)" | 32 | bool "Default to 'data=ordered' in ext3" |
33 | depends on EXT3_FS | 33 | depends on EXT3_FS |
34 | help | 34 | help |
35 | If a filesystem does not explicitly specify a data ordering | 35 | The journal mode options for ext3 have different tradeoffs |
36 | mode, and the journal capability allowed it, ext3 used to | 36 | between when data is guaranteed to be on disk and |
37 | historically default to 'data=ordered'. | 37 | performance. The use of "data=writeback" can cause |
38 | 38 | unwritten data to appear in files after an system crash or | |
39 | That was a rather unfortunate choice, because it leads to all | 39 | power failure, which can be a security issue. However, |
40 | kinds of latency problems, and the 'data=writeback' mode is more | 40 | "data=ordered" mode can also result in major performance |
41 | appropriate these days. | 41 | problems, including seconds-long delays before an fsync() |
42 | 42 | call returns. For details, see: | |
43 | You should probably always answer 'n' here, and if you really | 43 | |
44 | want to use 'data=ordered' mode, set it in the filesystem itself | 44 | http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs |
45 | with 'tune2fs -o journal_data_ordered'. | 45 | |
46 | 46 | If you have been historically happy with ext3's performance, | |
47 | But if you really want to enable the legacy default, you can do | 47 | data=ordered mode will be a safe choice and you should |
48 | so by answering 'y' to this question. | 48 | answer 'y' here. If you understand the reliability and data |
49 | privacy issues of data=writeback and are willing to make | ||
50 | that trade off, answer 'n'. | ||
49 | 51 | ||
50 | config EXT3_FS_XATTR | 52 | config EXT3_FS_XATTR |
51 | bool "Ext3 extended attributes" | 53 | bool "Ext3 extended attributes" |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 524b349c6299..a8d80a7f1105 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl | |||
543 | #endif | 543 | #endif |
544 | } | 544 | } |
545 | 545 | ||
546 | static char *data_mode_string(unsigned long mode) | ||
547 | { | ||
548 | switch (mode) { | ||
549 | case EXT3_MOUNT_JOURNAL_DATA: | ||
550 | return "journal"; | ||
551 | case EXT3_MOUNT_ORDERED_DATA: | ||
552 | return "ordered"; | ||
553 | case EXT3_MOUNT_WRITEBACK_DATA: | ||
554 | return "writeback"; | ||
555 | } | ||
556 | return "unknown"; | ||
557 | } | ||
558 | |||
546 | /* | 559 | /* |
547 | * Show an option if | 560 | * Show an option if |
548 | * - it's set to a non-default value OR | 561 | * - it's set to a non-default value OR |
@@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
616 | if (test_opt(sb, NOBH)) | 629 | if (test_opt(sb, NOBH)) |
617 | seq_puts(seq, ",nobh"); | 630 | seq_puts(seq, ",nobh"); |
618 | 631 | ||
619 | if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) | 632 | seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt & |
620 | seq_puts(seq, ",data=journal"); | 633 | EXT3_MOUNT_DATA_FLAGS)); |
621 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) | ||
622 | seq_puts(seq, ",data=ordered"); | ||
623 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) | ||
624 | seq_puts(seq, ",data=writeback"); | ||
625 | |||
626 | if (test_opt(sb, DATA_ERR_ABORT)) | 634 | if (test_opt(sb, DATA_ERR_ABORT)) |
627 | seq_puts(seq, ",data_err=abort"); | 635 | seq_puts(seq, ",data_err=abort"); |
628 | 636 | ||
@@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb, | |||
1024 | datacheck: | 1032 | datacheck: |
1025 | if (is_remount) { | 1033 | if (is_remount) { |
1026 | if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) | 1034 | if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) |
1027 | != data_opt) { | 1035 | == data_opt) |
1028 | printk(KERN_ERR | 1036 | break; |
1029 | "EXT3-fs: cannot change data " | 1037 | printk(KERN_ERR |
1030 | "mode on remount\n"); | 1038 | "EXT3-fs (device %s): Cannot change " |
1031 | return 0; | 1039 | "data mode on remount. The filesystem " |
1032 | } | 1040 | "is mounted in data=%s mode and you " |
1041 | "try to remount it in data=%s mode.\n", | ||
1042 | sb->s_id, | ||
1043 | data_mode_string(sbi->s_mount_opt & | ||
1044 | EXT3_MOUNT_DATA_FLAGS), | ||
1045 | data_mode_string(data_opt)); | ||
1046 | return 0; | ||
1033 | } else { | 1047 | } else { |
1034 | sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; | 1048 | sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; |
1035 | sbi->s_mount_opt |= data_opt; | 1049 | sbi->s_mount_opt |= data_opt; |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 23419dc3027b..a7cbfbd340c7 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | |||
386 | #define GDLM_ATTR(_name,_mode,_show,_store) \ | 386 | #define GDLM_ATTR(_name,_mode,_show,_store) \ |
387 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) | 387 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) |
388 | 388 | ||
389 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); | 389 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); |
390 | GDLM_ATTR(block, 0644, block_show, block_store); | 390 | GDLM_ATTR(block, 0644, block_show, block_store); |
391 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); | 391 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); |
392 | GDLM_ATTR(id, 0444, lkid_show, NULL); | 392 | GDLM_ATTR(id, 0444, lkid_show, NULL); |
393 | GDLM_ATTR(jid, 0444, jid_show, NULL); | 393 | GDLM_ATTR(jid, 0444, jid_show, NULL); |
394 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); | 394 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); |
395 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); | 395 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); |
396 | GDLM_ATTR(recover, 0200, NULL, recover_store); | 396 | GDLM_ATTR(recover, 0600, NULL, recover_store); |
397 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 397 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
398 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 398 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
399 | 399 | ||
400 | static struct attribute *lock_module_attrs[] = { | 400 | static struct attribute *lock_module_attrs[] = { |
401 | &gdlm_attr_proto_name.attr, | 401 | &gdlm_attr_proto_name.attr, |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 941c8425c10b..cb88dac8ccaa 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -935,26 +935,28 @@ static int can_do_hugetlb_shm(void) | |||
935 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); | 935 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); |
936 | } | 936 | } |
937 | 937 | ||
938 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) | 938 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, |
939 | struct user_struct **user) | ||
939 | { | 940 | { |
940 | int error = -ENOMEM; | 941 | int error = -ENOMEM; |
941 | int unlock_shm = 0; | ||
942 | struct file *file; | 942 | struct file *file; |
943 | struct inode *inode; | 943 | struct inode *inode; |
944 | struct dentry *dentry, *root; | 944 | struct dentry *dentry, *root; |
945 | struct qstr quick_string; | 945 | struct qstr quick_string; |
946 | struct user_struct *user = current_user(); | ||
947 | 946 | ||
947 | *user = NULL; | ||
948 | if (!hugetlbfs_vfsmount) | 948 | if (!hugetlbfs_vfsmount) |
949 | return ERR_PTR(-ENOENT); | 949 | return ERR_PTR(-ENOENT); |
950 | 950 | ||
951 | if (!can_do_hugetlb_shm()) { | 951 | if (!can_do_hugetlb_shm()) { |
952 | if (user_shm_lock(size, user)) { | 952 | *user = current_user(); |
953 | unlock_shm = 1; | 953 | if (user_shm_lock(size, *user)) { |
954 | WARN_ONCE(1, | 954 | WARN_ONCE(1, |
955 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); | 955 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); |
956 | } else | 956 | } else { |
957 | *user = NULL; | ||
957 | return ERR_PTR(-EPERM); | 958 | return ERR_PTR(-EPERM); |
959 | } | ||
958 | } | 960 | } |
959 | 961 | ||
960 | root = hugetlbfs_vfsmount->mnt_root; | 962 | root = hugetlbfs_vfsmount->mnt_root; |
@@ -996,8 +998,10 @@ out_inode: | |||
996 | out_dentry: | 998 | out_dentry: |
997 | dput(dentry); | 999 | dput(dentry); |
998 | out_shm_unlock: | 1000 | out_shm_unlock: |
999 | if (unlock_shm) | 1001 | if (*user) { |
1000 | user_shm_unlock(size, user); | 1002 | user_shm_unlock(size, *user); |
1003 | *user = NULL; | ||
1004 | } | ||
1001 | return ERR_PTR(error); | 1005 | return ERR_PTR(error); |
1002 | } | 1006 | } |
1003 | 1007 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index ddfa89948c3f..dcec3d3ea64f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, | |||
217 | return PTR_ERR(s); | 217 | return PTR_ERR(s); |
218 | 218 | ||
219 | s->s_flags = MS_NOUSER; | 219 | s->s_flags = MS_NOUSER; |
220 | s->s_maxbytes = ~0ULL; | 220 | s->s_maxbytes = MAX_LFS_FILESIZE; |
221 | s->s_blocksize = PAGE_SIZE; | 221 | s->s_blocksize = PAGE_SIZE; |
222 | s->s_blocksize_bits = PAGE_SHIFT; | 222 | s->s_blocksize_bits = PAGE_SHIFT; |
223 | s->s_magic = magic; | 223 | s->s_magic = magic; |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 65ca8c18476f..1434080aefeb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1250,8 +1250,8 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1250 | continue; | 1250 | continue; |
1251 | } | 1251 | } |
1252 | /* Initialize or reset the session */ | 1252 | /* Initialize or reset the session */ |
1253 | if (nfs4_has_session(clp) && | 1253 | if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) |
1254 | test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { | 1254 | && nfs4_has_session(clp)) { |
1255 | if (clp->cl_cons_state == NFS_CS_SESSION_INITING) | 1255 | if (clp->cl_cons_state == NFS_CS_SESSION_INITING) |
1256 | status = nfs4_initialize_session(clp); | 1256 | status = nfs4_initialize_session(clp); |
1257 | else | 1257 | else |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8e2ec43b18f4..151964f0de4c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -416,8 +416,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
416 | if (unlikely(err)) | 416 | if (unlikely(err)) |
417 | goto failed; | 417 | goto failed; |
418 | 418 | ||
419 | down_read(&nilfs->ns_segctor_sem); | ||
419 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, | 420 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, |
420 | &bh_cp); | 421 | &bh_cp); |
422 | up_read(&nilfs->ns_segctor_sem); | ||
421 | if (unlikely(err)) { | 423 | if (unlikely(err)) { |
422 | if (err == -ENOENT || err == -EINVAL) { | 424 | if (err == -ENOENT || err == -EINVAL) { |
423 | printk(KERN_ERR | 425 | printk(KERN_ERR |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index e8adbffc626f..1b9caafb8662 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -253,7 +253,7 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
253 | 253 | ||
254 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) | 254 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) |
255 | { | 255 | { |
256 | if (!atomic_dec_and_test(&sbi->s_count)) | 256 | if (atomic_dec_and_test(&sbi->s_count)) |
257 | kfree(sbi); | 257 | kfree(sbi); |
258 | } | 258 | } |
259 | 259 | ||
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 47cd258fd24d..c9ee67b442e1 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev | |||
62 | event_priv->wd = wd; | 62 | event_priv->wd = wd; |
63 | 63 | ||
64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); | 64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); |
65 | /* EEXIST is not an error */ | 65 | if (ret) { |
66 | if (ret == -EEXIST) | ||
67 | ret = 0; | ||
68 | |||
69 | /* did event_priv get attached? */ | ||
70 | if (list_empty(&fsn_event_priv->event_list)) | ||
71 | inotify_free_event_priv(fsn_event_priv); | 66 | inotify_free_event_priv(fsn_event_priv); |
67 | /* EEXIST says we tail matched, EOVERFLOW isn't something | ||
68 | * to report up the stack. */ | ||
69 | if ((ret == -EEXIST) || | ||
70 | (ret == -EOVERFLOW)) | ||
71 | ret = 0; | ||
72 | } | ||
72 | 73 | ||
73 | /* | 74 | /* |
74 | * If we hold the entry until after the event is on the queue | 75 | * If we hold the entry until after the event is on the queue |
@@ -104,16 +105,45 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
104 | return send; | 105 | return send; |
105 | } | 106 | } |
106 | 107 | ||
108 | /* | ||
109 | * This is NEVER supposed to be called. Inotify marks should either have been | ||
110 | * removed from the idr when the watch was removed or in the | ||
111 | * fsnotify_destroy_mark_by_group() call when the inotify instance was being | ||
112 | * torn down. This is only called if the idr is about to be freed but there | ||
113 | * are still marks in it. | ||
114 | */ | ||
107 | static int idr_callback(int id, void *p, void *data) | 115 | static int idr_callback(int id, void *p, void *data) |
108 | { | 116 | { |
109 | BUG(); | 117 | struct fsnotify_mark_entry *entry; |
118 | struct inotify_inode_mark_entry *ientry; | ||
119 | static bool warned = false; | ||
120 | |||
121 | if (warned) | ||
122 | return 0; | ||
123 | |||
124 | warned = false; | ||
125 | entry = p; | ||
126 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
127 | |||
128 | WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in " | ||
129 | "idr. Probably leaking memory\n", id, p, data); | ||
130 | |||
131 | /* | ||
132 | * I'm taking the liberty of assuming that the mark in question is a | ||
133 | * valid address and I'm dereferencing it. This might help to figure | ||
134 | * out why we got here and the panic is no worse than the original | ||
135 | * BUG() that was here. | ||
136 | */ | ||
137 | if (entry) | ||
138 | printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n", | ||
139 | entry->group, entry->inode, ientry->wd); | ||
110 | return 0; | 140 | return 0; |
111 | } | 141 | } |
112 | 142 | ||
113 | static void inotify_free_group_priv(struct fsnotify_group *group) | 143 | static void inotify_free_group_priv(struct fsnotify_group *group) |
114 | { | 144 | { |
115 | /* ideally the idr is empty and we won't hit the BUG in teh callback */ | 145 | /* ideally the idr is empty and we won't hit the BUG in teh callback */ |
116 | idr_for_each(&group->inotify_data.idr, idr_callback, NULL); | 146 | idr_for_each(&group->inotify_data.idr, idr_callback, group); |
117 | idr_remove_all(&group->inotify_data.idr); | 147 | idr_remove_all(&group->inotify_data.idr); |
118 | idr_destroy(&group->inotify_data.idr); | 148 | idr_destroy(&group->inotify_data.idr); |
119 | } | 149 | } |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index f30d9bbc2e1b..0e781bc88d1e 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -47,9 +47,6 @@ | |||
47 | 47 | ||
48 | static struct vfsmount *inotify_mnt __read_mostly; | 48 | static struct vfsmount *inotify_mnt __read_mostly; |
49 | 49 | ||
50 | /* this just sits here and wastes global memory. used to just pad userspace messages with zeros */ | ||
51 | static struct inotify_event nul_inotify_event; | ||
52 | |||
53 | /* these are configurable via /proc/sys/fs/inotify/ */ | 50 | /* these are configurable via /proc/sys/fs/inotify/ */ |
54 | static int inotify_max_user_instances __read_mostly; | 51 | static int inotify_max_user_instances __read_mostly; |
55 | static int inotify_max_queued_events __read_mostly; | 52 | static int inotify_max_queued_events __read_mostly; |
@@ -199,8 +196,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
199 | inotify_free_event_priv(fsn_priv); | 196 | inotify_free_event_priv(fsn_priv); |
200 | } | 197 | } |
201 | 198 | ||
202 | /* round up event->name_len so it is a multiple of event_size */ | 199 | /* round up event->name_len so it is a multiple of event_size |
203 | name_len = roundup(event->name_len, event_size); | 200 | * plus an extra byte for the terminating '\0'. |
201 | */ | ||
202 | name_len = roundup(event->name_len + 1, event_size); | ||
204 | inotify_event.len = name_len; | 203 | inotify_event.len = name_len; |
205 | 204 | ||
206 | inotify_event.mask = inotify_mask_to_arg(event->mask); | 205 | inotify_event.mask = inotify_mask_to_arg(event->mask); |
@@ -224,8 +223,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
224 | return -EFAULT; | 223 | return -EFAULT; |
225 | buf += event->name_len; | 224 | buf += event->name_len; |
226 | 225 | ||
227 | /* fill userspace with 0's from nul_inotify_event */ | 226 | /* fill userspace with 0's */ |
228 | if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) | 227 | if (clear_user(buf, len_to_zero)) |
229 | return -EFAULT; | 228 | return -EFAULT; |
230 | buf += len_to_zero; | 229 | buf += len_to_zero; |
231 | event_size += name_len; | 230 | event_size += name_len; |
@@ -364,20 +363,53 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns | |||
364 | return error; | 363 | return error; |
365 | } | 364 | } |
366 | 365 | ||
366 | /* | ||
367 | * Remove the mark from the idr (if present) and drop the reference | ||
368 | * on the mark because it was in the idr. | ||
369 | */ | ||
367 | static void inotify_remove_from_idr(struct fsnotify_group *group, | 370 | static void inotify_remove_from_idr(struct fsnotify_group *group, |
368 | struct inotify_inode_mark_entry *ientry) | 371 | struct inotify_inode_mark_entry *ientry) |
369 | { | 372 | { |
370 | struct idr *idr; | 373 | struct idr *idr; |
374 | struct fsnotify_mark_entry *entry; | ||
375 | struct inotify_inode_mark_entry *found_ientry; | ||
376 | int wd; | ||
371 | 377 | ||
372 | spin_lock(&group->inotify_data.idr_lock); | 378 | spin_lock(&group->inotify_data.idr_lock); |
373 | idr = &group->inotify_data.idr; | 379 | idr = &group->inotify_data.idr; |
374 | idr_remove(idr, ientry->wd); | 380 | wd = ientry->wd; |
375 | spin_unlock(&group->inotify_data.idr_lock); | 381 | |
382 | if (wd == -1) | ||
383 | goto out; | ||
384 | |||
385 | entry = idr_find(&group->inotify_data.idr, wd); | ||
386 | if (unlikely(!entry)) | ||
387 | goto out; | ||
388 | |||
389 | found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
390 | if (unlikely(found_ientry != ientry)) { | ||
391 | /* We found an entry in the idr with the right wd, but it's | ||
392 | * not the entry we were told to remove. eparis seriously | ||
393 | * fucked up somewhere. */ | ||
394 | WARN_ON(1); | ||
395 | ientry->wd = -1; | ||
396 | goto out; | ||
397 | } | ||
398 | |||
399 | /* One ref for being in the idr, one ref held by the caller */ | ||
400 | BUG_ON(atomic_read(&entry->refcnt) < 2); | ||
401 | |||
402 | idr_remove(idr, wd); | ||
376 | ientry->wd = -1; | 403 | ientry->wd = -1; |
404 | |||
405 | /* removed from the idr, drop that ref */ | ||
406 | fsnotify_put_mark(entry); | ||
407 | out: | ||
408 | spin_unlock(&group->inotify_data.idr_lock); | ||
377 | } | 409 | } |
410 | |||
378 | /* | 411 | /* |
379 | * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the | 412 | * Send IN_IGNORED for this wd, remove this wd from the idr. |
380 | * internal reference help on the mark because it is in the idr. | ||
381 | */ | 413 | */ |
382 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | 414 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, |
383 | struct fsnotify_group *group) | 415 | struct fsnotify_group *group) |
@@ -386,6 +418,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
386 | struct fsnotify_event *ignored_event; | 418 | struct fsnotify_event *ignored_event; |
387 | struct inotify_event_private_data *event_priv; | 419 | struct inotify_event_private_data *event_priv; |
388 | struct fsnotify_event_private_data *fsn_event_priv; | 420 | struct fsnotify_event_private_data *fsn_event_priv; |
421 | int ret; | ||
389 | 422 | ||
390 | ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, | 423 | ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, |
391 | FSNOTIFY_EVENT_NONE, NULL, 0, | 424 | FSNOTIFY_EVENT_NONE, NULL, 0, |
@@ -404,10 +437,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
404 | fsn_event_priv->group = group; | 437 | fsn_event_priv->group = group; |
405 | event_priv->wd = ientry->wd; | 438 | event_priv->wd = ientry->wd; |
406 | 439 | ||
407 | fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); | 440 | ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); |
408 | 441 | if (ret) | |
409 | /* did the private data get added? */ | ||
410 | if (list_empty(&fsn_event_priv->event_list)) | ||
411 | inotify_free_event_priv(fsn_event_priv); | 442 | inotify_free_event_priv(fsn_event_priv); |
412 | 443 | ||
413 | skip_send_ignore: | 444 | skip_send_ignore: |
@@ -418,9 +449,6 @@ skip_send_ignore: | |||
418 | /* remove this entry from the idr */ | 449 | /* remove this entry from the idr */ |
419 | inotify_remove_from_idr(group, ientry); | 450 | inotify_remove_from_idr(group, ientry); |
420 | 451 | ||
421 | /* removed from idr, drop that reference */ | ||
422 | fsnotify_put_mark(entry); | ||
423 | |||
424 | atomic_dec(&group->inotify_data.user->inotify_watches); | 452 | atomic_dec(&group->inotify_data.user->inotify_watches); |
425 | } | 453 | } |
426 | 454 | ||
@@ -432,80 +460,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry) | |||
432 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | 460 | kmem_cache_free(inotify_inode_mark_cachep, ientry); |
433 | } | 461 | } |
434 | 462 | ||
435 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | 463 | static int inotify_update_existing_watch(struct fsnotify_group *group, |
464 | struct inode *inode, | ||
465 | u32 arg) | ||
436 | { | 466 | { |
437 | struct fsnotify_mark_entry *entry = NULL; | 467 | struct fsnotify_mark_entry *entry; |
438 | struct inotify_inode_mark_entry *ientry; | 468 | struct inotify_inode_mark_entry *ientry; |
439 | struct inotify_inode_mark_entry *tmp_ientry; | ||
440 | int ret = 0; | ||
441 | int add = (arg & IN_MASK_ADD); | ||
442 | __u32 mask; | ||
443 | __u32 old_mask, new_mask; | 469 | __u32 old_mask, new_mask; |
470 | __u32 mask; | ||
471 | int add = (arg & IN_MASK_ADD); | ||
472 | int ret; | ||
444 | 473 | ||
445 | /* don't allow invalid bits: we don't want flags set */ | 474 | /* don't allow invalid bits: we don't want flags set */ |
446 | mask = inotify_arg_to_mask(arg); | 475 | mask = inotify_arg_to_mask(arg); |
447 | if (unlikely(!mask)) | 476 | if (unlikely(!mask)) |
448 | return -EINVAL; | 477 | return -EINVAL; |
449 | 478 | ||
450 | tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
451 | if (unlikely(!tmp_ientry)) | ||
452 | return -ENOMEM; | ||
453 | /* we set the mask at the end after attaching it */ | ||
454 | fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); | ||
455 | tmp_ientry->wd = -1; | ||
456 | |||
457 | find_entry: | ||
458 | spin_lock(&inode->i_lock); | 479 | spin_lock(&inode->i_lock); |
459 | entry = fsnotify_find_mark_entry(group, inode); | 480 | entry = fsnotify_find_mark_entry(group, inode); |
460 | spin_unlock(&inode->i_lock); | 481 | spin_unlock(&inode->i_lock); |
461 | if (entry) { | 482 | if (!entry) |
462 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 483 | return -ENOENT; |
463 | } else { | ||
464 | ret = -ENOSPC; | ||
465 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) | ||
466 | goto out_err; | ||
467 | retry: | ||
468 | ret = -ENOMEM; | ||
469 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
470 | goto out_err; | ||
471 | |||
472 | spin_lock(&group->inotify_data.idr_lock); | ||
473 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | ||
474 | group->inotify_data.last_wd, | ||
475 | &tmp_ientry->wd); | ||
476 | spin_unlock(&group->inotify_data.idr_lock); | ||
477 | if (ret) { | ||
478 | if (ret == -EAGAIN) | ||
479 | goto retry; | ||
480 | goto out_err; | ||
481 | } | ||
482 | 484 | ||
483 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | 485 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); |
484 | if (ret) { | ||
485 | inotify_remove_from_idr(group, tmp_ientry); | ||
486 | if (ret == -EEXIST) | ||
487 | goto find_entry; | ||
488 | goto out_err; | ||
489 | } | ||
490 | |||
491 | /* tmp_ientry has been added to the inode, so we are all set up. | ||
492 | * now we just need to make sure tmp_ientry doesn't get freed and | ||
493 | * we need to set up entry and ientry so the generic code can | ||
494 | * do its thing. */ | ||
495 | ientry = tmp_ientry; | ||
496 | entry = &ientry->fsn_entry; | ||
497 | tmp_ientry = NULL; | ||
498 | |||
499 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
500 | |||
501 | /* update the idr hint */ | ||
502 | group->inotify_data.last_wd = ientry->wd; | ||
503 | |||
504 | /* we put the mark on the idr, take a reference */ | ||
505 | fsnotify_get_mark(entry); | ||
506 | } | ||
507 | |||
508 | ret = ientry->wd; | ||
509 | 486 | ||
510 | spin_lock(&entry->lock); | 487 | spin_lock(&entry->lock); |
511 | 488 | ||
@@ -537,18 +514,103 @@ retry: | |||
537 | fsnotify_recalc_group_mask(group); | 514 | fsnotify_recalc_group_mask(group); |
538 | } | 515 | } |
539 | 516 | ||
540 | /* this either matches fsnotify_find_mark_entry, or init_mark_entry | 517 | /* return the wd */ |
541 | * depending on which path we took... */ | 518 | ret = ientry->wd; |
519 | |||
520 | /* match the get from fsnotify_find_mark_entry() */ | ||
542 | fsnotify_put_mark(entry); | 521 | fsnotify_put_mark(entry); |
543 | 522 | ||
523 | return ret; | ||
524 | } | ||
525 | |||
526 | static int inotify_new_watch(struct fsnotify_group *group, | ||
527 | struct inode *inode, | ||
528 | u32 arg) | ||
529 | { | ||
530 | struct inotify_inode_mark_entry *tmp_ientry; | ||
531 | __u32 mask; | ||
532 | int ret; | ||
533 | |||
534 | /* don't allow invalid bits: we don't want flags set */ | ||
535 | mask = inotify_arg_to_mask(arg); | ||
536 | if (unlikely(!mask)) | ||
537 | return -EINVAL; | ||
538 | |||
539 | tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
540 | if (unlikely(!tmp_ientry)) | ||
541 | return -ENOMEM; | ||
542 | |||
543 | fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); | ||
544 | tmp_ientry->fsn_entry.mask = mask; | ||
545 | tmp_ientry->wd = -1; | ||
546 | |||
547 | ret = -ENOSPC; | ||
548 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) | ||
549 | goto out_err; | ||
550 | retry: | ||
551 | ret = -ENOMEM; | ||
552 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
553 | goto out_err; | ||
554 | |||
555 | spin_lock(&group->inotify_data.idr_lock); | ||
556 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | ||
557 | group->inotify_data.last_wd, | ||
558 | &tmp_ientry->wd); | ||
559 | spin_unlock(&group->inotify_data.idr_lock); | ||
560 | if (ret) { | ||
561 | /* idr was out of memory allocate and try again */ | ||
562 | if (ret == -EAGAIN) | ||
563 | goto retry; | ||
564 | goto out_err; | ||
565 | } | ||
566 | |||
567 | /* we put the mark on the idr, take a reference */ | ||
568 | fsnotify_get_mark(&tmp_ientry->fsn_entry); | ||
569 | |||
570 | /* we are on the idr, now get on the inode */ | ||
571 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | ||
572 | if (ret) { | ||
573 | /* we failed to get on the inode, get off the idr */ | ||
574 | inotify_remove_from_idr(group, tmp_ientry); | ||
575 | goto out_err; | ||
576 | } | ||
577 | |||
578 | /* update the idr hint, who cares about races, it's just a hint */ | ||
579 | group->inotify_data.last_wd = tmp_ientry->wd; | ||
580 | |||
581 | /* increment the number of watches the user has */ | ||
582 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
583 | |||
584 | /* return the watch descriptor for this new entry */ | ||
585 | ret = tmp_ientry->wd; | ||
586 | |||
587 | /* match the ref from fsnotify_init_markentry() */ | ||
588 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | ||
589 | |||
544 | out_err: | 590 | out_err: |
545 | /* could be an error, could be that we found an existing mark */ | 591 | if (ret < 0) |
546 | if (tmp_ientry) { | ||
547 | /* on the idr but didn't make it on the inode */ | ||
548 | if (tmp_ientry->wd != -1) | ||
549 | inotify_remove_from_idr(group, tmp_ientry); | ||
550 | kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); | 592 | kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); |
551 | } | 593 | |
594 | return ret; | ||
595 | } | ||
596 | |||
597 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | ||
598 | { | ||
599 | int ret = 0; | ||
600 | |||
601 | retry: | ||
602 | /* try to update and existing watch with the new arg */ | ||
603 | ret = inotify_update_existing_watch(group, inode, arg); | ||
604 | /* no mark present, try to add a new one */ | ||
605 | if (ret == -ENOENT) | ||
606 | ret = inotify_new_watch(group, inode, arg); | ||
607 | /* | ||
608 | * inotify_new_watch could race with another thread which did an | ||
609 | * inotify_new_watch between the update_existing and the add watch | ||
610 | * here, go back and try to update an existing mark again. | ||
611 | */ | ||
612 | if (ret == -EEXIST) | ||
613 | goto retry; | ||
552 | 614 | ||
553 | return ret; | 615 | return ret; |
554 | } | 616 | } |
@@ -568,7 +630,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign | |||
568 | 630 | ||
569 | spin_lock_init(&group->inotify_data.idr_lock); | 631 | spin_lock_init(&group->inotify_data.idr_lock); |
570 | idr_init(&group->inotify_data.idr); | 632 | idr_init(&group->inotify_data.idr); |
571 | group->inotify_data.last_wd = 0; | 633 | group->inotify_data.last_wd = 1; |
572 | group->inotify_data.user = user; | 634 | group->inotify_data.user = user; |
573 | group->inotify_data.fa = NULL; | 635 | group->inotify_data.fa = NULL; |
574 | 636 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 521368574e97..3816d5750dd5 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new | |||
153 | return true; | 153 | return true; |
154 | break; | 154 | break; |
155 | case (FSNOTIFY_EVENT_NONE): | 155 | case (FSNOTIFY_EVENT_NONE): |
156 | if (old->mask & FS_Q_OVERFLOW) | ||
157 | return true; | ||
158 | else if (old->mask & FS_IN_IGNORED) | ||
159 | return false; | ||
156 | return false; | 160 | return false; |
157 | }; | 161 | }; |
158 | } | 162 | } |
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even | |||
171 | struct list_head *list = &group->notification_list; | 175 | struct list_head *list = &group->notification_list; |
172 | struct fsnotify_event_holder *last_holder; | 176 | struct fsnotify_event_holder *last_holder; |
173 | struct fsnotify_event *last_event; | 177 | struct fsnotify_event *last_event; |
174 | 178 | int ret = 0; | |
175 | /* easy to tell if priv was attached to the event */ | ||
176 | INIT_LIST_HEAD(&priv->event_list); | ||
177 | 179 | ||
178 | /* | 180 | /* |
179 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. | 181 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. |
@@ -194,6 +196,7 @@ alloc_holder: | |||
194 | 196 | ||
195 | if (group->q_len >= group->max_events) { | 197 | if (group->q_len >= group->max_events) { |
196 | event = &q_overflow_event; | 198 | event = &q_overflow_event; |
199 | ret = -EOVERFLOW; | ||
197 | /* sorry, no private data on the overflow event */ | 200 | /* sorry, no private data on the overflow event */ |
198 | priv = NULL; | 201 | priv = NULL; |
199 | } | 202 | } |
@@ -235,7 +238,7 @@ alloc_holder: | |||
235 | mutex_unlock(&group->notification_mutex); | 238 | mutex_unlock(&group->notification_mutex); |
236 | 239 | ||
237 | wake_up(&group->notification_waitq); | 240 | wake_up(&group->notification_waitq); |
238 | return 0; | 241 | return ret; |
239 | } | 242 | } |
240 | 243 | ||
241 | /* | 244 | /* |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f9a3e8942669..ab513ddaeff2 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -6851,7 +6851,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6851 | } | 6851 | } |
6852 | status = 0; | 6852 | status = 0; |
6853 | bail: | 6853 | bail: |
6854 | 6854 | brelse(last_eb_bh); | |
6855 | mlog_exit(status); | 6855 | mlog_exit(status); |
6856 | return status; | 6856 | return status; |
6857 | } | 6857 | } |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index fcf879ed6930..756f5b0998e0 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -122,7 +122,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
122 | * that still has AST's pending... */ | 122 | * that still has AST's pending... */ |
123 | in_use = !list_empty(&lock->ast_list); | 123 | in_use = !list_empty(&lock->ast_list); |
124 | spin_unlock(&dlm->ast_lock); | 124 | spin_unlock(&dlm->ast_lock); |
125 | if (in_use) { | 125 | if (in_use && !(flags & LKM_CANCEL)) { |
126 | mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " | 126 | mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " |
127 | "while waiting for an ast!", res->lockname.len, | 127 | "while waiting for an ast!", res->lockname.len, |
128 | res->lockname.name); | 128 | res->lockname.name); |
@@ -131,7 +131,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
131 | 131 | ||
132 | spin_lock(&res->spinlock); | 132 | spin_lock(&res->spinlock); |
133 | if (res->state & DLM_LOCK_RES_IN_PROGRESS) { | 133 | if (res->state & DLM_LOCK_RES_IN_PROGRESS) { |
134 | if (master_node) { | 134 | if (master_node && !(flags & LKM_CANCEL)) { |
135 | mlog(ML_ERROR, "lockres in progress!\n"); | 135 | mlog(ML_ERROR, "lockres in progress!\n"); |
136 | spin_unlock(&res->spinlock); | 136 | spin_unlock(&res->spinlock); |
137 | return DLM_FORWARD; | 137 | return DLM_FORWARD; |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index fcdba091af3d..c212cf5a2bdf 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -108,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
108 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 108 | [OCFS2_LOCK_TYPE_OPEN] = "Open", |
109 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", | 109 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", |
110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", | 110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", |
111 | [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", | ||
111 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", | 112 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", |
112 | }; | 113 | }; |
113 | 114 | ||
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index bf7742d0ee3b..44f2a5e1d042 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "sysfile.h" | 23 | #include "sysfile.h" |
24 | #include "dlmglue.h" | 24 | #include "dlmglue.h" |
25 | #include "uptodate.h" | 25 | #include "uptodate.h" |
26 | #include "super.h" | ||
26 | #include "quota.h" | 27 | #include "quota.h" |
27 | 28 | ||
28 | static struct workqueue_struct *ocfs2_quota_wq = NULL; | 29 | static struct workqueue_struct *ocfs2_quota_wq = NULL; |
@@ -114,6 +115,15 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | |||
114 | int rc = 0; | 115 | int rc = 0; |
115 | struct buffer_head *tmp = *bh; | 116 | struct buffer_head *tmp = *bh; |
116 | 117 | ||
118 | if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { | ||
119 | ocfs2_error(inode->i_sb, | ||
120 | "Quota file %llu is probably corrupted! Requested " | ||
121 | "to read block %Lu but file has size only %Lu\n", | ||
122 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
123 | (unsigned long long)v_block, | ||
124 | (unsigned long long)i_size_read(inode)); | ||
125 | return -EIO; | ||
126 | } | ||
117 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, | 127 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, |
118 | ocfs2_validate_quota_block); | 128 | ocfs2_validate_quota_block); |
119 | if (rc) | 129 | if (rc) |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index b0ee0fdf799a..a3f8871d21fd 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1218,13 +1218,17 @@ static void ocfs2_kill_sb(struct super_block *sb) | |||
1218 | { | 1218 | { |
1219 | struct ocfs2_super *osb = OCFS2_SB(sb); | 1219 | struct ocfs2_super *osb = OCFS2_SB(sb); |
1220 | 1220 | ||
1221 | /* Failed mount? */ | ||
1222 | if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) | ||
1223 | goto out; | ||
1224 | |||
1221 | /* Prevent further queueing of inode drop events */ | 1225 | /* Prevent further queueing of inode drop events */ |
1222 | spin_lock(&dentry_list_lock); | 1226 | spin_lock(&dentry_list_lock); |
1223 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); | 1227 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); |
1224 | spin_unlock(&dentry_list_lock); | 1228 | spin_unlock(&dentry_list_lock); |
1225 | /* Wait for work to finish and/or remove it */ | 1229 | /* Wait for work to finish and/or remove it */ |
1226 | cancel_work_sync(&osb->dentry_lock_work); | 1230 | cancel_work_sync(&osb->dentry_lock_work); |
1227 | 1231 | out: | |
1228 | kill_block_super(sb); | 1232 | kill_block_super(sb); |
1229 | } | 1233 | } |
1230 | 1234 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index 175db258942f..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
1003 | 1003 | ||
1004 | if (!task) | 1004 | if (!task) |
1005 | return -ESRCH; | 1005 | return -ESRCH; |
1006 | task_lock(task); | 1006 | oom_adjust = task->oomkilladj; |
1007 | if (task->mm) | ||
1008 | oom_adjust = task->mm->oom_adj; | ||
1009 | else | ||
1010 | oom_adjust = OOM_DISABLE; | ||
1011 | task_unlock(task); | ||
1012 | put_task_struct(task); | 1007 | put_task_struct(task); |
1013 | 1008 | ||
1014 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1009 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1037 | task = get_proc_task(file->f_path.dentry->d_inode); | 1032 | task = get_proc_task(file->f_path.dentry->d_inode); |
1038 | if (!task) | 1033 | if (!task) |
1039 | return -ESRCH; | 1034 | return -ESRCH; |
1040 | task_lock(task); | 1035 | if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { |
1041 | if (!task->mm) { | ||
1042 | task_unlock(task); | ||
1043 | put_task_struct(task); | ||
1044 | return -EINVAL; | ||
1045 | } | ||
1046 | if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
1047 | task_unlock(task); | ||
1048 | put_task_struct(task); | 1036 | put_task_struct(task); |
1049 | return -EACCES; | 1037 | return -EACCES; |
1050 | } | 1038 | } |
1051 | task->mm->oom_adj = oom_adjust; | 1039 | task->oomkilladj = oom_adjust; |
1052 | task_unlock(task); | ||
1053 | put_task_struct(task); | 1040 | put_task_struct(task); |
1054 | if (end - buffer == 0) | 1041 | if (end - buffer == 0) |
1055 | return -EIO; | 1042 | return -EIO; |
diff --git a/fs/select.c b/fs/select.c index d870237e42c7..8084834e123e 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq) | |||
110 | { | 110 | { |
111 | init_poll_funcptr(&pwq->pt, __pollwait); | 111 | init_poll_funcptr(&pwq->pt, __pollwait); |
112 | pwq->polling_task = current; | 112 | pwq->polling_task = current; |
113 | pwq->triggered = 0; | ||
113 | pwq->error = 0; | 114 | pwq->error = 0; |
114 | pwq->table = NULL; | 115 | pwq->table = NULL; |
115 | pwq->inline_index = 0; | 116 | pwq->inline_index = 0; |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca43..98ef624d9baf 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -708,6 +708,16 @@ xfs_reclaim_inode( | |||
708 | return 0; | 708 | return 0; |
709 | } | 709 | } |
710 | 710 | ||
711 | void | ||
712 | __xfs_inode_set_reclaim_tag( | ||
713 | struct xfs_perag *pag, | ||
714 | struct xfs_inode *ip) | ||
715 | { | ||
716 | radix_tree_tag_set(&pag->pag_ici_root, | ||
717 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
718 | XFS_ICI_RECLAIM_TAG); | ||
719 | } | ||
720 | |||
711 | /* | 721 | /* |
712 | * We set the inode flag atomically with the radix tree tag. | 722 | * We set the inode flag atomically with the radix tree tag. |
713 | * Once we get tag lookups on the radix tree, this inode flag | 723 | * Once we get tag lookups on the radix tree, this inode flag |
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag( | |||
722 | 732 | ||
723 | read_lock(&pag->pag_ici_lock); | 733 | read_lock(&pag->pag_ici_lock); |
724 | spin_lock(&ip->i_flags_lock); | 734 | spin_lock(&ip->i_flags_lock); |
725 | radix_tree_tag_set(&pag->pag_ici_root, | 735 | __xfs_inode_set_reclaim_tag(pag, ip); |
726 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
727 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 736 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
728 | spin_unlock(&ip->i_flags_lock); | 737 | spin_unlock(&ip->i_flags_lock); |
729 | read_unlock(&pag->pag_ici_lock); | 738 | read_unlock(&pag->pag_ici_lock); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c7..59120602588a 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); | |||
48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
49 | 49 | ||
50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | ||
51 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); | 52 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); |
52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 53 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
53 | struct xfs_inode *ip); | 54 | struct xfs_inode *ip); |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 34ec86923f7e..ecbf8b4d2e2e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -191,80 +191,82 @@ xfs_iget_cache_hit( | |||
191 | int flags, | 191 | int flags, |
192 | int lock_flags) __releases(pag->pag_ici_lock) | 192 | int lock_flags) __releases(pag->pag_ici_lock) |
193 | { | 193 | { |
194 | struct inode *inode = VFS_I(ip); | ||
194 | struct xfs_mount *mp = ip->i_mount; | 195 | struct xfs_mount *mp = ip->i_mount; |
195 | int error = EAGAIN; | 196 | int error; |
197 | |||
198 | spin_lock(&ip->i_flags_lock); | ||
196 | 199 | ||
197 | /* | 200 | /* |
198 | * If INEW is set this inode is being set up | 201 | * If we are racing with another cache hit that is currently |
199 | * If IRECLAIM is set this inode is being torn down | 202 | * instantiating this inode or currently recycling it out of |
200 | * Pause and try again. | 203 | * reclaimabe state, wait for the initialisation to complete |
204 | * before continuing. | ||
205 | * | ||
206 | * XXX(hch): eventually we should do something equivalent to | ||
207 | * wait_on_inode to wait for these flags to be cleared | ||
208 | * instead of polling for it. | ||
201 | */ | 209 | */ |
202 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 210 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
203 | XFS_STATS_INC(xs_ig_frecycle); | 211 | XFS_STATS_INC(xs_ig_frecycle); |
212 | error = EAGAIN; | ||
204 | goto out_error; | 213 | goto out_error; |
205 | } | 214 | } |
206 | 215 | ||
207 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 216 | /* |
208 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 217 | * If lookup is racing with unlink return an error immediately. |
209 | 218 | */ | |
210 | /* | 219 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
211 | * If lookup is racing with unlink, then we should return an | 220 | error = ENOENT; |
212 | * error immediately so we don't remove it from the reclaim | 221 | goto out_error; |
213 | * list and potentially leak the inode. | 222 | } |
214 | */ | ||
215 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
216 | error = ENOENT; | ||
217 | goto out_error; | ||
218 | } | ||
219 | 223 | ||
224 | /* | ||
225 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
226 | * Need to carefully get it back into useable state. | ||
227 | */ | ||
228 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
220 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 229 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
221 | 230 | ||
222 | /* | 231 | /* |
223 | * We need to re-initialise the VFS inode as it has been | 232 | * We need to set XFS_INEW atomically with clearing the |
224 | * 'freed' by the VFS. Do this here so we can deal with | 233 | * reclaimable tag so that we do have an indicator of the |
225 | * errors cleanly, then tag it so it can be set up correctly | 234 | * inode still being initialized. |
226 | * later. | ||
227 | */ | 235 | */ |
228 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | 236 | ip->i_flags |= XFS_INEW; |
229 | error = ENOMEM; | 237 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
230 | goto out_error; | 238 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
231 | } | ||
232 | 239 | ||
233 | /* | 240 | spin_unlock(&ip->i_flags_lock); |
234 | * We must set the XFS_INEW flag before clearing the | 241 | read_unlock(&pag->pag_ici_lock); |
235 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
236 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
237 | * below succeed we can safely check XFS_INEW to detect | ||
238 | * that this inode is still being initialised. | ||
239 | */ | ||
240 | xfs_iflags_set(ip, XFS_INEW); | ||
241 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
242 | 242 | ||
243 | /* clear the radix tree reclaim flag as well. */ | 243 | error = -inode_init_always(mp->m_super, inode); |
244 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 244 | if (error) { |
245 | } else if (!igrab(VFS_I(ip))) { | 245 | /* |
246 | * Re-initializing the inode failed, and we are in deep | ||
247 | * trouble. Try to re-add it to the reclaim list. | ||
248 | */ | ||
249 | read_lock(&pag->pag_ici_lock); | ||
250 | spin_lock(&ip->i_flags_lock); | ||
251 | |||
252 | ip->i_flags &= ~XFS_INEW; | ||
253 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
254 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
255 | goto out_error; | ||
256 | } | ||
257 | inode->i_state = I_LOCK|I_NEW; | ||
258 | } else { | ||
246 | /* If the VFS inode is being torn down, pause and try again. */ | 259 | /* If the VFS inode is being torn down, pause and try again. */ |
247 | XFS_STATS_INC(xs_ig_frecycle); | 260 | if (!igrab(inode)) { |
248 | goto out_error; | 261 | error = EAGAIN; |
249 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 262 | goto out_error; |
250 | /* | 263 | } |
251 | * We are racing with another cache hit that is | ||
252 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
253 | * state. Wait for the initialisation to complete before | ||
254 | * continuing. | ||
255 | */ | ||
256 | wait_on_inode(VFS_I(ip)); | ||
257 | } | ||
258 | 264 | ||
259 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 265 | /* We've got a live one. */ |
260 | error = ENOENT; | 266 | spin_unlock(&ip->i_flags_lock); |
261 | iput(VFS_I(ip)); | 267 | read_unlock(&pag->pag_ici_lock); |
262 | goto out_error; | ||
263 | } | 268 | } |
264 | 269 | ||
265 | /* We've got a live one. */ | ||
266 | read_unlock(&pag->pag_ici_lock); | ||
267 | |||
268 | if (lock_flags != 0) | 270 | if (lock_flags != 0) |
269 | xfs_ilock(ip, lock_flags); | 271 | xfs_ilock(ip, lock_flags); |
270 | 272 | ||
@@ -274,6 +276,7 @@ xfs_iget_cache_hit( | |||
274 | return 0; | 276 | return 0; |
275 | 277 | ||
276 | out_error: | 278 | out_error: |
279 | spin_unlock(&ip->i_flags_lock); | ||
277 | read_unlock(&pag->pag_ici_lock); | 280 | read_unlock(&pag->pag_ici_lock); |
278 | return error; | 281 | return error; |
279 | } | 282 | } |