diff options
Diffstat (limited to 'fs')
82 files changed, 1677 insertions, 908 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 332b5ff02fec..f7003cfac63d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c | |||
@@ -76,7 +76,7 @@ static const match_table_t tokens = { | |||
76 | * Return 0 upon success, -ERRNO upon failure. | 76 | * Return 0 upon success, -ERRNO upon failure. |
77 | */ | 77 | */ |
78 | 78 | ||
79 | static int v9fs_parse_options(struct v9fs_session_info *v9ses) | 79 | static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) |
80 | { | 80 | { |
81 | char *options; | 81 | char *options; |
82 | substring_t args[MAX_OPT_ARGS]; | 82 | substring_t args[MAX_OPT_ARGS]; |
@@ -90,10 +90,10 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses) | |||
90 | v9ses->debug = 0; | 90 | v9ses->debug = 0; |
91 | v9ses->cache = 0; | 91 | v9ses->cache = 0; |
92 | 92 | ||
93 | if (!v9ses->options) | 93 | if (!opts) |
94 | return 0; | 94 | return 0; |
95 | 95 | ||
96 | options = kstrdup(v9ses->options, GFP_KERNEL); | 96 | options = kstrdup(opts, GFP_KERNEL); |
97 | if (!options) { | 97 | if (!options) { |
98 | P9_DPRINTK(P9_DEBUG_ERROR, | 98 | P9_DPRINTK(P9_DEBUG_ERROR, |
99 | "failed to allocate copy of option string\n"); | 99 | "failed to allocate copy of option string\n"); |
@@ -206,24 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
206 | v9ses->uid = ~0; | 206 | v9ses->uid = ~0; |
207 | v9ses->dfltuid = V9FS_DEFUID; | 207 | v9ses->dfltuid = V9FS_DEFUID; |
208 | v9ses->dfltgid = V9FS_DEFGID; | 208 | v9ses->dfltgid = V9FS_DEFGID; |
209 | if (data) { | ||
210 | v9ses->options = kstrdup(data, GFP_KERNEL); | ||
211 | if (!v9ses->options) { | ||
212 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
213 | "failed to allocate copy of option string\n"); | ||
214 | retval = -ENOMEM; | ||
215 | goto error; | ||
216 | } | ||
217 | } | ||
218 | 209 | ||
219 | rc = v9fs_parse_options(v9ses); | 210 | rc = v9fs_parse_options(v9ses, data); |
220 | if (rc < 0) { | 211 | if (rc < 0) { |
221 | retval = rc; | 212 | retval = rc; |
222 | goto error; | 213 | goto error; |
223 | } | 214 | } |
224 | 215 | ||
225 | v9ses->clnt = p9_client_create(dev_name, v9ses->options); | 216 | v9ses->clnt = p9_client_create(dev_name, data); |
226 | |||
227 | if (IS_ERR(v9ses->clnt)) { | 217 | if (IS_ERR(v9ses->clnt)) { |
228 | retval = PTR_ERR(v9ses->clnt); | 218 | retval = PTR_ERR(v9ses->clnt); |
229 | v9ses->clnt = NULL; | 219 | v9ses->clnt = NULL; |
@@ -280,7 +270,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) | |||
280 | 270 | ||
281 | __putname(v9ses->uname); | 271 | __putname(v9ses->uname); |
282 | __putname(v9ses->aname); | 272 | __putname(v9ses->aname); |
283 | kfree(v9ses->options); | ||
284 | } | 273 | } |
285 | 274 | ||
286 | /** | 275 | /** |
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index a7d567192998..38762bf102a9 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h | |||
@@ -85,7 +85,6 @@ struct v9fs_session_info { | |||
85 | unsigned int afid; | 85 | unsigned int afid; |
86 | unsigned int cache; | 86 | unsigned int cache; |
87 | 87 | ||
88 | char *options; /* copy of mount options */ | ||
89 | char *uname; /* user name to mount as */ | 88 | char *uname; /* user name to mount as */ |
90 | char *aname; /* name of remote hierarchy being mounted */ | 89 | char *aname; /* name of remote hierarchy being mounted */ |
91 | unsigned int maxdata; /* max data for client interface */ | 90 | unsigned int maxdata; /* max data for client interface */ |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 81f8bbf12f9f..06a223d50a81 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -171,7 +171,6 @@ int v9fs_uflags2omode(int uflags, int extended) | |||
171 | 171 | ||
172 | /** | 172 | /** |
173 | * v9fs_blank_wstat - helper function to setup a 9P stat structure | 173 | * v9fs_blank_wstat - helper function to setup a 9P stat structure |
174 | * @v9ses: 9P session info (for determining extended mode) | ||
175 | * @wstat: structure to initialize | 174 | * @wstat: structure to initialize |
176 | * | 175 | * |
177 | */ | 176 | */ |
@@ -207,65 +206,72 @@ v9fs_blank_wstat(struct p9_wstat *wstat) | |||
207 | 206 | ||
208 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) | 207 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) |
209 | { | 208 | { |
209 | int err; | ||
210 | struct inode *inode; | 210 | struct inode *inode; |
211 | struct v9fs_session_info *v9ses = sb->s_fs_info; | 211 | struct v9fs_session_info *v9ses = sb->s_fs_info; |
212 | 212 | ||
213 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); | 213 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); |
214 | 214 | ||
215 | inode = new_inode(sb); | 215 | inode = new_inode(sb); |
216 | if (inode) { | 216 | if (!inode) { |
217 | inode->i_mode = mode; | ||
218 | inode->i_uid = current_fsuid(); | ||
219 | inode->i_gid = current_fsgid(); | ||
220 | inode->i_blocks = 0; | ||
221 | inode->i_rdev = 0; | ||
222 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
223 | inode->i_mapping->a_ops = &v9fs_addr_operations; | ||
224 | |||
225 | switch (mode & S_IFMT) { | ||
226 | case S_IFIFO: | ||
227 | case S_IFBLK: | ||
228 | case S_IFCHR: | ||
229 | case S_IFSOCK: | ||
230 | if (!v9fs_extended(v9ses)) { | ||
231 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
232 | "special files without extended mode\n"); | ||
233 | return ERR_PTR(-EINVAL); | ||
234 | } | ||
235 | init_special_inode(inode, inode->i_mode, | ||
236 | inode->i_rdev); | ||
237 | break; | ||
238 | case S_IFREG: | ||
239 | inode->i_op = &v9fs_file_inode_operations; | ||
240 | inode->i_fop = &v9fs_file_operations; | ||
241 | break; | ||
242 | case S_IFLNK: | ||
243 | if (!v9fs_extended(v9ses)) { | ||
244 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
245 | "extended modes used w/o 9P2000.u\n"); | ||
246 | return ERR_PTR(-EINVAL); | ||
247 | } | ||
248 | inode->i_op = &v9fs_symlink_inode_operations; | ||
249 | break; | ||
250 | case S_IFDIR: | ||
251 | inc_nlink(inode); | ||
252 | if (v9fs_extended(v9ses)) | ||
253 | inode->i_op = &v9fs_dir_inode_operations_ext; | ||
254 | else | ||
255 | inode->i_op = &v9fs_dir_inode_operations; | ||
256 | inode->i_fop = &v9fs_dir_operations; | ||
257 | break; | ||
258 | default: | ||
259 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
260 | "BAD mode 0x%x S_IFMT 0x%x\n", | ||
261 | mode, mode & S_IFMT); | ||
262 | return ERR_PTR(-EINVAL); | ||
263 | } | ||
264 | } else { | ||
265 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); | 217 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); |
266 | return ERR_PTR(-ENOMEM); | 218 | return ERR_PTR(-ENOMEM); |
267 | } | 219 | } |
220 | |||
221 | inode->i_mode = mode; | ||
222 | inode->i_uid = current_fsuid(); | ||
223 | inode->i_gid = current_fsgid(); | ||
224 | inode->i_blocks = 0; | ||
225 | inode->i_rdev = 0; | ||
226 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
227 | inode->i_mapping->a_ops = &v9fs_addr_operations; | ||
228 | |||
229 | switch (mode & S_IFMT) { | ||
230 | case S_IFIFO: | ||
231 | case S_IFBLK: | ||
232 | case S_IFCHR: | ||
233 | case S_IFSOCK: | ||
234 | if (!v9fs_extended(v9ses)) { | ||
235 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
236 | "special files without extended mode\n"); | ||
237 | err = -EINVAL; | ||
238 | goto error; | ||
239 | } | ||
240 | init_special_inode(inode, inode->i_mode, inode->i_rdev); | ||
241 | break; | ||
242 | case S_IFREG: | ||
243 | inode->i_op = &v9fs_file_inode_operations; | ||
244 | inode->i_fop = &v9fs_file_operations; | ||
245 | break; | ||
246 | case S_IFLNK: | ||
247 | if (!v9fs_extended(v9ses)) { | ||
248 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
249 | "extended modes used w/o 9P2000.u\n"); | ||
250 | err = -EINVAL; | ||
251 | goto error; | ||
252 | } | ||
253 | inode->i_op = &v9fs_symlink_inode_operations; | ||
254 | break; | ||
255 | case S_IFDIR: | ||
256 | inc_nlink(inode); | ||
257 | if (v9fs_extended(v9ses)) | ||
258 | inode->i_op = &v9fs_dir_inode_operations_ext; | ||
259 | else | ||
260 | inode->i_op = &v9fs_dir_inode_operations; | ||
261 | inode->i_fop = &v9fs_dir_operations; | ||
262 | break; | ||
263 | default: | ||
264 | P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", | ||
265 | mode, mode & S_IFMT); | ||
266 | err = -EINVAL; | ||
267 | goto error; | ||
268 | } | ||
269 | |||
268 | return inode; | 270 | return inode; |
271 | |||
272 | error: | ||
273 | iput(inode); | ||
274 | return ERR_PTR(err); | ||
269 | } | 275 | } |
270 | 276 | ||
271 | /* | 277 | /* |
@@ -338,30 +344,25 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, | |||
338 | 344 | ||
339 | ret = NULL; | 345 | ret = NULL; |
340 | st = p9_client_stat(fid); | 346 | st = p9_client_stat(fid); |
341 | if (IS_ERR(st)) { | 347 | if (IS_ERR(st)) |
342 | err = PTR_ERR(st); | 348 | return ERR_CAST(st); |
343 | st = NULL; | ||
344 | goto error; | ||
345 | } | ||
346 | 349 | ||
347 | umode = p9mode2unixmode(v9ses, st->mode); | 350 | umode = p9mode2unixmode(v9ses, st->mode); |
348 | ret = v9fs_get_inode(sb, umode); | 351 | ret = v9fs_get_inode(sb, umode); |
349 | if (IS_ERR(ret)) { | 352 | if (IS_ERR(ret)) { |
350 | err = PTR_ERR(ret); | 353 | err = PTR_ERR(ret); |
351 | ret = NULL; | ||
352 | goto error; | 354 | goto error; |
353 | } | 355 | } |
354 | 356 | ||
355 | v9fs_stat2inode(st, ret, sb); | 357 | v9fs_stat2inode(st, ret, sb); |
356 | ret->i_ino = v9fs_qid2ino(&st->qid); | 358 | ret->i_ino = v9fs_qid2ino(&st->qid); |
359 | p9stat_free(st); | ||
357 | kfree(st); | 360 | kfree(st); |
358 | return ret; | 361 | return ret; |
359 | 362 | ||
360 | error: | 363 | error: |
364 | p9stat_free(st); | ||
361 | kfree(st); | 365 | kfree(st); |
362 | if (ret) | ||
363 | iput(ret); | ||
364 | |||
365 | return ERR_PTR(err); | 366 | return ERR_PTR(err); |
366 | } | 367 | } |
367 | 368 | ||
@@ -403,9 +404,9 @@ v9fs_open_created(struct inode *inode, struct file *file) | |||
403 | * @v9ses: session information | 404 | * @v9ses: session information |
404 | * @dir: directory that dentry is being created in | 405 | * @dir: directory that dentry is being created in |
405 | * @dentry: dentry that is being created | 406 | * @dentry: dentry that is being created |
407 | * @extension: 9p2000.u extension string to support devices, etc. | ||
406 | * @perm: create permissions | 408 | * @perm: create permissions |
407 | * @mode: open mode | 409 | * @mode: open mode |
408 | * @extension: 9p2000.u extension string to support devices, etc. | ||
409 | * | 410 | * |
410 | */ | 411 | */ |
411 | static struct p9_fid * | 412 | static struct p9_fid * |
@@ -470,7 +471,10 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
470 | dentry->d_op = &v9fs_dentry_operations; | 471 | dentry->d_op = &v9fs_dentry_operations; |
471 | 472 | ||
472 | d_instantiate(dentry, inode); | 473 | d_instantiate(dentry, inode); |
473 | v9fs_fid_add(dentry, fid); | 474 | err = v9fs_fid_add(dentry, fid); |
475 | if (err < 0) | ||
476 | goto error; | ||
477 | |||
474 | return ofid; | 478 | return ofid; |
475 | 479 | ||
476 | error: | 480 | error: |
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 38d695d66a0b..8961f1a8f668 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -81,7 +81,7 @@ static int v9fs_set_super(struct super_block *s, void *data) | |||
81 | 81 | ||
82 | static void | 82 | static void |
83 | v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | 83 | v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, |
84 | int flags) | 84 | int flags, void *data) |
85 | { | 85 | { |
86 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 86 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
87 | sb->s_blocksize_bits = fls(v9ses->maxdata - 1); | 87 | sb->s_blocksize_bits = fls(v9ses->maxdata - 1); |
@@ -91,6 +91,8 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | |||
91 | 91 | ||
92 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | | 92 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | |
93 | MS_NOATIME; | 93 | MS_NOATIME; |
94 | |||
95 | save_mount_options(sb, data); | ||
94 | } | 96 | } |
95 | 97 | ||
96 | /** | 98 | /** |
@@ -113,14 +115,11 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
113 | struct v9fs_session_info *v9ses = NULL; | 115 | struct v9fs_session_info *v9ses = NULL; |
114 | struct p9_wstat *st = NULL; | 116 | struct p9_wstat *st = NULL; |
115 | int mode = S_IRWXUGO | S_ISVTX; | 117 | int mode = S_IRWXUGO | S_ISVTX; |
116 | uid_t uid = current_fsuid(); | ||
117 | gid_t gid = current_fsgid(); | ||
118 | struct p9_fid *fid; | 118 | struct p9_fid *fid; |
119 | int retval = 0; | 119 | int retval = 0; |
120 | 120 | ||
121 | P9_DPRINTK(P9_DEBUG_VFS, " \n"); | 121 | P9_DPRINTK(P9_DEBUG_VFS, " \n"); |
122 | 122 | ||
123 | st = NULL; | ||
124 | v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); | 123 | v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); |
125 | if (!v9ses) | 124 | if (!v9ses) |
126 | return -ENOMEM; | 125 | return -ENOMEM; |
@@ -142,7 +141,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
142 | retval = PTR_ERR(sb); | 141 | retval = PTR_ERR(sb); |
143 | goto free_stat; | 142 | goto free_stat; |
144 | } | 143 | } |
145 | v9fs_fill_super(sb, v9ses, flags); | 144 | v9fs_fill_super(sb, v9ses, flags, data); |
146 | 145 | ||
147 | inode = v9fs_get_inode(sb, S_IFDIR | mode); | 146 | inode = v9fs_get_inode(sb, S_IFDIR | mode); |
148 | if (IS_ERR(inode)) { | 147 | if (IS_ERR(inode)) { |
@@ -150,9 +149,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, | |||
150 | goto release_sb; | 149 | goto release_sb; |
151 | } | 150 | } |
152 | 151 | ||
153 | inode->i_uid = uid; | ||
154 | inode->i_gid = gid; | ||
155 | |||
156 | root = d_alloc_root(inode); | 152 | root = d_alloc_root(inode); |
157 | if (!root) { | 153 | if (!root) { |
158 | iput(inode); | 154 | iput(inode); |
@@ -173,10 +169,8 @@ P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); | |||
173 | simple_set_mnt(mnt, sb); | 169 | simple_set_mnt(mnt, sb); |
174 | return 0; | 170 | return 0; |
175 | 171 | ||
176 | release_sb: | ||
177 | deactivate_locked_super(sb); | ||
178 | |||
179 | free_stat: | 172 | free_stat: |
173 | p9stat_free(st); | ||
180 | kfree(st); | 174 | kfree(st); |
181 | 175 | ||
182 | clunk_fid: | 176 | clunk_fid: |
@@ -185,7 +179,12 @@ clunk_fid: | |||
185 | close_session: | 179 | close_session: |
186 | v9fs_session_close(v9ses); | 180 | v9fs_session_close(v9ses); |
187 | kfree(v9ses); | 181 | kfree(v9ses); |
182 | return retval; | ||
188 | 183 | ||
184 | release_sb: | ||
185 | p9stat_free(st); | ||
186 | kfree(st); | ||
187 | deactivate_locked_super(sb); | ||
189 | return retval; | 188 | return retval; |
190 | } | 189 | } |
191 | 190 | ||
@@ -207,24 +206,10 @@ static void v9fs_kill_super(struct super_block *s) | |||
207 | 206 | ||
208 | v9fs_session_close(v9ses); | 207 | v9fs_session_close(v9ses); |
209 | kfree(v9ses); | 208 | kfree(v9ses); |
209 | s->s_fs_info = NULL; | ||
210 | P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); | 210 | P9_DPRINTK(P9_DEBUG_VFS, "exiting kill_super\n"); |
211 | } | 211 | } |
212 | 212 | ||
213 | /** | ||
214 | * v9fs_show_options - Show mount options in /proc/mounts | ||
215 | * @m: seq_file to write to | ||
216 | * @mnt: mount descriptor | ||
217 | * | ||
218 | */ | ||
219 | |||
220 | static int v9fs_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
221 | { | ||
222 | struct v9fs_session_info *v9ses = mnt->mnt_sb->s_fs_info; | ||
223 | |||
224 | seq_printf(m, "%s", v9ses->options); | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | static void | 213 | static void |
229 | v9fs_umount_begin(struct super_block *sb) | 214 | v9fs_umount_begin(struct super_block *sb) |
230 | { | 215 | { |
@@ -237,7 +222,7 @@ v9fs_umount_begin(struct super_block *sb) | |||
237 | static const struct super_operations v9fs_super_ops = { | 222 | static const struct super_operations v9fs_super_ops = { |
238 | .statfs = simple_statfs, | 223 | .statfs = simple_statfs, |
239 | .clear_inode = v9fs_clear_inode, | 224 | .clear_inode = v9fs_clear_inode, |
240 | .show_options = v9fs_show_options, | 225 | .show_options = generic_show_options, |
241 | .umount_begin = v9fs_umount_begin, | 226 | .umount_begin = v9fs_umount_begin, |
242 | }; | 227 | }; |
243 | 228 | ||
diff --git a/fs/afs/file.c b/fs/afs/file.c index 0149dab365e7..681c2a7b013f 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -134,9 +134,16 @@ static int afs_readpage(struct file *file, struct page *page) | |||
134 | 134 | ||
135 | inode = page->mapping->host; | 135 | inode = page->mapping->host; |
136 | 136 | ||
137 | ASSERT(file != NULL); | 137 | if (file) { |
138 | key = file->private_data; | 138 | key = file->private_data; |
139 | ASSERT(key != NULL); | 139 | ASSERT(key != NULL); |
140 | } else { | ||
141 | key = afs_request_key(AFS_FS_S(inode->i_sb)->volume->cell); | ||
142 | if (IS_ERR(key)) { | ||
143 | ret = PTR_ERR(key); | ||
144 | goto error_nokey; | ||
145 | } | ||
146 | } | ||
140 | 147 | ||
141 | _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); | 148 | _enter("{%x},{%lu},{%lu}", key_serial(key), inode->i_ino, page->index); |
142 | 149 | ||
@@ -207,12 +214,17 @@ static int afs_readpage(struct file *file, struct page *page) | |||
207 | unlock_page(page); | 214 | unlock_page(page); |
208 | } | 215 | } |
209 | 216 | ||
217 | if (!file) | ||
218 | key_put(key); | ||
210 | _leave(" = 0"); | 219 | _leave(" = 0"); |
211 | return 0; | 220 | return 0; |
212 | 221 | ||
213 | error: | 222 | error: |
214 | SetPageError(page); | 223 | SetPageError(page); |
215 | unlock_page(page); | 224 | unlock_page(page); |
225 | if (!file) | ||
226 | key_put(key); | ||
227 | error_nokey: | ||
216 | _leave(" = %d", ret); | 228 | _leave(" = %d", ret); |
217 | return ret; | 229 | return ret; |
218 | } | 230 | } |
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index aa39ae83f019..3da18d453488 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -77,7 +77,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
77 | } | 77 | } |
78 | 78 | ||
79 | /* Update the expiry counter if fs is busy */ | 79 | /* Update the expiry counter if fs is busy */ |
80 | if (!may_umount_tree(mnt)) { | 80 | if (!may_umount_tree(path.mnt)) { |
81 | struct autofs_info *ino = autofs4_dentry_ino(top); | 81 | struct autofs_info *ino = autofs4_dentry_ino(top); |
82 | ino->last_used = jiffies; | 82 | ino->last_used = jiffies; |
83 | goto done; | 83 | goto done; |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b7c1603cd4bd..7c1e65d54872 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, | |||
501 | } | 501 | } |
502 | } | 502 | } |
503 | 503 | ||
504 | /* | 504 | if (last_bss > elf_bss) { |
505 | * Now fill out the bss section. First pad the last page up | 505 | /* |
506 | * to the page boundary, and then perform a mmap to make sure | 506 | * Now fill out the bss section. First pad the last page up |
507 | * that there are zero-mapped pages up to and including the | 507 | * to the page boundary, and then perform a mmap to make sure |
508 | * last bss page. | 508 | * that there are zero-mapped pages up to and including the |
509 | */ | 509 | * last bss page. |
510 | if (padzero(elf_bss)) { | 510 | */ |
511 | error = -EFAULT; | 511 | if (padzero(elf_bss)) { |
512 | goto out_close; | 512 | error = -EFAULT; |
513 | } | 513 | goto out_close; |
514 | } | ||
514 | 515 | ||
515 | /* What we have mapped so far */ | 516 | /* What we have mapped so far */ |
516 | elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); | 517 | elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); |
517 | 518 | ||
518 | /* Map the last of the bss segment */ | 519 | /* Map the last of the bss segment */ |
519 | if (last_bss > elf_bss) { | ||
520 | down_write(&current->mm->mmap_sem); | 520 | down_write(&current->mm->mmap_sem); |
521 | error = do_brk(elf_bss, last_bss - elf_bss); | 521 | error = do_brk(elf_bss, last_bss - elf_bss); |
522 | up_write(&current->mm->mmap_sem); | 522 | up_write(&current->mm->mmap_sem); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e83be2e4602c..15831d5c7367 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -1352,6 +1352,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1352 | { | 1352 | { |
1353 | int err; | 1353 | int err; |
1354 | 1354 | ||
1355 | bdi->name = "btrfs"; | ||
1355 | bdi->capabilities = BDI_CAP_MAP_COPY; | 1356 | bdi->capabilities = BDI_CAP_MAP_COPY; |
1356 | err = bdi_init(bdi); | 1357 | err = bdi_init(bdi); |
1357 | if (err) | 1358 | if (err) |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 272b9b2bea86..59cba180fe83 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3099,8 +3099,12 @@ static void inode_tree_add(struct inode *inode) | |||
3099 | { | 3099 | { |
3100 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3100 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3101 | struct btrfs_inode *entry; | 3101 | struct btrfs_inode *entry; |
3102 | struct rb_node **p = &root->inode_tree.rb_node; | 3102 | struct rb_node **p; |
3103 | struct rb_node *parent = NULL; | 3103 | struct rb_node *parent; |
3104 | |||
3105 | again: | ||
3106 | p = &root->inode_tree.rb_node; | ||
3107 | parent = NULL; | ||
3104 | 3108 | ||
3105 | spin_lock(&root->inode_lock); | 3109 | spin_lock(&root->inode_lock); |
3106 | while (*p) { | 3110 | while (*p) { |
@@ -3108,13 +3112,16 @@ static void inode_tree_add(struct inode *inode) | |||
3108 | entry = rb_entry(parent, struct btrfs_inode, rb_node); | 3112 | entry = rb_entry(parent, struct btrfs_inode, rb_node); |
3109 | 3113 | ||
3110 | if (inode->i_ino < entry->vfs_inode.i_ino) | 3114 | if (inode->i_ino < entry->vfs_inode.i_ino) |
3111 | p = &(*p)->rb_left; | 3115 | p = &parent->rb_left; |
3112 | else if (inode->i_ino > entry->vfs_inode.i_ino) | 3116 | else if (inode->i_ino > entry->vfs_inode.i_ino) |
3113 | p = &(*p)->rb_right; | 3117 | p = &parent->rb_right; |
3114 | else { | 3118 | else { |
3115 | WARN_ON(!(entry->vfs_inode.i_state & | 3119 | WARN_ON(!(entry->vfs_inode.i_state & |
3116 | (I_WILL_FREE | I_FREEING | I_CLEAR))); | 3120 | (I_WILL_FREE | I_FREEING | I_CLEAR))); |
3117 | break; | 3121 | rb_erase(parent, &root->inode_tree); |
3122 | RB_CLEAR_NODE(parent); | ||
3123 | spin_unlock(&root->inode_lock); | ||
3124 | goto again; | ||
3118 | } | 3125 | } |
3119 | } | 3126 | } |
3120 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); | 3127 | rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); |
@@ -3126,12 +3133,12 @@ static void inode_tree_del(struct inode *inode) | |||
3126 | { | 3133 | { |
3127 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3134 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3128 | 3135 | ||
3136 | spin_lock(&root->inode_lock); | ||
3129 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { | 3137 | if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { |
3130 | spin_lock(&root->inode_lock); | ||
3131 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); | 3138 | rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); |
3132 | spin_unlock(&root->inode_lock); | ||
3133 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | 3139 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); |
3134 | } | 3140 | } |
3141 | spin_unlock(&root->inode_lock); | ||
3135 | } | 3142 | } |
3136 | 3143 | ||
3137 | static noinline void init_btrfs_i(struct inode *inode) | 3144 | static noinline void init_btrfs_i(struct inode *inode) |
diff --git a/fs/buffer.c b/fs/buffer.c index a3ef091a45bd..90a98865b0cc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -281,7 +281,7 @@ static void free_more_memory(void) | |||
281 | struct zone *zone; | 281 | struct zone *zone; |
282 | int nid; | 282 | int nid; |
283 | 283 | ||
284 | wakeup_pdflush(1024); | 284 | wakeup_flusher_threads(1024); |
285 | yield(); | 285 | yield(); |
286 | 286 | ||
287 | for_each_online_node(nid) { | 287 | for_each_online_node(nid) { |
@@ -1165,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
1165 | 1165 | ||
1166 | if (!test_set_buffer_dirty(bh)) { | 1166 | if (!test_set_buffer_dirty(bh)) { |
1167 | struct page *page = bh->b_page; | 1167 | struct page *page = bh->b_page; |
1168 | if (!TestSetPageDirty(page)) | 1168 | if (!TestSetPageDirty(page)) { |
1169 | __set_page_dirty(page, page_mapping(page), 0); | 1169 | struct address_space *mapping = page_mapping(page); |
1170 | if (mapping) | ||
1171 | __set_page_dirty(page, mapping, 0); | ||
1172 | } | ||
1170 | } | 1173 | } |
1171 | } | 1174 | } |
1172 | 1175 | ||
diff --git a/fs/char_dev.c b/fs/char_dev.c index a173551e19d7..3cbc57f932d2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -31,6 +31,7 @@ | |||
31 | * - no readahead or I/O queue unplugging required | 31 | * - no readahead or I/O queue unplugging required |
32 | */ | 32 | */ |
33 | struct backing_dev_info directly_mappable_cdev_bdi = { | 33 | struct backing_dev_info directly_mappable_cdev_bdi = { |
34 | .name = "char", | ||
34 | .capabilities = ( | 35 | .capabilities = ( |
35 | #ifdef CONFIG_MMU | 36 | #ifdef CONFIG_MMU |
36 | /* permit private copies of the data to be taken */ | 37 | /* permit private copies of the data to be taken */ |
@@ -237,8 +238,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, | |||
237 | } | 238 | } |
238 | 239 | ||
239 | /** | 240 | /** |
240 | * register_chrdev() - Register a major number for character devices. | 241 | * __register_chrdev() - create and register a cdev occupying a range of minors |
241 | * @major: major device number or 0 for dynamic allocation | 242 | * @major: major device number or 0 for dynamic allocation |
243 | * @baseminor: first of the requested range of minor numbers | ||
244 | * @count: the number of minor numbers required | ||
242 | * @name: name of this range of devices | 245 | * @name: name of this range of devices |
243 | * @fops: file operations associated with these devices | 246 | * @fops: file operations associated with these devices |
244 | * | 247 | * |
@@ -254,19 +257,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, | |||
254 | * /dev. It only helps to keep track of the different owners of devices. If | 257 | * /dev. It only helps to keep track of the different owners of devices. If |
255 | * your module name has only one type of devices it's ok to use e.g. the name | 258 | * your module name has only one type of devices it's ok to use e.g. the name |
256 | * of the module here. | 259 | * of the module here. |
257 | * | ||
258 | * This function registers a range of 256 minor numbers. The first minor number | ||
259 | * is 0. | ||
260 | */ | 260 | */ |
261 | int register_chrdev(unsigned int major, const char *name, | 261 | int __register_chrdev(unsigned int major, unsigned int baseminor, |
262 | const struct file_operations *fops) | 262 | unsigned int count, const char *name, |
263 | const struct file_operations *fops) | ||
263 | { | 264 | { |
264 | struct char_device_struct *cd; | 265 | struct char_device_struct *cd; |
265 | struct cdev *cdev; | 266 | struct cdev *cdev; |
266 | char *s; | 267 | char *s; |
267 | int err = -ENOMEM; | 268 | int err = -ENOMEM; |
268 | 269 | ||
269 | cd = __register_chrdev_region(major, 0, 256, name); | 270 | cd = __register_chrdev_region(major, baseminor, count, name); |
270 | if (IS_ERR(cd)) | 271 | if (IS_ERR(cd)) |
271 | return PTR_ERR(cd); | 272 | return PTR_ERR(cd); |
272 | 273 | ||
@@ -280,7 +281,7 @@ int register_chrdev(unsigned int major, const char *name, | |||
280 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) | 281 | for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) |
281 | *s = '!'; | 282 | *s = '!'; |
282 | 283 | ||
283 | err = cdev_add(cdev, MKDEV(cd->major, 0), 256); | 284 | err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); |
284 | if (err) | 285 | if (err) |
285 | goto out; | 286 | goto out; |
286 | 287 | ||
@@ -290,7 +291,7 @@ int register_chrdev(unsigned int major, const char *name, | |||
290 | out: | 291 | out: |
291 | kobject_put(&cdev->kobj); | 292 | kobject_put(&cdev->kobj); |
292 | out2: | 293 | out2: |
293 | kfree(__unregister_chrdev_region(cd->major, 0, 256)); | 294 | kfree(__unregister_chrdev_region(cd->major, baseminor, count)); |
294 | return err; | 295 | return err; |
295 | } | 296 | } |
296 | 297 | ||
@@ -316,10 +317,23 @@ void unregister_chrdev_region(dev_t from, unsigned count) | |||
316 | } | 317 | } |
317 | } | 318 | } |
318 | 319 | ||
319 | void unregister_chrdev(unsigned int major, const char *name) | 320 | /** |
321 | * __unregister_chrdev - unregister and destroy a cdev | ||
322 | * @major: major device number | ||
323 | * @baseminor: first of the range of minor numbers | ||
324 | * @count: the number of minor numbers this cdev is occupying | ||
325 | * @name: name of this range of devices | ||
326 | * | ||
327 | * Unregister and destroy the cdev occupying the region described by | ||
328 | * @major, @baseminor and @count. This function undoes what | ||
329 | * __register_chrdev() did. | ||
330 | */ | ||
331 | void __unregister_chrdev(unsigned int major, unsigned int baseminor, | ||
332 | unsigned int count, const char *name) | ||
320 | { | 333 | { |
321 | struct char_device_struct *cd; | 334 | struct char_device_struct *cd; |
322 | cd = __unregister_chrdev_region(major, 0, 256); | 335 | |
336 | cd = __unregister_chrdev_region(major, baseminor, count); | ||
323 | if (cd && cd->cdev) | 337 | if (cd && cd->cdev) |
324 | cdev_del(cd->cdev); | 338 | cdev_del(cd->cdev); |
325 | kfree(cd); | 339 | kfree(cd); |
@@ -568,6 +582,6 @@ EXPORT_SYMBOL(cdev_alloc); | |||
568 | EXPORT_SYMBOL(cdev_del); | 582 | EXPORT_SYMBOL(cdev_del); |
569 | EXPORT_SYMBOL(cdev_add); | 583 | EXPORT_SYMBOL(cdev_add); |
570 | EXPORT_SYMBOL(cdev_index); | 584 | EXPORT_SYMBOL(cdev_index); |
571 | EXPORT_SYMBOL(register_chrdev); | 585 | EXPORT_SYMBOL(__register_chrdev); |
572 | EXPORT_SYMBOL(unregister_chrdev); | 586 | EXPORT_SYMBOL(__unregister_chrdev); |
573 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); | 587 | EXPORT_SYMBOL(directly_mappable_cdev_bdi); |
diff --git a/fs/compat.c b/fs/compat.c index 94502dab972a..6d6f98fe64a0 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename, | |||
1485 | if (!bprm) | 1485 | if (!bprm) |
1486 | goto out_files; | 1486 | goto out_files; |
1487 | 1487 | ||
1488 | retval = -ERESTARTNOINTR; | 1488 | retval = prepare_bprm_creds(bprm); |
1489 | if (mutex_lock_interruptible(&current->cred_guard_mutex)) | 1489 | if (retval) |
1490 | goto out_free; | 1490 | goto out_free; |
1491 | current->in_execve = 1; | ||
1492 | |||
1493 | retval = -ENOMEM; | ||
1494 | bprm->cred = prepare_exec_creds(); | ||
1495 | if (!bprm->cred) | ||
1496 | goto out_unlock; | ||
1497 | 1491 | ||
1498 | retval = check_unsafe_exec(bprm); | 1492 | retval = check_unsafe_exec(bprm); |
1499 | if (retval < 0) | 1493 | if (retval < 0) |
1500 | goto out_unlock; | 1494 | goto out_free; |
1501 | clear_in_exec = retval; | 1495 | clear_in_exec = retval; |
1496 | current->in_execve = 1; | ||
1502 | 1497 | ||
1503 | file = open_exec(filename); | 1498 | file = open_exec(filename); |
1504 | retval = PTR_ERR(file); | 1499 | retval = PTR_ERR(file); |
@@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename, | |||
1547 | /* execve succeeded */ | 1542 | /* execve succeeded */ |
1548 | current->fs->in_exec = 0; | 1543 | current->fs->in_exec = 0; |
1549 | current->in_execve = 0; | 1544 | current->in_execve = 0; |
1550 | mutex_unlock(&current->cred_guard_mutex); | |
1551 | acct_update_integrals(current); | 1545 | acct_update_integrals(current); |
1552 | free_bprm(bprm); | 1546 | free_bprm(bprm); |
1553 | if (displaced) | 1547 | if (displaced) |
@@ -1567,10 +1561,7 @@ out_file: | |||
1567 | out_unmark: | 1561 | out_unmark: |
1568 | if (clear_in_exec) | 1562 | if (clear_in_exec) |
1569 | current->fs->in_exec = 0; | 1563 | current->fs->in_exec = 0; |
1570 | |||
1571 | out_unlock: | ||
1572 | current->in_execve = 0; | 1564 | current->in_execve = 0; |
1573 | mutex_unlock(&current->cred_guard_mutex); | ||
1574 | 1565 | ||
1575 | out_free: | 1566 | out_free: |
1576 | free_bprm(bprm); | 1567 | free_bprm(bprm); |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 4921e7426d95..a2f746066c5d 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -51,6 +51,7 @@ static const struct address_space_operations configfs_aops = { | |||
51 | }; | 51 | }; |
52 | 52 | ||
53 | static struct backing_dev_info configfs_backing_dev_info = { | 53 | static struct backing_dev_info configfs_backing_dev_info = { |
54 | .name = "configfs", | ||
54 | .ra_pages = 0, /* No readahead */ | 55 | .ra_pages = 0, /* No readahead */ |
55 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 56 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
56 | }; | 57 | }; |
@@ -678,8 +678,8 @@ exit: | |||
678 | } | 678 | } |
679 | EXPORT_SYMBOL(open_exec); | 679 | EXPORT_SYMBOL(open_exec); |
680 | 680 | ||
681 | int kernel_read(struct file *file, unsigned long offset, | 681 | int kernel_read(struct file *file, loff_t offset, |
682 | char *addr, unsigned long count) | 682 | char *addr, unsigned long count) |
683 | { | 683 | { |
684 | mm_segment_t old_fs; | 684 | mm_segment_t old_fs; |
685 | loff_t pos = offset; | 685 | loff_t pos = offset; |
@@ -1016,6 +1016,35 @@ out: | |||
1016 | EXPORT_SYMBOL(flush_old_exec); | 1016 | EXPORT_SYMBOL(flush_old_exec); |
1017 | 1017 | ||
1018 | /* | 1018 | /* |
1019 | * Prepare credentials and lock ->cred_guard_mutex. | ||
1020 | * install_exec_creds() commits the new creds and drops the lock. | ||
1021 | * Or, if exec fails before, free_bprm() should release ->cred and | ||
1022 | * unlock. | ||
1023 | */ | ||
1024 | int prepare_bprm_creds(struct linux_binprm *bprm) | ||
1025 | { | ||
1026 | if (mutex_lock_interruptible(&current->cred_guard_mutex)) | ||
1027 | return -ERESTARTNOINTR; | ||
1028 | |||
1029 | bprm->cred = prepare_exec_creds(); | ||
1030 | if (likely(bprm->cred)) | ||
1031 | return 0; | ||
1032 | |||
1033 | mutex_unlock(&current->cred_guard_mutex); | ||
1034 | return -ENOMEM; | ||
1035 | } | ||
1036 | |||
1037 | void free_bprm(struct linux_binprm *bprm) | ||
1038 | { | ||
1039 | free_arg_pages(bprm); | ||
1040 | if (bprm->cred) { | ||
1041 | mutex_unlock(&current->cred_guard_mutex); | ||
1042 | abort_creds(bprm->cred); | ||
1043 | } | ||
1044 | kfree(bprm); | ||
1045 | } | ||
1046 | |||
1047 | /* | ||
1019 | * install the new credentials for this executable | 1048 | * install the new credentials for this executable |
1020 | */ | 1049 | */ |
1021 | void install_exec_creds(struct linux_binprm *bprm) | 1050 | void install_exec_creds(struct linux_binprm *bprm) |
@@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm) | |||
1024 | 1053 | ||
1025 | commit_creds(bprm->cred); | 1054 | commit_creds(bprm->cred); |
1026 | bprm->cred = NULL; | 1055 | bprm->cred = NULL; |
1027 | 1056 | /* | |
1028 | /* cred_guard_mutex must be held at least to this point to prevent | 1057 | * cred_guard_mutex must be held at least to this point to prevent |
1029 | * ptrace_attach() from altering our determination of the task's | 1058 | * ptrace_attach() from altering our determination of the task's |
1030 | * credentials; any time after this it may be unlocked */ | 1059 | * credentials; any time after this it may be unlocked. |
1031 | 1060 | */ | |
1032 | security_bprm_committed_creds(bprm); | 1061 | security_bprm_committed_creds(bprm); |
1062 | mutex_unlock(&current->cred_guard_mutex); | ||
1033 | } | 1063 | } |
1034 | EXPORT_SYMBOL(install_exec_creds); | 1064 | EXPORT_SYMBOL(install_exec_creds); |
1035 | 1065 | ||
@@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
1246 | 1276 | ||
1247 | EXPORT_SYMBOL(search_binary_handler); | 1277 | EXPORT_SYMBOL(search_binary_handler); |
1248 | 1278 | ||
1249 | void free_bprm(struct linux_binprm *bprm) | ||
1250 | { | ||
1251 | free_arg_pages(bprm); | ||
1252 | if (bprm->cred) | ||
1253 | abort_creds(bprm->cred); | ||
1254 | kfree(bprm); | ||
1255 | } | ||
1256 | |||
1257 | /* | 1279 | /* |
1258 | * sys_execve() executes a new program. | 1280 | * sys_execve() executes a new program. |
1259 | */ | 1281 | */ |
@@ -1277,20 +1299,15 @@ int do_execve(char * filename, | |||
1277 | if (!bprm) | 1299 | if (!bprm) |
1278 | goto out_files; | 1300 | goto out_files; |
1279 | 1301 | ||
1280 | retval = -ERESTARTNOINTR; | 1302 | retval = prepare_bprm_creds(bprm); |
1281 | if (mutex_lock_interruptible(&current->cred_guard_mutex)) | 1303 | if (retval) |
1282 | goto out_free; | 1304 | goto out_free; |
1283 | current->in_execve = 1; | ||
1284 | |||
1285 | retval = -ENOMEM; | ||
1286 | bprm->cred = prepare_exec_creds(); | ||
1287 | if (!bprm->cred) | ||
1288 | goto out_unlock; | ||
1289 | 1305 | ||
1290 | retval = check_unsafe_exec(bprm); | 1306 | retval = check_unsafe_exec(bprm); |
1291 | if (retval < 0) | 1307 | if (retval < 0) |
1292 | goto out_unlock; | 1308 | goto out_free; |
1293 | clear_in_exec = retval; | 1309 | clear_in_exec = retval; |
1310 | current->in_execve = 1; | ||
1294 | 1311 | ||
1295 | file = open_exec(filename); | 1312 | file = open_exec(filename); |
1296 | retval = PTR_ERR(file); | 1313 | retval = PTR_ERR(file); |
@@ -1340,7 +1357,6 @@ int do_execve(char * filename, | |||
1340 | /* execve succeeded */ | 1357 | /* execve succeeded */ |
1341 | current->fs->in_exec = 0; | 1358 | current->fs->in_exec = 0; |
1342 | current->in_execve = 0; | 1359 | current->in_execve = 0; |
1343 | mutex_unlock(&current->cred_guard_mutex); | ||
1344 | acct_update_integrals(current); | 1360 | acct_update_integrals(current); |
1345 | free_bprm(bprm); | 1361 | free_bprm(bprm); |
1346 | if (displaced) | 1362 | if (displaced) |
@@ -1360,10 +1376,7 @@ out_file: | |||
1360 | out_unmark: | 1376 | out_unmark: |
1361 | if (clear_in_exec) | 1377 | if (clear_in_exec) |
1362 | current->fs->in_exec = 0; | 1378 | current->fs->in_exec = 0; |
1363 | |||
1364 | out_unlock: | ||
1365 | current->in_execve = 0; | 1379 | current->in_execve = 0; |
1366 | mutex_unlock(&current->cred_guard_mutex); | ||
1367 | 1380 | ||
1368 | out_free: | 1381 | out_free: |
1369 | free_bprm(bprm); | 1382 | free_bprm(bprm); |
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index d636e1297cad..a63d44256a70 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -230,7 +230,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
230 | return error; | 230 | return error; |
231 | } | 231 | } |
232 | 232 | ||
233 | static int | 233 | int |
234 | ext2_check_acl(struct inode *inode, int mask) | 234 | ext2_check_acl(struct inode *inode, int mask) |
235 | { | 235 | { |
236 | struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); | 236 | struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); |
@@ -246,12 +246,6 @@ ext2_check_acl(struct inode *inode, int mask) | |||
246 | return -EAGAIN; | 246 | return -EAGAIN; |
247 | } | 247 | } |
248 | 248 | ||
249 | int | ||
250 | ext2_permission(struct inode *inode, int mask) | ||
251 | { | ||
252 | return generic_permission(inode, mask, ext2_check_acl); | ||
253 | } | ||
254 | |||
255 | /* | 249 | /* |
256 | * Initialize the ACLs of a new inode. Called from ext2_new_inode. | 250 | * Initialize the ACLs of a new inode. Called from ext2_new_inode. |
257 | * | 251 | * |
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index ecefe478898f..3ff6cbb9ac44 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h | |||
@@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT2_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT2_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext2_permission (struct inode *, int); | 57 | extern int ext2_check_acl (struct inode *, int); |
58 | extern int ext2_acl_chmod (struct inode *); | 58 | extern int ext2_acl_chmod (struct inode *); |
59 | extern int ext2_init_acl (struct inode *, struct inode *); | 59 | extern int ext2_init_acl (struct inode *, struct inode *); |
60 | 60 | ||
61 | #else | 61 | #else |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext2_permission NULL | 63 | #define ext2_check_acl NULL |
64 | #define ext2_get_acl NULL | 64 | #define ext2_get_acl NULL |
65 | #define ext2_set_acl NULL | 65 | #define ext2_set_acl NULL |
66 | 66 | ||
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 2b9e47dc9222..a2f3afd1a1c1 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -85,6 +85,6 @@ const struct inode_operations ext2_file_inode_operations = { | |||
85 | .removexattr = generic_removexattr, | 85 | .removexattr = generic_removexattr, |
86 | #endif | 86 | #endif |
87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
88 | .permission = ext2_permission, | 88 | .check_acl = ext2_check_acl, |
89 | .fiemap = ext2_fiemap, | 89 | .fiemap = ext2_fiemap, |
90 | }; | 90 | }; |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index e1dedb0f7873..23701f289e98 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, | |||
362 | if (dir_de) { | 362 | if (dir_de) { |
363 | if (old_dir != new_dir) | 363 | if (old_dir != new_dir) |
364 | ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); | 364 | ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); |
365 | else { | ||
366 | kunmap(dir_page); | ||
367 | page_cache_release(dir_page); | ||
368 | } | ||
365 | inode_dec_link_count(old_dir); | 369 | inode_dec_link_count(old_dir); |
366 | } | 370 | } |
367 | return 0; | 371 | return 0; |
@@ -396,7 +400,7 @@ const struct inode_operations ext2_dir_inode_operations = { | |||
396 | .removexattr = generic_removexattr, | 400 | .removexattr = generic_removexattr, |
397 | #endif | 401 | #endif |
398 | .setattr = ext2_setattr, | 402 | .setattr = ext2_setattr, |
399 | .permission = ext2_permission, | 403 | .check_acl = ext2_check_acl, |
400 | }; | 404 | }; |
401 | 405 | ||
402 | const struct inode_operations ext2_special_inode_operations = { | 406 | const struct inode_operations ext2_special_inode_operations = { |
@@ -407,5 +411,5 @@ const struct inode_operations ext2_special_inode_operations = { | |||
407 | .removexattr = generic_removexattr, | 411 | .removexattr = generic_removexattr, |
408 | #endif | 412 | #endif |
409 | .setattr = ext2_setattr, | 413 | .setattr = ext2_setattr, |
410 | .permission = ext2_permission, | 414 | .check_acl = ext2_check_acl, |
411 | }; | 415 | }; |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig index fb3c1a21b135..522b15498f45 100644 --- a/fs/ext3/Kconfig +++ b/fs/ext3/Kconfig | |||
@@ -29,23 +29,25 @@ config EXT3_FS | |||
29 | module will be called ext3. | 29 | module will be called ext3. |
30 | 30 | ||
31 | config EXT3_DEFAULTS_TO_ORDERED | 31 | config EXT3_DEFAULTS_TO_ORDERED |
32 | bool "Default to 'data=ordered' in ext3 (legacy option)" | 32 | bool "Default to 'data=ordered' in ext3" |
33 | depends on EXT3_FS | 33 | depends on EXT3_FS |
34 | help | 34 | help |
35 | If a filesystem does not explicitly specify a data ordering | 35 | The journal mode options for ext3 have different tradeoffs |
36 | mode, and the journal capability allowed it, ext3 used to | 36 | between when data is guaranteed to be on disk and |
37 | historically default to 'data=ordered'. | 37 | performance. The use of "data=writeback" can cause |
38 | | 38 | unwritten data to appear in files after a system crash or | |
39 | That was a rather unfortunate choice, because it leads to all | 39 | power failure, which can be a security issue. However, |
40 | kinds of latency problems, and the 'data=writeback' mode is more | 40 | "data=ordered" mode can also result in major performance |
41 | appropriate these days. | 41 | problems, including seconds-long delays before an fsync() |
42 | 42 | call returns. For details, see: | |
43 | You should probably always answer 'n' here, and if you really | 43 | |
44 | want to use 'data=ordered' mode, set it in the filesystem itself | 44 | http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs |
45 | with 'tune2fs -o journal_data_ordered'. | 45 | |
46 | 46 | If you have been historically happy with ext3's performance, | |
47 | But if you really want to enable the legacy default, you can do | 47 | data=ordered mode will be a safe choice and you should |
48 | so by answering 'y' to this question. | 48 | answer 'y' here. If you understand the reliability and data |
49 | privacy issues of data=writeback and are willing to make | ||
50 | that trade off, answer 'n'. | ||
49 | 51 | ||
50 | config EXT3_FS_XATTR | 52 | config EXT3_FS_XATTR |
51 | bool "Ext3 extended attributes" | 53 | bool "Ext3 extended attributes" |
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e167bae37ef0..c9b0df376b5f 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c | |||
@@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, | |||
238 | return error; | 238 | return error; |
239 | } | 239 | } |
240 | 240 | ||
241 | static int | 241 | int |
242 | ext3_check_acl(struct inode *inode, int mask) | 242 | ext3_check_acl(struct inode *inode, int mask) |
243 | { | 243 | { |
244 | struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); | 244 | struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); |
@@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask) | |||
254 | return -EAGAIN; | 254 | return -EAGAIN; |
255 | } | 255 | } |
256 | 256 | ||
257 | int | ||
258 | ext3_permission(struct inode *inode, int mask) | ||
259 | { | ||
260 | return generic_permission(inode, mask, ext3_check_acl); | ||
261 | } | ||
262 | |||
263 | /* | 257 | /* |
264 | * Initialize the ACLs of a new inode. Called from ext3_new_inode. | 258 | * Initialize the ACLs of a new inode. Called from ext3_new_inode. |
265 | * | 259 | * |
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 07d15a3a5969..597334626de9 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h | |||
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT3_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT3_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext3_permission (struct inode *, int); | 57 | extern int ext3_check_acl (struct inode *, int); |
58 | extern int ext3_acl_chmod (struct inode *); | 58 | extern int ext3_acl_chmod (struct inode *); |
59 | extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); | 59 | extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); |
60 | 60 | ||
61 | #else /* CONFIG_EXT3_FS_POSIX_ACL */ | 61 | #else /* CONFIG_EXT3_FS_POSIX_ACL */ |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext3_permission NULL | 63 | #define ext3_check_acl NULL |
64 | 64 | ||
65 | static inline int | 65 | static inline int |
66 | ext3_acl_chmod(struct inode *inode) | 66 | ext3_acl_chmod(struct inode *inode) |
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 5b49704b231b..299253214789 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
@@ -137,7 +137,7 @@ const struct inode_operations ext3_file_inode_operations = { | |||
137 | .listxattr = ext3_listxattr, | 137 | .listxattr = ext3_listxattr, |
138 | .removexattr = generic_removexattr, | 138 | .removexattr = generic_removexattr, |
139 | #endif | 139 | #endif |
140 | .permission = ext3_permission, | 140 | .check_acl = ext3_check_acl, |
141 | .fiemap = ext3_fiemap, | 141 | .fiemap = ext3_fiemap, |
142 | }; | 142 | }; |
143 | 143 | ||
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 6ff7b9730234..aad6400c9b77 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = { | |||
2445 | .listxattr = ext3_listxattr, | 2445 | .listxattr = ext3_listxattr, |
2446 | .removexattr = generic_removexattr, | 2446 | .removexattr = generic_removexattr, |
2447 | #endif | 2447 | #endif |
2448 | .permission = ext3_permission, | 2448 | .check_acl = ext3_check_acl, |
2449 | }; | 2449 | }; |
2450 | 2450 | ||
2451 | const struct inode_operations ext3_special_inode_operations = { | 2451 | const struct inode_operations ext3_special_inode_operations = { |
@@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = { | |||
2456 | .listxattr = ext3_listxattr, | 2456 | .listxattr = ext3_listxattr, |
2457 | .removexattr = generic_removexattr, | 2457 | .removexattr = generic_removexattr, |
2458 | #endif | 2458 | #endif |
2459 | .permission = ext3_permission, | 2459 | .check_acl = ext3_check_acl, |
2460 | }; | 2460 | }; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 524b349c6299..a8d80a7f1105 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -543,6 +543,19 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl | |||
543 | #endif | 543 | #endif |
544 | } | 544 | } |
545 | 545 | ||
546 | static char *data_mode_string(unsigned long mode) | ||
547 | { | ||
548 | switch (mode) { | ||
549 | case EXT3_MOUNT_JOURNAL_DATA: | ||
550 | return "journal"; | ||
551 | case EXT3_MOUNT_ORDERED_DATA: | ||
552 | return "ordered"; | ||
553 | case EXT3_MOUNT_WRITEBACK_DATA: | ||
554 | return "writeback"; | ||
555 | } | ||
556 | return "unknown"; | ||
557 | } | ||
558 | |||
546 | /* | 559 | /* |
547 | * Show an option if | 560 | * Show an option if |
548 | * - it's set to a non-default value OR | 561 | * - it's set to a non-default value OR |
@@ -616,13 +629,8 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
616 | if (test_opt(sb, NOBH)) | 629 | if (test_opt(sb, NOBH)) |
617 | seq_puts(seq, ",nobh"); | 630 | seq_puts(seq, ",nobh"); |
618 | 631 | ||
619 | if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) | 632 | seq_printf(seq, ",data=%s", data_mode_string(sbi->s_mount_opt & |
620 | seq_puts(seq, ",data=journal"); | 633 | EXT3_MOUNT_DATA_FLAGS)); |
621 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) | ||
622 | seq_puts(seq, ",data=ordered"); | ||
623 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) | ||
624 | seq_puts(seq, ",data=writeback"); | ||
625 | |||
626 | if (test_opt(sb, DATA_ERR_ABORT)) | 634 | if (test_opt(sb, DATA_ERR_ABORT)) |
627 | seq_puts(seq, ",data_err=abort"); | 635 | seq_puts(seq, ",data_err=abort"); |
628 | 636 | ||
@@ -1024,12 +1032,18 @@ static int parse_options (char *options, struct super_block *sb, | |||
1024 | datacheck: | 1032 | datacheck: |
1025 | if (is_remount) { | 1033 | if (is_remount) { |
1026 | if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) | 1034 | if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) |
1027 | != data_opt) { | 1035 | == data_opt) |
1028 | printk(KERN_ERR | 1036 | break; |
1029 | "EXT3-fs: cannot change data " | 1037 | printk(KERN_ERR |
1030 | "mode on remount\n"); | 1038 | "EXT3-fs (device %s): Cannot change " |
1031 | return 0; | 1039 | "data mode on remount. The filesystem " |
1032 | } | 1040 | "is mounted in data=%s mode and you " |
1041 | "try to remount it in data=%s mode.\n", | ||
1042 | sb->s_id, | ||
1043 | data_mode_string(sbi->s_mount_opt & | ||
1044 | EXT3_MOUNT_DATA_FLAGS), | ||
1045 | data_mode_string(data_opt)); | ||
1046 | return 0; | ||
1033 | } else { | 1047 | } else { |
1034 | sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; | 1048 | sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; |
1035 | sbi->s_mount_opt |= data_opt; | 1049 | sbi->s_mount_opt |= data_opt; |
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index f6d8967149ca..0df88b2a69b0 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c | |||
@@ -236,7 +236,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, | |||
236 | return error; | 236 | return error; |
237 | } | 237 | } |
238 | 238 | ||
239 | static int | 239 | int |
240 | ext4_check_acl(struct inode *inode, int mask) | 240 | ext4_check_acl(struct inode *inode, int mask) |
241 | { | 241 | { |
242 | struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); | 242 | struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); |
@@ -252,12 +252,6 @@ ext4_check_acl(struct inode *inode, int mask) | |||
252 | return -EAGAIN; | 252 | return -EAGAIN; |
253 | } | 253 | } |
254 | 254 | ||
255 | int | ||
256 | ext4_permission(struct inode *inode, int mask) | ||
257 | { | ||
258 | return generic_permission(inode, mask, ext4_check_acl); | ||
259 | } | ||
260 | |||
261 | /* | 255 | /* |
262 | * Initialize the ACLs of a new inode. Called from ext4_new_inode. | 256 | * Initialize the ACLs of a new inode. Called from ext4_new_inode. |
263 | * | 257 | * |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 949789d2bba6..9d843d5deac4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size) | |||
54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
55 | 55 | ||
56 | /* acl.c */ | 56 | /* acl.c */ |
57 | extern int ext4_permission(struct inode *, int); | 57 | extern int ext4_check_acl(struct inode *, int); |
58 | extern int ext4_acl_chmod(struct inode *); | 58 | extern int ext4_acl_chmod(struct inode *); |
59 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); | 59 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
60 | 60 | ||
61 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ | 61 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
62 | #include <linux/sched.h> | 62 | #include <linux/sched.h> |
63 | #define ext4_permission NULL | 63 | #define ext4_check_acl NULL |
64 | 64 | ||
65 | static inline int | 65 | static inline int |
66 | ext4_acl_chmod(struct inode *inode) | 66 | ext4_acl_chmod(struct inode *inode) |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3f1873fef1c6..27f3c5354c0e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -207,7 +207,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
207 | .listxattr = ext4_listxattr, | 207 | .listxattr = ext4_listxattr, |
208 | .removexattr = generic_removexattr, | 208 | .removexattr = generic_removexattr, |
209 | #endif | 209 | #endif |
210 | .permission = ext4_permission, | 210 | .check_acl = ext4_check_acl, |
211 | .fallocate = ext4_fallocate, | 211 | .fallocate = ext4_fallocate, |
212 | .fiemap = ext4_fiemap, | 212 | .fiemap = ext4_fiemap, |
213 | }; | 213 | }; |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index de04013d16ff..114abe5d2c1d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2536,7 +2536,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
2536 | .listxattr = ext4_listxattr, | 2536 | .listxattr = ext4_listxattr, |
2537 | .removexattr = generic_removexattr, | 2537 | .removexattr = generic_removexattr, |
2538 | #endif | 2538 | #endif |
2539 | .permission = ext4_permission, | 2539 | .check_acl = ext4_check_acl, |
2540 | .fiemap = ext4_fiemap, | 2540 | .fiemap = ext4_fiemap, |
2541 | }; | 2541 | }; |
2542 | 2542 | ||
@@ -2548,5 +2548,5 @@ const struct inode_operations ext4_special_inode_operations = { | |||
2548 | .listxattr = ext4_listxattr, | 2548 | .listxattr = ext4_listxattr, |
2549 | .removexattr = generic_removexattr, | 2549 | .removexattr = generic_removexattr, |
2550 | #endif | 2550 | #endif |
2551 | .permission = ext4_permission, | 2551 | .check_acl = ext4_check_acl, |
2552 | }; | 2552 | }; |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c54226be5294..da86ef58e427 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -19,171 +19,223 @@ | |||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/fs.h> | 20 | #include <linux/fs.h> |
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/kthread.h> | ||
23 | #include <linux/freezer.h> | ||
22 | #include <linux/writeback.h> | 24 | #include <linux/writeback.h> |
23 | #include <linux/blkdev.h> | 25 | #include <linux/blkdev.h> |
24 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
25 | #include <linux/buffer_head.h> | 27 | #include <linux/buffer_head.h> |
26 | #include "internal.h" | 28 | #include "internal.h" |
27 | 29 | ||
30 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
28 | 31 | ||
29 | /** | 32 | /* |
30 | * writeback_acquire - attempt to get exclusive writeback access to a device | 33 | * We don't actually have pdflush, but this one is exported through /proc... |
31 | * @bdi: the device's backing_dev_info structure | ||
32 | * | ||
33 | * It is a waste of resources to have more than one pdflush thread blocked on | ||
34 | * a single request queue. Exclusion at the request_queue level is obtained | ||
35 | * via a flag in the request_queue's backing_dev_info.state. | ||
36 | * | ||
37 | * Non-request_queue-backed address_spaces will share default_backing_dev_info, | ||
38 | * unless they implement their own. Which is somewhat inefficient, as this | ||
39 | * may prevent concurrent writeback against multiple devices. | ||
40 | */ | 34 | */ |
41 | static int writeback_acquire(struct backing_dev_info *bdi) | 35 | int nr_pdflush_threads; |
36 | |||
37 | /* | ||
38 | * Work items for the bdi_writeback threads | ||
39 | */ | ||
40 | struct bdi_work { | ||
41 | struct list_head list; | ||
42 | struct list_head wait_list; | ||
43 | struct rcu_head rcu_head; | ||
44 | |||
45 | unsigned long seen; | ||
46 | atomic_t pending; | ||
47 | |||
48 | struct super_block *sb; | ||
49 | unsigned long nr_pages; | ||
50 | enum writeback_sync_modes sync_mode; | ||
51 | |||
52 | unsigned long state; | ||
53 | }; | ||
54 | |||
55 | enum { | ||
56 | WS_USED_B = 0, | ||
57 | WS_ONSTACK_B, | ||
58 | }; | ||
59 | |||
60 | #define WS_USED (1 << WS_USED_B) | ||
61 | #define WS_ONSTACK (1 << WS_ONSTACK_B) | ||
62 | |||
63 | static inline bool bdi_work_on_stack(struct bdi_work *work) | ||
42 | { | 64 | { |
43 | return !test_and_set_bit(BDI_pdflush, &bdi->state); | 65 | return test_bit(WS_ONSTACK_B, &work->state); |
66 | } | ||
67 | |||
68 | static inline void bdi_work_init(struct bdi_work *work, | ||
69 | struct writeback_control *wbc) | ||
70 | { | ||
71 | INIT_RCU_HEAD(&work->rcu_head); | ||
72 | work->sb = wbc->sb; | ||
73 | work->nr_pages = wbc->nr_to_write; | ||
74 | work->sync_mode = wbc->sync_mode; | ||
75 | work->state = WS_USED; | ||
76 | } | ||
77 | |||
78 | static inline void bdi_work_init_on_stack(struct bdi_work *work, | ||
79 | struct writeback_control *wbc) | ||
80 | { | ||
81 | bdi_work_init(work, wbc); | ||
82 | work->state |= WS_ONSTACK; | ||
44 | } | 83 | } |
45 | 84 | ||
46 | /** | 85 | /** |
47 | * writeback_in_progress - determine whether there is writeback in progress | 86 | * writeback_in_progress - determine whether there is writeback in progress |
48 | * @bdi: the device's backing_dev_info structure. | 87 | * @bdi: the device's backing_dev_info structure. |
49 | * | 88 | * |
50 | * Determine whether there is writeback in progress against a backing device. | 89 | * Determine whether there is writeback waiting to be handled against a |
90 | * backing device. | ||
51 | */ | 91 | */ |
52 | int writeback_in_progress(struct backing_dev_info *bdi) | 92 | int writeback_in_progress(struct backing_dev_info *bdi) |
53 | { | 93 | { |
54 | return test_bit(BDI_pdflush, &bdi->state); | 94 | return !list_empty(&bdi->work_list); |
55 | } | 95 | } |
56 | 96 | ||
57 | /** | 97 | static void bdi_work_clear(struct bdi_work *work) |
58 | * writeback_release - relinquish exclusive writeback access against a device. | ||
59 | * @bdi: the device's backing_dev_info structure | ||
60 | */ | ||
61 | static void writeback_release(struct backing_dev_info *bdi) | ||
62 | { | 98 | { |
63 | BUG_ON(!writeback_in_progress(bdi)); | 99 | clear_bit(WS_USED_B, &work->state); |
64 | clear_bit(BDI_pdflush, &bdi->state); | 100 | smp_mb__after_clear_bit(); |
101 | wake_up_bit(&work->state, WS_USED_B); | ||
65 | } | 102 | } |
66 | 103 | ||
67 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 104 | static void bdi_work_free(struct rcu_head *head) |
68 | { | 105 | { |
69 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 106 | struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); |
70 | struct dentry *dentry; | ||
71 | const char *name = "?"; | ||
72 | 107 | ||
73 | dentry = d_find_alias(inode); | 108 | if (!bdi_work_on_stack(work)) |
74 | if (dentry) { | 109 | kfree(work); |
75 | spin_lock(&dentry->d_lock); | 110 | else |
76 | name = (const char *) dentry->d_name.name; | 111 | bdi_work_clear(work); |
77 | } | ||
78 | printk(KERN_DEBUG | ||
79 | "%s(%d): dirtied inode %lu (%s) on %s\n", | ||
80 | current->comm, task_pid_nr(current), inode->i_ino, | ||
81 | name, inode->i_sb->s_id); | ||
82 | if (dentry) { | ||
83 | spin_unlock(&dentry->d_lock); | ||
84 | dput(dentry); | ||
85 | } | ||
86 | } | ||
87 | } | 112 | } |
88 | 113 | ||
89 | /** | 114 | static void wb_work_complete(struct bdi_work *work) |
90 | * __mark_inode_dirty - internal function | ||
91 | * @inode: inode to mark | ||
92 | * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) | ||
93 | * Mark an inode as dirty. Callers should use mark_inode_dirty or | ||
94 | * mark_inode_dirty_sync. | ||
95 | * | ||
96 | * Put the inode on the super block's dirty list. | ||
97 | * | ||
98 | * CAREFUL! We mark it dirty unconditionally, but move it onto the | ||
99 | * dirty list only if it is hashed or if it refers to a blockdev. | ||
100 | * If it was not hashed, it will never be added to the dirty list | ||
101 | * even if it is later hashed, as it will have been marked dirty already. | ||
102 | * | ||
103 | * In short, make sure you hash any inodes _before_ you start marking | ||
104 | * them dirty. | ||
105 | * | ||
106 | * This function *must* be atomic for the I_DIRTY_PAGES case - | ||
107 | * set_page_dirty() is called under spinlock in several places. | ||
108 | * | ||
109 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of | ||
110 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of | ||
111 | * the kernel-internal blockdev inode represents the dirtying time of the | ||
112 | * blockdev's pages. This is why for I_DIRTY_PAGES we always use | ||
113 | * page->mapping->host, so the page-dirtying time is recorded in the internal | ||
114 | * blockdev inode. | ||
115 | */ | ||
116 | void __mark_inode_dirty(struct inode *inode, int flags) | ||
117 | { | 115 | { |
118 | struct super_block *sb = inode->i_sb; | 116 | const enum writeback_sync_modes sync_mode = work->sync_mode; |
119 | 117 | ||
120 | /* | 118 | /* |
121 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 119 | * For allocated work, we can clear the done/seen bit right here. |
122 | * dirty the inode itself | 120 | * For on-stack work, we need to postpone both the clear and free |
121 | * to after the RCU grace period, since the stack could be invalidated | ||
122 | * as soon as bdi_work_clear() has done the wakeup. | ||
123 | */ | 123 | */ |
124 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 124 | if (!bdi_work_on_stack(work)) |
125 | if (sb->s_op->dirty_inode) | 125 | bdi_work_clear(work); |
126 | sb->s_op->dirty_inode(inode); | 126 | if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work)) |
127 | } | 127 | call_rcu(&work->rcu_head, bdi_work_free); |
128 | } | ||
128 | 129 | ||
130 | static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | ||
131 | { | ||
129 | /* | 132 | /* |
130 | * make sure that changes are seen by all cpus before we test i_state | 133 | * The caller has retrieved the work arguments from this work, |
131 | * -- mikulas | 134 | * drop our reference. If this is the last ref, delete and free it |
132 | */ | 135 | */ |
133 | smp_mb(); | 136 | if (atomic_dec_and_test(&work->pending)) { |
137 | struct backing_dev_info *bdi = wb->bdi; | ||
134 | 138 | ||
135 | /* avoid the locking if we can */ | 139 | spin_lock(&bdi->wb_lock); |
136 | if ((inode->i_state & flags) == flags) | 140 | list_del_rcu(&work->list); |
137 | return; | 141 | spin_unlock(&bdi->wb_lock); |
138 | 142 | ||
139 | if (unlikely(block_dump)) | 143 | wb_work_complete(work); |
140 | block_dump___mark_inode_dirty(inode); | 144 | } |
141 | 145 | } | |
142 | spin_lock(&inode_lock); | ||
143 | if ((inode->i_state & flags) != flags) { | ||
144 | const int was_dirty = inode->i_state & I_DIRTY; | ||
145 | 146 | ||
146 | inode->i_state |= flags; | 147 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) |
148 | { | ||
149 | if (work) { | ||
150 | work->seen = bdi->wb_mask; | ||
151 | BUG_ON(!work->seen); | ||
152 | atomic_set(&work->pending, bdi->wb_cnt); | ||
153 | BUG_ON(!bdi->wb_cnt); | ||
147 | 154 | ||
148 | /* | 155 | /* |
149 | * If the inode is being synced, just update its dirty state. | 156 | * Make sure stores are seen before it appears on the list |
150 | * The unlocker will place the inode on the appropriate | ||
151 | * superblock list, based upon its state. | ||
152 | */ | 157 | */ |
153 | if (inode->i_state & I_SYNC) | 158 | smp_mb(); |
154 | goto out; | ||
155 | 159 | ||
156 | /* | 160 | spin_lock(&bdi->wb_lock); |
157 | * Only add valid (hashed) inodes to the superblock's | 161 | list_add_tail_rcu(&work->list, &bdi->work_list); |
158 | * dirty list. Add blockdev inodes as well. | 162 | spin_unlock(&bdi->wb_lock); |
159 | */ | 163 | } |
160 | if (!S_ISBLK(inode->i_mode)) { | 164 | |
161 | if (hlist_unhashed(&inode->i_hash)) | 165 | /* |
162 | goto out; | 166 | * If the default thread isn't there, make sure we add it. When |
163 | } | 167 | * it gets created and wakes up, we'll run this work. |
164 | if (inode->i_state & (I_FREEING|I_CLEAR)) | 168 | */ |
165 | goto out; | 169 | if (unlikely(list_empty_careful(&bdi->wb_list))) |
170 | wake_up_process(default_backing_dev_info.wb.task); | ||
171 | else { | ||
172 | struct bdi_writeback *wb = &bdi->wb; | ||
166 | 173 | ||
167 | /* | 174 | /* |
168 | * If the inode was already on s_dirty/s_io/s_more_io, don't | 175 | * If we failed allocating the bdi work item, wake up the wb |
169 | * reposition it (that would break s_dirty time-ordering). | 176 | * thread always. As a safety precaution, it'll flush out |
177 | * everything | ||
170 | */ | 178 | */ |
171 | if (!was_dirty) { | 179 | if (!wb_has_dirty_io(wb)) { |
172 | inode->dirtied_when = jiffies; | 180 | if (work) |
173 | list_move(&inode->i_list, &sb->s_dirty); | 181 | wb_clear_pending(wb, work); |
174 | } | 182 | } else if (wb->task) |
183 | wake_up_process(wb->task); | ||
175 | } | 184 | } |
176 | out: | ||
177 | spin_unlock(&inode_lock); | ||
178 | } | 185 | } |
179 | 186 | ||
180 | EXPORT_SYMBOL(__mark_inode_dirty); | 187 | /* |
188 | * Used for on-stack allocated work items. The caller needs to wait until | ||
189 | * the wb threads have acked the work before it's safe to continue. | ||
190 | */ | ||
191 | static void bdi_wait_on_work_clear(struct bdi_work *work) | ||
192 | { | ||
193 | wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, | ||
194 | TASK_UNINTERRUPTIBLE); | ||
195 | } | ||
181 | 196 | ||
182 | static int write_inode(struct inode *inode, int sync) | 197 | static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc) |
183 | { | 198 | { |
184 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) | 199 | struct bdi_work *work; |
185 | return inode->i_sb->s_op->write_inode(inode, sync); | 200 | |
186 | return 0; | 201 | work = kmalloc(sizeof(*work), GFP_ATOMIC); |
202 | if (work) | ||
203 | bdi_work_init(work, wbc); | ||
204 | |||
205 | return work; | ||
206 | } | ||
207 | |||
208 | void bdi_start_writeback(struct writeback_control *wbc) | ||
209 | { | ||
210 | const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; | ||
211 | struct bdi_work work_stack, *work = NULL; | ||
212 | |||
213 | if (!must_wait) | ||
214 | work = bdi_alloc_work(wbc); | ||
215 | |||
216 | if (!work) { | ||
217 | work = &work_stack; | ||
218 | bdi_work_init_on_stack(work, wbc); | ||
219 | } | ||
220 | |||
221 | bdi_queue_work(wbc->bdi, work); | ||
222 | |||
223 | /* | ||
224 | * If the sync mode is WB_SYNC_ALL, block waiting for the work to | ||
225 | * complete. If not, we only need to wait for the work to be started, | ||
226 | * if we allocated it on-stack. We use the same mechanism, if the | ||
227 | * wait bit is set in the bdi_work struct, then threads will not | ||
228 | * clear pending until after they are done. | ||
229 | * | ||
230 | * Note that work == &work_stack if must_wait is true, so we don't | ||
231 | * need to do call_rcu() here ever, since the completion path will | ||
232 | * have done that for us. | ||
233 | */ | ||
234 | if (must_wait || work == &work_stack) { | ||
235 | bdi_wait_on_work_clear(work); | ||
236 | if (work != &work_stack) | ||
237 | call_rcu(&work->rcu_head, bdi_work_free); | ||
238 | } | ||
187 | } | 239 | } |
188 | 240 | ||
189 | /* | 241 | /* |
@@ -191,31 +243,32 @@ static int write_inode(struct inode *inode, int sync) | |||
191 | * furthest end of its superblock's dirty-inode list. | 243 | * furthest end of its superblock's dirty-inode list. |
192 | * | 244 | * |
193 | * Before stamping the inode's ->dirtied_when, we check to see whether it is | 245 | * Before stamping the inode's ->dirtied_when, we check to see whether it is |
194 | * already the most-recently-dirtied inode on the s_dirty list. If that is | 246 | * already the most-recently-dirtied inode on the b_dirty list. If that is |
195 | * the case then the inode must have been redirtied while it was being written | 247 | * the case then the inode must have been redirtied while it was being written |
196 | * out and we don't reset its dirtied_when. | 248 | * out and we don't reset its dirtied_when. |
197 | */ | 249 | */ |
198 | static void redirty_tail(struct inode *inode) | 250 | static void redirty_tail(struct inode *inode) |
199 | { | 251 | { |
200 | struct super_block *sb = inode->i_sb; | 252 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
201 | 253 | ||
202 | if (!list_empty(&sb->s_dirty)) { | 254 | if (!list_empty(&wb->b_dirty)) { |
203 | struct inode *tail_inode; | 255 | struct inode *tail; |
204 | 256 | ||
205 | tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); | 257 | tail = list_entry(wb->b_dirty.next, struct inode, i_list); |
206 | if (time_before(inode->dirtied_when, | 258 | if (time_before(inode->dirtied_when, tail->dirtied_when)) |
207 | tail_inode->dirtied_when)) | ||
208 | inode->dirtied_when = jiffies; | 259 | inode->dirtied_when = jiffies; |
209 | } | 260 | } |
210 | list_move(&inode->i_list, &sb->s_dirty); | 261 | list_move(&inode->i_list, &wb->b_dirty); |
211 | } | 262 | } |
212 | 263 | ||
213 | /* | 264 | /* |
214 | * requeue inode for re-scanning after sb->s_io list is exhausted. | 265 | * requeue inode for re-scanning after bdi->b_io list is exhausted. |
215 | */ | 266 | */ |
216 | static void requeue_io(struct inode *inode) | 267 | static void requeue_io(struct inode *inode) |
217 | { | 268 | { |
218 | list_move(&inode->i_list, &inode->i_sb->s_more_io); | 269 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
270 | |||
271 | list_move(&inode->i_list, &wb->b_more_io); | ||
219 | } | 272 | } |
220 | 273 | ||
221 | static void inode_sync_complete(struct inode *inode) | 274 | static void inode_sync_complete(struct inode *inode) |
@@ -262,20 +315,18 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
262 | /* | 315 | /* |
263 | * Queue all expired dirty inodes for io, eldest first. | 316 | * Queue all expired dirty inodes for io, eldest first. |
264 | */ | 317 | */ |
265 | static void queue_io(struct super_block *sb, | 318 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
266 | unsigned long *older_than_this) | ||
267 | { | 319 | { |
268 | list_splice_init(&sb->s_more_io, sb->s_io.prev); | 320 | list_splice_init(&wb->b_more_io, wb->b_io.prev); |
269 | move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); | 321 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
270 | } | 322 | } |
271 | 323 | ||
272 | int sb_has_dirty_inodes(struct super_block *sb) | 324 | static int write_inode(struct inode *inode, int sync) |
273 | { | 325 | { |
274 | return !list_empty(&sb->s_dirty) || | 326 | if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) |
275 | !list_empty(&sb->s_io) || | 327 | return inode->i_sb->s_op->write_inode(inode, sync); |
276 | !list_empty(&sb->s_more_io); | 328 | return 0; |
277 | } | 329 | } |
278 | EXPORT_SYMBOL(sb_has_dirty_inodes); | ||
279 | 330 | ||
280 | /* | 331 | /* |
281 | * Wait for writeback on an inode to complete. | 332 | * Wait for writeback on an inode to complete. |
@@ -322,11 +373,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
322 | if (inode->i_state & I_SYNC) { | 373 | if (inode->i_state & I_SYNC) { |
323 | /* | 374 | /* |
324 | * If this inode is locked for writeback and we are not doing | 375 | * If this inode is locked for writeback and we are not doing |
325 | * writeback-for-data-integrity, move it to s_more_io so that | 376 | * writeback-for-data-integrity, move it to b_more_io so that |
326 | * writeback can proceed with the other inodes on s_io. | 377 | * writeback can proceed with the other inodes on s_io. |
327 | * | 378 | * |
328 | * We'll have another go at writing back this inode when we | 379 | * We'll have another go at writing back this inode when we |
329 | * completed a full scan of s_io. | 380 | * completed a full scan of b_io. |
330 | */ | 381 | */ |
331 | if (!wait) { | 382 | if (!wait) { |
332 | requeue_io(inode); | 383 | requeue_io(inode); |
@@ -371,11 +422,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
371 | /* | 422 | /* |
372 | * We didn't write back all the pages. nfs_writepages() | 423 | * We didn't write back all the pages. nfs_writepages() |
373 | * sometimes bales out without doing anything. Redirty | 424 | * sometimes bales out without doing anything. Redirty |
374 | * the inode; Move it from s_io onto s_more_io/s_dirty. | 425 | * the inode; Move it from b_io onto b_more_io/b_dirty. |
375 | */ | 426 | */ |
376 | /* | 427 | /* |
377 | * akpm: if the caller was the kupdate function we put | 428 | * akpm: if the caller was the kupdate function we put |
378 | * this inode at the head of s_dirty so it gets first | 429 | * this inode at the head of b_dirty so it gets first |
379 | * consideration. Otherwise, move it to the tail, for | 430 | * consideration. Otherwise, move it to the tail, for |
380 | * the reasons described there. I'm not really sure | 431 | * the reasons described there. I'm not really sure |
381 | * how much sense this makes. Presumably I had a good | 432 | * how much sense this makes. Presumably I had a good |
@@ -385,7 +436,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
385 | if (wbc->for_kupdate) { | 436 | if (wbc->for_kupdate) { |
386 | /* | 437 | /* |
387 | * For the kupdate function we move the inode | 438 | * For the kupdate function we move the inode |
388 | * to s_more_io so it will get more writeout as | 439 | * to b_more_io so it will get more writeout as |
389 | * soon as the queue becomes uncongested. | 440 | * soon as the queue becomes uncongested. |
390 | */ | 441 | */ |
391 | inode->i_state |= I_DIRTY_PAGES; | 442 | inode->i_state |= I_DIRTY_PAGES; |
@@ -434,50 +485,84 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
434 | } | 485 | } |
435 | 486 | ||
436 | /* | 487 | /* |
437 | * Write out a superblock's list of dirty inodes. A wait will be performed | 488 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned |
438 | * upon no inodes, all inodes or the final one, depending upon sync_mode. | 489 | * before calling writeback. So make sure that we do pin it, so it doesn't |
439 | * | 490 | * go away while we are writing inodes from it. |
440 | * If older_than_this is non-NULL, then only write out inodes which | ||
441 | * had their first dirtying at a time earlier than *older_than_this. | ||
442 | * | ||
443 | * If we're a pdflush thread, then implement pdflush collision avoidance | ||
444 | * against the entire list. | ||
445 | * | 491 | * |
446 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. | 492 | * Returns 0 if the super was successfully pinned (or pinning wasn't needed), |
447 | * This function assumes that the blockdev superblock's inodes are backed by | 493 | * 1 if we failed. |
448 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
449 | * assume that all inodes are backed by the same queue. | ||
450 | * | ||
451 | * FIXME: this linear search could get expensive with many fileystems. But | ||
452 | * how to fix? We need to go from an address_space to all inodes which share | ||
453 | * a queue with that address_space. (Easy: have a global "dirty superblocks" | ||
454 | * list). | ||
455 | * | ||
456 | * The inodes to be written are parked on sb->s_io. They are moved back onto | ||
457 | * sb->s_dirty as they are selected for writing. This way, none can be missed | ||
458 | * on the writer throttling path, and we get decent balancing between many | ||
459 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
460 | */ | 494 | */ |
461 | void generic_sync_sb_inodes(struct super_block *sb, | 495 | static int pin_sb_for_writeback(struct writeback_control *wbc, |
496 | struct inode *inode) | ||
497 | { | ||
498 | struct super_block *sb = inode->i_sb; | ||
499 | |||
500 | /* | ||
501 | * Caller must already hold the ref for this | ||
502 | */ | ||
503 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
504 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
505 | return 0; | ||
506 | } | ||
507 | |||
508 | spin_lock(&sb_lock); | ||
509 | sb->s_count++; | ||
510 | if (down_read_trylock(&sb->s_umount)) { | ||
511 | if (sb->s_root) { | ||
512 | spin_unlock(&sb_lock); | ||
513 | return 0; | ||
514 | } | ||
515 | /* | ||
516 | * umounted, drop rwsem again and fall through to failure | ||
517 | */ | ||
518 | up_read(&sb->s_umount); | ||
519 | } | ||
520 | |||
521 | sb->s_count--; | ||
522 | spin_unlock(&sb_lock); | ||
523 | return 1; | ||
524 | } | ||
525 | |||
526 | static void unpin_sb_for_writeback(struct writeback_control *wbc, | ||
527 | struct inode *inode) | ||
528 | { | ||
529 | struct super_block *sb = inode->i_sb; | ||
530 | |||
531 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
532 | return; | ||
533 | |||
534 | up_read(&sb->s_umount); | ||
535 | put_super(sb); | ||
536 | } | ||
537 | |||
538 | static void writeback_inodes_wb(struct bdi_writeback *wb, | ||
462 | struct writeback_control *wbc) | 539 | struct writeback_control *wbc) |
463 | { | 540 | { |
541 | struct super_block *sb = wbc->sb; | ||
542 | const int is_blkdev_sb = sb_is_blkdev_sb(sb); | ||
464 | const unsigned long start = jiffies; /* livelock avoidance */ | 543 | const unsigned long start = jiffies; /* livelock avoidance */ |
465 | int sync = wbc->sync_mode == WB_SYNC_ALL; | ||
466 | 544 | ||
467 | spin_lock(&inode_lock); | 545 | spin_lock(&inode_lock); |
468 | if (!wbc->for_kupdate || list_empty(&sb->s_io)) | ||
469 | queue_io(sb, wbc->older_than_this); | ||
470 | 546 | ||
471 | while (!list_empty(&sb->s_io)) { | 547 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
472 | struct inode *inode = list_entry(sb->s_io.prev, | 548 | queue_io(wb, wbc->older_than_this); |
549 | |||
550 | while (!list_empty(&wb->b_io)) { | ||
551 | struct inode *inode = list_entry(wb->b_io.prev, | ||
473 | struct inode, i_list); | 552 | struct inode, i_list); |
474 | struct address_space *mapping = inode->i_mapping; | ||
475 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
476 | long pages_skipped; | 553 | long pages_skipped; |
477 | 554 | ||
478 | if (!bdi_cap_writeback_dirty(bdi)) { | 555 | /* |
556 | * super block given and doesn't match, skip this inode | ||
557 | */ | ||
558 | if (sb && sb != inode->i_sb) { | ||
559 | redirty_tail(inode); | ||
560 | continue; | ||
561 | } | ||
562 | |||
563 | if (!bdi_cap_writeback_dirty(wb->bdi)) { | ||
479 | redirty_tail(inode); | 564 | redirty_tail(inode); |
480 | if (sb_is_blkdev_sb(sb)) { | 565 | if (is_blkdev_sb) { |
481 | /* | 566 | /* |
482 | * Dirty memory-backed blockdev: the ramdisk | 567 | * Dirty memory-backed blockdev: the ramdisk |
483 | * driver does this. Skip just this inode | 568 | * driver does this. Skip just this inode |
@@ -497,21 +582,14 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
497 | continue; | 582 | continue; |
498 | } | 583 | } |
499 | 584 | ||
500 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 585 | if (wbc->nonblocking && bdi_write_congested(wb->bdi)) { |
501 | wbc->encountered_congestion = 1; | 586 | wbc->encountered_congestion = 1; |
502 | if (!sb_is_blkdev_sb(sb)) | 587 | if (!is_blkdev_sb) |
503 | break; /* Skip a congested fs */ | 588 | break; /* Skip a congested fs */ |
504 | requeue_io(inode); | 589 | requeue_io(inode); |
505 | continue; /* Skip a congested blockdev */ | 590 | continue; /* Skip a congested blockdev */ |
506 | } | 591 | } |
507 | 592 | ||
508 | if (wbc->bdi && bdi != wbc->bdi) { | ||
509 | if (!sb_is_blkdev_sb(sb)) | ||
510 | break; /* fs has the wrong queue */ | ||
511 | requeue_io(inode); | ||
512 | continue; /* blockdev has wrong queue */ | ||
513 | } | ||
514 | |||
515 | /* | 593 | /* |
516 | * Was this inode dirtied after sync_sb_inodes was called? | 594 | * Was this inode dirtied after sync_sb_inodes was called? |
517 | * This keeps sync from extra jobs and livelock. | 595 | * This keeps sync from extra jobs and livelock. |
@@ -519,16 +597,16 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
519 | if (inode_dirtied_after(inode, start)) | 597 | if (inode_dirtied_after(inode, start)) |
520 | break; | 598 | break; |
521 | 599 | ||
522 | /* Is another pdflush already flushing this queue? */ | 600 | if (pin_sb_for_writeback(wbc, inode)) { |
523 | if (current_is_pdflush() && !writeback_acquire(bdi)) | 601 | requeue_io(inode); |
524 | break; | 602 | continue; |
603 | } | ||
525 | 604 | ||
526 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 605 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); |
527 | __iget(inode); | 606 | __iget(inode); |
528 | pages_skipped = wbc->pages_skipped; | 607 | pages_skipped = wbc->pages_skipped; |
529 | writeback_single_inode(inode, wbc); | 608 | writeback_single_inode(inode, wbc); |
530 | if (current_is_pdflush()) | 609 | unpin_sb_for_writeback(wbc, inode); |
531 | writeback_release(bdi); | ||
532 | if (wbc->pages_skipped != pages_skipped) { | 610 | if (wbc->pages_skipped != pages_skipped) { |
533 | /* | 611 | /* |
534 | * writeback is not making progress due to locked | 612 | * writeback is not making progress due to locked |
@@ -544,144 +622,571 @@ void generic_sync_sb_inodes(struct super_block *sb, | |||
544 | wbc->more_io = 1; | 622 | wbc->more_io = 1; |
545 | break; | 623 | break; |
546 | } | 624 | } |
547 | if (!list_empty(&sb->s_more_io)) | 625 | if (!list_empty(&wb->b_more_io)) |
548 | wbc->more_io = 1; | 626 | wbc->more_io = 1; |
549 | } | 627 | } |
550 | 628 | ||
551 | if (sync) { | 629 | spin_unlock(&inode_lock); |
552 | struct inode *inode, *old_inode = NULL; | 630 | /* Leave any unwritten inodes on b_io */ |
631 | } | ||
632 | |||
633 | void writeback_inodes_wbc(struct writeback_control *wbc) | ||
634 | { | ||
635 | struct backing_dev_info *bdi = wbc->bdi; | ||
553 | 636 | ||
637 | writeback_inodes_wb(&bdi->wb, wbc); | ||
638 | } | ||
639 | |||
640 | /* | ||
641 | * The maximum number of pages to writeout in a single bdi flush/kupdate | ||
642 | * operation. We do this so we don't hold I_SYNC against an inode for | ||
643 | * enormous amounts of time, which would block a userspace task which has | ||
644 | * been forced to throttle against that inode. Also, the code reevaluates | ||
645 | * the dirty each time it has written this many pages. | ||
646 | */ | ||
647 | #define MAX_WRITEBACK_PAGES 1024 | ||
648 | |||
649 | static inline bool over_bground_thresh(void) | ||
650 | { | ||
651 | unsigned long background_thresh, dirty_thresh; | ||
652 | |||
653 | get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL); | ||
654 | |||
655 | return (global_page_state(NR_FILE_DIRTY) + | ||
656 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | ||
657 | } | ||
658 | |||
659 | /* | ||
660 | * Explicit flushing or periodic writeback of "old" data. | ||
661 | * | ||
662 | * Define "old": the first time one of an inode's pages is dirtied, we mark the | ||
663 | * dirtying-time in the inode's address_space. So this periodic writeback code | ||
664 | * just walks the superblock inode list, writing back any inodes which are | ||
665 | * older than a specific point in time. | ||
666 | * | ||
667 | * Try to run once per dirty_writeback_interval. But if a writeback event | ||
668 | * takes longer than a dirty_writeback_interval interval, then leave a | ||
669 | * one-second gap. | ||
670 | * | ||
671 | * older_than_this takes precedence over nr_to_write. So we'll only write back | ||
672 | * all dirty pages if they are all attached to "old" mappings. | ||
673 | */ | ||
674 | static long wb_writeback(struct bdi_writeback *wb, long nr_pages, | ||
675 | struct super_block *sb, | ||
676 | enum writeback_sync_modes sync_mode, int for_kupdate) | ||
677 | { | ||
678 | struct writeback_control wbc = { | ||
679 | .bdi = wb->bdi, | ||
680 | .sb = sb, | ||
681 | .sync_mode = sync_mode, | ||
682 | .older_than_this = NULL, | ||
683 | .for_kupdate = for_kupdate, | ||
684 | .range_cyclic = 1, | ||
685 | }; | ||
686 | unsigned long oldest_jif; | ||
687 | long wrote = 0; | ||
688 | |||
689 | if (wbc.for_kupdate) { | ||
690 | wbc.older_than_this = &oldest_jif; | ||
691 | oldest_jif = jiffies - | ||
692 | msecs_to_jiffies(dirty_expire_interval * 10); | ||
693 | } | ||
694 | |||
695 | for (;;) { | ||
554 | /* | 696 | /* |
555 | * Data integrity sync. Must wait for all pages under writeback, | 697 | * Don't flush anything for non-integrity writeback where |
556 | * because there may have been pages dirtied before our sync | 698 | * no nr_pages was given |
557 | * call, but which had writeout started before we write it out. | ||
558 | * In which case, the inode may not be on the dirty list, but | ||
559 | * we still have to wait for that writeout. | ||
560 | */ | 699 | */ |
561 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 700 | if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE) |
562 | struct address_space *mapping; | 701 | break; |
563 | 702 | ||
564 | if (inode->i_state & | 703 | /* |
565 | (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 704 | * If no specific pages were given and this is just a |
566 | continue; | 705 | * periodic background writeout and we are below the |
567 | mapping = inode->i_mapping; | 706 | * background dirty threshold, don't do anything |
568 | if (mapping->nrpages == 0) | 707 | */ |
708 | if (for_kupdate && nr_pages <= 0 && !over_bground_thresh()) | ||
709 | break; | ||
710 | |||
711 | wbc.more_io = 0; | ||
712 | wbc.encountered_congestion = 0; | ||
713 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | ||
714 | wbc.pages_skipped = 0; | ||
715 | writeback_inodes_wb(wb, &wbc); | ||
716 | nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
717 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
718 | |||
719 | /* | ||
720 | * If we ran out of stuff to write, bail unless more_io got set | ||
721 | */ | ||
722 | if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { | ||
723 | if (wbc.more_io && !wbc.for_kupdate) | ||
569 | continue; | 724 | continue; |
570 | __iget(inode); | 725 | break; |
571 | spin_unlock(&inode_lock); | 726 | } |
727 | } | ||
728 | |||
729 | return wrote; | ||
730 | } | ||
731 | |||
732 | /* | ||
733 | * Return the next bdi_work struct that hasn't been processed by this | ||
734 | * wb thread yet | ||
735 | */ | ||
736 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | ||
737 | struct bdi_writeback *wb) | ||
738 | { | ||
739 | struct bdi_work *work, *ret = NULL; | ||
740 | |||
741 | rcu_read_lock(); | ||
742 | |||
743 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | ||
744 | if (!test_and_clear_bit(wb->nr, &work->seen)) | ||
745 | continue; | ||
746 | |||
747 | ret = work; | ||
748 | break; | ||
749 | } | ||
750 | |||
751 | rcu_read_unlock(); | ||
752 | return ret; | ||
753 | } | ||
754 | |||
755 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | ||
756 | { | ||
757 | unsigned long expired; | ||
758 | long nr_pages; | ||
759 | |||
760 | expired = wb->last_old_flush + | ||
761 | msecs_to_jiffies(dirty_writeback_interval * 10); | ||
762 | if (time_before(jiffies, expired)) | ||
763 | return 0; | ||
764 | |||
765 | wb->last_old_flush = jiffies; | ||
766 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
767 | global_page_state(NR_UNSTABLE_NFS) + | ||
768 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
769 | |||
770 | if (nr_pages) | ||
771 | return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1); | ||
772 | |||
773 | return 0; | ||
774 | } | ||
775 | |||
776 | /* | ||
777 | * Retrieve work items and do the writeback they describe | ||
778 | */ | ||
779 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | ||
780 | { | ||
781 | struct backing_dev_info *bdi = wb->bdi; | ||
782 | struct bdi_work *work; | ||
783 | long nr_pages, wrote = 0; | ||
784 | |||
785 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | ||
786 | enum writeback_sync_modes sync_mode; | ||
787 | |||
788 | nr_pages = work->nr_pages; | ||
789 | |||
790 | /* | ||
791 | * Override sync mode, in case we must wait for completion | ||
792 | */ | ||
793 | if (force_wait) | ||
794 | work->sync_mode = sync_mode = WB_SYNC_ALL; | ||
795 | else | ||
796 | sync_mode = work->sync_mode; | ||
797 | |||
798 | /* | ||
799 | * If this isn't a data integrity operation, just notify | ||
800 | * that we have seen this work and we are now starting it. | ||
801 | */ | ||
802 | if (sync_mode == WB_SYNC_NONE) | ||
803 | wb_clear_pending(wb, work); | ||
804 | |||
805 | wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0); | ||
806 | |||
807 | /* | ||
808 | * This is a data integrity writeback, so only do the | ||
809 | * notification when we have completed the work. | ||
810 | */ | ||
811 | if (sync_mode == WB_SYNC_ALL) | ||
812 | wb_clear_pending(wb, work); | ||
813 | } | ||
814 | |||
815 | /* | ||
816 | * Check for periodic writeback, kupdated() style | ||
817 | */ | ||
818 | wrote += wb_check_old_data_flush(wb); | ||
819 | |||
820 | return wrote; | ||
821 | } | ||
822 | |||
823 | /* | ||
824 | * Handle writeback of dirty data for the device backed by this bdi. Also | ||
825 | * wakes up periodically and does kupdated style flushing. | ||
826 | */ | ||
827 | int bdi_writeback_task(struct bdi_writeback *wb) | ||
828 | { | ||
829 | unsigned long last_active = jiffies; | ||
830 | unsigned long wait_jiffies = -1UL; | ||
831 | long pages_written; | ||
832 | |||
833 | while (!kthread_should_stop()) { | ||
834 | pages_written = wb_do_writeback(wb, 0); | ||
835 | |||
836 | if (pages_written) | ||
837 | last_active = jiffies; | ||
838 | else if (wait_jiffies != -1UL) { | ||
839 | unsigned long max_idle; | ||
840 | |||
572 | /* | 841 | /* |
573 | * We hold a reference to 'inode' so it couldn't have | 842 | * Longest period of inactivity that we tolerate. If we |
574 | * been removed from s_inodes list while we dropped the | 843 | * see dirty data again later, the task will get |
575 | * inode_lock. We cannot iput the inode now as we can | 844 | * recreated automatically. |
576 | * be holding the last reference and we cannot iput it | ||
577 | * under inode_lock. So we keep the reference and iput | ||
578 | * it later. | ||
579 | */ | 845 | */ |
580 | iput(old_inode); | 846 | max_idle = max(5UL * 60 * HZ, wait_jiffies); |
581 | old_inode = inode; | 847 | if (time_after(jiffies, max_idle + last_active)) |
848 | break; | ||
849 | } | ||
582 | 850 | ||
583 | filemap_fdatawait(mapping); | 851 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); |
852 | set_current_state(TASK_INTERRUPTIBLE); | ||
853 | schedule_timeout(wait_jiffies); | ||
854 | try_to_freeze(); | ||
855 | } | ||
584 | 856 | ||
585 | cond_resched(); | 857 | return 0; |
858 | } | ||
859 | |||
860 | /* | ||
861 | * Schedule writeback for all backing devices. Expensive! If this is a data | ||
862 | * integrity operation, writeback will be complete when this returns. If | ||
863 | * we are simply called for WB_SYNC_NONE, then writeback will merely be | ||
864 | * scheduled to run. | ||
865 | */ | ||
866 | static void bdi_writeback_all(struct writeback_control *wbc) | ||
867 | { | ||
868 | const bool must_wait = wbc->sync_mode == WB_SYNC_ALL; | ||
869 | struct backing_dev_info *bdi; | ||
870 | struct bdi_work *work; | ||
871 | LIST_HEAD(list); | ||
872 | |||
873 | restart: | ||
874 | spin_lock(&bdi_lock); | ||
875 | |||
876 | list_for_each_entry(bdi, &bdi_list, bdi_list) { | ||
877 | struct bdi_work *work; | ||
586 | 878 | ||
587 | spin_lock(&inode_lock); | 879 | if (!bdi_has_dirty_io(bdi)) |
880 | continue; | ||
881 | |||
882 | /* | ||
883 | * If work allocation fails, do the writes inline. We drop | ||
884 | * the lock and restart the list writeout. This should be OK, | ||
885 | * since this happens rarely and because the writeout should | ||
886 | * eventually make more free memory available. | ||
887 | */ | ||
888 | work = bdi_alloc_work(wbc); | ||
889 | if (!work) { | ||
890 | struct writeback_control __wbc; | ||
891 | |||
892 | /* | ||
893 | * Not a data integrity writeout, just continue | ||
894 | */ | ||
895 | if (!must_wait) | ||
896 | continue; | ||
897 | |||
898 | spin_unlock(&bdi_lock); | ||
899 | __wbc = *wbc; | ||
900 | __wbc.bdi = bdi; | ||
901 | writeback_inodes_wbc(&__wbc); | ||
902 | goto restart; | ||
588 | } | 903 | } |
589 | spin_unlock(&inode_lock); | 904 | if (must_wait) |
590 | iput(old_inode); | 905 | list_add_tail(&work->wait_list, &list); |
591 | } else | 906 | |
592 | spin_unlock(&inode_lock); | 907 | bdi_queue_work(bdi, work); |
908 | } | ||
909 | |||
910 | spin_unlock(&bdi_lock); | ||
593 | 911 | ||
594 | return; /* Leave any unwritten inodes on s_io */ | 912 | /* |
913 | * If this is for WB_SYNC_ALL, wait for pending work to complete | ||
914 | * before returning. | ||
915 | */ | ||
916 | while (!list_empty(&list)) { | ||
917 | work = list_entry(list.next, struct bdi_work, wait_list); | ||
918 | list_del(&work->wait_list); | ||
919 | bdi_wait_on_work_clear(work); | ||
920 | call_rcu(&work->rcu_head, bdi_work_free); | ||
921 | } | ||
595 | } | 922 | } |
596 | EXPORT_SYMBOL_GPL(generic_sync_sb_inodes); | ||
597 | 923 | ||
598 | static void sync_sb_inodes(struct super_block *sb, | 924 | /* |
599 | struct writeback_control *wbc) | 925 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
926 | * the whole world. | ||
927 | */ | ||
928 | void wakeup_flusher_threads(long nr_pages) | ||
600 | { | 929 | { |
601 | generic_sync_sb_inodes(sb, wbc); | 930 | struct writeback_control wbc = { |
931 | .sync_mode = WB_SYNC_NONE, | ||
932 | .older_than_this = NULL, | ||
933 | .range_cyclic = 1, | ||
934 | }; | ||
935 | |||
936 | if (nr_pages == 0) | ||
937 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
938 | global_page_state(NR_UNSTABLE_NFS); | ||
939 | wbc.nr_to_write = nr_pages; | ||
940 | bdi_writeback_all(&wbc); | ||
602 | } | 941 | } |
603 | 942 | ||
604 | /* | 943 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
605 | * Start writeback of dirty pagecache data against all unlocked inodes. | 944 | { |
945 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | ||
946 | struct dentry *dentry; | ||
947 | const char *name = "?"; | ||
948 | |||
949 | dentry = d_find_alias(inode); | ||
950 | if (dentry) { | ||
951 | spin_lock(&dentry->d_lock); | ||
952 | name = (const char *) dentry->d_name.name; | ||
953 | } | ||
954 | printk(KERN_DEBUG | ||
955 | "%s(%d): dirtied inode %lu (%s) on %s\n", | ||
956 | current->comm, task_pid_nr(current), inode->i_ino, | ||
957 | name, inode->i_sb->s_id); | ||
958 | if (dentry) { | ||
959 | spin_unlock(&dentry->d_lock); | ||
960 | dput(dentry); | ||
961 | } | ||
962 | } | ||
963 | } | ||
964 | |||
965 | /** | ||
966 | * __mark_inode_dirty - internal function | ||
967 | * @inode: inode to mark | ||
968 | * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) | ||
969 | * Mark an inode as dirty. Callers should use mark_inode_dirty or | ||
970 | * mark_inode_dirty_sync. | ||
606 | * | 971 | * |
607 | * Note: | 972 | * Put the inode on the super block's dirty list. |
608 | * We don't need to grab a reference to superblock here. If it has non-empty | 973 | * |
609 | * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed | 974 | * CAREFUL! We mark it dirty unconditionally, but move it onto the |
610 | * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all | 975 | * dirty list only if it is hashed or if it refers to a blockdev. |
611 | * empty. Since __sync_single_inode() regains inode_lock before it finally moves | 976 | * If it was not hashed, it will never be added to the dirty list |
612 | * inode from superblock lists we are OK. | 977 | * even if it is later hashed, as it will have been marked dirty already. |
613 | * | 978 | * |
614 | * If `older_than_this' is non-zero then only flush inodes which have a | 979 | * In short, make sure you hash any inodes _before_ you start marking |
615 | * flushtime older than *older_than_this. | 980 | * them dirty. |
616 | * | 981 | * |
617 | * If `bdi' is non-zero then we will scan the first inode against each | 982 | * This function *must* be atomic for the I_DIRTY_PAGES case - |
618 | * superblock until we find the matching ones. One group will be the dirty | 983 | * set_page_dirty() is called under spinlock in several places. |
619 | * inodes against a filesystem. Then when we hit the dummy blockdev superblock, | 984 | * |
620 | * sync_sb_inodes will seekout the blockdev which matches `bdi'. Maybe not | 985 | * Note that for blockdevs, inode->dirtied_when represents the dirtying time of |
621 | * super-efficient but we're about to do a ton of I/O... | 986 | * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of |
987 | * the kernel-internal blockdev inode represents the dirtying time of the | ||
988 | * blockdev's pages. This is why for I_DIRTY_PAGES we always use | ||
989 | * page->mapping->host, so the page-dirtying time is recorded in the internal | ||
990 | * blockdev inode. | ||
622 | */ | 991 | */ |
623 | void | 992 | void __mark_inode_dirty(struct inode *inode, int flags) |
624 | writeback_inodes(struct writeback_control *wbc) | ||
625 | { | 993 | { |
626 | struct super_block *sb; | 994 | struct super_block *sb = inode->i_sb; |
627 | 995 | ||
628 | might_sleep(); | 996 | /* |
629 | spin_lock(&sb_lock); | 997 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
630 | restart: | 998 | * dirty the inode itself |
631 | list_for_each_entry_reverse(sb, &super_blocks, s_list) { | 999 | */ |
632 | if (sb_has_dirty_inodes(sb)) { | 1000 | if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
633 | /* we're making our own get_super here */ | 1001 | if (sb->s_op->dirty_inode) |
634 | sb->s_count++; | 1002 | sb->s_op->dirty_inode(inode); |
635 | spin_unlock(&sb_lock); | 1003 | } |
636 | /* | 1004 | |
637 | * If we can't get the readlock, there's no sense in | 1005 | /* |
638 | * waiting around, most of the time the FS is going to | 1006 | * make sure that changes are seen by all cpus before we test i_state |
639 | * be unmounted by the time it is released. | 1007 | * -- mikulas |
640 | */ | 1008 | */ |
641 | if (down_read_trylock(&sb->s_umount)) { | 1009 | smp_mb(); |
642 | if (sb->s_root) | 1010 | |
643 | sync_sb_inodes(sb, wbc); | 1011 | /* avoid the locking if we can */ |
644 | up_read(&sb->s_umount); | 1012 | if ((inode->i_state & flags) == flags) |
1013 | return; | ||
1014 | |||
1015 | if (unlikely(block_dump)) | ||
1016 | block_dump___mark_inode_dirty(inode); | ||
1017 | |||
1018 | spin_lock(&inode_lock); | ||
1019 | if ((inode->i_state & flags) != flags) { | ||
1020 | const int was_dirty = inode->i_state & I_DIRTY; | ||
1021 | |||
1022 | inode->i_state |= flags; | ||
1023 | |||
1024 | /* | ||
1025 | * If the inode is being synced, just update its dirty state. | ||
1026 | * The unlocker will place the inode on the appropriate | ||
1027 | * superblock list, based upon its state. | ||
1028 | */ | ||
1029 | if (inode->i_state & I_SYNC) | ||
1030 | goto out; | ||
1031 | |||
1032 | /* | ||
1033 | * Only add valid (hashed) inodes to the superblock's | ||
1034 | * dirty list. Add blockdev inodes as well. | ||
1035 | */ | ||
1036 | if (!S_ISBLK(inode->i_mode)) { | ||
1037 | if (hlist_unhashed(&inode->i_hash)) | ||
1038 | goto out; | ||
1039 | } | ||
1040 | if (inode->i_state & (I_FREEING|I_CLEAR)) | ||
1041 | goto out; | ||
1042 | |||
1043 | /* | ||
1044 | * If the inode was already on b_dirty/b_io/b_more_io, don't | ||
1045 | * reposition it (that would break b_dirty time-ordering). | ||
1046 | */ | ||
1047 | if (!was_dirty) { | ||
1048 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | ||
1049 | struct backing_dev_info *bdi = wb->bdi; | ||
1050 | |||
1051 | if (bdi_cap_writeback_dirty(bdi) && | ||
1052 | !test_bit(BDI_registered, &bdi->state)) { | ||
1053 | WARN_ON(1); | ||
1054 | printk(KERN_ERR "bdi-%s not registered\n", | ||
1055 | bdi->name); | ||
645 | } | 1056 | } |
646 | spin_lock(&sb_lock); | 1057 | |
647 | if (__put_super_and_need_restart(sb)) | 1058 | inode->dirtied_when = jiffies; |
648 | goto restart; | 1059 | list_move(&inode->i_list, &wb->b_dirty); |
649 | } | 1060 | } |
650 | if (wbc->nr_to_write <= 0) | ||
651 | break; | ||
652 | } | 1061 | } |
653 | spin_unlock(&sb_lock); | 1062 | out: |
1063 | spin_unlock(&inode_lock); | ||
654 | } | 1064 | } |
1065 | EXPORT_SYMBOL(__mark_inode_dirty); | ||
655 | 1066 | ||
656 | /* | 1067 | /* |
657 | * writeback and wait upon the filesystem's dirty inodes. The caller will | 1068 | * Write out a superblock's list of dirty inodes. A wait will be performed |
658 | * do this in two passes - one to write, and one to wait. | 1069 | * upon no inodes, all inodes or the final one, depending upon sync_mode. |
1070 | * | ||
1071 | * If older_than_this is non-NULL, then only write out inodes which | ||
1072 | * had their first dirtying at a time earlier than *older_than_this. | ||
659 | * | 1073 | * |
660 | * A finite limit is set on the number of pages which will be written. | 1074 | * If we're a pdlfush thread, then implement pdflush collision avoidance |
661 | * To prevent infinite livelock of sys_sync(). | 1075 | * against the entire list. |
662 | * | 1076 | * |
663 | * We add in the number of potentially dirty inodes, because each inode write | 1077 | * If `bdi' is non-zero then we're being asked to writeback a specific queue. |
664 | * can dirty pagecache in the underlying blockdev. | 1078 | * This function assumes that the blockdev superblock's inodes are backed by |
1079 | * a variety of queues, so all inodes are searched. For other superblocks, | ||
1080 | * assume that all inodes are backed by the same queue. | ||
1081 | * | ||
1082 | * The inodes to be written are parked on bdi->b_io. They are moved back onto | ||
1083 | * bdi->b_dirty as they are selected for writing. This way, none can be missed | ||
1084 | * on the writer throttling path, and we get decent balancing between many | ||
1085 | * throttled threads: we don't want them all piling up on inode_sync_wait. | ||
665 | */ | 1086 | */ |
666 | void sync_inodes_sb(struct super_block *sb, int wait) | 1087 | static void wait_sb_inodes(struct writeback_control *wbc) |
1088 | { | ||
1089 | struct inode *inode, *old_inode = NULL; | ||
1090 | |||
1091 | /* | ||
1092 | * We need to be protected against the filesystem going from | ||
1093 | * r/o to r/w or vice versa. | ||
1094 | */ | ||
1095 | WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount)); | ||
1096 | |||
1097 | spin_lock(&inode_lock); | ||
1098 | |||
1099 | /* | ||
1100 | * Data integrity sync. Must wait for all pages under writeback, | ||
1101 | * because there may have been pages dirtied before our sync | ||
1102 | * call, but which had writeout started before we write it out. | ||
1103 | * In which case, the inode may not be on the dirty list, but | ||
1104 | * we still have to wait for that writeout. | ||
1105 | */ | ||
1106 | list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) { | ||
1107 | struct address_space *mapping; | ||
1108 | |||
1109 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | ||
1110 | continue; | ||
1111 | mapping = inode->i_mapping; | ||
1112 | if (mapping->nrpages == 0) | ||
1113 | continue; | ||
1114 | __iget(inode); | ||
1115 | spin_unlock(&inode_lock); | ||
1116 | /* | ||
1117 | * We hold a reference to 'inode' so it couldn't have | ||
1118 | * been removed from s_inodes list while we dropped the | ||
1119 | * inode_lock. We cannot iput the inode now as we can | ||
1120 | * be holding the last reference and we cannot iput it | ||
1121 | * under inode_lock. So we keep the reference and iput | ||
1122 | * it later. | ||
1123 | */ | ||
1124 | iput(old_inode); | ||
1125 | old_inode = inode; | ||
1126 | |||
1127 | filemap_fdatawait(mapping); | ||
1128 | |||
1129 | cond_resched(); | ||
1130 | |||
1131 | spin_lock(&inode_lock); | ||
1132 | } | ||
1133 | spin_unlock(&inode_lock); | ||
1134 | iput(old_inode); | ||
1135 | } | ||
1136 | |||
1137 | /** | ||
1138 | * writeback_inodes_sb - writeback dirty inodes from given super_block | ||
1139 | * @sb: the superblock | ||
1140 | * | ||
1141 | * Start writeback on some inodes on this super_block. No guarantees are made | ||
1142 | * on how many (if any) will be written, and this function does not wait | ||
1143 | * for IO completion of submitted IO. The number of pages submitted is | ||
1144 | * returned. | ||
1145 | */ | ||
1146 | long writeback_inodes_sb(struct super_block *sb) | ||
667 | { | 1147 | { |
668 | struct writeback_control wbc = { | 1148 | struct writeback_control wbc = { |
669 | .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, | 1149 | .sb = sb, |
1150 | .sync_mode = WB_SYNC_NONE, | ||
670 | .range_start = 0, | 1151 | .range_start = 0, |
671 | .range_end = LLONG_MAX, | 1152 | .range_end = LLONG_MAX, |
672 | }; | 1153 | }; |
1154 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
1155 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
1156 | long nr_to_write; | ||
673 | 1157 | ||
674 | if (!wait) { | 1158 | nr_to_write = nr_dirty + nr_unstable + |
675 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | ||
676 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | ||
677 | |||
678 | wbc.nr_to_write = nr_dirty + nr_unstable + | ||
679 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1159 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
680 | } else | ||
681 | wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */ | ||
682 | 1160 | ||
683 | sync_sb_inodes(sb, &wbc); | 1161 | wbc.nr_to_write = nr_to_write; |
1162 | bdi_writeback_all(&wbc); | ||
1163 | return nr_to_write - wbc.nr_to_write; | ||
1164 | } | ||
1165 | EXPORT_SYMBOL(writeback_inodes_sb); | ||
1166 | |||
1167 | /** | ||
1168 | * sync_inodes_sb - sync sb inode pages | ||
1169 | * @sb: the superblock | ||
1170 | * | ||
1171 | * This function writes and waits on any dirty inode belonging to this | ||
1172 | * super_block. The number of pages synced is returned. | ||
1173 | */ | ||
1174 | long sync_inodes_sb(struct super_block *sb) | ||
1175 | { | ||
1176 | struct writeback_control wbc = { | ||
1177 | .sb = sb, | ||
1178 | .sync_mode = WB_SYNC_ALL, | ||
1179 | .range_start = 0, | ||
1180 | .range_end = LLONG_MAX, | ||
1181 | }; | ||
1182 | long nr_to_write = LONG_MAX; /* doesn't actually matter */ | ||
1183 | |||
1184 | wbc.nr_to_write = nr_to_write; | ||
1185 | bdi_writeback_all(&wbc); | ||
1186 | wait_sb_inodes(&wbc); | ||
1187 | return nr_to_write - wbc.nr_to_write; | ||
684 | } | 1188 | } |
1189 | EXPORT_SYMBOL(sync_inodes_sb); | ||
685 | 1190 | ||
686 | /** | 1191 | /** |
687 | * write_inode_now - write an inode to disk | 1192 | * write_inode_now - write an inode to disk |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..4567db6f9430 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -801,6 +801,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) | |||
801 | { | 801 | { |
802 | int err; | 802 | int err; |
803 | 803 | ||
804 | fc->bdi.name = "fuse"; | ||
804 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 805 | fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
805 | fc->bdi.unplug_io_fn = default_unplug_io_fn; | 806 | fc->bdi.unplug_io_fn = default_unplug_io_fn; |
806 | /* fuse does it's own writeback accounting */ | 807 | /* fuse does it's own writeback accounting */ |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 23419dc3027b..a7cbfbd340c7 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -386,16 +386,16 @@ static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | |||
386 | #define GDLM_ATTR(_name,_mode,_show,_store) \ | 386 | #define GDLM_ATTR(_name,_mode,_show,_store) \ |
387 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) | 387 | static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) |
388 | 388 | ||
389 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); | 389 | GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); |
390 | GDLM_ATTR(block, 0644, block_show, block_store); | 390 | GDLM_ATTR(block, 0644, block_show, block_store); |
391 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); | 391 | GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); |
392 | GDLM_ATTR(id, 0444, lkid_show, NULL); | 392 | GDLM_ATTR(id, 0444, lkid_show, NULL); |
393 | GDLM_ATTR(jid, 0444, jid_show, NULL); | 393 | GDLM_ATTR(jid, 0444, jid_show, NULL); |
394 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); | 394 | GDLM_ATTR(first, 0444, lkfirst_show, NULL); |
395 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); | 395 | GDLM_ATTR(first_done, 0444, first_done_show, NULL); |
396 | GDLM_ATTR(recover, 0200, NULL, recover_store); | 396 | GDLM_ATTR(recover, 0600, NULL, recover_store); |
397 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); | 397 | GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); |
398 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); | 398 | GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); |
399 | 399 | ||
400 | static struct attribute *lock_module_attrs[] = { | 400 | static struct attribute *lock_module_attrs[] = { |
401 | &gdlm_attr_proto_name.attr, | 401 | &gdlm_attr_proto_name.attr, |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 941c8425c10b..a93b885311d8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -44,6 +44,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations; | |||
44 | static const struct inode_operations hugetlbfs_inode_operations; | 44 | static const struct inode_operations hugetlbfs_inode_operations; |
45 | 45 | ||
46 | static struct backing_dev_info hugetlbfs_backing_dev_info = { | 46 | static struct backing_dev_info hugetlbfs_backing_dev_info = { |
47 | .name = "hugetlbfs", | ||
47 | .ra_pages = 0, /* No readahead */ | 48 | .ra_pages = 0, /* No readahead */ |
48 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 49 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
49 | }; | 50 | }; |
@@ -935,26 +936,28 @@ static int can_do_hugetlb_shm(void) | |||
935 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); | 936 | return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); |
936 | } | 937 | } |
937 | 938 | ||
938 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) | 939 | struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, |
940 | struct user_struct **user) | ||
939 | { | 941 | { |
940 | int error = -ENOMEM; | 942 | int error = -ENOMEM; |
941 | int unlock_shm = 0; | ||
942 | struct file *file; | 943 | struct file *file; |
943 | struct inode *inode; | 944 | struct inode *inode; |
944 | struct dentry *dentry, *root; | 945 | struct dentry *dentry, *root; |
945 | struct qstr quick_string; | 946 | struct qstr quick_string; |
946 | struct user_struct *user = current_user(); | ||
947 | 947 | ||
948 | *user = NULL; | ||
948 | if (!hugetlbfs_vfsmount) | 949 | if (!hugetlbfs_vfsmount) |
949 | return ERR_PTR(-ENOENT); | 950 | return ERR_PTR(-ENOENT); |
950 | 951 | ||
951 | if (!can_do_hugetlb_shm()) { | 952 | if (!can_do_hugetlb_shm()) { |
952 | if (user_shm_lock(size, user)) { | 953 | *user = current_user(); |
953 | unlock_shm = 1; | 954 | if (user_shm_lock(size, *user)) { |
954 | WARN_ONCE(1, | 955 | WARN_ONCE(1, |
955 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); | 956 | "Using mlock ulimits for SHM_HUGETLB deprecated\n"); |
956 | } else | 957 | } else { |
958 | *user = NULL; | ||
957 | return ERR_PTR(-EPERM); | 959 | return ERR_PTR(-EPERM); |
960 | } | ||
958 | } | 961 | } |
959 | 962 | ||
960 | root = hugetlbfs_vfsmount->mnt_root; | 963 | root = hugetlbfs_vfsmount->mnt_root; |
@@ -996,8 +999,10 @@ out_inode: | |||
996 | out_dentry: | 999 | out_dentry: |
997 | dput(dentry); | 1000 | dput(dentry); |
998 | out_shm_unlock: | 1001 | out_shm_unlock: |
999 | if (unlock_shm) | 1002 | if (*user) { |
1000 | user_shm_unlock(size, user); | 1003 | user_shm_unlock(size, *user); |
1004 | *user = NULL; | ||
1005 | } | ||
1001 | return ERR_PTR(error); | 1006 | return ERR_PTR(error); |
1002 | } | 1007 | } |
1003 | 1008 | ||
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 8fcb6239218e..7edb62e97419 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c | |||
@@ -258,7 +258,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
258 | return rc; | 258 | return rc; |
259 | } | 259 | } |
260 | 260 | ||
261 | static int jffs2_check_acl(struct inode *inode, int mask) | 261 | int jffs2_check_acl(struct inode *inode, int mask) |
262 | { | 262 | { |
263 | struct posix_acl *acl; | 263 | struct posix_acl *acl; |
264 | int rc; | 264 | int rc; |
@@ -274,11 +274,6 @@ static int jffs2_check_acl(struct inode *inode, int mask) | |||
274 | return -EAGAIN; | 274 | return -EAGAIN; |
275 | } | 275 | } |
276 | 276 | ||
277 | int jffs2_permission(struct inode *inode, int mask) | ||
278 | { | ||
279 | return generic_permission(inode, mask, jffs2_check_acl); | ||
280 | } | ||
281 | |||
282 | int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) | 277 | int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) |
283 | { | 278 | { |
284 | struct posix_acl *acl, *clone; | 279 | struct posix_acl *acl, *clone; |
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index fc929f2a14f6..f0ba63e3c36b 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h | |||
@@ -26,7 +26,7 @@ struct jffs2_acl_header { | |||
26 | 26 | ||
27 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL | 27 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL |
28 | 28 | ||
29 | extern int jffs2_permission(struct inode *, int); | 29 | extern int jffs2_check_acl(struct inode *, int); |
30 | extern int jffs2_acl_chmod(struct inode *); | 30 | extern int jffs2_acl_chmod(struct inode *); |
31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); | 31 | extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); |
32 | extern int jffs2_init_acl_post(struct inode *); | 32 | extern int jffs2_init_acl_post(struct inode *); |
@@ -36,7 +36,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; | |||
36 | 36 | ||
37 | #else | 37 | #else |
38 | 38 | ||
39 | #define jffs2_permission (NULL) | 39 | #define jffs2_check_acl (NULL) |
40 | #define jffs2_acl_chmod(inode) (0) | 40 | #define jffs2_acl_chmod(inode) (0) |
41 | #define jffs2_init_acl_pre(dir_i,inode,mode) (0) | 41 | #define jffs2_init_acl_pre(dir_i,inode,mode) (0) |
42 | #define jffs2_init_acl_post(inode) (0) | 42 | #define jffs2_init_acl_post(inode) (0) |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 6f60cc910f4c..7aa4417e085f 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -55,7 +55,7 @@ const struct inode_operations jffs2_dir_inode_operations = | |||
55 | .rmdir = jffs2_rmdir, | 55 | .rmdir = jffs2_rmdir, |
56 | .mknod = jffs2_mknod, | 56 | .mknod = jffs2_mknod, |
57 | .rename = jffs2_rename, | 57 | .rename = jffs2_rename, |
58 | .permission = jffs2_permission, | 58 | .check_acl = jffs2_check_acl, |
59 | .setattr = jffs2_setattr, | 59 | .setattr = jffs2_setattr, |
60 | .setxattr = jffs2_setxattr, | 60 | .setxattr = jffs2_setxattr, |
61 | .getxattr = jffs2_getxattr, | 61 | .getxattr = jffs2_getxattr, |
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 23c947539864..b7b74e299142 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c | |||
@@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations = | |||
56 | 56 | ||
57 | const struct inode_operations jffs2_file_inode_operations = | 57 | const struct inode_operations jffs2_file_inode_operations = |
58 | { | 58 | { |
59 | .permission = jffs2_permission, | 59 | .check_acl = jffs2_check_acl, |
60 | .setattr = jffs2_setattr, | 60 | .setattr = jffs2_setattr, |
61 | .setxattr = jffs2_setxattr, | 61 | .setxattr = jffs2_setxattr, |
62 | .getxattr = jffs2_getxattr, | 62 | .getxattr = jffs2_getxattr, |
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index b7339c3b6ad9..4ec11e8bda8c 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c | |||
@@ -21,7 +21,7 @@ const struct inode_operations jffs2_symlink_inode_operations = | |||
21 | { | 21 | { |
22 | .readlink = generic_readlink, | 22 | .readlink = generic_readlink, |
23 | .follow_link = jffs2_follow_link, | 23 | .follow_link = jffs2_follow_link, |
24 | .permission = jffs2_permission, | 24 | .check_acl = jffs2_check_acl, |
25 | .setattr = jffs2_setattr, | 25 | .setattr = jffs2_setattr, |
26 | .setxattr = jffs2_setxattr, | 26 | .setxattr = jffs2_setxattr, |
27 | .getxattr = jffs2_getxattr, | 27 | .getxattr = jffs2_getxattr, |
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index d9a721e6db70..5ef7bac265e5 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
@@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { | |||
1268 | if (!c->wbuf) | 1268 | if (!c->wbuf) |
1269 | return -ENOMEM; | 1269 | return -ENOMEM; |
1270 | 1270 | ||
1271 | #ifdef CONFIG_JFFS2_FS_WBUF_VERIFY | ||
1272 | c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL); | ||
1273 | if (!c->wbuf_verify) { | ||
1274 | kfree(c->wbuf); | ||
1275 | return -ENOMEM; | ||
1276 | } | ||
1277 | #endif | ||
1271 | return 0; | 1278 | return 0; |
1272 | } | 1279 | } |
1273 | 1280 | ||
1274 | void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { | 1281 | void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { |
1282 | #ifdef CONFIG_JFFS2_FS_WBUF_VERIFY | ||
1283 | kfree(c->wbuf_verify); | ||
1284 | #endif | ||
1275 | kfree(c->wbuf); | 1285 | kfree(c->wbuf); |
1276 | } | 1286 | } |
1277 | 1287 | ||
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index a29c7c3e3fb8..d66477c34306 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c | |||
@@ -114,7 +114,7 @@ out: | |||
114 | return rc; | 114 | return rc; |
115 | } | 115 | } |
116 | 116 | ||
117 | static int jfs_check_acl(struct inode *inode, int mask) | 117 | int jfs_check_acl(struct inode *inode, int mask) |
118 | { | 118 | { |
119 | struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); | 119 | struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); |
120 | 120 | ||
@@ -129,11 +129,6 @@ static int jfs_check_acl(struct inode *inode, int mask) | |||
129 | return -EAGAIN; | 129 | return -EAGAIN; |
130 | } | 130 | } |
131 | 131 | ||
132 | int jfs_permission(struct inode *inode, int mask) | ||
133 | { | ||
134 | return generic_permission(inode, mask, jfs_check_acl); | ||
135 | } | ||
136 | |||
137 | int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) | 132 | int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) |
138 | { | 133 | { |
139 | struct posix_acl *acl = NULL; | 134 | struct posix_acl *acl = NULL; |
diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 7f6063acaa3b..2b70fa78e4a7 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c | |||
@@ -96,7 +96,7 @@ const struct inode_operations jfs_file_inode_operations = { | |||
96 | .removexattr = jfs_removexattr, | 96 | .removexattr = jfs_removexattr, |
97 | #ifdef CONFIG_JFS_POSIX_ACL | 97 | #ifdef CONFIG_JFS_POSIX_ACL |
98 | .setattr = jfs_setattr, | 98 | .setattr = jfs_setattr, |
99 | .permission = jfs_permission, | 99 | .check_acl = jfs_check_acl, |
100 | #endif | 100 | #endif |
101 | }; | 101 | }; |
102 | 102 | ||
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 88475f10a389..b07bd417ef85 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h | |||
@@ -20,7 +20,7 @@ | |||
20 | 20 | ||
21 | #ifdef CONFIG_JFS_POSIX_ACL | 21 | #ifdef CONFIG_JFS_POSIX_ACL |
22 | 22 | ||
23 | int jfs_permission(struct inode *, int); | 23 | int jfs_check_acl(struct inode *, int); |
24 | int jfs_init_acl(tid_t, struct inode *, struct inode *); | 24 | int jfs_init_acl(tid_t, struct inode *, struct inode *); |
25 | int jfs_setattr(struct dentry *, struct iattr *); | 25 | int jfs_setattr(struct dentry *, struct iattr *); |
26 | 26 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 514ee2edb92a..c79a4270f083 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -1543,7 +1543,7 @@ const struct inode_operations jfs_dir_inode_operations = { | |||
1543 | .removexattr = jfs_removexattr, | 1543 | .removexattr = jfs_removexattr, |
1544 | #ifdef CONFIG_JFS_POSIX_ACL | 1544 | #ifdef CONFIG_JFS_POSIX_ACL |
1545 | .setattr = jfs_setattr, | 1545 | .setattr = jfs_setattr, |
1546 | .permission = jfs_permission, | 1546 | .check_acl = jfs_check_acl, |
1547 | #endif | 1547 | #endif |
1548 | }; | 1548 | }; |
1549 | 1549 | ||
diff --git a/fs/libfs.c b/fs/libfs.c index ddfa89948c3f..dcec3d3ea64f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -217,7 +217,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, | |||
217 | return PTR_ERR(s); | 217 | return PTR_ERR(s); |
218 | 218 | ||
219 | s->s_flags = MS_NOUSER; | 219 | s->s_flags = MS_NOUSER; |
220 | s->s_maxbytes = ~0ULL; | 220 | s->s_maxbytes = MAX_LFS_FILESIZE; |
221 | s->s_blocksize = PAGE_SIZE; | 221 | s->s_blocksize = PAGE_SIZE; |
222 | s->s_blocksize_bits = PAGE_SHIFT; | 222 | s->s_blocksize_bits = PAGE_SHIFT; |
223 | s->s_magic = magic; | 223 | s->s_magic = magic; |
diff --git a/fs/locks.c b/fs/locks.c index b6440f52178f..52366e877d76 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) | |||
1591 | if (can_sleep) | 1591 | if (can_sleep) |
1592 | lock->fl_flags |= FL_SLEEP; | 1592 | lock->fl_flags |= FL_SLEEP; |
1593 | 1593 | ||
1594 | error = security_file_lock(filp, cmd); | 1594 | error = security_file_lock(filp, lock->fl_type); |
1595 | if (error) | 1595 | if (error) |
1596 | goto out_free; | 1596 | goto out_free; |
1597 | 1597 | ||
diff --git a/fs/namei.c b/fs/namei.c index f3c5b278895a..d11f404667e9 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -169,19 +169,10 @@ void putname(const char *name) | |||
169 | EXPORT_SYMBOL(putname); | 169 | EXPORT_SYMBOL(putname); |
170 | #endif | 170 | #endif |
171 | 171 | ||
172 | 172 | /* | |
173 | /** | 173 | * This does basic POSIX ACL permission checking |
174 | * generic_permission - check for access rights on a Posix-like filesystem | ||
175 | * @inode: inode to check access rights for | ||
176 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | ||
177 | * @check_acl: optional callback to check for Posix ACLs | ||
178 | * | ||
179 | * Used to check for read/write/execute permissions on a file. | ||
180 | * We use "fsuid" for this, letting us set arbitrary permissions | ||
181 | * for filesystem access without changing the "normal" uids which | ||
182 | * are used for other things.. | ||
183 | */ | 174 | */ |
184 | int generic_permission(struct inode *inode, int mask, | 175 | static int acl_permission_check(struct inode *inode, int mask, |
185 | int (*check_acl)(struct inode *inode, int mask)) | 176 | int (*check_acl)(struct inode *inode, int mask)) |
186 | { | 177 | { |
187 | umode_t mode = inode->i_mode; | 178 | umode_t mode = inode->i_mode; |
@@ -193,9 +184,7 @@ int generic_permission(struct inode *inode, int mask, | |||
193 | else { | 184 | else { |
194 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { | 185 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { |
195 | int error = check_acl(inode, mask); | 186 | int error = check_acl(inode, mask); |
196 | if (error == -EACCES) | 187 | if (error != -EAGAIN) |
197 | goto check_capabilities; | ||
198 | else if (error != -EAGAIN) | ||
199 | return error; | 188 | return error; |
200 | } | 189 | } |
201 | 190 | ||
@@ -208,8 +197,32 @@ int generic_permission(struct inode *inode, int mask, | |||
208 | */ | 197 | */ |
209 | if ((mask & ~mode) == 0) | 198 | if ((mask & ~mode) == 0) |
210 | return 0; | 199 | return 0; |
200 | return -EACCES; | ||
201 | } | ||
202 | |||
203 | /** | ||
204 | * generic_permission - check for access rights on a Posix-like filesystem | ||
205 | * @inode: inode to check access rights for | ||
206 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | ||
207 | * @check_acl: optional callback to check for Posix ACLs | ||
208 | * | ||
209 | * Used to check for read/write/execute permissions on a file. | ||
210 | * We use "fsuid" for this, letting us set arbitrary permissions | ||
211 | * for filesystem access without changing the "normal" uids which | ||
212 | * are used for other things.. | ||
213 | */ | ||
214 | int generic_permission(struct inode *inode, int mask, | ||
215 | int (*check_acl)(struct inode *inode, int mask)) | ||
216 | { | ||
217 | int ret; | ||
218 | |||
219 | /* | ||
220 | * Do the basic POSIX ACL permission checks. | ||
221 | */ | ||
222 | ret = acl_permission_check(inode, mask, check_acl); | ||
223 | if (ret != -EACCES) | ||
224 | return ret; | ||
211 | 225 | ||
212 | check_capabilities: | ||
213 | /* | 226 | /* |
214 | * Read/write DACs are always overridable. | 227 | * Read/write DACs are always overridable. |
215 | * Executable DACs are overridable if at least one exec bit is set. | 228 | * Executable DACs are overridable if at least one exec bit is set. |
@@ -262,7 +275,7 @@ int inode_permission(struct inode *inode, int mask) | |||
262 | if (inode->i_op->permission) | 275 | if (inode->i_op->permission) |
263 | retval = inode->i_op->permission(inode, mask); | 276 | retval = inode->i_op->permission(inode, mask); |
264 | else | 277 | else |
265 | retval = generic_permission(inode, mask, NULL); | 278 | retval = generic_permission(inode, mask, inode->i_op->check_acl); |
266 | 279 | ||
267 | if (retval) | 280 | if (retval) |
268 | return retval; | 281 | return retval; |
@@ -432,29 +445,22 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, | |||
432 | */ | 445 | */ |
/*
 * Review annotation for this side-by-side hunk.  In each row the text after
 * the first line number is the code being removed and the text after the
 * second line number is its replacement; rows carrying a single number exist
 * on one side only (blank cells were collapsed by the rendering).
 *
 * Replacement exec_permission_lite(inode):
 *  - when the inode has an i_op->permission hook, the hook is called with
 *    MAY_EXEC; a non-zero result is returned as is, zero continues at "ok";
 *  - otherwise acl_permission_check(inode, MAY_EXEC, i_op->check_acl) decides;
 *  - if that check fails, holding CAP_DAC_OVERRIDE or CAP_DAC_READ_SEARCH
 *    still grants access; without either, the error from
 *    acl_permission_check() is returned;
 *  - every granted path finishes with
 *    security_inode_permission(inode, MAY_EXEC), whose result is returned.
 *
 * The removed version returned -EAGAIN whenever a ->permission hook existed
 * and open-coded the owner/group/other mode-bit test.
 *
 * NOTE(review): the removed capability checks were qualified by S_IXUGO or
 * S_ISDIR(); the replacement applies the capability override to any failure
 * of acl_permission_check().  Presumably fine because the visible caller is
 * the path walk, which passes directories -- confirm.
 */
433 | static int exec_permission_lite(struct inode *inode) | 446 | static int exec_permission_lite(struct inode *inode) |
434 | { | 447 | { |
435 | umode_t mode = inode->i_mode; | 448 | int ret; |
436 | 449 | ||
437 | if (inode->i_op->permission) | 450 | if (inode->i_op->permission) { |
438 | return -EAGAIN; | 451 | ret = inode->i_op->permission(inode, MAY_EXEC); |
439 | 452 | if (!ret) | |
440 | if (current_fsuid() == inode->i_uid) | 453 | goto ok; |
441 | mode >>= 6; | 454 | return ret; |
442 | else if (in_group_p(inode->i_gid)) | 455 | } |
443 | mode >>= 3; | 456 | ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); |
444 | 457 | if (!ret) | |
445 | if (mode & MAY_EXEC) | ||
446 | goto ok; | ||
447 | |||
448 | if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) | ||
449 | goto ok; | ||
450 | |||
451 | if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) | ||
452 | goto ok; | 458 | goto ok; |
453 | 459 | ||
454 | if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) | 460 | if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) |
455 | goto ok; | 461 | goto ok; |
456 | 462 | ||
457 | return -EACCES; | 463 | return ret; |
458 | ok: | 464 | ok: |
459 | return security_inode_permission(inode, MAY_EXEC); | 465 | return security_inode_permission(inode, MAY_EXEC); |
460 | } | 466 | } |
@@ -853,12 +859,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd) | |||
853 | 859 | ||
854 | nd->flags |= LOOKUP_CONTINUE; | 860 | nd->flags |= LOOKUP_CONTINUE; |
855 | err = exec_permission_lite(inode); | 861 | err = exec_permission_lite(inode); |
856 | if (err == -EAGAIN) | ||
857 | err = inode_permission(nd->path.dentry->d_inode, | ||
858 | MAY_EXEC); | ||
859 | if (!err) | ||
860 | err = ima_path_check(&nd->path, MAY_EXEC, | ||
861 | IMA_COUNT_UPDATE); | ||
862 | if (err) | 862 | if (err) |
863 | break; | 863 | break; |
864 | 864 | ||
@@ -1533,37 +1533,42 @@ int may_open(struct path *path, int acc_mode, int flag) | |||
1533 | if (error) | 1533 | if (error) |
1534 | return error; | 1534 | return error; |
1535 | 1535 | ||
1536 | error = ima_path_check(path, | 1536 | error = ima_path_check(path, acc_mode ? |
1537 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), | 1537 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : |
1538 | ACC_MODE(flag) & (MAY_READ | MAY_WRITE), | ||
1538 | IMA_COUNT_UPDATE); | 1539 | IMA_COUNT_UPDATE); |
1540 | |||
1539 | if (error) | 1541 | if (error) |
1540 | return error; | 1542 | return error; |
1541 | /* | 1543 | /* |
1542 | * An append-only file must be opened in append mode for writing. | 1544 | * An append-only file must be opened in append mode for writing. |
1543 | */ | 1545 | */ |
1544 | if (IS_APPEND(inode)) { | 1546 | if (IS_APPEND(inode)) { |
1547 | error = -EPERM; | ||
1545 | if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) | 1548 | if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) |
1546 | return -EPERM; | 1549 | goto err_out; |
1547 | if (flag & O_TRUNC) | 1550 | if (flag & O_TRUNC) |
1548 | return -EPERM; | 1551 | goto err_out; |
1549 | } | 1552 | } |
1550 | 1553 | ||
1551 | /* O_NOATIME can only be set by the owner or superuser */ | 1554 | /* O_NOATIME can only be set by the owner or superuser */ |
1552 | if (flag & O_NOATIME) | 1555 | if (flag & O_NOATIME) |
1553 | if (!is_owner_or_cap(inode)) | 1556 | if (!is_owner_or_cap(inode)) { |
1554 | return -EPERM; | 1557 | error = -EPERM; |
1558 | goto err_out; | ||
1559 | } | ||
1555 | 1560 | ||
1556 | /* | 1561 | /* |
1557 | * Ensure there are no outstanding leases on the file. | 1562 | * Ensure there are no outstanding leases on the file. |
1558 | */ | 1563 | */ |
1559 | error = break_lease(inode, flag); | 1564 | error = break_lease(inode, flag); |
1560 | if (error) | 1565 | if (error) |
1561 | return error; | 1566 | goto err_out; |
1562 | 1567 | ||
1563 | if (flag & O_TRUNC) { | 1568 | if (flag & O_TRUNC) { |
1564 | error = get_write_access(inode); | 1569 | error = get_write_access(inode); |
1565 | if (error) | 1570 | if (error) |
1566 | return error; | 1571 | goto err_out; |
1567 | 1572 | ||
1568 | /* | 1573 | /* |
1569 | * Refuse to truncate files with mandatory locks held on them. | 1574 | * Refuse to truncate files with mandatory locks held on them. |
@@ -1581,12 +1586,17 @@ int may_open(struct path *path, int acc_mode, int flag) | |||
1581 | } | 1586 | } |
1582 | put_write_access(inode); | 1587 | put_write_access(inode); |
1583 | if (error) | 1588 | if (error) |
1584 | return error; | 1589 | goto err_out; |
1585 | } else | 1590 | } else |
1586 | if (flag & FMODE_WRITE) | 1591 | if (flag & FMODE_WRITE) |
1587 | vfs_dq_init(inode); | 1592 | vfs_dq_init(inode); |
1588 | 1593 | ||
1589 | return 0; | 1594 | return 0; |
1595 | err_out: | ||
1596 | ima_counts_put(path, acc_mode ? | ||
1597 | acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) : | ||
1598 | ACC_MODE(flag) & (MAY_READ | MAY_WRITE)); | ||
1599 | return error; | ||
1590 | } | 1600 | } |
1591 | 1601 | ||
1592 | /* | 1602 | /* |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d36925f9b952..e350bd6a2334 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -882,6 +882,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
882 | server->rsize = NFS_MAX_FILE_IO_SIZE; | 882 | server->rsize = NFS_MAX_FILE_IO_SIZE; |
883 | server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 883 | server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
884 | 884 | ||
885 | server->backing_dev_info.name = "nfs"; | ||
885 | server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; | 886 | server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; |
886 | 887 | ||
887 | if (server->wsize > max_rpc_payload) | 888 | if (server->wsize > max_rpc_payload) |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 65ca8c18476f..1434080aefeb 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1250,8 +1250,8 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1250 | continue; | 1250 | continue; |
1251 | } | 1251 | } |
1252 | /* Initialize or reset the session */ | 1252 | /* Initialize or reset the session */ |
1253 | if (nfs4_has_session(clp) && | 1253 | if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) |
1254 | test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { | 1254 | && nfs4_has_session(clp)) { |
1255 | if (clp->cl_cons_state == NFS_CS_SESSION_INITING) | 1255 | if (clp->cl_cons_state == NFS_CS_SESSION_INITING) |
1256 | status = nfs4_initialize_session(clp); | 1256 | status = nfs4_initialize_session(clp); |
1257 | else | 1257 | else |
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 5573508f707f..36fcabbf5186 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
34 | int flags = nfsexp_flags(rqstp, exp); | 34 | int flags = nfsexp_flags(rqstp, exp); |
35 | int ret; | 35 | int ret; |
36 | 36 | ||
37 | validate_process_creds(); | ||
38 | |||
37 | /* discard any old override before preparing the new set */ | 39 | /* discard any old override before preparing the new set */ |
38 | revert_creds(get_cred(current->real_cred)); | 40 | revert_creds(get_cred(current->real_cred)); |
39 | new = prepare_creds(); | 41 | new = prepare_creds(); |
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
86 | else | 88 | else |
87 | new->cap_effective = cap_raise_nfsd_set(new->cap_effective, | 89 | new->cap_effective = cap_raise_nfsd_set(new->cap_effective, |
88 | new->cap_permitted); | 90 | new->cap_permitted); |
91 | validate_process_creds(); | ||
89 | put_cred(override_creds(new)); | 92 | put_cred(override_creds(new)); |
90 | put_cred(new); | 93 | put_cred(new); |
94 | validate_process_creds(); | ||
91 | return 0; | 95 | return 0; |
92 | 96 | ||
93 | oom: | 97 | oom: |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 492c79b7800b..24d58adfe5fd 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -496,7 +496,9 @@ nfsd(void *vrqstp) | |||
496 | /* Lock the export hash tables for reading. */ | 496 | /* Lock the export hash tables for reading. */ |
497 | exp_readlock(); | 497 | exp_readlock(); |
498 | 498 | ||
499 | validate_process_creds(); | ||
499 | svc_process(rqstp); | 500 | svc_process(rqstp); |
501 | validate_process_creds(); | ||
500 | 502 | ||
501 | /* Unlock export hash tables */ | 503 | /* Unlock export hash tables */ |
502 | exp_readunlock(); | 504 | exp_readunlock(); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 23341c1063bc..8fa09bfbcba7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
684 | __be32 err; | 684 | __be32 err; |
685 | int host_err; | 685 | int host_err; |
686 | 686 | ||
687 | validate_process_creds(); | ||
688 | |||
687 | /* | 689 | /* |
688 | * If we get here, then the client has already done an "open", | 690 | * If we get here, then the client has already done an "open", |
689 | * and (hopefully) checked permission - so allow OWNER_OVERRIDE | 691 | * and (hopefully) checked permission - so allow OWNER_OVERRIDE |
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, | |||
740 | out_nfserr: | 742 | out_nfserr: |
741 | err = nfserrno(host_err); | 743 | err = nfserrno(host_err); |
742 | out: | 744 | out: |
745 | validate_process_creds(); | ||
743 | return err; | 746 | return err; |
744 | } | 747 | } |
745 | 748 | ||
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 7e0b61be212e..c668bca579c1 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, | |||
209 | * We cannot call radix_tree_preload for the kernels older | 209 | * We cannot call radix_tree_preload for the kernels older |
210 | * than 2.6.23, because it is not exported for modules. | 210 | * than 2.6.23, because it is not exported for modules. |
211 | */ | 211 | */ |
212 | retry: | ||
212 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 213 | err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
213 | if (err) | 214 | if (err) |
214 | goto failed_unlock; | 215 | goto failed_unlock; |
@@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, | |||
219 | (unsigned long long)oldkey, | 220 | (unsigned long long)oldkey, |
220 | (unsigned long long)newkey); | 221 | (unsigned long long)newkey); |
221 | 222 | ||
222 | retry: | ||
223 | spin_lock_irq(&btnc->tree_lock); | 223 | spin_lock_irq(&btnc->tree_lock); |
224 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); | 224 | err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); |
225 | spin_unlock_irq(&btnc->tree_lock); | 225 | spin_unlock_irq(&btnc->tree_lock); |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8e2ec43b18f4..151964f0de4c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -416,8 +416,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
416 | if (unlikely(err)) | 416 | if (unlikely(err)) |
417 | goto failed; | 417 | goto failed; |
418 | 418 | ||
419 | down_read(&nilfs->ns_segctor_sem); | ||
419 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, | 420 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, |
420 | &bh_cp); | 421 | &bh_cp); |
422 | up_read(&nilfs->ns_segctor_sem); | ||
421 | if (unlikely(err)) { | 423 | if (unlikely(err)) { |
422 | if (err == -ENOENT || err == -EINVAL) { | 424 | if (err == -ENOENT || err == -EINVAL) { |
423 | printk(KERN_ERR | 425 | printk(KERN_ERR |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index e8adbffc626f..1b9caafb8662 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -253,7 +253,7 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
253 | 253 | ||
/*
 * nilfs_put_sbinfo - drop one reference on @sbi.
 *
 * The replacement side decrements sbi->s_count and calls kfree(sbi) only when
 * atomic_dec_and_test() reports true.  The removed side had the test negated,
 * so kfree() ran when the call reported false and was skipped when it
 * reported true.
 */
254 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) | 254 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) |
255 | { | 255 | { |
256 | if (!atomic_dec_and_test(&sbi->s_count)) | 256 | if (atomic_dec_and_test(&sbi->s_count)) |
257 | kfree(sbi); | 257 | kfree(sbi); |
258 | } | 258 | } |
259 | 259 | ||
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 47cd258fd24d..c9ee67b442e1 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c | |||
@@ -62,13 +62,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev | |||
62 | event_priv->wd = wd; | 62 | event_priv->wd = wd; |
63 | 63 | ||
64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); | 64 | ret = fsnotify_add_notify_event(group, event, fsn_event_priv); |
65 | /* EEXIST is not an error */ | 65 | if (ret) { |
66 | if (ret == -EEXIST) | ||
67 | ret = 0; | ||
68 | |||
69 | /* did event_priv get attached? */ | ||
70 | if (list_empty(&fsn_event_priv->event_list)) | ||
71 | inotify_free_event_priv(fsn_event_priv); | 66 | inotify_free_event_priv(fsn_event_priv); |
67 | /* EEXIST says we tail matched, EOVERFLOW isn't something | ||
68 | * to report up the stack. */ | ||
69 | if ((ret == -EEXIST) || | ||
70 | (ret == -EOVERFLOW)) | ||
71 | ret = 0; | ||
72 | } | ||
72 | 73 | ||
73 | /* | 74 | /* |
74 | * If we hold the entry until after the event is on the queue | 75 | * If we hold the entry until after the event is on the queue |
@@ -104,16 +105,45 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode | |||
104 | return send; | 105 | return send; |
105 | } | 106 | } |
106 | 107 | ||
108 | /* | ||
109 | * This is NEVER supposed to be called. Inotify marks should either have been | ||
110 | * removed from the idr when the watch was removed or in the | ||
111 | * fsnotify_destroy_mark_by_group() call when the inotify instance was being | ||
112 | * torn down. This is only called if the idr is about to be freed but there | ||
113 | * are still marks in it. | ||
114 | */ | ||
/*
 * idr_callback - idr_for_each() callback run while an inotify group's idr is
 * being torn down.
 * @id:   idr slot (watch descriptor) that is still populated
 * @p:    pointer stored in that slot, treated as a struct fsnotify_mark_entry
 * @data: cookie supplied by the idr_for_each() caller; printed as the group
 *
 * Emits one WARN describing the first leftover entry and, when @p is
 * non-NULL, prints the entry's group, inode and wd.  Later invocations return
 * immediately.  Always returns 0.
 */
107 | static int idr_callback(int id, void *p, void *data) | 115 | static int idr_callback(int id, void *p, void *data) |
108 | { | 116 | { |
109 | BUG(); | 117 | struct fsnotify_mark_entry *entry; |
118 | struct inotify_inode_mark_entry *ientry; | ||
119 | static bool warned = false; | ||
120 | | ||
121 | if (warned) | ||
122 | return 0; | ||
123 | | ||
/*
 * Latch the flag so that later entries take the early return above.  The row
 * previously assigned "false", which left the flag permanently clear and made
 * every leftover entry produce a full WARN.
 */
124 | warned = true; | ||
125 | entry = p; | ||
126 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
127 | | ||
128 | WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in " | ||
129 | "idr. Probably leaking memory\n", id, p, data); | ||
130 | | ||
131 | /* | ||
132 | * I'm taking the liberty of assuming that the mark in question is a | ||
133 | * valid address and I'm dereferencing it. This might help to figure | ||
134 | * out why we got here and the panic is no worse than the original | ||
135 | * BUG() that was here. | ||
136 | */ | ||
137 | if (entry) | ||
138 | printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n", | ||
139 | entry->group, entry->inode, ientry->wd); | ||
110 | return 0; | 140 | return 0; |
111 | } | 141 | } |
112 | 142 | ||
/*
 * inotify_free_group_priv - release the idr embedded in @group's inotify data.
 *
 * Walks whatever is still in the idr with idr_callback(), then calls
 * idr_remove_all() and idr_destroy().  The replacement side passes @group as
 * the callback's data argument where the removed side passed NULL.
 */
113 | static void inotify_free_group_priv(struct fsnotify_group *group) | 143 | static void inotify_free_group_priv(struct fsnotify_group *group) |
114 | { | 144 | { |
115 | /* ideally the idr is empty and we won't hit the BUG in the callback */ | 145 | /* ideally the idr is empty and we won't hit the BUG in the callback */ |
116 | idr_for_each(&group->inotify_data.idr, idr_callback, NULL); | 146 | idr_for_each(&group->inotify_data.idr, idr_callback, group); |
117 | idr_remove_all(&group->inotify_data.idr); | 147 | idr_remove_all(&group->inotify_data.idr); |
118 | idr_destroy(&group->inotify_data.idr); | 148 | idr_destroy(&group->inotify_data.idr); |
119 | } | 149 | } |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index f30d9bbc2e1b..dcd2040d330c 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -47,9 +47,6 @@ | |||
47 | 47 | ||
48 | static struct vfsmount *inotify_mnt __read_mostly; | 48 | static struct vfsmount *inotify_mnt __read_mostly; |
49 | 49 | ||
50 | /* this just sits here and wastes global memory. used to just pad userspace messages with zeros */ | ||
51 | static struct inotify_event nul_inotify_event; | ||
52 | |||
53 | /* these are configurable via /proc/sys/fs/inotify/ */ | 50 | /* these are configurable via /proc/sys/fs/inotify/ */ |
54 | static int inotify_max_user_instances __read_mostly; | 51 | static int inotify_max_user_instances __read_mostly; |
55 | static int inotify_max_queued_events __read_mostly; | 52 | static int inotify_max_queued_events __read_mostly; |
@@ -157,7 +154,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, | |||
157 | 154 | ||
158 | event = fsnotify_peek_notify_event(group); | 155 | event = fsnotify_peek_notify_event(group); |
159 | 156 | ||
160 | event_size += roundup(event->name_len, event_size); | 157 | if (event->name_len) |
158 | event_size += roundup(event->name_len + 1, event_size); | ||
161 | 159 | ||
162 | if (event_size > count) | 160 | if (event_size > count) |
163 | return ERR_PTR(-EINVAL); | 161 | return ERR_PTR(-EINVAL); |
@@ -183,7 +181,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
183 | struct fsnotify_event_private_data *fsn_priv; | 181 | struct fsnotify_event_private_data *fsn_priv; |
184 | struct inotify_event_private_data *priv; | 182 | struct inotify_event_private_data *priv; |
185 | size_t event_size = sizeof(struct inotify_event); | 183 | size_t event_size = sizeof(struct inotify_event); |
186 | size_t name_len; | 184 | size_t name_len = 0; |
187 | 185 | ||
188 | /* we get the inotify watch descriptor from the event private data */ | 186 | /* we get the inotify watch descriptor from the event private data */ |
189 | spin_lock(&event->lock); | 187 | spin_lock(&event->lock); |
@@ -199,8 +197,12 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
199 | inotify_free_event_priv(fsn_priv); | 197 | inotify_free_event_priv(fsn_priv); |
200 | } | 198 | } |
201 | 199 | ||
202 | /* round up event->name_len so it is a multiple of event_size */ | 200 | /* |
203 | name_len = roundup(event->name_len, event_size); | 201 | * round up event->name_len so it is a multiple of event_size |
202 | * plus an extra byte for the terminating '\0'. | ||
203 | */ | ||
204 | if (event->name_len) | ||
205 | name_len = roundup(event->name_len + 1, event_size); | ||
204 | inotify_event.len = name_len; | 206 | inotify_event.len = name_len; |
205 | 207 | ||
206 | inotify_event.mask = inotify_mask_to_arg(event->mask); | 208 | inotify_event.mask = inotify_mask_to_arg(event->mask); |
@@ -224,8 +226,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
224 | return -EFAULT; | 226 | return -EFAULT; |
225 | buf += event->name_len; | 227 | buf += event->name_len; |
226 | 228 | ||
227 | /* fill userspace with 0's from nul_inotify_event */ | 229 | /* fill userspace with 0's */ |
228 | if (copy_to_user(buf, &nul_inotify_event, len_to_zero)) | 230 | if (clear_user(buf, len_to_zero)) |
229 | return -EFAULT; | 231 | return -EFAULT; |
230 | buf += len_to_zero; | 232 | buf += len_to_zero; |
231 | event_size += name_len; | 233 | event_size += name_len; |
@@ -326,8 +328,9 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, | |||
326 | list_for_each_entry(holder, &group->notification_list, event_list) { | 328 | list_for_each_entry(holder, &group->notification_list, event_list) { |
327 | event = holder->event; | 329 | event = holder->event; |
328 | send_len += sizeof(struct inotify_event); | 330 | send_len += sizeof(struct inotify_event); |
329 | send_len += roundup(event->name_len, | 331 | if (event->name_len) |
330 | sizeof(struct inotify_event)); | 332 | send_len += roundup(event->name_len + 1, |
333 | sizeof(struct inotify_event)); | ||
331 | } | 334 | } |
332 | mutex_unlock(&group->notification_mutex); | 335 | mutex_unlock(&group->notification_mutex); |
333 | ret = put_user(send_len, (int __user *) p); | 336 | ret = put_user(send_len, (int __user *) p); |
@@ -364,20 +367,53 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns | |||
364 | return error; | 367 | return error; |
365 | } | 368 | } |
366 | 369 | ||
370 | /* | ||
371 | * Remove the mark from the idr (if present) and drop the reference | ||
372 | * on the mark because it was in the idr. | ||
373 | */ | ||
/*
 * Review annotation: replacement behaviour of inotify_remove_from_idr().
 * @group:  group whose inotify_data.idr and idr_lock are used
 * @ientry: inotify mark to take out of the idr
 *
 * Everything between spin_lock() and the "out" label runs with
 * group->inotify_data.idr_lock held:
 *  - ientry->wd == -1: nothing to do;
 *  - idr_find() finds no entry for that wd: nothing to do (ientry->wd is left
 *    unchanged on this path);
 *  - the entry found for that wd is not @ientry: WARN_ON(1), set
 *    ientry->wd = -1 and leave the idr untouched;
 *  - otherwise assert that the mark's refcnt is at least 2, idr_remove() the
 *    wd, set ientry->wd = -1 and drop one reference with fsnotify_put_mark().
 *
 * The removed version called idr_remove() unconditionally and dropped no
 * reference itself.
 */
367 | static void inotify_remove_from_idr(struct fsnotify_group *group, | 374 | static void inotify_remove_from_idr(struct fsnotify_group *group, |
368 | struct inotify_inode_mark_entry *ientry) | 375 | struct inotify_inode_mark_entry *ientry) |
369 | { | 376 | { |
370 | struct idr *idr; | 377 | struct idr *idr; |
378 | struct fsnotify_mark_entry *entry; | ||
379 | struct inotify_inode_mark_entry *found_ientry; | ||
380 | int wd; | ||
371 | 381 | ||
372 | spin_lock(&group->inotify_data.idr_lock); | 382 | spin_lock(&group->inotify_data.idr_lock); |
373 | idr = &group->inotify_data.idr; | 383 | idr = &group->inotify_data.idr; |
374 | idr_remove(idr, ientry->wd); | 384 | wd = ientry->wd; |
375 | spin_unlock(&group->inotify_data.idr_lock); | 385 | |
386 | if (wd == -1) | ||
387 | goto out; | ||
388 | |||
389 | entry = idr_find(&group->inotify_data.idr, wd); | ||
390 | if (unlikely(!entry)) | ||
391 | goto out; | ||
392 | |||
393 | found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | ||
394 | if (unlikely(found_ientry != ientry)) { | ||
395 | /* We found an entry in the idr with the right wd, but it's | ||
396 | * not the entry we were told to remove. eparis seriously | ||
397 | * fucked up somewhere. */ | ||
398 | WARN_ON(1); | ||
399 | ientry->wd = -1; | ||
400 | goto out; | ||
401 | } | ||
402 | |||
403 | /* One ref for being in the idr, one ref held by the caller */ | ||
404 | BUG_ON(atomic_read(&entry->refcnt) < 2); | ||
405 | |||
406 | idr_remove(idr, wd); | ||
376 | ientry->wd = -1; | 407 | ientry->wd = -1; |
408 | |||
409 | /* removed from the idr, drop that ref */ | ||
410 | fsnotify_put_mark(entry); | ||
411 | out: | ||
412 | spin_unlock(&group->inotify_data.idr_lock); | ||
377 | } | 413 | } |
414 | |||
378 | /* | 415 | /* |
379 | * Send IN_IGNORED for this wd, remove this wd from the idr, and drop the | 416 | * Send IN_IGNORED for this wd, remove this wd from the idr. |
380 | * internal reference help on the mark because it is in the idr. | ||
381 | */ | 417 | */ |
382 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | 418 | void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, |
383 | struct fsnotify_group *group) | 419 | struct fsnotify_group *group) |
@@ -386,6 +422,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
386 | struct fsnotify_event *ignored_event; | 422 | struct fsnotify_event *ignored_event; |
387 | struct inotify_event_private_data *event_priv; | 423 | struct inotify_event_private_data *event_priv; |
388 | struct fsnotify_event_private_data *fsn_event_priv; | 424 | struct fsnotify_event_private_data *fsn_event_priv; |
425 | int ret; | ||
389 | 426 | ||
390 | ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, | 427 | ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, |
391 | FSNOTIFY_EVENT_NONE, NULL, 0, | 428 | FSNOTIFY_EVENT_NONE, NULL, 0, |
@@ -404,10 +441,8 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, | |||
404 | fsn_event_priv->group = group; | 441 | fsn_event_priv->group = group; |
405 | event_priv->wd = ientry->wd; | 442 | event_priv->wd = ientry->wd; |
406 | 443 | ||
407 | fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); | 444 | ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); |
408 | 445 | if (ret) | |
409 | /* did the private data get added? */ | ||
410 | if (list_empty(&fsn_event_priv->event_list)) | ||
411 | inotify_free_event_priv(fsn_event_priv); | 446 | inotify_free_event_priv(fsn_event_priv); |
412 | 447 | ||
413 | skip_send_ignore: | 448 | skip_send_ignore: |
@@ -418,9 +453,6 @@ skip_send_ignore: | |||
418 | /* remove this entry from the idr */ | 453 | /* remove this entry from the idr */ |
419 | inotify_remove_from_idr(group, ientry); | 454 | inotify_remove_from_idr(group, ientry); |
420 | 455 | ||
421 | /* removed from idr, drop that reference */ | ||
422 | fsnotify_put_mark(entry); | ||
423 | |||
424 | atomic_dec(&group->inotify_data.user->inotify_watches); | 456 | atomic_dec(&group->inotify_data.user->inotify_watches); |
425 | } | 457 | } |
426 | 458 | ||
@@ -432,80 +464,29 @@ static void inotify_free_mark(struct fsnotify_mark_entry *entry) | |||
432 | kmem_cache_free(inotify_inode_mark_cachep, ientry); | 464 | kmem_cache_free(inotify_inode_mark_cachep, ientry); |
433 | } | 465 | } |
434 | 466 | ||
435 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | 467 | static int inotify_update_existing_watch(struct fsnotify_group *group, |
468 | struct inode *inode, | ||
469 | u32 arg) | ||
436 | { | 470 | { |
437 | struct fsnotify_mark_entry *entry = NULL; | 471 | struct fsnotify_mark_entry *entry; |
438 | struct inotify_inode_mark_entry *ientry; | 472 | struct inotify_inode_mark_entry *ientry; |
439 | struct inotify_inode_mark_entry *tmp_ientry; | ||
440 | int ret = 0; | ||
441 | int add = (arg & IN_MASK_ADD); | ||
442 | __u32 mask; | ||
443 | __u32 old_mask, new_mask; | 473 | __u32 old_mask, new_mask; |
474 | __u32 mask; | ||
475 | int add = (arg & IN_MASK_ADD); | ||
476 | int ret; | ||
444 | 477 | ||
445 | /* don't allow invalid bits: we don't want flags set */ | 478 | /* don't allow invalid bits: we don't want flags set */ |
446 | mask = inotify_arg_to_mask(arg); | 479 | mask = inotify_arg_to_mask(arg); |
447 | if (unlikely(!mask)) | 480 | if (unlikely(!mask)) |
448 | return -EINVAL; | 481 | return -EINVAL; |
449 | 482 | ||
450 | tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
451 | if (unlikely(!tmp_ientry)) | ||
452 | return -ENOMEM; | ||
453 | /* we set the mask at the end after attaching it */ | ||
454 | fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); | ||
455 | tmp_ientry->wd = -1; | ||
456 | |||
457 | find_entry: | ||
458 | spin_lock(&inode->i_lock); | 483 | spin_lock(&inode->i_lock); |
459 | entry = fsnotify_find_mark_entry(group, inode); | 484 | entry = fsnotify_find_mark_entry(group, inode); |
460 | spin_unlock(&inode->i_lock); | 485 | spin_unlock(&inode->i_lock); |
461 | if (entry) { | 486 | if (!entry) |
462 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); | 487 | return -ENOENT; |
463 | } else { | ||
464 | ret = -ENOSPC; | ||
465 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) | ||
466 | goto out_err; | ||
467 | retry: | ||
468 | ret = -ENOMEM; | ||
469 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
470 | goto out_err; | ||
471 | |||
472 | spin_lock(&group->inotify_data.idr_lock); | ||
473 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | ||
474 | group->inotify_data.last_wd, | ||
475 | &tmp_ientry->wd); | ||
476 | spin_unlock(&group->inotify_data.idr_lock); | ||
477 | if (ret) { | ||
478 | if (ret == -EAGAIN) | ||
479 | goto retry; | ||
480 | goto out_err; | ||
481 | } | ||
482 | 488 | ||
483 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | 489 | ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); |
484 | if (ret) { | ||
485 | inotify_remove_from_idr(group, tmp_ientry); | ||
486 | if (ret == -EEXIST) | ||
487 | goto find_entry; | ||
488 | goto out_err; | ||
489 | } | ||
490 | |||
491 | /* tmp_ientry has been added to the inode, so we are all set up. | ||
492 | * now we just need to make sure tmp_ientry doesn't get freed and | ||
493 | * we need to set up entry and ientry so the generic code can | ||
494 | * do its thing. */ | ||
495 | ientry = tmp_ientry; | ||
496 | entry = &ientry->fsn_entry; | ||
497 | tmp_ientry = NULL; | ||
498 | |||
499 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
500 | |||
501 | /* update the idr hint */ | ||
502 | group->inotify_data.last_wd = ientry->wd; | ||
503 | |||
504 | /* we put the mark on the idr, take a reference */ | ||
505 | fsnotify_get_mark(entry); | ||
506 | } | ||
507 | |||
508 | ret = ientry->wd; | ||
509 | 490 | ||
510 | spin_lock(&entry->lock); | 491 | spin_lock(&entry->lock); |
511 | 492 | ||
@@ -537,18 +518,107 @@ retry: | |||
537 | fsnotify_recalc_group_mask(group); | 518 | fsnotify_recalc_group_mask(group); |
538 | } | 519 | } |
539 | 520 | ||
540 | /* this either matches fsnotify_find_mark_entry, or init_mark_entry | 521 | /* return the wd */ |
541 | * depending on which path we took... */ | 522 | ret = ientry->wd; |
523 | |||
524 | /* match the get from fsnotify_find_mark_entry() */ | ||
542 | fsnotify_put_mark(entry); | 525 | fsnotify_put_mark(entry); |
543 | 526 | ||
527 | return ret; | ||
528 | } | ||
529 | |||
530 | static int inotify_new_watch(struct fsnotify_group *group, | ||
531 | struct inode *inode, | ||
532 | u32 arg) | ||
533 | { | ||
534 | struct inotify_inode_mark_entry *tmp_ientry; | ||
535 | __u32 mask; | ||
536 | int ret; | ||
537 | |||
538 | /* don't allow invalid bits: we don't want flags set */ | ||
539 | mask = inotify_arg_to_mask(arg); | ||
540 | if (unlikely(!mask)) | ||
541 | return -EINVAL; | ||
542 | |||
543 | tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); | ||
544 | if (unlikely(!tmp_ientry)) | ||
545 | return -ENOMEM; | ||
546 | |||
547 | fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); | ||
548 | tmp_ientry->fsn_entry.mask = mask; | ||
549 | tmp_ientry->wd = -1; | ||
550 | |||
551 | ret = -ENOSPC; | ||
552 | if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) | ||
553 | goto out_err; | ||
554 | retry: | ||
555 | ret = -ENOMEM; | ||
556 | if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) | ||
557 | goto out_err; | ||
558 | |||
559 | spin_lock(&group->inotify_data.idr_lock); | ||
560 | ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, | ||
561 | group->inotify_data.last_wd, | ||
562 | &tmp_ientry->wd); | ||
563 | spin_unlock(&group->inotify_data.idr_lock); | ||
564 | if (ret) { | ||
565 | /* idr was out of memory; allocate and try again */ | ||
566 | if (ret == -EAGAIN) | ||
567 | goto retry; | ||
568 | goto out_err; | ||
569 | } | ||
570 | |||
571 | /* we put the mark on the idr, take a reference */ | ||
572 | fsnotify_get_mark(&tmp_ientry->fsn_entry); | ||
573 | |||
574 | /* we are on the idr, now get on the inode */ | ||
575 | ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); | ||
576 | if (ret) { | ||
577 | /* we failed to get on the inode, get off the idr */ | ||
578 | inotify_remove_from_idr(group, tmp_ientry); | ||
579 | goto out_err; | ||
580 | } | ||
581 | |||
582 | /* update the idr hint, who cares about races, it's just a hint */ | ||
583 | group->inotify_data.last_wd = tmp_ientry->wd; | ||
584 | |||
585 | /* increment the number of watches the user has */ | ||
586 | atomic_inc(&group->inotify_data.user->inotify_watches); | ||
587 | |||
588 | /* return the watch descriptor for this new entry */ | ||
589 | ret = tmp_ientry->wd; | ||
590 | |||
591 | /* match the ref from fsnotify_init_mark() */ | ||
592 | fsnotify_put_mark(&tmp_ientry->fsn_entry); | ||
593 | |||
594 | /* if this mark added a new event update the group mask */ | ||
595 | if (mask & ~group->mask) | ||
596 | fsnotify_recalc_group_mask(group); | ||
597 | |||
544 | out_err: | 598 | out_err: |
545 | /* could be an error, could be that we found an existing mark */ | 599 | if (ret < 0) |
546 | if (tmp_ientry) { | ||
547 | /* on the idr but didn't make it on the inode */ | ||
548 | if (tmp_ientry->wd != -1) | ||
549 | inotify_remove_from_idr(group, tmp_ientry); | ||
550 | kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); | 600 | kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); |
551 | } | 601 | |
602 | return ret; | ||
603 | } | ||
604 | |||
605 | static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) | ||
606 | { | ||
607 | int ret = 0; | ||
608 | |||
609 | retry: | ||
610 | /* try to update an existing watch with the new arg */ | ||
611 | ret = inotify_update_existing_watch(group, inode, arg); | ||
612 | /* no mark present, try to add a new one */ | ||
613 | if (ret == -ENOENT) | ||
614 | ret = inotify_new_watch(group, inode, arg); | ||
615 | /* | ||
616 | * inotify_new_watch could race with another thread which did an | ||
617 | * inotify_new_watch between the update_existing and the add watch | ||
618 | * here, go back and try to update an existing mark again. | ||
619 | */ | ||
620 | if (ret == -EEXIST) | ||
621 | goto retry; | ||
552 | 622 | ||
553 | return ret; | 623 | return ret; |
554 | } | 624 | } |
@@ -568,7 +638,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign | |||
568 | 638 | ||
569 | spin_lock_init(&group->inotify_data.idr_lock); | 639 | spin_lock_init(&group->inotify_data.idr_lock); |
570 | idr_init(&group->inotify_data.idr); | 640 | idr_init(&group->inotify_data.idr); |
571 | group->inotify_data.last_wd = 0; | 641 | group->inotify_data.last_wd = 1; |
572 | group->inotify_data.user = user; | 642 | group->inotify_data.user = user; |
573 | group->inotify_data.fa = NULL; | 643 | group->inotify_data.fa = NULL; |
574 | 644 | ||
diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 521368574e97..3816d5750dd5 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c | |||
@@ -153,6 +153,10 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new | |||
153 | return true; | 153 | return true; |
154 | break; | 154 | break; |
155 | case (FSNOTIFY_EVENT_NONE): | 155 | case (FSNOTIFY_EVENT_NONE): |
156 | if (old->mask & FS_Q_OVERFLOW) | ||
157 | return true; | ||
158 | else if (old->mask & FS_IN_IGNORED) | ||
159 | return false; | ||
156 | return false; | 160 | return false; |
157 | }; | 161 | }; |
158 | } | 162 | } |
@@ -171,9 +175,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even | |||
171 | struct list_head *list = &group->notification_list; | 175 | struct list_head *list = &group->notification_list; |
172 | struct fsnotify_event_holder *last_holder; | 176 | struct fsnotify_event_holder *last_holder; |
173 | struct fsnotify_event *last_event; | 177 | struct fsnotify_event *last_event; |
174 | 178 | int ret = 0; | |
175 | /* easy to tell if priv was attached to the event */ | ||
176 | INIT_LIST_HEAD(&priv->event_list); | ||
177 | 179 | ||
178 | /* | 180 | /* |
179 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. | 181 | * There is one fsnotify_event_holder embedded inside each fsnotify_event. |
@@ -194,6 +196,7 @@ alloc_holder: | |||
194 | 196 | ||
195 | if (group->q_len >= group->max_events) { | 197 | if (group->q_len >= group->max_events) { |
196 | event = &q_overflow_event; | 198 | event = &q_overflow_event; |
199 | ret = -EOVERFLOW; | ||
197 | /* sorry, no private data on the overflow event */ | 200 | /* sorry, no private data on the overflow event */ |
198 | priv = NULL; | 201 | priv = NULL; |
199 | } | 202 | } |
@@ -235,7 +238,7 @@ alloc_holder: | |||
235 | mutex_unlock(&group->notification_mutex); | 238 | mutex_unlock(&group->notification_mutex); |
236 | 239 | ||
237 | wake_up(&group->notification_waitq); | 240 | wake_up(&group->notification_waitq); |
238 | return 0; | 241 | return ret; |
239 | } | 242 | } |
240 | 243 | ||
241 | /* | 244 | /* |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f9a3e8942669..ab513ddaeff2 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -6851,7 +6851,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, | |||
6851 | } | 6851 | } |
6852 | status = 0; | 6852 | status = 0; |
6853 | bail: | 6853 | bail: |
6854 | 6854 | brelse(last_eb_bh); | |
6855 | mlog_exit(status); | 6855 | mlog_exit(status); |
6856 | return status; | 6856 | return status; |
6857 | } | 6857 | } |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index b401654011a2..8a1e61545f41 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -1747,8 +1747,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1747 | * we know zeros will only be needed in the first and/or last cluster. | 1747 | * we know zeros will only be needed in the first and/or last cluster. |
1748 | */ | 1748 | */ |
1749 | if (clusters_to_alloc || extents_to_split || | 1749 | if (clusters_to_alloc || extents_to_split || |
1750 | wc->w_desc[0].c_needs_zero || | 1750 | (wc->w_clen && (wc->w_desc[0].c_needs_zero || |
1751 | wc->w_desc[wc->w_clen - 1].c_needs_zero) | 1751 | wc->w_desc[wc->w_clen - 1].c_needs_zero))) |
1752 | cluster_of_pages = 1; | 1752 | cluster_of_pages = 1; |
1753 | else | 1753 | else |
1754 | cluster_of_pages = 0; | 1754 | cluster_of_pages = 0; |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 2f28b7de2c8d..b4957c7d9fe2 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
85 | goto bail; | 85 | goto bail; |
86 | } | 86 | } |
87 | 87 | ||
88 | /* | ||
89 | * If the last lookup failed to create dentry lock, let us | ||
90 | * redo it. | ||
91 | */ | ||
92 | if (!dentry->d_fsdata) { | ||
93 | mlog(0, "Inode %llu doesn't have dentry lock, " | ||
94 | "returning false\n", | ||
95 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
96 | goto bail; | ||
97 | } | ||
98 | |||
88 | ret = 1; | 99 | ret = 1; |
89 | 100 | ||
90 | bail: | 101 | bail: |
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 1c9efb406a96..02bf17808bdc 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c | |||
@@ -325,6 +325,7 @@ clear_fields: | |||
325 | } | 325 | } |
326 | 326 | ||
327 | static struct backing_dev_info dlmfs_backing_dev_info = { | 327 | static struct backing_dev_info dlmfs_backing_dev_info = { |
328 | .name = "ocfs2-dlmfs", | ||
328 | .ra_pages = 0, /* No readahead */ | 329 | .ra_pages = 0, /* No readahead */ |
329 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 330 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
330 | }; | 331 | }; |
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index fcf879ed6930..756f5b0998e0 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c | |||
@@ -122,7 +122,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
122 | * that still has AST's pending... */ | 122 | * that still has AST's pending... */ |
123 | in_use = !list_empty(&lock->ast_list); | 123 | in_use = !list_empty(&lock->ast_list); |
124 | spin_unlock(&dlm->ast_lock); | 124 | spin_unlock(&dlm->ast_lock); |
125 | if (in_use) { | 125 | if (in_use && !(flags & LKM_CANCEL)) { |
126 | mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " | 126 | mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock " |
127 | "while waiting for an ast!", res->lockname.len, | 127 | "while waiting for an ast!", res->lockname.len, |
128 | res->lockname.name); | 128 | res->lockname.name); |
@@ -131,7 +131,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, | |||
131 | 131 | ||
132 | spin_lock(&res->spinlock); | 132 | spin_lock(&res->spinlock); |
133 | if (res->state & DLM_LOCK_RES_IN_PROGRESS) { | 133 | if (res->state & DLM_LOCK_RES_IN_PROGRESS) { |
134 | if (master_node) { | 134 | if (master_node && !(flags & LKM_CANCEL)) { |
135 | mlog(ML_ERROR, "lockres in progress!\n"); | 135 | mlog(ML_ERROR, "lockres in progress!\n"); |
136 | spin_unlock(&res->spinlock); | 136 | spin_unlock(&res->spinlock); |
137 | return DLM_FORWARD; | 137 | return DLM_FORWARD; |
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index fcdba091af3d..c212cf5a2bdf 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h | |||
@@ -108,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = { | |||
108 | [OCFS2_LOCK_TYPE_OPEN] = "Open", | 108 | [OCFS2_LOCK_TYPE_OPEN] = "Open", |
109 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", | 109 | [OCFS2_LOCK_TYPE_FLOCK] = "Flock", |
110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", | 110 | [OCFS2_LOCK_TYPE_QINFO] = "Quota", |
111 | [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", | ||
111 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", | 112 | [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", |
112 | }; | 113 | }; |
113 | 114 | ||
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index bf7742d0ee3b..44f2a5e1d042 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include "sysfile.h" | 23 | #include "sysfile.h" |
24 | #include "dlmglue.h" | 24 | #include "dlmglue.h" |
25 | #include "uptodate.h" | 25 | #include "uptodate.h" |
26 | #include "super.h" | ||
26 | #include "quota.h" | 27 | #include "quota.h" |
27 | 28 | ||
28 | static struct workqueue_struct *ocfs2_quota_wq = NULL; | 29 | static struct workqueue_struct *ocfs2_quota_wq = NULL; |
@@ -114,6 +115,15 @@ int ocfs2_read_quota_block(struct inode *inode, u64 v_block, | |||
114 | int rc = 0; | 115 | int rc = 0; |
115 | struct buffer_head *tmp = *bh; | 116 | struct buffer_head *tmp = *bh; |
116 | 117 | ||
118 | if (i_size_read(inode) >> inode->i_sb->s_blocksize_bits <= v_block) { | ||
119 | ocfs2_error(inode->i_sb, | ||
120 | "Quota file %llu is probably corrupted! Requested " | ||
121 | "to read block %Lu but file has size only %Lu\n", | ||
122 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
123 | (unsigned long long)v_block, | ||
124 | (unsigned long long)i_size_read(inode)); | ||
125 | return -EIO; | ||
126 | } | ||
117 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, | 127 | rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, |
118 | ocfs2_validate_quota_block); | 128 | ocfs2_validate_quota_block); |
119 | if (rc) | 129 | if (rc) |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index b0ee0fdf799a..a3f8871d21fd 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1218,13 +1218,17 @@ static void ocfs2_kill_sb(struct super_block *sb) | |||
1218 | { | 1218 | { |
1219 | struct ocfs2_super *osb = OCFS2_SB(sb); | 1219 | struct ocfs2_super *osb = OCFS2_SB(sb); |
1220 | 1220 | ||
1221 | /* Failed mount? */ | ||
1222 | if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) | ||
1223 | goto out; | ||
1224 | |||
1221 | /* Prevent further queueing of inode drop events */ | 1225 | /* Prevent further queueing of inode drop events */ |
1222 | spin_lock(&dentry_list_lock); | 1226 | spin_lock(&dentry_list_lock); |
1223 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); | 1227 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); |
1224 | spin_unlock(&dentry_list_lock); | 1228 | spin_unlock(&dentry_list_lock); |
1225 | /* Wait for work to finish and/or remove it */ | 1229 | /* Wait for work to finish and/or remove it */ |
1226 | cancel_work_sync(&osb->dentry_lock_work); | 1230 | cancel_work_sync(&osb->dentry_lock_work); |
1227 | 1231 | out: | |
1228 | kill_block_super(sb); | 1232 | kill_block_super(sb); |
1229 | } | 1233 | } |
1230 | 1234 | ||
@@ -199,7 +199,7 @@ out: | |||
199 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | 199 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, |
200 | struct file *filp) | 200 | struct file *filp) |
201 | { | 201 | { |
202 | int err; | 202 | int ret; |
203 | struct iattr newattrs; | 203 | struct iattr newattrs; |
204 | 204 | ||
205 | /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ | 205 | /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ |
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, | |||
214 | } | 214 | } |
215 | 215 | ||
216 | /* Remove suid/sgid on truncate too */ | 216 | /* Remove suid/sgid on truncate too */ |
217 | newattrs.ia_valid |= should_remove_suid(dentry); | 217 | ret = should_remove_suid(dentry); |
218 | if (ret) | ||
219 | newattrs.ia_valid |= ret | ATTR_FORCE; | ||
218 | 220 | ||
219 | mutex_lock(&dentry->d_inode->i_mutex); | 221 | mutex_lock(&dentry->d_inode->i_mutex); |
220 | err = notify_change(dentry, &newattrs); | 222 | ret = notify_change(dentry, &newattrs); |
221 | mutex_unlock(&dentry->d_inode->i_mutex); | 223 | mutex_unlock(&dentry->d_inode->i_mutex); |
222 | return err; | 224 | return ret; |
223 | } | 225 | } |
224 | 226 | ||
225 | static long do_sys_truncate(const char __user *pathname, loff_t length) | 227 | static long do_sys_truncate(const char __user *pathname, loff_t length) |
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, | |||
957 | int error; | 959 | int error; |
958 | struct file *f; | 960 | struct file *f; |
959 | 961 | ||
962 | validate_creds(cred); | ||
963 | |||
960 | /* | 964 | /* |
961 | * We must always pass in a valid mount pointer. Historically | 965 | * We must always pass in a valid mount pointer. Historically |
962 | * callers got away with not passing it, but we must enforce this at | 966 | * callers got away with not passing it, but we must enforce this at |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 175db258942f..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, | |||
1003 | 1003 | ||
1004 | if (!task) | 1004 | if (!task) |
1005 | return -ESRCH; | 1005 | return -ESRCH; |
1006 | task_lock(task); | 1006 | oom_adjust = task->oomkilladj; |
1007 | if (task->mm) | ||
1008 | oom_adjust = task->mm->oom_adj; | ||
1009 | else | ||
1010 | oom_adjust = OOM_DISABLE; | ||
1011 | task_unlock(task); | ||
1012 | put_task_struct(task); | 1007 | put_task_struct(task); |
1013 | 1008 | ||
1014 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); | 1009 | len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); |
@@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1037 | task = get_proc_task(file->f_path.dentry->d_inode); | 1032 | task = get_proc_task(file->f_path.dentry->d_inode); |
1038 | if (!task) | 1033 | if (!task) |
1039 | return -ESRCH; | 1034 | return -ESRCH; |
1040 | task_lock(task); | 1035 | if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { |
1041 | if (!task->mm) { | ||
1042 | task_unlock(task); | ||
1043 | put_task_struct(task); | ||
1044 | return -EINVAL; | ||
1045 | } | ||
1046 | if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { | ||
1047 | task_unlock(task); | ||
1048 | put_task_struct(task); | 1036 | put_task_struct(task); |
1049 | return -EACCES; | 1037 | return -EACCES; |
1050 | } | 1038 | } |
1051 | task->mm->oom_adj = oom_adjust; | 1039 | task->oomkilladj = oom_adjust; |
1052 | task_unlock(task); | ||
1053 | put_task_struct(task); | 1040 | put_task_struct(task); |
1054 | if (end - buffer == 0) | 1041 | if (end - buffer == 0) |
1055 | return -EIO; | 1042 | return -EIO; |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 0ff7566c767c..a7f0110fca4c 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -46,6 +46,7 @@ static const struct super_operations ramfs_ops; | |||
46 | static const struct inode_operations ramfs_dir_inode_operations; | 46 | static const struct inode_operations ramfs_dir_inode_operations; |
47 | 47 | ||
48 | static struct backing_dev_info ramfs_backing_dev_info = { | 48 | static struct backing_dev_info ramfs_backing_dev_info = { |
49 | .name = "ramfs", | ||
49 | .ra_pages = 0, /* No readahead */ | 50 | .ra_pages = 0, /* No readahead */ |
50 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | | 51 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | |
51 | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | | 52 | BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | |
diff --git a/fs/select.c b/fs/select.c index d870237e42c7..8084834e123e 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -110,6 +110,7 @@ void poll_initwait(struct poll_wqueues *pwq) | |||
110 | { | 110 | { |
111 | init_poll_funcptr(&pwq->pt, __pollwait); | 111 | init_poll_funcptr(&pwq->pt, __pollwait); |
112 | pwq->polling_task = current; | 112 | pwq->polling_task = current; |
113 | pwq->triggered = 0; | ||
113 | pwq->error = 0; | 114 | pwq->error = 0; |
114 | pwq->table = NULL; | 115 | pwq->table = NULL; |
115 | pwq->inline_index = 0; | 116 | pwq->inline_index = 0; |
diff --git a/fs/super.c b/fs/super.c index 2761d3e22ed9..9cda337ddae2 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type) | |||
62 | s = NULL; | 62 | s = NULL; |
63 | goto out; | 63 | goto out; |
64 | } | 64 | } |
65 | INIT_LIST_HEAD(&s->s_dirty); | ||
66 | INIT_LIST_HEAD(&s->s_io); | ||
67 | INIT_LIST_HEAD(&s->s_more_io); | ||
68 | INIT_LIST_HEAD(&s->s_files); | 65 | INIT_LIST_HEAD(&s->s_files); |
69 | INIT_LIST_HEAD(&s->s_instances); | 66 | INIT_LIST_HEAD(&s->s_instances); |
70 | INIT_HLIST_HEAD(&s->s_anon); | 67 | INIT_HLIST_HEAD(&s->s_anon); |
@@ -171,7 +168,7 @@ int __put_super_and_need_restart(struct super_block *sb) | |||
171 | * Drops a temporary reference, frees superblock if there's no | 168 | * Drops a temporary reference, frees superblock if there's no |
172 | * references left. | 169 | * references left. |
173 | */ | 170 | */ |
174 | static void put_super(struct super_block *sb) | 171 | void put_super(struct super_block *sb) |
175 | { | 172 | { |
176 | spin_lock(&sb_lock); | 173 | spin_lock(&sb_lock); |
177 | __put_super(sb); | 174 | __put_super(sb); |
@@ -19,20 +19,22 @@ | |||
19 | SYNC_FILE_RANGE_WAIT_AFTER) | 19 | SYNC_FILE_RANGE_WAIT_AFTER) |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) | 22 | * Do the filesystem syncing work. For simple filesystems |
23 | * just dirties buffers with inodes so we have to submit IO for these buffers | 23 | * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to |
24 | * via __sync_blockdev(). This also speeds up the wait == 1 case since in that | 24 | * submit IO for these buffers via __sync_blockdev(). This also speeds up the |
25 | * case write_inode() functions do sync_dirty_buffer() and thus effectively | 25 | * wait == 1 case since in that case write_inode() functions do |
26 | * write one block at a time. | 26 | * sync_dirty_buffer() and thus effectively write one block at a time. |
27 | */ | 27 | */ |
28 | static int __sync_filesystem(struct super_block *sb, int wait) | 28 | static int __sync_filesystem(struct super_block *sb, int wait) |
29 | { | 29 | { |
30 | /* Avoid doing twice syncing and cache pruning for quota sync */ | 30 | /* Avoid doing twice syncing and cache pruning for quota sync */ |
31 | if (!wait) | 31 | if (!wait) { |
32 | writeout_quota_sb(sb, -1); | 32 | writeout_quota_sb(sb, -1); |
33 | else | 33 | writeback_inodes_sb(sb); |
34 | } else { | ||
34 | sync_quota_sb(sb, -1); | 35 | sync_quota_sb(sb, -1); |
35 | sync_inodes_sb(sb, wait); | 36 | sync_inodes_sb(sb); |
37 | } | ||
36 | if (sb->s_op->sync_fs) | 38 | if (sb->s_op->sync_fs) |
37 | sb->s_op->sync_fs(sb, wait); | 39 | sb->s_op->sync_fs(sb, wait); |
38 | return __sync_blockdev(sb->s_bdev, wait); | 40 | return __sync_blockdev(sb->s_bdev, wait); |
@@ -118,7 +120,7 @@ restart: | |||
118 | */ | 120 | */ |
119 | SYSCALL_DEFINE0(sync) | 121 | SYSCALL_DEFINE0(sync) |
120 | { | 122 | { |
121 | wakeup_pdflush(0); | 123 | wakeup_flusher_threads(0); |
122 | sync_filesystems(0); | 124 | sync_filesystems(0); |
123 | sync_filesystems(1); | 125 | sync_filesystems(1); |
124 | if (unlikely(laptop_mode)) | 126 | if (unlikely(laptop_mode)) |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 14f2d71ea3ce..0050fc40e8c9 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, | |||
760 | const struct inode_operations sysfs_dir_inode_operations = { | 760 | const struct inode_operations sysfs_dir_inode_operations = { |
761 | .lookup = sysfs_lookup, | 761 | .lookup = sysfs_lookup, |
762 | .setattr = sysfs_setattr, | 762 | .setattr = sysfs_setattr, |
763 | .setxattr = sysfs_setxattr, | ||
763 | }; | 764 | }; |
764 | 765 | ||
765 | static void remove_dir(struct sysfs_dirent *sd) | 766 | static void remove_dir(struct sysfs_dirent *sd) |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 555f0ff988df..e28cecf179f5 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/capability.h> | 18 | #include <linux/capability.h> |
19 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/xattr.h> | ||
22 | #include <linux/security.h> | ||
21 | #include "sysfs.h" | 23 | #include "sysfs.h" |
22 | 24 | ||
23 | extern struct super_block * sysfs_sb; | 25 | extern struct super_block * sysfs_sb; |
@@ -29,12 +31,14 @@ static const struct address_space_operations sysfs_aops = { | |||
29 | }; | 31 | }; |
30 | 32 | ||
31 | static struct backing_dev_info sysfs_backing_dev_info = { | 33 | static struct backing_dev_info sysfs_backing_dev_info = { |
34 | .name = "sysfs", | ||
32 | .ra_pages = 0, /* No readahead */ | 35 | .ra_pages = 0, /* No readahead */ |
33 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, | 36 | .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, |
34 | }; | 37 | }; |
35 | 38 | ||
36 | static const struct inode_operations sysfs_inode_operations ={ | 39 | static const struct inode_operations sysfs_inode_operations ={ |
37 | .setattr = sysfs_setattr, | 40 | .setattr = sysfs_setattr, |
41 | .setxattr = sysfs_setxattr, | ||
38 | }; | 42 | }; |
39 | 43 | ||
40 | int __init sysfs_inode_init(void) | 44 | int __init sysfs_inode_init(void) |
@@ -42,18 +46,37 @@ int __init sysfs_inode_init(void) | |||
42 | return bdi_init(&sysfs_backing_dev_info); | 46 | return bdi_init(&sysfs_backing_dev_info); |
43 | } | 47 | } |
44 | 48 | ||
49 | struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd) | ||
50 | { | ||
51 | struct sysfs_inode_attrs *attrs; | ||
52 | struct iattr *iattrs; | ||
53 | |||
54 | attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL); | ||
55 | if (!attrs) | ||
56 | return NULL; | ||
57 | iattrs = &attrs->ia_iattr; | ||
58 | |||
59 | /* assign default attributes */ | ||
60 | iattrs->ia_mode = sd->s_mode; | ||
61 | iattrs->ia_uid = 0; | ||
62 | iattrs->ia_gid = 0; | ||
63 | iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME; | ||
64 | |||
65 | return attrs; | ||
66 | } | ||
45 | int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) | 67 | int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) |
46 | { | 68 | { |
47 | struct inode * inode = dentry->d_inode; | 69 | struct inode * inode = dentry->d_inode; |
48 | struct sysfs_dirent * sd = dentry->d_fsdata; | 70 | struct sysfs_dirent * sd = dentry->d_fsdata; |
49 | struct iattr * sd_iattr; | 71 | struct sysfs_inode_attrs *sd_attrs; |
72 | struct iattr *iattrs; | ||
50 | unsigned int ia_valid = iattr->ia_valid; | 73 | unsigned int ia_valid = iattr->ia_valid; |
51 | int error; | 74 | int error; |
52 | 75 | ||
53 | if (!sd) | 76 | if (!sd) |
54 | return -EINVAL; | 77 | return -EINVAL; |
55 | 78 | ||
56 | sd_iattr = sd->s_iattr; | 79 | sd_attrs = sd->s_iattr; |
57 | 80 | ||
58 | error = inode_change_ok(inode, iattr); | 81 | error = inode_change_ok(inode, iattr); |
59 | if (error) | 82 | if (error) |
@@ -65,42 +88,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) | |||
65 | if (error) | 88 | if (error) |
66 | return error; | 89 | return error; |
67 | 90 | ||
68 | if (!sd_iattr) { | 91 | if (!sd_attrs) { |
69 | /* setting attributes for the first time, allocate now */ | 92 | /* setting attributes for the first time, allocate now */ |
70 | sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); | 93 | sd_attrs = sysfs_init_inode_attrs(sd); |
71 | if (!sd_iattr) | 94 | if (!sd_attrs) |
72 | return -ENOMEM; | 95 | return -ENOMEM; |
73 | /* assign default attributes */ | 96 | sd->s_iattr = sd_attrs; |
74 | sd_iattr->ia_mode = sd->s_mode; | 97 | } else { |
75 | sd_iattr->ia_uid = 0; | 98 | /* attributes were changed at least once in past */ |
76 | sd_iattr->ia_gid = 0; | 99 | iattrs = &sd_attrs->ia_iattr; |
77 | sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; | 100 | |
78 | sd->s_iattr = sd_iattr; | 101 | if (ia_valid & ATTR_UID) |
102 | iattrs->ia_uid = iattr->ia_uid; | ||
103 | if (ia_valid & ATTR_GID) | ||
104 | iattrs->ia_gid = iattr->ia_gid; | ||
105 | if (ia_valid & ATTR_ATIME) | ||
106 | iattrs->ia_atime = timespec_trunc(iattr->ia_atime, | ||
107 | inode->i_sb->s_time_gran); | ||
108 | if (ia_valid & ATTR_MTIME) | ||
109 | iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime, | ||
110 | inode->i_sb->s_time_gran); | ||
111 | if (ia_valid & ATTR_CTIME) | ||
112 | iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime, | ||
113 | inode->i_sb->s_time_gran); | ||
114 | if (ia_valid & ATTR_MODE) { | ||
115 | umode_t mode = iattr->ia_mode; | ||
116 | |||
117 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | ||
118 | mode &= ~S_ISGID; | ||
119 | iattrs->ia_mode = sd->s_mode = mode; | ||
120 | } | ||
79 | } | 121 | } |
122 | return error; | ||
123 | } | ||
80 | 124 | ||
81 | /* attributes were changed atleast once in past */ | 125 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
82 | 126 | size_t size, int flags) | |
83 | if (ia_valid & ATTR_UID) | 127 | { |
84 | sd_iattr->ia_uid = iattr->ia_uid; | 128 | struct sysfs_dirent *sd = dentry->d_fsdata; |
85 | if (ia_valid & ATTR_GID) | 129 | struct sysfs_inode_attrs *iattrs; |
86 | sd_iattr->ia_gid = iattr->ia_gid; | 130 | void *secdata; |
87 | if (ia_valid & ATTR_ATIME) | 131 | int error; |
88 | sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, | 132 | u32 secdata_len = 0; |
89 | inode->i_sb->s_time_gran); | 133 | |
90 | if (ia_valid & ATTR_MTIME) | 134 | if (!sd) |
91 | sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, | 135 | return -EINVAL; |
92 | inode->i_sb->s_time_gran); | 136 | if (!sd->s_iattr) |
93 | if (ia_valid & ATTR_CTIME) | 137 | sd->s_iattr = sysfs_init_inode_attrs(sd); |
94 | sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, | 138 | if (!sd->s_iattr) |
95 | inode->i_sb->s_time_gran); | 139 | return -ENOMEM; |
96 | if (ia_valid & ATTR_MODE) { | 140 | |
97 | umode_t mode = iattr->ia_mode; | 141 | iattrs = sd->s_iattr; |
98 | 142 | ||
99 | if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) | 143 | if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) { |
100 | mode &= ~S_ISGID; | 144 | const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; |
101 | sd_iattr->ia_mode = sd->s_mode = mode; | 145 | error = security_inode_setsecurity(dentry->d_inode, suffix, |
102 | } | 146 | value, size, flags); |
147 | if (error) | ||
148 | goto out; | ||
149 | error = security_inode_getsecctx(dentry->d_inode, | ||
150 | &secdata, &secdata_len); | ||
151 | if (error) | ||
152 | goto out; | ||
153 | if (iattrs->ia_secdata) | ||
154 | security_release_secctx(iattrs->ia_secdata, | ||
155 | iattrs->ia_secdata_len); | ||
156 | iattrs->ia_secdata = secdata; | ||
157 | iattrs->ia_secdata_len = secdata_len; | ||
103 | 158 | ||
159 | } else | ||
160 | return -EINVAL; | ||
161 | out: | ||
104 | return error; | 162 | return error; |
105 | } | 163 | } |
106 | 164 | ||
@@ -146,6 +204,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd) | |||
146 | static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) | 204 | static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) |
147 | { | 205 | { |
148 | struct bin_attribute *bin_attr; | 206 | struct bin_attribute *bin_attr; |
207 | struct sysfs_inode_attrs *iattrs; | ||
149 | 208 | ||
150 | inode->i_private = sysfs_get(sd); | 209 | inode->i_private = sysfs_get(sd); |
151 | inode->i_mapping->a_ops = &sysfs_aops; | 210 | inode->i_mapping->a_ops = &sysfs_aops; |
@@ -154,16 +213,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) | |||
154 | inode->i_ino = sd->s_ino; | 213 | inode->i_ino = sd->s_ino; |
155 | lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); | 214 | lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); |
156 | 215 | ||
157 | if (sd->s_iattr) { | 216 | iattrs = sd->s_iattr; |
217 | if (iattrs) { | ||
158 | /* sysfs_dirent has non-default attributes | 218 | /* sysfs_dirent has non-default attributes |
159 | * get them for the new inode from persistent copy | 219 | * get them for the new inode from persistent copy |
160 | * in sysfs_dirent | 220 | * in sysfs_dirent |
161 | */ | 221 | */ |
162 | set_inode_attr(inode, sd->s_iattr); | 222 | set_inode_attr(inode, &iattrs->ia_iattr); |
223 | if (iattrs->ia_secdata) | ||
224 | security_inode_notifysecctx(inode, | ||
225 | iattrs->ia_secdata, | ||
226 | iattrs->ia_secdata_len); | ||
163 | } else | 227 | } else |
164 | set_default_inode_attr(inode, sd->s_mode); | 228 | set_default_inode_attr(inode, sd->s_mode); |
165 | 229 | ||
166 | |||
167 | /* initialize inode according to type */ | 230 | /* initialize inode according to type */ |
168 | switch (sysfs_type(sd)) { | 231 | switch (sysfs_type(sd)) { |
169 | case SYSFS_DIR: | 232 | case SYSFS_DIR: |
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 1d897ad808e0..c5081ad77026 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kobject.h> | 16 | #include <linux/kobject.h> |
17 | #include <linux/namei.h> | 17 | #include <linux/namei.h> |
18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
19 | #include <linux/security.h> | ||
19 | 20 | ||
20 | #include "sysfs.h" | 21 | #include "sysfs.h" |
21 | 22 | ||
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co | |||
209 | } | 210 | } |
210 | 211 | ||
211 | const struct inode_operations sysfs_symlink_inode_operations = { | 212 | const struct inode_operations sysfs_symlink_inode_operations = { |
213 | .setxattr = sysfs_setxattr, | ||
212 | .readlink = generic_readlink, | 214 | .readlink = generic_readlink, |
213 | .follow_link = sysfs_follow_link, | 215 | .follow_link = sysfs_follow_link, |
214 | .put_link = sysfs_put_link, | 216 | .put_link = sysfs_put_link, |
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 3fa0d98481e2..af4c4e7482ac 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
@@ -8,6 +8,8 @@ | |||
8 | * This file is released under the GPLv2. | 8 | * This file is released under the GPLv2. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/fs.h> | ||
12 | |||
11 | struct sysfs_open_dirent; | 13 | struct sysfs_open_dirent; |
12 | 14 | ||
13 | /* type-specific structures for sysfs_dirent->s_* union members */ | 15 | /* type-specific structures for sysfs_dirent->s_* union members */ |
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr { | |||
31 | struct hlist_head buffers; | 33 | struct hlist_head buffers; |
32 | }; | 34 | }; |
33 | 35 | ||
36 | struct sysfs_inode_attrs { | ||
37 | struct iattr ia_iattr; | ||
38 | void *ia_secdata; | ||
39 | u32 ia_secdata_len; | ||
40 | }; | ||
41 | |||
34 | /* | 42 | /* |
35 | * sysfs_dirent - the building block of sysfs hierarchy. Each and | 43 | * sysfs_dirent - the building block of sysfs hierarchy. Each and |
36 | * every sysfs node is represented by single sysfs_dirent. | 44 | * every sysfs node is represented by single sysfs_dirent. |
@@ -56,7 +64,7 @@ struct sysfs_dirent { | |||
56 | unsigned int s_flags; | 64 | unsigned int s_flags; |
57 | ino_t s_ino; | 65 | ino_t s_ino; |
58 | umode_t s_mode; | 66 | umode_t s_mode; |
59 | struct iattr *s_iattr; | 67 | struct sysfs_inode_attrs *s_iattr; |
60 | }; | 68 | }; |
61 | 69 | ||
62 | #define SD_DEACTIVATED_BIAS INT_MIN | 70 | #define SD_DEACTIVATED_BIAS INT_MIN |
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd) | |||
148 | struct inode *sysfs_get_inode(struct sysfs_dirent *sd); | 156 | struct inode *sysfs_get_inode(struct sysfs_dirent *sd); |
149 | void sysfs_delete_inode(struct inode *inode); | 157 | void sysfs_delete_inode(struct inode *inode); |
150 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); | 158 | int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); |
159 | int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
160 | size_t size, int flags); | ||
151 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); | 161 | int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); |
152 | int sysfs_inode_init(void); | 162 | int sysfs_inode_init(void); |
153 | 163 | ||
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index eaf6d891d46f..1c8991b0db13 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -65,26 +65,14 @@ | |||
65 | static int shrink_liability(struct ubifs_info *c, int nr_to_write) | 65 | static int shrink_liability(struct ubifs_info *c, int nr_to_write) |
66 | { | 66 | { |
67 | int nr_written; | 67 | int nr_written; |
68 | struct writeback_control wbc = { | ||
69 | .sync_mode = WB_SYNC_NONE, | ||
70 | .range_end = LLONG_MAX, | ||
71 | .nr_to_write = nr_to_write, | ||
72 | }; | ||
73 | |||
74 | generic_sync_sb_inodes(c->vfs_sb, &wbc); | ||
75 | nr_written = nr_to_write - wbc.nr_to_write; | ||
76 | 68 | ||
69 | nr_written = writeback_inodes_sb(c->vfs_sb); | ||
77 | if (!nr_written) { | 70 | if (!nr_written) { |
78 | /* | 71 | /* |
79 | * Re-try again but wait on pages/inodes which are being | 72 | * Re-try again but wait on pages/inodes which are being |
80 | * written-back concurrently (e.g., by pdflush). | 73 | * written-back concurrently (e.g., by pdflush). |
81 | */ | 74 | */ |
82 | memset(&wbc, 0, sizeof(struct writeback_control)); | 75 | nr_written = sync_inodes_sb(c->vfs_sb); |
83 | wbc.sync_mode = WB_SYNC_ALL; | ||
84 | wbc.range_end = LLONG_MAX; | ||
85 | wbc.nr_to_write = nr_to_write; | ||
86 | generic_sync_sb_inodes(c->vfs_sb, &wbc); | ||
87 | nr_written = nr_to_write - wbc.nr_to_write; | ||
88 | } | 76 | } |
89 | 77 | ||
90 | dbg_budg("%d pages were written back", nr_written); | 78 | dbg_budg("%d pages were written back", nr_written); |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 26d2e0d80465..51763aa8f4de 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -438,12 +438,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
438 | { | 438 | { |
439 | int i, err; | 439 | int i, err; |
440 | struct ubifs_info *c = sb->s_fs_info; | 440 | struct ubifs_info *c = sb->s_fs_info; |
441 | struct writeback_control wbc = { | ||
442 | .sync_mode = WB_SYNC_ALL, | ||
443 | .range_start = 0, | ||
444 | .range_end = LLONG_MAX, | ||
445 | .nr_to_write = LONG_MAX, | ||
446 | }; | ||
447 | 441 | ||
448 | /* | 442 | /* |
449 | * Zero @wait is just an advisory thing to help the file system shove | 443 | * Zero @wait is just an advisory thing to help the file system shove |
@@ -462,7 +456,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
462 | * the user be able to get more accurate results of 'statfs()' after | 456 | * the user be able to get more accurate results of 'statfs()' after |
463 | * they synchronize the file system. | 457 | * they synchronize the file system. |
464 | */ | 458 | */ |
465 | generic_sync_sb_inodes(sb, &wbc); | 459 | sync_inodes_sb(sb); |
466 | 460 | ||
467 | /* | 461 | /* |
468 | * Synchronize write buffers, because 'ubifs_run_commit()' does not | 462 | * Synchronize write buffers, because 'ubifs_run_commit()' does not |
@@ -1971,6 +1965,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) | |||
1971 | * | 1965 | * |
1972 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. | 1966 | * Read-ahead will be disabled because @c->bdi.ra_pages is 0. |
1973 | */ | 1967 | */ |
1968 | c->bdi.name = "ubifs", | ||
1974 | c->bdi.capabilities = BDI_CAP_MAP_COPY; | 1969 | c->bdi.capabilities = BDI_CAP_MAP_COPY; |
1975 | c->bdi.unplug_io_fn = default_unplug_io_fn; | 1970 | c->bdi.unplug_io_fn = default_unplug_io_fn; |
1976 | err = bdi_init(&c->bdi); | 1971 | err = bdi_init(&c->bdi); |
diff --git a/fs/xattr.c b/fs/xattr.c index 1c3d0af59ddf..6d4f6d3449fb 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask) | |||
66 | return inode_permission(inode, mask); | 66 | return inode_permission(inode, mask); |
67 | } | 67 | } |
68 | 68 | ||
69 | int | 69 | /** |
70 | vfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 70 | * __vfs_setxattr_noperm - perform setxattr operation without performing |
71 | size_t size, int flags) | 71 | * permission checks. |
72 | * | ||
73 | * @dentry - object to perform setxattr on | ||
74 | * @name - xattr name to set | ||
75 | * @value - value to set @name to | ||
76 | * @size - size of @value | ||
77 | * @flags - flags to pass into filesystem operations | ||
78 | * | ||
79 | * returns the result of the internal setxattr or setsecurity operations. | ||
80 | * | ||
81 | * This function requires the caller to lock the inode's i_mutex before it | ||
82 | * is executed. It also assumes that the caller will make the appropriate | ||
83 | * permission checks. | ||
84 | */ | ||
85 | int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, | ||
86 | const void *value, size_t size, int flags) | ||
72 | { | 87 | { |
73 | struct inode *inode = dentry->d_inode; | 88 | struct inode *inode = dentry->d_inode; |
74 | int error; | 89 | int error = -EOPNOTSUPP; |
75 | |||
76 | error = xattr_permission(inode, name, MAY_WRITE); | ||
77 | if (error) | ||
78 | return error; | ||
79 | 90 | ||
80 | mutex_lock(&inode->i_mutex); | ||
81 | error = security_inode_setxattr(dentry, name, value, size, flags); | ||
82 | if (error) | ||
83 | goto out; | ||
84 | error = -EOPNOTSUPP; | ||
85 | if (inode->i_op->setxattr) { | 91 | if (inode->i_op->setxattr) { |
86 | error = inode->i_op->setxattr(dentry, name, value, size, flags); | 92 | error = inode->i_op->setxattr(dentry, name, value, size, flags); |
87 | if (!error) { | 93 | if (!error) { |
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value, | |||
97 | if (!error) | 103 | if (!error) |
98 | fsnotify_xattr(dentry); | 104 | fsnotify_xattr(dentry); |
99 | } | 105 | } |
106 | |||
107 | return error; | ||
108 | } | ||
109 | |||
110 | |||
111 | int | ||
112 | vfs_setxattr(struct dentry *dentry, const char *name, const void *value, | ||
113 | size_t size, int flags) | ||
114 | { | ||
115 | struct inode *inode = dentry->d_inode; | ||
116 | int error; | ||
117 | |||
118 | error = xattr_permission(inode, name, MAY_WRITE); | ||
119 | if (error) | ||
120 | return error; | ||
121 | |||
122 | mutex_lock(&inode->i_mutex); | ||
123 | error = security_inode_setxattr(dentry, name, value, size, flags); | ||
124 | if (error) | ||
125 | goto out; | ||
126 | |||
127 | error = __vfs_setxattr_noperm(dentry, name, value, size, flags); | ||
128 | |||
100 | out: | 129 | out: |
101 | mutex_unlock(&inode->i_mutex); | 130 | mutex_unlock(&inode->i_mutex); |
102 | return error; | 131 | return error; |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 0882d166239a..eafcc7c18706 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -619,7 +619,7 @@ xfs_file_compat_ioctl( | |||
619 | case XFS_IOC_GETVERSION_32: | 619 | case XFS_IOC_GETVERSION_32: |
620 | cmd = _NATIVE_IOC(cmd, long); | 620 | cmd = _NATIVE_IOC(cmd, long); |
621 | return xfs_file_ioctl(filp, cmd, p); | 621 | return xfs_file_ioctl(filp, cmd, p); |
622 | case XFS_IOC_SWAPEXT: { | 622 | case XFS_IOC_SWAPEXT_32: { |
623 | struct xfs_swapext sxp; | 623 | struct xfs_swapext sxp; |
624 | struct compat_xfs_swapext __user *sxu = arg; | 624 | struct compat_xfs_swapext __user *sxu = arg; |
625 | 625 | ||
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 8070b34cc287..6c32f1d63d8c 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -485,14 +485,6 @@ xfs_vn_put_link( | |||
485 | } | 485 | } |
486 | 486 | ||
487 | STATIC int | 487 | STATIC int |
488 | xfs_vn_permission( | ||
489 | struct inode *inode, | ||
490 | int mask) | ||
491 | { | ||
492 | return generic_permission(inode, mask, xfs_check_acl); | ||
493 | } | ||
494 | |||
495 | STATIC int | ||
496 | xfs_vn_getattr( | 488 | xfs_vn_getattr( |
497 | struct vfsmount *mnt, | 489 | struct vfsmount *mnt, |
498 | struct dentry *dentry, | 490 | struct dentry *dentry, |
@@ -696,7 +688,7 @@ xfs_vn_fiemap( | |||
696 | } | 688 | } |
697 | 689 | ||
698 | static const struct inode_operations xfs_inode_operations = { | 690 | static const struct inode_operations xfs_inode_operations = { |
699 | .permission = xfs_vn_permission, | 691 | .check_acl = xfs_check_acl, |
700 | .truncate = xfs_vn_truncate, | 692 | .truncate = xfs_vn_truncate, |
701 | .getattr = xfs_vn_getattr, | 693 | .getattr = xfs_vn_getattr, |
702 | .setattr = xfs_vn_setattr, | 694 | .setattr = xfs_vn_setattr, |
@@ -724,7 +716,7 @@ static const struct inode_operations xfs_dir_inode_operations = { | |||
724 | .rmdir = xfs_vn_unlink, | 716 | .rmdir = xfs_vn_unlink, |
725 | .mknod = xfs_vn_mknod, | 717 | .mknod = xfs_vn_mknod, |
726 | .rename = xfs_vn_rename, | 718 | .rename = xfs_vn_rename, |
727 | .permission = xfs_vn_permission, | 719 | .check_acl = xfs_check_acl, |
728 | .getattr = xfs_vn_getattr, | 720 | .getattr = xfs_vn_getattr, |
729 | .setattr = xfs_vn_setattr, | 721 | .setattr = xfs_vn_setattr, |
730 | .setxattr = generic_setxattr, | 722 | .setxattr = generic_setxattr, |
@@ -749,7 +741,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { | |||
749 | .rmdir = xfs_vn_unlink, | 741 | .rmdir = xfs_vn_unlink, |
750 | .mknod = xfs_vn_mknod, | 742 | .mknod = xfs_vn_mknod, |
751 | .rename = xfs_vn_rename, | 743 | .rename = xfs_vn_rename, |
752 | .permission = xfs_vn_permission, | 744 | .check_acl = xfs_check_acl, |
753 | .getattr = xfs_vn_getattr, | 745 | .getattr = xfs_vn_getattr, |
754 | .setattr = xfs_vn_setattr, | 746 | .setattr = xfs_vn_setattr, |
755 | .setxattr = generic_setxattr, | 747 | .setxattr = generic_setxattr, |
@@ -762,7 +754,7 @@ static const struct inode_operations xfs_symlink_inode_operations = { | |||
762 | .readlink = generic_readlink, | 754 | .readlink = generic_readlink, |
763 | .follow_link = xfs_vn_follow_link, | 755 | .follow_link = xfs_vn_follow_link, |
764 | .put_link = xfs_vn_put_link, | 756 | .put_link = xfs_vn_put_link, |
765 | .permission = xfs_vn_permission, | 757 | .check_acl = xfs_check_acl, |
766 | .getattr = xfs_vn_getattr, | 758 | .getattr = xfs_vn_getattr, |
767 | .setattr = xfs_vn_setattr, | 759 | .setattr = xfs_vn_setattr, |
768 | .setxattr = generic_setxattr, | 760 | .setxattr = generic_setxattr, |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index b619d6b8ca43..98ef624d9baf 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -708,6 +708,16 @@ xfs_reclaim_inode( | |||
708 | return 0; | 708 | return 0; |
709 | } | 709 | } |
710 | 710 | ||
711 | void | ||
712 | __xfs_inode_set_reclaim_tag( | ||
713 | struct xfs_perag *pag, | ||
714 | struct xfs_inode *ip) | ||
715 | { | ||
716 | radix_tree_tag_set(&pag->pag_ici_root, | ||
717 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), | ||
718 | XFS_ICI_RECLAIM_TAG); | ||
719 | } | ||
720 | |||
711 | /* | 721 | /* |
712 | * We set the inode flag atomically with the radix tree tag. | 722 | * We set the inode flag atomically with the radix tree tag. |
713 | * Once we get tag lookups on the radix tree, this inode flag | 723 | * Once we get tag lookups on the radix tree, this inode flag |
@@ -722,8 +732,7 @@ xfs_inode_set_reclaim_tag( | |||
722 | 732 | ||
723 | read_lock(&pag->pag_ici_lock); | 733 | read_lock(&pag->pag_ici_lock); |
724 | spin_lock(&ip->i_flags_lock); | 734 | spin_lock(&ip->i_flags_lock); |
725 | radix_tree_tag_set(&pag->pag_ici_root, | 735 | __xfs_inode_set_reclaim_tag(pag, ip); |
726 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
727 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 736 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
728 | spin_unlock(&ip->i_flags_lock); | 737 | spin_unlock(&ip->i_flags_lock); |
729 | read_unlock(&pag->pag_ici_lock); | 738 | read_unlock(&pag->pag_ici_lock); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 2a10301c99c7..59120602588a 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -48,6 +48,7 @@ int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); | |||
48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); | 48 | int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); |
49 | 49 | ||
50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); | 50 | void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); |
51 | void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | ||
51 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); | 52 | void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); |
52 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 53 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
53 | struct xfs_inode *ip); | 54 | struct xfs_inode *ip); |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 34ec86923f7e..ecbf8b4d2e2e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -191,80 +191,82 @@ xfs_iget_cache_hit( | |||
191 | int flags, | 191 | int flags, |
192 | int lock_flags) __releases(pag->pag_ici_lock) | 192 | int lock_flags) __releases(pag->pag_ici_lock) |
193 | { | 193 | { |
194 | struct inode *inode = VFS_I(ip); | ||
194 | struct xfs_mount *mp = ip->i_mount; | 195 | struct xfs_mount *mp = ip->i_mount; |
195 | int error = EAGAIN; | 196 | int error; |
197 | |||
198 | spin_lock(&ip->i_flags_lock); | ||
196 | 199 | ||
197 | /* | 200 | /* |
198 | * If INEW is set this inode is being set up | 201 | * If we are racing with another cache hit that is currently |
199 | * If IRECLAIM is set this inode is being torn down | 202 | * instantiating this inode or currently recycling it out of |
200 | * Pause and try again. | 203 | * reclaimable state, wait for the initialisation to complete |
204 | * before continuing. | ||
205 | * | ||
206 | * XXX(hch): eventually we should do something equivalent to | ||
207 | * wait_on_inode to wait for these flags to be cleared | ||
208 | * instead of polling for it. | ||
201 | */ | 209 | */ |
202 | if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) { | 210 | if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { |
203 | XFS_STATS_INC(xs_ig_frecycle); | 211 | XFS_STATS_INC(xs_ig_frecycle); |
212 | error = EAGAIN; | ||
204 | goto out_error; | 213 | goto out_error; |
205 | } | 214 | } |
206 | 215 | ||
207 | /* If IRECLAIMABLE is set, we've torn down the vfs inode part */ | 216 | /* |
208 | if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | 217 | * If lookup is racing with unlink return an error immediately. |
209 | 218 | */ | |
210 | /* | 219 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { |
211 | * If lookup is racing with unlink, then we should return an | 220 | error = ENOENT; |
212 | * error immediately so we don't remove it from the reclaim | 221 | goto out_error; |
213 | * list and potentially leak the inode. | 222 | } |
214 | */ | ||
215 | if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) { | ||
216 | error = ENOENT; | ||
217 | goto out_error; | ||
218 | } | ||
219 | 223 | ||
224 | /* | ||
225 | * If IRECLAIMABLE is set, we've torn down the VFS inode already. | ||
226 | * Need to carefully get it back into useable state. | ||
227 | */ | ||
228 | if (ip->i_flags & XFS_IRECLAIMABLE) { | ||
220 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); | 229 | xfs_itrace_exit_tag(ip, "xfs_iget.alloc"); |
221 | 230 | ||
222 | /* | 231 | /* |
223 | * We need to re-initialise the VFS inode as it has been | 232 | * We need to set XFS_INEW atomically with clearing the |
224 | * 'freed' by the VFS. Do this here so we can deal with | 233 | * reclaimable tag so that we do have an indicator of the |
225 | * errors cleanly, then tag it so it can be set up correctly | 234 | * inode still being initialized. |
226 | * later. | ||
227 | */ | 235 | */ |
228 | if (inode_init_always(mp->m_super, VFS_I(ip))) { | 236 | ip->i_flags |= XFS_INEW; |
229 | error = ENOMEM; | 237 | ip->i_flags &= ~XFS_IRECLAIMABLE; |
230 | goto out_error; | 238 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
231 | } | ||
232 | 239 | ||
233 | /* | 240 | spin_unlock(&ip->i_flags_lock); |
234 | * We must set the XFS_INEW flag before clearing the | 241 | read_unlock(&pag->pag_ici_lock); |
235 | * XFS_IRECLAIMABLE flag so that if a racing lookup does | ||
236 | * not find the XFS_IRECLAIMABLE above but has the igrab() | ||
237 | * below succeed we can safely check XFS_INEW to detect | ||
238 | * that this inode is still being initialised. | ||
239 | */ | ||
240 | xfs_iflags_set(ip, XFS_INEW); | ||
241 | xfs_iflags_clear(ip, XFS_IRECLAIMABLE); | ||
242 | 242 | ||
243 | /* clear the radix tree reclaim flag as well. */ | 243 | error = -inode_init_always(mp->m_super, inode); |
244 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 244 | if (error) { |
245 | } else if (!igrab(VFS_I(ip))) { | 245 | /* |
246 | * Re-initializing the inode failed, and we are in deep | ||
247 | * trouble. Try to re-add it to the reclaim list. | ||
248 | */ | ||
249 | read_lock(&pag->pag_ici_lock); | ||
250 | spin_lock(&ip->i_flags_lock); | ||
251 | |||
252 | ip->i_flags &= ~XFS_INEW; | ||
253 | ip->i_flags |= XFS_IRECLAIMABLE; | ||
254 | __xfs_inode_set_reclaim_tag(pag, ip); | ||
255 | goto out_error; | ||
256 | } | ||
257 | inode->i_state = I_LOCK|I_NEW; | ||
258 | } else { | ||
246 | /* If the VFS inode is being torn down, pause and try again. */ | 259 | /* If the VFS inode is being torn down, pause and try again. */ |
247 | XFS_STATS_INC(xs_ig_frecycle); | 260 | if (!igrab(inode)) { |
248 | goto out_error; | 261 | error = EAGAIN; |
249 | } else if (xfs_iflags_test(ip, XFS_INEW)) { | 262 | goto out_error; |
250 | /* | 263 | } |
251 | * We are racing with another cache hit that is | ||
252 | * currently recycling this inode out of the XFS_IRECLAIMABLE | ||
253 | * state. Wait for the initialisation to complete before | ||
254 | * continuing. | ||
255 | */ | ||
256 | wait_on_inode(VFS_I(ip)); | ||
257 | } | ||
258 | 264 | ||
259 | if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) { | 265 | /* We've got a live one. */ |
260 | error = ENOENT; | 266 | spin_unlock(&ip->i_flags_lock); |
261 | iput(VFS_I(ip)); | 267 | read_unlock(&pag->pag_ici_lock); |
262 | goto out_error; | ||
263 | } | 268 | } |
264 | 269 | ||
265 | /* We've got a live one. */ | ||
266 | read_unlock(&pag->pag_ici_lock); | ||
267 | |||
268 | if (lock_flags != 0) | 270 | if (lock_flags != 0) |
269 | xfs_ilock(ip, lock_flags); | 271 | xfs_ilock(ip, lock_flags); |
270 | 272 | ||
@@ -274,6 +276,7 @@ xfs_iget_cache_hit( | |||
274 | return 0; | 276 | return 0; |
275 | 277 | ||
276 | out_error: | 278 | out_error: |
279 | spin_unlock(&ip->i_flags_lock); | ||
277 | read_unlock(&pag->pag_ici_lock); | 280 | read_unlock(&pag->pag_ici_lock); |
278 | return error; | 281 | return error; |
279 | } | 282 | } |