From 54566b2c1594c2326a645a3551f9d989f7ba3c5e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 4 Jan 2009 12:00:53 -0800 Subject: fs: symlink write_begin allocation context fix With the write_begin/write_end aops, page_symlink was broken because it could no longer pass a GFP_NOFS type mask into the point where the allocations happened. They are done in write_begin, which would always assume that the filesystem can be entered from reclaim. This bug could cause filesystem deadlocks. The funny thing with having a gfp_t mask there is that it doesn't really allow the caller to arbitrarily tinker with the context in which it can be called. It couldn't ever be GFP_ATOMIC, for example, because it needs to take the page lock. The only thing any callers care about is __GFP_FS anyway, so turn that into a single flag. Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on this flag in their write_begin function. Change __grab_cache_page to accept a nofs argument as well, to honour that flag (while we're there, change the name to grab_cache_page_write_begin which is more instructive and does away with random leading underscores). This is really a more flexible way to go in the end anyway -- if a filesystem happens to want any extra allocations aside from the pagecache ones in ints write_begin function, it may now use GFP_KERNEL (rather than GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a random example). [kosaki.motohiro@jp.fujitsu.com: fix ubifs] [kosaki.motohiro@jp.fujitsu.com: fix fuse] Signed-off-by: Nick Piggin Reviewed-by: KOSAKI Motohiro Cc: [2.6.28.x] Signed-off-by: KOSAKI Motohiro Signed-off-by: Andrew Morton [ Cleaned up the calling convention: just pass in the AOP flags untouched to the grab_cache_page_write_begin() function. That just simplifies everybody, and may even allow future expansion of the logic. - Linus ] Signed-off-by: Linus Torvalds --- fs/namei.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index dd5c9f0bf829..df2d3df4f049 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2817,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) } } -int __page_symlink(struct inode *inode, const char *symname, int len, - gfp_t gfp_mask) +/* + * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS + */ +int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) { struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; int err; char *kaddr; + unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; + if (nofs) + flags |= AOP_FLAG_NOFS; retry: err = pagecache_write_begin(NULL, mapping, 0, len-1, - AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + flags, &page, &fsdata); if (err) goto fail; @@ -2852,7 +2857,7 @@ fail: int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, - mapping_gfp_mask(inode->i_mapping)); + !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); } const struct inode_operations page_symlink_inode_operations = { -- cgit v1.2.2 From acfa4380efe77e290d3a96b11cd4c9f24f4fbb18 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 4 Dec 2008 10:06:33 -0500 Subject: inode->i_op is never NULL We used to have rather schizophrenic set of checks for NULL ->i_op even though it had been eliminated years ago. You'd need to go out of your way to set it to NULL explicitly _and_ a bunch of code would die on such inodes anyway. After killing two remaining places that still did that bogosity, all that crap can go away. Signed-off-by: Al Viro --- fs/namei.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'fs/namei.c') diff --git a/fs/namei.c b/fs/namei.c index dd5c9f0bf829..1f6656c3d1b9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -257,7 +257,7 @@ int inode_permission(struct inode *inode, int mask) return -EACCES; } - if (inode->i_op && inode->i_op->permission) + if (inode->i_op->permission) retval = inode->i_op->permission(inode, mask); else retval = generic_permission(inode, mask, NULL); @@ -432,7 +432,7 @@ static int exec_permission_lite(struct inode *inode) { umode_t mode = inode->i_mode; - if (inode->i_op && inode->i_op->permission) + if (inode->i_op->permission) return -EAGAIN; if (current_fsuid() == inode->i_uid) @@ -908,9 +908,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd) inode = next.dentry->d_inode; if (!inode) goto out_dput; - err = -ENOTDIR; - if (!inode->i_op) - goto out_dput; if (inode->i_op->follow_link) { err = do_follow_link(&next, nd); @@ -920,9 +917,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd) inode = nd->path.dentry->d_inode; if (!inode) break; - err = -ENOTDIR; - if (!inode->i_op) - break; } else path_to_nameidata(&next, nd); err = -ENOTDIR; @@ -961,7 +955,7 @@ last_component: break; inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) - && inode && inode->i_op && inode->i_op->follow_link) { + && inode && inode->i_op->follow_link) { err = do_follow_link(&next, nd); if (err) goto return_err; @@ -973,7 +967,7 @@ last_component: break; if (lookup_flags & LOOKUP_DIRECTORY) { err = -ENOTDIR; - if (!inode->i_op || !inode->i_op->lookup) + if (!inode->i_op->lookup) break; } goto return_base; @@ -1469,7 +1463,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, if (error) return error; - if (!dir->i_op || !dir->i_op->create) + if (!dir->i_op->create) return -EACCES; /* shouldn't it be ENOSYS? */ mode &= S_IALLUGO; mode |= S_IFREG; @@ -1752,7 +1746,7 @@ do_last: error = -ENOENT; if (!path.dentry->d_inode) goto exit_dput; - if (path.dentry->d_inode->i_op && path.dentry->d_inode->i_op->follow_link) + if (path.dentry->d_inode->i_op->follow_link) goto do_link; path_to_nameidata(&path, &nd); @@ -1933,7 +1927,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) return -EPERM; - if (!dir->i_op || !dir->i_op->mknod) + if (!dir->i_op->mknod) return -EPERM; error = devcgroup_inode_mknod(mode, dev); @@ -2035,7 +2029,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (error) return error; - if (!dir->i_op || !dir->i_op->mkdir) + if (!dir->i_op->mkdir) return -EPERM; mode &= (S_IRWXUGO|S_ISVTX); @@ -2126,7 +2120,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) return error; - if (!dir->i_op || !dir->i_op->rmdir) + if (!dir->i_op->rmdir) return -EPERM; DQUOT_INIT(dir); @@ -2213,7 +2207,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) if (error) return error; - if (!dir->i_op || !dir->i_op->unlink) + if (!dir->i_op->unlink) return -EPERM; DQUOT_INIT(dir); @@ -2320,7 +2314,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) if (error) return error; - if (!dir->i_op || !dir->i_op->symlink) + if (!dir->i_op->symlink) return -EPERM; error = security_inode_symlink(dir, dentry, oldname); @@ -2401,7 +2395,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de */ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; - if (!dir->i_op || !dir->i_op->link) + if (!dir->i_op->link) return -EPERM; if (S_ISDIR(inode->i_mode)) return -EPERM; @@ -2608,7 +2602,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - if (!old_dir->i_op || !old_dir->i_op->rename) + if (!old_dir->i_op->rename) return -EPERM; DQUOT_INIT(old_dir); -- cgit v1.2.2