diff options
author | Nick Piggin <npiggin@suse.de> | 2009-01-04 15:00:53 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-04 16:33:20 -0500 |
commit | 54566b2c1594c2326a645a3551f9d989f7ba3c5e (patch) | |
tree | b373f3283fe5e197d0df29cd6b645c35adf1076c /fs/namei.c | |
parent | e687d691cb3790d25e31c74f5941fd7c565e9df5 (diff) |
fs: symlink write_begin allocation context fix
With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened. They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim. This bug could
cause filesystem deadlocks.
The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called. It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock. The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.
Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on
this flag in their write_begin function. Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).
This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in its write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a
random example).
[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org> [2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
untouched to the grab_cache_page_write_begin() function. That
just simplifies everybody, and may even allow future expansion of the
logic. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/fs/namei.c b/fs/namei.c index dd5c9f0bf829..df2d3df4f049 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -2817,18 +2817,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) | |||
2817 | } | 2817 | } |
2818 | } | 2818 | } |
2819 | 2819 | ||
2820 | int __page_symlink(struct inode *inode, const char *symname, int len, | 2820 | /* |
2821 | gfp_t gfp_mask) | 2821 | * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS |
2822 | */ | ||
2823 | int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) | ||
2822 | { | 2824 | { |
2823 | struct address_space *mapping = inode->i_mapping; | 2825 | struct address_space *mapping = inode->i_mapping; |
2824 | struct page *page; | 2826 | struct page *page; |
2825 | void *fsdata; | 2827 | void *fsdata; |
2826 | int err; | 2828 | int err; |
2827 | char *kaddr; | 2829 | char *kaddr; |
2830 | unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; | ||
2831 | if (nofs) | ||
2832 | flags |= AOP_FLAG_NOFS; | ||
2828 | 2833 | ||
2829 | retry: | 2834 | retry: |
2830 | err = pagecache_write_begin(NULL, mapping, 0, len-1, | 2835 | err = pagecache_write_begin(NULL, mapping, 0, len-1, |
2831 | AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); | 2836 | flags, &page, &fsdata); |
2832 | if (err) | 2837 | if (err) |
2833 | goto fail; | 2838 | goto fail; |
2834 | 2839 | ||
@@ -2852,7 +2857,7 @@ fail: | |||
2852 | int page_symlink(struct inode *inode, const char *symname, int len) | 2857 | int page_symlink(struct inode *inode, const char *symname, int len) |
2853 | { | 2858 | { |
2854 | return __page_symlink(inode, symname, len, | 2859 | return __page_symlink(inode, symname, len, |
2855 | mapping_gfp_mask(inode->i_mapping)); | 2860 | !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); |
2856 | } | 2861 | } |
2857 | 2862 | ||
2858 | const struct inode_operations page_symlink_inode_operations = { | 2863 | const struct inode_operations page_symlink_inode_operations = { |