diff options
author | Nick Piggin <npiggin@suse.de> | 2009-01-04 15:00:53 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-04 16:33:20 -0500 |
commit | 54566b2c1594c2326a645a3551f9d989f7ba3c5e (patch) | |
tree | b373f3283fe5e197d0df29cd6b645c35adf1076c /include | |
parent | e687d691cb3790d25e31c74f5941fd7c565e9df5 (diff) |
fs: symlink write_begin allocation context fix
With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened. They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim. This bug could
cause filesystem deadlocks.
The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called. It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock. The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.
Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on
this flag in their write_begin function. Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).
This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in ints write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a
random example).
[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org> [2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
untouched to the grab_cache_page_write_begin() function. That
just simplifies everybody, and may even allow future expansion of the
logic. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/fs.h | 5 | ||||
-rw-r--r-- | include/linux/pagemap.h | 3 |
2 files changed, 6 insertions, 2 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index e2170ee21e18..f2a3010140e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -423,6 +423,9 @@ enum positive_aop_returns { | |||
423 | 423 | ||
424 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ | 424 | #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ |
425 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ | 425 | #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ |
426 | #define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct | ||
427 | * helper code (eg buffer layer) | ||
428 | * to clear GFP_FS from alloc */ | ||
426 | 429 | ||
427 | /* | 430 | /* |
428 | * oh the beauties of C type declarations. | 431 | * oh the beauties of C type declarations. |
@@ -2035,7 +2038,7 @@ extern int page_readlink(struct dentry *, char __user *, int); | |||
2035 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); | 2038 | extern void *page_follow_link_light(struct dentry *, struct nameidata *); |
2036 | extern void page_put_link(struct dentry *, struct nameidata *, void *); | 2039 | extern void page_put_link(struct dentry *, struct nameidata *, void *); |
2037 | extern int __page_symlink(struct inode *inode, const char *symname, int len, | 2040 | extern int __page_symlink(struct inode *inode, const char *symname, int len, |
2038 | gfp_t gfp_mask); | 2041 | int nofs); |
2039 | extern int page_symlink(struct inode *inode, const char *symname, int len); | 2042 | extern int page_symlink(struct inode *inode, const char *symname, int len); |
2040 | extern const struct inode_operations page_symlink_inode_operations; | 2043 | extern const struct inode_operations page_symlink_inode_operations; |
2041 | extern int generic_readlink(struct dentry *, char __user *, int); | 2044 | extern int generic_readlink(struct dentry *, char __user *, int); |
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 709742be02f0..01ca0856caff 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
@@ -241,7 +241,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, | |||
241 | unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, | 241 | unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, |
242 | int tag, unsigned int nr_pages, struct page **pages); | 242 | int tag, unsigned int nr_pages, struct page **pages); |
243 | 243 | ||
244 | struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index); | 244 | struct page *grab_cache_page_write_begin(struct address_space *mapping, |
245 | pgoff_t index, unsigned flags); | ||
245 | 246 | ||
246 | /* | 247 | /* |
247 | * Returns locked page at given index in given cache, creating it if needed. | 248 | * Returns locked page at given index in given cache, creating it if needed. |