diff options
author | Nick Piggin <npiggin@suse.de> | 2009-01-04 15:00:53 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-04 16:33:20 -0500 |
commit | 54566b2c1594c2326a645a3551f9d989f7ba3c5e (patch) | |
tree | b373f3283fe5e197d0df29cd6b645c35adf1076c /mm | |
parent | e687d691cb3790d25e31c74f5941fd7c565e9df5 (diff) |
fs: symlink write_begin allocation context fix
With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened. They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim. This bug could
cause filesystem deadlocks.
The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called. It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock. The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.
Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on
this flag in their write_begin function. Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).
This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in ints write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a
random example).
[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org> [2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
untouched to the grab_cache_page_write_begin() function. That
just simplifies everybody, and may even allow future expansion of the
logic. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 13 |
1 files changed, 9 insertions, 4 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index f3e5f8944d17..f8c69273c37f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write); | |||
2140 | * Find or create a page at the given pagecache position. Return the locked | 2140 | * Find or create a page at the given pagecache position. Return the locked |
2141 | * page. This function is specifically for buffered writes. | 2141 | * page. This function is specifically for buffered writes. |
2142 | */ | 2142 | */ |
2143 | struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index) | 2143 | struct page *grab_cache_page_write_begin(struct address_space *mapping, |
2144 | pgoff_t index, unsigned flags) | ||
2144 | { | 2145 | { |
2145 | int status; | 2146 | int status; |
2146 | struct page *page; | 2147 | struct page *page; |
2148 | gfp_t gfp_notmask = 0; | ||
2149 | if (flags & AOP_FLAG_NOFS) | ||
2150 | gfp_notmask = __GFP_FS; | ||
2147 | repeat: | 2151 | repeat: |
2148 | page = find_lock_page(mapping, index); | 2152 | page = find_lock_page(mapping, index); |
2149 | if (likely(page)) | 2153 | if (likely(page)) |
2150 | return page; | 2154 | return page; |
2151 | 2155 | ||
2152 | page = page_cache_alloc(mapping); | 2156 | page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); |
2153 | if (!page) | 2157 | if (!page) |
2154 | return NULL; | 2158 | return NULL; |
2155 | status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); | 2159 | status = add_to_page_cache_lru(page, mapping, index, |
2160 | GFP_KERNEL & ~gfp_notmask); | ||
2156 | if (unlikely(status)) { | 2161 | if (unlikely(status)) { |
2157 | page_cache_release(page); | 2162 | page_cache_release(page); |
2158 | if (status == -EEXIST) | 2163 | if (status == -EEXIST) |
@@ -2161,7 +2166,7 @@ repeat: | |||
2161 | } | 2166 | } |
2162 | return page; | 2167 | return page; |
2163 | } | 2168 | } |
2164 | EXPORT_SYMBOL(__grab_cache_page); | 2169 | EXPORT_SYMBOL(grab_cache_page_write_begin); |
2165 | 2170 | ||
2166 | static ssize_t generic_perform_write(struct file *file, | 2171 | static ssize_t generic_perform_write(struct file *file, |
2167 | struct iov_iter *i, loff_t pos) | 2172 | struct iov_iter *i, loff_t pos) |