aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Trond Myklebust <Trond.Myklebust@netapp.com> 2007-01-11 02:15:39 -0500
committer: Linus Torvalds <torvalds@woody.osdl.org> 2007-01-11 21:18:21 -0500
commit: e3db7691e9f3dff3289f64e3d98583e28afe03db (patch)
tree: e05542d8d8bb545545c5b535381a8c1fcb369a03
parent: 07031e14c1127fc7e1a5b98dfcc59f434e025104 (diff)
[PATCH] NFS: Fix race in nfs_release_page()
NFS: Fix race in nfs_release_page()

invalidate_inode_pages2() may find the dirty bit has been set on a page owing to the fact that the page may still be mapped after it was locked. Only after the call to unmap_mapping_range() are we sure that the page can no longer be dirtied.

In order to fix this, NFS has hooked the releasepage() method and tries to write the page out between the call to unmap_mapping_range() and the call to remove_mapping(). This, however, leads to deadlocks in the page reclaim code, where the page may be locked without holding a reference to the inode or dentry.

The fix is to add a new address_space_operation, launder_page(), which will attempt to write out a dirty page without releasing the page lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>

Also, the bare SetPageDirty() can skew all sorts of accounting, leading to other nasties.

[akpm@osdl.org: cleanup]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  Documentation/filesystems/Locking  8
-rw-r--r--  fs/nfs/file.c  16
-rw-r--r--  include/linux/fs.h  1
-rw-r--r--  mm/truncate.c  12
4 files changed, 28 insertions, 9 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 790ef6fbe495..28bfea75bcf2 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -171,6 +171,7 @@ prototypes:
171 int (*releasepage) (struct page *, int); 171 int (*releasepage) (struct page *, int);
172 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 172 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
173 loff_t offset, unsigned long nr_segs); 173 loff_t offset, unsigned long nr_segs);
174 int (*launder_page) (struct page *);
174 175
175locking rules: 176locking rules:
176 All except set_page_dirty may block 177 All except set_page_dirty may block
@@ -188,6 +189,7 @@ bmap: yes
188invalidatepage: no yes 189invalidatepage: no yes
189releasepage: no yes 190releasepage: no yes
190direct_IO: no 191direct_IO: no
192launder_page: no yes
191 193
192 ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage() 194 ->prepare_write(), ->commit_write(), ->sync_page() and ->readpage()
193may be called from the request handler (/dev/loop). 195may be called from the request handler (/dev/loop).
@@ -281,6 +283,12 @@ buffers from the page in preparation for freeing it. It returns zero to
281indicate that the buffers are (or may be) freeable. If ->releasepage is zero, 283indicate that the buffers are (or may be) freeable. If ->releasepage is zero,
282the kernel assumes that the fs has no private interest in the buffers. 284the kernel assumes that the fs has no private interest in the buffers.
283 285
286 ->launder_page() may be called prior to releasing a page if
287it is still found to be dirty. It returns zero if the page was successfully
288cleaned, or an error value if not. Note that in order to prevent the page
289getting mapped back in and redirtied, it needs to be kept locked
290across the entire operation.
291
284 Note: currently almost all instances of address_space methods are 292 Note: currently almost all instances of address_space methods are
285using BKL for internal serialization and that's one of the worst sources 293using BKL for internal serialization and that's one of the worst sources
286of contention. Normally they are calling library functions (in fs/buffer.c) 294of contention. Normally they are calling library functions (in fs/buffer.c)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0dd6be346aa7..fab20d06d936 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -315,14 +315,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
315 315
316static int nfs_release_page(struct page *page, gfp_t gfp) 316static int nfs_release_page(struct page *page, gfp_t gfp)
317{ 317{
318 /* 318 /* If PagePrivate() is set, then the page is not freeable */
319 * Avoid deadlock on nfs_wait_on_request(). 319 return 0;
320 */ 320}
321 if (!(gfp & __GFP_FS)) 321
322 return 0; 322static int nfs_launder_page(struct page *page)
323 /* Hack... Force nfs_wb_page() to write out the page */ 323{
324 SetPageDirty(page); 324 return nfs_wb_page(page->mapping->host, page);
325 return !nfs_wb_page(page->mapping->host, page);
326} 325}
327 326
328const struct address_space_operations nfs_file_aops = { 327const struct address_space_operations nfs_file_aops = {
@@ -338,6 +337,7 @@ const struct address_space_operations nfs_file_aops = {
338#ifdef CONFIG_NFS_DIRECTIO 337#ifdef CONFIG_NFS_DIRECTIO
339 .direct_IO = nfs_direct_IO, 338 .direct_IO = nfs_direct_IO,
340#endif 339#endif
340 .launder_page = nfs_launder_page,
341}; 341};
342 342
343static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, 343static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 186da813541e..14a337cc3464 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -426,6 +426,7 @@ struct address_space_operations {
426 /* migrate the contents of a page to the specified target */ 426 /* migrate the contents of a page to the specified target */
427 int (*migratepage) (struct address_space *, 427 int (*migratepage) (struct address_space *,
428 struct page *, struct page *); 428 struct page *, struct page *);
429 int (*launder_page) (struct page *);
429}; 430};
430 431
431struct backing_dev_info; 432struct backing_dev_info;
diff --git a/mm/truncate.c b/mm/truncate.c
index ecdfdcc50522..6c79ca4a1ca7 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -341,6 +341,15 @@ failed:
341 return 0; 341 return 0;
342} 342}
343 343
344static int do_launder_page(struct address_space *mapping, struct page *page)
345{
346 if (!PageDirty(page))
347 return 0;
348 if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
349 return 0;
350 return mapping->a_ops->launder_page(page);
351}
352
344/** 353/**
345 * invalidate_inode_pages2_range - remove range of pages from an address_space 354 * invalidate_inode_pages2_range - remove range of pages from an address_space
346 * @mapping: the address_space 355 * @mapping: the address_space
@@ -405,7 +414,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
405 PAGE_CACHE_SIZE, 0); 414 PAGE_CACHE_SIZE, 0);
406 } 415 }
407 } 416 }
408 if (!invalidate_complete_page2(mapping, page)) 417 ret = do_launder_page(mapping, page);
418 if (ret == 0 && !invalidate_complete_page2(mapping, page))
409 ret = -EIO; 419 ret = -EIO;
410 unlock_page(page); 420 unlock_page(page);
411 } 421 }