author    Nick Piggin <nickpiggin@yahoo.com.au>    2006-09-27 04:50:02 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>    2006-09-27 11:26:12 -0400
commit    0fd0e6b05aa096622f151cac2f81f2e6844fb1bb
tree      4fd336eaea48b320f69e970323eef5dc77c62f20
parent    5b99cd0effaf846240a15441aec459a592577eaf
[PATCH] page invalidation cleanup
Clean up the invalidate code, and use a common function to safely remove
the page from pagecache.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  mm/truncate.c | 25
-rw-r--r--  mm/vmscan.c   | 27
2 files changed, 31 insertions, 21 deletions
diff --git a/mm/truncate.c b/mm/truncate.c
index c6ab55ec6883..a654928323dc 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/swap.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
@@ -52,36 +53,26 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 /*
  * This is for invalidate_inode_pages().  That function can be called at
  * any time, and is not supposed to throw away dirty pages.  But pages can
- * be marked dirty at any time too.  So we re-check the dirtiness inside
- * ->tree_lock.  That provides exclusion against the __set_page_dirty
- * functions.
+ * be marked dirty at any time too, so use remove_mapping which safely
+ * discards clean, unused pages.
  *
  * Returns non-zero if the page was successfully invalidated.
  */
 static int
 invalidate_complete_page(struct address_space *mapping, struct page *page)
 {
+	int ret;
+
 	if (page->mapping != mapping)
 		return 0;
 
 	if (PagePrivate(page) && !try_to_release_page(page, 0))
 		return 0;
 
-	write_lock_irq(&mapping->tree_lock);
-	if (PageDirty(page))
-		goto failed;
-	if (page_count(page) != 2)	/* caller's ref + pagecache ref */
-		goto failed;
-
-	BUG_ON(PagePrivate(page));
-	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
+	ret = remove_mapping(mapping, page);
 	ClearPageUptodate(page);
-	page_cache_release(page);	/* pagecache ref */
-	return 1;
-failed:
-	write_unlock_irq(&mapping->tree_lock);
-	return 0;
+
+	return ret;
 }
 
 /**
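
For readability, here is the post-patch invalidate_complete_page() reassembled from the hunk above (a sketch pieced together from the diff, not copied from the tree; the new <linux/swap.h> include supplies the remove_mapping() declaration):

	static int
	invalidate_complete_page(struct address_space *mapping, struct page *page)
	{
		int ret;

		if (page->mapping != mapping)		/* page was truncated or replaced */
			return 0;

		if (PagePrivate(page) && !try_to_release_page(page, 0))
			return 0;			/* fs still holds private data */

		/*
		 * remove_mapping() takes tree_lock itself and frees the page
		 * only if it is clean and its sole references are the
		 * pagecache's and the caller's, replacing the open-coded
		 * locking and checks deleted above.
		 */
		ret = remove_mapping(mapping, page);
		ClearPageUptodate(page);

		return ret;
	}
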
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 87d340999ce8..eca70310adb2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -384,11 +384,30 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	BUG_ON(mapping != page_mapping(page));
 
 	write_lock_irq(&mapping->tree_lock);
-
 	/*
-	 * The non-racy check for busy page. It is critical to check
-	 * PageDirty _after_ making sure that the page is freeable and
-	 * not in use by anybody. (pagecache + us == 2)
+	 * The non racy check for a busy page.
+	 *
+	 * Must be careful with the order of the tests. When someone has
+	 * a ref to the page, it may be possible that they dirty it then
+	 * drop the reference. So if PageDirty is tested before page_count
+	 * here, then the following race may occur:
+	 *
+	 * get_user_pages(&page);
+	 * [user mapping goes away]
+	 * write_to(page);
+	 *				!PageDirty(page)    [good]
+	 * SetPageDirty(page);
+	 * put_page(page);
+	 *				!page_count(page)   [good, discard it]
+	 *
+	 * [oops, our write_to data is lost]
+	 *
+	 * Reversing the order of the tests ensures such a situation cannot
+	 * escape unnoticed. The smp_rmb is needed to ensure the page->flags
+	 * load is not satisfied before that of page->_count.
+	 *
+	 * Note that if SetPageDirty is always performed via set_page_dirty,
+	 * and thus under tree_lock, then this ordering is not required.
 	 */
 	if (unlikely(page_count(page) != 2))
 		goto cannot_free;
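
The hunk is cut off here, after the refcount test. To illustrate the ordering the new comment requires, a minimal sketch of how the checks continue, with smp_rmb() between the two loads (an illustration inferred from the comment's reasoning, not the remaining lines of the patch):

	if (unlikely(page_count(page) != 2))	/* refcount first: pagecache ref + caller's ref */
		goto cannot_free;
	smp_rmb();	/* keep the page->flags load from being satisfied before the page->_count load */
	if (unlikely(PageDirty(page)))		/* only then test dirtiness */
		goto cannot_free;
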