diff options
author | Nick Piggin <npiggin@suse.de> | 2008-07-25 22:45:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-26 15:00:06 -0400 |
commit | e286781d5f2e9c846e012a39653a166e9d31777d (patch) | |
tree | 14958fe6d8f3e0459c96c68b3034ea2433ab85ac /mm/migrate.c | |
parent | 47feff2c8eefe85099f87c43d3096855f0085ca0 (diff) |
mm: speculative page references
If we can be sure that elevating the page_count on a pagecache page will
pin it, we can speculatively run this operation, and subsequently check to
see if we hit the right page rather than relying on holding a lock or
otherwise pinning a reference to the page.
This can be done if get_page/put_page behaves consistently throughout the
whole tree (i.e., if we "get" the page after it has been used for something
else, we must be able to free it with a put_page).
Actually, there is a period where the count behaves differently: when the
page is free or if it is a constituent page of a compound page. We need
an atomic_inc_not_zero operation to ensure we don't try to grab the page
in either case.
This patch introduces the core locking protocol to the pagecache (i.e., it
adds page_cache_get_speculative, and tweaks some update-side code to make
it work).
Thanks to Hugh for pointing out an improvement to the algorithm: setting
page_count to zero when we have control of all references, in order to
hold off speculative getters.
[kamezawa.hiroyu@jp.fujitsu.com: fix migration_entry_wait()]
[hugh@veritas.com: fix add_to_page_cache]
[akpm@linux-foundation.org: repair a comment]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 20 |
1 files changed, 18 insertions, 2 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index d8c65a65c61d..3ca6392e82cc 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -285,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | |||
285 | 285 | ||
286 | page = migration_entry_to_page(entry); | 286 | page = migration_entry_to_page(entry); |
287 | 287 | ||
288 | get_page(page); | 288 | /* |
289 | * Once radix-tree replacement of page migration started, page_count | ||
290 | * *must* be zero. And, we don't want to call wait_on_page_locked() | ||
291 | * against a page without get_page(). | ||
292 | * So, we use get_page_unless_zero(), here. Even failed, page fault | ||
293 | * will occur again. | ||
294 | */ | ||
295 | if (!get_page_unless_zero(page)) | ||
296 | goto out; | ||
289 | pte_unmap_unlock(ptep, ptl); | 297 | pte_unmap_unlock(ptep, ptl); |
290 | wait_on_page_locked(page); | 298 | wait_on_page_locked(page); |
291 | put_page(page); | 299 | put_page(page); |
@@ -305,6 +313,7 @@ out: | |||
305 | static int migrate_page_move_mapping(struct address_space *mapping, | 313 | static int migrate_page_move_mapping(struct address_space *mapping, |
306 | struct page *newpage, struct page *page) | 314 | struct page *newpage, struct page *page) |
307 | { | 315 | { |
316 | int expected_count; | ||
308 | void **pslot; | 317 | void **pslot; |
309 | 318 | ||
310 | if (!mapping) { | 319 | if (!mapping) { |
@@ -319,12 +328,18 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
319 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | 328 | pslot = radix_tree_lookup_slot(&mapping->page_tree, |
320 | page_index(page)); | 329 | page_index(page)); |
321 | 330 | ||
322 | if (page_count(page) != 2 + !!PagePrivate(page) || | 331 | expected_count = 2 + !!PagePrivate(page); |
332 | if (page_count(page) != expected_count || | ||
323 | (struct page *)radix_tree_deref_slot(pslot) != page) { | 333 | (struct page *)radix_tree_deref_slot(pslot) != page) { |
324 | write_unlock_irq(&mapping->tree_lock); | 334 | write_unlock_irq(&mapping->tree_lock); |
325 | return -EAGAIN; | 335 | return -EAGAIN; |
326 | } | 336 | } |
327 | 337 | ||
338 | if (!page_freeze_refs(page, expected_count)) { | ||
339 | write_unlock_irq(&mapping->tree_lock); | ||
340 | return -EAGAIN; | ||
341 | } | ||
342 | |||
328 | /* | 343 | /* |
329 | * Now we know that no one else is looking at the page. | 344 | * Now we know that no one else is looking at the page. |
330 | */ | 345 | */ |
@@ -338,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
338 | 353 | ||
339 | radix_tree_replace_slot(pslot, newpage); | 354 | radix_tree_replace_slot(pslot, newpage); |
340 | 355 | ||
356 | page_unfreeze_refs(page, expected_count); | ||
341 | /* | 357 | /* |
342 | * Drop cache reference from old page. | 358 | * Drop cache reference from old page. |
343 | * We know this isn't the last reference. | 359 | * We know this isn't the last reference. |