author		Nick Piggin <npiggin@suse.de>		2008-07-25 22:45:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-26 15:00:06 -0400
commit		e286781d5f2e9c846e012a39653a166e9d31777d
tree		14958fe6d8f3e0459c96c68b3034ea2433ab85ac /mm/vmscan.c
parent		47feff2c8eefe85099f87c43d3096855f0085ca0
mm: speculative page references
If we can be sure that elevating the page_count on a pagecache page will
pin it, we can speculatively run this operation, and subsequently check to
see if we hit the right page rather than relying on holding a lock or
otherwise pinning a reference to the page.
This can be done if get_page/put_page behaves consistently throughout the
whole tree (ie. if we "get" the page after it has been used for something
else, we must be able to free it with a put_page).
Actually, there is a period where the count behaves differently: when the
page is free or if it is a constituent page of a compound page. We need
an atomic_inc_not_zero operation to ensure we don't try to grab the page
in either case.
This patch introduces the core locking protocol to the pagecache (ie.
adds page_cache_get_speculative, and tweaks some update-side code to make
it work).
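
To make the read side of that protocol concrete, here is a minimal, hypothetical sketch of a speculative get built on atomic_inc_not_zero() (via the existing get_page_unless_zero() wrapper). It is not the code this patch adds — the real helper is page_cache_get_speculative() — and the function name speculative_get() is invented purely for illustration:

	#include <linux/mm.h>
	#include <linux/pagemap.h>

	/*
	 * Hypothetical sketch, not the patch's page_cache_get_speculative().
	 * Take a reference only if the page is not already free (or frozen),
	 * then re-check that we pinned the page we actually wanted.
	 */
	static struct page *speculative_get(struct address_space *mapping,
					    struct page *page, pgoff_t offset)
	{
		/* atomic_inc_not_zero(): never resurrects a zero-count page. */
		if (!get_page_unless_zero(page))
			return NULL;

		/*
		 * We hold a reference now, but the page may have been freed
		 * and reused before we got it.  If it no longer belongs to
		 * this mapping/offset, drop our reference and report failure.
		 */
		if (unlikely(page->mapping != mapping || page->index != offset)) {
			put_page(page);
			return NULL;
		}
		return page;
	}

The caller simply retries the lookup on failure; the key point from the paragraphs above is that even a "get" on a stale page must remain safe to undo with a plain put_page().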
Thanks to Hugh for pointing out an improvement to the algorithm setting
page_count to zero when we have control of all references, in order to
hold off speculative getters.
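
That suggestion is what the page_freeze_refs()/page_unfreeze_refs() calls in the __remove_mapping() hunk below implement. As a rough sketch of the idea (the _sketch helper names and the direct _count field access are illustrative assumptions, not a quote of the real helpers): freezing cmpxchg's the refcount from its expected value straight to zero, which both verifies that nobody else holds a reference and makes concurrent atomic_inc_not_zero() callers back off.

	#include <linux/mm_types.h>
	#include <asm/atomic.h>

	/* Rough sketch of the freeze/unfreeze idea; see page_freeze_refs()
	 * and page_unfreeze_refs() as actually used in the diff below. */
	static inline int page_freeze_refs_sketch(struct page *page, int expected)
	{
		/* Succeeds only if we held all 'expected' references; the page
		 * is then invisible to speculative getters, whose
		 * inc-not-zero attempts fail. */
		return atomic_cmpxchg(&page->_count, expected, 0) == expected;
	}

	static inline void page_unfreeze_refs_sketch(struct page *page, int count)
	{
		/* Re-publish the page with whatever reference count we settled
		 * on (2 to back out of the removal, 1 to also drop the
		 * pagecache ref). */
		atomic_set(&page->_count, count);
	}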
[kamezawa.hiroyu@jp.fujitsu.com: fix migration_entry_wait()]
[hugh@veritas.com: fix add_to_page_cache]
[akpm@linux-foundation.org: repair a comment]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	74
1 file changed, 54 insertions, 20 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26672c6cd3ce..0075eac1cd04 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -391,12 +391,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 }
 
 /*
- * Attempt to detach a locked page from its ->mapping. If it is dirty or if
- * someone else has a ref on the page, abort and return 0. If it was
- * successfully detached, return 1. Assumes the caller has a single ref on
- * this page.
+ * Same as remove_mapping, but if the page is removed from the mapping, it
+ * gets returned with a refcount of 0.
  */
-int remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
@@ -427,24 +425,24 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
 	 * and thus under tree_lock, then this ordering is not required.
 	 */
-	if (unlikely(page_count(page) != 2))
+	if (!page_freeze_refs(page, 2))
 		goto cannot_free;
-	smp_rmb();
-	if (unlikely(PageDirty(page)))
+	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+	if (unlikely(PageDirty(page))) {
+		page_unfreeze_refs(page, 2);
 		goto cannot_free;
+	}
 
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
-		__put_page(page);	/* The pagecache ref */
-		return 1;
+	} else {
+		__remove_from_page_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
 	}
 
-	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
-	__put_page(page);
 	return 1;
 
 cannot_free:
@@ -453,6 +451,26 @@ cannot_free:
 }
 
 /*
+ * Attempt to detach a locked page from its ->mapping. If it is dirty or if
+ * someone else has a ref on the page, abort and return 0. If it was
+ * successfully detached, return 1. Assumes the caller has a single ref on
+ * this page.
+ */
+int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (__remove_mapping(mapping, page)) {
+		/*
+		 * Unfreezing the refcount with 1 rather than 2 effectively
+		 * drops the pagecache ref for us without requiring another
+		 * atomic operation.
+		 */
+		page_unfreeze_refs(page, 1);
+		return 1;
+	}
+	return 0;
+}
+
+/*
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
@@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PagePrivate(page)) {
 			if (!try_to_release_page(page, sc->gfp_mask))
 				goto activate_locked;
-			if (!mapping && page_count(page) == 1)
-				goto free_it;
+			if (!mapping && page_count(page) == 1) {
+				unlock_page(page);
+				if (put_page_testzero(page))
+					goto free_it;
+				else {
+					/*
+					 * rare race with speculative reference.
+					 * the speculative reference will free
+					 * this page shortly, so we may
+					 * increment nr_reclaimed here (and
+					 * leave it off the LRU).
+					 */
+					nr_reclaimed++;
+					continue;
+				}
+			}
 		}
 
-		if (!mapping || !remove_mapping(mapping, page))
+		if (!mapping || !__remove_mapping(mapping, page))
 			goto keep_locked;
 
-free_it:
 		unlock_page(page);
+free_it:
 		nr_reclaimed++;
-		if (!pagevec_add(&freed_pvec, page))
-			__pagevec_release_nonlru(&freed_pvec);
+		if (!pagevec_add(&freed_pvec, page)) {
+			__pagevec_free(&freed_pvec);
+			pagevec_reinit(&freed_pvec);
+		}
 		continue;
 
 activate_locked:
@@ -623,7 +657,7 @@ keep:
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
-		__pagevec_release_nonlru(&freed_pvec);
+		__pagevec_free(&freed_pvec);
 	count_vm_events(PGACTIVATE, pgactivate);
 	return nr_reclaimed;
 }