path: root/mm/vmscan.c
author	Nick Piggin <npiggin@suse.de>	2008-07-25 22:45:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-07-26 15:00:06 -0400
commit	e286781d5f2e9c846e012a39653a166e9d31777d (patch)
tree	14958fe6d8f3e0459c96c68b3034ea2433ab85ac /mm/vmscan.c
parent	47feff2c8eefe85099f87c43d3096855f0085ca0 (diff)
mm: speculative page references
If we can be sure that elevating the page_count on a pagecache page will pin it, we can speculatively run this operation, and subsequently check to see if we hit the right page rather than relying on holding a lock or otherwise pinning a reference to the page.

This can be done if get_page/put_page behaves consistently throughout the whole tree (ie. if we "get" the page after it has been used for something else, we must be able to free it with a put_page).

Actually, there is a period where the count behaves differently: when the page is free or if it is a constituent page of a compound page. We need an atomic_inc_not_zero operation to ensure we don't try to grab the page in either case.

This patch introduces the core locking protocol to the pagecache (ie. adds page_cache_get_speculative, and tweaks some update-side code to make it work).

Thanks to Hugh for pointing out an improvement to the algorithm setting page_count to zero when we have control of all references, in order to hold off speculative getters.

[kamezawa.hiroyu@jp.fujitsu.com: fix migration_entry_wait()]
[hugh@veritas.com: fix add_to_page_cache]
[akpm@linux-foundation.org: repair a comment]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jeff Garzik <jeff@garzik.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Reviewed-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
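[Editor's note, not part of the patch: a minimal userspace C model of the speculative-get/freeze handshake the message describes. The struct and function names here are simplified stand-ins chosen for illustration, not the kernel's real struct page or pagecache API.]

/*
 * Illustrative model only: a refcount of 0 means "frozen"; a speculative
 * getter may only take a reference while the count is non-zero, which is
 * the atomic_inc_not_zero rule the commit message relies on.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page {
	atomic_int _count;	/* 0 means frozen: nobody may take a new ref */
};

/* Speculative side: take a reference only if the count is not zero. */
static bool get_page_unless_zero(struct page *page)
{
	int old = atomic_load(&page->_count);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&page->_count, &old, old + 1))
			return true;	/* got our speculative reference */
	}
	return false;	/* page was frozen or freed; caller must retry lookup */
}

/* Update side: freeze the count to zero once we hold all 'expected'
 * references, so speculative getters back off (cf. page_freeze_refs). */
static bool freeze_refs(struct page *page, int expected)
{
	return atomic_compare_exchange_strong(&page->_count, &expected, 0);
}

int main(void)
{
	struct page page = { ._count = 2 };	/* e.g. pagecache ref + our ref */

	printf("speculative get: %d\n", get_page_unless_zero(&page)); /* 1 */
	/* count is now 3, so freezing against an expected count of 2 fails */
	printf("freeze(2): %d\n", freeze_refs(&page, 2));             /* 0 */
	return 0;
}

[In the kernel, the speculative getter additionally re-checks that the page it pinned is still the page it looked up, and backs off with put_page if not; the sketch only models the refcount rule itself.]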
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	74
1 file changed, 54 insertions(+), 20 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26672c6cd3ce..0075eac1cd04 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -391,12 +391,10 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 }
 
 /*
- * Attempt to detach a locked page from its ->mapping. If it is dirty or if
- * someone else has a ref on the page, abort and return 0.  If it was
- * successfully detached, return 1.  Assumes the caller has a single ref on
- * this page.
+ * Same as remove_mapping, but if the page is removed from the mapping, it
+ * gets returned with a refcount of 0.
  */
-int remove_mapping(struct address_space *mapping, struct page *page)
+static int __remove_mapping(struct address_space *mapping, struct page *page)
 {
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
@@ -427,24 +425,24 @@ int remove_mapping(struct address_space *mapping, struct page *page)
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
 	 * and thus under tree_lock, then this ordering is not required.
 	 */
-	if (unlikely(page_count(page) != 2))
+	if (!page_freeze_refs(page, 2))
 		goto cannot_free;
-	smp_rmb();
-	if (unlikely(PageDirty(page)))
+	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+	if (unlikely(PageDirty(page))) {
+		page_unfreeze_refs(page, 2);
 		goto cannot_free;
+	}
 
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		write_unlock_irq(&mapping->tree_lock);
 		swap_free(swap);
-		__put_page(page);	/* The pagecache ref */
-		return 1;
+	} else {
+		__remove_from_page_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
 	}
 
-	__remove_from_page_cache(page);
-	write_unlock_irq(&mapping->tree_lock);
-	__put_page(page);
 	return 1;
 
 cannot_free:
@@ -453,6 +451,26 @@ cannot_free:
 }
 
 /*
+ * Attempt to detach a locked page from its ->mapping. If it is dirty or if
+ * someone else has a ref on the page, abort and return 0.  If it was
+ * successfully detached, return 1.  Assumes the caller has a single ref on
+ * this page.
+ */
+int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (__remove_mapping(mapping, page)) {
+		/*
+		 * Unfreezing the refcount with 1 rather than 2 effectively
+		 * drops the pagecache ref for us without requiring another
+		 * atomic operation.
+		 */
+		page_unfreeze_refs(page, 1);
+		return 1;
+	}
+	return 0;
+}
+
+/*
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
@@ -598,18 +616,34 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (PagePrivate(page)) {
 			if (!try_to_release_page(page, sc->gfp_mask))
 				goto activate_locked;
-			if (!mapping && page_count(page) == 1)
-				goto free_it;
+			if (!mapping && page_count(page) == 1) {
+				unlock_page(page);
+				if (put_page_testzero(page))
+					goto free_it;
+				else {
+					/*
+					 * rare race with speculative reference.
+					 * the speculative reference will free
+					 * this page shortly, so we may
+					 * increment nr_reclaimed here (and
+					 * leave it off the LRU).
+					 */
+					nr_reclaimed++;
+					continue;
+				}
+			}
 		}
 
-		if (!mapping || !remove_mapping(mapping, page))
+		if (!mapping || !__remove_mapping(mapping, page))
 			goto keep_locked;
 
-free_it:
 		unlock_page(page);
+free_it:
 		nr_reclaimed++;
-		if (!pagevec_add(&freed_pvec, page))
-			__pagevec_release_nonlru(&freed_pvec);
+		if (!pagevec_add(&freed_pvec, page)) {
+			__pagevec_free(&freed_pvec);
+			pagevec_reinit(&freed_pvec);
+		}
 		continue;
 
 activate_locked:
@@ -623,7 +657,7 @@ keep:
 	}
 	list_splice(&ret_pages, page_list);
 	if (pagevec_count(&freed_pvec))
-		__pagevec_release_nonlru(&freed_pvec);
+		__pagevec_free(&freed_pvec);
 	count_vm_events(PGACTIVATE, pgactivate);
 	return nr_reclaimed;
 }
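[Editor's note, illustration only: a self-contained userspace C sketch of the refcount handshake the new __remove_mapping()/remove_mapping() pair above relies on, including the trick of unfreezing to 1 instead of 2 so the pagecache reference is dropped without an extra atomic operation. Names are simplified stand-ins, not kernel code.]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct page { atomic_int _count; };

/* Freeze the count from the expected value to 0 (cf. page_freeze_refs):
 * fails if anyone else holds a reference, e.g. a speculative getter. */
static bool freeze_refs(struct page *p, int expected)
{
	return atomic_compare_exchange_strong(&p->_count, &expected, 0);
}

/* Make the page visible again with the given count (cf. page_unfreeze_refs). */
static void unfreeze_refs(struct page *p, int count)
{
	atomic_store(&p->_count, count);
}

int main(void)
{
	struct page page = { ._count = 2 };	/* pagecache ref + caller's ref */

	if (freeze_refs(&page, 2)) {
		/* count is 0: speculative getters now refuse this page */
		unfreeze_refs(&page, 1);	/* 1, not 2: pagecache ref is gone */
	}
	printf("count after remove_mapping-style removal: %d\n",
	       atomic_load(&page._count));	/* prints 1 */
	return 0;
}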