author	Christoph Lameter <clameter@sgi.com>	2006-01-08 04:00:48 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-08 23:12:41 -0500
commit	49d2e9cc4544369635cd6f4ef6d5bb0f757079a7 (patch)
tree	d8c76c5224b05b812f96424a18b1e9205d90feae
parent	930d915252edda7042c944ed3c30194a2f9fe163 (diff)
[PATCH] Swap Migration V5: migrate_pages() function
This adds the basic page migration function with a minimal implementation
that only allows the eviction of pages to swap space.

Page eviction and migration may be useful for relocating pages, for
suspending programs, or for remapping single pages (useful for faulty pages
or pages with soft ECC failures).

The process is as follows:

The function wanting to migrate pages must first build a list of pages to be
migrated or evicted and take them off the LRU lists via isolate_lru_page().
isolate_lru_page() determines that a page is freeable based on the LRU bit
set.

Then the actual migration or swapout can happen by calling migrate_pages().
migrate_pages() does its best to migrate or swap out the pages and makes
multiple passes over the list.  Some pages may only be swappable if they are
not dirty; migrate_pages() may start writing out dirty pages in the initial
passes over the pages.  However, migrate_pages() may not be able to migrate
or evict all pages for a variety of reasons.

The remaining pages may be returned to the LRU lists using
putback_lru_pages().

Changelog V4->V5:
- Use the lru caches to return pages to the LRU

Changelog V3->V4:
- Restructure code so that applying patches to support full migration
  requires only minimal changes. Rename swapout_pages() to migrate_pages().

Changelog V2->V3:
- Extract common code from shrink_list() and swapout_pages()

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: "Michael Kerrisk" <mtk-manpages@gmx.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
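For context, a minimal sketch of how a caller is expected to use the
interface described above. The wrapper function below is hypothetical and
not part of this patch; it assumes that isolate_lru_page() returns 1 on
success, as in the rest of this series.

/*
 * Illustrative sketch only -- not part of this patch.  The helper
 * evict_page_example() is a hypothetical caller of the new interface.
 */
#include <linux/errno.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/swap.h>

static int evict_page_example(struct page *page)
{
	LIST_HEAD(pagelist);

	/* Take the page off the LRU; assumed to return 1 on success. */
	if (isolate_lru_page(page) != 1)
		return -EBUSY;

	list_add(&page->lru, &pagelist);

	/*
	 * The second list is NULL: this simplified migrate_pages() can
	 * only swap pages out.  The return value is the number of pages
	 * that could not be migrated or evicted.
	 */
	if (migrate_pages(&pagelist, NULL))
		/* Return anything that could not be evicted to the LRU. */
		putback_lru_pages(&pagelist);

	return 0;
}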
-rw-r--r--	include/linux/swap.h	2
-rw-r--r--	mm/vmscan.c	214
2 files changed, 182 insertions, 34 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index a49112536c02..893096e67bdb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -178,6 +178,8 @@ extern int vm_swappiness;
 extern int isolate_lru_page(struct page *p);
 extern int putback_lru_pages(struct list_head *l);
 
+extern int migrate_pages(struct list_head *l, struct list_head *t);
+
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6c30a8c59795..a537a7f16357 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -373,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 	return PAGE_CLEAN;
 }
 
+static int remove_mapping(struct address_space *mapping, struct page *page)
+{
+	if (!mapping)
+		return 0;		/* truncate got there first */
+
+	write_lock_irq(&mapping->tree_lock);
+
+	/*
+	 * The non-racy check for busy page. It is critical to check
+	 * PageDirty _after_ making sure that the page is freeable and
+	 * not in use by anybody.	(pagecache + us == 2)
+	 */
+	if (unlikely(page_count(page) != 2))
+		goto cannot_free;
+	smp_rmb();
+	if (unlikely(PageDirty(page)))
+		goto cannot_free;
+
+	if (PageSwapCache(page)) {
+		swp_entry_t swap = { .val = page_private(page) };
+		__delete_from_swap_cache(page);
+		write_unlock_irq(&mapping->tree_lock);
+		swap_free(swap);
+		__put_page(page);	/* The pagecache ref */
+		return 1;
+	}
+
+	__remove_from_page_cache(page);
+	write_unlock_irq(&mapping->tree_lock);
+	__put_page(page);
+	return 1;
+
+cannot_free:
+	write_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
 /*
  * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
  */
@@ -504,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 			goto free_it;
 		}
 
-		if (!mapping)
-			goto keep_locked;	/* truncate got there first */
-
-		write_lock_irq(&mapping->tree_lock);
-
-		/*
-		 * The non-racy check for busy page. It is critical to check
-		 * PageDirty _after_ making sure that the page is freeable and
-		 * not in use by anybody.	(pagecache + us == 2)
-		 */
-		if (unlikely(page_count(page) != 2))
-			goto cannot_free;
-		smp_rmb();
-		if (unlikely(PageDirty(page)))
-			goto cannot_free;
-
-#ifdef CONFIG_SWAP
-		if (PageSwapCache(page)) {
-			swp_entry_t swap = { .val = page_private(page) };
-			__delete_from_swap_cache(page);
-			write_unlock_irq(&mapping->tree_lock);
-			swap_free(swap);
-			__put_page(page);	/* The pagecache ref */
-			goto free_it;
-		}
-#endif /* CONFIG_SWAP */
-
-		__remove_from_page_cache(page);
-		write_unlock_irq(&mapping->tree_lock);
-		__put_page(page);
+		if (!remove_mapping(mapping, page))
+			goto keep_locked;
 
 free_it:
 		unlock_page(page);
@@ -542,10 +551,6 @@ free_it:
 		__pagevec_release_nonlru(&freed_pvec);
 		continue;
 
-cannot_free:
-		write_unlock_irq(&mapping->tree_lock);
-		goto keep_locked;
-
 activate_locked:
 		SetPageActive(page);
 		pgactivate++;
@@ -564,6 +569,147 @@ keep:
 }
 
 /*
+ * swapout a single page
+ * page is locked upon entry, unlocked on exit
+ *
+ * return codes:
+ *	0 = complete
+ *	1 = retry
+ */
+static int swap_page(struct page *page)
+{
+	struct address_space *mapping = page_mapping(page);
+
+	if (page_mapped(page) && mapping)
+		if (try_to_unmap(page) != SWAP_SUCCESS)
+			goto unlock_retry;
+
+	if (PageDirty(page)) {
+		/* Page is dirty, try to write it out here */
+		switch(pageout(page, mapping)) {
+		case PAGE_KEEP:
+		case PAGE_ACTIVATE:
+			goto unlock_retry;
+
+		case PAGE_SUCCESS:
+			goto retry;
+
+		case PAGE_CLEAN:
+			; /* try to free the page below */
+		}
+	}
+
+	if (PagePrivate(page)) {
+		if (!try_to_release_page(page, GFP_KERNEL) ||
+		    (!mapping && page_count(page) == 1))
+			goto unlock_retry;
+	}
+
+	if (remove_mapping(mapping, page)) {
+		/* Success */
+		unlock_page(page);
+		return 0;
+	}
+
+unlock_retry:
+	unlock_page(page);
+
+retry:
+	return 1;
+}
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
+ * The second list contains new pages that the pages isolated
+ * can be moved to. If the second list is NULL then all
+ * pages are swapped out.
+ *
+ * The function returns after 10 attempts or if no pages
+ * are movable anymore because t has become empty
+ * or no retryable pages exist anymore.
+ *
+ * SIMPLIFIED VERSION: This implementation of migrate_pages
+ * is only swapping out pages and never touches the second
+ * list. The direct migration patchset
+ * extends this function to avoid the use of swap.
+ */
+int migrate_pages(struct list_head *l, struct list_head *t)
+{
+	int retry;
+	LIST_HEAD(failed);
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int swapwrite = current->flags & PF_SWAPWRITE;
+
+	if (!swapwrite)
+		current->flags |= PF_SWAPWRITE;
+
+redo:
+	retry = 0;
+
+	list_for_each_entry_safe(page, page2, l, lru) {
+		cond_resched();
+
+		/*
+		 * Skip locked pages during the first two passes to give the
+		 * functions holding the lock time to release the page. Later we use
+		 * lock_page to have a higher chance of acquiring the lock.
+		 */
+		if (pass > 2)
+			lock_page(page);
+		else
+			if (TestSetPageLocked(page))
+				goto retry_later;
+
+		/*
+		 * Only wait on writeback if we have already done a pass where
+		 * we may have triggered writeouts for lots of pages.
+		 */
+		if (pass > 0)
+			wait_on_page_writeback(page);
+		else
+			if (PageWriteback(page)) {
+				unlock_page(page);
+				goto retry_later;
+			}
+
+#ifdef CONFIG_SWAP
+		if (PageAnon(page) && !PageSwapCache(page)) {
+			if (!add_to_swap(page)) {
+				unlock_page(page);
+				list_move(&page->lru, &failed);
+				nr_failed++;
+				continue;
+			}
+		}
+#endif /* CONFIG_SWAP */
+
+		/*
+		 * Page is properly locked and writeback is complete.
+		 * Try to migrate the page.
+		 */
+		if (swap_page(page)) {
+retry_later:
+			retry++;
+		}
+	}
+	if (retry && pass++ < 10)
+		goto redo;
+
+	if (!swapwrite)
+		current->flags &= ~PF_SWAPWRITE;
+
+	if (!list_empty(&failed))
+		list_splice(&failed, l);
+
+	return nr_failed + retry;
+}
+
+/*
  * zone->lru_lock is heavily contended. Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
  * and working on them outside the LRU lock.