author    Christoph Lameter <clameter@sgi.com>    2006-02-01 06:05:40 -0500
committer Linus Torvalds <torvalds@g5.osdl.org>  2006-02-01 11:53:16 -0500
commit    7e2ab150d1b3b286a4c864c60a549b2601777b63 (patch)
tree      9d8f4f3af382a043ada81f75c324e76dff9f0043 /mm/mempolicy.c
parent    a3351e525e4768c29aa5d22ef59b5b38e0361e53 (diff)
[PATCH] Direct Migration V9: upgrade MPOL_MF_MOVE and sys_migrate_pages()
Modify the policy layer to support direct page migration:

- Add migrate_pages_to(), allowing the migration of a list of pages to a
  specified node, or to the vma with a specific allocation policy, in
  sets of MIGRATE_CHUNK_SIZE pages.

- Modify do_migrate_pages() to do a staged move of pages from the
  source nodes to the target nodes.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--  mm/mempolicy.c | 167
1 file changed, 146 insertions(+), 21 deletions(-)
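Before the diff itself, the chunked allocate-then-migrate loop at the heart of the new migrate_pages_to() is easiest to see in isolation. The following is a minimal userspace C sketch of that control flow, not kernel code: MIGRATE_CHUNK_SIZE is taken from the patch, while alloc_dest_page() and migrate_chunk() are hypothetical stand-ins for alloc_pages_node()/alloc_page_vma() and migrate_pages().

#include <stdio.h>

#define MIGRATE_CHUNK_SIZE 256

/* Hypothetical stand-in: allocate one destination page, 0 on failure. */
static int alloc_dest_page(void) { return 1; }

/* Hypothetical stand-in: migrate up to 'chunk' pages, returns how many moved. */
static int migrate_chunk(int chunk) { return chunk; }

/* Mirrors the patch's convention: return the number of pages not migrated. */
static int migrate_pages_to_sketch(int nr_pages)
{
	while (nr_pages > 0) {		/* corresponds to the patch's redo: loop */
		int chunk = 0;

		/* Allocate destination pages, at most one chunk's worth. */
		while (chunk < nr_pages && chunk < MIGRATE_CHUNK_SIZE) {
			if (!alloc_dest_page())
				return nr_pages; /* the patch returns -ENOMEM here */
			chunk++;
		}
		nr_pages -= migrate_chunk(chunk);
	}
	return nr_pages;
}

int main(void)
{
	printf("pages left unmigrated: %d\n", migrate_pages_to_sketch(1000));
	return 0;
}

The outer loop keeps migrating one chunk at a time until the source list drains or an allocation fails, which is the staged behaviour the commit message describes.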
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 73790188b0eb..27da6d5c77ba 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -95,6 +95,9 @@
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)	/* Invert check for nodemask */
 #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2)	/* Gather statistics */
 
+/* The number of pages to migrate per call to migrate_pages() */
+#define MIGRATE_CHUNK_SIZE 256
+
 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;
 
@@ -543,24 +546,91 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	}
 }
 
-static int swap_pages(struct list_head *pagelist)
+/*
+ * Migrate the list 'pagelist' of pages to a certain destination.
+ *
+ * Specify the destination with either a non-NULL vma or dest >= 0
+ * Return the number of pages not migrated or an error code
+ */
+static int migrate_pages_to(struct list_head *pagelist,
+			struct vm_area_struct *vma, int dest)
 {
+	LIST_HEAD(newlist);
 	LIST_HEAD(moved);
 	LIST_HEAD(failed);
-	int n;
+	int err = 0;
+	int nr_pages;
+	struct page *page;
+	struct list_head *p;
 
-	n = migrate_pages(pagelist, NULL, &moved, &failed);
-	putback_lru_pages(&failed);
-	putback_lru_pages(&moved);
+redo:
+	nr_pages = 0;
+	list_for_each(p, pagelist) {
+		if (vma)
+			page = alloc_page_vma(GFP_HIGHUSER, vma, vma->vm_start);
+		else
+			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
 
-	return n;
+		if (!page) {
+			err = -ENOMEM;
+			goto out;
+		}
+		list_add(&page->lru, &newlist);
+		nr_pages++;
+		if (nr_pages > MIGRATE_CHUNK_SIZE)
+			break;
+	}
+	err = migrate_pages(pagelist, &newlist, &moved, &failed);
+
+	putback_lru_pages(&moved);	/* Call release_pages() instead? */
+
+	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
+		goto redo;
+out:
+	/* Return leftover allocated pages */
+	while (!list_empty(&newlist)) {
+		page = list_entry(newlist.next, struct page, lru);
+		list_del(&page->lru);
+		__free_page(page);
+	}
+	list_splice(&failed, pagelist);
+	if (err < 0)
+		return err;
+
+	/* Count the leftover pages */
+	nr_pages = 0;
+	list_for_each(p, pagelist)
+		nr_pages++;
+	return nr_pages;
+}
+
+/*
+ * Migrate pages from one node to a target node.
+ * Returns an error or the number of pages not migrated.
+ */
+int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
+{
+	nodemask_t nmask;
+	LIST_HEAD(pagelist);
+	int err = 0;
+
+	nodes_clear(nmask);
+	node_set(source, nmask);
+
+	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nmask,
+			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+
+	if (!list_empty(&pagelist)) {
+		err = migrate_pages_to(&pagelist, NULL, dest);
+		if (!list_empty(&pagelist))
+			putback_lru_pages(&pagelist);
+	}
+	return err;
 }
 
 /*
- * For now migrate_pages simply swaps out the pages from nodes that are in
- * the source set but not in the target set. In the future, we would
- * want a function that moves pages between the two nodesets in such
- * a way as to preserve the physical layout as much as possible.
+ * Move pages between the two nodesets so as to preserve the physical
+ * layout as much as possible.
  *
- * Returns the number of page that could not be moved.
+ * Returns the number of pages that could not be moved.
  */
@@ -568,22 +638,76 @@ int do_migrate_pages(struct mm_struct *mm,
 		const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
 {
 	LIST_HEAD(pagelist);
-	int count = 0;
-	nodemask_t nodes;
+	int busy = 0;
+	int err = 0;
+	nodemask_t tmp;
 
-	nodes_andnot(nodes, *from_nodes, *to_nodes);
+	down_read(&mm->mmap_sem);
 
-	down_read(&mm->mmap_sem);
-	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
-			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+/*
+ * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
+ * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
+ * bit in 'tmp', and return that <source, dest> pair for migration.
+ * The pair of nodemasks 'to' and 'from' define the map.
+ *
+ * If no pair of bits is found that way, fall back to picking some
+ * pair of 'source' and 'dest' bits that are not the same.  If the
+ * 'source' and 'dest' bits are the same, this represents a node
+ * that will be migrating to itself, so no pages need move.
+ *
+ * If no bits are left in 'tmp', or if all remaining bits left
+ * in 'tmp' correspond to the same bit in 'to', return false
+ * (nothing left to migrate).
+ *
+ * This lets us pick a pair of nodes to migrate between, such that
+ * if possible the dest node is not already occupied by some other
+ * source node, minimizing the risk of overloading the memory on a
+ * node that would happen if we migrated incoming memory to a node
+ * before migrating outgoing memory from that same node.
+ *
+ * A single scan of tmp is sufficient.  As we go, we remember the
+ * most recent <s, d> pair that moved (s != d).  If we find a pair
+ * that not only moved, but what's better, moved to an empty slot
+ * (d is not set in tmp), then we break out then, with that pair.
+ * Otherwise when we finish scanning tmp, we at least have the
+ * most recent <s, d> pair that moved.  If we get all the way through
+ * the scan of tmp without finding any node that moved, much less
+ * moved to an empty node, then there is nothing left worth migrating.
+ */
 
-	if (!list_empty(&pagelist)) {
-		count = swap_pages(&pagelist);
-		putback_lru_pages(&pagelist);
+	tmp = *from_nodes;
+	while (!nodes_empty(tmp)) {
+		int s, d;
+		int source = -1;
+		int dest = 0;
+
+		for_each_node_mask(s, tmp) {
+			d = node_remap(s, *from_nodes, *to_nodes);
+			if (s == d)
+				continue;
+
+			source = s;	/* Node moved. Memorize */
+			dest = d;
+
+			/* Is dest not in the remaining from nodes? */
+			if (!node_isset(dest, tmp))
+				break;
+		}
+		if (source == -1)
+			break;
+
+		node_clear(source, tmp);
+		err = migrate_to_node(mm, source, dest, flags);
+		if (err > 0)
+			busy += err;
+		if (err < 0)
+			break;
 	}
 
 	up_read(&mm->mmap_sem);
-	return count;
+	if (err < 0)
+		return err;
+	return busy;
 }
 
 long do_mbind(unsigned long start, unsigned long len,
@@ -643,8 +767,9 @@ long do_mbind(unsigned long start, unsigned long len,
 		int nr_failed = 0;
 
 		err = mbind_range(vma, start, end, new);
+
 		if (!list_empty(&pagelist))
-			nr_failed = swap_pages(&pagelist);
+			nr_failed = migrate_pages_to(&pagelist, vma, -1);
 
 		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
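
The pair-picking comment in do_migrate_pages() above is dense, so here is a self-contained userspace C sketch of the same scan. Nodemasks are modeled as plain 64-bit masks and node_remap64() is a simplified stand-in for the kernel's node_remap(); this illustrates the algorithm under those assumptions, it is not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

/* Number of set bits below 'bit' in 'mask' (the bit's ordinal). */
static int bit_ord(uint64_t mask, int bit)
{
	return __builtin_popcountll(mask & ((1ULL << bit) - 1));
}

/* Position of the n-th set bit of 'mask', counting from 0. */
static int ord_bit(uint64_t mask, int n)
{
	for (int b = 0; b < 64; b++)
		if ((mask & (1ULL << b)) && n-- == 0)
			return b;
	return -1;
}

/* Simplified node_remap(): map bit s of 'from' to the bit of 'to' with
 * the same ordinal, wrapping if 'to' has fewer bits set. */
static int node_remap64(int s, uint64_t from, uint64_t to)
{
	if (!(from & (1ULL << s)) || !to)
		return s;
	return ord_bit(to, bit_ord(from, s) % __builtin_popcountll(to));
}

int main(void)
{
	uint64_t from = 0x0f;	/* migrate off nodes 0-3 ... */
	uint64_t to   = 0xf0;	/* ... onto nodes 4-7 */
	uint64_t tmp  = from;

	while (tmp) {
		int source = -1, dest = 0;

		/* Same scan as the patch: remember the most recent pair
		 * that moved (s != d), and break out early if dest is not
		 * itself still a pending source, so we avoid piling two
		 * nodes' memory onto one node. */
		for (int s = 0; s < 64; s++) {
			if (!(tmp & (1ULL << s)))
				continue;
			int d = node_remap64(s, from, to);
			if (s == d)
				continue;
			source = s;
			dest = d;
			if (!(tmp & (1ULL << dest)))
				break;
		}
		if (source == -1)
			break;		/* nothing left worth migrating */

		tmp &= ~(1ULL << source);
		printf("migrate node %d -> node %d\n", source, dest);
	}
	return 0;
}

Run as-is, this prints migrate node 0 -> node 4 through migrate node 3 -> node 7, with each pass breaking out early because the destination is never a pending source. Changing to to 0x0c (nodes 2-3) exercises the fallback path: the early break never fires, and each pass settles for the most recent moved pair (printing 1 -> 3, then 0 -> 2, after which nodes 2 and 3 map to themselves and the loop stops).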