aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@engr.sgi.com>2006-02-24 16:04:12 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-02-24 17:31:38 -0500
commit1e275d406bf6b88e4de6925cf594b64bb2ec49bc (patch)
tree6fe143317fbc442407244a3c55ecf475072a28f3
parentf68a106f224c21148c5264a429fac149dc7ad0ac (diff)
[PATCH] page migration: Fix MPOL_INTERLEAVE behavior for migration via mbind()
migrate_pages_to() allocates a list of new pages on the intended target node or with the intended policy and then uses the list of new pages as targets for the migration of a list of pages out of place. When the pages are allocated it is not clear which of the out of place pages will be moved to the new pages. So we cannot specify an address as needed by alloc_page_vma(). This causes a problem for MPOL_INTERLEAVE, which will currently allocate the pages on the first node of the set. If mbind is used with a vma that has the policy of MPOL_INTERLEAVE then the interleaving of pages may be destroyed. This patch fixes that by generating a fake address for each alloc_page_vma which will result in a distribution of pages as prescribed by MPOL_INTERLEAVE. Lee also noted that the sequence of nodes for the new pages seems to be inverted. So we also invert the way the lists of pages for migration are built. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Looks-ok-to: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--mm/mempolicy.c22
1 files changed, 18 insertions, 4 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 880831bd3003..67af4cea1e23 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -552,7 +552,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
552 */ 552 */
553 if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) { 553 if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
554 if (isolate_lru_page(page)) 554 if (isolate_lru_page(page))
555 list_add(&page->lru, pagelist); 555 list_add_tail(&page->lru, pagelist);
556 } 556 }
557} 557}
558 558
@@ -569,6 +569,7 @@ static int migrate_pages_to(struct list_head *pagelist,
569 LIST_HEAD(moved); 569 LIST_HEAD(moved);
570 LIST_HEAD(failed); 570 LIST_HEAD(failed);
571 int err = 0; 571 int err = 0;
572 unsigned long offset = 0;
572 int nr_pages; 573 int nr_pages;
573 struct page *page; 574 struct page *page;
574 struct list_head *p; 575 struct list_head *p;
@@ -576,8 +577,21 @@ static int migrate_pages_to(struct list_head *pagelist,
576redo: 577redo:
577 nr_pages = 0; 578 nr_pages = 0;
578 list_for_each(p, pagelist) { 579 list_for_each(p, pagelist) {
579 if (vma) 580 if (vma) {
580 page = alloc_page_vma(GFP_HIGHUSER, vma, vma->vm_start); 581 /*
582 * The address passed to alloc_page_vma is used to
583 * generate the proper interleave behavior. We fake
584 * the address here by an increasing offset in order
585 * to get the proper distribution of pages.
586 *
587 * No decision has been made as to which page
588 * a certain old page is moved to so we cannot
589 * specify the correct address.
590 */
591 page = alloc_page_vma(GFP_HIGHUSER, vma,
592 offset + vma->vm_start);
593 offset += PAGE_SIZE;
594 }
581 else 595 else
582 page = alloc_pages_node(dest, GFP_HIGHUSER, 0); 596 page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
583 597
@@ -585,7 +599,7 @@ redo:
585 err = -ENOMEM; 599 err = -ENOMEM;
586 goto out; 600 goto out;
587 } 601 }
588 list_add(&page->lru, &newlist); 602 list_add_tail(&page->lru, &newlist);
589 nr_pages++; 603 nr_pages++;
590 if (nr_pages > MIGRATE_CHUNK_SIZE) 604 if (nr_pages > MIGRATE_CHUNK_SIZE)
591 break; 605 break;