diff options
author | Lee Schermerhorn <Lee.Schermerhorn@hp.com> | 2007-11-14 19:59:10 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-11-14 21:45:38 -0500 |
commit | 3ad33b2436b545cbe8b28e53f3710432cad457ab (patch) | |
tree | 581808f90a08838ee27d76cc24812c7093c216a9 | |
parent | e1a1c997afe907e6ec4799e4be0f38cffd8b418c (diff) |
Migration: find correct vma in new_vma_page()
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via
mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For
anon-regions, we just fail to migrate any pages beyond the 1st vma in the
range.
This occurs because do_mbind() collects a list of pages to migrate by
calling check_range(). check_range() walks the task's mm, spanning vmas as
necessary, to collect the migratable pages into a list. Then, do_mbind()
calls migrate_pages() passing the list of pages, a function to allocate new
pages based on vma policy [new_vma_page()], and a pointer to the first vma
of the range.
For each page in the list, new_vma_page() calls page_address_in_vma()
passing the page and the vma [first in range] to obtain the address to pass
to alloc_page_vma(). The page address is needed to get interleaving
policy correct. If the pages in the list come from multiple vmas,
eventually, new_vma_page() will pass that page to page_address_in_vma()
with the incorrect vma. For !PageAnon pages, this will result in a bug
check in rmap.c:vma_address(). For anon pages, vma_address() will just
return -EFAULT and fail the migration.
This patch modifies new_vma_page() to check the return value from
page_address_in_vma(). If the return value is -EFAULT, new_vma_page()
searches forward via vm_next for the vma that maps the page--i.e., that does
not return -EFAULT. This assumes that the pages in the list handed to
migrate_pages() are in address order. This is currently the case. The patch
documents this assumption in a new comment block for new_vma_page().
If new_vma_page() cannot locate the vma mapping the page in a forward
search in the mm, it will pass a NULL vma to alloc_page_vma(). This will
result in the allocation using the task policy, if any, else system default
policy. This situation is unlikely, but the patch documents this behavior
with a comment.
Note, this patch results in restarting from the first vma in a multi-vma
range each time new_vma_page() is called. If this is not acceptable, we
can make the vma argument a pointer, both in new_vma_page() and its caller
unmap_and_move() so that the value held by the loop in migrate_pages()
always passes down the last vma in which a page was found. This will
require changes to all new_page_t functions passed to migrate_pages(). Is
this necessary?
For this patch to work, we can't bug check in vma_address() for pages
outside the argument vma. This patch removes the BUG_ON(). All other
callers [besides new_vma_page()] already check the return status.
Tested on x86_64, 4 node NUMA platform.
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/mempolicy.c | 21 | ||||
-rw-r--r-- | mm/rmap.c | 7 |
2 files changed, 23 insertions, 5 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c1592a94582f..83c69f8a64c2 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -722,12 +722,29 @@ out: | |||
722 | 722 | ||
723 | } | 723 | } |
724 | 724 | ||
725 | /* | ||
726 | * Allocate a new page for page migration based on vma policy. | ||
727 | * Start assuming that page is mapped by vma pointed to by @private. | ||
728 | * Search forward from there, if not. N.B., this assumes that the | ||
729 | * list of pages handed to migrate_pages()--which is how we get here-- | ||
730 | * is in virtual address order. | ||
731 | */ | ||
725 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) | 732 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) |
726 | { | 733 | { |
727 | struct vm_area_struct *vma = (struct vm_area_struct *)private; | 734 | struct vm_area_struct *vma = (struct vm_area_struct *)private; |
735 | unsigned long uninitialized_var(address); | ||
728 | 736 | ||
729 | return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, | 737 | while (vma) { |
730 | page_address_in_vma(page, vma)); | 738 | address = page_address_in_vma(page, vma); |
739 | if (address != -EFAULT) | ||
740 | break; | ||
741 | vma = vma->vm_next; | ||
742 | } | ||
743 | |||
744 | /* | ||
745 | * if !vma, alloc_page_vma() will use task or system default policy | ||
746 | */ | ||
747 | return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | ||
731 | } | 748 | } |
732 | #else | 749 | #else |
733 | 750 | ||
@@ -183,7 +183,9 @@ static void page_unlock_anon_vma(struct anon_vma *anon_vma) | |||
183 | } | 183 | } |
184 | 184 | ||
185 | /* | 185 | /* |
186 | * At what user virtual address is page expected in vma? | 186 | * At what user virtual address is page expected in @vma? |
187 | * Returns virtual address or -EFAULT if page's index/offset is not | ||
188 | * within the range mapped the @vma. | ||
187 | */ | 189 | */ |
188 | static inline unsigned long | 190 | static inline unsigned long |
189 | vma_address(struct page *page, struct vm_area_struct *vma) | 191 | vma_address(struct page *page, struct vm_area_struct *vma) |
@@ -193,8 +195,7 @@ vma_address(struct page *page, struct vm_area_struct *vma) | |||
193 | 195 | ||
194 | address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); | 196 | address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); |
195 | if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { | 197 | if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { |
196 | /* page should be within any vma from prio_tree_next */ | 198 | /* page should be within @vma mapping range */ |
197 | BUG_ON(!PageAnon(page)); | ||
198 | return -EFAULT; | 199 | return -EFAULT; |
199 | } | 200 | } |
200 | return address; | 201 | return address; |