aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> 2010-03-05 16:41:57 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-06 14:26:25 -0500
commit: 9d8cebd4bcd7c3878462fdfda34bbcdeb4df7ef4 (patch)
tree: 0f0a6dadb4430aef18f1491003d70d9351d7b619
parent: 93e4a89a8c987189b168a530a331ef6d0fcf07a7 (diff)
mm: fix mbind vma merge problem
Strangely, the current mbind() doesn't merge a vma with its neighboring vmas even though that is possible. Unfortunately, having many vmas can reduce performance... This patch fixes it.

Reproducer program:
----------------------------------------------------------------
#include <numaif.h>
#include <numa.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

static unsigned long pagesize;

int main(int argc, char** argv)
{
	void* addr;
	int ch;
	int node;
	struct bitmask *nmask = numa_allocate_nodemask();
	int err;
	int node_set = 0;
	char buf[128];

	while ((ch = getopt(argc, argv, "n:")) != -1){
		switch (ch){
		case 'n':
			node = strtol(optarg, NULL, 0);
			numa_bitmask_setbit(nmask, node);
			node_set = 1;
			break;
		default:
			;
		}
	}
	argc -= optind;
	argv += optind;

	if (!node_set)
		numa_bitmask_setbit(nmask, 0);

	pagesize = getpagesize();

	addr = mmap(NULL, pagesize*3, PROT_READ|PROT_WRITE,
		    MAP_ANON|MAP_PRIVATE, 0, 0);
	if (addr == MAP_FAILED)
		perror("mmap "), exit(1);

	fprintf(stderr, "pid = %d \n" "addr = %p\n", getpid(), addr);

	/* make page populate */
	memset(addr, 0, pagesize*3);

	/* first mbind */
	err = mbind(addr+pagesize, pagesize, MPOL_BIND, nmask->maskp,
		    nmask->size, MPOL_MF_MOVE_ALL);
	if (err)
		error("mbind1 ");

	/* second mbind */
	err = mbind(addr, pagesize*3, MPOL_DEFAULT, NULL, 0, 0);
	if (err)
		error("mbind2 ");

	sprintf(buf, "cat /proc/%d/maps", getpid());
	system(buf);

	return 0;
}
----------------------------------------------------------------

Result without this patch:

	addr = 0x7fe26ef09000
	[snip]
	7fe26ef09000-7fe26ef0a000 rw-p 00000000 00:00 0
	7fe26ef0a000-7fe26ef0b000 rw-p 00000000 00:00 0
	7fe26ef0b000-7fe26ef0c000 rw-p 00000000 00:00 0
	7fe26ef0c000-7fe26ef0d000 rw-p 00000000 00:00 0

	=> 0x7fe26ef09000-0x7fe26ef0c000 has three vmas.

Result with this patch:

	addr = 0x7fc9ebc76000
	[snip]
	7fc9ebc76000-7fc9ebc7a000 rw-p 00000000 00:00 0
	7fffbe690000-7fffbe6a5000 rw-p 00000000 00:00 0 [stack]

	=> 0x7fc9ebc76000-0x7fc9ebc7a000 has only one vma.
[minchan.kim@gmail.com: fix file offset passed to vma_merge()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/mempolicy.c 52
1 files changed, 39 insertions, 13 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 290fb5bf0440..44dd9d1521ec 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -563,24 +563,50 @@ static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
563} 563}
564 564
565/* Step 2: apply policy to a range and do splits. */ 565/* Step 2: apply policy to a range and do splits. */
566static int mbind_range(struct vm_area_struct *vma, unsigned long start, 566static int mbind_range(struct mm_struct *mm, unsigned long start,
567 unsigned long end, struct mempolicy *new) 567 unsigned long end, struct mempolicy *new_pol)
568{ 568{
569 struct vm_area_struct *next; 569 struct vm_area_struct *next;
570 int err; 570 struct vm_area_struct *prev;
571 struct vm_area_struct *vma;
572 int err = 0;
573 pgoff_t pgoff;
574 unsigned long vmstart;
575 unsigned long vmend;
571 576
572 err = 0; 577 vma = find_vma_prev(mm, start, &prev);
573 for (; vma && vma->vm_start < end; vma = next) { 578 if (!vma || vma->vm_start > start)
579 return -EFAULT;
580
581 for (; vma && vma->vm_start < end; prev = vma, vma = next) {
574 next = vma->vm_next; 582 next = vma->vm_next;
575 if (vma->vm_start < start) 583 vmstart = max(start, vma->vm_start);
576 err = split_vma(vma->vm_mm, vma, start, 1); 584 vmend = min(end, vma->vm_end);
577 if (!err && vma->vm_end > end) 585
578 err = split_vma(vma->vm_mm, vma, end, 0); 586 pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
579 if (!err) 587 prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
580 err = policy_vma(vma, new); 588 vma->anon_vma, vma->vm_file, pgoff, new_pol);
589 if (prev) {
590 vma = prev;
591 next = vma->vm_next;
592 continue;
593 }
594 if (vma->vm_start != vmstart) {
595 err = split_vma(vma->vm_mm, vma, vmstart, 1);
596 if (err)
597 goto out;
598 }
599 if (vma->vm_end != vmend) {
600 err = split_vma(vma->vm_mm, vma, vmend, 0);
601 if (err)
602 goto out;
603 }
604 err = policy_vma(vma, new_pol);
581 if (err) 605 if (err)
582 break; 606 goto out;
583 } 607 }
608
609 out:
584 return err; 610 return err;
585} 611}
586 612
@@ -1047,7 +1073,7 @@ static long do_mbind(unsigned long start, unsigned long len,
1047 if (!IS_ERR(vma)) { 1073 if (!IS_ERR(vma)) {
1048 int nr_failed = 0; 1074 int nr_failed = 0;
1049 1075
1050 err = mbind_range(vma, start, end, new); 1076 err = mbind_range(mm, start, end, new);
1051 1077
1052 if (!list_empty(&pagelist)) 1078 if (!list_empty(&pagelist))
1053 nr_failed = migrate_pages(&pagelist, new_vma_page, 1079 nr_failed = migrate_pages(&pagelist, new_vma_page,