diff options
author | KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> | 2010-03-05 16:41:57 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-03-06 14:26:25 -0500 |
commit | 9d8cebd4bcd7c3878462fdfda34bbcdeb4df7ef4 (patch) | |
tree | 0f0a6dadb4430aef18f1491003d70d9351d7b619 | |
parent | 93e4a89a8c987189b168a530a331ef6d0fcf07a7 (diff) |
mm: fix mbind vma merge problem
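Currently, mbind() does not merge a vma with its neighboring vmas even when merging is possible.
Unfortunately, an excessive number of vmas can reduce performance.
This patch fixes that by trying vma_merge() on each affected range before falling back to split_vma().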
Reproducer program:
----------------------------------------------------------------
#include <numaif.h>
#include <numa.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
static unsigned long pagesize;
/*
 * Reproducer for the mbind() vma merge problem.
 *
 * Maps three anonymous pages, binds only the middle page to a NUMA node with
 * MPOL_BIND, then resets the policy of the whole range to MPOL_DEFAULT and
 * dumps /proc/<pid>/maps so the number of vmas covering the range can be
 * inspected.
 *
 * Options:
 *   -n node   node to bind the middle page to (may be repeated; defaults to
 *             node 0 when no -n is given)
 *
 * Returns 0 on success; exits with status 1 if mmap() or mbind() fails.
 */
int main(int argc, char** argv)
{
	void *addr;
	int ch;
	int node;
	struct bitmask *nmask = numa_allocate_nodemask();
	int err;
	int node_set = 0;
	char buf[128];

	while ((ch = getopt(argc, argv, "n:")) != -1) {
		switch (ch) {
		case 'n':
			node = strtol(optarg, NULL, 0);
			numa_bitmask_setbit(nmask, node);
			node_set = 1;
			break;
		default:
			break;
		}
	}
	argc -= optind;
	argv += optind;

	/* No node requested on the command line: fall back to node 0. */
	if (!node_set)
		numa_bitmask_setbit(nmask, 0);

	pagesize = getpagesize();

	/* Anonymous mappings take fd == -1 for portability. */
	addr = mmap(NULL, pagesize*3, PROT_READ|PROT_WRITE,
		    MAP_ANON|MAP_PRIVATE, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap ");
		exit(1);
	}

	fprintf(stderr, "pid = %d \n" "addr = %p\n", (int)getpid(), addr);

	/* Touch every page so that all three are actually populated. */
	memset(addr, 0, pagesize*3);

	/* First mbind: bind only the middle page. */
	err = mbind((char *)addr + pagesize, pagesize, MPOL_BIND, nmask->maskp,
		    nmask->size, MPOL_MF_MOVE_ALL);
	if (err) {
		perror("mbind1 ");
		exit(1);
	}

	/* Second mbind: reset the whole three-page range to the default policy. */
	err = mbind(addr, pagesize*3, MPOL_DEFAULT, NULL, 0, 0);
	if (err) {
		perror("mbind2 ");
		exit(1);
	}

	/* Show the resulting vma layout of this process. */
	snprintf(buf, sizeof(buf), "cat /proc/%d/maps", (int)getpid());
	system(buf);

	return 0;
}
----------------------------------------------------------------
result without this patch
addr = 0x7fe26ef09000
[snip]
7fe26ef09000-7fe26ef0a000 rw-p 00000000 00:00 0
7fe26ef0a000-7fe26ef0b000 rw-p 00000000 00:00 0
7fe26ef0b000-7fe26ef0c000 rw-p 00000000 00:00 0
7fe26ef0c000-7fe26ef0d000 rw-p 00000000 00:00 0
=> The range 0x7fe26ef09000-0x7fe26ef0c000 is still split into three vmas.
result with this patch
addr = 0x7fc9ebc76000
[snip]
7fc9ebc76000-7fc9ebc7a000 rw-p 00000000 00:00 0
7fffbe690000-7fffbe6a5000 rw-p 00000000 00:00 0 [stack]
=> The range 0x7fc9ebc76000-0x7fc9ebc7a000 is covered by only one vma.
[minchan.kim@gmail.com: fix file offset passed to vma_merge()]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reviewed-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/mempolicy.c | 52 |
1 files changed, 39 insertions, 13 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 290fb5bf0440..44dd9d1521ec 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -563,24 +563,50 @@ static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) | |||
563 | } | 563 | } |
564 | 564 | ||
565 | /* Step 2: apply policy to a range and do splits. */ | 565 | /* Step 2: apply policy to a range and do splits. */ |
566 | static int mbind_range(struct vm_area_struct *vma, unsigned long start, | 566 | static int mbind_range(struct mm_struct *mm, unsigned long start, |
567 | unsigned long end, struct mempolicy *new) | 567 | unsigned long end, struct mempolicy *new_pol) |
568 | { | 568 | { |
569 | struct vm_area_struct *next; | 569 | struct vm_area_struct *next; |
570 | int err; | 570 | struct vm_area_struct *prev; |
571 | struct vm_area_struct *vma; | ||
572 | int err = 0; | ||
573 | pgoff_t pgoff; | ||
574 | unsigned long vmstart; | ||
575 | unsigned long vmend; | ||
571 | 576 | ||
572 | err = 0; | 577 | vma = find_vma_prev(mm, start, &prev); |
573 | for (; vma && vma->vm_start < end; vma = next) { | 578 | if (!vma || vma->vm_start > start) |
579 | return -EFAULT; | ||
580 | |||
581 | for (; vma && vma->vm_start < end; prev = vma, vma = next) { | ||
574 | next = vma->vm_next; | 582 | next = vma->vm_next; |
575 | if (vma->vm_start < start) | 583 | vmstart = max(start, vma->vm_start); |
576 | err = split_vma(vma->vm_mm, vma, start, 1); | 584 | vmend = min(end, vma->vm_end); |
577 | if (!err && vma->vm_end > end) | 585 | |
578 | err = split_vma(vma->vm_mm, vma, end, 0); | 586 | pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); |
579 | if (!err) | 587 | prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, |
580 | err = policy_vma(vma, new); | 588 | vma->anon_vma, vma->vm_file, pgoff, new_pol); |
589 | if (prev) { | ||
590 | vma = prev; | ||
591 | next = vma->vm_next; | ||
592 | continue; | ||
593 | } | ||
594 | if (vma->vm_start != vmstart) { | ||
595 | err = split_vma(vma->vm_mm, vma, vmstart, 1); | ||
596 | if (err) | ||
597 | goto out; | ||
598 | } | ||
599 | if (vma->vm_end != vmend) { | ||
600 | err = split_vma(vma->vm_mm, vma, vmend, 0); | ||
601 | if (err) | ||
602 | goto out; | ||
603 | } | ||
604 | err = policy_vma(vma, new_pol); | ||
581 | if (err) | 605 | if (err) |
582 | break; | 606 | goto out; |
583 | } | 607 | } |
608 | |||
609 | out: | ||
584 | return err; | 610 | return err; |
585 | } | 611 | } |
586 | 612 | ||
@@ -1047,7 +1073,7 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1047 | if (!IS_ERR(vma)) { | 1073 | if (!IS_ERR(vma)) { |
1048 | int nr_failed = 0; | 1074 | int nr_failed = 0; |
1049 | 1075 | ||
1050 | err = mbind_range(vma, start, end, new); | 1076 | err = mbind_range(mm, start, end, new); |
1051 | 1077 | ||
1052 | if (!list_empty(&pagelist)) | 1078 | if (!list_empty(&pagelist)) |
1053 | nr_failed = migrate_pages(&pagelist, new_vma_page, | 1079 | nr_failed = migrate_pages(&pagelist, new_vma_page, |