diff options
author | Joonsoo Kim <iamjoonsoo.kim@lge.com> | 2013-09-11 17:21:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-11 18:57:27 -0400 |
commit | 37a2140dc2145a6f154172286944a1861e978dfd (patch) | |
tree | 8efc5e90bd85c02adc5915b22e1c9d57ba9e1d03 /mm | |
parent | c0d934ba278935fa751057091fe4a7c02d814f68 (diff) |
mm, hugetlb: do not use a page in page cache for cow optimization
Currently, we use a page in the page cache with a map count of 1 for the
cow optimization. If we find this condition, we don't allocate a new page
and copy the contents. Instead, we map this page directly. This may
introduce a problem: writing to a private mapping overwrites the hugetlb
file directly. You can reproduce this situation with the following code.
size = 20 * MB;
flag = MAP_SHARED;
p = mmap(NULL, size, PROT_READ|PROT_WRITE, flag, fd, 0);
if (p == MAP_FAILED) {
fprintf(stderr, "mmap() failed: %s\n", strerror(errno));
return -1;
}
p[0] = 's';
fprintf(stdout, "BEFORE STEAL PRIVATE WRITE: %c\n", p[0]);
munmap(p, size);
flag = MAP_PRIVATE;
p = mmap(NULL, size, PROT_READ|PROT_WRITE, flag, fd, 0);
if (p == MAP_FAILED) {
fprintf(stderr, "mmap() failed: %s\n", strerror(errno));
}
p[0] = 'c';
munmap(p, size);
flag = MAP_SHARED;
p = mmap(NULL, size, PROT_READ|PROT_WRITE, flag, fd, 0);
if (p == MAP_FAILED) {
fprintf(stderr, "mmap() failed: %s\n", strerror(errno));
return -1;
}
fprintf(stdout, "AFTER STEAL PRIVATE WRITE: %c\n", p[0]);
munmap(p, size);
The output is "AFTER STEAL PRIVATE WRITE: c", not the expected "AFTER STEAL
PRIVATE WRITE: s". If we turn off this optimization for pages in the page
cache, the problem disappears.
So, I change the trigger condition of the optimization. If the page is not
an anonymous page (PageAnon), we don't do the optimization. This turns the
optimization off for pages in the page cache.
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Hillf Danton <dhillf@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/hugetlb.c | 7 |
1 files changed, 2 insertions, 5 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a13be48b818b..da027a3307af 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2528,7 +2528,6 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2528 | { | 2528 | { |
2529 | struct hstate *h = hstate_vma(vma); | 2529 | struct hstate *h = hstate_vma(vma); |
2530 | struct page *old_page, *new_page; | 2530 | struct page *old_page, *new_page; |
2531 | int avoidcopy; | ||
2532 | int outside_reserve = 0; | 2531 | int outside_reserve = 0; |
2533 | unsigned long mmun_start; /* For mmu_notifiers */ | 2532 | unsigned long mmun_start; /* For mmu_notifiers */ |
2534 | unsigned long mmun_end; /* For mmu_notifiers */ | 2533 | unsigned long mmun_end; /* For mmu_notifiers */ |
@@ -2538,10 +2537,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2538 | retry_avoidcopy: | 2537 | retry_avoidcopy: |
2539 | /* If no-one else is actually using this page, avoid the copy | 2538 | /* If no-one else is actually using this page, avoid the copy |
2540 | * and just make the page writable */ | 2539 | * and just make the page writable */ |
2541 | avoidcopy = (page_mapcount(old_page) == 1); | 2540 | if (page_mapcount(old_page) == 1 && PageAnon(old_page)) { |
2542 | if (avoidcopy) { | 2541 | page_move_anon_rmap(old_page, vma, address); |
2543 | if (PageAnon(old_page)) | ||
2544 | page_move_anon_rmap(old_page, vma, address); | ||
2545 | set_huge_ptep_writable(vma, address, ptep); | 2542 | set_huge_ptep_writable(vma, address, ptep); |
2546 | return 0; | 2543 | return 0; |
2547 | } | 2544 | } |