diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2009-12-14 20:59:58 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-15 11:53:24 -0500 |
commit | 4f16fc107d9c9b8a72aa19b189a9216e90a7aaef (patch) | |
tree | 7d2cd426ab80a30d3fc7584605738ad39a8b24be /mm/mincore.c | |
parent | 536240f2bde98216feac87b4891d19a536b8884a (diff) |
mm: hugetlb: fix hugepage memory leak in mincore()
Most callers of pmd_none_or_clear_bad() check whether the target page is
in a hugepage or not, but mincore() and walk_page_range() do not check it.
So if we use mincore() on a hugepage on x86 machine, the hugepage memory
is leaked as shown below. This patch fixes it by extending mincore()
system call to support hugepages.
Details
=======
My test program (leak_mincore) works as follows:
- creat() and mmap() a file on hugetlbfs (file size is 200MB == 100 hugepages,)
- read()/write() something on it,
- call mincore() for first ten pages and printf() the values of *vec
- munmap() and unlink() the file on hugetlbfs
Without my patch
----------------
$ cat /proc/meminfo| grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 1000
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ./leak_mincore
vec[0] 0
vec[1] 0
vec[2] 0
vec[3] 0
vec[4] 0
vec[5] 0
vec[6] 0
vec[7] 0
vec[8] 0
vec[9] 0
$ cat /proc/meminfo |grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 999
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ls /hugetlbfs/
$
Return values in *vec from mincore() are set to 0, while the hugepage
should be in memory, and 1 hugepage is still accounted as used while
there is no file on hugetlbfs.
With my patch
-------------
$ cat /proc/meminfo| grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 1000
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ./leak_mincore
vec[0] 1
vec[1] 1
vec[2] 1
vec[3] 1
vec[4] 1
vec[5] 1
vec[6] 1
vec[7] 1
vec[8] 1
vec[9] 1
$ cat /proc/meminfo |grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 1000
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ls /hugetlbfs/
$
Return value in *vec set to 1 and no memory leaks.
[akpm@linux-foundation.org: cleanup]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: David Rientjes <rientjes@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mincore.c')
-rw-r--r-- | mm/mincore.c | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/mm/mincore.c b/mm/mincore.c index 8cb508f84ea4..7a3436ef39eb 100644 --- a/mm/mincore.c +++ b/mm/mincore.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
15 | #include <linux/swap.h> | 15 | #include <linux/swap.h> |
16 | #include <linux/swapops.h> | 16 | #include <linux/swapops.h> |
17 | #include <linux/hugetlb.h> | ||
17 | 18 | ||
18 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
19 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
@@ -72,6 +73,42 @@ static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pag | |||
72 | if (!vma || addr < vma->vm_start) | 73 | if (!vma || addr < vma->vm_start) |
73 | return -ENOMEM; | 74 | return -ENOMEM; |
74 | 75 | ||
76 | #ifdef CONFIG_HUGETLB_PAGE | ||
77 | if (is_vm_hugetlb_page(vma)) { | ||
78 | struct hstate *h; | ||
79 | unsigned long nr_huge; | ||
80 | unsigned char present; | ||
81 | |||
82 | i = 0; | ||
83 | nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT); | ||
84 | h = hstate_vma(vma); | ||
85 | nr_huge = ((addr + pages * PAGE_SIZE - 1) >> huge_page_shift(h)) | ||
86 | - (addr >> huge_page_shift(h)) + 1; | ||
87 | nr_huge = min(nr_huge, | ||
88 | (vma->vm_end - addr) >> huge_page_shift(h)); | ||
89 | while (1) { | ||
90 | /* hugepage always in RAM for now, | ||
91 | * but generally it needs to be check */ | ||
92 | ptep = huge_pte_offset(current->mm, | ||
93 | addr & huge_page_mask(h)); | ||
94 | present = !!(ptep && | ||
95 | !huge_pte_none(huge_ptep_get(ptep))); | ||
96 | while (1) { | ||
97 | vec[i++] = present; | ||
98 | addr += PAGE_SIZE; | ||
99 | /* reach buffer limit */ | ||
100 | if (i == nr) | ||
101 | return nr; | ||
102 | /* check hugepage border */ | ||
103 | if (!((addr & ~huge_page_mask(h)) | ||
104 | >> PAGE_SHIFT)) | ||
105 | break; | ||
106 | } | ||
107 | } | ||
108 | return nr; | ||
109 | } | ||
110 | #endif | ||
111 | |||
75 | /* | 112 | /* |
76 | * Calculate how many pages there are left in the last level of the | 113 | * Calculate how many pages there are left in the last level of the |
77 | * PTE array for our address. | 114 | * PTE array for our address. |