diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2009-12-14 20:59:59 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-15 11:53:24 -0500 |
commit | d33b9f45bd24a6391bc05e2b5a13c1b5787ca9c2 (patch) | |
tree | c79c7a452a18f513efbbc807440812432778332f /mm/pagewalk.c | |
parent | 4f16fc107d9c9b8a72aa19b189a9216e90a7aaef (diff) |
mm: hugetlb: fix hugepage memory leak in walk_page_range()
Most callers of pmd_none_or_clear_bad() check whether the target page is
in a hugepage or not, but walk_page_range() does not check it. So if we
read /proc/pid/pagemap for a hugepage on an x86 machine, the hugepage
memory is leaked as shown below. This patch fixes it.
Details
=======
My test program (leak_pagemap) works as follows:
- creat() and mmap() a file on hugetlbfs (file size is 200MB == 100 hugepages),
- read()/write() something on it,
- call page-types with option -p (walk around the page tables),
- munmap() and unlink() the file on hugetlbfs
Without my patches
------------------
$ cat /proc/meminfo |grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 1000
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ./leak_pagemap
[snip output]
$ cat /proc/meminfo |grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 900
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ls /hugetlbfs/
$
100 hugepages are accounted as used while there is no file on hugetlbfs.
With my patches
---------------
$ cat /proc/meminfo |grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 1000
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ./leak_pagemap
[snip output]
$ cat /proc/meminfo |grep "HugePage"
HugePages_Total: 1000
HugePages_Free: 1000
HugePages_Rsvd: 0
HugePages_Surp: 0
$ ls /hugetlbfs
$
No memory leaks.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: David Rientjes <rientjes@google.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/pagewalk.c')
-rw-r--r-- | mm/pagewalk.c | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index d5878bed7841..a286915e23ef 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/highmem.h> | 2 | #include <linux/highmem.h> |
3 | #include <linux/sched.h> | 3 | #include <linux/sched.h> |
4 | #include <linux/hugetlb.h> | ||
4 | 5 | ||
5 | static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | 6 | static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, |
6 | struct mm_walk *walk) | 7 | struct mm_walk *walk) |
@@ -107,6 +108,7 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
107 | pgd_t *pgd; | 108 | pgd_t *pgd; |
108 | unsigned long next; | 109 | unsigned long next; |
109 | int err = 0; | 110 | int err = 0; |
111 | struct vm_area_struct *vma; | ||
110 | 112 | ||
111 | if (addr >= end) | 113 | if (addr >= end) |
112 | return err; | 114 | return err; |
@@ -117,11 +119,22 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
117 | pgd = pgd_offset(walk->mm, addr); | 119 | pgd = pgd_offset(walk->mm, addr); |
118 | do { | 120 | do { |
119 | next = pgd_addr_end(addr, end); | 121 | next = pgd_addr_end(addr, end); |
122 | |||
123 | /* skip hugetlb vma to avoid hugepage PMD being cleared | ||
124 | * in pmd_none_or_clear_bad(). */ | ||
125 | vma = find_vma(walk->mm, addr); | ||
126 | if (vma && is_vm_hugetlb_page(vma)) { | ||
127 | if (vma->vm_end < next) | ||
128 | next = vma->vm_end; | ||
129 | continue; | ||
130 | } | ||
131 | |||
120 | if (pgd_none_or_clear_bad(pgd)) { | 132 | if (pgd_none_or_clear_bad(pgd)) { |
121 | if (walk->pte_hole) | 133 | if (walk->pte_hole) |
122 | err = walk->pte_hole(addr, next, walk); | 134 | err = walk->pte_hole(addr, next, walk); |
123 | if (err) | 135 | if (err) |
124 | break; | 136 | break; |
137 | pgd++; | ||
125 | continue; | 138 | continue; |
126 | } | 139 | } |
127 | if (walk->pgd_entry) | 140 | if (walk->pgd_entry) |
@@ -131,7 +144,8 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
131 | err = walk_pud_range(pgd, addr, next, walk); | 144 | err = walk_pud_range(pgd, addr, next, walk); |
132 | if (err) | 145 | if (err) |
133 | break; | 146 | break; |
134 | } while (pgd++, addr = next, addr != end); | 147 | pgd++; |
148 | } while (addr = next, addr != end); | ||
135 | 149 | ||
136 | return err; | 150 | return err; |
137 | } | 151 | } |