aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2009-12-14 20:59:59 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-15 11:53:24 -0500
commitd33b9f45bd24a6391bc05e2b5a13c1b5787ca9c2 (patch)
treec79c7a452a18f513efbbc807440812432778332f /mm
parent4f16fc107d9c9b8a72aa19b189a9216e90a7aaef (diff)
mm: hugetlb: fix hugepage memory leak in walk_page_range()
Most callers of pmd_none_or_clear_bad() check whether the target page is in a hugepage or not, but walk_page_range() do not check it. So if we read /proc/pid/pagemap for the hugepage on x86 machine, the hugepage memory is leaked as shown below. This patch fixes it. Details ======= My test program (leak_pagemap) works as follows: - creat() and mmap() a file on hugetlbfs (file size is 200MB == 100 hugepages,) - read()/write() something on it, - call page-types with option -p (walk around the page tables), - munmap() and unlink() the file on hugetlbfs Without my patches ------------------ $ cat /proc/meminfo |grep "HugePage" HugePages_Total: 1000 HugePages_Free: 1000 HugePages_Rsvd: 0 HugePages_Surp: 0 $ ./leak_pagemap [snip output] $ cat /proc/meminfo |grep "HugePage" HugePages_Total: 1000 HugePages_Free: 900 HugePages_Rsvd: 0 HugePages_Surp: 0 $ ls /hugetlbfs/ $ 100 hugepages are accounted as used while there is no file on hugetlbfs. With my patches --------------- $ cat /proc/meminfo |grep "HugePage" HugePages_Total: 1000 HugePages_Free: 1000 HugePages_Rsvd: 0 HugePages_Surp: 0 $ ./leak_pagemap [snip output] $ cat /proc/meminfo |grep "HugePage" HugePages_Total: 1000 HugePages_Free: 1000 HugePages_Rsvd: 0 HugePages_Surp: 0 $ ls /hugetlbfs $ No memory leaks. Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Andy Whitcroft <apw@canonical.com> Cc: David Rientjes <rientjes@google.com> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/pagewalk.c16
1 files changed, 15 insertions, 1 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d5878bed7841..a286915e23ef 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -1,6 +1,7 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/highmem.h> 2#include <linux/highmem.h>
3#include <linux/sched.h> 3#include <linux/sched.h>
4#include <linux/hugetlb.h>
4 5
5static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 6static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
6 struct mm_walk *walk) 7 struct mm_walk *walk)
@@ -107,6 +108,7 @@ int walk_page_range(unsigned long addr, unsigned long end,
107 pgd_t *pgd; 108 pgd_t *pgd;
108 unsigned long next; 109 unsigned long next;
109 int err = 0; 110 int err = 0;
111 struct vm_area_struct *vma;
110 112
111 if (addr >= end) 113 if (addr >= end)
112 return err; 114 return err;
@@ -117,11 +119,22 @@ int walk_page_range(unsigned long addr, unsigned long end,
117 pgd = pgd_offset(walk->mm, addr); 119 pgd = pgd_offset(walk->mm, addr);
118 do { 120 do {
119 next = pgd_addr_end(addr, end); 121 next = pgd_addr_end(addr, end);
122
123 /* skip hugetlb vma to avoid hugepage PMD being cleared
124 * in pmd_none_or_clear_bad(). */
125 vma = find_vma(walk->mm, addr);
126 if (vma && is_vm_hugetlb_page(vma)) {
127 if (vma->vm_end < next)
128 next = vma->vm_end;
129 continue;
130 }
131
120 if (pgd_none_or_clear_bad(pgd)) { 132 if (pgd_none_or_clear_bad(pgd)) {
121 if (walk->pte_hole) 133 if (walk->pte_hole)
122 err = walk->pte_hole(addr, next, walk); 134 err = walk->pte_hole(addr, next, walk);
123 if (err) 135 if (err)
124 break; 136 break;
137 pgd++;
125 continue; 138 continue;
126 } 139 }
127 if (walk->pgd_entry) 140 if (walk->pgd_entry)
@@ -131,7 +144,8 @@ int walk_page_range(unsigned long addr, unsigned long end,
131 err = walk_pud_range(pgd, addr, next, walk); 144 err = walk_pud_range(pgd, addr, next, walk);
132 if (err) 145 if (err)
133 break; 146 break;
134 } while (pgd++, addr = next, addr != end); 147 pgd++;
148 } while (addr = next, addr != end);
135 149
136 return err; 150 return err;
137} 151}