aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorCliff Wickman <cpw@sgi.com>2013-05-24 18:55:36 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-24 19:22:53 -0400
commita9ff785e4437c83d2179161e012f5bdfbd6381f0 (patch)
tree5aa1ae9e8a721903a0ad337a233cffce2806ea00 /mm
parent43c523bff7c3b47506d536c10637be8399dfd85f (diff)
mm/pagewalk.c: walk_page_range should avoid VM_PFNMAP areas
A panic can be caused by simply cat'ing /proc/<pid>/smaps while an application has a VM_PFNMAP range. It happened in-house when a benchmarker was trying to decipher the memory layout of his program. /proc/<pid>/smaps and similar walks through a user page table should not be looking at VM_PFNMAP areas. Certain tests in walk_page_range() (specifically split_huge_page_pmd()) assume that all the mapped PFN's are backed with page structures. And this is not usually true for VM_PFNMAP areas. This can result in panics on kernel page faults when attempting to address those page structures. There are a half dozen callers of walk_page_range() that walk through a task's entire page table (as N. Horiguchi pointed out). So rather than change all of them, this patch changes just walk_page_range() to ignore VM_PFNMAP areas. The logic of hugetlb_vma() is moved back into walk_page_range(), as we want to test any vma in the range. VM_PFNMAP areas are used by: - graphics memory manager gpu/drm/drm_gem.c - global reference unit sgi-gru/grufile.c - sgi special memory char/mspec.c - and probably several out-of-tree modules [akpm@linux-foundation.org: remove now-unused hugetlb_vma() stub] Signed-off-by: Cliff Wickman <cpw@sgi.com> Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: David Sterba <dsterba@suse.cz> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/pagewalk.c70
1 files changed, 36 insertions, 34 deletions
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 35aa294656cd..5da2cbcfdbb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
127 return 0; 127 return 0;
128} 128}
129 129
130static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
131{
132 struct vm_area_struct *vma;
133
134 /* We don't need vma lookup at all. */
135 if (!walk->hugetlb_entry)
136 return NULL;
137
138 VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
139 vma = find_vma(walk->mm, addr);
140 if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
141 return vma;
142
143 return NULL;
144}
145
146#else /* CONFIG_HUGETLB_PAGE */ 130#else /* CONFIG_HUGETLB_PAGE */
147static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
148{
149 return NULL;
150}
151
152static int walk_hugetlb_range(struct vm_area_struct *vma, 131static int walk_hugetlb_range(struct vm_area_struct *vma,
153 unsigned long addr, unsigned long end, 132 unsigned long addr, unsigned long end,
154 struct mm_walk *walk) 133 struct mm_walk *walk)
@@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end,
198 if (!walk->mm) 177 if (!walk->mm)
199 return -EINVAL; 178 return -EINVAL;
200 179
180 VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
181
201 pgd = pgd_offset(walk->mm, addr); 182 pgd = pgd_offset(walk->mm, addr);
202 do { 183 do {
203 struct vm_area_struct *vma; 184 struct vm_area_struct *vma = NULL;
204 185
205 next = pgd_addr_end(addr, end); 186 next = pgd_addr_end(addr, end);
206 187
207 /* 188 /*
208 * handle hugetlb vma individually because pagetable walk for 189 * This function was not intended to be vma based.
209 * the hugetlb page is dependent on the architecture and 190 * But there are vma special cases to be handled:
210 * we can't handled it in the same manner as non-huge pages. 191 * - hugetlb vma's
192 * - VM_PFNMAP vma's
211 */ 193 */
212 vma = hugetlb_vma(addr, walk); 194 vma = find_vma(walk->mm, addr);
213 if (vma) { 195 if (vma) {
214 if (vma->vm_end < next) 196 /*
197 * There are no page structures backing a VM_PFNMAP
198 * range, so do not allow split_huge_page_pmd().
199 */
200 if ((vma->vm_start <= addr) &&
201 (vma->vm_flags & VM_PFNMAP)) {
215 next = vma->vm_end; 202 next = vma->vm_end;
203 pgd = pgd_offset(walk->mm, next);
204 continue;
205 }
216 /* 206 /*
217 * Hugepage is very tightly coupled with vma, so 207 * Handle hugetlb vma individually because pagetable
218 * walk through hugetlb entries within a given vma. 208 * walk for the hugetlb page is dependent on the
209 * architecture and we can't handled it in the same
210 * manner as non-huge pages.
219 */ 211 */
220 err = walk_hugetlb_range(vma, addr, next, walk); 212 if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
221 if (err) 213 is_vm_hugetlb_page(vma)) {
222 break; 214 if (vma->vm_end < next)
223 pgd = pgd_offset(walk->mm, next); 215 next = vma->vm_end;
224 continue; 216 /*
217 * Hugepage is very tightly coupled with vma,
218 * so walk through hugetlb entries within a
219 * given vma.
220 */
221 err = walk_hugetlb_range(vma, addr, next, walk);
222 if (err)
223 break;
224 pgd = pgd_offset(walk->mm, next);
225 continue;
226 }
225 } 227 }
226 228
227 if (pgd_none_or_clear_bad(pgd)) { 229 if (pgd_none_or_clear_bad(pgd)) {