author		Cliff Wickman <cpw@sgi.com>	2013-05-24 18:55:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-05-24 19:22:53 -0400
commit		a9ff785e4437c83d2179161e012f5bdfbd6381f0 (patch)
tree		5aa1ae9e8a721903a0ad337a233cffce2806ea00 /mm/pagewalk.c
parent		43c523bff7c3b47506d536c10637be8399dfd85f (diff)
mm/pagewalk.c: walk_page_range should avoid VM_PFNMAP areas
A panic can be caused by simply cat'ing /proc/<pid>/smaps while an
application has a VM_PFNMAP range. It happened in-house when a
benchmarker was trying to decipher the memory layout of his program.
/proc/<pid>/smaps and similar walks through a user page table should not
be looking at VM_PFNMAP areas.
Certain tests in walk_page_range() (specifically split_huge_page_pmd())
assume that all mapped PFNs are backed by page structures. This is
generally not true for VM_PFNMAP areas, so attempting to address those
nonexistent page structures results in kernel page faults and panics.
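Concretely, the failure mode is code that turns a raw PFN into a struct page
pointer and then dereferences it. A minimal, hypothetical fragment
illustrating the broken assumption (touch_mapped_page() is made up for
illustration; it is not the actual call chain through split_huge_page_pmd()):

#include <linux/mm.h>

/*
 * Hypothetical illustration of the assumption that breaks: for a
 * VM_PFNMAP mapping there may be no struct page behind the PFN, so
 * pfn_to_page() returns a pointer into a nonexistent part of the
 * memmap, and the dereference inside get_page() faults in kernel mode.
 */
static void touch_mapped_page(unsigned long pfn)
{
	struct page *page = pfn_to_page(pfn);	/* no validity check */

	get_page(page);		/* dereferences *page -> possible panic */
	put_page(page);
}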
There are a half dozen callers of walk_page_range() that walk through a
task's entire page table (as N. Horiguchi pointed out). So rather than
change all of them, this patch changes just walk_page_range() to ignore
VM_PFNMAP areas.
The logic of hugetlb_vma() is moved back into walk_page_range(), as we
want to test any vma in the range.
VM_PFNMAP areas are used by:
- graphics memory manager gpu/drm/drm_gem.c
- global reference unit sgi-gru/grufile.c
- sgi special memory char/mspec.c
- and probably several out-of-tree modules (a sketch of how such drivers typically create these mappings follows this list)
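For context, a hypothetical sketch of how a character driver of this sort
ends up with a VM_PFNMAP vma: remap_pfn_range() sets VM_PFNMAP (among other
flags) on the vma, mapping raw device PFNs that have no struct page behind
them. The device name, physical base address, and fops below are
illustrative, not taken from any of the drivers above.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>

/* Illustrative value only -- not from a real device. */
#define MYDEV_PHYS_BASE	0xfd000000UL

/*
 * mmap handler for a hypothetical device: map device memory into
 * userspace with remap_pfn_range(), which marks the vma VM_PFNMAP.
 * The mapped PFNs point at device memory, so pfn_to_page() on them
 * is invalid -- exactly the case walk_page_range() must now skip.
 */
static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	return remap_pfn_range(vma, vma->vm_start,
			       MYDEV_PHYS_BASE >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}

static const struct file_operations mydev_fops = {
	.owner	= THIS_MODULE,
	.mmap	= mydev_mmap,
};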
[akpm@linux-foundation.org: remove now-unused hugetlb_vma() stub]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Sterba <dsterba@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/pagewalk.c')
-rw-r--r--	mm/pagewalk.c	70
1 file changed, 36 insertions(+), 34 deletions(-)
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 35aa294656cd..5da2cbcfdbb5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -127,28 +127,7 @@ static int walk_hugetlb_range(struct vm_area_struct *vma,
 	return 0;
 }
 
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	struct vm_area_struct *vma;
-
-	/* We don't need vma lookup at all. */
-	if (!walk->hugetlb_entry)
-		return NULL;
-
-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-	vma = find_vma(walk->mm, addr);
-	if (vma && vma->vm_start <= addr && is_vm_hugetlb_page(vma))
-		return vma;
-
-	return NULL;
-}
-
 #else /* CONFIG_HUGETLB_PAGE */
-static struct vm_area_struct* hugetlb_vma(unsigned long addr, struct mm_walk *walk)
-{
-	return NULL;
-}
-
 static int walk_hugetlb_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		struct mm_walk *walk)
@@ -198,30 +177,53 @@ int walk_page_range(unsigned long addr, unsigned long end,
 	if (!walk->mm)
 		return -EINVAL;
 
+	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
+
 	pgd = pgd_offset(walk->mm, addr);
 	do {
-		struct vm_area_struct *vma;
+		struct vm_area_struct *vma = NULL;
 
 		next = pgd_addr_end(addr, end);
 
 		/*
-		 * handle hugetlb vma individually because pagetable walk for
-		 * the hugetlb page is dependent on the architecture and
-		 * we can't handled it in the same manner as non-huge pages.
+		 * This function was not intended to be vma based.
+		 * But there are vma special cases to be handled:
+		 * - hugetlb vma's
+		 * - VM_PFNMAP vma's
 		 */
-		vma = hugetlb_vma(addr, walk);
+		vma = find_vma(walk->mm, addr);
 		if (vma) {
-			if (vma->vm_end < next)
+			/*
+			 * There are no page structures backing a VM_PFNMAP
+			 * range, so do not allow split_huge_page_pmd().
+			 */
+			if ((vma->vm_start <= addr) &&
+			    (vma->vm_flags & VM_PFNMAP)) {
 				next = vma->vm_end;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 			/*
-			 * Hugepage is very tightly coupled with vma, so
-			 * walk through hugetlb entries within a given vma.
+			 * Handle hugetlb vma individually because pagetable
+			 * walk for the hugetlb page is dependent on the
+			 * architecture and we can't handled it in the same
+			 * manner as non-huge pages.
 			 */
-			err = walk_hugetlb_range(vma, addr, next, walk);
-			if (err)
-				break;
-			pgd = pgd_offset(walk->mm, next);
-			continue;
+			if (walk->hugetlb_entry && (vma->vm_start <= addr) &&
+			    is_vm_hugetlb_page(vma)) {
+				if (vma->vm_end < next)
+					next = vma->vm_end;
+				/*
+				 * Hugepage is very tightly coupled with vma,
+				 * so walk through hugetlb entries within a
+				 * given vma.
+				 */
+				err = walk_hugetlb_range(vma, addr, next, walk);
+				if (err)
+					break;
+				pgd = pgd_offset(walk->mm, next);
+				continue;
+			}
 		}
 
 		if (pgd_none_or_clear_bad(pgd)) {
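For reference, a sketch of how a typical caller drives walk_page_range()
after this change. The callback name and wrapper function are made up for
illustration, but the struct mm_walk fields, the walk_page_range() signature,
and the mmap_sem locking match the interface of this era; with this patch
applied, any VM_PFNMAP vma inside [start, end) is simply skipped.

#include <linux/mm.h>

/* Illustrative pmd-level callback; real users may also set pte_entry etc. */
static int my_pmd_entry(pmd_t *pmd, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
	/* examine one pmd's worth of the address range */
	return 0;	/* nonzero would abort the walk */
}

static void walk_task_range(struct mm_struct *mm,
			    unsigned long start, unsigned long end)
{
	struct mm_walk walk = {
		.pmd_entry = my_pmd_entry,
		.mm        = mm,
	};

	/* walk_page_range() now asserts that mmap_sem is held */
	down_read(&mm->mmap_sem);
	walk_page_range(start, end, &walk);
	up_read(&mm->mmap_sem);
}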