about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author	Hugh Dickins <hugh@veritas.com>	2005-06-21 20:15:07 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-06-21 21:46:19 -0400
commit	91612e0df20a52f61db3cac280c153311b36df7a (patch)
tree	44a19e1d03147aabb842cbaac493a7213b836e4a
parent	941150a326be88af245034ef4b3e9fa00229aa2d (diff)
[PATCH] mbind: check_range use standard ptwalk
Strict mbind's check for currently mapped pages being on node has been
using a slow loop which re-evaluates pgd, pud, pmd, pte for each entry:
replace that by a standard four-level page table walk like others in mm.
Since mmap_sem is held for writing, page_table_lock can be taken at the
inner level to limit latency.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	mm/mempolicy.c	115
1 files changed, 70 insertions(+), 45 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c512cc911e22..cb41c31e7c87 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -238,56 +238,81 @@ static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
238} 238}
239 239
240/* Ensure all existing pages follow the policy. */ 240/* Ensure all existing pages follow the policy. */
241static int 241static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
242verify_pages(struct mm_struct *mm, 242 unsigned long addr, unsigned long end, unsigned long *nodes)
243 unsigned long addr, unsigned long end, unsigned long *nodes)
244{ 243{
245 int err = 0; 244 pte_t *orig_pte;
245 pte_t *pte;
246 246
247 spin_lock(&mm->page_table_lock); 247 spin_lock(&mm->page_table_lock);
248 while (addr < end) { 248 orig_pte = pte = pte_offset_map(pmd, addr);
249 struct page *p; 249 do {
250 pte_t *pte; 250 unsigned long pfn;
251 pmd_t *pmd; 251 unsigned int nid;
252 pud_t *pud; 252
253 pgd_t *pgd; 253 if (!pte_present(*pte))
254 pgd = pgd_offset(mm, addr);
255 if (pgd_none(*pgd)) {
256 unsigned long next = (addr + PGDIR_SIZE) & PGDIR_MASK;
257 if (next > addr)
258 break;
259 addr = next;
260 continue;
261 }
262 pud = pud_offset(pgd, addr);
263 if (pud_none(*pud)) {
264 addr = (addr + PUD_SIZE) & PUD_MASK;
265 continue; 254 continue;
266 } 255 pfn = pte_pfn(*pte);
267 pmd = pmd_offset(pud, addr); 256 if (!pfn_valid(pfn))
268 if (pmd_none(*pmd)) {
269 addr = (addr + PMD_SIZE) & PMD_MASK;
270 continue; 257 continue;
271 } 258 nid = pfn_to_nid(pfn);
272 p = NULL; 259 if (!test_bit(nid, nodes))
273 pte = pte_offset_map(pmd, addr); 260 break;
274 if (pte_present(*pte)) { 261 } while (pte++, addr += PAGE_SIZE, addr != end);
275 unsigned long pfn = pte_pfn(*pte); 262 pte_unmap(orig_pte);
276 if (pfn_valid(pfn))
277 p = pfn_to_page(pfn);
278 }
279 pte_unmap(pte);
280 if (p) {
281 unsigned nid = page_to_nid(p);
282 if (!test_bit(nid, nodes)) {
283 err = -EIO;
284 break;
285 }
286 }
287 addr += PAGE_SIZE;
288 }
289 spin_unlock(&mm->page_table_lock); 263 spin_unlock(&mm->page_table_lock);
290 return err; 264 return addr != end;
265}
266
267static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
268 unsigned long addr, unsigned long end, unsigned long *nodes)
269{
270 pmd_t *pmd;
271 unsigned long next;
272
273 pmd = pmd_offset(pud, addr);
274 do {
275 next = pmd_addr_end(addr, end);
276 if (pmd_none_or_clear_bad(pmd))
277 continue;
278 if (check_pte_range(mm, pmd, addr, next, nodes))
279 return -EIO;
280 } while (pmd++, addr = next, addr != end);
281 return 0;
282}
283
284static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
285 unsigned long addr, unsigned long end, unsigned long *nodes)
286{
287 pud_t *pud;
288 unsigned long next;
289
290 pud = pud_offset(pgd, addr);
291 do {
292 next = pud_addr_end(addr, end);
293 if (pud_none_or_clear_bad(pud))
294 continue;
295 if (check_pmd_range(mm, pud, addr, next, nodes))
296 return -EIO;
297 } while (pud++, addr = next, addr != end);
298 return 0;
299}
300
301static inline int check_pgd_range(struct mm_struct *mm,
302 unsigned long addr, unsigned long end, unsigned long *nodes)
303{
304 pgd_t *pgd;
305 unsigned long next;
306
307 pgd = pgd_offset(mm, addr);
308 do {
309 next = pgd_addr_end(addr, end);
310 if (pgd_none_or_clear_bad(pgd))
311 continue;
312 if (check_pud_range(mm, pgd, addr, next, nodes))
313 return -EIO;
314 } while (pgd++, addr = next, addr != end);
315 return 0;
291} 316}
292 317
293/* Step 1: check the range */ 318/* Step 1: check the range */
@@ -308,7 +333,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
308 if (prev && prev->vm_end < vma->vm_start) 333 if (prev && prev->vm_end < vma->vm_start)
309 return ERR_PTR(-EFAULT); 334 return ERR_PTR(-EFAULT);
310 if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) { 335 if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
311 err = verify_pages(vma->vm_mm, 336 err = check_pgd_range(vma->vm_mm,
312 vma->vm_start, vma->vm_end, nodes); 337 vma->vm_start, vma->vm_end, nodes);
313 if (err) { 338 if (err) {
314 first = ERR_PTR(err); 339 first = ERR_PTR(err);