author     Hugh Dickins <hugh@veritas.com>            2005-06-21 20:15:07 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-21 21:46:19 -0400
commit     91612e0df20a52f61db3cac280c153311b36df7a
tree       44a19e1d03147aabb842cbaac493a7213b836e4a
parent     941150a326be88af245034ef4b3e9fa00229aa2d
[PATCH] mbind: check_range use standard ptwalk
Strict mbind's check for currently mapped pages being on node has been
using a slow loop which re-evaluates pgd, pud, pmd, pte for each entry:
replace that by a standard four-level page table walk like others in mm.
Since mmap_sem is held for writing, page_table_lock can be taken at the
inner level to limit latency.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
 mm/mempolicy.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------
 1 file changed, 70 insertions(+), 45 deletions(-)
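Editor's note on the pattern the patch adopts: the standard mm walkers iterate one table level at a time, where each level computes a clamped boundary for its current entry (via pgd_addr_end(), pud_addr_end(), pmd_addr_end()) and descends only into entries that are present. The sketch below models that clamping-and-iteration idiom in self-contained userspace C; the TOY_* names and sizes are illustrative assumptions, not kernel values.

#include <stdio.h>

/* Toy level size standing in for PMD_SIZE/PMD_MASK; the kernel's real
 * values are architecture-defined (these numbers are assumptions). */
#define TOY_PMD_SHIFT	8
#define TOY_PMD_SIZE	(1UL << TOY_PMD_SHIFT)
#define TOY_PMD_MASK	(~(TOY_PMD_SIZE - 1))

/* Mirrors the shape of the kernel's pmd_addr_end(): step to the next
 * table boundary, clamped so the walk never runs past the caller's
 * end.  The "- 1"s keep the comparison safe if the boundary wraps
 * around to zero at the top of the address space. */
static unsigned long toy_pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + TOY_PMD_SIZE) & TOY_PMD_MASK;
	return (boundary - 1 < end - 1) ? boundary : end;
}

int main(void)
{
	unsigned long addr = 40, end = 700, next;

	/* One level of the standard walk: carve [addr, end) into
	 * per-entry subranges and hand each to the level below
	 * (presence checks and the actual descent are elided here). */
	do {
		next = toy_pmd_addr_end(addr, end);
		printf("entry covers [%lu, %lu)\n", addr, next);
	} while (addr = next, addr != end);
	return 0;
}

Running this prints the three subranges [40, 256), [256, 512), [512, 700): interior entries get full aligned spans, while the first and last are clipped to the requested range, which is exactly what lets each level pass a tight [addr, next) down to the level below.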
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c512cc911e22..cb41c31e7c87 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -238,56 +238,81 @@ static struct mempolicy *mpol_new(int mode, unsigned long *nodes)
 }
 
 /* Ensure all existing pages follow the policy. */
-static int
-verify_pages(struct mm_struct *mm,
-	     unsigned long addr, unsigned long end, unsigned long *nodes)
+static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
+		unsigned long addr, unsigned long end, unsigned long *nodes)
 {
-	int err = 0;
+	pte_t *orig_pte;
+	pte_t *pte;
 
 	spin_lock(&mm->page_table_lock);
-	while (addr < end) {
-		struct page *p;
-		pte_t *pte;
-		pmd_t *pmd;
-		pud_t *pud;
-		pgd_t *pgd;
-		pgd = pgd_offset(mm, addr);
-		if (pgd_none(*pgd)) {
-			unsigned long next = (addr + PGDIR_SIZE) & PGDIR_MASK;
-			if (next > addr)
-				break;
-			addr = next;
-			continue;
-		}
-		pud = pud_offset(pgd, addr);
-		if (pud_none(*pud)) {
-			addr = (addr + PUD_SIZE) & PUD_MASK;
+	orig_pte = pte = pte_offset_map(pmd, addr);
+	do {
+		unsigned long pfn;
+		unsigned int nid;
+
+		if (!pte_present(*pte))
 			continue;
-		}
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			addr = (addr + PMD_SIZE) & PMD_MASK;
+		pfn = pte_pfn(*pte);
+		if (!pfn_valid(pfn))
 			continue;
-		}
-		p = NULL;
-		pte = pte_offset_map(pmd, addr);
-		if (pte_present(*pte)) {
-			unsigned long pfn = pte_pfn(*pte);
-			if (pfn_valid(pfn))
-				p = pfn_to_page(pfn);
-		}
-		pte_unmap(pte);
-		if (p) {
-			unsigned nid = page_to_nid(p);
-			if (!test_bit(nid, nodes)) {
-				err = -EIO;
-				break;
-			}
-		}
-		addr += PAGE_SIZE;
-	}
+		nid = pfn_to_nid(pfn);
+		if (!test_bit(nid, nodes))
+			break;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap(orig_pte);
 	spin_unlock(&mm->page_table_lock);
-	return err;
+	return addr != end;
+}
+
+static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
+		unsigned long addr, unsigned long end, unsigned long *nodes)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		if (check_pte_range(mm, pmd, addr, next, nodes))
+			return -EIO;
+	} while (pmd++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
+		unsigned long addr, unsigned long end, unsigned long *nodes)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		if (check_pmd_range(mm, pud, addr, next, nodes))
+			return -EIO;
+	} while (pud++, addr = next, addr != end);
+	return 0;
+}
+
+static inline int check_pgd_range(struct mm_struct *mm,
+		unsigned long addr, unsigned long end, unsigned long *nodes)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		if (check_pud_range(mm, pgd, addr, next, nodes))
+			return -EIO;
+	} while (pgd++, addr = next, addr != end);
+	return 0;
 }
 
 /* Step 1: check the range */
@@ -308,7 +333,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		if (prev && prev->vm_end < vma->vm_start)
 			return ERR_PTR(-EFAULT);
 		if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
-			err = verify_pages(vma->vm_mm,
+			err = check_pgd_range(vma->vm_mm,
 					vma->vm_start, vma->vm_end, nodes);
 			if (err) {
 				first = ERR_PTR(err);
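Editor's note on the return convention: check_pte_range() reports failure as addr != end, which is nonzero exactly when the loop hit break before reaching end, and the callers translate that into -EIO. The self-contained snippet below (the toy page size and table are assumptions for illustration) shows how the comma-operator loop bound advances the pointer and the address in lockstep, and how an early break leaves addr != end.

#include <stdio.h>

#define TOY_PAGE_SIZE 4096UL	/* stand-in for the kernel's PAGE_SIZE */

int main(void)
{
	/* A toy "page table": pretend a negative entry is off-node. */
	int table[4] = { 11, 22, -1, 44 };
	int *pte = table;
	unsigned long addr = 0, end = 4 * TOY_PAGE_SIZE;

	/* The comma operator evaluates every step expression but tests
	 * only the last, so pte and addr advance together, once per
	 * page in [addr, end). */
	do {
		if (*pte < 0)
			break;	/* early exit, like the off-node case */
		printf("addr %#lx: entry %d ok\n", addr, *pte);
	} while (pte++, addr += TOY_PAGE_SIZE, addr != end);

	/* check_pte_range()'s trick: addr != end iff the walk broke early. */
	printf("failed: %d\n", addr != end);
	return 0;
}

Here the third entry triggers break with addr still two pages short of end, so the final test prints 1; a clean walk would leave addr == end and print 0, matching the 0/-EIO split the callers apply.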