author     Christoph Lameter <clameter@sgi.com>    2006-01-08 04:01:01 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>  2006-01-08 23:12:44 -0500
commit     38e35860dbe6197a4b42eb6e8b47da940b7695dd (patch)
tree       2794681eb62c4bd84e186693df265c9f1a0b28f4 /mm
parent     ef2bf0dc8675e14cf8cba3b7fb9f48d72640a70e (diff)
[PATCH] mempolicies: private pointer in check_range and MPOL_MF_INVERT
This was first posted at
http://marc.theaimsgroup.com/?l=linux-mm&m=113149240227584&w=2
(Part of this functionality is also contained in the direct migration
patchset.  The functionality here is more generic and independent of that
patchset.)
- Add an internal flag, MPOL_MF_INVERT, to control check_range() behavior
  (see the sketch after this list).
- Replace the pagelist passed through check_range() with a general
  private pointer that may be used for other purposes.
  (The following patches will use it to merge numa_maps into
  mempolicy.c and to better group the page migration code in
  the policy layer.)
- Improve some comments.
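
For illustration only (not part of the patch), a minimal user-space sketch
of the new check added to check_pte_range().  The bit positions and the
node_isset() stand-in are assumptions for the example; the real definitions
live in mempolicy.h and nodemask.h.  Without MPOL_MF_INVERT, the scan acts
on pages whose node is in the nodemask (what do_migrate_pages() needs);
with it set, on pages outside the nodemask (the old do_mbind() behavior).

	/* Hypothetical user-space model of the new nodemask check. */
	#include <stdio.h>

	#define MPOL_MF_INTERNAL	(1 << 3)	/* illustrative bit position */
	#define MPOL_MF_INVERT		(MPOL_MF_INTERNAL << 1)

	/* stand-in for the kernel's nodemask_t and node_isset() */
	static int node_isset(int nid, unsigned long nodes)
	{
		return (int)((nodes >> nid) & 1);
	}

	/*
	 * Mirrors the new test: a page is skipped when its node's mask
	 * membership equals the invert bit, and acted on (migrated or
	 * reported) otherwise.
	 */
	static int page_acted_on(int nid, unsigned long nodes, unsigned long flags)
	{
		return node_isset(nid, nodes) != !!(flags & MPOL_MF_INVERT);
	}

	int main(void)
	{
		unsigned long nodes = 1UL << 1;	/* mask containing only node 1 */

		/* plain check (do_migrate_pages): act on pages *in* the mask */
		printf("node 1, plain:  %d\n", page_acted_on(1, nodes, 0));	/* 1 */
		printf("node 0, plain:  %d\n", page_acted_on(0, nodes, 0));	/* 0 */
		/* inverted check (do_mbind): act on pages *outside* the mask */
		printf("node 0, invert: %d\n",
		       page_acted_on(0, nodes, MPOL_MF_INVERT));		/* 1 */
		printf("node 1, invert: %d\n",
		       page_acted_on(1, nodes, MPOL_MF_INVERT));		/* 0 */
		return 0;
	}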
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/mempolicy.c	46
1 file changed, 24 insertions(+), 22 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 30bdafba52d8..270e9a39ec15 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -88,8 +88,9 @@
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
-/* Internal MPOL_MF_xxx flags */
+/* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
+#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
 
 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;
@@ -227,11 +228,11 @@ static void migrate_page_add(struct vm_area_struct *vma,
 	}
 }
 
-/* Ensure all existing pages follow the policy. */
+/* Scan through pages checking if pages follow certain conditions. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pte_t *orig_pte;
 	pte_t *pte;
@@ -248,12 +249,13 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (!page)
 			continue;
 		nid = page_to_nid(page);
-		if (!node_isset(nid, *nodes)) {
-			if (pagelist)
-				migrate_page_add(vma, page, pagelist, flags);
-			else
-				break;
-		}
+		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+			continue;
+
+		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+			migrate_page_add(vma, page, private, flags);
+		else
+			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(orig_pte, ptl);
 	return addr != end;
@@ -262,7 +264,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -273,7 +275,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		if (check_pte_range(vma, pmd, addr, next, nodes,
-				    flags, pagelist))
+				    flags, private))
 			return -EIO;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
@@ -282,7 +284,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -293,7 +295,7 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		if (check_pmd_range(vma, pud, addr, next, nodes,
-				    flags, pagelist))
+				    flags, private))
 			return -EIO;
 	} while (pud++, addr = next, addr != end);
 	return 0;
@@ -302,7 +304,7 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 static inline int check_pgd_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -313,7 +315,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		if (check_pud_range(vma, pgd, addr, next, nodes,
-				    flags, pagelist))
+				    flags, private))
 			return -EIO;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
@@ -335,8 +337,7 @@ static inline int vma_migratable(struct vm_area_struct *vma)
  */
 static struct vm_area_struct *
 check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		const nodemask_t *nodes, unsigned long flags, void *private)
 {
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
@@ -363,7 +364,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 			if (vma->vm_start > start)
 				start = vma->vm_start;
 			err = check_pgd_range(vma, start, endvma, nodes,
-						flags, pagelist);
+						flags, private);
 			if (err) {
 				first = ERR_PTR(err);
 				break;
@@ -452,7 +453,8 @@ long do_mbind(unsigned long start, unsigned long len,
 	int err;
 	LIST_HEAD(pagelist);
 
-	if ((flags & ~(unsigned long)(MPOL_MF_STRICT|MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
+	if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
+				      MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 	    || mode > MPOL_MAX)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
@@ -490,8 +492,9 @@ long do_mbind(unsigned long start, unsigned long len,
 		 mode,nodes_addr(nodes)[0]);
 
 	down_write(&mm->mmap_sem);
-	vma = check_range(mm, start, end, nmask, flags,
-	      (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ? &pagelist : NULL);
+	vma = check_range(mm, start, end, nmask,
+			  flags | MPOL_MF_INVERT, &pagelist);
+
 	err = PTR_ERR(vma);
 	if (!IS_ERR(vma)) {
 		int nr_failed = 0;
@@ -646,7 +649,6 @@ int do_migrate_pages(struct mm_struct *mm,
 	nodemask_t nodes;
 
 	nodes_andnot(nodes, *from_nodes, *to_nodes);
-	nodes_complement(nodes, nodes);
 
 	down_read(&mm->mmap_sem);
 	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
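
A note on the last hunk: nodes_complement() is no longer needed because
do_migrate_pages() calls check_range() without MPOL_MF_INVERT, so the
plain from/to difference mask can be passed directly.  A hedged sketch of
that mask computation, with plain C bit operations standing in for the
kernel's nodemask helpers:

	/* Hypothetical model of the do_migrate_pages() nodemask setup. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long from_nodes = 0x3;	/* source nodes 0 and 1 */
		unsigned long to_nodes   = 0x2;	/* destination node 1 */

		/* nodes_andnot(): the nodes whose pages must move */
		unsigned long nodes = from_nodes & ~to_nodes;

		/*
		 * Before this patch the mask was complemented here, because
		 * the old check acted on pages *outside* the mask; the
		 * non-inverted check_range() now acts on pages *inside* it.
		 */
		printf("nodes to empty: 0x%lx\n", nodes);	/* prints 0x1 */
		return 0;
	}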