author		Christoph Lameter <clameter@sgi.com>	2006-01-08 04:01:01 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-08 23:12:44 -0500
commit		38e35860dbe6197a4b42eb6e8b47da940b7695dd
tree		2794681eb62c4bd84e186693df265c9f1a0b28f4
parent		ef2bf0dc8675e14cf8cba3b7fb9f48d72640a70e
[PATCH] mempolicies: private pointer in check_range and MPOL_MF_INVERT
This was first posted at
http://marc.theaimsgroup.com/?l=linux-mm&m=113149240227584&w=2
(Part of this functionality is also contained in the direct migration
patchset.  The functionality here is more generic and independent of that
patchset.)

- Add the internal flag MPOL_MF_INVERT to control check_range() behavior.

- Replace the pagelist passed through check_range() by a general private
  pointer that may be used for other purposes.  (The following patches
  will use it to merge numa_maps into mempolicy.c and to better group
  the page migration code in the policy layer.)

- Improve some comments.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	mm/mempolicy.c	46
1 files changed, 24 insertions, 22 deletions
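The core of the patch is the single node-membership test in check_pte_range(): a page is acted on when its node's membership in *nodes disagrees with the MPOL_MF_INVERT bit. That lets do_mbind() select pages outside the requested mask while do_migrate_pages() selects pages inside its source mask without calling nodes_complement(), and folding both call sites into one predicate is what allows check_range() to drop the special-cased NULL pagelist and take an opaque private pointer instead. Below is a minimal userspace sketch of that selection logic only; the page_selected() helper, the simplified node_isset(), and the value chosen for MPOL_MF_INTERNAL are stand-ins for illustration, not the kernel's definitions.

/*
 * Minimal userspace model of the MPOL_MF_INVERT idea -- illustration only,
 * not kernel code.  The flag base value and node_isset() below are stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

#define MPOL_MF_INTERNAL	(1 << 3)	/* assumed base for internal flags */
#define MPOL_MF_DISCONTIG_OK	(MPOL_MF_INTERNAL << 0)
#define MPOL_MF_INVERT		(MPOL_MF_INTERNAL << 1)

/* Stand-in for node_isset()/nodemask_t: nodes is a plain bitmask of node ids. */
static bool node_isset(int nid, unsigned long nodes)
{
	return (nodes >> nid) & 1UL;
}

/*
 * Mirror of the test in the patched check_pte_range(): the page is selected
 * (queued for migration or reported as a policy violation) when its node's
 * membership in the mask disagrees with the MPOL_MF_INVERT bit.
 */
static bool page_selected(int nid, unsigned long nodes, unsigned long flags)
{
	return node_isset(nid, nodes) != !!(flags & MPOL_MF_INVERT);
}

int main(void)
{
	unsigned long allowed = 1UL << 0;	/* policy allows node 0 only */

	/* do_mbind() style: with MPOL_MF_INVERT, pages OUTSIDE the mask match. */
	printf("mbind, page on node 1: %d\n",
	       page_selected(1, allowed, MPOL_MF_INVERT));	/* prints 1 */
	printf("mbind, page on node 0: %d\n",
	       page_selected(0, allowed, MPOL_MF_INVERT));	/* prints 0 */

	/* do_migrate_pages() style: without the bit, pages INSIDE the source
	 * mask match, so nodes_complement() is no longer needed. */
	printf("migrate, page on node 0: %d\n",
	       page_selected(0, allowed, 0));			/* prints 1 */
	return 0;
}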
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 30bdafba52d8..270e9a39ec15 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -88,8 +88,9 @@
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
-/* Internal MPOL_MF_xxx flags */
+/* Internal flags */
 #define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)	/* Skip checks for continuous vmas */
+#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)		/* Invert check for nodemask */
 
 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;
@@ -227,11 +228,11 @@ static void migrate_page_add(struct vm_area_struct *vma,
 	}
 }
 
-/* Ensure all existing pages follow the policy. */
+/* Scan through pages checking if pages follow certain conditions. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pte_t *orig_pte;
 	pte_t *pte;
@@ -248,12 +249,13 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		if (!page)
 			continue;
 		nid = page_to_nid(page);
-		if (!node_isset(nid, *nodes)) {
-			if (pagelist)
-				migrate_page_add(vma, page, pagelist, flags);
-			else
-				break;
-		}
+		if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+			continue;
+
+		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+			migrate_page_add(vma, page, private, flags);
+		else
+			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(orig_pte, ptl);
 	return addr != end;
@@ -262,7 +264,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -273,7 +275,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
 		if (check_pte_range(vma, pmd, addr, next, nodes,
-				    flags, pagelist))
+				    flags, private))
 			return -EIO;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
@@ -282,7 +284,7 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -293,7 +295,7 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 		if (pud_none_or_clear_bad(pud))
 			continue;
 		if (check_pmd_range(vma, pud, addr, next, nodes,
-				    flags, pagelist))
+				    flags, private))
 			return -EIO;
 	} while (pud++, addr = next, addr != end);
 	return 0;
@@ -302,7 +304,7 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
 static inline int check_pgd_range(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long end,
 		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		void *private)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -313,7 +315,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
 		if (check_pud_range(vma, pgd, addr, next, nodes,
-				    flags, pagelist))
+				    flags, private))
 			return -EIO;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
@@ -335,8 +337,7 @@ static inline int vma_migratable(struct vm_area_struct *vma)
  */
 static struct vm_area_struct *
 check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-		const nodemask_t *nodes, unsigned long flags,
-		struct list_head *pagelist)
+		const nodemask_t *nodes, unsigned long flags, void *private)
 {
 	int err;
 	struct vm_area_struct *first, *vma, *prev;
@@ -363,7 +364,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 			if (vma->vm_start > start)
 				start = vma->vm_start;
 			err = check_pgd_range(vma, start, endvma, nodes,
-						flags, pagelist);
+						flags, private);
 			if (err) {
 				first = ERR_PTR(err);
 				break;
@@ -452,7 +453,8 @@ long do_mbind(unsigned long start, unsigned long len,
 	int err;
 	LIST_HEAD(pagelist);
 
-	if ((flags & ~(unsigned long)(MPOL_MF_STRICT|MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
+	if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
+				      MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
 	    || mode > MPOL_MAX)
 		return -EINVAL;
 	if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
@@ -490,8 +492,9 @@ long do_mbind(unsigned long start, unsigned long len,
 			mode,nodes_addr(nodes)[0]);
 
 	down_write(&mm->mmap_sem);
-	vma = check_range(mm, start, end, nmask, flags,
-	      (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) ? &pagelist : NULL);
+	vma = check_range(mm, start, end, nmask,
+			  flags | MPOL_MF_INVERT, &pagelist);
+
 	err = PTR_ERR(vma);
 	if (!IS_ERR(vma)) {
 		int nr_failed = 0;
@@ -646,7 +649,6 @@ int do_migrate_pages(struct mm_struct *mm,
 	nodemask_t nodes;
 
 	nodes_andnot(nodes, *from_nodes, *to_nodes);
-	nodes_complement(nodes, nodes);
 
 	down_read(&mm->mmap_sem);
 	check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,