path: root/mm/mempolicy.c
Diffstat (limited to 'mm/mempolicy.c')
-rw-r--r--	mm/mempolicy.c	151
1 file changed, 58 insertions, 93 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b21869a39f0b..dec8249e972d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -86,6 +86,7 @@
 #include <linux/swap.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
+#include <linux/migrate.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -95,11 +96,8 @@
 #define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)	/* Invert check for nodemask */
 #define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2)	/* Gather statistics */
 
-/* The number of pages to migrate per call to migrate_pages() */
-#define MIGRATE_CHUNK_SIZE 256
-
-static kmem_cache_t *policy_cache;
-static kmem_cache_t *sn_cache;
+static struct kmem_cache *policy_cache;
+static struct kmem_cache *sn_cache;
 
 #define PDprintk(fmt...)
 
@@ -331,17 +329,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	struct vm_area_struct *first, *vma, *prev;
 
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-		/* Must have swap device for migration */
-		if (nr_swap_pages <= 0)
-			return ERR_PTR(-ENODEV);
 
-		/*
-		 * Clear the LRU lists so pages can be isolated.
-		 * Note that pages may be moved off the LRU after we have
-		 * drained them. Those pages will fail to migrate like other
-		 * pages that may be busy.
-		 */
-		lru_add_drain_all();
+		err = migrate_prep();
+		if (err)
+			return ERR_PTR(err);
 	}
 
 	first = find_vma(mm, start);
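
The open-coded checks removed above are consolidated behind migrate_prep(), presumably declared in the newly included <linux/migrate.h>. A minimal sketch of that helper, reconstructed from the removed lines (the real definition lives outside this file and may differ):

	int migrate_prep(void)
	{
		/* Must have swap device for migration */
		if (nr_swap_pages <= 0)
			return -ENODEV;

		/*
		 * Clear the LRU lists so pages can be isolated.  Pages may
		 * still be moved off the LRU afterwards; those will simply
		 * fail to migrate like other busy pages.
		 */
		lru_add_drain_all();

		return 0;
	}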
@@ -431,6 +422,37 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
 	return mpol_check_policy(mode, nodes);
 }
 
+
+/*
+ * Update task->flags PF_MEMPOLICY bit: set iff non-default
+ * mempolicy.  Allows more rapid checking of this (combined perhaps
+ * with other PF_* flag bits) on memory allocation hot code paths.
+ *
+ * If called from outside this file, the task 'p' should -only- be
+ * a newly forked child not yet visible on the task list, because
+ * manipulating the task flags of a visible task is not safe.
+ *
+ * The above limitation is why this routine has the funny name
+ * mpol_fix_fork_child_flag().
+ *
+ * It is also safe to call this with a task pointer of current,
+ * which the static wrapper mpol_set_task_struct_flag() does,
+ * for use within this file.
+ */
+
+void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+	if (p->mempolicy)
+		p->flags |= PF_MEMPOLICY;
+	else
+		p->flags &= ~PF_MEMPOLICY;
+}
+
+static void mpol_set_task_struct_flag(void)
+{
+	mpol_fix_fork_child_flag(current);
+}
+
 /* Set the process memory policy */
 long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
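
As the comment notes, mpol_fix_fork_child_flag() is meant to be called on a newly forked child before it becomes visible on the task list. A hedged sketch of the expected fork-path usage; the exact call site, error label, and surrounding code in kernel/fork.c are assumptions, not part of this patch:

	#ifdef CONFIG_NUMA
		/* duplicate the parent's policy into the not-yet-visible child */
		p->mempolicy = mpol_copy(p->mempolicy);
		if (IS_ERR(p->mempolicy)) {
			retval = PTR_ERR(p->mempolicy);
			p->mempolicy = NULL;
			goto bad_fork_cleanup;
		}
		mpol_fix_fork_child_flag(p);	/* keep PF_MEMPOLICY in sync */
	#endif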
@@ -443,6 +465,7 @@ long do_set_mempolicy(int mode, nodemask_t *nodes)
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
+	mpol_set_task_struct_flag();
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = first_node(new->v.nodes);
 	return 0;
@@ -550,92 +573,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	return err;
 }
 
+#ifdef CONFIG_MIGRATION
 /*
  * page migration
  */
-
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
 	/*
 	 * Avoid migrating a page that is shared with others.
 	 */
-	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
-		if (isolate_lru_page(page))
-			list_add_tail(&page->lru, pagelist);
-	}
-}
-
-/*
- * Migrate the list 'pagelist' of pages to a certain destination.
- *
- * Specify destination with either non-NULL vma or dest_node >= 0
- * Return the number of pages not migrated or error code
- */
-static int migrate_pages_to(struct list_head *pagelist,
-			struct vm_area_struct *vma, int dest)
-{
-	LIST_HEAD(newlist);
-	LIST_HEAD(moved);
-	LIST_HEAD(failed);
-	int err = 0;
-	unsigned long offset = 0;
-	int nr_pages;
-	struct page *page;
-	struct list_head *p;
-
-redo:
-	nr_pages = 0;
-	list_for_each(p, pagelist) {
-		if (vma) {
-			/*
-			 * The address passed to alloc_page_vma is used to
-			 * generate the proper interleave behavior. We fake
-			 * the address here by an increasing offset in order
-			 * to get the proper distribution of pages.
-			 *
-			 * No decision has been made as to which page
-			 * a certain old page is moved to so we cannot
-			 * specify the correct address.
-			 */
-			page = alloc_page_vma(GFP_HIGHUSER, vma,
-					offset + vma->vm_start);
-			offset += PAGE_SIZE;
-		}
-		else
-			page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
-
-		if (!page) {
-			err = -ENOMEM;
-			goto out;
-		}
-		list_add_tail(&page->lru, &newlist);
-		nr_pages++;
-		if (nr_pages > MIGRATE_CHUNK_SIZE)
-			break;
-	}
-	err = migrate_pages(pagelist, &newlist, &moved, &failed);
-
-	putback_lru_pages(&moved);	/* Call release pages instead ?? */
-
-	if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
-		goto redo;
-out:
-	/* Return leftover allocated pages */
-	while (!list_empty(&newlist)) {
-		page = list_entry(newlist.next, struct page, lru);
-		list_del(&page->lru);
-		__free_page(page);
-	}
-	list_splice(&failed, pagelist);
-	if (err < 0)
-		return err;
-
-	/* Calculate number of leftover pages */
-	nr_pages = 0;
-	list_for_each(p, pagelist)
-		nr_pages++;
-	return nr_pages;
+	if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
+		isolate_lru_page(page, pagelist);
 }
 
 /*
@@ -742,8 +691,23 @@ int do_migrate_pages(struct mm_struct *mm,
 	if (err < 0)
 		return err;
 	return busy;
+
 }
 
+#else
+
+static void migrate_page_add(struct page *page, struct list_head *pagelist,
+				unsigned long flags)
+{
+}
+
+int do_migrate_pages(struct mm_struct *mm,
+	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
+{
+	return -ENOSYS;
+}
+#endif
+
 long do_mbind(unsigned long start, unsigned long len,
 	unsigned long mode, nodemask_t *nmask, unsigned long flags)
 {
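
With the #else stubs above, callers such as do_mbind() and sys_migrate_pages() compile unchanged when CONFIG_MIGRATION is off: no pages are queued for migration and do_migrate_pages() reports -ENOSYS. A small userspace sketch (not part of this patch) showing how that surfaces through the migrate_pages system call; the node numbers and the availability of __NR_migrate_pages on the target architecture are assumptions:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		unsigned long old_nodes = 1UL << 0;	/* move pages off node 0 */
		unsigned long new_nodes = 1UL << 1;	/* ... onto node 1 */
		long left;

		/* pid 0 means "the calling process" */
		left = syscall(__NR_migrate_pages, 0, 8 * sizeof(unsigned long),
			       &old_nodes, &new_nodes);
		if (left < 0)
			perror("migrate_pages");	/* ENOSYS without CONFIG_MIGRATION */
		else
			printf("pages not migrated: %ld\n", left);
		return 0;
	}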
@@ -808,6 +772,7 @@ long do_mbind(unsigned long start, unsigned long len,
 		if (!err && nr_failed && (flags & MPOL_MF_STRICT))
 			err = -EIO;
 	}
+
 	if (!list_empty(&pagelist))
 		putback_lru_pages(&pagelist);
 
@@ -947,7 +912,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 	/*
 	 * Check if this process has the right to modify the specified
 	 * process. The right exists if the process has administrative
-	 * capabilities, superuser priviledges or the same
+	 * capabilities, superuser privileges or the same
 	 * userid as the target process.
 	 */
 	if ((current->euid != task->suid) && (current->euid != task->uid) &&