aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@sgi.com>2006-01-08 04:00:51 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-01-08 23:12:42 -0500
commit39743889aaf76725152f16aa90ca3c45f6d52da3 (patch)
tree2a6f658d03dbbd9428934c5e030230a4acb6d5e0 /mm
parentdc9aa5b9d65fd11b1f5246b46ec610ee8b83c6dd (diff)
[PATCH] Swap Migration V5: sys_migrate_pages interface
sys_migrate_pages implementation using swap-based page migration. This is the original API proposed by Ray Bryant in his posts during the first half of 2005 on linux-mm@kvack.org and linux-kernel@vger.kernel.org. The intent of sys_migrate_pages is to migrate the memory of a process. A process may have migrated to another node, while its memory was allocated optimally for the prior context. sys_migrate_pages allows shifting that memory to the new node. sys_migrate_pages is also useful for manually moving a process's memory when the process's available memory nodes have changed through cpuset operations. Paul Jackson is working on an automated mechanism that will allow an automatic migration if the cpuset of a process is changed. However, a user may decide to manually control the migration. This implementation is put into the policy layer since it uses concepts and functions that are also needed for mbind and friends. The patch also provides a do_migrate_pages function that may be useful for cpusets to automatically move memory. sys_migrate_pages does not modify policies, in contrast to Ray's implementation. The current code here is based on the swap-based page migration capability and thus is not able to preserve the physical layout relative to its containing nodeset (which may be a cpuset). When direct page migration becomes available, the implementation needs to be changed to do an isomorphic move of pages between different nodesets. The current implementation simply evicts all pages in the source nodeset that are not in the target nodeset. Patch supports ia64, i386 and x86_64. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/mempolicy.c94
1 files changed, 93 insertions, 1 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9cc6d962831..20d5ad39fa4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -615,11 +615,41 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
615} 615}
616 616
617/* 617/*
618 * For now migrate_pages simply swaps out the pages from nodes that are in
619 * the source set but not in the target set. In the future, we would
620 * want a function that moves pages between the two nodesets in such
621 * a way as to preserve the physical layout as much as possible.
622 *
623 * Returns the number of page that could not be moved.
624 */
625int do_migrate_pages(struct mm_struct *mm,
626 const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
627{
628 LIST_HEAD(pagelist);
629 int count = 0;
630 nodemask_t nodes;
631
632 nodes_andnot(nodes, *from_nodes, *to_nodes);
633 nodes_complement(nodes, nodes);
634
635 down_read(&mm->mmap_sem);
636 check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
637 flags | MPOL_MF_DISCONTIG_OK, &pagelist);
638 if (!list_empty(&pagelist)) {
639 migrate_pages(&pagelist, NULL);
640 if (!list_empty(&pagelist))
641 count = putback_lru_pages(&pagelist);
642 }
643 up_read(&mm->mmap_sem);
644 return count;
645}
646
647/*
618 * User space interface with variable sized bitmaps for nodelists. 648 * User space interface with variable sized bitmaps for nodelists.
619 */ 649 */
620 650
621/* Copy a node mask from user space. */ 651/* Copy a node mask from user space. */
622static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask, 652static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
623 unsigned long maxnode) 653 unsigned long maxnode)
624{ 654{
625 unsigned long k; 655 unsigned long k;
@@ -708,6 +738,68 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
708 return do_set_mempolicy(mode, &nodes); 738 return do_set_mempolicy(mode, &nodes);
709} 739}
710 740
741/* Macro needed until Paul implements this function in kernel/cpusets.c */
742#define cpuset_mems_allowed(task) node_online_map
743
744asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
745 const unsigned long __user *old_nodes,
746 const unsigned long __user *new_nodes)
747{
748 struct mm_struct *mm;
749 struct task_struct *task;
750 nodemask_t old;
751 nodemask_t new;
752 nodemask_t task_nodes;
753 int err;
754
755 err = get_nodes(&old, old_nodes, maxnode);
756 if (err)
757 return err;
758
759 err = get_nodes(&new, new_nodes, maxnode);
760 if (err)
761 return err;
762
763 /* Find the mm_struct */
764 read_lock(&tasklist_lock);
765 task = pid ? find_task_by_pid(pid) : current;
766 if (!task) {
767 read_unlock(&tasklist_lock);
768 return -ESRCH;
769 }
770 mm = get_task_mm(task);
771 read_unlock(&tasklist_lock);
772
773 if (!mm)
774 return -EINVAL;
775
776 /*
777 * Check if this process has the right to modify the specified
778 * process. The right exists if the process has administrative
779 * capabilities, superuser priviledges or the same
780 * userid as the target process.
781 */
782 if ((current->euid != task->suid) && (current->euid != task->uid) &&
783 (current->uid != task->suid) && (current->uid != task->uid) &&
784 !capable(CAP_SYS_ADMIN)) {
785 err = -EPERM;
786 goto out;
787 }
788
789 task_nodes = cpuset_mems_allowed(task);
790 /* Is the user allowed to access the target nodes? */
791 if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) {
792 err = -EPERM;
793 goto out;
794 }
795
796 err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
797out:
798 mmput(mm);
799 return err;
800}
801
802
711/* Retrieve NUMA policy */ 803/* Retrieve NUMA policy */
712asmlinkage long sys_get_mempolicy(int __user *policy, 804asmlinkage long sys_get_mempolicy(int __user *policy,
713 unsigned long __user *nmask, 805 unsigned long __user *nmask,