diff options
-rw-r--r-- | arch/i386/kernel/syscall_table.S | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/entry.S | 1 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 1 | ||||
-rw-r--r-- | include/asm-i386/unistd.h | 3 | ||||
-rw-r--r-- | include/asm-ia64/unistd.h | 3 | ||||
-rw-r--r-- | include/asm-x86_64/ia32_unistd.h | 3 | ||||
-rw-r--r-- | include/asm-x86_64/unistd.h | 4 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 3 | ||||
-rw-r--r-- | include/linux/syscalls.h | 2 | ||||
-rw-r--r-- | kernel/sys_ni.c | 1 | ||||
-rw-r--r-- | mm/mempolicy.c | 94 |
11 files changed, 111 insertions, 5 deletions
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index f7ba4acc20e..6ff3e524322 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S | |||
@@ -293,3 +293,4 @@ ENTRY(sys_call_table) | |||
293 | .long sys_inotify_init | 293 | .long sys_inotify_init |
294 | .long sys_inotify_add_watch | 294 | .long sys_inotify_add_watch |
295 | .long sys_inotify_rm_watch | 295 | .long sys_inotify_rm_watch |
296 | .long sys_migrate_pages | ||
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0741b066b98..7a6ffd61378 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S | |||
@@ -1600,5 +1600,6 @@ sys_call_table: | |||
1600 | data8 sys_inotify_init | 1600 | data8 sys_inotify_init |
1601 | data8 sys_inotify_add_watch | 1601 | data8 sys_inotify_add_watch |
1602 | data8 sys_inotify_rm_watch | 1602 | data8 sys_inotify_rm_watch |
1603 | data8 sys_migrate_pages // 1280 | ||
1603 | 1604 | ||
1604 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls | 1605 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls |
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index df0773c9bdb..1f0ff5adc80 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S | |||
@@ -643,6 +643,7 @@ ia32_sys_call_table: | |||
643 | .quad sys_inotify_init | 643 | .quad sys_inotify_init |
644 | .quad sys_inotify_add_watch | 644 | .quad sys_inotify_add_watch |
645 | .quad sys_inotify_rm_watch | 645 | .quad sys_inotify_rm_watch |
646 | .quad sys_migrate_pages | ||
646 | ia32_syscall_end: | 647 | ia32_syscall_end: |
647 | .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 | 648 | .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 |
648 | .quad ni_syscall | 649 | .quad ni_syscall |
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index fe38b9a9623..481c3c0ea72 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h | |||
@@ -299,8 +299,9 @@ | |||
299 | #define __NR_inotify_init 291 | 299 | #define __NR_inotify_init 291 |
300 | #define __NR_inotify_add_watch 292 | 300 | #define __NR_inotify_add_watch 292 |
301 | #define __NR_inotify_rm_watch 293 | 301 | #define __NR_inotify_rm_watch 293 |
302 | #define __NR_migrate_pages 294 | ||
302 | 303 | ||
303 | #define NR_syscalls 294 | 304 | #define NR_syscalls 295 |
304 | 305 | ||
305 | /* | 306 | /* |
306 | * user-visible error numbers are in the range -1 - -128: see | 307 | * user-visible error numbers are in the range -1 - -128: see |
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h index 2bf543493cb..962f9bd1bdf 100644 --- a/include/asm-ia64/unistd.h +++ b/include/asm-ia64/unistd.h | |||
@@ -269,12 +269,13 @@ | |||
269 | #define __NR_inotify_init 1277 | 269 | #define __NR_inotify_init 1277 |
270 | #define __NR_inotify_add_watch 1278 | 270 | #define __NR_inotify_add_watch 1278 |
271 | #define __NR_inotify_rm_watch 1279 | 271 | #define __NR_inotify_rm_watch 1279 |
272 | #define __NR_migrate_pages 1280 | ||
272 | 273 | ||
273 | #ifdef __KERNEL__ | 274 | #ifdef __KERNEL__ |
274 | 275 | ||
275 | #include <linux/config.h> | 276 | #include <linux/config.h> |
276 | 277 | ||
277 | #define NR_syscalls 256 /* length of syscall table */ | 278 | #define NR_syscalls 270 /* length of syscall table */ |
278 | 279 | ||
279 | #define __ARCH_WANT_SYS_RT_SIGACTION | 280 | #define __ARCH_WANT_SYS_RT_SIGACTION |
280 | 281 | ||
diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h index d5166ec3868..e8843362a6c 100644 --- a/include/asm-x86_64/ia32_unistd.h +++ b/include/asm-x86_64/ia32_unistd.h | |||
@@ -299,7 +299,8 @@ | |||
299 | #define __NR_ia32_inotify_init 291 | 299 | #define __NR_ia32_inotify_init 291 |
300 | #define __NR_ia32_inotify_add_watch 292 | 300 | #define __NR_ia32_inotify_add_watch 292 |
301 | #define __NR_ia32_inotify_rm_watch 293 | 301 | #define __NR_ia32_inotify_rm_watch 293 |
302 | #define __NR_ia32_migrate_pages 294 | ||
302 | 303 | ||
303 | #define IA32_NR_syscalls 294 /* must be > than biggest syscall! */ | 304 | #define IA32_NR_syscalls 295 /* must be > than biggest syscall! */ |
304 | 305 | ||
305 | #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ | 306 | #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ |
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 2c42150bce0..e6f896161c1 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h | |||
@@ -571,8 +571,10 @@ __SYSCALL(__NR_inotify_init, sys_inotify_init) | |||
571 | __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) | 571 | __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) |
572 | #define __NR_inotify_rm_watch 255 | 572 | #define __NR_inotify_rm_watch 255 |
573 | __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) | 573 | __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) |
574 | #define __NR_migrate_pages 256 | ||
575 | __SYSCALL(__NR_migrate_pages, sys_migrate_pages) | ||
574 | 576 | ||
575 | #define __NR_syscall_max __NR_inotify_rm_watch | 577 | #define __NR_syscall_max __NR_migrate_pages |
576 | #ifndef __NO_STUBS | 578 | #ifndef __NO_STUBS |
577 | 579 | ||
578 | /* user-visible error numbers are in the range -1 - -4095 */ | 580 | /* user-visible error numbers are in the range -1 - -4095 */ |
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 05443a766cb..3e61e829681 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -162,6 +162,9 @@ static inline void check_highest_zone(int k) | |||
162 | policy_zone = k; | 162 | policy_zone = k; |
163 | } | 163 | } |
164 | 164 | ||
165 | int do_migrate_pages(struct mm_struct *mm, | ||
166 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); | ||
167 | |||
165 | #else | 168 | #else |
166 | 169 | ||
167 | struct mempolicy {}; | 170 | struct mempolicy {}; |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c7007b1db91..e910d1a481d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -511,5 +511,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio); | |||
511 | asmlinkage long sys_ioprio_get(int which, int who); | 511 | asmlinkage long sys_ioprio_get(int which, int who); |
512 | asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, | 512 | asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, |
513 | unsigned long maxnode); | 513 | unsigned long maxnode); |
514 | asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, | ||
515 | const unsigned long __user *from, const unsigned long __user *to); | ||
514 | 516 | ||
515 | #endif | 517 | #endif |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 1ab2370e2ef..7a8bc7f60d9 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -82,6 +82,7 @@ cond_syscall(compat_sys_socketcall); | |||
82 | cond_syscall(sys_inotify_init); | 82 | cond_syscall(sys_inotify_init); |
83 | cond_syscall(sys_inotify_add_watch); | 83 | cond_syscall(sys_inotify_add_watch); |
84 | cond_syscall(sys_inotify_rm_watch); | 84 | cond_syscall(sys_inotify_rm_watch); |
85 | cond_syscall(sys_migrate_pages); | ||
85 | 86 | ||
86 | /* arch-specific weak syscall entries */ | 87 | /* arch-specific weak syscall entries */ |
87 | cond_syscall(sys_pciconfig_read); | 88 | cond_syscall(sys_pciconfig_read); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9cc6d962831..20d5ad39fa4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -615,11 +615,41 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, | |||
615 | } | 615 | } |
616 | 616 | ||
617 | /* | 617 | /* |
618 | * For now migrate_pages simply swaps out the pages from nodes that are in | ||
619 | * the source set but not in the target set. In the future, we would | ||
620 | * want a function that moves pages between the two nodesets in such | ||
621 | * a way as to preserve the physical layout as much as possible. | ||
622 | * | ||
623 | * Returns the number of pages that could not be moved. | ||
624 | */ | ||
625 | int do_migrate_pages(struct mm_struct *mm, | ||
626 | const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags) | ||
627 | { | ||
628 | LIST_HEAD(pagelist); | ||
629 | int count = 0; | ||
630 | nodemask_t nodes; | ||
631 | |||
632 | nodes_andnot(nodes, *from_nodes, *to_nodes); | ||
633 | nodes_complement(nodes, nodes); | ||
634 | |||
635 | down_read(&mm->mmap_sem); | ||
636 | check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes, | ||
637 | flags | MPOL_MF_DISCONTIG_OK, &pagelist); | ||
638 | if (!list_empty(&pagelist)) { | ||
639 | migrate_pages(&pagelist, NULL); | ||
640 | if (!list_empty(&pagelist)) | ||
641 | count = putback_lru_pages(&pagelist); | ||
642 | } | ||
643 | up_read(&mm->mmap_sem); | ||
644 | return count; | ||
645 | } | ||
646 | |||
647 | /* | ||
618 | * User space interface with variable sized bitmaps for nodelists. | 648 | * User space interface with variable sized bitmaps for nodelists. |
619 | */ | 649 | */ |
620 | 650 | ||
621 | /* Copy a node mask from user space. */ | 651 | /* Copy a node mask from user space. */ |
622 | static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask, | 652 | static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, |
623 | unsigned long maxnode) | 653 | unsigned long maxnode) |
624 | { | 654 | { |
625 | unsigned long k; | 655 | unsigned long k; |
@@ -708,6 +738,68 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, | |||
708 | return do_set_mempolicy(mode, &nodes); | 738 | return do_set_mempolicy(mode, &nodes); |
709 | } | 739 | } |
710 | 740 | ||
741 | /* Macro needed until Paul implements this function in kernel/cpusets.c */ | ||
742 | #define cpuset_mems_allowed(task) node_online_map | ||
743 | |||
744 | asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, | ||
745 | const unsigned long __user *old_nodes, | ||
746 | const unsigned long __user *new_nodes) | ||
747 | { | ||
748 | struct mm_struct *mm; | ||
749 | struct task_struct *task; | ||
750 | nodemask_t old; | ||
751 | nodemask_t new; | ||
752 | nodemask_t task_nodes; | ||
753 | int err; | ||
754 | |||
755 | err = get_nodes(&old, old_nodes, maxnode); | ||
756 | if (err) | ||
757 | return err; | ||
758 | |||
759 | err = get_nodes(&new, new_nodes, maxnode); | ||
760 | if (err) | ||
761 | return err; | ||
762 | |||
763 | /* Find the mm_struct */ | ||
764 | read_lock(&tasklist_lock); | ||
765 | task = pid ? find_task_by_pid(pid) : current; | ||
766 | if (!task) { | ||
767 | read_unlock(&tasklist_lock); | ||
768 | return -ESRCH; | ||
769 | } | ||
770 | mm = get_task_mm(task); | ||
771 | read_unlock(&tasklist_lock); | ||
772 | |||
773 | if (!mm) | ||
774 | return -EINVAL; | ||
775 | |||
776 | /* | ||
777 | * Check if this process has the right to modify the specified | ||
778 | * process. The right exists if the process has administrative | ||
779 | * capabilities, superuser privileges or the same | ||
780 | * userid as the target process. | ||
781 | */ | ||
782 | if ((current->euid != task->suid) && (current->euid != task->uid) && | ||
783 | (current->uid != task->suid) && (current->uid != task->uid) && | ||
784 | !capable(CAP_SYS_ADMIN)) { | ||
785 | err = -EPERM; | ||
786 | goto out; | ||
787 | } | ||
788 | |||
789 | task_nodes = cpuset_mems_allowed(task); | ||
790 | /* Is the user allowed to access the target nodes? */ | ||
791 | if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) { | ||
792 | err = -EPERM; | ||
793 | goto out; | ||
794 | } | ||
795 | |||
796 | err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE); | ||
797 | out: | ||
798 | mmput(mm); | ||
799 | return err; | ||
800 | } | ||
801 | |||
802 | |||
711 | /* Retrieve NUMA policy */ | 803 | /* Retrieve NUMA policy */ |
712 | asmlinkage long sys_get_mempolicy(int __user *policy, | 804 | asmlinkage long sys_get_mempolicy(int __user *policy, |
713 | unsigned long __user *nmask, | 805 | unsigned long __user *nmask, |