diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 268 |
1 files changed, 264 insertions, 4 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index 251a8d158257..033a12f4c949 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
26 | #include <linux/cpuset.h> | 26 | #include <linux/cpuset.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/mempolicy.h> | ||
29 | #include <linux/vmalloc.h> | ||
28 | 30 | ||
29 | #include "internal.h" | 31 | #include "internal.h" |
30 | 32 | ||
@@ -62,9 +64,8 @@ int isolate_lru_page(struct page *page, struct list_head *pagelist) | |||
62 | } | 64 | } |
63 | 65 | ||
64 | /* | 66 | /* |
65 | * migrate_prep() needs to be called after we have compiled the list of pages | 67 | * migrate_prep() needs to be called before we start compiling a list of pages |
66 | * to be migrated using isolate_lru_page() but before we begin a series of calls | 68 | * to be migrated using isolate_lru_page(). |
67 | * to migrate_pages(). | ||
68 | */ | 69 | */ |
69 | int migrate_prep(void) | 70 | int migrate_prep(void) |
70 | { | 71 | { |
@@ -588,7 +589,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
588 | struct page *page, int force) | 589 | struct page *page, int force) |
589 | { | 590 | { |
590 | int rc = 0; | 591 | int rc = 0; |
591 | struct page *newpage = get_new_page(page, private); | 592 | int *result = NULL; |
593 | struct page *newpage = get_new_page(page, private, &result); | ||
592 | 594 | ||
593 | if (!newpage) | 595 | if (!newpage) |
594 | return -ENOMEM; | 596 | return -ENOMEM; |
@@ -642,6 +644,12 @@ move_newpage: | |||
642 | * then this will free the page. | 644 | * then this will free the page. |
643 | */ | 645 | */ |
644 | move_to_lru(newpage); | 646 | move_to_lru(newpage); |
647 | if (result) { | ||
648 | if (rc) | ||
649 | *result = rc; | ||
650 | else | ||
651 | *result = page_to_nid(newpage); | ||
652 | } | ||
645 | return rc; | 653 | return rc; |
646 | } | 654 | } |
647 | 655 | ||
@@ -710,3 +718,255 @@ out: | |||
710 | return nr_failed + retry; | 718 | return nr_failed + retry; |
711 | } | 719 | } |
712 | 720 | ||
721 | #ifdef CONFIG_NUMA | ||
722 | /* | ||
723 | * Move a list of individual pages | ||
724 | */ | ||
/*
 * One slot of the work array shared by sys_move_pages(), do_move_pages(),
 * do_pages_stat() and new_page_node(). The array is terminated by a slot
 * whose node field holds MAX_NUMNODES.
 */
struct page_to_node {
	unsigned long addr;	/* user virtual address to look up */
	struct page *page;	/* page found at addr (set by do_move_pages()) */
	int node;		/* requested target node; MAX_NUMNODES ends the array */
	int status;		/* per-page result: node number or negative errno */
};
731 | |||
732 | static struct page *new_page_node(struct page *p, unsigned long private, | ||
733 | int **result) | ||
734 | { | ||
735 | struct page_to_node *pm = (struct page_to_node *)private; | ||
736 | |||
737 | while (pm->node != MAX_NUMNODES && pm->page != p) | ||
738 | pm++; | ||
739 | |||
740 | if (pm->node == MAX_NUMNODES) | ||
741 | return NULL; | ||
742 | |||
743 | *result = &pm->status; | ||
744 | |||
745 | return alloc_pages_node(pm->node, GFP_HIGHUSER, 0); | ||
746 | } | ||
747 | |||
748 | /* | ||
749 | * Move a set of pages as indicated in the pm array. The addr | ||
750 | * field must be set to the virtual address of the page to be moved | ||
751 | * and the node number must contain a valid target node. | ||
752 | */ | ||
753 | static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | ||
754 | int migrate_all) | ||
755 | { | ||
756 | int err; | ||
757 | struct page_to_node *pp; | ||
758 | LIST_HEAD(pagelist); | ||
759 | |||
760 | down_read(&mm->mmap_sem); | ||
761 | |||
762 | /* | ||
763 | * Build a list of pages to migrate | ||
764 | */ | ||
765 | migrate_prep(); | ||
766 | for (pp = pm; pp->node != MAX_NUMNODES; pp++) { | ||
767 | struct vm_area_struct *vma; | ||
768 | struct page *page; | ||
769 | |||
770 | /* | ||
771 | * A valid page pointer that will not match any of the | ||
772 | * pages that will be moved. | ||
773 | */ | ||
774 | pp->page = ZERO_PAGE(0); | ||
775 | |||
776 | err = -EFAULT; | ||
777 | vma = find_vma(mm, pp->addr); | ||
778 | if (!vma) | ||
779 | goto set_status; | ||
780 | |||
781 | page = follow_page(vma, pp->addr, FOLL_GET); | ||
782 | err = -ENOENT; | ||
783 | if (!page) | ||
784 | goto set_status; | ||
785 | |||
786 | if (PageReserved(page)) /* Check for zero page */ | ||
787 | goto put_and_set; | ||
788 | |||
789 | pp->page = page; | ||
790 | err = page_to_nid(page); | ||
791 | |||
792 | if (err == pp->node) | ||
793 | /* | ||
794 | * Node already in the right place | ||
795 | */ | ||
796 | goto put_and_set; | ||
797 | |||
798 | err = -EACCES; | ||
799 | if (page_mapcount(page) > 1 && | ||
800 | !migrate_all) | ||
801 | goto put_and_set; | ||
802 | |||
803 | err = isolate_lru_page(page, &pagelist); | ||
804 | put_and_set: | ||
805 | /* | ||
806 | * Either remove the duplicate refcount from | ||
807 | * isolate_lru_page() or drop the page ref if it was | ||
808 | * not isolated. | ||
809 | */ | ||
810 | put_page(page); | ||
811 | set_status: | ||
812 | pp->status = err; | ||
813 | } | ||
814 | |||
815 | if (!list_empty(&pagelist)) | ||
816 | err = migrate_pages(&pagelist, new_page_node, | ||
817 | (unsigned long)pm); | ||
818 | else | ||
819 | err = -ENOENT; | ||
820 | |||
821 | up_read(&mm->mmap_sem); | ||
822 | return err; | ||
823 | } | ||
824 | |||
825 | /* | ||
826 | * Determine the nodes of a list of pages. The addr in the pm array | ||
827 | * must have been set to the virtual address of which we want to determine | ||
828 | * the node number. | ||
829 | */ | ||
830 | static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) | ||
831 | { | ||
832 | down_read(&mm->mmap_sem); | ||
833 | |||
834 | for ( ; pm->node != MAX_NUMNODES; pm++) { | ||
835 | struct vm_area_struct *vma; | ||
836 | struct page *page; | ||
837 | int err; | ||
838 | |||
839 | err = -EFAULT; | ||
840 | vma = find_vma(mm, pm->addr); | ||
841 | if (!vma) | ||
842 | goto set_status; | ||
843 | |||
844 | page = follow_page(vma, pm->addr, 0); | ||
845 | err = -ENOENT; | ||
846 | /* Use PageReserved to check for zero page */ | ||
847 | if (!page || PageReserved(page)) | ||
848 | goto set_status; | ||
849 | |||
850 | err = page_to_nid(page); | ||
851 | set_status: | ||
852 | pm->status = err; | ||
853 | } | ||
854 | |||
855 | up_read(&mm->mmap_sem); | ||
856 | return 0; | ||
857 | } | ||
858 | |||
859 | /* | ||
860 | * Move a list of pages in the address space of the currently executing | ||
861 | * process. | ||
862 | */ | ||
863 | asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | ||
864 | const void __user * __user *pages, | ||
865 | const int __user *nodes, | ||
866 | int __user *status, int flags) | ||
867 | { | ||
868 | int err = 0; | ||
869 | int i; | ||
870 | struct task_struct *task; | ||
871 | nodemask_t task_nodes; | ||
872 | struct mm_struct *mm; | ||
873 | struct page_to_node *pm = NULL; | ||
874 | |||
875 | /* Check flags */ | ||
876 | if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) | ||
877 | return -EINVAL; | ||
878 | |||
879 | if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) | ||
880 | return -EPERM; | ||
881 | |||
882 | /* Find the mm_struct */ | ||
883 | read_lock(&tasklist_lock); | ||
884 | task = pid ? find_task_by_pid(pid) : current; | ||
885 | if (!task) { | ||
886 | read_unlock(&tasklist_lock); | ||
887 | return -ESRCH; | ||
888 | } | ||
889 | mm = get_task_mm(task); | ||
890 | read_unlock(&tasklist_lock); | ||
891 | |||
892 | if (!mm) | ||
893 | return -EINVAL; | ||
894 | |||
895 | /* | ||
896 | * Check if this process has the right to modify the specified | ||
897 | * process. The right exists if the process has administrative | ||
898 | * capabilities, superuser privileges or the same | ||
899 | * userid as the target process. | ||
900 | */ | ||
901 | if ((current->euid != task->suid) && (current->euid != task->uid) && | ||
902 | (current->uid != task->suid) && (current->uid != task->uid) && | ||
903 | !capable(CAP_SYS_NICE)) { | ||
904 | err = -EPERM; | ||
905 | goto out2; | ||
906 | } | ||
907 | |||
908 | task_nodes = cpuset_mems_allowed(task); | ||
909 | |||
910 | /* Limit nr_pages so that the multiplication may not overflow */ | ||
911 | if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { | ||
912 | err = -E2BIG; | ||
913 | goto out2; | ||
914 | } | ||
915 | |||
916 | pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); | ||
917 | if (!pm) { | ||
918 | err = -ENOMEM; | ||
919 | goto out2; | ||
920 | } | ||
921 | |||
922 | /* | ||
923 | * Get parameters from user space and initialize the pm | ||
924 | * array. Return various errors if the user did something wrong. | ||
925 | */ | ||
926 | for (i = 0; i < nr_pages; i++) { | ||
927 | const void *p; | ||
928 | |||
929 | err = -EFAULT; | ||
930 | if (get_user(p, pages + i)) | ||
931 | goto out; | ||
932 | |||
933 | pm[i].addr = (unsigned long)p; | ||
934 | if (nodes) { | ||
935 | int node; | ||
936 | |||
937 | if (get_user(node, nodes + i)) | ||
938 | goto out; | ||
939 | |||
940 | err = -ENODEV; | ||
941 | if (!node_online(node)) | ||
942 | goto out; | ||
943 | |||
944 | err = -EACCES; | ||
945 | if (!node_isset(node, task_nodes)) | ||
946 | goto out; | ||
947 | |||
948 | pm[i].node = node; | ||
949 | } | ||
950 | } | ||
951 | /* End marker */ | ||
952 | pm[nr_pages].node = MAX_NUMNODES; | ||
953 | |||
954 | if (nodes) | ||
955 | err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL); | ||
956 | else | ||
957 | err = do_pages_stat(mm, pm); | ||
958 | |||
959 | if (err >= 0) | ||
960 | /* Return status information */ | ||
961 | for (i = 0; i < nr_pages; i++) | ||
962 | if (put_user(pm[i].status, status + i)) | ||
963 | err = -EFAULT; | ||
964 | |||
965 | out: | ||
966 | vfree(pm); | ||
967 | out2: | ||
968 | mmput(mm); | ||
969 | return err; | ||
970 | } | ||
971 | #endif | ||
972 | |||