Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	139
1 file changed, 62 insertions(+), 77 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 21631ab8c08b..2bb4e1d63520 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
 		goto out;
 
-	/*
-	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-	 * Failure is not an option here: we're now expected to remove every
-	 * migration pte, and will cause crashes otherwise. Normally this
-	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
-	 * page_cgroup count for safety, that's now attached to the new page,
-	 * so this charge should just be another incrementation of the count,
-	 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
-	 * there's been a force_empty, those reference counts may no longer
-	 * be reliable, and this charge can actually fail: oh well, we don't
-	 * make the situation any worse by proceeding as if it had succeeded.
-	 */
-	mem_cgroup_charge(new, mm, GFP_ATOMIC);
-
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
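A note on the hunk above (a reading of this series, not text from the patch itself): the per-pte GFP_ATOMIC charge can go away because, as the unmap_and_move() hunks further down show, memcg accounting for a migration is now bracketed once per page by mem_cgroup_prepare_migration()/mem_cgroup_end_migration(). The pairing, sketched from the calls that appear elsewhere in this diff:

	struct mem_cgroup *mem;
	int charge;

	/* charge the new page up front, recording the owning cgroup in 'mem' */
	charge = mem_cgroup_prepare_migration(page, &mem);
	if (charge == -ENOMEM)
		return -ENOMEM;

	/* ... unmap, copy, remap the ptes (no per-pte charging needed) ... */

	/* commit or roll back the charge once the migration attempt is over */
	if (!charge)
		mem_cgroup_end_migration(mem, page, newpage);
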
@@ -300,12 +286,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
-#ifdef CONFIG_SWAP
 	if (PageSwapCache(page)) {
 		SetPageSwapCache(newpage);
 		set_page_private(newpage, page_private(page));
 	}
-#endif
 
 	radix_tree_replace_slot(pslot, newpage);
 
@@ -373,18 +357,13 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mlock_migrate_page(newpage, page);
 
-#ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
-#endif
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
 	/* page->mapping contains a flag for PageAnon() */
 	anon = PageAnon(page);
 	page->mapping = NULL;
 
-	if (!anon)		/* This page was removed from radix-tree. */
-		mem_cgroup_uncharge_cache_page(page);
-
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
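Aside on the two CONFIG_SWAP hunks above (an inference, not stated in the patch): the #ifdef guards are redundant when the swap-cache page-flag helpers compile down to no-ops with CONFIG_SWAP disabled, because the guarded code is then already dead. A minimal, runnable sketch of that stub pattern with hypothetical helper names (not the kernel's page-flags.h):

	#include <stdio.h>

	#ifdef CONFIG_SWAP
	static int page_is_swapcache(void) { return 1; }	/* would test the real flag */
	static void clear_swapcache_flag(void) { printf("cleared\n"); }
	#else
	static int page_is_swapcache(void) { return 0; }	/* constant false */
	static void clear_swapcache_flag(void) { }		/* no-op */
	#endif

	int main(void)
	{
		if (page_is_swapcache())	/* dead code when the feature is off */
			clear_swapcache_flag();
		return 0;
	}
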
@@ -618,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
+	struct mem_cgroup *mem;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -627,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto move_newpage;
 	}
 
-	charge = mem_cgroup_prepare_migration(page, newpage);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto move_newpage;
-	}
 	/* prepare cgroup just returns 0 or -ENOMEM */
-	BUG_ON(charge);
-
 	rc = -EAGAIN;
+
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
 	}
 
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, &mem);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
 	if (PageWriteback(page)) {
 		if (!force)
-			goto unlock;
+			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -697,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage);
 unlock:
 	unlock_page(page);
 
@@ -713,8 +697,6 @@ unlock:
 	}
 
 move_newpage:
-	if (!charge)
-		mem_cgroup_end_migration(newpage);
 
 	/*
 	 * Move the new page to the LRU. If migration was not successful
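The three unmap_and_move() hunks above move the memcg charge to after the page lock and route every post-charge failure through a new uncharge: label, so mem_cgroup_end_migration() runs on both the success and the failure paths. A small, self-contained sketch of that goto-unwind pattern in plain C (stand-in function names, not the kernel code):

	#include <stdio.h>

	/* Stand-ins for the real steps; each returns 0 on success. */
	static int lock_it(void)      { return 0; }
	static int charge_it(void)    { printf("charge\n"); return 0; }
	static void uncharge_it(void) { printf("uncharge\n"); }
	static void unlock_it(void)   { printf("unlock\n"); }
	static int do_work(void)      { return -1; /* pretend the move failed */ }

	static int move_one(void)
	{
		int rc;

		if (lock_it())
			return -1;

		rc = charge_it();
		if (rc)
			goto unlock;		/* nothing charged yet, skip the uncharge */

		rc = do_work();
		if (rc)
			goto uncharge;		/* failed after the charge: still unwind it */

		printf("success path\n");	/* more work would go here */

	uncharge:
		uncharge_it();			/* runs on success and on post-charge failures */
	unlock:
		unlock_it();
		return rc;
	}

	int main(void)
	{
		return move_one() ? 1 : 0;
	}
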
@@ -848,12 +830,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		struct vm_area_struct *vma;
 		struct page *page;
 
-		/*
-		 * A valid page pointer that will not match any of the
-		 * pages that will be moved.
-		 */
-		pp->page = ZERO_PAGE(0);
-
 		err = -EFAULT;
 		vma = find_vma(mm, pp->addr);
 		if (!vma || !vma_migratable(vma))
@@ -919,41 +895,43 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			 const int __user *nodes,
 			 int __user *status, int flags)
 {
-	struct page_to_node *pm = NULL;
+	struct page_to_node *pm;
 	nodemask_t task_nodes;
-	int err = 0;
-	int i;
+	unsigned long chunk_nr_pages;
+	unsigned long chunk_start;
+	int err;
 
 	task_nodes = cpuset_mems_allowed(task);
 
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
+	if (!pm)
 		goto out;
-	}
-
 	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
+	 * Store a chunk of page_to_node array in a page,
+	 * but keep the last one as a marker
 	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
+	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
 
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out_pm;
+	for (chunk_start = 0;
+	     chunk_start < nr_pages;
+	     chunk_start += chunk_nr_pages) {
+		int j;
+
+		if (chunk_start + chunk_nr_pages > nr_pages)
+			chunk_nr_pages = nr_pages - chunk_start;
 
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
+		/* fill the chunk pm with addrs and nodes from user-space */
+		for (j = 0; j < chunk_nr_pages; j++) {
+			const void __user *p;
 			int node;
 
-			if (get_user(node, nodes + i))
+			err = -EFAULT;
+			if (get_user(p, pages + j + chunk_start))
+				goto out_pm;
+			pm[j].addr = (unsigned long) p;
+
+			if (get_user(node, nodes + j + chunk_start))
 				goto out_pm;
 
 			err = -ENODEV;
@@ -964,22 +942,29 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			if (!node_isset(node, task_nodes))
 				goto out_pm;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0; /* anything to not match MAX_NUMNODES */
-	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
+			pm[j].node = node;
+		}
+
+		/* End marker for this chunk */
+		pm[chunk_nr_pages].node = MAX_NUMNODES;
+
+		/* Migrate this chunk */
+		err = do_move_page_to_node_array(mm, pm,
+						 flags & MPOL_MF_MOVE_ALL);
+		if (err < 0)
+			goto out_pm;
 
-	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	if (err >= 0)
 		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
+		for (j = 0; j < chunk_nr_pages; j++)
+			if (put_user(pm[j].status, status + j + chunk_start)) {
 				err = -EFAULT;
+				goto out_pm;
+			}
+	}
+	err = 0;
 
 out_pm:
-	vfree(pm);
+	free_page((unsigned long)pm);
 out:
 	return err;
 }
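What the two do_pages_move() hunks above implement: instead of vmalloc()ing one page_to_node entry per requested page (and capping nr_pages against overflow), the function now allocates a single page with __get_free_page() and walks the request in chunks of (PAGE_SIZE / sizeof(struct page_to_node)) - 1 entries, writing a MAX_NUMNODES end marker into the last slot of each chunk and migrating chunk by chunk. A stand-alone sketch of that bounded-buffer chunking idea (hypothetical types and names, userspace C rather than the kernel code):

	#include <stdio.h>
	#include <stdlib.h>

	#define PAGE_SIZE  4096UL
	#define END_MARKER (~0UL)

	struct item {			/* stand-in for struct page_to_node */
		unsigned long addr;
		unsigned long node;
	};

	/* Process nr entries through one page-sized buffer, chunk by chunk. */
	static int process_all(const struct item *src, unsigned long nr)
	{
		unsigned long chunk_nr = PAGE_SIZE / sizeof(struct item) - 1;
		unsigned long start;
		struct item *buf = malloc(PAGE_SIZE);	/* one fixed-size buffer */

		if (!buf)
			return -1;

		for (start = 0; start < nr; start += chunk_nr) {
			unsigned long j;

			if (start + chunk_nr > nr)
				chunk_nr = nr - start;	/* final, partial chunk */

			for (j = 0; j < chunk_nr; j++)	/* fill the chunk */
				buf[j] = src[start + j];
			buf[chunk_nr].node = END_MARKER;	/* end-marker slot */

			/* a per-chunk worker would consume buf here */
			printf("chunk at %lu: %lu entries\n", start, chunk_nr);
		}

		free(buf);
		return 0;
	}

	int main(void)
	{
		static struct item items[1000];
		return process_all(items, 1000) ? 1 : 0;
	}

The per-chunk end marker preserves the existing convention, visible in the removed code as well, that the worker stops at a MAX_NUMNODES entry rather than taking an explicit count.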
@@ -1070,10 +1055,10 @@ out:
  * Move a list of pages in the address space of the currently executing
  * process.
  */
-asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
-		const void __user * __user *pages,
-		const int __user *nodes,
-		int __user *status, int flags)
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+		const void __user * __user *, pages,
+		const int __user *, nodes,
+		int __user *, status, int, flags)
 {
 	const struct cred *cred = current_cred(), *tcred;
 	struct task_struct *task;
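The last hunk converts sys_move_pages() to the SYSCALL_DEFINE6() wrapper from <linux/syscalls.h>, which takes the syscall name followed by each argument's type and name as separate macro parameters (six pairs here). For illustration only, a hypothetical two-argument syscall written against the same convention:

	#include <linux/syscalls.h>
	#include <linux/uaccess.h>

	/* Hypothetical example, not an existing syscall. */
	SYSCALL_DEFINE2(example_call, unsigned int, flags, int __user *, status)
	{
		if (put_user(0, status))
			return -EFAULT;
		return (long)flags;
	}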