Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	139
1 file changed, 62 insertions(+), 77 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 21631ab8c08b..2bb4e1d63520 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
 		goto out;
 
-	/*
-	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-	 * Failure is not an option here: we're now expected to remove every
-	 * migration pte, and will cause crashes otherwise. Normally this
-	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
-	 * page_cgroup count for safety, that's now attached to the new page,
-	 * so this charge should just be another incrementation of the count,
-	 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
-	 * there's been a force_empty, those reference counts may no longer
-	 * be reliable, and this charge can actually fail: oh well, we don't
-	 * make the situation any worse by proceeding as if it had succeeded.
-	 */
-	mem_cgroup_charge(new, mm, GFP_ATOMIC);
-
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
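The best-effort GFP_ATOMIC charge can be dropped because of the API rework visible in the unmap_and_move() hunks below: mem_cgroup_prepare_migration() now charges the new page up front and mem_cgroup_end_migration() later commits or rolls that charge back, so by the time migration ptes are replaced there is no charge operation left that could fail.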
@@ -300,12 +286,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
-#ifdef CONFIG_SWAP
 	if (PageSwapCache(page)) {
 		SetPageSwapCache(newpage);
 		set_page_private(newpage, page_private(page));
 	}
-#endif
 
 	radix_tree_replace_slot(pslot, newpage);
 
@@ -373,18 +357,13 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mlock_migrate_page(newpage, page);
 
-#ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
-#endif
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
 	/* page->mapping contains a flag for PageAnon() */
 	anon = PageAnon(page);
 	page->mapping = NULL;
 
-	if (!anon) /* This page was removed from radix-tree. */
-		mem_cgroup_uncharge_cache_page(page);
-
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
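Both CONFIG_SWAP ifdefs above can go only because the swap-cache page-flag helpers degrade to no-op inline stubs on !CONFIG_SWAP builds. Roughly the arrangement in <linux/page-flags.h> that this cleanup leans on (a sketch; the exact stub macros are an assumption, not part of this patch):

#ifdef CONFIG_SWAP
PAGEFLAG(SwapCache, swapcache)	/* real test/set/clear on PG_swapcache */
#else
PAGEFLAG_FALSE(SwapCache)	/* PageSwapCache() is constant 0;
				 * Set/ClearPageSwapCache() are empty inlines */
#endif

With PageSwapCache() constant-false, the compiler also discards the whole if (PageSwapCache(page)) block in migrate_page_move_mapping() on swapless configurations.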
@@ -618,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
+	struct mem_cgroup *mem;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -627,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto move_newpage;
 	}
 
-	charge = mem_cgroup_prepare_migration(page, newpage);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto move_newpage;
-	}
 	/* prepare cgroup just returns 0 or -ENOMEM */
-	BUG_ON(charge);
-
 	rc = -EAGAIN;
+
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
 	}
 
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, &mem);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
 	if (PageWriteback(page)) {
 		if (!force)
-			goto unlock;
+			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -697,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage);
 unlock:
 	unlock_page(page);
 
@@ -713,8 +697,6 @@ unlock:
 	}
 
 move_newpage:
-	if (!charge)
-		mem_cgroup_end_migration(newpage);
 
 	/*
 	 * Move the new page to the LRU. If migration was not successful
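Taken together, the three unmap_and_move() hunks move the memcg charge inside the page lock and give every later failure path a single exit through the new uncharge: label; mem_cgroup_end_migration() now takes (mem, page, newpage) so it can decide which of the two pages keeps the charge. A condensed sketch of the resulting flow (simplified, most error handling elided):

	rc = -EAGAIN;
	if (!trylock_page(page)) {
		if (!force)
			goto move_newpage;	/* nothing charged yet */
		lock_page(page);
	}

	/* charge against the new page, under the page lock */
	charge = mem_cgroup_prepare_migration(page, &mem);
	if (charge == -ENOMEM) {
		rc = -ENOMEM;
		goto unlock;			/* charge failed: skip uncharge */
	}

	/* ... wait for writeback, unmap, move_to_new_page() ... */

uncharge:
	if (!charge)				/* commit to newpage, or roll back */
		mem_cgroup_end_migration(mem, page, newpage);
unlock:
	unlock_page(page);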
@@ -848,12 +830,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		struct vm_area_struct *vma;
 		struct page *page;
 
-		/*
-		 * A valid page pointer that will not match any of the
-		 * pages that will be moved.
-		 */
-		pp->page = ZERO_PAGE(0);
-
 		err = -EFAULT;
 		vma = find_vma(mm, pp->addr);
 		if (!vma || !vma_migratable(vma))
@@ -919,41 +895,43 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			const int __user *nodes,
 			int __user *status, int flags)
 {
-	struct page_to_node *pm = NULL;
+	struct page_to_node *pm;
 	nodemask_t task_nodes;
-	int err = 0;
-	int i;
+	unsigned long chunk_nr_pages;
+	unsigned long chunk_start;
+	int err;
 
 	task_nodes = cpuset_mems_allowed(task);
 
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
+	if (!pm)
 		goto out;
-	}
-
 	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
+	 * Store a chunk of page_to_node array in a page,
+	 * but keep the last one as a marker
 	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
+	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
 
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out_pm;
+	for (chunk_start = 0;
+	     chunk_start < nr_pages;
+	     chunk_start += chunk_nr_pages) {
+		int j;
+
+		if (chunk_start + chunk_nr_pages > nr_pages)
+			chunk_nr_pages = nr_pages - chunk_start;
 
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
+		/* fill the chunk pm with addrs and nodes from user-space */
+		for (j = 0; j < chunk_nr_pages; j++) {
+			const void __user *p;
 			int node;
 
-			if (get_user(node, nodes + i))
+			err = -EFAULT;
+			if (get_user(p, pages + j + chunk_start))
+				goto out_pm;
+			pm[j].addr = (unsigned long) p;
+
+			if (get_user(node, nodes + j + chunk_start))
 				goto out_pm;
 
 			err = -ENODEV;
@@ -964,22 +942,29 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			if (!node_isset(node, task_nodes))
 				goto out_pm;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0; /* anything to not match MAX_NUMNODES */
-	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
+			pm[j].node = node;
+		}
+
+		/* End marker for this chunk */
+		pm[chunk_nr_pages].node = MAX_NUMNODES;
+
+		/* Migrate this chunk */
+		err = do_move_page_to_node_array(mm, pm,
+						 flags & MPOL_MF_MOVE_ALL);
+		if (err < 0)
+			goto out_pm;
 
-	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	if (err >= 0)
 		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
+		for (j = 0; j < chunk_nr_pages; j++)
+			if (put_user(pm[j].status, status + j + chunk_start)) {
 				err = -EFAULT;
+				goto out_pm;
+			}
+	}
+	err = 0;
 
 out_pm:
-	vfree(pm);
+	free_page((unsigned long)pm);
 out:
 	return err;
 }
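The rework trades the old unbounded vmalloc() of nr_pages + 1 entries, and the -E2BIG overflow guard it required, for a single GFP_KERNEL page that is refilled on every iteration, so the kernel-side allocation no longer scales with the size of the user's request. With struct page_to_node as defined near the top of mm/migrate.c, the chunk size works out as follows on a 64-bit machine with 4 KiB pages (back-of-envelope sketch; exact sizes depend on the configuration):

struct page_to_node {		/* as in mm/migrate.c */
	unsigned long addr;	/* 8 bytes */
	struct page *page;	/* 8 bytes */
	int node;		/* 4 bytes */
	int status;		/* 4 bytes */
};				/* sizeof == 24 */

/*
 * chunk_nr_pages = PAGE_SIZE / sizeof(struct page_to_node) - 1
 *                = 4096 / 24 - 1
 *                = 170 - 1 = 169 pages handled per iteration,
 * with pm[169] left free for the MAX_NUMNODES end marker that
 * do_move_page_to_node_array() uses to find the end of the array.
 */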
@@ -1070,10 +1055,10 @@ out:
  * Move a list of pages in the address space of the currently executing
  * process.
  */
-asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
-		const void __user * __user *pages,
-		const int __user *nodes,
-		int __user *status, int flags)
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+		const void __user * __user *, pages,
+		const int __user *, nodes,
+		int __user *, status, int, flags)
 {
 	const struct cred *cred = current_cred(), *tcred;
 	struct task_struct *task;
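SYSCALL_DEFINE6() takes the syscall name followed by alternating type and argument-name parameters. On architectures without syscall wrappers it comes out as essentially the old definition; on those that select CONFIG_HAVE_SYSCALL_WRAPPERS the same macro also emits a stub that sign-extends 32-bit arguments such as pid and flags so they arrive correctly in 64-bit registers. A sketch of the plain expansion (an assumption, not the literal preprocessor output):

/* what SYSCALL_DEFINE6(move_pages, ...) boils down to without wrappers */
asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
			       const void __user * __user *pages,
			       const int __user *nodes,
			       int __user *status, int flags)
{
	/* ... function body as before ... */
}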