Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	274
1 file changed, 139 insertions(+), 135 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23bb..6602941bfab0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,36 +37,6 @@
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 /*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- *  -EBUSY: page not on LRU list
- *  0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
-	int ret = -EBUSY;
-
-	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
-
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page) && get_page_unless_zero(page)) {
-			ret = 0;
-			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
-			list_add_tail(&page->lru, pagelist);
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
-	return ret;
-}
-
-/*
  * migrate_prep() needs to be called before we start compiling a list of pages
  * to be migrated using isolate_lru_page().
  */
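
Context for the hunk above: this series moves isolate_lru_page() to mm/vmscan.c and drops its list argument, so callers now queue the isolated page themselves. The pattern, as it appears in the do_move_page_to_node_array() hunk further down:

	err = isolate_lru_page(page);	/* detach page, take a reference */
	if (!err)
		list_add_tail(&page->lru, &pagelist);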
@@ -83,23 +53,9 @@ int migrate_prep(void)
 	return 0;
 }
 
-static inline void move_to_lru(struct page *page)
-{
-	if (PageActive(page)) {
-		/*
-		 * lru_cache_add_active checks that
-		 * the PG_active bit is off.
-		 */
-		ClearPageActive(page);
-		lru_cache_add_active(page);
-	} else {
-		lru_cache_add(page);
-	}
-	put_page(page);
-}
-
 /*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto unevictable list.
  *
  * returns the number of pages put back.
  */
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
 
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
-		move_to_lru(page);
+		putback_lru_page(page);
 		count++;
 	}
 	return count;
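
putback_lru_page(), added to mm/vmscan.c by the same series, replaces move_to_lru() here. A loose sketch of its contract only (not the actual implementation; the real version also handles races with munlock and pages that become evictable again while queued):

	/* sketch -- see mm/vmscan.c for the real putback_lru_page() */
	void putback_lru_page(struct page *page)
	{
		if (page_evictable(page, NULL))
			/* evictable: back onto an anon/file LRU list */
			lru_cache_add_lru(page, page_lru(page));
		else
			/* unevictable: park where reclaim will not scan it */
			add_page_to_unevictable_list(page);
		put_page(page);	/* drop the isolate_lru_page() reference */
	}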
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
 	spin_unlock_irq(&mapping->tree_lock);
-	if (!PageSwapCache(newpage))
-		mem_cgroup_uncharge_cache_page(page);
 
 	return 0;
 }
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
+	int anon;
+
 	copy_highpage(newpage, page);
 
 	if (PageError(page))
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		SetPageReferenced(newpage);
 	if (PageUptodate(page))
 		SetPageUptodate(newpage);
-	if (PageActive(page))
+	if (TestClearPageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
+	} else
+		unevictable_migrate_page(newpage, page);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		__set_page_dirty_nobuffers(newpage);
 	}
 
+	mlock_migrate_page(newpage, page);
+
 #ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
 #endif
-	ClearPageActive(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
+	/* page->mapping contains a flag for PageAnon() */
+	anon = PageAnon(page);
 	page->mapping = NULL;
 
+	if (!anon)		/* This page was removed from radix-tree. */
+		mem_cgroup_uncharge_cache_page(page);
+
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
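
The two helpers used in this hunk come from mm/internal.h in the same series; roughly, they carry PG_unevictable and PG_mlocked (with its NR_MLOCK counter) over from the old page to its replacement:

	static inline void unevictable_migrate_page(struct page *new,
						    struct page *old)
	{
		if (TestClearPageUnevictable(old))
			SetPageUnevictable(new);
	}

	static inline void mlock_migrate_page(struct page *newpage,
					      struct page *page)
	{
		if (TestClearPageMlocked(page)) {
			unsigned long flags;

			local_irq_save(flags);
			__dec_zone_page_state(page, NR_MLOCK);
			SetPageMlocked(newpage);
			__inc_zone_page_state(newpage, NR_MLOCK);
			local_irq_restore(flags);
		}
	}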
@@ -594,6 +559,10 @@ static int fallback_migrate_page(struct address_space *mapping,
  *
  * The new page will have replaced the old page if this function
  * is successful.
+ *
+ * Return value:
+ *   < 0 - error code
+ *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page)
 {
@@ -611,6 +580,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	/* Prepare mapping for the new page.*/
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
+	if (PageSwapBacked(page))
+		SetPageSwapBacked(newpage);
 
 	mapping = page_mapping(page);
 	if (!mapping)
@@ -654,9 +625,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
-	if (page_count(page) == 1)
+	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
+	}
 
 	charge = mem_cgroup_prepare_migration(page, newpage);
 	if (charge == -ENOMEM) {
@@ -730,7 +702,6 @@ rcu_unlock:
 	rcu_read_unlock();
 
 unlock:
-
 	unlock_page(page);
 
 	if (rc != -EAGAIN) {
@@ -741,17 +712,19 @@ unlock:
 		 * restored.
 		 */
 		list_del(&page->lru);
-		move_to_lru(page);
+		putback_lru_page(page);
 	}
 
 move_newpage:
 	if (!charge)
 		mem_cgroup_end_migration(newpage);
+
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
 	 */
-	move_to_lru(newpage);
+	putback_lru_page(newpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -858,9 +831,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
  * Move a set of pages as indicated in the pm array. The addr
  * field must be set to the virtual address of the page to be moved
  * and the node number must contain a valid target node.
+ * The pm array ends with node = MAX_NUMNODES.
  */
-static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
-				int migrate_all)
+static int do_move_page_to_node_array(struct mm_struct *mm,
+				      struct page_to_node *pm,
+				      int migrate_all)
 {
 	int err;
 	struct page_to_node *pp;
@@ -914,7 +889,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
 				!migrate_all)
 			goto put_and_set;
 
-		err = isolate_lru_page(page, &pagelist);
+		err = isolate_lru_page(page);
+		if (!err)
+			list_add_tail(&page->lru, &pagelist);
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
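
For reading the next hunk: the pm array threaded through these functions holds struct page_to_node entries, declared near the top of this file, roughly:

	struct page_to_node {
		unsigned long addr;	/* user virtual address of the page */
		struct page *page;	/* filled in while walking the array */
		int node;		/* target node; MAX_NUMNODES marks the end */
		int status;		/* result: node number or negative errno */
	};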
@@ -926,36 +903,118 @@ set_status:
 		pp->status = err;
 	}
 
+	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
 				(unsigned long)pm);
-	else
-		err = -ENOENT;
 
 	up_read(&mm->mmap_sem);
 	return err;
 }
 
 /*
- * Determine the nodes of a list of pages. The addr in the pm array
- * must have been set to the virtual address of which we want to determine
- * the node number.
+ * Migrate an array of page address onto an array of nodes and fill
+ * the corresponding array of status.
  */
-static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+			 unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 const int __user *nodes,
+			 int __user *status, int flags)
 {
+	struct page_to_node *pm = NULL;
+	nodemask_t task_nodes;
+	int err = 0;
+	int i;
+
+	task_nodes = cpuset_mems_allowed(task);
+
+	/* Limit nr_pages so that the multiplication may not overflow */
+	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+		err = -E2BIG;
+		goto out;
+	}
+
+	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
+	if (!pm) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Get parameters from user space and initialize the pm
+	 * array. Return various errors if the user did something wrong.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+
+		err = -EFAULT;
+		if (get_user(p, pages + i))
+			goto out_pm;
+
+		pm[i].addr = (unsigned long)p;
+		if (nodes) {
+			int node;
+
+			if (get_user(node, nodes + i))
+				goto out_pm;
+
+			err = -ENODEV;
+			if (!node_state(node, N_HIGH_MEMORY))
+				goto out_pm;
+
+			err = -EACCES;
+			if (!node_isset(node, task_nodes))
+				goto out_pm;
+
+			pm[i].node = node;
+		} else
+			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
+	}
+	/* End marker */
+	pm[nr_pages].node = MAX_NUMNODES;
+
+	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
+	if (err >= 0)
+		/* Return status information */
+		for (i = 0; i < nr_pages; i++)
+			if (put_user(pm[i].status, status + i))
+				err = -EFAULT;
+
+out_pm:
+	vfree(pm);
+out:
+	return err;
+}
+
+/*
+ * Determine the nodes of an array of pages and store it in an array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 int __user *status)
+{
+	unsigned long i;
+	int err;
+
 	down_read(&mm->mmap_sem);
 
-	for ( ; pm->node != MAX_NUMNODES; pm++) {
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+		unsigned long addr;
 		struct vm_area_struct *vma;
 		struct page *page;
-		int err;
 
 		err = -EFAULT;
-		vma = find_vma(mm, pm->addr);
+		if (get_user(p, pages+i))
+			goto out;
+		addr = (unsigned long) p;
+
+		vma = find_vma(mm, addr);
 		if (!vma)
 			goto set_status;
 
-		page = follow_page(vma, pm->addr, 0);
+		page = follow_page(vma, addr, 0);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -968,11 +1027,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
 
 		err = page_to_nid(page);
 set_status:
-		pm->status = err;
+		put_user(err, status+i);
 	}
+	err = 0;
 
+out:
 	up_read(&mm->mmap_sem);
-	return 0;
+	return err;
 }
 
 /*
@@ -984,12 +1045,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 		const int __user *nodes,
 		int __user *status, int flags)
 {
-	int err = 0;
-	int i;
 	struct task_struct *task;
-	nodemask_t task_nodes;
 	struct mm_struct *mm;
-	struct page_to_node *pm = NULL;
+	int err;
 
 	/* Check flags */
 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1021,75 +1079,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	    (current->uid != task->suid) && (current->uid != task->uid) &&
 	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
-		goto out2;
+		goto out;
 	}
 
 	err = security_task_movememory(task);
 	if (err)
-		goto out2;
-
-
-	task_nodes = cpuset_mems_allowed(task);
-
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out2;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
-		goto out2;
-	}
-
-	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
-	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
-
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out;
-
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
-			int node;
-
-			if (get_user(node, nodes + i))
-				goto out;
-
-			err = -ENODEV;
-			if (!node_state(node, N_HIGH_MEMORY))
-				goto out;
-
-			err = -EACCES;
-			if (!node_isset(node, task_nodes))
-				goto out;
+		goto out;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0;	/* anything to not match MAX_NUMNODES */
+	if (nodes) {
+		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
+				    flags);
+	} else {
+		err = do_pages_stat(mm, nr_pages, pages, status);
 	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
-
-	if (nodes)
-		err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	else
-		err = do_pages_stat(mm, pm);
-
-	if (err >= 0)
-		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
-				err = -EFAULT;
 
 out:
-	vfree(pm);
-out2:
 	mmput(mm);
 	return err;
 }
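
With this split, sys_move_pages() dispatches to do_pages_move() when a nodes array is supplied and to do_pages_stat() when it is NULL. A minimal userspace sketch of both modes through libnuma's move_pages(2) wrapper; node 0 and the 4 KiB page size are assumptions of the example (build with -lnuma):

	#include <numaif.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	int main(void)
	{
		void *pages[1];
		int nodes[1] = { 0 };		/* assumed target node */
		int status[1];

		pages[0] = malloc(4096);
		memset(pages[0], 0, 4096);	/* fault the page in first */

		/* nodes == NULL: do_pages_stat() path, only report placement */
		if (move_pages(0, 1, pages, NULL, status, 0) == 0)
			printf("page is on node %d\n", status[0]);

		/* nodes != NULL: do_pages_move() path, migrate and report */
		if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
			printf("after migration: status %d\n", status[0]);

		free(pages[0]);
		return 0;
	}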