path: root/mm/migrate.c
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c  281
1 file changed, 141 insertions(+), 140 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a80136b23bb..1e0d6b237f44 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,36 +37,6 @@
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 /*
- * Isolate one page from the LRU lists. If successful put it onto
- * the indicated list with elevated page count.
- *
- * Result:
- *	-EBUSY: page not on LRU list
- *	0: page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page, struct list_head *pagelist)
-{
-	int ret = -EBUSY;
-
-	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
-
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page) && get_page_unless_zero(page)) {
-			ret = 0;
-			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
-			list_add_tail(&page->lru, pagelist);
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
-	return ret;
-}
-
-/*
  * migrate_prep() needs to be called before we start compiling a list of pages
  * to be migrated using isolate_lru_page().
  */
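Note that isolate_lru_page() is not lost here: the unevictable-LRU series moves it into mm/vmscan.c with a narrower signature that takes only the page and no longer appends to a caller-supplied list. The new calling convention shows up later in this same diff, in do_move_page_to_node_array(); in sketch form:

	err = isolate_lru_page(page);	/* on success, holds a page reference */
	if (!err)
		list_add_tail(&page->lru, &pagelist);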
@@ -83,23 +53,9 @@ int migrate_prep(void)
 	return 0;
 }
 
-static inline void move_to_lru(struct page *page)
-{
-	if (PageActive(page)) {
-		/*
-		 * lru_cache_add_active checks that
-		 * the PG_active bit is off.
-		 */
-		ClearPageActive(page);
-		lru_cache_add_active(page);
-	} else {
-		lru_cache_add(page);
-	}
-	put_page(page);
-}
-
 /*
- * Add isolated pages on the list back to the LRU.
+ * Add isolated pages on the list back to the LRU under page lock
+ * to avoid leaking evictable pages back onto unevictable list.
  *
  * returns the number of pages put back.
  */
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l)
 
 	list_for_each_entry_safe(page, page2, l, lru) {
 		list_del(&page->lru);
-		move_to_lru(page);
+		putback_lru_page(page);
 		count++;
 	}
 	return count;
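putback_lru_page() replaces move_to_lru() throughout this file. The key difference, per the updated comment above, is that it re-tests evictability before re-adding the page, so an mlocked or otherwise unevictable page cannot leak back onto an evictable list. A simplified sketch of that logic (an assumption drawn from the comment; the real helper lives in mm/vmscan.c and handles races this sketch omits):

	void putback_lru_page_sketch(struct page *page)
	{
		/* Sketch only, not the kernel's exact implementation. */
		if (page_evictable(page, NULL))
			lru_cache_add_lru(page, page_lru(page));
		else
			add_page_to_unevictable_list(page);
		put_page(page);		/* drop the isolation reference */
	}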
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
 
 	spin_unlock_irq(&mapping->tree_lock);
-	if (!PageSwapCache(newpage))
-		mem_cgroup_uncharge_cache_page(page);
 
 	return 0;
 }
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
+	int anon;
+
 	copy_highpage(newpage, page);
 
 	if (PageError(page))
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		SetPageReferenced(newpage);
 	if (PageUptodate(page))
 		SetPageUptodate(newpage);
-	if (PageActive(page))
+	if (TestClearPageActive(page)) {
+		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
+	} else
+		unevictable_migrate_page(newpage, page);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		__set_page_dirty_nobuffers(newpage);
 	}
 
+	mlock_migrate_page(newpage, page);
+
 #ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
 #endif
-	ClearPageActive(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
+	/* page->mapping contains a flag for PageAnon() */
+	anon = PageAnon(page);
 	page->mapping = NULL;
 
+	if (!anon) /* This page was removed from radix-tree. */
+		mem_cgroup_uncharge_cache_page(page);
+
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
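This hunk keeps PG_active and PG_unevictable mutually exclusive on the new page and carries the unevictable bit across the migration. unevictable_migrate_page() is a small helper introduced by the same series (in mm/internal.h); presumably along these lines:

	/* Assumed shape of the helper, under CONFIG_UNEVICTABLE_LRU. */
	static inline void unevictable_migrate_page(struct page *new,
						    struct page *old)
	{
		if (TestClearPageUnevictable(old))
			SetPageUnevictable(new);
	}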
@@ -557,15 +522,12 @@ static int writeout(struct address_space *mapping, struct page *page)
 	remove_migration_ptes(page, page);
 
 	rc = mapping->a_ops->writepage(page, &wbc);
-	if (rc < 0)
-		/* I/O Error writing */
-		return -EIO;
 
 	if (rc != AOP_WRITEPAGE_ACTIVATE)
 		/* unlocked. Relock */
 		lock_page(page);
 
-	return -EAGAIN;
+	return (rc < 0) ? -EIO : -EAGAIN;
 }
 
 /*
@@ -594,6 +556,10 @@ static int fallback_migrate_page(struct address_space *mapping,
  *
  * The new page will have replaced the old page if this function
  * is successful.
+ *
+ * Return value:
+ *   < 0 - error code
+ *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page)
 {
@@ -611,6 +577,8 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	/* Prepare mapping for the new page.*/
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
+	if (PageSwapBacked(page))
+		SetPageSwapBacked(newpage);
 
 	mapping = page_mapping(page);
 	if (!mapping)
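PG_swapbacked, also new in this series, marks anon and shmem pages that are backed by swap rather than a file; propagating it here lets the new page land on the matching LRU. With the split LRU lists of this era, the target list can be derived from page flags, roughly as follows (illustrative only, using the split-LRU enum values; an assumption, not code from this patch):

	int file = !PageSwapBacked(page);	/* file-backed page? */
	int active = !!PageActive(page);
	enum lru_list lru = LRU_BASE + file * LRU_FILE + active * LRU_ACTIVE;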
@@ -654,9 +622,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!newpage)
 		return -ENOMEM;
 
-	if (page_count(page) == 1)
+	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
+	}
 
 	charge = mem_cgroup_prepare_migration(page, newpage);
 	if (charge == -ENOMEM) {
@@ -730,7 +699,6 @@ rcu_unlock:
 	rcu_read_unlock();
 
 unlock:
-
 	unlock_page(page);
 
 	if (rc != -EAGAIN) {
@@ -741,17 +709,19 @@ unlock:
 		 * restored.
 		 */
 		list_del(&page->lru);
-		move_to_lru(page);
+		putback_lru_page(page);
 	}
 
 move_newpage:
 	if (!charge)
 		mem_cgroup_end_migration(newpage);
+
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
 	 */
-	move_to_lru(newpage);
+	putback_lru_page(newpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -858,20 +828,22 @@ static struct page *new_page_node(struct page *p, unsigned long private,
  * Move a set of pages as indicated in the pm array. The addr
  * field must be set to the virtual address of the page to be moved
  * and the node number must contain a valid target node.
+ * The pm array ends with node = MAX_NUMNODES.
  */
-static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
-				int migrate_all)
+static int do_move_page_to_node_array(struct mm_struct *mm,
+				      struct page_to_node *pm,
+				      int migrate_all)
 {
 	int err;
 	struct page_to_node *pp;
 	LIST_HEAD(pagelist);
 
+	migrate_prep();
 	down_read(&mm->mmap_sem);
 
 	/*
 	 * Build a list of pages to migrate
 	 */
-	migrate_prep();
 	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
 		struct vm_area_struct *vma;
 		struct page *page;
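migrate_prep() is hoisted above down_read(&mm->mmap_sem), presumably so the all-CPU LRU drain does not run while mmap_sem is held. For reference, its body earlier in this file is just:

	int migrate_prep(void)
	{
		/*
		 * Clear the LRU lists so pages can be isolated.
		 */
		lru_add_drain_all();

		return 0;
	}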
@@ -914,7 +886,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
 				!migrate_all)
 			goto put_and_set;
 
-		err = isolate_lru_page(page, &pagelist);
+		err = isolate_lru_page(page);
+		if (!err)
+			list_add_tail(&page->lru, &pagelist);
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
@@ -926,36 +900,118 @@ set_status:
 		pp->status = err;
 	}
 
+	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
 				(unsigned long)pm);
-	else
-		err = -ENOENT;
 
 	up_read(&mm->mmap_sem);
 	return err;
 }
 
 /*
- * Determine the nodes of a list of pages. The addr in the pm array
- * must have been set to the virtual address of which we want to determine
- * the node number.
+ * Migrate an array of page address onto an array of nodes and fill
+ * the corresponding array of status.
  */
-static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
+static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+			 unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 const int __user *nodes,
+			 int __user *status, int flags)
 {
+	struct page_to_node *pm = NULL;
+	nodemask_t task_nodes;
+	int err = 0;
+	int i;
+
+	task_nodes = cpuset_mems_allowed(task);
+
+	/* Limit nr_pages so that the multiplication may not overflow */
+	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
+		err = -E2BIG;
+		goto out;
+	}
+
+	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
+	if (!pm) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Get parameters from user space and initialize the pm
+	 * array. Return various errors if the user did something wrong.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+
+		err = -EFAULT;
+		if (get_user(p, pages + i))
+			goto out_pm;
+
+		pm[i].addr = (unsigned long)p;
+		if (nodes) {
+			int node;
+
+			if (get_user(node, nodes + i))
+				goto out_pm;
+
+			err = -ENODEV;
+			if (!node_state(node, N_HIGH_MEMORY))
+				goto out_pm;
+
+			err = -EACCES;
+			if (!node_isset(node, task_nodes))
+				goto out_pm;
+
+			pm[i].node = node;
+		} else
+			pm[i].node = 0; /* anything to not match MAX_NUMNODES */
+	}
+	/* End marker */
+	pm[nr_pages].node = MAX_NUMNODES;
+
+	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
+	if (err >= 0)
+		/* Return status information */
+		for (i = 0; i < nr_pages; i++)
+			if (put_user(pm[i].status, status + i))
+				err = -EFAULT;
+
+out_pm:
+	vfree(pm);
+out:
+	return err;
+}
+
+/*
+ * Determine the nodes of an array of pages and store it in an array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+			 const void __user * __user *pages,
+			 int __user *status)
+{
+	unsigned long i;
+	int err;
+
 	down_read(&mm->mmap_sem);
 
-	for ( ; pm->node != MAX_NUMNODES; pm++) {
+	for (i = 0; i < nr_pages; i++) {
+		const void __user *p;
+		unsigned long addr;
 		struct vm_area_struct *vma;
 		struct page *page;
-		int err;
 
 		err = -EFAULT;
-		vma = find_vma(mm, pm->addr);
+		if (get_user(p, pages+i))
+			goto out;
+		addr = (unsigned long) p;
+
+		vma = find_vma(mm, addr);
 		if (!vma)
 			goto set_status;
 
-		page = follow_page(vma, pm->addr, 0);
+		page = follow_page(vma, addr, 0);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -968,11 +1024,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm)
 
 		err = page_to_nid(page);
 set_status:
-		pm->status = err;
+		put_user(err, status+i);
 	}
+	err = 0;
 
+out:
 	up_read(&mm->mmap_sem);
-	return 0;
+	return err;
 }
 
 /*
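The E2BIG guard in do_pages_move() exists because (nr_pages + 1) * sizeof(struct page_to_node) is unsigned long arithmetic and would silently wrap for a huge nr_pages, making vmalloc() return a too-small buffer. A minimal userspace illustration of the same check (ENTRY_SIZE is a hypothetical stand-in for sizeof(struct page_to_node)):

#include <limits.h>
#include <stdio.h>

#define ENTRY_SIZE 24UL	/* stand-in for sizeof(struct page_to_node) */

/* Mirrors the kernel's guard: reject counts where (n + 1) * size wraps. */
static int would_overflow(unsigned long nr_pages)
{
	return nr_pages >= ULONG_MAX / ENTRY_SIZE - 1;
}

int main(void)
{
	printf("%d\n", would_overflow(1000));			/* 0: safe */
	printf("%d\n", would_overflow(ULONG_MAX / ENTRY_SIZE));	/* 1: rejected */
	return 0;
}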
@@ -984,12 +1042,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 		const int __user *nodes,
 		int __user *status, int flags)
 {
-	int err = 0;
-	int i;
 	struct task_struct *task;
-	nodemask_t task_nodes;
 	struct mm_struct *mm;
-	struct page_to_node *pm = NULL;
+	int err;
 
 	/* Check flags */
 	if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1021,75 +1076,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 	    (current->uid != task->suid) && (current->uid != task->uid) &&
 	    !capable(CAP_SYS_NICE)) {
 		err = -EPERM;
-		goto out2;
+		goto out;
 	}
 
 	err = security_task_movememory(task);
 	if (err)
-		goto out2;
-
-
-	task_nodes = cpuset_mems_allowed(task);
-
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out2;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
-		goto out2;
-	}
-
-	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
-	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
-
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out;
-
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
-			int node;
-
-			if (get_user(node, nodes + i))
-				goto out;
-
-			err = -ENODEV;
-			if (!node_state(node, N_HIGH_MEMORY))
-				goto out;
-
-			err = -EACCES;
-			if (!node_isset(node, task_nodes))
-				goto out;
+		goto out;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0; /* anything to not match MAX_NUMNODES */
+	if (nodes) {
+		err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
+				    flags);
+	} else {
+		err = do_pages_stat(mm, nr_pages, pages, status);
 	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
-
-	if (nodes)
-		err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	else
-		err = do_pages_stat(mm, pm);
-
-	if (err >= 0)
-		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
-				err = -EFAULT;
 
 out:
-	vfree(pm);
-out2:
 	mmput(mm);
 	return err;
 }
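For context, the userspace entry point to this code is move_pages(2), normally reached through libnuma's <numaif.h> wrapper. A NULL nodes argument takes the do_pages_stat() path (query only); a non-NULL nodes array takes do_pages_move(). A minimal sketch (assumes a NUMA-enabled kernel and that node 0 exists; link with -lnuma):

#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	void *buf;
	void *pages[1];
	int nodes[1] = { 0 };	/* target node 0 (assumed present) */
	int status[1];

	if (posix_memalign(&buf, psz, psz))
		return 1;
	*(volatile char *)buf = 1;	/* fault the page in */
	pages[0] = buf;

	/* nodes == NULL: query which node the page currently lives on. */
	if (move_pages(0, 1, pages, NULL, status, 0) == 0)
		printf("page is on node %d\n", status[0]);

	/* nodes != NULL: ask the kernel to migrate the page to node 0. */
	if (move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
		printf("status after move: %d\n", status[0]);

	free(buf);
	return 0;
}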