diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 274 |
1 files changed, 139 insertions, 135 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index 2a80136b23bb..6602941bfab0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -37,36 +37,6 @@ | |||
37 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | 37 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * Isolate one page from the LRU lists. If successful put it onto | ||
41 | * the indicated list with elevated page count. | ||
42 | * | ||
43 | * Result: | ||
44 | * -EBUSY: page not on LRU list | ||
45 | * 0: page removed from LRU list and added to the specified list. | ||
46 | */ | ||
47 | int isolate_lru_page(struct page *page, struct list_head *pagelist) | ||
48 | { | ||
49 | int ret = -EBUSY; | ||
50 | |||
51 | if (PageLRU(page)) { | ||
52 | struct zone *zone = page_zone(page); | ||
53 | |||
54 | spin_lock_irq(&zone->lru_lock); | ||
55 | if (PageLRU(page) && get_page_unless_zero(page)) { | ||
56 | ret = 0; | ||
57 | ClearPageLRU(page); | ||
58 | if (PageActive(page)) | ||
59 | del_page_from_active_list(zone, page); | ||
60 | else | ||
61 | del_page_from_inactive_list(zone, page); | ||
62 | list_add_tail(&page->lru, pagelist); | ||
63 | } | ||
64 | spin_unlock_irq(&zone->lru_lock); | ||
65 | } | ||
66 | return ret; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * migrate_prep() needs to be called before we start compiling a list of pages | 40 | * migrate_prep() needs to be called before we start compiling a list of pages |
71 | * to be migrated using isolate_lru_page(). | 41 | * to be migrated using isolate_lru_page(). |
72 | */ | 42 | */ |
@@ -83,23 +53,9 @@ int migrate_prep(void) | |||
83 | return 0; | 53 | return 0; |
84 | } | 54 | } |
85 | 55 | ||
86 | static inline void move_to_lru(struct page *page) | ||
87 | { | ||
88 | if (PageActive(page)) { | ||
89 | /* | ||
90 | * lru_cache_add_active checks that | ||
91 | * the PG_active bit is off. | ||
92 | */ | ||
93 | ClearPageActive(page); | ||
94 | lru_cache_add_active(page); | ||
95 | } else { | ||
96 | lru_cache_add(page); | ||
97 | } | ||
98 | put_page(page); | ||
99 | } | ||
100 | |||
101 | /* | 56 | /* |
102 | * Add isolated pages on the list back to the LRU. | 57 | * Add isolated pages on the list back to the LRU under page lock |
58 | * to avoid leaking evictable pages back onto unevictable list. | ||
103 | * | 59 | * |
104 | * returns the number of pages put back. | 60 | * returns the number of pages put back. |
105 | */ | 61 | */ |
@@ -111,7 +67,7 @@ int putback_lru_pages(struct list_head *l) | |||
111 | 67 | ||
112 | list_for_each_entry_safe(page, page2, l, lru) { | 68 | list_for_each_entry_safe(page, page2, l, lru) { |
113 | list_del(&page->lru); | 69 | list_del(&page->lru); |
114 | move_to_lru(page); | 70 | putback_lru_page(page); |
115 | count++; | 71 | count++; |
116 | } | 72 | } |
117 | return count; | 73 | return count; |
@@ -374,8 +330,6 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
374 | __inc_zone_page_state(newpage, NR_FILE_PAGES); | 330 | __inc_zone_page_state(newpage, NR_FILE_PAGES); |
375 | 331 | ||
376 | spin_unlock_irq(&mapping->tree_lock); | 332 | spin_unlock_irq(&mapping->tree_lock); |
377 | if (!PageSwapCache(newpage)) | ||
378 | mem_cgroup_uncharge_cache_page(page); | ||
379 | 333 | ||
380 | return 0; | 334 | return 0; |
381 | } | 335 | } |
@@ -385,6 +339,8 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
385 | */ | 339 | */ |
386 | static void migrate_page_copy(struct page *newpage, struct page *page) | 340 | static void migrate_page_copy(struct page *newpage, struct page *page) |
387 | { | 341 | { |
342 | int anon; | ||
343 | |||
388 | copy_highpage(newpage, page); | 344 | copy_highpage(newpage, page); |
389 | 345 | ||
390 | if (PageError(page)) | 346 | if (PageError(page)) |
@@ -393,8 +349,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page) | |||
393 | SetPageReferenced(newpage); | 349 | SetPageReferenced(newpage); |
394 | if (PageUptodate(page)) | 350 | if (PageUptodate(page)) |
395 | SetPageUptodate(newpage); | 351 | SetPageUptodate(newpage); |
396 | if (PageActive(page)) | 352 | if (TestClearPageActive(page)) { |
353 | VM_BUG_ON(PageUnevictable(page)); | ||
397 | SetPageActive(newpage); | 354 | SetPageActive(newpage); |
355 | } else | ||
356 | unevictable_migrate_page(newpage, page); | ||
398 | if (PageChecked(page)) | 357 | if (PageChecked(page)) |
399 | SetPageChecked(newpage); | 358 | SetPageChecked(newpage); |
400 | if (PageMappedToDisk(page)) | 359 | if (PageMappedToDisk(page)) |
@@ -412,14 +371,20 @@ static void migrate_page_copy(struct page *newpage, struct page *page) | |||
412 | __set_page_dirty_nobuffers(newpage); | 371 | __set_page_dirty_nobuffers(newpage); |
413 | } | 372 | } |
414 | 373 | ||
374 | mlock_migrate_page(newpage, page); | ||
375 | |||
415 | #ifdef CONFIG_SWAP | 376 | #ifdef CONFIG_SWAP |
416 | ClearPageSwapCache(page); | 377 | ClearPageSwapCache(page); |
417 | #endif | 378 | #endif |
418 | ClearPageActive(page); | ||
419 | ClearPagePrivate(page); | 379 | ClearPagePrivate(page); |
420 | set_page_private(page, 0); | 380 | set_page_private(page, 0); |
381 | /* page->mapping contains a flag for PageAnon() */ | ||
382 | anon = PageAnon(page); | ||
421 | page->mapping = NULL; | 383 | page->mapping = NULL; |
422 | 384 | ||
385 | if (!anon) /* This page was removed from radix-tree. */ | ||
386 | mem_cgroup_uncharge_cache_page(page); | ||
387 | |||
423 | /* | 388 | /* |
424 | * If any waiters have accumulated on the new page then | 389 | * If any waiters have accumulated on the new page then |
425 | * wake them up. | 390 | * wake them up. |
@@ -594,6 +559,10 @@ static int fallback_migrate_page(struct address_space *mapping, | |||
594 | * | 559 | * |
595 | * The new page will have replaced the old page if this function | 560 | * The new page will have replaced the old page if this function |
596 | * is successful. | 561 | * is successful. |
562 | * | ||
563 | * Return value: | ||
564 | * < 0 - error code | ||
565 | * == 0 - success | ||
597 | */ | 566 | */ |
598 | static int move_to_new_page(struct page *newpage, struct page *page) | 567 | static int move_to_new_page(struct page *newpage, struct page *page) |
599 | { | 568 | { |
@@ -611,6 +580,8 @@ static int move_to_new_page(struct page *newpage, struct page *page) | |||
611 | /* Prepare mapping for the new page.*/ | 580 | /* Prepare mapping for the new page.*/ |
612 | newpage->index = page->index; | 581 | newpage->index = page->index; |
613 | newpage->mapping = page->mapping; | 582 | newpage->mapping = page->mapping; |
583 | if (PageSwapBacked(page)) | ||
584 | SetPageSwapBacked(newpage); | ||
614 | 585 | ||
615 | mapping = page_mapping(page); | 586 | mapping = page_mapping(page); |
616 | if (!mapping) | 587 | if (!mapping) |
@@ -654,9 +625,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
654 | if (!newpage) | 625 | if (!newpage) |
655 | return -ENOMEM; | 626 | return -ENOMEM; |
656 | 627 | ||
657 | if (page_count(page) == 1) | 628 | if (page_count(page) == 1) { |
658 | /* page was freed from under us. So we are done. */ | 629 | /* page was freed from under us. So we are done. */ |
659 | goto move_newpage; | 630 | goto move_newpage; |
631 | } | ||
660 | 632 | ||
661 | charge = mem_cgroup_prepare_migration(page, newpage); | 633 | charge = mem_cgroup_prepare_migration(page, newpage); |
662 | if (charge == -ENOMEM) { | 634 | if (charge == -ENOMEM) { |
@@ -730,7 +702,6 @@ rcu_unlock: | |||
730 | rcu_read_unlock(); | 702 | rcu_read_unlock(); |
731 | 703 | ||
732 | unlock: | 704 | unlock: |
733 | |||
734 | unlock_page(page); | 705 | unlock_page(page); |
735 | 706 | ||
736 | if (rc != -EAGAIN) { | 707 | if (rc != -EAGAIN) { |
@@ -741,17 +712,19 @@ unlock: | |||
741 | * restored. | 712 | * restored. |
742 | */ | 713 | */ |
743 | list_del(&page->lru); | 714 | list_del(&page->lru); |
744 | move_to_lru(page); | 715 | putback_lru_page(page); |
745 | } | 716 | } |
746 | 717 | ||
747 | move_newpage: | 718 | move_newpage: |
748 | if (!charge) | 719 | if (!charge) |
749 | mem_cgroup_end_migration(newpage); | 720 | mem_cgroup_end_migration(newpage); |
721 | |||
750 | /* | 722 | /* |
751 | * Move the new page to the LRU. If migration was not successful | 723 | * Move the new page to the LRU. If migration was not successful |
752 | * then this will free the page. | 724 | * then this will free the page. |
753 | */ | 725 | */ |
754 | move_to_lru(newpage); | 726 | putback_lru_page(newpage); |
727 | |||
755 | if (result) { | 728 | if (result) { |
756 | if (rc) | 729 | if (rc) |
757 | *result = rc; | 730 | *result = rc; |
@@ -858,9 +831,11 @@ static struct page *new_page_node(struct page *p, unsigned long private, | |||
858 | * Move a set of pages as indicated in the pm array. The addr | 831 | * Move a set of pages as indicated in the pm array. The addr |
859 | * field must be set to the virtual address of the page to be moved | 832 | * field must be set to the virtual address of the page to be moved |
860 | * and the node number must contain a valid target node. | 833 | * and the node number must contain a valid target node. |
834 | * The pm array ends with node = MAX_NUMNODES. | ||
861 | */ | 835 | */ |
862 | static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | 836 | static int do_move_page_to_node_array(struct mm_struct *mm, |
863 | int migrate_all) | 837 | struct page_to_node *pm, |
838 | int migrate_all) | ||
864 | { | 839 | { |
865 | int err; | 840 | int err; |
866 | struct page_to_node *pp; | 841 | struct page_to_node *pp; |
@@ -914,7 +889,9 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | |||
914 | !migrate_all) | 889 | !migrate_all) |
915 | goto put_and_set; | 890 | goto put_and_set; |
916 | 891 | ||
917 | err = isolate_lru_page(page, &pagelist); | 892 | err = isolate_lru_page(page); |
893 | if (!err) | ||
894 | list_add_tail(&page->lru, &pagelist); | ||
918 | put_and_set: | 895 | put_and_set: |
919 | /* | 896 | /* |
920 | * Either remove the duplicate refcount from | 897 | * Either remove the duplicate refcount from |
@@ -926,36 +903,118 @@ set_status: | |||
926 | pp->status = err; | 903 | pp->status = err; |
927 | } | 904 | } |
928 | 905 | ||
906 | err = 0; | ||
929 | if (!list_empty(&pagelist)) | 907 | if (!list_empty(&pagelist)) |
930 | err = migrate_pages(&pagelist, new_page_node, | 908 | err = migrate_pages(&pagelist, new_page_node, |
931 | (unsigned long)pm); | 909 | (unsigned long)pm); |
932 | else | ||
933 | err = -ENOENT; | ||
934 | 910 | ||
935 | up_read(&mm->mmap_sem); | 911 | up_read(&mm->mmap_sem); |
936 | return err; | 912 | return err; |
937 | } | 913 | } |
938 | 914 | ||
939 | /* | 915 | /* |
940 | * Determine the nodes of a list of pages. The addr in the pm array | 916 | * Migrate an array of page address onto an array of nodes and fill |
941 | * must have been set to the virtual address of which we want to determine | 917 | * the corresponding array of status. |
942 | * the node number. | ||
943 | */ | 918 | */ |
944 | static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) | 919 | static int do_pages_move(struct mm_struct *mm, struct task_struct *task, |
920 | unsigned long nr_pages, | ||
921 | const void __user * __user *pages, | ||
922 | const int __user *nodes, | ||
923 | int __user *status, int flags) | ||
945 | { | 924 | { |
925 | struct page_to_node *pm = NULL; | ||
926 | nodemask_t task_nodes; | ||
927 | int err = 0; | ||
928 | int i; | ||
929 | |||
930 | task_nodes = cpuset_mems_allowed(task); | ||
931 | |||
932 | /* Limit nr_pages so that the multiplication may not overflow */ | ||
933 | if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { | ||
934 | err = -E2BIG; | ||
935 | goto out; | ||
936 | } | ||
937 | |||
938 | pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); | ||
939 | if (!pm) { | ||
940 | err = -ENOMEM; | ||
941 | goto out; | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * Get parameters from user space and initialize the pm | ||
946 | * array. Return various errors if the user did something wrong. | ||
947 | */ | ||
948 | for (i = 0; i < nr_pages; i++) { | ||
949 | const void __user *p; | ||
950 | |||
951 | err = -EFAULT; | ||
952 | if (get_user(p, pages + i)) | ||
953 | goto out_pm; | ||
954 | |||
955 | pm[i].addr = (unsigned long)p; | ||
956 | if (nodes) { | ||
957 | int node; | ||
958 | |||
959 | if (get_user(node, nodes + i)) | ||
960 | goto out_pm; | ||
961 | |||
962 | err = -ENODEV; | ||
963 | if (!node_state(node, N_HIGH_MEMORY)) | ||
964 | goto out_pm; | ||
965 | |||
966 | err = -EACCES; | ||
967 | if (!node_isset(node, task_nodes)) | ||
968 | goto out_pm; | ||
969 | |||
970 | pm[i].node = node; | ||
971 | } else | ||
972 | pm[i].node = 0; /* anything to not match MAX_NUMNODES */ | ||
973 | } | ||
974 | /* End marker */ | ||
975 | pm[nr_pages].node = MAX_NUMNODES; | ||
976 | |||
977 | err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL); | ||
978 | if (err >= 0) | ||
979 | /* Return status information */ | ||
980 | for (i = 0; i < nr_pages; i++) | ||
981 | if (put_user(pm[i].status, status + i)) | ||
982 | err = -EFAULT; | ||
983 | |||
984 | out_pm: | ||
985 | vfree(pm); | ||
986 | out: | ||
987 | return err; | ||
988 | } | ||
989 | |||
990 | /* | ||
991 | * Determine the nodes of an array of pages and store it in an array of status. | ||
992 | */ | ||
993 | static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, | ||
994 | const void __user * __user *pages, | ||
995 | int __user *status) | ||
996 | { | ||
997 | unsigned long i; | ||
998 | int err; | ||
999 | |||
946 | down_read(&mm->mmap_sem); | 1000 | down_read(&mm->mmap_sem); |
947 | 1001 | ||
948 | for ( ; pm->node != MAX_NUMNODES; pm++) { | 1002 | for (i = 0; i < nr_pages; i++) { |
1003 | const void __user *p; | ||
1004 | unsigned long addr; | ||
949 | struct vm_area_struct *vma; | 1005 | struct vm_area_struct *vma; |
950 | struct page *page; | 1006 | struct page *page; |
951 | int err; | ||
952 | 1007 | ||
953 | err = -EFAULT; | 1008 | err = -EFAULT; |
954 | vma = find_vma(mm, pm->addr); | 1009 | if (get_user(p, pages+i)) |
1010 | goto out; | ||
1011 | addr = (unsigned long) p; | ||
1012 | |||
1013 | vma = find_vma(mm, addr); | ||
955 | if (!vma) | 1014 | if (!vma) |
956 | goto set_status; | 1015 | goto set_status; |
957 | 1016 | ||
958 | page = follow_page(vma, pm->addr, 0); | 1017 | page = follow_page(vma, addr, 0); |
959 | 1018 | ||
960 | err = PTR_ERR(page); | 1019 | err = PTR_ERR(page); |
961 | if (IS_ERR(page)) | 1020 | if (IS_ERR(page)) |
@@ -968,11 +1027,13 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) | |||
968 | 1027 | ||
969 | err = page_to_nid(page); | 1028 | err = page_to_nid(page); |
970 | set_status: | 1029 | set_status: |
971 | pm->status = err; | 1030 | put_user(err, status+i); |
972 | } | 1031 | } |
1032 | err = 0; | ||
973 | 1033 | ||
1034 | out: | ||
974 | up_read(&mm->mmap_sem); | 1035 | up_read(&mm->mmap_sem); |
975 | return 0; | 1036 | return err; |
976 | } | 1037 | } |
977 | 1038 | ||
978 | /* | 1039 | /* |
@@ -984,12 +1045,9 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | |||
984 | const int __user *nodes, | 1045 | const int __user *nodes, |
985 | int __user *status, int flags) | 1046 | int __user *status, int flags) |
986 | { | 1047 | { |
987 | int err = 0; | ||
988 | int i; | ||
989 | struct task_struct *task; | 1048 | struct task_struct *task; |
990 | nodemask_t task_nodes; | ||
991 | struct mm_struct *mm; | 1049 | struct mm_struct *mm; |
992 | struct page_to_node *pm = NULL; | 1050 | int err; |
993 | 1051 | ||
994 | /* Check flags */ | 1052 | /* Check flags */ |
995 | if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) | 1053 | if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) |
@@ -1021,75 +1079,21 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | |||
1021 | (current->uid != task->suid) && (current->uid != task->uid) && | 1079 | (current->uid != task->suid) && (current->uid != task->uid) && |
1022 | !capable(CAP_SYS_NICE)) { | 1080 | !capable(CAP_SYS_NICE)) { |
1023 | err = -EPERM; | 1081 | err = -EPERM; |
1024 | goto out2; | 1082 | goto out; |
1025 | } | 1083 | } |
1026 | 1084 | ||
1027 | err = security_task_movememory(task); | 1085 | err = security_task_movememory(task); |
1028 | if (err) | 1086 | if (err) |
1029 | goto out2; | 1087 | goto out; |
1030 | |||
1031 | |||
1032 | task_nodes = cpuset_mems_allowed(task); | ||
1033 | |||
1034 | /* Limit nr_pages so that the multiplication may not overflow */ | ||
1035 | if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) { | ||
1036 | err = -E2BIG; | ||
1037 | goto out2; | ||
1038 | } | ||
1039 | |||
1040 | pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node)); | ||
1041 | if (!pm) { | ||
1042 | err = -ENOMEM; | ||
1043 | goto out2; | ||
1044 | } | ||
1045 | |||
1046 | /* | ||
1047 | * Get parameters from user space and initialize the pm | ||
1048 | * array. Return various errors if the user did something wrong. | ||
1049 | */ | ||
1050 | for (i = 0; i < nr_pages; i++) { | ||
1051 | const void __user *p; | ||
1052 | |||
1053 | err = -EFAULT; | ||
1054 | if (get_user(p, pages + i)) | ||
1055 | goto out; | ||
1056 | |||
1057 | pm[i].addr = (unsigned long)p; | ||
1058 | if (nodes) { | ||
1059 | int node; | ||
1060 | |||
1061 | if (get_user(node, nodes + i)) | ||
1062 | goto out; | ||
1063 | |||
1064 | err = -ENODEV; | ||
1065 | if (!node_state(node, N_HIGH_MEMORY)) | ||
1066 | goto out; | ||
1067 | |||
1068 | err = -EACCES; | ||
1069 | if (!node_isset(node, task_nodes)) | ||
1070 | goto out; | ||
1071 | 1088 | ||
1072 | pm[i].node = node; | 1089 | if (nodes) { |
1073 | } else | 1090 | err = do_pages_move(mm, task, nr_pages, pages, nodes, status, |
1074 | pm[i].node = 0; /* anything to not match MAX_NUMNODES */ | 1091 | flags); |
1092 | } else { | ||
1093 | err = do_pages_stat(mm, nr_pages, pages, status); | ||
1075 | } | 1094 | } |
1076 | /* End marker */ | ||
1077 | pm[nr_pages].node = MAX_NUMNODES; | ||
1078 | |||
1079 | if (nodes) | ||
1080 | err = do_move_pages(mm, pm, flags & MPOL_MF_MOVE_ALL); | ||
1081 | else | ||
1082 | err = do_pages_stat(mm, pm); | ||
1083 | |||
1084 | if (err >= 0) | ||
1085 | /* Return status information */ | ||
1086 | for (i = 0; i < nr_pages; i++) | ||
1087 | if (put_user(pm[i].status, status + i)) | ||
1088 | err = -EFAULT; | ||
1089 | 1095 | ||
1090 | out: | 1096 | out: |
1091 | vfree(pm); | ||
1092 | out2: | ||
1093 | mmput(mm); | 1097 | mmput(mm); |
1094 | return err; | 1098 | return err; |
1095 | } | 1099 | } |