diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 65 |
1 files changed, 49 insertions, 16 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 449d77d409f5..2a80136b23bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -9,7 +9,7 @@ | |||
9 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> | 9 | * IWAMOTO Toshihiro <iwamoto@valinux.co.jp> |
10 | * Hirokazu Takahashi <taka@valinux.co.jp> | 10 | * Hirokazu Takahashi <taka@valinux.co.jp> |
11 | * Dave Hansen <haveblue@us.ibm.com> | 11 | * Dave Hansen <haveblue@us.ibm.com> |
12 | * Christoph Lameter <clameter@sgi.com> | 12 | * Christoph Lameter |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/migrate.h> | 15 | #include <linux/migrate.h> |
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/vmalloc.h> | 30 | #include <linux/vmalloc.h> |
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
32 | #include <linux/memcontrol.h> | 32 | #include <linux/memcontrol.h> |
33 | #include <linux/syscalls.h> | ||
33 | 34 | ||
34 | #include "internal.h" | 35 | #include "internal.h" |
35 | 36 | ||
@@ -284,7 +285,15 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, | |||
284 | 285 | ||
285 | page = migration_entry_to_page(entry); | 286 | page = migration_entry_to_page(entry); |
286 | 287 | ||
287 | get_page(page); | 288 | /* |
289 | * Once the radix-tree replacement step of page migration has started, | ||
290 | * page_count *must* be zero, and we don't want to call | ||
291 | * wait_on_page_locked() on a page without holding a reference. | ||
292 | * So we use get_page_unless_zero() here. Even if it fails, the page | ||
293 | * fault will simply occur again. | ||
294 | */ | ||
295 | if (!get_page_unless_zero(page)) | ||
296 | goto out; | ||
288 | pte_unmap_unlock(ptep, ptl); | 297 | pte_unmap_unlock(ptep, ptl); |
289 | wait_on_page_locked(page); | 298 | wait_on_page_locked(page); |
290 | put_page(page); | 299 | put_page(page); |
@@ -304,6 +313,7 @@ out: | |||
304 | static int migrate_page_move_mapping(struct address_space *mapping, | 313 | static int migrate_page_move_mapping(struct address_space *mapping, |
305 | struct page *newpage, struct page *page) | 314 | struct page *newpage, struct page *page) |
306 | { | 315 | { |
316 | int expected_count; | ||
307 | void **pslot; | 317 | void **pslot; |
308 | 318 | ||
309 | if (!mapping) { | 319 | if (!mapping) { |
@@ -313,14 +323,20 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
313 | return 0; | 323 | return 0; |
314 | } | 324 | } |
315 | 325 | ||
316 | write_lock_irq(&mapping->tree_lock); | 326 | spin_lock_irq(&mapping->tree_lock); |
317 | 327 | ||
318 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | 328 | pslot = radix_tree_lookup_slot(&mapping->page_tree, |
319 | page_index(page)); | 329 | page_index(page)); |
320 | 330 | ||
321 | if (page_count(page) != 2 + !!PagePrivate(page) || | 331 | expected_count = 2 + !!PagePrivate(page); |
332 | if (page_count(page) != expected_count || | ||
322 | (struct page *)radix_tree_deref_slot(pslot) != page) { | 333 | (struct page *)radix_tree_deref_slot(pslot) != page) { |
323 | write_unlock_irq(&mapping->tree_lock); | 334 | spin_unlock_irq(&mapping->tree_lock); |
335 | return -EAGAIN; | ||
336 | } | ||
337 | |||
338 | if (!page_freeze_refs(page, expected_count)) { | ||
339 | spin_unlock_irq(&mapping->tree_lock); | ||
324 | return -EAGAIN; | 340 | return -EAGAIN; |
325 | } | 341 | } |
326 | 342 | ||
@@ -337,6 +353,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
337 | 353 | ||
338 | radix_tree_replace_slot(pslot, newpage); | 354 | radix_tree_replace_slot(pslot, newpage); |
339 | 355 | ||
356 | page_unfreeze_refs(page, expected_count); | ||
340 | /* | 357 | /* |
341 | * Drop cache reference from old page. | 358 | * Drop cache reference from old page. |
342 | * We know this isn't the last reference. | 359 | * We know this isn't the last reference. |
@@ -356,7 +373,9 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
356 | __dec_zone_page_state(page, NR_FILE_PAGES); | 373 | __dec_zone_page_state(page, NR_FILE_PAGES); |
357 | __inc_zone_page_state(newpage, NR_FILE_PAGES); | 374 | __inc_zone_page_state(newpage, NR_FILE_PAGES); |
358 | 375 | ||
359 | write_unlock_irq(&mapping->tree_lock); | 376 | spin_unlock_irq(&mapping->tree_lock); |
377 | if (!PageSwapCache(newpage)) | ||
378 | mem_cgroup_uncharge_cache_page(page); | ||
360 | 379 | ||
361 | return 0; | 380 | return 0; |
362 | } | 381 | } |
@@ -586,7 +605,7 @@ static int move_to_new_page(struct page *newpage, struct page *page) | |||
586 | * establishing additional references. We are the only one | 605 | * establishing additional references. We are the only one |
587 | * holding a reference to the new page at this point. | 606 | * holding a reference to the new page at this point. |
588 | */ | 607 | */ |
589 | if (TestSetPageLocked(newpage)) | 608 | if (!trylock_page(newpage)) |
590 | BUG(); | 609 | BUG(); |
591 | 610 | ||
592 | /* Prepare mapping for the new page.*/ | 611 | /* Prepare mapping for the new page.*/ |
@@ -610,7 +629,6 @@ static int move_to_new_page(struct page *newpage, struct page *page) | |||
610 | rc = fallback_migrate_page(mapping, newpage, page); | 629 | rc = fallback_migrate_page(mapping, newpage, page); |
611 | 630 | ||
612 | if (!rc) { | 631 | if (!rc) { |
613 | mem_cgroup_page_migration(page, newpage); | ||
614 | remove_migration_ptes(page, newpage); | 632 | remove_migration_ptes(page, newpage); |
615 | } else | 633 | } else |
616 | newpage->mapping = NULL; | 634 | newpage->mapping = NULL; |
@@ -640,8 +658,16 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
640 | /* page was freed from under us. So we are done. */ | 658 | /* page was freed from under us. So we are done. */ |
641 | goto move_newpage; | 659 | goto move_newpage; |
642 | 660 | ||
661 | charge = mem_cgroup_prepare_migration(page, newpage); | ||
662 | if (charge == -ENOMEM) { | ||
663 | rc = -ENOMEM; | ||
664 | goto move_newpage; | ||
665 | } | ||
666 | /* mem_cgroup_prepare_migration() only returns 0 or -ENOMEM */ | ||
667 | BUG_ON(charge); | ||
668 | |||
643 | rc = -EAGAIN; | 669 | rc = -EAGAIN; |
644 | if (TestSetPageLocked(page)) { | 670 | if (!trylock_page(page)) { |
645 | if (!force) | 671 | if (!force) |
646 | goto move_newpage; | 672 | goto move_newpage; |
647 | lock_page(page); | 673 | lock_page(page); |
@@ -691,19 +717,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
691 | goto rcu_unlock; | 717 | goto rcu_unlock; |
692 | } | 718 | } |
693 | 719 | ||
694 | charge = mem_cgroup_prepare_migration(page); | ||
695 | /* Establish migration ptes or remove ptes */ | 720 | /* Establish migration ptes or remove ptes */ |
696 | try_to_unmap(page, 1); | 721 | try_to_unmap(page, 1); |
697 | 722 | ||
698 | if (!page_mapped(page)) | 723 | if (!page_mapped(page)) |
699 | rc = move_to_new_page(newpage, page); | 724 | rc = move_to_new_page(newpage, page); |
700 | 725 | ||
701 | if (rc) { | 726 | if (rc) |
702 | remove_migration_ptes(page, page); | 727 | remove_migration_ptes(page, page); |
703 | if (charge) | ||
704 | mem_cgroup_end_migration(page); | ||
705 | } else if (charge) | ||
706 | mem_cgroup_end_migration(newpage); | ||
707 | rcu_unlock: | 728 | rcu_unlock: |
708 | if (rcu_locked) | 729 | if (rcu_locked) |
709 | rcu_read_unlock(); | 730 | rcu_read_unlock(); |
@@ -724,6 +745,8 @@ unlock: | |||
724 | } | 745 | } |
725 | 746 | ||
726 | move_newpage: | 747 | move_newpage: |
748 | if (!charge) | ||
749 | mem_cgroup_end_migration(newpage); | ||
727 | /* | 750 | /* |
728 | * Move the new page to the LRU. If migration was not successful | 751 | * Move the new page to the LRU. If migration was not successful |
729 | * then this will free the page. | 752 | * then this will free the page. |
@@ -865,6 +888,11 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm, | |||
865 | goto set_status; | 888 | goto set_status; |
866 | 889 | ||
867 | page = follow_page(vma, pp->addr, FOLL_GET); | 890 | page = follow_page(vma, pp->addr, FOLL_GET); |
891 | |||
892 | err = PTR_ERR(page); | ||
893 | if (IS_ERR(page)) | ||
894 | goto set_status; | ||
895 | |||
868 | err = -ENOENT; | 896 | err = -ENOENT; |
869 | if (!page) | 897 | if (!page) |
870 | goto set_status; | 898 | goto set_status; |
@@ -928,6 +956,11 @@ static int do_pages_stat(struct mm_struct *mm, struct page_to_node *pm) | |||
928 | goto set_status; | 956 | goto set_status; |
929 | 957 | ||
930 | page = follow_page(vma, pm->addr, 0); | 958 | page = follow_page(vma, pm->addr, 0); |
959 | |||
960 | err = PTR_ERR(page); | ||
961 | if (IS_ERR(page)) | ||
962 | goto set_status; | ||
963 | |||
931 | err = -ENOENT; | 964 | err = -ENOENT; |
932 | /* Use PageReserved to check for zero page */ | 965 | /* Use PageReserved to check for zero page */ |
933 | if (!page || PageReserved(page)) | 966 | if (!page || PageReserved(page)) |
@@ -1060,7 +1093,6 @@ out2: | |||
1060 | mmput(mm); | 1093 | mmput(mm); |
1061 | return err; | 1094 | return err; |
1062 | } | 1095 | } |
1063 | #endif | ||
1064 | 1096 | ||
1065 | /* | 1097 | /* |
1066 | * Call migration functions in the vma_ops that may prepare | 1098 | * Call migration functions in the vma_ops that may prepare |
@@ -1082,3 +1114,4 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to, | |||
1082 | } | 1114 | } |
1083 | return err; | 1115 | return err; |
1084 | } | 1116 | } |
1117 | #endif | ||