Diffstat (limited to 'mm/rmap.c')

 -rw-r--r--  mm/rmap.c | 350
 1 file changed, 199 insertions(+), 151 deletions(-)
@@ -49,6 +49,7 @@
 #include <linux/swapops.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
@@ -67,7 +68,7 @@ static inline struct anon_vma *anon_vma_alloc(void)
 	return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 }
 
-static inline void anon_vma_free(struct anon_vma *anon_vma)
+void anon_vma_free(struct anon_vma *anon_vma)
 {
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
@@ -171,7 +172,7 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 	list_del(&vma->anon_vma_node);
 
 	/* We must garbage collect the anon_vma if it's empty */
-	empty = list_empty(&anon_vma->head);
+	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
 	spin_unlock(&anon_vma->lock);
 
 	if (empty)
@@ -183,6 +184,7 @@ static void anon_vma_ctor(void *data)
 	struct anon_vma *anon_vma = data;
 
 	spin_lock_init(&anon_vma->lock);
+	ksm_refcount_init(anon_vma);
 	INIT_LIST_HEAD(&anon_vma->head);
 }
 
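The two hunks above change anon_vma lifetime: with KSM holding references, an empty vma list no longer implies the anon_vma can be freed, so anon_vma_free() is un-inlined and exported, and the garbage-collection test gains !ksm_refcount(). A minimal sketch of the interface assumed here (modelled on this series' <linux/ksm.h> and struct anon_vma; the exact field name is an assumption):

	/* sketch: KSM pins an anon_vma with an atomic count */
	static inline void ksm_refcount_init(struct anon_vma *anon_vma)
	{
		atomic_set(&anon_vma->ksm_refcount, 0);
	}

	static inline int ksm_refcount(struct anon_vma *anon_vma)
	{
		return atomic_read(&anon_vma->ksm_refcount);
	}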
@@ -202,8 +204,8 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
 	unsigned long anon_mapping;
 
 	rcu_read_lock();
-	anon_mapping = (unsigned long) page->mapping;
-	if (!(anon_mapping & PAGE_MAPPING_ANON))
+	anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
+	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
 		goto out;
 	if (!page_mapped(page))
 		goto out;
@@ -248,8 +250,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
 	if (PageAnon(page)) {
-		if ((void *)vma->anon_vma !=
-		    (void *)page->mapping - PAGE_MAPPING_ANON)
+		if (vma->anon_vma != page_anon_vma(page))
 			return -EFAULT;
 	} else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
 		if (!vma->vm_file ||
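Both hunks above rely on page->mapping now carrying two flag bits rather than one: PAGE_MAPPING_ANON marks an anon page, and PAGE_MAPPING_ANON|PAGE_MAPPING_KSM a KSM page, so page_lock_anon_vma() must test the whole flags field (and read the pointer only once, hence ACCESS_ONCE). A sketch of the helpers assumed, following the <linux/mm.h> scheme of this era:

#define PAGE_MAPPING_ANON	1
#define PAGE_MAPPING_KSM	2
#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM)

/* strip the flag bits to recover the address_space/anon_vma pointer */
static inline void *page_rmapping(struct page *page)
{
	return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
}

/* NULL for file-backed and KSM pages: only "pure" anon pages qualify */
static inline struct anon_vma *page_anon_vma(struct page *page)
{
	if (((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) !=
						PAGE_MAPPING_ANON)
		return NULL;
	return page_rmapping(page);
}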
@@ -337,21 +338,15 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
-static int page_referenced_one(struct page *page,
-			       struct vm_area_struct *vma,
-			       unsigned int *mapcount,
-			       unsigned long *vm_flags)
+int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+			unsigned long address, unsigned int *mapcount,
+			unsigned long *vm_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long address;
 	pte_t *pte;
 	spinlock_t *ptl;
 	int referenced = 0;
 
-	address = vma_address(page, vma);
-	if (address == -EFAULT)
-		goto out;
-
 	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
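This is the pattern repeated throughout the patch: each caller computes the address once via vma_address() and skips VMAs that do not map the page, instead of every *_one() helper redoing that work. For reference, vma_address() already lives in mm/rmap.c and is not changed here; it reads roughly as follows in this era's tree:

static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	unsigned long address;

	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
		/* page should be within @vma mapping range */
		return -EFAULT;
	}
	return address;
}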
@@ -388,9 +383,10 @@ static int page_referenced_one(struct page *page,
 out_unmap:
 	(*mapcount)--;
 	pte_unmap_unlock(pte, ptl);
-out:
+
 	if (referenced)
 		*vm_flags |= vma->vm_flags;
+out:
 	return referenced;
 }
 
@@ -409,6 +405,9 @@ static int page_referenced_anon(struct page *page,
 
 	mapcount = page_mapcount(page);
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
 		/*
 		 * If we are reclaiming on behalf of a cgroup, skip
 		 * counting on behalf of references from different
@@ -416,7 +415,7 @@ static int page_referenced_anon(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		referenced += page_referenced_one(page, vma,
+		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
 		if (!mapcount)
 			break;
@@ -474,6 +473,9 @@ static int page_referenced_file(struct page *page,
 	mapcount = page_mapcount(page);
 
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
 		/*
 		 * If we are reclaiming on behalf of a cgroup, skip
 		 * counting on behalf of references from different
@@ -481,7 +483,7 @@ static int page_referenced_file(struct page *page,
 		 */
 		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
-		referenced += page_referenced_one(page, vma,
+		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
 		if (!mapcount)
 			break;
@@ -507,46 +509,47 @@ int page_referenced(struct page *page,
 		unsigned long *vm_flags)
 {
 	int referenced = 0;
+	int we_locked = 0;
 
 	if (TestClearPageReferenced(page))
 		referenced++;
 
 	*vm_flags = 0;
-	if (page_mapped(page) && page->mapping) {
-		if (PageAnon(page))
+	if (page_mapped(page) && page_rmapping(page)) {
+		if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+			we_locked = trylock_page(page);
+			if (!we_locked) {
+				referenced++;
+				goto out;
+			}
+		}
+		if (unlikely(PageKsm(page)))
+			referenced += page_referenced_ksm(page, mem_cont,
+								vm_flags);
+		else if (PageAnon(page))
 			referenced += page_referenced_anon(page, mem_cont,
 								vm_flags);
-		else if (is_locked)
+		else if (page->mapping)
 			referenced += page_referenced_file(page, mem_cont,
 								vm_flags);
-		else if (!trylock_page(page))
-			referenced++;
-		else {
-			if (page->mapping)
-				referenced += page_referenced_file(page,
-							mem_cont, vm_flags);
+		if (we_locked)
 			unlock_page(page);
-		}
 	}
-
+out:
 	if (page_test_and_clear_young(page))
 		referenced++;
 
 	return referenced;
 }
 
-static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+			    unsigned long address)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long address;
 	pte_t *pte;
 	spinlock_t *ptl;
 	int ret = 0;
 
-	address = vma_address(page, vma);
-	if (address == -EFAULT)
-		goto out;
-
 	pte = page_check_address(page, mm, address, &ptl, 1);
 	if (!pte)
 		goto out;
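In the reworked page_referenced() above, the page lock is taken (by trylock) not just for file pages but for KSM pages too: page_referenced_ksm(), added elsewhere in this series, walks KSM's own reverse-mapping structures, which only the page lock keeps stable. PageKsm() itself is just a test on the mapping bits sketched earlier:

/* a KSM page has both low bits set in page->mapping */
static inline int PageKsm(struct page *page)
{
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
}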
@@ -578,8 +581,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		if (vma->vm_flags & VM_SHARED)
-			ret += page_mkclean_one(page, vma);
+		if (vma->vm_flags & VM_SHARED) {
+			unsigned long address = vma_address(page, vma);
+			if (address == -EFAULT)
+				continue;
+			ret += page_mkclean_one(page, vma, address);
+		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
 	return ret;
@@ -620,14 +627,7 @@ static void __page_set_anon_rmap(struct page *page,
 	BUG_ON(!anon_vma);
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
-
 	page->index = linear_page_index(vma, address);
-
-	/*
-	 * nr_mapped state can be updated without turning off
-	 * interrupts because it is not modified via interrupt.
-	 */
-	__inc_zone_page_state(page, NR_ANON_PAGES);
 }
 
 /**
@@ -665,14 +665,23 @@ static void __page_check_anon_rmap(struct page *page,
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
  *
- * The caller needs to hold the pte lock and the page must be locked.
+ * The caller needs to hold the pte lock, and the page must be locked in
+ * the anon_vma case: to serialize mapping,index checking after setting,
+ * and to ensure that PageAnon is not being upgraded racily to PageKsm
+ * (but PageKsm is never downgraded to PageAnon).
  */
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
+	int first = atomic_inc_and_test(&page->_mapcount);
+	if (first)
+		__inc_zone_page_state(page, NR_ANON_PAGES);
+	if (unlikely(PageKsm(page)))
+		return;
+
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-	if (atomic_inc_and_test(&page->_mapcount))
+	if (first)
 		__page_set_anon_rmap(page, vma, address);
 	else
 		__page_check_anon_rmap(page, vma, address);
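Why "first" works: _mapcount starts at -1 on an unmapped page, so atomic_inc_and_test() returns true exactly for the mapper that brings it to zero. Hoisting the increment (and the NR_ANON_PAGES accounting, relocated here from __page_set_anon_rmap()) ahead of the early PageKsm() return keeps KSM pages counted without letting them overwrite page->mapping. Illustration only:

	atomic_set(&page->_mapcount, -1);		/* not mapped anywhere  */
	first = atomic_inc_and_test(&page->_mapcount);	/* -1 -> 0: first != 0 */
	first = atomic_inc_and_test(&page->_mapcount);	/*  0 -> 1: first == 0 */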
@@ -694,6 +703,7 @@ void page_add_new_anon_rmap(struct page *page,
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
+	__inc_zone_page_state(page, NR_ANON_PAGES);
 	__page_set_anon_rmap(page, vma, address);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
@@ -760,20 +770,15 @@ void page_remove_rmap(struct page *page)
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
-static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-				enum ttu_flags flags)
+int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+		     unsigned long address, enum ttu_flags flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
 	spinlock_t *ptl;
 	int ret = SWAP_AGAIN;
 
-	address = vma_address(page, vma);
-	if (address == -EFAULT)
-		goto out;
-
 	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
@@ -784,10 +789,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * skipped over this mm) then we should reactivate it.
 	 */
 	if (!(flags & TTU_IGNORE_MLOCK)) {
-		if (vma->vm_flags & VM_LOCKED) {
-			ret = SWAP_MLOCK;
+		if (vma->vm_flags & VM_LOCKED)
+			goto out_mlock;
+
+		if (TTU_ACTION(flags) == TTU_MUNLOCK)
 			goto out_unmap;
-		}
 	}
 	if (!(flags & TTU_IGNORE_ACCESS)) {
 		if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -822,7 +828,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		 * Store the swap location in the pte.
 		 * See handle_pte_fault() ...
 		 */
-		swap_duplicate(entry);
+		if (swap_duplicate(entry) < 0) {
+			set_pte_at(mm, address, pte, pteval);
+			ret = SWAP_FAIL;
+			goto out_unmap;
+		}
 		if (list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
 			if (list_empty(&mm->mmlist))
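swap_duplicate() previously could not fail here; with KSM sharing one page across many mms, the swap count can outgrow its field, and elsewhere in this series swap_duplicate() gains an int return (negative, e.g. -ENOMEM, when the count cannot be raised). The assumed interface, for orientation:

	/*
	 * Assumed after this series (mm/swapfile.c): returns 0 on success,
	 * or a negative error when the swap count cannot be incremented.
	 */
	extern int swap_duplicate(swp_entry_t entry);

On failure, the pte value saved in pteval by the earlier ptep_clear_flush() is simply restored and SWAP_FAIL returned, as the hunk shows.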
@@ -849,7 +859,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	} else
 		dec_mm_counter(mm, file_rss);
 
-
 	page_remove_rmap(page);
 	page_cache_release(page);
 
@@ -857,6 +866,27 @@ out_unmap:
 	pte_unmap_unlock(pte, ptl);
 out:
 	return ret;
+
+out_mlock:
+	pte_unmap_unlock(pte, ptl);
+
+
+	/*
+	 * We need mmap_sem locking: without it the VM_LOCKED check is
+	 * racy and its result unstable. And we cannot wait here, because
+	 * we now hold anon_vma->lock or mapping->i_mmap_lock.
+	 * If the trylock fails, the page stays on the evictable LRU, and
+	 * vmscan may later retry moving it to the unevictable LRU when
+	 * it finds the page really is mlocked.
+	 */
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+		if (vma->vm_flags & VM_LOCKED) {
+			mlock_vma_page(page);
+			ret = SWAP_MLOCK;
+		}
+		up_read(&vma->vm_mm->mmap_sem);
+	}
+	return ret;
 }
 
 /*
@@ -922,11 +952,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 		return ret;
 
 	/*
-	 * MLOCK_PAGES => feature is configured.
-	 * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+	 * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
 	 * keep the sem while scanning the cluster for mlocking pages.
 	 */
-	if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
 		locked_vma = (vma->vm_flags & VM_LOCKED);
 		if (!locked_vma)
 			up_read(&vma->vm_mm->mmap_sem);	/* don't need it */
@@ -976,29 +1005,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 	return ret;
 }
 
-/*
- * common handling for pages mapped in VM_LOCKED vmas
- */
-static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
-{
-	int mlocked = 0;
-
-	if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
-		if (vma->vm_flags & VM_LOCKED) {
-			mlock_vma_page(page);
-			mlocked++;	/* really mlocked the page */
-		}
-		up_read(&vma->vm_mm->mmap_sem);
-	}
-	return mlocked;
-}
-
 /**
  * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
  * rmap method
  * @page: the page to unmap/unlock
- * @unlock: request for unlock rather than unmap [unlikely]
- * @migration: unmapping for migration - ignored if @unlock
+ * @flags: action and flags
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the anon_vma struct it points to.
@@ -1014,42 +1025,22 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
 	struct vm_area_struct *vma;
-	unsigned int mlocked = 0;
 	int ret = SWAP_AGAIN;
-	int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
-
-	if (MLOCK_PAGES && unlikely(unlock))
-		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
 	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-		if (MLOCK_PAGES && unlikely(unlock)) {
-			if (!((vma->vm_flags & VM_LOCKED) &&
-			      page_mapped_in_vma(page, vma)))
-				continue;  /* must visit all unlocked vmas */
-			ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
-		} else {
-			ret = try_to_unmap_one(page, vma, flags);
-			if (ret == SWAP_FAIL || !page_mapped(page))
-				break;
-		}
-		if (ret == SWAP_MLOCK) {
-			mlocked = try_to_mlock_page(page, vma);
-			if (mlocked)
-				break;	/* stop if actually mlocked page */
-		}
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = try_to_unmap_one(page, vma, address, flags);
+		if (ret != SWAP_AGAIN || !page_mapped(page))
+			break;
 	}
 
 	page_unlock_anon_vma(anon_vma);
-
-	if (mlocked)
-		ret = SWAP_MLOCK;	/* actually mlocked the page */
-	else if (ret == SWAP_MLOCK)
-		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
-
 	return ret;
 }
 
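With try_to_mlock_page() gone, munlock is no longer a special-cased walk: try_to_unmap_one() itself recognizes TTU_MUNLOCK (bailing out at out_unmap) and reports SWAP_MLOCK from out_mlock when it actually mlocks the page, so the callers just stop on any ret != SWAP_AGAIN. For reference, the action encoding and return codes this relies on, as they read in this era's <linux/rmap.h>:

enum ttu_flags {
	TTU_UNMAP = 0,			/* unmap mode */
	TTU_MIGRATION = 1,		/* migration mode */
	TTU_MUNLOCK = 2,		/* munlock mode */
	TTU_ACTION_MASK = 0xff,

	TTU_IGNORE_MLOCK = (1 << 8),	/* ignore mlock */
	TTU_IGNORE_ACCESS = (1 << 9),	/* don't age */
	TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
};
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)

#define SWAP_SUCCESS	0
#define SWAP_AGAIN	1
#define SWAP_FAIL	2
#define SWAP_MLOCK	3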
@@ -1079,48 +1070,30 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	unsigned long max_nl_cursor = 0;
 	unsigned long max_nl_size = 0;
 	unsigned int mapcount;
-	unsigned int mlocked = 0;
-	int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
-
-	if (MLOCK_PAGES && unlikely(unlock))
-		ret = SWAP_SUCCESS;	/* default for try_to_munlock() */
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-		if (MLOCK_PAGES && unlikely(unlock)) {
-			if (!((vma->vm_flags & VM_LOCKED) &&
-			      page_mapped_in_vma(page, vma)))
-				continue;	/* must visit all vmas */
-			ret = SWAP_MLOCK;
-		} else {
-			ret = try_to_unmap_one(page, vma, flags);
-			if (ret == SWAP_FAIL || !page_mapped(page))
-				goto out;
-		}
-		if (ret == SWAP_MLOCK) {
-			mlocked = try_to_mlock_page(page, vma);
-			if (mlocked)
-				break;	/* stop if actually mlocked page */
-		}
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = try_to_unmap_one(page, vma, address, flags);
+		if (ret != SWAP_AGAIN || !page_mapped(page))
+			goto out;
 	}
 
-	if (mlocked)
+	if (list_empty(&mapping->i_mmap_nonlinear))
 		goto out;
 
-	if (list_empty(&mapping->i_mmap_nonlinear))
+	/*
+	 * We don't bother to try to find the munlocked page in nonlinears.
+	 * It's costly. Instead, later, page reclaim logic may call
+	 * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
+	 */
+	if (TTU_ACTION(flags) == TTU_MUNLOCK)
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 			shared.vm_set.list) {
-		if (MLOCK_PAGES && unlikely(unlock)) {
-			if (!(vma->vm_flags & VM_LOCKED))
-				continue;	/* must visit all vmas */
-			ret = SWAP_MLOCK;	/* leave mlocked == 0 */
-			goto out;		/* no need to look further */
-		}
-		if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
-		    (vma->vm_flags & VM_LOCKED))
-			continue;
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
 			max_nl_cursor = cursor;
@@ -1153,16 +1126,12 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
 						shared.vm_set.list) {
-			if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
-			    (vma->vm_flags & VM_LOCKED))
-				continue;
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
-				ret = try_to_unmap_cluster(cursor, &mapcount,
-								vma, page);
-				if (ret == SWAP_MLOCK)
-					mlocked = 2;	/* to return below */
+				if (try_to_unmap_cluster(cursor, &mapcount,
+							vma, page) == SWAP_MLOCK)
+					ret = SWAP_MLOCK;
 				cursor += CLUSTER_SIZE;
 				vma->vm_private_data = (void *) cursor;
 				if ((int)mapcount <= 0)
@@ -1183,10 +1152,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 		vma->vm_private_data = NULL;
 out:
 	spin_unlock(&mapping->i_mmap_lock);
-	if (mlocked)
-		ret = SWAP_MLOCK;	/* actually mlocked the page */
-	else if (ret == SWAP_MLOCK)
-		ret = SWAP_AGAIN;	/* saw VM_LOCKED vma */
 	return ret;
 }
 
@@ -1210,7 +1175,9 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
 	BUG_ON(!PageLocked(page));
 
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		ret = try_to_unmap_ksm(page, flags);
+	else if (PageAnon(page))
 		ret = try_to_unmap_anon(page, flags);
 	else
 		ret = try_to_unmap_file(page, flags);
@@ -1229,17 +1196,98 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  *
  * Return values are:
  *
- * SWAP_SUCCESS	- no vma's holding page mlocked.
- * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_AGAIN	- no vma is holding page mlocked, or,
+ * SWAP_AGAIN	- page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_FAIL	- page cannot be located at present
  * SWAP_MLOCK	- page is now mlocked.
  */
 int try_to_munlock(struct page *page)
 {
 	VM_BUG_ON(!PageLocked(page) || PageLRU(page));
 
-	if (PageAnon(page))
+	if (unlikely(PageKsm(page)))
+		return try_to_unmap_ksm(page, TTU_MUNLOCK);
+	else if (PageAnon(page))
 		return try_to_unmap_anon(page, TTU_MUNLOCK);
 	else
 		return try_to_unmap_file(page, TTU_MUNLOCK);
 }
 
+#ifdef CONFIG_MIGRATION
+/*
+ * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct anon_vma *anon_vma;
+	struct vm_area_struct *vma;
+	int ret = SWAP_AGAIN;
+
+	/*
+	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+	 * because that depends on page_mapped(); but not all its usages
+	 * are holding mmap_sem, which also gave the necessary guarantee
+	 * (that this anon_vma's slab has not already been destroyed).
+	 * This needs to be reviewed later: avoiding page_lock_anon_vma()
+	 * is risky, and currently limits the usefulness of rmap_walk().
+	 */
+	anon_vma = page_anon_vma(page);
+	if (!anon_vma)
+		return ret;
+	spin_lock(&anon_vma->lock);
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	spin_unlock(&anon_vma->lock);
+	return ret;
+}
+
+static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	struct address_space *mapping = page->mapping;
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = SWAP_AGAIN;
+
+	if (!mapping)
+		return ret;
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		unsigned long address = vma_address(page, vma);
+		if (address == -EFAULT)
+			continue;
+		ret = rmap_one(page, vma, address, arg);
+		if (ret != SWAP_AGAIN)
+			break;
+	}
+	/*
+	 * No nonlinear handling: being always shared, nonlinear vmas
+	 * never contain migration ptes. Decide what to do about this
+	 * limitation to linear when we need rmap_walk() on nonlinear.
+	 */
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+		struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+	VM_BUG_ON(!PageLocked(page));
+
+	if (unlikely(PageKsm(page)))
+		return rmap_walk_ksm(page, rmap_one, arg);
+	else if (PageAnon(page))
+		return rmap_walk_anon(page, rmap_one, arg);
+	else
+		return rmap_walk_file(page, rmap_one, arg);
+}
+#endif /* CONFIG_MIGRATION */
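rmap_walk() inverts the usual structure: the caller supplies the per-mapping action as rmap_one, and the walk stops as soon as the callback returns anything but SWAP_AGAIN. A hypothetical callback, purely to show the contract (the names below are illustrative, not from this patch):

/* hypothetical: count the VMAs whose mapped range covers the page */
static int count_one(struct page *page, struct vm_area_struct *vma,
		     unsigned long address, void *arg)
{
	int *count = arg;

	(*count)++;		/* a real walker would inspect the pte at address */
	return SWAP_AGAIN;	/* keep walking; anything else stops the walk */
}

static int count_vmas(struct page *page)
{
	int count = 0;

	rmap_walk(page, count_one, &count);	/* page must be locked */
	return count;
}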