Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c | 350
 1 file changed, 199 insertions(+), 151 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index dd43373a483f..98135dbd25ba 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -49,6 +49,7 @@
 #include <linux/swapops.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
@@ -67,7 +68,7 @@ static inline struct anon_vma *anon_vma_alloc(void)
         return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
 }
 
-static inline void anon_vma_free(struct anon_vma *anon_vma)
+void anon_vma_free(struct anon_vma *anon_vma)
 {
         kmem_cache_free(anon_vma_cachep, anon_vma);
 }
@@ -171,7 +172,7 @@ void anon_vma_unlink(struct vm_area_struct *vma)
         list_del(&vma->anon_vma_node);
 
         /* We must garbage collect the anon_vma if it's empty */
-        empty = list_empty(&anon_vma->head);
+        empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
         spin_unlock(&anon_vma->lock);
 
         if (empty)
@@ -183,6 +184,7 @@ static void anon_vma_ctor(void *data)
         struct anon_vma *anon_vma = data;
 
         spin_lock_init(&anon_vma->lock);
+        ksm_refcount_init(anon_vma);
         INIT_LIST_HEAD(&anon_vma->head);
 }
 
@@ -202,8 +204,8 @@ struct anon_vma *page_lock_anon_vma(struct page *page)
         unsigned long anon_mapping;
 
         rcu_read_lock();
-        anon_mapping = (unsigned long) page->mapping;
-        if (!(anon_mapping & PAGE_MAPPING_ANON))
+        anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
+        if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
                 goto out;
         if (!page_mapped(page))
                 goto out;
@@ -248,8 +250,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
         if (PageAnon(page)) {
-                if ((void *)vma->anon_vma !=
-                    (void *)page->mapping - PAGE_MAPPING_ANON)
+                if (vma->anon_vma != page_anon_vma(page))
                         return -EFAULT;
         } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
                 if (!vma->vm_file ||
@@ -337,21 +338,15 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
  * Subfunctions of page_referenced: page_referenced_one called
  * repeatedly from either page_referenced_anon or page_referenced_file.
  */
-static int page_referenced_one(struct page *page,
-                               struct vm_area_struct *vma,
-                               unsigned int *mapcount,
-                               unsigned long *vm_flags)
+int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+                        unsigned long address, unsigned int *mapcount,
+                        unsigned long *vm_flags)
 {
         struct mm_struct *mm = vma->vm_mm;
-        unsigned long address;
         pte_t *pte;
         spinlock_t *ptl;
         int referenced = 0;
 
-        address = vma_address(page, vma);
-        if (address == -EFAULT)
-                goto out;
-
         pte = page_check_address(page, mm, address, &ptl, 0);
         if (!pte)
                 goto out;
@@ -388,9 +383,10 @@ static int page_referenced_one(struct page *page,
 out_unmap:
         (*mapcount)--;
         pte_unmap_unlock(pte, ptl);
-out:
+
         if (referenced)
                 *vm_flags |= vma->vm_flags;
+out:
         return referenced;
 }
 
@@ -409,6 +405,9 @@ static int page_referenced_anon(struct page *page,
 
         mapcount = page_mapcount(page);
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+                unsigned long address = vma_address(page, vma);
+                if (address == -EFAULT)
+                        continue;
                 /*
                  * If we are reclaiming on behalf of a cgroup, skip
                  * counting on behalf of references from different
@@ -416,7 +415,7 @@ static int page_referenced_anon(struct page *page,
                  */
                 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
                         continue;
-                referenced += page_referenced_one(page, vma,
+                referenced += page_referenced_one(page, vma, address,
                                                   &mapcount, vm_flags);
                 if (!mapcount)
                         break;
@@ -474,6 +473,9 @@ static int page_referenced_file(struct page *page,
         mapcount = page_mapcount(page);
 
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+                unsigned long address = vma_address(page, vma);
+                if (address == -EFAULT)
+                        continue;
                 /*
                  * If we are reclaiming on behalf of a cgroup, skip
                  * counting on behalf of references from different
@@ -481,7 +483,7 @@ static int page_referenced_file(struct page *page,
                  */
                 if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
                         continue;
-                referenced += page_referenced_one(page, vma,
+                referenced += page_referenced_one(page, vma, address,
                                                   &mapcount, vm_flags);
                 if (!mapcount)
                         break;
@@ -507,46 +509,47 @@ int page_referenced(struct page *page,
                     unsigned long *vm_flags)
 {
         int referenced = 0;
+        int we_locked = 0;
 
         if (TestClearPageReferenced(page))
                 referenced++;
 
         *vm_flags = 0;
-        if (page_mapped(page) && page->mapping) {
-                if (PageAnon(page))
+        if (page_mapped(page) && page_rmapping(page)) {
+                if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+                        we_locked = trylock_page(page);
+                        if (!we_locked) {
+                                referenced++;
+                                goto out;
+                        }
+                }
+                if (unlikely(PageKsm(page)))
+                        referenced += page_referenced_ksm(page, mem_cont,
                                                                 vm_flags);
+                else if (PageAnon(page))
                         referenced += page_referenced_anon(page, mem_cont,
                                                                 vm_flags);
-                else if (is_locked)
+                else if (page->mapping)
                         referenced += page_referenced_file(page, mem_cont,
                                                                 vm_flags);
-                else if (!trylock_page(page))
-                        referenced++;
-                else {
-                        if (page->mapping)
-                                referenced += page_referenced_file(page,
                                                         mem_cont, vm_flags);
+                if (we_locked)
                         unlock_page(page);
-                }
         }
-
+out:
         if (page_test_and_clear_young(page))
                 referenced++;
 
         return referenced;
 }
 
-static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+                            unsigned long address)
 {
         struct mm_struct *mm = vma->vm_mm;
-        unsigned long address;
         pte_t *pte;
         spinlock_t *ptl;
         int ret = 0;
 
-        address = vma_address(page, vma);
-        if (address == -EFAULT)
-                goto out;
-
         pte = page_check_address(page, mm, address, &ptl, 1);
         if (!pte)
                 goto out;
@@ -578,8 +581,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 
         spin_lock(&mapping->i_mmap_lock);
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-                if (vma->vm_flags & VM_SHARED)
-                        ret += page_mkclean_one(page, vma);
+                if (vma->vm_flags & VM_SHARED) {
+                        unsigned long address = vma_address(page, vma);
+                        if (address == -EFAULT)
+                                continue;
+                        ret += page_mkclean_one(page, vma, address);
+                }
         }
         spin_unlock(&mapping->i_mmap_lock);
         return ret;
@@ -620,14 +627,7 @@ static void __page_set_anon_rmap(struct page *page,
         BUG_ON(!anon_vma);
         anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
         page->mapping = (struct address_space *) anon_vma;
-
         page->index = linear_page_index(vma, address);
-
-        /*
-         * nr_mapped state can be updated without turning off
-         * interrupts because it is not modified via interrupt.
-         */
-        __inc_zone_page_state(page, NR_ANON_PAGES);
 }
 
 /**
@@ -665,14 +665,23 @@ static void __page_check_anon_rmap(struct page *page,
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
  *
- * The caller needs to hold the pte lock and the page must be locked.
+ * The caller needs to hold the pte lock, and the page must be locked in
+ * the anon_vma case: to serialize mapping,index checking after setting,
+ * and to ensure that PageAnon is not being upgraded racily to PageKsm
+ * (but PageKsm is never downgraded to PageAnon).
  */
 void page_add_anon_rmap(struct page *page,
         struct vm_area_struct *vma, unsigned long address)
 {
+        int first = atomic_inc_and_test(&page->_mapcount);
+        if (first)
+                __inc_zone_page_state(page, NR_ANON_PAGES);
+        if (unlikely(PageKsm(page)))
+                return;
+
         VM_BUG_ON(!PageLocked(page));
         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-        if (atomic_inc_and_test(&page->_mapcount))
+        if (first)
                 __page_set_anon_rmap(page, vma, address);
         else
                 __page_check_anon_rmap(page, vma, address);
@@ -694,6 +703,7 @@ void page_add_new_anon_rmap(struct page *page,
         VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
         SetPageSwapBacked(page);
         atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
+        __inc_zone_page_state(page, NR_ANON_PAGES);
         __page_set_anon_rmap(page, vma, address);
         if (page_evictable(page, vma))
                 lru_cache_add_lru(page, LRU_ACTIVE_ANON);
@@ -760,20 +770,15 @@ void page_remove_rmap(struct page *page)
  * Subfunctions of try_to_unmap: try_to_unmap_one called
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
-static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-                                enum ttu_flags flags)
+int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+                        unsigned long address, enum ttu_flags flags)
 {
         struct mm_struct *mm = vma->vm_mm;
-        unsigned long address;
         pte_t *pte;
         pte_t pteval;
         spinlock_t *ptl;
         int ret = SWAP_AGAIN;
 
-        address = vma_address(page, vma);
-        if (address == -EFAULT)
-                goto out;
-
         pte = page_check_address(page, mm, address, &ptl, 0);
         if (!pte)
                 goto out;
@@ -784,10 +789,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
          * skipped over this mm) then we should reactivate it.
          */
         if (!(flags & TTU_IGNORE_MLOCK)) {
-                if (vma->vm_flags & VM_LOCKED) {
-                        ret = SWAP_MLOCK;
+                if (vma->vm_flags & VM_LOCKED)
+                        goto out_mlock;
+
+                if (TTU_ACTION(flags) == TTU_MUNLOCK)
                         goto out_unmap;
-                }
         }
         if (!(flags & TTU_IGNORE_ACCESS)) {
                 if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -822,7 +828,11 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                  * Store the swap location in the pte.
                  * See handle_pte_fault() ...
                  */
-                swap_duplicate(entry);
+                if (swap_duplicate(entry) < 0) {
+                        set_pte_at(mm, address, pte, pteval);
+                        ret = SWAP_FAIL;
+                        goto out_unmap;
+                }
                 if (list_empty(&mm->mmlist)) {
                         spin_lock(&mmlist_lock);
                         if (list_empty(&mm->mmlist))
@@ -849,7 +859,6 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         } else
                 dec_mm_counter(mm, file_rss);
 
-
         page_remove_rmap(page);
         page_cache_release(page);
 
@@ -857,6 +866,27 @@ out_unmap:
         pte_unmap_unlock(pte, ptl);
 out:
         return ret;
+
+out_mlock:
+        pte_unmap_unlock(pte, ptl);
+
+
+        /*
+         * We need mmap_sem locking, Otherwise VM_LOCKED check makes
+         * unstable result and race. Plus, We can't wait here because
+         * we now hold anon_vma->lock or mapping->i_mmap_lock.
+         * if trylock failed, the page remain in evictable lru and later
+         * vmscan could retry to move the page to unevictable lru if the
+         * page is actually mlocked.
+         */
+        if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
+                if (vma->vm_flags & VM_LOCKED) {
+                        mlock_vma_page(page);
+                        ret = SWAP_MLOCK;
+                }
+                up_read(&vma->vm_mm->mmap_sem);
+        }
+        return ret;
 }
 
 /*
@@ -922,11 +952,10 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
                 return ret;
 
         /*
-         * MLOCK_PAGES => feature is configured.
-         * if we can acquire the mmap_sem for read, and vma is VM_LOCKED,
+         * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
          * keep the sem while scanning the cluster for mlocking pages.
          */
-        if (MLOCK_PAGES && down_read_trylock(&vma->vm_mm->mmap_sem)) {
+        if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
                 locked_vma = (vma->vm_flags & VM_LOCKED);
                 if (!locked_vma)
                         up_read(&vma->vm_mm->mmap_sem); /* don't need it */
@@ -976,29 +1005,11 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
         return ret;
 }
 
-/*
- * common handling for pages mapped in VM_LOCKED vmas
- */
-static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
-{
-        int mlocked = 0;
-
-        if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
-                if (vma->vm_flags & VM_LOCKED) {
-                        mlock_vma_page(page);
-                        mlocked++;      /* really mlocked the page */
-                }
-                up_read(&vma->vm_mm->mmap_sem);
-        }
-        return mlocked;
-}
-
 /**
  * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
  *                     rmap method
  * @page: the page to unmap/unlock
- * @unlock:  request for unlock rather than unmap [unlikely]
- * @migration:  unmapping for migration - ignored if @unlock
+ * @flags: action and flags
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the anon_vma struct it points to.
@@ -1014,42 +1025,22 @@ static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
         struct anon_vma *anon_vma;
         struct vm_area_struct *vma;
-        unsigned int mlocked = 0;
         int ret = SWAP_AGAIN;
-        int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
-
-        if (MLOCK_PAGES && unlikely(unlock))
-                ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
 
         anon_vma = page_lock_anon_vma(page);
         if (!anon_vma)
                 return ret;
 
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
-                if (MLOCK_PAGES && unlikely(unlock)) {
-                        if (!((vma->vm_flags & VM_LOCKED) &&
-                              page_mapped_in_vma(page, vma)))
-                                continue;  /* must visit all unlocked vmas */
-                        ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
-                } else {
-                        ret = try_to_unmap_one(page, vma, flags);
-                        if (ret == SWAP_FAIL || !page_mapped(page))
-                                break;
-                }
-                if (ret == SWAP_MLOCK) {
-                        mlocked = try_to_mlock_page(page, vma);
-                        if (mlocked)
-                                break;  /* stop if actually mlocked page */
-                }
+                unsigned long address = vma_address(page, vma);
+                if (address == -EFAULT)
+                        continue;
+                ret = try_to_unmap_one(page, vma, address, flags);
+                if (ret != SWAP_AGAIN || !page_mapped(page))
+                        break;
         }
 
         page_unlock_anon_vma(anon_vma);
-
-        if (mlocked)
-                ret = SWAP_MLOCK;       /* actually mlocked the page */
-        else if (ret == SWAP_MLOCK)
-                ret = SWAP_AGAIN;       /* saw VM_LOCKED vma */
-
         return ret;
 }
 
@@ -1079,48 +1070,30 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
         unsigned long max_nl_cursor = 0;
         unsigned long max_nl_size = 0;
         unsigned int mapcount;
-        unsigned int mlocked = 0;
-        int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
-
-        if (MLOCK_PAGES && unlikely(unlock))
-                ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
 
         spin_lock(&mapping->i_mmap_lock);
         vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
-                if (MLOCK_PAGES && unlikely(unlock)) {
-                        if (!((vma->vm_flags & VM_LOCKED) &&
-                              page_mapped_in_vma(page, vma)))
-                                continue;       /* must visit all vmas */
-                        ret = SWAP_MLOCK;
-                } else {
-                        ret = try_to_unmap_one(page, vma, flags);
-                        if (ret == SWAP_FAIL || !page_mapped(page))
-                                goto out;
-                }
-                if (ret == SWAP_MLOCK) {
-                        mlocked = try_to_mlock_page(page, vma);
-                        if (mlocked)
-                                break;  /* stop if actually mlocked page */
-                }
+                unsigned long address = vma_address(page, vma);
+                if (address == -EFAULT)
+                        continue;
+                ret = try_to_unmap_one(page, vma, address, flags);
+                if (ret != SWAP_AGAIN || !page_mapped(page))
+                        goto out;
         }
 
-        if (mlocked)
+        if (list_empty(&mapping->i_mmap_nonlinear))
                 goto out;
 
-        if (list_empty(&mapping->i_mmap_nonlinear))
+        /*
+         * We don't bother to try to find the munlocked page in nonlinears.
+         * It's costly. Instead, later, page reclaim logic may call
+         * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
+         */
+        if (TTU_ACTION(flags) == TTU_MUNLOCK)
                 goto out;
 
         list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                 shared.vm_set.list) {
-                if (MLOCK_PAGES && unlikely(unlock)) {
-                        if (!(vma->vm_flags & VM_LOCKED))
-                                continue;       /* must visit all vmas */
-                        ret = SWAP_MLOCK;       /* leave mlocked == 0 */
-                        goto out;               /* no need to look further */
-                }
-                if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
-                    (vma->vm_flags & VM_LOCKED))
-                        continue;
                 cursor = (unsigned long) vma->vm_private_data;
                 if (cursor > max_nl_cursor)
                         max_nl_cursor = cursor;
@@ -1153,16 +1126,12 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
         do {
                 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                 shared.vm_set.list) {
-                        if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
-                            (vma->vm_flags & VM_LOCKED))
-                                continue;
                         cursor = (unsigned long) vma->vm_private_data;
                         while ( cursor < max_nl_cursor &&
                                 cursor < vma->vm_end - vma->vm_start) {
-                                ret = try_to_unmap_cluster(cursor, &mapcount,
-                                                                vma, page);
-                                if (ret == SWAP_MLOCK)
-                                        mlocked = 2;    /* to return below */
+                                if (try_to_unmap_cluster(cursor, &mapcount,
+                                                vma, page) == SWAP_MLOCK)
+                                        ret = SWAP_MLOCK;
                                 cursor += CLUSTER_SIZE;
                                 vma->vm_private_data = (void *) cursor;
                                 if ((int)mapcount <= 0)
@@ -1183,10 +1152,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
                 vma->vm_private_data = NULL;
 out:
         spin_unlock(&mapping->i_mmap_lock);
-        if (mlocked)
-                ret = SWAP_MLOCK;       /* actually mlocked the page */
-        else if (ret == SWAP_MLOCK)
-                ret = SWAP_AGAIN;       /* saw VM_LOCKED vma */
         return ret;
 }
 
@@ -1210,7 +1175,9 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 
         BUG_ON(!PageLocked(page));
 
-        if (PageAnon(page))
+        if (unlikely(PageKsm(page)))
+                ret = try_to_unmap_ksm(page, flags);
+        else if (PageAnon(page))
                 ret = try_to_unmap_anon(page, flags);
         else
                 ret = try_to_unmap_file(page, flags);
@@ -1229,17 +1196,98 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  *
  * Return values are:
  *
- * SWAP_SUCCESS - no vma's holding page mlocked.
+ * SWAP_AGAIN   - no vma is holding page mlocked, or,
  * SWAP_AGAIN   - page mapped in mlocked vma -- couldn't acquire mmap sem
+ * SWAP_FAIL    - page cannot be located at present
  * SWAP_MLOCK   - page is now mlocked.
  */
 int try_to_munlock(struct page *page)
 {
         VM_BUG_ON(!PageLocked(page) || PageLRU(page));
 
-        if (PageAnon(page))
+        if (unlikely(PageKsm(page)))
+                return try_to_unmap_ksm(page, TTU_MUNLOCK);
+        else if (PageAnon(page))
                 return try_to_unmap_anon(page, TTU_MUNLOCK);
         else
                 return try_to_unmap_file(page, TTU_MUNLOCK);
 }
 
+#ifdef CONFIG_MIGRATION
+/*
+ * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
+ * Called by migrate.c to remove migration ptes, but might be used more later.
+ */
+static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
+                struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+        struct anon_vma *anon_vma;
+        struct vm_area_struct *vma;
+        int ret = SWAP_AGAIN;
+
+        /*
+         * Note: remove_migration_ptes() cannot use page_lock_anon_vma()
+         * because that depends on page_mapped(); but not all its usages
+         * are holding mmap_sem, which also gave the necessary guarantee
+         * (that this anon_vma's slab has not already been destroyed).
+         * This needs to be reviewed later: avoiding page_lock_anon_vma()
+         * is risky, and currently limits the usefulness of rmap_walk().
+         */
+        anon_vma = page_anon_vma(page);
+        if (!anon_vma)
+                return ret;
+        spin_lock(&anon_vma->lock);
+        list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+                unsigned long address = vma_address(page, vma);
+                if (address == -EFAULT)
+                        continue;
+                ret = rmap_one(page, vma, address, arg);
+                if (ret != SWAP_AGAIN)
+                        break;
+        }
+        spin_unlock(&anon_vma->lock);
+        return ret;
+}
+
+static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
+                struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+        struct address_space *mapping = page->mapping;
+        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+        struct vm_area_struct *vma;
+        struct prio_tree_iter iter;
+        int ret = SWAP_AGAIN;
+
+        if (!mapping)
+                return ret;
+        spin_lock(&mapping->i_mmap_lock);
+        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+                unsigned long address = vma_address(page, vma);
+                if (address == -EFAULT)
+                        continue;
+                ret = rmap_one(page, vma, address, arg);
+                if (ret != SWAP_AGAIN)
+                        break;
+        }
+        /*
+         * No nonlinear handling: being always shared, nonlinear vmas
+         * never contain migration ptes.  Decide what to do about this
+         * limitation to linear when we need rmap_walk() on nonlinear.
+         */
+        spin_unlock(&mapping->i_mmap_lock);
+        return ret;
+}
+
+int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
+                struct vm_area_struct *, unsigned long, void *), void *arg)
+{
+        VM_BUG_ON(!PageLocked(page));
+
+        if (unlikely(PageKsm(page)))
+                return rmap_walk_ksm(page, rmap_one, arg);
+        else if (PageAnon(page))
+                return rmap_walk_anon(page, rmap_one, arg);
+        else
+                return rmap_walk_file(page, rmap_one, arg);
+}
+#endif /* CONFIG_MIGRATION */