Diffstat (limited to 'mm/rmap.c')
-rw-r--r--	mm/rmap.c	589
1 file changed, 261 insertions, 328 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 068522d8502a..8fc049f9a5a6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
660 return 1; 660 return 1;
661} 661}
662 662
663struct page_referenced_arg {
664 int mapcount;
665 int referenced;
666 unsigned long vm_flags;
667 struct mem_cgroup *memcg;
668};
663/* 669/*
664 * Subfunctions of page_referenced: page_referenced_one called 670 * arg: page_referenced_arg will be passed
665 * repeatedly from either page_referenced_anon or page_referenced_file.
666 */ 671 */
667int page_referenced_one(struct page *page, struct vm_area_struct *vma, 672int page_referenced_one(struct page *page, struct vm_area_struct *vma,
668 unsigned long address, unsigned int *mapcount, 673 unsigned long address, void *arg)
669 unsigned long *vm_flags)
670{ 674{
671 struct mm_struct *mm = vma->vm_mm; 675 struct mm_struct *mm = vma->vm_mm;
672 spinlock_t *ptl; 676 spinlock_t *ptl;
673 int referenced = 0; 677 int referenced = 0;
678 struct page_referenced_arg *pra = arg;
674 679
675 if (unlikely(PageTransHuge(page))) { 680 if (unlikely(PageTransHuge(page))) {
676 pmd_t *pmd; 681 pmd_t *pmd;
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
682 pmd = page_check_address_pmd(page, mm, address, 687 pmd = page_check_address_pmd(page, mm, address,
683 PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); 688 PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
684 if (!pmd) 689 if (!pmd)
685 goto out; 690 return SWAP_AGAIN;
686 691
687 if (vma->vm_flags & VM_LOCKED) { 692 if (vma->vm_flags & VM_LOCKED) {
688 spin_unlock(ptl); 693 spin_unlock(ptl);
689 *mapcount = 0; /* break early from loop */ 694 pra->vm_flags |= VM_LOCKED;
690 *vm_flags |= VM_LOCKED; 695 return SWAP_FAIL; /* To break the loop */
691 goto out;
692 } 696 }
693 697
694 /* go ahead even if the pmd is pmd_trans_splitting() */ 698 /* go ahead even if the pmd is pmd_trans_splitting() */
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
704 */ 708 */
705 pte = page_check_address(page, mm, address, &ptl, 0); 709 pte = page_check_address(page, mm, address, &ptl, 0);
706 if (!pte) 710 if (!pte)
707 goto out; 711 return SWAP_AGAIN;
708 712
709 if (vma->vm_flags & VM_LOCKED) { 713 if (vma->vm_flags & VM_LOCKED) {
710 pte_unmap_unlock(pte, ptl); 714 pte_unmap_unlock(pte, ptl);
711 *mapcount = 0; /* break early from loop */ 715 pra->vm_flags |= VM_LOCKED;
712 *vm_flags |= VM_LOCKED; 716 return SWAP_FAIL; /* To break the loop */
713 goto out;
714 } 717 }
715 718
716 if (ptep_clear_flush_young_notify(vma, address, pte)) { 719 if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
727 pte_unmap_unlock(pte, ptl); 730 pte_unmap_unlock(pte, ptl);
728 } 731 }
729 732
730 (*mapcount)--; 733 if (referenced) {
731 734 pra->referenced++;
732 if (referenced) 735 pra->vm_flags |= vma->vm_flags;
733 *vm_flags |= vma->vm_flags;
734out:
735 return referenced;
736}
737
738static int page_referenced_anon(struct page *page,
739 struct mem_cgroup *memcg,
740 unsigned long *vm_flags)
741{
742 unsigned int mapcount;
743 struct anon_vma *anon_vma;
744 pgoff_t pgoff;
745 struct anon_vma_chain *avc;
746 int referenced = 0;
747
748 anon_vma = page_lock_anon_vma_read(page);
749 if (!anon_vma)
750 return referenced;
751
752 mapcount = page_mapcount(page);
753 pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
754 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
755 struct vm_area_struct *vma = avc->vma;
756 unsigned long address = vma_address(page, vma);
757 /*
758 * If we are reclaiming on behalf of a cgroup, skip
759 * counting on behalf of references from different
760 * cgroups
761 */
762 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
763 continue;
764 referenced += page_referenced_one(page, vma, address,
765 &mapcount, vm_flags);
766 if (!mapcount)
767 break;
768 } 736 }
769 737
770 page_unlock_anon_vma_read(anon_vma); 738 pra->mapcount--;
771 return referenced; 739 if (!pra->mapcount)
740 return SWAP_SUCCESS; /* To break the loop */
741
742 return SWAP_AGAIN;
772} 743}
773 744
774/** 745static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
775 * page_referenced_file - referenced check for object-based rmap
776 * @page: the page we're checking references on.
777 * @memcg: target memory control group
778 * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
779 *
780 * For an object-based mapped page, find all the places it is mapped and
781 * check/clear the referenced flag. This is done by following the page->mapping
782 * pointer, then walking the chain of vmas it holds. It returns the number
783 * of references it found.
784 *
785 * This function is only called from page_referenced for object-based pages.
786 */
787static int page_referenced_file(struct page *page,
788 struct mem_cgroup *memcg,
789 unsigned long *vm_flags)
790{ 746{
791 unsigned int mapcount; 747 struct page_referenced_arg *pra = arg;
792 struct address_space *mapping = page->mapping; 748 struct mem_cgroup *memcg = pra->memcg;
793 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
794 struct vm_area_struct *vma;
795 int referenced = 0;
796
797 /*
798 * The caller's checks on page->mapping and !PageAnon have made
799 * sure that this is a file page: the check for page->mapping
800 * excludes the case just before it gets set on an anon page.
801 */
802 BUG_ON(PageAnon(page));
803
804 /*
805 * The page lock not only makes sure that page->mapping cannot
806 * suddenly be NULLified by truncation, it makes sure that the
807 * structure at mapping cannot be freed and reused yet,
808 * so we can safely take mapping->i_mmap_mutex.
809 */
810 BUG_ON(!PageLocked(page));
811
812 mutex_lock(&mapping->i_mmap_mutex);
813 749
814 /* 750 if (!mm_match_cgroup(vma->vm_mm, memcg))
815 * i_mmap_mutex does not stabilize mapcount at all, but mapcount 751 return true;
816 * is more likely to be accurate if we note it after spinning.
817 */
818 mapcount = page_mapcount(page);
819
820 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
821 unsigned long address = vma_address(page, vma);
822 /*
823 * If we are reclaiming on behalf of a cgroup, skip
824 * counting on behalf of references from different
825 * cgroups
826 */
827 if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
828 continue;
829 referenced += page_referenced_one(page, vma, address,
830 &mapcount, vm_flags);
831 if (!mapcount)
832 break;
833 }
834 752
835 mutex_unlock(&mapping->i_mmap_mutex); 753 return false;
836 return referenced;
837} 754}
838 755
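The hunk above folds the old out-parameters (*mapcount, *vm_flags) into struct page_referenced_arg, passed through the walker's opaque arg pointer, and replaces the "*mapcount = 0; /* break early from loop */" trick with SWAP_* return codes. A minimal sketch of that callback contract, using hypothetical count_* names that are not part of this patch (assumes <linux/rmap.h> and <linux/mm.h>):

struct count_arg {
        int mapcount;   /* remaining mappings, seeded from page_mapcount() */
        int seen;       /* VMAs in which the page was actually found */
};

static int count_one(struct page *page, struct vm_area_struct *vma,
                     unsigned long address, void *arg)
{
        struct count_arg *ca = arg;

        ca->seen++;
        if (!--ca->mapcount)
                return SWAP_SUCCESS;    /* nothing left to find: stop the walk */
        return SWAP_AGAIN;              /* keep walking the remaining VMAs */
}

Any return value other than SWAP_AGAIN makes the walker stop, which is how page_referenced_one() above reports both the VM_LOCKED case (SWAP_FAIL) and the "all mappings seen" case (SWAP_SUCCESS).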
839/** 756/**
@@ -851,41 +768,57 @@ int page_referenced(struct page *page,
851 struct mem_cgroup *memcg, 768 struct mem_cgroup *memcg,
852 unsigned long *vm_flags) 769 unsigned long *vm_flags)
853{ 770{
854 int referenced = 0; 771 int ret;
855 int we_locked = 0; 772 int we_locked = 0;
773 struct page_referenced_arg pra = {
774 .mapcount = page_mapcount(page),
775 .memcg = memcg,
776 };
777 struct rmap_walk_control rwc = {
778 .rmap_one = page_referenced_one,
779 .arg = (void *)&pra,
780 .anon_lock = page_lock_anon_vma_read,
781 };
856 782
857 *vm_flags = 0; 783 *vm_flags = 0;
858 if (page_mapped(page) && page_rmapping(page)) { 784 if (!page_mapped(page))
859 if (!is_locked && (!PageAnon(page) || PageKsm(page))) { 785 return 0;
860 we_locked = trylock_page(page); 786
861 if (!we_locked) { 787 if (!page_rmapping(page))
862 referenced++; 788 return 0;
863 goto out; 789
864 } 790 if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
865 } 791 we_locked = trylock_page(page);
866 if (unlikely(PageKsm(page))) 792 if (!we_locked)
867 referenced += page_referenced_ksm(page, memcg, 793 return 1;
868 vm_flags);
869 else if (PageAnon(page))
870 referenced += page_referenced_anon(page, memcg,
871 vm_flags);
872 else if (page->mapping)
873 referenced += page_referenced_file(page, memcg,
874 vm_flags);
875 if (we_locked)
876 unlock_page(page);
877 } 794 }
878out: 795
879 return referenced; 796 /*
797 * If we are reclaiming on behalf of a cgroup, skip
798 * counting on behalf of references from different
799 * cgroups
800 */
801 if (memcg) {
802 rwc.invalid_vma = invalid_page_referenced_vma;
803 }
804
805 ret = rmap_walk(page, &rwc);
806 *vm_flags = pra.vm_flags;
807
808 if (we_locked)
809 unlock_page(page);
810
811 return pra.referenced;
880} 812}
881 813
882static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, 814static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
883 unsigned long address) 815 unsigned long address, void *arg)
884{ 816{
885 struct mm_struct *mm = vma->vm_mm; 817 struct mm_struct *mm = vma->vm_mm;
886 pte_t *pte; 818 pte_t *pte;
887 spinlock_t *ptl; 819 spinlock_t *ptl;
888 int ret = 0; 820 int ret = 0;
821 int *cleaned = arg;
889 822
890 pte = page_check_address(page, mm, address, &ptl, 1); 823 pte = page_check_address(page, mm, address, &ptl, 1);
891 if (!pte) 824 if (!pte)
@@ -904,44 +837,44 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
904 837
905 pte_unmap_unlock(pte, ptl); 838 pte_unmap_unlock(pte, ptl);
906 839
907 if (ret) 840 if (ret) {
908 mmu_notifier_invalidate_page(mm, address); 841 mmu_notifier_invalidate_page(mm, address);
842 (*cleaned)++;
843 }
909out: 844out:
910 return ret; 845 return SWAP_AGAIN;
911} 846}
912 847
913static int page_mkclean_file(struct address_space *mapping, struct page *page) 848static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
914{ 849{
915 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 850 if (vma->vm_flags & VM_SHARED)
916 struct vm_area_struct *vma; 851 return false;
917 int ret = 0;
918
919 BUG_ON(PageAnon(page));
920 852
921 mutex_lock(&mapping->i_mmap_mutex); 853 return true;
922 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
923 if (vma->vm_flags & VM_SHARED) {
924 unsigned long address = vma_address(page, vma);
925 ret += page_mkclean_one(page, vma, address);
926 }
927 }
928 mutex_unlock(&mapping->i_mmap_mutex);
929 return ret;
930} 854}
931 855
932int page_mkclean(struct page *page) 856int page_mkclean(struct page *page)
933{ 857{
934 int ret = 0; 858 int cleaned = 0;
859 struct address_space *mapping;
860 struct rmap_walk_control rwc = {
861 .arg = (void *)&cleaned,
862 .rmap_one = page_mkclean_one,
863 .invalid_vma = invalid_mkclean_vma,
864 };
935 865
936 BUG_ON(!PageLocked(page)); 866 BUG_ON(!PageLocked(page));
937 867
938 if (page_mapped(page)) { 868 if (!page_mapped(page))
939 struct address_space *mapping = page_mapping(page); 869 return 0;
940 if (mapping)
941 ret = page_mkclean_file(mapping, page);
942 }
943 870
944 return ret; 871 mapping = page_mapping(page);
872 if (!mapping)
873 return 0;
874
875 rmap_walk(page, &rwc);
876
877 return cleaned;
945} 878}
946EXPORT_SYMBOL_GPL(page_mkclean); 879EXPORT_SYMBOL_GPL(page_mkclean);
947 880
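page_mkclean() above now expresses its per-walk state entirely through the control structure: the open-coded VM_SHARED test of page_mkclean_file() becomes the ->invalid_vma filter and the cleaned counter travels in ->arg. The same wiring for a hypothetical walker, sketched with shared_* names that do not exist in the kernel:

static int shared_one(struct page *page, struct vm_area_struct *vma,
                      unsigned long address, void *arg)
{
        (*(int *)arg)++;                        /* bump the counter carried in ->arg */
        return SWAP_AGAIN;
}

static bool shared_skip_private(struct vm_area_struct *vma, void *arg)
{
        return !(vma->vm_flags & VM_SHARED);    /* true means: skip this VMA */
}

static int count_shared_mappings(struct page *page)
{
        int count = 0;
        struct rmap_walk_control rwc = {
                .rmap_one = shared_one,
                .arg = (void *)&count,
                .invalid_vma = shared_skip_private,
        };

        rmap_walk(page, &rwc);          /* like page_mkclean(), page must be locked */
        return count;
}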
@@ -961,9 +894,9 @@ void page_move_anon_rmap(struct page *page,
961{ 894{
962 struct anon_vma *anon_vma = vma->anon_vma; 895 struct anon_vma *anon_vma = vma->anon_vma;
963 896
964 VM_BUG_ON(!PageLocked(page)); 897 VM_BUG_ON_PAGE(!PageLocked(page), page);
965 VM_BUG_ON(!anon_vma); 898 VM_BUG_ON(!anon_vma);
966 VM_BUG_ON(page->index != linear_page_index(vma, address)); 899 VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
967 900
968 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; 901 anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
969 page->mapping = (struct address_space *) anon_vma; 902 page->mapping = (struct address_space *) anon_vma;
@@ -1062,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page,
1062 if (unlikely(PageKsm(page))) 995 if (unlikely(PageKsm(page)))
1063 return; 996 return;
1064 997
1065 VM_BUG_ON(!PageLocked(page)); 998 VM_BUG_ON_PAGE(!PageLocked(page), page);
1066 /* address might be in next vma when migration races vma_adjust */ 999 /* address might be in next vma when migration races vma_adjust */
1067 if (first) 1000 if (first)
1068 __page_set_anon_rmap(page, vma, address, exclusive); 1001 __page_set_anon_rmap(page, vma, address, exclusive);
@@ -1177,17 +1110,17 @@ out:
1177} 1110}
1178 1111
1179/* 1112/*
1180 * Subfunctions of try_to_unmap: try_to_unmap_one called 1113 * @arg: enum ttu_flags will be passed to this argument
1181 * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
1182 */ 1114 */
1183int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, 1115int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
1184 unsigned long address, enum ttu_flags flags) 1116 unsigned long address, void *arg)
1185{ 1117{
1186 struct mm_struct *mm = vma->vm_mm; 1118 struct mm_struct *mm = vma->vm_mm;
1187 pte_t *pte; 1119 pte_t *pte;
1188 pte_t pteval; 1120 pte_t pteval;
1189 spinlock_t *ptl; 1121 spinlock_t *ptl;
1190 int ret = SWAP_AGAIN; 1122 int ret = SWAP_AGAIN;
1123 enum ttu_flags flags = (enum ttu_flags)arg;
1191 1124
1192 pte = page_check_address(page, mm, address, &ptl, 0); 1125 pte = page_check_address(page, mm, address, &ptl, 0);
1193 if (!pte) 1126 if (!pte)
@@ -1426,93 +1359,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
1426 return ret; 1359 return ret;
1427} 1360}
1428 1361
1429bool is_vma_temporary_stack(struct vm_area_struct *vma) 1362static int try_to_unmap_nonlinear(struct page *page,
1430{ 1363 struct address_space *mapping, void *arg)
1431 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1432
1433 if (!maybe_stack)
1434 return false;
1435
1436 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1437 VM_STACK_INCOMPLETE_SETUP)
1438 return true;
1439
1440 return false;
1441}
1442
1443/**
1444 * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
1445 * rmap method
1446 * @page: the page to unmap/unlock
1447 * @flags: action and flags
1448 *
1449 * Find all the mappings of a page using the mapping pointer and the vma chains
1450 * contained in the anon_vma struct it points to.
1451 *
1452 * This function is only called from try_to_unmap/try_to_munlock for
1453 * anonymous pages.
1454 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1455 * where the page was found will be held for write. So, we won't recheck
1456 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1457 * 'LOCKED.
1458 */
1459static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
1460{
1461 struct anon_vma *anon_vma;
1462 pgoff_t pgoff;
1463 struct anon_vma_chain *avc;
1464 int ret = SWAP_AGAIN;
1465
1466 anon_vma = page_lock_anon_vma_read(page);
1467 if (!anon_vma)
1468 return ret;
1469
1470 pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1471 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
1472 struct vm_area_struct *vma = avc->vma;
1473 unsigned long address;
1474
1475 /*
1476 * During exec, a temporary VMA is setup and later moved.
1477 * The VMA is moved under the anon_vma lock but not the
1478 * page tables leading to a race where migration cannot
1479 * find the migration ptes. Rather than increasing the
1480 * locking requirements of exec(), migration skips
1481 * temporary VMAs until after exec() completes.
1482 */
1483 if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
1484 is_vma_temporary_stack(vma))
1485 continue;
1486
1487 address = vma_address(page, vma);
1488 ret = try_to_unmap_one(page, vma, address, flags);
1489 if (ret != SWAP_AGAIN || !page_mapped(page))
1490 break;
1491 }
1492
1493 page_unlock_anon_vma_read(anon_vma);
1494 return ret;
1495}
1496
1497/**
1498 * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
1499 * @page: the page to unmap/unlock
1500 * @flags: action and flags
1501 *
1502 * Find all the mappings of a page using the mapping pointer and the vma chains
1503 * contained in the address_space struct it points to.
1504 *
1505 * This function is only called from try_to_unmap/try_to_munlock for
1506 * object-based pages.
1507 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1508 * where the page was found will be held for write. So, we won't recheck
1509 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1510 * 'LOCKED.
1511 */
1512static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1513{ 1364{
1514 struct address_space *mapping = page->mapping;
1515 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1516 struct vm_area_struct *vma; 1365 struct vm_area_struct *vma;
1517 int ret = SWAP_AGAIN; 1366 int ret = SWAP_AGAIN;
1518 unsigned long cursor; 1367 unsigned long cursor;
@@ -1520,30 +1369,9 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1520 unsigned long max_nl_size = 0; 1369 unsigned long max_nl_size = 0;
1521 unsigned int mapcount; 1370 unsigned int mapcount;
1522 1371
1523 if (PageHuge(page)) 1372 list_for_each_entry(vma,
1524 pgoff = page->index << compound_order(page); 1373 &mapping->i_mmap_nonlinear, shared.nonlinear) {
1525 1374
1526 mutex_lock(&mapping->i_mmap_mutex);
1527 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
1528 unsigned long address = vma_address(page, vma);
1529 ret = try_to_unmap_one(page, vma, address, flags);
1530 if (ret != SWAP_AGAIN || !page_mapped(page))
1531 goto out;
1532 }
1533
1534 if (list_empty(&mapping->i_mmap_nonlinear))
1535 goto out;
1536
1537 /*
1538 * We don't bother to try to find the munlocked page in nonlinears.
1539 * It's costly. Instead, later, page reclaim logic may call
1540 * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
1541 */
1542 if (TTU_ACTION(flags) == TTU_MUNLOCK)
1543 goto out;
1544
1545 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
1546 shared.nonlinear) {
1547 cursor = (unsigned long) vma->vm_private_data; 1375 cursor = (unsigned long) vma->vm_private_data;
1548 if (cursor > max_nl_cursor) 1376 if (cursor > max_nl_cursor)
1549 max_nl_cursor = cursor; 1377 max_nl_cursor = cursor;
@@ -1553,8 +1381,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1553 } 1381 }
1554 1382
1555 if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ 1383 if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
1556 ret = SWAP_FAIL; 1384 return SWAP_FAIL;
1557 goto out;
1558 } 1385 }
1559 1386
1560 /* 1387 /*
@@ -1566,7 +1393,8 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1566 */ 1393 */
1567 mapcount = page_mapcount(page); 1394 mapcount = page_mapcount(page);
1568 if (!mapcount) 1395 if (!mapcount)
1569 goto out; 1396 return ret;
1397
1570 cond_resched(); 1398 cond_resched();
1571 1399
1572 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; 1400 max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
@@ -1574,10 +1402,11 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1574 max_nl_cursor = CLUSTER_SIZE; 1402 max_nl_cursor = CLUSTER_SIZE;
1575 1403
1576 do { 1404 do {
1577 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, 1405 list_for_each_entry(vma,
1578 shared.nonlinear) { 1406 &mapping->i_mmap_nonlinear, shared.nonlinear) {
1407
1579 cursor = (unsigned long) vma->vm_private_data; 1408 cursor = (unsigned long) vma->vm_private_data;
1580 while ( cursor < max_nl_cursor && 1409 while (cursor < max_nl_cursor &&
1581 cursor < vma->vm_end - vma->vm_start) { 1410 cursor < vma->vm_end - vma->vm_start) {
1582 if (try_to_unmap_cluster(cursor, &mapcount, 1411 if (try_to_unmap_cluster(cursor, &mapcount,
1583 vma, page) == SWAP_MLOCK) 1412 vma, page) == SWAP_MLOCK)
@@ -1585,7 +1414,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1585 cursor += CLUSTER_SIZE; 1414 cursor += CLUSTER_SIZE;
1586 vma->vm_private_data = (void *) cursor; 1415 vma->vm_private_data = (void *) cursor;
1587 if ((int)mapcount <= 0) 1416 if ((int)mapcount <= 0)
1588 goto out; 1417 return ret;
1589 } 1418 }
1590 vma->vm_private_data = (void *) max_nl_cursor; 1419 vma->vm_private_data = (void *) max_nl_cursor;
1591 } 1420 }
@@ -1600,11 +1429,34 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
1600 */ 1429 */
1601 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) 1430 list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
1602 vma->vm_private_data = NULL; 1431 vma->vm_private_data = NULL;
1603out: 1432
1604 mutex_unlock(&mapping->i_mmap_mutex);
1605 return ret; 1433 return ret;
1606} 1434}
1607 1435
1436bool is_vma_temporary_stack(struct vm_area_struct *vma)
1437{
1438 int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
1439
1440 if (!maybe_stack)
1441 return false;
1442
1443 if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
1444 VM_STACK_INCOMPLETE_SETUP)
1445 return true;
1446
1447 return false;
1448}
1449
1450static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
1451{
1452 return is_vma_temporary_stack(vma);
1453}
1454
1455static int page_not_mapped(struct page *page)
1456{
1457 return !page_mapped(page);
1458};
1459
1608/** 1460/**
1609 * try_to_unmap - try to remove all page table mappings to a page 1461 * try_to_unmap - try to remove all page table mappings to a page
1610 * @page: the page to get unmapped 1462 * @page: the page to get unmapped
@@ -1622,16 +1474,29 @@ out:
1622int try_to_unmap(struct page *page, enum ttu_flags flags) 1474int try_to_unmap(struct page *page, enum ttu_flags flags)
1623{ 1475{
1624 int ret; 1476 int ret;
1477 struct rmap_walk_control rwc = {
1478 .rmap_one = try_to_unmap_one,
1479 .arg = (void *)flags,
1480 .done = page_not_mapped,
1481 .file_nonlinear = try_to_unmap_nonlinear,
1482 .anon_lock = page_lock_anon_vma_read,
1483 };
1625 1484
1626 BUG_ON(!PageLocked(page)); 1485 VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
1627 VM_BUG_ON(!PageHuge(page) && PageTransHuge(page)); 1486
1487 /*
1488 * During exec, a temporary VMA is setup and later moved.
1489 * The VMA is moved under the anon_vma lock but not the
1490 * page tables leading to a race where migration cannot
1491 * find the migration ptes. Rather than increasing the
1492 * locking requirements of exec(), migration skips
1493 * temporary VMAs until after exec() completes.
1494 */
1495 if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
1496 rwc.invalid_vma = invalid_migration_vma;
1497
1498 ret = rmap_walk(page, &rwc);
1628 1499
1629 if (unlikely(PageKsm(page)))
1630 ret = try_to_unmap_ksm(page, flags);
1631 else if (PageAnon(page))
1632 ret = try_to_unmap_anon(page, flags);
1633 else
1634 ret = try_to_unmap_file(page, flags);
1635 if (ret != SWAP_MLOCK && !page_mapped(page)) 1500 if (ret != SWAP_MLOCK && !page_mapped(page))
1636 ret = SWAP_SUCCESS; 1501 ret = SWAP_SUCCESS;
1637 return ret; 1502 return ret;
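Two details of the try_to_unmap() conversion above are easy to miss. The ttu_flags value is not pointed to; it is packed into the ->arg pointer itself and unpacked again in the callback, and the old "!page_mapped(page)" loop exit now lives in the ->done hook. Condensed from the code above, with explanatory comments added (not a standalone compilable unit):

        /* caller side: the enum value is stored in the pointer itself */
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_unmap_one,
                .arg = (void *)flags,           /* flags packed into the pointer */
                .done = page_not_mapped,        /* nonzero return ends the walk */
                /* .file_nonlinear, .anon_lock as above */
        };

        /* callback side: try_to_unmap_one() unpacks the same value */
        enum ttu_flags flags = (enum ttu_flags)arg;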
@@ -1654,14 +1519,25 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
1654 */ 1519 */
1655int try_to_munlock(struct page *page) 1520int try_to_munlock(struct page *page)
1656{ 1521{
1657 VM_BUG_ON(!PageLocked(page) || PageLRU(page)); 1522 int ret;
1523 struct rmap_walk_control rwc = {
1524 .rmap_one = try_to_unmap_one,
1525 .arg = (void *)TTU_MUNLOCK,
1526 .done = page_not_mapped,
1527 /*
1528 * We don't bother to try to find the munlocked page in
1529 * nonlinears. It's costly. Instead, later, page reclaim logic
1530 * may call try_to_unmap() and recover PG_mlocked lazily.
1531 */
1532 .file_nonlinear = NULL,
1533 .anon_lock = page_lock_anon_vma_read,
1658 1534
1659 if (unlikely(PageKsm(page))) 1535 };
1660 return try_to_unmap_ksm(page, TTU_MUNLOCK); 1536
1661 else if (PageAnon(page)) 1537 VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
1662 return try_to_unmap_anon(page, TTU_MUNLOCK); 1538
1663 else 1539 ret = rmap_walk(page, &rwc);
1664 return try_to_unmap_file(page, TTU_MUNLOCK); 1540 return ret;
1665} 1541}
1666 1542
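The ->file_nonlinear hook is the remaining difference between the two callers above: try_to_unmap() plugs in try_to_unmap_nonlinear(), while try_to_munlock() leaves the hook NULL so rmap_walk_file() never descends into mapping->i_mmap_nonlinear. A sketch of the hook's shape (foo_nonlinear is a hypothetical name):

static int foo_nonlinear(struct page *page, struct address_space *mapping,
                         void *arg)
{
        /*
         * Called at most once per walk, after the linear VMAs, and only
         * when the hook is non-NULL and i_mmap_nonlinear is not empty;
         * rmap_walk_file() already holds mapping->i_mmap_mutex here.
         */
        return SWAP_AGAIN;
}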
1667void __put_anon_vma(struct anon_vma *anon_vma) 1543void __put_anon_vma(struct anon_vma *anon_vma)
@@ -1674,18 +1550,13 @@ void __put_anon_vma(struct anon_vma *anon_vma)
1674 anon_vma_free(anon_vma); 1550 anon_vma_free(anon_vma);
1675} 1551}
1676 1552
1677#ifdef CONFIG_MIGRATION 1553static struct anon_vma *rmap_walk_anon_lock(struct page *page,
1678/* 1554 struct rmap_walk_control *rwc)
1679 * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
1680 * Called by migrate.c to remove migration ptes, but might be used more later.
1681 */
1682static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1683 struct vm_area_struct *, unsigned long, void *), void *arg)
1684{ 1555{
1685 struct anon_vma *anon_vma; 1556 struct anon_vma *anon_vma;
1686 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 1557
1687 struct anon_vma_chain *avc; 1558 if (rwc->anon_lock)
1688 int ret = SWAP_AGAIN; 1559 return rwc->anon_lock(page);
1689 1560
1690 /* 1561 /*
1691 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() 1562 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
@@ -1695,58 +1566,120 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
1695 */ 1566 */
1696 anon_vma = page_anon_vma(page); 1567 anon_vma = page_anon_vma(page);
1697 if (!anon_vma) 1568 if (!anon_vma)
1698 return ret; 1569 return NULL;
1570
1699 anon_vma_lock_read(anon_vma); 1571 anon_vma_lock_read(anon_vma);
1572 return anon_vma;
1573}
1574
1575/*
1576 * rmap_walk_anon - do something to anonymous page using the object-based
1577 * rmap method
1578 * @page: the page to be handled
1579 * @rwc: control variable according to each walk type
1580 *
1581 * Find all the mappings of a page using the mapping pointer and the vma chains
1582 * contained in the anon_vma struct it points to.
1583 *
1584 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1585 * where the page was found will be held for write. So, we won't recheck
1586 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1587 * LOCKED.
1588 */
1589static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
1590{
1591 struct anon_vma *anon_vma;
1592 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
1593 struct anon_vma_chain *avc;
1594 int ret = SWAP_AGAIN;
1595
1596 anon_vma = rmap_walk_anon_lock(page, rwc);
1597 if (!anon_vma)
1598 return ret;
1599
1700 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { 1600 anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
1701 struct vm_area_struct *vma = avc->vma; 1601 struct vm_area_struct *vma = avc->vma;
1702 unsigned long address = vma_address(page, vma); 1602 unsigned long address = vma_address(page, vma);
1703 ret = rmap_one(page, vma, address, arg); 1603
1604 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
1605 continue;
1606
1607 ret = rwc->rmap_one(page, vma, address, rwc->arg);
1704 if (ret != SWAP_AGAIN) 1608 if (ret != SWAP_AGAIN)
1705 break; 1609 break;
1610 if (rwc->done && rwc->done(page))
1611 break;
1706 } 1612 }
1707 anon_vma_unlock_read(anon_vma); 1613 anon_vma_unlock_read(anon_vma);
1708 return ret; 1614 return ret;
1709} 1615}
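rmap_walk_anon() above takes the anon_vma lock through rmap_walk_anon_lock(), so the locking policy is now a per-caller choice as well. A condensed sketch of the reclaim-side configuration (other fields omitted):

        struct rmap_walk_control rwc = {
                /* ... .rmap_one, .arg as for any other walk ... */
                .anon_lock = page_lock_anon_vma_read,   /* reclaim-side walks above */
        };

A NULL return from the hook simply skips the walk. Leaving ->anon_lock unset instead selects the fallback in rmap_walk_anon_lock(), page_anon_vma() plus anon_vma_lock_read(), which relies on the caller holding mmap_sem or an anon_vma reference, as the comment in rmap_walk_anon_lock() explains; that is the configuration migration uses.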
1710 1616
1711static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, 1617/*
1712 struct vm_area_struct *, unsigned long, void *), void *arg) 1618 * rmap_walk_file - do something to file page using the object-based rmap method
1619 * @page: the page to be handled
1620 * @rwc: control variable according to each walk type
1621 *
1622 * Find all the mappings of a page using the mapping pointer and the vma chains
1623 * contained in the address_space struct it points to.
1624 *
1625 * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
1626 * where the page was found will be held for write. So, we won't recheck
1627 * vm_flags for that VMA. That should be OK, because that vma shouldn't be
1628 * LOCKED.
1629 */
1630static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
1713{ 1631{
1714 struct address_space *mapping = page->mapping; 1632 struct address_space *mapping = page->mapping;
1715 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 1633 pgoff_t pgoff = page->index << compound_order(page);
1716 struct vm_area_struct *vma; 1634 struct vm_area_struct *vma;
1717 int ret = SWAP_AGAIN; 1635 int ret = SWAP_AGAIN;
1718 1636
1637 /*
1638 * The page lock not only makes sure that page->mapping cannot
1639 * suddenly be NULLified by truncation, it makes sure that the
1640 * structure at mapping cannot be freed and reused yet,
1641 * so we can safely take mapping->i_mmap_mutex.
1642 */
1643 VM_BUG_ON(!PageLocked(page));
1644
1719 if (!mapping) 1645 if (!mapping)
1720 return ret; 1646 return ret;
1721 mutex_lock(&mapping->i_mmap_mutex); 1647 mutex_lock(&mapping->i_mmap_mutex);
1722 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { 1648 vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
1723 unsigned long address = vma_address(page, vma); 1649 unsigned long address = vma_address(page, vma);
1724 ret = rmap_one(page, vma, address, arg); 1650
1651 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
1652 continue;
1653
1654 ret = rwc->rmap_one(page, vma, address, rwc->arg);
1725 if (ret != SWAP_AGAIN) 1655 if (ret != SWAP_AGAIN)
1726 break; 1656 goto done;
1657 if (rwc->done && rwc->done(page))
1658 goto done;
1727 } 1659 }
1728 /* 1660
1729 * No nonlinear handling: being always shared, nonlinear vmas 1661 if (!rwc->file_nonlinear)
1730 * never contain migration ptes. Decide what to do about this 1662 goto done;
1731 * limitation to linear when we need rmap_walk() on nonlinear. 1663
1732 */ 1664 if (list_empty(&mapping->i_mmap_nonlinear))
1665 goto done;
1666
1667 ret = rwc->file_nonlinear(page, mapping, rwc->arg);
1668
1669done:
1733 mutex_unlock(&mapping->i_mmap_mutex); 1670 mutex_unlock(&mapping->i_mmap_mutex);
1734 return ret; 1671 return ret;
1735} 1672}
1736 1673
1737int rmap_walk(struct page *page, int (*rmap_one)(struct page *, 1674int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
1738 struct vm_area_struct *, unsigned long, void *), void *arg)
1739{ 1675{
1740 VM_BUG_ON(!PageLocked(page));
1741
1742 if (unlikely(PageKsm(page))) 1676 if (unlikely(PageKsm(page)))
1743 return rmap_walk_ksm(page, rmap_one, arg); 1677 return rmap_walk_ksm(page, rwc);
1744 else if (PageAnon(page)) 1678 else if (PageAnon(page))
1745 return rmap_walk_anon(page, rmap_one, arg); 1679 return rmap_walk_anon(page, rwc);
1746 else 1680 else
1747 return rmap_walk_file(page, rmap_one, arg); 1681 return rmap_walk_file(page, rwc);
1748} 1682}
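With the dispatcher above, rmap_walk() is the single entry point for KSM, anonymous and file pages, and every former special-purpose walker is just a differently filled rmap_walk_control. A complete hypothetical client, to show how the hooks compose; only rmap_walk_control, rmap_walk() and the SWAP_* codes come from this series, the foo_* names do not exist in the kernel:

struct foo_arg {
        int hits;
};

static int foo_one(struct page *page, struct vm_area_struct *vma,
                   unsigned long address, void *arg)
{
        struct foo_arg *fa = arg;

        fa->hits++;                             /* count this mapping */
        return SWAP_AGAIN;                      /* keep walking */
}

static int foo_done(struct page *page)
{
        return !page_mapped(page);              /* stop once fully unmapped */
}

static bool foo_skip_mlocked(struct vm_area_struct *vma, void *arg)
{
        return vma->vm_flags & VM_LOCKED;       /* true means: skip this VMA */
}

static int foo_count_mappings(struct page *page)
{
        struct foo_arg fa = { .hits = 0 };
        struct rmap_walk_control rwc = {
                .rmap_one    = foo_one,
                .arg         = (void *)&fa,
                .done        = foo_done,
                .invalid_vma = foo_skip_mlocked,
                .anon_lock   = page_lock_anon_vma_read,
        };

        rmap_walk(page, &rwc);                  /* file pages must be locked */
        return fa.hits;
}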
1749#endif /* CONFIG_MIGRATION */
1750 1683
1751#ifdef CONFIG_HUGETLB_PAGE 1684#ifdef CONFIG_HUGETLB_PAGE
1752/* 1685/*