Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 580 |
1 file changed, 256 insertions, 324 deletions
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) | |||
660 | return 1; | 660 | return 1; |
661 | } | 661 | } |
662 | 662 | ||
663 | struct page_referenced_arg { | ||
664 | int mapcount; | ||
665 | int referenced; | ||
666 | unsigned long vm_flags; | ||
667 | struct mem_cgroup *memcg; | ||
668 | }; | ||
663 | /* | 669 | /* |
664 | * Subfunctions of page_referenced: page_referenced_one called | 670 | * arg: page_referenced_arg will be passed |
665 | * repeatedly from either page_referenced_anon or page_referenced_file. | ||
666 | */ | 671 | */ |
667 | int page_referenced_one(struct page *page, struct vm_area_struct *vma, | 672 | int page_referenced_one(struct page *page, struct vm_area_struct *vma, |
668 | unsigned long address, unsigned int *mapcount, | 673 | unsigned long address, void *arg) |
669 | unsigned long *vm_flags) | ||
670 | { | 674 | { |
671 | struct mm_struct *mm = vma->vm_mm; | 675 | struct mm_struct *mm = vma->vm_mm; |
672 | spinlock_t *ptl; | 676 | spinlock_t *ptl; |
673 | int referenced = 0; | 677 | int referenced = 0; |
678 | struct page_referenced_arg *pra = arg; | ||
674 | 679 | ||
675 | if (unlikely(PageTransHuge(page))) { | 680 | if (unlikely(PageTransHuge(page))) { |
676 | pmd_t *pmd; | 681 | pmd_t *pmd; |
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
682 | pmd = page_check_address_pmd(page, mm, address, | 687 | pmd = page_check_address_pmd(page, mm, address, |
683 | PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); | 688 | PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); |
684 | if (!pmd) | 689 | if (!pmd) |
685 | goto out; | 690 | return SWAP_AGAIN; |
686 | 691 | ||
687 | if (vma->vm_flags & VM_LOCKED) { | 692 | if (vma->vm_flags & VM_LOCKED) { |
688 | spin_unlock(ptl); | 693 | spin_unlock(ptl); |
689 | *mapcount = 0; /* break early from loop */ | 694 | pra->vm_flags |= VM_LOCKED; |
690 | *vm_flags |= VM_LOCKED; | 695 | return SWAP_FAIL; /* To break the loop */ |
691 | goto out; | ||
692 | } | 696 | } |
693 | 697 | ||
694 | /* go ahead even if the pmd is pmd_trans_splitting() */ | 698 | /* go ahead even if the pmd is pmd_trans_splitting() */ |
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
704 | */ | 708 | */ |
705 | pte = page_check_address(page, mm, address, &ptl, 0); | 709 | pte = page_check_address(page, mm, address, &ptl, 0); |
706 | if (!pte) | 710 | if (!pte) |
707 | goto out; | 711 | return SWAP_AGAIN; |
708 | 712 | ||
709 | if (vma->vm_flags & VM_LOCKED) { | 713 | if (vma->vm_flags & VM_LOCKED) { |
710 | pte_unmap_unlock(pte, ptl); | 714 | pte_unmap_unlock(pte, ptl); |
711 | *mapcount = 0; /* break early from loop */ | 715 | pra->vm_flags |= VM_LOCKED; |
712 | *vm_flags |= VM_LOCKED; | 716 | return SWAP_FAIL; /* To break the loop */ |
713 | goto out; | ||
714 | } | 717 | } |
715 | 718 | ||
716 | if (ptep_clear_flush_young_notify(vma, address, pte)) { | 719 | if (ptep_clear_flush_young_notify(vma, address, pte)) { |
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
727 | pte_unmap_unlock(pte, ptl); | 730 | pte_unmap_unlock(pte, ptl); |
728 | } | 731 | } |
729 | 732 | ||
730 | (*mapcount)--; | 733 | if (referenced) { |
731 | 734 | pra->referenced++; | |
732 | if (referenced) | 735 | pra->vm_flags |= vma->vm_flags; |
733 | *vm_flags |= vma->vm_flags; | ||
734 | out: | ||
735 | return referenced; | ||
736 | } | ||
737 | |||
738 | static int page_referenced_anon(struct page *page, | ||
739 | struct mem_cgroup *memcg, | ||
740 | unsigned long *vm_flags) | ||
741 | { | ||
742 | unsigned int mapcount; | ||
743 | struct anon_vma *anon_vma; | ||
744 | pgoff_t pgoff; | ||
745 | struct anon_vma_chain *avc; | ||
746 | int referenced = 0; | ||
747 | |||
748 | anon_vma = page_lock_anon_vma_read(page); | ||
749 | if (!anon_vma) | ||
750 | return referenced; | ||
751 | |||
752 | mapcount = page_mapcount(page); | ||
753 | pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
754 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { | ||
755 | struct vm_area_struct *vma = avc->vma; | ||
756 | unsigned long address = vma_address(page, vma); | ||
757 | /* | ||
758 | * If we are reclaiming on behalf of a cgroup, skip | ||
759 | * counting on behalf of references from different | ||
760 | * cgroups | ||
761 | */ | ||
762 | if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) | ||
763 | continue; | ||
764 | referenced += page_referenced_one(page, vma, address, | ||
765 | &mapcount, vm_flags); | ||
766 | if (!mapcount) | ||
767 | break; | ||
768 | } | 736 | } |
769 | 737 | ||
770 | page_unlock_anon_vma_read(anon_vma); | 738 | pra->mapcount--; |
771 | return referenced; | 739 | if (!pra->mapcount) |
740 | return SWAP_SUCCESS; /* To break the loop */ | ||
741 | |||
742 | return SWAP_AGAIN; | ||
772 | } | 743 | } |
773 | 744 | ||
774 | /** | 745 | static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) |
775 | * page_referenced_file - referenced check for object-based rmap | ||
776 | * @page: the page we're checking references on. | ||
777 | * @memcg: target memory control group | ||
778 | * @vm_flags: collect encountered vma->vm_flags who actually referenced the page | ||
779 | * | ||
780 | * For an object-based mapped page, find all the places it is mapped and | ||
781 | * check/clear the referenced flag. This is done by following the page->mapping | ||
782 | * pointer, then walking the chain of vmas it holds. It returns the number | ||
783 | * of references it found. | ||
784 | * | ||
785 | * This function is only called from page_referenced for object-based pages. | ||
786 | */ | ||
787 | static int page_referenced_file(struct page *page, | ||
788 | struct mem_cgroup *memcg, | ||
789 | unsigned long *vm_flags) | ||
790 | { | 746 | { |
791 | unsigned int mapcount; | 747 | struct page_referenced_arg *pra = arg; |
792 | struct address_space *mapping = page->mapping; | 748 | struct mem_cgroup *memcg = pra->memcg; |
793 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
794 | struct vm_area_struct *vma; | ||
795 | int referenced = 0; | ||
796 | |||
797 | /* | ||
798 | * The caller's checks on page->mapping and !PageAnon have made | ||
799 | * sure that this is a file page: the check for page->mapping | ||
800 | * excludes the case just before it gets set on an anon page. | ||
801 | */ | ||
802 | BUG_ON(PageAnon(page)); | ||
803 | |||
804 | /* | ||
805 | * The page lock not only makes sure that page->mapping cannot | ||
806 | * suddenly be NULLified by truncation, it makes sure that the | ||
807 | * structure at mapping cannot be freed and reused yet, | ||
808 | * so we can safely take mapping->i_mmap_mutex. | ||
809 | */ | ||
810 | BUG_ON(!PageLocked(page)); | ||
811 | |||
812 | mutex_lock(&mapping->i_mmap_mutex); | ||
813 | 749 | ||
814 | /* | 750 | if (!mm_match_cgroup(vma->vm_mm, memcg)) |
815 | * i_mmap_mutex does not stabilize mapcount at all, but mapcount | 751 | return true; |
816 | * is more likely to be accurate if we note it after spinning. | ||
817 | */ | ||
818 | mapcount = page_mapcount(page); | ||
819 | |||
820 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | ||
821 | unsigned long address = vma_address(page, vma); | ||
822 | /* | ||
823 | * If we are reclaiming on behalf of a cgroup, skip | ||
824 | * counting on behalf of references from different | ||
825 | * cgroups | ||
826 | */ | ||
827 | if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) | ||
828 | continue; | ||
829 | referenced += page_referenced_one(page, vma, address, | ||
830 | &mapcount, vm_flags); | ||
831 | if (!mapcount) | ||
832 | break; | ||
833 | } | ||
834 | 752 | ||
835 | mutex_unlock(&mapping->i_mmap_mutex); | 753 | return false; |
836 | return referenced; | ||
837 | } | 754 | } |
838 | 755 | ||
839 | /** | 756 | /** |
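The hunks above collapse the old page_referenced_anon()/page_referenced_file() loops into one per-VMA callback whose return value steers the shared walker: state that used to travel through *mapcount and *vm_flags now rides in the caller-owned struct page_referenced_arg passed via the void *arg slot. A condensed skeleton of the new callback shape, inferred from the converted page_referenced_one() above (the function name is illustrative, not part of the patch):

static int example_rmap_one(struct page *page, struct vm_area_struct *vma,
                            unsigned long address, void *arg)
{
        struct page_referenced_arg *pra = arg;  /* caller-owned accumulator */

        if (vma->vm_flags & VM_LOCKED) {
                pra->vm_flags |= VM_LOCKED;
                return SWAP_FAIL;               /* abort the whole walk */
        }

        /* ... test and clear the accessed bit for this mapping ... */

        pra->mapcount--;
        if (!pra->mapcount)
                return SWAP_SUCCESS;            /* every mapping visited; stop */

        return SWAP_AGAIN;                      /* keep walking the other VMAs */
}

SWAP_AGAIN asks the walker to continue with the next VMA; any other return value ends the walk at once, which is how the VM_LOCKED and fully-counted cases now break out of the loop the removed helpers used to run by hand.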
@@ -851,41 +768,57 @@ int page_referenced(struct page *page, | |||
851 | struct mem_cgroup *memcg, | 768 | struct mem_cgroup *memcg, |
852 | unsigned long *vm_flags) | 769 | unsigned long *vm_flags) |
853 | { | 770 | { |
854 | int referenced = 0; | 771 | int ret; |
855 | int we_locked = 0; | 772 | int we_locked = 0; |
773 | struct page_referenced_arg pra = { | ||
774 | .mapcount = page_mapcount(page), | ||
775 | .memcg = memcg, | ||
776 | }; | ||
777 | struct rmap_walk_control rwc = { | ||
778 | .rmap_one = page_referenced_one, | ||
779 | .arg = (void *)&pra, | ||
780 | .anon_lock = page_lock_anon_vma_read, | ||
781 | }; | ||
856 | 782 | ||
857 | *vm_flags = 0; | 783 | *vm_flags = 0; |
858 | if (page_mapped(page) && page_rmapping(page)) { | 784 | if (!page_mapped(page)) |
859 | if (!is_locked && (!PageAnon(page) || PageKsm(page))) { | 785 | return 0; |
860 | we_locked = trylock_page(page); | 786 | |
861 | if (!we_locked) { | 787 | if (!page_rmapping(page)) |
862 | referenced++; | 788 | return 0; |
863 | goto out; | 789 | |
864 | } | 790 | if (!is_locked && (!PageAnon(page) || PageKsm(page))) { |
865 | } | 791 | we_locked = trylock_page(page); |
866 | if (unlikely(PageKsm(page))) | 792 | if (!we_locked) |
867 | referenced += page_referenced_ksm(page, memcg, | 793 | return 1; |
868 | vm_flags); | ||
869 | else if (PageAnon(page)) | ||
870 | referenced += page_referenced_anon(page, memcg, | ||
871 | vm_flags); | ||
872 | else if (page->mapping) | ||
873 | referenced += page_referenced_file(page, memcg, | ||
874 | vm_flags); | ||
875 | if (we_locked) | ||
876 | unlock_page(page); | ||
877 | } | 794 | } |
878 | out: | 795 | |
879 | return referenced; | 796 | /* |
797 | * If we are reclaiming on behalf of a cgroup, skip | ||
798 | * counting on behalf of references from different | ||
799 | * cgroups | ||
800 | */ | ||
801 | if (memcg) { | ||
802 | rwc.invalid_vma = invalid_page_referenced_vma; | ||
803 | } | ||
804 | |||
805 | ret = rmap_walk(page, &rwc); | ||
806 | *vm_flags = pra.vm_flags; | ||
807 | |||
808 | if (we_locked) | ||
809 | unlock_page(page); | ||
810 | |||
811 | return pra.referenced; | ||
880 | } | 812 | } |
881 | 813 | ||
882 | static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, | 814 | static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, |
883 | unsigned long address) | 815 | unsigned long address, void *arg) |
884 | { | 816 | { |
885 | struct mm_struct *mm = vma->vm_mm; | 817 | struct mm_struct *mm = vma->vm_mm; |
886 | pte_t *pte; | 818 | pte_t *pte; |
887 | spinlock_t *ptl; | 819 | spinlock_t *ptl; |
888 | int ret = 0; | 820 | int ret = 0; |
821 | int *cleaned = arg; | ||
889 | 822 | ||
890 | pte = page_check_address(page, mm, address, &ptl, 1); | 823 | pte = page_check_address(page, mm, address, &ptl, 1); |
891 | if (!pte) | 824 | if (!pte) |
@@ -904,44 +837,44 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, | |||
904 | 837 | ||
905 | pte_unmap_unlock(pte, ptl); | 838 | pte_unmap_unlock(pte, ptl); |
906 | 839 | ||
907 | if (ret) | 840 | if (ret) { |
908 | mmu_notifier_invalidate_page(mm, address); | 841 | mmu_notifier_invalidate_page(mm, address); |
842 | (*cleaned)++; | ||
843 | } | ||
909 | out: | 844 | out: |
910 | return ret; | 845 | return SWAP_AGAIN; |
911 | } | 846 | } |
912 | 847 | ||
913 | static int page_mkclean_file(struct address_space *mapping, struct page *page) | 848 | static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) |
914 | { | 849 | { |
915 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 850 | if (vma->vm_flags & VM_SHARED) |
916 | struct vm_area_struct *vma; | 851 | return 0; |
917 | int ret = 0; | ||
918 | |||
919 | BUG_ON(PageAnon(page)); | ||
920 | 852 | ||
921 | mutex_lock(&mapping->i_mmap_mutex); | 853 | return 1; |
922 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | ||
923 | if (vma->vm_flags & VM_SHARED) { | ||
924 | unsigned long address = vma_address(page, vma); | ||
925 | ret += page_mkclean_one(page, vma, address); | ||
926 | } | ||
927 | } | ||
928 | mutex_unlock(&mapping->i_mmap_mutex); | ||
929 | return ret; | ||
930 | } | 854 | } |
931 | 855 | ||
932 | int page_mkclean(struct page *page) | 856 | int page_mkclean(struct page *page) |
933 | { | 857 | { |
934 | int ret = 0; | 858 | int cleaned = 0; |
859 | struct address_space *mapping; | ||
860 | struct rmap_walk_control rwc = { | ||
861 | .arg = (void *)&cleaned, | ||
862 | .rmap_one = page_mkclean_one, | ||
863 | .invalid_vma = invalid_mkclean_vma, | ||
864 | }; | ||
935 | 865 | ||
936 | BUG_ON(!PageLocked(page)); | 866 | BUG_ON(!PageLocked(page)); |
937 | 867 | ||
938 | if (page_mapped(page)) { | 868 | if (!page_mapped(page)) |
939 | struct address_space *mapping = page_mapping(page); | 869 | return 0; |
940 | if (mapping) | ||
941 | ret = page_mkclean_file(mapping, page); | ||
942 | } | ||
943 | 870 | ||
944 | return ret; | 871 | mapping = page_mapping(page); |
872 | if (!mapping) | ||
873 | return 0; | ||
874 | |||
875 | rmap_walk(page, &rwc); | ||
876 | |||
877 | return cleaned; | ||
945 | } | 878 | } |
946 | EXPORT_SYMBOL_GPL(page_mkclean); | 879 | EXPORT_SYMBOL_GPL(page_mkclean); |
947 | 880 | ||
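At this point every hook the converted callers install has appeared except .done and .file_nonlinear, which surface in the try_to_unmap hunks further down. Reconstructed purely from how this patch uses it (the authoritative definition lives in the rmap header, not in mm/rmap.c, and may differ in detail), the control structure looks roughly like:

struct rmap_walk_control {
        void *arg;      /* caller-owned state handed to every hook */
        int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
                        unsigned long address, void *arg);
        int (*done)(struct page *page);         /* non-zero: stop the walk */
        int (*file_nonlinear)(struct page *page, struct address_space *mapping,
                        struct vm_area_struct *vma);
        struct anon_vma *(*anon_lock)(struct page *page);
        bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};

page_referenced() and page_mkclean() above exercise only .rmap_one, .arg, .anon_lock and .invalid_vma; leaving a hook NULL simply disables that check.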
@@ -1177,17 +1110,17 @@ out: | |||
1177 | } | 1110 | } |
1178 | 1111 | ||
1179 | /* | 1112 | /* |
1180 | * Subfunctions of try_to_unmap: try_to_unmap_one called | 1113 | * @arg: enum ttu_flags will be passed to this argument |
1181 | * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file. | ||
1182 | */ | 1114 | */ |
1183 | int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | 1115 | int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, |
1184 | unsigned long address, enum ttu_flags flags) | 1116 | unsigned long address, void *arg) |
1185 | { | 1117 | { |
1186 | struct mm_struct *mm = vma->vm_mm; | 1118 | struct mm_struct *mm = vma->vm_mm; |
1187 | pte_t *pte; | 1119 | pte_t *pte; |
1188 | pte_t pteval; | 1120 | pte_t pteval; |
1189 | spinlock_t *ptl; | 1121 | spinlock_t *ptl; |
1190 | int ret = SWAP_AGAIN; | 1122 | int ret = SWAP_AGAIN; |
1123 | enum ttu_flags flags = (enum ttu_flags)arg; | ||
1191 | 1124 | ||
1192 | pte = page_check_address(page, mm, address, &ptl, 0); | 1125 | pte = page_check_address(page, mm, address, &ptl, 0); |
1193 | if (!pte) | 1126 | if (!pte) |
@@ -1426,124 +1359,18 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, | |||
1426 | return ret; | 1359 | return ret; |
1427 | } | 1360 | } |
1428 | 1361 | ||
1429 | bool is_vma_temporary_stack(struct vm_area_struct *vma) | 1362 | static int try_to_unmap_nonlinear(struct page *page, |
1430 | { | 1363 | struct address_space *mapping, struct vm_area_struct *vma) |
1431 | int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); | ||
1432 | |||
1433 | if (!maybe_stack) | ||
1434 | return false; | ||
1435 | |||
1436 | if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == | ||
1437 | VM_STACK_INCOMPLETE_SETUP) | ||
1438 | return true; | ||
1439 | |||
1440 | return false; | ||
1441 | } | ||
1442 | |||
1443 | /** | ||
1444 | * try_to_unmap_anon - unmap or unlock anonymous page using the object-based | ||
1445 | * rmap method | ||
1446 | * @page: the page to unmap/unlock | ||
1447 | * @flags: action and flags | ||
1448 | * | ||
1449 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1450 | * contained in the anon_vma struct it points to. | ||
1451 | * | ||
1452 | * This function is only called from try_to_unmap/try_to_munlock for | ||
1453 | * anonymous pages. | ||
1454 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1455 | * where the page was found will be held for write. So, we won't recheck | ||
1456 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1457 | * 'LOCKED. | ||
1458 | */ | ||
1459 | static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) | ||
1460 | { | ||
1461 | struct anon_vma *anon_vma; | ||
1462 | pgoff_t pgoff; | ||
1463 | struct anon_vma_chain *avc; | ||
1464 | int ret = SWAP_AGAIN; | ||
1465 | |||
1466 | anon_vma = page_lock_anon_vma_read(page); | ||
1467 | if (!anon_vma) | ||
1468 | return ret; | ||
1469 | |||
1470 | pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1471 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { | ||
1472 | struct vm_area_struct *vma = avc->vma; | ||
1473 | unsigned long address; | ||
1474 | |||
1475 | /* | ||
1476 | * During exec, a temporary VMA is setup and later moved. | ||
1477 | * The VMA is moved under the anon_vma lock but not the | ||
1478 | * page tables leading to a race where migration cannot | ||
1479 | * find the migration ptes. Rather than increasing the | ||
1480 | * locking requirements of exec(), migration skips | ||
1481 | * temporary VMAs until after exec() completes. | ||
1482 | */ | ||
1483 | if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && | ||
1484 | is_vma_temporary_stack(vma)) | ||
1485 | continue; | ||
1486 | |||
1487 | address = vma_address(page, vma); | ||
1488 | ret = try_to_unmap_one(page, vma, address, flags); | ||
1489 | if (ret != SWAP_AGAIN || !page_mapped(page)) | ||
1490 | break; | ||
1491 | } | ||
1492 | |||
1493 | page_unlock_anon_vma_read(anon_vma); | ||
1494 | return ret; | ||
1495 | } | ||
1496 | |||
1497 | /** | ||
1498 | * try_to_unmap_file - unmap/unlock file page using the object-based rmap method | ||
1499 | * @page: the page to unmap/unlock | ||
1500 | * @flags: action and flags | ||
1501 | * | ||
1502 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1503 | * contained in the address_space struct it points to. | ||
1504 | * | ||
1505 | * This function is only called from try_to_unmap/try_to_munlock for | ||
1506 | * object-based pages. | ||
1507 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1508 | * where the page was found will be held for write. So, we won't recheck | ||
1509 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1510 | * 'LOCKED. | ||
1511 | */ | ||
1512 | static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | ||
1513 | { | 1364 | { |
1514 | struct address_space *mapping = page->mapping; | ||
1515 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1516 | struct vm_area_struct *vma; | ||
1517 | int ret = SWAP_AGAIN; | 1365 | int ret = SWAP_AGAIN; |
1518 | unsigned long cursor; | 1366 | unsigned long cursor; |
1519 | unsigned long max_nl_cursor = 0; | 1367 | unsigned long max_nl_cursor = 0; |
1520 | unsigned long max_nl_size = 0; | 1368 | unsigned long max_nl_size = 0; |
1521 | unsigned int mapcount; | 1369 | unsigned int mapcount; |
1522 | 1370 | ||
1523 | if (PageHuge(page)) | 1371 | list_for_each_entry(vma, |
1524 | pgoff = page->index << compound_order(page); | 1372 | &mapping->i_mmap_nonlinear, shared.nonlinear) { |
1525 | 1373 | ||
1526 | mutex_lock(&mapping->i_mmap_mutex); | ||
1527 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | ||
1528 | unsigned long address = vma_address(page, vma); | ||
1529 | ret = try_to_unmap_one(page, vma, address, flags); | ||
1530 | if (ret != SWAP_AGAIN || !page_mapped(page)) | ||
1531 | goto out; | ||
1532 | } | ||
1533 | |||
1534 | if (list_empty(&mapping->i_mmap_nonlinear)) | ||
1535 | goto out; | ||
1536 | |||
1537 | /* | ||
1538 | * We don't bother to try to find the munlocked page in nonlinears. | ||
1539 | * It's costly. Instead, later, page reclaim logic may call | ||
1540 | * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily. | ||
1541 | */ | ||
1542 | if (TTU_ACTION(flags) == TTU_MUNLOCK) | ||
1543 | goto out; | ||
1544 | |||
1545 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | ||
1546 | shared.nonlinear) { | ||
1547 | cursor = (unsigned long) vma->vm_private_data; | 1374 | cursor = (unsigned long) vma->vm_private_data; |
1548 | if (cursor > max_nl_cursor) | 1375 | if (cursor > max_nl_cursor) |
1549 | max_nl_cursor = cursor; | 1376 | max_nl_cursor = cursor; |
@@ -1553,8 +1380,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1553 | } | 1380 | } |
1554 | 1381 | ||
1555 | if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ | 1382 | if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ |
1556 | ret = SWAP_FAIL; | 1383 | return SWAP_FAIL; |
1557 | goto out; | ||
1558 | } | 1384 | } |
1559 | 1385 | ||
1560 | /* | 1386 | /* |
@@ -1566,7 +1392,8 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1566 | */ | 1392 | */ |
1567 | mapcount = page_mapcount(page); | 1393 | mapcount = page_mapcount(page); |
1568 | if (!mapcount) | 1394 | if (!mapcount) |
1569 | goto out; | 1395 | return ret; |
1396 | |||
1570 | cond_resched(); | 1397 | cond_resched(); |
1571 | 1398 | ||
1572 | max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; | 1399 | max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; |
@@ -1574,10 +1401,11 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1574 | max_nl_cursor = CLUSTER_SIZE; | 1401 | max_nl_cursor = CLUSTER_SIZE; |
1575 | 1402 | ||
1576 | do { | 1403 | do { |
1577 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1404 | list_for_each_entry(vma, |
1578 | shared.nonlinear) { | 1405 | &mapping->i_mmap_nonlinear, shared.nonlinear) { |
1406 | |||
1579 | cursor = (unsigned long) vma->vm_private_data; | 1407 | cursor = (unsigned long) vma->vm_private_data; |
1580 | while ( cursor < max_nl_cursor && | 1408 | while (cursor < max_nl_cursor && |
1581 | cursor < vma->vm_end - vma->vm_start) { | 1409 | cursor < vma->vm_end - vma->vm_start) { |
1582 | if (try_to_unmap_cluster(cursor, &mapcount, | 1410 | if (try_to_unmap_cluster(cursor, &mapcount, |
1583 | vma, page) == SWAP_MLOCK) | 1411 | vma, page) == SWAP_MLOCK) |
@@ -1585,7 +1413,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1585 | cursor += CLUSTER_SIZE; | 1413 | cursor += CLUSTER_SIZE; |
1586 | vma->vm_private_data = (void *) cursor; | 1414 | vma->vm_private_data = (void *) cursor; |
1587 | if ((int)mapcount <= 0) | 1415 | if ((int)mapcount <= 0) |
1588 | goto out; | 1416 | return ret; |
1589 | } | 1417 | } |
1590 | vma->vm_private_data = (void *) max_nl_cursor; | 1418 | vma->vm_private_data = (void *) max_nl_cursor; |
1591 | } | 1419 | } |
@@ -1600,11 +1428,34 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1600 | */ | 1428 | */ |
1601 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) | 1429 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) |
1602 | vma->vm_private_data = NULL; | 1430 | vma->vm_private_data = NULL; |
1603 | out: | 1431 | |
1604 | mutex_unlock(&mapping->i_mmap_mutex); | ||
1605 | return ret; | 1432 | return ret; |
1606 | } | 1433 | } |
1607 | 1434 | ||
1435 | bool is_vma_temporary_stack(struct vm_area_struct *vma) | ||
1436 | { | ||
1437 | int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); | ||
1438 | |||
1439 | if (!maybe_stack) | ||
1440 | return false; | ||
1441 | |||
1442 | if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == | ||
1443 | VM_STACK_INCOMPLETE_SETUP) | ||
1444 | return true; | ||
1445 | |||
1446 | return false; | ||
1447 | } | ||
1448 | |||
1449 | static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) | ||
1450 | { | ||
1451 | return is_vma_temporary_stack(vma); | ||
1452 | } | ||
1453 | |||
1454 | static int page_not_mapped(struct page *page) | ||
1455 | { | ||
1456 | return !page_mapped(page); | ||
1457 | }; | ||
1458 | |||
1608 | /** | 1459 | /** |
1609 | * try_to_unmap - try to remove all page table mappings to a page | 1460 | * try_to_unmap - try to remove all page table mappings to a page |
1610 | * @page: the page to get unmapped | 1461 | * @page: the page to get unmapped |
@@ -1622,16 +1473,29 @@ out: | |||
1622 | int try_to_unmap(struct page *page, enum ttu_flags flags) | 1473 | int try_to_unmap(struct page *page, enum ttu_flags flags) |
1623 | { | 1474 | { |
1624 | int ret; | 1475 | int ret; |
1476 | struct rmap_walk_control rwc = { | ||
1477 | .rmap_one = try_to_unmap_one, | ||
1478 | .arg = (void *)flags, | ||
1479 | .done = page_not_mapped, | ||
1480 | .file_nonlinear = try_to_unmap_nonlinear, | ||
1481 | .anon_lock = page_lock_anon_vma_read, | ||
1482 | }; | ||
1625 | 1483 | ||
1626 | BUG_ON(!PageLocked(page)); | ||
1627 | VM_BUG_ON(!PageHuge(page) && PageTransHuge(page)); | 1484 | VM_BUG_ON(!PageHuge(page) && PageTransHuge(page)); |
1628 | 1485 | ||
1629 | if (unlikely(PageKsm(page))) | 1486 | /* |
1630 | ret = try_to_unmap_ksm(page, flags); | 1487 | * During exec, a temporary VMA is setup and later moved. |
1631 | else if (PageAnon(page)) | 1488 | * The VMA is moved under the anon_vma lock but not the |
1632 | ret = try_to_unmap_anon(page, flags); | 1489 | * page tables leading to a race where migration cannot |
1633 | else | 1490 | * find the migration ptes. Rather than increasing the |
1634 | ret = try_to_unmap_file(page, flags); | 1491 | * locking requirements of exec(), migration skips |
1492 | * temporary VMAs until after exec() completes. | ||
1493 | */ | ||
1494 | if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page)) | ||
1495 | rwc.invalid_vma = invalid_migration_vma; | ||
1496 | |||
1497 | ret = rmap_walk(page, &rwc); | ||
1498 | |||
1635 | if (ret != SWAP_MLOCK && !page_mapped(page)) | 1499 | if (ret != SWAP_MLOCK && !page_mapped(page)) |
1636 | ret = SWAP_SUCCESS; | 1500 | ret = SWAP_SUCCESS; |
1637 | return ret; | 1501 | return ret; |
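Every converted caller follows the same recipe: keep per-walk state on the stack, point .arg at it, install the hooks it needs, run rmap_walk(), then read the results back out. As a usage sketch only (this helper is hypothetical and not part of the patch), a client that merely counts the VMAs currently mapping a locked page could be assembled from the same pieces:

struct count_arg {
        int nr_vmas;                            /* hypothetical accumulator */
};

static int count_one(struct page *page, struct vm_area_struct *vma,
                     unsigned long address, void *arg)
{
        struct count_arg *ca = arg;

        ca->nr_vmas++;
        return SWAP_AGAIN;                      /* visit every mapping */
}

/* Caller must hold the page lock, as rmap_walk_file() asserts below. */
static int count_mapping_vmas(struct page *page)
{
        struct count_arg ca = { .nr_vmas = 0 };
        struct rmap_walk_control rwc = {
                .rmap_one  = count_one,
                .arg       = (void *)&ca,
                .anon_lock = page_lock_anon_vma_read,
        };

        rmap_walk(page, &rwc);
        return ca.nr_vmas;
}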
@@ -1654,14 +1518,25 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) | |||
1654 | */ | 1518 | */ |
1655 | int try_to_munlock(struct page *page) | 1519 | int try_to_munlock(struct page *page) |
1656 | { | 1520 | { |
1521 | int ret; | ||
1522 | struct rmap_walk_control rwc = { | ||
1523 | .rmap_one = try_to_unmap_one, | ||
1524 | .arg = (void *)TTU_MUNLOCK, | ||
1525 | .done = page_not_mapped, | ||
1526 | /* | ||
1527 | * We don't bother to try to find the munlocked page in | ||
1528 | * nonlinears. It's costly. Instead, later, page reclaim logic | ||
1529 | * may call try_to_unmap() and recover PG_mlocked lazily. | ||
1530 | */ | ||
1531 | .file_nonlinear = NULL, | ||
1532 | .anon_lock = page_lock_anon_vma_read, | ||
1533 | |||
1534 | }; | ||
1535 | |||
1657 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); | 1536 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); |
1658 | 1537 | ||
1659 | if (unlikely(PageKsm(page))) | 1538 | ret = rmap_walk(page, &rwc); |
1660 | return try_to_unmap_ksm(page, TTU_MUNLOCK); | 1539 | return ret; |
1661 | else if (PageAnon(page)) | ||
1662 | return try_to_unmap_anon(page, TTU_MUNLOCK); | ||
1663 | else | ||
1664 | return try_to_unmap_file(page, TTU_MUNLOCK); | ||
1665 | } | 1540 | } |
1666 | 1541 | ||
1667 | void __put_anon_vma(struct anon_vma *anon_vma) | 1542 | void __put_anon_vma(struct anon_vma *anon_vma) |
@@ -1674,18 +1549,13 @@ void __put_anon_vma(struct anon_vma *anon_vma) | |||
1674 | anon_vma_free(anon_vma); | 1549 | anon_vma_free(anon_vma); |
1675 | } | 1550 | } |
1676 | 1551 | ||
1677 | #ifdef CONFIG_MIGRATION | 1552 | static struct anon_vma *rmap_walk_anon_lock(struct page *page, |
1678 | /* | 1553 | struct rmap_walk_control *rwc) |
1679 | * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): | ||
1680 | * Called by migrate.c to remove migration ptes, but might be used more later. | ||
1681 | */ | ||
1682 | static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, | ||
1683 | struct vm_area_struct *, unsigned long, void *), void *arg) | ||
1684 | { | 1554 | { |
1685 | struct anon_vma *anon_vma; | 1555 | struct anon_vma *anon_vma; |
1686 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1556 | |
1687 | struct anon_vma_chain *avc; | 1557 | if (rwc->anon_lock) |
1688 | int ret = SWAP_AGAIN; | 1558 | return rwc->anon_lock(page); |
1689 | 1559 | ||
1690 | /* | 1560 | /* |
1691 | * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() | 1561 | * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() |
@@ -1695,58 +1565,120 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, | |||
1695 | */ | 1565 | */ |
1696 | anon_vma = page_anon_vma(page); | 1566 | anon_vma = page_anon_vma(page); |
1697 | if (!anon_vma) | 1567 | if (!anon_vma) |
1698 | return ret; | 1568 | return NULL; |
1569 | |||
1699 | anon_vma_lock_read(anon_vma); | 1570 | anon_vma_lock_read(anon_vma); |
1571 | return anon_vma; | ||
1572 | } | ||
1573 | |||
1574 | /* | ||
1575 | * rmap_walk_anon - do something to anonymous page using the object-based | ||
1576 | * rmap method | ||
1577 | * @page: the page to be handled | ||
1578 | * @rwc: control variable according to each walk type | ||
1579 | * | ||
1580 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1581 | * contained in the anon_vma struct it points to. | ||
1582 | * | ||
1583 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1584 | * where the page was found will be held for write. So, we won't recheck | ||
1585 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1586 | * LOCKED. | ||
1587 | */ | ||
1588 | static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) | ||
1589 | { | ||
1590 | struct anon_vma *anon_vma; | ||
1591 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1592 | struct anon_vma_chain *avc; | ||
1593 | int ret = SWAP_AGAIN; | ||
1594 | |||
1595 | anon_vma = rmap_walk_anon_lock(page, rwc); | ||
1596 | if (!anon_vma) | ||
1597 | return ret; | ||
1598 | |||
1700 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { | 1599 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { |
1701 | struct vm_area_struct *vma = avc->vma; | 1600 | struct vm_area_struct *vma = avc->vma; |
1702 | unsigned long address = vma_address(page, vma); | 1601 | unsigned long address = vma_address(page, vma); |
1703 | ret = rmap_one(page, vma, address, arg); | 1602 | |
1603 | if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) | ||
1604 | continue; | ||
1605 | |||
1606 | ret = rwc->rmap_one(page, vma, address, rwc->arg); | ||
1704 | if (ret != SWAP_AGAIN) | 1607 | if (ret != SWAP_AGAIN) |
1705 | break; | 1608 | break; |
1609 | if (rwc->done && rwc->done(page)) | ||
1610 | break; | ||
1706 | } | 1611 | } |
1707 | anon_vma_unlock_read(anon_vma); | 1612 | anon_vma_unlock_read(anon_vma); |
1708 | return ret; | 1613 | return ret; |
1709 | } | 1614 | } |
1710 | 1615 | ||
1711 | static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, | 1616 | /* |
1712 | struct vm_area_struct *, unsigned long, void *), void *arg) | 1617 | * rmap_walk_file - do something to file page using the object-based rmap method |
1618 | * @page: the page to be handled | ||
1619 | * @rwc: control variable according to each walk type | ||
1620 | * | ||
1621 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1622 | * contained in the address_space struct it points to. | ||
1623 | * | ||
1624 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1625 | * where the page was found will be held for write. So, we won't recheck | ||
1626 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1627 | * LOCKED. | ||
1628 | */ | ||
1629 | static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) | ||
1713 | { | 1630 | { |
1714 | struct address_space *mapping = page->mapping; | 1631 | struct address_space *mapping = page->mapping; |
1715 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1632 | pgoff_t pgoff = page->index << compound_order(page); |
1716 | struct vm_area_struct *vma; | 1633 | struct vm_area_struct *vma; |
1717 | int ret = SWAP_AGAIN; | 1634 | int ret = SWAP_AGAIN; |
1718 | 1635 | ||
1636 | /* | ||
1637 | * The page lock not only makes sure that page->mapping cannot | ||
1638 | * suddenly be NULLified by truncation, it makes sure that the | ||
1639 | * structure at mapping cannot be freed and reused yet, | ||
1640 | * so we can safely take mapping->i_mmap_mutex. | ||
1641 | */ | ||
1642 | VM_BUG_ON(!PageLocked(page)); | ||
1643 | |||
1719 | if (!mapping) | 1644 | if (!mapping) |
1720 | return ret; | 1645 | return ret; |
1721 | mutex_lock(&mapping->i_mmap_mutex); | 1646 | mutex_lock(&mapping->i_mmap_mutex); |
1722 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | 1647 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { |
1723 | unsigned long address = vma_address(page, vma); | 1648 | unsigned long address = vma_address(page, vma); |
1724 | ret = rmap_one(page, vma, address, arg); | 1649 | |
1650 | if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) | ||
1651 | continue; | ||
1652 | |||
1653 | ret = rwc->rmap_one(page, vma, address, rwc->arg); | ||
1725 | if (ret != SWAP_AGAIN) | 1654 | if (ret != SWAP_AGAIN) |
1726 | break; | 1655 | goto done; |
1656 | if (rwc->done && rwc->done(page)) | ||
1657 | goto done; | ||
1727 | } | 1658 | } |
1728 | /* | 1659 | |
1729 | * No nonlinear handling: being always shared, nonlinear vmas | 1660 | if (!rwc->file_nonlinear) |
1730 | * never contain migration ptes. Decide what to do about this | 1661 | goto done; |
1731 | * limitation to linear when we need rmap_walk() on nonlinear. | 1662 | |
1732 | */ | 1663 | if (list_empty(&mapping->i_mmap_nonlinear)) |
1664 | goto done; | ||
1665 | |||
1666 | ret = rwc->file_nonlinear(page, mapping, vma); | ||
1667 | |||
1668 | done: | ||
1733 | mutex_unlock(&mapping->i_mmap_mutex); | 1669 | mutex_unlock(&mapping->i_mmap_mutex); |
1734 | return ret; | 1670 | return ret; |
1735 | } | 1671 | } |
1736 | 1672 | ||
1737 | int rmap_walk(struct page *page, int (*rmap_one)(struct page *, | 1673 | int rmap_walk(struct page *page, struct rmap_walk_control *rwc) |
1738 | struct vm_area_struct *, unsigned long, void *), void *arg) | ||
1739 | { | 1674 | { |
1740 | VM_BUG_ON(!PageLocked(page)); | ||
1741 | |||
1742 | if (unlikely(PageKsm(page))) | 1675 | if (unlikely(PageKsm(page))) |
1743 | return rmap_walk_ksm(page, rmap_one, arg); | 1676 | return rmap_walk_ksm(page, rwc); |
1744 | else if (PageAnon(page)) | 1677 | else if (PageAnon(page)) |
1745 | return rmap_walk_anon(page, rmap_one, arg); | 1678 | return rmap_walk_anon(page, rwc); |
1746 | else | 1679 | else |
1747 | return rmap_walk_file(page, rmap_one, arg); | 1680 | return rmap_walk_file(page, rwc); |
1748 | } | 1681 | } |
1749 | #endif /* CONFIG_MIGRATION */ | ||
1750 | 1682 | ||
1751 | #ifdef CONFIG_HUGETLB_PAGE | 1683 | #ifdef CONFIG_HUGETLB_PAGE |
1752 | /* | 1684 | /* |
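One behavioural detail worth noting in rmap_walk_file() above: the interval-tree offset is now computed with compound_order(), so the PageHuge() special case that the removed try_to_unmap_file() handled by hand is covered for every caller. A small illustrative helper (not in the patch) showing why the expression works for both base and compound pages:

/*
 * For a base page compound_order() is 0, so this is just page->index.
 * For a hugetlbfs head page, page->index counts huge pages, and the
 * shift converts it to base-page units for the i_mmap interval tree --
 * exactly what the removed "if (PageHuge(page))" branch used to do.
 */
static pgoff_t walk_pgoff(struct page *page)
{
        return page->index << compound_order(page);
}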