author     Kirill A. Shutemov <kirill.shutemov@linux.intel.com>  2016-01-15 19:54:37 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>        2016-01-15 20:56:32 -0500
commit     b20ce5e03b936be077463015661dcf52be274e5b
tree       1004a054c0263a471ef79a7cd84fea904a71b655
parent     e90309c9f7722db4ff5bce3b9e6e04d1460f2553
mm: prepare page_referenced() and page_idle to new THP refcounting
Both page_referenced() and page_idle_clear_pte_refs_one() assume that a
THP can only be mapped with a PMD, so there's no reason to look at PTEs
for PageTransHuge() pages. That's not true anymore: a THP can be mapped
with PTEs too.

This patch removes the PageTransHuge() test from both functions and
open-codes the page table check.
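For illustration, here is a condensed sketch of the open-coded check
(this is not code from the patch: the helper name is made up, the
rechecks for a concurrent PMD split are simplified, and it assumes the
4.4-era page table API, i.e. pud_offset() taking a pgd):

#include <linux/mm.h>
#include <linux/huge_mm.h>

/*
 * Hypothetical helper: does @mm map @page (or, for a THP, one of its
 * subpages) at @addr?  Mirrors the pgd -> pud -> pmd walk plus the
 * PMD-vs-PTE split that the patch adds to the two functions above.
 */
static bool page_mapped_at(struct page *page, struct mm_struct *mm,
			   unsigned long addr)
{
	spinlock_t *ptl;
	bool mapped;
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (!pgd_present(*pgd))
		return false;
	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return false;
	pmd = pmd_offset(pud, addr);

	if (pmd_trans_huge(*pmd)) {
		/* PMD-mapped THP: recheck everything under the PMD lock. */
		ptl = pmd_lock(mm, pmd);
		mapped = pmd_present(*pmd) && pmd_trans_huge(*pmd) &&
			 pmd_page(*pmd) == page;
		spin_unlock(ptl);
		return mapped;
	}

	if (!pmd_present(*pmd))
		return false;

	/* PTE mapping, possibly of an individual THP subpage. */
	pte = pte_offset_map(pmd, addr);
	ptl = pte_lockptr(mm, pmd);
	spin_lock(ptl);
	/* Any subpage of the THP counts as a mapping of the page. */
	mapped = pte_present(*pte) &&
		 pte_pfn(*pte) - page_to_pfn(page) < hpage_nr_pages(page);
	pte_unmap_unlock(pte, ptl);
	return mapped;
}

The real code cannot simply bail out when the PMD turns out not to be
trans-huge under the lock: a huge PMD may be getting split underneath
us, so both functions fall through from the PMD branch to the PTE check
(the map_pte label) instead of returning.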
[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  include/linux/huge_mm.h |   5
-rw-r--r--  include/linux/mm.h      |  23
-rw-r--r--  mm/huge_memory.c        |  73
-rw-r--r--  mm/page_idle.c          |  65
-rw-r--r--  mm/rmap.c               | 117
-rw-r--r--  mm/util.c               |  14
6 files changed, 185 insertions, 112 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7aec5ee9cfdf..72cd942edb22 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -48,11 +48,6 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-extern pmd_t *page_check_address_pmd(struct page *page,
-				     struct mm_struct *mm,
-				     unsigned long address,
-				     spinlock_t **ptl);
-
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa8ae8330a75..0ef5f21735af 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -433,20 +433,25 @@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
+int __page_mapcount(struct page *page);
+
 static inline int page_mapcount(struct page *page)
 {
-	int ret;
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 
-	ret = atomic_read(&page->_mapcount) + 1;
-	if (PageCompound(page)) {
-		page = compound_head(page);
-		ret += atomic_read(compound_mapcount_ptr(page)) + 1;
-		if (PageDoubleMap(page))
-			ret--;
-	}
-	return ret;
+	if (unlikely(PageCompound(page)))
+		return __page_mapcount(page);
+	return atomic_read(&page->_mapcount) + 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int total_mapcount(struct page *page);
+#else
+static inline int total_mapcount(struct page *page)
+{
+	return page_mapcount(page);
 }
+#endif
 
 static inline int page_count(struct page *page)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f283cb7c480e..ab544b145b52 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1649,46 +1649,6 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return false;
 }
 
-/*
- * This function returns whether a given @page is mapped onto the @address
- * in the virtual space of @mm.
- *
- * When it's true, this function returns *pmd with holding the page table lock
- * and passing it back to the caller via @ptl.
- * If it's false, returns NULL without holding the page table lock.
- */
-pmd_t *page_check_address_pmd(struct page *page,
-			      struct mm_struct *mm,
-			      unsigned long address,
-			      spinlock_t **ptl)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (address & ~HPAGE_PMD_MASK)
-		return NULL;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-
-	*ptl = pmd_lock(mm, pmd);
-	if (!pmd_present(*pmd))
-		goto unlock;
-	if (pmd_page(*pmd) != page)
-		goto unlock;
-	if (pmd_trans_huge(*pmd))
-		return pmd;
-unlock:
-	spin_unlock(*ptl);
-	return NULL;
-}
-
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,
@@ -3097,20 +3057,6 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
 	}
 }
 
-static int total_mapcount(struct page *page)
-{
-	int i, ret;
-
-	ret = compound_mapcount(page);
-	for (i = 0; i < HPAGE_PMD_NR; i++)
-		ret += atomic_read(&page[i]._mapcount) + 1;
-
-	if (PageDoubleMap(page))
-		ret -= HPAGE_PMD_NR;
-
-	return ret;
-}
-
 static int __split_huge_page_tail(struct page *head, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
@@ -3211,6 +3157,25 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	}
 }
 
+int total_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = compound_mapcount(page);
+	if (PageHuge(page))
+		return ret;
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		ret += atomic_read(&page[i]._mapcount) + 1;
+	if (PageDoubleMap(page))
+		ret -= HPAGE_PMD_NR;
+	return ret;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 1c245d9027e3..2c553ba969f8 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -56,23 +56,70 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd = page_check_address_pmd(page, mm, addr, &ptl);
-		if (pmd) {
-			referenced = pmdp_clear_young_notify(vma, addr, pmd);
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
 		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		referenced = pmdp_clear_young_notify(vma, addr, pmd);
+		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return SWAP_AGAIN;
 	} else {
-		pte = page_check_address(page, mm, addr, &ptl, 0);
-		if (pte) {
-			referenced = ptep_clear_young_notify(vma, addr, pte);
-			pte_unmap_unlock(pte, ptl);
-		}
+		pmd_t pmde = *pmd;
+
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			return SWAP_AGAIN;
+
+	}
+map_pte:
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
 	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	referenced = ptep_clear_young_notify(vma, addr, pte);
+	pte_unmap_unlock(pte, ptl);
+found:
 	if (referenced) {
 		clear_page_idle(page);
 		/*
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -814,58 +814,105 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int referenced = 0;
 	struct page_referenced_arg *pra = arg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd_t *pmd;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address_pmd().
-		 */
-		pmd = page_check_address_pmd(page, mm, address, &ptl);
-		if (!pmd)
+	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
+		pte = huge_pte_offset(mm, address);
+		if (!pte)
 			return SWAP_AGAIN;
 
-		if (vma->vm_flags & VM_LOCKED) {
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		goto check_pte;
+	}
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, address);
+
+	if (pmd_trans_huge(*pmd)) {
+		int ret = SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
+		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		if (vma->vm_flags & VM_LOCKED) {
 			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
+			ret = SWAP_FAIL; /* To break the loop */
+			goto unlock_pmd;
 		}
 
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
 		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return ret;
 	} else {
-		pte_t *pte;
+		pmd_t pmde = *pmd;
 
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address().
-		 */
-		pte = page_check_address(page, mm, address, &ptl, 0);
-		if (!pte)
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 			return SWAP_AGAIN;
+	}
+map_pte:
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pte_unmap_unlock(pte, ptl);
-			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
-		}
+	ptl = pte_lockptr(mm, pmd);
+check_pte:
+	spin_lock(ptl);
 
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			/*
-			 * Don't treat a reference through a sequentially read
-			 * mapping as such. If the page has been used in
-			 * another mapping, we will catch it; if this other
-			 * mapping is already gone, the unmap path will have
-			 * set PG_referenced or activated the page.
-			 */
-			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-				referenced++;
-		}
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
+		pra->vm_flags |= VM_LOCKED;
+		return SWAP_FAIL; /* To break the loop */
 	}
 
+	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		/*
+		 * Don't treat a reference through a sequentially read
+		 * mapping as such. If the page has been used in
+		 * another mapping, we will catch it; if this other
+		 * mapping is already gone, the unmap path will have
+		 * set PG_referenced or activated the page.
+		 */
+		if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+			referenced++;
+	}
+	pte_unmap_unlock(pte, ptl);
+
+found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))
@@ -912,7 +959,7 @@ int page_referenced(struct page *page,
 	int ret;
 	int we_locked = 0;
 	struct page_referenced_arg pra = {
-		.mapcount = page_mapcount(page),
+		.mapcount = total_mapcount(page),
 		.memcg = memcg,
 	};
 	struct rmap_walk_control rwc = {
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
@@ -407,6 +407,20 @@ struct address_space *page_mapping(struct page *page)
 	return mapping;
 }
 
+/* Slow path of page_mapcount() for compound pages */
+int __page_mapcount(struct page *page)
+{
+	int ret;
+
+	ret = atomic_read(&page->_mapcount) + 1;
+	page = compound_head(page);
+	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
+	if (PageDoubleMap(page))
+		ret--;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__page_mapcount);
+
 int overcommit_ratio_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)