author		Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2016-01-15 19:54:37 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-15 20:56:32 -0500
commit		b20ce5e03b936be077463015661dcf52be274e5b (patch)
tree		1004a054c0263a471ef79a7cd84fea904a71b655
parent		e90309c9f7722db4ff5bce3b9e6e04d1460f2553 (diff)
mm: prepare page_referenced() and page_idle to new THP refcounting
Both page_referenced() and page_idle_clear_pte_refs_one() assume that THP
can only be mapped with a PMD, so there's no reason to look at PTEs for
PageTransHuge() pages.  That's not true anymore: a THP can be mapped with
PTEs too.

The patch removes the PageTransHuge() test from the functions and
open-codes the page table check.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
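The mapcount arithmetic this change relies on is spread over include/linux/mm.h, mm/huge_memory.c and mm/util.c below. The following self-contained userspace sketch (not kernel code: struct thp_model and the *_model() helpers are invented names, and the counter conventions assume the PG_double_map scheme of this refcounting series) mirrors the formulas of __page_mapcount() and total_mapcount() and shows why page_referenced() now seeds pra.mapcount with total_mapcount(): a THP that is PMD-mapped by one process and PTE-mapped by another has total_mapcount() == 2 even though each subpage's own count stays small.

/*
 * Userspace model of the THP mapcount accounting (illustrative only).
 * Counter conventions, matching the kernel's:
 *  - compound_mapcount: number of PMD (huge) mappings of the THP;
 *  - subpage[i]:        kernel "_mapcount + 1" of subpage i, i.e. its PTE
 *                       mappings, plus one extra reference for the PMD
 *                       mapping(s) once PG_double_map is set.
 */
#include <stdbool.h>
#include <stdio.h>

#define HPAGE_PMD_NR 512	/* 2M THP with 4K base pages */

struct thp_model {
	int compound_mapcount;
	int subpage[HPAGE_PMD_NR];
	bool double_map;	/* models PageDoubleMap() */
};

/* Mirrors __page_mapcount(): how many times is this subpage mapped? */
static int page_mapcount_model(const struct thp_model *p, int i)
{
	int ret = p->subpage[i] + p->compound_mapcount;

	if (p->double_map)
		ret--;	/* the PMD mapping is already reflected in subpage[i] */
	return ret;
}

/* Mirrors total_mapcount(): all PTE plus PMD mappings of the whole THP. */
static int total_mapcount_model(const struct thp_model *p)
{
	int i, ret = p->compound_mapcount;

	for (i = 0; i < HPAGE_PMD_NR; i++)
		ret += p->subpage[i];
	if (p->double_map)
		ret -= HPAGE_PMD_NR;
	return ret;
}

int main(void)
{
	struct thp_model thp = { .compound_mapcount = 1, .double_map = true };
	int i;

	/*
	 * One PMD mapping (process A) plus one PTE mapping of subpage 0
	 * (process B, e.g. after its PMD mapping was split and the other
	 * subpages unmapped) -- the case the old PMD-only assumption missed.
	 * With PG_double_map set, every subpage count carries one reference
	 * for the PMD mapping; subpage 0 carries one more for the PTE.
	 */
	for (i = 0; i < HPAGE_PMD_NR; i++)
		thp.subpage[i] = 1;
	thp.subpage[0] = 2;

	printf("page_mapcount(subpage 0): %d\n", page_mapcount_model(&thp, 0)); /* 2 */
	printf("page_mapcount(subpage 1): %d\n", page_mapcount_model(&thp, 1)); /* 1 */
	printf("total_mapcount:           %d\n", total_mapcount_model(&thp));   /* 2 */
	return 0;
}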
-rw-r--r--	include/linux/huge_mm.h	  5
-rw-r--r--	include/linux/mm.h	 23
-rw-r--r--	mm/huge_memory.c	 73
-rw-r--r--	mm/page_idle.c	 65
-rw-r--r--	mm/rmap.c	117
-rw-r--r--	mm/util.c	 14
6 files changed, 185 insertions(+), 112 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7aec5ee9cfdf..72cd942edb22 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -48,11 +48,6 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-extern pmd_t *page_check_address_pmd(struct page *page,
-				     struct mm_struct *mm,
-				     unsigned long address,
-				     spinlock_t **ptl);
-
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aa8ae8330a75..0ef5f21735af 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -433,20 +433,25 @@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
+int __page_mapcount(struct page *page);
+
 static inline int page_mapcount(struct page *page)
 {
-	int ret;
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 
-	ret = atomic_read(&page->_mapcount) + 1;
-	if (PageCompound(page)) {
-		page = compound_head(page);
-		ret += atomic_read(compound_mapcount_ptr(page)) + 1;
-		if (PageDoubleMap(page))
-			ret--;
-	}
-	return ret;
+	if (unlikely(PageCompound(page)))
+		return __page_mapcount(page);
+	return atomic_read(&page->_mapcount) + 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int total_mapcount(struct page *page);
+#else
+static inline int total_mapcount(struct page *page)
+{
+	return page_mapcount(page);
 }
+#endif
 
 static inline int page_count(struct page *page)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f283cb7c480e..ab544b145b52 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1649,46 +1649,6 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return false;
 }
 
-/*
- * This function returns whether a given @page is mapped onto the @address
- * in the virtual space of @mm.
- *
- * When it's true, this function returns *pmd with holding the page table lock
- * and passing it back to the caller via @ptl.
- * If it's false, returns NULL without holding the page table lock.
- */
-pmd_t *page_check_address_pmd(struct page *page,
-			      struct mm_struct *mm,
-			      unsigned long address,
-			      spinlock_t **ptl)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (address & ~HPAGE_PMD_MASK)
-		return NULL;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-
-	*ptl = pmd_lock(mm, pmd);
-	if (!pmd_present(*pmd))
-		goto unlock;
-	if (pmd_page(*pmd) != page)
-		goto unlock;
-	if (pmd_trans_huge(*pmd))
-		return pmd;
-unlock:
-	spin_unlock(*ptl);
-	return NULL;
-}
-
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,
@@ -3097,20 +3057,6 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
 	}
 }
 
-static int total_mapcount(struct page *page)
-{
-	int i, ret;
-
-	ret = compound_mapcount(page);
-	for (i = 0; i < HPAGE_PMD_NR; i++)
-		ret += atomic_read(&page[i]._mapcount) + 1;
-
-	if (PageDoubleMap(page))
-		ret -= HPAGE_PMD_NR;
-
-	return ret;
-}
-
 static int __split_huge_page_tail(struct page *head, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {
@@ -3211,6 +3157,25 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	}
 }
 
+int total_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = compound_mapcount(page);
+	if (PageHuge(page))
+		return ret;
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		ret += atomic_read(&page[i]._mapcount) + 1;
+	if (PageDoubleMap(page))
+		ret -= HPAGE_PMD_NR;
+	return ret;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 1c245d9027e3..2c553ba969f8 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -56,23 +56,70 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd = page_check_address_pmd(page, mm, addr, &ptl);
-		if (pmd) {
-			referenced = pmdp_clear_young_notify(vma, addr, pmd);
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
 		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		referenced = pmdp_clear_young_notify(vma, addr, pmd);
+		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return SWAP_AGAIN;
 	} else {
-		pte = page_check_address(page, mm, addr, &ptl, 0);
-		if (pte) {
-			referenced = ptep_clear_young_notify(vma, addr, pte);
-			pte_unmap_unlock(pte, ptl);
-		}
+		pmd_t pmde = *pmd;
+
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			return SWAP_AGAIN;
+
+	}
+map_pte:
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
 	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	referenced = ptep_clear_young_notify(vma, addr, pte);
+	pte_unmap_unlock(pte, ptl);
+found:
 	if (referenced) {
 		clear_page_idle(page);
 		/*
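The PTE leg above accepts a reference through any subpage of the THP: the pte_pfn()/page_to_pfn() comparison is simply a pfn range test over the compound page. A minimal standalone model of that test (pte_maps_page() is an invented name, not a kernel helper):

#include <stdbool.h>
#include <stdio.h>

/*
 * A PTE references @page iff its pfn falls inside
 * [page_to_pfn(page), page_to_pfn(page) + hpage_nr_pages(page)).
 */
static bool pte_maps_page(unsigned long pte_pfn, unsigned long page_pfn,
			  unsigned long nr_pages)
{
	/* unsigned subtraction also rejects pte_pfn < page_pfn */
	return pte_pfn - page_pfn < nr_pages;
}

int main(void)
{
	unsigned long head_pfn = 0x1000;	/* arbitrary example pfn */

	printf("%d\n", pte_maps_page(head_pfn + 7, head_pfn, 512));   /* 1: subpage 7 */
	printf("%d\n", pte_maps_page(head_pfn + 600, head_pfn, 512)); /* 0: past the THP */
	printf("%d\n", pte_maps_page(head_pfn - 1, head_pfn, 512));   /* 0: wraps, rejected */
	return 0;
}

The same filter appears again in page_referenced_one() in the mm/rmap.c hunk below.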
diff --git a/mm/rmap.c b/mm/rmap.c
index 31d8866fb562..6127c00b2262 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -814,58 +814,105 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int referenced = 0;
 	struct page_referenced_arg *pra = arg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd_t *pmd;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address_pmd().
-		 */
-		pmd = page_check_address_pmd(page, mm, address, &ptl);
-		if (!pmd)
+	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
+		pte = huge_pte_offset(mm, address);
+		if (!pte)
 			return SWAP_AGAIN;
 
-		if (vma->vm_flags & VM_LOCKED) {
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		goto check_pte;
+	}
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, address);
+
+	if (pmd_trans_huge(*pmd)) {
+		int ret = SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
+		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		if (vma->vm_flags & VM_LOCKED) {
 			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
+			ret = SWAP_FAIL; /* To break the loop */
+			goto unlock_pmd;
 		}
 
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
 		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return ret;
 	} else {
-		pte_t *pte;
+		pmd_t pmde = *pmd;
 
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address().
-		 */
-		pte = page_check_address(page, mm, address, &ptl, 0);
-		if (!pte)
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 			return SWAP_AGAIN;
+	}
+map_pte:
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pte_unmap_unlock(pte, ptl);
-			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
-		}
+	ptl = pte_lockptr(mm, pmd);
+check_pte:
+	spin_lock(ptl);
 
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			/*
-			 * Don't treat a reference through a sequentially read
-			 * mapping as such. If the page has been used in
-			 * another mapping, we will catch it; if this other
-			 * mapping is already gone, the unmap path will have
-			 * set PG_referenced or activated the page.
-			 */
-			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-				referenced++;
-		}
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
+		pra->vm_flags |= VM_LOCKED;
+		return SWAP_FAIL; /* To break the loop */
 	}
 
+	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		/*
+		 * Don't treat a reference through a sequentially read
+		 * mapping as such. If the page has been used in
+		 * another mapping, we will catch it; if this other
+		 * mapping is already gone, the unmap path will have
+		 * set PG_referenced or activated the page.
+		 */
+		if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+			referenced++;
+	}
+	pte_unmap_unlock(pte, ptl);
+
+found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))
@@ -912,7 +959,7 @@ int page_referenced(struct page *page,
 	int ret;
 	int we_locked = 0;
 	struct page_referenced_arg pra = {
-		.mapcount = page_mapcount(page),
+		.mapcount = total_mapcount(page),
 		.memcg = memcg,
 	};
 	struct rmap_walk_control rwc = {
diff --git a/mm/util.c b/mm/util.c
index 8acb936a52c8..6d1f9200f74e 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -407,6 +407,20 @@ struct address_space *page_mapping(struct page *page)
 	return mapping;
 }
 
+/* Slow path of page_mapcount() for compound pages */
+int __page_mapcount(struct page *page)
+{
+	int ret;
+
+	ret = atomic_read(&page->_mapcount) + 1;
+	page = compound_head(page);
+	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
+	if (PageDoubleMap(page))
+		ret--;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__page_mapcount);
+
 int overcommit_ratio_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)