author     Dan Williams <dan.j.williams@intel.com>           2016-01-15 19:56:52 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2016-01-15 20:56:32 -0500
commit     5c7fb56e5e3f7035dd798a8e1adee639f87043e5 (patch)
tree       e3419de32c4b42c918267a50120549cbec2ed63f
parent     5c2c2587b13235bf8b5c9027589f22eff68bdf49 (diff)
mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd
A dax-huge-page mapping, while it uses some thp helpers, is ultimately not
a transparent huge page. The distinction is especially important in the
get_user_pages() path. pmd_devmap() is used to distinguish dax-pmds from
the pmd_huge() (hugetlbfs) and pmd_trans_huge() (thp) cases, which have
slightly different semantics.

Explicitly mark the pmd_trans_huge() helpers that dax needs by adding
pmd_devmap() checks.
[kirill.shutemov@linux.intel.com: fix regression in handling mlocked pages in __split_huge_pmd()]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 arch/x86/include/asm/pgtable.h |  9
 include/linux/huge_mm.h        |  5
 include/linux/mm.h             |  7
 mm/huge_memory.c               | 38
 mm/memory.c                    |  8
 mm/mprotect.c                  |  5
 mm/pgtable-generic.c           |  2
 7 files changed, 47 insertions(+), 27 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6585a8b10fea..6a0ad82c8d0f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -164,13 +164,20 @@ static inline int pmd_large(pmd_t pte)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_trans_huge(pmd_t pmd)
 {
-        return pmd_val(pmd) & _PAGE_PSE;
+        return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
 }
 
 static inline int has_transparent_hugepage(void)
 {
         return cpu_has_pse;
 }
+
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static inline int pmd_devmap(pmd_t pmd)
+{
+        return !!(pmd_val(pmd) & _PAGE_DEVMAP);
+}
+#endif
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
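A note on the masked comparison: a dax pmd sets _PAGE_PSE too (it is a huge
mapping at the hardware level), so testing _PAGE_PSE alone can no longer
identify a thp. Below is a minimal userspace sketch of the old and new
predicates; the bit positions mirror x86 but are hard-coded for the demo, so
treat them as illustrative:

    #include <assert.h>
    #include <stdint.h>

    #define _PAGE_PSE    (1ULL << 7)   /* assumed x86 bit position */
    #define _PAGE_DEVMAP (1ULL << 58)  /* assumed software bit position */

    typedef uint64_t pmdval_t;

    static int pmd_trans_huge_old(pmdval_t v) { return !!(v & _PAGE_PSE); }
    static int pmd_devmap(pmdval_t v)         { return !!(v & _PAGE_DEVMAP); }
    static int pmd_trans_huge_new(pmdval_t v)
    {
        return (v & (_PAGE_PSE | _PAGE_DEVMAP)) == _PAGE_PSE;
    }

    int main(void)
    {
        pmdval_t thp = _PAGE_PSE;                 /* anonymous huge page */
        pmdval_t dax = _PAGE_PSE | _PAGE_DEVMAP;  /* device-backed huge page */

        assert(pmd_trans_huge_old(dax));          /* old test: dax passed for thp */
        assert(!pmd_trans_huge_new(dax) && pmd_devmap(dax));
        assert(pmd_trans_huge_new(thp) && !pmd_devmap(thp));
        return 0;
    }

From here the rest of the series is mostly mechanical: each path that must
also accept dax pmds grows an || pmd_devmap(...) alongside its
pmd_trans_huge() test.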
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8ca35a131904..d39fa60bd6bf 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -104,7 +104,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 #define split_huge_pmd(__vma, __pmd, __address)                         \
         do {                                                            \
                 pmd_t *____pmd = (__pmd);                               \
-                if (pmd_trans_huge(*____pmd))                           \
+                if (pmd_trans_huge(*____pmd)                            \
+                                        || pmd_devmap(*____pmd))        \
                         __split_huge_pmd(__vma, __pmd, __address);      \
         }  while (0)
 
@@ -124,7 +125,7 @@ static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
                 spinlock_t **ptl)
 {
         VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
-        if (pmd_trans_huge(*pmd))
+        if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
                 return __pmd_trans_huge_lock(pmd, vma, ptl);
         else
                 return false;
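The wrapper's unlocked test is only a hint; __pmd_trans_huge_lock() re-checks
the pmd once the lock is held, since the entry can be split or zapped in
between. A rough userspace model of that lock-then-revalidate idiom, with a
pthread mutex standing in for the pmd spinlock (names and types are
stand-ins, not kernel API):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PSE    (1u << 0)  /* stand-in flag bits */
    #define DEVMAP (1u << 1)

    struct pmd {
        pthread_mutex_t lock;
        uint32_t val;
    };

    static bool pmd_trans_huge(uint32_t v) { return (v & (PSE | DEVMAP)) == PSE; }
    static bool pmd_devmap(uint32_t v)     { return v & DEVMAP; }

    /* Take the lock, then re-check: the pmd may have changed between the
     * caller's unlocked peek and lock acquisition. */
    static bool pmd_trans_huge_lock(struct pmd *pmd)
    {
        pthread_mutex_lock(&pmd->lock);
        if (pmd_trans_huge(pmd->val) || pmd_devmap(pmd->val))
            return true;              /* caller unlocks when done */
        pthread_mutex_unlock(&pmd->lock);
        return false;
    }

    int main(void)
    {
        struct pmd dax = { PTHREAD_MUTEX_INITIALIZER, PSE | DEVMAP };

        if (pmd_trans_huge_lock(&dax)) {
            puts("dax pmd handled under the pmd lock");
            pthread_mutex_unlock(&dax.lock);
        }
        return 0;
    }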
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a9902152449f..cd123272d28d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -329,6 +329,13 @@ struct inode;
 #define page_private(page)              ((page)->private)
 #define set_page_private(page, v)       ((page)->private = (v))
 
+#if !defined(__HAVE_ARCH_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static inline int pmd_devmap(pmd_t pmd)
+{
+        return 0;
+}
+#endif
+
 /*
  * FIXME: take this include out, include page-flags.h in
  * files which need it (119 of them)
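This is the usual compile-away stub pattern: on configurations without
_PAGE_DEVMAP, or with thp disabled, pmd_devmap() is a constant 0, the
compiler folds pmd_trans_huge(*pmd) || pmd_devmap(*pmd) back down to the
plain thp test, and no caller needs an #ifdef. A hedged sketch of the idiom
in isolation (the HAVE_DEVMAP macro is a placeholder, not a kernel symbol):

    #include <stdio.h>

    typedef unsigned long pmd_t;

    /* an "arch header" would define HAVE_DEVMAP and a real pmd_devmap() */
    /* #define HAVE_DEVMAP */

    #ifndef HAVE_DEVMAP
    /* fallback stub: dead-code-eliminated at any optimization level */
    static inline int pmd_devmap(pmd_t pmd) { (void)pmd; return 0; }
    #endif

    static inline int pmd_trans_huge(pmd_t pmd) { return pmd & 1; }

    int main(void)
    {
        pmd_t pmd = 1;

        /* callers write the combined test unconditionally */
        if (pmd_trans_huge(pmd) || pmd_devmap(pmd))
            puts("huge-pmd path taken");
        return 0;
    }

The same trick is why none of the call sites touched below need any
conditional compilation of their own.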
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d93706013a55..82bed2bec3ed 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -995,7 +995,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
         ret = -EAGAIN;
         pmd = *src_pmd;
-        if (unlikely(!pmd_trans_huge(pmd))) {
+        if (unlikely(!pmd_trans_huge(pmd) && !pmd_devmap(pmd))) {
                 pte_free(dst_mm, pgtable);
                 goto out_unlock;
         }
@@ -1018,17 +1018,20 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 goto out_unlock;
         }
 
-        src_page = pmd_page(pmd);
-        VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
-        get_page(src_page);
-        page_dup_rmap(src_page, true);
-        add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+        if (pmd_trans_huge(pmd)) {
+                /* thp accounting separate from pmd_devmap accounting */
+                src_page = pmd_page(pmd);
+                VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
+                get_page(src_page);
+                page_dup_rmap(src_page, true);
+                add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+                atomic_long_inc(&dst_mm->nr_ptes);
+                pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+        }
 
         pmdp_set_wrprotect(src_mm, addr, src_pmd);
         pmd = pmd_mkold(pmd_wrprotect(pmd));
-        pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
         set_pmd_at(dst_mm, addr, dst_pmd, pmd);
-        atomic_long_inc(&dst_mm->nr_ptes);
 
         ret = 0;
 out_unlock:
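The copy_huge_pmd() rework is the one place the patch does more than widen a
predicate. At fork, a thp pmd has a compound page whose refcount, rmap,
MM_ANONPAGES counter and deposited page table must all follow the copy, while
a dax pmd maps device memory with no struct page to account for, so the two
flavors share only the wrprotect-and-mkold step. A toy model of that split
(names and the counter arithmetic are stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    struct mm { long anon_pages; long nr_ptes; };

    #define HPAGE_PMD_NR 512  /* a 2M huge page is 512 base pages on x86 */

    static void copy_huge_pmd(struct mm *dst, bool is_thp)
    {
        if (is_thp) {
            /* thp accounting separate from pmd_devmap accounting */
            dst->anon_pages += HPAGE_PMD_NR;  /* models add_mm_counter() */
            dst->nr_ptes++;                   /* models the deposited table */
        }
        /* both flavors: wrprotect + mkold the parent entry, install in child */
    }

    int main(void)
    {
        struct mm child = { 0, 0 };

        copy_huge_pmd(&child, true);   /* thp pmd */
        copy_huge_pmd(&child, false);  /* dax pmd: no page accounting */
        printf("anon_pages=%ld nr_ptes=%ld\n", child.anon_pages, child.nr_ptes);
        return 0;
    }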
@@ -1716,7 +1719,7 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
                 spinlock_t **ptl)
 {
         *ptl = pmd_lock(vma->vm_mm, pmd);
-        if (likely(pmd_trans_huge(*pmd)))
+        if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
                 return true;
         spin_unlock(*ptl);
         return false;
@@ -2788,7 +2791,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
         VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
         VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
         VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
-        VM_BUG_ON(!pmd_trans_huge(*pmd));
+        VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd));
 
         count_vm_event(THP_SPLIT_PMD);
 
@@ -2901,14 +2904,15 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
         mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
         ptl = pmd_lock(mm, pmd);
-        if (unlikely(!pmd_trans_huge(*pmd)))
+        if (pmd_trans_huge(*pmd)) {
+                page = pmd_page(*pmd);
+                if (PageMlocked(page))
+                        get_page(page);
+                else
+                        page = NULL;
+        } else if (!pmd_devmap(*pmd))
                 goto out;
-        page = pmd_page(*pmd);
         __split_huge_pmd_locked(vma, pmd, haddr, false);
-        if (PageMlocked(page))
-                get_page(page);
-        else
-                page = NULL;
 out:
         spin_unlock(ptl);
         mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
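This hunk also carries the mlock regression fix credited to Kirill in the
changelog: the reference to a mlocked page must be taken before the split,
and only for a real thp, because a dax pmd has no compound page to pin. A
control-flow sketch of the fixed function (purely illustrative; names are
stand-ins):

    #include <stdbool.h>
    #include <stdio.h>

    enum pmd_kind { PMD_NONE, PMD_THP, PMD_DAX };

    static void split_huge_pmd(enum pmd_kind kind, bool mlocked)
    {
        bool pinned = false;

        if (kind == PMD_THP) {
            if (mlocked)
                pinned = true;     /* models get_page() before the split */
        } else if (kind != PMD_DAX) {
            return;                /* nothing huge here: bail out */
        }
        /* __split_huge_pmd_locked() runs for both thp and dax */
        printf("split %s pmd%s\n", kind == PMD_THP ? "thp" : "dax",
               pinned ? " (mlocked page pinned across the split)" : "");
    }

    int main(void)
    {
        split_huge_pmd(PMD_THP, true);
        split_huge_pmd(PMD_DAX, false);
        split_huge_pmd(PMD_NONE, false);
        return 0;
    }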
@@ -2938,7 +2942,7 @@ static void split_huge_pmd_address(struct vm_area_struct *vma,
                 return;
 
         pmd = pmd_offset(pud, address);
-        if (!pmd_present(*pmd) || !pmd_trans_huge(*pmd))
+        if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
                 return;
         /*
          * Caller holds the mmap_sem write mode, so a huge pmd cannot
diff --git a/mm/memory.c b/mm/memory.c
index 552ae3d69435..ff17850a52d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -950,7 +950,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
         src_pmd = pmd_offset(src_pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
-                if (pmd_trans_huge(*src_pmd)) {
+                if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) {
                         int err;
                         VM_BUG_ON(next-addr != HPAGE_PMD_SIZE);
                         err = copy_huge_pmd(dst_mm, src_mm,
@@ -1177,7 +1177,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
-                if (pmd_trans_huge(*pmd)) {
+                if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                         if (next - addr != HPAGE_PMD_SIZE) {
 #ifdef CONFIG_DEBUG_VM
                                 if (!rwsem_is_locked(&tlb->mm->mmap_sem)) {
@@ -3375,7 +3375,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 int ret;
 
                 barrier();
-                if (pmd_trans_huge(orig_pmd)) {
+                if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
                         unsigned int dirty = flags & FAULT_FLAG_WRITE;
 
                         if (pmd_protnone(orig_pmd))
@@ -3404,7 +3404,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
             unlikely(__pte_alloc(mm, vma, pmd, address)))
                 return VM_FAULT_OOM;
         /* if an huge pmd materialized from under us just retry later */
-        if (unlikely(pmd_trans_huge(*pmd)))
+        if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
                 return 0;
         /*
          * A regular pmd is established and it can't morph into a huge pmd
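The second fault-path hunk extends an optimistic-retry idiom: __pte_alloc()
runs without the pmd lock, so a concurrent fault may install a huge pmd, now
either thp or dax, in the meantime; returning 0 lets the hardware re-fault
and take the huge-pmd branch on the next pass. A userspace sketch of that
race handling (all names are stand-ins):

    #include <stdio.h>

    enum pmd_state { PMD_NONE, PMD_TABLE, PMD_HUGE_THP, PMD_HUGE_DAX };

    static int handle_mm_fault(enum pmd_state *pmd)
    {
        if (*pmd == PMD_HUGE_THP || *pmd == PMD_HUGE_DAX)
            return 1;               /* huge path: wp/numa/dax handlers */

        /* ... __pte_alloc() would run here, without the pmd lock ... */

        if (*pmd == PMD_HUGE_THP || *pmd == PMD_HUGE_DAX)
            return 0;               /* materialized under us: retry */

        return 2;                   /* regular pte path; a table pmd
                                     * never morphs into a huge pmd */
    }

    int main(void)
    {
        enum pmd_state pmd = PMD_HUGE_DAX;

        printf("ret=%d (a dax pmd now takes the huge branch)\n",
               handle_mm_fault(&pmd));
        return 0;
    }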
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6047707085c1..8eb7bb40dc40 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -149,7 +149,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                 unsigned long this_pages;
 
                 next = pmd_addr_end(addr, end);
-                if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
+                if (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
+                                && pmd_none_or_clear_bad(pmd))
                         continue;
 
                 /* invoke the mmu notifier if the pmd is populated */
@@ -158,7 +159,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                         mmu_notifier_invalidate_range_start(mm, mni_start, end);
                 }
 
-                if (pmd_trans_huge(*pmd)) {
+                if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
                         if (next - addr != HPAGE_PMD_SIZE)
                                 split_huge_pmd(vma, pmd, addr);
                         else {
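The next - addr != HPAGE_PMD_SIZE test deserves a worked example:
pmd_addr_end() clamps next to the lesser of the next pmd boundary and the
range end, so the comparison asks whether the mprotect() range covers the
whole huge pmd; if not, the pmd is split first. The constants below assume
x86-64 and pmd_addr_end() is re-derived for the demo, so treat the details
as illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define HPAGE_PMD_SIZE (2UL << 20)            /* 2 MiB on x86-64 */
    #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))

    static uint64_t pmd_addr_end(uint64_t addr, uint64_t end)
    {
        uint64_t boundary = (addr + HPAGE_PMD_SIZE) & HPAGE_PMD_MASK;
        return boundary < end ? boundary : end;
    }

    int main(void)
    {
        /* mprotect() over only the first half of a 2 MiB mapping */
        uint64_t addr = 0x200000, end = 0x300000;
        uint64_t next = pmd_addr_end(addr, end);

        printf("next - addr = 0x%llx -> %s\n",
               (unsigned long long)(next - addr),
               next - addr != HPAGE_PMD_SIZE ? "split_huge_pmd() first"
                                             : "change protection in place");
        return 0;
    }

Here next - addr is 0x100000, half a huge pmd, so the range takes the split
path before the protection change proceeds at pte granularity.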
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index c311a2ec6fea..9d4767698a1c 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -132,7 +132,7 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
         pmd_t pmd;
         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-        VM_BUG_ON(!pmd_trans_huge(*pmdp));
+        VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
         pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
         flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
         return pmd;