diff options
author | Dan Williams <dan.j.williams@intel.com> | 2016-01-15 19:56:52 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-15 20:56:32 -0500 |
commit | 5c7fb56e5e3f7035dd798a8e1adee639f87043e5 (patch) | |
tree | e3419de32c4b42c918267a50120549cbec2ed63f /mm/huge_memory.c | |
parent | 5c2c2587b13235bf8b5c9027589f22eff68bdf49 (diff) |
mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd
A dax-huge-page mapping, while it uses some thp helpers, is ultimately not
a transparent huge page. The distinction is especially important in the
get_user_pages() path. pmd_devmap() is used to distinguish dax-pmds
from pmd_huge() and pmd_trans_huge(), which have slightly different
semantics.
Explicitly mark the pmd_trans_huge() helpers that dax needs by adding
pmd_devmap() checks.
[kirill.shutemov@linux.intel.com: fix regression in handling mlocked pages in __split_huge_pmd()]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 38 |
1 file changed, 21 insertions, 17 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d93706013a55..82bed2bec3ed 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -995,7 +995,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
995 | 995 | ||
996 | ret = -EAGAIN; | 996 | ret = -EAGAIN; |
997 | pmd = *src_pmd; | 997 | pmd = *src_pmd; |
998 | if (unlikely(!pmd_trans_huge(pmd))) { | 998 | if (unlikely(!pmd_trans_huge(pmd) && !pmd_devmap(pmd))) { |
999 | pte_free(dst_mm, pgtable); | 999 | pte_free(dst_mm, pgtable); |
1000 | goto out_unlock; | 1000 | goto out_unlock; |
1001 | } | 1001 | } |
@@ -1018,17 +1018,20 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
1018 | goto out_unlock; | 1018 | goto out_unlock; |
1019 | } | 1019 | } |
1020 | 1020 | ||
1021 | src_page = pmd_page(pmd); | 1021 | if (pmd_trans_huge(pmd)) { |
1022 | VM_BUG_ON_PAGE(!PageHead(src_page), src_page); | 1022 | /* thp accounting separate from pmd_devmap accounting */ |
1023 | get_page(src_page); | 1023 | src_page = pmd_page(pmd); |
1024 | page_dup_rmap(src_page, true); | 1024 | VM_BUG_ON_PAGE(!PageHead(src_page), src_page); |
1025 | add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); | 1025 | get_page(src_page); |
1026 | page_dup_rmap(src_page, true); | ||
1027 | add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); | ||
1028 | atomic_long_inc(&dst_mm->nr_ptes); | ||
1029 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); | ||
1030 | } | ||
1026 | 1031 | ||
1027 | pmdp_set_wrprotect(src_mm, addr, src_pmd); | 1032 | pmdp_set_wrprotect(src_mm, addr, src_pmd); |
1028 | pmd = pmd_mkold(pmd_wrprotect(pmd)); | 1033 | pmd = pmd_mkold(pmd_wrprotect(pmd)); |
1029 | pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); | ||
1030 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); | 1034 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); |
1031 | atomic_long_inc(&dst_mm->nr_ptes); | ||
1032 | 1035 | ||
1033 | ret = 0; | 1036 | ret = 0; |
1034 | out_unlock: | 1037 | out_unlock: |
@@ -1716,7 +1719,7 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, | |||
1716 | spinlock_t **ptl) | 1719 | spinlock_t **ptl) |
1717 | { | 1720 | { |
1718 | *ptl = pmd_lock(vma->vm_mm, pmd); | 1721 | *ptl = pmd_lock(vma->vm_mm, pmd); |
1719 | if (likely(pmd_trans_huge(*pmd))) | 1722 | if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd))) |
1720 | return true; | 1723 | return true; |
1721 | spin_unlock(*ptl); | 1724 | spin_unlock(*ptl); |
1722 | return false; | 1725 | return false; |
@@ -2788,7 +2791,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, | |||
2788 | VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); | 2791 | VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); |
2789 | VM_BUG_ON_VMA(vma->vm_start > haddr, vma); | 2792 | VM_BUG_ON_VMA(vma->vm_start > haddr, vma); |
2790 | VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); | 2793 | VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); |
2791 | VM_BUG_ON(!pmd_trans_huge(*pmd)); | 2794 | VM_BUG_ON(!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)); |
2792 | 2795 | ||
2793 | count_vm_event(THP_SPLIT_PMD); | 2796 | count_vm_event(THP_SPLIT_PMD); |
2794 | 2797 | ||
@@ -2901,14 +2904,15 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, | |||
2901 | 2904 | ||
2902 | mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE); | 2905 | mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE); |
2903 | ptl = pmd_lock(mm, pmd); | 2906 | ptl = pmd_lock(mm, pmd); |
2904 | if (unlikely(!pmd_trans_huge(*pmd))) | 2907 | if (pmd_trans_huge(*pmd)) { |
2908 | page = pmd_page(*pmd); | ||
2909 | if (PageMlocked(page)) | ||
2910 | get_page(page); | ||
2911 | else | ||
2912 | page = NULL; | ||
2913 | } else if (!pmd_devmap(*pmd)) | ||
2905 | goto out; | 2914 | goto out; |
2906 | page = pmd_page(*pmd); | ||
2907 | __split_huge_pmd_locked(vma, pmd, haddr, false); | 2915 | __split_huge_pmd_locked(vma, pmd, haddr, false); |
2908 | if (PageMlocked(page)) | ||
2909 | get_page(page); | ||
2910 | else | ||
2911 | page = NULL; | ||
2912 | out: | 2916 | out: |
2913 | spin_unlock(ptl); | 2917 | spin_unlock(ptl); |
2914 | mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE); | 2918 | mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE); |
@@ -2938,7 +2942,7 @@ static void split_huge_pmd_address(struct vm_area_struct *vma, | |||
2938 | return; | 2942 | return; |
2939 | 2943 | ||
2940 | pmd = pmd_offset(pud, address); | 2944 | pmd = pmd_offset(pud, address); |
2941 | if (!pmd_present(*pmd) || !pmd_trans_huge(*pmd)) | 2945 | if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd))) |
2942 | return; | 2946 | return; |
2943 | /* | 2947 | /* |
2944 | * Caller holds the mmap_sem write mode, so a huge pmd cannot | 2948 | * Caller holds the mmap_sem write mode, so a huge pmd cannot |