author     Ross Zwisler <ross.zwisler@linux.intel.com>      2017-06-02 17:46:34 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-06-02 18:07:37 -0400
commit     d0f0931de936a0a468d7e59284d39581c16d3a73 (patch)
tree       085c4285db534664f2624a0442127657ad52a199 /mm/memory.c
parent     c288983dddf714216428774e022ad78f48dd8cb1 (diff)
mm: avoid spurious 'bad pmd' warning messages
When the pmd_devmap() checks were added by commit 5c7fb56e5e3f ("mm, dax:
dax-pmd vs thp-pmd vs hugetlbfs-pmd") to better support DAX huge pages,
they were all appended to the end of if() statements, after the existing
pmd_trans_huge() checks. So, things like:
- if (pmd_trans_huge(*pmd))
+ if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
When commit 7267ec008b5c ("mm: postpone page table allocation until we
have page to map") added further pmd_devmap() checks after
pmd_trans_unstable() checks, they were likewise placed at the end of the
conditional:
+ if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd))
This ordering is fine for pmd_trans_huge(), but doesn't work for
pmd_trans_unstable(). This is because DAX huge pages trip the pmd_bad()
check inside of pmd_none_or_trans_huge_or_clear_bad() (called by
pmd_trans_unstable()), which prints a warning and returns 1. So we do end
up doing the right thing, but only after spamming dmesg with
suspicious-looking messages:
mm/pgtable-generic.c:39: bad pmd ffff8808daa49b88(84000001006000a5)
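For context, this is roughly what pmd_trans_unstable() does under the hood
via pmd_none_or_trans_huge_or_clear_bad(); the sketch below is a simplified
paraphrase of the generic helper in include/asm-generic/pgtable.h
(config-dependent barriers and !THP details omitted), not the exact source:

static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
        /* read the pmd once so a racing THP split can't hand us a torn value */
        pmd_t pmdval = pmd_read_atomic(pmd);

        if (pmd_none(pmdval) || pmd_trans_huge(pmdval))
                return 1;

        /*
         * A DAX huge pmd is pmd_devmap(), not pmd_trans_huge(), so it falls
         * through to here.  It is not a pointer to a pte page either, so
         * pmd_bad() is true and pmd_clear_bad() emits the "bad pmd" warning
         * (from mm/pgtable-generic.c) before returning 1.
         */
        if (unlikely(pmd_bad(pmdval))) {
                pmd_clear_bad(pmd);
                return 1;
        }

        return 0;
}

Checking pmd_devmap() before pmd_trans_unstable(), as the helper below does,
short-circuits before this path is reached for DAX huge pages, so the caller
still gets 1 but without the dmesg noise.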
Reorder these checks in a new helper, pmd_devmap_trans_unstable(), so that
pmd_devmap() is checked first, avoiding the error messages, and add a
comment explaining why the ordering is important.
Fixes: commit 7267ec008b5c ("mm: postpone page table allocation until we have page to map")
Link: http://lkml.kernel.org/r/20170522215749.23516-1-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Pawel Lebioda <pawel.lebioda@intel.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Xiong Zhou <xzhou@redhat.com>
Cc: Eryu Guan <eguan@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--   mm/memory.c | 40
1 file changed, 30 insertions(+), 10 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d729ded0..2e65df1831d9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3029,6 +3029,17 @@ static int __do_fault(struct vm_fault *vmf)
 	return ret;
 }
 
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
 static int pte_alloc_one_map(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -3052,18 +3063,27 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 map_pte:
 	/*
 	 * If a huge pmd materialized under us just retry later. Use
-	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
-	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
-	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
-	 * in a different thread of this mm, in turn leading to a misleading
-	 * pmd_trans_huge() retval. All we have to ensure is that it is a
-	 * regular pmd that we can walk with pte_offset_map() and we can do that
-	 * through an atomic read in C, which is what pmd_trans_unstable()
-	 * provides.
+	 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+	 * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+	 * under us and then back to pmd_none, as a result of MADV_DONTNEED
+	 * running immediately after a huge pmd fault in a different thread of
+	 * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+	 * All we have to ensure is that it is a regular pmd that we can walk
+	 * with pte_offset_map() and we can do that through an atomic read in
+	 * C, which is what pmd_trans_unstable() provides.
 	 */
-	if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+	if (pmd_devmap_trans_unstable(vmf->pmd))
 		return VM_FAULT_NOPAGE;
 
+	/*
+	 * At this point we know that our vmf->pmd points to a page of ptes
+	 * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+	 * for the duration of the fault. If a racing MADV_DONTNEED runs and
+	 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+	 * be valid and we will re-check to make sure the vmf->pte isn't
+	 * pte_none() under vmf->ptl protection when we return to
+	 * alloc_set_pte().
+	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
 	return 0;
@@ -3690,7 +3710,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
 		vmf->pte = NULL;
 	} else {
 		/* See comment in pte_alloc_one_map() */
-		if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
+		if (pmd_devmap_trans_unstable(vmf->pmd))
 			return 0;
 		/*
 		 * A regular pmd is established and it can't morph into a huge