aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-12-15 21:53:22 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-12-15 21:53:22 -0500
commitf6f3732162b5ae3c771b9285a5a32d72b8586920 (patch)
tree29b118b6738f63cd6e112d2e62d934842adc9bf4
parent7a3c296ae08f9b51e399074d8ef6867d65fbd22b (diff)
Revert "mm: replace p??_write with pte_access_permitted in fault + gup paths"
This reverts commits 5c9d2d5c269c, c7da82b894e9, and e7fe7b5cae90. We'll probably need to revisit this, but basically we should not complicate the get_user_pages_fast() case, and checking the actual page table protection key bits will require more care anyway, since the protection keys depend on the exact state of the VM in question. Particularly when doing a "remote" page lookup (ie in somebody elses VM, not your own), you need to be much more careful than this was. Dave Hansen says: "So, the underlying bug here is that we now a get_user_pages_remote() and then go ahead and do the p*_access_permitted() checks against the current PKRU. This was introduced recently with the addition of the new p??_access_permitted() calls. We have checks in the VMA path for the "remote" gups and we avoid consulting PKRU for them. This got missed in the pkeys selftests because I did a ptrace read, but not a *write*. I also didn't explicitly test it against something where a COW needed to be done" It's also not entirely clear that it makes sense to check the protection key bits at this level at all. But one possible eventual solution is to make the get_user_pages_fast() case just abort if it sees protection key bits set, which makes us fall back to the regular get_user_pages() case, which then has a vma and can do the check there if we want to. We'll see. Somewhat related to this all: what we _do_ want to do some day is to check the PAGE_USER bit - it should obviously always be set for user pages, but it would be a good check to have back. Because we have no generic way to test for it, we lost it as part of moving over from the architecture-specific x86 GUP implementation to the generic one in commit e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation"). Cc: Peter Zijlstra <peterz@infradead.org> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: "Jérôme Glisse" <jglisse@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--arch/s390/include/asm/pgtable.h6
-rw-r--r--arch/sparc/mm/gup.c4
-rw-r--r--fs/dax.c3
-rw-r--r--mm/gup.c2
-rw-r--r--mm/hmm.c8
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/memory.c8
7 files changed, 15 insertions, 22 deletions
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 57d7bc92e0b8..0a6b0286c32e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
1264 return pud; 1264 return pud;
1265} 1265}
1266 1266
1267#define pud_write pud_write
1268static inline int pud_write(pud_t pud)
1269{
1270 return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
1271}
1272
1273static inline pud_t pud_mkclean(pud_t pud) 1267static inline pud_t pud_mkclean(pud_t pud)
1274{ 1268{
1275 if (pud_large(pud)) { 1269 if (pud_large(pud)) {
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 33c0f8bb0f33..5335ba3c850e 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
75 if (!(pmd_val(pmd) & _PAGE_VALID)) 75 if (!(pmd_val(pmd) & _PAGE_VALID))
76 return 0; 76 return 0;
77 77
78 if (!pmd_access_permitted(pmd, write)) 78 if (write && !pmd_write(pmd))
79 return 0; 79 return 0;
80 80
81 refs = 0; 81 refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
114 if (!(pud_val(pud) & _PAGE_VALID)) 114 if (!(pud_val(pud) & _PAGE_VALID))
115 return 0; 115 return 0;
116 116
117 if (!pud_access_permitted(pud, write)) 117 if (write && !pud_write(pud))
118 return 0; 118 return 0;
119 119
120 refs = 0; 120 refs = 0;
diff --git a/fs/dax.c b/fs/dax.c
index 78b72c48374e..95981591977a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
627 627
628 if (pfn != pmd_pfn(*pmdp)) 628 if (pfn != pmd_pfn(*pmdp))
629 goto unlock_pmd; 629 goto unlock_pmd;
630 if (!pmd_dirty(*pmdp) 630 if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
631 && !pmd_access_permitted(*pmdp, WRITE))
632 goto unlock_pmd; 631 goto unlock_pmd;
633 632
634 flush_cache_page(vma, address, pfn); 633 flush_cache_page(vma, address, pfn);
diff --git a/mm/gup.c b/mm/gup.c
index d3fb60e5bfac..e0d82b6706d7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -66,7 +66,7 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
66 */ 66 */
67static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) 67static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
68{ 68{
69 return pte_access_permitted(pte, WRITE) || 69 return pte_write(pte) ||
70 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); 70 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
71} 71}
72 72
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a5c172af560..ea19742a5d60 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -391,11 +391,11 @@ again:
391 if (pmd_protnone(pmd)) 391 if (pmd_protnone(pmd))
392 return hmm_vma_walk_clear(start, end, walk); 392 return hmm_vma_walk_clear(start, end, walk);
393 393
394 if (!pmd_access_permitted(pmd, write_fault)) 394 if (write_fault && !pmd_write(pmd))
395 return hmm_vma_walk_clear(start, end, walk); 395 return hmm_vma_walk_clear(start, end, walk);
396 396
397 pfn = pmd_pfn(pmd) + pte_index(addr); 397 pfn = pmd_pfn(pmd) + pte_index(addr);
398 flag |= pmd_access_permitted(pmd, WRITE) ? HMM_PFN_WRITE : 0; 398 flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
399 for (; addr < end; addr += PAGE_SIZE, i++, pfn++) 399 for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
400 pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag; 400 pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
401 return 0; 401 return 0;
@@ -456,11 +456,11 @@ again:
456 continue; 456 continue;
457 } 457 }
458 458
459 if (!pte_access_permitted(pte, write_fault)) 459 if (write_fault && !pte_write(pte))
460 goto fault; 460 goto fault;
461 461
462 pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag; 462 pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
463 pfns[i] |= pte_access_permitted(pte, WRITE) ? HMM_PFN_WRITE : 0; 463 pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
464 continue; 464 continue;
465 465
466fault: 466fault:
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2f2f5e774902..0e7ded98d114 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -870,7 +870,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
870 */ 870 */
871 WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set"); 871 WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
872 872
873 if (!pmd_access_permitted(*pmd, flags & FOLL_WRITE)) 873 if (flags & FOLL_WRITE && !pmd_write(*pmd))
874 return NULL; 874 return NULL;
875 875
876 if (pmd_present(*pmd) && pmd_devmap(*pmd)) 876 if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -1012,7 +1012,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
1012 1012
1013 assert_spin_locked(pud_lockptr(mm, pud)); 1013 assert_spin_locked(pud_lockptr(mm, pud));
1014 1014
1015 if (!pud_access_permitted(*pud, flags & FOLL_WRITE)) 1015 if (flags & FOLL_WRITE && !pud_write(*pud))
1016 return NULL; 1016 return NULL;
1017 1017
1018 if (pud_present(*pud) && pud_devmap(*pud)) 1018 if (pud_present(*pud) && pud_devmap(*pud))
@@ -1386,7 +1386,7 @@ out_unlock:
1386 */ 1386 */
1387static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags) 1387static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
1388{ 1388{
1389 return pmd_access_permitted(pmd, WRITE) || 1389 return pmd_write(pmd) ||
1390 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd)); 1390 ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
1391} 1391}
1392 1392
diff --git a/mm/memory.c b/mm/memory.c
index cfaba6287702..ca5674cbaff2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3949,7 +3949,7 @@ static int handle_pte_fault(struct vm_fault *vmf)
3949 if (unlikely(!pte_same(*vmf->pte, entry))) 3949 if (unlikely(!pte_same(*vmf->pte, entry)))
3950 goto unlock; 3950 goto unlock;
3951 if (vmf->flags & FAULT_FLAG_WRITE) { 3951 if (vmf->flags & FAULT_FLAG_WRITE) {
3952 if (!pte_access_permitted(entry, WRITE)) 3952 if (!pte_write(entry))
3953 return do_wp_page(vmf); 3953 return do_wp_page(vmf);
3954 entry = pte_mkdirty(entry); 3954 entry = pte_mkdirty(entry);
3955 } 3955 }
@@ -4014,7 +4014,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
4014 4014
4015 /* NUMA case for anonymous PUDs would go here */ 4015 /* NUMA case for anonymous PUDs would go here */
4016 4016
4017 if (dirty && !pud_access_permitted(orig_pud, WRITE)) { 4017 if (dirty && !pud_write(orig_pud)) {
4018 ret = wp_huge_pud(&vmf, orig_pud); 4018 ret = wp_huge_pud(&vmf, orig_pud);
4019 if (!(ret & VM_FAULT_FALLBACK)) 4019 if (!(ret & VM_FAULT_FALLBACK))
4020 return ret; 4020 return ret;
@@ -4047,7 +4047,7 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
4047 if (pmd_protnone(orig_pmd) && vma_is_accessible(vma)) 4047 if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
4048 return do_huge_pmd_numa_page(&vmf, orig_pmd); 4048 return do_huge_pmd_numa_page(&vmf, orig_pmd);
4049 4049
4050 if (dirty && !pmd_access_permitted(orig_pmd, WRITE)) { 4050 if (dirty && !pmd_write(orig_pmd)) {
4051 ret = wp_huge_pmd(&vmf, orig_pmd); 4051 ret = wp_huge_pmd(&vmf, orig_pmd);
4052 if (!(ret & VM_FAULT_FALLBACK)) 4052 if (!(ret & VM_FAULT_FALLBACK))
4053 return ret; 4053 return ret;
@@ -4337,7 +4337,7 @@ int follow_phys(struct vm_area_struct *vma,
4337 goto out; 4337 goto out;
4338 pte = *ptep; 4338 pte = *ptep;
4339 4339
4340 if (!pte_access_permitted(pte, flags & FOLL_WRITE)) 4340 if ((flags & FOLL_WRITE) && !pte_write(pte))
4341 goto unlock; 4341 goto unlock;
4342 4342
4343 *prot = pgprot_val(pte_pgprot(pte)); 4343 *prot = pgprot_val(pte_pgprot(pte));