author		Dave Hansen <dave.hansen@linux.intel.com>	2018-09-28 12:02:22 -0400
committer	Peter Zijlstra <peterz@infradead.org>	2018-10-09 10:51:15 -0400
commit		8fed62000039058adfd8b663344e2f448aed1e7a (patch)
tree		d6de290571da0a63fdec6d9d27e79e446c394889
parent		164477c2331be75d9bd57fb76704e676b2bcd1cd (diff)
x86/mm: Break out kernel address space handling
The page fault handler (__do_page_fault()) basically has two
sections: one for handling faults in the kernel portion of the
address space and another for faults in the user portion of the
address space.

But, these two parts don't stick out that well.  Let's make that more
clear from code separation and naming.  Pull kernel fault handling
into its own helper, and reflect that naming by renaming
spurious_fault() -> spurious_kernel_fault().

Also, rewrite the vmalloc() handling comment a bit.  It was a bit
stale and also glossed over the reserved bit handling.

Cc: x86@kernel.org
Cc: Jann Horn <jannh@google.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180928160222.401F4E10@viggo.jf.intel.com
-rw-r--r--	arch/x86/mm/fault.c	101
1 file changed, 62 insertions(+), 39 deletions(-)
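For orientation, here is a condensed sketch of the control flow this patch creates, paraphrased from the hunks below (not a literal excerpt; declarations and the user-address path are omitted): __do_page_fault() only decides which half of the address space faulted and dispatches, while all kernel-address handling moves into the new do_kern_addr_fault() helper.

	static void
	__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
			unsigned long address)
	{
		/* Was the fault on kernel-controlled part of the address space? */
		if (unlikely(fault_in_kernel_space(address))) {
			/*
			 * vmalloc demand-faulting, spurious (lazy-TLB) faults,
			 * kprobes, and the kernel "bad area" path all live here now.
			 */
			do_kern_addr_fault(regs, hw_error_code, address);
			return;
		}

		/* ... user-address handling continues unchanged ... */
	}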
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index cd08f4fef836..c7e32f453852 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1032,7 +1032,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	}
 }
 
-static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
 {
 	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
 		return 0;
@@ -1071,7 +1071,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * (Optional Invalidation).
  */
 static noinline int
-spurious_fault(unsigned long error_code, unsigned long address)
+spurious_kernel_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -1102,27 +1102,27 @@ spurious_fault(unsigned long error_code, unsigned long address)
 		return 0;
 
 	if (p4d_large(*p4d))
-		return spurious_fault_check(error_code, (pte_t *) p4d);
+		return spurious_kernel_fault_check(error_code, (pte_t *) p4d);
 
 	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		return 0;
 
 	if (pud_large(*pud))
-		return spurious_fault_check(error_code, (pte_t *) pud);
+		return spurious_kernel_fault_check(error_code, (pte_t *) pud);
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		return 0;
 
 	if (pmd_large(*pmd))
-		return spurious_fault_check(error_code, (pte_t *) pmd);
+		return spurious_kernel_fault_check(error_code, (pte_t *) pmd);
 
 	pte = pte_offset_kernel(pmd, address);
 	if (!pte_present(*pte))
 		return 0;
 
-	ret = spurious_fault_check(error_code, pte);
+	ret = spurious_kernel_fault_check(error_code, pte);
 	if (!ret)
 		return 0;
 
@@ -1130,12 +1130,12 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	 * Make sure we have permissions in PMD.
 	 * If not, then there's a bug in the page tables:
 	 */
-	ret = spurious_fault_check(error_code, (pte_t *) pmd);
+	ret = spurious_kernel_fault_check(error_code, (pte_t *) pmd);
 	WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
 
 	return ret;
 }
-NOKPROBE_SYMBOL(spurious_fault);
+NOKPROBE_SYMBOL(spurious_kernel_fault);
 
 int show_unhandled_signals = 1;
 
@@ -1203,6 +1203,58 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 }
 
 /*
+ * Called for all faults where 'address' is part of the kernel address
+ * space.  Might get called for faults that originate from *code* that
+ * ran in userspace or the kernel.
+ */
+static void
+do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
+		   unsigned long address)
+{
+	/*
+	 * We can fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * Before doing this on-demand faulting, ensure that the
+	 * fault is not any of the following:
+	 * 1. A fault on a PTE with a reserved bit set.
+	 * 2. A fault caused by a user-mode access.  (Do not demand-
+	 *    fault kernel memory due to user-mode accesses).
+	 * 3. A fault caused by a page-level protection violation.
+	 *    (A demand fault would be on a non-present page which
+	 *     would have X86_PF_PROT==0).
+	 */
+	if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+		if (vmalloc_fault(address) >= 0)
+			return;
+	}
+
+	/* Was the fault spurious, caused by lazy TLB invalidation? */
+	if (spurious_kernel_fault(hw_error_code, address))
+		return;
+
+	/* kprobes don't want to hook the spurious faults: */
+	if (kprobes_fault(regs))
+		return;
+
+	/*
+	 * Note, despite being a "bad area", there are quite a few
+	 * acceptable reasons to get here, such as erratum fixups
+	 * and handling kernel code that can fault, like get_user().
+	 *
+	 * Don't take the mm semaphore here. If we fixup a prefetch
+	 * fault we could otherwise deadlock:
+	 */
+	bad_area_nosemaphore(regs, hw_error_code, address, NULL);
+}
+NOKPROBE_SYMBOL(do_kern_addr_fault);
+
+/*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
@@ -1227,38 +1279,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
-	/*
-	 * We fault-in kernel-space virtual memory on-demand. The
-	 * 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 *
-	 * This verifies that the fault happens in kernel space
-	 * (hw_error_code & 4) == 0, and that the fault was not a
-	 * protection error (hw_error_code & 9) == 0.
-	 */
+	/* Was the fault on kernel-controlled part of the address space? */
 	if (unlikely(fault_in_kernel_space(address))) {
-		if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
-			if (vmalloc_fault(address) >= 0)
-				return;
-		}
-
-		/* Can handle a stale RO->RW TLB: */
-		if (spurious_fault(hw_error_code, address))
-			return;
-
-		/* kprobes don't want to hook the spurious faults: */
-		if (kprobes_fault(regs))
-			return;
-
-		/*
-		 * Don't take the mm semaphore here. If we fixup a prefetch
-		 * fault we could otherwise deadlock:
-		 */
-		bad_area_nosemaphore(regs, hw_error_code, address, NULL);
-
+		do_kern_addr_fault(regs, hw_error_code, address);
 		return;
 	}
 