-rw-r--r--  arch/x86/mm/fault.c  101
1 file changed, 62 insertions(+), 39 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index cd08f4fef836..c7e32f453852 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1032,7 +1032,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	}
 }
 
-static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
 {
 	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
 		return 0;
@@ -1071,7 +1071,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
  * (Optional Invalidation).
  */
 static noinline int
-spurious_fault(unsigned long error_code, unsigned long address)
+spurious_kernel_fault(unsigned long error_code, unsigned long address)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -1102,27 +1102,27 @@ spurious_fault(unsigned long error_code, unsigned long address)
 		return 0;
 
 	if (p4d_large(*p4d))
-		return spurious_fault_check(error_code, (pte_t *) p4d);
+		return spurious_kernel_fault_check(error_code, (pte_t *) p4d);
 
 	pud = pud_offset(p4d, address);
 	if (!pud_present(*pud))
 		return 0;
 
 	if (pud_large(*pud))
-		return spurious_fault_check(error_code, (pte_t *) pud);
+		return spurious_kernel_fault_check(error_code, (pte_t *) pud);
 
 	pmd = pmd_offset(pud, address);
 	if (!pmd_present(*pmd))
 		return 0;
 
 	if (pmd_large(*pmd))
-		return spurious_fault_check(error_code, (pte_t *) pmd);
+		return spurious_kernel_fault_check(error_code, (pte_t *) pmd);
 
 	pte = pte_offset_kernel(pmd, address);
 	if (!pte_present(*pte))
 		return 0;
 
-	ret = spurious_fault_check(error_code, pte);
+	ret = spurious_kernel_fault_check(error_code, pte);
 	if (!ret)
 		return 0;
 
@@ -1130,12 +1130,12 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	 * Make sure we have permissions in PMD.
 	 * If not, then there's a bug in the page tables:
 	 */
-	ret = spurious_fault_check(error_code, (pte_t *) pmd);
+	ret = spurious_kernel_fault_check(error_code, (pte_t *) pmd);
 	WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
 
 	return ret;
 }
-NOKPROBE_SYMBOL(spurious_fault);
+NOKPROBE_SYMBOL(spurious_kernel_fault);
 
 int show_unhandled_signals = 1;
 
@@ -1203,6 +1203,58 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 }
 
 /*
+ * Called for all faults where 'address' is part of the kernel address
+ * space.  Might get called for faults that originate from *code* that
+ * ran in userspace or the kernel.
+ */
+static void
+do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
+		   unsigned long address)
+{
+	/*
+	 * We can fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 *
+	 * Before doing this on-demand faulting, ensure that the
+	 * fault is not any of the following:
+	 * 1. A fault on a PTE with a reserved bit set.
+	 * 2. A fault caused by a user-mode access.  (Do not demand-
+	 *    fault kernel memory due to user-mode accesses).
+	 * 3. A fault caused by a page-level protection violation.
+	 *    (A demand fault would be on a non-present page which
+	 *     would have X86_PF_PROT==0).
+	 */
+	if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
+		if (vmalloc_fault(address) >= 0)
+			return;
+	}
+
+	/* Was the fault spurious, caused by lazy TLB invalidation? */
+	if (spurious_kernel_fault(hw_error_code, address))
+		return;
+
+	/* kprobes don't want to hook the spurious faults: */
+	if (kprobes_fault(regs))
+		return;
+
+	/*
+	 * Note, despite being a "bad area", there are quite a few
+	 * acceptable reasons to get here, such as erratum fixups
+	 * and handling kernel code that can fault, like get_user().
+	 *
+	 * Don't take the mm semaphore here. If we fixup a prefetch
+	 * fault we could otherwise deadlock:
+	 */
+	bad_area_nosemaphore(regs, hw_error_code, address, NULL);
+}
+NOKPROBE_SYMBOL(do_kern_addr_fault);
+
+/*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
@@ -1227,38 +1279,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
-	/*
-	 * We fault-in kernel-space virtual memory on-demand. The
-	 * 'reference' page table is init_mm.pgd.
-	 *
-	 * NOTE! We MUST NOT take any locks for this case. We may
-	 * be in an interrupt or a critical region, and should
-	 * only copy the information from the master page table,
-	 * nothing more.
-	 *
-	 * This verifies that the fault happens in kernel space
-	 * (hw_error_code & 4) == 0, and that the fault was not a
-	 * protection error (hw_error_code & 9) == 0.
-	 */
+	/* Was the fault on kernel-controlled part of the address space? */
 	if (unlikely(fault_in_kernel_space(address))) {
-		if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
-			if (vmalloc_fault(address) >= 0)
-				return;
-		}
-
-		/* Can handle a stale RO->RW TLB: */
-		if (spurious_fault(hw_error_code, address))
-			return;
-
-		/* kprobes don't want to hook the spurious faults: */
-		if (kprobes_fault(regs))
-			return;
-		/*
-		 * Don't take the mm semaphore here. If we fixup a prefetch
-		 * fault we could otherwise deadlock:
-		 */
-		bad_area_nosemaphore(regs, hw_error_code, address, NULL);
-
+		do_kern_addr_fault(regs, hw_error_code, address);
 		return;
 	}
 
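
For reference, a minimal user-space sketch of the error-code gate that the new do_kern_addr_fault() applies before attempting the vmalloc demand fault. The X86_PF_* values mirror the kernel's enum x86_pf_error_code; may_demand_fault_kernel() and the main() driver are hypothetical names used here only for illustration, not kernel API.

/*
 * User-space sketch (not kernel code): shows which hardware error codes
 * would still be eligible for the on-demand vmalloc fault path after
 * this patch, i.e. RSVD, USER and PROT must all be clear.
 */
#include <stdio.h>

#define X86_PF_PROT	(1 << 0)	/* page-level protection violation */
#define X86_PF_WRITE	(1 << 1)	/* fault was a write access */
#define X86_PF_USER	(1 << 2)	/* fault originated in user mode */
#define X86_PF_RSVD	(1 << 3)	/* reserved bit set in a paging entry */

/* Hypothetical helper: would the kernel even try vmalloc_fault()? */
static int may_demand_fault_kernel(unsigned long hw_error_code)
{
	return !(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT));
}

int main(void)
{
	/* Kernel-mode read of a not-present page: eligible (prints 1). */
	printf("%d\n", may_demand_fault_kernel(0));
	/* User-mode access to a kernel address: never demand-faulted (prints 0). */
	printf("%d\n", may_demand_fault_kernel(X86_PF_USER));
	/* Protection violation: the page was present, nothing to fault in (prints 0). */
	printf("%d\n", may_demand_fault_kernel(X86_PF_PROT | X86_PF_WRITE));
	return 0;
}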