diff options
-rw-r--r-- | arch/x86/mm/fault.c | 101 |
1 files changed, 62 insertions, 39 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index cd08f4fef836..c7e32f453852 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1032,7 +1032,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, | |||
1032 | } | 1032 | } |
1033 | } | 1033 | } |
1034 | 1034 | ||
1035 | static int spurious_fault_check(unsigned long error_code, pte_t *pte) | 1035 | static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte) |
1036 | { | 1036 | { |
1037 | if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) | 1037 | if ((error_code & X86_PF_WRITE) && !pte_write(*pte)) |
1038 | return 0; | 1038 | return 0; |
@@ -1071,7 +1071,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) | |||
1071 | * (Optional Invalidation). | 1071 | * (Optional Invalidation). |
1072 | */ | 1072 | */ |
1073 | static noinline int | 1073 | static noinline int |
1074 | spurious_fault(unsigned long error_code, unsigned long address) | 1074 | spurious_kernel_fault(unsigned long error_code, unsigned long address) |
1075 | { | 1075 | { |
1076 | pgd_t *pgd; | 1076 | pgd_t *pgd; |
1077 | p4d_t *p4d; | 1077 | p4d_t *p4d; |
@@ -1102,27 +1102,27 @@ spurious_fault(unsigned long error_code, unsigned long address) | |||
1102 | return 0; | 1102 | return 0; |
1103 | 1103 | ||
1104 | if (p4d_large(*p4d)) | 1104 | if (p4d_large(*p4d)) |
1105 | return spurious_fault_check(error_code, (pte_t *) p4d); | 1105 | return spurious_kernel_fault_check(error_code, (pte_t *) p4d); |
1106 | 1106 | ||
1107 | pud = pud_offset(p4d, address); | 1107 | pud = pud_offset(p4d, address); |
1108 | if (!pud_present(*pud)) | 1108 | if (!pud_present(*pud)) |
1109 | return 0; | 1109 | return 0; |
1110 | 1110 | ||
1111 | if (pud_large(*pud)) | 1111 | if (pud_large(*pud)) |
1112 | return spurious_fault_check(error_code, (pte_t *) pud); | 1112 | return spurious_kernel_fault_check(error_code, (pte_t *) pud); |
1113 | 1113 | ||
1114 | pmd = pmd_offset(pud, address); | 1114 | pmd = pmd_offset(pud, address); |
1115 | if (!pmd_present(*pmd)) | 1115 | if (!pmd_present(*pmd)) |
1116 | return 0; | 1116 | return 0; |
1117 | 1117 | ||
1118 | if (pmd_large(*pmd)) | 1118 | if (pmd_large(*pmd)) |
1119 | return spurious_fault_check(error_code, (pte_t *) pmd); | 1119 | return spurious_kernel_fault_check(error_code, (pte_t *) pmd); |
1120 | 1120 | ||
1121 | pte = pte_offset_kernel(pmd, address); | 1121 | pte = pte_offset_kernel(pmd, address); |
1122 | if (!pte_present(*pte)) | 1122 | if (!pte_present(*pte)) |
1123 | return 0; | 1123 | return 0; |
1124 | 1124 | ||
1125 | ret = spurious_fault_check(error_code, pte); | 1125 | ret = spurious_kernel_fault_check(error_code, pte); |
1126 | if (!ret) | 1126 | if (!ret) |
1127 | return 0; | 1127 | return 0; |
1128 | 1128 | ||
@@ -1130,12 +1130,12 @@ spurious_fault(unsigned long error_code, unsigned long address) | |||
1130 | * Make sure we have permissions in PMD. | 1130 | * Make sure we have permissions in PMD. |
1131 | * If not, then there's a bug in the page tables: | 1131 | * If not, then there's a bug in the page tables: |
1132 | */ | 1132 | */ |
1133 | ret = spurious_fault_check(error_code, (pte_t *) pmd); | 1133 | ret = spurious_kernel_fault_check(error_code, (pte_t *) pmd); |
1134 | WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); | 1134 | WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); |
1135 | 1135 | ||
1136 | return ret; | 1136 | return ret; |
1137 | } | 1137 | } |
1138 | NOKPROBE_SYMBOL(spurious_fault); | 1138 | NOKPROBE_SYMBOL(spurious_kernel_fault); |
1139 | 1139 | ||
1140 | int show_unhandled_signals = 1; | 1140 | int show_unhandled_signals = 1; |
1141 | 1141 | ||
@@ -1203,6 +1203,58 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) | |||
1203 | } | 1203 | } |
1204 | 1204 | ||
1205 | /* | 1205 | /* |
1206 | * Called for all faults where 'address' is part of the kernel address | ||
1207 | * space. Might get called for faults that originate from *code* that | ||
1208 | * ran in userspace or the kernel. | ||
1209 | */ | ||
1210 | static void | ||
1211 | do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, | ||
1212 | unsigned long address) | ||
1213 | { | ||
1214 | /* | ||
1215 | * We can fault-in kernel-space virtual memory on-demand. The | ||
1216 | * 'reference' page table is init_mm.pgd. | ||
1217 | * | ||
1218 | * NOTE! We MUST NOT take any locks for this case. We may | ||
1219 | * be in an interrupt or a critical region, and should | ||
1220 | * only copy the information from the master page table, | ||
1221 | * nothing more. | ||
1222 | * | ||
1223 | * Before doing this on-demand faulting, ensure that the | ||
1224 | * fault is not any of the following: | ||
1225 | * 1. A fault on a PTE with a reserved bit set. | ||
1226 | * 2. A fault caused by a user-mode access. (Do not demand- | ||
1227 | * fault kernel memory due to user-mode accesses). | ||
1228 | * 3. A fault caused by a page-level protection violation. | ||
1229 | * (A demand fault would be on a non-present page which | ||
1230 | * would have X86_PF_PROT==0). | ||
1231 | */ | ||
1232 | if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { | ||
1233 | if (vmalloc_fault(address) >= 0) | ||
1234 | return; | ||
1235 | } | ||
1236 | |||
1237 | /* Was the fault spurious, caused by lazy TLB invalidation? */ | ||
1238 | if (spurious_kernel_fault(hw_error_code, address)) | ||
1239 | return; | ||
1240 | |||
1241 | /* kprobes don't want to hook the spurious faults: */ | ||
1242 | if (kprobes_fault(regs)) | ||
1243 | return; | ||
1244 | |||
1245 | /* | ||
1246 | * Note, despite being a "bad area", there are quite a few | ||
1247 | * acceptable reasons to get here, such as erratum fixups | ||
1248 | * and handling kernel code that can fault, like get_user(). | ||
1249 | * | ||
1250 | * Don't take the mm semaphore here. If we fixup a prefetch | ||
1251 | * fault we could otherwise deadlock: | ||
1252 | */ | ||
1253 | bad_area_nosemaphore(regs, hw_error_code, address, NULL); | ||
1254 | } | ||
1255 | NOKPROBE_SYMBOL(do_kern_addr_fault); | ||
1256 | |||
1257 | /* | ||
1206 | * This routine handles page faults. It determines the address, | 1258 | * This routine handles page faults. It determines the address, |
1207 | * and the problem, and then passes it off to one of the appropriate | 1259 | * and the problem, and then passes it off to one of the appropriate |
1208 | * routines. | 1260 | * routines. |
@@ -1227,38 +1279,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, | |||
1227 | if (unlikely(kmmio_fault(regs, address))) | 1279 | if (unlikely(kmmio_fault(regs, address))) |
1228 | return; | 1280 | return; |
1229 | 1281 | ||
1230 | /* | 1282 | /* Was the fault on kernel-controlled part of the address space? */ |
1231 | * We fault-in kernel-space virtual memory on-demand. The | ||
1232 | * 'reference' page table is init_mm.pgd. | ||
1233 | * | ||
1234 | * NOTE! We MUST NOT take any locks for this case. We may | ||
1235 | * be in an interrupt or a critical region, and should | ||
1236 | * only copy the information from the master page table, | ||
1237 | * nothing more. | ||
1238 | * | ||
1239 | * This verifies that the fault happens in kernel space | ||
1240 | * (hw_error_code & 4) == 0, and that the fault was not a | ||
1241 | * protection error (hw_error_code & 9) == 0. | ||
1242 | */ | ||
1243 | if (unlikely(fault_in_kernel_space(address))) { | 1283 | if (unlikely(fault_in_kernel_space(address))) { |
1244 | if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { | 1284 | do_kern_addr_fault(regs, hw_error_code, address); |
1245 | if (vmalloc_fault(address) >= 0) | ||
1246 | return; | ||
1247 | } | ||
1248 | |||
1249 | /* Can handle a stale RO->RW TLB: */ | ||
1250 | if (spurious_fault(hw_error_code, address)) | ||
1251 | return; | ||
1252 | |||
1253 | /* kprobes don't want to hook the spurious faults: */ | ||
1254 | if (kprobes_fault(regs)) | ||
1255 | return; | ||
1256 | /* | ||
1257 | * Don't take the mm semaphore here. If we fixup a prefetch | ||
1258 | * fault we could otherwise deadlock: | ||
1259 | */ | ||
1260 | bad_area_nosemaphore(regs, hw_error_code, address, NULL); | ||
1261 | |||
1262 | return; | 1285 | return; |
1263 | } | 1286 | } |
1264 | 1287 | ||