Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--	arch/x86/mm/fault.c	108
1 file changed, 46 insertions(+), 62 deletions(-)
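
The hunks below drop the local PF_* error-code enum from fault.c and switch every use in this file to the X86_PF_* names; in this series the definitions are presumably relocated to a shared header (asm/traps.h) rather than deleted outright, though that header is not part of this diff. As a minimal standalone sketch of what the error-code bits mean — the bit layout comes from the comment removed in the first large hunk, while the #define form and the demo program are illustrative only and not part of the patch:

/*
 * Standalone sketch (not part of the patch): decode a page fault error
 * code using the bit layout documented in the removed comment below.
 * The X86_PF_* values mirror the old PF_* enum; the real definitions
 * live in the kernel headers, not here.
 */
#include <stdio.h>

#define X86_PF_PROT	(1 << 0)	/* 0: no page found	1: protection fault */
#define X86_PF_WRITE	(1 << 1)	/* 0: read access	1: write access */
#define X86_PF_USER	(1 << 2)	/* 0: kernel-mode access 1: user-mode access */
#define X86_PF_RSVD	(1 << 3)	/* 1: use of reserved bit detected */
#define X86_PF_INSTR	(1 << 4)	/* 1: fault was an instruction fetch */
#define X86_PF_PK	(1 << 5)	/* 1: protection keys block access */

int main(void)
{
	/* Example: a user-mode write hitting a present but protected page. */
	unsigned long error_code = X86_PF_USER | X86_PF_WRITE | X86_PF_PROT;

	printf("%s, %s, %s mode%s%s%s\n",
	       (error_code & X86_PF_PROT)  ? "protection fault" : "no page found",
	       (error_code & X86_PF_WRITE) ? "write access" : "read access",
	       (error_code & X86_PF_USER)  ? "user" : "kernel",
	       (error_code & X86_PF_RSVD)  ? ", reserved bit set" : "",
	       (error_code & X86_PF_INSTR) ? ", instruction fetch" : "",
	       (error_code & X86_PF_PK)    ? ", blocked by protection key" : "");
	return 0;
}
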
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index e2baeaa053a5..febf6980e653 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 1995 Linus Torvalds
  * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
@@ -19,7 +20,6 @@
 #include <asm/cpufeature.h>	/* boot_cpu_has, ... */
 #include <asm/traps.h>		/* dotraplinkage, ... */
 #include <asm/pgalloc.h>	/* pgd_*(), ... */
-#include <asm/kmemcheck.h>	/* kmemcheck_*(), ... */
 #include <asm/fixmap.h>		/* VSYSCALL_ADDR */
 #include <asm/vsyscall.h>	/* emulate_vsyscall */
 #include <asm/vm86.h>		/* struct vm86 */
@@ -29,26 +29,6 @@
 #include <asm/trace/exceptions.h>
 
 /*
- * Page fault error code bits:
- *
- *   bit 0 ==	 0: no page found	1: protection fault
- *   bit 1 ==	 0: read access		1: write access
- *   bit 2 ==	 0: kernel-mode access	1: user-mode access
- *   bit 3 ==				1: use of reserved bit detected
- *   bit 4 ==				1: fault was an instruction fetch
- *   bit 5 ==				1: protection keys block access
- */
-enum x86_pf_error_code {
-
-	PF_PROT		=		1 << 0,
-	PF_WRITE	=		1 << 1,
-	PF_USER		=		1 << 2,
-	PF_RSVD		=		1 << 3,
-	PF_INSTR	=		1 << 4,
-	PF_PK		=		1 << 5,
-};
-
-/*
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
@@ -149,7 +129,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 	 * If it was a exec (instruction fetch) fault on NX page, then
 	 * do not ignore the fault:
 	 */
-	if (error_code & PF_INSTR)
+	if (error_code & X86_PF_INSTR)
 		return 0;
 
 	instr = (void *)convert_ip_to_linear(current, regs);
@@ -179,7 +159,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * siginfo so userspace can discover which protection key was set
  * on the PTE.
  *
- * If we get here, we know that the hardware signaled a PF_PK
+ * If we get here, we know that the hardware signaled a X86_PF_PK
  * fault and that there was a VMA once we got in the fault
  * handler. It does *not* guarantee that the VMA we find here
  * was the one that we faulted on.
@@ -204,7 +184,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 	/*
 	 * force_sig_info_fault() is called from a number of
 	 * contexts, some of which have a VMA and some of which
-	 * do not. The PF_PK handing happens after we have a
+	 * do not. The X86_PF_PK handing happens after we have a
 	 * valid VMA, so we should never reach this without a
 	 * valid VMA.
 	 */
@@ -697,7 +677,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	if (!oops_may_print())
 		return;
 
-	if (error_code & PF_INSTR) {
+	if (error_code & X86_PF_INSTR) {
 		unsigned int level;
 		pgd_t *pgd;
 		pte_t *pte;
@@ -721,7 +701,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	else
 		printk(KERN_CONT "paging request");
 
-	printk(KERN_CONT " at %p\n", (void *) address);
+	printk(KERN_CONT " at %px\n", (void *) address);
 	printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
 
 	dump_pagetable(address);
@@ -779,7 +759,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	 */
 	if (current->thread.sig_on_uaccess_err && signal) {
 		tsk->thread.trap_nr = X86_TRAP_PF;
-		tsk->thread.error_code = error_code | PF_USER;
+		tsk->thread.error_code = error_code | X86_PF_USER;
 		tsk->thread.cr2 = address;
 
 		/* XXX: hwpoison faults will set the wrong code. */
@@ -897,7 +877,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 
 	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & PF_USER) {
+	if (error_code & X86_PF_USER) {
 		/*
 		 * It's possible to have interrupts off here:
 		 */
@@ -918,7 +898,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		 * Instruction fetch faults in the vsyscall page might need
 		 * emulation.
 		 */
-		if (unlikely((error_code & PF_INSTR) &&
+		if (unlikely((error_code & X86_PF_INSTR) &&
 			     ((address & ~0xfff) == VSYSCALL_ADDR))) {
 			if (emulate_vsyscall(regs, address))
 				return;
@@ -931,7 +911,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		 * are always protection faults.
 		 */
 		if (address >= TASK_SIZE_MAX)
-			error_code |= PF_PROT;
+			error_code |= X86_PF_PROT;
 
 		if (likely(show_unhandled_signals))
 			show_signal_msg(regs, error_code, address, tsk);
@@ -992,11 +972,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
 
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return false;
-	if (error_code & PF_PK)
+	if (error_code & X86_PF_PK)
 		return true;
 	/* this checks permission keys on the VMA: */
-	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-				       (error_code & PF_INSTR), foreign))
+	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+				       (error_code & X86_PF_INSTR), foreign))
 		return true;
 	return false;
 }
@@ -1024,7 +1004,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	int code = BUS_ADRERR;
 
 	/* Kernel mode? Handle exceptions or die: */
-	if (!(error_code & PF_USER)) {
+	if (!(error_code & X86_PF_USER)) {
 		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
 	}
@@ -1052,14 +1032,14 @@ static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, u32 *pkey, unsigned int fault)
 {
-	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+	if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
 		return;
 	}
 
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
-		if (!(error_code & PF_USER)) {
+		if (!(error_code & X86_PF_USER)) {
 			no_context(regs, error_code, address,
 				   SIGSEGV, SEGV_MAPERR);
 			return;
@@ -1084,16 +1064,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
-	if ((error_code & PF_WRITE) && !pte_write(*pte))
+	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
 		return 0;
 
-	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+	if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
 		return 0;
 	/*
 	 * Note: We do not do lazy flushing on protection key
-	 * changes, so no spurious fault will ever set PF_PK.
+	 * changes, so no spurious fault will ever set X86_PF_PK.
 	 */
-	if ((error_code & PF_PK))
+	if ((error_code & X86_PF_PK))
 		return 1;
 
 	return 1;
@@ -1139,8 +1119,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	 * change, so user accesses are not expected to cause spurious
 	 * faults.
 	 */
-	if (error_code != (PF_WRITE | PF_PROT)
-	    && error_code != (PF_INSTR | PF_PROT))
+	if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+	    error_code != (X86_PF_INSTR | X86_PF_PROT))
 		return 0;
 
 	pgd = init_mm.pgd + pgd_index(address);
@@ -1200,19 +1180,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	 * always an unconditional error and can never result in
 	 * a follow-up action to resolve the fault, like a COW.
 	 */
-	if (error_code & PF_PK)
+	if (error_code & X86_PF_PK)
 		return 1;
 
 	/*
 	 * Make sure to check the VMA so that we do not perform
-	 * faults just to hit a PF_PK as soon as we fill in a
+	 * faults just to hit a X86_PF_PK as soon as we fill in a
 	 * page.
 	 */
-	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-				       (error_code & PF_INSTR), foreign))
+	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+				       (error_code & X86_PF_INSTR), foreign))
 		return 1;
 
-	if (error_code & PF_WRITE) {
+	if (error_code & X86_PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			return 1;
@@ -1220,7 +1200,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	}
 
 	/* read, present: */
-	if (unlikely(error_code & PF_PROT))
+	if (unlikely(error_code & X86_PF_PROT))
 		return 1;
 
 	/* read, not present: */
@@ -1243,7 +1223,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 	if (!static_cpu_has(X86_FEATURE_SMAP))
 		return false;
 
-	if (error_code & PF_USER)
+	if (error_code & X86_PF_USER)
 		return false;
 
 	if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1275,8 +1255,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * Detect and handle instructions that would cause a page fault for
 	 * both a tracked kernel page and a userspace page.
 	 */
-	if (kmemcheck_active(regs))
-		kmemcheck_hide(regs);
 	prefetchw(&mm->mmap_sem);
 
 	if (unlikely(kmmio_fault(regs, address)))
@@ -1296,12 +1274,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * protection error (error_code & 9) == 0.
 	 */
 	if (unlikely(fault_in_kernel_space(address))) {
-		if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+		if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
 			if (vmalloc_fault(address) >= 0)
 				return;
-
-			if (kmemcheck_fault(regs, address, error_code))
-				return;
 		}
 
 		/* Can handle a stale RO->RW TLB: */
@@ -1324,7 +1299,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (unlikely(kprobes_fault(regs)))
 		return;
 
-	if (unlikely(error_code & PF_RSVD))
+	if (unlikely(error_code & X86_PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
 	if (unlikely(smap_violation(error_code, regs))) {
@@ -1350,7 +1325,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 */
 	if (user_mode(regs)) {
 		local_irq_enable();
-		error_code |= PF_USER;
+		error_code |= X86_PF_USER;
 		flags |= FAULT_FLAG_USER;
 	} else {
 		if (regs->flags & X86_EFLAGS_IF)
@@ -1359,9 +1334,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-	if (error_code & PF_WRITE)
+	if (error_code & X86_PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
-	if (error_code & PF_INSTR)
+	if (error_code & X86_PF_INSTR)
 		flags |= FAULT_FLAG_INSTRUCTION;
 
 	/*
@@ -1381,7 +1356,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * space check, thus avoiding the deadlock:
 	 */
 	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
-		if ((error_code & PF_USER) == 0 &&
+		if (!(error_code & X86_PF_USER) &&
 		    !search_exception_tables(regs->ip)) {
 			bad_area_nosemaphore(regs, error_code, address, NULL);
 			return;
@@ -1408,7 +1383,7 @@ retry:
 		bad_area(regs, error_code, address);
 		return;
 	}
-	if (error_code & PF_USER) {
+	if (error_code & X86_PF_USER) {
 		/*
 		 * Accessing the stack below %sp is always a bug.
 		 * The large cushion allows instructions like enter
@@ -1440,7 +1415,17 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
 	 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
+	 *
+	 * Note that handle_userfault() may also release and reacquire mmap_sem
+	 * (and not return with VM_FAULT_RETRY), when returning to userland to
+	 * repeat the page fault later with a VM_FAULT_NOPAGE retval
+	 * (potentially after handling any pending signal during the return to
+	 * userland). The return to userland is identified whenever
+	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
+	 * Thus we have to be careful about not touching vma after handling the
+	 * fault, so we read the pkey beforehand.
 	 */
+	pkey = vma_pkey(vma);
 	fault = handle_mm_fault(vma, address, flags);
 	major |= fault & VM_FAULT_MAJOR;
 
@@ -1467,7 +1452,6 @@ good_area:
 		return;
 	}
 
-	pkey = vma_pkey(vma);
 	up_read(&mm->mmap_sem);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, &pkey, fault);