about summary refs log tree commit diff stats
path: root/arch/x86/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--  arch/x86/mm/fault.c | 79
1 file changed, 60 insertions(+), 19 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9ff85bb8dd69..a10c8c792161 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -641,6 +641,20 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
 	/* Are we prepared to handle this kernel fault? */
 	if (fixup_exception(regs)) {
+		/*
+		 * Any interrupt that takes a fault gets the fixup. This makes
+		 * the below recursive fault logic only apply to a faults from
+		 * task context.
+		 */
+		if (in_interrupt())
+			return;
+
+		/*
+		 * Per the above we're !in_interrupt(), aka. task context.
+		 *
+		 * In this case we need to make sure we're not recursively
+		 * faulting through the emulate_vsyscall() logic.
+		 */
 		if (current_thread_info()->sig_on_uaccess_error && signal) {
 			tsk->thread.trap_nr = X86_TRAP_PF;
 			tsk->thread.error_code = error_code | PF_USER;
@@ -649,6 +663,10 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 			/* XXX: hwpoison faults will set the wrong code. */
 			force_sig_info_fault(signal, si_code, address, tsk, 0);
 		}
+
+		/*
+		 * Barring that, we can do the fixup and be happy.
+		 */
 		return;
 	}
 
@@ -983,6 +1001,12 @@ static int fault_in_kernel_space(unsigned long address)
 
 static inline bool smap_violation(int error_code, struct pt_regs *regs)
 {
+	if (!IS_ENABLED(CONFIG_X86_SMAP))
+		return false;
+
+	if (!static_cpu_has(X86_FEATURE_SMAP))
+		return false;
+
 	if (error_code & PF_USER)
 		return false;
 
@@ -996,13 +1020,17 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
  * This routine handles page faults. It determines the address,
  * and the problem, and then passes it off to one of the appropriate
  * routines.
+ *
+ * This function must have noinline because both callers
+ * {,trace_}do_page_fault() have notrace on. Having this an actual function
+ * guarantees there's a function trace entry.
  */
-static void __kprobes
-__do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes noinline
+__do_page_fault(struct pt_regs *regs, unsigned long error_code,
+		unsigned long address)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
-	unsigned long address;
 	struct mm_struct *mm;
 	int fault;
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -1010,9 +1038,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	tsk = current;
 	mm = tsk->mm;
 
-	/* Get the faulting address: */
-	address = read_cr2();
-
 	/*
 	 * Detect and handle instructions that would cause a page fault for
 	 * both a tracked kernel page and a userspace page.
@@ -1069,11 +1094,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	if (static_cpu_has(X86_FEATURE_SMAP)) {
-		if (unlikely(smap_violation(error_code, regs))) {
-			bad_area_nosemaphore(regs, error_code, address);
-			return;
-		}
+	if (unlikely(smap_violation(error_code, regs))) {
+		bad_area_nosemaphore(regs, error_code, address);
+		return;
 	}
 
 	/*
@@ -1226,32 +1249,50 @@ good_area:
 	up_read(&mm->mmap_sem);
 }
 
-dotraplinkage void __kprobes
+dotraplinkage void __kprobes notrace
 do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
+	unsigned long address = read_cr2(); /* Get the faulting address */
 	enum ctx_state prev_state;
 
+	/*
+	 * We must have this function tagged with __kprobes, notrace and call
+	 * read_cr2() before calling anything else. To avoid calling any kind
+	 * of tracing machinery before we've observed the CR2 value.
+	 *
+	 * exception_{enter,exit}() contain all sorts of tracepoints.
+	 */
+
 	prev_state = exception_enter();
-	__do_page_fault(regs, error_code);
+	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
 
-static void trace_page_fault_entries(struct pt_regs *regs,
+#ifdef CONFIG_TRACING
+static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
 	unsigned long error_code)
 {
 	if (user_mode(regs))
-		trace_page_fault_user(read_cr2(), regs, error_code);
+		trace_page_fault_user(address, regs, error_code);
 	else
-		trace_page_fault_kernel(read_cr2(), regs, error_code);
+		trace_page_fault_kernel(address, regs, error_code);
 }
 
-dotraplinkage void __kprobes
+dotraplinkage void __kprobes notrace
 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
+	/*
+	 * The exception_enter and tracepoint processing could
+	 * trigger another page faults (user space callchain
+	 * reading) and destroy the original cr2 value, so read
+	 * the faulting address now.
+	 */
+	unsigned long address = read_cr2();
 	enum ctx_state prev_state;
 
 	prev_state = exception_enter();
-	trace_page_fault_entries(regs, error_code);
-	__do_page_fault(regs, error_code);
+	trace_page_fault_entries(address, regs, error_code);
+	__do_page_fault(regs, error_code, address);
 	exception_exit(prev_state);
 }
+#endif /* CONFIG_TRACING */