diff options
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r-- | arch/x86/mm/fault.c | 79 |
1 files changed, 60 insertions, 19 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9ff85bb8dd69..a10c8c792161 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -641,6 +641,20 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
641 | 641 | ||
642 | /* Are we prepared to handle this kernel fault? */ | 642 | /* Are we prepared to handle this kernel fault? */ |
643 | if (fixup_exception(regs)) { | 643 | if (fixup_exception(regs)) { |
644 | /* | ||
645 | * Any interrupt that takes a fault gets the fixup. This makes | ||
646 | * the below recursive fault logic only apply to faults from | ||
647 | * task context. | ||
648 | */ | ||
649 | if (in_interrupt()) | ||
650 | return; | ||
651 | |||
652 | /* | ||
653 | * Per the above we're !in_interrupt(), aka. task context. | ||
654 | * | ||
655 | * In this case we need to make sure we're not recursively | ||
656 | * faulting through the emulate_vsyscall() logic. | ||
657 | */ | ||
644 | if (current_thread_info()->sig_on_uaccess_error && signal) { | 658 | if (current_thread_info()->sig_on_uaccess_error && signal) { |
645 | tsk->thread.trap_nr = X86_TRAP_PF; | 659 | tsk->thread.trap_nr = X86_TRAP_PF; |
646 | tsk->thread.error_code = error_code | PF_USER; | 660 | tsk->thread.error_code = error_code | PF_USER; |
@@ -649,6 +663,10 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
649 | /* XXX: hwpoison faults will set the wrong code. */ | 663 | /* XXX: hwpoison faults will set the wrong code. */ |
650 | force_sig_info_fault(signal, si_code, address, tsk, 0); | 664 | force_sig_info_fault(signal, si_code, address, tsk, 0); |
651 | } | 665 | } |
666 | |||
667 | /* | ||
668 | * Barring that, we can do the fixup and be happy. | ||
669 | */ | ||
652 | return; | 670 | return; |
653 | } | 671 | } |
654 | 672 | ||
@@ -983,6 +1001,12 @@ static int fault_in_kernel_space(unsigned long address) | |||
983 | 1001 | ||
984 | static inline bool smap_violation(int error_code, struct pt_regs *regs) | 1002 | static inline bool smap_violation(int error_code, struct pt_regs *regs) |
985 | { | 1003 | { |
1004 | if (!IS_ENABLED(CONFIG_X86_SMAP)) | ||
1005 | return false; | ||
1006 | |||
1007 | if (!static_cpu_has(X86_FEATURE_SMAP)) | ||
1008 | return false; | ||
1009 | |||
986 | if (error_code & PF_USER) | 1010 | if (error_code & PF_USER) |
987 | return false; | 1011 | return false; |
988 | 1012 | ||
@@ -996,13 +1020,17 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) | |||
996 | * This routine handles page faults. It determines the address, | 1020 | * This routine handles page faults. It determines the address, |
997 | * and the problem, and then passes it off to one of the appropriate | 1021 | * and the problem, and then passes it off to one of the appropriate |
998 | * routines. | 1022 | * routines. |
1023 | * | ||
1024 | * This function must be marked noinline because both callers | ||
1025 | * {,trace_}do_page_fault() have notrace on. Having this an actual function | ||
1026 | * guarantees there's a function trace entry. | ||
999 | */ | 1027 | */ |
1000 | static void __kprobes | 1028 | static void __kprobes noinline |
1001 | __do_page_fault(struct pt_regs *regs, unsigned long error_code) | 1029 | __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
1030 | unsigned long address) | ||
1002 | { | 1031 | { |
1003 | struct vm_area_struct *vma; | 1032 | struct vm_area_struct *vma; |
1004 | struct task_struct *tsk; | 1033 | struct task_struct *tsk; |
1005 | unsigned long address; | ||
1006 | struct mm_struct *mm; | 1034 | struct mm_struct *mm; |
1007 | int fault; | 1035 | int fault; |
1008 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | 1036 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
@@ -1010,9 +1038,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1010 | tsk = current; | 1038 | tsk = current; |
1011 | mm = tsk->mm; | 1039 | mm = tsk->mm; |
1012 | 1040 | ||
1013 | /* Get the faulting address: */ | ||
1014 | address = read_cr2(); | ||
1015 | |||
1016 | /* | 1041 | /* |
1017 | * Detect and handle instructions that would cause a page fault for | 1042 | * Detect and handle instructions that would cause a page fault for |
1018 | * both a tracked kernel page and a userspace page. | 1043 | * both a tracked kernel page and a userspace page. |
@@ -1069,11 +1094,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1069 | if (unlikely(error_code & PF_RSVD)) | 1094 | if (unlikely(error_code & PF_RSVD)) |
1070 | pgtable_bad(regs, error_code, address); | 1095 | pgtable_bad(regs, error_code, address); |
1071 | 1096 | ||
1072 | if (static_cpu_has(X86_FEATURE_SMAP)) { | 1097 | if (unlikely(smap_violation(error_code, regs))) { |
1073 | if (unlikely(smap_violation(error_code, regs))) { | 1098 | bad_area_nosemaphore(regs, error_code, address); |
1074 | bad_area_nosemaphore(regs, error_code, address); | 1099 | return; |
1075 | return; | ||
1076 | } | ||
1077 | } | 1100 | } |
1078 | 1101 | ||
1079 | /* | 1102 | /* |
@@ -1226,32 +1249,50 @@ good_area: | |||
1226 | up_read(&mm->mmap_sem); | 1249 | up_read(&mm->mmap_sem); |
1227 | } | 1250 | } |
1228 | 1251 | ||
1229 | dotraplinkage void __kprobes | 1252 | dotraplinkage void __kprobes notrace |
1230 | do_page_fault(struct pt_regs *regs, unsigned long error_code) | 1253 | do_page_fault(struct pt_regs *regs, unsigned long error_code) |
1231 | { | 1254 | { |
1255 | unsigned long address = read_cr2(); /* Get the faulting address */ | ||
1232 | enum ctx_state prev_state; | 1256 | enum ctx_state prev_state; |
1233 | 1257 | ||
1258 | /* | ||
1259 | * We must have this function tagged with __kprobes, notrace and call | ||
1260 | * read_cr2() before calling anything else, to avoid calling any kind | ||
1261 | * of tracing machinery before we've observed the CR2 value. | ||
1262 | * | ||
1263 | * exception_{enter,exit}() contain all sorts of tracepoints. | ||
1264 | */ | ||
1265 | |||
1234 | prev_state = exception_enter(); | 1266 | prev_state = exception_enter(); |
1235 | __do_page_fault(regs, error_code); | 1267 | __do_page_fault(regs, error_code, address); |
1236 | exception_exit(prev_state); | 1268 | exception_exit(prev_state); |
1237 | } | 1269 | } |
1238 | 1270 | ||
1239 | static void trace_page_fault_entries(struct pt_regs *regs, | 1271 | #ifdef CONFIG_TRACING |
1272 | static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, | ||
1240 | unsigned long error_code) | 1273 | unsigned long error_code) |
1241 | { | 1274 | { |
1242 | if (user_mode(regs)) | 1275 | if (user_mode(regs)) |
1243 | trace_page_fault_user(read_cr2(), regs, error_code); | 1276 | trace_page_fault_user(address, regs, error_code); |
1244 | else | 1277 | else |
1245 | trace_page_fault_kernel(read_cr2(), regs, error_code); | 1278 | trace_page_fault_kernel(address, regs, error_code); |
1246 | } | 1279 | } |
1247 | 1280 | ||
1248 | dotraplinkage void __kprobes | 1281 | dotraplinkage void __kprobes notrace |
1249 | trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) | 1282 | trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) |
1250 | { | 1283 | { |
1284 | /* | ||
1285 | * The exception_enter and tracepoint processing could | ||
1286 | * trigger other page faults (user space callchain | ||
1287 | * reading) and destroy the original cr2 value, so read | ||
1288 | * the faulting address now. | ||
1289 | */ | ||
1290 | unsigned long address = read_cr2(); | ||
1251 | enum ctx_state prev_state; | 1291 | enum ctx_state prev_state; |
1252 | 1292 | ||
1253 | prev_state = exception_enter(); | 1293 | prev_state = exception_enter(); |
1254 | trace_page_fault_entries(regs, error_code); | 1294 | trace_page_fault_entries(address, regs, error_code); |
1255 | __do_page_fault(regs, error_code); | 1295 | __do_page_fault(regs, error_code, address); |
1256 | exception_exit(prev_state); | 1296 | exception_exit(prev_state); |
1257 | } | 1297 | } |
1298 | #endif /* CONFIG_TRACING */ | ||