diff options
| -rw-r--r-- | arch/x86/include/asm/ftrace.h | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/common.c | 8 | ||||
| -rw-r--r-- | arch/x86/kernel/entry_64.S | 44 | ||||
| -rw-r--r-- | arch/x86/kernel/ftrace.c | 102 | ||||
| -rw-r--r-- | arch/x86/kernel/nmi.c | 6 | ||||
| -rw-r--r-- | arch/x86/kernel/traps.c | 8 |
6 files changed, 154 insertions, 16 deletions
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 18d9005d9e4f..b0767bc08740 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | 34 | ||
| 35 | #ifndef __ASSEMBLY__ | 35 | #ifndef __ASSEMBLY__ |
| 36 | extern void mcount(void); | 36 | extern void mcount(void); |
| 37 | extern int modifying_ftrace_code; | 37 | extern atomic_t modifying_ftrace_code; |
| 38 | 38 | ||
| 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | 39 | static inline unsigned long ftrace_call_adjust(unsigned long addr) |
| 40 | { | 40 | { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f29e70d058..6b9333b429ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr) | |||
| 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); |
| 1102 | } | 1102 | } |
| 1103 | 1103 | ||
| 1104 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); | ||
| 1105 | |||
| 1104 | void debug_stack_set_zero(void) | 1106 | void debug_stack_set_zero(void) |
| 1105 | { | 1107 | { |
| 1108 | this_cpu_inc(debug_stack_use_ctr); | ||
| 1106 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | 1109 | load_idt((const struct desc_ptr *)&nmi_idt_descr); |
| 1107 | } | 1110 | } |
| 1108 | 1111 | ||
| 1109 | void debug_stack_reset(void) | 1112 | void debug_stack_reset(void) |
| 1110 | { | 1113 | { |
| 1111 | load_idt((const struct desc_ptr *)&idt_descr); | 1114 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) |
| 1115 | return; | ||
| 1116 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) | ||
| 1117 | load_idt((const struct desc_ptr *)&idt_descr); | ||
| 1112 | } | 1118 | } |
| 1113 | 1119 | ||
| 1114 | #else /* CONFIG_X86_64 */ | 1120 | #else /* CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 320852d02026..7d65133b51be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64) | |||
| 191 | .endm | 191 | .endm |
| 192 | 192 | ||
| 193 | /* | 193 | /* |
| 194 | * When the dynamic function tracer is enabled it will add a breakpoint | ||
| 195 | * to all locations that it is about to modify, sync CPUs, update | ||
| 196 | * all the code, sync CPUs, then remove the breakpoints. During this time, | ||
| 197 | * if lockdep is enabled, it might jump back into the debug handler | ||
| 198 | * outside the updating of the IST protection (TRACE_IRQS_ON/OFF). | ||
| 199 | * | ||
| 200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | ||
| 201 | * make sure the stack pointer does not get reset back to the top | ||
| 202 | * of the debug stack, and instead just reuses the current stack. | ||
| 203 | */ | ||
| 204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | ||
| 205 | |||
| 206 | .macro TRACE_IRQS_OFF_DEBUG | ||
| 207 | call debug_stack_set_zero | ||
| 208 | TRACE_IRQS_OFF | ||
| 209 | call debug_stack_reset | ||
| 210 | .endm | ||
| 211 | |||
| 212 | .macro TRACE_IRQS_ON_DEBUG | ||
| 213 | call debug_stack_set_zero | ||
| 214 | TRACE_IRQS_ON | ||
| 215 | call debug_stack_reset | ||
| 216 | .endm | ||
| 217 | |||
| 218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | ||
| 219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
| 220 | jnc 1f | ||
| 221 | TRACE_IRQS_ON_DEBUG | ||
| 222 | 1: | ||
| 223 | .endm | ||
| 224 | |||
| 225 | #else | ||
| 226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | ||
| 227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | ||
| 228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | ||
| 229 | #endif | ||
| 230 | |||
| 231 | /* | ||
| 194 | * C code is not supposed to know about undefined top of stack. Every time | 232 | * C code is not supposed to know about undefined top of stack. Every time |
| 195 | * a C function with an pt_regs argument is called from the SYSCALL based | 233 | * a C function with an pt_regs argument is called from the SYSCALL based |
| 196 | * fast path FIXUP_TOP_OF_STACK is needed. | 234 | * fast path FIXUP_TOP_OF_STACK is needed. |
| @@ -1098,7 +1136,7 @@ ENTRY(\sym) | |||
| 1098 | subq $ORIG_RAX-R15, %rsp | 1136 | subq $ORIG_RAX-R15, %rsp |
| 1099 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
| 1100 | call save_paranoid | 1138 | call save_paranoid |
| 1101 | TRACE_IRQS_OFF | 1139 | TRACE_IRQS_OFF_DEBUG |
| 1102 | movq %rsp,%rdi /* pt_regs pointer */ | 1140 | movq %rsp,%rdi /* pt_regs pointer */ |
| 1103 | xorl %esi,%esi /* no error code */ | 1141 | xorl %esi,%esi /* no error code */ |
| 1104 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1142 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
| @@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
| 1393 | ENTRY(paranoid_exit) | 1431 | ENTRY(paranoid_exit) |
| 1394 | DEFAULT_FRAME | 1432 | DEFAULT_FRAME |
| 1395 | DISABLE_INTERRUPTS(CLBR_NONE) | 1433 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 1396 | TRACE_IRQS_OFF | 1434 | TRACE_IRQS_OFF_DEBUG |
| 1397 | testl %ebx,%ebx /* swapgs needed? */ | 1435 | testl %ebx,%ebx /* swapgs needed? */ |
| 1398 | jnz paranoid_restore | 1436 | jnz paranoid_restore |
| 1399 | testl $3,CS(%rsp) | 1437 | testl $3,CS(%rsp) |
| @@ -1404,7 +1442,7 @@ paranoid_swapgs: | |||
| 1404 | RESTORE_ALL 8 | 1442 | RESTORE_ALL 8 |
| 1405 | jmp irq_return | 1443 | jmp irq_return |
| 1406 | paranoid_restore: | 1444 | paranoid_restore: |
| 1407 | TRACE_IRQS_IRETQ 0 | 1445 | TRACE_IRQS_IRETQ_DEBUG 0 |
| 1408 | RESTORE_ALL 8 | 1446 | RESTORE_ALL 8 |
| 1409 | jmp irq_return | 1447 | jmp irq_return |
| 1410 | paranoid_userspace: | 1448 | paranoid_userspace: |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 32ff36596ab1..c3a7cb4bf6e6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
| @@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void) | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | static int | 102 | static int |
| 103 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 103 | ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, |
| 104 | unsigned const char *new_code) | 104 | unsigned const char *new_code) |
| 105 | { | 105 | { |
| 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; | 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; |
| @@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod, | |||
| 141 | old = ftrace_call_replace(ip, addr); | 141 | old = ftrace_call_replace(ip, addr); |
| 142 | new = ftrace_nop_replace(); | 142 | new = ftrace_nop_replace(); |
| 143 | 143 | ||
| 144 | return ftrace_modify_code(rec->ip, old, new); | 144 | /* |
| 145 | * On boot up, and when modules are loaded, the MCOUNT_ADDR | ||
| 146 | * is converted to a nop, and will never become MCOUNT_ADDR | ||
| 147 | * again. This code is either running before SMP (on boot up) | ||
| 148 | * or before the code will ever be executed (module load). | ||
| 149 | * We do not want to use the breakpoint version in this case, | ||
| 150 | * just modify the code directly. | ||
| 151 | */ | ||
| 152 | if (addr == MCOUNT_ADDR) | ||
| 153 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
| 154 | |||
| 155 | /* Normal cases use add_brk_on_nop */ | ||
| 156 | WARN_ONCE(1, "invalid use of ftrace_make_nop"); | ||
| 157 | return -EINVAL; | ||
| 145 | } | 158 | } |
| 146 | 159 | ||
| 147 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 160 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) |
| @@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
| 152 | old = ftrace_nop_replace(); | 165 | old = ftrace_nop_replace(); |
| 153 | new = ftrace_call_replace(ip, addr); | 166 | new = ftrace_call_replace(ip, addr); |
| 154 | 167 | ||
| 155 | return ftrace_modify_code(rec->ip, old, new); | 168 | /* Should only be called when module is loaded */ |
| 169 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
| 156 | } | 170 | } |
| 157 | 171 | ||
| 172 | /* | ||
| 173 | * The modifying_ftrace_code is used to tell the breakpoint | ||
| 174 | * handler to call ftrace_int3_handler(). If it fails to | ||
| 175 | * call this handler for a breakpoint added by ftrace, then | ||
| 176 | * the kernel may crash. | ||
| 177 | * | ||
| 178 | * As atomic writes on x86 do not need a barrier, we do not | ||
| 179 | * need to add smp_mb()s for this to work. It is also assumed | ||
| 180 | * that a CPU cannot read modifying_ftrace_code before | ||
| 181 | * executing the breakpoint. It would be quite remarkable if | ||
| 182 | * it could do that. Here's the flow that is required: | ||
| 183 | * | ||
| 184 | * CPU-0 CPU-1 | ||
| 185 | * | ||
| 186 | * atomic_inc(mfc); | ||
| 187 | * write int3s | ||
| 188 | * <trap-int3> // implicit (r)mb | ||
| 189 | * if (atomic_read(mfc)) | ||
| 190 | * call ftrace_int3_handler() | ||
| 191 | * | ||
| 192 | * Then when we are finished: | ||
| 193 | * | ||
| 194 | * atomic_dec(mfc); | ||
| 195 | * | ||
| 196 | * If we hit a breakpoint that was not set by ftrace, it does not | ||
| 197 | * matter if ftrace_int3_handler() is called or not. It will | ||
| 198 | * simply be ignored. But it is crucial that a ftrace nop/caller | ||
| 199 | * breakpoint is handled. No other user should ever place a | ||
| 200 | * breakpoint on an ftrace nop/caller location. It must only | ||
| 201 | * be done by this code. | ||
| 202 | */ | ||
| 203 | atomic_t modifying_ftrace_code __read_mostly; | ||
| 204 | |||
| 205 | static int | ||
| 206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
| 207 | unsigned const char *new_code); | ||
| 208 | |||
| 158 | int ftrace_update_ftrace_func(ftrace_func_t func) | 209 | int ftrace_update_ftrace_func(ftrace_func_t func) |
| 159 | { | 210 | { |
| 160 | unsigned long ip = (unsigned long)(&ftrace_call); | 211 | unsigned long ip = (unsigned long)(&ftrace_call); |
| @@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
| 163 | 214 | ||
| 164 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | 215 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); |
| 165 | new = ftrace_call_replace(ip, (unsigned long)func); | 216 | new = ftrace_call_replace(ip, (unsigned long)func); |
| 217 | |||
| 218 | /* See comment above by declaration of modifying_ftrace_code */ | ||
| 219 | atomic_inc(&modifying_ftrace_code); | ||
| 220 | |||
| 166 | ret = ftrace_modify_code(ip, old, new); | 221 | ret = ftrace_modify_code(ip, old, new); |
| 167 | 222 | ||
| 223 | atomic_dec(&modifying_ftrace_code); | ||
| 224 | |||
| 168 | return ret; | 225 | return ret; |
| 169 | } | 226 | } |
| 170 | 227 | ||
| 171 | int modifying_ftrace_code __read_mostly; | ||
| 172 | |||
| 173 | /* | 228 | /* |
| 174 | * A breakpoint was added to the code address we are about to | 229 | * A breakpoint was added to the code address we are about to |
| 175 | * modify, and this is the handle that will just skip over it. | 230 | * modify, and this is the handle that will just skip over it. |
| @@ -489,13 +544,46 @@ void ftrace_replace_code(int enable) | |||
| 489 | } | 544 | } |
| 490 | } | 545 | } |
| 491 | 546 | ||
| 547 | static int | ||
| 548 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
| 549 | unsigned const char *new_code) | ||
| 550 | { | ||
| 551 | int ret; | ||
| 552 | |||
| 553 | ret = add_break(ip, old_code); | ||
| 554 | if (ret) | ||
| 555 | goto out; | ||
| 556 | |||
| 557 | run_sync(); | ||
| 558 | |||
| 559 | ret = add_update_code(ip, new_code); | ||
| 560 | if (ret) | ||
| 561 | goto fail_update; | ||
| 562 | |||
| 563 | run_sync(); | ||
| 564 | |||
| 565 | ret = ftrace_write(ip, new_code, 1); | ||
| 566 | if (ret) { | ||
| 567 | ret = -EPERM; | ||
| 568 | goto out; | ||
| 569 | } | ||
| 570 | run_sync(); | ||
| 571 | out: | ||
| 572 | return ret; | ||
| 573 | |||
| 574 | fail_update: | ||
| 575 | probe_kernel_write((void *)ip, &old_code[0], 1); | ||
| 576 | goto out; | ||
| 577 | } | ||
| 578 | |||
| 492 | void arch_ftrace_update_code(int command) | 579 | void arch_ftrace_update_code(int command) |
| 493 | { | 580 | { |
| 494 | modifying_ftrace_code++; | 581 | /* See comment above by declaration of modifying_ftrace_code */ |
| 582 | atomic_inc(&modifying_ftrace_code); | ||
| 495 | 583 | ||
| 496 | ftrace_modify_all_code(command); | 584 | ftrace_modify_all_code(command); |
| 497 | 585 | ||
| 498 | modifying_ftrace_code--; | 586 | atomic_dec(&modifying_ftrace_code); |
| 499 | } | 587 | } |
| 500 | 588 | ||
| 501 | int __init ftrace_dyn_arch_init(void *data) | 589 | int __init ftrace_dyn_arch_init(void *data) |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 90875279ef3d..a0b2f84457be 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
| @@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |||
| 444 | */ | 444 | */ |
| 445 | if (unlikely(is_debug_stack(regs->sp))) { | 445 | if (unlikely(is_debug_stack(regs->sp))) { |
| 446 | debug_stack_set_zero(); | 446 | debug_stack_set_zero(); |
| 447 | __get_cpu_var(update_debug_stack) = 1; | 447 | this_cpu_write(update_debug_stack, 1); |
| 448 | } | 448 | } |
| 449 | } | 449 | } |
| 450 | 450 | ||
| 451 | static inline void nmi_nesting_postprocess(void) | 451 | static inline void nmi_nesting_postprocess(void) |
| 452 | { | 452 | { |
| 453 | if (unlikely(__get_cpu_var(update_debug_stack))) | 453 | if (unlikely(this_cpu_read(update_debug_stack))) { |
| 454 | debug_stack_reset(); | 454 | debug_stack_reset(); |
| 455 | this_cpu_write(update_debug_stack, 0); | ||
| 456 | } | ||
| 455 | } | 457 | } |
| 456 | #endif | 458 | #endif |
| 457 | 459 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff08457a025d..05b31d92f69c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -303,8 +303,12 @@ gp_in_kernel: | |||
| 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) | 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
| 304 | { | 304 | { |
| 305 | #ifdef CONFIG_DYNAMIC_FTRACE | 305 | #ifdef CONFIG_DYNAMIC_FTRACE |
| 306 | /* ftrace must be first, everything else may cause a recursive crash */ | 306 | /* |
| 307 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | 307 | * ftrace must be first, everything else may cause a recursive crash. |
| 308 | * See note by declaration of modifying_ftrace_code in ftrace.c | ||
| 309 | */ | ||
| 310 | if (unlikely(atomic_read(&modifying_ftrace_code)) && | ||
| 311 | ftrace_int3_handler(regs)) | ||
| 308 | return; | 312 | return; |
| 309 | #endif | 313 | #endif |
| 310 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 314 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |
