diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-02 19:17:03 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-02 19:17:03 -0400 |
commit | 63004afa718b1506fe9a286075b3b2d8c6ca2b9b (patch) | |
tree | 2ca957b939f36c7b6a8d85e162fec9d5a4bcca99 /arch/x86/kernel | |
parent | f309532bf3e1cc1b787403d84e3039812a7dbe50 (diff) | |
parent | 40b46a7d2938589a5abab132a7824fd17ae18f62 (diff) |
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull straggler x86 fixes from Peter Anvin:
"Three groups of patches:
- EFI boot stub documentation and the ability to print error messages;
- Removal of PTRACE_ARCH_PRCTL for x32 (obsolete interface which
should never have been ported, and the port is broken and
potentially dangerous.)
- ftrace stack corruption fixes. I'm not super-happy about the
technical implementation, but it is probably the least invasive in
the short term. In the future I would like a single method for
nesting the debug stack, however."
* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86, x32, ptrace: Remove PTRACE_ARCH_PRCTL for x32
x86, efi: Add EFI boot stub documentation
x86, efi: Add EFI boot stub console support
x86, efi: Only close open files in error path
ftrace/x86: Do not change stacks in DEBUG when calling lockdep
x86: Allow nesting of the debug stack IDT setting
x86: Reset the debug_stack update counter
ftrace: Use breakpoint method to update ftrace caller
ftrace: Synchronize variable setting with breakpoints
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 44 | ||||
-rw-r--r-- | arch/x86/kernel/ftrace.c | 102 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 8 |
6 files changed, 153 insertions, 21 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 82f29e70d058..6b9333b429ba 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr) | |||
1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); | 1101 | addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); |
1102 | } | 1102 | } |
1103 | 1103 | ||
1104 | static DEFINE_PER_CPU(u32, debug_stack_use_ctr); | ||
1105 | |||
1104 | void debug_stack_set_zero(void) | 1106 | void debug_stack_set_zero(void) |
1105 | { | 1107 | { |
1108 | this_cpu_inc(debug_stack_use_ctr); | ||
1106 | load_idt((const struct desc_ptr *)&nmi_idt_descr); | 1109 | load_idt((const struct desc_ptr *)&nmi_idt_descr); |
1107 | } | 1110 | } |
1108 | 1111 | ||
1109 | void debug_stack_reset(void) | 1112 | void debug_stack_reset(void) |
1110 | { | 1113 | { |
1111 | load_idt((const struct desc_ptr *)&idt_descr); | 1114 | if (WARN_ON(!this_cpu_read(debug_stack_use_ctr))) |
1115 | return; | ||
1116 | if (this_cpu_dec_return(debug_stack_use_ctr) == 0) | ||
1117 | load_idt((const struct desc_ptr *)&idt_descr); | ||
1112 | } | 1118 | } |
1113 | 1119 | ||
1114 | #else /* CONFIG_X86_64 */ | 1120 | #else /* CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 320852d02026..7d65133b51be 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64) | |||
191 | .endm | 191 | .endm |
192 | 192 | ||
193 | /* | 193 | /* |
194 | * When dynamic function tracer is enabled it will add a breakpoint | ||
195 | * to all locations that it is about to modify, sync CPUs, update | ||
196 | * all the code, sync CPUs, then remove the breakpoints. In this time | ||
197 | * if lockdep is enabled, it might jump back into the debug handler | ||
198 | * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF). | ||
199 | * | ||
200 | * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to | ||
201 | * make sure the stack pointer does not get reset back to the top | ||
202 | * of the debug stack, and instead just reuses the current stack. | ||
203 | */ | ||
204 | #if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS) | ||
205 | |||
206 | .macro TRACE_IRQS_OFF_DEBUG | ||
207 | call debug_stack_set_zero | ||
208 | TRACE_IRQS_OFF | ||
209 | call debug_stack_reset | ||
210 | .endm | ||
211 | |||
212 | .macro TRACE_IRQS_ON_DEBUG | ||
213 | call debug_stack_set_zero | ||
214 | TRACE_IRQS_ON | ||
215 | call debug_stack_reset | ||
216 | .endm | ||
217 | |||
218 | .macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET | ||
219 | bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */ | ||
220 | jnc 1f | ||
221 | TRACE_IRQS_ON_DEBUG | ||
222 | 1: | ||
223 | .endm | ||
224 | |||
225 | #else | ||
226 | # define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF | ||
227 | # define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON | ||
228 | # define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ | ||
229 | #endif | ||
230 | |||
231 | /* | ||
194 | * C code is not supposed to know about undefined top of stack. Every time | 232 | * C code is not supposed to know about undefined top of stack. Every time |
195 | * a C function with an pt_regs argument is called from the SYSCALL based | 233 | * a C function with an pt_regs argument is called from the SYSCALL based |
196 | * fast path FIXUP_TOP_OF_STACK is needed. | 234 | * fast path FIXUP_TOP_OF_STACK is needed. |
@@ -1098,7 +1136,7 @@ ENTRY(\sym) | |||
1098 | subq $ORIG_RAX-R15, %rsp | 1136 | subq $ORIG_RAX-R15, %rsp |
1099 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1137 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
1100 | call save_paranoid | 1138 | call save_paranoid |
1101 | TRACE_IRQS_OFF | 1139 | TRACE_IRQS_OFF_DEBUG |
1102 | movq %rsp,%rdi /* pt_regs pointer */ | 1140 | movq %rsp,%rdi /* pt_regs pointer */ |
1103 | xorl %esi,%esi /* no error code */ | 1141 | xorl %esi,%esi /* no error code */ |
1104 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) | 1142 | subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist) |
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip) | |||
1393 | ENTRY(paranoid_exit) | 1431 | ENTRY(paranoid_exit) |
1394 | DEFAULT_FRAME | 1432 | DEFAULT_FRAME |
1395 | DISABLE_INTERRUPTS(CLBR_NONE) | 1433 | DISABLE_INTERRUPTS(CLBR_NONE) |
1396 | TRACE_IRQS_OFF | 1434 | TRACE_IRQS_OFF_DEBUG |
1397 | testl %ebx,%ebx /* swapgs needed? */ | 1435 | testl %ebx,%ebx /* swapgs needed? */ |
1398 | jnz paranoid_restore | 1436 | jnz paranoid_restore |
1399 | testl $3,CS(%rsp) | 1437 | testl $3,CS(%rsp) |
@@ -1404,7 +1442,7 @@ paranoid_swapgs: | |||
1404 | RESTORE_ALL 8 | 1442 | RESTORE_ALL 8 |
1405 | jmp irq_return | 1443 | jmp irq_return |
1406 | paranoid_restore: | 1444 | paranoid_restore: |
1407 | TRACE_IRQS_IRETQ 0 | 1445 | TRACE_IRQS_IRETQ_DEBUG 0 |
1408 | RESTORE_ALL 8 | 1446 | RESTORE_ALL 8 |
1409 | jmp irq_return | 1447 | jmp irq_return |
1410 | paranoid_userspace: | 1448 | paranoid_userspace: |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 32ff36596ab1..c3a7cb4bf6e6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void) | |||
100 | } | 100 | } |
101 | 101 | ||
102 | static int | 102 | static int |
103 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | 103 | ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, |
104 | unsigned const char *new_code) | 104 | unsigned const char *new_code) |
105 | { | 105 | { |
106 | unsigned char replaced[MCOUNT_INSN_SIZE]; | 106 | unsigned char replaced[MCOUNT_INSN_SIZE]; |
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod, | |||
141 | old = ftrace_call_replace(ip, addr); | 141 | old = ftrace_call_replace(ip, addr); |
142 | new = ftrace_nop_replace(); | 142 | new = ftrace_nop_replace(); |
143 | 143 | ||
144 | return ftrace_modify_code(rec->ip, old, new); | 144 | /* |
145 | * On boot up, and when modules are loaded, the MCOUNT_ADDR | ||
146 | * is converted to a nop, and will never become MCOUNT_ADDR | ||
147 | * again. This code is either running before SMP (on boot up) | ||
148 | * or before the code will ever be executed (module load). | ||
149 | * We do not want to use the breakpoint version in this case, | ||
150 | * just modify the code directly. | ||
151 | */ | ||
152 | if (addr == MCOUNT_ADDR) | ||
153 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
154 | |||
155 | /* Normal cases use add_brk_on_nop */ | ||
156 | WARN_ONCE(1, "invalid use of ftrace_make_nop"); | ||
157 | return -EINVAL; | ||
145 | } | 158 | } |
146 | 159 | ||
147 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | 160 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) |
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | |||
152 | old = ftrace_nop_replace(); | 165 | old = ftrace_nop_replace(); |
153 | new = ftrace_call_replace(ip, addr); | 166 | new = ftrace_call_replace(ip, addr); |
154 | 167 | ||
155 | return ftrace_modify_code(rec->ip, old, new); | 168 | /* Should only be called when module is loaded */ |
169 | return ftrace_modify_code_direct(rec->ip, old, new); | ||
156 | } | 170 | } |
157 | 171 | ||
172 | /* | ||
173 | * The modifying_ftrace_code is used to tell the breakpoint | ||
174 | * handler to call ftrace_int3_handler(). If it fails to | ||
175 | * call this handler for a breakpoint added by ftrace, then | ||
176 | * the kernel may crash. | ||
177 | * | ||
178 | * As atomic_writes on x86 do not need a barrier, we do not | ||
179 | * need to add smp_mb()s for this to work. It is also considered | ||
180 | * that we can not read the modifying_ftrace_code before | ||
181 | * executing the breakpoint. That would be quite remarkable if | ||
182 | * it could do that. Here's the flow that is required: | ||
183 | * | ||
184 | * CPU-0 CPU-1 | ||
185 | * | ||
186 | * atomic_inc(mfc); | ||
187 | * write int3s | ||
188 | * <trap-int3> // implicit (r)mb | ||
189 | * if (atomic_read(mfc)) | ||
190 | * call ftrace_int3_handler() | ||
191 | * | ||
192 | * Then when we are finished: | ||
193 | * | ||
194 | * atomic_dec(mfc); | ||
195 | * | ||
196 | * If we hit a breakpoint that was not set by ftrace, it does not | ||
197 | * matter if ftrace_int3_handler() is called or not. It will | ||
198 | * simply be ignored. But it is crucial that a ftrace nop/caller | ||
199 | * breakpoint is handled. No other user should ever place a | ||
200 | * breakpoint on an ftrace nop/caller location. It must only | ||
201 | * be done by this code. | ||
202 | */ | ||
203 | atomic_t modifying_ftrace_code __read_mostly; | ||
204 | |||
205 | static int | ||
206 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
207 | unsigned const char *new_code); | ||
208 | |||
158 | int ftrace_update_ftrace_func(ftrace_func_t func) | 209 | int ftrace_update_ftrace_func(ftrace_func_t func) |
159 | { | 210 | { |
160 | unsigned long ip = (unsigned long)(&ftrace_call); | 211 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func) | |||
163 | 214 | ||
164 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | 215 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); |
165 | new = ftrace_call_replace(ip, (unsigned long)func); | 216 | new = ftrace_call_replace(ip, (unsigned long)func); |
217 | |||
218 | /* See comment above by declaration of modifying_ftrace_code */ | ||
219 | atomic_inc(&modifying_ftrace_code); | ||
220 | |||
166 | ret = ftrace_modify_code(ip, old, new); | 221 | ret = ftrace_modify_code(ip, old, new); |
167 | 222 | ||
223 | atomic_dec(&modifying_ftrace_code); | ||
224 | |||
168 | return ret; | 225 | return ret; |
169 | } | 226 | } |
170 | 227 | ||
171 | int modifying_ftrace_code __read_mostly; | ||
172 | |||
173 | /* | 228 | /* |
174 | * A breakpoint was added to the code address we are about to | 229 | * A breakpoint was added to the code address we are about to |
175 | * modify, and this is the handle that will just skip over it. | 230 | * modify, and this is the handle that will just skip over it. |
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable) | |||
489 | } | 544 | } |
490 | } | 545 | } |
491 | 546 | ||
547 | static int | ||
548 | ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | ||
549 | unsigned const char *new_code) | ||
550 | { | ||
551 | int ret; | ||
552 | |||
553 | ret = add_break(ip, old_code); | ||
554 | if (ret) | ||
555 | goto out; | ||
556 | |||
557 | run_sync(); | ||
558 | |||
559 | ret = add_update_code(ip, new_code); | ||
560 | if (ret) | ||
561 | goto fail_update; | ||
562 | |||
563 | run_sync(); | ||
564 | |||
565 | ret = ftrace_write(ip, new_code, 1); | ||
566 | if (ret) { | ||
567 | ret = -EPERM; | ||
568 | goto out; | ||
569 | } | ||
570 | run_sync(); | ||
571 | out: | ||
572 | return ret; | ||
573 | |||
574 | fail_update: | ||
575 | probe_kernel_write((void *)ip, &old_code[0], 1); | ||
576 | goto out; | ||
577 | } | ||
578 | |||
492 | void arch_ftrace_update_code(int command) | 579 | void arch_ftrace_update_code(int command) |
493 | { | 580 | { |
494 | modifying_ftrace_code++; | 581 | /* See comment above by declaration of modifying_ftrace_code */ |
582 | atomic_inc(&modifying_ftrace_code); | ||
495 | 583 | ||
496 | ftrace_modify_all_code(command); | 584 | ftrace_modify_all_code(command); |
497 | 585 | ||
498 | modifying_ftrace_code--; | 586 | atomic_dec(&modifying_ftrace_code); |
499 | } | 587 | } |
500 | 588 | ||
501 | int __init ftrace_dyn_arch_init(void *data) | 589 | int __init ftrace_dyn_arch_init(void *data) |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 90875279ef3d..a0b2f84457be 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |||
444 | */ | 444 | */ |
445 | if (unlikely(is_debug_stack(regs->sp))) { | 445 | if (unlikely(is_debug_stack(regs->sp))) { |
446 | debug_stack_set_zero(); | 446 | debug_stack_set_zero(); |
447 | __get_cpu_var(update_debug_stack) = 1; | 447 | this_cpu_write(update_debug_stack, 1); |
448 | } | 448 | } |
449 | } | 449 | } |
450 | 450 | ||
451 | static inline void nmi_nesting_postprocess(void) | 451 | static inline void nmi_nesting_postprocess(void) |
452 | { | 452 | { |
453 | if (unlikely(__get_cpu_var(update_debug_stack))) | 453 | if (unlikely(this_cpu_read(update_debug_stack))) { |
454 | debug_stack_reset(); | 454 | debug_stack_reset(); |
455 | this_cpu_write(update_debug_stack, 0); | ||
456 | } | ||
455 | } | 457 | } |
456 | #endif | 458 | #endif |
457 | 459 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 13b1990c7c58..c4c6a5c2bf0f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child, | |||
1211 | 0, sizeof(struct user_i387_struct), | 1211 | 0, sizeof(struct user_i387_struct), |
1212 | datap); | 1212 | datap); |
1213 | 1213 | ||
1214 | /* normal 64bit interface to access TLS data. | ||
1215 | Works just like arch_prctl, except that the arguments | ||
1216 | are reversed. */ | ||
1217 | case PTRACE_ARCH_PRCTL: | ||
1218 | return do_arch_prctl(child, data, addr); | ||
1219 | |||
1220 | default: | 1214 | default: |
1221 | return compat_ptrace_request(child, request, addr, data); | 1215 | return compat_ptrace_request(child, request, addr, data); |
1222 | } | 1216 | } |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ff08457a025d..05b31d92f69c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -303,8 +303,12 @@ gp_in_kernel: | |||
303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) | 303 | dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) |
304 | { | 304 | { |
305 | #ifdef CONFIG_DYNAMIC_FTRACE | 305 | #ifdef CONFIG_DYNAMIC_FTRACE |
306 | /* ftrace must be first, everything else may cause a recursive crash */ | 306 | /* |
307 | if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs)) | 307 | * ftrace must be first, everything else may cause a recursive crash. |
308 | * See note by declaration of modifying_ftrace_code in ftrace.c | ||
309 | */ | ||
310 | if (unlikely(atomic_read(&modifying_ftrace_code)) && | ||
311 | ftrace_int3_handler(regs)) | ||
308 | return; | 312 | return; |
309 | #endif | 313 | #endif |
310 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP | 314 | #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP |