diff options
author | David S. Miller <davem@davemloft.net> | 2015-07-23 03:41:16 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-07-23 03:41:16 -0400 |
commit | c5e40ee287db61a79af1746954ee03ebbf1ff8a3 (patch) | |
tree | 007da00e75e9b84766ac4868421705300e1e2e14 /arch/x86/kernel | |
parent | 052831879945be0d9fad2216b127147c565ec1b1 (diff) | |
parent | c5dfd654d0ec0a28fe81e7bd4d4fd984a9855e09 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
net/bridge/br_mdb.c
br_mdb.c conflict was a function call being removed to fix a bug in
'net' but whose signature was changed in 'net-next'.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/fpu/init.c | 40 | ||||
-rw-r--r-- | arch/x86/kernel/nmi.c | 123 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 11 |
4 files changed, 104 insertions(+), 72 deletions(-)
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 32826791e675..0b39173dd971 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <asm/fpu/internal.h> | 4 | #include <asm/fpu/internal.h> |
5 | #include <asm/tlbflush.h> | 5 | #include <asm/tlbflush.h> |
6 | 6 | ||
7 | #include <linux/sched.h> | ||
8 | |||
7 | /* | 9 | /* |
8 | * Initialize the TS bit in CR0 according to the style of context-switches | 10 | * Initialize the TS bit in CR0 according to the style of context-switches |
9 | * we are using: | 11 | * we are using: |
@@ -136,6 +138,43 @@ static void __init fpu__init_system_generic(void) | |||
136 | unsigned int xstate_size; | 138 | unsigned int xstate_size; |
137 | EXPORT_SYMBOL_GPL(xstate_size); | 139 | EXPORT_SYMBOL_GPL(xstate_size); |
138 | 140 | ||
141 | /* Enforce that 'MEMBER' is the last field of 'TYPE': */ | ||
142 | #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ | ||
143 | BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) | ||
144 | |||
145 | /* | ||
146 | * We append the 'struct fpu' to the task_struct: | ||
147 | */ | ||
148 | static void __init fpu__init_task_struct_size(void) | ||
149 | { | ||
150 | int task_size = sizeof(struct task_struct); | ||
151 | |||
152 | /* | ||
153 | * Subtract off the static size of the register state. | ||
154 | * It potentially has a bunch of padding. | ||
155 | */ | ||
156 | task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); | ||
157 | |||
158 | /* | ||
159 | * Add back the dynamically-calculated register state | ||
160 | * size. | ||
161 | */ | ||
162 | task_size += xstate_size; | ||
163 | |||
164 | /* | ||
165 | * We dynamically size 'struct fpu', so we require that | ||
166 | * it be at the end of 'thread_struct' and that | ||
167 | * 'thread_struct' be at the end of 'task_struct'. If | ||
168 | * you hit a compile error here, check the structure to | ||
169 | * see if something got added to the end. | ||
170 | */ | ||
171 | CHECK_MEMBER_AT_END_OF(struct fpu, state); | ||
172 | CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); | ||
173 | CHECK_MEMBER_AT_END_OF(struct task_struct, thread); | ||
174 | |||
175 | arch_task_struct_size = task_size; | ||
176 | } | ||
177 | |||
139 | /* | 178 | /* |
140 | * Set up the xstate_size based on the legacy FPU context size. | 179 | * Set up the xstate_size based on the legacy FPU context size. |
141 | * | 180 | * |
@@ -287,6 +326,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) | |||
287 | fpu__init_system_generic(); | 326 | fpu__init_system_generic(); |
288 | fpu__init_system_xstate_size_legacy(); | 327 | fpu__init_system_xstate_size_legacy(); |
289 | fpu__init_system_xstate(); | 328 | fpu__init_system_xstate(); |
329 | fpu__init_task_struct_size(); | ||
290 | 330 | ||
291 | fpu__init_system_ctx_switch(); | 331 | fpu__init_system_ctx_switch(); |
292 | } | 332 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index c3e985d1751c..d05bd2e2ee91 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -408,15 +408,15 @@ static void default_do_nmi(struct pt_regs *regs) | |||
408 | NOKPROBE_SYMBOL(default_do_nmi); | 408 | NOKPROBE_SYMBOL(default_do_nmi); |
409 | 409 | ||
410 | /* | 410 | /* |
411 | * NMIs can hit breakpoints which will cause it to lose its | 411 | * NMIs can page fault or hit breakpoints which will cause it to lose |
412 | * NMI context with the CPU when the breakpoint does an iret. | 412 | * its NMI context with the CPU when the breakpoint or page fault does an IRET. |
413 | */ | 413 | * |
414 | #ifdef CONFIG_X86_32 | 414 | * As a result, NMIs can nest if NMIs get unmasked due an IRET during |
415 | /* | 415 | * NMI processing. On x86_64, the asm glue protects us from nested NMIs |
416 | * For i386, NMIs use the same stack as the kernel, and we can | 416 | * if the outer NMI came from kernel mode, but we can still nest if the |
417 | * add a workaround to the iret problem in C (preventing nested | 417 | * outer NMI came from user mode. |
418 | * NMIs if an NMI takes a trap). Simply have 3 states the NMI | 418 | * |
419 | * can be in: | 419 | * To handle these nested NMIs, we have three states: |
420 | * | 420 | * |
421 | * 1) not running | 421 | * 1) not running |
422 | * 2) executing | 422 | * 2) executing |
@@ -430,15 +430,14 @@ NOKPROBE_SYMBOL(default_do_nmi); | |||
430 | * (Note, the latch is binary, thus multiple NMIs triggering, | 430 | * (Note, the latch is binary, thus multiple NMIs triggering, |
431 | * when one is running, are ignored. Only one NMI is restarted.) | 431 | * when one is running, are ignored. Only one NMI is restarted.) |
432 | * | 432 | * |
433 | * If an NMI hits a breakpoint that executes an iret, another | 433 | * If an NMI executes an iret, another NMI can preempt it. We do not |
434 | * NMI can preempt it. We do not want to allow this new NMI | 434 | * want to allow this new NMI to run, but we want to execute it when the |
435 | * to run, but we want to execute it when the first one finishes. | 435 | * first one finishes. We set the state to "latched", and the exit of |
436 | * We set the state to "latched", and the exit of the first NMI will | 436 | * the first NMI will perform a dec_return, if the result is zero |
437 | * perform a dec_return, if the result is zero (NOT_RUNNING), then | 437 | * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the |
438 | * it will simply exit the NMI handler. If not, the dec_return | 438 | * dec_return would have set the state to NMI_EXECUTING (what we want it |
439 | * would have set the state to NMI_EXECUTING (what we want it to | 439 | * to be when we are running). In this case, we simply jump back to |
440 | * be when we are running). In this case, we simply jump back | 440 | * rerun the NMI handler again, and restart the 'latched' NMI. |
441 | * to rerun the NMI handler again, and restart the 'latched' NMI. | ||
442 | * | 441 | * |
443 | * No trap (breakpoint or page fault) should be hit before nmi_restart, | 442 | * No trap (breakpoint or page fault) should be hit before nmi_restart, |
444 | * thus there is no race between the first check of state for NOT_RUNNING | 443 | * thus there is no race between the first check of state for NOT_RUNNING |
@@ -461,49 +460,36 @@ enum nmi_states { | |||
461 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); | 460 | static DEFINE_PER_CPU(enum nmi_states, nmi_state); |
462 | static DEFINE_PER_CPU(unsigned long, nmi_cr2); | 461 | static DEFINE_PER_CPU(unsigned long, nmi_cr2); |
463 | 462 | ||
464 | #define nmi_nesting_preprocess(regs) \ | 463 | #ifdef CONFIG_X86_64 |
465 | do { \ | ||
466 | if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { \ | ||
467 | this_cpu_write(nmi_state, NMI_LATCHED); \ | ||
468 | return; \ | ||
469 | } \ | ||
470 | this_cpu_write(nmi_state, NMI_EXECUTING); \ | ||
471 | this_cpu_write(nmi_cr2, read_cr2()); \ | ||
472 | } while (0); \ | ||
473 | nmi_restart: | ||
474 | |||
475 | #define nmi_nesting_postprocess() \ | ||
476 | do { \ | ||
477 | if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) \ | ||
478 | write_cr2(this_cpu_read(nmi_cr2)); \ | ||
479 | if (this_cpu_dec_return(nmi_state)) \ | ||
480 | goto nmi_restart; \ | ||
481 | } while (0) | ||
482 | #else /* x86_64 */ | ||
483 | /* | 464 | /* |
484 | * In x86_64 things are a bit more difficult. This has the same problem | 465 | * In x86_64, we need to handle breakpoint -> NMI -> breakpoint. Without |
485 | * where an NMI hitting a breakpoint that calls iret will remove the | 466 | * some care, the inner breakpoint will clobber the outer breakpoint's |
486 | * NMI context, allowing a nested NMI to enter. What makes this more | 467 | * stack. |
487 | * difficult is that both NMIs and breakpoints have their own stack. | ||
488 | * When a new NMI or breakpoint is executed, the stack is set to a fixed | ||
489 | * point. If an NMI is nested, it will have its stack set at that same | ||
490 | * fixed address that the first NMI had, and will start corrupting the | ||
491 | * stack. This is handled in entry_64.S, but the same problem exists with | ||
492 | * the breakpoint stack. | ||
493 | * | 468 | * |
494 | * If a breakpoint is being processed, and the debug stack is being used, | 469 | * If a breakpoint is being processed, and the debug stack is being |
495 | * if an NMI comes in and also hits a breakpoint, the stack pointer | 470 | * used, if an NMI comes in and also hits a breakpoint, the stack |
496 | * will be set to the same fixed address as the breakpoint that was | 471 | * pointer will be set to the same fixed address as the breakpoint that |
497 | * interrupted, causing that stack to be corrupted. To handle this case, | 472 | * was interrupted, causing that stack to be corrupted. To handle this |
498 | * check if the stack that was interrupted is the debug stack, and if | 473 | * case, check if the stack that was interrupted is the debug stack, and |
499 | * so, change the IDT so that new breakpoints will use the current stack | 474 | * if so, change the IDT so that new breakpoints will use the current |
500 | * and not switch to the fixed address. On return of the NMI, switch back | 475 | * stack and not switch to the fixed address. On return of the NMI, |
501 | * to the original IDT. | 476 | * switch back to the original IDT. |
502 | */ | 477 | */ |
503 | static DEFINE_PER_CPU(int, update_debug_stack); | 478 | static DEFINE_PER_CPU(int, update_debug_stack); |
479 | #endif | ||
504 | 480 | ||
505 | static inline void nmi_nesting_preprocess(struct pt_regs *regs) | 481 | dotraplinkage notrace void |
482 | do_nmi(struct pt_regs *regs, long error_code) | ||
506 | { | 483 | { |
484 | if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) { | ||
485 | this_cpu_write(nmi_state, NMI_LATCHED); | ||
486 | return; | ||
487 | } | ||
488 | this_cpu_write(nmi_state, NMI_EXECUTING); | ||
489 | this_cpu_write(nmi_cr2, read_cr2()); | ||
490 | nmi_restart: | ||
491 | |||
492 | #ifdef CONFIG_X86_64 | ||
507 | /* | 493 | /* |
508 | * If we interrupted a breakpoint, it is possible that | 494 | * If we interrupted a breakpoint, it is possible that |
509 | * the nmi handler will have breakpoints too. We need to | 495 | * the nmi handler will have breakpoints too. We need to |
@@ -514,22 +500,8 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs) | |||
514 | debug_stack_set_zero(); | 500 | debug_stack_set_zero(); |
515 | this_cpu_write(update_debug_stack, 1); | 501 | this_cpu_write(update_debug_stack, 1); |
516 | } | 502 | } |
517 | } | ||
518 | |||
519 | static inline void nmi_nesting_postprocess(void) | ||
520 | { | ||
521 | if (unlikely(this_cpu_read(update_debug_stack))) { | ||
522 | debug_stack_reset(); | ||
523 | this_cpu_write(update_debug_stack, 0); | ||
524 | } | ||
525 | } | ||
526 | #endif | 503 | #endif |
527 | 504 | ||
528 | dotraplinkage notrace void | ||
529 | do_nmi(struct pt_regs *regs, long error_code) | ||
530 | { | ||
531 | nmi_nesting_preprocess(regs); | ||
532 | |||
533 | nmi_enter(); | 505 | nmi_enter(); |
534 | 506 | ||
535 | inc_irq_stat(__nmi_count); | 507 | inc_irq_stat(__nmi_count); |
@@ -539,8 +511,17 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
539 | 511 | ||
540 | nmi_exit(); | 512 | nmi_exit(); |
541 | 513 | ||
542 | /* On i386, may loop back to preprocess */ | 514 | #ifdef CONFIG_X86_64 |
543 | nmi_nesting_postprocess(); | 515 | if (unlikely(this_cpu_read(update_debug_stack))) { |
516 | debug_stack_reset(); | ||
517 | this_cpu_write(update_debug_stack, 0); | ||
518 | } | ||
519 | #endif | ||
520 | |||
521 | if (unlikely(this_cpu_read(nmi_cr2) != read_cr2())) | ||
522 | write_cr2(this_cpu_read(nmi_cr2)); | ||
523 | if (this_cpu_dec_return(nmi_state)) | ||
524 | goto nmi_restart; | ||
544 | } | 525 | } |
545 | NOKPROBE_SYMBOL(do_nmi); | 526 | NOKPROBE_SYMBOL(do_nmi); |
546 | 527 | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9cad694ed7c4..397688beed4b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); | |||
81 | */ | 81 | */ |
82 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 82 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
83 | { | 83 | { |
84 | *dst = *src; | 84 | memcpy(dst, src, arch_task_struct_size); |
85 | 85 | ||
86 | return fpu__copy(&dst->thread.fpu, &src->thread.fpu); | 86 | return fpu__copy(&dst->thread.fpu, &src->thread.fpu); |
87 | } | 87 | } |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d3010aa79daf..b1f3ed9c7a9e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -992,8 +992,17 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
992 | 992 | ||
993 | common_cpu_up(cpu, tidle); | 993 | common_cpu_up(cpu, tidle); |
994 | 994 | ||
995 | /* | ||
996 | * We have to walk the irq descriptors to setup the vector | ||
997 | * space for the cpu which comes online. Prevent irq | ||
998 | * alloc/free across the bringup. | ||
999 | */ | ||
1000 | irq_lock_sparse(); | ||
1001 | |||
995 | err = do_boot_cpu(apicid, cpu, tidle); | 1002 | err = do_boot_cpu(apicid, cpu, tidle); |
1003 | |||
996 | if (err) { | 1004 | if (err) { |
1005 | irq_unlock_sparse(); | ||
997 | pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); | 1006 | pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu); |
998 | return -EIO; | 1007 | return -EIO; |
999 | } | 1008 | } |
@@ -1011,6 +1020,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
1011 | touch_nmi_watchdog(); | 1020 | touch_nmi_watchdog(); |
1012 | } | 1021 | } |
1013 | 1022 | ||
1023 | irq_unlock_sparse(); | ||
1024 | |||
1014 | return 0; | 1025 | return 0; |
1015 | } | 1026 | } |
1016 | 1027 | ||