diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 18:56:41 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-06 18:56:41 -0400 |
| commit | 8f147727030bf9e81331ab9b8f42d4611bb6a3d9 (patch) | |
| tree | d3f1e2410174bb8c479590a8f1c7e204e3a48eaf | |
| parent | 53f8b081c184328b82c8a7b5e70b8243b3cea8bd (diff) | |
| parent | 2c4645439e8f2f6e7c37f158feae6f6a82baa910 (diff) | |
Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 irq updates from Ingo Molnar:
"Here are the main changes in this tree:
- Introduce x86-64 IRQ/exception/debug stack guard pages to detect
stack overflows immediately and deterministically.
- Clean up over a decade's worth of accumulated cruft.
The outcome of this should be more clear-cut faults/crashes when any
of the low level x86 CPU stacks overflow, instead of silent memory
corruption and sporadic failures much later on"
* 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
x86/irq: Fix outdated comments
x86/irq/64: Remove stack overflow debug code
x86/irq/64: Remap the IRQ stack with guard pages
x86/irq/64: Split the IRQ stack into its own pages
x86/irq/64: Init hardirq_stack_ptr during CPU hotplug
x86/irq/32: Handle irq stack allocation failure proper
x86/irq/32: Invoke irq_ctx_init() from init_IRQ()
x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr
x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr
x86/irq/32: Make irq stack a character array
x86/irq/32: Define IRQ_STACK_SIZE
x86/dumpstack/64: Speedup in_exception_stack()
x86/exceptions: Split debug IST stack
x86/exceptions: Enable IST guard pages
x86/exceptions: Disconnect IST index and stack order
x86/cpu: Remove orig_ist array
x86/cpu: Prepare TSS.IST setup for guard pages
x86/dumpstack/64: Use cpu_entry_area instead of orig_ist
x86/irq/64: Use cpu entry area instead of orig_ist
x86/traps: Use cpu_entry_area instead of orig_ist
...
33 files changed, 377 insertions, 317 deletions
diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks index 9a0aa4d3a866..d1bfb0b95ee0 100644 --- a/Documentation/x86/kernel-stacks +++ b/Documentation/x86/kernel-stacks | |||
| @@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt. | |||
| 59 | 59 | ||
| 60 | The currently assigned IST stacks are :- | 60 | The currently assigned IST stacks are :- |
| 61 | 61 | ||
| 62 | * DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). | 62 | * ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE). |
| 63 | 63 | ||
| 64 | Used for interrupt 8 - Double Fault Exception (#DF). | 64 | Used for interrupt 8 - Double Fault Exception (#DF). |
| 65 | 65 | ||
| @@ -68,7 +68,7 @@ The currently assigned IST stacks are :- | |||
| 68 | Using a separate stack allows the kernel to recover from it well enough | 68 | Using a separate stack allows the kernel to recover from it well enough |
| 69 | in many cases to still output an oops. | 69 | in many cases to still output an oops. |
| 70 | 70 | ||
| 71 | * NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). | 71 | * ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE). |
| 72 | 72 | ||
| 73 | Used for non-maskable interrupts (NMI). | 73 | Used for non-maskable interrupts (NMI). |
| 74 | 74 | ||
| @@ -76,7 +76,7 @@ The currently assigned IST stacks are :- | |||
| 76 | middle of switching stacks. Using IST for NMI events avoids making | 76 | middle of switching stacks. Using IST for NMI events avoids making |
| 77 | assumptions about the previous state of the kernel stack. | 77 | assumptions about the previous state of the kernel stack. |
| 78 | 78 | ||
| 79 | * DEBUG_STACK. DEBUG_STKSZ | 79 | * ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE). |
| 80 | 80 | ||
| 81 | Used for hardware debug interrupts (interrupt 1) and for software | 81 | Used for hardware debug interrupts (interrupt 1) and for software |
| 82 | debug interrupts (INT3). | 82 | debug interrupts (INT3). |
| @@ -86,7 +86,12 @@ The currently assigned IST stacks are :- | |||
| 86 | avoids making assumptions about the previous state of the kernel | 86 | avoids making assumptions about the previous state of the kernel |
| 87 | stack. | 87 | stack. |
| 88 | 88 | ||
| 89 | * MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE). | 89 | To handle nested #DB correctly there exist two instances of DB stacks. On |
| 90 | #DB entry the IST stackpointer for #DB is switched to the second instance | ||
| 91 | so a nested #DB starts from a clean stack. The nested #DB switches | ||
| 92 | the IST stackpointer to a guard hole to catch triple nesting. | ||
| 93 | |||
| 94 | * ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE). | ||
| 90 | 95 | ||
| 91 | Used for interrupt 18 - Machine Check Exception (#MC). | 96 | Used for interrupt 18 - Machine Check Exception (#MC). |
| 92 | 97 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7e59efc70b91..db95da6d644d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -14,6 +14,7 @@ config X86_32 | |||
| 14 | select ARCH_WANT_IPC_PARSE_VERSION | 14 | select ARCH_WANT_IPC_PARSE_VERSION |
| 15 | select CLKSRC_I8253 | 15 | select CLKSRC_I8253 |
| 16 | select CLONE_BACKWARDS | 16 | select CLONE_BACKWARDS |
| 17 | select HAVE_DEBUG_STACKOVERFLOW | ||
| 17 | select MODULES_USE_ELF_REL | 18 | select MODULES_USE_ELF_REL |
| 18 | select OLD_SIGACTION | 19 | select OLD_SIGACTION |
| 19 | 20 | ||
| @@ -138,7 +139,6 @@ config X86 | |||
| 138 | select HAVE_COPY_THREAD_TLS | 139 | select HAVE_COPY_THREAD_TLS |
| 139 | select HAVE_C_RECORDMCOUNT | 140 | select HAVE_C_RECORDMCOUNT |
| 140 | select HAVE_DEBUG_KMEMLEAK | 141 | select HAVE_DEBUG_KMEMLEAK |
| 141 | select HAVE_DEBUG_STACKOVERFLOW | ||
| 142 | select HAVE_DMA_CONTIGUOUS | 142 | select HAVE_DMA_CONTIGUOUS |
| 143 | select HAVE_DYNAMIC_FTRACE | 143 | select HAVE_DYNAMIC_FTRACE |
| 144 | select HAVE_DYNAMIC_FTRACE_WITH_REGS | 144 | select HAVE_DYNAMIC_FTRACE_WITH_REGS |
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index e7e270603fe7..20e45d9b4e15 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S | |||
| @@ -298,7 +298,7 @@ ENTRY(__switch_to_asm) | |||
| 298 | 298 | ||
| 299 | #ifdef CONFIG_STACKPROTECTOR | 299 | #ifdef CONFIG_STACKPROTECTOR |
| 300 | movq TASK_stack_canary(%rsi), %rbx | 300 | movq TASK_stack_canary(%rsi), %rbx |
| 301 | movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset | 301 | movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset |
| 302 | #endif | 302 | #endif |
| 303 | 303 | ||
| 304 | #ifdef CONFIG_RETPOLINE | 304 | #ifdef CONFIG_RETPOLINE |
| @@ -430,8 +430,8 @@ END(irq_entries_start) | |||
| 430 | * it before we actually move ourselves to the IRQ stack. | 430 | * it before we actually move ourselves to the IRQ stack. |
| 431 | */ | 431 | */ |
| 432 | 432 | ||
| 433 | movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8) | 433 | movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8) |
| 434 | movq PER_CPU_VAR(irq_stack_ptr), %rsp | 434 | movq PER_CPU_VAR(hardirq_stack_ptr), %rsp |
| 435 | 435 | ||
| 436 | #ifdef CONFIG_DEBUG_ENTRY | 436 | #ifdef CONFIG_DEBUG_ENTRY |
| 437 | /* | 437 | /* |
| @@ -840,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt | |||
| 840 | /* | 840 | /* |
| 841 | * Exception entry points. | 841 | * Exception entry points. |
| 842 | */ | 842 | */ |
| 843 | #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) | 843 | #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8) |
| 844 | 844 | ||
| 845 | /** | 845 | /** |
| 846 | * idtentry - Generate an IDT entry stub | 846 | * idtentry - Generate an IDT entry stub |
| @@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt | |||
| 878 | * @paranoid == 2 is special: the stub will never switch stacks. This is for | 878 | * @paranoid == 2 is special: the stub will never switch stacks. This is for |
| 879 | * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. | 879 | * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. |
| 880 | */ | 880 | */ |
| 881 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 | 881 | .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 |
| 882 | ENTRY(\sym) | 882 | ENTRY(\sym) |
| 883 | UNWIND_HINT_IRET_REGS offset=\has_error_code*8 | 883 | UNWIND_HINT_IRET_REGS offset=\has_error_code*8 |
| 884 | 884 | ||
| @@ -924,13 +924,13 @@ ENTRY(\sym) | |||
| 924 | .endif | 924 | .endif |
| 925 | 925 | ||
| 926 | .if \shift_ist != -1 | 926 | .if \shift_ist != -1 |
| 927 | subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) | 927 | subq $\ist_offset, CPU_TSS_IST(\shift_ist) |
| 928 | .endif | 928 | .endif |
| 929 | 929 | ||
| 930 | call \do_sym | 930 | call \do_sym |
| 931 | 931 | ||
| 932 | .if \shift_ist != -1 | 932 | .if \shift_ist != -1 |
| 933 | addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) | 933 | addq $\ist_offset, CPU_TSS_IST(\shift_ist) |
| 934 | .endif | 934 | .endif |
| 935 | 935 | ||
| 936 | /* these procedures expect "no swapgs" flag in ebx */ | 936 | /* these procedures expect "no swapgs" flag in ebx */ |
| @@ -1128,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ | |||
| 1128 | hv_stimer0_callback_vector hv_stimer0_vector_handler | 1128 | hv_stimer0_callback_vector hv_stimer0_vector_handler |
| 1129 | #endif /* CONFIG_HYPERV */ | 1129 | #endif /* CONFIG_HYPERV */ |
| 1130 | 1130 | ||
| 1131 | idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK | 1131 | idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET |
| 1132 | idtentry int3 do_int3 has_error_code=0 | 1132 | idtentry int3 do_int3 has_error_code=0 |
| 1133 | idtentry stack_segment do_stack_segment has_error_code=1 | 1133 | idtentry stack_segment do_stack_segment has_error_code=1 |
| 1134 | 1134 | ||
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 29c706415443..cff3f3f3bfe0 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h | |||
| @@ -7,6 +7,64 @@ | |||
| 7 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
| 8 | #include <asm/intel_ds.h> | 8 | #include <asm/intel_ds.h> |
| 9 | 9 | ||
| 10 | #ifdef CONFIG_X86_64 | ||
| 11 | |||
| 12 | /* Macro to enforce the same ordering and stack sizes */ | ||
| 13 | #define ESTACKS_MEMBERS(guardsize, db2_holesize)\ | ||
| 14 | char DF_stack_guard[guardsize]; \ | ||
| 15 | char DF_stack[EXCEPTION_STKSZ]; \ | ||
| 16 | char NMI_stack_guard[guardsize]; \ | ||
| 17 | char NMI_stack[EXCEPTION_STKSZ]; \ | ||
| 18 | char DB2_stack_guard[guardsize]; \ | ||
| 19 | char DB2_stack[db2_holesize]; \ | ||
| 20 | char DB1_stack_guard[guardsize]; \ | ||
| 21 | char DB1_stack[EXCEPTION_STKSZ]; \ | ||
| 22 | char DB_stack_guard[guardsize]; \ | ||
| 23 | char DB_stack[EXCEPTION_STKSZ]; \ | ||
| 24 | char MCE_stack_guard[guardsize]; \ | ||
| 25 | char MCE_stack[EXCEPTION_STKSZ]; \ | ||
| 26 | char IST_top_guard[guardsize]; \ | ||
| 27 | |||
| 28 | /* The exception stacks' physical storage. No guard pages required */ | ||
| 29 | struct exception_stacks { | ||
| 30 | ESTACKS_MEMBERS(0, 0) | ||
| 31 | }; | ||
| 32 | |||
| 33 | /* The effective cpu entry area mapping with guard pages. */ | ||
| 34 | struct cea_exception_stacks { | ||
| 35 | ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ) | ||
| 36 | }; | ||
| 37 | |||
| 38 | /* | ||
| 39 | * The exception stack ordering in [cea_]exception_stacks | ||
| 40 | */ | ||
| 41 | enum exception_stack_ordering { | ||
| 42 | ESTACK_DF, | ||
| 43 | ESTACK_NMI, | ||
| 44 | ESTACK_DB2, | ||
| 45 | ESTACK_DB1, | ||
| 46 | ESTACK_DB, | ||
| 47 | ESTACK_MCE, | ||
| 48 | N_EXCEPTION_STACKS | ||
| 49 | }; | ||
| 50 | |||
| 51 | #define CEA_ESTACK_SIZE(st) \ | ||
| 52 | sizeof(((struct cea_exception_stacks *)0)->st## _stack) | ||
| 53 | |||
| 54 | #define CEA_ESTACK_BOT(ceastp, st) \ | ||
| 55 | ((unsigned long)&(ceastp)->st## _stack) | ||
| 56 | |||
| 57 | #define CEA_ESTACK_TOP(ceastp, st) \ | ||
| 58 | (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st)) | ||
| 59 | |||
| 60 | #define CEA_ESTACK_OFFS(st) \ | ||
| 61 | offsetof(struct cea_exception_stacks, st## _stack) | ||
| 62 | |||
| 63 | #define CEA_ESTACK_PAGES \ | ||
| 64 | (sizeof(struct cea_exception_stacks) / PAGE_SIZE) | ||
| 65 | |||
| 66 | #endif | ||
| 67 | |||
| 10 | /* | 68 | /* |
| 11 | * cpu_entry_area is a percpu region that contains things needed by the CPU | 69 | * cpu_entry_area is a percpu region that contains things needed by the CPU |
| 12 | * and early entry/exit code. Real types aren't used for all fields here | 70 | * and early entry/exit code. Real types aren't used for all fields here |
| @@ -32,12 +90,9 @@ struct cpu_entry_area { | |||
| 32 | 90 | ||
| 33 | #ifdef CONFIG_X86_64 | 91 | #ifdef CONFIG_X86_64 |
| 34 | /* | 92 | /* |
| 35 | * Exception stacks used for IST entries. | 93 | * Exception stacks used for IST entries with guard pages. |
| 36 | * | ||
| 37 | * In the future, this should have a separate slot for each stack | ||
| 38 | * with guard pages between them. | ||
| 39 | */ | 94 | */ |
| 40 | char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; | 95 | struct cea_exception_stacks estacks; |
| 41 | #endif | 96 | #endif |
| 42 | #ifdef CONFIG_CPU_SUP_INTEL | 97 | #ifdef CONFIG_CPU_SUP_INTEL |
| 43 | /* | 98 | /* |
| @@ -57,6 +112,7 @@ struct cpu_entry_area { | |||
| 57 | #define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) | 112 | #define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) |
| 58 | 113 | ||
| 59 | DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); | 114 | DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); |
| 115 | DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); | ||
| 60 | 116 | ||
| 61 | extern void setup_cpu_entry_areas(void); | 117 | extern void setup_cpu_entry_areas(void); |
| 62 | extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); | 118 | extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); |
| @@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu) | |||
| 76 | return &get_cpu_entry_area(cpu)->entry_stack_page.stack; | 132 | return &get_cpu_entry_area(cpu)->entry_stack_page.stack; |
| 77 | } | 133 | } |
| 78 | 134 | ||
| 135 | #define __this_cpu_ist_top_va(name) \ | ||
| 136 | CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name) | ||
| 137 | |||
| 79 | #endif | 138 | #endif |
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9e5ca30738e5..1a8609a15856 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h | |||
| @@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void) | |||
| 104 | { | 104 | { |
| 105 | __this_cpu_dec(debug_stack_usage); | 105 | __this_cpu_dec(debug_stack_usage); |
| 106 | } | 106 | } |
| 107 | int is_debug_stack(unsigned long addr); | ||
| 108 | void debug_stack_set_zero(void); | 107 | void debug_stack_set_zero(void); |
| 109 | void debug_stack_reset(void); | 108 | void debug_stack_reset(void); |
| 110 | #else /* !X86_64 */ | 109 | #else /* !X86_64 */ |
| 111 | static inline int is_debug_stack(unsigned long addr) { return 0; } | ||
| 112 | static inline void debug_stack_set_zero(void) { } | 110 | static inline void debug_stack_set_zero(void) { } |
| 113 | static inline void debug_stack_reset(void) { } | 111 | static inline void debug_stack_reset(void) { } |
| 114 | static inline void debug_stack_usage_inc(void) { } | 112 | static inline void debug_stack_usage_inc(void) { } |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index fbb16e6b6c18..8f95686ec27e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
| @@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq) | |||
| 16 | return ((irq == 2) ? 9 : irq); | 16 | return ((irq == 2) ? 9 : irq); |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | #ifdef CONFIG_X86_32 | 19 | extern int irq_init_percpu_irqstack(unsigned int cpu); |
| 20 | extern void irq_ctx_init(int cpu); | ||
| 21 | #else | ||
| 22 | # define irq_ctx_init(cpu) do { } while (0) | ||
| 23 | #endif | ||
| 24 | 20 | ||
| 25 | #define __ARCH_HAS_DO_SOFTIRQ | 21 | #define __ARCH_HAS_DO_SOFTIRQ |
| 26 | 22 | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 548d90bbf919..889f8b1b5b7f 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
| @@ -18,8 +18,8 @@ | |||
| 18 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events | 18 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events |
| 19 | * Vectors 32 ... 127 : device interrupts | 19 | * Vectors 32 ... 127 : device interrupts |
| 20 | * Vector 128 : legacy int80 syscall interface | 20 | * Vector 128 : legacy int80 syscall interface |
| 21 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts | 21 | * Vectors 129 ... LOCAL_TIMER_VECTOR-1 |
| 22 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts | 22 | * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts |
| 23 | * | 23 | * |
| 24 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. | 24 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. |
| 25 | * | 25 | * |
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0d5c739eebd7..565ad755c785 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h | |||
| @@ -22,11 +22,9 @@ | |||
| 22 | #define THREAD_SIZE_ORDER 1 | 22 | #define THREAD_SIZE_ORDER 1 |
| 23 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) | 23 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) |
| 24 | 24 | ||
| 25 | #define DOUBLEFAULT_STACK 1 | 25 | #define IRQ_STACK_SIZE THREAD_SIZE |
| 26 | #define NMI_STACK 0 | 26 | |
| 27 | #define DEBUG_STACK 0 | 27 | #define N_EXCEPTION_STACKS 1 |
| 28 | #define MCE_STACK 0 | ||
| 29 | #define N_EXCEPTION_STACKS 1 | ||
| 30 | 28 | ||
| 31 | #ifdef CONFIG_X86_PAE | 29 | #ifdef CONFIG_X86_PAE |
| 32 | /* | 30 | /* |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 8f657286d599..793c14c372cb 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
| @@ -14,22 +14,20 @@ | |||
| 14 | 14 | ||
| 15 | #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) | 15 | #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) |
| 16 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) | 16 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) |
| 17 | #define CURRENT_MASK (~(THREAD_SIZE - 1)) | ||
| 18 | 17 | ||
| 19 | #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) | 18 | #define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) |
| 20 | #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) | 19 | #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) |
| 21 | 20 | ||
| 22 | #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) | ||
| 23 | #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) | ||
| 24 | |||
| 25 | #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) | 21 | #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) |
| 26 | #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) | 22 | #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) |
| 27 | 23 | ||
| 28 | #define DOUBLEFAULT_STACK 1 | 24 | /* |
| 29 | #define NMI_STACK 2 | 25 | * The index for the tss.ist[] array. The hardware limit is 7 entries. |
| 30 | #define DEBUG_STACK 3 | 26 | */ |
| 31 | #define MCE_STACK 4 | 27 | #define IST_INDEX_DF 0 |
| 32 | #define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ | 28 | #define IST_INDEX_NMI 1 |
| 29 | #define IST_INDEX_DB 2 | ||
| 30 | #define IST_INDEX_MCE 3 | ||
| 33 | 31 | ||
| 34 | /* | 32 | /* |
| 35 | * Set __PAGE_OFFSET to the most negative possible address + | 33 | * Set __PAGE_OFFSET to the most negative possible address + |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2bb3a648fc12..7e99ef67bff0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); | |||
| 367 | #define __KERNEL_TSS_LIMIT \ | 367 | #define __KERNEL_TSS_LIMIT \ |
| 368 | (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) | 368 | (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) |
| 369 | 369 | ||
| 370 | /* Per CPU interrupt stacks */ | ||
| 371 | struct irq_stack { | ||
| 372 | char stack[IRQ_STACK_SIZE]; | ||
| 373 | } __aligned(IRQ_STACK_SIZE); | ||
| 374 | |||
| 375 | DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); | ||
| 376 | |||
| 370 | #ifdef CONFIG_X86_32 | 377 | #ifdef CONFIG_X86_32 |
| 371 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); | 378 | DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); |
| 372 | #else | 379 | #else |
| @@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); | |||
| 374 | #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 | 381 | #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1 |
| 375 | #endif | 382 | #endif |
| 376 | 383 | ||
| 377 | /* | ||
| 378 | * Save the original ist values for checking stack pointers during debugging | ||
| 379 | */ | ||
| 380 | struct orig_ist { | ||
| 381 | unsigned long ist[7]; | ||
| 382 | }; | ||
| 383 | |||
| 384 | #ifdef CONFIG_X86_64 | 384 | #ifdef CONFIG_X86_64 |
| 385 | DECLARE_PER_CPU(struct orig_ist, orig_ist); | 385 | struct fixed_percpu_data { |
| 386 | |||
| 387 | union irq_stack_union { | ||
| 388 | char irq_stack[IRQ_STACK_SIZE]; | ||
| 389 | /* | 386 | /* |
| 390 | * GCC hardcodes the stack canary as %gs:40. Since the | 387 | * GCC hardcodes the stack canary as %gs:40. Since the |
| 391 | * irq_stack is the object at %gs:0, we reserve the bottom | 388 | * irq_stack is the object at %gs:0, we reserve the bottom |
| 392 | * 48 bytes of the irq stack for the canary. | 389 | * 48 bytes of the irq stack for the canary. |
| 393 | */ | 390 | */ |
| 394 | struct { | 391 | char gs_base[40]; |
| 395 | char gs_base[40]; | 392 | unsigned long stack_canary; |
| 396 | unsigned long stack_canary; | ||
| 397 | }; | ||
| 398 | }; | 393 | }; |
| 399 | 394 | ||
| 400 | DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; | 395 | DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible; |
| 401 | DECLARE_INIT_PER_CPU(irq_stack_union); | 396 | DECLARE_INIT_PER_CPU(fixed_percpu_data); |
| 402 | 397 | ||
| 403 | static inline unsigned long cpu_kernelmode_gs_base(int cpu) | 398 | static inline unsigned long cpu_kernelmode_gs_base(int cpu) |
| 404 | { | 399 | { |
| 405 | return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); | 400 | return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); |
| 406 | } | 401 | } |
| 407 | 402 | ||
| 408 | DECLARE_PER_CPU(char *, irq_stack_ptr); | ||
| 409 | DECLARE_PER_CPU(unsigned int, irq_count); | 403 | DECLARE_PER_CPU(unsigned int, irq_count); |
| 410 | extern asmlinkage void ignore_sysret(void); | 404 | extern asmlinkage void ignore_sysret(void); |
| 411 | 405 | ||
| @@ -427,15 +421,8 @@ struct stack_canary { | |||
| 427 | }; | 421 | }; |
| 428 | DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); | 422 | DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
| 429 | #endif | 423 | #endif |
| 430 | /* | 424 | /* Per CPU softirq stack pointer */ |
| 431 | * per-CPU IRQ handling stacks | 425 | DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); |
| 432 | */ | ||
| 433 | struct irq_stack { | ||
| 434 | u32 stack[THREAD_SIZE/sizeof(u32)]; | ||
| 435 | } __aligned(THREAD_SIZE); | ||
| 436 | |||
| 437 | DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); | ||
| 438 | DECLARE_PER_CPU(struct irq_stack *, softirq_stack); | ||
| 439 | #endif /* X86_64 */ | 426 | #endif /* X86_64 */ |
| 440 | 427 | ||
| 441 | extern unsigned int fpu_kernel_xstate_size; | 428 | extern unsigned int fpu_kernel_xstate_size; |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2e95b6c1bca3..da545df207b2 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
| @@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void); | |||
| 131 | void native_smp_prepare_cpus(unsigned int max_cpus); | 131 | void native_smp_prepare_cpus(unsigned int max_cpus); |
| 132 | void calculate_max_logical_packages(void); | 132 | void calculate_max_logical_packages(void); |
| 133 | void native_smp_cpus_done(unsigned int max_cpus); | 133 | void native_smp_cpus_done(unsigned int max_cpus); |
| 134 | void common_cpu_up(unsigned int cpunum, struct task_struct *tidle); | 134 | int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); |
| 135 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); | 135 | int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); |
| 136 | int native_cpu_disable(void); | 136 | int native_cpu_disable(void); |
| 137 | int common_cpu_die(unsigned int cpu); | 137 | int common_cpu_die(unsigned int cpu); |
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 8ec97a62c245..91e29b6a86a5 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | * On x86_64, %gs is shared by percpu area and stack canary. All | 13 | * On x86_64, %gs is shared by percpu area and stack canary. All |
| 14 | * percpu symbols are zero based and %gs points to the base of percpu | 14 | * percpu symbols are zero based and %gs points to the base of percpu |
| 15 | * area. The first occupant of the percpu area is always | 15 | * area. The first occupant of the percpu area is always |
| 16 | * irq_stack_union which contains stack_canary at offset 40. Userland | 16 | * fixed_percpu_data which contains stack_canary at offset 40. Userland |
| 17 | * %gs is always saved and restored on kernel entry and exit using | 17 | * %gs is always saved and restored on kernel entry and exit using |
| 18 | * swapgs, so stack protector doesn't add any complexity there. | 18 | * swapgs, so stack protector doesn't add any complexity there. |
| 19 | * | 19 | * |
| @@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void) | |||
| 64 | u64 tsc; | 64 | u64 tsc; |
| 65 | 65 | ||
| 66 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
| 67 | BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); | 67 | BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40); |
| 68 | #endif | 68 | #endif |
| 69 | /* | 69 | /* |
| 70 | * We both use the random pool and the current TSC as a source | 70 | * We both use the random pool and the current TSC as a source |
| @@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void) | |||
| 79 | 79 | ||
| 80 | current->stack_canary = canary; | 80 | current->stack_canary = canary; |
| 81 | #ifdef CONFIG_X86_64 | 81 | #ifdef CONFIG_X86_64 |
| 82 | this_cpu_write(irq_stack_union.stack_canary, canary); | 82 | this_cpu_write(fixed_percpu_data.stack_canary, canary); |
| 83 | #else | 83 | #else |
| 84 | this_cpu_write(stack_canary.canary, canary); | 84 | this_cpu_write(stack_canary.canary, canary); |
| 85 | #endif | 85 | #endif |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index beef7ad9e43a..a8d0cdf48616 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
| @@ -9,6 +9,8 @@ | |||
| 9 | 9 | ||
| 10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
| 11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
| 12 | |||
| 13 | #include <asm/cpu_entry_area.h> | ||
| 12 | #include <asm/switch_to.h> | 14 | #include <asm/switch_to.h> |
| 13 | 15 | ||
| 14 | enum stack_type { | 16 | enum stack_type { |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index ddced33184b5..d3d075226c0a 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
| @@ -68,10 +68,12 @@ int main(void) | |||
| 68 | #undef ENTRY | 68 | #undef ENTRY |
| 69 | 69 | ||
| 70 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); | 70 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); |
| 71 | DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) - | ||
| 72 | offsetof(struct cea_exception_stacks, DB1_stack)); | ||
| 71 | BLANK(); | 73 | BLANK(); |
| 72 | 74 | ||
| 73 | #ifdef CONFIG_STACKPROTECTOR | 75 | #ifdef CONFIG_STACKPROTECTOR |
| 74 | DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary)); | 76 | DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); |
| 75 | BLANK(); | 77 | BLANK(); |
| 76 | #endif | 78 | #endif |
| 77 | 79 | ||
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 95a5faf3a6a0..37f7d438a6ef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -507,19 +507,6 @@ void load_percpu_segment(int cpu) | |||
| 507 | DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); | 507 | DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); |
| 508 | #endif | 508 | #endif |
| 509 | 509 | ||
| 510 | #ifdef CONFIG_X86_64 | ||
| 511 | /* | ||
| 512 | * Special IST stacks which the CPU switches to when it calls | ||
| 513 | * an IST-marked descriptor entry. Up to 7 stacks (hardware | ||
| 514 | * limit), all of them are 4K, except the debug stack which | ||
| 515 | * is 8K. | ||
| 516 | */ | ||
| 517 | static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | ||
| 518 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | ||
| 519 | [DEBUG_STACK - 1] = DEBUG_STKSZ | ||
| 520 | }; | ||
| 521 | #endif | ||
| 522 | |||
| 523 | /* Load the original GDT from the per-cpu structure */ | 510 | /* Load the original GDT from the per-cpu structure */ |
| 524 | void load_direct_gdt(int cpu) | 511 | void load_direct_gdt(int cpu) |
| 525 | { | 512 | { |
| @@ -1511,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg) | |||
| 1511 | __setup("clearcpuid=", setup_clearcpuid); | 1498 | __setup("clearcpuid=", setup_clearcpuid); |
| 1512 | 1499 | ||
| 1513 | #ifdef CONFIG_X86_64 | 1500 | #ifdef CONFIG_X86_64 |
| 1514 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 1501 | DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, |
| 1515 | irq_stack_union) __aligned(PAGE_SIZE) __visible; | 1502 | fixed_percpu_data) __aligned(PAGE_SIZE) __visible; |
| 1516 | EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); | 1503 | EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); |
| 1517 | 1504 | ||
| 1518 | /* | 1505 | /* |
| 1519 | * The following percpu variables are hot. Align current_task to | 1506 | * The following percpu variables are hot. Align current_task to |
| @@ -1523,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | |||
| 1523 | &init_task; | 1510 | &init_task; |
| 1524 | EXPORT_PER_CPU_SYMBOL(current_task); | 1511 | EXPORT_PER_CPU_SYMBOL(current_task); |
| 1525 | 1512 | ||
| 1526 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 1513 | DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); |
| 1527 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE; | ||
| 1528 | |||
| 1529 | DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; | 1514 | DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; |
| 1530 | 1515 | ||
| 1531 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; | 1516 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; |
| @@ -1562,23 +1547,7 @@ void syscall_init(void) | |||
| 1562 | X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); | 1547 | X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); |
| 1563 | } | 1548 | } |
| 1564 | 1549 | ||
| 1565 | /* | ||
| 1566 | * Copies of the original ist values from the tss are only accessed during | ||
| 1567 | * debugging, no special alignment required. | ||
| 1568 | */ | ||
| 1569 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | ||
| 1570 | |||
| 1571 | static DEFINE_PER_CPU(unsigned long, debug_stack_addr); | ||
| 1572 | DEFINE_PER_CPU(int, debug_stack_usage); | 1550 | DEFINE_PER_CPU(int, debug_stack_usage); |
| 1573 | |||
| 1574 | int is_debug_stack(unsigned long addr) | ||
| 1575 | { | ||
| 1576 | return __this_cpu_read(debug_stack_usage) || | ||
| 1577 | (addr <= __this_cpu_read(debug_stack_addr) && | ||
| 1578 | addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ)); | ||
| 1579 | } | ||
| 1580 | NOKPROBE_SYMBOL(is_debug_stack); | ||
| 1581 | |||
| 1582 | DEFINE_PER_CPU(u32, debug_idt_ctr); | 1551 | DEFINE_PER_CPU(u32, debug_idt_ctr); |
| 1583 | 1552 | ||
| 1584 | void debug_stack_set_zero(void) | 1553 | void debug_stack_set_zero(void) |
| @@ -1690,17 +1659,14 @@ static void setup_getcpu(int cpu) | |||
| 1690 | * initialized (naturally) in the bootstrap process, such as the GDT | 1659 | * initialized (naturally) in the bootstrap process, such as the GDT |
| 1691 | * and IDT. We reload them nevertheless, this function acts as a | 1660 | * and IDT. We reload them nevertheless, this function acts as a |
| 1692 | * 'CPU state barrier', nothing should get across. | 1661 | * 'CPU state barrier', nothing should get across. |
| 1693 | * A lot of state is already set up in PDA init for 64 bit | ||
| 1694 | */ | 1662 | */ |
| 1695 | #ifdef CONFIG_X86_64 | 1663 | #ifdef CONFIG_X86_64 |
| 1696 | 1664 | ||
| 1697 | void cpu_init(void) | 1665 | void cpu_init(void) |
| 1698 | { | 1666 | { |
| 1699 | struct orig_ist *oist; | 1667 | int cpu = raw_smp_processor_id(); |
| 1700 | struct task_struct *me; | 1668 | struct task_struct *me; |
| 1701 | struct tss_struct *t; | 1669 | struct tss_struct *t; |
| 1702 | unsigned long v; | ||
| 1703 | int cpu = raw_smp_processor_id(); | ||
| 1704 | int i; | 1670 | int i; |
| 1705 | 1671 | ||
| 1706 | wait_for_master_cpu(cpu); | 1672 | wait_for_master_cpu(cpu); |
| @@ -1715,7 +1681,6 @@ void cpu_init(void) | |||
| 1715 | load_ucode_ap(); | 1681 | load_ucode_ap(); |
| 1716 | 1682 | ||
| 1717 | t = &per_cpu(cpu_tss_rw, cpu); | 1683 | t = &per_cpu(cpu_tss_rw, cpu); |
| 1718 | oist = &per_cpu(orig_ist, cpu); | ||
| 1719 | 1684 | ||
| 1720 | #ifdef CONFIG_NUMA | 1685 | #ifdef CONFIG_NUMA |
| 1721 | if (this_cpu_read(numa_node) == 0 && | 1686 | if (this_cpu_read(numa_node) == 0 && |
| @@ -1753,16 +1718,11 @@ void cpu_init(void) | |||
| 1753 | /* | 1718 | /* |
| 1754 | * set up and load the per-CPU TSS | 1719 | * set up and load the per-CPU TSS |
| 1755 | */ | 1720 | */ |
| 1756 | if (!oist->ist[0]) { | 1721 | if (!t->x86_tss.ist[0]) { |
| 1757 | char *estacks = get_cpu_entry_area(cpu)->exception_stacks; | 1722 | t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF); |
| 1758 | 1723 | t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); | |
| 1759 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 1724 | t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); |
| 1760 | estacks += exception_stack_sizes[v]; | 1725 | t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); |
| 1761 | oist->ist[v] = t->x86_tss.ist[v] = | ||
| 1762 | (unsigned long)estacks; | ||
| 1763 | if (v == DEBUG_STACK-1) | ||
| 1764 | per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks; | ||
| 1765 | } | ||
| 1766 | } | 1726 | } |
| 1767 | 1727 | ||
| 1768 | t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | 1728 | t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index cd53f3030e40..64a59d726639 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
| @@ -34,14 +34,14 @@ const char *stack_type_name(enum stack_type type) | |||
| 34 | 34 | ||
| 35 | static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) | 35 | static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) |
| 36 | { | 36 | { |
| 37 | unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack); | 37 | unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); |
| 38 | unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); | 38 | unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); |
| 39 | 39 | ||
| 40 | /* | 40 | /* |
| 41 | * This is a software stack, so 'end' can be a valid stack pointer. | 41 | * This is a software stack, so 'end' can be a valid stack pointer. |
| 42 | * It just means the stack is empty. | 42 | * It just means the stack is empty. |
| 43 | */ | 43 | */ |
| 44 | if (stack <= begin || stack > end) | 44 | if (stack < begin || stack > end) |
| 45 | return false; | 45 | return false; |
| 46 | 46 | ||
| 47 | info->type = STACK_TYPE_IRQ; | 47 | info->type = STACK_TYPE_IRQ; |
| @@ -59,14 +59,14 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) | |||
| 59 | 59 | ||
| 60 | static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) | 60 | static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) |
| 61 | { | 61 | { |
| 62 | unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack); | 62 | unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); |
| 63 | unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); | 63 | unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); |
| 64 | 64 | ||
| 65 | /* | 65 | /* |
| 66 | * This is a software stack, so 'end' can be a valid stack pointer. | 66 | * This is a software stack, so 'end' can be a valid stack pointer. |
| 67 | * It just means the stack is empty. | 67 | * It just means the stack is empty. |
| 68 | */ | 68 | */ |
| 69 | if (stack <= begin || stack > end) | 69 | if (stack < begin || stack > end) |
| 70 | return false; | 70 | return false; |
| 71 | 71 | ||
| 72 | info->type = STACK_TYPE_SOFTIRQ; | 72 | info->type = STACK_TYPE_SOFTIRQ; |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5cdb9e84da57..753b8cfe8b8a 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -16,23 +16,21 @@ | |||
| 16 | #include <linux/bug.h> | 16 | #include <linux/bug.h> |
| 17 | #include <linux/nmi.h> | 17 | #include <linux/nmi.h> |
| 18 | 18 | ||
| 19 | #include <asm/cpu_entry_area.h> | ||
| 19 | #include <asm/stacktrace.h> | 20 | #include <asm/stacktrace.h> |
| 20 | 21 | ||
| 21 | static char *exception_stack_names[N_EXCEPTION_STACKS] = { | 22 | static const char * const exception_stack_names[] = { |
| 22 | [ DOUBLEFAULT_STACK-1 ] = "#DF", | 23 | [ ESTACK_DF ] = "#DF", |
| 23 | [ NMI_STACK-1 ] = "NMI", | 24 | [ ESTACK_NMI ] = "NMI", |
| 24 | [ DEBUG_STACK-1 ] = "#DB", | 25 | [ ESTACK_DB2 ] = "#DB2", |
| 25 | [ MCE_STACK-1 ] = "#MC", | 26 | [ ESTACK_DB1 ] = "#DB1", |
| 26 | }; | 27 | [ ESTACK_DB ] = "#DB", |
| 27 | 28 | [ ESTACK_MCE ] = "#MC", | |
| 28 | static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = { | ||
| 29 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | ||
| 30 | [DEBUG_STACK - 1] = DEBUG_STKSZ | ||
| 31 | }; | 29 | }; |
| 32 | 30 | ||
| 33 | const char *stack_type_name(enum stack_type type) | 31 | const char *stack_type_name(enum stack_type type) |
| 34 | { | 32 | { |
| 35 | BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); | 33 | BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); |
| 36 | 34 | ||
| 37 | if (type == STACK_TYPE_IRQ) | 35 | if (type == STACK_TYPE_IRQ) |
| 38 | return "IRQ"; | 36 | return "IRQ"; |
| @@ -52,43 +50,84 @@ const char *stack_type_name(enum stack_type type) | |||
| 52 | return NULL; | 50 | return NULL; |
| 53 | } | 51 | } |
| 54 | 52 | ||
| 53 | /** | ||
| 54 | * struct estack_pages - Page descriptor for exception stacks | ||
| 55 | * @offs: Offset from the start of the exception stack area | ||
| 56 | * @size: Size of the exception stack | ||
| 57 | * @type: Type to store in the stack_info struct | ||
| 58 | */ | ||
| 59 | struct estack_pages { | ||
| 60 | u32 offs; | ||
| 61 | u16 size; | ||
| 62 | u16 type; | ||
| 63 | }; | ||
| 64 | |||
| 65 | #define EPAGERANGE(st) \ | ||
| 66 | [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \ | ||
| 67 | PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \ | ||
| 68 | .offs = CEA_ESTACK_OFFS(st), \ | ||
| 69 | .size = CEA_ESTACK_SIZE(st), \ | ||
| 70 | .type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, } | ||
| 71 | |||
| 72 | /* | ||
| 73 | * Array of exception stack page descriptors. If the stack is larger than | ||
| 74 | * PAGE_SIZE, all pages covering a particular stack will have the same | ||
| 75 | * info. The guard pages including the not mapped DB2 stack are zeroed | ||
| 76 | * out. | ||
| 77 | */ | ||
| 78 | static const | ||
| 79 | struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = { | ||
| 80 | EPAGERANGE(DF), | ||
| 81 | EPAGERANGE(NMI), | ||
| 82 | EPAGERANGE(DB1), | ||
| 83 | EPAGERANGE(DB), | ||
| 84 | EPAGERANGE(MCE), | ||
| 85 | }; | ||
| 86 | |||
| 55 | static bool in_exception_stack(unsigned long *stack, struct stack_info *info) | 87 | static bool in_exception_stack(unsigned long *stack, struct stack_info *info) |
| 56 | { | 88 | { |
| 57 | unsigned long *begin, *end; | 89 | unsigned long begin, end, stk = (unsigned long)stack; |
| 90 | const struct estack_pages *ep; | ||
| 58 | struct pt_regs *regs; | 91 | struct pt_regs *regs; |
| 59 | unsigned k; | 92 | unsigned int k; |
| 60 | 93 | ||
| 61 | BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); | 94 | BUILD_BUG_ON(N_EXCEPTION_STACKS != 6); |
| 62 | 95 | ||
| 63 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 96 | begin = (unsigned long)__this_cpu_read(cea_exception_stacks); |
| 64 | end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k]; | 97 | end = begin + sizeof(struct cea_exception_stacks); |
| 65 | begin = end - (exception_stack_sizes[k] / sizeof(long)); | 98 | /* Bail if @stack is outside the exception stack area. */ |
| 66 | regs = (struct pt_regs *)end - 1; | 99 | if (stk < begin || stk >= end) |
| 67 | 100 | return false; | |
| 68 | if (stack <= begin || stack >= end) | ||
| 69 | continue; | ||
| 70 | 101 | ||
| 71 | info->type = STACK_TYPE_EXCEPTION + k; | 102 | /* Calc page offset from start of exception stacks */ |
| 72 | info->begin = begin; | 103 | k = (stk - begin) >> PAGE_SHIFT; |
| 73 | info->end = end; | 104 | /* Lookup the page descriptor */ |
| 74 | info->next_sp = (unsigned long *)regs->sp; | 105 | ep = &estack_pages[k]; |
| 106 | /* Guard page? */ | ||
| 107 | if (!ep->size) | ||
| 108 | return false; | ||
| 75 | 109 | ||
| 76 | return true; | 110 | begin += (unsigned long)ep->offs; |
| 77 | } | 111 | end = begin + (unsigned long)ep->size; |
| 112 | regs = (struct pt_regs *)end - 1; | ||
| 78 | 113 | ||
| 79 | return false; | 114 | info->type = ep->type; |
| 115 | info->begin = (unsigned long *)begin; | ||
| 116 | info->end = (unsigned long *)end; | ||
| 117 | info->next_sp = (unsigned long *)regs->sp; | ||
| 118 | return true; | ||
| 80 | } | 119 | } |
| 81 | 120 | ||
| 82 | static bool in_irq_stack(unsigned long *stack, struct stack_info *info) | 121 | static bool in_irq_stack(unsigned long *stack, struct stack_info *info) |
| 83 | { | 122 | { |
| 84 | unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr); | 123 | unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); |
| 85 | unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); | 124 | unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long)); |
| 86 | 125 | ||
| 87 | /* | 126 | /* |
| 88 | * This is a software stack, so 'end' can be a valid stack pointer. | 127 | * This is a software stack, so 'end' can be a valid stack pointer. |
| 89 | * It just means the stack is empty. | 128 | * It just means the stack is empty. |
| 90 | */ | 129 | */ |
| 91 | if (stack <= begin || stack > end) | 130 | if (stack < begin || stack >= end) |
| 92 | return false; | 131 | return false; |
| 93 | 132 | ||
| 94 | info->type = STACK_TYPE_IRQ; | 133 | info->type = STACK_TYPE_IRQ; |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d1dbe8e4eb82..bcd206c8ac90 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
| @@ -265,7 +265,7 @@ ENDPROC(start_cpu0) | |||
| 265 | GLOBAL(initial_code) | 265 | GLOBAL(initial_code) |
| 266 | .quad x86_64_start_kernel | 266 | .quad x86_64_start_kernel |
| 267 | GLOBAL(initial_gs) | 267 | GLOBAL(initial_gs) |
| 268 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 268 | .quad INIT_PER_CPU_VAR(fixed_percpu_data) |
| 269 | GLOBAL(initial_stack) | 269 | GLOBAL(initial_stack) |
| 270 | /* | 270 | /* |
| 271 | * The SIZEOF_PTREGS gap is a convention which helps the in-kernel | 271 | * The SIZEOF_PTREGS gap is a convention which helps the in-kernel |
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 01adea278a71..6d8917875f44 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c | |||
| @@ -41,13 +41,12 @@ struct idt_data { | |||
| 41 | #define SYSG(_vector, _addr) \ | 41 | #define SYSG(_vector, _addr) \ |
| 42 | G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS) | 42 | G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS) |
| 43 | 43 | ||
| 44 | /* Interrupt gate with interrupt stack */ | 44 | /* |
| 45 | * Interrupt gate with interrupt stack. The _ist index is the index in | ||
| 46 | * the tss.ist[] array, but for the descriptor it needs to start at 1. | ||
| 47 | */ | ||
| 45 | #define ISTG(_vector, _addr, _ist) \ | 48 | #define ISTG(_vector, _addr, _ist) \ |
| 46 | G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS) | 49 | G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS) |
| 47 | |||
| 48 | /* System interrupt gate with interrupt stack */ | ||
| 49 | #define SISTG(_vector, _addr, _ist) \ | ||
| 50 | G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS) | ||
| 51 | 50 | ||
| 52 | /* Task gate */ | 51 | /* Task gate */ |
| 53 | #define TSKG(_vector, _gdt) \ | 52 | #define TSKG(_vector, _gdt) \ |
| @@ -184,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; | |||
| 184 | * cpu_init() when the TSS has been initialized. | 183 | * cpu_init() when the TSS has been initialized. |
| 185 | */ | 184 | */ |
| 186 | static const __initconst struct idt_data ist_idts[] = { | 185 | static const __initconst struct idt_data ist_idts[] = { |
| 187 | ISTG(X86_TRAP_DB, debug, DEBUG_STACK), | 186 | ISTG(X86_TRAP_DB, debug, IST_INDEX_DB), |
| 188 | ISTG(X86_TRAP_NMI, nmi, NMI_STACK), | 187 | ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI), |
| 189 | ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), | 188 | ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF), |
| 190 | #ifdef CONFIG_X86_MCE | 189 | #ifdef CONFIG_X86_MCE |
| 191 | ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), | 190 | ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE), |
| 192 | #endif | 191 | #endif |
| 193 | }; | 192 | }; |
| 194 | 193 | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 95600a99ae93..fc34816c6f04 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
| @@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; } | |||
| 51 | static inline void print_stack_overflow(void) { } | 51 | static inline void print_stack_overflow(void) { } |
| 52 | #endif | 52 | #endif |
| 53 | 53 | ||
| 54 | DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); | 54 | DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); |
| 55 | DEFINE_PER_CPU(struct irq_stack *, softirq_stack); | 55 | DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr); |
| 56 | 56 | ||
| 57 | static void call_on_stack(void *func, void *stack) | 57 | static void call_on_stack(void *func, void *stack) |
| 58 | { | 58 | { |
| @@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) | |||
| 76 | u32 *isp, *prev_esp, arg1; | 76 | u32 *isp, *prev_esp, arg1; |
| 77 | 77 | ||
| 78 | curstk = (struct irq_stack *) current_stack(); | 78 | curstk = (struct irq_stack *) current_stack(); |
| 79 | irqstk = __this_cpu_read(hardirq_stack); | 79 | irqstk = __this_cpu_read(hardirq_stack_ptr); |
| 80 | 80 | ||
| 81 | /* | 81 | /* |
| 82 | * this is where we switch to the IRQ stack. However, if we are | 82 | * this is where we switch to the IRQ stack. However, if we are |
| @@ -107,27 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) | |||
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | /* | 109 | /* |
| 110 | * allocate per-cpu stacks for hardirq and for softirq processing | 110 | * Allocate per-cpu stacks for hardirq and softirq processing |
| 111 | */ | 111 | */ |
| 112 | void irq_ctx_init(int cpu) | 112 | int irq_init_percpu_irqstack(unsigned int cpu) |
| 113 | { | 113 | { |
| 114 | struct irq_stack *irqstk; | 114 | int node = cpu_to_node(cpu); |
| 115 | 115 | struct page *ph, *ps; | |
| 116 | if (per_cpu(hardirq_stack, cpu)) | ||
| 117 | return; | ||
| 118 | 116 | ||
| 119 | irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), | 117 | if (per_cpu(hardirq_stack_ptr, cpu)) |
| 120 | THREADINFO_GFP, | 118 | return 0; |
| 121 | THREAD_SIZE_ORDER)); | ||
| 122 | per_cpu(hardirq_stack, cpu) = irqstk; | ||
| 123 | 119 | ||
| 124 | irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), | 120 | ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); |
| 125 | THREADINFO_GFP, | 121 | if (!ph) |
| 126 | THREAD_SIZE_ORDER)); | 122 | return -ENOMEM; |
| 127 | per_cpu(softirq_stack, cpu) = irqstk; | 123 | ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); |
| 124 | if (!ps) { | ||
| 125 | __free_pages(ph, THREAD_SIZE_ORDER); | ||
| 126 | return -ENOMEM; | ||
| 127 | } | ||
| 128 | 128 | ||
| 129 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", | 129 | per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); |
| 130 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); | 130 | per_cpu(softirq_stack_ptr, cpu) = page_address(ps); |
| 131 | return 0; | ||
| 131 | } | 132 | } |
| 132 | 133 | ||
| 133 | void do_softirq_own_stack(void) | 134 | void do_softirq_own_stack(void) |
| @@ -135,7 +136,7 @@ void do_softirq_own_stack(void) | |||
| 135 | struct irq_stack *irqstk; | 136 | struct irq_stack *irqstk; |
| 136 | u32 *isp, *prev_esp; | 137 | u32 *isp, *prev_esp; |
| 137 | 138 | ||
| 138 | irqstk = __this_cpu_read(softirq_stack); | 139 | irqstk = __this_cpu_read(softirq_stack_ptr); |
| 139 | 140 | ||
| 140 | /* build the stack frame on the softirq stack */ | 141 | /* build the stack frame on the softirq stack */ |
| 141 | isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); | 142 | isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 0469cd078db1..6bf6517a05bb 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
| @@ -18,63 +18,64 @@ | |||
| 18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
| 19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
| 20 | #include <linux/sched/task_stack.h> | 20 | #include <linux/sched/task_stack.h> |
| 21 | |||
| 22 | #include <asm/cpu_entry_area.h> | ||
| 21 | #include <asm/io_apic.h> | 23 | #include <asm/io_apic.h> |
| 22 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
| 23 | 25 | ||
| 24 | int sysctl_panic_on_stackoverflow; | 26 | DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible; |
| 27 | DECLARE_INIT_PER_CPU(irq_stack_backing_store); | ||
| 25 | 28 | ||
| 26 | /* | 29 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) |
| 27 | * Probabilistic stack overflow check: | ||
| 28 | * | ||
| 29 | * Only check the stack in process context, because everything else | ||
| 30 | * runs on the big interrupt stacks. Checking reliably is too expensive, | ||
| 31 | * so we just check from interrupts. | ||
| 32 | */ | ||
| 33 | static inline void stack_overflow_check(struct pt_regs *regs) | ||
| 34 | { | 30 | { |
| 35 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | 31 | if (IS_ERR_OR_NULL(desc)) |
| 36 | #define STACK_TOP_MARGIN 128 | 32 | return false; |
| 37 | struct orig_ist *oist; | ||
| 38 | u64 irq_stack_top, irq_stack_bottom; | ||
| 39 | u64 estack_top, estack_bottom; | ||
| 40 | u64 curbase = (u64)task_stack_page(current); | ||
| 41 | 33 | ||
| 42 | if (user_mode(regs)) | 34 | generic_handle_irq_desc(desc); |
| 43 | return; | 35 | return true; |
| 36 | } | ||
| 44 | 37 | ||
| 45 | if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN && | 38 | #ifdef CONFIG_VMAP_STACK |
| 46 | regs->sp <= curbase + THREAD_SIZE) | 39 | /* |
| 47 | return; | 40 | * VMAP the backing store with guard pages |
| 41 | */ | ||
| 42 | static int map_irq_stack(unsigned int cpu) | ||
| 43 | { | ||
| 44 | char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu); | ||
| 45 | struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE]; | ||
| 46 | void *va; | ||
| 47 | int i; | ||
| 48 | 48 | ||
| 49 | irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) + | 49 | for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) { |
| 50 | STACK_TOP_MARGIN; | 50 | phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT)); |
| 51 | irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr); | ||
| 52 | if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom) | ||
| 53 | return; | ||
| 54 | 51 | ||
| 55 | oist = this_cpu_ptr(&orig_ist); | 52 | pages[i] = pfn_to_page(pa >> PAGE_SHIFT); |
| 56 | estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN; | 53 | } |
| 57 | estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1]; | ||
| 58 | if (regs->sp >= estack_top && regs->sp <= estack_bottom) | ||
| 59 | return; | ||
| 60 | 54 | ||
| 61 | WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n", | 55 | va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); |
| 62 | current->comm, curbase, regs->sp, | 56 | if (!va) |
| 63 | irq_stack_top, irq_stack_bottom, | 57 | return -ENOMEM; |
| 64 | estack_top, estack_bottom, (void *)regs->ip); | ||
| 65 | 58 | ||
| 66 | if (sysctl_panic_on_stackoverflow) | 59 | per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; |
| 67 | panic("low stack detected by irq handler - check messages\n"); | 60 | return 0; |
| 68 | #endif | ||
| 69 | } | 61 | } |
| 70 | 62 | #else | |
| 71 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | 63 | /* |
| 64 | * If VMAP stacks are disabled due to KASAN, just use the per cpu | ||
| 65 | * backing store without guard pages. | ||
| 66 | */ | ||
| 67 | static int map_irq_stack(unsigned int cpu) | ||
| 72 | { | 68 | { |
| 73 | stack_overflow_check(regs); | 69 | void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); |
| 74 | 70 | ||
| 75 | if (IS_ERR_OR_NULL(desc)) | 71 | per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE; |
| 76 | return false; | 72 | return 0; |
| 73 | } | ||
| 74 | #endif | ||
| 77 | 75 | ||
| 78 | generic_handle_irq_desc(desc); | 76 | int irq_init_percpu_irqstack(unsigned int cpu) |
| 79 | return true; | 77 | { |
| 78 | if (per_cpu(hardirq_stack_ptr, cpu)) | ||
| 79 | return 0; | ||
| 80 | return map_irq_stack(cpu); | ||
| 80 | } | 81 | } |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a0693b71cfc1..16919a9671fa 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
| @@ -91,6 +91,8 @@ void __init init_IRQ(void) | |||
| 91 | for (i = 0; i < nr_legacy_irqs(); i++) | 91 | for (i = 0; i < nr_legacy_irqs(); i++) |
| 92 | per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); | 92 | per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i); |
| 93 | 93 | ||
| 94 | BUG_ON(irq_init_percpu_irqstack(smp_processor_id())); | ||
| 95 | |||
| 94 | x86_init.irqs.intr_init(); | 96 | x86_init.irqs.intr_init(); |
| 95 | } | 97 | } |
| 96 | 98 | ||
| @@ -104,6 +106,4 @@ void __init native_init_IRQ(void) | |||
| 104 | 106 | ||
| 105 | if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) | 107 | if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) |
| 106 | setup_irq(2, &irq2); | 108 | setup_irq(2, &irq2); |
| 107 | |||
| 108 | irq_ctx_init(smp_processor_id()); | ||
| 109 | } | 109 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 18bc9b51ac9b..3755d0310026 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
| @@ -21,13 +21,14 @@ | |||
| 21 | #include <linux/ratelimit.h> | 21 | #include <linux/ratelimit.h> |
| 22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
| 23 | #include <linux/export.h> | 23 | #include <linux/export.h> |
| 24 | #include <linux/atomic.h> | ||
| 24 | #include <linux/sched/clock.h> | 25 | #include <linux/sched/clock.h> |
| 25 | 26 | ||
| 26 | #if defined(CONFIG_EDAC) | 27 | #if defined(CONFIG_EDAC) |
| 27 | #include <linux/edac.h> | 28 | #include <linux/edac.h> |
| 28 | #endif | 29 | #endif |
| 29 | 30 | ||
| 30 | #include <linux/atomic.h> | 31 | #include <asm/cpu_entry_area.h> |
| 31 | #include <asm/traps.h> | 32 | #include <asm/traps.h> |
| 32 | #include <asm/mach_traps.h> | 33 | #include <asm/mach_traps.h> |
| 33 | #include <asm/nmi.h> | 34 | #include <asm/nmi.h> |
| @@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2); | |||
| 487 | * switch back to the original IDT. | 488 | * switch back to the original IDT. |
| 488 | */ | 489 | */ |
| 489 | static DEFINE_PER_CPU(int, update_debug_stack); | 490 | static DEFINE_PER_CPU(int, update_debug_stack); |
| 491 | |||
| 492 | static bool notrace is_debug_stack(unsigned long addr) | ||
| 493 | { | ||
| 494 | struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks); | ||
| 495 | unsigned long top = CEA_ESTACK_TOP(cs, DB); | ||
| 496 | unsigned long bot = CEA_ESTACK_BOT(cs, DB1); | ||
| 497 | |||
| 498 | if (__this_cpu_read(debug_stack_usage)) | ||
| 499 | return true; | ||
| 500 | /* | ||
| 501 | * Note, this covers the guard page between DB and DB1 as well to | ||
| 502 | * avoid two checks. But by all means @addr can never point into | ||
| 503 | * the guard page. | ||
| 504 | */ | ||
| 505 | return addr >= bot && addr < top; | ||
| 506 | } | ||
| 507 | NOKPROBE_SYMBOL(is_debug_stack); | ||
| 490 | #endif | 508 | #endif |
| 491 | 509 | ||
| 492 | dotraplinkage notrace void | 510 | dotraplinkage notrace void |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4bf46575568a..86663874ef04 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void) | |||
| 244 | per_cpu(x86_cpu_to_logical_apicid, cpu) = | 244 | per_cpu(x86_cpu_to_logical_apicid, cpu) = |
| 245 | early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); | 245 | early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); |
| 246 | #endif | 246 | #endif |
| 247 | #ifdef CONFIG_X86_64 | ||
| 248 | per_cpu(irq_stack_ptr, cpu) = | ||
| 249 | per_cpu(irq_stack_union.irq_stack, cpu) + | ||
| 250 | IRQ_STACK_SIZE; | ||
| 251 | #endif | ||
| 252 | #ifdef CONFIG_NUMA | 247 | #ifdef CONFIG_NUMA |
| 253 | per_cpu(x86_cpu_to_node_map, cpu) = | 248 | per_cpu(x86_cpu_to_node_map, cpu) = |
| 254 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | 249 | early_per_cpu_map(x86_cpu_to_node_map, cpu); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ce1a67b70168..c92b21f9e9dc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -935,20 +935,27 @@ out: | |||
| 935 | return boot_error; | 935 | return boot_error; |
| 936 | } | 936 | } |
| 937 | 937 | ||
| 938 | void common_cpu_up(unsigned int cpu, struct task_struct *idle) | 938 | int common_cpu_up(unsigned int cpu, struct task_struct *idle) |
| 939 | { | 939 | { |
| 940 | int ret; | ||
| 941 | |||
| 940 | /* Just in case we booted with a single CPU. */ | 942 | /* Just in case we booted with a single CPU. */ |
| 941 | alternatives_enable_smp(); | 943 | alternatives_enable_smp(); |
| 942 | 944 | ||
| 943 | per_cpu(current_task, cpu) = idle; | 945 | per_cpu(current_task, cpu) = idle; |
| 944 | 946 | ||
| 947 | /* Initialize the interrupt stack(s) */ | ||
| 948 | ret = irq_init_percpu_irqstack(cpu); | ||
| 949 | if (ret) | ||
| 950 | return ret; | ||
| 951 | |||
| 945 | #ifdef CONFIG_X86_32 | 952 | #ifdef CONFIG_X86_32 |
| 946 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 953 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
| 947 | irq_ctx_init(cpu); | ||
| 948 | per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); | 954 | per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); |
| 949 | #else | 955 | #else |
| 950 | initial_gs = per_cpu_offset(cpu); | 956 | initial_gs = per_cpu_offset(cpu); |
| 951 | #endif | 957 | #endif |
| 958 | return 0; | ||
| 952 | } | 959 | } |
| 953 | 960 | ||
| 954 | /* | 961 | /* |
| @@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
| 1106 | /* the FPU context is blank, nobody can own it */ | 1113 | /* the FPU context is blank, nobody can own it */ |
| 1107 | per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; | 1114 | per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; |
| 1108 | 1115 | ||
| 1109 | common_cpu_up(cpu, tidle); | 1116 | err = common_cpu_up(cpu, tidle); |
| 1117 | if (err) | ||
| 1118 | return err; | ||
| 1110 | 1119 | ||
| 1111 | err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); | 1120 | err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered); |
| 1112 | if (err) { | 1121 | if (err) { |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a5127b2c195f..4d1517022a14 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
| @@ -403,7 +403,8 @@ SECTIONS | |||
| 403 | */ | 403 | */ |
| 404 | #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load | 404 | #define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load |
| 405 | INIT_PER_CPU(gdt_page); | 405 | INIT_PER_CPU(gdt_page); |
| 406 | INIT_PER_CPU(irq_stack_union); | 406 | INIT_PER_CPU(fixed_percpu_data); |
| 407 | INIT_PER_CPU(irq_stack_backing_store); | ||
| 407 | 408 | ||
| 408 | /* | 409 | /* |
| 409 | * Build-time check on the image size: | 410 | * Build-time check on the image size: |
| @@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union); | |||
| 412 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 413 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
| 413 | 414 | ||
| 414 | #ifdef CONFIG_SMP | 415 | #ifdef CONFIG_SMP |
| 415 | . = ASSERT((irq_stack_union == 0), | 416 | . = ASSERT((fixed_percpu_data == 0), |
| 416 | "irq_stack_union is not at start of per-cpu area"); | 417 | "fixed_percpu_data is not at start of per-cpu area"); |
| 417 | #endif | 418 | #endif |
| 418 | 419 | ||
| 419 | #endif /* CONFIG_X86_32 */ | 420 | #endif /* CONFIG_X86_32 */ |
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 19c6abf9ea31..752ad11d6868 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c | |||
| @@ -13,8 +13,8 @@ | |||
| 13 | static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); | 13 | static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); |
| 14 | 14 | ||
| 15 | #ifdef CONFIG_X86_64 | 15 | #ifdef CONFIG_X86_64 |
| 16 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | 16 | static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); |
| 17 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | 17 | DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); |
| 18 | #endif | 18 | #endif |
| 19 | 19 | ||
| 20 | struct cpu_entry_area *get_cpu_entry_area(int cpu) | 20 | struct cpu_entry_area *get_cpu_entry_area(int cpu) |
| @@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) | |||
| 52 | cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); | 52 | cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | static void __init percpu_setup_debug_store(int cpu) | 55 | static void __init percpu_setup_debug_store(unsigned int cpu) |
| 56 | { | 56 | { |
| 57 | #ifdef CONFIG_CPU_SUP_INTEL | 57 | #ifdef CONFIG_CPU_SUP_INTEL |
| 58 | int npages; | 58 | unsigned int npages; |
| 59 | void *cea; | 59 | void *cea; |
| 60 | 60 | ||
| 61 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | 61 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
| @@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu) | |||
| 78 | #endif | 78 | #endif |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | #ifdef CONFIG_X86_64 | ||
| 82 | |||
| 83 | #define cea_map_stack(name) do { \ | ||
| 84 | npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \ | ||
| 85 | cea_map_percpu_pages(cea->estacks.name## _stack, \ | ||
| 86 | estacks->name## _stack, npages, PAGE_KERNEL); \ | ||
| 87 | } while (0) | ||
| 88 | |||
| 89 | static void __init percpu_setup_exception_stacks(unsigned int cpu) | ||
| 90 | { | ||
| 91 | struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu); | ||
| 92 | struct cpu_entry_area *cea = get_cpu_entry_area(cpu); | ||
| 93 | unsigned int npages; | ||
| 94 | |||
| 95 | BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); | ||
| 96 | |||
| 97 | per_cpu(cea_exception_stacks, cpu) = &cea->estacks; | ||
| 98 | |||
| 99 | /* | ||
| 100 | * The exceptions stack mappings in the per cpu area are protected | ||
| 101 | * by guard pages so each stack must be mapped separately. DB2 is | ||
| 102 | * not mapped; it just exists to catch triple nesting of #DB. | ||
| 103 | */ | ||
| 104 | cea_map_stack(DF); | ||
| 105 | cea_map_stack(NMI); | ||
| 106 | cea_map_stack(DB1); | ||
| 107 | cea_map_stack(DB); | ||
| 108 | cea_map_stack(MCE); | ||
| 109 | } | ||
| 110 | #else | ||
| 111 | static inline void percpu_setup_exception_stacks(unsigned int cpu) {} | ||
| 112 | #endif | ||
| 113 | |||
| 81 | /* Setup the fixmap mappings only once per-processor */ | 114 | /* Setup the fixmap mappings only once per-processor */ |
| 82 | static void __init setup_cpu_entry_area(int cpu) | 115 | static void __init setup_cpu_entry_area(unsigned int cpu) |
| 83 | { | 116 | { |
| 117 | struct cpu_entry_area *cea = get_cpu_entry_area(cpu); | ||
| 84 | #ifdef CONFIG_X86_64 | 118 | #ifdef CONFIG_X86_64 |
| 85 | /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ | 119 | /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ |
| 86 | pgprot_t gdt_prot = PAGE_KERNEL_RO; | 120 | pgprot_t gdt_prot = PAGE_KERNEL_RO; |
| @@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu) | |||
| 101 | pgprot_t tss_prot = PAGE_KERNEL; | 135 | pgprot_t tss_prot = PAGE_KERNEL; |
| 102 | #endif | 136 | #endif |
| 103 | 137 | ||
| 104 | cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu), | 138 | cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); |
| 105 | gdt_prot); | ||
| 106 | 139 | ||
| 107 | cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page, | 140 | cea_map_percpu_pages(&cea->entry_stack_page, |
| 108 | per_cpu_ptr(&entry_stack_storage, cpu), 1, | 141 | per_cpu_ptr(&entry_stack_storage, cpu), 1, |
| 109 | PAGE_KERNEL); | 142 | PAGE_KERNEL); |
| 110 | 143 | ||
| @@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu) | |||
| 128 | BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ | 161 | BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ |
| 129 | offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); | 162 | offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); |
| 130 | BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); | 163 | BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); |
| 131 | cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss, | 164 | cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), |
| 132 | &per_cpu(cpu_tss_rw, cpu), | ||
| 133 | sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); | 165 | sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); |
| 134 | 166 | ||
| 135 | #ifdef CONFIG_X86_32 | 167 | #ifdef CONFIG_X86_32 |
| 136 | per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); | 168 | per_cpu(cpu_entry_area, cpu) = cea; |
| 137 | #endif | 169 | #endif |
| 138 | 170 | ||
| 139 | #ifdef CONFIG_X86_64 | 171 | percpu_setup_exception_stacks(cpu); |
| 140 | BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); | 172 | |
| 141 | BUILD_BUG_ON(sizeof(exception_stacks) != | ||
| 142 | sizeof(((struct cpu_entry_area *)0)->exception_stacks)); | ||
| 143 | cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks, | ||
| 144 | &per_cpu(exception_stacks, cpu), | ||
| 145 | sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL); | ||
| 146 | #endif | ||
| 147 | percpu_setup_debug_store(cpu); | 173 | percpu_setup_debug_store(cpu); |
| 148 | } | 174 | } |
| 149 | 175 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 667f1da36208..06c089513d39 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <asm/mmu_context.h> /* vma_pkey() */ | 28 | #include <asm/mmu_context.h> /* vma_pkey() */ |
| 29 | #include <asm/efi.h> /* efi_recover_from_page_fault()*/ | 29 | #include <asm/efi.h> /* efi_recover_from_page_fault()*/ |
| 30 | #include <asm/desc.h> /* store_idt(), ... */ | 30 | #include <asm/desc.h> /* store_idt(), ... */ |
| 31 | #include <asm/cpu_entry_area.h> /* exception stack */ | ||
| 31 | 32 | ||
| 32 | #define CREATE_TRACE_POINTS | 33 | #define CREATE_TRACE_POINTS |
| 33 | #include <asm/trace/exceptions.h> | 34 | #include <asm/trace/exceptions.h> |
| @@ -793,7 +794,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, | |||
| 793 | if (is_vmalloc_addr((void *)address) && | 794 | if (is_vmalloc_addr((void *)address) && |
| 794 | (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || | 795 | (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) || |
| 795 | address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { | 796 | address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) { |
| 796 | unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *); | 797 | unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *); |
| 797 | /* | 798 | /* |
| 798 | * We're likely to be running with very little stack space | 799 | * We're likely to be running with very little stack space |
| 799 | * left. It's plausible that we'd hit this condition but | 800 | * left. It's plausible that we'd hit this condition but |
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index f345586f5e50..ce7188cbdae5 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
| @@ -754,7 +754,7 @@ static void percpu_init(void) | |||
| 754 | * __per_cpu_load | 754 | * __per_cpu_load |
| 755 | * | 755 | * |
| 756 | * The "gold" linker incorrectly associates: | 756 | * The "gold" linker incorrectly associates: |
| 757 | * init_per_cpu__irq_stack_union | 757 | * init_per_cpu__fixed_percpu_data |
| 758 | * init_per_cpu__gdt_page | 758 | * init_per_cpu__gdt_page |
| 759 | */ | 759 | */ |
| 760 | static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) | 760 | static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) |
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 145506f9fdbe..590fcf863006 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c | |||
| @@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle) | |||
| 361 | { | 361 | { |
| 362 | int rc; | 362 | int rc; |
| 363 | 363 | ||
| 364 | common_cpu_up(cpu, idle); | 364 | rc = common_cpu_up(cpu, idle); |
| 365 | if (rc) | ||
| 366 | return rc; | ||
| 365 | 367 | ||
| 366 | xen_setup_runstate_info(cpu); | 368 | xen_setup_runstate_info(cpu); |
| 367 | 369 | ||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 5077ead5e59c..c1d8b90aa4e2 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
| @@ -40,13 +40,13 @@ ENTRY(startup_xen) | |||
| 40 | #ifdef CONFIG_X86_64 | 40 | #ifdef CONFIG_X86_64 |
| 41 | /* Set up %gs. | 41 | /* Set up %gs. |
| 42 | * | 42 | * |
| 43 | * The base of %gs always points to the bottom of the irqstack | 43 | * The base of %gs always points to fixed_percpu_data. If the |
| 44 | * union. If the stack protector canary is enabled, it is | 44 | * stack protector canary is enabled, it is located at %gs:40. |
| 45 | * located at %gs:40. Note that, on SMP, the boot cpu uses | 45 | * Note that, on SMP, the boot cpu uses init data section until |
| 46 | * init data section till per cpu areas are set up. | 46 | * the per cpu areas are set up. |
| 47 | */ | 47 | */ |
| 48 | movl $MSR_GS_BASE,%ecx | 48 | movl $MSR_GS_BASE,%ecx |
| 49 | movq $INIT_PER_CPU_VAR(irq_stack_union),%rax | 49 | movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax |
| 50 | cdq | 50 | cdq |
| 51 | wrmsr | 51 | wrmsr |
| 52 | #endif | 52 | #endif |
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 117e76b2f939..084e45882c73 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c | |||
| @@ -1687,7 +1687,6 @@ void __init xen_init_IRQ(void) | |||
| 1687 | 1687 | ||
| 1688 | #ifdef CONFIG_X86 | 1688 | #ifdef CONFIG_X86 |
| 1689 | if (xen_pv_domain()) { | 1689 | if (xen_pv_domain()) { |
| 1690 | irq_ctx_init(smp_processor_id()); | ||
| 1691 | if (xen_initial_domain()) | 1690 | if (xen_initial_domain()) |
| 1692 | pci_xen_initial_domain(); | 1691 | pci_xen_initial_domain(); |
| 1693 | } | 1692 | } |
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
| @@ -1467,53 +1467,17 @@ static bool is_debug_pagealloc_cache(struct kmem_cache *cachep) | |||
| 1467 | } | 1467 | } |
| 1468 | 1468 | ||
| 1469 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1469 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 1470 | static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, | 1470 | static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map) |
| 1471 | unsigned long caller) | ||
| 1472 | { | ||
| 1473 | int size = cachep->object_size; | ||
| 1474 | |||
| 1475 | addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; | ||
| 1476 | |||
| 1477 | if (size < 5 * sizeof(unsigned long)) | ||
| 1478 | return; | ||
| 1479 | |||
| 1480 | *addr++ = 0x12345678; | ||
| 1481 | *addr++ = caller; | ||
| 1482 | *addr++ = smp_processor_id(); | ||
| 1483 | size -= 3 * sizeof(unsigned long); | ||
| 1484 | { | ||
| 1485 | unsigned long *sptr = &caller; | ||
| 1486 | unsigned long svalue; | ||
| 1487 | |||
| 1488 | while (!kstack_end(sptr)) { | ||
| 1489 | svalue = *sptr++; | ||
| 1490 | if (kernel_text_address(svalue)) { | ||
| 1491 | *addr++ = svalue; | ||
| 1492 | size -= sizeof(unsigned long); | ||
| 1493 | if (size <= sizeof(unsigned long)) | ||
| 1494 | break; | ||
| 1495 | } | ||
| 1496 | } | ||
| 1497 | |||
| 1498 | } | ||
| 1499 | *addr++ = 0x87654321; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | static void slab_kernel_map(struct kmem_cache *cachep, void *objp, | ||
| 1503 | int map, unsigned long caller) | ||
| 1504 | { | 1471 | { |
| 1505 | if (!is_debug_pagealloc_cache(cachep)) | 1472 | if (!is_debug_pagealloc_cache(cachep)) |
| 1506 | return; | 1473 | return; |
| 1507 | 1474 | ||
| 1508 | if (caller) | ||
| 1509 | store_stackinfo(cachep, objp, caller); | ||
| 1510 | |||
| 1511 | kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); | 1475 | kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map); |
| 1512 | } | 1476 | } |
| 1513 | 1477 | ||
| 1514 | #else | 1478 | #else |
| 1515 | static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, | 1479 | static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp, |
| 1516 | int map, unsigned long caller) {} | 1480 | int map) {} |
| 1517 | 1481 | ||
| 1518 | #endif | 1482 | #endif |
| 1519 | 1483 | ||
| @@ -1661,7 +1625,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, | |||
| 1661 | 1625 | ||
| 1662 | if (cachep->flags & SLAB_POISON) { | 1626 | if (cachep->flags & SLAB_POISON) { |
| 1663 | check_poison_obj(cachep, objp); | 1627 | check_poison_obj(cachep, objp); |
| 1664 | slab_kernel_map(cachep, objp, 1, 0); | 1628 | slab_kernel_map(cachep, objp, 1); |
| 1665 | } | 1629 | } |
| 1666 | if (cachep->flags & SLAB_RED_ZONE) { | 1630 | if (cachep->flags & SLAB_RED_ZONE) { |
| 1667 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) | 1631 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE) |
| @@ -2433,7 +2397,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page) | |||
| 2433 | /* need to poison the objs? */ | 2397 | /* need to poison the objs? */ |
| 2434 | if (cachep->flags & SLAB_POISON) { | 2398 | if (cachep->flags & SLAB_POISON) { |
| 2435 | poison_obj(cachep, objp, POISON_FREE); | 2399 | poison_obj(cachep, objp, POISON_FREE); |
| 2436 | slab_kernel_map(cachep, objp, 0, 0); | 2400 | slab_kernel_map(cachep, objp, 0); |
| 2437 | } | 2401 | } |
| 2438 | } | 2402 | } |
| 2439 | #endif | 2403 | #endif |
| @@ -2812,7 +2776,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2812 | 2776 | ||
| 2813 | if (cachep->flags & SLAB_POISON) { | 2777 | if (cachep->flags & SLAB_POISON) { |
| 2814 | poison_obj(cachep, objp, POISON_FREE); | 2778 | poison_obj(cachep, objp, POISON_FREE); |
| 2815 | slab_kernel_map(cachep, objp, 0, caller); | 2779 | slab_kernel_map(cachep, objp, 0); |
| 2816 | } | 2780 | } |
| 2817 | return objp; | 2781 | return objp; |
| 2818 | } | 2782 | } |
| @@ -3076,7 +3040,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
| 3076 | return objp; | 3040 | return objp; |
| 3077 | if (cachep->flags & SLAB_POISON) { | 3041 | if (cachep->flags & SLAB_POISON) { |
| 3078 | check_poison_obj(cachep, objp); | 3042 | check_poison_obj(cachep, objp); |
| 3079 | slab_kernel_map(cachep, objp, 1, 0); | 3043 | slab_kernel_map(cachep, objp, 1); |
| 3080 | poison_obj(cachep, objp, POISON_INUSE); | 3044 | poison_obj(cachep, objp, POISON_INUSE); |
| 3081 | } | 3045 | } |
| 3082 | if (cachep->flags & SLAB_STORE_USER) | 3046 | if (cachep->flags & SLAB_STORE_USER) |
