about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2019-04-14 11:59:57 -0400
committerBorislav Petkov <bp@suse.de>2019-04-17 09:14:28 -0400
commit2a594d4ccf3f10f80b77d71bd3dad10813ac0137 (patch)
tree9cd3da8abe070c47436bf0217a0cb8ee9720f7ad
parent1bdb67e5aa2d5d43c48cb7d93393fcba276c9e71 (diff)
x86/exceptions: Split debug IST stack
The debug IST stack is actually two separate debug stacks to handle #DB recursion. This is required because the CPU starts always at top of stack on exception entry, which means on #DB recursion the second #DB would overwrite the stack of the first. The low level entry code therefore adjusts the top of stack on entry so a secondary #DB starts from a different stack page. But the stack pages are adjacent without a guard page between them.

Split the debug stack into 3 stacks which are separated by guard pages. The 3rd stack is never mapped into the cpu_entry_area and is only there to catch triple #DB nesting:

    --- top of DB_stack     <- Initial stack
    --- end of DB_stack
        guard page

    --- top of DB1_stack    <- Top of stack after entering first #DB
    --- end of DB1_stack
        guard page

    --- top of DB2_stack    <- Top of stack after entering second #DB
    --- end of DB2_stack
        guard page

If DB2 would not act as the final guard hole, a second #DB would point the top of #DB stack to the stack below #DB1 which would be valid and not catch the not so desired triple nesting.

The backing store does not allocate any memory for DB2 and its guard page as it is not going to be mapped into the cpu_entry_area.

 - Adjust the low level entry code so it adjusts top of #DB with the offset between the stacks instead of exception stack size.

 - Make the dumpstack code aware of the new stacks.

 - Adjust the in_debug_stack() implementation and move it into the NMI code where it belongs. As this is NMI hotpath code, it just checks the full area between top of DB_stack and bottom of DB1_stack without checking for the guard page. That's correct because the NMI cannot hit a stackpointer pointing to the guard page between DB and DB1 stack. Even if it would, then the NMI operation still is unaffected, but the resume of the debug exception on the topmost DB stack will crash by touching the guard page.
[ bp: Make exception_stack_names static const char * const ]

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Chang S. Bae" <chang.seok.bae@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: linux-doc@vger.kernel.org
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qian Cai <cai@lca.pw>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190414160145.439944544@linutronix.de
-rw-r--r--Documentation/x86/kernel-stacks7
-rw-r--r--arch/x86/entry/entry_64.S8
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h14
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/page_64_types.h3
-rw-r--r--arch/x86/kernel/asm-offsets_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c11
-rw-r--r--arch/x86/kernel/dumpstack_64.c12
-rw-r--r--arch/x86/kernel/nmi.c20
-rw-r--r--arch/x86/mm/cpu_entry_area.c4
10 files changed, 52 insertions, 31 deletions
diff --git a/Documentation/x86/kernel-stacks b/Documentation/x86/kernel-stacks
index 1b04596caea9..d1bfb0b95ee0 100644
--- a/Documentation/x86/kernel-stacks
+++ b/Documentation/x86/kernel-stacks
@@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
76 middle of switching stacks. Using IST for NMI events avoids making 76 middle of switching stacks. Using IST for NMI events avoids making
77 assumptions about the previous state of the kernel stack. 77 assumptions about the previous state of the kernel stack.
78 78
79* ESTACK_DB. DEBUG_STKSZ 79* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
80 80
81 Used for hardware debug interrupts (interrupt 1) and for software 81 Used for hardware debug interrupts (interrupt 1) and for software
82 debug interrupts (INT3). 82 debug interrupts (INT3).
@@ -86,6 +86,11 @@ The currently assigned IST stacks are :-
86 avoids making assumptions about the previous state of the kernel 86 avoids making assumptions about the previous state of the kernel
87 stack. 87 stack.
88 88
89 To handle nested #DB correctly there exist two instances of DB stacks. On
90 #DB entry the IST stackpointer for #DB is switched to the second instance
91 so a nested #DB starts from a clean stack. The nested #DB switches
92 the IST stackpointer to a guard hole to catch triple nesting.
93
89* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE). 94* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
90 95
91 Used for interrupt 18 - Machine Check Exception (#MC). 96 Used for interrupt 18 - Machine Check Exception (#MC).
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5c0348504a4b..ee649f1f279e 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -879,7 +879,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
879 * @paranoid == 2 is special: the stub will never switch stacks. This is for 879 * @paranoid == 2 is special: the stub will never switch stacks. This is for
880 * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS. 880 * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
881 */ 881 */
882.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 882.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
883ENTRY(\sym) 883ENTRY(\sym)
884 UNWIND_HINT_IRET_REGS offset=\has_error_code*8 884 UNWIND_HINT_IRET_REGS offset=\has_error_code*8
885 885
@@ -925,13 +925,13 @@ ENTRY(\sym)
925 .endif 925 .endif
926 926
927 .if \shift_ist != -1 927 .if \shift_ist != -1
928 subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) 928 subq $\ist_offset, CPU_TSS_IST(\shift_ist)
929 .endif 929 .endif
930 930
931 call \do_sym 931 call \do_sym
932 932
933 .if \shift_ist != -1 933 .if \shift_ist != -1
934 addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist) 934 addq $\ist_offset, CPU_TSS_IST(\shift_ist)
935 .endif 935 .endif
936 936
937 /* these procedures expect "no swapgs" flag in ebx */ 937 /* these procedures expect "no swapgs" flag in ebx */
@@ -1129,7 +1129,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
1129 hv_stimer0_callback_vector hv_stimer0_vector_handler 1129 hv_stimer0_callback_vector hv_stimer0_vector_handler
1130#endif /* CONFIG_HYPERV */ 1130#endif /* CONFIG_HYPERV */
1131 1131
1132idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB 1132idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
1133idtentry int3 do_int3 has_error_code=0 1133idtentry int3 do_int3 has_error_code=0
1134idtentry stack_segment do_stack_segment has_error_code=1 1134idtentry stack_segment do_stack_segment has_error_code=1
1135 1135
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 9c96406e6d2b..cff3f3f3bfe0 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -10,25 +10,29 @@
10#ifdef CONFIG_X86_64 10#ifdef CONFIG_X86_64
11 11
12/* Macro to enforce the same ordering and stack sizes */ 12/* Macro to enforce the same ordering and stack sizes */
13#define ESTACKS_MEMBERS(guardsize) \ 13#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
14 char DF_stack_guard[guardsize]; \ 14 char DF_stack_guard[guardsize]; \
15 char DF_stack[EXCEPTION_STKSZ]; \ 15 char DF_stack[EXCEPTION_STKSZ]; \
16 char NMI_stack_guard[guardsize]; \ 16 char NMI_stack_guard[guardsize]; \
17 char NMI_stack[EXCEPTION_STKSZ]; \ 17 char NMI_stack[EXCEPTION_STKSZ]; \
18 char DB2_stack_guard[guardsize]; \
19 char DB2_stack[db2_holesize]; \
20 char DB1_stack_guard[guardsize]; \
21 char DB1_stack[EXCEPTION_STKSZ]; \
18 char DB_stack_guard[guardsize]; \ 22 char DB_stack_guard[guardsize]; \
19 char DB_stack[DEBUG_STKSZ]; \ 23 char DB_stack[EXCEPTION_STKSZ]; \
20 char MCE_stack_guard[guardsize]; \ 24 char MCE_stack_guard[guardsize]; \
21 char MCE_stack[EXCEPTION_STKSZ]; \ 25 char MCE_stack[EXCEPTION_STKSZ]; \
22 char IST_top_guard[guardsize]; \ 26 char IST_top_guard[guardsize]; \
23 27
24/* The exception stacks' physical storage. No guard pages required */ 28/* The exception stacks' physical storage. No guard pages required */
25struct exception_stacks { 29struct exception_stacks {
26 ESTACKS_MEMBERS(0) 30 ESTACKS_MEMBERS(0, 0)
27}; 31};
28 32
29/* The effective cpu entry area mapping with guard pages. */ 33/* The effective cpu entry area mapping with guard pages. */
30struct cea_exception_stacks { 34struct cea_exception_stacks {
31 ESTACKS_MEMBERS(PAGE_SIZE) 35 ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
32}; 36};
33 37
34/* 38/*
@@ -37,6 +41,8 @@ struct cea_exception_stacks {
37enum exception_stack_ordering { 41enum exception_stack_ordering {
38 ESTACK_DF, 42 ESTACK_DF,
39 ESTACK_NMI, 43 ESTACK_NMI,
44 ESTACK_DB2,
45 ESTACK_DB1,
40 ESTACK_DB, 46 ESTACK_DB,
41 ESTACK_MCE, 47 ESTACK_MCE,
42 N_EXCEPTION_STACKS 48 N_EXCEPTION_STACKS
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 9e5ca30738e5..1a8609a15856 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
104{ 104{
105 __this_cpu_dec(debug_stack_usage); 105 __this_cpu_dec(debug_stack_usage);
106} 106}
107int is_debug_stack(unsigned long addr);
108void debug_stack_set_zero(void); 107void debug_stack_set_zero(void);
109void debug_stack_reset(void); 108void debug_stack_reset(void);
110#else /* !X86_64 */ 109#else /* !X86_64 */
111static inline int is_debug_stack(unsigned long addr) { return 0; }
112static inline void debug_stack_set_zero(void) { } 110static inline void debug_stack_set_zero(void) { }
113static inline void debug_stack_reset(void) { } 111static inline void debug_stack_reset(void) { }
114static inline void debug_stack_usage_inc(void) { } 112static inline void debug_stack_usage_inc(void) { }
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 056de887b220..793c14c372cb 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -18,9 +18,6 @@
18#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) 18#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
19#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) 19#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
20 20
21#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
22#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
23
24#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) 21#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
25#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) 22#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
26 23
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index ddced33184b5..f5281567e28e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -68,6 +68,8 @@ int main(void)
68#undef ENTRY 68#undef ENTRY
69 69
70 OFFSET(TSS_ist, tss_struct, x86_tss.ist); 70 OFFSET(TSS_ist, tss_struct, x86_tss.ist);
71 DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
72 offsetof(struct cea_exception_stacks, DB1_stack));
71 BLANK(); 73 BLANK();
72 74
73#ifdef CONFIG_STACKPROTECTOR 75#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 143aceaf9a9a..88cab45707a9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1549,17 +1549,7 @@ void syscall_init(void)
1549 X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); 1549 X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
1550} 1550}
1551 1551
1552static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
1553DEFINE_PER_CPU(int, debug_stack_usage); 1552DEFINE_PER_CPU(int, debug_stack_usage);
1554
1555int is_debug_stack(unsigned long addr)
1556{
1557 return __this_cpu_read(debug_stack_usage) ||
1558 (addr <= __this_cpu_read(debug_stack_addr) &&
1559 addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
1560}
1561NOKPROBE_SYMBOL(is_debug_stack);
1562
1563DEFINE_PER_CPU(u32, debug_idt_ctr); 1553DEFINE_PER_CPU(u32, debug_idt_ctr);
1564 1554
1565void debug_stack_set_zero(void) 1555void debug_stack_set_zero(void)
@@ -1735,7 +1725,6 @@ void cpu_init(void)
1735 t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI); 1725 t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
1736 t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB); 1726 t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
1737 t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE); 1727 t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
1738 per_cpu(debug_stack_addr, cpu) = t->x86_tss.ist[IST_INDEX_DB];
1739 } 1728 }
1740 1729
1741 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; 1730 t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index f6fbd0438f9e..fca97bd3d8ae 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -19,16 +19,18 @@
19#include <asm/cpu_entry_area.h> 19#include <asm/cpu_entry_area.h>
20#include <asm/stacktrace.h> 20#include <asm/stacktrace.h>
21 21
22static const char *exception_stack_names[N_EXCEPTION_STACKS] = { 22static const char * const exception_stack_names[] = {
23 [ ESTACK_DF ] = "#DF", 23 [ ESTACK_DF ] = "#DF",
24 [ ESTACK_NMI ] = "NMI", 24 [ ESTACK_NMI ] = "NMI",
25 [ ESTACK_DB2 ] = "#DB2",
26 [ ESTACK_DB1 ] = "#DB1",
25 [ ESTACK_DB ] = "#DB", 27 [ ESTACK_DB ] = "#DB",
26 [ ESTACK_MCE ] = "#MC", 28 [ ESTACK_MCE ] = "#MC",
27}; 29};
28 30
29const char *stack_type_name(enum stack_type type) 31const char *stack_type_name(enum stack_type type)
30{ 32{
31 BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); 33 BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
32 34
33 if (type == STACK_TYPE_IRQ) 35 if (type == STACK_TYPE_IRQ)
34 return "IRQ"; 36 return "IRQ";
@@ -58,9 +60,11 @@ struct estack_layout {
58 .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \ 60 .end = offsetof(struct cea_exception_stacks, x## _stack_guard) \
59 } 61 }
60 62
61static const struct estack_layout layout[N_EXCEPTION_STACKS] = { 63static const struct estack_layout layout[] = {
62 [ ESTACK_DF ] = ESTACK_ENTRY(DF), 64 [ ESTACK_DF ] = ESTACK_ENTRY(DF),
63 [ ESTACK_NMI ] = ESTACK_ENTRY(NMI), 65 [ ESTACK_NMI ] = ESTACK_ENTRY(NMI),
66 [ ESTACK_DB2 ] = { .begin = 0, .end = 0},
67 [ ESTACK_DB1 ] = ESTACK_ENTRY(DB1),
64 [ ESTACK_DB ] = ESTACK_ENTRY(DB), 68 [ ESTACK_DB ] = ESTACK_ENTRY(DB),
65 [ ESTACK_MCE ] = ESTACK_ENTRY(MCE), 69 [ ESTACK_MCE ] = ESTACK_ENTRY(MCE),
66}; 70};
@@ -71,7 +75,7 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
71 struct pt_regs *regs; 75 struct pt_regs *regs;
72 unsigned int k; 76 unsigned int k;
73 77
74 BUILD_BUG_ON(N_EXCEPTION_STACKS != 4); 78 BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
75 79
76 estacks = (unsigned long)__this_cpu_read(cea_exception_stacks); 80 estacks = (unsigned long)__this_cpu_read(cea_exception_stacks);
77 81
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 18bc9b51ac9b..3755d0310026 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -21,13 +21,14 @@
21#include <linux/ratelimit.h> 21#include <linux/ratelimit.h>
22#include <linux/slab.h> 22#include <linux/slab.h>
23#include <linux/export.h> 23#include <linux/export.h>
24#include <linux/atomic.h>
24#include <linux/sched/clock.h> 25#include <linux/sched/clock.h>
25 26
26#if defined(CONFIG_EDAC) 27#if defined(CONFIG_EDAC)
27#include <linux/edac.h> 28#include <linux/edac.h>
28#endif 29#endif
29 30
30#include <linux/atomic.h> 31#include <asm/cpu_entry_area.h>
31#include <asm/traps.h> 32#include <asm/traps.h>
32#include <asm/mach_traps.h> 33#include <asm/mach_traps.h>
33#include <asm/nmi.h> 34#include <asm/nmi.h>
@@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
487 * switch back to the original IDT. 488 * switch back to the original IDT.
488 */ 489 */
489static DEFINE_PER_CPU(int, update_debug_stack); 490static DEFINE_PER_CPU(int, update_debug_stack);
491
492static bool notrace is_debug_stack(unsigned long addr)
493{
494 struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
495 unsigned long top = CEA_ESTACK_TOP(cs, DB);
496 unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
497
498 if (__this_cpu_read(debug_stack_usage))
499 return true;
500 /*
501 * Note, this covers the guard page between DB and DB1 as well to
502 * avoid two checks. But by all means @addr can never point into
503 * the guard page.
504 */
505 return addr >= bot && addr < top;
506}
507NOKPROBE_SYMBOL(is_debug_stack);
490#endif 508#endif
491 509
492dotraplinkage notrace void 510dotraplinkage notrace void
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index a00d0d059c8a..752ad11d6868 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -98,10 +98,12 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu)
98 98
99 /* 99 /*
100 * The exceptions stack mappings in the per cpu area are protected 100 * The exceptions stack mappings in the per cpu area are protected
101 * by guard pages so each stack must be mapped separately. 101 * by guard pages so each stack must be mapped separately. DB2 is
102 * not mapped; it just exists to catch triple nesting of #DB.
102 */ 103 */
103 cea_map_stack(DF); 104 cea_map_stack(DF);
104 cea_map_stack(NMI); 105 cea_map_stack(NMI);
106 cea_map_stack(DB1);
105 cea_map_stack(DB); 107 cea_map_stack(DB);
106 cea_map_stack(MCE); 108 cea_map_stack(MCE);
107} 109}