diff options
author | Jan Beulich <jbeulich@novell.com> | 2006-01-11 16:43:00 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-11 22:01:13 -0500 |
commit | b556b35e98ad2b9174a7a623d152cdf412d1a999 (patch) | |
tree | 10f99d70e719fba811dee85d2f3a82b26d7a4ce8 | |
parent | ed8388a5d9db0445322f86ee8381b0f04a2057ee (diff) |
[PATCH] x86_64: Move int 3 handler to debug stack and allow to increase it.
This patch:
- switches the INT3 handler to run on an IST stack (to cope with
  breakpoints set by a kernel debugger in places where the kernel's
  %gs base hasn't been set up yet); the IST stack used is shared with
  the INT1 handler's
  [AK: this also allows setting a kprobe on the interrupt/exception entry
  points]
- allows nesting of INT1/INT3 handlers so that one can, with a kernel
  debugger, debug (at least) the user-mode portions of the INT1/INT3
  handling; the nesting isn't actively enabled here since a
  kernel-debugger-free kernel doesn't need it
Signed-Off-By: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/x86_64/kernel/asm-offsets.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 26 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup64.c | 22 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 35 | ||||
-rw-r--r-- | include/asm-x86_64/desc.h | 5 | ||||
-rw-r--r-- | include/asm-x86_64/page.h | 15 | ||||
-rw-r--r-- | include/asm-x86_64/pda.h | 8 | ||||
-rw-r--r-- | include/asm-x86_64/processor.h | 2 |
8 files changed, 95 insertions, 22 deletions
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index aaa6d3833517..00a08d13230a 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c | |||
@@ -64,5 +64,9 @@ int main(void) | |||
64 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 64 | DEFINE(pbe_address, offsetof(struct pbe, address)); |
65 | DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); | 65 | DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); |
66 | DEFINE(pbe_next, offsetof(struct pbe, next)); | 66 | DEFINE(pbe_next, offsetof(struct pbe, next)); |
67 | BLANK(); | ||
68 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
69 | DEFINE(DEBUG_IST, DEBUG_STACK); | ||
70 | #endif | ||
67 | return 0; | 71 | return 0; |
68 | } | 72 | } |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index bd21ebafaf66..28b3c8e6b044 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -673,7 +673,10 @@ ENTRY(spurious_interrupt) | |||
673 | 673 | ||
674 | /* error code is on the stack already */ | 674 | /* error code is on the stack already */ |
675 | /* handle NMI like exceptions that can happen everywhere */ | 675 | /* handle NMI like exceptions that can happen everywhere */ |
676 | .macro paranoidentry sym | 676 | #ifndef DEBUG_IST |
677 | # define DEBUG_IST 0 | ||
678 | #endif | ||
679 | .macro paranoidentry sym, ist=0 | ||
677 | SAVE_ALL | 680 | SAVE_ALL |
678 | cld | 681 | cld |
679 | movl $1,%ebx | 682 | movl $1,%ebx |
@@ -683,10 +686,20 @@ ENTRY(spurious_interrupt) | |||
683 | js 1f | 686 | js 1f |
684 | swapgs | 687 | swapgs |
685 | xorl %ebx,%ebx | 688 | xorl %ebx,%ebx |
686 | 1: movq %rsp,%rdi | 689 | 1: |
690 | .if \ist | ||
691 | movq %gs:pda_data_offset, %rbp | ||
692 | .endif | ||
693 | movq %rsp,%rdi | ||
687 | movq ORIG_RAX(%rsp),%rsi | 694 | movq ORIG_RAX(%rsp),%rsi |
688 | movq $-1,ORIG_RAX(%rsp) | 695 | movq $-1,ORIG_RAX(%rsp) |
696 | .if \ist | ||
697 | subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
698 | .endif | ||
689 | call \sym | 699 | call \sym |
700 | .if \ist | ||
701 | addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
702 | .endif | ||
690 | cli | 703 | cli |
691 | .endm | 704 | .endm |
692 | 705 | ||
@@ -904,7 +917,7 @@ KPROBE_ENTRY(debug) | |||
904 | INTR_FRAME | 917 | INTR_FRAME |
905 | pushq $0 | 918 | pushq $0 |
906 | CFI_ADJUST_CFA_OFFSET 8 | 919 | CFI_ADJUST_CFA_OFFSET 8 |
907 | paranoidentry do_debug | 920 | paranoidentry do_debug, DEBUG_IST |
908 | jmp paranoid_exit | 921 | jmp paranoid_exit |
909 | CFI_ENDPROC | 922 | CFI_ENDPROC |
910 | .previous .text | 923 | .previous .text |
@@ -959,7 +972,12 @@ paranoid_schedule: | |||
959 | CFI_ENDPROC | 972 | CFI_ENDPROC |
960 | 973 | ||
961 | KPROBE_ENTRY(int3) | 974 | KPROBE_ENTRY(int3) |
962 | zeroentry do_int3 | 975 | INTR_FRAME |
976 | pushq $0 | ||
977 | CFI_ADJUST_CFA_OFFSET 8 | ||
978 | paranoidentry do_int3, DEBUG_IST | ||
979 | jmp paranoid_exit | ||
980 | CFI_ENDPROC | ||
963 | .previous .text | 981 | .previous .text |
964 | 982 | ||
965 | ENTRY(overflow) | 983 | ENTRY(overflow) |
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 7b7131d8bc90..39e728cfe6a0 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -145,7 +145,7 @@ void pda_init(int cpu) | |||
145 | pda->irqstackptr += IRQSTACKSIZE-64; | 145 | pda->irqstackptr += IRQSTACKSIZE-64; |
146 | } | 146 | } |
147 | 147 | ||
148 | char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] | 148 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 2) * EXCEPTION_STKSZ + DEBUG_STKSZ] |
149 | __attribute__((section(".bss.page_aligned"))); | 149 | __attribute__((section(".bss.page_aligned"))); |
150 | 150 | ||
151 | /* May not be marked __init: used by software suspend */ | 151 | /* May not be marked __init: used by software suspend */ |
@@ -236,13 +236,27 @@ void __cpuinit cpu_init (void) | |||
236 | */ | 236 | */ |
237 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 237 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
238 | if (cpu) { | 238 | if (cpu) { |
239 | estacks = (char *)__get_free_pages(GFP_ATOMIC, | 239 | static const unsigned int order[N_EXCEPTION_STACKS] = { |
240 | EXCEPTION_STACK_ORDER); | 240 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
241 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
242 | }; | ||
243 | |||
244 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
241 | if (!estacks) | 245 | if (!estacks) |
242 | panic("Cannot allocate exception stack %ld %d\n", | 246 | panic("Cannot allocate exception stack %ld %d\n", |
243 | v, cpu); | 247 | v, cpu); |
244 | } | 248 | } |
245 | estacks += EXCEPTION_STKSZ; | 249 | switch (v + 1) { |
250 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
251 | case DEBUG_STACK: | ||
252 | cpu_pda[cpu].debugstack = (unsigned long)estacks; | ||
253 | estacks += DEBUG_STKSZ; | ||
254 | break; | ||
255 | #endif | ||
256 | default: | ||
257 | estacks += EXCEPTION_STKSZ; | ||
258 | break; | ||
259 | } | ||
246 | t->ist[v] = (unsigned long)estacks; | 260 | t->ist[v] = (unsigned long)estacks; |
247 | } | 261 | } |
248 | 262 | ||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index bd71ddac0dc5..1a9094dab682 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -121,19 +121,31 @@ int printk_address(unsigned long address) | |||
121 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 121 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
122 | unsigned *usedp, const char **idp) | 122 | unsigned *usedp, const char **idp) |
123 | { | 123 | { |
124 | static const char ids[N_EXCEPTION_STACKS][8] = { | 124 | static char ids[][8] = { |
125 | [DEBUG_STACK - 1] = "#DB", | 125 | [DEBUG_STACK - 1] = "#DB", |
126 | [NMI_STACK - 1] = "NMI", | 126 | [NMI_STACK - 1] = "NMI", |
127 | [DOUBLEFAULT_STACK - 1] = "#DF", | 127 | [DOUBLEFAULT_STACK - 1] = "#DF", |
128 | [STACKFAULT_STACK - 1] = "#SS", | 128 | [STACKFAULT_STACK - 1] = "#SS", |
129 | [MCE_STACK - 1] = "#MC", | 129 | [MCE_STACK - 1] = "#MC", |
130 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
131 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
132 | #endif | ||
130 | }; | 133 | }; |
131 | unsigned k; | 134 | unsigned k; |
132 | 135 | ||
133 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 136 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
134 | unsigned long end; | 137 | unsigned long end; |
135 | 138 | ||
136 | end = per_cpu(init_tss, cpu).ist[k]; | 139 | switch (k + 1) { |
140 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
141 | case DEBUG_STACK: | ||
142 | end = cpu_pda[cpu].debugstack + DEBUG_STKSZ; | ||
143 | break; | ||
144 | #endif | ||
145 | default: | ||
146 | end = per_cpu(init_tss, cpu).ist[k]; | ||
147 | break; | ||
148 | } | ||
137 | if (stack >= end) | 149 | if (stack >= end) |
138 | continue; | 150 | continue; |
139 | if (stack >= end - EXCEPTION_STKSZ) { | 151 | if (stack >= end - EXCEPTION_STKSZ) { |
@@ -143,6 +155,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
143 | *idp = ids[k]; | 155 | *idp = ids[k]; |
144 | return (unsigned long *)end; | 156 | return (unsigned long *)end; |
145 | } | 157 | } |
158 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
159 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
160 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
161 | |||
162 | do { | ||
163 | ++j; | ||
164 | end -= EXCEPTION_STKSZ; | ||
165 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | ||
166 | } while (stack < end - EXCEPTION_STKSZ); | ||
167 | if (*usedp & (1U << j)) | ||
168 | break; | ||
169 | *usedp |= 1U << j; | ||
170 | *idp = ids[j]; | ||
171 | return (unsigned long *)end; | ||
172 | } | ||
173 | #endif | ||
146 | } | 174 | } |
147 | return NULL; | 175 | return NULL; |
148 | } | 176 | } |
@@ -613,6 +641,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) | |||
613 | io_check_error(reason, regs); | 641 | io_check_error(reason, regs); |
614 | } | 642 | } |
615 | 643 | ||
644 | /* runs on IST stack. */ | ||
616 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) | 645 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) |
617 | { | 646 | { |
618 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { | 647 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { |
@@ -894,7 +923,7 @@ void __init trap_init(void) | |||
894 | set_intr_gate(0,&divide_error); | 923 | set_intr_gate(0,&divide_error); |
895 | set_intr_gate_ist(1,&debug,DEBUG_STACK); | 924 | set_intr_gate_ist(1,&debug,DEBUG_STACK); |
896 | set_intr_gate_ist(2,&nmi,NMI_STACK); | 925 | set_intr_gate_ist(2,&nmi,NMI_STACK); |
897 | set_system_gate(3,&int3); | 926 | set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ |
898 | set_system_gate(4,&overflow); /* int4 can be called from all */ | 927 | set_system_gate(4,&overflow); /* int4 can be called from all */ |
899 | set_intr_gate(5,&bounds); | 928 | set_intr_gate(5,&bounds); |
900 | set_intr_gate(6,&invalid_op); | 929 | set_intr_gate(6,&invalid_op); |
diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index 33764869387b..5ce0e34f9a0c 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h | |||
@@ -114,6 +114,11 @@ static inline void set_system_gate(int nr, void *func) | |||
114 | _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); | 114 | _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); |
115 | } | 115 | } |
116 | 116 | ||
117 | static inline void set_system_gate_ist(int nr, void *func, unsigned ist) | ||
118 | { | ||
119 | _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); | ||
120 | } | ||
121 | |||
117 | static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, | 122 | static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, |
118 | unsigned size) | 123 | unsigned size) |
119 | { | 124 | { |
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index 06e489f32472..dcbb4fcd9a18 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h | |||
@@ -14,13 +14,18 @@ | |||
14 | #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) | 14 | #define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) |
15 | 15 | ||
16 | #define THREAD_ORDER 1 | 16 | #define THREAD_ORDER 1 |
17 | #ifdef __ASSEMBLY__ | 17 | #define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) |
18 | #define THREAD_SIZE (1 << (PAGE_SHIFT + THREAD_ORDER)) | ||
19 | #else | ||
20 | #define THREAD_SIZE (1UL << (PAGE_SHIFT + THREAD_ORDER)) | ||
21 | #endif | ||
22 | #define CURRENT_MASK (~(THREAD_SIZE-1)) | 18 | #define CURRENT_MASK (~(THREAD_SIZE-1)) |
23 | 19 | ||
20 | #define EXCEPTION_STACK_ORDER 0 | ||
21 | #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) | ||
22 | |||
23 | #define DEBUG_STACK_ORDER EXCEPTION_STACK_ORDER | ||
24 | #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) | ||
25 | |||
26 | #define IRQSTACK_ORDER 2 | ||
27 | #define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) | ||
28 | |||
24 | #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) | 29 | #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1)) |
25 | #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) | 30 | #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) |
26 | 31 | ||
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index 8733ccfa442e..431a909fbec9 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <linux/stddef.h> | 5 | #include <linux/stddef.h> |
6 | #include <linux/types.h> | 6 | #include <linux/types.h> |
7 | #include <linux/cache.h> | 7 | #include <linux/cache.h> |
8 | #include <asm/page.h> | ||
8 | 9 | ||
9 | /* Per processor datastructure. %gs points to it while the kernel runs */ | 10 | /* Per processor datastructure. %gs points to it while the kernel runs */ |
10 | struct x8664_pda { | 11 | struct x8664_pda { |
@@ -12,6 +13,9 @@ struct x8664_pda { | |||
12 | unsigned long data_offset; /* Per cpu data offset from linker address */ | 13 | unsigned long data_offset; /* Per cpu data offset from linker address */ |
13 | unsigned long kernelstack; /* top of kernel stack for current */ | 14 | unsigned long kernelstack; /* top of kernel stack for current */ |
14 | unsigned long oldrsp; /* user rsp for system call */ | 15 | unsigned long oldrsp; /* user rsp for system call */ |
16 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
17 | unsigned long debugstack; /* #DB/#BP stack. */ | ||
18 | #endif | ||
15 | int irqcount; /* Irq nesting counter. Starts with -1 */ | 19 | int irqcount; /* Irq nesting counter. Starts with -1 */ |
16 | int cpunumber; /* Logical CPU number */ | 20 | int cpunumber; /* Logical CPU number */ |
17 | char *irqstackptr; /* top of irqstack */ | 21 | char *irqstackptr; /* top of irqstack */ |
@@ -23,10 +27,6 @@ struct x8664_pda { | |||
23 | unsigned apic_timer_irqs; | 27 | unsigned apic_timer_irqs; |
24 | } ____cacheline_aligned_in_smp; | 28 | } ____cacheline_aligned_in_smp; |
25 | 29 | ||
26 | |||
27 | #define IRQSTACK_ORDER 2 | ||
28 | #define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) | ||
29 | |||
30 | extern struct x8664_pda cpu_pda[]; | 30 | extern struct x8664_pda cpu_pda[]; |
31 | 31 | ||
32 | /* | 32 | /* |
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 4861246548f7..5cb151538cd5 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h | |||
@@ -273,8 +273,6 @@ struct thread_struct { | |||
273 | #define DEBUG_STACK 4 | 273 | #define DEBUG_STACK 4 |
274 | #define MCE_STACK 5 | 274 | #define MCE_STACK 5 |
275 | #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ | 275 | #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ |
276 | #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) | ||
277 | #define EXCEPTION_STACK_ORDER 0 | ||
278 | 276 | ||
279 | #define start_thread(regs,new_rip,new_rsp) do { \ | 277 | #define start_thread(regs,new_rip,new_rsp) do { \ |
280 | asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ | 278 | asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \ |