diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-02-13 03:45:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-02-13 03:45:09 -0500 |
commit | ab639f3593f0b5e4439d549831442c18c3baf989 (patch) | |
tree | 118743e94e5dc86c835dbc1f1d3bf1612f4ae740 /arch/x86 | |
parent | f8a6b2b9cee298a9663cbe38ce1eb5240987cb62 (diff) | |
parent | 58105ef1857112a186696c9b8957020090226a28 (diff) |
Merge branch 'core/percpu' into x86/core
Diffstat (limited to 'arch/x86')
38 files changed, 888 insertions, 569 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 148c112c9ca4..1042d69b267d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -194,6 +194,10 @@ config X86_TRAMPOLINE | |||
194 | depends on SMP || (64BIT && ACPI_SLEEP) | 194 | depends on SMP || (64BIT && ACPI_SLEEP) |
195 | default y | 195 | default y |
196 | 196 | ||
197 | config X86_32_LAZY_GS | ||
198 | def_bool y | ||
199 | depends on X86_32 && !CC_STACKPROTECTOR | ||
200 | |||
197 | config KTIME_SCALAR | 201 | config KTIME_SCALAR |
198 | def_bool X86_32 | 202 | def_bool X86_32 |
199 | source "init/Kconfig" | 203 | source "init/Kconfig" |
@@ -1339,7 +1343,6 @@ config CC_STACKPROTECTOR_ALL | |||
1339 | 1343 | ||
1340 | config CC_STACKPROTECTOR | 1344 | config CC_STACKPROTECTOR |
1341 | bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" | 1345 | bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" |
1342 | depends on X86_64 | ||
1343 | select CC_STACKPROTECTOR_ALL | 1346 | select CC_STACKPROTECTOR_ALL |
1344 | ---help--- | 1347 | ---help--- |
1345 | This option turns on the -fstack-protector GCC feature. This | 1348 | This option turns on the -fstack-protector GCC feature. This |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 99550c407990..1836191839ee 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -70,14 +70,17 @@ else | |||
70 | # this works around some issues with generating unwind tables in older gccs | 70 | # this works around some issues with generating unwind tables in older gccs |
71 | # newer gccs do it by default | 71 | # newer gccs do it by default |
72 | KBUILD_CFLAGS += -maccumulate-outgoing-args | 72 | KBUILD_CFLAGS += -maccumulate-outgoing-args |
73 | endif | ||
73 | 74 | ||
74 | stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh | 75 | ifdef CONFIG_CC_STACKPROTECTOR |
75 | stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ | 76 | cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh |
76 | "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) | 77 | ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) |
77 | stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ | 78 | stackp-y := -fstack-protector |
78 | "$(CC)" -fstack-protector-all ) | 79 | stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all |
79 | 80 | KBUILD_CFLAGS += $(stackp-y) | |
80 | KBUILD_CFLAGS += $(stackp-y) | 81 | else |
82 | $(warning stack protector enabled but no compiler support) | ||
83 | endif | ||
81 | endif | 84 | endif |
82 | 85 | ||
83 | # Stackpointer is addressed different for 32 bit and 64 bit x86 | 86 | # Stackpointer is addressed different for 32 bit and 64 bit x86 |
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 3c601f8224be..bb70e397aa84 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h | |||
@@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) | |||
55 | dump->regs.ds = (u16)regs->ds; | 55 | dump->regs.ds = (u16)regs->ds; |
56 | dump->regs.es = (u16)regs->es; | 56 | dump->regs.es = (u16)regs->es; |
57 | dump->regs.fs = (u16)regs->fs; | 57 | dump->regs.fs = (u16)regs->fs; |
58 | savesegment(gs, dump->regs.gs); | 58 | dump->regs.gs = get_user_gs(regs); |
59 | dump->regs.orig_ax = regs->orig_ax; | 59 | dump->regs.orig_ax = regs->orig_ax; |
60 | dump->regs.ip = regs->ip; | 60 | dump->regs.ip = regs->ip; |
61 | dump->regs.cs = (u16)regs->cs; | 61 | dump->regs.cs = (u16)regs->cs; |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f51a3ddde01a..83c1bc8d2e8a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; | |||
112 | * now struct_user_regs, they are different) | 112 | * now struct_user_regs, they are different) |
113 | */ | 113 | */ |
114 | 114 | ||
115 | #define ELF_CORE_COPY_REGS(pr_reg, regs) \ | 115 | #define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ |
116 | do { \ | 116 | do { \ |
117 | pr_reg[0] = regs->bx; \ | 117 | pr_reg[0] = regs->bx; \ |
118 | pr_reg[1] = regs->cx; \ | 118 | pr_reg[1] = regs->cx; \ |
@@ -124,7 +124,6 @@ do { \ | |||
124 | pr_reg[7] = regs->ds & 0xffff; \ | 124 | pr_reg[7] = regs->ds & 0xffff; \ |
125 | pr_reg[8] = regs->es & 0xffff; \ | 125 | pr_reg[8] = regs->es & 0xffff; \ |
126 | pr_reg[9] = regs->fs & 0xffff; \ | 126 | pr_reg[9] = regs->fs & 0xffff; \ |
127 | savesegment(gs, pr_reg[10]); \ | ||
128 | pr_reg[11] = regs->orig_ax; \ | 127 | pr_reg[11] = regs->orig_ax; \ |
129 | pr_reg[12] = regs->ip; \ | 128 | pr_reg[12] = regs->ip; \ |
130 | pr_reg[13] = regs->cs & 0xffff; \ | 129 | pr_reg[13] = regs->cs & 0xffff; \ |
@@ -133,6 +132,18 @@ do { \ | |||
133 | pr_reg[16] = regs->ss & 0xffff; \ | 132 | pr_reg[16] = regs->ss & 0xffff; \ |
134 | } while (0); | 133 | } while (0); |
135 | 134 | ||
135 | #define ELF_CORE_COPY_REGS(pr_reg, regs) \ | ||
136 | do { \ | ||
137 | ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ | ||
138 | pr_reg[10] = get_user_gs(regs); \ | ||
139 | } while (0); | ||
140 | |||
141 | #define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ | ||
142 | do { \ | ||
143 | ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ | ||
144 | savesegment(gs, pr_reg[10]); \ | ||
145 | } while (0); | ||
146 | |||
136 | #define ELF_PLATFORM (utsname()->machine) | 147 | #define ELF_PLATFORM (utsname()->machine) |
137 | #define set_personality_64bit() do { } while (0) | 148 | #define set_personality_64bit() do { } while (0) |
138 | 149 | ||
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 52948df9cd1d..f923203dc39a 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
@@ -79,7 +79,7 @@ do { \ | |||
79 | #ifdef CONFIG_X86_32 | 79 | #ifdef CONFIG_X86_32 |
80 | #define deactivate_mm(tsk, mm) \ | 80 | #define deactivate_mm(tsk, mm) \ |
81 | do { \ | 81 | do { \ |
82 | loadsegment(gs, 0); \ | 82 | lazy_load_gs(0); \ |
83 | } while (0) | 83 | } while (0) |
84 | #else | 84 | #else |
85 | #define deactivate_mm(tsk, mm) \ | 85 | #define deactivate_mm(tsk, mm) \ |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0b64af4f13ac..aee103b26d01 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -34,6 +34,12 @@ | |||
34 | #define PER_CPU_VAR(var) per_cpu__##var | 34 | #define PER_CPU_VAR(var) per_cpu__##var |
35 | #endif /* SMP */ | 35 | #endif /* SMP */ |
36 | 36 | ||
37 | #ifdef CONFIG_X86_64_SMP | ||
38 | #define INIT_PER_CPU_VAR(var) init_per_cpu__##var | ||
39 | #else | ||
40 | #define INIT_PER_CPU_VAR(var) per_cpu__##var | ||
41 | #endif | ||
42 | |||
37 | #else /* ...!ASSEMBLY */ | 43 | #else /* ...!ASSEMBLY */ |
38 | 44 | ||
39 | #include <linux/stringify.h> | 45 | #include <linux/stringify.h> |
@@ -45,6 +51,22 @@ | |||
45 | #define __percpu_arg(x) "%" #x | 51 | #define __percpu_arg(x) "%" #x |
46 | #endif | 52 | #endif |
47 | 53 | ||
54 | /* | ||
55 | * Initialized pointers to per-cpu variables needed for the boot | ||
56 | * processor need to use these macros to get the proper address | ||
57 | * offset from __per_cpu_load on SMP. | ||
58 | * | ||
59 | * There also must be an entry in vmlinux_64.lds.S | ||
60 | */ | ||
61 | #define DECLARE_INIT_PER_CPU(var) \ | ||
62 | extern typeof(per_cpu_var(var)) init_per_cpu_var(var) | ||
63 | |||
64 | #ifdef CONFIG_X86_64_SMP | ||
65 | #define init_per_cpu_var(var) init_per_cpu__##var | ||
66 | #else | ||
67 | #define init_per_cpu_var(var) per_cpu_var(var) | ||
68 | #endif | ||
69 | |||
48 | /* For arch-specific code, we can use direct single-insn ops (they | 70 | /* For arch-specific code, we can use direct single-insn ops (they |
49 | * don't give an lvalue though). */ | 71 | * don't give an lvalue though). */ |
50 | extern void __bad_percpu_size(void); | 72 | extern void __bad_percpu_size(void); |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a6643f68fbb1..a0133838b67c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -393,8 +393,14 @@ union irq_stack_union { | |||
393 | }; | 393 | }; |
394 | 394 | ||
395 | DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); | 395 | DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); |
396 | DECLARE_INIT_PER_CPU(irq_stack_union); | ||
397 | |||
396 | DECLARE_PER_CPU(char *, irq_stack_ptr); | 398 | DECLARE_PER_CPU(char *, irq_stack_ptr); |
399 | #else /* X86_64 */ | ||
400 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
401 | DECLARE_PER_CPU(unsigned long, stack_canary); | ||
397 | #endif | 402 | #endif |
403 | #endif /* X86_64 */ | ||
398 | 404 | ||
399 | extern void print_cpu_info(struct cpuinfo_x86 *); | 405 | extern void print_cpu_info(struct cpuinfo_x86 *); |
400 | extern unsigned int xstate_size; | 406 | extern unsigned int xstate_size; |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 6d34d954c228..e304b66abeea 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -28,7 +28,7 @@ struct pt_regs { | |||
28 | int xds; | 28 | int xds; |
29 | int xes; | 29 | int xes; |
30 | int xfs; | 30 | int xfs; |
31 | /* int gs; */ | 31 | int xgs; |
32 | long orig_eax; | 32 | long orig_eax; |
33 | long eip; | 33 | long eip; |
34 | int xcs; | 34 | int xcs; |
@@ -50,7 +50,7 @@ struct pt_regs { | |||
50 | unsigned long ds; | 50 | unsigned long ds; |
51 | unsigned long es; | 51 | unsigned long es; |
52 | unsigned long fs; | 52 | unsigned long fs; |
53 | /* int gs; */ | 53 | unsigned long gs; |
54 | unsigned long orig_ax; | 54 | unsigned long orig_ax; |
55 | unsigned long ip; | 55 | unsigned long ip; |
56 | unsigned long cs; | 56 | unsigned long cs; |
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 1dc1b51ac623..14e0ed86a6f9 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h | |||
@@ -61,7 +61,7 @@ | |||
61 | * | 61 | * |
62 | * 26 - ESPFIX small SS | 62 | * 26 - ESPFIX small SS |
63 | * 27 - per-cpu [ offset to per-cpu data area ] | 63 | * 27 - per-cpu [ offset to per-cpu data area ] |
64 | * 28 - unused | 64 | * 28 - stack_canary-20 [ for stack protector ] |
65 | * 29 - unused | 65 | * 29 - unused |
66 | * 30 - unused | 66 | * 30 - unused |
67 | * 31 - TSS for double fault handler | 67 | * 31 - TSS for double fault handler |
@@ -95,6 +95,13 @@ | |||
95 | #define __KERNEL_PERCPU 0 | 95 | #define __KERNEL_PERCPU 0 |
96 | #endif | 96 | #endif |
97 | 97 | ||
98 | #define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16) | ||
99 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
100 | #define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8) | ||
101 | #else | ||
102 | #define __KERNEL_STACK_CANARY 0 | ||
103 | #endif | ||
104 | |||
98 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 | 105 | #define GDT_ENTRY_DOUBLEFAULT_TSS 31 |
99 | 106 | ||
100 | /* | 107 | /* |
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 36a700acaf2b..c2d742c6e15f 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h | |||
@@ -1,8 +1,54 @@ | |||
1 | /* | ||
2 | * GCC stack protector support. | ||
3 | * | ||
4 | * Stack protector works by putting predefined pattern at the start of | ||
5 | * the stack frame and verifying that it hasn't been overwritten when | ||
6 | * returning from the function. The pattern is called stack canary | ||
7 | * and unfortunately gcc requires it to be at a fixed offset from %gs. | ||
8 | * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 | ||
9 | * and x86_32 use segment registers differently and thus handles this | ||
10 | * requirement differently. | ||
11 | * | ||
12 | * On x86_64, %gs is shared by percpu area and stack canary. All | ||
13 | * percpu symbols are zero based and %gs points to the base of percpu | ||
14 | * area. The first occupant of the percpu area is always | ||
15 | * irq_stack_union which contains stack_canary at offset 40. Userland | ||
16 | * %gs is always saved and restored on kernel entry and exit using | ||
17 | * swapgs, so stack protector doesn't add any complexity there. | ||
18 | * | ||
19 | * On x86_32, it's slightly more complicated. As in x86_64, %gs is | ||
20 | * used for userland TLS. Unfortunately, some processors are much | ||
21 | * slower at loading segment registers with different value when | ||
22 | * entering and leaving the kernel, so the kernel uses %fs for percpu | ||
23 | * area and manages %gs lazily so that %gs is switched only when | ||
24 | * necessary, usually during task switch. | ||
25 | * | ||
26 | * As gcc requires the stack canary at %gs:20, %gs can't be managed | ||
27 | * lazily if stack protector is enabled, so the kernel saves and | ||
28 | * restores userland %gs on kernel entry and exit. This behavior is | ||
29 | * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in | ||
30 | * system.h to hide the details. | ||
31 | */ | ||
32 | |||
1 | #ifndef _ASM_STACKPROTECTOR_H | 33 | #ifndef _ASM_STACKPROTECTOR_H |
2 | #define _ASM_STACKPROTECTOR_H 1 | 34 | #define _ASM_STACKPROTECTOR_H 1 |
3 | 35 | ||
36 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
37 | |||
4 | #include <asm/tsc.h> | 38 | #include <asm/tsc.h> |
5 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
40 | #include <asm/percpu.h> | ||
41 | #include <asm/system.h> | ||
42 | #include <asm/desc.h> | ||
43 | #include <linux/random.h> | ||
44 | |||
45 | /* | ||
46 | * 24 byte read-only segment initializer for stack canary. Linker | ||
47 | * can't handle the address bit shifting. Address will be set in | ||
48 | * head_32 for boot CPU and setup_per_cpu_areas() for others. | ||
49 | */ | ||
50 | #define GDT_STACK_CANARY_INIT \ | ||
51 | [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, | ||
6 | 52 | ||
7 | /* | 53 | /* |
8 | * Initialize the stackprotector canary value. | 54 | * Initialize the stackprotector canary value. |
@@ -15,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void) | |||
15 | u64 canary; | 61 | u64 canary; |
16 | u64 tsc; | 62 | u64 tsc; |
17 | 63 | ||
18 | /* | 64 | #ifdef CONFIG_X86_64 |
19 | * Build time only check to make sure the stack_canary is at | ||
20 | * offset 40 in the pda; this is a gcc ABI requirement | ||
21 | */ | ||
22 | BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); | 65 | BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); |
23 | 66 | #endif | |
24 | /* | 67 | /* |
25 | * We both use the random pool and the current TSC as a source | 68 | * We both use the random pool and the current TSC as a source |
26 | * of randomness. The TSC only matters for very early init, | 69 | * of randomness. The TSC only matters for very early init, |
@@ -32,7 +75,50 @@ static __always_inline void boot_init_stack_canary(void) | |||
32 | canary += tsc + (tsc << 32UL); | 75 | canary += tsc + (tsc << 32UL); |
33 | 76 | ||
34 | current->stack_canary = canary; | 77 | current->stack_canary = canary; |
78 | #ifdef CONFIG_X86_64 | ||
35 | percpu_write(irq_stack_union.stack_canary, canary); | 79 | percpu_write(irq_stack_union.stack_canary, canary); |
80 | #else | ||
81 | percpu_write(stack_canary, canary); | ||
82 | #endif | ||
36 | } | 83 | } |
37 | 84 | ||
85 | static inline void setup_stack_canary_segment(int cpu) | ||
86 | { | ||
87 | #ifdef CONFIG_X86_32 | ||
88 | unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; | ||
89 | struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); | ||
90 | struct desc_struct desc; | ||
91 | |||
92 | desc = gdt_table[GDT_ENTRY_STACK_CANARY]; | ||
93 | desc.base0 = canary & 0xffff; | ||
94 | desc.base1 = (canary >> 16) & 0xff; | ||
95 | desc.base2 = (canary >> 24) & 0xff; | ||
96 | write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); | ||
97 | #endif | ||
98 | } | ||
99 | |||
100 | static inline void load_stack_canary_segment(void) | ||
101 | { | ||
102 | #ifdef CONFIG_X86_32 | ||
103 | asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); | ||
104 | #endif | ||
105 | } | ||
106 | |||
107 | #else /* CC_STACKPROTECTOR */ | ||
108 | |||
109 | #define GDT_STACK_CANARY_INIT | ||
110 | |||
111 | /* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ | ||
112 | |||
113 | static inline void setup_stack_canary_segment(int cpu) | ||
114 | { } | ||
115 | |||
116 | static inline void load_stack_canary_segment(void) | ||
117 | { | ||
118 | #ifdef CONFIG_X86_32 | ||
119 | asm volatile ("mov %0, %%gs" : : "r" (0)); | ||
38 | #endif | 120 | #endif |
121 | } | ||
122 | |||
123 | #endif /* CC_STACKPROTECTOR */ | ||
124 | #endif /* _ASM_STACKPROTECTOR_H */ | ||
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index c0b0bda754ee..68b1be10cfad 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -29,21 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); | |||
29 | /* X86_32 only */ | 29 | /* X86_32 only */ |
30 | #ifdef CONFIG_X86_32 | 30 | #ifdef CONFIG_X86_32 |
31 | /* kernel/process_32.c */ | 31 | /* kernel/process_32.c */ |
32 | asmlinkage int sys_fork(struct pt_regs); | 32 | int sys_fork(struct pt_regs *); |
33 | asmlinkage int sys_clone(struct pt_regs); | 33 | int sys_clone(struct pt_regs *); |
34 | asmlinkage int sys_vfork(struct pt_regs); | 34 | int sys_vfork(struct pt_regs *); |
35 | asmlinkage int sys_execve(struct pt_regs); | 35 | int sys_execve(struct pt_regs *); |
36 | 36 | ||
37 | /* kernel/signal_32.c */ | 37 | /* kernel/signal_32.c */ |
38 | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); | 38 | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); |
39 | asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, | 39 | asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, |
40 | struct old_sigaction __user *); | 40 | struct old_sigaction __user *); |
41 | asmlinkage int sys_sigaltstack(unsigned long); | 41 | int sys_sigaltstack(struct pt_regs *); |
42 | asmlinkage unsigned long sys_sigreturn(unsigned long); | 42 | unsigned long sys_sigreturn(struct pt_regs *); |
43 | asmlinkage int sys_rt_sigreturn(unsigned long); | 43 | long sys_rt_sigreturn(struct pt_regs *); |
44 | 44 | ||
45 | /* kernel/ioport.c */ | 45 | /* kernel/ioport.c */ |
46 | asmlinkage long sys_iopl(unsigned long); | 46 | long sys_iopl(struct pt_regs *); |
47 | 47 | ||
48 | /* kernel/sys_i386_32.c */ | 48 | /* kernel/sys_i386_32.c */ |
49 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, | 49 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, |
@@ -59,8 +59,8 @@ struct oldold_utsname; | |||
59 | asmlinkage int sys_olduname(struct oldold_utsname __user *); | 59 | asmlinkage int sys_olduname(struct oldold_utsname __user *); |
60 | 60 | ||
61 | /* kernel/vm86_32.c */ | 61 | /* kernel/vm86_32.c */ |
62 | asmlinkage int sys_vm86old(struct pt_regs); | 62 | int sys_vm86old(struct pt_regs *); |
63 | asmlinkage int sys_vm86(struct pt_regs); | 63 | int sys_vm86(struct pt_regs *); |
64 | 64 | ||
65 | #else /* CONFIG_X86_32 */ | 65 | #else /* CONFIG_X86_32 */ |
66 | 66 | ||
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index c22383743f36..c00bfdbdd456 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -23,6 +23,20 @@ struct task_struct *__switch_to(struct task_struct *prev, | |||
23 | 23 | ||
24 | #ifdef CONFIG_X86_32 | 24 | #ifdef CONFIG_X86_32 |
25 | 25 | ||
26 | #ifdef CONFIG_CC_STACKPROTECTOR | ||
27 | #define __switch_canary \ | ||
28 | "movl %P[task_canary](%[next]), %%ebx\n\t" \ | ||
29 | "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" | ||
30 | #define __switch_canary_oparam \ | ||
31 | , [stack_canary] "=m" (per_cpu_var(stack_canary)) | ||
32 | #define __switch_canary_iparam \ | ||
33 | , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) | ||
34 | #else /* CC_STACKPROTECTOR */ | ||
35 | #define __switch_canary | ||
36 | #define __switch_canary_oparam | ||
37 | #define __switch_canary_iparam | ||
38 | #endif /* CC_STACKPROTECTOR */ | ||
39 | |||
26 | /* | 40 | /* |
27 | * Saving eflags is important. It switches not only IOPL between tasks, | 41 | * Saving eflags is important. It switches not only IOPL between tasks, |
28 | * it also protects other tasks from NT leaking through sysenter etc. | 42 | * it also protects other tasks from NT leaking through sysenter etc. |
@@ -44,6 +58,7 @@ do { \ | |||
44 | "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ | 58 | "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ |
45 | "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ | 59 | "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ |
46 | "pushl %[next_ip]\n\t" /* restore EIP */ \ | 60 | "pushl %[next_ip]\n\t" /* restore EIP */ \ |
61 | __switch_canary \ | ||
47 | "jmp __switch_to\n" /* regparm call */ \ | 62 | "jmp __switch_to\n" /* regparm call */ \ |
48 | "1:\t" \ | 63 | "1:\t" \ |
49 | "popl %%ebp\n\t" /* restore EBP */ \ | 64 | "popl %%ebp\n\t" /* restore EBP */ \ |
@@ -58,6 +73,8 @@ do { \ | |||
58 | "=b" (ebx), "=c" (ecx), "=d" (edx), \ | 73 | "=b" (ebx), "=c" (ecx), "=d" (edx), \ |
59 | "=S" (esi), "=D" (edi) \ | 74 | "=S" (esi), "=D" (edi) \ |
60 | \ | 75 | \ |
76 | __switch_canary_oparam \ | ||
77 | \ | ||
61 | /* input parameters: */ \ | 78 | /* input parameters: */ \ |
62 | : [next_sp] "m" (next->thread.sp), \ | 79 | : [next_sp] "m" (next->thread.sp), \ |
63 | [next_ip] "m" (next->thread.ip), \ | 80 | [next_ip] "m" (next->thread.ip), \ |
@@ -66,6 +83,8 @@ do { \ | |||
66 | [prev] "a" (prev), \ | 83 | [prev] "a" (prev), \ |
67 | [next] "d" (next) \ | 84 | [next] "d" (next) \ |
68 | \ | 85 | \ |
86 | __switch_canary_iparam \ | ||
87 | \ | ||
69 | : /* reloaded segment registers */ \ | 88 | : /* reloaded segment registers */ \ |
70 | "memory"); \ | 89 | "memory"); \ |
71 | } while (0) | 90 | } while (0) |
@@ -182,6 +201,25 @@ extern void native_load_gs_index(unsigned); | |||
182 | #define savesegment(seg, value) \ | 201 | #define savesegment(seg, value) \ |
183 | asm("mov %%" #seg ",%0":"=r" (value) : : "memory") | 202 | asm("mov %%" #seg ",%0":"=r" (value) : : "memory") |
184 | 203 | ||
204 | /* | ||
205 | * x86_32 user gs accessors. | ||
206 | */ | ||
207 | #ifdef CONFIG_X86_32 | ||
208 | #ifdef CONFIG_X86_32_LAZY_GS | ||
209 | #define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) | ||
210 | #define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) | ||
211 | #define task_user_gs(tsk) ((tsk)->thread.gs) | ||
212 | #define lazy_save_gs(v) savesegment(gs, (v)) | ||
213 | #define lazy_load_gs(v) loadsegment(gs, (v)) | ||
214 | #else /* X86_32_LAZY_GS */ | ||
215 | #define get_user_gs(regs) (u16)((regs)->gs) | ||
216 | #define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) | ||
217 | #define task_user_gs(tsk) (task_pt_regs(tsk)->gs) | ||
218 | #define lazy_save_gs(v) do { } while (0) | ||
219 | #define lazy_load_gs(v) do { } while (0) | ||
220 | #endif /* X86_32_LAZY_GS */ | ||
221 | #endif /* X86_32 */ | ||
222 | |||
185 | static inline unsigned long get_limit(unsigned long segment) | 223 | static inline unsigned long get_limit(unsigned long segment) |
186 | { | 224 | { |
187 | unsigned long __limit; | 225 | unsigned long __limit; |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index cf3bb053da0b..0d5342515b86 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long); | |||
41 | dotraplinkage void do_overflow(struct pt_regs *, long); | 41 | dotraplinkage void do_overflow(struct pt_regs *, long); |
42 | dotraplinkage void do_bounds(struct pt_regs *, long); | 42 | dotraplinkage void do_bounds(struct pt_regs *, long); |
43 | dotraplinkage void do_invalid_op(struct pt_regs *, long); | 43 | dotraplinkage void do_invalid_op(struct pt_regs *, long); |
44 | dotraplinkage void do_device_not_available(struct pt_regs); | 44 | dotraplinkage void do_device_not_available(struct pt_regs *, long); |
45 | dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); | 45 | dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); |
46 | dotraplinkage void do_invalid_TSS(struct pt_regs *, long); | 46 | dotraplinkage void do_invalid_TSS(struct pt_regs *, long); |
47 | dotraplinkage void do_segment_not_present(struct pt_regs *, long); | 47 | dotraplinkage void do_segment_not_present(struct pt_regs *, long); |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 0ec6de4bcb0b..b685ece89d5c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -186,7 +186,7 @@ extern int __get_user_bad(void); | |||
186 | 186 | ||
187 | 187 | ||
188 | #ifdef CONFIG_X86_32 | 188 | #ifdef CONFIG_X86_32 |
189 | #define __put_user_asm_u64(x, addr, err) \ | 189 | #define __put_user_asm_u64(x, addr, err, errret) \ |
190 | asm volatile("1: movl %%eax,0(%2)\n" \ | 190 | asm volatile("1: movl %%eax,0(%2)\n" \ |
191 | "2: movl %%edx,4(%2)\n" \ | 191 | "2: movl %%edx,4(%2)\n" \ |
192 | "3:\n" \ | 192 | "3:\n" \ |
@@ -197,7 +197,7 @@ extern int __get_user_bad(void); | |||
197 | _ASM_EXTABLE(1b, 4b) \ | 197 | _ASM_EXTABLE(1b, 4b) \ |
198 | _ASM_EXTABLE(2b, 4b) \ | 198 | _ASM_EXTABLE(2b, 4b) \ |
199 | : "=r" (err) \ | 199 | : "=r" (err) \ |
200 | : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) | 200 | : "A" (x), "r" (addr), "i" (errret), "0" (err)) |
201 | 201 | ||
202 | #define __put_user_asm_ex_u64(x, addr) \ | 202 | #define __put_user_asm_ex_u64(x, addr) \ |
203 | asm volatile("1: movl %%eax,0(%1)\n" \ | 203 | asm volatile("1: movl %%eax,0(%1)\n" \ |
@@ -211,8 +211,8 @@ extern int __get_user_bad(void); | |||
211 | asm volatile("call __put_user_8" : "=a" (__ret_pu) \ | 211 | asm volatile("call __put_user_8" : "=a" (__ret_pu) \ |
212 | : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") | 212 | : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") |
213 | #else | 213 | #else |
214 | #define __put_user_asm_u64(x, ptr, retval) \ | 214 | #define __put_user_asm_u64(x, ptr, retval, errret) \ |
215 | __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) | 215 | __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) |
216 | #define __put_user_asm_ex_u64(x, addr) \ | 216 | #define __put_user_asm_ex_u64(x, addr) \ |
217 | __put_user_asm_ex(x, addr, "q", "", "Zr") | 217 | __put_user_asm_ex(x, addr, "q", "", "Zr") |
218 | #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) | 218 | #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) |
@@ -289,7 +289,8 @@ do { \ | |||
289 | __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ | 289 | __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ |
290 | break; \ | 290 | break; \ |
291 | case 8: \ | 291 | case 8: \ |
292 | __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ | 292 | __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ |
293 | errret); \ | ||
293 | break; \ | 294 | break; \ |
294 | default: \ | 295 | default: \ |
295 | __put_user_bad(); \ | 296 | __put_user_bad(); \ |
@@ -525,8 +526,6 @@ struct __large_struct { unsigned long buf[100]; }; | |||
525 | */ | 526 | */ |
526 | #define get_user_try uaccess_try | 527 | #define get_user_try uaccess_try |
527 | #define get_user_catch(err) uaccess_catch(err) | 528 | #define get_user_catch(err) uaccess_catch(err) |
528 | #define put_user_try uaccess_try | ||
529 | #define put_user_catch(err) uaccess_catch(err) | ||
530 | 529 | ||
531 | #define get_user_ex(x, ptr) do { \ | 530 | #define get_user_ex(x, ptr) do { \ |
532 | unsigned long __gue_val; \ | 531 | unsigned long __gue_val; \ |
@@ -534,9 +533,29 @@ struct __large_struct { unsigned long buf[100]; }; | |||
534 | (x) = (__force __typeof__(*(ptr)))__gue_val; \ | 533 | (x) = (__force __typeof__(*(ptr)))__gue_val; \ |
535 | } while (0) | 534 | } while (0) |
536 | 535 | ||
536 | #ifdef CONFIG_X86_WP_WORKS_OK | ||
537 | |||
538 | #define put_user_try uaccess_try | ||
539 | #define put_user_catch(err) uaccess_catch(err) | ||
540 | |||
537 | #define put_user_ex(x, ptr) \ | 541 | #define put_user_ex(x, ptr) \ |
538 | __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) | 542 | __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) |
539 | 543 | ||
544 | #else /* !CONFIG_X86_WP_WORKS_OK */ | ||
545 | |||
546 | #define put_user_try do { \ | ||
547 | int __uaccess_err = 0; | ||
548 | |||
549 | #define put_user_catch(err) \ | ||
550 | (err) |= __uaccess_err; \ | ||
551 | } while (0) | ||
552 | |||
553 | #define put_user_ex(x, ptr) do { \ | ||
554 | __uaccess_err |= __put_user(x, ptr); \ | ||
555 | } while (0) | ||
556 | |||
557 | #endif /* CONFIG_X86_WP_WORKS_OK */ | ||
558 | |||
540 | /* | 559 | /* |
541 | * movsl can be slow when source and dest are not both 8-byte aligned | 560 | * movsl can be slow when source and dest are not both 8-byte aligned |
542 | */ | 561 | */ |
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 8ac1d7e312f3..8242bf965812 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h | |||
@@ -3,6 +3,9 @@ | |||
3 | 3 | ||
4 | enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; | 4 | enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; |
5 | 5 | ||
6 | struct cpumask; | ||
7 | struct mm_struct; | ||
8 | |||
6 | #ifdef CONFIG_X86_UV | 9 | #ifdef CONFIG_X86_UV |
7 | 10 | ||
8 | extern enum uv_system_type get_uv_system_type(void); | 11 | extern enum uv_system_type get_uv_system_type(void); |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee6..fbf2f33e3080 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -75,6 +75,7 @@ void foo(void) | |||
75 | OFFSET(PT_DS, pt_regs, ds); | 75 | OFFSET(PT_DS, pt_regs, ds); |
76 | OFFSET(PT_ES, pt_regs, es); | 76 | OFFSET(PT_ES, pt_regs, es); |
77 | OFFSET(PT_FS, pt_regs, fs); | 77 | OFFSET(PT_FS, pt_regs, fs); |
78 | OFFSET(PT_GS, pt_regs, gs); | ||
78 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); | 79 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); |
79 | OFFSET(PT_EIP, pt_regs, ip); | 80 | OFFSET(PT_EIP, pt_regs, ip); |
80 | OFFSET(PT_CS, pt_regs, cs); | 81 | OFFSET(PT_CS, pt_regs, cs); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cbcdb796d47f..e8f4a386bd9d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/sections.h> | 39 | #include <asm/sections.h> |
40 | #include <asm/setup.h> | 40 | #include <asm/setup.h> |
41 | #include <asm/hypervisor.h> | 41 | #include <asm/hypervisor.h> |
42 | #include <asm/stackprotector.h> | ||
42 | 43 | ||
43 | #include "cpu.h" | 44 | #include "cpu.h" |
44 | 45 | ||
@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
122 | 123 | ||
123 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 124 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, |
124 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, | 125 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, |
126 | GDT_STACK_CANARY_INIT | ||
125 | #endif | 127 | #endif |
126 | } }; | 128 | } }; |
127 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 129 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
@@ -304,6 +306,7 @@ void load_percpu_segment(int cpu) | |||
304 | loadsegment(gs, 0); | 306 | loadsegment(gs, 0); |
305 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); | 307 | wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); |
306 | #endif | 308 | #endif |
309 | load_stack_canary_segment(); | ||
307 | } | 310 | } |
308 | 311 | ||
309 | /* Current gdt points %fs at the "master" per-cpu area: after this, | 312 | /* Current gdt points %fs at the "master" per-cpu area: after this, |
@@ -938,12 +941,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |||
938 | 941 | ||
939 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 942 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
940 | irq_stack_union) __aligned(PAGE_SIZE); | 943 | irq_stack_union) __aligned(PAGE_SIZE); |
941 | #ifdef CONFIG_SMP | ||
942 | DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ | ||
943 | #else | ||
944 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 944 | DEFINE_PER_CPU(char *, irq_stack_ptr) = |
945 | per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 945 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; |
946 | #endif | ||
947 | 946 | ||
948 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | 947 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
949 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | 948 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
@@ -986,16 +985,21 @@ unsigned long kernel_eflags; | |||
986 | */ | 985 | */ |
987 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | 986 | DEFINE_PER_CPU(struct orig_ist, orig_ist); |
988 | 987 | ||
989 | #else | 988 | #else /* x86_64 */ |
990 | 989 | ||
991 | /* Make sure %fs is initialized properly in idle threads */ | 990 | #ifdef CONFIG_CC_STACKPROTECTOR |
991 | DEFINE_PER_CPU(unsigned long, stack_canary); | ||
992 | #endif | ||
993 | |||
994 | /* Make sure %fs and %gs are initialized properly in idle threads */ | ||
992 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) | 995 | struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) |
993 | { | 996 | { |
994 | memset(regs, 0, sizeof(struct pt_regs)); | 997 | memset(regs, 0, sizeof(struct pt_regs)); |
995 | regs->fs = __KERNEL_PERCPU; | 998 | regs->fs = __KERNEL_PERCPU; |
999 | regs->gs = __KERNEL_STACK_CANARY; | ||
996 | return regs; | 1000 | return regs; |
997 | } | 1001 | } |
998 | #endif | 1002 | #endif /* x86_64 */ |
999 | 1003 | ||
1000 | /* | 1004 | /* |
1001 | * cpu_init() initializes state that is per-CPU. Some data is already | 1005 | * cpu_init() initializes state that is per-CPU. Some data is already |
@@ -1157,9 +1161,6 @@ void __cpuinit cpu_init(void) | |||
1157 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 1161 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
1158 | #endif | 1162 | #endif |
1159 | 1163 | ||
1160 | /* Clear %gs. */ | ||
1161 | asm volatile ("mov %0, %%gs" : : "r" (0)); | ||
1162 | |||
1163 | /* Clear all 6 debug registers: */ | 1164 | /* Clear all 6 debug registers: */ |
1164 | set_debugreg(0, 0); | 1165 | set_debugreg(0, 0); |
1165 | set_debugreg(0, 1); | 1166 | set_debugreg(0, 1); |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 65efd42454be..e99206831459 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -30,12 +30,13 @@ | |||
30 | * 1C(%esp) - %ds | 30 | * 1C(%esp) - %ds |
31 | * 20(%esp) - %es | 31 | * 20(%esp) - %es |
32 | * 24(%esp) - %fs | 32 | * 24(%esp) - %fs |
33 | * 28(%esp) - orig_eax | 33 | * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS |
34 | * 2C(%esp) - %eip | 34 | * 2C(%esp) - orig_eax |
35 | * 30(%esp) - %cs | 35 | * 30(%esp) - %eip |
36 | * 34(%esp) - %eflags | 36 | * 34(%esp) - %cs |
37 | * 38(%esp) - %oldesp | 37 | * 38(%esp) - %eflags |
38 | * 3C(%esp) - %oldss | 38 | * 3C(%esp) - %oldesp |
39 | * 40(%esp) - %oldss | ||
39 | * | 40 | * |
40 | * "current" is in register %ebx during any slow entries. | 41 | * "current" is in register %ebx during any slow entries. |
41 | */ | 42 | */ |
@@ -101,121 +102,221 @@ | |||
101 | #define resume_userspace_sig resume_userspace | 102 | #define resume_userspace_sig resume_userspace |
102 | #endif | 103 | #endif |
103 | 104 | ||
104 | #define SAVE_ALL \ | 105 | /* |
105 | cld; \ | 106 | * User gs save/restore |
106 | pushl %fs; \ | 107 | * |
107 | CFI_ADJUST_CFA_OFFSET 4;\ | 108 | * %gs is used for userland TLS and kernel only uses it for stack |
108 | /*CFI_REL_OFFSET fs, 0;*/\ | 109 | * canary which is required to be at %gs:20 by gcc. Read the comment |
109 | pushl %es; \ | 110 | * at the top of stackprotector.h for more info. |
110 | CFI_ADJUST_CFA_OFFSET 4;\ | 111 | * |
111 | /*CFI_REL_OFFSET es, 0;*/\ | 112 | * Local labels 98 and 99 are used. |
112 | pushl %ds; \ | 113 | */ |
113 | CFI_ADJUST_CFA_OFFSET 4;\ | 114 | #ifdef CONFIG_X86_32_LAZY_GS |
114 | /*CFI_REL_OFFSET ds, 0;*/\ | 115 | |
115 | pushl %eax; \ | 116 | /* unfortunately push/pop can't be no-op */ |
116 | CFI_ADJUST_CFA_OFFSET 4;\ | 117 | .macro PUSH_GS |
117 | CFI_REL_OFFSET eax, 0;\ | 118 | pushl $0 |
118 | pushl %ebp; \ | 119 | CFI_ADJUST_CFA_OFFSET 4 |
119 | CFI_ADJUST_CFA_OFFSET 4;\ | 120 | .endm |
120 | CFI_REL_OFFSET ebp, 0;\ | 121 | .macro POP_GS pop=0 |
121 | pushl %edi; \ | 122 | addl $(4 + \pop), %esp |
122 | CFI_ADJUST_CFA_OFFSET 4;\ | 123 | CFI_ADJUST_CFA_OFFSET -(4 + \pop) |
123 | CFI_REL_OFFSET edi, 0;\ | 124 | .endm |
124 | pushl %esi; \ | 125 | .macro POP_GS_EX |
125 | CFI_ADJUST_CFA_OFFSET 4;\ | 126 | .endm |
126 | CFI_REL_OFFSET esi, 0;\ | 127 | |
127 | pushl %edx; \ | 128 | /* all the rest are no-op */ |
128 | CFI_ADJUST_CFA_OFFSET 4;\ | 129 | .macro PTGS_TO_GS |
129 | CFI_REL_OFFSET edx, 0;\ | 130 | .endm |
130 | pushl %ecx; \ | 131 | .macro PTGS_TO_GS_EX |
131 | CFI_ADJUST_CFA_OFFSET 4;\ | 132 | .endm |
132 | CFI_REL_OFFSET ecx, 0;\ | 133 | .macro GS_TO_REG reg |
133 | pushl %ebx; \ | 134 | .endm |
134 | CFI_ADJUST_CFA_OFFSET 4;\ | 135 | .macro REG_TO_PTGS reg |
135 | CFI_REL_OFFSET ebx, 0;\ | 136 | .endm |
136 | movl $(__USER_DS), %edx; \ | 137 | .macro SET_KERNEL_GS reg |
137 | movl %edx, %ds; \ | 138 | .endm |
138 | movl %edx, %es; \ | 139 | |
139 | movl $(__KERNEL_PERCPU), %edx; \ | 140 | #else /* CONFIG_X86_32_LAZY_GS */ |
141 | |||
142 | .macro PUSH_GS | ||
143 | pushl %gs | ||
144 | CFI_ADJUST_CFA_OFFSET 4 | ||
145 | /*CFI_REL_OFFSET gs, 0*/ | ||
146 | .endm | ||
147 | |||
148 | .macro POP_GS pop=0 | ||
149 | 98: popl %gs | ||
150 | CFI_ADJUST_CFA_OFFSET -4 | ||
151 | /*CFI_RESTORE gs*/ | ||
152 | .if \pop <> 0 | ||
153 | add $\pop, %esp | ||
154 | CFI_ADJUST_CFA_OFFSET -\pop | ||
155 | .endif | ||
156 | .endm | ||
157 | .macro POP_GS_EX | ||
158 | .pushsection .fixup, "ax" | ||
159 | 99: movl $0, (%esp) | ||
160 | jmp 98b | ||
161 | .section __ex_table, "a" | ||
162 | .align 4 | ||
163 | .long 98b, 99b | ||
164 | .popsection | ||
165 | .endm | ||
166 | |||
167 | .macro PTGS_TO_GS | ||
168 | 98: mov PT_GS(%esp), %gs | ||
169 | .endm | ||
170 | .macro PTGS_TO_GS_EX | ||
171 | .pushsection .fixup, "ax" | ||
172 | 99: movl $0, PT_GS(%esp) | ||
173 | jmp 98b | ||
174 | .section __ex_table, "a" | ||
175 | .align 4 | ||
176 | .long 98b, 99b | ||
177 | .popsection | ||
178 | .endm | ||
179 | |||
180 | .macro GS_TO_REG reg | ||
181 | movl %gs, \reg | ||
182 | /*CFI_REGISTER gs, \reg*/ | ||
183 | .endm | ||
184 | .macro REG_TO_PTGS reg | ||
185 | movl \reg, PT_GS(%esp) | ||
186 | /*CFI_REL_OFFSET gs, PT_GS*/ | ||
187 | .endm | ||
188 | .macro SET_KERNEL_GS reg | ||
189 | movl $(__KERNEL_STACK_CANARY), \reg | ||
190 | movl \reg, %gs | ||
191 | .endm | ||
192 | |||
193 | #endif /* CONFIG_X86_32_LAZY_GS */ | ||
194 | |||
195 | .macro SAVE_ALL | ||
196 | cld | ||
197 | PUSH_GS | ||
198 | pushl %fs | ||
199 | CFI_ADJUST_CFA_OFFSET 4 | ||
200 | /*CFI_REL_OFFSET fs, 0;*/ | ||
201 | pushl %es | ||
202 | CFI_ADJUST_CFA_OFFSET 4 | ||
203 | /*CFI_REL_OFFSET es, 0;*/ | ||
204 | pushl %ds | ||
205 | CFI_ADJUST_CFA_OFFSET 4 | ||
206 | /*CFI_REL_OFFSET ds, 0;*/ | ||
207 | pushl %eax | ||
208 | CFI_ADJUST_CFA_OFFSET 4 | ||
209 | CFI_REL_OFFSET eax, 0 | ||
210 | pushl %ebp | ||
211 | CFI_ADJUST_CFA_OFFSET 4 | ||
212 | CFI_REL_OFFSET ebp, 0 | ||
213 | pushl %edi | ||
214 | CFI_ADJUST_CFA_OFFSET 4 | ||
215 | CFI_REL_OFFSET edi, 0 | ||
216 | pushl %esi | ||
217 | CFI_ADJUST_CFA_OFFSET 4 | ||
218 | CFI_REL_OFFSET esi, 0 | ||
219 | pushl %edx | ||
220 | CFI_ADJUST_CFA_OFFSET 4 | ||
221 | CFI_REL_OFFSET edx, 0 | ||
222 | pushl %ecx | ||
223 | CFI_ADJUST_CFA_OFFSET 4 | ||
224 | CFI_REL_OFFSET ecx, 0 | ||
225 | pushl %ebx | ||
226 | CFI_ADJUST_CFA_OFFSET 4 | ||
227 | CFI_REL_OFFSET ebx, 0 | ||
228 | movl $(__USER_DS), %edx | ||
229 | movl %edx, %ds | ||
230 | movl %edx, %es | ||
231 | movl $(__KERNEL_PERCPU), %edx | ||
140 | movl %edx, %fs | 232 | movl %edx, %fs |
233 | SET_KERNEL_GS %edx | ||
234 | .endm | ||
141 | 235 | ||
142 | #define RESTORE_INT_REGS \ | 236 | .macro RESTORE_INT_REGS |
143 | popl %ebx; \ | 237 | popl %ebx |
144 | CFI_ADJUST_CFA_OFFSET -4;\ | 238 | CFI_ADJUST_CFA_OFFSET -4 |
145 | CFI_RESTORE ebx;\ | 239 | CFI_RESTORE ebx |
146 | popl %ecx; \ | 240 | popl %ecx |
147 | CFI_ADJUST_CFA_OFFSET -4;\ | 241 | CFI_ADJUST_CFA_OFFSET -4 |
148 | CFI_RESTORE ecx;\ | 242 | CFI_RESTORE ecx |
149 | popl %edx; \ | 243 | popl %edx |
150 | CFI_ADJUST_CFA_OFFSET -4;\ | 244 | CFI_ADJUST_CFA_OFFSET -4 |
151 | CFI_RESTORE edx;\ | 245 | CFI_RESTORE edx |
152 | popl %esi; \ | 246 | popl %esi |
153 | CFI_ADJUST_CFA_OFFSET -4;\ | 247 | CFI_ADJUST_CFA_OFFSET -4 |
154 | CFI_RESTORE esi;\ | 248 | CFI_RESTORE esi |
155 | popl %edi; \ | 249 | popl %edi |
156 | CFI_ADJUST_CFA_OFFSET -4;\ | 250 | CFI_ADJUST_CFA_OFFSET -4 |
157 | CFI_RESTORE edi;\ | 251 | CFI_RESTORE edi |
158 | popl %ebp; \ | 252 | popl %ebp |
159 | CFI_ADJUST_CFA_OFFSET -4;\ | 253 | CFI_ADJUST_CFA_OFFSET -4 |
160 | CFI_RESTORE ebp;\ | 254 | CFI_RESTORE ebp |
161 | popl %eax; \ | 255 | popl %eax |
162 | CFI_ADJUST_CFA_OFFSET -4;\ | 256 | CFI_ADJUST_CFA_OFFSET -4 |
163 | CFI_RESTORE eax | 257 | CFI_RESTORE eax |
258 | .endm | ||
164 | 259 | ||
165 | #define RESTORE_REGS \ | 260 | .macro RESTORE_REGS pop=0 |
166 | RESTORE_INT_REGS; \ | 261 | RESTORE_INT_REGS |
167 | 1: popl %ds; \ | 262 | 1: popl %ds |
168 | CFI_ADJUST_CFA_OFFSET -4;\ | 263 | CFI_ADJUST_CFA_OFFSET -4 |
169 | /*CFI_RESTORE ds;*/\ | 264 | /*CFI_RESTORE ds;*/ |
170 | 2: popl %es; \ | 265 | 2: popl %es |
171 | CFI_ADJUST_CFA_OFFSET -4;\ | 266 | CFI_ADJUST_CFA_OFFSET -4 |
172 | /*CFI_RESTORE es;*/\ | 267 | /*CFI_RESTORE es;*/ |
173 | 3: popl %fs; \ | 268 | 3: popl %fs |
174 | CFI_ADJUST_CFA_OFFSET -4;\ | 269 | CFI_ADJUST_CFA_OFFSET -4 |
175 | /*CFI_RESTORE fs;*/\ | 270 | /*CFI_RESTORE fs;*/ |
176 | .pushsection .fixup,"ax"; \ | 271 | POP_GS \pop |
177 | 4: movl $0,(%esp); \ | 272 | .pushsection .fixup, "ax" |
178 | jmp 1b; \ | 273 | 4: movl $0, (%esp) |
179 | 5: movl $0,(%esp); \ | 274 | jmp 1b |
180 | jmp 2b; \ | 275 | 5: movl $0, (%esp) |
181 | 6: movl $0,(%esp); \ | 276 | jmp 2b |
182 | jmp 3b; \ | 277 | 6: movl $0, (%esp) |
183 | .section __ex_table,"a";\ | 278 | jmp 3b |
184 | .align 4; \ | 279 | .section __ex_table, "a" |
185 | .long 1b,4b; \ | 280 | .align 4 |
186 | .long 2b,5b; \ | 281 | .long 1b, 4b |
187 | .long 3b,6b; \ | 282 | .long 2b, 5b |
283 | .long 3b, 6b | ||
188 | .popsection | 284 | .popsection |
285 | POP_GS_EX | ||
286 | .endm | ||
189 | 287 | ||
190 | #define RING0_INT_FRAME \ | 288 | .macro RING0_INT_FRAME |
191 | CFI_STARTPROC simple;\ | 289 | CFI_STARTPROC simple |
192 | CFI_SIGNAL_FRAME;\ | 290 | CFI_SIGNAL_FRAME |
193 | CFI_DEF_CFA esp, 3*4;\ | 291 | CFI_DEF_CFA esp, 3*4 |
194 | /*CFI_OFFSET cs, -2*4;*/\ | 292 | /*CFI_OFFSET cs, -2*4;*/ |
195 | CFI_OFFSET eip, -3*4 | 293 | CFI_OFFSET eip, -3*4 |
294 | .endm | ||
196 | 295 | ||
197 | #define RING0_EC_FRAME \ | 296 | .macro RING0_EC_FRAME |
198 | CFI_STARTPROC simple;\ | 297 | CFI_STARTPROC simple |
199 | CFI_SIGNAL_FRAME;\ | 298 | CFI_SIGNAL_FRAME |
200 | CFI_DEF_CFA esp, 4*4;\ | 299 | CFI_DEF_CFA esp, 4*4 |
201 | /*CFI_OFFSET cs, -2*4;*/\ | 300 | /*CFI_OFFSET cs, -2*4;*/ |
202 | CFI_OFFSET eip, -3*4 | 301 | CFI_OFFSET eip, -3*4 |
302 | .endm | ||
203 | 303 | ||
204 | #define RING0_PTREGS_FRAME \ | 304 | .macro RING0_PTREGS_FRAME |
205 | CFI_STARTPROC simple;\ | 305 | CFI_STARTPROC simple |
206 | CFI_SIGNAL_FRAME;\ | 306 | CFI_SIGNAL_FRAME |
207 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ | 307 | CFI_DEF_CFA esp, PT_OLDESP-PT_EBX |
208 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ | 308 | /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ |
209 | CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ | 309 | CFI_OFFSET eip, PT_EIP-PT_OLDESP |
210 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ | 310 | /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ |
211 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ | 311 | /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ |
212 | CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ | 312 | CFI_OFFSET eax, PT_EAX-PT_OLDESP |
213 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ | 313 | CFI_OFFSET ebp, PT_EBP-PT_OLDESP |
214 | CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ | 314 | CFI_OFFSET edi, PT_EDI-PT_OLDESP |
215 | CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ | 315 | CFI_OFFSET esi, PT_ESI-PT_OLDESP |
216 | CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ | 316 | CFI_OFFSET edx, PT_EDX-PT_OLDESP |
217 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ | 317 | CFI_OFFSET ecx, PT_ECX-PT_OLDESP |
218 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP | 318 | CFI_OFFSET ebx, PT_EBX-PT_OLDESP |
319 | .endm | ||
219 | 320 | ||
220 | ENTRY(ret_from_fork) | 321 | ENTRY(ret_from_fork) |
221 | CFI_STARTPROC | 322 | CFI_STARTPROC |
@@ -362,6 +463,7 @@ sysenter_exit: | |||
362 | xorl %ebp,%ebp | 463 | xorl %ebp,%ebp |
363 | TRACE_IRQS_ON | 464 | TRACE_IRQS_ON |
364 | 1: mov PT_FS(%esp), %fs | 465 | 1: mov PT_FS(%esp), %fs |
466 | PTGS_TO_GS | ||
365 | ENABLE_INTERRUPTS_SYSEXIT | 467 | ENABLE_INTERRUPTS_SYSEXIT |
366 | 468 | ||
367 | #ifdef CONFIG_AUDITSYSCALL | 469 | #ifdef CONFIG_AUDITSYSCALL |
@@ -410,6 +512,7 @@ sysexit_audit: | |||
410 | .align 4 | 512 | .align 4 |
411 | .long 1b,2b | 513 | .long 1b,2b |
412 | .popsection | 514 | .popsection |
515 | PTGS_TO_GS_EX | ||
413 | ENDPROC(ia32_sysenter_target) | 516 | ENDPROC(ia32_sysenter_target) |
414 | 517 | ||
415 | # system call handler stub | 518 | # system call handler stub |
@@ -452,8 +555,7 @@ restore_all: | |||
452 | restore_nocheck: | 555 | restore_nocheck: |
453 | TRACE_IRQS_IRET | 556 | TRACE_IRQS_IRET |
454 | restore_nocheck_notrace: | 557 | restore_nocheck_notrace: |
455 | RESTORE_REGS | 558 | RESTORE_REGS 4 # skip orig_eax/error_code |
456 | addl $4, %esp # skip orig_eax/error_code | ||
457 | CFI_ADJUST_CFA_OFFSET -4 | 559 | CFI_ADJUST_CFA_OFFSET -4 |
458 | irq_return: | 560 | irq_return: |
459 | INTERRUPT_RETURN | 561 | INTERRUPT_RETURN |
@@ -595,28 +697,50 @@ syscall_badsys: | |||
595 | END(syscall_badsys) | 697 | END(syscall_badsys) |
596 | CFI_ENDPROC | 698 | CFI_ENDPROC |
597 | 699 | ||
598 | #define FIXUP_ESPFIX_STACK \ | 700 | /* |
599 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 701 | * System calls that need a pt_regs pointer. |
600 | PER_CPU(gdt_page, %ebx); \ | 702 | */ |
601 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 703 | #define PTREGSCALL(name) \ |
602 | addl %esp, %eax; \ | 704 | ALIGN; \ |
603 | pushl $__KERNEL_DS; \ | 705 | ptregs_##name: \ |
604 | CFI_ADJUST_CFA_OFFSET 4; \ | 706 | leal 4(%esp),%eax; \ |
605 | pushl %eax; \ | 707 | jmp sys_##name; |
606 | CFI_ADJUST_CFA_OFFSET 4; \ | 708 | |
607 | lss (%esp), %esp; \ | 709 | PTREGSCALL(iopl) |
608 | CFI_ADJUST_CFA_OFFSET -8; | 710 | PTREGSCALL(fork) |
609 | #define UNWIND_ESPFIX_STACK \ | 711 | PTREGSCALL(clone) |
610 | movl %ss, %eax; \ | 712 | PTREGSCALL(vfork) |
611 | /* see if on espfix stack */ \ | 713 | PTREGSCALL(execve) |
612 | cmpw $__ESPFIX_SS, %ax; \ | 714 | PTREGSCALL(sigaltstack) |
613 | jne 27f; \ | 715 | PTREGSCALL(sigreturn) |
614 | movl $__KERNEL_DS, %eax; \ | 716 | PTREGSCALL(rt_sigreturn) |
615 | movl %eax, %ds; \ | 717 | PTREGSCALL(vm86) |
616 | movl %eax, %es; \ | 718 | PTREGSCALL(vm86old) |
617 | /* switch to normal stack */ \ | 719 | |
618 | FIXUP_ESPFIX_STACK; \ | 720 | .macro FIXUP_ESPFIX_STACK |
619 | 27:; | 721 | /* since we are on a wrong stack, we cant make it a C code :( */ |
722 | PER_CPU(gdt_page, %ebx) | ||
723 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | ||
724 | addl %esp, %eax | ||
725 | pushl $__KERNEL_DS | ||
726 | CFI_ADJUST_CFA_OFFSET 4 | ||
727 | pushl %eax | ||
728 | CFI_ADJUST_CFA_OFFSET 4 | ||
729 | lss (%esp), %esp | ||
730 | CFI_ADJUST_CFA_OFFSET -8 | ||
731 | .endm | ||
732 | .macro UNWIND_ESPFIX_STACK | ||
733 | movl %ss, %eax | ||
734 | /* see if on espfix stack */ | ||
735 | cmpw $__ESPFIX_SS, %ax | ||
736 | jne 27f | ||
737 | movl $__KERNEL_DS, %eax | ||
738 | movl %eax, %ds | ||
739 | movl %eax, %es | ||
740 | /* switch to normal stack */ | ||
741 | FIXUP_ESPFIX_STACK | ||
742 | 27: | ||
743 | .endm | ||
620 | 744 | ||
621 | /* | 745 | /* |
622 | * Build the entry stubs and pointer table with some assembler magic. | 746 | * Build the entry stubs and pointer table with some assembler magic. |
@@ -1070,7 +1194,10 @@ ENTRY(page_fault) | |||
1070 | CFI_ADJUST_CFA_OFFSET 4 | 1194 | CFI_ADJUST_CFA_OFFSET 4 |
1071 | ALIGN | 1195 | ALIGN |
1072 | error_code: | 1196 | error_code: |
1073 | /* the function address is in %fs's slot on the stack */ | 1197 | /* the function address is in %gs's slot on the stack */ |
1198 | pushl %fs | ||
1199 | CFI_ADJUST_CFA_OFFSET 4 | ||
1200 | /*CFI_REL_OFFSET fs, 0*/ | ||
1074 | pushl %es | 1201 | pushl %es |
1075 | CFI_ADJUST_CFA_OFFSET 4 | 1202 | CFI_ADJUST_CFA_OFFSET 4 |
1076 | /*CFI_REL_OFFSET es, 0*/ | 1203 | /*CFI_REL_OFFSET es, 0*/ |
@@ -1099,20 +1226,15 @@ error_code: | |||
1099 | CFI_ADJUST_CFA_OFFSET 4 | 1226 | CFI_ADJUST_CFA_OFFSET 4 |
1100 | CFI_REL_OFFSET ebx, 0 | 1227 | CFI_REL_OFFSET ebx, 0 |
1101 | cld | 1228 | cld |
1102 | pushl %fs | ||
1103 | CFI_ADJUST_CFA_OFFSET 4 | ||
1104 | /*CFI_REL_OFFSET fs, 0*/ | ||
1105 | movl $(__KERNEL_PERCPU), %ecx | 1229 | movl $(__KERNEL_PERCPU), %ecx |
1106 | movl %ecx, %fs | 1230 | movl %ecx, %fs |
1107 | UNWIND_ESPFIX_STACK | 1231 | UNWIND_ESPFIX_STACK |
1108 | popl %ecx | 1232 | GS_TO_REG %ecx |
1109 | CFI_ADJUST_CFA_OFFSET -4 | 1233 | movl PT_GS(%esp), %edi # get the function address |
1110 | /*CFI_REGISTER es, ecx*/ | ||
1111 | movl PT_FS(%esp), %edi # get the function address | ||
1112 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 1234 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
1113 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 1235 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
1114 | mov %ecx, PT_FS(%esp) | 1236 | REG_TO_PTGS %ecx |
1115 | /*CFI_REL_OFFSET fs, ES*/ | 1237 | SET_KERNEL_GS %ecx |
1116 | movl $(__USER_DS), %ecx | 1238 | movl $(__USER_DS), %ecx |
1117 | movl %ecx, %ds | 1239 | movl %ecx, %ds |
1118 | movl %ecx, %es | 1240 | movl %ecx, %es |
@@ -1136,26 +1258,27 @@ END(page_fault) | |||
1136 | * by hand onto the new stack - while updating the return eip past | 1258 | * by hand onto the new stack - while updating the return eip past |
1137 | * the instruction that would have done it for sysenter. | 1259 | * the instruction that would have done it for sysenter. |
1138 | */ | 1260 | */ |
1139 | #define FIX_STACK(offset, ok, label) \ | 1261 | .macro FIX_STACK offset ok label |
1140 | cmpw $__KERNEL_CS,4(%esp); \ | 1262 | cmpw $__KERNEL_CS, 4(%esp) |
1141 | jne ok; \ | 1263 | jne \ok |
1142 | label: \ | 1264 | \label: |
1143 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | 1265 | movl TSS_sysenter_sp0 + \offset(%esp), %esp |
1144 | CFI_DEF_CFA esp, 0; \ | 1266 | CFI_DEF_CFA esp, 0 |
1145 | CFI_UNDEFINED eip; \ | 1267 | CFI_UNDEFINED eip |
1146 | pushfl; \ | 1268 | pushfl |
1147 | CFI_ADJUST_CFA_OFFSET 4; \ | 1269 | CFI_ADJUST_CFA_OFFSET 4 |
1148 | pushl $__KERNEL_CS; \ | 1270 | pushl $__KERNEL_CS |
1149 | CFI_ADJUST_CFA_OFFSET 4; \ | 1271 | CFI_ADJUST_CFA_OFFSET 4 |
1150 | pushl $sysenter_past_esp; \ | 1272 | pushl $sysenter_past_esp |
1151 | CFI_ADJUST_CFA_OFFSET 4; \ | 1273 | CFI_ADJUST_CFA_OFFSET 4 |
1152 | CFI_REL_OFFSET eip, 0 | 1274 | CFI_REL_OFFSET eip, 0 |
1275 | .endm | ||
1153 | 1276 | ||
1154 | ENTRY(debug) | 1277 | ENTRY(debug) |
1155 | RING0_INT_FRAME | 1278 | RING0_INT_FRAME |
1156 | cmpl $ia32_sysenter_target,(%esp) | 1279 | cmpl $ia32_sysenter_target,(%esp) |
1157 | jne debug_stack_correct | 1280 | jne debug_stack_correct |
1158 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | 1281 | FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn |
1159 | debug_stack_correct: | 1282 | debug_stack_correct: |
1160 | pushl $-1 # mark this as an int | 1283 | pushl $-1 # mark this as an int |
1161 | CFI_ADJUST_CFA_OFFSET 4 | 1284 | CFI_ADJUST_CFA_OFFSET 4 |
@@ -1213,7 +1336,7 @@ nmi_stack_correct: | |||
1213 | 1336 | ||
1214 | nmi_stack_fixup: | 1337 | nmi_stack_fixup: |
1215 | RING0_INT_FRAME | 1338 | RING0_INT_FRAME |
1216 | FIX_STACK(12,nmi_stack_correct, 1) | 1339 | FIX_STACK 12, nmi_stack_correct, 1 |
1217 | jmp nmi_stack_correct | 1340 | jmp nmi_stack_correct |
1218 | 1341 | ||
1219 | nmi_debug_stack_check: | 1342 | nmi_debug_stack_check: |
@@ -1224,7 +1347,7 @@ nmi_debug_stack_check: | |||
1224 | jb nmi_stack_correct | 1347 | jb nmi_stack_correct |
1225 | cmpl $debug_esp_fix_insn,(%esp) | 1348 | cmpl $debug_esp_fix_insn,(%esp) |
1226 | ja nmi_stack_correct | 1349 | ja nmi_stack_correct |
1227 | FIX_STACK(24,nmi_stack_correct, 1) | 1350 | FIX_STACK 24, nmi_stack_correct, 1 |
1228 | jmp nmi_stack_correct | 1351 | jmp nmi_stack_correct |
1229 | 1352 | ||
1230 | nmi_espfix_stack: | 1353 | nmi_espfix_stack: |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 722464c520cf..2a0aad7718d5 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <asm/asm-offsets.h> | 19 | #include <asm/asm-offsets.h> |
20 | #include <asm/setup.h> | 20 | #include <asm/setup.h> |
21 | #include <asm/processor-flags.h> | 21 | #include <asm/processor-flags.h> |
22 | #include <asm/percpu.h> | ||
22 | 23 | ||
23 | /* Physical address */ | 24 | /* Physical address */ |
24 | #define pa(X) ((X) - __PAGE_OFFSET) | 25 | #define pa(X) ((X) - __PAGE_OFFSET) |
@@ -437,8 +438,26 @@ is386: movl $2,%ecx # set MP | |||
437 | movl $(__KERNEL_PERCPU), %eax | 438 | movl $(__KERNEL_PERCPU), %eax |
438 | movl %eax,%fs # set this cpu's percpu | 439 | movl %eax,%fs # set this cpu's percpu |
439 | 440 | ||
440 | xorl %eax,%eax # Clear GS and LDT | 441 | #ifdef CONFIG_CC_STACKPROTECTOR |
442 | /* | ||
443 | * The linker can't handle this by relocation. Manually set | ||
444 | * base address in stack canary segment descriptor. | ||
445 | */ | ||
446 | cmpb $0,ready | ||
447 | jne 1f | ||
448 | movl $per_cpu__gdt_page,%eax | ||
449 | movl $per_cpu__stack_canary,%ecx | ||
450 | subl $20, %ecx | ||
451 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | ||
452 | shrl $16, %ecx | ||
453 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | ||
454 | movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) | ||
455 | 1: | ||
456 | #endif | ||
457 | movl $(__KERNEL_STACK_CANARY),%eax | ||
441 | movl %eax,%gs | 458 | movl %eax,%gs |
459 | |||
460 | xorl %eax,%eax # Clear LDT | ||
442 | lldt %ax | 461 | lldt %ax |
443 | 462 | ||
444 | cld # gcc2 wants the direction flag cleared at all times | 463 | cld # gcc2 wants the direction flag cleared at all times |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a0a2b5ca9b7d..2e648e3a5ea4 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64) | |||
205 | pushq $0 | 205 | pushq $0 |
206 | popfq | 206 | popfq |
207 | 207 | ||
208 | #ifdef CONFIG_SMP | ||
209 | /* | ||
210 | * Fix up static pointers that need __per_cpu_load added. The assembler | ||
211 | * is unable to do this directly. This is only needed for the boot cpu. | ||
212 | * These values are set up with the correct base addresses by C code for | ||
213 | * secondary cpus. | ||
214 | */ | ||
215 | movq initial_gs(%rip), %rax | ||
216 | cmpl $0, per_cpu__cpu_number(%rax) | ||
217 | jne 1f | ||
218 | addq %rax, early_gdt_descr_base(%rip) | ||
219 | 1: | ||
220 | #endif | ||
221 | /* | 208 | /* |
222 | * We must switch to a new descriptor in kernel space for the GDT | 209 | * We must switch to a new descriptor in kernel space for the GDT |
223 | * because soon the kernel won't have access anymore to the userspace | 210 | * because soon the kernel won't have access anymore to the userspace |
@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64) | |||
275 | ENTRY(initial_code) | 262 | ENTRY(initial_code) |
276 | .quad x86_64_start_kernel | 263 | .quad x86_64_start_kernel |
277 | ENTRY(initial_gs) | 264 | ENTRY(initial_gs) |
278 | #ifdef CONFIG_SMP | 265 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
279 | .quad __per_cpu_load | ||
280 | #else | ||
281 | .quad PER_CPU_VAR(irq_stack_union) | ||
282 | #endif | ||
283 | __FINITDATA | 266 | __FINITDATA |
284 | 267 | ||
285 | ENTRY(stack_start) | 268 | ENTRY(stack_start) |
@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt) | |||
425 | early_gdt_descr: | 408 | early_gdt_descr: |
426 | .word GDT_ENTRIES*8-1 | 409 | .word GDT_ENTRIES*8-1 |
427 | early_gdt_descr_base: | 410 | early_gdt_descr_base: |
428 | .quad per_cpu__gdt_page | 411 | .quad INIT_PER_CPU_VAR(gdt_page) |
429 | 412 | ||
430 | ENTRY(phys_base) | 413 | ENTRY(phys_base) |
431 | /* This must match the first entry in level2_kernel_pgt */ | 414 | /* This must match the first entry in level2_kernel_pgt */ |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b12208f4dfee..e41980a373ab 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) | |||
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_X86_32 | 133 | #ifdef CONFIG_X86_32 |
134 | asmlinkage long sys_iopl(unsigned long regsp) | 134 | long sys_iopl(struct pt_regs *regs) |
135 | { | 135 | { |
136 | struct pt_regs *regs = (struct pt_regs *)®sp; | ||
137 | unsigned int level = regs->bx; | 136 | unsigned int level = regs->bx; |
138 | struct thread_struct *t = ¤t->thread; | 137 | struct thread_struct *t = ¤t->thread; |
139 | int rc; | 138 | int rc; |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 1a1ae8edc40c..fec79ad85dc6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <stdarg.h> | 12 | #include <stdarg.h> |
13 | 13 | ||
14 | #include <linux/stackprotector.h> | ||
14 | #include <linux/cpu.h> | 15 | #include <linux/cpu.h> |
15 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
16 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
@@ -91,6 +92,15 @@ void cpu_idle(void) | |||
91 | { | 92 | { |
92 | int cpu = smp_processor_id(); | 93 | int cpu = smp_processor_id(); |
93 | 94 | ||
95 | /* | ||
96 | * If we're the non-boot CPU, nothing set the stack canary up | ||
97 | * for us. CPU0 already has it initialized but no harm in | ||
98 | * doing it again. This is a good place for updating it, as | ||
99 | * we wont ever return from this function (so the invalid | ||
100 | * canaries already on the stack wont ever trigger). | ||
101 | */ | ||
102 | boot_init_stack_canary(); | ||
103 | |||
94 | current_thread_info()->status |= TS_POLLING; | 104 | current_thread_info()->status |= TS_POLLING; |
95 | 105 | ||
96 | /* endless idle loop with no priority at all */ | 106 | /* endless idle loop with no priority at all */ |
@@ -131,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all) | |||
131 | if (user_mode_vm(regs)) { | 141 | if (user_mode_vm(regs)) { |
132 | sp = regs->sp; | 142 | sp = regs->sp; |
133 | ss = regs->ss & 0xffff; | 143 | ss = regs->ss & 0xffff; |
134 | savesegment(gs, gs); | 144 | gs = get_user_gs(regs); |
135 | } else { | 145 | } else { |
136 | sp = (unsigned long) (®s->sp); | 146 | sp = (unsigned long) (®s->sp); |
137 | savesegment(ss, ss); | 147 | savesegment(ss, ss); |
@@ -212,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
212 | regs.ds = __USER_DS; | 222 | regs.ds = __USER_DS; |
213 | regs.es = __USER_DS; | 223 | regs.es = __USER_DS; |
214 | regs.fs = __KERNEL_PERCPU; | 224 | regs.fs = __KERNEL_PERCPU; |
225 | regs.gs = __KERNEL_STACK_CANARY; | ||
215 | regs.orig_ax = -1; | 226 | regs.orig_ax = -1; |
216 | regs.ip = (unsigned long) kernel_thread_helper; | 227 | regs.ip = (unsigned long) kernel_thread_helper; |
217 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 228 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
@@ -304,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
304 | 315 | ||
305 | p->thread.ip = (unsigned long) ret_from_fork; | 316 | p->thread.ip = (unsigned long) ret_from_fork; |
306 | 317 | ||
307 | savesegment(gs, p->thread.gs); | 318 | task_user_gs(p) = get_user_gs(regs); |
308 | 319 | ||
309 | tsk = current; | 320 | tsk = current; |
310 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 321 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -342,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
342 | void | 353 | void |
343 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | 354 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
344 | { | 355 | { |
345 | __asm__("movl %0, %%gs" : : "r"(0)); | 356 | set_user_gs(regs, 0); |
346 | regs->fs = 0; | 357 | regs->fs = 0; |
347 | set_fs(USER_DS); | 358 | set_fs(USER_DS); |
348 | regs->ds = __USER_DS; | 359 | regs->ds = __USER_DS; |
@@ -539,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
539 | * used %fs or %gs (it does not today), or if the kernel is | 550 | * used %fs or %gs (it does not today), or if the kernel is |
540 | * running inside of a hypervisor layer. | 551 | * running inside of a hypervisor layer. |
541 | */ | 552 | */ |
542 | savesegment(gs, prev->gs); | 553 | lazy_save_gs(prev->gs); |
543 | 554 | ||
544 | /* | 555 | /* |
545 | * Load the per-thread Thread-Local Storage descriptor. | 556 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -585,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
585 | * Restore %gs if needed (which is common) | 596 | * Restore %gs if needed (which is common) |
586 | */ | 597 | */ |
587 | if (prev->gs | next->gs) | 598 | if (prev->gs | next->gs) |
588 | loadsegment(gs, next->gs); | 599 | lazy_load_gs(next->gs); |
589 | 600 | ||
590 | percpu_write(current_task, next_p); | 601 | percpu_write(current_task, next_p); |
591 | 602 | ||
592 | return prev_p; | 603 | return prev_p; |
593 | } | 604 | } |
594 | 605 | ||
595 | asmlinkage int sys_fork(struct pt_regs regs) | 606 | int sys_fork(struct pt_regs *regs) |
596 | { | 607 | { |
597 | return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); | 608 | return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); |
598 | } | 609 | } |
599 | 610 | ||
600 | asmlinkage int sys_clone(struct pt_regs regs) | 611 | int sys_clone(struct pt_regs *regs) |
601 | { | 612 | { |
602 | unsigned long clone_flags; | 613 | unsigned long clone_flags; |
603 | unsigned long newsp; | 614 | unsigned long newsp; |
604 | int __user *parent_tidptr, *child_tidptr; | 615 | int __user *parent_tidptr, *child_tidptr; |
605 | 616 | ||
606 | clone_flags = regs.bx; | 617 | clone_flags = regs->bx; |
607 | newsp = regs.cx; | 618 | newsp = regs->cx; |
608 | parent_tidptr = (int __user *)regs.dx; | 619 | parent_tidptr = (int __user *)regs->dx; |
609 | child_tidptr = (int __user *)regs.di; | 620 | child_tidptr = (int __user *)regs->di; |
610 | if (!newsp) | 621 | if (!newsp) |
611 | newsp = regs.sp; | 622 | newsp = regs->sp; |
612 | return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); | 623 | return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); |
613 | } | 624 | } |
614 | 625 | ||
615 | /* | 626 | /* |
@@ -622,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs) | |||
622 | * do not have enough call-clobbered registers to hold all | 633 | * do not have enough call-clobbered registers to hold all |
623 | * the information you need. | 634 | * the information you need. |
624 | */ | 635 | */ |
625 | asmlinkage int sys_vfork(struct pt_regs regs) | 636 | int sys_vfork(struct pt_regs *regs) |
626 | { | 637 | { |
627 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); | 638 | return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL); |
628 | } | 639 | } |
629 | 640 | ||
630 | /* | 641 | /* |
631 | * sys_execve() executes a new program. | 642 | * sys_execve() executes a new program. |
632 | */ | 643 | */ |
633 | asmlinkage int sys_execve(struct pt_regs regs) | 644 | int sys_execve(struct pt_regs *regs) |
634 | { | 645 | { |
635 | int error; | 646 | int error; |
636 | char *filename; | 647 | char *filename; |
637 | 648 | ||
638 | filename = getname((char __user *) regs.bx); | 649 | filename = getname((char __user *) regs->bx); |
639 | error = PTR_ERR(filename); | 650 | error = PTR_ERR(filename); |
640 | if (IS_ERR(filename)) | 651 | if (IS_ERR(filename)) |
641 | goto out; | 652 | goto out; |
642 | error = do_execve(filename, | 653 | error = do_execve(filename, |
643 | (char __user * __user *) regs.cx, | 654 | (char __user * __user *) regs->cx, |
644 | (char __user * __user *) regs.dx, | 655 | (char __user * __user *) regs->dx, |
645 | ®s); | 656 | regs); |
646 | if (error == 0) { | 657 | if (error == 0) { |
647 | /* Make sure we don't return using sysenter.. */ | 658 | /* Make sure we don't return using sysenter.. */ |
648 | set_thread_flag(TIF_IRET); | 659 | set_thread_flag(TIF_IRET); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 8eb169e45584..836ef6575f01 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -120,12 +120,11 @@ void cpu_idle(void) | |||
120 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
121 | 121 | ||
122 | /* | 122 | /* |
123 | * If we're the non-boot CPU, nothing set the PDA stack | 123 | * If we're the non-boot CPU, nothing set the stack canary up |
124 | * canary up for us - and if we are the boot CPU we have | 124 | * for us. CPU0 already has it initialized but no harm in |
125 | * a 0 stack canary. This is a good place for updating | 125 | * doing it again. This is a good place for updating it, as |
126 | * it, as we wont ever return from this function (so the | 126 | * we wont ever return from this function (so the invalid |
127 | * invalid canaries already on the stack wont ever | 127 | * canaries already on the stack wont ever trigger). |
128 | * trigger): | ||
129 | */ | 128 | */ |
130 | boot_init_stack_canary(); | 129 | boot_init_stack_canary(); |
131 | 130 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a5df5f82fb9..7ec39ab37a2d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) | |||
75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | 75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) |
76 | { | 76 | { |
77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | 77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); |
78 | regno >>= 2; | 78 | return ®s->bx + (regno >> 2); |
79 | if (regno > FS) | ||
80 | --regno; | ||
81 | return ®s->bx + regno; | ||
82 | } | 79 | } |
83 | 80 | ||
84 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | 81 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) |
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | |||
90 | if (offset != offsetof(struct user_regs_struct, gs)) | 87 | if (offset != offsetof(struct user_regs_struct, gs)) |
91 | retval = *pt_regs_access(task_pt_regs(task), offset); | 88 | retval = *pt_regs_access(task_pt_regs(task), offset); |
92 | else { | 89 | else { |
93 | retval = task->thread.gs; | ||
94 | if (task == current) | 90 | if (task == current) |
95 | savesegment(gs, retval); | 91 | retval = get_user_gs(task_pt_regs(task)); |
92 | else | ||
93 | retval = task_user_gs(task); | ||
96 | } | 94 | } |
97 | return retval; | 95 | return retval; |
98 | } | 96 | } |
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task, | |||
126 | break; | 124 | break; |
127 | 125 | ||
128 | case offsetof(struct user_regs_struct, gs): | 126 | case offsetof(struct user_regs_struct, gs): |
129 | task->thread.gs = value; | ||
130 | if (task == current) | 127 | if (task == current) |
131 | /* | 128 | set_user_gs(task_pt_regs(task), value); |
132 | * The user-mode %gs is not affected by | 129 | else |
133 | * kernel entry, so we must update the CPU. | 130 | task_user_gs(task) = value; |
134 | */ | ||
135 | loadsegment(gs, value); | ||
136 | } | 131 | } |
137 | 132 | ||
138 | return 0; | 133 | return 0; |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef91747bbed5..d992e6cff730 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/proto.h> | 16 | #include <asm/proto.h> |
17 | #include <asm/cpumask.h> | 17 | #include <asm/cpumask.h> |
18 | #include <asm/cpu.h> | 18 | #include <asm/cpu.h> |
19 | #include <asm/stackprotector.h> | ||
19 | 20 | ||
20 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 21 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
21 | # define DBG(x...) printk(KERN_DEBUG x) | 22 | # define DBG(x...) printk(KERN_DEBUG x) |
@@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void) | |||
95 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); | 96 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); |
96 | per_cpu(cpu_number, cpu) = cpu; | 97 | per_cpu(cpu_number, cpu) = cpu; |
97 | setup_percpu_segment(cpu); | 98 | setup_percpu_segment(cpu); |
99 | setup_stack_canary_segment(cpu); | ||
98 | /* | 100 | /* |
99 | * Copy data used in early init routines from the | 101 | * Copy data used in early init routines from the |
100 | * initial arrays to the per cpu data areas. These | 102 | * initial arrays to the per cpu data areas. These |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7fc78b019815..7cdcd16885ed 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -50,27 +50,23 @@ | |||
50 | # define FIX_EFLAGS __FIX_EFLAGS | 50 | # define FIX_EFLAGS __FIX_EFLAGS |
51 | #endif | 51 | #endif |
52 | 52 | ||
53 | #define COPY(x) { \ | 53 | #define COPY(x) do { \ |
54 | get_user_ex(regs->x, &sc->x); \ | 54 | get_user_ex(regs->x, &sc->x); \ |
55 | } | 55 | } while (0) |
56 | 56 | ||
57 | #define COPY_SEG(seg) { \ | 57 | #define GET_SEG(seg) ({ \ |
58 | unsigned short tmp; \ | 58 | unsigned short tmp; \ |
59 | get_user_ex(tmp, &sc->seg); \ | 59 | get_user_ex(tmp, &sc->seg); \ |
60 | regs->seg = tmp; \ | 60 | tmp; \ |
61 | } | 61 | }) |
62 | 62 | ||
63 | #define COPY_SEG_CPL3(seg) { \ | 63 | #define COPY_SEG(seg) do { \ |
64 | unsigned short tmp; \ | 64 | regs->seg = GET_SEG(seg); \ |
65 | get_user_ex(tmp, &sc->seg); \ | 65 | } while (0) |
66 | regs->seg = tmp | 3; \ | ||
67 | } | ||
68 | 66 | ||
69 | #define GET_SEG(seg) { \ | 67 | #define COPY_SEG_CPL3(seg) do { \ |
70 | unsigned short tmp; \ | 68 | regs->seg = GET_SEG(seg) | 3; \ |
71 | get_user_ex(tmp, &sc->seg); \ | 69 | } while (0) |
72 | loadsegment(seg, tmp); \ | ||
73 | } | ||
74 | 70 | ||
75 | static int | 71 | static int |
76 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 72 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
@@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
86 | get_user_try { | 82 | get_user_try { |
87 | 83 | ||
88 | #ifdef CONFIG_X86_32 | 84 | #ifdef CONFIG_X86_32 |
89 | GET_SEG(gs); | 85 | set_user_gs(regs, GET_SEG(gs)); |
90 | COPY_SEG(fs); | 86 | COPY_SEG(fs); |
91 | COPY_SEG(es); | 87 | COPY_SEG(es); |
92 | COPY_SEG(ds); | 88 | COPY_SEG(ds); |
@@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
138 | put_user_try { | 134 | put_user_try { |
139 | 135 | ||
140 | #ifdef CONFIG_X86_32 | 136 | #ifdef CONFIG_X86_32 |
141 | { | 137 | put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); |
142 | unsigned int tmp; | ||
143 | |||
144 | savesegment(gs, tmp); | ||
145 | put_user_ex(tmp, (unsigned int __user *)&sc->gs); | ||
146 | } | ||
147 | put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); | 138 | put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); |
148 | put_user_ex(regs->es, (unsigned int __user *)&sc->es); | 139 | put_user_ex(regs->es, (unsigned int __user *)&sc->es); |
149 | put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); | 140 | put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); |
@@ -558,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
558 | #endif /* CONFIG_X86_32 */ | 549 | #endif /* CONFIG_X86_32 */ |
559 | 550 | ||
560 | #ifdef CONFIG_X86_32 | 551 | #ifdef CONFIG_X86_32 |
561 | asmlinkage int sys_sigaltstack(unsigned long bx) | 552 | int sys_sigaltstack(struct pt_regs *regs) |
562 | { | 553 | { |
563 | /* | 554 | const stack_t __user *uss = (const stack_t __user *)regs->bx; |
564 | * This is needed to make gcc realize it doesn't own the | ||
565 | * "struct pt_regs" | ||
566 | */ | ||
567 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
568 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
569 | stack_t __user *uoss = (stack_t __user *)regs->cx; | 555 | stack_t __user *uoss = (stack_t __user *)regs->cx; |
570 | 556 | ||
571 | return do_sigaltstack(uss, uoss, regs->sp); | 557 | return do_sigaltstack(uss, uoss, regs->sp); |
@@ -583,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | |||
583 | * Do a signal return; undo the signal stack. | 569 | * Do a signal return; undo the signal stack. |
584 | */ | 570 | */ |
585 | #ifdef CONFIG_X86_32 | 571 | #ifdef CONFIG_X86_32 |
586 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | 572 | unsigned long sys_sigreturn(struct pt_regs *regs) |
587 | { | 573 | { |
588 | struct sigframe __user *frame; | 574 | struct sigframe __user *frame; |
589 | struct pt_regs *regs; | ||
590 | unsigned long ax; | 575 | unsigned long ax; |
591 | sigset_t set; | 576 | sigset_t set; |
592 | 577 | ||
593 | regs = (struct pt_regs *) &__unused; | ||
594 | frame = (struct sigframe __user *)(regs->sp - 8); | 578 | frame = (struct sigframe __user *)(regs->sp - 8); |
595 | 579 | ||
596 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 580 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
@@ -617,7 +601,7 @@ badframe: | |||
617 | } | 601 | } |
618 | #endif /* CONFIG_X86_32 */ | 602 | #endif /* CONFIG_X86_32 */ |
619 | 603 | ||
620 | static long do_rt_sigreturn(struct pt_regs *regs) | 604 | long sys_rt_sigreturn(struct pt_regs *regs) |
621 | { | 605 | { |
622 | struct rt_sigframe __user *frame; | 606 | struct rt_sigframe __user *frame; |
623 | unsigned long ax; | 607 | unsigned long ax; |
@@ -648,25 +632,6 @@ badframe: | |||
648 | return 0; | 632 | return 0; |
649 | } | 633 | } |
650 | 634 | ||
651 | #ifdef CONFIG_X86_32 | ||
652 | /* | ||
653 | * Note: do not pass in pt_regs directly as with tail-call optimization | ||
654 | * GCC will incorrectly stomp on the caller's frame and corrupt user-space | ||
655 | * register state: | ||
656 | */ | ||
657 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
658 | { | ||
659 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
660 | |||
661 | return do_rt_sigreturn(regs); | ||
662 | } | ||
663 | #else /* !CONFIG_X86_32 */ | ||
664 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
665 | { | ||
666 | return do_rt_sigreturn(regs); | ||
667 | } | ||
668 | #endif /* CONFIG_X86_32 */ | ||
669 | |||
670 | /* | 635 | /* |
671 | * OK, we're invoking a handler: | 636 | * OK, we're invoking a handler: |
672 | */ | 637 | */ |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index e2e86a08f31d..3bdb64829b82 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -1,7 +1,7 @@ | |||
1 | ENTRY(sys_call_table) | 1 | ENTRY(sys_call_table) |
2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ | 2 | .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ |
3 | .long sys_exit | 3 | .long sys_exit |
4 | .long sys_fork | 4 | .long ptregs_fork |
5 | .long sys_read | 5 | .long sys_read |
6 | .long sys_write | 6 | .long sys_write |
7 | .long sys_open /* 5 */ | 7 | .long sys_open /* 5 */ |
@@ -10,7 +10,7 @@ ENTRY(sys_call_table) | |||
10 | .long sys_creat | 10 | .long sys_creat |
11 | .long sys_link | 11 | .long sys_link |
12 | .long sys_unlink /* 10 */ | 12 | .long sys_unlink /* 10 */ |
13 | .long sys_execve | 13 | .long ptregs_execve |
14 | .long sys_chdir | 14 | .long sys_chdir |
15 | .long sys_time | 15 | .long sys_time |
16 | .long sys_mknod | 16 | .long sys_mknod |
@@ -109,17 +109,17 @@ ENTRY(sys_call_table) | |||
109 | .long sys_newlstat | 109 | .long sys_newlstat |
110 | .long sys_newfstat | 110 | .long sys_newfstat |
111 | .long sys_uname | 111 | .long sys_uname |
112 | .long sys_iopl /* 110 */ | 112 | .long ptregs_iopl /* 110 */ |
113 | .long sys_vhangup | 113 | .long sys_vhangup |
114 | .long sys_ni_syscall /* old "idle" system call */ | 114 | .long sys_ni_syscall /* old "idle" system call */ |
115 | .long sys_vm86old | 115 | .long ptregs_vm86old |
116 | .long sys_wait4 | 116 | .long sys_wait4 |
117 | .long sys_swapoff /* 115 */ | 117 | .long sys_swapoff /* 115 */ |
118 | .long sys_sysinfo | 118 | .long sys_sysinfo |
119 | .long sys_ipc | 119 | .long sys_ipc |
120 | .long sys_fsync | 120 | .long sys_fsync |
121 | .long sys_sigreturn | 121 | .long ptregs_sigreturn |
122 | .long sys_clone /* 120 */ | 122 | .long ptregs_clone /* 120 */ |
123 | .long sys_setdomainname | 123 | .long sys_setdomainname |
124 | .long sys_newuname | 124 | .long sys_newuname |
125 | .long sys_modify_ldt | 125 | .long sys_modify_ldt |
@@ -165,14 +165,14 @@ ENTRY(sys_call_table) | |||
165 | .long sys_mremap | 165 | .long sys_mremap |
166 | .long sys_setresuid16 | 166 | .long sys_setresuid16 |
167 | .long sys_getresuid16 /* 165 */ | 167 | .long sys_getresuid16 /* 165 */ |
168 | .long sys_vm86 | 168 | .long ptregs_vm86 |
169 | .long sys_ni_syscall /* Old sys_query_module */ | 169 | .long sys_ni_syscall /* Old sys_query_module */ |
170 | .long sys_poll | 170 | .long sys_poll |
171 | .long sys_nfsservctl | 171 | .long sys_nfsservctl |
172 | .long sys_setresgid16 /* 170 */ | 172 | .long sys_setresgid16 /* 170 */ |
173 | .long sys_getresgid16 | 173 | .long sys_getresgid16 |
174 | .long sys_prctl | 174 | .long sys_prctl |
175 | .long sys_rt_sigreturn | 175 | .long ptregs_rt_sigreturn |
176 | .long sys_rt_sigaction | 176 | .long sys_rt_sigaction |
177 | .long sys_rt_sigprocmask /* 175 */ | 177 | .long sys_rt_sigprocmask /* 175 */ |
178 | .long sys_rt_sigpending | 178 | .long sys_rt_sigpending |
@@ -185,11 +185,11 @@ ENTRY(sys_call_table) | |||
185 | .long sys_getcwd | 185 | .long sys_getcwd |
186 | .long sys_capget | 186 | .long sys_capget |
187 | .long sys_capset /* 185 */ | 187 | .long sys_capset /* 185 */ |
188 | .long sys_sigaltstack | 188 | .long ptregs_sigaltstack |
189 | .long sys_sendfile | 189 | .long sys_sendfile |
190 | .long sys_ni_syscall /* reserved for streams1 */ | 190 | .long sys_ni_syscall /* reserved for streams1 */ |
191 | .long sys_ni_syscall /* reserved for streams2 */ | 191 | .long sys_ni_syscall /* reserved for streams2 */ |
192 | .long sys_vfork /* 190 */ | 192 | .long ptregs_vfork /* 190 */ |
193 | .long sys_getrlimit | 193 | .long sys_getrlimit |
194 | .long sys_mmap2 | 194 | .long sys_mmap2 |
195 | .long sys_truncate64 | 195 | .long sys_truncate64 |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0d032d2d8a18..bde57f0f1616 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info) | |||
905 | } | 905 | } |
906 | #endif /* CONFIG_MATH_EMULATION */ | 906 | #endif /* CONFIG_MATH_EMULATION */ |
907 | 907 | ||
908 | dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) | 908 | dotraplinkage void __kprobes |
909 | do_device_not_available(struct pt_regs *regs, long error_code) | ||
909 | { | 910 | { |
910 | #ifdef CONFIG_X86_32 | 911 | #ifdef CONFIG_X86_32 |
911 | if (read_cr0() & X86_CR0_EM) { | 912 | if (read_cr0() & X86_CR0_EM) { |
912 | struct math_emu_info info = { }; | 913 | struct math_emu_info info = { }; |
913 | 914 | ||
914 | conditional_sti(®s); | 915 | conditional_sti(regs); |
915 | 916 | ||
916 | info.regs = ®s; | 917 | info.regs = regs; |
917 | math_emulate(&info); | 918 | math_emulate(&info); |
918 | } else { | 919 | } else { |
919 | math_state_restore(); /* interrupts still off */ | 920 | math_state_restore(); /* interrupts still off */ |
920 | conditional_sti(®s); | 921 | conditional_sti(regs); |
921 | } | 922 | } |
922 | #else | 923 | #else |
923 | math_state_restore(); | 924 | math_state_restore(); |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 4eeb5cf9720d..d7ac84e7fc1c 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) | |||
158 | ret = KVM86->regs32; | 158 | ret = KVM86->regs32; |
159 | 159 | ||
160 | ret->fs = current->thread.saved_fs; | 160 | ret->fs = current->thread.saved_fs; |
161 | loadsegment(gs, current->thread.saved_gs); | 161 | set_user_gs(ret, current->thread.saved_gs); |
162 | 162 | ||
163 | return ret; | 163 | return ret; |
164 | } | 164 | } |
@@ -197,9 +197,9 @@ out: | |||
197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); |
199 | 199 | ||
200 | asmlinkage int sys_vm86old(struct pt_regs regs) | 200 | int sys_vm86old(struct pt_regs *regs) |
201 | { | 201 | { |
202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; | 202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; |
203 | struct kernel_vm86_struct info; /* declare this _on top_, | 203 | struct kernel_vm86_struct info; /* declare this _on top_, |
204 | * this avoids wasting of stack space. | 204 | * this avoids wasting of stack space. |
205 | * This remains on the stack until we | 205 | * This remains on the stack until we |
@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs) | |||
218 | if (tmp) | 218 | if (tmp) |
219 | goto out; | 219 | goto out; |
220 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); | 220 | memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); |
221 | info.regs32 = ®s; | 221 | info.regs32 = regs; |
222 | tsk->thread.vm86_info = v86; | 222 | tsk->thread.vm86_info = v86; |
223 | do_sys_vm86(&info, tsk); | 223 | do_sys_vm86(&info, tsk); |
224 | ret = 0; /* we never return here */ | 224 | ret = 0; /* we never return here */ |
@@ -227,7 +227,7 @@ out: | |||
227 | } | 227 | } |
228 | 228 | ||
229 | 229 | ||
230 | asmlinkage int sys_vm86(struct pt_regs regs) | 230 | int sys_vm86(struct pt_regs *regs) |
231 | { | 231 | { |
232 | struct kernel_vm86_struct info; /* declare this _on top_, | 232 | struct kernel_vm86_struct info; /* declare this _on top_, |
233 | * this avoids wasting of stack space. | 233 | * this avoids wasting of stack space. |
@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
239 | struct vm86plus_struct __user *v86; | 239 | struct vm86plus_struct __user *v86; |
240 | 240 | ||
241 | tsk = current; | 241 | tsk = current; |
242 | switch (regs.bx) { | 242 | switch (regs->bx) { |
243 | case VM86_REQUEST_IRQ: | 243 | case VM86_REQUEST_IRQ: |
244 | case VM86_FREE_IRQ: | 244 | case VM86_FREE_IRQ: |
245 | case VM86_GET_IRQ_BITS: | 245 | case VM86_GET_IRQ_BITS: |
246 | case VM86_GET_AND_RESET_IRQ: | 246 | case VM86_GET_AND_RESET_IRQ: |
247 | ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); | 247 | ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); |
248 | goto out; | 248 | goto out; |
249 | case VM86_PLUS_INSTALL_CHECK: | 249 | case VM86_PLUS_INSTALL_CHECK: |
250 | /* | 250 | /* |
@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs) | |||
261 | ret = -EPERM; | 261 | ret = -EPERM; |
262 | if (tsk->thread.saved_sp0) | 262 | if (tsk->thread.saved_sp0) |
263 | goto out; | 263 | goto out; |
264 | v86 = (struct vm86plus_struct __user *)regs.cx; | 264 | v86 = (struct vm86plus_struct __user *)regs->cx; |
265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
266 | offsetof(struct kernel_vm86_struct, regs32) - | 266 | offsetof(struct kernel_vm86_struct, regs32) - |
267 | sizeof(info.regs)); | 267 | sizeof(info.regs)); |
268 | ret = -EFAULT; | 268 | ret = -EFAULT; |
269 | if (tmp) | 269 | if (tmp) |
270 | goto out; | 270 | goto out; |
271 | info.regs32 = ®s; | 271 | info.regs32 = regs; |
272 | info.vm86plus.is_vm86pus = 1; | 272 | info.vm86plus.is_vm86pus = 1; |
273 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; | 273 | tsk->thread.vm86_info = (struct vm86_struct __user *)v86; |
274 | do_sys_vm86(&info, tsk); | 274 | do_sys_vm86(&info, tsk); |
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
323 | info->regs32->ax = 0; | 323 | info->regs32->ax = 0; |
324 | tsk->thread.saved_sp0 = tsk->thread.sp0; | 324 | tsk->thread.saved_sp0 = tsk->thread.sp0; |
325 | tsk->thread.saved_fs = info->regs32->fs; | 325 | tsk->thread.saved_fs = info->regs32->fs; |
326 | savesegment(gs, tsk->thread.saved_gs); | 326 | tsk->thread.saved_gs = get_user_gs(info->regs32); |
327 | 327 | ||
328 | tss = &per_cpu(init_tss, get_cpu()); | 328 | tss = &per_cpu(init_tss, get_cpu()); |
329 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; | 329 | tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 07f62d287ff0..087a7f2c639b 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -257,6 +257,14 @@ SECTIONS | |||
257 | DWARF_DEBUG | 257 | DWARF_DEBUG |
258 | } | 258 | } |
259 | 259 | ||
260 | /* | ||
261 | * Per-cpu symbols which need to be offset from __per_cpu_load | ||
262 | * for the boot processor. | ||
263 | */ | ||
264 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | ||
265 | INIT_PER_CPU(gdt_page); | ||
266 | INIT_PER_CPU(irq_stack_union); | ||
267 | |||
260 | /* | 268 | /* |
261 | * Build-time check on the image size: | 269 | * Build-time check on the image size: |
262 | */ | 270 | */ |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 19e33b6cd593..da2e314f61b5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) | |||
283 | /* There's one problem which normal hardware doesn't have: the Host | 283 | /* There's one problem which normal hardware doesn't have: the Host |
284 | * can't handle us removing entries we're currently using. So we clear | 284 | * can't handle us removing entries we're currently using. So we clear |
285 | * the GS register here: if it's needed it'll be reloaded anyway. */ | 285 | * the GS register here: if it's needed it'll be reloaded anyway. */ |
286 | loadsegment(gs, 0); | 286 | lazy_load_gs(0); |
287 | lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); | 287 | lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); |
288 | } | 288 | } |
289 | 289 | ||
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c index 420b3b6e3915..6ef5e99380f9 100644 --- a/arch/x86/math-emu/get_address.c +++ b/arch/x86/math-emu/get_address.c | |||
@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment, | |||
150 | #endif /* PARANOID */ | 150 | #endif /* PARANOID */ |
151 | 151 | ||
152 | switch (segment) { | 152 | switch (segment) { |
153 | /* gs isn't used by the kernel, so it still has its | ||
154 | user-space value. */ | ||
155 | case PREFIX_GS_ - 1: | 153 | case PREFIX_GS_ - 1: |
156 | /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ | 154 | /* user gs handling can be lazy, use special accessors */ |
157 | savesegment(gs, addr->selector); | 155 | addr->selector = get_user_gs(FPU_info->regs); |
158 | break; | 156 | break; |
159 | default: | 157 | default: |
160 | addr->selector = PM_REG_(segment); | 158 | addr->selector = PM_REG_(segment); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 08d140fbc31b..deb1c1ab7868 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node) | |||
702 | } | 702 | } |
703 | 703 | ||
704 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 704 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
705 | if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { | 705 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { |
706 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); | 706 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); |
707 | dump_stack(); | 707 | dump_stack(); |
708 | return; | 708 | return; |
@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu) | |||
790 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | 790 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) |
791 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | 791 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; |
792 | 792 | ||
793 | if (!per_cpu_offset(cpu)) { | 793 | if (!cpu_possible(cpu)) { |
794 | printk(KERN_WARNING | 794 | printk(KERN_WARNING |
795 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | 795 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); |
796 | dump_stack(); | 796 | dump_stack(); |
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 4d6ef0a336d6..16a9020c8f11 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE | |||
38 | $(call if_changed,objcopy) | 38 | $(call if_changed,objcopy) |
39 | 39 | ||
40 | CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ | 40 | CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ |
41 | $(filter -g%,$(KBUILD_CFLAGS)) | 41 | $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) |
42 | 42 | ||
43 | $(vobjs): KBUILD_CFLAGS += $(CFL) | 43 | $(vobjs): KBUILD_CFLAGS += $(CFL) |
44 | 44 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 37230342c2c4..95ff6a0e942a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t, | |||
323 | static void xen_load_tls(struct thread_struct *t, unsigned int cpu) | 323 | static void xen_load_tls(struct thread_struct *t, unsigned int cpu) |
324 | { | 324 | { |
325 | /* | 325 | /* |
326 | * XXX sleazy hack: If we're being called in a lazy-cpu zone, | 326 | * XXX sleazy hack: If we're being called in a lazy-cpu zone |
327 | * it means we're in a context switch, and %gs has just been | 327 | * and lazy gs handling is enabled, it means we're in a |
328 | * saved. This means we can zero it out to prevent faults on | 328 | * context switch, and %gs has just been saved. This means we |
329 | * exit from the hypervisor if the next process has no %gs. | 329 | * can zero it out to prevent faults on exit from the |
330 | * Either way, it has been saved, and the new value will get | 330 | * hypervisor if the next process has no %gs. Either way, it |
331 | * loaded properly. This will go away as soon as Xen has been | 331 | * has been saved, and the new value will get loaded properly. |
332 | * modified to not save/restore %gs for normal hypercalls. | 332 | * This will go away as soon as Xen has been modified to not |
333 | * save/restore %gs for normal hypercalls. | ||
333 | * | 334 | * |
334 | * On x86_64, this hack is not used for %gs, because gs points | 335 | * On x86_64, this hack is not used for %gs, because gs points |
335 | * to KERNEL_GS_BASE (and uses it for PDA references), so we | 336 | * to KERNEL_GS_BASE (and uses it for PDA references), so we |
@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) | |||
341 | */ | 342 | */ |
342 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { | 343 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { |
343 | #ifdef CONFIG_X86_32 | 344 | #ifdef CONFIG_X86_32 |
344 | loadsegment(gs, 0); | 345 | lazy_load_gs(0); |
345 | #else | 346 | #else |
346 | loadsegment(fs, 0); | 347 | loadsegment(fs, 0); |
347 | #endif | 348 | #endif |
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 4c6f96799131..79d7362ad6d1 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S | |||
@@ -1,14 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | Asm versions of Xen pv-ops, suitable for either direct use or inlining. | 2 | * Asm versions of Xen pv-ops, suitable for either direct use or |
3 | The inline versions are the same as the direct-use versions, with the | 3 | * inlining. The inline versions are the same as the direct-use |
4 | pre- and post-amble chopped off. | 4 | * versions, with the pre- and post-amble chopped off. |
5 | 5 | * | |
6 | This code is encoded for size rather than absolute efficiency, | 6 | * This code is encoded for size rather than absolute efficiency, with |
7 | with a view to being able to inline as much as possible. | 7 | * a view to being able to inline as much as possible. |
8 | 8 | * | |
9 | We only bother with direct forms (ie, vcpu in percpu data) of | 9 | * We only bother with direct forms (ie, vcpu in percpu data) of the |
10 | the operations here; the indirect forms are better handled in | 10 | * operations here; the indirect forms are better handled in C, since |
11 | C, since they're generally too large to inline anyway. | 11 | * they're generally too large to inline anyway. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <asm/asm-offsets.h> | 14 | #include <asm/asm-offsets.h> |
@@ -18,17 +18,19 @@ | |||
18 | #include "xen-asm.h" | 18 | #include "xen-asm.h" |
19 | 19 | ||
20 | /* | 20 | /* |
21 | Enable events. This clears the event mask and tests the pending | 21 | * Enable events. This clears the event mask and tests the pending |
22 | event status with one and operation. If there are pending | 22 | * event status with one and operation. If there are pending events, |
23 | events, then enter the hypervisor to get them handled. | 23 | * then enter the hypervisor to get them handled. |
24 | */ | 24 | */ |
25 | ENTRY(xen_irq_enable_direct) | 25 | ENTRY(xen_irq_enable_direct) |
26 | /* Unmask events */ | 26 | /* Unmask events */ |
27 | movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask | 27 | movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask |
28 | 28 | ||
29 | /* Preempt here doesn't matter because that will deal with | 29 | /* |
30 | any pending interrupts. The pending check may end up being | 30 | * Preempt here doesn't matter because that will deal with any |
31 | run on the wrong CPU, but that doesn't hurt. */ | 31 | * pending interrupts. The pending check may end up being run |
32 | * on the wrong CPU, but that doesn't hurt. | ||
33 | */ | ||
32 | 34 | ||
33 | /* Test for pending */ | 35 | /* Test for pending */ |
34 | testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending | 36 | testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending |
@@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct) | |||
43 | 45 | ||
44 | 46 | ||
45 | /* | 47 | /* |
46 | Disabling events is simply a matter of making the event mask | 48 | * Disabling events is simply a matter of making the event mask |
47 | non-zero. | 49 | * non-zero. |
48 | */ | 50 | */ |
49 | ENTRY(xen_irq_disable_direct) | 51 | ENTRY(xen_irq_disable_direct) |
50 | movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask | 52 | movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask |
@@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct) | |||
54 | RELOC(xen_irq_disable_direct, 0) | 56 | RELOC(xen_irq_disable_direct, 0) |
55 | 57 | ||
56 | /* | 58 | /* |
57 | (xen_)save_fl is used to get the current interrupt enable status. | 59 | * (xen_)save_fl is used to get the current interrupt enable status. |
58 | Callers expect the status to be in X86_EFLAGS_IF, and other bits | 60 | * Callers expect the status to be in X86_EFLAGS_IF, and other bits |
59 | may be set in the return value. We take advantage of this by | 61 | * may be set in the return value. We take advantage of this by |
60 | making sure that X86_EFLAGS_IF has the right value (and other bits | 62 | * making sure that X86_EFLAGS_IF has the right value (and other bits |
61 | in that byte are 0), but other bits in the return value are | 63 | * in that byte are 0), but other bits in the return value are |
62 | undefined. We need to toggle the state of the bit, because | 64 | * undefined. We need to toggle the state of the bit, because Xen and |
63 | Xen and x86 use opposite senses (mask vs enable). | 65 | * x86 use opposite senses (mask vs enable). |
64 | */ | 66 | */ |
65 | ENTRY(xen_save_fl_direct) | 67 | ENTRY(xen_save_fl_direct) |
66 | testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask | 68 | testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask |
67 | setz %ah | 69 | setz %ah |
68 | addb %ah,%ah | 70 | addb %ah, %ah |
69 | ENDPATCH(xen_save_fl_direct) | 71 | ENDPATCH(xen_save_fl_direct) |
70 | ret | 72 | ret |
71 | ENDPROC(xen_save_fl_direct) | 73 | ENDPROC(xen_save_fl_direct) |
@@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct) | |||
73 | 75 | ||
74 | 76 | ||
75 | /* | 77 | /* |
76 | In principle the caller should be passing us a value return | 78 | * In principle the caller should be passing us a value return from |
77 | from xen_save_fl_direct, but for robustness sake we test only | 79 | * xen_save_fl_direct, but for robustness sake we test only the |
78 | the X86_EFLAGS_IF flag rather than the whole byte. After | 80 | * X86_EFLAGS_IF flag rather than the whole byte. After setting the |
79 | setting the interrupt mask state, it checks for unmasked | 81 | * interrupt mask state, it checks for unmasked pending events and |
80 | pending events and enters the hypervisor to get them delivered | 82 | * enters the hypervisor to get them delivered if so. |
81 | if so. | ||
82 | */ | 83 | */ |
83 | ENTRY(xen_restore_fl_direct) | 84 | ENTRY(xen_restore_fl_direct) |
84 | #ifdef CONFIG_X86_64 | 85 | #ifdef CONFIG_X86_64 |
@@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct) | |||
87 | testb $X86_EFLAGS_IF>>8, %ah | 88 | testb $X86_EFLAGS_IF>>8, %ah |
88 | #endif | 89 | #endif |
89 | setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask | 90 | setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask |
90 | /* Preempt here doesn't matter because that will deal with | 91 | /* |
91 | any pending interrupts. The pending check may end up being | 92 | * Preempt here doesn't matter because that will deal with any |
92 | run on the wrong CPU, but that doesn't hurt. */ | 93 | * pending interrupts. The pending check may end up being run |
94 | * on the wrong CPU, but that doesn't hurt. | ||
95 | */ | ||
93 | 96 | ||
94 | /* check for unmasked and pending */ | 97 | /* check for unmasked and pending */ |
95 | cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending | 98 | cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending |
@@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct) | |||
103 | 106 | ||
104 | 107 | ||
105 | /* | 108 | /* |
106 | Force an event check by making a hypercall, | 109 | * Force an event check by making a hypercall, but preserve regs |
107 | but preserve regs before making the call. | 110 | * before making the call. |
108 | */ | 111 | */ |
109 | check_events: | 112 | check_events: |
110 | #ifdef CONFIG_X86_32 | 113 | #ifdef CONFIG_X86_32 |
@@ -137,4 +140,3 @@ check_events: | |||
137 | pop %rax | 140 | pop %rax |
138 | #endif | 141 | #endif |
139 | ret | 142 | ret |
140 | |||
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 082d173caaf3..88e15deb8b82 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S | |||
@@ -1,17 +1,16 @@ | |||
1 | /* | 1 | /* |
2 | Asm versions of Xen pv-ops, suitable for either direct use or inlining. | 2 | * Asm versions of Xen pv-ops, suitable for either direct use or |
3 | The inline versions are the same as the direct-use versions, with the | 3 | * inlining. The inline versions are the same as the direct-use |
4 | pre- and post-amble chopped off. | 4 | * versions, with the pre- and post-amble chopped off. |
5 | 5 | * | |
6 | This code is encoded for size rather than absolute efficiency, | 6 | * This code is encoded for size rather than absolute efficiency, with |
7 | with a view to being able to inline as much as possible. | 7 | * a view to being able to inline as much as possible. |
8 | 8 | * | |
9 | We only bother with direct forms (ie, vcpu in pda) of the operations | 9 | * We only bother with direct forms (ie, vcpu in pda) of the |
10 | here; the indirect forms are better handled in C, since they're | 10 | * operations here; the indirect forms are better handled in C, since |
11 | generally too large to inline anyway. | 11 | * they're generally too large to inline anyway. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | //#include <asm/asm-offsets.h> | ||
15 | #include <asm/thread_info.h> | 14 | #include <asm/thread_info.h> |
16 | #include <asm/processor-flags.h> | 15 | #include <asm/processor-flags.h> |
17 | #include <asm/segment.h> | 16 | #include <asm/segment.h> |
@@ -21,8 +20,8 @@ | |||
21 | #include "xen-asm.h" | 20 | #include "xen-asm.h" |
22 | 21 | ||
23 | /* | 22 | /* |
24 | Force an event check by making a hypercall, | 23 | * Force an event check by making a hypercall, but preserve regs |
25 | but preserve regs before making the call. | 24 | * before making the call. |
26 | */ | 25 | */ |
27 | check_events: | 26 | check_events: |
28 | push %eax | 27 | push %eax |
@@ -35,10 +34,10 @@ check_events: | |||
35 | ret | 34 | ret |
36 | 35 | ||
37 | /* | 36 | /* |
38 | We can't use sysexit directly, because we're not running in ring0. | 37 | * We can't use sysexit directly, because we're not running in ring0. |
39 | But we can easily fake it up using iret. Assuming xen_sysexit | 38 | * But we can easily fake it up using iret. Assuming xen_sysexit is |
40 | is jumped to with a standard stack frame, we can just strip it | 39 | * jumped to with a standard stack frame, we can just strip it back to |
41 | back to a standard iret frame and use iret. | 40 | * a standard iret frame and use iret. |
42 | */ | 41 | */ |
43 | ENTRY(xen_sysexit) | 42 | ENTRY(xen_sysexit) |
44 | movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ | 43 | movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ |
@@ -49,33 +48,31 @@ ENTRY(xen_sysexit) | |||
49 | ENDPROC(xen_sysexit) | 48 | ENDPROC(xen_sysexit) |
50 | 49 | ||
51 | /* | 50 | /* |
52 | This is run where a normal iret would be run, with the same stack setup: | 51 | * This is run where a normal iret would be run, with the same stack setup: |
53 | 8: eflags | 52 | * 8: eflags |
54 | 4: cs | 53 | * 4: cs |
55 | esp-> 0: eip | 54 | * esp-> 0: eip |
56 | 55 | * | |
57 | This attempts to make sure that any pending events are dealt | 56 | * This attempts to make sure that any pending events are dealt with |
58 | with on return to usermode, but there is a small window in | 57 | * on return to usermode, but there is a small window in which an |
59 | which an event can happen just before entering usermode. If | 58 | * event can happen just before entering usermode. If the nested |
60 | the nested interrupt ends up setting one of the TIF_WORK_MASK | 59 | * interrupt ends up setting one of the TIF_WORK_MASK pending work |
61 | pending work flags, they will not be tested again before | 60 | * flags, they will not be tested again before returning to |
62 | returning to usermode. This means that a process can end up | 61 | * usermode. This means that a process can end up with pending work, |
63 | with pending work, which will be unprocessed until the process | 62 | * which will be unprocessed until the process enters and leaves the |
64 | enters and leaves the kernel again, which could be an | 63 | * kernel again, which could be an unbounded amount of time. This |
65 | unbounded amount of time. This means that a pending signal or | 64 | * means that a pending signal or reschedule event could be |
66 | reschedule event could be indefinitely delayed. | 65 | * indefinitely delayed. |
67 | 66 | * | |
68 | The fix is to notice a nested interrupt in the critical | 67 | * The fix is to notice a nested interrupt in the critical window, and |
69 | window, and if one occurs, then fold the nested interrupt into | 68 | * if one occurs, then fold the nested interrupt into the current |
70 | the current interrupt stack frame, and re-process it | 69 | * interrupt stack frame, and re-process it iteratively rather than |
71 | iteratively rather than recursively. This means that it will | 70 | * recursively. This means that it will exit via the normal path, and |
72 | exit via the normal path, and all pending work will be dealt | 71 | * all pending work will be dealt with appropriately. |
73 | with appropriately. | 72 | * |
74 | 73 | * Because the nested interrupt handler needs to deal with the current | |
75 | Because the nested interrupt handler needs to deal with the | 74 | * stack state in whatever form its in, we keep things simple by only |
76 | current stack state in whatever form its in, we keep things | 75 | * using a single register which is pushed/popped on the stack. |
77 | simple by only using a single register which is pushed/popped | ||
78 | on the stack. | ||
79 | */ | 76 | */ |
80 | ENTRY(xen_iret) | 77 | ENTRY(xen_iret) |
81 | /* test eflags for special cases */ | 78 | /* test eflags for special cases */ |
@@ -85,13 +82,15 @@ ENTRY(xen_iret) | |||
85 | push %eax | 82 | push %eax |
86 | ESP_OFFSET=4 # bytes pushed onto stack | 83 | ESP_OFFSET=4 # bytes pushed onto stack |
87 | 84 | ||
88 | /* Store vcpu_info pointer for easy access. Do it this | 85 | /* |
89 | way to avoid having to reload %fs */ | 86 | * Store vcpu_info pointer for easy access. Do it this way to |
87 | * avoid having to reload %fs | ||
88 | */ | ||
90 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
91 | GET_THREAD_INFO(%eax) | 90 | GET_THREAD_INFO(%eax) |
92 | movl TI_cpu(%eax),%eax | 91 | movl TI_cpu(%eax), %eax |
93 | movl __per_cpu_offset(,%eax,4),%eax | 92 | movl __per_cpu_offset(,%eax,4), %eax |
94 | mov per_cpu__xen_vcpu(%eax),%eax | 93 | mov per_cpu__xen_vcpu(%eax), %eax |
95 | #else | 94 | #else |
96 | movl per_cpu__xen_vcpu, %eax | 95 | movl per_cpu__xen_vcpu, %eax |
97 | #endif | 96 | #endif |
@@ -99,37 +98,46 @@ ENTRY(xen_iret) | |||
99 | /* check IF state we're restoring */ | 98 | /* check IF state we're restoring */ |
100 | testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) | 99 | testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) |
101 | 100 | ||
102 | /* Maybe enable events. Once this happens we could get a | 101 | /* |
103 | recursive event, so the critical region starts immediately | 102 | * Maybe enable events. Once this happens we could get a |
104 | afterwards. However, if that happens we don't end up | 103 | * recursive event, so the critical region starts immediately |
105 | resuming the code, so we don't have to be worried about | 104 | * afterwards. However, if that happens we don't end up |
106 | being preempted to another CPU. */ | 105 | * resuming the code, so we don't have to be worried about |
106 | * being preempted to another CPU. | ||
107 | */ | ||
107 | setz XEN_vcpu_info_mask(%eax) | 108 | setz XEN_vcpu_info_mask(%eax) |
108 | xen_iret_start_crit: | 109 | xen_iret_start_crit: |
109 | 110 | ||
110 | /* check for unmasked and pending */ | 111 | /* check for unmasked and pending */ |
111 | cmpw $0x0001, XEN_vcpu_info_pending(%eax) | 112 | cmpw $0x0001, XEN_vcpu_info_pending(%eax) |
112 | 113 | ||
113 | /* If there's something pending, mask events again so we | 114 | /* |
114 | can jump back into xen_hypervisor_callback */ | 115 | * If there's something pending, mask events again so we can |
116 | * jump back into xen_hypervisor_callback | ||
117 | */ | ||
115 | sete XEN_vcpu_info_mask(%eax) | 118 | sete XEN_vcpu_info_mask(%eax) |
116 | 119 | ||
117 | popl %eax | 120 | popl %eax |
118 | 121 | ||
119 | /* From this point on the registers are restored and the stack | 122 | /* |
120 | updated, so we don't need to worry about it if we're preempted */ | 123 | * From this point on the registers are restored and the stack |
124 | * updated, so we don't need to worry about it if we're | ||
125 | * preempted | ||
126 | */ | ||
121 | iret_restore_end: | 127 | iret_restore_end: |
122 | 128 | ||
123 | /* Jump to hypervisor_callback after fixing up the stack. | 129 | /* |
124 | Events are masked, so jumping out of the critical | 130 | * Jump to hypervisor_callback after fixing up the stack. |
125 | region is OK. */ | 131 | * Events are masked, so jumping out of the critical region is |
132 | * OK. | ||
133 | */ | ||
126 | je xen_hypervisor_callback | 134 | je xen_hypervisor_callback |
127 | 135 | ||
128 | 1: iret | 136 | 1: iret |
129 | xen_iret_end_crit: | 137 | xen_iret_end_crit: |
130 | .section __ex_table,"a" | 138 | .section __ex_table, "a" |
131 | .align 4 | 139 | .align 4 |
132 | .long 1b,iret_exc | 140 | .long 1b, iret_exc |
133 | .previous | 141 | .previous |
134 | 142 | ||
135 | hyper_iret: | 143 | hyper_iret: |
@@ -139,55 +147,55 @@ hyper_iret: | |||
139 | .globl xen_iret_start_crit, xen_iret_end_crit | 147 | .globl xen_iret_start_crit, xen_iret_end_crit |
140 | 148 | ||
141 | /* | 149 | /* |
142 | This is called by xen_hypervisor_callback in entry.S when it sees | 150 | * This is called by xen_hypervisor_callback in entry.S when it sees |
143 | that the EIP at the time of interrupt was between xen_iret_start_crit | 151 | * that the EIP at the time of interrupt was between |
144 | and xen_iret_end_crit. We're passed the EIP in %eax so we can do | 152 | * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in |
145 | a more refined determination of what to do. | 153 | * %eax so we can do a more refined determination of what to do. |
146 | 154 | * | |
147 | The stack format at this point is: | 155 | * The stack format at this point is: |
148 | ---------------- | 156 | * ---------------- |
149 | ss : (ss/esp may be present if we came from usermode) | 157 | * ss : (ss/esp may be present if we came from usermode) |
150 | esp : | 158 | * esp : |
151 | eflags } outer exception info | 159 | * eflags } outer exception info |
152 | cs } | 160 | * cs } |
153 | eip } | 161 | * eip } |
154 | ---------------- <- edi (copy dest) | 162 | * ---------------- <- edi (copy dest) |
155 | eax : outer eax if it hasn't been restored | 163 | * eax : outer eax if it hasn't been restored |
156 | ---------------- | 164 | * ---------------- |
157 | eflags } nested exception info | 165 | * eflags } nested exception info |
158 | cs } (no ss/esp because we're nested | 166 | * cs } (no ss/esp because we're nested |
159 | eip } from the same ring) | 167 | * eip } from the same ring) |
160 | orig_eax }<- esi (copy src) | 168 | * orig_eax }<- esi (copy src) |
161 | - - - - - - - - | 169 | * - - - - - - - - |
162 | fs } | 170 | * fs } |
163 | es } | 171 | * es } |
164 | ds } SAVE_ALL state | 172 | * ds } SAVE_ALL state |
165 | eax } | 173 | * eax } |
166 | : : | 174 | * : : |
167 | ebx }<- esp | 175 | * ebx }<- esp |
168 | ---------------- | 176 | * ---------------- |
169 | 177 | * | |
170 | In order to deliver the nested exception properly, we need to shift | 178 | * In order to deliver the nested exception properly, we need to shift |
171 | everything from the return addr up to the error code so it | 179 | * everything from the return addr up to the error code so it sits |
172 | sits just under the outer exception info. This means that when we | 180 | * just under the outer exception info. This means that when we |
173 | handle the exception, we do it in the context of the outer exception | 181 | * handle the exception, we do it in the context of the outer |
174 | rather than starting a new one. | 182 | * exception rather than starting a new one. |
175 | 183 | * | |
176 | The only caveat is that if the outer eax hasn't been | 184 | * The only caveat is that if the outer eax hasn't been restored yet |
177 | restored yet (ie, it's still on stack), we need to insert | 185 | * (ie, it's still on stack), we need to insert its value into the |
178 | its value into the SAVE_ALL state before going on, since | 186 | * SAVE_ALL state before going on, since it's usermode state which we |
179 | it's usermode state which we eventually need to restore. | 187 | * eventually need to restore. |
180 | */ | 188 | */ |
181 | ENTRY(xen_iret_crit_fixup) | 189 | ENTRY(xen_iret_crit_fixup) |
182 | /* | 190 | /* |
183 | Paranoia: Make sure we're really coming from kernel space. | 191 | * Paranoia: Make sure we're really coming from kernel space. |
184 | One could imagine a case where userspace jumps into the | 192 | * One could imagine a case where userspace jumps into the |
185 | critical range address, but just before the CPU delivers a GP, | 193 | * critical range address, but just before the CPU delivers a |
186 | it decides to deliver an interrupt instead. Unlikely? | 194 | * GP, it decides to deliver an interrupt instead. Unlikely? |
187 | Definitely. Easy to avoid? Yes. The Intel documents | 195 | * Definitely. Easy to avoid? Yes. The Intel documents |
188 | explicitly say that the reported EIP for a bad jump is the | 196 | * explicitly say that the reported EIP for a bad jump is the |
189 | jump instruction itself, not the destination, but some virtual | 197 | * jump instruction itself, not the destination, but some |
190 | environments get this wrong. | 198 | * virtual environments get this wrong. |
191 | */ | 199 | */ |
192 | movl PT_CS(%esp), %ecx | 200 | movl PT_CS(%esp), %ecx |
193 | andl $SEGMENT_RPL_MASK, %ecx | 201 | andl $SEGMENT_RPL_MASK, %ecx |
@@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) | |||
197 | lea PT_ORIG_EAX(%esp), %esi | 205 | lea PT_ORIG_EAX(%esp), %esi |
198 | lea PT_EFLAGS(%esp), %edi | 206 | lea PT_EFLAGS(%esp), %edi |
199 | 207 | ||
200 | /* If eip is before iret_restore_end then stack | 208 | /* |
201 | hasn't been restored yet. */ | 209 | * If eip is before iret_restore_end then stack |
210 | * hasn't been restored yet. | ||
211 | */ | ||
202 | cmp $iret_restore_end, %eax | 212 | cmp $iret_restore_end, %eax |
203 | jae 1f | 213 | jae 1f |
204 | 214 | ||
205 | movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ | 215 | movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ |
206 | movl %eax, PT_EAX(%esp) | 216 | movl %eax, PT_EAX(%esp) |
207 | 217 | ||
208 | lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ | 218 | lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ |
209 | 219 | ||
210 | /* set up the copy */ | 220 | /* set up the copy */ |
211 | 1: std | 221 | 1: std |
@@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup) | |||
213 | rep movsl | 223 | rep movsl |
214 | cld | 224 | cld |
215 | 225 | ||
216 | lea 4(%edi),%esp /* point esp to new frame */ | 226 | lea 4(%edi), %esp /* point esp to new frame */ |
217 | 2: jmp xen_do_upcall | 227 | 2: jmp xen_do_upcall |
218 | 228 | ||
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index d205a283efe0..02f496a8dbaa 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -1,14 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | Asm versions of Xen pv-ops, suitable for either direct use or inlining. | 2 | * Asm versions of Xen pv-ops, suitable for either direct use or |
3 | The inline versions are the same as the direct-use versions, with the | 3 | * inlining. The inline versions are the same as the direct-use |
4 | pre- and post-amble chopped off. | 4 | * versions, with the pre- and post-amble chopped off. |
5 | 5 | * | |
6 | This code is encoded for size rather than absolute efficiency, | 6 | * This code is encoded for size rather than absolute efficiency, with |
7 | with a view to being able to inline as much as possible. | 7 | * a view to being able to inline as much as possible. |
8 | 8 | * | |
9 | We only bother with direct forms (ie, vcpu in pda) of the operations | 9 | * We only bother with direct forms (ie, vcpu in pda) of the |
10 | here; the indirect forms are better handled in C, since they're | 10 | * operations here; the indirect forms are better handled in C, since |
11 | generally too large to inline anyway. | 11 | * they're generally too large to inline anyway. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <asm/errno.h> | 14 | #include <asm/errno.h> |
@@ -21,25 +21,25 @@ | |||
21 | #include "xen-asm.h" | 21 | #include "xen-asm.h" |
22 | 22 | ||
23 | ENTRY(xen_adjust_exception_frame) | 23 | ENTRY(xen_adjust_exception_frame) |
24 | mov 8+0(%rsp),%rcx | 24 | mov 8+0(%rsp), %rcx |
25 | mov 8+8(%rsp),%r11 | 25 | mov 8+8(%rsp), %r11 |
26 | ret $16 | 26 | ret $16 |
27 | 27 | ||
28 | hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 | 28 | hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 |
29 | /* | 29 | /* |
30 | Xen64 iret frame: | 30 | * Xen64 iret frame: |
31 | 31 | * | |
32 | ss | 32 | * ss |
33 | rsp | 33 | * rsp |
34 | rflags | 34 | * rflags |
35 | cs | 35 | * cs |
36 | rip <-- standard iret frame | 36 | * rip <-- standard iret frame |
37 | 37 | * | |
38 | flags | 38 | * flags |
39 | 39 | * | |
40 | rcx } | 40 | * rcx } |
41 | r11 }<-- pushed by hypercall page | 41 | * r11 }<-- pushed by hypercall page |
42 | rsp -> rax } | 42 | * rsp->rax } |
43 | */ | 43 | */ |
44 | ENTRY(xen_iret) | 44 | ENTRY(xen_iret) |
45 | pushq $0 | 45 | pushq $0 |
@@ -48,8 +48,8 @@ ENDPATCH(xen_iret) | |||
48 | RELOC(xen_iret, 1b+1) | 48 | RELOC(xen_iret, 1b+1) |
49 | 49 | ||
50 | /* | 50 | /* |
51 | sysexit is not used for 64-bit processes, so it's | 51 | * sysexit is not used for 64-bit processes, so it's only ever used to |
52 | only ever used to return to 32-bit compat userspace. | 52 | * return to 32-bit compat userspace. |
53 | */ | 53 | */ |
54 | ENTRY(xen_sysexit) | 54 | ENTRY(xen_sysexit) |
55 | pushq $__USER32_DS | 55 | pushq $__USER32_DS |
@@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit) | |||
64 | RELOC(xen_sysexit, 1b+1) | 64 | RELOC(xen_sysexit, 1b+1) |
65 | 65 | ||
66 | ENTRY(xen_sysret64) | 66 | ENTRY(xen_sysret64) |
67 | /* We're already on the usermode stack at this point, but still | 67 | /* |
68 | with the kernel gs, so we can easily switch back */ | 68 | * We're already on the usermode stack at this point, but |
69 | * still with the kernel gs, so we can easily switch back | ||
70 | */ | ||
69 | movq %rsp, PER_CPU_VAR(old_rsp) | 71 | movq %rsp, PER_CPU_VAR(old_rsp) |
70 | movq PER_CPU_VAR(kernel_stack),%rsp | 72 | movq PER_CPU_VAR(kernel_stack), %rsp |
71 | 73 | ||
72 | pushq $__USER_DS | 74 | pushq $__USER_DS |
73 | pushq PER_CPU_VAR(old_rsp) | 75 | pushq PER_CPU_VAR(old_rsp) |
@@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64) | |||
81 | RELOC(xen_sysret64, 1b+1) | 83 | RELOC(xen_sysret64, 1b+1) |
82 | 84 | ||
83 | ENTRY(xen_sysret32) | 85 | ENTRY(xen_sysret32) |
84 | /* We're already on the usermode stack at this point, but still | 86 | /* |
85 | with the kernel gs, so we can easily switch back */ | 87 | * We're already on the usermode stack at this point, but |
88 | * still with the kernel gs, so we can easily switch back | ||
89 | */ | ||
86 | movq %rsp, PER_CPU_VAR(old_rsp) | 90 | movq %rsp, PER_CPU_VAR(old_rsp) |
87 | movq PER_CPU_VAR(kernel_stack), %rsp | 91 | movq PER_CPU_VAR(kernel_stack), %rsp |
88 | 92 | ||
@@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32) | |||
98 | RELOC(xen_sysret32, 1b+1) | 102 | RELOC(xen_sysret32, 1b+1) |
99 | 103 | ||
100 | /* | 104 | /* |
101 | Xen handles syscall callbacks much like ordinary exceptions, | 105 | * Xen handles syscall callbacks much like ordinary exceptions, which |
102 | which means we have: | 106 | * means we have: |
103 | - kernel gs | 107 | * - kernel gs |
104 | - kernel rsp | 108 | * - kernel rsp |
105 | - an iret-like stack frame on the stack (including rcx and r11): | 109 | * - an iret-like stack frame on the stack (including rcx and r11): |
106 | ss | 110 | * ss |
107 | rsp | 111 | * rsp |
108 | rflags | 112 | * rflags |
109 | cs | 113 | * cs |
110 | rip | 114 | * rip |
111 | r11 | 115 | * r11 |
112 | rsp-> rcx | 116 | * rsp->rcx |
113 | 117 | * | |
114 | In all the entrypoints, we undo all that to make it look | 118 | * In all the entrypoints, we undo all that to make it look like a |
115 | like a CPU-generated syscall/sysenter and jump to the normal | 119 | * CPU-generated syscall/sysenter and jump to the normal entrypoint. |
116 | entrypoint. | ||
117 | */ | 120 | */ |
118 | 121 | ||
119 | .macro undo_xen_syscall | 122 | .macro undo_xen_syscall |
120 | mov 0*8(%rsp),%rcx | 123 | mov 0*8(%rsp), %rcx |
121 | mov 1*8(%rsp),%r11 | 124 | mov 1*8(%rsp), %r11 |
122 | mov 5*8(%rsp),%rsp | 125 | mov 5*8(%rsp), %rsp |
123 | .endm | 126 | .endm |
124 | 127 | ||
125 | /* Normal 64-bit system call target */ | 128 | /* Normal 64-bit system call target */ |
@@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target) | |||
146 | 149 | ||
147 | ENTRY(xen_syscall32_target) | 150 | ENTRY(xen_syscall32_target) |
148 | ENTRY(xen_sysenter_target) | 151 | ENTRY(xen_sysenter_target) |
149 | lea 16(%rsp), %rsp /* strip %rcx,%r11 */ | 152 | lea 16(%rsp), %rsp /* strip %rcx, %r11 */ |
150 | mov $-ENOSYS, %rax | 153 | mov $-ENOSYS, %rax |
151 | pushq $VGCF_in_syscall | 154 | pushq $VGCF_in_syscall |
152 | jmp hypercall_iret | 155 | jmp hypercall_iret |