aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-02-13 03:45:09 -0500
committerIngo Molnar <mingo@elte.hu>2009-02-13 03:45:09 -0500
commitab639f3593f0b5e4439d549831442c18c3baf989 (patch)
tree118743e94e5dc86c835dbc1f1d3bf1612f4ae740 /arch/x86
parentf8a6b2b9cee298a9663cbe38ce1eb5240987cb62 (diff)
parent58105ef1857112a186696c9b8957020090226a28 (diff)
Merge branch 'core/percpu' into x86/core
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/Makefile17
-rw-r--r--arch/x86/include/asm/a.out-core.h2
-rw-r--r--arch/x86/include/asm/elf.h15
-rw-r--r--arch/x86/include/asm/mmu_context.h2
-rw-r--r--arch/x86/include/asm/percpu.h22
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/ptrace.h4
-rw-r--r--arch/x86/include/asm/segment.h9
-rw-r--r--arch/x86/include/asm/stackprotector.h96
-rw-r--r--arch/x86/include/asm/syscalls.h20
-rw-r--r--arch/x86/include/asm/system.h38
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/include/asm/uaccess.h33
-rw-r--r--arch/x86/include/asm/uv/uv.h3
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/cpu/common.c23
-rw-r--r--arch/x86/kernel/entry_32.S443
-rw-r--r--arch/x86/kernel/head_32.S21
-rw-r--r--arch/x86/kernel/head_64.S21
-rw-r--r--arch/x86/kernel/ioport.c3
-rw-r--r--arch/x86/kernel/process_32.c53
-rw-r--r--arch/x86/kernel/process_64.c11
-rw-r--r--arch/x86/kernel/ptrace.c19
-rw-r--r--arch/x86/kernel/setup_percpu.c2
-rw-r--r--arch/x86/kernel/signal.c75
-rw-r--r--arch/x86/kernel/syscall_table_32.S20
-rw-r--r--arch/x86/kernel/traps.c9
-rw-r--r--arch/x86/kernel/vm86_32.c20
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S8
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/math-emu/get_address.c6
-rw-r--r--arch/x86/mm/numa_64.c4
-rw-r--r--arch/x86/vdso/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c17
-rw-r--r--arch/x86/xen/xen-asm.S78
-rw-r--r--arch/x86/xen/xen-asm_32.S238
-rw-r--r--arch/x86/xen/xen-asm_64.S107
38 files changed, 888 insertions, 569 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 148c112c9ca4..1042d69b267d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -194,6 +194,10 @@ config X86_TRAMPOLINE
194 depends on SMP || (64BIT && ACPI_SLEEP) 194 depends on SMP || (64BIT && ACPI_SLEEP)
195 default y 195 default y
196 196
197config X86_32_LAZY_GS
198 def_bool y
199 depends on X86_32 && !CC_STACKPROTECTOR
200
197config KTIME_SCALAR 201config KTIME_SCALAR
198 def_bool X86_32 202 def_bool X86_32
199source "init/Kconfig" 203source "init/Kconfig"
@@ -1339,7 +1343,6 @@ config CC_STACKPROTECTOR_ALL
1339 1343
1340config CC_STACKPROTECTOR 1344config CC_STACKPROTECTOR
1341 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" 1345 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1342 depends on X86_64
1343 select CC_STACKPROTECTOR_ALL 1346 select CC_STACKPROTECTOR_ALL
1344 ---help--- 1347 ---help---
1345 This option turns on the -fstack-protector GCC feature. This 1348 This option turns on the -fstack-protector GCC feature. This
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 99550c407990..1836191839ee 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -70,14 +70,17 @@ else
70 # this works around some issues with generating unwind tables in older gccs 70 # this works around some issues with generating unwind tables in older gccs
71 # newer gccs do it by default 71 # newer gccs do it by default
72 KBUILD_CFLAGS += -maccumulate-outgoing-args 72 KBUILD_CFLAGS += -maccumulate-outgoing-args
73endif
73 74
74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh 75ifdef CONFIG_CC_STACKPROTECTOR
75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ 76 cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
76 "$(CC)" "-fstack-protector -DGCC_HAS_SP" ) 77 ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y)
77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ 78 stackp-y := -fstack-protector
78 "$(CC)" -fstack-protector-all ) 79 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
79 80 KBUILD_CFLAGS += $(stackp-y)
80 KBUILD_CFLAGS += $(stackp-y) 81 else
82 $(warning stack protector enabled but no compiler support)
83 endif
81endif 84endif
82 85
83# Stackpointer is addressed different for 32 bit and 64 bit x86 86# Stackpointer is addressed different for 32 bit and 64 bit x86
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index 3c601f8224be..bb70e397aa84 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -55,7 +55,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
55 dump->regs.ds = (u16)regs->ds; 55 dump->regs.ds = (u16)regs->ds;
56 dump->regs.es = (u16)regs->es; 56 dump->regs.es = (u16)regs->es;
57 dump->regs.fs = (u16)regs->fs; 57 dump->regs.fs = (u16)regs->fs;
58 savesegment(gs, dump->regs.gs); 58 dump->regs.gs = get_user_gs(regs);
59 dump->regs.orig_ax = regs->orig_ax; 59 dump->regs.orig_ax = regs->orig_ax;
60 dump->regs.ip = regs->ip; 60 dump->regs.ip = regs->ip;
61 dump->regs.cs = (u16)regs->cs; 61 dump->regs.cs = (u16)regs->cs;
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index f51a3ddde01a..83c1bc8d2e8a 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled;
112 * now struct_user_regs, they are different) 112 * now struct_user_regs, they are different)
113 */ 113 */
114 114
115#define ELF_CORE_COPY_REGS(pr_reg, regs) \ 115#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \
116do { \ 116do { \
117 pr_reg[0] = regs->bx; \ 117 pr_reg[0] = regs->bx; \
118 pr_reg[1] = regs->cx; \ 118 pr_reg[1] = regs->cx; \
@@ -124,7 +124,6 @@ do { \
124 pr_reg[7] = regs->ds & 0xffff; \ 124 pr_reg[7] = regs->ds & 0xffff; \
125 pr_reg[8] = regs->es & 0xffff; \ 125 pr_reg[8] = regs->es & 0xffff; \
126 pr_reg[9] = regs->fs & 0xffff; \ 126 pr_reg[9] = regs->fs & 0xffff; \
127 savesegment(gs, pr_reg[10]); \
128 pr_reg[11] = regs->orig_ax; \ 127 pr_reg[11] = regs->orig_ax; \
129 pr_reg[12] = regs->ip; \ 128 pr_reg[12] = regs->ip; \
130 pr_reg[13] = regs->cs & 0xffff; \ 129 pr_reg[13] = regs->cs & 0xffff; \
@@ -133,6 +132,18 @@ do { \
133 pr_reg[16] = regs->ss & 0xffff; \ 132 pr_reg[16] = regs->ss & 0xffff; \
134} while (0); 133} while (0);
135 134
135#define ELF_CORE_COPY_REGS(pr_reg, regs) \
136do { \
137 ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
138 pr_reg[10] = get_user_gs(regs); \
139} while (0);
140
141#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \
142do { \
143 ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\
144 savesegment(gs, pr_reg[10]); \
145} while (0);
146
136#define ELF_PLATFORM (utsname()->machine) 147#define ELF_PLATFORM (utsname()->machine)
137#define set_personality_64bit() do { } while (0) 148#define set_personality_64bit() do { } while (0)
138 149
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 52948df9cd1d..f923203dc39a 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -79,7 +79,7 @@ do { \
79#ifdef CONFIG_X86_32 79#ifdef CONFIG_X86_32
80#define deactivate_mm(tsk, mm) \ 80#define deactivate_mm(tsk, mm) \
81do { \ 81do { \
82 loadsegment(gs, 0); \ 82 lazy_load_gs(0); \
83} while (0) 83} while (0)
84#else 84#else
85#define deactivate_mm(tsk, mm) \ 85#define deactivate_mm(tsk, mm) \
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 0b64af4f13ac..aee103b26d01 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -34,6 +34,12 @@
34#define PER_CPU_VAR(var) per_cpu__##var 34#define PER_CPU_VAR(var) per_cpu__##var
35#endif /* SMP */ 35#endif /* SMP */
36 36
37#ifdef CONFIG_X86_64_SMP
38#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
39#else
40#define INIT_PER_CPU_VAR(var) per_cpu__##var
41#endif
42
37#else /* ...!ASSEMBLY */ 43#else /* ...!ASSEMBLY */
38 44
39#include <linux/stringify.h> 45#include <linux/stringify.h>
@@ -45,6 +51,22 @@
45#define __percpu_arg(x) "%" #x 51#define __percpu_arg(x) "%" #x
46#endif 52#endif
47 53
54/*
55 * Initialized pointers to per-cpu variables needed for the boot
56 * processor need to use these macros to get the proper address
57 * offset from __per_cpu_load on SMP.
58 *
59 * There also must be an entry in vmlinux_64.lds.S
60 */
61#define DECLARE_INIT_PER_CPU(var) \
62 extern typeof(per_cpu_var(var)) init_per_cpu_var(var)
63
64#ifdef CONFIG_X86_64_SMP
65#define init_per_cpu_var(var) init_per_cpu__##var
66#else
67#define init_per_cpu_var(var) per_cpu_var(var)
68#endif
69
48/* For arch-specific code, we can use direct single-insn ops (they 70/* For arch-specific code, we can use direct single-insn ops (they
49 * don't give an lvalue though). */ 71 * don't give an lvalue though). */
50extern void __bad_percpu_size(void); 72extern void __bad_percpu_size(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a6643f68fbb1..a0133838b67c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -393,8 +393,14 @@ union irq_stack_union {
393}; 393};
394 394
395DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); 395DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
396DECLARE_INIT_PER_CPU(irq_stack_union);
397
396DECLARE_PER_CPU(char *, irq_stack_ptr); 398DECLARE_PER_CPU(char *, irq_stack_ptr);
399#else /* X86_64 */
400#ifdef CONFIG_CC_STACKPROTECTOR
401DECLARE_PER_CPU(unsigned long, stack_canary);
397#endif 402#endif
403#endif /* X86_64 */
398 404
399extern void print_cpu_info(struct cpuinfo_x86 *); 405extern void print_cpu_info(struct cpuinfo_x86 *);
400extern unsigned int xstate_size; 406extern unsigned int xstate_size;
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6d34d954c228..e304b66abeea 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -28,7 +28,7 @@ struct pt_regs {
28 int xds; 28 int xds;
29 int xes; 29 int xes;
30 int xfs; 30 int xfs;
31 /* int gs; */ 31 int xgs;
32 long orig_eax; 32 long orig_eax;
33 long eip; 33 long eip;
34 int xcs; 34 int xcs;
@@ -50,7 +50,7 @@ struct pt_regs {
50 unsigned long ds; 50 unsigned long ds;
51 unsigned long es; 51 unsigned long es;
52 unsigned long fs; 52 unsigned long fs;
53 /* int gs; */ 53 unsigned long gs;
54 unsigned long orig_ax; 54 unsigned long orig_ax;
55 unsigned long ip; 55 unsigned long ip;
56 unsigned long cs; 56 unsigned long cs;
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 1dc1b51ac623..14e0ed86a6f9 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -61,7 +61,7 @@
61 * 61 *
62 * 26 - ESPFIX small SS 62 * 26 - ESPFIX small SS
63 * 27 - per-cpu [ offset to per-cpu data area ] 63 * 27 - per-cpu [ offset to per-cpu data area ]
64 * 28 - unused 64 * 28 - stack_canary-20 [ for stack protector ]
65 * 29 - unused 65 * 29 - unused
66 * 30 - unused 66 * 30 - unused
67 * 31 - TSS for double fault handler 67 * 31 - TSS for double fault handler
@@ -95,6 +95,13 @@
95#define __KERNEL_PERCPU 0 95#define __KERNEL_PERCPU 0
96#endif 96#endif
97 97
98#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16)
99#ifdef CONFIG_CC_STACKPROTECTOR
100#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8)
101#else
102#define __KERNEL_STACK_CANARY 0
103#endif
104
98#define GDT_ENTRY_DOUBLEFAULT_TSS 31 105#define GDT_ENTRY_DOUBLEFAULT_TSS 31
99 106
100/* 107/*
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 36a700acaf2b..c2d742c6e15f 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -1,8 +1,54 @@
1/*
2 * GCC stack protector support.
3 *
4 * Stack protector works by putting predefined pattern at the start of
5 * the stack frame and verifying that it hasn't been overwritten when
6 * returning from the function. The pattern is called stack canary
7 * and unfortunately gcc requires it to be at a fixed offset from %gs.
8 * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
9 * and x86_32 use segment registers differently and thus handles this
10 * requirement differently.
11 *
12 * On x86_64, %gs is shared by percpu area and stack canary. All
13 * percpu symbols are zero based and %gs points to the base of percpu
14 * area. The first occupant of the percpu area is always
15 * irq_stack_union which contains stack_canary at offset 40. Userland
16 * %gs is always saved and restored on kernel entry and exit using
17 * swapgs, so stack protector doesn't add any complexity there.
18 *
19 * On x86_32, it's slightly more complicated. As in x86_64, %gs is
20 * used for userland TLS. Unfortunately, some processors are much
21 * slower at loading segment registers with different value when
22 * entering and leaving the kernel, so the kernel uses %fs for percpu
23 * area and manages %gs lazily so that %gs is switched only when
24 * necessary, usually during task switch.
25 *
26 * As gcc requires the stack canary at %gs:20, %gs can't be managed
27 * lazily if stack protector is enabled, so the kernel saves and
28 * restores userland %gs on kernel entry and exit. This behavior is
29 * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
30 * system.h to hide the details.
31 */
32
1#ifndef _ASM_STACKPROTECTOR_H 33#ifndef _ASM_STACKPROTECTOR_H
2#define _ASM_STACKPROTECTOR_H 1 34#define _ASM_STACKPROTECTOR_H 1
3 35
36#ifdef CONFIG_CC_STACKPROTECTOR
37
4#include <asm/tsc.h> 38#include <asm/tsc.h>
5#include <asm/processor.h> 39#include <asm/processor.h>
40#include <asm/percpu.h>
41#include <asm/system.h>
42#include <asm/desc.h>
43#include <linux/random.h>
44
45/*
46 * 24 byte read-only segment initializer for stack canary. Linker
47 * can't handle the address bit shifting. Address will be set in
48 * head_32 for boot CPU and setup_per_cpu_areas() for others.
49 */
50#define GDT_STACK_CANARY_INIT \
51 [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } },
6 52
7/* 53/*
8 * Initialize the stackprotector canary value. 54 * Initialize the stackprotector canary value.
@@ -15,12 +61,9 @@ static __always_inline void boot_init_stack_canary(void)
15 u64 canary; 61 u64 canary;
16 u64 tsc; 62 u64 tsc;
17 63
18 /* 64#ifdef CONFIG_X86_64
19 * Build time only check to make sure the stack_canary is at
20 * offset 40 in the pda; this is a gcc ABI requirement
21 */
22 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); 65 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
23 66#endif
24 /* 67 /*
25 * We both use the random pool and the current TSC as a source 68 * We both use the random pool and the current TSC as a source
26 * of randomness. The TSC only matters for very early init, 69 * of randomness. The TSC only matters for very early init,
@@ -32,7 +75,50 @@ static __always_inline void boot_init_stack_canary(void)
32 canary += tsc + (tsc << 32UL); 75 canary += tsc + (tsc << 32UL);
33 76
34 current->stack_canary = canary; 77 current->stack_canary = canary;
78#ifdef CONFIG_X86_64
35 percpu_write(irq_stack_union.stack_canary, canary); 79 percpu_write(irq_stack_union.stack_canary, canary);
80#else
81 percpu_write(stack_canary, canary);
82#endif
36} 83}
37 84
85static inline void setup_stack_canary_segment(int cpu)
86{
87#ifdef CONFIG_X86_32
88 unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20;
89 struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
90 struct desc_struct desc;
91
92 desc = gdt_table[GDT_ENTRY_STACK_CANARY];
93 desc.base0 = canary & 0xffff;
94 desc.base1 = (canary >> 16) & 0xff;
95 desc.base2 = (canary >> 24) & 0xff;
96 write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
97#endif
98}
99
100static inline void load_stack_canary_segment(void)
101{
102#ifdef CONFIG_X86_32
103 asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
104#endif
105}
106
107#else /* CC_STACKPROTECTOR */
108
109#define GDT_STACK_CANARY_INIT
110
111/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */
112
113static inline void setup_stack_canary_segment(int cpu)
114{ }
115
116static inline void load_stack_canary_segment(void)
117{
118#ifdef CONFIG_X86_32
119 asm volatile ("mov %0, %%gs" : : "r" (0));
38#endif 120#endif
121}
122
123#endif /* CC_STACKPROTECTOR */
124#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index c0b0bda754ee..68b1be10cfad 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -29,21 +29,21 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *);
29/* X86_32 only */ 29/* X86_32 only */
30#ifdef CONFIG_X86_32 30#ifdef CONFIG_X86_32
31/* kernel/process_32.c */ 31/* kernel/process_32.c */
32asmlinkage int sys_fork(struct pt_regs); 32int sys_fork(struct pt_regs *);
33asmlinkage int sys_clone(struct pt_regs); 33int sys_clone(struct pt_regs *);
34asmlinkage int sys_vfork(struct pt_regs); 34int sys_vfork(struct pt_regs *);
35asmlinkage int sys_execve(struct pt_regs); 35int sys_execve(struct pt_regs *);
36 36
37/* kernel/signal_32.c */ 37/* kernel/signal_32.c */
38asmlinkage int sys_sigsuspend(int, int, old_sigset_t); 38asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
39asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, 39asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
40 struct old_sigaction __user *); 40 struct old_sigaction __user *);
41asmlinkage int sys_sigaltstack(unsigned long); 41int sys_sigaltstack(struct pt_regs *);
42asmlinkage unsigned long sys_sigreturn(unsigned long); 42unsigned long sys_sigreturn(struct pt_regs *);
43asmlinkage int sys_rt_sigreturn(unsigned long); 43long sys_rt_sigreturn(struct pt_regs *);
44 44
45/* kernel/ioport.c */ 45/* kernel/ioport.c */
46asmlinkage long sys_iopl(unsigned long); 46long sys_iopl(struct pt_regs *);
47 47
48/* kernel/sys_i386_32.c */ 48/* kernel/sys_i386_32.c */
49asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, 49asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
@@ -59,8 +59,8 @@ struct oldold_utsname;
59asmlinkage int sys_olduname(struct oldold_utsname __user *); 59asmlinkage int sys_olduname(struct oldold_utsname __user *);
60 60
61/* kernel/vm86_32.c */ 61/* kernel/vm86_32.c */
62asmlinkage int sys_vm86old(struct pt_regs); 62int sys_vm86old(struct pt_regs *);
63asmlinkage int sys_vm86(struct pt_regs); 63int sys_vm86(struct pt_regs *);
64 64
65#else /* CONFIG_X86_32 */ 65#else /* CONFIG_X86_32 */
66 66
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index c22383743f36..c00bfdbdd456 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -23,6 +23,20 @@ struct task_struct *__switch_to(struct task_struct *prev,
23 23
24#ifdef CONFIG_X86_32 24#ifdef CONFIG_X86_32
25 25
26#ifdef CONFIG_CC_STACKPROTECTOR
27#define __switch_canary \
28 "movl %P[task_canary](%[next]), %%ebx\n\t" \
29 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
30#define __switch_canary_oparam \
31 , [stack_canary] "=m" (per_cpu_var(stack_canary))
32#define __switch_canary_iparam \
33 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
34#else /* CC_STACKPROTECTOR */
35#define __switch_canary
36#define __switch_canary_oparam
37#define __switch_canary_iparam
38#endif /* CC_STACKPROTECTOR */
39
26/* 40/*
27 * Saving eflags is important. It switches not only IOPL between tasks, 41 * Saving eflags is important. It switches not only IOPL between tasks,
28 * it also protects other tasks from NT leaking through sysenter etc. 42 * it also protects other tasks from NT leaking through sysenter etc.
@@ -44,6 +58,7 @@ do { \
44 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ 58 "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
45 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ 59 "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
46 "pushl %[next_ip]\n\t" /* restore EIP */ \ 60 "pushl %[next_ip]\n\t" /* restore EIP */ \
61 __switch_canary \
47 "jmp __switch_to\n" /* regparm call */ \ 62 "jmp __switch_to\n" /* regparm call */ \
48 "1:\t" \ 63 "1:\t" \
49 "popl %%ebp\n\t" /* restore EBP */ \ 64 "popl %%ebp\n\t" /* restore EBP */ \
@@ -58,6 +73,8 @@ do { \
58 "=b" (ebx), "=c" (ecx), "=d" (edx), \ 73 "=b" (ebx), "=c" (ecx), "=d" (edx), \
59 "=S" (esi), "=D" (edi) \ 74 "=S" (esi), "=D" (edi) \
60 \ 75 \
76 __switch_canary_oparam \
77 \
61 /* input parameters: */ \ 78 /* input parameters: */ \
62 : [next_sp] "m" (next->thread.sp), \ 79 : [next_sp] "m" (next->thread.sp), \
63 [next_ip] "m" (next->thread.ip), \ 80 [next_ip] "m" (next->thread.ip), \
@@ -66,6 +83,8 @@ do { \
66 [prev] "a" (prev), \ 83 [prev] "a" (prev), \
67 [next] "d" (next) \ 84 [next] "d" (next) \
68 \ 85 \
86 __switch_canary_iparam \
87 \
69 : /* reloaded segment registers */ \ 88 : /* reloaded segment registers */ \
70 "memory"); \ 89 "memory"); \
71} while (0) 90} while (0)
@@ -182,6 +201,25 @@ extern void native_load_gs_index(unsigned);
182#define savesegment(seg, value) \ 201#define savesegment(seg, value) \
183 asm("mov %%" #seg ",%0":"=r" (value) : : "memory") 202 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
184 203
204/*
205 * x86_32 user gs accessors.
206 */
207#ifdef CONFIG_X86_32
208#ifdef CONFIG_X86_32_LAZY_GS
209#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
210#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
211#define task_user_gs(tsk) ((tsk)->thread.gs)
212#define lazy_save_gs(v) savesegment(gs, (v))
213#define lazy_load_gs(v) loadsegment(gs, (v))
214#else /* X86_32_LAZY_GS */
215#define get_user_gs(regs) (u16)((regs)->gs)
216#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
217#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
218#define lazy_save_gs(v) do { } while (0)
219#define lazy_load_gs(v) do { } while (0)
220#endif /* X86_32_LAZY_GS */
221#endif /* X86_32 */
222
185static inline unsigned long get_limit(unsigned long segment) 223static inline unsigned long get_limit(unsigned long segment)
186{ 224{
187 unsigned long __limit; 225 unsigned long __limit;
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index cf3bb053da0b..0d5342515b86 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_regs *, long);
41dotraplinkage void do_overflow(struct pt_regs *, long); 41dotraplinkage void do_overflow(struct pt_regs *, long);
42dotraplinkage void do_bounds(struct pt_regs *, long); 42dotraplinkage void do_bounds(struct pt_regs *, long);
43dotraplinkage void do_invalid_op(struct pt_regs *, long); 43dotraplinkage void do_invalid_op(struct pt_regs *, long);
44dotraplinkage void do_device_not_available(struct pt_regs); 44dotraplinkage void do_device_not_available(struct pt_regs *, long);
45dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); 45dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long);
46dotraplinkage void do_invalid_TSS(struct pt_regs *, long); 46dotraplinkage void do_invalid_TSS(struct pt_regs *, long);
47dotraplinkage void do_segment_not_present(struct pt_regs *, long); 47dotraplinkage void do_segment_not_present(struct pt_regs *, long);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 0ec6de4bcb0b..b685ece89d5c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -186,7 +186,7 @@ extern int __get_user_bad(void);
186 186
187 187
188#ifdef CONFIG_X86_32 188#ifdef CONFIG_X86_32
189#define __put_user_asm_u64(x, addr, err) \ 189#define __put_user_asm_u64(x, addr, err, errret) \
190 asm volatile("1: movl %%eax,0(%2)\n" \ 190 asm volatile("1: movl %%eax,0(%2)\n" \
191 "2: movl %%edx,4(%2)\n" \ 191 "2: movl %%edx,4(%2)\n" \
192 "3:\n" \ 192 "3:\n" \
@@ -197,7 +197,7 @@ extern int __get_user_bad(void);
197 _ASM_EXTABLE(1b, 4b) \ 197 _ASM_EXTABLE(1b, 4b) \
198 _ASM_EXTABLE(2b, 4b) \ 198 _ASM_EXTABLE(2b, 4b) \
199 : "=r" (err) \ 199 : "=r" (err) \
200 : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) 200 : "A" (x), "r" (addr), "i" (errret), "0" (err))
201 201
202#define __put_user_asm_ex_u64(x, addr) \ 202#define __put_user_asm_ex_u64(x, addr) \
203 asm volatile("1: movl %%eax,0(%1)\n" \ 203 asm volatile("1: movl %%eax,0(%1)\n" \
@@ -211,8 +211,8 @@ extern int __get_user_bad(void);
211 asm volatile("call __put_user_8" : "=a" (__ret_pu) \ 211 asm volatile("call __put_user_8" : "=a" (__ret_pu) \
212 : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") 212 : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx")
213#else 213#else
214#define __put_user_asm_u64(x, ptr, retval) \ 214#define __put_user_asm_u64(x, ptr, retval, errret) \
215 __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) 215 __put_user_asm(x, ptr, retval, "q", "", "Zr", errret)
216#define __put_user_asm_ex_u64(x, addr) \ 216#define __put_user_asm_ex_u64(x, addr) \
217 __put_user_asm_ex(x, addr, "q", "", "Zr") 217 __put_user_asm_ex(x, addr, "q", "", "Zr")
218#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) 218#define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu)
@@ -289,7 +289,8 @@ do { \
289 __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ 289 __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \
290 break; \ 290 break; \
291 case 8: \ 291 case 8: \
292 __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval); \ 292 __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \
293 errret); \
293 break; \ 294 break; \
294 default: \ 295 default: \
295 __put_user_bad(); \ 296 __put_user_bad(); \
@@ -525,8 +526,6 @@ struct __large_struct { unsigned long buf[100]; };
525 */ 526 */
526#define get_user_try uaccess_try 527#define get_user_try uaccess_try
527#define get_user_catch(err) uaccess_catch(err) 528#define get_user_catch(err) uaccess_catch(err)
528#define put_user_try uaccess_try
529#define put_user_catch(err) uaccess_catch(err)
530 529
531#define get_user_ex(x, ptr) do { \ 530#define get_user_ex(x, ptr) do { \
532 unsigned long __gue_val; \ 531 unsigned long __gue_val; \
@@ -534,9 +533,29 @@ struct __large_struct { unsigned long buf[100]; };
534 (x) = (__force __typeof__(*(ptr)))__gue_val; \ 533 (x) = (__force __typeof__(*(ptr)))__gue_val; \
535} while (0) 534} while (0)
536 535
536#ifdef CONFIG_X86_WP_WORKS_OK
537
538#define put_user_try uaccess_try
539#define put_user_catch(err) uaccess_catch(err)
540
537#define put_user_ex(x, ptr) \ 541#define put_user_ex(x, ptr) \
538 __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) 542 __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
539 543
544#else /* !CONFIG_X86_WP_WORKS_OK */
545
546#define put_user_try do { \
547 int __uaccess_err = 0;
548
549#define put_user_catch(err) \
550 (err) |= __uaccess_err; \
551} while (0)
552
553#define put_user_ex(x, ptr) do { \
554 __uaccess_err |= __put_user(x, ptr); \
555} while (0)
556
557#endif /* CONFIG_X86_WP_WORKS_OK */
558
540/* 559/*
541 * movsl can be slow when source and dest are not both 8-byte aligned 560 * movsl can be slow when source and dest are not both 8-byte aligned
542 */ 561 */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 8ac1d7e312f3..8242bf965812 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -3,6 +3,9 @@
3 3
4enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 4enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
5 5
6struct cpumask;
7struct mm_struct;
8
6#ifdef CONFIG_X86_UV 9#ifdef CONFIG_X86_UV
7 10
8extern enum uv_system_type get_uv_system_type(void); 11extern enum uv_system_type get_uv_system_type(void);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ee4df08feee6..fbf2f33e3080 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@ void foo(void)
75 OFFSET(PT_DS, pt_regs, ds); 75 OFFSET(PT_DS, pt_regs, ds);
76 OFFSET(PT_ES, pt_regs, es); 76 OFFSET(PT_ES, pt_regs, es);
77 OFFSET(PT_FS, pt_regs, fs); 77 OFFSET(PT_FS, pt_regs, fs);
78 OFFSET(PT_GS, pt_regs, gs);
78 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); 79 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
79 OFFSET(PT_EIP, pt_regs, ip); 80 OFFSET(PT_EIP, pt_regs, ip);
80 OFFSET(PT_CS, pt_regs, cs); 81 OFFSET(PT_CS, pt_regs, cs);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cbcdb796d47f..e8f4a386bd9d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -39,6 +39,7 @@
39#include <asm/sections.h> 39#include <asm/sections.h>
40#include <asm/setup.h> 40#include <asm/setup.h>
41#include <asm/hypervisor.h> 41#include <asm/hypervisor.h>
42#include <asm/stackprotector.h>
42 43
43#include "cpu.h" 44#include "cpu.h"
44 45
@@ -122,6 +123,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
122 123
123 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 124 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
124 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 125 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
126 GDT_STACK_CANARY_INIT
125#endif 127#endif
126} }; 128} };
127EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 129EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -304,6 +306,7 @@ void load_percpu_segment(int cpu)
304 loadsegment(gs, 0); 306 loadsegment(gs, 0);
305 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); 307 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
306#endif 308#endif
309 load_stack_canary_segment();
307} 310}
308 311
309/* Current gdt points %fs at the "master" per-cpu area: after this, 312/* Current gdt points %fs at the "master" per-cpu area: after this,
@@ -938,12 +941,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
938 941
939DEFINE_PER_CPU_FIRST(union irq_stack_union, 942DEFINE_PER_CPU_FIRST(union irq_stack_union,
940 irq_stack_union) __aligned(PAGE_SIZE); 943 irq_stack_union) __aligned(PAGE_SIZE);
941#ifdef CONFIG_SMP
942DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
943#else
944DEFINE_PER_CPU(char *, irq_stack_ptr) = 944DEFINE_PER_CPU(char *, irq_stack_ptr) =
945 per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 945 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
946#endif
947 946
948DEFINE_PER_CPU(unsigned long, kernel_stack) = 947DEFINE_PER_CPU(unsigned long, kernel_stack) =
949 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 948 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
@@ -986,16 +985,21 @@ unsigned long kernel_eflags;
986 */ 985 */
987DEFINE_PER_CPU(struct orig_ist, orig_ist); 986DEFINE_PER_CPU(struct orig_ist, orig_ist);
988 987
989#else 988#else /* x86_64 */
990 989
991/* Make sure %fs is initialized properly in idle threads */ 990#ifdef CONFIG_CC_STACKPROTECTOR
991DEFINE_PER_CPU(unsigned long, stack_canary);
992#endif
993
994/* Make sure %fs and %gs are initialized properly in idle threads */
992struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 995struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
993{ 996{
994 memset(regs, 0, sizeof(struct pt_regs)); 997 memset(regs, 0, sizeof(struct pt_regs));
995 regs->fs = __KERNEL_PERCPU; 998 regs->fs = __KERNEL_PERCPU;
999 regs->gs = __KERNEL_STACK_CANARY;
996 return regs; 1000 return regs;
997} 1001}
998#endif 1002#endif /* x86_64 */
999 1003
1000/* 1004/*
1001 * cpu_init() initializes state that is per-CPU. Some data is already 1005 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -1157,9 +1161,6 @@ void __cpuinit cpu_init(void)
1157 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1161 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1158#endif 1162#endif
1159 1163
1160 /* Clear %gs. */
1161 asm volatile ("mov %0, %%gs" : : "r" (0));
1162
1163 /* Clear all 6 debug registers: */ 1164 /* Clear all 6 debug registers: */
1164 set_debugreg(0, 0); 1165 set_debugreg(0, 0);
1165 set_debugreg(0, 1); 1166 set_debugreg(0, 1);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 65efd42454be..e99206831459 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
30 * 1C(%esp) - %ds 30 * 1C(%esp) - %ds
31 * 20(%esp) - %es 31 * 20(%esp) - %es
32 * 24(%esp) - %fs 32 * 24(%esp) - %fs
33 * 28(%esp) - orig_eax 33 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
34 * 2C(%esp) - %eip 34 * 2C(%esp) - orig_eax
35 * 30(%esp) - %cs 35 * 30(%esp) - %eip
36 * 34(%esp) - %eflags 36 * 34(%esp) - %cs
37 * 38(%esp) - %oldesp 37 * 38(%esp) - %eflags
38 * 3C(%esp) - %oldss 38 * 3C(%esp) - %oldesp
39 * 40(%esp) - %oldss
39 * 40 *
40 * "current" is in register %ebx during any slow entries. 41 * "current" is in register %ebx during any slow entries.
41 */ 42 */
@@ -101,121 +102,221 @@
101#define resume_userspace_sig resume_userspace 102#define resume_userspace_sig resume_userspace
102#endif 103#endif
103 104
104#define SAVE_ALL \ 105/*
105 cld; \ 106 * User gs save/restore
106 pushl %fs; \ 107 *
107 CFI_ADJUST_CFA_OFFSET 4;\ 108 * %gs is used for userland TLS and kernel only uses it for stack
108 /*CFI_REL_OFFSET fs, 0;*/\ 109 * canary which is required to be at %gs:20 by gcc. Read the comment
109 pushl %es; \ 110 * at the top of stackprotector.h for more info.
110 CFI_ADJUST_CFA_OFFSET 4;\ 111 *
111 /*CFI_REL_OFFSET es, 0;*/\ 112 * Local labels 98 and 99 are used.
112 pushl %ds; \ 113 */
113 CFI_ADJUST_CFA_OFFSET 4;\ 114#ifdef CONFIG_X86_32_LAZY_GS
114 /*CFI_REL_OFFSET ds, 0;*/\ 115
115 pushl %eax; \ 116 /* unfortunately push/pop can't be no-op */
116 CFI_ADJUST_CFA_OFFSET 4;\ 117.macro PUSH_GS
117 CFI_REL_OFFSET eax, 0;\ 118 pushl $0
118 pushl %ebp; \ 119 CFI_ADJUST_CFA_OFFSET 4
119 CFI_ADJUST_CFA_OFFSET 4;\ 120.endm
120 CFI_REL_OFFSET ebp, 0;\ 121.macro POP_GS pop=0
121 pushl %edi; \ 122 addl $(4 + \pop), %esp
122 CFI_ADJUST_CFA_OFFSET 4;\ 123 CFI_ADJUST_CFA_OFFSET -(4 + \pop)
123 CFI_REL_OFFSET edi, 0;\ 124.endm
124 pushl %esi; \ 125.macro POP_GS_EX
125 CFI_ADJUST_CFA_OFFSET 4;\ 126.endm
126 CFI_REL_OFFSET esi, 0;\ 127
127 pushl %edx; \ 128 /* all the rest are no-op */
128 CFI_ADJUST_CFA_OFFSET 4;\ 129.macro PTGS_TO_GS
129 CFI_REL_OFFSET edx, 0;\ 130.endm
130 pushl %ecx; \ 131.macro PTGS_TO_GS_EX
131 CFI_ADJUST_CFA_OFFSET 4;\ 132.endm
132 CFI_REL_OFFSET ecx, 0;\ 133.macro GS_TO_REG reg
133 pushl %ebx; \ 134.endm
134 CFI_ADJUST_CFA_OFFSET 4;\ 135.macro REG_TO_PTGS reg
135 CFI_REL_OFFSET ebx, 0;\ 136.endm
136 movl $(__USER_DS), %edx; \ 137.macro SET_KERNEL_GS reg
137 movl %edx, %ds; \ 138.endm
138 movl %edx, %es; \ 139
139 movl $(__KERNEL_PERCPU), %edx; \ 140#else /* CONFIG_X86_32_LAZY_GS */
141
142.macro PUSH_GS
143 pushl %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/
146.endm
147
148.macro POP_GS pop=0
14998: popl %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/
152 .if \pop <> 0
153 add $\pop, %esp
154 CFI_ADJUST_CFA_OFFSET -\pop
155 .endif
156.endm
157.macro POP_GS_EX
158.pushsection .fixup, "ax"
15999: movl $0, (%esp)
160 jmp 98b
161.section __ex_table, "a"
162 .align 4
163 .long 98b, 99b
164.popsection
165.endm
166
167.macro PTGS_TO_GS
16898: mov PT_GS(%esp), %gs
169.endm
170.macro PTGS_TO_GS_EX
171.pushsection .fixup, "ax"
17299: movl $0, PT_GS(%esp)
173 jmp 98b
174.section __ex_table, "a"
175 .align 4
176 .long 98b, 99b
177.popsection
178.endm
179
180.macro GS_TO_REG reg
181 movl %gs, \reg
182 /*CFI_REGISTER gs, \reg*/
183.endm
184.macro REG_TO_PTGS reg
185 movl \reg, PT_GS(%esp)
186 /*CFI_REL_OFFSET gs, PT_GS*/
187.endm
188.macro SET_KERNEL_GS reg
189 movl $(__KERNEL_STACK_CANARY), \reg
190 movl \reg, %gs
191.endm
192
193#endif /* CONFIG_X86_32_LAZY_GS */
194
195.macro SAVE_ALL
196 cld
197 PUSH_GS
198 pushl %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0
210 pushl %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0
213 pushl %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0
216 pushl %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0
219 pushl %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0
222 pushl %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0
225 pushl %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx
229 movl %edx, %ds
230 movl %edx, %es
231 movl $(__KERNEL_PERCPU), %edx
140 movl %edx, %fs 232 movl %edx, %fs
233 SET_KERNEL_GS %edx
234.endm
141 235
142#define RESTORE_INT_REGS \ 236.macro RESTORE_INT_REGS
143 popl %ebx; \ 237 popl %ebx
144 CFI_ADJUST_CFA_OFFSET -4;\ 238 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE ebx;\ 239 CFI_RESTORE ebx
146 popl %ecx; \ 240 popl %ecx
147 CFI_ADJUST_CFA_OFFSET -4;\ 241 CFI_ADJUST_CFA_OFFSET -4
148 CFI_RESTORE ecx;\ 242 CFI_RESTORE ecx
149 popl %edx; \ 243 popl %edx
150 CFI_ADJUST_CFA_OFFSET -4;\ 244 CFI_ADJUST_CFA_OFFSET -4
151 CFI_RESTORE edx;\ 245 CFI_RESTORE edx
152 popl %esi; \ 246 popl %esi
153 CFI_ADJUST_CFA_OFFSET -4;\ 247 CFI_ADJUST_CFA_OFFSET -4
154 CFI_RESTORE esi;\ 248 CFI_RESTORE esi
155 popl %edi; \ 249 popl %edi
156 CFI_ADJUST_CFA_OFFSET -4;\ 250 CFI_ADJUST_CFA_OFFSET -4
157 CFI_RESTORE edi;\ 251 CFI_RESTORE edi
158 popl %ebp; \ 252 popl %ebp
159 CFI_ADJUST_CFA_OFFSET -4;\ 253 CFI_ADJUST_CFA_OFFSET -4
160 CFI_RESTORE ebp;\ 254 CFI_RESTORE ebp
161 popl %eax; \ 255 popl %eax
162 CFI_ADJUST_CFA_OFFSET -4;\ 256 CFI_ADJUST_CFA_OFFSET -4
163 CFI_RESTORE eax 257 CFI_RESTORE eax
258.endm
164 259
165#define RESTORE_REGS \ 260.macro RESTORE_REGS pop=0
166 RESTORE_INT_REGS; \ 261 RESTORE_INT_REGS
1671: popl %ds; \ 2621: popl %ds
168 CFI_ADJUST_CFA_OFFSET -4;\ 263 CFI_ADJUST_CFA_OFFSET -4
169 /*CFI_RESTORE ds;*/\ 264 /*CFI_RESTORE ds;*/
1702: popl %es; \ 2652: popl %es
171 CFI_ADJUST_CFA_OFFSET -4;\ 266 CFI_ADJUST_CFA_OFFSET -4
172 /*CFI_RESTORE es;*/\ 267 /*CFI_RESTORE es;*/
1733: popl %fs; \ 2683: popl %fs
174 CFI_ADJUST_CFA_OFFSET -4;\ 269 CFI_ADJUST_CFA_OFFSET -4
175 /*CFI_RESTORE fs;*/\ 270 /*CFI_RESTORE fs;*/
176.pushsection .fixup,"ax"; \ 271 POP_GS \pop
1774: movl $0,(%esp); \ 272.pushsection .fixup, "ax"
178 jmp 1b; \ 2734: movl $0, (%esp)
1795: movl $0,(%esp); \ 274 jmp 1b
180 jmp 2b; \ 2755: movl $0, (%esp)
1816: movl $0,(%esp); \ 276 jmp 2b
182 jmp 3b; \ 2776: movl $0, (%esp)
183.section __ex_table,"a";\ 278 jmp 3b
184 .align 4; \ 279.section __ex_table, "a"
185 .long 1b,4b; \ 280 .align 4
186 .long 2b,5b; \ 281 .long 1b, 4b
187 .long 3b,6b; \ 282 .long 2b, 5b
283 .long 3b, 6b
188.popsection 284.popsection
285 POP_GS_EX
286.endm
189 287
190#define RING0_INT_FRAME \ 288.macro RING0_INT_FRAME
191 CFI_STARTPROC simple;\ 289 CFI_STARTPROC simple
192 CFI_SIGNAL_FRAME;\ 290 CFI_SIGNAL_FRAME
193 CFI_DEF_CFA esp, 3*4;\ 291 CFI_DEF_CFA esp, 3*4
194 /*CFI_OFFSET cs, -2*4;*/\ 292 /*CFI_OFFSET cs, -2*4;*/
195 CFI_OFFSET eip, -3*4 293 CFI_OFFSET eip, -3*4
294.endm
196 295
197#define RING0_EC_FRAME \ 296.macro RING0_EC_FRAME
198 CFI_STARTPROC simple;\ 297 CFI_STARTPROC simple
199 CFI_SIGNAL_FRAME;\ 298 CFI_SIGNAL_FRAME
200 CFI_DEF_CFA esp, 4*4;\ 299 CFI_DEF_CFA esp, 4*4
201 /*CFI_OFFSET cs, -2*4;*/\ 300 /*CFI_OFFSET cs, -2*4;*/
202 CFI_OFFSET eip, -3*4 301 CFI_OFFSET eip, -3*4
302.endm
203 303
204#define RING0_PTREGS_FRAME \ 304.macro RING0_PTREGS_FRAME
205 CFI_STARTPROC simple;\ 305 CFI_STARTPROC simple
206 CFI_SIGNAL_FRAME;\ 306 CFI_SIGNAL_FRAME
207 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ 307 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
208 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ 308 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
209 CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ 309 CFI_OFFSET eip, PT_EIP-PT_OLDESP
210 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ 310 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
211 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ 311 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
212 CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ 312 CFI_OFFSET eax, PT_EAX-PT_OLDESP
213 CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ 313 CFI_OFFSET ebp, PT_EBP-PT_OLDESP
214 CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ 314 CFI_OFFSET edi, PT_EDI-PT_OLDESP
215 CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ 315 CFI_OFFSET esi, PT_ESI-PT_OLDESP
216 CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ 316 CFI_OFFSET edx, PT_EDX-PT_OLDESP
217 CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ 317 CFI_OFFSET ecx, PT_ECX-PT_OLDESP
218 CFI_OFFSET ebx, PT_EBX-PT_OLDESP 318 CFI_OFFSET ebx, PT_EBX-PT_OLDESP
319.endm
219 320
220ENTRY(ret_from_fork) 321ENTRY(ret_from_fork)
221 CFI_STARTPROC 322 CFI_STARTPROC
@@ -362,6 +463,7 @@ sysenter_exit:
362 xorl %ebp,%ebp 463 xorl %ebp,%ebp
363 TRACE_IRQS_ON 464 TRACE_IRQS_ON
3641: mov PT_FS(%esp), %fs 4651: mov PT_FS(%esp), %fs
466 PTGS_TO_GS
365 ENABLE_INTERRUPTS_SYSEXIT 467 ENABLE_INTERRUPTS_SYSEXIT
366 468
367#ifdef CONFIG_AUDITSYSCALL 469#ifdef CONFIG_AUDITSYSCALL
@@ -410,6 +512,7 @@ sysexit_audit:
410 .align 4 512 .align 4
411 .long 1b,2b 513 .long 1b,2b
412.popsection 514.popsection
515 PTGS_TO_GS_EX
413ENDPROC(ia32_sysenter_target) 516ENDPROC(ia32_sysenter_target)
414 517
415 # system call handler stub 518 # system call handler stub
@@ -452,8 +555,7 @@ restore_all:
452restore_nocheck: 555restore_nocheck:
453 TRACE_IRQS_IRET 556 TRACE_IRQS_IRET
454restore_nocheck_notrace: 557restore_nocheck_notrace:
455 RESTORE_REGS 558 RESTORE_REGS 4 # skip orig_eax/error_code
456 addl $4, %esp # skip orig_eax/error_code
457 CFI_ADJUST_CFA_OFFSET -4 559 CFI_ADJUST_CFA_OFFSET -4
458irq_return: 560irq_return:
459 INTERRUPT_RETURN 561 INTERRUPT_RETURN
@@ -595,28 +697,50 @@ syscall_badsys:
595END(syscall_badsys) 697END(syscall_badsys)
596 CFI_ENDPROC 698 CFI_ENDPROC
597 699
598#define FIXUP_ESPFIX_STACK \ 700/*
599 /* since we are on a wrong stack, we cant make it a C code :( */ \ 701 * System calls that need a pt_regs pointer.
600 PER_CPU(gdt_page, %ebx); \ 702 */
601 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 703#define PTREGSCALL(name) \
602 addl %esp, %eax; \ 704 ALIGN; \
603 pushl $__KERNEL_DS; \ 705ptregs_##name: \
604 CFI_ADJUST_CFA_OFFSET 4; \ 706 leal 4(%esp),%eax; \
605 pushl %eax; \ 707 jmp sys_##name;
606 CFI_ADJUST_CFA_OFFSET 4; \ 708
607 lss (%esp), %esp; \ 709PTREGSCALL(iopl)
608 CFI_ADJUST_CFA_OFFSET -8; 710PTREGSCALL(fork)
609#define UNWIND_ESPFIX_STACK \ 711PTREGSCALL(clone)
610 movl %ss, %eax; \ 712PTREGSCALL(vfork)
611 /* see if on espfix stack */ \ 713PTREGSCALL(execve)
612 cmpw $__ESPFIX_SS, %ax; \ 714PTREGSCALL(sigaltstack)
613 jne 27f; \ 715PTREGSCALL(sigreturn)
614 movl $__KERNEL_DS, %eax; \ 716PTREGSCALL(rt_sigreturn)
615 movl %eax, %ds; \ 717PTREGSCALL(vm86)
616 movl %eax, %es; \ 718PTREGSCALL(vm86old)
617 /* switch to normal stack */ \ 719
618 FIXUP_ESPFIX_STACK; \ 720.macro FIXUP_ESPFIX_STACK
61927:; 721 /* since we are on a wrong stack, we cant make it a C code :( */
722 PER_CPU(gdt_page, %ebx)
723 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
724 addl %esp, %eax
725 pushl $__KERNEL_DS
726 CFI_ADJUST_CFA_OFFSET 4
727 pushl %eax
728 CFI_ADJUST_CFA_OFFSET 4
729 lss (%esp), %esp
730 CFI_ADJUST_CFA_OFFSET -8
731.endm
732.macro UNWIND_ESPFIX_STACK
733 movl %ss, %eax
734 /* see if on espfix stack */
735 cmpw $__ESPFIX_SS, %ax
736 jne 27f
737 movl $__KERNEL_DS, %eax
738 movl %eax, %ds
739 movl %eax, %es
740 /* switch to normal stack */
741 FIXUP_ESPFIX_STACK
74227:
743.endm
620 744
621/* 745/*
622 * Build the entry stubs and pointer table with some assembler magic. 746 * Build the entry stubs and pointer table with some assembler magic.
@@ -1070,7 +1194,10 @@ ENTRY(page_fault)
1070 CFI_ADJUST_CFA_OFFSET 4 1194 CFI_ADJUST_CFA_OFFSET 4
1071 ALIGN 1195 ALIGN
1072error_code: 1196error_code:
1073 /* the function address is in %fs's slot on the stack */ 1197 /* the function address is in %gs's slot on the stack */
1198 pushl %fs
1199 CFI_ADJUST_CFA_OFFSET 4
1200 /*CFI_REL_OFFSET fs, 0*/
1074 pushl %es 1201 pushl %es
1075 CFI_ADJUST_CFA_OFFSET 4 1202 CFI_ADJUST_CFA_OFFSET 4
1076 /*CFI_REL_OFFSET es, 0*/ 1203 /*CFI_REL_OFFSET es, 0*/
@@ -1099,20 +1226,15 @@ error_code:
1099 CFI_ADJUST_CFA_OFFSET 4 1226 CFI_ADJUST_CFA_OFFSET 4
1100 CFI_REL_OFFSET ebx, 0 1227 CFI_REL_OFFSET ebx, 0
1101 cld 1228 cld
1102 pushl %fs
1103 CFI_ADJUST_CFA_OFFSET 4
1104 /*CFI_REL_OFFSET fs, 0*/
1105 movl $(__KERNEL_PERCPU), %ecx 1229 movl $(__KERNEL_PERCPU), %ecx
1106 movl %ecx, %fs 1230 movl %ecx, %fs
1107 UNWIND_ESPFIX_STACK 1231 UNWIND_ESPFIX_STACK
1108 popl %ecx 1232 GS_TO_REG %ecx
1109 CFI_ADJUST_CFA_OFFSET -4 1233 movl PT_GS(%esp), %edi # get the function address
1110 /*CFI_REGISTER es, ecx*/
1111 movl PT_FS(%esp), %edi # get the function address
1112 movl PT_ORIG_EAX(%esp), %edx # get the error code 1234 movl PT_ORIG_EAX(%esp), %edx # get the error code
1113 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 1235 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1114 mov %ecx, PT_FS(%esp) 1236 REG_TO_PTGS %ecx
1115 /*CFI_REL_OFFSET fs, ES*/ 1237 SET_KERNEL_GS %ecx
1116 movl $(__USER_DS), %ecx 1238 movl $(__USER_DS), %ecx
1117 movl %ecx, %ds 1239 movl %ecx, %ds
1118 movl %ecx, %es 1240 movl %ecx, %es
@@ -1136,26 +1258,27 @@ END(page_fault)
1136 * by hand onto the new stack - while updating the return eip past 1258 * by hand onto the new stack - while updating the return eip past
1137 * the instruction that would have done it for sysenter. 1259 * the instruction that would have done it for sysenter.
1138 */ 1260 */
1139#define FIX_STACK(offset, ok, label) \ 1261.macro FIX_STACK offset ok label
1140 cmpw $__KERNEL_CS,4(%esp); \ 1262 cmpw $__KERNEL_CS, 4(%esp)
1141 jne ok; \ 1263 jne \ok
1142label: \ 1264\label:
1143 movl TSS_sysenter_sp0+offset(%esp),%esp; \ 1265 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1144 CFI_DEF_CFA esp, 0; \ 1266 CFI_DEF_CFA esp, 0
1145 CFI_UNDEFINED eip; \ 1267 CFI_UNDEFINED eip
1146 pushfl; \ 1268 pushfl
1147 CFI_ADJUST_CFA_OFFSET 4; \ 1269 CFI_ADJUST_CFA_OFFSET 4
1148 pushl $__KERNEL_CS; \ 1270 pushl $__KERNEL_CS
1149 CFI_ADJUST_CFA_OFFSET 4; \ 1271 CFI_ADJUST_CFA_OFFSET 4
1150 pushl $sysenter_past_esp; \ 1272 pushl $sysenter_past_esp
1151 CFI_ADJUST_CFA_OFFSET 4; \ 1273 CFI_ADJUST_CFA_OFFSET 4
1152 CFI_REL_OFFSET eip, 0 1274 CFI_REL_OFFSET eip, 0
1275.endm
1153 1276
1154ENTRY(debug) 1277ENTRY(debug)
1155 RING0_INT_FRAME 1278 RING0_INT_FRAME
1156 cmpl $ia32_sysenter_target,(%esp) 1279 cmpl $ia32_sysenter_target,(%esp)
1157 jne debug_stack_correct 1280 jne debug_stack_correct
1158 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 1281 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1159debug_stack_correct: 1282debug_stack_correct:
1160 pushl $-1 # mark this as an int 1283 pushl $-1 # mark this as an int
1161 CFI_ADJUST_CFA_OFFSET 4 1284 CFI_ADJUST_CFA_OFFSET 4
@@ -1213,7 +1336,7 @@ nmi_stack_correct:
1213 1336
1214nmi_stack_fixup: 1337nmi_stack_fixup:
1215 RING0_INT_FRAME 1338 RING0_INT_FRAME
1216 FIX_STACK(12,nmi_stack_correct, 1) 1339 FIX_STACK 12, nmi_stack_correct, 1
1217 jmp nmi_stack_correct 1340 jmp nmi_stack_correct
1218 1341
1219nmi_debug_stack_check: 1342nmi_debug_stack_check:
@@ -1224,7 +1347,7 @@ nmi_debug_stack_check:
1224 jb nmi_stack_correct 1347 jb nmi_stack_correct
1225 cmpl $debug_esp_fix_insn,(%esp) 1348 cmpl $debug_esp_fix_insn,(%esp)
1226 ja nmi_stack_correct 1349 ja nmi_stack_correct
1227 FIX_STACK(24,nmi_stack_correct, 1) 1350 FIX_STACK 24, nmi_stack_correct, 1
1228 jmp nmi_stack_correct 1351 jmp nmi_stack_correct
1229 1352
1230nmi_espfix_stack: 1353nmi_espfix_stack:
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 722464c520cf..2a0aad7718d5 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,7 @@
19#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
20#include <asm/setup.h> 20#include <asm/setup.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23/* Physical address */ 24/* Physical address */
24#define pa(X) ((X) - __PAGE_OFFSET) 25#define pa(X) ((X) - __PAGE_OFFSET)
@@ -437,8 +438,26 @@ is386: movl $2,%ecx # set MP
437 movl $(__KERNEL_PERCPU), %eax 438 movl $(__KERNEL_PERCPU), %eax
438 movl %eax,%fs # set this cpu's percpu 439 movl %eax,%fs # set this cpu's percpu
439 440
440 xorl %eax,%eax # Clear GS and LDT 441#ifdef CONFIG_CC_STACKPROTECTOR
442 /*
443 * The linker can't handle this by relocation. Manually set
444 * base address in stack canary segment descriptor.
445 */
446 cmpb $0,ready
447 jne 1f
448 movl $per_cpu__gdt_page,%eax
449 movl $per_cpu__stack_canary,%ecx
450 subl $20, %ecx
451 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
452 shrl $16, %ecx
453 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
454 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4551:
456#endif
457 movl $(__KERNEL_STACK_CANARY),%eax
441 movl %eax,%gs 458 movl %eax,%gs
459
460 xorl %eax,%eax # Clear LDT
442 lldt %ax 461 lldt %ax
443 462
444 cld # gcc2 wants the direction flag cleared at all times 463 cld # gcc2 wants the direction flag cleared at all times
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a0a2b5ca9b7d..2e648e3a5ea4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64)
205 pushq $0 205 pushq $0
206 popfq 206 popfq
207 207
208#ifdef CONFIG_SMP
209 /*
210 * Fix up static pointers that need __per_cpu_load added. The assembler
211 * is unable to do this directly. This is only needed for the boot cpu.
212 * These values are set up with the correct base addresses by C code for
213 * secondary cpus.
214 */
215 movq initial_gs(%rip), %rax
216 cmpl $0, per_cpu__cpu_number(%rax)
217 jne 1f
218 addq %rax, early_gdt_descr_base(%rip)
2191:
220#endif
221 /* 208 /*
222 * We must switch to a new descriptor in kernel space for the GDT 209 * We must switch to a new descriptor in kernel space for the GDT
223 * because soon the kernel won't have access anymore to the userspace 210 * because soon the kernel won't have access anymore to the userspace
@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64)
275 ENTRY(initial_code) 262 ENTRY(initial_code)
276 .quad x86_64_start_kernel 263 .quad x86_64_start_kernel
277 ENTRY(initial_gs) 264 ENTRY(initial_gs)
278#ifdef CONFIG_SMP 265 .quad INIT_PER_CPU_VAR(irq_stack_union)
279 .quad __per_cpu_load
280#else
281 .quad PER_CPU_VAR(irq_stack_union)
282#endif
283 __FINITDATA 266 __FINITDATA
284 267
285 ENTRY(stack_start) 268 ENTRY(stack_start)
@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt)
425early_gdt_descr: 408early_gdt_descr:
426 .word GDT_ENTRIES*8-1 409 .word GDT_ENTRIES*8-1
427early_gdt_descr_base: 410early_gdt_descr_base:
428 .quad per_cpu__gdt_page 411 .quad INIT_PER_CPU_VAR(gdt_page)
429 412
430ENTRY(phys_base) 413ENTRY(phys_base)
431 /* This must match the first entry in level2_kernel_pgt */ 414 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index b12208f4dfee..e41980a373ab 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
131} 131}
132 132
133#ifdef CONFIG_X86_32 133#ifdef CONFIG_X86_32
134asmlinkage long sys_iopl(unsigned long regsp) 134long sys_iopl(struct pt_regs *regs)
135{ 135{
136 struct pt_regs *regs = (struct pt_regs *)&regsp;
137 unsigned int level = regs->bx; 136 unsigned int level = regs->bx;
138 struct thread_struct *t = &current->thread; 137 struct thread_struct *t = &current->thread;
139 int rc; 138 int rc;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1a1ae8edc40c..fec79ad85dc6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -11,6 +11,7 @@
11 11
12#include <stdarg.h> 12#include <stdarg.h>
13 13
14#include <linux/stackprotector.h>
14#include <linux/cpu.h> 15#include <linux/cpu.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
@@ -91,6 +92,15 @@ void cpu_idle(void)
91{ 92{
92 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
93 94
95 /*
96 * If we're the non-boot CPU, nothing set the stack canary up
97 * for us. CPU0 already has it initialized but no harm in
98 * doing it again. This is a good place for updating it, as
99 * we wont ever return from this function (so the invalid
100 * canaries already on the stack wont ever trigger).
101 */
102 boot_init_stack_canary();
103
94 current_thread_info()->status |= TS_POLLING; 104 current_thread_info()->status |= TS_POLLING;
95 105
96 /* endless idle loop with no priority at all */ 106 /* endless idle loop with no priority at all */
@@ -131,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all)
131 if (user_mode_vm(regs)) { 141 if (user_mode_vm(regs)) {
132 sp = regs->sp; 142 sp = regs->sp;
133 ss = regs->ss & 0xffff; 143 ss = regs->ss & 0xffff;
134 savesegment(gs, gs); 144 gs = get_user_gs(regs);
135 } else { 145 } else {
136 sp = (unsigned long) (&regs->sp); 146 sp = (unsigned long) (&regs->sp);
137 savesegment(ss, ss); 147 savesegment(ss, ss);
@@ -212,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
212 regs.ds = __USER_DS; 222 regs.ds = __USER_DS;
213 regs.es = __USER_DS; 223 regs.es = __USER_DS;
214 regs.fs = __KERNEL_PERCPU; 224 regs.fs = __KERNEL_PERCPU;
225 regs.gs = __KERNEL_STACK_CANARY;
215 regs.orig_ax = -1; 226 regs.orig_ax = -1;
216 regs.ip = (unsigned long) kernel_thread_helper; 227 regs.ip = (unsigned long) kernel_thread_helper;
217 regs.cs = __KERNEL_CS | get_kernel_rpl(); 228 regs.cs = __KERNEL_CS | get_kernel_rpl();
@@ -304,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
304 315
305 p->thread.ip = (unsigned long) ret_from_fork; 316 p->thread.ip = (unsigned long) ret_from_fork;
306 317
307 savesegment(gs, p->thread.gs); 318 task_user_gs(p) = get_user_gs(regs);
308 319
309 tsk = current; 320 tsk = current;
310 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 321 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -342,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
342void 353void
343start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 354start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
344{ 355{
345 __asm__("movl %0, %%gs" : : "r"(0)); 356 set_user_gs(regs, 0);
346 regs->fs = 0; 357 regs->fs = 0;
347 set_fs(USER_DS); 358 set_fs(USER_DS);
348 regs->ds = __USER_DS; 359 regs->ds = __USER_DS;
@@ -539,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
539 * used %fs or %gs (it does not today), or if the kernel is 550 * used %fs or %gs (it does not today), or if the kernel is
540 * running inside of a hypervisor layer. 551 * running inside of a hypervisor layer.
541 */ 552 */
542 savesegment(gs, prev->gs); 553 lazy_save_gs(prev->gs);
543 554
544 /* 555 /*
545 * Load the per-thread Thread-Local Storage descriptor. 556 * Load the per-thread Thread-Local Storage descriptor.
@@ -585,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
585 * Restore %gs if needed (which is common) 596 * Restore %gs if needed (which is common)
586 */ 597 */
587 if (prev->gs | next->gs) 598 if (prev->gs | next->gs)
588 loadsegment(gs, next->gs); 599 lazy_load_gs(next->gs);
589 600
590 percpu_write(current_task, next_p); 601 percpu_write(current_task, next_p);
591 602
592 return prev_p; 603 return prev_p;
593} 604}
594 605
595asmlinkage int sys_fork(struct pt_regs regs) 606int sys_fork(struct pt_regs *regs)
596{ 607{
597 return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 608 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
598} 609}
599 610
600asmlinkage int sys_clone(struct pt_regs regs) 611int sys_clone(struct pt_regs *regs)
601{ 612{
602 unsigned long clone_flags; 613 unsigned long clone_flags;
603 unsigned long newsp; 614 unsigned long newsp;
604 int __user *parent_tidptr, *child_tidptr; 615 int __user *parent_tidptr, *child_tidptr;
605 616
606 clone_flags = regs.bx; 617 clone_flags = regs->bx;
607 newsp = regs.cx; 618 newsp = regs->cx;
608 parent_tidptr = (int __user *)regs.dx; 619 parent_tidptr = (int __user *)regs->dx;
609 child_tidptr = (int __user *)regs.di; 620 child_tidptr = (int __user *)regs->di;
610 if (!newsp) 621 if (!newsp)
611 newsp = regs.sp; 622 newsp = regs->sp;
612 return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); 623 return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
613} 624}
614 625
615/* 626/*
@@ -622,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs)
622 * do not have enough call-clobbered registers to hold all 633 * do not have enough call-clobbered registers to hold all
623 * the information you need. 634 * the information you need.
624 */ 635 */
625asmlinkage int sys_vfork(struct pt_regs regs) 636int sys_vfork(struct pt_regs *regs)
626{ 637{
627 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 638 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
628} 639}
629 640
630/* 641/*
631 * sys_execve() executes a new program. 642 * sys_execve() executes a new program.
632 */ 643 */
633asmlinkage int sys_execve(struct pt_regs regs) 644int sys_execve(struct pt_regs *regs)
634{ 645{
635 int error; 646 int error;
636 char *filename; 647 char *filename;
637 648
638 filename = getname((char __user *) regs.bx); 649 filename = getname((char __user *) regs->bx);
639 error = PTR_ERR(filename); 650 error = PTR_ERR(filename);
640 if (IS_ERR(filename)) 651 if (IS_ERR(filename))
641 goto out; 652 goto out;
642 error = do_execve(filename, 653 error = do_execve(filename,
643 (char __user * __user *) regs.cx, 654 (char __user * __user *) regs->cx,
644 (char __user * __user *) regs.dx, 655 (char __user * __user *) regs->dx,
645 &regs); 656 regs);
646 if (error == 0) { 657 if (error == 0) {
647 /* Make sure we don't return using sysenter.. */ 658 /* Make sure we don't return using sysenter.. */
648 set_thread_flag(TIF_IRET); 659 set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8eb169e45584..836ef6575f01 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -120,12 +120,11 @@ void cpu_idle(void)
120 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
121 121
122 /* 122 /*
123 * If we're the non-boot CPU, nothing set the PDA stack 123 * If we're the non-boot CPU, nothing set the stack canary up
124 * canary up for us - and if we are the boot CPU we have 124 * for us. CPU0 already has it initialized but no harm in
125 * a 0 stack canary. This is a good place for updating 125 * doing it again. This is a good place for updating it, as
126 * it, as we wont ever return from this function (so the 126 * we wont ever return from this function (so the invalid
127 * invalid canaries already on the stack wont ever 127 * canaries already on the stack wont ever trigger).
128 * trigger):
129 */ 128 */
130 boot_init_stack_canary(); 129 boot_init_stack_canary();
131 130
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..7ec39ab37a2d 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
76{ 76{
77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
78 regno >>= 2; 78 return &regs->bx + (regno >> 2);
79 if (regno > FS)
80 --regno;
81 return &regs->bx + regno;
82} 79}
83 80
84static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 81static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
90 if (offset != offsetof(struct user_regs_struct, gs)) 87 if (offset != offsetof(struct user_regs_struct, gs))
91 retval = *pt_regs_access(task_pt_regs(task), offset); 88 retval = *pt_regs_access(task_pt_regs(task), offset);
92 else { 89 else {
93 retval = task->thread.gs;
94 if (task == current) 90 if (task == current)
95 savesegment(gs, retval); 91 retval = get_user_gs(task_pt_regs(task));
92 else
93 retval = task_user_gs(task);
96 } 94 }
97 return retval; 95 return retval;
98} 96}
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
126 break; 124 break;
127 125
128 case offsetof(struct user_regs_struct, gs): 126 case offsetof(struct user_regs_struct, gs):
129 task->thread.gs = value;
130 if (task == current) 127 if (task == current)
131 /* 128 set_user_gs(task_pt_regs(task), value);
132 * The user-mode %gs is not affected by 129 else
133 * kernel entry, so we must update the CPU. 130 task_user_gs(task) = value;
134 */
135 loadsegment(gs, value);
136 } 131 }
137 132
138 return 0; 133 return 0;
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ef91747bbed5..d992e6cff730 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -16,6 +16,7 @@
16#include <asm/proto.h> 16#include <asm/proto.h>
17#include <asm/cpumask.h> 17#include <asm/cpumask.h>
18#include <asm/cpu.h> 18#include <asm/cpu.h>
19#include <asm/stackprotector.h>
19 20
20#ifdef CONFIG_DEBUG_PER_CPU_MAPS 21#ifdef CONFIG_DEBUG_PER_CPU_MAPS
21# define DBG(x...) printk(KERN_DEBUG x) 22# define DBG(x...) printk(KERN_DEBUG x)
@@ -95,6 +96,7 @@ void __init setup_per_cpu_areas(void)
95 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); 96 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
96 per_cpu(cpu_number, cpu) = cpu; 97 per_cpu(cpu_number, cpu) = cpu;
97 setup_percpu_segment(cpu); 98 setup_percpu_segment(cpu);
99 setup_stack_canary_segment(cpu);
98 /* 100 /*
99 * Copy data used in early init routines from the 101 * Copy data used in early init routines from the
100 * initial arrays to the per cpu data areas. These 102 * initial arrays to the per cpu data areas. These
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 7fc78b019815..7cdcd16885ed 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -50,27 +50,23 @@
50# define FIX_EFLAGS __FIX_EFLAGS 50# define FIX_EFLAGS __FIX_EFLAGS
51#endif 51#endif
52 52
53#define COPY(x) { \ 53#define COPY(x) do { \
54 get_user_ex(regs->x, &sc->x); \ 54 get_user_ex(regs->x, &sc->x); \
55} 55} while (0)
56 56
57#define COPY_SEG(seg) { \ 57#define GET_SEG(seg) ({ \
58 unsigned short tmp; \ 58 unsigned short tmp; \
59 get_user_ex(tmp, &sc->seg); \ 59 get_user_ex(tmp, &sc->seg); \
60 regs->seg = tmp; \ 60 tmp; \
61} 61})
62 62
63#define COPY_SEG_CPL3(seg) { \ 63#define COPY_SEG(seg) do { \
64 unsigned short tmp; \ 64 regs->seg = GET_SEG(seg); \
65 get_user_ex(tmp, &sc->seg); \ 65} while (0)
66 regs->seg = tmp | 3; \
67}
68 66
69#define GET_SEG(seg) { \ 67#define COPY_SEG_CPL3(seg) do { \
70 unsigned short tmp; \ 68 regs->seg = GET_SEG(seg) | 3; \
71 get_user_ex(tmp, &sc->seg); \ 69} while (0)
72 loadsegment(seg, tmp); \
73}
74 70
75static int 71static int
76restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, 72restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -86,7 +82,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
86 get_user_try { 82 get_user_try {
87 83
88#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
89 GET_SEG(gs); 85 set_user_gs(regs, GET_SEG(gs));
90 COPY_SEG(fs); 86 COPY_SEG(fs);
91 COPY_SEG(es); 87 COPY_SEG(es);
92 COPY_SEG(ds); 88 COPY_SEG(ds);
@@ -138,12 +134,7 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
138 put_user_try { 134 put_user_try {
139 135
140#ifdef CONFIG_X86_32 136#ifdef CONFIG_X86_32
141 { 137 put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
142 unsigned int tmp;
143
144 savesegment(gs, tmp);
145 put_user_ex(tmp, (unsigned int __user *)&sc->gs);
146 }
147 put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); 138 put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
148 put_user_ex(regs->es, (unsigned int __user *)&sc->es); 139 put_user_ex(regs->es, (unsigned int __user *)&sc->es);
149 put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); 140 put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
@@ -558,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
558#endif /* CONFIG_X86_32 */ 549#endif /* CONFIG_X86_32 */
559 550
560#ifdef CONFIG_X86_32 551#ifdef CONFIG_X86_32
561asmlinkage int sys_sigaltstack(unsigned long bx) 552int sys_sigaltstack(struct pt_regs *regs)
562{ 553{
563 /* 554 const stack_t __user *uss = (const stack_t __user *)regs->bx;
564 * This is needed to make gcc realize it doesn't own the
565 * "struct pt_regs"
566 */
567 struct pt_regs *regs = (struct pt_regs *)&bx;
568 const stack_t __user *uss = (const stack_t __user *)bx;
569 stack_t __user *uoss = (stack_t __user *)regs->cx; 555 stack_t __user *uoss = (stack_t __user *)regs->cx;
570 556
571 return do_sigaltstack(uss, uoss, regs->sp); 557 return do_sigaltstack(uss, uoss, regs->sp);
@@ -583,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
583 * Do a signal return; undo the signal stack. 569 * Do a signal return; undo the signal stack.
584 */ 570 */
585#ifdef CONFIG_X86_32 571#ifdef CONFIG_X86_32
586asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 572unsigned long sys_sigreturn(struct pt_regs *regs)
587{ 573{
588 struct sigframe __user *frame; 574 struct sigframe __user *frame;
589 struct pt_regs *regs;
590 unsigned long ax; 575 unsigned long ax;
591 sigset_t set; 576 sigset_t set;
592 577
593 regs = (struct pt_regs *) &__unused;
594 frame = (struct sigframe __user *)(regs->sp - 8); 578 frame = (struct sigframe __user *)(regs->sp - 8);
595 579
596 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 580 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -617,7 +601,7 @@ badframe:
617} 601}
618#endif /* CONFIG_X86_32 */ 602#endif /* CONFIG_X86_32 */
619 603
620static long do_rt_sigreturn(struct pt_regs *regs) 604long sys_rt_sigreturn(struct pt_regs *regs)
621{ 605{
622 struct rt_sigframe __user *frame; 606 struct rt_sigframe __user *frame;
623 unsigned long ax; 607 unsigned long ax;
@@ -648,25 +632,6 @@ badframe:
648 return 0; 632 return 0;
649} 633}
650 634
651#ifdef CONFIG_X86_32
652/*
653 * Note: do not pass in pt_regs directly as with tail-call optimization
654 * GCC will incorrectly stomp on the caller's frame and corrupt user-space
655 * register state:
656 */
657asmlinkage int sys_rt_sigreturn(unsigned long __unused)
658{
659 struct pt_regs *regs = (struct pt_regs *)&__unused;
660
661 return do_rt_sigreturn(regs);
662}
663#else /* !CONFIG_X86_32 */
664asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
665{
666 return do_rt_sigreturn(regs);
667}
668#endif /* CONFIG_X86_32 */
669
670/* 635/*
671 * OK, we're invoking a handler: 636 * OK, we're invoking a handler:
672 */ 637 */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index e2e86a08f31d..3bdb64829b82 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -1,7 +1,7 @@
1ENTRY(sys_call_table) 1ENTRY(sys_call_table)
2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ 2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
3 .long sys_exit 3 .long sys_exit
4 .long sys_fork 4 .long ptregs_fork
5 .long sys_read 5 .long sys_read
6 .long sys_write 6 .long sys_write
7 .long sys_open /* 5 */ 7 .long sys_open /* 5 */
@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
10 .long sys_creat 10 .long sys_creat
11 .long sys_link 11 .long sys_link
12 .long sys_unlink /* 10 */ 12 .long sys_unlink /* 10 */
13 .long sys_execve 13 .long ptregs_execve
14 .long sys_chdir 14 .long sys_chdir
15 .long sys_time 15 .long sys_time
16 .long sys_mknod 16 .long sys_mknod
@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
109 .long sys_newlstat 109 .long sys_newlstat
110 .long sys_newfstat 110 .long sys_newfstat
111 .long sys_uname 111 .long sys_uname
112 .long sys_iopl /* 110 */ 112 .long ptregs_iopl /* 110 */
113 .long sys_vhangup 113 .long sys_vhangup
114 .long sys_ni_syscall /* old "idle" system call */ 114 .long sys_ni_syscall /* old "idle" system call */
115 .long sys_vm86old 115 .long ptregs_vm86old
116 .long sys_wait4 116 .long sys_wait4
117 .long sys_swapoff /* 115 */ 117 .long sys_swapoff /* 115 */
118 .long sys_sysinfo 118 .long sys_sysinfo
119 .long sys_ipc 119 .long sys_ipc
120 .long sys_fsync 120 .long sys_fsync
121 .long sys_sigreturn 121 .long ptregs_sigreturn
122 .long sys_clone /* 120 */ 122 .long ptregs_clone /* 120 */
123 .long sys_setdomainname 123 .long sys_setdomainname
124 .long sys_newuname 124 .long sys_newuname
125 .long sys_modify_ldt 125 .long sys_modify_ldt
@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
165 .long sys_mremap 165 .long sys_mremap
166 .long sys_setresuid16 166 .long sys_setresuid16
167 .long sys_getresuid16 /* 165 */ 167 .long sys_getresuid16 /* 165 */
168 .long sys_vm86 168 .long ptregs_vm86
169 .long sys_ni_syscall /* Old sys_query_module */ 169 .long sys_ni_syscall /* Old sys_query_module */
170 .long sys_poll 170 .long sys_poll
171 .long sys_nfsservctl 171 .long sys_nfsservctl
172 .long sys_setresgid16 /* 170 */ 172 .long sys_setresgid16 /* 170 */
173 .long sys_getresgid16 173 .long sys_getresgid16
174 .long sys_prctl 174 .long sys_prctl
175 .long sys_rt_sigreturn 175 .long ptregs_rt_sigreturn
176 .long sys_rt_sigaction 176 .long sys_rt_sigaction
177 .long sys_rt_sigprocmask /* 175 */ 177 .long sys_rt_sigprocmask /* 175 */
178 .long sys_rt_sigpending 178 .long sys_rt_sigpending
@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
185 .long sys_getcwd 185 .long sys_getcwd
186 .long sys_capget 186 .long sys_capget
187 .long sys_capset /* 185 */ 187 .long sys_capset /* 185 */
188 .long sys_sigaltstack 188 .long ptregs_sigaltstack
189 .long sys_sendfile 189 .long sys_sendfile
190 .long sys_ni_syscall /* reserved for streams1 */ 190 .long sys_ni_syscall /* reserved for streams1 */
191 .long sys_ni_syscall /* reserved for streams2 */ 191 .long sys_ni_syscall /* reserved for streams2 */
192 .long sys_vfork /* 190 */ 192 .long ptregs_vfork /* 190 */
193 .long sys_getrlimit 193 .long sys_getrlimit
194 .long sys_mmap2 194 .long sys_mmap2
195 .long sys_truncate64 195 .long sys_truncate64
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0d032d2d8a18..bde57f0f1616 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info)
905} 905}
906#endif /* CONFIG_MATH_EMULATION */ 906#endif /* CONFIG_MATH_EMULATION */
907 907
908dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) 908dotraplinkage void __kprobes
909do_device_not_available(struct pt_regs *regs, long error_code)
909{ 910{
910#ifdef CONFIG_X86_32 911#ifdef CONFIG_X86_32
911 if (read_cr0() & X86_CR0_EM) { 912 if (read_cr0() & X86_CR0_EM) {
912 struct math_emu_info info = { }; 913 struct math_emu_info info = { };
913 914
914 conditional_sti(&regs); 915 conditional_sti(regs);
915 916
916 info.regs = &regs; 917 info.regs = regs;
917 math_emulate(&info); 918 math_emulate(&info);
918 } else { 919 } else {
919 math_state_restore(); /* interrupts still off */ 920 math_state_restore(); /* interrupts still off */
920 conditional_sti(&regs); 921 conditional_sti(regs);
921 } 922 }
922#else 923#else
923 math_state_restore(); 924 math_state_restore();
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4eeb5cf9720d..d7ac84e7fc1c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
158 ret = KVM86->regs32; 158 ret = KVM86->regs32;
159 159
160 ret->fs = current->thread.saved_fs; 160 ret->fs = current->thread.saved_fs;
161 loadsegment(gs, current->thread.saved_gs); 161 set_user_gs(ret, current->thread.saved_gs);
162 162
163 return ret; 163 return ret;
164} 164}
@@ -197,9 +197,9 @@ out:
197static int do_vm86_irq_handling(int subfunction, int irqnumber); 197static int do_vm86_irq_handling(int subfunction, int irqnumber);
198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
199 199
200asmlinkage int sys_vm86old(struct pt_regs regs) 200int sys_vm86old(struct pt_regs *regs)
201{ 201{
202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; 202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
203 struct kernel_vm86_struct info; /* declare this _on top_, 203 struct kernel_vm86_struct info; /* declare this _on top_,
204 * this avoids wasting of stack space. 204 * this avoids wasting of stack space.
205 * This remains on the stack until we 205 * This remains on the stack until we
@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
218 if (tmp) 218 if (tmp)
219 goto out; 219 goto out;
220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
221 info.regs32 = &regs; 221 info.regs32 = regs;
222 tsk->thread.vm86_info = v86; 222 tsk->thread.vm86_info = v86;
223 do_sys_vm86(&info, tsk); 223 do_sys_vm86(&info, tsk);
224 ret = 0; /* we never return here */ 224 ret = 0; /* we never return here */
@@ -227,7 +227,7 @@ out:
227} 227}
228 228
229 229
230asmlinkage int sys_vm86(struct pt_regs regs) 230int sys_vm86(struct pt_regs *regs)
231{ 231{
232 struct kernel_vm86_struct info; /* declare this _on top_, 232 struct kernel_vm86_struct info; /* declare this _on top_,
233 * this avoids wasting of stack space. 233 * this avoids wasting of stack space.
@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
239 struct vm86plus_struct __user *v86; 239 struct vm86plus_struct __user *v86;
240 240
241 tsk = current; 241 tsk = current;
242 switch (regs.bx) { 242 switch (regs->bx) {
243 case VM86_REQUEST_IRQ: 243 case VM86_REQUEST_IRQ:
244 case VM86_FREE_IRQ: 244 case VM86_FREE_IRQ:
245 case VM86_GET_IRQ_BITS: 245 case VM86_GET_IRQ_BITS:
246 case VM86_GET_AND_RESET_IRQ: 246 case VM86_GET_AND_RESET_IRQ:
247 ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); 247 ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
248 goto out; 248 goto out;
249 case VM86_PLUS_INSTALL_CHECK: 249 case VM86_PLUS_INSTALL_CHECK:
250 /* 250 /*
@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
261 ret = -EPERM; 261 ret = -EPERM;
262 if (tsk->thread.saved_sp0) 262 if (tsk->thread.saved_sp0)
263 goto out; 263 goto out;
264 v86 = (struct vm86plus_struct __user *)regs.cx; 264 v86 = (struct vm86plus_struct __user *)regs->cx;
265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, 265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
266 offsetof(struct kernel_vm86_struct, regs32) - 266 offsetof(struct kernel_vm86_struct, regs32) -
267 sizeof(info.regs)); 267 sizeof(info.regs));
268 ret = -EFAULT; 268 ret = -EFAULT;
269 if (tmp) 269 if (tmp)
270 goto out; 270 goto out;
271 info.regs32 = &regs; 271 info.regs32 = regs;
272 info.vm86plus.is_vm86pus = 1; 272 info.vm86plus.is_vm86pus = 1;
273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
274 do_sys_vm86(&info, tsk); 274 do_sys_vm86(&info, tsk);
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
323 info->regs32->ax = 0; 323 info->regs32->ax = 0;
324 tsk->thread.saved_sp0 = tsk->thread.sp0; 324 tsk->thread.saved_sp0 = tsk->thread.sp0;
325 tsk->thread.saved_fs = info->regs32->fs; 325 tsk->thread.saved_fs = info->regs32->fs;
326 savesegment(gs, tsk->thread.saved_gs); 326 tsk->thread.saved_gs = get_user_gs(info->regs32);
327 327
328 tss = &per_cpu(init_tss, get_cpu()); 328 tss = &per_cpu(init_tss, get_cpu());
329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; 329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 07f62d287ff0..087a7f2c639b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -257,6 +257,14 @@ SECTIONS
257 DWARF_DEBUG 257 DWARF_DEBUG
258} 258}
259 259
260 /*
261 * Per-cpu symbols which need to be offset from __per_cpu_load
262 * for the boot processor.
263 */
264#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
265INIT_PER_CPU(gdt_page);
266INIT_PER_CPU(irq_stack_union);
267
260/* 268/*
261 * Build-time check on the image size: 269 * Build-time check on the image size:
262 */ 270 */
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 19e33b6cd593..da2e314f61b5 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -283,7 +283,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
283 /* There's one problem which normal hardware doesn't have: the Host 283 /* There's one problem which normal hardware doesn't have: the Host
284 * can't handle us removing entries we're currently using. So we clear 284 * can't handle us removing entries we're currently using. So we clear
285 * the GS register here: if it's needed it'll be reloaded anyway. */ 285 * the GS register here: if it's needed it'll be reloaded anyway. */
286 loadsegment(gs, 0); 286 lazy_load_gs(0);
287 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); 287 lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
288} 288}
289 289
diff --git a/arch/x86/math-emu/get_address.c b/arch/x86/math-emu/get_address.c
index 420b3b6e3915..6ef5e99380f9 100644
--- a/arch/x86/math-emu/get_address.c
+++ b/arch/x86/math-emu/get_address.c
@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, u_char segment,
150#endif /* PARANOID */ 150#endif /* PARANOID */
151 151
152 switch (segment) { 152 switch (segment) {
153 /* gs isn't used by the kernel, so it still has its
154 user-space value. */
155 case PREFIX_GS_ - 1: 153 case PREFIX_GS_ - 1:
156 /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ 154 /* user gs handling can be lazy, use special accessors */
157 savesegment(gs, addr->selector); 155 addr->selector = get_user_gs(FPU_info->regs);
158 break; 156 break;
159 default: 157 default:
160 addr->selector = PM_REG_(segment); 158 addr->selector = PM_REG_(segment);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 08d140fbc31b..deb1c1ab7868 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -702,7 +702,7 @@ void __cpuinit numa_set_node(int cpu, int node)
702 } 702 }
703 703
704#ifdef CONFIG_DEBUG_PER_CPU_MAPS 704#ifdef CONFIG_DEBUG_PER_CPU_MAPS
705 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) { 705 if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
706 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); 706 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
707 dump_stack(); 707 dump_stack();
708 return; 708 return;
@@ -790,7 +790,7 @@ int early_cpu_to_node(int cpu)
790 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 790 if (early_per_cpu_ptr(x86_cpu_to_node_map))
791 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; 791 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
792 792
793 if (!per_cpu_offset(cpu)) { 793 if (!cpu_possible(cpu)) {
794 printk(KERN_WARNING 794 printk(KERN_WARNING
795 "early_cpu_to_node(%d): no per_cpu area!\n", cpu); 795 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
796 dump_stack(); 796 dump_stack();
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4d6ef0a336d6..16a9020c8f11 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
38 $(call if_changed,objcopy) 38 $(call if_changed,objcopy)
39 39
40CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ 40CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
41 $(filter -g%,$(KBUILD_CFLAGS)) 41 $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector)
42 42
43$(vobjs): KBUILD_CFLAGS += $(CFL) 43$(vobjs): KBUILD_CFLAGS += $(CFL)
44 44
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 37230342c2c4..95ff6a0e942a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -323,13 +323,14 @@ static void load_TLS_descriptor(struct thread_struct *t,
323static void xen_load_tls(struct thread_struct *t, unsigned int cpu) 323static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
324{ 324{
325 /* 325 /*
326 * XXX sleazy hack: If we're being called in a lazy-cpu zone, 326 * XXX sleazy hack: If we're being called in a lazy-cpu zone
327 * it means we're in a context switch, and %gs has just been 327 * and lazy gs handling is enabled, it means we're in a
328 * saved. This means we can zero it out to prevent faults on 328 * context switch, and %gs has just been saved. This means we
329 * exit from the hypervisor if the next process has no %gs. 329 * can zero it out to prevent faults on exit from the
330 * Either way, it has been saved, and the new value will get 330 * hypervisor if the next process has no %gs. Either way, it
331 * loaded properly. This will go away as soon as Xen has been 331 * has been saved, and the new value will get loaded properly.
332 * modified to not save/restore %gs for normal hypercalls. 332 * This will go away as soon as Xen has been modified to not
333 * save/restore %gs for normal hypercalls.
333 * 334 *
334 * On x86_64, this hack is not used for %gs, because gs points 335 * On x86_64, this hack is not used for %gs, because gs points
335 * to KERNEL_GS_BASE (and uses it for PDA references), so we 336 * to KERNEL_GS_BASE (and uses it for PDA references), so we
@@ -341,7 +342,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
341 */ 342 */
342 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { 343 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
343#ifdef CONFIG_X86_32 344#ifdef CONFIG_X86_32
344 loadsegment(gs, 0); 345 lazy_load_gs(0);
345#else 346#else
346 loadsegment(fs, 0); 347 loadsegment(fs, 0);
347#endif 348#endif
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 4c6f96799131..79d7362ad6d1 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -1,14 +1,14 @@
1/* 1/*
2 Asm versions of Xen pv-ops, suitable for either direct use or inlining. 2 * Asm versions of Xen pv-ops, suitable for either direct use or
3 The inline versions are the same as the direct-use versions, with the 3 * inlining. The inline versions are the same as the direct-use
4 pre- and post-amble chopped off. 4 * versions, with the pre- and post-amble chopped off.
5 5 *
6 This code is encoded for size rather than absolute efficiency, 6 * This code is encoded for size rather than absolute efficiency, with
7 with a view to being able to inline as much as possible. 7 * a view to being able to inline as much as possible.
8 8 *
9 We only bother with direct forms (ie, vcpu in percpu data) of 9 * We only bother with direct forms (ie, vcpu in percpu data) of the
10 the operations here; the indirect forms are better handled in 10 * operations here; the indirect forms are better handled in C, since
11 C, since they're generally too large to inline anyway. 11 * they're generally too large to inline anyway.
12 */ 12 */
13 13
14#include <asm/asm-offsets.h> 14#include <asm/asm-offsets.h>
@@ -18,17 +18,19 @@
18#include "xen-asm.h" 18#include "xen-asm.h"
19 19
20/* 20/*
21 Enable events. This clears the event mask and tests the pending 21 * Enable events. This clears the event mask and tests the pending
22 event status with one and operation. If there are pending 22 * event status with one and operation. If there are pending events,
23 events, then enter the hypervisor to get them handled. 23 * then enter the hypervisor to get them handled.
24 */ 24 */
25ENTRY(xen_irq_enable_direct) 25ENTRY(xen_irq_enable_direct)
26 /* Unmask events */ 26 /* Unmask events */
27 movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask 27 movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
28 28
29 /* Preempt here doesn't matter because that will deal with 29 /*
30 any pending interrupts. The pending check may end up being 30 * Preempt here doesn't matter because that will deal with any
31 run on the wrong CPU, but that doesn't hurt. */ 31 * pending interrupts. The pending check may end up being run
32 * on the wrong CPU, but that doesn't hurt.
33 */
32 34
33 /* Test for pending */ 35 /* Test for pending */
34 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending 36 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
@@ -43,8 +45,8 @@ ENDPATCH(xen_irq_enable_direct)
43 45
44 46
45/* 47/*
46 Disabling events is simply a matter of making the event mask 48 * Disabling events is simply a matter of making the event mask
47 non-zero. 49 * non-zero.
48 */ 50 */
49ENTRY(xen_irq_disable_direct) 51ENTRY(xen_irq_disable_direct)
50 movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask 52 movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
@@ -54,18 +56,18 @@ ENDPATCH(xen_irq_disable_direct)
54 RELOC(xen_irq_disable_direct, 0) 56 RELOC(xen_irq_disable_direct, 0)
55 57
56/* 58/*
57 (xen_)save_fl is used to get the current interrupt enable status. 59 * (xen_)save_fl is used to get the current interrupt enable status.
58 Callers expect the status to be in X86_EFLAGS_IF, and other bits 60 * Callers expect the status to be in X86_EFLAGS_IF, and other bits
59 may be set in the return value. We take advantage of this by 61 * may be set in the return value. We take advantage of this by
60 making sure that X86_EFLAGS_IF has the right value (and other bits 62 * making sure that X86_EFLAGS_IF has the right value (and other bits
61 in that byte are 0), but other bits in the return value are 63 * in that byte are 0), but other bits in the return value are
62 undefined. We need to toggle the state of the bit, because 64 * undefined. We need to toggle the state of the bit, because Xen and
63 Xen and x86 use opposite senses (mask vs enable). 65 * x86 use opposite senses (mask vs enable).
64 */ 66 */
65ENTRY(xen_save_fl_direct) 67ENTRY(xen_save_fl_direct)
66 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask 68 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
67 setz %ah 69 setz %ah
68 addb %ah,%ah 70 addb %ah, %ah
69ENDPATCH(xen_save_fl_direct) 71ENDPATCH(xen_save_fl_direct)
70 ret 72 ret
71 ENDPROC(xen_save_fl_direct) 73 ENDPROC(xen_save_fl_direct)
@@ -73,12 +75,11 @@ ENDPATCH(xen_save_fl_direct)
73 75
74 76
75/* 77/*
76 In principle the caller should be passing us a value return 78 * In principle the caller should be passing us a value return from
77 from xen_save_fl_direct, but for robustness sake we test only 79 * xen_save_fl_direct, but for robustness sake we test only the
78 the X86_EFLAGS_IF flag rather than the whole byte. After 80 * X86_EFLAGS_IF flag rather than the whole byte. After setting the
79 setting the interrupt mask state, it checks for unmasked 81 * interrupt mask state, it checks for unmasked pending events and
80 pending events and enters the hypervisor to get them delivered 82 * enters the hypervisor to get them delivered if so.
81 if so.
82 */ 83 */
83ENTRY(xen_restore_fl_direct) 84ENTRY(xen_restore_fl_direct)
84#ifdef CONFIG_X86_64 85#ifdef CONFIG_X86_64
@@ -87,9 +88,11 @@ ENTRY(xen_restore_fl_direct)
87 testb $X86_EFLAGS_IF>>8, %ah 88 testb $X86_EFLAGS_IF>>8, %ah
88#endif 89#endif
89 setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask 90 setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
90 /* Preempt here doesn't matter because that will deal with 91 /*
91 any pending interrupts. The pending check may end up being 92 * Preempt here doesn't matter because that will deal with any
92 run on the wrong CPU, but that doesn't hurt. */ 93 * pending interrupts. The pending check may end up being run
94 * on the wrong CPU, but that doesn't hurt.
95 */
93 96
94 /* check for unmasked and pending */ 97 /* check for unmasked and pending */
95 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending 98 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
@@ -103,8 +106,8 @@ ENDPATCH(xen_restore_fl_direct)
103 106
104 107
105/* 108/*
106 Force an event check by making a hypercall, 109 * Force an event check by making a hypercall, but preserve regs
107 but preserve regs before making the call. 110 * before making the call.
108 */ 111 */
109check_events: 112check_events:
110#ifdef CONFIG_X86_32 113#ifdef CONFIG_X86_32
@@ -137,4 +140,3 @@ check_events:
137 pop %rax 140 pop %rax
138#endif 141#endif
139 ret 142 ret
140
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 082d173caaf3..88e15deb8b82 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -1,17 +1,16 @@
1/* 1/*
2 Asm versions of Xen pv-ops, suitable for either direct use or inlining. 2 * Asm versions of Xen pv-ops, suitable for either direct use or
3 The inline versions are the same as the direct-use versions, with the 3 * inlining. The inline versions are the same as the direct-use
4 pre- and post-amble chopped off. 4 * versions, with the pre- and post-amble chopped off.
5 5 *
6 This code is encoded for size rather than absolute efficiency, 6 * This code is encoded for size rather than absolute efficiency, with
7 with a view to being able to inline as much as possible. 7 * a view to being able to inline as much as possible.
8 8 *
9 We only bother with direct forms (ie, vcpu in pda) of the operations 9 * We only bother with direct forms (ie, vcpu in pda) of the
10 here; the indirect forms are better handled in C, since they're 10 * operations here; the indirect forms are better handled in C, since
11 generally too large to inline anyway. 11 * they're generally too large to inline anyway.
12 */ 12 */
13 13
14//#include <asm/asm-offsets.h>
15#include <asm/thread_info.h> 14#include <asm/thread_info.h>
16#include <asm/processor-flags.h> 15#include <asm/processor-flags.h>
17#include <asm/segment.h> 16#include <asm/segment.h>
@@ -21,8 +20,8 @@
21#include "xen-asm.h" 20#include "xen-asm.h"
22 21
23/* 22/*
24 Force an event check by making a hypercall, 23 * Force an event check by making a hypercall, but preserve regs
25 but preserve regs before making the call. 24 * before making the call.
26 */ 25 */
27check_events: 26check_events:
28 push %eax 27 push %eax
@@ -35,10 +34,10 @@ check_events:
35 ret 34 ret
36 35
37/* 36/*
38 We can't use sysexit directly, because we're not running in ring0. 37 * We can't use sysexit directly, because we're not running in ring0.
39 But we can easily fake it up using iret. Assuming xen_sysexit 38 * But we can easily fake it up using iret. Assuming xen_sysexit is
40 is jumped to with a standard stack frame, we can just strip it 39 * jumped to with a standard stack frame, we can just strip it back to
41 back to a standard iret frame and use iret. 40 * a standard iret frame and use iret.
42 */ 41 */
43ENTRY(xen_sysexit) 42ENTRY(xen_sysexit)
44 movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ 43 movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */
@@ -49,33 +48,31 @@ ENTRY(xen_sysexit)
49ENDPROC(xen_sysexit) 48ENDPROC(xen_sysexit)
50 49
51/* 50/*
52 This is run where a normal iret would be run, with the same stack setup: 51 * This is run where a normal iret would be run, with the same stack setup:
53 8: eflags 52 * 8: eflags
54 4: cs 53 * 4: cs
55 esp-> 0: eip 54 * esp-> 0: eip
56 55 *
57 This attempts to make sure that any pending events are dealt 56 * This attempts to make sure that any pending events are dealt with
58 with on return to usermode, but there is a small window in 57 * on return to usermode, but there is a small window in which an
59 which an event can happen just before entering usermode. If 58 * event can happen just before entering usermode. If the nested
60 the nested interrupt ends up setting one of the TIF_WORK_MASK 59 * interrupt ends up setting one of the TIF_WORK_MASK pending work
61 pending work flags, they will not be tested again before 60 * flags, they will not be tested again before returning to
62 returning to usermode. This means that a process can end up 61 * usermode. This means that a process can end up with pending work,
63 with pending work, which will be unprocessed until the process 62 * which will be unprocessed until the process enters and leaves the
64 enters and leaves the kernel again, which could be an 63 * kernel again, which could be an unbounded amount of time. This
65 unbounded amount of time. This means that a pending signal or 64 * means that a pending signal or reschedule event could be
66 reschedule event could be indefinitely delayed. 65 * indefinitely delayed.
67 66 *
68 The fix is to notice a nested interrupt in the critical 67 * The fix is to notice a nested interrupt in the critical window, and
69 window, and if one occurs, then fold the nested interrupt into 68 * if one occurs, then fold the nested interrupt into the current
70 the current interrupt stack frame, and re-process it 69 * interrupt stack frame, and re-process it iteratively rather than
71 iteratively rather than recursively. This means that it will 70 * recursively. This means that it will exit via the normal path, and
72 exit via the normal path, and all pending work will be dealt 71 * all pending work will be dealt with appropriately.
73 with appropriately. 72 *
74 73 * Because the nested interrupt handler needs to deal with the current
75 Because the nested interrupt handler needs to deal with the 74 * stack state in whatever form its in, we keep things simple by only
76 current stack state in whatever form its in, we keep things 75 * using a single register which is pushed/popped on the stack.
77 simple by only using a single register which is pushed/popped
78 on the stack.
79 */ 76 */
80ENTRY(xen_iret) 77ENTRY(xen_iret)
81 /* test eflags for special cases */ 78 /* test eflags for special cases */
@@ -85,13 +82,15 @@ ENTRY(xen_iret)
85 push %eax 82 push %eax
86 ESP_OFFSET=4 # bytes pushed onto stack 83 ESP_OFFSET=4 # bytes pushed onto stack
87 84
88 /* Store vcpu_info pointer for easy access. Do it this 85 /*
89 way to avoid having to reload %fs */ 86 * Store vcpu_info pointer for easy access. Do it this way to
87 * avoid having to reload %fs
88 */
90#ifdef CONFIG_SMP 89#ifdef CONFIG_SMP
91 GET_THREAD_INFO(%eax) 90 GET_THREAD_INFO(%eax)
92 movl TI_cpu(%eax),%eax 91 movl TI_cpu(%eax), %eax
93 movl __per_cpu_offset(,%eax,4),%eax 92 movl __per_cpu_offset(,%eax,4), %eax
94 mov per_cpu__xen_vcpu(%eax),%eax 93 mov per_cpu__xen_vcpu(%eax), %eax
95#else 94#else
96 movl per_cpu__xen_vcpu, %eax 95 movl per_cpu__xen_vcpu, %eax
97#endif 96#endif
@@ -99,37 +98,46 @@ ENTRY(xen_iret)
99 /* check IF state we're restoring */ 98 /* check IF state we're restoring */
100 testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) 99 testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)
101 100
102 /* Maybe enable events. Once this happens we could get a 101 /*
103 recursive event, so the critical region starts immediately 102 * Maybe enable events. Once this happens we could get a
104 afterwards. However, if that happens we don't end up 103 * recursive event, so the critical region starts immediately
105 resuming the code, so we don't have to be worried about 104 * afterwards. However, if that happens we don't end up
106 being preempted to another CPU. */ 105 * resuming the code, so we don't have to be worried about
106 * being preempted to another CPU.
107 */
107 setz XEN_vcpu_info_mask(%eax) 108 setz XEN_vcpu_info_mask(%eax)
108xen_iret_start_crit: 109xen_iret_start_crit:
109 110
110 /* check for unmasked and pending */ 111 /* check for unmasked and pending */
111 cmpw $0x0001, XEN_vcpu_info_pending(%eax) 112 cmpw $0x0001, XEN_vcpu_info_pending(%eax)
112 113
113 /* If there's something pending, mask events again so we 114 /*
114 can jump back into xen_hypervisor_callback */ 115 * If there's something pending, mask events again so we can
116 * jump back into xen_hypervisor_callback
117 */
115 sete XEN_vcpu_info_mask(%eax) 118 sete XEN_vcpu_info_mask(%eax)
116 119
117 popl %eax 120 popl %eax
118 121
119 /* From this point on the registers are restored and the stack 122 /*
120 updated, so we don't need to worry about it if we're preempted */ 123 * From this point on the registers are restored and the stack
124 * updated, so we don't need to worry about it if we're
125 * preempted
126 */
121iret_restore_end: 127iret_restore_end:
122 128
123 /* Jump to hypervisor_callback after fixing up the stack. 129 /*
124 Events are masked, so jumping out of the critical 130 * Jump to hypervisor_callback after fixing up the stack.
125 region is OK. */ 131 * Events are masked, so jumping out of the critical region is
132 * OK.
133 */
126 je xen_hypervisor_callback 134 je xen_hypervisor_callback
127 135
1281: iret 1361: iret
129xen_iret_end_crit: 137xen_iret_end_crit:
130.section __ex_table,"a" 138.section __ex_table, "a"
131 .align 4 139 .align 4
132 .long 1b,iret_exc 140 .long 1b, iret_exc
133.previous 141.previous
134 142
135hyper_iret: 143hyper_iret:
@@ -139,55 +147,55 @@ hyper_iret:
139 .globl xen_iret_start_crit, xen_iret_end_crit 147 .globl xen_iret_start_crit, xen_iret_end_crit
140 148
141/* 149/*
142 This is called by xen_hypervisor_callback in entry.S when it sees 150 * This is called by xen_hypervisor_callback in entry.S when it sees
143 that the EIP at the time of interrupt was between xen_iret_start_crit 151 * that the EIP at the time of interrupt was between
144 and xen_iret_end_crit. We're passed the EIP in %eax so we can do 152 * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
145 a more refined determination of what to do. 153 * %eax so we can do a more refined determination of what to do.
146 154 *
147 The stack format at this point is: 155 * The stack format at this point is:
148 ---------------- 156 * ----------------
149 ss : (ss/esp may be present if we came from usermode) 157 * ss : (ss/esp may be present if we came from usermode)
150 esp : 158 * esp :
151 eflags } outer exception info 159 * eflags } outer exception info
152 cs } 160 * cs }
153 eip } 161 * eip }
154 ---------------- <- edi (copy dest) 162 * ---------------- <- edi (copy dest)
155 eax : outer eax if it hasn't been restored 163 * eax : outer eax if it hasn't been restored
156 ---------------- 164 * ----------------
157 eflags } nested exception info 165 * eflags } nested exception info
158 cs } (no ss/esp because we're nested 166 * cs } (no ss/esp because we're nested
159 eip } from the same ring) 167 * eip } from the same ring)
160 orig_eax }<- esi (copy src) 168 * orig_eax }<- esi (copy src)
161 - - - - - - - - 169 * - - - - - - - -
162 fs } 170 * fs }
163 es } 171 * es }
164 ds } SAVE_ALL state 172 * ds } SAVE_ALL state
165 eax } 173 * eax }
166 : : 174 * : :
167 ebx }<- esp 175 * ebx }<- esp
168 ---------------- 176 * ----------------
169 177 *
170 In order to deliver the nested exception properly, we need to shift 178 * In order to deliver the nested exception properly, we need to shift
171 everything from the return addr up to the error code so it 179 * everything from the return addr up to the error code so it sits
172 sits just under the outer exception info. This means that when we 180 * just under the outer exception info. This means that when we
173 handle the exception, we do it in the context of the outer exception 181 * handle the exception, we do it in the context of the outer
174 rather than starting a new one. 182 * exception rather than starting a new one.
175 183 *
176 The only caveat is that if the outer eax hasn't been 184 * The only caveat is that if the outer eax hasn't been restored yet
177 restored yet (ie, it's still on stack), we need to insert 185 * (ie, it's still on stack), we need to insert its value into the
178 its value into the SAVE_ALL state before going on, since 186 * SAVE_ALL state before going on, since it's usermode state which we
179 it's usermode state which we eventually need to restore. 187 * eventually need to restore.
180 */ 188 */
181ENTRY(xen_iret_crit_fixup) 189ENTRY(xen_iret_crit_fixup)
182 /* 190 /*
183 Paranoia: Make sure we're really coming from kernel space. 191 * Paranoia: Make sure we're really coming from kernel space.
184 One could imagine a case where userspace jumps into the 192 * One could imagine a case where userspace jumps into the
185 critical range address, but just before the CPU delivers a GP, 193 * critical range address, but just before the CPU delivers a
186 it decides to deliver an interrupt instead. Unlikely? 194 * GP, it decides to deliver an interrupt instead. Unlikely?
187 Definitely. Easy to avoid? Yes. The Intel documents 195 * Definitely. Easy to avoid? Yes. The Intel documents
188 explicitly say that the reported EIP for a bad jump is the 196 * explicitly say that the reported EIP for a bad jump is the
189 jump instruction itself, not the destination, but some virtual 197 * jump instruction itself, not the destination, but some
190 environments get this wrong. 198 * virtual environments get this wrong.
191 */ 199 */
192 movl PT_CS(%esp), %ecx 200 movl PT_CS(%esp), %ecx
193 andl $SEGMENT_RPL_MASK, %ecx 201 andl $SEGMENT_RPL_MASK, %ecx
@@ -197,15 +205,17 @@ ENTRY(xen_iret_crit_fixup)
197 lea PT_ORIG_EAX(%esp), %esi 205 lea PT_ORIG_EAX(%esp), %esi
198 lea PT_EFLAGS(%esp), %edi 206 lea PT_EFLAGS(%esp), %edi
199 207
200 /* If eip is before iret_restore_end then stack 208 /*
201 hasn't been restored yet. */ 209 * If eip is before iret_restore_end then stack
210 * hasn't been restored yet.
211 */
202 cmp $iret_restore_end, %eax 212 cmp $iret_restore_end, %eax
203 jae 1f 213 jae 1f
204 214
205 movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ 215 movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
206 movl %eax, PT_EAX(%esp) 216 movl %eax, PT_EAX(%esp)
207 217
208 lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ 218 lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
209 219
210 /* set up the copy */ 220 /* set up the copy */
2111: std 2211: std
@@ -213,6 +223,6 @@ ENTRY(xen_iret_crit_fixup)
213 rep movsl 223 rep movsl
214 cld 224 cld
215 225
216 lea 4(%edi),%esp /* point esp to new frame */ 226 lea 4(%edi), %esp /* point esp to new frame */
2172: jmp xen_do_upcall 2272: jmp xen_do_upcall
218 228
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index d205a283efe0..02f496a8dbaa 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -1,14 +1,14 @@
1/* 1/*
2 Asm versions of Xen pv-ops, suitable for either direct use or inlining. 2 * Asm versions of Xen pv-ops, suitable for either direct use or
3 The inline versions are the same as the direct-use versions, with the 3 * inlining. The inline versions are the same as the direct-use
4 pre- and post-amble chopped off. 4 * versions, with the pre- and post-amble chopped off.
5 5 *
6 This code is encoded for size rather than absolute efficiency, 6 * This code is encoded for size rather than absolute efficiency, with
7 with a view to being able to inline as much as possible. 7 * a view to being able to inline as much as possible.
8 8 *
9 We only bother with direct forms (ie, vcpu in pda) of the operations 9 * We only bother with direct forms (ie, vcpu in pda) of the
10 here; the indirect forms are better handled in C, since they're 10 * operations here; the indirect forms are better handled in C, since
11 generally too large to inline anyway. 11 * they're generally too large to inline anyway.
12 */ 12 */
13 13
14#include <asm/errno.h> 14#include <asm/errno.h>
@@ -21,25 +21,25 @@
21#include "xen-asm.h" 21#include "xen-asm.h"
22 22
23ENTRY(xen_adjust_exception_frame) 23ENTRY(xen_adjust_exception_frame)
24 mov 8+0(%rsp),%rcx 24 mov 8+0(%rsp), %rcx
25 mov 8+8(%rsp),%r11 25 mov 8+8(%rsp), %r11
26 ret $16 26 ret $16
27 27
28hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 28hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
29/* 29/*
30 Xen64 iret frame: 30 * Xen64 iret frame:
31 31 *
32 ss 32 * ss
33 rsp 33 * rsp
34 rflags 34 * rflags
35 cs 35 * cs
36 rip <-- standard iret frame 36 * rip <-- standard iret frame
37 37 *
38 flags 38 * flags
39 39 *
40 rcx } 40 * rcx }
41 r11 }<-- pushed by hypercall page 41 * r11 }<-- pushed by hypercall page
42rsp -> rax } 42 * rsp->rax }
43 */ 43 */
44ENTRY(xen_iret) 44ENTRY(xen_iret)
45 pushq $0 45 pushq $0
@@ -48,8 +48,8 @@ ENDPATCH(xen_iret)
48RELOC(xen_iret, 1b+1) 48RELOC(xen_iret, 1b+1)
49 49
50/* 50/*
51 sysexit is not used for 64-bit processes, so it's 51 * sysexit is not used for 64-bit processes, so it's only ever used to
52 only ever used to return to 32-bit compat userspace. 52 * return to 32-bit compat userspace.
53 */ 53 */
54ENTRY(xen_sysexit) 54ENTRY(xen_sysexit)
55 pushq $__USER32_DS 55 pushq $__USER32_DS
@@ -64,10 +64,12 @@ ENDPATCH(xen_sysexit)
64RELOC(xen_sysexit, 1b+1) 64RELOC(xen_sysexit, 1b+1)
65 65
66ENTRY(xen_sysret64) 66ENTRY(xen_sysret64)
67 /* We're already on the usermode stack at this point, but still 67 /*
68 with the kernel gs, so we can easily switch back */ 68 * We're already on the usermode stack at this point, but
69 * still with the kernel gs, so we can easily switch back
70 */
69 movq %rsp, PER_CPU_VAR(old_rsp) 71 movq %rsp, PER_CPU_VAR(old_rsp)
70 movq PER_CPU_VAR(kernel_stack),%rsp 72 movq PER_CPU_VAR(kernel_stack), %rsp
71 73
72 pushq $__USER_DS 74 pushq $__USER_DS
73 pushq PER_CPU_VAR(old_rsp) 75 pushq PER_CPU_VAR(old_rsp)
@@ -81,8 +83,10 @@ ENDPATCH(xen_sysret64)
81RELOC(xen_sysret64, 1b+1) 83RELOC(xen_sysret64, 1b+1)
82 84
83ENTRY(xen_sysret32) 85ENTRY(xen_sysret32)
84 /* We're already on the usermode stack at this point, but still 86 /*
85 with the kernel gs, so we can easily switch back */ 87 * We're already on the usermode stack at this point, but
88 * still with the kernel gs, so we can easily switch back
89 */
86 movq %rsp, PER_CPU_VAR(old_rsp) 90 movq %rsp, PER_CPU_VAR(old_rsp)
87 movq PER_CPU_VAR(kernel_stack), %rsp 91 movq PER_CPU_VAR(kernel_stack), %rsp
88 92
@@ -98,28 +102,27 @@ ENDPATCH(xen_sysret32)
98RELOC(xen_sysret32, 1b+1) 102RELOC(xen_sysret32, 1b+1)
99 103
100/* 104/*
101 Xen handles syscall callbacks much like ordinary exceptions, 105 * Xen handles syscall callbacks much like ordinary exceptions, which
102 which means we have: 106 * means we have:
103 - kernel gs 107 * - kernel gs
104 - kernel rsp 108 * - kernel rsp
105 - an iret-like stack frame on the stack (including rcx and r11): 109 * - an iret-like stack frame on the stack (including rcx and r11):
106 ss 110 * ss
107 rsp 111 * rsp
108 rflags 112 * rflags
109 cs 113 * cs
110 rip 114 * rip
111 r11 115 * r11
112 rsp-> rcx 116 * rsp->rcx
113 117 *
114 In all the entrypoints, we undo all that to make it look 118 * In all the entrypoints, we undo all that to make it look like a
115 like a CPU-generated syscall/sysenter and jump to the normal 119 * CPU-generated syscall/sysenter and jump to the normal entrypoint.
116 entrypoint.
117 */ 120 */
118 121
119.macro undo_xen_syscall 122.macro undo_xen_syscall
120 mov 0*8(%rsp),%rcx 123 mov 0*8(%rsp), %rcx
121 mov 1*8(%rsp),%r11 124 mov 1*8(%rsp), %r11
122 mov 5*8(%rsp),%rsp 125 mov 5*8(%rsp), %rsp
123.endm 126.endm
124 127
125/* Normal 64-bit system call target */ 128/* Normal 64-bit system call target */
@@ -146,7 +149,7 @@ ENDPROC(xen_sysenter_target)
146 149
147ENTRY(xen_syscall32_target) 150ENTRY(xen_syscall32_target)
148ENTRY(xen_sysenter_target) 151ENTRY(xen_sysenter_target)
149 lea 16(%rsp), %rsp /* strip %rcx,%r11 */ 152 lea 16(%rsp), %rsp /* strip %rcx, %r11 */
150 mov $-ENOSYS, %rax 153 mov $-ENOSYS, %rax
151 pushq $VGCF_in_syscall 154 pushq $VGCF_in_syscall
152 jmp hypercall_iret 155 jmp hypercall_iret