path: root/arch/x86/include
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-13 16:16:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-13 16:16:36 -0400
commit	60f898eeaaa1c5d0162a4240bacf33a6c87ecef6 (patch)
tree	23eeac4b1e9a616779d22c104dbc8bd45dfeefd1 /arch/x86/include
parent	977e1ba50893c15121557b39de586901fe3f75cf (diff)
parent	3b75232d55680ca166dffa274d0587d5faf0a016 (diff)
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm changes from Ingo Molnar:
 "There were lots of changes in this development cycle:

   - over 100 separate cleanups, restructuring changes, speedups and
     fixes in the x86 system call, irq, trap and other entry code, part
     of a heroic effort to deobfuscate a decade old spaghetti asm code
     and its C code dependencies (Denys Vlasenko, Andy Lutomirski)

   - alternatives code fixes and enhancements (Borislav Petkov)

   - simplifications and cleanups to the compat code (Brian Gerst)

   - signal handling fixes and new x86 testcases (Andy Lutomirski)

   - various other fixes and cleanups

  By their nature many of these changes are risky - we tried to test
  them well on many different x86 systems (there are no known
  regressions), and they are split up finely to help bisection - but
  there's still a fair bit of residual risk left so caveat emptor"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (148 commits)
  perf/x86/64: Report regs_user->ax too in get_regs_user()
  perf/x86/64: Simplify regs_user->abi setting code in get_regs_user()
  perf/x86/64: Do report user_regs->cx while we are in syscall, in get_regs_user()
  perf/x86/64: Do not guess user_regs->cs, ss, sp in get_regs_user()
  x86/asm/entry/32: Tidy up JNZ instructions after TESTs
  x86/asm/entry/64: Reduce padding in execve stubs
  x86/asm/entry/64: Remove GET_THREAD_INFO() in ret_from_fork
  x86/asm/entry/64: Simplify jumps in ret_from_fork
  x86/asm/entry/64: Remove a redundant jump
  x86/asm/entry/64: Optimize [v]fork/clone stubs
  x86/asm/entry: Zero EXTRA_REGS for stub32_execve() too
  x86/asm/entry/64: Move stub_x32_execvecloser() to stub_execveat()
  x86/asm/entry/64: Use common code for rt_sigreturn() epilogue
  x86/asm/entry/64: Add forgotten CFI annotation
  x86/asm/entry/irq: Simplify interrupt dispatch table (IDT) layout
  x86/asm/entry/64: Move opportunistic sysret code to syscall code path
  x86, selftests: Add sigreturn selftest
  x86/alternatives: Guard NOPs optimization
  x86/asm/entry: Clear EXTRA_REGS for all executable formats
  x86/signal: Remove pax argument from restore_sigcontext
  ...
Diffstat (limited to 'arch/x86/include')
-rw-r--r--  arch/x86/include/asm/alternative-asm.h    |  53
-rw-r--r--  arch/x86/include/asm/alternative.h        |  73
-rw-r--r--  arch/x86/include/asm/apic.h               |   2
-rw-r--r--  arch/x86/include/asm/barrier.h            |   6
-rw-r--r--  arch/x86/include/asm/calling.h            | 284
-rw-r--r--  arch/x86/include/asm/compat.h             |   2
-rw-r--r--  arch/x86/include/asm/cpufeature.h         |  32
-rw-r--r--  arch/x86/include/asm/desc.h               |   7
-rw-r--r--  arch/x86/include/asm/dwarf2.h             |  24
-rw-r--r--  arch/x86/include/asm/elf.h                |   7
-rw-r--r--  arch/x86/include/asm/hw_irq.h             |   5
-rw-r--r--  arch/x86/include/asm/insn.h               |   2
-rw-r--r--  arch/x86/include/asm/irqflags.h           |  49
-rw-r--r--  arch/x86/include/asm/paravirt.h           |   5
-rw-r--r--  arch/x86/include/asm/processor.h          | 107
-rw-r--r--  arch/x86/include/asm/ptrace.h             |  45
-rw-r--r--  arch/x86/include/asm/segment.h            | 289
-rw-r--r--  arch/x86/include/asm/setup.h              |   5
-rw-r--r--  arch/x86/include/asm/sigcontext.h         |   6
-rw-r--r--  arch/x86/include/asm/sighandling.h        |   4
-rw-r--r--  arch/x86/include/asm/smap.h               |  30
-rw-r--r--  arch/x86/include/asm/smp.h                |   1
-rw-r--r--  arch/x86/include/asm/special_insns.h      |  24
-rw-r--r--  arch/x86/include/asm/thread_info.h        |  74
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h     |   1
-rw-r--r--  arch/x86/include/uapi/asm/ptrace-abi.h    |  16
-rw-r--r--  arch/x86/include/uapi/asm/ptrace.h        |  13
-rw-r--r--  arch/x86/include/uapi/asm/sigcontext.h    |  21
28 files changed, 697 insertions, 490 deletions
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@
18 .endm 18 .endm
19#endif 19#endif
20 20
21.macro altinstruction_entry orig alt feature orig_len alt_len 21.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
22 .long \orig - . 22 .long \orig - .
23 .long \alt - . 23 .long \alt - .
24 .word \feature 24 .word \feature
25 .byte \orig_len 25 .byte \orig_len
26 .byte \alt_len 26 .byte \alt_len
27 .byte \pad_len
28.endm
29
30.macro ALTERNATIVE oldinstr, newinstr, feature
31140:
32 \oldinstr
33141:
34 .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
35142:
36
37 .pushsection .altinstructions,"a"
38 altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
39 .popsection
40
41 .pushsection .altinstr_replacement,"ax"
42143:
43 \newinstr
44144:
45 .popsection
46.endm
47
48#define old_len 141b-140b
49#define new_len1 144f-143f
50#define new_len2 145f-144f
51
52/*
53 * max without conditionals. Idea adapted from:
54 * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
55 */
56#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
57
58.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
59140:
60 \oldinstr
61141:
62 .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
63 (alt_max_short(new_len1, new_len2) - (old_len)),0x90
64142:
65
66 .pushsection .altinstructions,"a"
67 altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
68 altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
69 .popsection
70
71 .pushsection .altinstr_replacement,"ax"
72143:
73 \newinstr1
74144:
75 \newinstr2
76145:
77 .popsection
27.endm 78.endm
28 79
29#endif /* __ASSEMBLY__ */ 80#endif /* __ASSEMBLY__ */
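
The alt_max_short() expression introduced above is the classic branchless max from the bit-hacks page cited in the comment: -(a < b) is all-ones exactly when a < b, so the XOR mask selects b in that case and leaves a unchanged otherwise. A minimal user-space C sketch of the same identity (illustrative only, not kernel code):

	#include <assert.h>
	#include <stdint.h>

	/* Branchless max(a, b), mirroring the alt_max_short() idea used above. */
	static int32_t max_branchless(int32_t a, int32_t b)
	{
		/* -(a < b) is all-ones when a < b, so the mask selects b. */
		return a ^ ((a ^ b) & -(int32_t)(a < b));
	}

	int main(void)
	{
		assert(max_branchless(3, 7) == 7);
		assert(max_branchless(7, 3) == 7);
		assert(max_branchless(5, 5) == 5);
		return 0;
	}
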
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr {
48 s32 repl_offset; /* offset to replacement instruction */ 48 s32 repl_offset; /* offset to replacement instruction */
49 u16 cpuid; /* cpuid bit set for replacement */ 49 u16 cpuid; /* cpuid bit set for replacement */
50 u8 instrlen; /* length of original instruction */ 50 u8 instrlen; /* length of original instruction */
51 u8 replacementlen; /* length of new instruction, <= instrlen */ 51 u8 replacementlen; /* length of new instruction */
52}; 52 u8 padlen; /* length of build-time padding */
53} __packed;
53 54
54extern void alternative_instructions(void); 55extern void alternative_instructions(void);
55extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 56extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
76} 77}
77#endif /* CONFIG_SMP */ 78#endif /* CONFIG_SMP */
78 79
79#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" 80#define b_replacement(num) "664"#num
81#define e_replacement(num) "665"#num
80 82
81#define b_replacement(number) "663"#number 83#define alt_end_marker "663"
82#define e_replacement(number) "664"#number 84#define alt_slen "662b-661b"
85#define alt_pad_len alt_end_marker"b-662b"
86#define alt_total_slen alt_end_marker"b-661b"
87#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
83 88
84#define alt_slen "662b-661b" 89#define __OLDINSTR(oldinstr, num) \
85#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" 90 "661:\n\t" oldinstr "\n662:\n" \
91 ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
92 "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
86 93
87#define ALTINSTR_ENTRY(feature, number) \ 94#define OLDINSTR(oldinstr, num) \
95 __OLDINSTR(oldinstr, num) \
96 alt_end_marker ":\n"
97
98/*
99 * max without conditionals. Idea adapted from:
100 * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
101 *
102 * The additional "-" is needed because gas works with s32s.
103 */
104#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
105
106/*
107 * Pad the second replacement alternative with additional NOPs if it is
108 * additionally longer than the first replacement alternative.
109 */
110#define OLDINSTR_2(oldinstr, num1, num2) \
111 "661:\n\t" oldinstr "\n662:\n" \
112 ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
113 "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
114 alt_end_marker ":\n"
115
116#define ALTINSTR_ENTRY(feature, num) \
88 " .long 661b - .\n" /* label */ \ 117 " .long 661b - .\n" /* label */ \
89 " .long " b_replacement(number)"f - .\n" /* new instruction */ \ 118 " .long " b_replacement(num)"f - .\n" /* new instruction */ \
90 " .word " __stringify(feature) "\n" /* feature bit */ \ 119 " .word " __stringify(feature) "\n" /* feature bit */ \
91 " .byte " alt_slen "\n" /* source len */ \ 120 " .byte " alt_total_slen "\n" /* source len */ \
92 " .byte " alt_rlen(number) "\n" /* replacement len */ 121 " .byte " alt_rlen(num) "\n" /* replacement len */ \
93 122 " .byte " alt_pad_len "\n" /* pad len */
94#define DISCARD_ENTRY(number) /* rlen <= slen */ \
95 " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
96 123
97#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ 124#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
98 b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" 125 b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
99 126
100/* alternative assembly primitive: */ 127/* alternative assembly primitive: */
101#define ALTERNATIVE(oldinstr, newinstr, feature) \ 128#define ALTERNATIVE(oldinstr, newinstr, feature) \
102 OLDINSTR(oldinstr) \ 129 OLDINSTR(oldinstr, 1) \
103 ".pushsection .altinstructions,\"a\"\n" \ 130 ".pushsection .altinstructions,\"a\"\n" \
104 ALTINSTR_ENTRY(feature, 1) \ 131 ALTINSTR_ENTRY(feature, 1) \
105 ".popsection\n" \ 132 ".popsection\n" \
106 ".pushsection .discard,\"aw\",@progbits\n" \
107 DISCARD_ENTRY(1) \
108 ".popsection\n" \
109 ".pushsection .altinstr_replacement, \"ax\"\n" \ 133 ".pushsection .altinstr_replacement, \"ax\"\n" \
110 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ 134 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
111 ".popsection" 135 ".popsection"
112 136
113#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ 137#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
114 OLDINSTR(oldinstr) \ 138 OLDINSTR_2(oldinstr, 1, 2) \
115 ".pushsection .altinstructions,\"a\"\n" \ 139 ".pushsection .altinstructions,\"a\"\n" \
116 ALTINSTR_ENTRY(feature1, 1) \ 140 ALTINSTR_ENTRY(feature1, 1) \
117 ALTINSTR_ENTRY(feature2, 2) \ 141 ALTINSTR_ENTRY(feature2, 2) \
118 ".popsection\n" \ 142 ".popsection\n" \
119 ".pushsection .discard,\"aw\",@progbits\n" \
120 DISCARD_ENTRY(1) \
121 DISCARD_ENTRY(2) \
122 ".popsection\n" \
123 ".pushsection .altinstr_replacement, \"ax\"\n" \ 143 ".pushsection .altinstr_replacement, \"ax\"\n" \
124 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 144 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
125 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 145 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
146#define alternative(oldinstr, newinstr, feature) \ 166#define alternative(oldinstr, newinstr, feature) \
147 asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") 167 asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
148 168
169#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
170 asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
171
149/* 172/*
150 * Alternative inline assembly with input. 173 * Alternative inline assembly with input.
151 * 174 *
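
For orientation, the extended alt_instr layout above describes a patch site whose original code is padded with NOPs at build time: instrlen now covers the whole padded site and padlen records how much of it is padding, so any replacement that fits in instrlen bytes can be patched in place. A rough C sketch of that relationship (field names follow the header, but this is not the kernel's patcher):

	#include <stdint.h>

	/* Mirrors the alt_instr layout above (sketch; see asm/alternative.h). */
	struct alt_instr_sketch {
		int32_t  instr_offset;    /* original instruction, relative to the entry */
		int32_t  repl_offset;     /* replacement instruction, relative to the entry */
		uint16_t cpuid;           /* feature bit that selects the replacement */
		uint8_t  instrlen;        /* length of the original site, padding included */
		uint8_t  replacementlen;  /* length of the replacement */
		uint8_t  padlen;          /* build-time NOP padding appended to oldinstr */
	} __attribute__((packed));

	/* The .skip directives guarantee this at build time; a patcher could
	 * re-check it before copying replacementlen bytes over the site and
	 * NOP-filling the remaining instrlen - replacementlen bytes. */
	static int replacement_fits(const struct alt_instr_sketch *a)
	{
		return a->replacementlen <= a->instrlen;
	}
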
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 08f217354442..976b86a325e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
91{ 91{
92 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 92 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
93 93
94 alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, 94 alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
95 ASM_OUTPUT2("=r" (v), "=m" (*addr)), 95 ASM_OUTPUT2("=r" (v), "=m" (*addr)),
96 ASM_OUTPUT2("0" (v), "m" (*addr))); 96 ASM_OUTPUT2("0" (v), "m" (*addr)));
97} 97}
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb33106e..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@ do { \
95 * Stop RDTSC speculation. This is needed when you need to use RDTSC 95 * Stop RDTSC speculation. This is needed when you need to use RDTSC
96 * (or get_cycles or vread that possibly accesses the TSC) in a defined 96 * (or get_cycles or vread that possibly accesses the TSC) in a defined
97 * code region. 97 * code region.
98 *
99 * (Could use an alternative three way for this if there was one.)
100 */ 98 */
101static __always_inline void rdtsc_barrier(void) 99static __always_inline void rdtsc_barrier(void)
102{ 100{
103 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 101 alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
104 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 102 "lfence", X86_FEATURE_LFENCE_RDTSC);
105} 103}
106 104
107#endif /* _ASM_X86_BARRIER_H */ 105#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b46f83..1c8b50edb2db 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
55 * for assembly code: 55 * for assembly code:
56 */ 56 */
57 57
58#define R15 0 58/* The layout forms the "struct pt_regs" on the stack: */
59#define R14 8 59/*
60#define R13 16 60 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
61#define R12 24 61 * unless syscall needs a complete, fully filled "struct pt_regs".
62#define RBP 32 62 */
63#define RBX 40 63#define R15 0*8
64 64#define R14 1*8
65/* arguments: interrupts/non tracing syscalls only save up to here: */ 65#define R13 2*8
66#define R11 48 66#define R12 3*8
67#define R10 56 67#define RBP 4*8
68#define R9 64 68#define RBX 5*8
69#define R8 72 69/* These regs are callee-clobbered. Always saved on kernel entry. */
70#define RAX 80 70#define R11 6*8
71#define RCX 88 71#define R10 7*8
72#define RDX 96 72#define R9 8*8
73#define RSI 104 73#define R8 9*8
74#define RDI 112 74#define RAX 10*8
75#define ORIG_RAX 120 /* + error_code */ 75#define RCX 11*8
76/* end of arguments */ 76#define RDX 12*8
77 77#define RSI 13*8
78/* cpu exception frame or undefined in case of fast syscall: */ 78#define RDI 14*8
79#define RIP 128 79/*
80#define CS 136 80 * On syscall entry, this is syscall#. On CPU exception, this is error code.
81#define EFLAGS 144 81 * On hw interrupt, it's IRQ number:
82#define RSP 152 82 */
83#define SS 160 83#define ORIG_RAX 15*8
84 84/* Return frame for iretq */
85#define ARGOFFSET R11 85#define RIP 16*8
86 86#define CS 17*8
87 .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 87#define EFLAGS 18*8
88 subq $9*8+\addskip, %rsp 88#define RSP 19*8
89 CFI_ADJUST_CFA_OFFSET 9*8+\addskip 89#define SS 20*8
90 movq_cfi rdi, 8*8 90
91 movq_cfi rsi, 7*8 91#define SIZEOF_PTREGS 21*8
92 movq_cfi rdx, 6*8 92
93 93 .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
94 .if \save_rcx 94 subq $15*8+\addskip, %rsp
95 movq_cfi rcx, 5*8 95 CFI_ADJUST_CFA_OFFSET 15*8+\addskip
96 .endif 96 .endm
97 97
98 .if \rax_enosys 98 .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
99 movq $-ENOSYS, 4*8(%rsp) 99 .if \r11
100 .else 100 movq_cfi r11, 6*8+\offset
101 movq_cfi rax, 4*8
102 .endif 101 .endif
103 102 .if \r8910
104 .if \save_r891011 103 movq_cfi r10, 7*8+\offset
105 movq_cfi r8, 3*8 104 movq_cfi r9, 8*8+\offset
106 movq_cfi r9, 2*8 105 movq_cfi r8, 9*8+\offset
107 movq_cfi r10, 1*8 106 .endif
108 movq_cfi r11, 0*8 107 .if \rax
108 movq_cfi rax, 10*8+\offset
109 .endif
110 .if \rcx
111 movq_cfi rcx, 11*8+\offset
109 .endif 112 .endif
113 movq_cfi rdx, 12*8+\offset
114 movq_cfi rsi, 13*8+\offset
115 movq_cfi rdi, 14*8+\offset
116 .endm
117 .macro SAVE_C_REGS offset=0
118 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
119 .endm
120 .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
121 SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
122 .endm
123 .macro SAVE_C_REGS_EXCEPT_R891011
124 SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
125 .endm
126 .macro SAVE_C_REGS_EXCEPT_RCX_R891011
127 SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
128 .endm
129 .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
130 SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
131 .endm
132
133 .macro SAVE_EXTRA_REGS offset=0
134 movq_cfi r15, 0*8+\offset
135 movq_cfi r14, 1*8+\offset
136 movq_cfi r13, 2*8+\offset
137 movq_cfi r12, 3*8+\offset
138 movq_cfi rbp, 4*8+\offset
139 movq_cfi rbx, 5*8+\offset
140 .endm
141 .macro SAVE_EXTRA_REGS_RBP offset=0
142 movq_cfi rbp, 4*8+\offset
143 .endm
110 144
145 .macro RESTORE_EXTRA_REGS offset=0
146 movq_cfi_restore 0*8+\offset, r15
147 movq_cfi_restore 1*8+\offset, r14
148 movq_cfi_restore 2*8+\offset, r13
149 movq_cfi_restore 3*8+\offset, r12
150 movq_cfi_restore 4*8+\offset, rbp
151 movq_cfi_restore 5*8+\offset, rbx
111 .endm 152 .endm
112 153
113#define ARG_SKIP (9*8) 154 .macro ZERO_EXTRA_REGS
155 xorl %r15d, %r15d
156 xorl %r14d, %r14d
157 xorl %r13d, %r13d
158 xorl %r12d, %r12d
159 xorl %ebp, %ebp
160 xorl %ebx, %ebx
161 .endm
114 162
115 .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ 163 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
116 rstor_r8910=1, rstor_rdx=1
117 .if \rstor_r11 164 .if \rstor_r11
118 movq_cfi_restore 0*8, r11 165 movq_cfi_restore 6*8, r11
119 .endif 166 .endif
120
121 .if \rstor_r8910 167 .if \rstor_r8910
122 movq_cfi_restore 1*8, r10 168 movq_cfi_restore 7*8, r10
123 movq_cfi_restore 2*8, r9 169 movq_cfi_restore 8*8, r9
124 movq_cfi_restore 3*8, r8 170 movq_cfi_restore 9*8, r8
125 .endif 171 .endif
126
127 .if \rstor_rax 172 .if \rstor_rax
128 movq_cfi_restore 4*8, rax 173 movq_cfi_restore 10*8, rax
129 .endif 174 .endif
130
131 .if \rstor_rcx 175 .if \rstor_rcx
132 movq_cfi_restore 5*8, rcx 176 movq_cfi_restore 11*8, rcx
133 .endif 177 .endif
134
135 .if \rstor_rdx 178 .if \rstor_rdx
136 movq_cfi_restore 6*8, rdx 179 movq_cfi_restore 12*8, rdx
137 .endif
138
139 movq_cfi_restore 7*8, rsi
140 movq_cfi_restore 8*8, rdi
141
142 .if ARG_SKIP+\addskip > 0
143 addq $ARG_SKIP+\addskip, %rsp
144 CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
145 .endif 180 .endif
181 movq_cfi_restore 13*8, rsi
182 movq_cfi_restore 14*8, rdi
146 .endm 183 .endm
147 184 .macro RESTORE_C_REGS
148 .macro LOAD_ARGS offset, skiprax=0 185 RESTORE_C_REGS_HELPER 1,1,1,1,1
149 movq \offset(%rsp), %r11
150 movq \offset+8(%rsp), %r10
151 movq \offset+16(%rsp), %r9
152 movq \offset+24(%rsp), %r8
153 movq \offset+40(%rsp), %rcx
154 movq \offset+48(%rsp), %rdx
155 movq \offset+56(%rsp), %rsi
156 movq \offset+64(%rsp), %rdi
157 .if \skiprax
158 .else
159 movq \offset+72(%rsp), %rax
160 .endif
161 .endm 186 .endm
162 187 .macro RESTORE_C_REGS_EXCEPT_RAX
163#define REST_SKIP (6*8) 188 RESTORE_C_REGS_HELPER 0,1,1,1,1
164
165 .macro SAVE_REST
166 subq $REST_SKIP, %rsp
167 CFI_ADJUST_CFA_OFFSET REST_SKIP
168 movq_cfi rbx, 5*8
169 movq_cfi rbp, 4*8
170 movq_cfi r12, 3*8
171 movq_cfi r13, 2*8
172 movq_cfi r14, 1*8
173 movq_cfi r15, 0*8
174 .endm 189 .endm
175 190 .macro RESTORE_C_REGS_EXCEPT_RCX
176 .macro RESTORE_REST 191 RESTORE_C_REGS_HELPER 1,0,1,1,1
177 movq_cfi_restore 0*8, r15
178 movq_cfi_restore 1*8, r14
179 movq_cfi_restore 2*8, r13
180 movq_cfi_restore 3*8, r12
181 movq_cfi_restore 4*8, rbp
182 movq_cfi_restore 5*8, rbx
183 addq $REST_SKIP, %rsp
184 CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
185 .endm 192 .endm
186 193 .macro RESTORE_C_REGS_EXCEPT_R11
187 .macro SAVE_ALL 194 RESTORE_C_REGS_HELPER 1,1,0,1,1
188 SAVE_ARGS 195 .endm
189 SAVE_REST 196 .macro RESTORE_C_REGS_EXCEPT_RCX_R11
197 RESTORE_C_REGS_HELPER 1,0,0,1,1
198 .endm
199 .macro RESTORE_RSI_RDI
200 RESTORE_C_REGS_HELPER 0,0,0,0,0
201 .endm
202 .macro RESTORE_RSI_RDI_RDX
203 RESTORE_C_REGS_HELPER 0,0,0,0,1
190 .endm 204 .endm
191 205
192 .macro RESTORE_ALL addskip=0 206 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
193 RESTORE_REST 207 addq $15*8+\addskip, %rsp
194 RESTORE_ARGS 1, \addskip 208 CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
195 .endm 209 .endm
196 210
197 .macro icebp 211 .macro icebp
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
210 */ 224 */
211 225
212 .macro SAVE_ALL 226 .macro SAVE_ALL
213 pushl_cfi %eax 227 pushl_cfi_reg eax
214 CFI_REL_OFFSET eax, 0 228 pushl_cfi_reg ebp
215 pushl_cfi %ebp 229 pushl_cfi_reg edi
216 CFI_REL_OFFSET ebp, 0 230 pushl_cfi_reg esi
217 pushl_cfi %edi 231 pushl_cfi_reg edx
218 CFI_REL_OFFSET edi, 0 232 pushl_cfi_reg ecx
219 pushl_cfi %esi 233 pushl_cfi_reg ebx
220 CFI_REL_OFFSET esi, 0
221 pushl_cfi %edx
222 CFI_REL_OFFSET edx, 0
223 pushl_cfi %ecx
224 CFI_REL_OFFSET ecx, 0
225 pushl_cfi %ebx
226 CFI_REL_OFFSET ebx, 0
227 .endm 234 .endm
228 235
229 .macro RESTORE_ALL 236 .macro RESTORE_ALL
230 popl_cfi %ebx 237 popl_cfi_reg ebx
231 CFI_RESTORE ebx 238 popl_cfi_reg ecx
232 popl_cfi %ecx 239 popl_cfi_reg edx
233 CFI_RESTORE ecx 240 popl_cfi_reg esi
234 popl_cfi %edx 241 popl_cfi_reg edi
235 CFI_RESTORE edx 242 popl_cfi_reg ebp
236 popl_cfi %esi 243 popl_cfi_reg eax
237 CFI_RESTORE esi
238 popl_cfi %edi
239 CFI_RESTORE edi
240 popl_cfi %ebp
241 CFI_RESTORE ebp
242 popl_cfi %eax
243 CFI_RESTORE eax
244 .endm 244 .endm
245 245
246#endif /* CONFIG_X86_64 */ 246#endif /* CONFIG_X86_64 */
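
The new offsets above (R15 = 0*8 through SS = 20*8, SIZEOF_PTREGS = 21*8) are exactly the field offsets of the x86_64 struct pt_regs that these pushes build on the stack. A stand-alone C sketch of that correspondence, assuming an LP64 build (the struct here is a stand-in, not the kernel's definition):

	#include <stddef.h>

	/* Stand-in mirroring the x86_64 pt_regs layout described above. */
	struct pt_regs_sketch {
		unsigned long r15, r14, r13, r12, bp, bx;            /* callee-preserved */
		unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;  /* callee-clobbered */
		unsigned long orig_ax;            /* syscall#, error code or IRQ number */
		unsigned long ip, cs, flags, sp, ss;                 /* iretq return frame */
	};

	/* The asm offsets above are simply "index * 8" into this layout: */
	_Static_assert(offsetof(struct pt_regs_sketch, r15)     ==  0 * 8, "R15");
	_Static_assert(offsetof(struct pt_regs_sketch, bx)      ==  5 * 8, "RBX");
	_Static_assert(offsetof(struct pt_regs_sketch, r11)     ==  6 * 8, "R11");
	_Static_assert(offsetof(struct pt_regs_sketch, di)      == 14 * 8, "RDI");
	_Static_assert(offsetof(struct pt_regs_sketch, orig_ax) == 15 * 8, "ORIG_RAX");
	_Static_assert(offsetof(struct pt_regs_sketch, ss)      == 20 * 8, "SS");
	_Static_assert(sizeof(struct pt_regs_sketch)            == 21 * 8, "SIZEOF_PTREGS");
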
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c401f79f..acdee09228b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
301 sp = task_pt_regs(current)->sp; 301 sp = task_pt_regs(current)->sp;
302 } else { 302 } else {
303 /* -128 for the x32 ABI redzone */ 303 /* -128 for the x32 ABI redzone */
304 sp = this_cpu_read(old_rsp) - 128; 304 sp = task_pt_regs(current)->sp - 128;
305 } 305 }
306 306
307 return (void __user *)round_down(sp - len, 16); 307 return (void __user *)round_down(sp - len, 16);
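
With old_rsp gone, both branches above read the user stack pointer from pt_regs; the x32 path still steps below the 128-byte ABI redzone before the 16-byte-aligned allocation. A tiny sketch of that arithmetic (hypothetical helper, illustration only):

	/* Sketch of the x32 path above: skip the 128-byte redzone, then
	 * allocate len bytes rounded down to 16-byte alignment. */
	static unsigned long compat_alloc_sketch(unsigned long sp, unsigned long len)
	{
		sp -= 128;                  /* x32 ABI redzone below the stack pointer */
		return (sp - len) & ~15UL;  /* round_down(sp - len, 16) */
	}
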
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a54851aedc..854c04b3c9c2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -231,7 +231,9 @@
231#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ 231#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
232#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ 232#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
233#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ 233#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
234#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
234#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ 235#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
236#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
235#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ 237#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
236#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ 238#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
237#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ 239#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
418 " .word %P0\n" /* 1: do replace */ 420 " .word %P0\n" /* 1: do replace */
419 " .byte 2b - 1b\n" /* source len */ 421 " .byte 2b - 1b\n" /* source len */
420 " .byte 0\n" /* replacement len */ 422 " .byte 0\n" /* replacement len */
423 " .byte 0\n" /* pad len */
421 ".previous\n" 424 ".previous\n"
422 /* skipping size check since replacement size = 0 */ 425 /* skipping size check since replacement size = 0 */
423 : : "i" (X86_FEATURE_ALWAYS) : : t_warn); 426 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
432 " .word %P0\n" /* feature bit */ 435 " .word %P0\n" /* feature bit */
433 " .byte 2b - 1b\n" /* source len */ 436 " .byte 2b - 1b\n" /* source len */
434 " .byte 0\n" /* replacement len */ 437 " .byte 0\n" /* replacement len */
438 " .byte 0\n" /* pad len */
435 ".previous\n" 439 ".previous\n"
436 /* skipping size check since replacement size = 0 */ 440 /* skipping size check since replacement size = 0 */
437 : : "i" (bit) : : t_no); 441 : : "i" (bit) : : t_no);
@@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
457 " .word %P1\n" /* feature bit */ 461 " .word %P1\n" /* feature bit */
458 " .byte 2b - 1b\n" /* source len */ 462 " .byte 2b - 1b\n" /* source len */
459 " .byte 4f - 3f\n" /* replacement len */ 463 " .byte 4f - 3f\n" /* replacement len */
464 " .byte 0\n" /* pad len */
460 ".previous\n" 465 ".previous\n"
461 ".section .discard,\"aw\",@progbits\n" 466 ".section .discard,\"aw\",@progbits\n"
462 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 467 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
483static __always_inline __pure bool _static_cpu_has_safe(u16 bit) 488static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
484{ 489{
485#ifdef CC_HAVE_ASM_GOTO 490#ifdef CC_HAVE_ASM_GOTO
486/* 491 asm_volatile_goto("1: jmp %l[t_dynamic]\n"
487 * We need to spell the jumps to the compiler because, depending on the offset,
488 * the replacement jump can be bigger than the original jump, and this we cannot
489 * have. Thus, we force the jump to the widest, 4-byte, signed relative
490 * offset even though the last would often fit in less bytes.
491 */
492 asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
493 "2:\n" 492 "2:\n"
493 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
494 "((5f-4f) - (2b-1b)),0x90\n"
495 "3:\n"
494 ".section .altinstructions,\"a\"\n" 496 ".section .altinstructions,\"a\"\n"
495 " .long 1b - .\n" /* src offset */ 497 " .long 1b - .\n" /* src offset */
496 " .long 3f - .\n" /* repl offset */ 498 " .long 4f - .\n" /* repl offset */
497 " .word %P1\n" /* always replace */ 499 " .word %P1\n" /* always replace */
498 " .byte 2b - 1b\n" /* src len */ 500 " .byte 3b - 1b\n" /* src len */
499 " .byte 4f - 3f\n" /* repl len */ 501 " .byte 5f - 4f\n" /* repl len */
502 " .byte 3b - 2b\n" /* pad len */
500 ".previous\n" 503 ".previous\n"
501 ".section .altinstr_replacement,\"ax\"\n" 504 ".section .altinstr_replacement,\"ax\"\n"
502 "3: .byte 0xe9\n .long %l[t_no] - 2b\n" 505 "4: jmp %l[t_no]\n"
503 "4:\n" 506 "5:\n"
504 ".previous\n" 507 ".previous\n"
505 ".section .altinstructions,\"a\"\n" 508 ".section .altinstructions,\"a\"\n"
506 " .long 1b - .\n" /* src offset */ 509 " .long 1b - .\n" /* src offset */
507 " .long 0\n" /* no replacement */ 510 " .long 0\n" /* no replacement */
508 " .word %P0\n" /* feature bit */ 511 " .word %P0\n" /* feature bit */
509 " .byte 2b - 1b\n" /* src len */ 512 " .byte 3b - 1b\n" /* src len */
510 " .byte 0\n" /* repl len */ 513 " .byte 0\n" /* repl len */
514 " .byte 0\n" /* pad len */
511 ".previous\n" 515 ".previous\n"
512 : : "i" (bit), "i" (X86_FEATURE_ALWAYS) 516 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
513 : : t_dynamic, t_no); 517 : : t_dynamic, t_no);
@@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
527 " .word %P2\n" /* always replace */ 531 " .word %P2\n" /* always replace */
528 " .byte 2b - 1b\n" /* source len */ 532 " .byte 2b - 1b\n" /* source len */
529 " .byte 4f - 3f\n" /* replacement len */ 533 " .byte 4f - 3f\n" /* replacement len */
534 " .byte 0\n" /* pad len */
530 ".previous\n" 535 ".previous\n"
531 ".section .discard,\"aw\",@progbits\n" 536 ".section .discard,\"aw\",@progbits\n"
532 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 537 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
541 " .word %P1\n" /* feature bit */ 546 " .word %P1\n" /* feature bit */
542 " .byte 4b - 3b\n" /* src len */ 547 " .byte 4b - 3b\n" /* src len */
543 " .byte 6f - 5f\n" /* repl len */ 548 " .byte 6f - 5f\n" /* repl len */
549 " .byte 0\n" /* pad len */
544 ".previous\n" 550 ".previous\n"
545 ".section .discard,\"aw\",@progbits\n" 551 ".section .discard,\"aw\",@progbits\n"
546 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ 552 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e8f156..a0bf89fd2647 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
376 * Pentium F0 0F bugfix can have resulted in the mapped 376 * Pentium F0 0F bugfix can have resulted in the mapped
377 * IDT being write-protected. 377 * IDT being write-protected.
378 */ 378 */
379#define set_intr_gate(n, addr) \ 379#define set_intr_gate_notrace(n, addr) \
380 do { \ 380 do { \
381 BUG_ON((unsigned)n > 0xFF); \ 381 BUG_ON((unsigned)n > 0xFF); \
382 _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ 382 _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
383 __KERNEL_CS); \ 383 __KERNEL_CS); \
384 } while (0)
385
386#define set_intr_gate(n, addr) \
387 do { \
388 set_intr_gate_notrace(n, addr); \
384 _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ 389 _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
385 0, 0, __KERNEL_CS); \ 390 0, 0, __KERNEL_CS); \
386 } while (0) 391 } while (0)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
86 CFI_ADJUST_CFA_OFFSET 8 86 CFI_ADJUST_CFA_OFFSET 8
87 .endm 87 .endm
88 88
89 .macro pushq_cfi_reg reg
90 pushq %\reg
91 CFI_ADJUST_CFA_OFFSET 8
92 CFI_REL_OFFSET \reg, 0
93 .endm
94
89 .macro popq_cfi reg 95 .macro popq_cfi reg
90 popq \reg 96 popq \reg
91 CFI_ADJUST_CFA_OFFSET -8 97 CFI_ADJUST_CFA_OFFSET -8
92 .endm 98 .endm
93 99
100 .macro popq_cfi_reg reg
101 popq %\reg
102 CFI_ADJUST_CFA_OFFSET -8
103 CFI_RESTORE \reg
104 .endm
105
94 .macro pushfq_cfi 106 .macro pushfq_cfi
95 pushfq 107 pushfq
96 CFI_ADJUST_CFA_OFFSET 8 108 CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
116 CFI_ADJUST_CFA_OFFSET 4 128 CFI_ADJUST_CFA_OFFSET 4
117 .endm 129 .endm
118 130
131 .macro pushl_cfi_reg reg
132 pushl %\reg
133 CFI_ADJUST_CFA_OFFSET 4
134 CFI_REL_OFFSET \reg, 0
135 .endm
136
119 .macro popl_cfi reg 137 .macro popl_cfi reg
120 popl \reg 138 popl \reg
121 CFI_ADJUST_CFA_OFFSET -4 139 CFI_ADJUST_CFA_OFFSET -4
122 .endm 140 .endm
123 141
142 .macro popl_cfi_reg reg
143 popl %\reg
144 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE \reg
146 .endm
147
124 .macro pushfl_cfi 148 .macro pushfl_cfi
125 pushfl 149 pushfl
126 CFI_ADJUST_CFA_OFFSET 4 150 CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a9dab5..3563107b5060 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@ do { \
171static inline void elf_common_init(struct thread_struct *t, 171static inline void elf_common_init(struct thread_struct *t,
172 struct pt_regs *regs, const u16 ds) 172 struct pt_regs *regs, const u16 ds)
173{ 173{
174 regs->ax = regs->bx = regs->cx = regs->dx = 0; 174 /* Commented-out registers are cleared in stub_execve */
175 regs->si = regs->di = regs->bp = 0; 175 /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
176 regs->si = regs->di /*= regs->bp*/ = 0;
176 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; 177 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
177 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; 178 /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
178 t->fs = t->gs = 0; 179 t->fs = t->gs = 0;
179 t->fsindex = t->gsindex = 0; 180 t->fsindex = t->gsindex = 0;
180 t->ds = t->es = ds; 181 t->ds = t->es = ds;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290e0b20..e9571ddabc4f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
181extern __visible void smp_invalidate_interrupt(struct pt_regs *); 181extern __visible void smp_invalidate_interrupt(struct pt_regs *);
182#endif 182#endif
183 183
184extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR 184extern char irq_entries_start[];
185 - FIRST_EXTERNAL_VECTOR])(void);
186#ifdef CONFIG_TRACING 185#ifdef CONFIG_TRACING
187#define trace_interrupt interrupt 186#define trace_irq_entries_start irq_entries_start
188#endif 187#endif
189 188
190#define VECTOR_UNDEFINED (-1) 189#define VECTOR_UNDEFINED (-1)
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 47f29b1d1846..e7814b74caf8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -69,7 +69,7 @@ struct insn {
69 const insn_byte_t *next_byte; 69 const insn_byte_t *next_byte;
70}; 70};
71 71
72#define MAX_INSN_SIZE 16 72#define MAX_INSN_SIZE 15
73 73
74#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) 74#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
75#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) 75#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..b77f5edb03b0 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
136#define USERGS_SYSRET32 \ 136#define USERGS_SYSRET32 \
137 swapgs; \ 137 swapgs; \
138 sysretl 138 sysretl
139#define ENABLE_INTERRUPTS_SYSEXIT32 \
140 swapgs; \
141 sti; \
142 sysexit
143 139
144#else 140#else
145#define INTERRUPT_RETURN iret 141#define INTERRUPT_RETURN iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
163 159
164 return arch_irqs_disabled_flags(flags); 160 return arch_irqs_disabled_flags(flags);
165} 161}
162#endif /* !__ASSEMBLY__ */
166 163
164#ifdef __ASSEMBLY__
165#ifdef CONFIG_TRACE_IRQFLAGS
166# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
167# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
167#else 168#else
168 169# define TRACE_IRQS_ON
169#ifdef CONFIG_X86_64 170# define TRACE_IRQS_OFF
170#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk 171#endif
171#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ 172#ifdef CONFIG_DEBUG_LOCK_ALLOC
173# ifdef CONFIG_X86_64
174# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
175# define LOCKDEP_SYS_EXIT_IRQ \
172 TRACE_IRQS_ON; \ 176 TRACE_IRQS_ON; \
173 sti; \ 177 sti; \
174 SAVE_REST; \ 178 call lockdep_sys_exit_thunk; \
175 LOCKDEP_SYS_EXIT; \
176 RESTORE_REST; \
177 cli; \ 179 cli; \
178 TRACE_IRQS_OFF; 180 TRACE_IRQS_OFF;
179 181# else
180#else 182# define LOCKDEP_SYS_EXIT \
181#define ARCH_LOCKDEP_SYS_EXIT \
182 pushl %eax; \ 183 pushl %eax; \
183 pushl %ecx; \ 184 pushl %ecx; \
184 pushl %edx; \ 185 pushl %edx; \
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
186 popl %edx; \ 187 popl %edx; \
187 popl %ecx; \ 188 popl %ecx; \
188 popl %eax; 189 popl %eax;
189 190# define LOCKDEP_SYS_EXIT_IRQ
190#define ARCH_LOCKDEP_SYS_EXIT_IRQ 191# endif
191#endif
192
193#ifdef CONFIG_TRACE_IRQFLAGS
194# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
195# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
196#else 192#else
197# define TRACE_IRQS_ON
198# define TRACE_IRQS_OFF
199#endif
200#ifdef CONFIG_DEBUG_LOCK_ALLOC
201# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
202# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
203# else
204# define LOCKDEP_SYS_EXIT 193# define LOCKDEP_SYS_EXIT
205# define LOCKDEP_SYS_EXIT_IRQ 194# define LOCKDEP_SYS_EXIT_IRQ
206# endif 195#endif
207
208#endif /* __ASSEMBLY__ */ 196#endif /* __ASSEMBLY__ */
197
209#endif 198#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 965c47d254aa..5f6051d5d139 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -976,11 +976,6 @@ extern void default_banner(void);
976 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 976 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
977 CLBR_NONE, \ 977 CLBR_NONE, \
978 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 978 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
979
980#define ENABLE_INTERRUPTS_SYSEXIT32 \
981 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
982 CLBR_NONE, \
983 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
984#endif /* CONFIG_X86_32 */ 979#endif /* CONFIG_X86_32 */
985 980
986#endif /* __ASSEMBLY__ */ 981#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..d2203b5d9538 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -210,8 +210,23 @@ struct x86_hw_tss {
210 unsigned long sp0; 210 unsigned long sp0;
211 unsigned short ss0, __ss0h; 211 unsigned short ss0, __ss0h;
212 unsigned long sp1; 212 unsigned long sp1;
213 /* ss1 caches MSR_IA32_SYSENTER_CS: */ 213
214 unsigned short ss1, __ss1h; 214 /*
215 * We don't use ring 1, so ss1 is a convenient scratch space in
216 * the same cacheline as sp0. We use ss1 to cache the value in
217 * MSR_IA32_SYSENTER_CS. When we context switch
218 * MSR_IA32_SYSENTER_CS, we first check if the new value being
219 * written matches ss1, and, if it's not, then we wrmsr the new
220 * value and update ss1.
221 *
222 * The only reason we context switch MSR_IA32_SYSENTER_CS is
223 * that we set it to zero in vm86 tasks to avoid corrupting the
224 * stack if we were to go through the sysenter path from vm86
225 * mode.
226 */
227 unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
228
229 unsigned short __ss1h;
215 unsigned long sp2; 230 unsigned long sp2;
216 unsigned short ss2, __ss2h; 231 unsigned short ss2, __ss2h;
217 unsigned long __cr3; 232 unsigned long __cr3;
@@ -276,13 +291,17 @@ struct tss_struct {
276 unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; 291 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
277 292
278 /* 293 /*
279 * .. and then another 0x100 bytes for the emergency kernel stack: 294 * Space for the temporary SYSENTER stack:
280 */ 295 */
281 unsigned long stack[64]; 296 unsigned long SYSENTER_stack[64];
282 297
283} ____cacheline_aligned; 298} ____cacheline_aligned;
284 299
285DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); 300DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
301
302#ifdef CONFIG_X86_32
303DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
304#endif
286 305
287/* 306/*
288 * Save the original ist values for checking stack pointers during debugging 307 * Save the original ist values for checking stack pointers during debugging
@@ -474,7 +493,6 @@ struct thread_struct {
474#ifdef CONFIG_X86_32 493#ifdef CONFIG_X86_32
475 unsigned long sysenter_cs; 494 unsigned long sysenter_cs;
476#else 495#else
477 unsigned long usersp; /* Copy from PDA */
478 unsigned short es; 496 unsigned short es;
479 unsigned short ds; 497 unsigned short ds;
480 unsigned short fsindex; 498 unsigned short fsindex;
@@ -564,6 +582,16 @@ static inline void native_swapgs(void)
564#endif 582#endif
565} 583}
566 584
585static inline unsigned long current_top_of_stack(void)
586{
587#ifdef CONFIG_X86_64
588 return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
589#else
590 /* sp0 on x86_32 is special in and around vm86 mode. */
591 return this_cpu_read_stable(cpu_current_top_of_stack);
592#endif
593}
594
567#ifdef CONFIG_PARAVIRT 595#ifdef CONFIG_PARAVIRT
568#include <asm/paravirt.h> 596#include <asm/paravirt.h>
569#else 597#else
@@ -761,10 +789,10 @@ extern char ignore_fpu_irq;
761#define ARCH_HAS_SPINLOCK_PREFETCH 789#define ARCH_HAS_SPINLOCK_PREFETCH
762 790
763#ifdef CONFIG_X86_32 791#ifdef CONFIG_X86_32
764# define BASE_PREFETCH ASM_NOP4 792# define BASE_PREFETCH ""
765# define ARCH_HAS_PREFETCH 793# define ARCH_HAS_PREFETCH
766#else 794#else
767# define BASE_PREFETCH "prefetcht0 (%1)" 795# define BASE_PREFETCH "prefetcht0 %P1"
768#endif 796#endif
769 797
770/* 798/*
@@ -775,10 +803,9 @@ extern char ignore_fpu_irq;
775 */ 803 */
776static inline void prefetch(const void *x) 804static inline void prefetch(const void *x)
777{ 805{
778 alternative_input(BASE_PREFETCH, 806 alternative_input(BASE_PREFETCH, "prefetchnta %P1",
779 "prefetchnta (%1)",
780 X86_FEATURE_XMM, 807 X86_FEATURE_XMM,
781 "r" (x)); 808 "m" (*(const char *)x));
782} 809}
783 810
784/* 811/*
@@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
788 */ 815 */
789static inline void prefetchw(const void *x) 816static inline void prefetchw(const void *x)
790{ 817{
791 alternative_input(BASE_PREFETCH, 818 alternative_input(BASE_PREFETCH, "prefetchw %P1",
792 "prefetchw (%1)", 819 X86_FEATURE_3DNOWPREFETCH,
793 X86_FEATURE_3DNOW, 820 "m" (*(const char *)x));
794 "r" (x));
795} 821}
796 822
797static inline void spin_lock_prefetch(const void *x) 823static inline void spin_lock_prefetch(const void *x)
@@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
799 prefetchw(x); 825 prefetchw(x);
800} 826}
801 827
828#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
829 TOP_OF_KERNEL_STACK_PADDING)
830
802#ifdef CONFIG_X86_32 831#ifdef CONFIG_X86_32
803/* 832/*
804 * User space process size: 3GB (default). 833 * User space process size: 3GB (default).
@@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
809#define STACK_TOP_MAX STACK_TOP 838#define STACK_TOP_MAX STACK_TOP
810 839
811#define INIT_THREAD { \ 840#define INIT_THREAD { \
812 .sp0 = sizeof(init_stack) + (long)&init_stack, \ 841 .sp0 = TOP_OF_INIT_STACK, \
813 .vm86_info = NULL, \ 842 .vm86_info = NULL, \
814 .sysenter_cs = __KERNEL_CS, \ 843 .sysenter_cs = __KERNEL_CS, \
815 .io_bitmap_ptr = NULL, \ 844 .io_bitmap_ptr = NULL, \
816} 845}
817 846
818/*
819 * Note that the .io_bitmap member must be extra-big. This is because
820 * the CPU will access an additional byte beyond the end of the IO
821 * permission bitmap. The extra byte must be all 1 bits, and must
822 * be within the limit.
823 */
824#define INIT_TSS { \
825 .x86_tss = { \
826 .sp0 = sizeof(init_stack) + (long)&init_stack, \
827 .ss0 = __KERNEL_DS, \
828 .ss1 = __KERNEL_CS, \
829 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
830 }, \
831 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
832}
833
834extern unsigned long thread_saved_pc(struct task_struct *tsk); 847extern unsigned long thread_saved_pc(struct task_struct *tsk);
835 848
836#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
837#define KSTK_TOP(info) \
838({ \
839 unsigned long *__ptr = (unsigned long *)(info); \
840 (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
841})
842
843/* 849/*
844 * The below -8 is to reserve 8 bytes on top of the ring0 stack. 850 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
845 * This is necessary to guarantee that the entire "struct pt_regs" 851 * This is necessary to guarantee that the entire "struct pt_regs"
846 * is accessible even if the CPU haven't stored the SS/ESP registers 852 * is accessible even if the CPU haven't stored the SS/ESP registers
847 * on the stack (interrupt gate does not save these registers 853 * on the stack (interrupt gate does not save these registers
@@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
850 * "struct pt_regs" is possible, but they may contain the 856 * "struct pt_regs" is possible, but they may contain the
851 * completely wrong values. 857 * completely wrong values.
852 */ 858 */
853#define task_pt_regs(task) \ 859#define task_pt_regs(task) \
854({ \ 860({ \
855 struct pt_regs *__regs__; \ 861 unsigned long __ptr = (unsigned long)task_stack_page(task); \
856 __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ 862 __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
857 __regs__ - 1; \ 863 ((struct pt_regs *)__ptr) - 1; \
858}) 864})
859 865
860#define KSTK_ESP(task) (task_pt_regs(task)->sp) 866#define KSTK_ESP(task) (task_pt_regs(task)->sp)
@@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
886#define STACK_TOP_MAX TASK_SIZE_MAX 892#define STACK_TOP_MAX TASK_SIZE_MAX
887 893
888#define INIT_THREAD { \ 894#define INIT_THREAD { \
889 .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ 895 .sp0 = TOP_OF_INIT_STACK \
890}
891
892#define INIT_TSS { \
893 .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
894} 896}
895 897
896/* 898/*
@@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
902#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 904#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
903extern unsigned long KSTK_ESP(struct task_struct *task); 905extern unsigned long KSTK_ESP(struct task_struct *task);
904 906
905/*
906 * User space RSP while inside the SYSCALL fast path
907 */
908DECLARE_PER_CPU(unsigned long, old_rsp);
909
910#endif /* CONFIG_X86_64 */ 907#endif /* CONFIG_X86_64 */
911 908
912extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 909extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
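
The reworked 32-bit task_pt_regs() above finds pt_regs by starting from the task's stack page, stepping to the padded top of the kernel stack, and backing up one pt_regs. A small sketch of that arithmetic with assumed example constants (THREAD_SIZE, the padding and the 32-bit pt_regs size are config-dependent; the values below are illustrative only):

	#include <stdint.h>

	#define THREAD_SIZE_EX                 8192UL     /* assumed, config-dependent */
	#define TOP_OF_KERNEL_STACK_PADDING_EX 8UL        /* assumed */
	#define SIZEOF_PTREGS32_EX             (17UL * 4) /* assumed 32-bit pt_regs size */

	/* task_pt_regs(task) in the 32-bit case, written as pointer arithmetic: */
	static uintptr_t task_pt_regs_sketch(uintptr_t stack_page)
	{
		uintptr_t top = stack_page + THREAD_SIZE_EX - TOP_OF_KERNEL_STACK_PADDING_EX;

		/* pt_regs sits immediately below the padded top of the stack. */
		return top - SIZEOF_PTREGS32_EX;
	}
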
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..19507ffa5d28 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
31#else /* __i386__ */ 31#else /* __i386__ */
32 32
33struct pt_regs { 33struct pt_regs {
34/*
35 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
36 * unless syscall needs a complete, fully filled "struct pt_regs".
37 */
34 unsigned long r15; 38 unsigned long r15;
35 unsigned long r14; 39 unsigned long r14;
36 unsigned long r13; 40 unsigned long r13;
37 unsigned long r12; 41 unsigned long r12;
38 unsigned long bp; 42 unsigned long bp;
39 unsigned long bx; 43 unsigned long bx;
40/* arguments: non interrupts/non tracing syscalls only save up to here*/ 44/* These regs are callee-clobbered. Always saved on kernel entry. */
41 unsigned long r11; 45 unsigned long r11;
42 unsigned long r10; 46 unsigned long r10;
43 unsigned long r9; 47 unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
47 unsigned long dx; 51 unsigned long dx;
48 unsigned long si; 52 unsigned long si;
49 unsigned long di; 53 unsigned long di;
54/*
55 * On syscall entry, this is syscall#. On CPU exception, this is error code.
56 * On hw interrupt, it's IRQ number:
57 */
50 unsigned long orig_ax; 58 unsigned long orig_ax;
51/* end of arguments */ 59/* Return frame for iretq */
52/* cpu exception frame or undefined */
53 unsigned long ip; 60 unsigned long ip;
54 unsigned long cs; 61 unsigned long cs;
55 unsigned long flags; 62 unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
89} 96}
90 97
91/* 98/*
92 * user_mode_vm(regs) determines whether a register set came from user mode. 99 * user_mode(regs) determines whether a register set came from user
93 * This is true if V8086 mode was enabled OR if the register set was from 100 * mode. On x86_32, this is true if V8086 mode was enabled OR if the
94 * protected mode with RPL-3 CS value. This tricky test checks that with 101 * register set was from protected mode with RPL-3 CS value. This
95 * one comparison. Many places in the kernel can bypass this full check 102 * tricky test checks that with one comparison.
96 * if they have already ruled out V8086 mode, so user_mode(regs) can be used. 103 *
104 * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
105 * the extra check.
97 */ 106 */
98static inline int user_mode(struct pt_regs *regs) 107static inline int user_mode(struct pt_regs *regs)
99{ 108{
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
104#endif 113#endif
105} 114}
106 115
107static inline int user_mode_vm(struct pt_regs *regs)
108{
109#ifdef CONFIG_X86_32
110 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
111 USER_RPL;
112#else
113 return user_mode(regs);
114#endif
115}
116
117static inline int v8086_mode(struct pt_regs *regs) 116static inline int v8086_mode(struct pt_regs *regs)
118{ 117{
119#ifdef CONFIG_X86_32 118#ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
138#endif 137#endif
139} 138}
140 139
141#define current_user_stack_pointer() this_cpu_read(old_rsp) 140#define current_user_stack_pointer() current_pt_regs()->sp
142/* ia32 vs. x32 difference */ 141#define compat_user_stack_pointer() current_pt_regs()->sp
143#define compat_user_stack_pointer() \
144 (test_thread_flag(TIF_IA32) \
145 ? current_pt_regs()->sp \
146 : this_cpu_read(old_rsp))
147#endif 142#endif
148 143
149#ifdef CONFIG_X86_32 144#ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
248 */ 243 */
249#define arch_ptrace_stop_needed(code, info) \ 244#define arch_ptrace_stop_needed(code, info) \
250({ \ 245({ \
251 set_thread_flag(TIF_NOTIFY_RESUME); \ 246 force_iret(); \
252 false; \ 247 false; \
253}) 248})
254 249
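
The updated comment above keeps the old user_mode_vm() one-comparison trick as the 32-bit user_mode() check. A worked example using the SEGMENT_RPL_MASK/USER_RPL values from segment.h and the EFLAGS VM bit (sketch, not the kernel function):

	#define SEGMENT_RPL_MASK 0x3
	#define USER_RPL         0x3
	#define X86_VM_MASK      (1UL << 17)   /* EFLAGS.VM, vm86 mode */

	/* x86_32 flavour of the check, written out as a plain function. */
	static int user_mode_32_sketch(unsigned long cs, unsigned long flags)
	{
		return ((cs & SEGMENT_RPL_MASK) | (flags & X86_VM_MASK)) >= USER_RPL;
	}

	/* Kernel mode: RPL 0, VM clear      -> 0       >= 3 is false.
	 * User mode:   RPL 3                -> 3       >= 3 is true.
	 * vm86 mode:   RPL may be 0, VM set -> 0x20000 >= 3 is true. */
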
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index db257a58571f..5a9856eb12ba 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@
3 3
4#include <linux/const.h> 4#include <linux/const.h>
5 5
6/* Constructor for a conventional segment GDT (or LDT) entry */ 6/*
7/* This is a macro so it can be used in initializers */ 7 * Constructor for a conventional segment GDT (or LDT) entry.
8 * This is a macro so it can be used in initializers.
9 */
8#define GDT_ENTRY(flags, base, limit) \ 10#define GDT_ENTRY(flags, base, limit) \
9 ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ 11 ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
10 (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ 12 (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
@@ -12,198 +14,228 @@
12 (((base) & _AC(0x00ffffff,ULL)) << 16) | \ 14 (((base) & _AC(0x00ffffff,ULL)) << 16) | \
13 (((limit) & _AC(0x0000ffff,ULL)))) 15 (((limit) & _AC(0x0000ffff,ULL))))
14 16
15/* Simple and small GDT entries for booting only */ 17/* Simple and small GDT entries for booting only: */
16 18
17#define GDT_ENTRY_BOOT_CS 2 19#define GDT_ENTRY_BOOT_CS 2
18#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) 20#define GDT_ENTRY_BOOT_DS 3
21#define GDT_ENTRY_BOOT_TSS 4
22#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
23#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
24#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
25
26/*
27 * Bottom two bits of selector give the ring
28 * privilege level
29 */
30#define SEGMENT_RPL_MASK 0x3
19 31
20#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) 32/* User mode is privilege level 3: */
21#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) 33#define USER_RPL 0x3
22 34
23#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) 35/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
24#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) 36#define SEGMENT_TI_MASK 0x4
37/* LDT segment has TI set ... */
38#define SEGMENT_LDT 0x4
39/* ... GDT has it cleared */
40#define SEGMENT_GDT 0x0
25 41
26#define SEGMENT_RPL_MASK 0x3 /* 42#define GDT_ENTRY_INVALID_SEG 0
27 * Bottom two bits of selector give the ring
28 * privilege level
29 */
30#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */
31#define USER_RPL 0x3 /* User mode is privilege level 3 */
32#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
33#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */
34 43
35#ifdef CONFIG_X86_32 44#ifdef CONFIG_X86_32
36/* 45/*
37 * The layout of the per-CPU GDT under Linux: 46 * The layout of the per-CPU GDT under Linux:
38 * 47 *
39 * 0 - null 48 * 0 - null <=== cacheline #1
40 * 1 - reserved 49 * 1 - reserved
41 * 2 - reserved 50 * 2 - reserved
42 * 3 - reserved 51 * 3 - reserved
43 * 52 *
44 * 4 - unused <==== new cacheline 53 * 4 - unused <=== cacheline #2
45 * 5 - unused 54 * 5 - unused
46 * 55 *
47 * ------- start of TLS (Thread-Local Storage) segments: 56 * ------- start of TLS (Thread-Local Storage) segments:
48 * 57 *
49 * 6 - TLS segment #1 [ glibc's TLS segment ] 58 * 6 - TLS segment #1 [ glibc's TLS segment ]
50 * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] 59 * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
51 * 8 - TLS segment #3 60 * 8 - TLS segment #3 <=== cacheline #3
52 * 9 - reserved 61 * 9 - reserved
53 * 10 - reserved 62 * 10 - reserved
54 * 11 - reserved 63 * 11 - reserved
55 * 64 *
56 * ------- start of kernel segments: 65 * ------- start of kernel segments:
57 * 66 *
58 * 12 - kernel code segment <==== new cacheline 67 * 12 - kernel code segment <=== cacheline #4
59 * 13 - kernel data segment 68 * 13 - kernel data segment
60 * 14 - default user CS 69 * 14 - default user CS
61 * 15 - default user DS 70 * 15 - default user DS
62 * 16 - TSS 71 * 16 - TSS <=== cacheline #5
63 * 17 - LDT 72 * 17 - LDT
64 * 18 - PNPBIOS support (16->32 gate) 73 * 18 - PNPBIOS support (16->32 gate)
65 * 19 - PNPBIOS support 74 * 19 - PNPBIOS support
66 * 20 - PNPBIOS support 75 * 20 - PNPBIOS support <=== cacheline #6
67 * 21 - PNPBIOS support 76 * 21 - PNPBIOS support
68 * 22 - PNPBIOS support 77 * 22 - PNPBIOS support
69 * 23 - APM BIOS support 78 * 23 - APM BIOS support
70 * 24 - APM BIOS support 79 * 24 - APM BIOS support <=== cacheline #7
71 * 25 - APM BIOS support 80 * 25 - APM BIOS support
72 * 81 *
73 * 26 - ESPFIX small SS 82 * 26 - ESPFIX small SS
74 * 27 - per-cpu [ offset to per-cpu data area ] 83 * 27 - per-cpu [ offset to per-cpu data area ]
75 * 28 - stack_canary-20 [ for stack protector ] 84 * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
76 * 29 - unused 85 * 29 - unused
77 * 30 - unused 86 * 30 - unused
78 * 31 - TSS for double fault handler 87 * 31 - TSS for double fault handler
79 */ 88 */
80#define GDT_ENTRY_TLS_MIN 6 89#define GDT_ENTRY_TLS_MIN 6
81#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) 90#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
82 91
92#define GDT_ENTRY_KERNEL_CS 12
93#define GDT_ENTRY_KERNEL_DS 13
83#define GDT_ENTRY_DEFAULT_USER_CS 14 94#define GDT_ENTRY_DEFAULT_USER_CS 14
84
85#define GDT_ENTRY_DEFAULT_USER_DS 15 95#define GDT_ENTRY_DEFAULT_USER_DS 15
96#define GDT_ENTRY_TSS 16
97#define GDT_ENTRY_LDT 17
98#define GDT_ENTRY_PNPBIOS_CS32 18
99#define GDT_ENTRY_PNPBIOS_CS16 19
100#define GDT_ENTRY_PNPBIOS_DS 20
101#define GDT_ENTRY_PNPBIOS_TS1 21
102#define GDT_ENTRY_PNPBIOS_TS2 22
103#define GDT_ENTRY_APMBIOS_BASE 23
104
105#define GDT_ENTRY_ESPFIX_SS 26
106#define GDT_ENTRY_PERCPU 27
107#define GDT_ENTRY_STACK_CANARY 28
108
109#define GDT_ENTRY_DOUBLEFAULT_TSS 31
86 110
87#define GDT_ENTRY_KERNEL_BASE (12) 111/*
112 * Number of entries in the GDT table:
113 */
114#define GDT_ENTRIES 32
88 115
89#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) 116/*
117 * Segment selector values corresponding to the above entries:
118 */
90 119
91#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1) 120#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
121#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
122#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
123#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
124#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
92 125
93#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4) 126/* segment for calling fn: */
94#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5) 127#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
128/* code segment for BIOS: */
129#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
95 130
96#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6) 131/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
97#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11) 132#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
98 133
99#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14) 134/* data segment for BIOS: */
100#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) 135#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
136/* transfer data segment: */
137#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
138/* another data segment: */
139#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
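Each selector above is the GDT index shifted left by 3 (index * 8), with the low two RPL bits set to 3 for user-mode selectors. SEGMENT_IS_PNP_CODE() relies on that encoding: masking with 0xf4 discards the RPL bits and the lowest index bit, so PNP_CS32 (entry 18, selector 0x90) and PNP_CS16 (entry 19, selector 0x98) both reduce to 0x90. A small standalone check of that arithmetic, using the entry numbers listed above:

    #include <assert.h>
    #include <stdio.h>

    /* Entry numbers copied from the 32-bit layout above. */
    enum {
            ENTRY_KERNEL_CS = 12, ENTRY_KERNEL_DS = 13,
            ENTRY_USER_CS   = 14, ENTRY_USER_DS   = 15,
            ENTRY_PNP_CS32  = 18, ENTRY_PNP_CS16  = 19,
    };

    #define SEL(entry)      ((entry) * 8)       /* kernel selector: RPL 0 */
    #define USER_SEL(entry) ((entry) * 8 + 3)   /* user selector: RPL 3   */

    int main(void)
    {
            unsigned pnp_cs32 = SEL(ENTRY_PNP_CS32);   /* 0x90 */
            unsigned pnp_cs16 = SEL(ENTRY_PNP_CS16);   /* 0x98 */

            printf("__KERNEL_CS = 0x%02x  __KERNEL_DS = 0x%02x\n",
                   SEL(ENTRY_KERNEL_CS), SEL(ENTRY_KERNEL_DS));
            printf("__USER_CS   = 0x%02x  __USER_DS   = 0x%02x\n",
                   USER_SEL(ENTRY_USER_CS), USER_SEL(ENTRY_USER_DS));

            /* The 0xf4 mask ignores RPL and the lowest index bit, so both
             * PNP code selectors (at any RPL) match PNP_CS32. */
            assert((pnp_cs32 & 0xf4) == pnp_cs32);
            assert((pnp_cs16 & 0xf4) == pnp_cs32);
            return 0;
    }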
101 140
102#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
103#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
104#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) 142# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
105#else 143#else
106#define __KERNEL_PERCPU 0 144# define __KERNEL_PERCPU 0
107#endif 145#endif
108 146
109#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
110#ifdef CONFIG_CC_STACKPROTECTOR 147#ifdef CONFIG_CC_STACKPROTECTOR
111#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) 148# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
112#else 149#else
113#define __KERNEL_STACK_CANARY 0 150# define __KERNEL_STACK_CANARY 0
114#endif 151#endif
115 152
116#define GDT_ENTRY_DOUBLEFAULT_TSS 31 153#else /* 64-bit: */
117
118/*
119 * The GDT has 32 entries
120 */
121#define GDT_ENTRIES 32
122 154
123/* The PnP BIOS entries in the GDT */ 155#include <asm/cache.h>
124#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
125#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
126#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
127#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
128#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
129
130/* The PnP BIOS selectors */
131#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
132#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
133#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
134#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
135#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
136 156
157#define GDT_ENTRY_KERNEL32_CS 1
158#define GDT_ENTRY_KERNEL_CS 2
159#define GDT_ENTRY_KERNEL_DS 3
137 160
138/* 161/*
139 * Matching rules for certain types of segments. 162 * We cannot use the same code segment descriptor for user and kernel mode,
163 * not even in long flat mode, because of different DPL.
164 *
165 * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
166 * selectors:
167 *
168 * if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
169 * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
170 *
171 * ss = STAR.SYSRET_CS+8 (in either case)
172 *
173 * thus USER_DS should be between 32-bit and 64-bit code selectors:
140 */ 174 */
175#define GDT_ENTRY_DEFAULT_USER32_CS 4
176#define GDT_ENTRY_DEFAULT_USER_DS 5
177#define GDT_ENTRY_DEFAULT_USER_CS 6
141 178
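The constraint described in the comment can be checked with plain arithmetic: if the SYSRET CS field of MSR_STAR is programmed to the 32-bit user code selector (entry 4), then +8 lands on the user data selector (entry 5) and +16 on the 64-bit user code selector (entry 6), which is why __USER_DS has to sit between the two code segments. A standalone sketch of that check; the star_sysret_cs value is an assumption that matches the layout above, and MSR programming itself is of course kernel-only:

    #include <assert.h>
    #include <stdio.h>

    #define GDT_ENTRY_DEFAULT_USER32_CS 4
    #define GDT_ENTRY_DEFAULT_USER_DS   5
    #define GDT_ENTRY_DEFAULT_USER_CS   6

    #define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)  /* 0x23 */
    #define __USER_DS   (GDT_ENTRY_DEFAULT_USER_DS*8   + 3)  /* 0x2b */
    #define __USER_CS   (GDT_ENTRY_DEFAULT_USER_CS*8   + 3)  /* 0x33 */

    int main(void)
    {
            /* Assumed: STAR.SYSRET_CS holds the 32-bit user code selector. */
            unsigned star_sysret_cs = __USER32_CS;

            /* What SYSRET hardcodes, per the comment above: */
            assert(star_sysret_cs + 16 == __USER_CS);  /* return to 64-bit user CS */
            assert(star_sysret_cs +  8 == __USER_DS);  /* SS in either case        */

            printf("SYSRET: cs32=0x%x ss=0x%x cs64=0x%x\n",
                   star_sysret_cs, star_sysret_cs + 8, star_sysret_cs + 16);
            return 0;
    }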
142/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ 179/* Needs two entries */
143#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) 180#define GDT_ENTRY_TSS 8
144 181/* Needs two entries */
182#define GDT_ENTRY_LDT 10
145 183
146#else 184#define GDT_ENTRY_TLS_MIN 12
147#include <asm/cache.h> 185#define GDT_ENTRY_TLS_MAX 14
148
149#define GDT_ENTRY_KERNEL32_CS 1
150#define GDT_ENTRY_KERNEL_CS 2
151#define GDT_ENTRY_KERNEL_DS 3
152 186
153#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) 187/* Abused to load per CPU data from limit */
188#define GDT_ENTRY_PER_CPU 15
154 189
155/* 190/*
156 * we cannot use the same code segment descriptor for user and kernel 191 * Number of entries in the GDT table:
157 * -- not even in the long flat mode, because of different DPL /kkeil
158 * The segment offset needs to contain a RPL. Grr. -AK
159 * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
160 */ 192 */
161#define GDT_ENTRY_DEFAULT_USER32_CS 4 193#define GDT_ENTRIES 16
162#define GDT_ENTRY_DEFAULT_USER_DS 5
163#define GDT_ENTRY_DEFAULT_USER_CS 6
164#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
165#define __USER32_DS __USER_DS
166
167#define GDT_ENTRY_TSS 8 /* needs two entries */
168#define GDT_ENTRY_LDT 10 /* needs two entries */
169#define GDT_ENTRY_TLS_MIN 12
170#define GDT_ENTRY_TLS_MAX 14
171
172#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
173#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
174 194
175/* TLS indexes for 64bit - hardcoded in arch_prctl */ 195/*
176#define FS_TLS 0 196 * Segment selector values corresponding to the above entries:
177#define GS_TLS 1 197 *
178 198 * Note, selectors also need to have a correct RPL,
179#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) 199 * expressed with the +3 value for user-space selectors:
180#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) 200 */
181 201#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
182#define GDT_ENTRIES 16 202#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
203#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
204#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
205#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
206#define __USER32_DS __USER_DS
207#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
208#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
209
210/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
211#define FS_TLS 0
212#define GS_TLS 1
213
214#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
215#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
183 216
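A selector value packs three fields: bits 3 and up are the table index, bit 2 is the table indicator (0 = GDT), and bits 0-1 are the RPL, which is why the user-space values above carry the +3. The FS/GS TLS selectors follow the same rule starting at GDT_ENTRY_TLS_MIN. A short standalone decomposition, just to make the encoding explicit:

    #include <stdio.h>

    #define GDT_ENTRY_TLS_MIN 12
    #define FS_TLS 0
    #define GS_TLS 1
    #define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN + FS_TLS)*8 + 3)  /* 0x63 */
    #define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN + GS_TLS)*8 + 3)  /* 0x6b */

    static void decompose(const char *name, unsigned sel)
    {
            printf("%s = 0x%02x: index=%u ti=%u rpl=%u\n",
                   name, sel, sel >> 3, (sel >> 2) & 1, sel & 3);
    }

    int main(void)
    {
            decompose("FS_TLS_SEL", FS_TLS_SEL);   /* index 12, GDT, RPL 3 */
            decompose("GS_TLS_SEL", GS_TLS_SEL);   /* index 13, GDT, RPL 3 */
            decompose("__USER_DS ", 5*8 + 3);      /* index  5, GDT, RPL 3 */
            return 0;
    }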
184#endif 217#endif
185 218
186#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
187#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
188#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
189#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
190#ifndef CONFIG_PARAVIRT 219#ifndef CONFIG_PARAVIRT
191#define get_kernel_rpl() 0 220# define get_kernel_rpl() 0
192#endif 221#endif
193 222
194#define IDT_ENTRIES 256 223#define IDT_ENTRIES 256
195#define NUM_EXCEPTION_VECTORS 32 224#define NUM_EXCEPTION_VECTORS 32
196/* Bitmask of exception vectors which push an error code on the stack */ 225
197#define EXCEPTION_ERRCODE_MASK 0x00027d00 226/* Bitmask of exception vectors which push an error code on the stack: */
198#define GDT_SIZE (GDT_ENTRIES * 8) 227#define EXCEPTION_ERRCODE_MASK 0x00027d00
199#define GDT_ENTRY_TLS_ENTRIES 3 228
200#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 229#define GDT_SIZE (GDT_ENTRIES*8)
230#define GDT_ENTRY_TLS_ENTRIES 3
231#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
201 232
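EXCEPTION_ERRCODE_MASK is a per-vector bitmap: bit N is set when exception vector N pushes a hardware error code. 0x00027d00 has bits 8, 10-14 and 17 set, i.e. #DF, #TS, #NP, #SS, #GP, #PF and #AC. A tiny standalone check of that decoding:

    #include <stdio.h>

    #define EXCEPTION_ERRCODE_MASK 0x00027d00
    #define NUM_EXCEPTION_VECTORS  32

    int main(void)
    {
            /* Print every vector whose bit is set in the mask. */
            for (int vec = 0; vec < NUM_EXCEPTION_VECTORS; vec++)
                    if (EXCEPTION_ERRCODE_MASK & (1u << vec))
                            printf("vector %2d pushes an error code\n", vec);
            /* Expected: vectors 8, 10, 11, 12, 13, 14 and 17. */
            return 0;
    }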
202#ifdef __KERNEL__ 233#ifdef __KERNEL__
203#ifndef __ASSEMBLY__ 234#ifndef __ASSEMBLY__
235
204extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; 236extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
205#ifdef CONFIG_TRACING 237#ifdef CONFIG_TRACING
206#define trace_early_idt_handlers early_idt_handlers 238# define trace_early_idt_handlers early_idt_handlers
207#endif 239#endif
208 240
209/* 241/*
@@ -228,37 +260,30 @@ do { \
228} while (0) 260} while (0)
229 261
230/* 262/*
231 * Save a segment register away 263 * Save a segment register away:
232 */ 264 */
233#define savesegment(seg, value) \ 265#define savesegment(seg, value) \
234 asm("mov %%" #seg ",%0":"=r" (value) : : "memory") 266 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
235 267
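savesegment() is just a mov from a segment register into a general-purpose register via an output constraint; the "memory" clobber keeps the compiler from reordering it across surrounding accesses. The same pattern can be exercised from user space on x86-64 Linux, since reading %cs and %ss is harmless there; this is only an illustration of the asm shape, not kernel usage:

    #include <stdio.h>

    /* Same shape as the kernel macro: mov a segment register into "value". */
    #define savesegment(seg, value) \
            asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

    int main(void)
    {
            unsigned long cs, ss;

            savesegment(cs, cs);
            savesegment(ss, ss);

            /* 64-bit processes typically see 0x33 (__USER_CS) and 0x2b (__USER_DS). */
            printf("cs=0x%lx ss=0x%lx\n", cs, ss);
            return 0;
    }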
236/* 268/*
237 * x86_32 user gs accessors. 269 * x86-32 user GS accessors:
238 */ 270 */
239#ifdef CONFIG_X86_32 271#ifdef CONFIG_X86_32
240#ifdef CONFIG_X86_32_LAZY_GS 272# ifdef CONFIG_X86_32_LAZY_GS
241#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) 273# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
242#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) 274# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
243#define task_user_gs(tsk) ((tsk)->thread.gs) 275# define task_user_gs(tsk) ((tsk)->thread.gs)
244#define lazy_save_gs(v) savesegment(gs, (v)) 276# define lazy_save_gs(v) savesegment(gs, (v))
245#define lazy_load_gs(v) loadsegment(gs, (v)) 277# define lazy_load_gs(v) loadsegment(gs, (v))
246#else /* X86_32_LAZY_GS */ 278# else /* X86_32_LAZY_GS */
247#define get_user_gs(regs) (u16)((regs)->gs) 279# define get_user_gs(regs) (u16)((regs)->gs)
248#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) 280# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
249#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) 281# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
250#define lazy_save_gs(v) do { } while (0) 282# define lazy_save_gs(v) do { } while (0)
251#define lazy_load_gs(v) do { } while (0) 283# define lazy_load_gs(v) do { } while (0)
252#endif /* X86_32_LAZY_GS */ 284# endif /* X86_32_LAZY_GS */
253#endif /* X86_32 */ 285#endif /* X86_32 */
254 286
255static inline unsigned long get_limit(unsigned long segment)
256{
257 unsigned long __limit;
258 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
259 return __limit + 1;
260}
261
262#endif /* !__ASSEMBLY__ */ 287#endif /* !__ASSEMBLY__ */
263#endif /* __KERNEL__ */ 288#endif /* __KERNEL__ */
264 289
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ff4e7b236e21..f69e06b283fb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
66 */ 66 */
67extern struct boot_params boot_params; 67extern struct boot_params boot_params;
68 68
69static inline bool kaslr_enabled(void)
70{
71 return !!(boot_params.hdr.loadflags & KASLR_FLAG);
72}
73
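The new kaslr_enabled() helper is a plain flag test: bit 1 of boot_params.hdr.loadflags (KASLR_FLAG, added to the boot protocol header further down in this series) records whether the decompressor actually randomized the kernel base. A minimal standalone illustration of the same test, with a made-up loadflags value:

    #include <stdbool.h>
    #include <stdio.h>

    /* Bit values as defined in the boot protocol header (see bootparam.h below). */
    #define LOADED_HIGH (1 << 0)
    #define KASLR_FLAG  (1 << 1)

    static bool kaslr_enabled(unsigned char loadflags)
    {
            return !!(loadflags & KASLR_FLAG);
    }

    int main(void)
    {
            unsigned char loadflags = LOADED_HIGH | KASLR_FLAG;  /* hypothetical value */

            printf("KASLR %s\n", kaslr_enabled(loadflags) ? "enabled" : "disabled");
            return 0;
    }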
69/* 74/*
70 * Do NOT EVER look at the BIOS memory size location. 75 * Do NOT EVER look at the BIOS memory size location.
71 * It does not work on many machines. 76 * It does not work on many machines.
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..6fe6b182c998 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
57 unsigned long ip; 57 unsigned long ip;
58 unsigned long flags; 58 unsigned long flags;
59 unsigned short cs; 59 unsigned short cs;
60 unsigned short gs; 60 unsigned short __pad2; /* Was called gs, but was always zero. */
61 unsigned short fs; 61 unsigned short __pad1; /* Was called fs, but was always zero. */
62 unsigned short __pad0; 62 unsigned short ss;
63 unsigned long err; 63 unsigned long err;
64 unsigned long trapno; 64 unsigned long trapno;
65 unsigned long oldmask; 65 unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 7a958164088c..89db46752a8f 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,9 +13,7 @@
13 X86_EFLAGS_CF | X86_EFLAGS_RF) 13 X86_EFLAGS_CF | X86_EFLAGS_RF)
14 14
15void signal_fault(struct pt_regs *regs, void __user *frame, char *where); 15void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
16 16int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
17int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
18 unsigned long *pax);
19int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, 17int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
20 struct pt_regs *regs, unsigned long mask); 18 struct pt_regs *regs, unsigned long mask);
21 19
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f4e270..ba665ebd17bb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
27 27
28#ifdef CONFIG_X86_SMAP 28#ifdef CONFIG_X86_SMAP
29 29
30#define ASM_CLAC \ 30#define ASM_CLAC \
31 661: ASM_NOP3 ; \ 31 ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
32 .pushsection .altinstr_replacement, "ax" ; \ 32
33 662: __ASM_CLAC ; \ 33#define ASM_STAC \
34 .popsection ; \ 34 ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
35 .pushsection .altinstructions, "a" ; \
36 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
37 .popsection
38
39#define ASM_STAC \
40 661: ASM_NOP3 ; \
41 .pushsection .altinstr_replacement, "ax" ; \
42 662: __ASM_STAC ; \
43 .popsection ; \
44 .pushsection .altinstructions, "a" ; \
45 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
46 .popsection
47 35
48#else /* CONFIG_X86_SMAP */ 36#else /* CONFIG_X86_SMAP */
49 37
@@ -61,20 +49,20 @@
61static __always_inline void clac(void) 49static __always_inline void clac(void)
62{ 50{
63 /* Note: a barrier is implicit in alternative() */ 51 /* Note: a barrier is implicit in alternative() */
64 alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); 52 alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
65} 53}
66 54
67static __always_inline void stac(void) 55static __always_inline void stac(void)
68{ 56{
69 /* Note: a barrier is implicit in alternative() */ 57 /* Note: a barrier is implicit in alternative() */
70 alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); 58 alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
71} 59}
72 60
73/* These macros can be used in asm() statements */ 61/* These macros can be used in asm() statements */
74#define ASM_CLAC \ 62#define ASM_CLAC \
75 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) 63 ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
76#define ASM_STAC \ 64#define ASM_STAC \
77 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) 65 ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
78 66
79#else /* CONFIG_X86_SMAP */ 67#else /* CONFIG_X86_SMAP */
80 68
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3bc835..81d02fc7dafa 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
154void native_smp_prepare_boot_cpu(void); 154void native_smp_prepare_boot_cpu(void);
155void native_smp_prepare_cpus(unsigned int max_cpus); 155void native_smp_prepare_cpus(unsigned int max_cpus);
156void native_smp_cpus_done(unsigned int max_cpus); 156void native_smp_cpus_done(unsigned int max_cpus);
157void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
157int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); 158int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
158int native_cpu_disable(void); 159int native_cpu_disable(void);
159void native_cpu_die(unsigned int cpu); 160void native_cpu_die(unsigned int cpu);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6a4b00fafb00..aeb4666e0c0a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -4,6 +4,8 @@
4 4
5#ifdef __KERNEL__ 5#ifdef __KERNEL__
6 6
7#include <asm/nops.h>
8
7static inline void native_clts(void) 9static inline void native_clts(void)
8{ 10{
9 asm volatile("clts"); 11 asm volatile("clts");
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
199 "+m" (*(volatile char __force *)__p)); 201 "+m" (*(volatile char __force *)__p));
200} 202}
201 203
204static inline void clwb(volatile void *__p)
205{
206 volatile struct { char x[64]; } *p = __p;
207
208 asm volatile(ALTERNATIVE_2(
209 ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
210 ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
211 X86_FEATURE_CLFLUSHOPT,
212 ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
213 X86_FEATURE_CLWB)
214 : [p] "+m" (*p)
215 : [pax] "a" (p));
216}
217
218static inline void pcommit_sfence(void)
219{
220 alternative(ASM_NOP7,
221 ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
222 "sfence",
223 X86_FEATURE_PCOMMIT);
224}
225
202#define nop() asm volatile ("nop") 226#define nop() asm volatile ("nop")
203 227
204 228
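The new clwb() open-codes the instruction bytes and uses ALTERNATIVE_2 so the same call site degrades from clwb to clflushopt to plain clflush, depending on X86_FEATURE_CLWB/CLFLUSHOPT: the 0x66 prefix turns the clflush encoding (0f ae /7) into clflushopt, and 66 0f ae /6 is clwb. The baseline clflush form can be demonstrated from user space, since it is unprivileged and available on essentially all x86-64 CPUs; this sketch only shows the fallback instruction, not the kernel's runtime patching:

    #include <stdio.h>

    /* Flush the cache line containing *p.  clflush is the baseline form that
     * the kernel's clwb() falls back to when neither CLFLUSHOPT nor CLWB is
     * available. */
    static inline void flush_line(const void *p)
    {
            asm volatile("clflush (%0)" : : "r" (p) : "memory");
    }

    int main(void)
    {
            static char buf[64];

            buf[0] = 42;
            flush_line(buf);
            printf("flushed cache line at %p\n", (void *)buf);
            return 0;
    }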
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1d4e4f279a32..ea2dbe82cba3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,6 +13,33 @@
13#include <asm/types.h> 13#include <asm/types.h>
14 14
15/* 15/*
16 * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
17 * reserve at the top of the kernel stack. We do it because of a nasty
18 * 32-bit corner case. On x86_32, the hardware stack frame is
19 * variable-length. Except for vm86 mode, struct pt_regs assumes a
20 * maximum-length frame. If we enter from CPL 0, the top 8 bytes of
21 * pt_regs don't actually exist. Ordinarily this doesn't matter, but it
22 * does in at least one case:
23 *
24 * If we take an NMI early enough in SYSENTER, then we can end up with
25 * pt_regs that extends above sp0. On the way out, in the espfix code,
26 * we can read the saved SS value, but that value will be above sp0.
27 * Without this offset, that can result in a page fault. (We are
28 * careful that, in this case, the value we read doesn't matter.)
29 *
30 * In vm86 mode, the hardware frame is much longer still, but we neither
31 * access the extra members from NMI context, nor do we write such a
32 * frame at sp0 at all.
33 *
34 * x86_64 has a fixed-length stack frame.
35 */
36#ifdef CONFIG_X86_32
37# define TOP_OF_KERNEL_STACK_PADDING 8
38#else
39# define TOP_OF_KERNEL_STACK_PADDING 0
40#endif
41
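The padding only moves where the "top of stack" that pt_regs hangs off is considered to be: on 32-bit, that point is pulled 8 bytes below the true end of the THREAD_SIZE allocation, so a hardware frame written without the SS:ESP slots still stays inside the stack. A hedged sketch of the arithmetic; THREAD_SIZE of 8 KiB, the base address, and the 68-byte pt_regs below are illustrative assumptions, not values taken from this patch:

    #include <stdio.h>

    #define THREAD_SIZE                  (2 * 4096)   /* assumed 8 KiB 32-bit stack */
    #define TOP_OF_KERNEL_STACK_PADDING  8            /* x86_32 value from above    */
    #define SIZEOF_PTREGS                68           /* hypothetical frame size    */

    int main(void)
    {
            unsigned long stack_base   = 0xc0000000UL;  /* hypothetical allocation */
            unsigned long stack_end    = stack_base + THREAD_SIZE;
            unsigned long top_of_stack = stack_end - TOP_OF_KERNEL_STACK_PADDING;
            unsigned long pt_regs_at   = top_of_stack - SIZEOF_PTREGS;

            printf("stack end     : %#lx\n", stack_end);
            printf("top of stack  : %#lx (8 reserved bytes above)\n", top_of_stack);
            printf("pt_regs start : %#lx\n", pt_regs_at);
            return 0;
    }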
42/*
16 * low level task data that entry.S needs immediate access to 43 * low level task data that entry.S needs immediate access to
17 * - this struct should fit entirely inside of one cache line 44 * - this struct should fit entirely inside of one cache line
18 * - this struct shares the supervisor stack pages 45 * - this struct shares the supervisor stack pages
@@ -145,7 +172,6 @@ struct thread_info {
145#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) 172#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
146 173
147#define STACK_WARN (THREAD_SIZE/8) 174#define STACK_WARN (THREAD_SIZE/8)
148#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
149 175
150/* 176/*
151 * macros/functions for gaining access to the thread information structure 177 * macros/functions for gaining access to the thread information structure
@@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
158 184
159static inline struct thread_info *current_thread_info(void) 185static inline struct thread_info *current_thread_info(void)
160{ 186{
161 struct thread_info *ti; 187 return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
162 ti = (void *)(this_cpu_read_stable(kernel_stack) +
163 KERNEL_STACK_OFFSET - THREAD_SIZE);
164 return ti;
165} 188}
166 189
167static inline unsigned long current_stack_pointer(void) 190static inline unsigned long current_stack_pointer(void)
@@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
177 200
178#else /* !__ASSEMBLY__ */ 201#else /* !__ASSEMBLY__ */
179 202
180/* how to get the thread information struct from ASM */ 203/* Load thread_info address into "reg" */
181#define GET_THREAD_INFO(reg) \ 204#define GET_THREAD_INFO(reg) \
182 _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ 205 _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
183 _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; 206 _ASM_SUB $(THREAD_SIZE),reg ;
184 207
185/* 208/*
186 * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in 209 * ASM operand which evaluates to a 'thread_info' address of
187 * a certain register (to be used in assembler memory operands). 210 * the current task, if it is known that "reg" is exactly "off"
211 * bytes below the top of the stack currently.
212 *
213 * ( The kernel stack's size is known at build time, it is usually
214 * 2 or 4 pages, and the bottom of the kernel stack contains
215 * the thread_info structure. So to access the thread_info very
216 * quickly from assembly code we can calculate down from the
217 * top of the kernel stack to the bottom, using constant,
218 * build-time calculations only. )
219 *
220 * For example, to fetch the current thread_info->flags value into %eax
221 * on x86-64 defconfig kernels, in syscall entry code where RSP is
222 * currently at exactly SIZEOF_PTREGS bytes away from the top of the
223 * stack:
224 *
225 * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
226 *
227 * will translate to:
228 *
229 * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
230 *
231 * which is below the current RSP by almost 16K.
188 */ 232 */
189#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) 233#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
190 234
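The -0x3f48 displacement in the worked example above can be reproduced from the values that example implies: a 16 KiB THREAD_SIZE, a 168-byte SIZEOF_PTREGS (21 saved registers times 8 bytes), and TI_flags at offset 16 into thread_info. Those three numbers are inferred from the comment, so treat them as assumptions of this sketch rather than guaranteed constants:

    #include <assert.h>
    #include <stdio.h>

    /* Values inferred from the comment's example (assumptions, see above). */
    #define THREAD_SIZE    (16 * 1024)
    #define SIZEOF_PTREGS  (21 * 8)     /* r15..ss in struct pt_regs = 168 bytes */
    #define TI_flags       16

    int main(void)
    {
            /* ASM_THREAD_INFO(field, reg, off) expands to ((field)+(off)-THREAD_SIZE)(reg). */
            long disp = TI_flags + SIZEOF_PTREGS - THREAD_SIZE;

            printf("displacement = %ld (%#lx)\n",
                   disp, (unsigned long)(unsigned int)disp);
            assert(disp == -0x3f48);   /* matches the mov -0x3f48(%rsp) in the comment */
            return 0;
    }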
191#endif 235#endif
192 236
@@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
236#endif 280#endif
237 return false; 281 return false;
238} 282}
283
284/*
285 * Force syscall return via IRET by making it look as if there was
286 * some work pending. IRET is our most capable (but slowest) syscall
287 * return path, which is able to restore modified SS, CS and certain
288 * EFLAGS values that other (fast) syscall return instructions
289 * are not able to restore properly.
290 */
291#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
292
239#endif /* !__ASSEMBLY__ */ 293#endif /* !__ASSEMBLY__ */
240 294
241#ifndef __ASSEMBLY__ 295#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b0988043a..ab456dc233b5 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -15,6 +15,7 @@
15 15
16/* loadflags */ 16/* loadflags */
17#define LOADED_HIGH (1<<0) 17#define LOADED_HIGH (1<<0)
18#define KASLR_FLAG (1<<1)
18#define QUIET_FLAG (1<<5) 19#define QUIET_FLAG (1<<5)
19#define KEEP_SEGMENTS (1<<6) 20#define KEEP_SEGMENTS (1<<6)
20#define CAN_USE_HEAP (1<<7) 21#define CAN_USE_HEAP (1<<7)
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a88851..580aee3072e0 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
25#else /* __i386__ */ 25#else /* __i386__ */
26 26
27#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) 27#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
28/*
29 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
30 * unless syscall needs a complete, fully filled "struct pt_regs".
31 */
28#define R15 0 32#define R15 0
29#define R14 8 33#define R14 8
30#define R13 16 34#define R13 16
31#define R12 24 35#define R12 24
32#define RBP 32 36#define RBP 32
33#define RBX 40 37#define RBX 40
34/* arguments: interrupts/non tracing syscalls only save up to here*/ 38/* These regs are callee-clobbered. Always saved on kernel entry. */
35#define R11 48 39#define R11 48
36#define R10 56 40#define R10 56
37#define R9 64 41#define R9 64
@@ -41,15 +45,17 @@
41#define RDX 96 45#define RDX 96
42#define RSI 104 46#define RSI 104
43#define RDI 112 47#define RDI 112
44#define ORIG_RAX 120 /* = ERROR */ 48/*
45/* end of arguments */ 49 * On syscall entry, this is syscall#. On CPU exception, this is error code.
46/* cpu exception frame or undefined in case of fast syscall. */ 50 * On hw interrupt, it's IRQ number:
51 */
52#define ORIG_RAX 120
53/* Return frame for iretq */
47#define RIP 128 54#define RIP 128
48#define CS 136 55#define CS 136
49#define EFLAGS 144 56#define EFLAGS 144
50#define RSP 152 57#define RSP 152
51#define SS 160 58#define SS 160
52#define ARGOFFSET R11
53#endif /* __ASSEMBLY__ */ 59#endif /* __ASSEMBLY__ */
54 60
55/* top of stack page */ 61/* top of stack page */
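The offsets listed in this hunk are simply 8 bytes per saved register, R15 at 0 through SS at 160, so the full frame ends at 168 bytes; that is the pt_regs size the entry code (and the thread_info example above) works with. A quick standalone consistency check of those numbers:

    #include <assert.h>
    #include <stdio.h>

    /* Offsets copied from the hunk above (uapi/asm/ptrace-abi.h). */
    #define R15      0
    #define RBX      40
    #define R11      48
    #define RDI      112
    #define ORIG_RAX 120
    #define RIP      128
    #define SS       160

    int main(void)
    {
            /* Every slot is 8 bytes, so offset = slot position * 8. */
            assert(RBX == 5 * 8);          /* 6th callee-preserved slot        */
            assert(ORIG_RAX == 15 * 8);    /* follows the 15 GP register slots */
            assert(SS == 20 * 8);          /* last slot of the iret frame      */

            printf("pt_regs frame size = %d bytes\n", SS + 8);   /* 168 */
            return 0;
    }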
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa4d999..bc16115af39b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
41#ifndef __KERNEL__ 41#ifndef __KERNEL__
42 42
43struct pt_regs { 43struct pt_regs {
44/*
45 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
46 * unless syscall needs a complete, fully filled "struct pt_regs".
47 */
44 unsigned long r15; 48 unsigned long r15;
45 unsigned long r14; 49 unsigned long r14;
46 unsigned long r13; 50 unsigned long r13;
47 unsigned long r12; 51 unsigned long r12;
48 unsigned long rbp; 52 unsigned long rbp;
49 unsigned long rbx; 53 unsigned long rbx;
50/* arguments: non interrupts/non tracing syscalls only save up to here*/ 54/* These regs are callee-clobbered. Always saved on kernel entry. */
51 unsigned long r11; 55 unsigned long r11;
52 unsigned long r10; 56 unsigned long r10;
53 unsigned long r9; 57 unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
57 unsigned long rdx; 61 unsigned long rdx;
58 unsigned long rsi; 62 unsigned long rsi;
59 unsigned long rdi; 63 unsigned long rdi;
64/*
65 * On syscall entry, this is syscall#. On CPU exception, this is error code.
66 * On hw interrupt, it's IRQ number:
67 */
60 unsigned long orig_rax; 68 unsigned long orig_rax;
61/* end of arguments */ 69/* Return frame for iretq */
62/* cpu exception frame or undefined */
63 unsigned long rip; 70 unsigned long rip;
64 unsigned long cs; 71 unsigned long cs;
65 unsigned long eflags; 72 unsigned long eflags;
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d8b9f9081e86..16dc4e8a2cd3 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,9 +177,24 @@ struct sigcontext {
177 __u64 rip; 177 __u64 rip;
178 __u64 eflags; /* RFLAGS */ 178 __u64 eflags; /* RFLAGS */
179 __u16 cs; 179 __u16 cs;
180 __u16 gs; 180
181 __u16 fs; 181 /*
182 __u16 __pad0; 182 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
183 * Linux saved and restored fs and gs in these slots. This
184 * was counterproductive, as fsbase and gsbase were never
185 * saved, so arch_prctl was presumably unreliable.
186 *
187 * If these slots are ever needed for any other purpose, there
188 * is some risk that very old 64-bit binaries could get
189 * confused. I doubt that many such binaries still work,
190 * though, since the same patch in 2.5.64 also removed the
191 * 64-bit set_thread_area syscall, so it appears that there is
192 * no TLS API that works in both pre- and post-2.5.64 kernels.
193 */
194 __u16 __pad2; /* Was gs. */
195 __u16 __pad1; /* Was fs. */
196
197 __u16 ss;
183 __u64 err; 198 __u64 err;
184 __u64 trapno; 199 __u64 trapno;
185 __u64 oldmask; 200 __u64 oldmask;