Diffstat (limited to 'arch/x86/include')
-rw-r--r--  arch/x86/include/asm/alternative-asm.h | 53
-rw-r--r--  arch/x86/include/asm/alternative.h | 73
-rw-r--r--  arch/x86/include/asm/apic.h | 3
-rw-r--r--  arch/x86/include/asm/barrier.h | 6
-rw-r--r--  arch/x86/include/asm/calling.h | 284
-rw-r--r--  arch/x86/include/asm/compat.h | 2
-rw-r--r--  arch/x86/include/asm/cpu.h | 2
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 42
-rw-r--r--  arch/x86/include/asm/desc.h | 7
-rw-r--r--  arch/x86/include/asm/dwarf2.h | 24
-rw-r--r--  arch/x86/include/asm/e820.h | 8
-rw-r--r--  arch/x86/include/asm/efi.h | 6
-rw-r--r--  arch/x86/include/asm/elf.h | 11
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 130
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 5
-rw-r--r--  arch/x86/include/asm/insn.h | 2
-rw-r--r--  arch/x86/include/asm/iommu_table.h | 11
-rw-r--r--  arch/x86/include/asm/irqflags.h | 49
-rw-r--r--  arch/x86/include/asm/jump_label.h | 5
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 28
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 2
-rw-r--r--  arch/x86/include/asm/livepatch.h | 4
-rw-r--r--  arch/x86/include/asm/mce.h | 16
-rw-r--r--  arch/x86/include/asm/microcode.h | 73
-rw-r--r--  arch/x86/include/asm/microcode_intel.h | 13
-rw-r--r--  arch/x86/include/asm/mwait.h | 8
-rw-r--r--  arch/x86/include/asm/page_types.h | 2
-rw-r--r--  arch/x86/include/asm/paravirt.h | 13
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 8
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable-2level_types.h | 1
-rw-r--r--  arch/x86/include/asm/pgtable-3level_types.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable.h | 8
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 1
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 4
-rw-r--r--  arch/x86/include/asm/processor.h | 110
-rw-r--r--  arch/x86/include/asm/ptrace.h | 45
-rw-r--r--  arch/x86/include/asm/pvclock.h | 1
-rw-r--r--  arch/x86/include/asm/segment.h | 289
-rw-r--r--  arch/x86/include/asm/setup.h | 5
-rw-r--r--  arch/x86/include/asm/sigcontext.h | 6
-rw-r--r--  arch/x86/include/asm/sighandling.h | 4
-rw-r--r--  arch/x86/include/asm/smap.h | 30
-rw-r--r--  arch/x86/include/asm/smp.h | 3
-rw-r--r--  arch/x86/include/asm/special_insns.h | 24
-rw-r--r--  arch/x86/include/asm/thread_info.h | 74
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h | 1
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 18
-rw-r--r--  arch/x86/include/uapi/asm/ptrace-abi.h | 16
-rw-r--r--  arch/x86/include/uapi/asm/ptrace.h | 13
-rw-r--r--  arch/x86/include/uapi/asm/sigcontext.h | 21
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h | 1
53 files changed, 953 insertions, 624 deletions
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@
18 .endm 18 .endm
19#endif 19#endif
20 20
21.macro altinstruction_entry orig alt feature orig_len alt_len 21.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
22 .long \orig - . 22 .long \orig - .
23 .long \alt - . 23 .long \alt - .
24 .word \feature 24 .word \feature
25 .byte \orig_len 25 .byte \orig_len
26 .byte \alt_len 26 .byte \alt_len
27 .byte \pad_len
28.endm
29
30.macro ALTERNATIVE oldinstr, newinstr, feature
31140:
32 \oldinstr
33141:
34 .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
35142:
36
37 .pushsection .altinstructions,"a"
38 altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
39 .popsection
40
41 .pushsection .altinstr_replacement,"ax"
42143:
43 \newinstr
44144:
45 .popsection
46.endm
47
48#define old_len 141b-140b
49#define new_len1 144f-143f
50#define new_len2 145f-144f
51
52/*
53 * max without conditionals. Idea adapted from:
54 * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
55 */
56#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
57
58.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
59140:
60 \oldinstr
61141:
62 .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
63 (alt_max_short(new_len1, new_len2) - (old_len)),0x90
64142:
65
66 .pushsection .altinstructions,"a"
67 altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
68 altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
69 .popsection
70
71 .pushsection .altinstr_replacement,"ax"
72143:
73 \newinstr1
74144:
75 \newinstr2
76145:
77 .popsection
27.endm 78.endm
28 79
29#endif /* __ASSEMBLY__ */ 80#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr {
48 s32 repl_offset; /* offset to replacement instruction */ 48 s32 repl_offset; /* offset to replacement instruction */
49 u16 cpuid; /* cpuid bit set for replacement */ 49 u16 cpuid; /* cpuid bit set for replacement */
50 u8 instrlen; /* length of original instruction */ 50 u8 instrlen; /* length of original instruction */
51 u8 replacementlen; /* length of new instruction, <= instrlen */ 51 u8 replacementlen; /* length of new instruction */
52}; 52 u8 padlen; /* length of build-time padding */
53} __packed;
53 54
54extern void alternative_instructions(void); 55extern void alternative_instructions(void);
55extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 56extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
76} 77}
77#endif /* CONFIG_SMP */ 78#endif /* CONFIG_SMP */
78 79
79#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n" 80#define b_replacement(num) "664"#num
81#define e_replacement(num) "665"#num
80 82
81#define b_replacement(number) "663"#number 83#define alt_end_marker "663"
82#define e_replacement(number) "664"#number 84#define alt_slen "662b-661b"
85#define alt_pad_len alt_end_marker"b-662b"
86#define alt_total_slen alt_end_marker"b-661b"
87#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
83 88
84#define alt_slen "662b-661b" 89#define __OLDINSTR(oldinstr, num) \
85#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f" 90 "661:\n\t" oldinstr "\n662:\n" \
91 ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
92 "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
86 93
87#define ALTINSTR_ENTRY(feature, number) \ 94#define OLDINSTR(oldinstr, num) \
95 __OLDINSTR(oldinstr, num) \
96 alt_end_marker ":\n"
97
98/*
99 * max without conditionals. Idea adapted from:
100 * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
101 *
102 * The additional "-" is needed because gas works with s32s.
103 */
104#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
105
106/*
107 * Pad the second replacement alternative with additional NOPs if it is
108 * additionally longer than the first replacement alternative.
109 */
110#define OLDINSTR_2(oldinstr, num1, num2) \
111 "661:\n\t" oldinstr "\n662:\n" \
112 ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
113 "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
114 alt_end_marker ":\n"
115
116#define ALTINSTR_ENTRY(feature, num) \
88 " .long 661b - .\n" /* label */ \ 117 " .long 661b - .\n" /* label */ \
89 " .long " b_replacement(number)"f - .\n" /* new instruction */ \ 118 " .long " b_replacement(num)"f - .\n" /* new instruction */ \
90 " .word " __stringify(feature) "\n" /* feature bit */ \ 119 " .word " __stringify(feature) "\n" /* feature bit */ \
91 " .byte " alt_slen "\n" /* source len */ \ 120 " .byte " alt_total_slen "\n" /* source len */ \
92 " .byte " alt_rlen(number) "\n" /* replacement len */ 121 " .byte " alt_rlen(num) "\n" /* replacement len */ \
93 122 " .byte " alt_pad_len "\n" /* pad len */
94#define DISCARD_ENTRY(number) /* rlen <= slen */ \
95 " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
96 123
97#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \ 124#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
98 b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t" 125 b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
99 126
100/* alternative assembly primitive: */ 127/* alternative assembly primitive: */
101#define ALTERNATIVE(oldinstr, newinstr, feature) \ 128#define ALTERNATIVE(oldinstr, newinstr, feature) \
102 OLDINSTR(oldinstr) \ 129 OLDINSTR(oldinstr, 1) \
103 ".pushsection .altinstructions,\"a\"\n" \ 130 ".pushsection .altinstructions,\"a\"\n" \
104 ALTINSTR_ENTRY(feature, 1) \ 131 ALTINSTR_ENTRY(feature, 1) \
105 ".popsection\n" \ 132 ".popsection\n" \
106 ".pushsection .discard,\"aw\",@progbits\n" \
107 DISCARD_ENTRY(1) \
108 ".popsection\n" \
109 ".pushsection .altinstr_replacement, \"ax\"\n" \ 133 ".pushsection .altinstr_replacement, \"ax\"\n" \
110 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ 134 ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
111 ".popsection" 135 ".popsection"
112 136
113#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ 137#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
114 OLDINSTR(oldinstr) \ 138 OLDINSTR_2(oldinstr, 1, 2) \
115 ".pushsection .altinstructions,\"a\"\n" \ 139 ".pushsection .altinstructions,\"a\"\n" \
116 ALTINSTR_ENTRY(feature1, 1) \ 140 ALTINSTR_ENTRY(feature1, 1) \
117 ALTINSTR_ENTRY(feature2, 2) \ 141 ALTINSTR_ENTRY(feature2, 2) \
118 ".popsection\n" \ 142 ".popsection\n" \
119 ".pushsection .discard,\"aw\",@progbits\n" \
120 DISCARD_ENTRY(1) \
121 DISCARD_ENTRY(2) \
122 ".popsection\n" \
123 ".pushsection .altinstr_replacement, \"ax\"\n" \ 143 ".pushsection .altinstr_replacement, \"ax\"\n" \
124 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ 144 ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
125 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ 145 ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
146#define alternative(oldinstr, newinstr, feature) \ 166#define alternative(oldinstr, newinstr, feature) \
147 asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") 167 asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
148 168
169#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
170 asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
171
149/* 172/*
150 * Alternative inline assembly with input. 173 * Alternative inline assembly with input.
151 * 174 *
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22d896e..976b86a325e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
91{ 91{
92 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 92 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
93 93
94 alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP, 94 alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
95 ASM_OUTPUT2("=r" (v), "=m" (*addr)), 95 ASM_OUTPUT2("=r" (v), "=m" (*addr)),
96 ASM_OUTPUT2("0" (v), "m" (*addr))); 96 ASM_OUTPUT2("0" (v), "m" (*addr)));
97} 97}
@@ -204,7 +204,6 @@ extern void clear_local_APIC(void);
204extern void disconnect_bsp_APIC(int virt_wire_setup); 204extern void disconnect_bsp_APIC(int virt_wire_setup);
205extern void disable_local_APIC(void); 205extern void disable_local_APIC(void);
206extern void lapic_shutdown(void); 206extern void lapic_shutdown(void);
207extern int verify_local_APIC(void);
208extern void sync_Arb_IDs(void); 207extern void sync_Arb_IDs(void);
209extern void init_bsp_APIC(void); 208extern void init_bsp_APIC(void);
210extern void setup_local_APIC(void); 209extern void setup_local_APIC(void);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb33106e..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@ do { \
95 * Stop RDTSC speculation. This is needed when you need to use RDTSC 95 * Stop RDTSC speculation. This is needed when you need to use RDTSC
96 * (or get_cycles or vread that possibly accesses the TSC) in a defined 96 * (or get_cycles or vread that possibly accesses the TSC) in a defined
97 * code region. 97 * code region.
98 *
99 * (Could use an alternative three way for this if there was one.)
100 */ 98 */
101static __always_inline void rdtsc_barrier(void) 99static __always_inline void rdtsc_barrier(void)
102{ 100{
103 alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); 101 alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
104 alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); 102 "lfence", X86_FEATURE_LFENCE_RDTSC);
105} 103}
106 104
107#endif /* _ASM_X86_BARRIER_H */ 105#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b46f83..1c8b50edb2db 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
55 * for assembly code: 55 * for assembly code:
56 */ 56 */
57 57
58#define R15 0 58/* The layout forms the "struct pt_regs" on the stack: */
59#define R14 8 59/*
60#define R13 16 60 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
61#define R12 24 61 * unless syscall needs a complete, fully filled "struct pt_regs".
62#define RBP 32 62 */
63#define RBX 40 63#define R15 0*8
64 64#define R14 1*8
65/* arguments: interrupts/non tracing syscalls only save up to here: */ 65#define R13 2*8
66#define R11 48 66#define R12 3*8
67#define R10 56 67#define RBP 4*8
68#define R9 64 68#define RBX 5*8
69#define R8 72 69/* These regs are callee-clobbered. Always saved on kernel entry. */
70#define RAX 80 70#define R11 6*8
71#define RCX 88 71#define R10 7*8
72#define RDX 96 72#define R9 8*8
73#define RSI 104 73#define R8 9*8
74#define RDI 112 74#define RAX 10*8
75#define ORIG_RAX 120 /* + error_code */ 75#define RCX 11*8
76/* end of arguments */ 76#define RDX 12*8
77 77#define RSI 13*8
78/* cpu exception frame or undefined in case of fast syscall: */ 78#define RDI 14*8
79#define RIP 128 79/*
80#define CS 136 80 * On syscall entry, this is syscall#. On CPU exception, this is error code.
81#define EFLAGS 144 81 * On hw interrupt, it's IRQ number:
82#define RSP 152 82 */
83#define SS 160 83#define ORIG_RAX 15*8
84 84/* Return frame for iretq */
85#define ARGOFFSET R11 85#define RIP 16*8
86 86#define CS 17*8
87 .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0 87#define EFLAGS 18*8
88 subq $9*8+\addskip, %rsp 88#define RSP 19*8
89 CFI_ADJUST_CFA_OFFSET 9*8+\addskip 89#define SS 20*8
90 movq_cfi rdi, 8*8 90
91 movq_cfi rsi, 7*8 91#define SIZEOF_PTREGS 21*8
92 movq_cfi rdx, 6*8 92
93 93 .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
94 .if \save_rcx 94 subq $15*8+\addskip, %rsp
95 movq_cfi rcx, 5*8 95 CFI_ADJUST_CFA_OFFSET 15*8+\addskip
96 .endif 96 .endm
97 97
98 .if \rax_enosys 98 .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
99 movq $-ENOSYS, 4*8(%rsp) 99 .if \r11
100 .else 100 movq_cfi r11, 6*8+\offset
101 movq_cfi rax, 4*8
102 .endif 101 .endif
103 102 .if \r8910
104 .if \save_r891011 103 movq_cfi r10, 7*8+\offset
105 movq_cfi r8, 3*8 104 movq_cfi r9, 8*8+\offset
106 movq_cfi r9, 2*8 105 movq_cfi r8, 9*8+\offset
107 movq_cfi r10, 1*8 106 .endif
108 movq_cfi r11, 0*8 107 .if \rax
108 movq_cfi rax, 10*8+\offset
109 .endif
110 .if \rcx
111 movq_cfi rcx, 11*8+\offset
109 .endif 112 .endif
113 movq_cfi rdx, 12*8+\offset
114 movq_cfi rsi, 13*8+\offset
115 movq_cfi rdi, 14*8+\offset
116 .endm
117 .macro SAVE_C_REGS offset=0
118 SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
119 .endm
120 .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
121 SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
122 .endm
123 .macro SAVE_C_REGS_EXCEPT_R891011
124 SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
125 .endm
126 .macro SAVE_C_REGS_EXCEPT_RCX_R891011
127 SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
128 .endm
129 .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
130 SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
131 .endm
132
133 .macro SAVE_EXTRA_REGS offset=0
134 movq_cfi r15, 0*8+\offset
135 movq_cfi r14, 1*8+\offset
136 movq_cfi r13, 2*8+\offset
137 movq_cfi r12, 3*8+\offset
138 movq_cfi rbp, 4*8+\offset
139 movq_cfi rbx, 5*8+\offset
140 .endm
141 .macro SAVE_EXTRA_REGS_RBP offset=0
142 movq_cfi rbp, 4*8+\offset
143 .endm
110 144
145 .macro RESTORE_EXTRA_REGS offset=0
146 movq_cfi_restore 0*8+\offset, r15
147 movq_cfi_restore 1*8+\offset, r14
148 movq_cfi_restore 2*8+\offset, r13
149 movq_cfi_restore 3*8+\offset, r12
150 movq_cfi_restore 4*8+\offset, rbp
151 movq_cfi_restore 5*8+\offset, rbx
111 .endm 152 .endm
112 153
113#define ARG_SKIP (9*8) 154 .macro ZERO_EXTRA_REGS
155 xorl %r15d, %r15d
156 xorl %r14d, %r14d
157 xorl %r13d, %r13d
158 xorl %r12d, %r12d
159 xorl %ebp, %ebp
160 xorl %ebx, %ebx
161 .endm
114 162
115 .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ 163 .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
116 rstor_r8910=1, rstor_rdx=1
117 .if \rstor_r11 164 .if \rstor_r11
118 movq_cfi_restore 0*8, r11 165 movq_cfi_restore 6*8, r11
119 .endif 166 .endif
120
121 .if \rstor_r8910 167 .if \rstor_r8910
122 movq_cfi_restore 1*8, r10 168 movq_cfi_restore 7*8, r10
123 movq_cfi_restore 2*8, r9 169 movq_cfi_restore 8*8, r9
124 movq_cfi_restore 3*8, r8 170 movq_cfi_restore 9*8, r8
125 .endif 171 .endif
126
127 .if \rstor_rax 172 .if \rstor_rax
128 movq_cfi_restore 4*8, rax 173 movq_cfi_restore 10*8, rax
129 .endif 174 .endif
130
131 .if \rstor_rcx 175 .if \rstor_rcx
132 movq_cfi_restore 5*8, rcx 176 movq_cfi_restore 11*8, rcx
133 .endif 177 .endif
134
135 .if \rstor_rdx 178 .if \rstor_rdx
136 movq_cfi_restore 6*8, rdx 179 movq_cfi_restore 12*8, rdx
137 .endif
138
139 movq_cfi_restore 7*8, rsi
140 movq_cfi_restore 8*8, rdi
141
142 .if ARG_SKIP+\addskip > 0
143 addq $ARG_SKIP+\addskip, %rsp
144 CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
145 .endif 180 .endif
181 movq_cfi_restore 13*8, rsi
182 movq_cfi_restore 14*8, rdi
146 .endm 183 .endm
147 184 .macro RESTORE_C_REGS
148 .macro LOAD_ARGS offset, skiprax=0 185 RESTORE_C_REGS_HELPER 1,1,1,1,1
149 movq \offset(%rsp), %r11
150 movq \offset+8(%rsp), %r10
151 movq \offset+16(%rsp), %r9
152 movq \offset+24(%rsp), %r8
153 movq \offset+40(%rsp), %rcx
154 movq \offset+48(%rsp), %rdx
155 movq \offset+56(%rsp), %rsi
156 movq \offset+64(%rsp), %rdi
157 .if \skiprax
158 .else
159 movq \offset+72(%rsp), %rax
160 .endif
161 .endm 186 .endm
162 187 .macro RESTORE_C_REGS_EXCEPT_RAX
163#define REST_SKIP (6*8) 188 RESTORE_C_REGS_HELPER 0,1,1,1,1
164
165 .macro SAVE_REST
166 subq $REST_SKIP, %rsp
167 CFI_ADJUST_CFA_OFFSET REST_SKIP
168 movq_cfi rbx, 5*8
169 movq_cfi rbp, 4*8
170 movq_cfi r12, 3*8
171 movq_cfi r13, 2*8
172 movq_cfi r14, 1*8
173 movq_cfi r15, 0*8
174 .endm 189 .endm
175 190 .macro RESTORE_C_REGS_EXCEPT_RCX
176 .macro RESTORE_REST 191 RESTORE_C_REGS_HELPER 1,0,1,1,1
177 movq_cfi_restore 0*8, r15
178 movq_cfi_restore 1*8, r14
179 movq_cfi_restore 2*8, r13
180 movq_cfi_restore 3*8, r12
181 movq_cfi_restore 4*8, rbp
182 movq_cfi_restore 5*8, rbx
183 addq $REST_SKIP, %rsp
184 CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
185 .endm 192 .endm
186 193 .macro RESTORE_C_REGS_EXCEPT_R11
187 .macro SAVE_ALL 194 RESTORE_C_REGS_HELPER 1,1,0,1,1
188 SAVE_ARGS 195 .endm
189 SAVE_REST 196 .macro RESTORE_C_REGS_EXCEPT_RCX_R11
197 RESTORE_C_REGS_HELPER 1,0,0,1,1
198 .endm
199 .macro RESTORE_RSI_RDI
200 RESTORE_C_REGS_HELPER 0,0,0,0,0
201 .endm
202 .macro RESTORE_RSI_RDI_RDX
203 RESTORE_C_REGS_HELPER 0,0,0,0,1
190 .endm 204 .endm
191 205
192 .macro RESTORE_ALL addskip=0 206 .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
193 RESTORE_REST 207 addq $15*8+\addskip, %rsp
194 RESTORE_ARGS 1, \addskip 208 CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
195 .endm 209 .endm
196 210
197 .macro icebp 211 .macro icebp
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
210 */ 224 */
211 225
212 .macro SAVE_ALL 226 .macro SAVE_ALL
213 pushl_cfi %eax 227 pushl_cfi_reg eax
214 CFI_REL_OFFSET eax, 0 228 pushl_cfi_reg ebp
215 pushl_cfi %ebp 229 pushl_cfi_reg edi
216 CFI_REL_OFFSET ebp, 0 230 pushl_cfi_reg esi
217 pushl_cfi %edi 231 pushl_cfi_reg edx
218 CFI_REL_OFFSET edi, 0 232 pushl_cfi_reg ecx
219 pushl_cfi %esi 233 pushl_cfi_reg ebx
220 CFI_REL_OFFSET esi, 0
221 pushl_cfi %edx
222 CFI_REL_OFFSET edx, 0
223 pushl_cfi %ecx
224 CFI_REL_OFFSET ecx, 0
225 pushl_cfi %ebx
226 CFI_REL_OFFSET ebx, 0
227 .endm 234 .endm
228 235
229 .macro RESTORE_ALL 236 .macro RESTORE_ALL
230 popl_cfi %ebx 237 popl_cfi_reg ebx
231 CFI_RESTORE ebx 238 popl_cfi_reg ecx
232 popl_cfi %ecx 239 popl_cfi_reg edx
233 CFI_RESTORE ecx 240 popl_cfi_reg esi
234 popl_cfi %edx 241 popl_cfi_reg edi
235 CFI_RESTORE edx 242 popl_cfi_reg ebp
236 popl_cfi %esi 243 popl_cfi_reg eax
237 CFI_RESTORE esi
238 popl_cfi %edi
239 CFI_RESTORE edi
240 popl_cfi %ebp
241 CFI_RESTORE ebp
242 popl_cfi %eax
243 CFI_RESTORE eax
244 .endm 244 .endm
245 245
246#endif /* CONFIG_X86_64 */ 246#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c401f79f..acdee09228b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
301 sp = task_pt_regs(current)->sp; 301 sp = task_pt_regs(current)->sp;
302 } else { 302 } else {
303 /* -128 for the x32 ABI redzone */ 303 /* -128 for the x32 ABI redzone */
304 sp = this_cpu_read(old_rsp) - 128; 304 sp = task_pt_regs(current)->sp - 128;
305 } 305 }
306 306
307 return (void __user *)round_down(sp - len, 16); 307 return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index d2b12988d2ed..bf2caa1dedc5 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -34,8 +34,6 @@ extern int _debug_hotplug_cpu(int cpu, int action);
34#endif 34#endif
35#endif 35#endif
36 36
37DECLARE_PER_CPU(int, cpu_state);
38
39int mwait_usable(const struct cpuinfo_x86 *); 37int mwait_usable(const struct cpuinfo_x86 *);
40 38
41#endif /* _ASM_X86_CPU_H */ 39#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a54851aedc..7ee9b94d9921 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
12#include <asm/disabled-features.h> 12#include <asm/disabled-features.h>
13#endif 13#endif
14 14
15#define NCAPINTS 11 /* N 32-bit words worth of info */ 15#define NCAPINTS 13 /* N 32-bit words worth of info */
16#define NBUGINTS 1 /* N 32-bit bug flags */ 16#define NBUGINTS 1 /* N 32-bit bug flags */
17 17
18/* 18/*
@@ -195,6 +195,7 @@
195#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ 195#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
196#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ 196#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
197#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ 197#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
198#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
198 199
199/* Virtualization flags: Linux defined, word 8 */ 200/* Virtualization flags: Linux defined, word 8 */
200#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ 201#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -226,12 +227,15 @@
226#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */ 227#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
227#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ 228#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
228#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ 229#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
230#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
229#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ 231#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
230#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ 232#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
231#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */ 233#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
232#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */ 234#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
233#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ 235#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
236#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
234#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ 237#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
238#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
235#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ 239#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
236#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ 240#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
237#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ 241#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -242,6 +246,12 @@
242#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */ 246#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
243#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */ 247#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
244 248
249/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
250#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
251
252/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
253#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
254
245/* 255/*
246 * BUG word(s) 256 * BUG word(s)
247 */ 257 */
@@ -418,6 +428,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
418 " .word %P0\n" /* 1: do replace */ 428 " .word %P0\n" /* 1: do replace */
419 " .byte 2b - 1b\n" /* source len */ 429 " .byte 2b - 1b\n" /* source len */
420 " .byte 0\n" /* replacement len */ 430 " .byte 0\n" /* replacement len */
431 " .byte 0\n" /* pad len */
421 ".previous\n" 432 ".previous\n"
422 /* skipping size check since replacement size = 0 */ 433 /* skipping size check since replacement size = 0 */
423 : : "i" (X86_FEATURE_ALWAYS) : : t_warn); 434 : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +443,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
432 " .word %P0\n" /* feature bit */ 443 " .word %P0\n" /* feature bit */
433 " .byte 2b - 1b\n" /* source len */ 444 " .byte 2b - 1b\n" /* source len */
434 " .byte 0\n" /* replacement len */ 445 " .byte 0\n" /* replacement len */
446 " .byte 0\n" /* pad len */
435 ".previous\n" 447 ".previous\n"
436 /* skipping size check since replacement size = 0 */ 448 /* skipping size check since replacement size = 0 */
437 : : "i" (bit) : : t_no); 449 : : "i" (bit) : : t_no);
@@ -457,6 +469,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
457 " .word %P1\n" /* feature bit */ 469 " .word %P1\n" /* feature bit */
458 " .byte 2b - 1b\n" /* source len */ 470 " .byte 2b - 1b\n" /* source len */
459 " .byte 4f - 3f\n" /* replacement len */ 471 " .byte 4f - 3f\n" /* replacement len */
472 " .byte 0\n" /* pad len */
460 ".previous\n" 473 ".previous\n"
461 ".section .discard,\"aw\",@progbits\n" 474 ".section .discard,\"aw\",@progbits\n"
462 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 475 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +496,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
483static __always_inline __pure bool _static_cpu_has_safe(u16 bit) 496static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
484{ 497{
485#ifdef CC_HAVE_ASM_GOTO 498#ifdef CC_HAVE_ASM_GOTO
486/* 499 asm_volatile_goto("1: jmp %l[t_dynamic]\n"
487 * We need to spell the jumps to the compiler because, depending on the offset,
488 * the replacement jump can be bigger than the original jump, and this we cannot
489 * have. Thus, we force the jump to the widest, 4-byte, signed relative
490 * offset even though the last would often fit in less bytes.
491 */
492 asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
493 "2:\n" 500 "2:\n"
501 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
502 "((5f-4f) - (2b-1b)),0x90\n"
503 "3:\n"
494 ".section .altinstructions,\"a\"\n" 504 ".section .altinstructions,\"a\"\n"
495 " .long 1b - .\n" /* src offset */ 505 " .long 1b - .\n" /* src offset */
496 " .long 3f - .\n" /* repl offset */ 506 " .long 4f - .\n" /* repl offset */
497 " .word %P1\n" /* always replace */ 507 " .word %P1\n" /* always replace */
498 " .byte 2b - 1b\n" /* src len */ 508 " .byte 3b - 1b\n" /* src len */
499 " .byte 4f - 3f\n" /* repl len */ 509 " .byte 5f - 4f\n" /* repl len */
510 " .byte 3b - 2b\n" /* pad len */
500 ".previous\n" 511 ".previous\n"
501 ".section .altinstr_replacement,\"ax\"\n" 512 ".section .altinstr_replacement,\"ax\"\n"
502 "3: .byte 0xe9\n .long %l[t_no] - 2b\n" 513 "4: jmp %l[t_no]\n"
503 "4:\n" 514 "5:\n"
504 ".previous\n" 515 ".previous\n"
505 ".section .altinstructions,\"a\"\n" 516 ".section .altinstructions,\"a\"\n"
506 " .long 1b - .\n" /* src offset */ 517 " .long 1b - .\n" /* src offset */
507 " .long 0\n" /* no replacement */ 518 " .long 0\n" /* no replacement */
508 " .word %P0\n" /* feature bit */ 519 " .word %P0\n" /* feature bit */
509 " .byte 2b - 1b\n" /* src len */ 520 " .byte 3b - 1b\n" /* src len */
510 " .byte 0\n" /* repl len */ 521 " .byte 0\n" /* repl len */
522 " .byte 0\n" /* pad len */
511 ".previous\n" 523 ".previous\n"
512 : : "i" (bit), "i" (X86_FEATURE_ALWAYS) 524 : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
513 : : t_dynamic, t_no); 525 : : t_dynamic, t_no);
@@ -527,6 +539,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
527 " .word %P2\n" /* always replace */ 539 " .word %P2\n" /* always replace */
528 " .byte 2b - 1b\n" /* source len */ 540 " .byte 2b - 1b\n" /* source len */
529 " .byte 4f - 3f\n" /* replacement len */ 541 " .byte 4f - 3f\n" /* replacement len */
542 " .byte 0\n" /* pad len */
530 ".previous\n" 543 ".previous\n"
531 ".section .discard,\"aw\",@progbits\n" 544 ".section .discard,\"aw\",@progbits\n"
532 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ 545 " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +554,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
541 " .word %P1\n" /* feature bit */ 554 " .word %P1\n" /* feature bit */
542 " .byte 4b - 3b\n" /* src len */ 555 " .byte 4b - 3b\n" /* src len */
543 " .byte 6f - 5f\n" /* repl len */ 556 " .byte 6f - 5f\n" /* repl len */
557 " .byte 0\n" /* pad len */
544 ".previous\n" 558 ".previous\n"
545 ".section .discard,\"aw\",@progbits\n" 559 ".section .discard,\"aw\",@progbits\n"
546 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ 560 " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e8f156..a0bf89fd2647 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
376 * Pentium F0 0F bugfix can have resulted in the mapped 376 * Pentium F0 0F bugfix can have resulted in the mapped
377 * IDT being write-protected. 377 * IDT being write-protected.
378 */ 378 */
379#define set_intr_gate(n, addr) \ 379#define set_intr_gate_notrace(n, addr) \
380 do { \ 380 do { \
381 BUG_ON((unsigned)n > 0xFF); \ 381 BUG_ON((unsigned)n > 0xFF); \
382 _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \ 382 _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
383 __KERNEL_CS); \ 383 __KERNEL_CS); \
384 } while (0)
385
386#define set_intr_gate(n, addr) \
387 do { \
388 set_intr_gate_notrace(n, addr); \
384 _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\ 389 _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
385 0, 0, __KERNEL_CS); \ 390 0, 0, __KERNEL_CS); \
386 } while (0) 391 } while (0)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
86 CFI_ADJUST_CFA_OFFSET 8 86 CFI_ADJUST_CFA_OFFSET 8
87 .endm 87 .endm
88 88
89 .macro pushq_cfi_reg reg
90 pushq %\reg
91 CFI_ADJUST_CFA_OFFSET 8
92 CFI_REL_OFFSET \reg, 0
93 .endm
94
89 .macro popq_cfi reg 95 .macro popq_cfi reg
90 popq \reg 96 popq \reg
91 CFI_ADJUST_CFA_OFFSET -8 97 CFI_ADJUST_CFA_OFFSET -8
92 .endm 98 .endm
93 99
100 .macro popq_cfi_reg reg
101 popq %\reg
102 CFI_ADJUST_CFA_OFFSET -8
103 CFI_RESTORE \reg
104 .endm
105
94 .macro pushfq_cfi 106 .macro pushfq_cfi
95 pushfq 107 pushfq
96 CFI_ADJUST_CFA_OFFSET 8 108 CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
116 CFI_ADJUST_CFA_OFFSET 4 128 CFI_ADJUST_CFA_OFFSET 4
117 .endm 129 .endm
118 130
131 .macro pushl_cfi_reg reg
132 pushl %\reg
133 CFI_ADJUST_CFA_OFFSET 4
134 CFI_REL_OFFSET \reg, 0
135 .endm
136
119 .macro popl_cfi reg 137 .macro popl_cfi reg
120 popl \reg 138 popl \reg
121 CFI_ADJUST_CFA_OFFSET -4 139 CFI_ADJUST_CFA_OFFSET -4
122 .endm 140 .endm
123 141
142 .macro popl_cfi_reg reg
143 popl %\reg
144 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE \reg
146 .endm
147
124 .macro pushfl_cfi 148 .macro pushfl_cfi
125 pushfl 149 pushfl
126 CFI_ADJUST_CFA_OFFSET 4 150 CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 779c2efe2e97..3ab0537872fb 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -40,14 +40,6 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
40} 40}
41#endif 41#endif
42 42
43#ifdef CONFIG_MEMTEST
44extern void early_memtest(unsigned long start, unsigned long end);
45#else
46static inline void early_memtest(unsigned long start, unsigned long end)
47{
48}
49#endif
50
51extern unsigned long e820_end_of_ram_pfn(void); 43extern unsigned long e820_end_of_ram_pfn(void);
52extern unsigned long e820_end_of_low_ram_pfn(void); 44extern unsigned long e820_end_of_low_ram_pfn(void);
53extern u64 early_reserve_e820(u64 sizet, u64 align); 45extern u64 early_reserve_e820(u64 sizet, u64 align);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 25bce45c6fc4..3738b138b843 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -2,6 +2,8 @@
2#define _ASM_X86_EFI_H 2#define _ASM_X86_EFI_H
3 3
4#include <asm/i387.h> 4#include <asm/i387.h>
5#include <asm/pgtable.h>
6
5/* 7/*
6 * We map the EFI regions needed for runtime services non-contiguously, 8 * We map the EFI regions needed for runtime services non-contiguously,
7 * with preserved alignment on virtual addresses starting from -4G down 9 * with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
89extern struct efi_scratch efi_scratch; 91extern struct efi_scratch efi_scratch;
90extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable); 92extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
91extern int __init efi_memblock_x86_reserve_range(void); 93extern int __init efi_memblock_x86_reserve_range(void);
92extern void __init efi_call_phys_prolog(void); 94extern pgd_t * __init efi_call_phys_prolog(void);
93extern void __init efi_call_phys_epilog(void); 95extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
94extern void __init efi_unmap_memmap(void); 96extern void __init efi_unmap_memmap(void);
95extern void __init efi_memory_uc(u64 addr, unsigned long size); 97extern void __init efi_memory_uc(u64 addr, unsigned long size);
96extern void __init efi_map_region(efi_memory_desc_t *md); 98extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a9dab5..f161c189c27b 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@ do { \
171static inline void elf_common_init(struct thread_struct *t, 171static inline void elf_common_init(struct thread_struct *t,
172 struct pt_regs *regs, const u16 ds) 172 struct pt_regs *regs, const u16 ds)
173{ 173{
174 regs->ax = regs->bx = regs->cx = regs->dx = 0; 174 /* Commented-out registers are cleared in stub_execve */
175 regs->si = regs->di = regs->bp = 0; 175 /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
176 regs->si = regs->di /*= regs->bp*/ = 0;
176 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0; 177 regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
177 regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; 178 /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
178 t->fs = t->gs = 0; 179 t->fs = t->gs = 0;
179 t->fsindex = t->gsindex = 0; 180 t->fsindex = t->gsindex = 0;
180 t->ds = t->es = ds; 181 t->ds = t->es = ds;
@@ -338,9 +339,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
338 int uses_interp); 339 int uses_interp);
339#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages 340#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
340 341
341extern unsigned long arch_randomize_brk(struct mm_struct *mm);
342#define arch_randomize_brk arch_randomize_brk
343
344/* 342/*
345 * True on X86_32 or when emulating IA32 on X86_64 343 * True on X86_32 or when emulating IA32 on X86_64
346 */ 344 */
@@ -365,6 +363,7 @@ enum align_flags {
365struct va_alignment { 363struct va_alignment {
366 int flags; 364 int flags;
367 unsigned long mask; 365 unsigned long mask;
366 unsigned long bits;
368} ____cacheline_aligned; 367} ____cacheline_aligned;
369 368
370extern struct va_alignment va_align; 369extern struct va_alignment va_align;
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 72ba21a8b5fc..da5e96756570 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
67static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} 67static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
68#endif 68#endif
69 69
70/*
71 * Must be run with preemption disabled: this clears the fpu_owner_task,
72 * on this CPU.
73 *
74 * This will disable any lazy FPU state restore of the current FPU state,
75 * but if the current thread owns the FPU, it will still be saved by.
76 */
77static inline void __cpu_disable_lazy_restore(unsigned int cpu)
78{
79 per_cpu(fpu_owner_task, cpu) = NULL;
80}
81
82/*
83 * Used to indicate that the FPU state in memory is newer than the FPU
84 * state in registers, and the FPU state should be reloaded next time the
85 * task is run. Only safe on the current task, or non-running tasks.
86 */
87static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
88{
89 tsk->thread.fpu.last_cpu = ~0;
90}
91
92static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
93{
94 return new == this_cpu_read_stable(fpu_owner_task) &&
95 cpu == new->thread.fpu.last_cpu;
96}
97
70static inline int is_ia32_compat_frame(void) 98static inline int is_ia32_compat_frame(void)
71{ 99{
72 return config_enabled(CONFIG_IA32_EMULATION) && 100 return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
107 135
108static inline void fx_finit(struct i387_fxsave_struct *fx) 136static inline void fx_finit(struct i387_fxsave_struct *fx)
109{ 137{
110 memset(fx, 0, xstate_size);
111 fx->cwd = 0x37f; 138 fx->cwd = 0x37f;
112 fx->mxcsr = MXCSR_DEFAULT; 139 fx->mxcsr = MXCSR_DEFAULT;
113} 140}
@@ -351,8 +378,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
351 __thread_set_has_fpu(tsk); 378 __thread_set_has_fpu(tsk);
352} 379}
353 380
354static inline void __drop_fpu(struct task_struct *tsk) 381static inline void drop_fpu(struct task_struct *tsk)
355{ 382{
383 /*
384 * Forget coprocessor state..
385 */
386 preempt_disable();
387 tsk->thread.fpu_counter = 0;
388
356 if (__thread_has_fpu(tsk)) { 389 if (__thread_has_fpu(tsk)) {
357 /* Ignore delayed exceptions from user space */ 390 /* Ignore delayed exceptions from user space */
358 asm volatile("1: fwait\n" 391 asm volatile("1: fwait\n"
@@ -360,30 +393,29 @@ static inline void __drop_fpu(struct task_struct *tsk)
360 _ASM_EXTABLE(1b, 2b)); 393 _ASM_EXTABLE(1b, 2b));
361 __thread_fpu_end(tsk); 394 __thread_fpu_end(tsk);
362 } 395 }
363}
364 396
365static inline void drop_fpu(struct task_struct *tsk)
366{
367 /*
368 * Forget coprocessor state..
369 */
370 preempt_disable();
371 tsk->thread.fpu_counter = 0;
372 __drop_fpu(tsk);
373 clear_stopped_child_used_math(tsk); 397 clear_stopped_child_used_math(tsk);
374 preempt_enable(); 398 preempt_enable();
375} 399}
376 400
377static inline void drop_init_fpu(struct task_struct *tsk) 401static inline void restore_init_xstate(void)
402{
403 if (use_xsave())
404 xrstor_state(init_xstate_buf, -1);
405 else
406 fxrstor_checking(&init_xstate_buf->i387);
407}
408
409/*
410 * Reset the FPU state in the eager case and drop it in the lazy case (later use
411 * will reinit it).
412 */
413static inline void fpu_reset_state(struct task_struct *tsk)
378{ 414{
379 if (!use_eager_fpu()) 415 if (!use_eager_fpu())
380 drop_fpu(tsk); 416 drop_fpu(tsk);
381 else { 417 else
382 if (use_xsave()) 418 restore_init_xstate();
383 xrstor_state(init_xstate_buf, -1);
384 else
385 fxrstor_checking(&init_xstate_buf->i387);
386 }
387} 419}
388 420
389/* 421/*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
400 */ 432 */
401typedef struct { int preload; } fpu_switch_t; 433typedef struct { int preload; } fpu_switch_t;
402 434
403/*
404 * Must be run with preemption disabled: this clears the fpu_owner_task,
405 * on this CPU.
406 *
407 * This will disable any lazy FPU state restore of the current FPU state,
408 * but if the current thread owns the FPU, it will still be saved by.
409 */
410static inline void __cpu_disable_lazy_restore(unsigned int cpu)
411{
412 per_cpu(fpu_owner_task, cpu) = NULL;
413}
414
415static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
416{
417 return new == this_cpu_read_stable(fpu_owner_task) &&
418 cpu == new->thread.fpu.last_cpu;
419}
420
421static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) 435static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
422{ 436{
423 fpu_switch_t fpu; 437 fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
426 * If the task has used the math, pre-load the FPU on xsave processors 440 * If the task has used the math, pre-load the FPU on xsave processors
427 * or if the past 5 consecutive context-switches used math. 441 * or if the past 5 consecutive context-switches used math.
428 */ 442 */
429 fpu.preload = tsk_used_math(new) && (use_eager_fpu() || 443 fpu.preload = tsk_used_math(new) &&
430 new->thread.fpu_counter > 5); 444 (use_eager_fpu() || new->thread.fpu_counter > 5);
445
431 if (__thread_has_fpu(old)) { 446 if (__thread_has_fpu(old)) {
432 if (!__save_init_fpu(old)) 447 if (!__save_init_fpu(old))
433 cpu = ~0; 448 task_disable_lazy_fpu_restore(old);
434 old->thread.fpu.last_cpu = cpu; 449 else
435 old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ 450 old->thread.fpu.last_cpu = cpu;
451
452 /* But leave fpu_owner_task! */
453 old->thread.fpu.has_fpu = 0;
436 454
437 /* Don't change CR0.TS if we just switch! */ 455 /* Don't change CR0.TS if we just switch! */
438 if (fpu.preload) { 456 if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
443 stts(); 461 stts();
444 } else { 462 } else {
445 old->thread.fpu_counter = 0; 463 old->thread.fpu_counter = 0;
446 old->thread.fpu.last_cpu = ~0; 464 task_disable_lazy_fpu_restore(old);
447 if (fpu.preload) { 465 if (fpu.preload) {
448 new->thread.fpu_counter++; 466 new->thread.fpu_counter++;
449 if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) 467 if (fpu_lazy_restore(new, cpu))
450 fpu.preload = 0; 468 fpu.preload = 0;
451 else 469 else
452 prefetch(new->thread.fpu.state); 470 prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
466{ 484{
467 if (fpu.preload) { 485 if (fpu.preload) {
468 if (unlikely(restore_fpu_checking(new))) 486 if (unlikely(restore_fpu_checking(new)))
469 drop_init_fpu(new); 487 fpu_reset_state(new);
470 } 488 }
471} 489}
472 490
@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
495} 513}
496 514
497/* 515/*
498 * Need to be preemption-safe. 516 * Needs to be preemption-safe.
499 * 517 *
500 * NOTE! user_fpu_begin() must be used only immediately before restoring 518 * NOTE! user_fpu_begin() must be used only immediately before restoring
501 * it. This function does not do any save/restore on their own. 519 * the save state. It does not do any saving/restoring on its own. In
520 * lazy FPU mode, it is just an optimization to avoid a #NM exception,
521 * the task can lose the FPU right after preempt_enable().
502 */ 522 */
503static inline void user_fpu_begin(void) 523static inline void user_fpu_begin(void)
504{ 524{
@@ -520,24 +540,6 @@ static inline void __save_fpu(struct task_struct *tsk)
520} 540}
521 541
522/* 542/*
523 * These disable preemption on their own and are safe
524 */
525static inline void save_init_fpu(struct task_struct *tsk)
526{
527 WARN_ON_ONCE(!__thread_has_fpu(tsk));
528
529 if (use_eager_fpu()) {
530 __save_fpu(tsk);
531 return;
532 }
533
534 preempt_disable();
535 __save_init_fpu(tsk);
536 __thread_fpu_end(tsk);
537 preempt_enable();
538}
539
540/*
541 * i387 state interaction 543 * i387 state interaction
542 */ 544 */
543static inline unsigned short get_fpu_cwd(struct task_struct *tsk) 545static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290e0b20..e9571ddabc4f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
181extern __visible void smp_invalidate_interrupt(struct pt_regs *); 181extern __visible void smp_invalidate_interrupt(struct pt_regs *);
182#endif 182#endif
183 183
184extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR 184extern char irq_entries_start[];
185 - FIRST_EXTERNAL_VECTOR])(void);
186#ifdef CONFIG_TRACING 185#ifdef CONFIG_TRACING
187#define trace_interrupt interrupt 186#define trace_irq_entries_start irq_entries_start
188#endif 187#endif
189 188
190#define VECTOR_UNDEFINED (-1) 189#define VECTOR_UNDEFINED (-1)
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 47f29b1d1846..e7814b74caf8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -69,7 +69,7 @@ struct insn {
69 const insn_byte_t *next_byte; 69 const insn_byte_t *next_byte;
70}; 70};
71 71
72#define MAX_INSN_SIZE 16 72#define MAX_INSN_SIZE 15
73 73
74#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) 74#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
75#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) 75#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h
index f42a04735a0a..e37d6b3ad983 100644
--- a/arch/x86/include/asm/iommu_table.h
+++ b/arch/x86/include/asm/iommu_table.h
@@ -79,11 +79,12 @@ struct iommu_table_entry {
79 * d). Similar to the 'init', except that this gets called from pci_iommu_init 79 * d). Similar to the 'init', except that this gets called from pci_iommu_init
80 * where we do have a memory allocator. 80 * where we do have a memory allocator.
81 * 81 *
82 * The standard vs the _FINISH differs in that the _FINISH variant will 82 * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
83 * continue detecting other IOMMUs in the call list after the 83 * in that the former will continue detecting other IOMMUs in the call
84 * the detection routine returns a positive number. The _FINISH will 84 * list after the detection routine returns a positive number, while the
85 * stop the execution chain. Both will still call the 'init' and 85 * latter will stop the execution chain upon first successful detection.
86 * 'late_init' functions if they are set. 86 * Both variants will still call the 'init' and 'late_init' functions if
87 * they are set.
87 */ 88 */
88#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \ 89#define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init) \
89 __IOMMU_INIT(_detect, _depend, _init, _late_init, 1) 90 __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..b77f5edb03b0 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
136#define USERGS_SYSRET32 \ 136#define USERGS_SYSRET32 \
137 swapgs; \ 137 swapgs; \
138 sysretl 138 sysretl
139#define ENABLE_INTERRUPTS_SYSEXIT32 \
140 swapgs; \
141 sti; \
142 sysexit
143 139
144#else 140#else
145#define INTERRUPT_RETURN iret 141#define INTERRUPT_RETURN iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
163 159
164 return arch_irqs_disabled_flags(flags); 160 return arch_irqs_disabled_flags(flags);
165} 161}
162#endif /* !__ASSEMBLY__ */
166 163
164#ifdef __ASSEMBLY__
165#ifdef CONFIG_TRACE_IRQFLAGS
166# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
167# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
167#else 168#else
168 169# define TRACE_IRQS_ON
169#ifdef CONFIG_X86_64 170# define TRACE_IRQS_OFF
170#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk 171#endif
171#define ARCH_LOCKDEP_SYS_EXIT_IRQ \ 172#ifdef CONFIG_DEBUG_LOCK_ALLOC
173# ifdef CONFIG_X86_64
174# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
175# define LOCKDEP_SYS_EXIT_IRQ \
172 TRACE_IRQS_ON; \ 176 TRACE_IRQS_ON; \
173 sti; \ 177 sti; \
174 SAVE_REST; \ 178 call lockdep_sys_exit_thunk; \
175 LOCKDEP_SYS_EXIT; \
176 RESTORE_REST; \
177 cli; \ 179 cli; \
178 TRACE_IRQS_OFF; 180 TRACE_IRQS_OFF;
179 181# else
180#else 182# define LOCKDEP_SYS_EXIT \
181#define ARCH_LOCKDEP_SYS_EXIT \
182 pushl %eax; \ 183 pushl %eax; \
183 pushl %ecx; \ 184 pushl %ecx; \
184 pushl %edx; \ 185 pushl %edx; \
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
186 popl %edx; \ 187 popl %edx; \
187 popl %ecx; \ 188 popl %ecx; \
188 popl %eax; 189 popl %eax;
189 190# define LOCKDEP_SYS_EXIT_IRQ
190#define ARCH_LOCKDEP_SYS_EXIT_IRQ 191# endif
191#endif
192
193#ifdef CONFIG_TRACE_IRQFLAGS
194# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
195# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
196#else 192#else
197# define TRACE_IRQS_ON
198# define TRACE_IRQS_OFF
199#endif
200#ifdef CONFIG_DEBUG_LOCK_ALLOC
201# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
202# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
203# else
204# define LOCKDEP_SYS_EXIT 193# define LOCKDEP_SYS_EXIT
205# define LOCKDEP_SYS_EXIT_IRQ 194# define LOCKDEP_SYS_EXIT_IRQ
206# endif 195#endif
207
208#endif /* __ASSEMBLY__ */ 196#endif /* __ASSEMBLY__ */
197
209#endif 198#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 6a2cefb4395a..a4c1cf7e93f8 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,7 +1,7 @@
1#ifndef _ASM_X86_JUMP_LABEL_H 1#ifndef _ASM_X86_JUMP_LABEL_H
2#define _ASM_X86_JUMP_LABEL_H 2#define _ASM_X86_JUMP_LABEL_H
3 3
4#ifdef __KERNEL__ 4#ifndef __ASSEMBLY__
5 5
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <linux/types.h> 7#include <linux/types.h>
@@ -30,8 +30,6 @@ l_yes:
30 return true; 30 return true;
31} 31}
32 32
33#endif /* __KERNEL__ */
34
35#ifdef CONFIG_X86_64 33#ifdef CONFIG_X86_64
36typedef u64 jump_label_t; 34typedef u64 jump_label_t;
37#else 35#else
@@ -44,4 +42,5 @@ struct jump_entry {
44 jump_label_t key; 42 jump_label_t key;
45}; 43};
46 44
45#endif /* __ASSEMBLY__ */
47#endif 46#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39cc385..dea2e7e962e3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
81 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); 81 (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
82} 82}
83 83
84#define SELECTOR_TI_MASK (1 << 2)
85#define SELECTOR_RPL_MASK 0x03
86
87#define IOPL_SHIFT 12
88
89#define KVM_PERMILLE_MMU_PAGES 20 84#define KVM_PERMILLE_MMU_PAGES 20
90#define KVM_MIN_ALLOC_MMU_PAGES 64 85#define KVM_MIN_ALLOC_MMU_PAGES 64
91#define KVM_MMU_HASH_SHIFT 10 86#define KVM_MMU_HASH_SHIFT 10
@@ -345,6 +340,7 @@ struct kvm_pmu {
345enum { 340enum {
346 KVM_DEBUGREG_BP_ENABLED = 1, 341 KVM_DEBUGREG_BP_ENABLED = 1,
347 KVM_DEBUGREG_WONT_EXIT = 2, 342 KVM_DEBUGREG_WONT_EXIT = 2,
343 KVM_DEBUGREG_RELOAD = 4,
348}; 344};
349 345
350struct kvm_vcpu_arch { 346struct kvm_vcpu_arch {
@@ -431,6 +427,9 @@ struct kvm_vcpu_arch {
431 427
432 int cpuid_nent; 428 int cpuid_nent;
433 struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; 429 struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
430
431 int maxphyaddr;
432
434 /* emulate context */ 433 /* emulate context */
435 434
436 struct x86_emulate_ctxt emulate_ctxt; 435 struct x86_emulate_ctxt emulate_ctxt;
@@ -550,11 +549,20 @@ struct kvm_arch_memory_slot {
550 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; 549 struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
551}; 550};
552 551
552/*
553 * We use as the mode the number of bits allocated in the LDR for the
554 * logical processor ID. It happens that these are all powers of two.
555 * This makes it is very easy to detect cases where the APICs are
556 * configured for multiple modes; in that case, we cannot use the map and
557 * hence cannot use kvm_irq_delivery_to_apic_fast either.
558 */
559#define KVM_APIC_MODE_XAPIC_CLUSTER 4
560#define KVM_APIC_MODE_XAPIC_FLAT 8
561#define KVM_APIC_MODE_X2APIC 16
562
553struct kvm_apic_map { 563struct kvm_apic_map {
554 struct rcu_head rcu; 564 struct rcu_head rcu;
555 u8 ldr_bits; 565 u8 mode;
556 /* fields bellow are used to decode ldr values in different modes */
557 u32 cid_shift, cid_mask, lid_mask, broadcast;
558 struct kvm_lapic *phys_map[256]; 566 struct kvm_lapic *phys_map[256];
559 /* first index is cluster id second is cpu id in a cluster */ 567 /* first index is cluster id second is cpu id in a cluster */
560 struct kvm_lapic *logical_map[16][16]; 568 struct kvm_lapic *logical_map[16][16];
@@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
859void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 867void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
860void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 868void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
861 struct kvm_memory_slot *memslot); 869 struct kvm_memory_slot *memslot);
870void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
871 struct kvm_memory_slot *memslot);
862void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, 872void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
863 struct kvm_memory_slot *memslot); 873 struct kvm_memory_slot *memslot);
864void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, 874void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -933,6 +943,7 @@ struct x86_emulate_ctxt;
933int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); 943int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
934void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); 944void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
935int kvm_emulate_halt(struct kvm_vcpu *vcpu); 945int kvm_emulate_halt(struct kvm_vcpu *vcpu);
946int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
936int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); 947int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
937 948
938void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 949void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
1128int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); 1139int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
1129int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); 1140int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
1130void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); 1141void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
1131int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
1132int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); 1142int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
1133int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); 1143int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
1134int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); 1144int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
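
This file also gains a cached maxphyaddr in kvm_vcpu_arch and drops the cpuid_maxphyaddr() declaration, so hot paths no longer re-derive the value from the guest CPUID entries. A hedged sketch of the kind of check that benefits (the helper name is illustrative, not a KVM API):

static inline bool gpa_within_maxphyaddr(struct kvm_vcpu *vcpu, u64 gpa)
{
        /* a legal guest physical address has no bits set at or above MAXPHYADDR */
        return (gpa >> vcpu->arch.maxphyaddr) == 0;
}
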
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index e62cf897f781..c1adf33fdd0d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void)
115 115
116static inline bool kvm_para_available(void) 116static inline bool kvm_para_available(void)
117{ 117{
118 return 0; 118 return false;
119} 119}
120 120
121static inline unsigned int kvm_arch_para_features(void) 121static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h
index a455a53d789a..2d29197bd2fb 100644
--- a/arch/x86/include/asm/livepatch.h
+++ b/arch/x86/include/asm/livepatch.h
@@ -32,8 +32,8 @@ static inline int klp_check_compiler_support(void)
32#endif 32#endif
33 return 0; 33 return 0;
34} 34}
35extern int klp_write_module_reloc(struct module *mod, unsigned long type, 35int klp_write_module_reloc(struct module *mod, unsigned long type,
36 unsigned long loc, unsigned long value); 36 unsigned long loc, unsigned long value);
37 37
38static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) 38static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
39{ 39{
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9b3de99dc004..1f5a86d518db 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,6 +116,12 @@ struct mca_config {
116 u32 rip_msr; 116 u32 rip_msr;
117}; 117};
118 118
119struct mce_vendor_flags {
120 __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */
121 __reserved_0 : 63;
122};
123extern struct mce_vendor_flags mce_flags;
124
119extern struct mca_config mca_cfg; 125extern struct mca_config mca_cfg;
120extern void mce_register_decode_chain(struct notifier_block *nb); 126extern void mce_register_decode_chain(struct notifier_block *nb);
121extern void mce_unregister_decode_chain(struct notifier_block *nb); 127extern void mce_unregister_decode_chain(struct notifier_block *nb);
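
mce_flags.overflow_recov is annotated above as coming from CPUID leaf 0x80000007, EBX. A plausible initialization sketch, assuming the overflow-recovery capability is EBX bit 0 of that leaf (the function name is illustrative):

static void mce_detect_vendor_flags(struct cpuinfo_x86 *c)
{
        if (c->extended_cpuid_level >= 0x80000007)
                mce_flags.overflow_recov = !!(cpuid_ebx(0x80000007) & BIT(0));
}
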
@@ -128,9 +134,11 @@ extern int mce_p5_enabled;
128#ifdef CONFIG_X86_MCE 134#ifdef CONFIG_X86_MCE
129int mcheck_init(void); 135int mcheck_init(void);
130void mcheck_cpu_init(struct cpuinfo_x86 *c); 136void mcheck_cpu_init(struct cpuinfo_x86 *c);
137void mcheck_vendor_init_severity(void);
131#else 138#else
132static inline int mcheck_init(void) { return 0; } 139static inline int mcheck_init(void) { return 0; }
133static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} 140static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
141static inline void mcheck_vendor_init_severity(void) {}
134#endif 142#endif
135 143
136#ifdef CONFIG_X86_ANCIENT_MCE 144#ifdef CONFIG_X86_ANCIENT_MCE
@@ -183,11 +191,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
183DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); 191DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
184 192
185enum mcp_flags { 193enum mcp_flags {
186 MCP_TIMESTAMP = (1 << 0), /* log time stamp */ 194 MCP_TIMESTAMP = BIT(0), /* log time stamp */
187 MCP_UC = (1 << 1), /* log uncorrected errors */ 195 MCP_UC = BIT(1), /* log uncorrected errors */
188 MCP_DONTLOG = (1 << 2), /* only clear, don't log */ 196 MCP_DONTLOG = BIT(2), /* only clear, don't log */
189}; 197};
190void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); 198bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
191 199
192int mce_notify_irq(void); 200int mce_notify_irq(void);
193 201
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 201b520521ed..2fb20d6f7e23 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -75,6 +75,79 @@ static inline void __exit exit_amd_microcode(void) {}
75 75
76#ifdef CONFIG_MICROCODE_EARLY 76#ifdef CONFIG_MICROCODE_EARLY
77#define MAX_UCODE_COUNT 128 77#define MAX_UCODE_COUNT 128
78
79#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
80#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
81#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
82#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
83#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
84#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
85#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
86
87#define CPUID_IS(a, b, c, ebx, ecx, edx) \
88 (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
89
90/*
91 * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
92 * x86_vendor() gets vendor id for BSP.
93 *
94 * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
95 * coding, we still use x86_vendor() to get vendor id for AP.
96 *
97 * x86_vendor() gets vendor information directly from CPUID.
98 */
99static inline int x86_vendor(void)
100{
101 u32 eax = 0x00000000;
102 u32 ebx, ecx = 0, edx;
103
104 native_cpuid(&eax, &ebx, &ecx, &edx);
105
106 if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
107 return X86_VENDOR_INTEL;
108
109 if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
110 return X86_VENDOR_AMD;
111
112 return X86_VENDOR_UNKNOWN;
113}
114
115static inline unsigned int __x86_family(unsigned int sig)
116{
117 unsigned int x86;
118
119 x86 = (sig >> 8) & 0xf;
120
121 if (x86 == 0xf)
122 x86 += (sig >> 20) & 0xff;
123
124 return x86;
125}
126
127static inline unsigned int x86_family(void)
128{
129 u32 eax = 0x00000001;
130 u32 ebx, ecx = 0, edx;
131
132 native_cpuid(&eax, &ebx, &ecx, &edx);
133
134 return __x86_family(eax);
135}
136
137static inline unsigned int x86_model(unsigned int sig)
138{
139 unsigned int x86, model;
140
141 x86 = __x86_family(sig);
142
143 model = (sig >> 4) & 0xf;
144
145 if (x86 == 0x6 || x86 == 0xf)
146 model += ((sig >> 16) & 0xf) << 4;
147
148 return model;
149}
150
78extern void __init load_ucode_bsp(void); 151extern void __init load_ucode_bsp(void);
79extern void load_ucode_ap(void); 152extern void load_ucode_ap(void);
80extern int __init save_microcode_in_initrd(void); 153extern int __init save_microcode_in_initrd(void);
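
A worked example of the signature helpers added above (the signature value is chosen purely for illustration):

/*
 * For a CPUID(1).EAX signature of 0x000306c3:
 *
 *      family = (0x306c3 >> 8) & 0xf                 = 0x6   (not 0xf, so no extended family)
 *      model  = ((0x306c3 >> 4) & 0xf)
 *             + (((0x306c3 >> 16) & 0xf) << 4)       = 0xc + 0x30 = 0x3c
 *
 * i.e. __x86_family(0x306c3) == 6 and x86_model(0x306c3) == 0x3c.
 */
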
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
index dd4c20043ce7..2b9209c46ca9 100644
--- a/arch/x86/include/asm/microcode_intel.h
+++ b/arch/x86/include/asm/microcode_intel.h
@@ -56,12 +56,15 @@ struct extended_sigtable {
56 56
57#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) 57#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
58 58
59extern int 59extern int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc);
60get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
61extern int microcode_sanity_check(void *mc, int print_err); 60extern int microcode_sanity_check(void *mc, int print_err);
62extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev); 61extern int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc);
63extern int 62
64update_match_revision(struct microcode_header_intel *mc_header, int rev); 63static inline int
64revision_is_newer(struct microcode_header_intel *mc_header, int rev)
65{
66 return (mc_header->rev <= rev) ? 0 : 1;
67}
65 68
66#ifdef CONFIG_MICROCODE_INTEL_EARLY 69#ifdef CONFIG_MICROCODE_INTEL_EARLY
67extern void __init load_ucode_intel_bsp(void); 70extern void __init load_ucode_intel_bsp(void);
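
get_matching_microcode()/get_matching_sig() now take the current revision before the blob pointer, and the old update_match_revision() becomes the trivial revision_is_newer() test. Usage sketch -- the caller, the uci/mc names and copy_or_apply() are illustrative only:

        if (revision_is_newer(mc_header, uci->cpu_sig.rev))
                copy_or_apply(mc);      /* only act on a strictly newer blob */
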
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
30 :: "a" (eax), "c" (ecx)); 30 :: "a" (eax), "c" (ecx));
31} 31}
32 32
33static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
34{
35 trace_hardirqs_on();
36 /* "mwait %eax, %ecx;" */
37 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
38 :: "a" (eax), "c" (ecx));
39}
40
33/* 41/*
34 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, 42 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
35 * which can obviate IPI to trigger checking of need_resched. 43 * which can obviate IPI to trigger checking of need_resched.
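
__sti_mwait() leans on the STI interrupt shadow: interrupts become deliverable only as MWAIT executes, so a wakeup cannot be lost between the final need_resched() check and the MWAIT. A typical idle sequence might look like the sketch below (hints of 0/0 are illustrative; this is not a copy of the kernel's mwait idle loop):

        local_irq_disable();
        __monitor(&current_thread_info()->flags, 0, 0); /* arm the monitor first      */
        if (!need_resched())
                __sti_mwait(0, 0);      /* STI takes effect together with MWAIT       */
        else
                local_irq_enable();
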
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index f97fbe3abb67..c7c712f2648b 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -40,8 +40,10 @@
40 40
41#ifdef CONFIG_X86_64 41#ifdef CONFIG_X86_64
42#include <asm/page_64_types.h> 42#include <asm/page_64_types.h>
43#define IOREMAP_MAX_ORDER (PUD_SHIFT)
43#else 44#else
44#include <asm/page_32_types.h> 45#include <asm/page_32_types.h>
46#define IOREMAP_MAX_ORDER (PMD_SHIFT)
45#endif /* CONFIG_X86_64 */ 47#endif /* CONFIG_X86_64 */
46 48
47#ifndef __ASSEMBLY__ 49#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 965c47d254aa..8957810ad7d1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -545,7 +545,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
545 PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val); 545 PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, val);
546} 546}
547 547
548#if PAGETABLE_LEVELS >= 3 548#if CONFIG_PGTABLE_LEVELS >= 3
549static inline pmd_t __pmd(pmdval_t val) 549static inline pmd_t __pmd(pmdval_t val)
550{ 550{
551 pmdval_t ret; 551 pmdval_t ret;
@@ -585,7 +585,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
585 PVOP_VCALL2(pv_mmu_ops.set_pud, pudp, 585 PVOP_VCALL2(pv_mmu_ops.set_pud, pudp,
586 val); 586 val);
587} 587}
588#if PAGETABLE_LEVELS == 4 588#if CONFIG_PGTABLE_LEVELS == 4
589static inline pud_t __pud(pudval_t val) 589static inline pud_t __pud(pudval_t val)
590{ 590{
591 pudval_t ret; 591 pudval_t ret;
@@ -636,9 +636,9 @@ static inline void pud_clear(pud_t *pudp)
636 set_pud(pudp, __pud(0)); 636 set_pud(pudp, __pud(0));
637} 637}
638 638
639#endif /* PAGETABLE_LEVELS == 4 */ 639#endif /* CONFIG_PGTABLE_LEVELS == 4 */
640 640
641#endif /* PAGETABLE_LEVELS >= 3 */ 641#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
642 642
643#ifdef CONFIG_X86_PAE 643#ifdef CONFIG_X86_PAE
644/* Special-case pte-setting operations for PAE, which can't update a 644/* Special-case pte-setting operations for PAE, which can't update a
@@ -976,11 +976,6 @@ extern void default_banner(void);
976 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ 976 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
977 CLBR_NONE, \ 977 CLBR_NONE, \
978 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) 978 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
979
980#define ENABLE_INTERRUPTS_SYSEXIT32 \
981 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
982 CLBR_NONE, \
983 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
984#endif /* CONFIG_X86_32 */ 979#endif /* CONFIG_X86_32 */
985 980
986#endif /* __ASSEMBLY__ */ 981#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7549b8b369e4..f7b0b5c112f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -294,7 +294,7 @@ struct pv_mmu_ops {
294 struct paravirt_callee_save pgd_val; 294 struct paravirt_callee_save pgd_val;
295 struct paravirt_callee_save make_pgd; 295 struct paravirt_callee_save make_pgd;
296 296
297#if PAGETABLE_LEVELS >= 3 297#if CONFIG_PGTABLE_LEVELS >= 3
298#ifdef CONFIG_X86_PAE 298#ifdef CONFIG_X86_PAE
299 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); 299 void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
300 void (*pte_clear)(struct mm_struct *mm, unsigned long addr, 300 void (*pte_clear)(struct mm_struct *mm, unsigned long addr,
@@ -308,13 +308,13 @@ struct pv_mmu_ops {
308 struct paravirt_callee_save pmd_val; 308 struct paravirt_callee_save pmd_val;
309 struct paravirt_callee_save make_pmd; 309 struct paravirt_callee_save make_pmd;
310 310
311#if PAGETABLE_LEVELS == 4 311#if CONFIG_PGTABLE_LEVELS == 4
312 struct paravirt_callee_save pud_val; 312 struct paravirt_callee_save pud_val;
313 struct paravirt_callee_save make_pud; 313 struct paravirt_callee_save make_pud;
314 314
315 void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); 315 void (*set_pgd)(pgd_t *pudp, pgd_t pgdval);
316#endif /* PAGETABLE_LEVELS == 4 */ 316#endif /* CONFIG_PGTABLE_LEVELS == 4 */
317#endif /* PAGETABLE_LEVELS >= 3 */ 317#endif /* CONFIG_PGTABLE_LEVELS >= 3 */
318 318
319 struct pv_lazy_ops lazy_mode; 319 struct pv_lazy_ops lazy_mode;
320 320
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index c4412e972bbd..bf7f8b55b0f9 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -77,7 +77,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
77 77
78#define pmd_pgtable(pmd) pmd_page(pmd) 78#define pmd_pgtable(pmd) pmd_page(pmd)
79 79
80#if PAGETABLE_LEVELS > 2 80#if CONFIG_PGTABLE_LEVELS > 2
81static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 81static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
82{ 82{
83 struct page *page; 83 struct page *page;
@@ -116,7 +116,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
116} 116}
117#endif /* CONFIG_X86_PAE */ 117#endif /* CONFIG_X86_PAE */
118 118
119#if PAGETABLE_LEVELS > 3 119#if CONFIG_PGTABLE_LEVELS > 3
120static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) 120static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
121{ 121{
122 paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT); 122 paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
@@ -142,7 +142,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
142 ___pud_free_tlb(tlb, pud); 142 ___pud_free_tlb(tlb, pud);
143} 143}
144 144
145#endif /* PAGETABLE_LEVELS > 3 */ 145#endif /* CONFIG_PGTABLE_LEVELS > 3 */
146#endif /* PAGETABLE_LEVELS > 2 */ 146#endif /* CONFIG_PGTABLE_LEVELS > 2 */
147 147
148#endif /* _ASM_X86_PGALLOC_H */ 148#endif /* _ASM_X86_PGALLOC_H */
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index daacc23e3fb9..392576433e77 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -17,7 +17,6 @@ typedef union {
17#endif /* !__ASSEMBLY__ */ 17#endif /* !__ASSEMBLY__ */
18 18
19#define SHARED_KERNEL_PMD 0 19#define SHARED_KERNEL_PMD 0
20#define PAGETABLE_LEVELS 2
21 20
22/* 21/*
23 * traditional i386 two-level paging structure: 22 * traditional i386 two-level paging structure:
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 1bd5876c8649..bcc89625ebe5 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -24,8 +24,6 @@ typedef union {
24#define SHARED_KERNEL_PMD 1 24#define SHARED_KERNEL_PMD 1
25#endif 25#endif
26 26
27#define PAGETABLE_LEVELS 3
28
29/* 27/*
30 * PGDIR_SHIFT determines what a top-level page table entry can map 28 * PGDIR_SHIFT determines what a top-level page table entry can map
31 */ 29 */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a0c35bf6cb92..fe57e7a98839 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -551,7 +551,7 @@ static inline unsigned long pages_to_mb(unsigned long npg)
551 return npg >> (20 - PAGE_SHIFT); 551 return npg >> (20 - PAGE_SHIFT);
552} 552}
553 553
554#if PAGETABLE_LEVELS > 2 554#if CONFIG_PGTABLE_LEVELS > 2
555static inline int pud_none(pud_t pud) 555static inline int pud_none(pud_t pud)
556{ 556{
557 return native_pud_val(pud) == 0; 557 return native_pud_val(pud) == 0;
@@ -594,9 +594,9 @@ static inline int pud_large(pud_t pud)
594{ 594{
595 return 0; 595 return 0;
596} 596}
597#endif /* PAGETABLE_LEVELS > 2 */ 597#endif /* CONFIG_PGTABLE_LEVELS > 2 */
598 598
599#if PAGETABLE_LEVELS > 3 599#if CONFIG_PGTABLE_LEVELS > 3
600static inline int pgd_present(pgd_t pgd) 600static inline int pgd_present(pgd_t pgd)
601{ 601{
602 return pgd_flags(pgd) & _PAGE_PRESENT; 602 return pgd_flags(pgd) & _PAGE_PRESENT;
@@ -633,7 +633,7 @@ static inline int pgd_none(pgd_t pgd)
633{ 633{
634 return !native_pgd_val(pgd); 634 return !native_pgd_val(pgd);
635} 635}
636#endif /* PAGETABLE_LEVELS > 3 */ 636#endif /* CONFIG_PGTABLE_LEVELS > 3 */
637 637
638#endif /* __ASSEMBLY__ */ 638#endif /* __ASSEMBLY__ */
639 639
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 602b6028c5b6..e6844dfb4471 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,7 +20,6 @@ typedef struct { pteval_t pte; } pte_t;
20#endif /* !__ASSEMBLY__ */ 20#endif /* !__ASSEMBLY__ */
21 21
22#define SHARED_KERNEL_PMD 0 22#define SHARED_KERNEL_PMD 0
23#define PAGETABLE_LEVELS 4
24 23
25/* 24/*
26 * PGDIR_SHIFT determines what a top-level page table entry can map 25 * PGDIR_SHIFT determines what a top-level page table entry can map
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 8c7c10802e9c..78f0c8cbe316 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -234,7 +234,7 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
234 return native_pgd_val(pgd) & PTE_FLAGS_MASK; 234 return native_pgd_val(pgd) & PTE_FLAGS_MASK;
235} 235}
236 236
237#if PAGETABLE_LEVELS > 3 237#if CONFIG_PGTABLE_LEVELS > 3
238typedef struct { pudval_t pud; } pud_t; 238typedef struct { pudval_t pud; } pud_t;
239 239
240static inline pud_t native_make_pud(pmdval_t val) 240static inline pud_t native_make_pud(pmdval_t val)
@@ -255,7 +255,7 @@ static inline pudval_t native_pud_val(pud_t pud)
255} 255}
256#endif 256#endif
257 257
258#if PAGETABLE_LEVELS > 2 258#if CONFIG_PGTABLE_LEVELS > 2
259typedef struct { pmdval_t pmd; } pmd_t; 259typedef struct { pmdval_t pmd; } pmd_t;
260 260
261static inline pmd_t native_make_pmd(pmdval_t val) 261static inline pmd_t native_make_pmd(pmdval_t val)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..23ba6765b718 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -109,6 +109,9 @@ struct cpuinfo_x86 {
109 /* in KB - valid for CPUS which support this call: */ 109 /* in KB - valid for CPUS which support this call: */
110 int x86_cache_size; 110 int x86_cache_size;
111 int x86_cache_alignment; /* In bytes */ 111 int x86_cache_alignment; /* In bytes */
112 /* Cache QoS architectural values: */
113 int x86_cache_max_rmid; /* max index */
114 int x86_cache_occ_scale; /* scale to bytes */
112 int x86_power; 115 int x86_power;
113 unsigned long loops_per_jiffy; 116 unsigned long loops_per_jiffy;
114 /* cpuid returned max cores value: */ 117 /* cpuid returned max cores value: */
@@ -210,8 +213,23 @@ struct x86_hw_tss {
210 unsigned long sp0; 213 unsigned long sp0;
211 unsigned short ss0, __ss0h; 214 unsigned short ss0, __ss0h;
212 unsigned long sp1; 215 unsigned long sp1;
213 /* ss1 caches MSR_IA32_SYSENTER_CS: */ 216
214 unsigned short ss1, __ss1h; 217 /*
218 * We don't use ring 1, so ss1 is a convenient scratch space in
219 * the same cacheline as sp0. We use ss1 to cache the value in
220 * MSR_IA32_SYSENTER_CS. When we context switch
221 * MSR_IA32_SYSENTER_CS, we first check if the new value being
222 * written matches ss1, and, if it's not, then we wrmsr the new
223 * value and update ss1.
224 *
225 * The only reason we context switch MSR_IA32_SYSENTER_CS is
226 * that we set it to zero in vm86 tasks to avoid corrupting the
227 * stack if we were to go through the sysenter path from vm86
228 * mode.
229 */
230 unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
231
232 unsigned short __ss1h;
215 unsigned long sp2; 233 unsigned long sp2;
216 unsigned short ss2, __ss2h; 234 unsigned short ss2, __ss2h;
217 unsigned long __cr3; 235 unsigned long __cr3;
@@ -276,13 +294,17 @@ struct tss_struct {
276 unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; 294 unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
277 295
278 /* 296 /*
279 * .. and then another 0x100 bytes for the emergency kernel stack: 297 * Space for the temporary SYSENTER stack:
280 */ 298 */
281 unsigned long stack[64]; 299 unsigned long SYSENTER_stack[64];
282 300
283} ____cacheline_aligned; 301} ____cacheline_aligned;
284 302
285DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss); 303DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
304
305#ifdef CONFIG_X86_32
306DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
307#endif
286 308
287/* 309/*
288 * Save the original ist values for checking stack pointers during debugging 310 * Save the original ist values for checking stack pointers during debugging
@@ -474,7 +496,6 @@ struct thread_struct {
474#ifdef CONFIG_X86_32 496#ifdef CONFIG_X86_32
475 unsigned long sysenter_cs; 497 unsigned long sysenter_cs;
476#else 498#else
477 unsigned long usersp; /* Copy from PDA */
478 unsigned short es; 499 unsigned short es;
479 unsigned short ds; 500 unsigned short ds;
480 unsigned short fsindex; 501 unsigned short fsindex;
@@ -564,6 +585,16 @@ static inline void native_swapgs(void)
564#endif 585#endif
565} 586}
566 587
588static inline unsigned long current_top_of_stack(void)
589{
590#ifdef CONFIG_X86_64
591 return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
592#else
593 /* sp0 on x86_32 is special in and around vm86 mode. */
594 return this_cpu_read_stable(cpu_current_top_of_stack);
595#endif
596}
597
567#ifdef CONFIG_PARAVIRT 598#ifdef CONFIG_PARAVIRT
568#include <asm/paravirt.h> 599#include <asm/paravirt.h>
569#else 600#else
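
current_top_of_stack() is what lets the thread_info.h hunk later in this patch drop KERNEL_STACK_OFFSET: the bottom of the current stack, and hence thread_info, is now one subtraction away. Restated as a sketch (this mirrors the new current_thread_info() shown further down):

static inline struct thread_info *ti_from_top_of_stack(void)
{
        return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}
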
@@ -761,10 +792,10 @@ extern char ignore_fpu_irq;
761#define ARCH_HAS_SPINLOCK_PREFETCH 792#define ARCH_HAS_SPINLOCK_PREFETCH
762 793
763#ifdef CONFIG_X86_32 794#ifdef CONFIG_X86_32
764# define BASE_PREFETCH ASM_NOP4 795# define BASE_PREFETCH ""
765# define ARCH_HAS_PREFETCH 796# define ARCH_HAS_PREFETCH
766#else 797#else
767# define BASE_PREFETCH "prefetcht0 (%1)" 798# define BASE_PREFETCH "prefetcht0 %P1"
768#endif 799#endif
769 800
770/* 801/*
@@ -775,10 +806,9 @@ extern char ignore_fpu_irq;
775 */ 806 */
776static inline void prefetch(const void *x) 807static inline void prefetch(const void *x)
777{ 808{
778 alternative_input(BASE_PREFETCH, 809 alternative_input(BASE_PREFETCH, "prefetchnta %P1",
779 "prefetchnta (%1)",
780 X86_FEATURE_XMM, 810 X86_FEATURE_XMM,
781 "r" (x)); 811 "m" (*(const char *)x));
782} 812}
783 813
784/* 814/*
@@ -788,10 +818,9 @@ static inline void prefetch(const void *x)
788 */ 818 */
789static inline void prefetchw(const void *x) 819static inline void prefetchw(const void *x)
790{ 820{
791 alternative_input(BASE_PREFETCH, 821 alternative_input(BASE_PREFETCH, "prefetchw %P1",
792 "prefetchw (%1)", 822 X86_FEATURE_3DNOWPREFETCH,
793 X86_FEATURE_3DNOW, 823 "m" (*(const char *)x));
794 "r" (x));
795} 824}
796 825
797static inline void spin_lock_prefetch(const void *x) 826static inline void spin_lock_prefetch(const void *x)
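
Passing the target as a real "m" operand (instead of "r" plus a hand-written dereference) lets the compiler see the memory reference and pick the addressing mode. Callers are unchanged; the classic pattern is still the sketch below (the list node type and process() are illustrative):

        for (node = head; node; node = node->next) {
                prefetch(node->next);   /* prefetches never fault, so a NULL next is harmless */
                process(node);
        }
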
@@ -799,6 +828,9 @@ static inline void spin_lock_prefetch(const void *x)
799 prefetchw(x); 828 prefetchw(x);
800} 829}
801 830
831#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
832 TOP_OF_KERNEL_STACK_PADDING)
833
802#ifdef CONFIG_X86_32 834#ifdef CONFIG_X86_32
803/* 835/*
804 * User space process size: 3GB (default). 836 * User space process size: 3GB (default).
@@ -809,39 +841,16 @@ static inline void spin_lock_prefetch(const void *x)
809#define STACK_TOP_MAX STACK_TOP 841#define STACK_TOP_MAX STACK_TOP
810 842
811#define INIT_THREAD { \ 843#define INIT_THREAD { \
812 .sp0 = sizeof(init_stack) + (long)&init_stack, \ 844 .sp0 = TOP_OF_INIT_STACK, \
813 .vm86_info = NULL, \ 845 .vm86_info = NULL, \
814 .sysenter_cs = __KERNEL_CS, \ 846 .sysenter_cs = __KERNEL_CS, \
815 .io_bitmap_ptr = NULL, \ 847 .io_bitmap_ptr = NULL, \
816} 848}
817 849
818/*
819 * Note that the .io_bitmap member must be extra-big. This is because
820 * the CPU will access an additional byte beyond the end of the IO
821 * permission bitmap. The extra byte must be all 1 bits, and must
822 * be within the limit.
823 */
824#define INIT_TSS { \
825 .x86_tss = { \
826 .sp0 = sizeof(init_stack) + (long)&init_stack, \
827 .ss0 = __KERNEL_DS, \
828 .ss1 = __KERNEL_CS, \
829 .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
830 }, \
831 .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
832}
833
834extern unsigned long thread_saved_pc(struct task_struct *tsk); 850extern unsigned long thread_saved_pc(struct task_struct *tsk);
835 851
836#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
837#define KSTK_TOP(info) \
838({ \
839 unsigned long *__ptr = (unsigned long *)(info); \
840 (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
841})
842
843/* 852/*
844 * The below -8 is to reserve 8 bytes on top of the ring0 stack. 853 * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
845 * This is necessary to guarantee that the entire "struct pt_regs" 854 * This is necessary to guarantee that the entire "struct pt_regs"
846 * is accessible even if the CPU haven't stored the SS/ESP registers 855 * is accessible even if the CPU haven't stored the SS/ESP registers
847 * on the stack (interrupt gate does not save these registers 856 * on the stack (interrupt gate does not save these registers
@@ -850,11 +859,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
850 * "struct pt_regs" is possible, but they may contain the 859 * "struct pt_regs" is possible, but they may contain the
851 * completely wrong values. 860 * completely wrong values.
852 */ 861 */
853#define task_pt_regs(task) \ 862#define task_pt_regs(task) \
854({ \ 863({ \
855 struct pt_regs *__regs__; \ 864 unsigned long __ptr = (unsigned long)task_stack_page(task); \
856 __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ 865 __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
857 __regs__ - 1; \ 866 ((struct pt_regs *)__ptr) - 1; \
858}) 867})
859 868
860#define KSTK_ESP(task) (task_pt_regs(task)->sp) 869#define KSTK_ESP(task) (task_pt_regs(task)->sp)
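
Worked arithmetic for the new 32-bit task_pt_regs(), assuming THREAD_SIZE == 8192 and the TOP_OF_KERNEL_STACK_PADDING == 8 definition added later in this patch (the stack address is made up):

/*
 *      __ptr  = task_stack_page(task)                      = 0xc1234000
 *      __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING
 *             = 0xc1234000 + 8192 - 8                      = 0xc1235ff8
 *      regs   = (struct pt_regs *)__ptr - 1                = 0xc1235ff8 - sizeof(struct pt_regs)
 *
 * so the 8 padding bytes at the very top of the stack are never covered by pt_regs.
 */
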
@@ -886,11 +895,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
886#define STACK_TOP_MAX TASK_SIZE_MAX 895#define STACK_TOP_MAX TASK_SIZE_MAX
887 896
888#define INIT_THREAD { \ 897#define INIT_THREAD { \
889 .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ 898 .sp0 = TOP_OF_INIT_STACK \
890}
891
892#define INIT_TSS { \
893 .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
894} 899}
895 900
896/* 901/*
@@ -902,11 +907,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
902#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1) 907#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
903extern unsigned long KSTK_ESP(struct task_struct *task); 908extern unsigned long KSTK_ESP(struct task_struct *task);
904 909
905/*
906 * User space RSP while inside the SYSCALL fast path
907 */
908DECLARE_PER_CPU(unsigned long, old_rsp);
909
910#endif /* CONFIG_X86_64 */ 910#endif /* CONFIG_X86_64 */
911 911
912extern void start_thread(struct pt_regs *regs, unsigned long new_ip, 912extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..19507ffa5d28 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
31#else /* __i386__ */ 31#else /* __i386__ */
32 32
33struct pt_regs { 33struct pt_regs {
34/*
35 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
36 * unless syscall needs a complete, fully filled "struct pt_regs".
37 */
34 unsigned long r15; 38 unsigned long r15;
35 unsigned long r14; 39 unsigned long r14;
36 unsigned long r13; 40 unsigned long r13;
37 unsigned long r12; 41 unsigned long r12;
38 unsigned long bp; 42 unsigned long bp;
39 unsigned long bx; 43 unsigned long bx;
40/* arguments: non interrupts/non tracing syscalls only save up to here*/ 44/* These regs are callee-clobbered. Always saved on kernel entry. */
41 unsigned long r11; 45 unsigned long r11;
42 unsigned long r10; 46 unsigned long r10;
43 unsigned long r9; 47 unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
47 unsigned long dx; 51 unsigned long dx;
48 unsigned long si; 52 unsigned long si;
49 unsigned long di; 53 unsigned long di;
54/*
55 * On syscall entry, this is syscall#. On CPU exception, this is error code.
56 * On hw interrupt, it's IRQ number:
57 */
50 unsigned long orig_ax; 58 unsigned long orig_ax;
51/* end of arguments */ 59/* Return frame for iretq */
52/* cpu exception frame or undefined */
53 unsigned long ip; 60 unsigned long ip;
54 unsigned long cs; 61 unsigned long cs;
55 unsigned long flags; 62 unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
89} 96}
90 97
91/* 98/*
92 * user_mode_vm(regs) determines whether a register set came from user mode. 99 * user_mode(regs) determines whether a register set came from user
93 * This is true if V8086 mode was enabled OR if the register set was from 100 * mode. On x86_32, this is true if V8086 mode was enabled OR if the
94 * protected mode with RPL-3 CS value. This tricky test checks that with 101 * register set was from protected mode with RPL-3 CS value. This
95 * one comparison. Many places in the kernel can bypass this full check 102 * tricky test checks that with one comparison.
96 * if they have already ruled out V8086 mode, so user_mode(regs) can be used. 103 *
104 * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
105 * the extra check.
97 */ 106 */
98static inline int user_mode(struct pt_regs *regs) 107static inline int user_mode(struct pt_regs *regs)
99{ 108{
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
104#endif 113#endif
105} 114}
106 115
107static inline int user_mode_vm(struct pt_regs *regs)
108{
109#ifdef CONFIG_X86_32
110 return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
111 USER_RPL;
112#else
113 return user_mode(regs);
114#endif
115}
116
117static inline int v8086_mode(struct pt_regs *regs) 116static inline int v8086_mode(struct pt_regs *regs)
118{ 117{
119#ifdef CONFIG_X86_32 118#ifdef CONFIG_X86_32
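
For reference, the one-comparison trick mentioned in the rewritten comment (and used by the removed user_mode_vm()) works out as follows, with SEGMENT_RPL_MASK == 3, X86_VM_MASK == 0x20000 and USER_RPL == 3:

/*
 *      kernel mode:  (0 | 0)           =  0         >= 3 ?  no
 *      user mode:    (3 | 0)           =  3         >= 3 ?  yes
 *      vm86 mode:    (x | 0x20000)     >= 0x20000   >= 3 ?  yes  (CS RPL is irrelevant)
 */
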
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
138#endif 137#endif
139} 138}
140 139
141#define current_user_stack_pointer() this_cpu_read(old_rsp) 140#define current_user_stack_pointer() current_pt_regs()->sp
142/* ia32 vs. x32 difference */ 141#define compat_user_stack_pointer() current_pt_regs()->sp
143#define compat_user_stack_pointer() \
144 (test_thread_flag(TIF_IA32) \
145 ? current_pt_regs()->sp \
146 : this_cpu_read(old_rsp))
147#endif 142#endif
148 143
149#ifdef CONFIG_X86_32 144#ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
248 */ 243 */
249#define arch_ptrace_stop_needed(code, info) \ 244#define arch_ptrace_stop_needed(code, info) \
250({ \ 245({ \
251 set_thread_flag(TIF_NOTIFY_RESUME); \ 246 force_iret(); \
252 false; \ 247 false; \
253}) 248})
254 249
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..25b1cc07d496 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
95 95
96struct pvclock_vsyscall_time_info { 96struct pvclock_vsyscall_time_info {
97 struct pvclock_vcpu_time_info pvti; 97 struct pvclock_vcpu_time_info pvti;
98 u32 migrate_count;
98} __attribute__((__aligned__(SMP_CACHE_BYTES))); 99} __attribute__((__aligned__(SMP_CACHE_BYTES)));
99 100
100#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) 101#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index db257a58571f..5a9856eb12ba 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@
3 3
4#include <linux/const.h> 4#include <linux/const.h>
5 5
6/* Constructor for a conventional segment GDT (or LDT) entry */ 6/*
7/* This is a macro so it can be used in initializers */ 7 * Constructor for a conventional segment GDT (or LDT) entry.
8 * This is a macro so it can be used in initializers.
9 */
8#define GDT_ENTRY(flags, base, limit) \ 10#define GDT_ENTRY(flags, base, limit) \
9 ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ 11 ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
10 (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ 12 (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
@@ -12,198 +14,228 @@
12 (((base) & _AC(0x00ffffff,ULL)) << 16) | \ 14 (((base) & _AC(0x00ffffff,ULL)) << 16) | \
13 (((limit) & _AC(0x0000ffff,ULL)))) 15 (((limit) & _AC(0x0000ffff,ULL))))
14 16
15/* Simple and small GDT entries for booting only */ 17/* Simple and small GDT entries for booting only: */
16 18
17#define GDT_ENTRY_BOOT_CS 2 19#define GDT_ENTRY_BOOT_CS 2
18#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8) 20#define GDT_ENTRY_BOOT_DS 3
21#define GDT_ENTRY_BOOT_TSS 4
22#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
23#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
24#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
25
26/*
27 * Bottom two bits of selector give the ring
28 * privilege level
29 */
30#define SEGMENT_RPL_MASK 0x3
19 31
20#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1) 32/* User mode is privilege level 3: */
21#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8) 33#define USER_RPL 0x3
22 34
23#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2) 35/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
24#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8) 36#define SEGMENT_TI_MASK 0x4
37/* LDT segment has TI set ... */
38#define SEGMENT_LDT 0x4
39/* ... GDT has it cleared */
40#define SEGMENT_GDT 0x0
25 41
26#define SEGMENT_RPL_MASK 0x3 /* 42#define GDT_ENTRY_INVALID_SEG 0
27 * Bottom two bits of selector give the ring
28 * privilege level
29 */
30#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */
31#define USER_RPL 0x3 /* User mode is privilege level 3 */
32#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
33#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */
34 43
35#ifdef CONFIG_X86_32 44#ifdef CONFIG_X86_32
36/* 45/*
37 * The layout of the per-CPU GDT under Linux: 46 * The layout of the per-CPU GDT under Linux:
38 * 47 *
39 * 0 - null 48 * 0 - null <=== cacheline #1
40 * 1 - reserved 49 * 1 - reserved
41 * 2 - reserved 50 * 2 - reserved
42 * 3 - reserved 51 * 3 - reserved
43 * 52 *
44 * 4 - unused <==== new cacheline 53 * 4 - unused <=== cacheline #2
45 * 5 - unused 54 * 5 - unused
46 * 55 *
47 * ------- start of TLS (Thread-Local Storage) segments: 56 * ------- start of TLS (Thread-Local Storage) segments:
48 * 57 *
49 * 6 - TLS segment #1 [ glibc's TLS segment ] 58 * 6 - TLS segment #1 [ glibc's TLS segment ]
50 * 7 - TLS segment #2 [ Wine's %fs Win32 segment ] 59 * 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
51 * 8 - TLS segment #3 60 * 8 - TLS segment #3 <=== cacheline #3
52 * 9 - reserved 61 * 9 - reserved
53 * 10 - reserved 62 * 10 - reserved
54 * 11 - reserved 63 * 11 - reserved
55 * 64 *
56 * ------- start of kernel segments: 65 * ------- start of kernel segments:
57 * 66 *
58 * 12 - kernel code segment <==== new cacheline 67 * 12 - kernel code segment <=== cacheline #4
59 * 13 - kernel data segment 68 * 13 - kernel data segment
60 * 14 - default user CS 69 * 14 - default user CS
61 * 15 - default user DS 70 * 15 - default user DS
62 * 16 - TSS 71 * 16 - TSS <=== cacheline #5
63 * 17 - LDT 72 * 17 - LDT
64 * 18 - PNPBIOS support (16->32 gate) 73 * 18 - PNPBIOS support (16->32 gate)
65 * 19 - PNPBIOS support 74 * 19 - PNPBIOS support
66 * 20 - PNPBIOS support 75 * 20 - PNPBIOS support <=== cacheline #6
67 * 21 - PNPBIOS support 76 * 21 - PNPBIOS support
68 * 22 - PNPBIOS support 77 * 22 - PNPBIOS support
69 * 23 - APM BIOS support 78 * 23 - APM BIOS support
70 * 24 - APM BIOS support 79 * 24 - APM BIOS support <=== cacheline #7
71 * 25 - APM BIOS support 80 * 25 - APM BIOS support
72 * 81 *
73 * 26 - ESPFIX small SS 82 * 26 - ESPFIX small SS
74 * 27 - per-cpu [ offset to per-cpu data area ] 83 * 27 - per-cpu [ offset to per-cpu data area ]
75 * 28 - stack_canary-20 [ for stack protector ] 84 * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
76 * 29 - unused 85 * 29 - unused
77 * 30 - unused 86 * 30 - unused
78 * 31 - TSS for double fault handler 87 * 31 - TSS for double fault handler
79 */ 88 */
80#define GDT_ENTRY_TLS_MIN 6 89#define GDT_ENTRY_TLS_MIN 6
81#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1) 90#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
82 91
92#define GDT_ENTRY_KERNEL_CS 12
93#define GDT_ENTRY_KERNEL_DS 13
83#define GDT_ENTRY_DEFAULT_USER_CS 14 94#define GDT_ENTRY_DEFAULT_USER_CS 14
84
85#define GDT_ENTRY_DEFAULT_USER_DS 15 95#define GDT_ENTRY_DEFAULT_USER_DS 15
96#define GDT_ENTRY_TSS 16
97#define GDT_ENTRY_LDT 17
98#define GDT_ENTRY_PNPBIOS_CS32 18
99#define GDT_ENTRY_PNPBIOS_CS16 19
100#define GDT_ENTRY_PNPBIOS_DS 20
101#define GDT_ENTRY_PNPBIOS_TS1 21
102#define GDT_ENTRY_PNPBIOS_TS2 22
103#define GDT_ENTRY_APMBIOS_BASE 23
104
105#define GDT_ENTRY_ESPFIX_SS 26
106#define GDT_ENTRY_PERCPU 27
107#define GDT_ENTRY_STACK_CANARY 28
108
109#define GDT_ENTRY_DOUBLEFAULT_TSS 31
86 110
87#define GDT_ENTRY_KERNEL_BASE (12) 111/*
112 * Number of entries in the GDT table:
113 */
114#define GDT_ENTRIES 32
88 115
89#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0) 116/*
117 * Segment selector values corresponding to the above entries:
118 */
90 119
91#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1) 120#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
121#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
122#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
123#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
124#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
92 125
93#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4) 126/* segment for calling fn: */
94#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5) 127#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
128/* code segment for BIOS: */
129#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
95 130
96#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6) 131/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
97#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11) 132#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
98 133
99#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14) 134/* data segment for BIOS: */
100#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) 135#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
136/* transfer data segment: */
137#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
138/* another data segment: */
139#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
101 140
102#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
103#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
104#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8) 142# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
105#else 143#else
106#define __KERNEL_PERCPU 0 144# define __KERNEL_PERCPU 0
107#endif 145#endif
108 146
109#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
110#ifdef CONFIG_CC_STACKPROTECTOR 147#ifdef CONFIG_CC_STACKPROTECTOR
111#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8) 148# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
112#else 149#else
113#define __KERNEL_STACK_CANARY 0 150# define __KERNEL_STACK_CANARY 0
114#endif 151#endif
115 152
116#define GDT_ENTRY_DOUBLEFAULT_TSS 31 153#else /* 64-bit: */
117
118/*
119 * The GDT has 32 entries
120 */
121#define GDT_ENTRIES 32
122 154
123/* The PnP BIOS entries in the GDT */ 155#include <asm/cache.h>
124#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
125#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
126#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
127#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
128#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
129
130/* The PnP BIOS selectors */
131#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
132#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
133#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
134#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
135#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
136 156
157#define GDT_ENTRY_KERNEL32_CS 1
158#define GDT_ENTRY_KERNEL_CS 2
159#define GDT_ENTRY_KERNEL_DS 3
137 160
138/* 161/*
139 * Matching rules for certain types of segments. 162 * We cannot use the same code segment descriptor for user and kernel mode,
163 * not even in long flat mode, because of different DPL.
164 *
165 * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
166 * selectors:
167 *
168 * if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
169 * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
170 *
171 * ss = STAR.SYSRET_CS+8 (in either case)
172 *
173 * thus USER_DS should be between 32-bit and 64-bit code selectors:
140 */ 174 */
175#define GDT_ENTRY_DEFAULT_USER32_CS 4
176#define GDT_ENTRY_DEFAULT_USER_DS 5
177#define GDT_ENTRY_DEFAULT_USER_CS 6
141 178
142/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ 179/* Needs two entries */
143#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) 180#define GDT_ENTRY_TSS 8
144 181/* Needs two entries */
182#define GDT_ENTRY_LDT 10
145 183
146#else 184#define GDT_ENTRY_TLS_MIN 12
147#include <asm/cache.h> 185#define GDT_ENTRY_TLS_MAX 14
148
149#define GDT_ENTRY_KERNEL32_CS 1
150#define GDT_ENTRY_KERNEL_CS 2
151#define GDT_ENTRY_KERNEL_DS 3
152 186
153#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8) 187/* Abused to load per CPU data from limit */
188#define GDT_ENTRY_PER_CPU 15
154 189
155/* 190/*
156 * we cannot use the same code segment descriptor for user and kernel 191 * Number of entries in the GDT table:
157 * -- not even in the long flat mode, because of different DPL /kkeil
158 * The segment offset needs to contain a RPL. Grr. -AK
159 * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
160 */ 192 */
161#define GDT_ENTRY_DEFAULT_USER32_CS 4 193#define GDT_ENTRIES 16
162#define GDT_ENTRY_DEFAULT_USER_DS 5
163#define GDT_ENTRY_DEFAULT_USER_CS 6
164#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
165#define __USER32_DS __USER_DS
166
167#define GDT_ENTRY_TSS 8 /* needs two entries */
168#define GDT_ENTRY_LDT 10 /* needs two entries */
169#define GDT_ENTRY_TLS_MIN 12
170#define GDT_ENTRY_TLS_MAX 14
171
172#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
173#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
174 194
175/* TLS indexes for 64bit - hardcoded in arch_prctl */ 195/*
176#define FS_TLS 0 196 * Segment selector values corresponding to the above entries:
177#define GS_TLS 1 197 *
178 198 * Note, selectors also need to have a correct RPL,
179#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3) 199 * expressed with the +3 value for user-space selectors:
180#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3) 200 */
181 201#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
182#define GDT_ENTRIES 16 202#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
203#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
204#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
205#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
206#define __USER32_DS __USER_DS
207#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
208#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
209
210/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
211#define FS_TLS 0
212#define GS_TLS 1
213
214#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
215#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
183 216
184#endif 217#endif
185 218
186#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
187#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
188#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
189#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
190#ifndef CONFIG_PARAVIRT 219#ifndef CONFIG_PARAVIRT
191#define get_kernel_rpl() 0 220# define get_kernel_rpl() 0
192#endif 221#endif
193 222
194#define IDT_ENTRIES 256 223#define IDT_ENTRIES 256
195#define NUM_EXCEPTION_VECTORS 32 224#define NUM_EXCEPTION_VECTORS 32
196/* Bitmask of exception vectors which push an error code on the stack */ 225
197#define EXCEPTION_ERRCODE_MASK 0x00027d00 226/* Bitmask of exception vectors which push an error code on the stack: */
198#define GDT_SIZE (GDT_ENTRIES * 8) 227#define EXCEPTION_ERRCODE_MASK 0x00027d00
199#define GDT_ENTRY_TLS_ENTRIES 3 228
200#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8) 229#define GDT_SIZE (GDT_ENTRIES*8)
230#define GDT_ENTRY_TLS_ENTRIES 3
231#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
201 232
202#ifdef __KERNEL__ 233#ifdef __KERNEL__
203#ifndef __ASSEMBLY__ 234#ifndef __ASSEMBLY__
235
204extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5]; 236extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
205#ifdef CONFIG_TRACING 237#ifdef CONFIG_TRACING
206#define trace_early_idt_handlers early_idt_handlers 238# define trace_early_idt_handlers early_idt_handlers
207#endif 239#endif
208 240
209/* 241/*
@@ -228,37 +260,30 @@ do { \
228} while (0) 260} while (0)
229 261
230/* 262/*
231 * Save a segment register away 263 * Save a segment register away:
232 */ 264 */
233#define savesegment(seg, value) \ 265#define savesegment(seg, value) \
234 asm("mov %%" #seg ",%0":"=r" (value) : : "memory") 266 asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
235 267
236/* 268/*
237 * x86_32 user gs accessors. 269 * x86-32 user GS accessors:
238 */ 270 */
239#ifdef CONFIG_X86_32 271#ifdef CONFIG_X86_32
240#ifdef CONFIG_X86_32_LAZY_GS 272# ifdef CONFIG_X86_32_LAZY_GS
241#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) 273# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
242#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) 274# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
243#define task_user_gs(tsk) ((tsk)->thread.gs) 275# define task_user_gs(tsk) ((tsk)->thread.gs)
244#define lazy_save_gs(v) savesegment(gs, (v)) 276# define lazy_save_gs(v) savesegment(gs, (v))
245#define lazy_load_gs(v) loadsegment(gs, (v)) 277# define lazy_load_gs(v) loadsegment(gs, (v))
246#else /* X86_32_LAZY_GS */ 278# else /* X86_32_LAZY_GS */
247#define get_user_gs(regs) (u16)((regs)->gs) 279# define get_user_gs(regs) (u16)((regs)->gs)
248#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) 280# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
249#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) 281# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
250#define lazy_save_gs(v) do { } while (0) 282# define lazy_save_gs(v) do { } while (0)
251#define lazy_load_gs(v) do { } while (0) 283# define lazy_load_gs(v) do { } while (0)
252#endif /* X86_32_LAZY_GS */ 284# endif /* X86_32_LAZY_GS */
253#endif /* X86_32 */ 285#endif /* X86_32 */
254 286
255static inline unsigned long get_limit(unsigned long segment)
256{
257 unsigned long __limit;
258 asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
259 return __limit + 1;
260}
261
262#endif /* !__ASSEMBLY__ */ 287#endif /* !__ASSEMBLY__ */
263#endif /* __KERNEL__ */ 288#endif /* __KERNEL__ */
264 289
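
The selector values spelled out above follow directly from selector = (GDT index << 3) | RPL. For the 64-bit table, and assuming MSR_STAR's SYSRET_CS field is loaded with __USER32_CS as the layout comment implies:

/*
 *      __KERNEL_CS  = 2*8      = 0x10
 *      __USER32_CS  = 4*8 + 3  = 0x23   (= STAR.SYSRET_CS)
 *      __USER_DS    = 5*8 + 3  = 0x2b   (= STAR.SYSRET_CS +  8, the SYSRET ss)
 *      __USER_CS    = 6*8 + 3  = 0x33   (= STAR.SYSRET_CS + 16, the 64-bit SYSRET cs)
 */
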
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ff4e7b236e21..f69e06b283fb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
66 */ 66 */
67extern struct boot_params boot_params; 67extern struct boot_params boot_params;
68 68
69static inline bool kaslr_enabled(void)
70{
71 return !!(boot_params.hdr.loadflags & KASLR_FLAG);
72}
73
69/* 74/*
70 * Do NOT EVER look at the BIOS memory size location. 75 * Do NOT EVER look at the BIOS memory size location.
71 * It does not work on many machines. 76 * It does not work on many machines.
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..6fe6b182c998 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
57 unsigned long ip; 57 unsigned long ip;
58 unsigned long flags; 58 unsigned long flags;
59 unsigned short cs; 59 unsigned short cs;
60 unsigned short gs; 60 unsigned short __pad2; /* Was called gs, but was always zero. */
61 unsigned short fs; 61 unsigned short __pad1; /* Was called fs, but was always zero. */
62 unsigned short __pad0; 62 unsigned short ss;
63 unsigned long err; 63 unsigned long err;
64 unsigned long trapno; 64 unsigned long trapno;
65 unsigned long oldmask; 65 unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 7a958164088c..89db46752a8f 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,9 +13,7 @@
13 X86_EFLAGS_CF | X86_EFLAGS_RF) 13 X86_EFLAGS_CF | X86_EFLAGS_RF)
14 14
15void signal_fault(struct pt_regs *regs, void __user *frame, char *where); 15void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
16 16int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
17int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
18 unsigned long *pax);
19int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, 17int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
20 struct pt_regs *regs, unsigned long mask); 18 struct pt_regs *regs, unsigned long mask);
21 19
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f4e270..ba665ebd17bb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
27 27
28#ifdef CONFIG_X86_SMAP 28#ifdef CONFIG_X86_SMAP
29 29
30#define ASM_CLAC \ 30#define ASM_CLAC \
31 661: ASM_NOP3 ; \ 31 ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
32 .pushsection .altinstr_replacement, "ax" ; \ 32
33 662: __ASM_CLAC ; \ 33#define ASM_STAC \
34 .popsection ; \ 34 ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
35 .pushsection .altinstructions, "a" ; \
36 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
37 .popsection
38
39#define ASM_STAC \
40 661: ASM_NOP3 ; \
41 .pushsection .altinstr_replacement, "ax" ; \
42 662: __ASM_STAC ; \
43 .popsection ; \
44 .pushsection .altinstructions, "a" ; \
45 altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
46 .popsection
47 35
48#else /* CONFIG_X86_SMAP */ 36#else /* CONFIG_X86_SMAP */
49 37
@@ -61,20 +49,20 @@
61static __always_inline void clac(void) 49static __always_inline void clac(void)
62{ 50{
63 /* Note: a barrier is implicit in alternative() */ 51 /* Note: a barrier is implicit in alternative() */
64 alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); 52 alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
65} 53}
66 54
67static __always_inline void stac(void) 55static __always_inline void stac(void)
68{ 56{
69 /* Note: a barrier is implicit in alternative() */ 57 /* Note: a barrier is implicit in alternative() */
70 alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); 58 alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
71} 59}
72 60
73/* These macros can be used in asm() statements */ 61/* These macros can be used in asm() statements */
74#define ASM_CLAC \ 62#define ASM_CLAC \
75 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) 63 ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
76#define ASM_STAC \ 64#define ASM_STAC \
77 ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) 65 ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
78 66
79#else /* CONFIG_X86_SMAP */ 67#else /* CONFIG_X86_SMAP */
80 68
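
With the NOP padding now generated automatically by ALTERNATIVE, ASM_CLAC/ASM_STAC shrink to a single line each and the calling convention is unchanged. Conceptually the access window still looks like this -- probe_user_byte() is a hypothetical raw accessor, shown only to illustrate the stac()/clac() bracketing that the real uaccess primitives perform internally via ASM_STAC/ASM_CLAC:

        stac();                                 /* open the user-access window (SMAP off) */
        err = probe_user_byte(addr, &val);      /* hypothetical raw user-memory access    */
        clac();                                 /* close it again immediately (SMAP on)   */
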
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3bc835..17a8dced12da 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -150,12 +150,13 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
150} 150}
151 151
152void cpu_disable_common(void); 152void cpu_disable_common(void);
153void cpu_die_common(unsigned int cpu);
154void native_smp_prepare_boot_cpu(void); 153void native_smp_prepare_boot_cpu(void);
155void native_smp_prepare_cpus(unsigned int max_cpus); 154void native_smp_prepare_cpus(unsigned int max_cpus);
156void native_smp_cpus_done(unsigned int max_cpus); 155void native_smp_cpus_done(unsigned int max_cpus);
156void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
157int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); 157int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
158int native_cpu_disable(void); 158int native_cpu_disable(void);
159int common_cpu_die(unsigned int cpu);
159void native_cpu_die(unsigned int cpu); 160void native_cpu_die(unsigned int cpu);
160void native_play_dead(void); 161void native_play_dead(void);
161void play_dead_common(void); 162void play_dead_common(void);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6a4b00fafb00..aeb4666e0c0a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -4,6 +4,8 @@
4 4
5#ifdef __KERNEL__ 5#ifdef __KERNEL__
6 6
7#include <asm/nops.h>
8
7static inline void native_clts(void) 9static inline void native_clts(void)
8{ 10{
9 asm volatile("clts"); 11 asm volatile("clts");
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
199 "+m" (*(volatile char __force *)__p)); 201 "+m" (*(volatile char __force *)__p));
200} 202}
201 203
204static inline void clwb(volatile void *__p)
205{
206 volatile struct { char x[64]; } *p = __p;
207
208 asm volatile(ALTERNATIVE_2(
209 ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
210 ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
211 X86_FEATURE_CLFLUSHOPT,
212 ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
213 X86_FEATURE_CLWB)
214 : [p] "+m" (*p)
215 : [pax] "a" (p));
216}
217
218static inline void pcommit_sfence(void)
219{
220 alternative(ASM_NOP7,
221 ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
222 "sfence",
223 X86_FEATURE_PCOMMIT);
224}
225
202#define nop() asm volatile ("nop") 226#define nop() asm volatile ("nop")
203 227
204 228
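
clwb() and pcommit_sfence() pair up in persistent-memory write-back paths: write back every dirty cache line of a range, then commit. A sketch, assuming a cache-line-aligned buffer and 64-byte lines (the function is illustrative, not a kernel API):

static void flush_range_to_pmem(void *addr, size_t len)
{
        char *p;

        for (p = addr; p < (char *)addr + len; p += 64)
                clwb(p);                /* write back the line, possibly keeping it cached */

        pcommit_sfence();               /* fence the flushes and commit them to the media  */
}
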
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1d4e4f279a32..ea2dbe82cba3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,6 +13,33 @@
13#include <asm/types.h> 13#include <asm/types.h>
14 14
15/* 15/*
16 * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
17 * reserve at the top of the kernel stack. We do it because of a nasty
18 * 32-bit corner case. On x86_32, the hardware stack frame is
19 * variable-length. Except for vm86 mode, struct pt_regs assumes a
20 * maximum-length frame. If we enter from CPL 0, the top 8 bytes of
21 * pt_regs don't actually exist. Ordinarily this doesn't matter, but it
22 * does in at least one case:
23 *
24 * If we take an NMI early enough in SYSENTER, then we can end up with
25 * pt_regs that extends above sp0. On the way out, in the espfix code,
26 * we can read the saved SS value, but that value will be above sp0.
27 * Without this offset, that can result in a page fault. (We are
28 * careful that, in this case, the value we read doesn't matter.)
29 *
30 * In vm86 mode, the hardware frame is much longer still, but we neither
31 * access the extra members from NMI context, nor do we write such a
32 * frame at sp0 at all.
33 *
34 * x86_64 has a fixed-length stack frame.
35 */
36#ifdef CONFIG_X86_32
37# define TOP_OF_KERNEL_STACK_PADDING 8
38#else
39# define TOP_OF_KERNEL_STACK_PADDING 0
40#endif
41
42/*
16 * low level task data that entry.S needs immediate access to 43 * low level task data that entry.S needs immediate access to
17 * - this struct should fit entirely inside of one cache line 44 * - this struct should fit entirely inside of one cache line
18 * - this struct shares the supervisor stack pages 45 * - this struct shares the supervisor stack pages
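
[Rough illustration of what the padding above changes — a sketch, not code from this series; THREAD_SIZE and the forward-declared pt_regs are placeholders. With the padding in place, the saved register frame sits TOP_OF_KERNEL_STACK_PADDING bytes below the top of the stack instead of flush against it:

	#define THREAD_SIZE			(2 * 4096UL)	/* assumed: 2-page stacks */
	#define TOP_OF_KERNEL_STACK_PADDING	8UL		/* x86_32 value from above */

	struct pt_regs;					/* layout as in asm/ptrace.h */

	static unsigned long pt_regs_top(unsigned long stack_base)
	{
		/* Top of the kernel stack, then step down past the padding;
		 * pt_regs ends here and extends downward from this address. */
		return stack_base + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
	}
]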
@@ -145,7 +172,6 @@ struct thread_info {
145#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) 172#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
146 173
147#define STACK_WARN (THREAD_SIZE/8) 174#define STACK_WARN (THREAD_SIZE/8)
148#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
149 175
150/* 176/*
151 * macros/functions for gaining access to the thread information structure 177 * macros/functions for gaining access to the thread information structure
@@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
158 184
159static inline struct thread_info *current_thread_info(void) 185static inline struct thread_info *current_thread_info(void)
160{ 186{
161 struct thread_info *ti; 187 return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
162 ti = (void *)(this_cpu_read_stable(kernel_stack) +
163 KERNEL_STACK_OFFSET - THREAD_SIZE);
164 return ti;
165} 188}
166 189
167static inline unsigned long current_stack_pointer(void) 190static inline unsigned long current_stack_pointer(void)
@@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
177 200
178#else /* !__ASSEMBLY__ */ 201#else /* !__ASSEMBLY__ */
179 202
180/* how to get the thread information struct from ASM */ 203/* Load thread_info address into "reg" */
181#define GET_THREAD_INFO(reg) \ 204#define GET_THREAD_INFO(reg) \
182 _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ 205 _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
183 _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; 206 _ASM_SUB $(THREAD_SIZE),reg ;
184 207
185/* 208/*
186 * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in 209 * ASM operand which evaluates to a 'thread_info' address of
187 * a certain register (to be used in assembler memory operands). 210 * the current task, if it is known that "reg" is exactly "off"
211 * bytes below the top of the stack currently.
212 *
213 * ( The kernel stack's size is known at build time, it is usually
214 * 2 or 4 pages, and the bottom of the kernel stack contains
215 * the thread_info structure. So to access the thread_info very
216 * quickly from assembly code we can calculate down from the
217 * top of the kernel stack to the bottom, using constant,
218 * build-time calculations only. )
219 *
220 * For example, to fetch the current thread_info->flags value into %eax
221 * on x86-64 defconfig kernels, in syscall entry code where RSP is
222 * currently at exactly SIZEOF_PTREGS bytes away from the top of the
223 * stack:
224 *
225 * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
226 *
227 * will translate to:
228 *
229 * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
230 *
231 * which is below the current RSP by almost 16K.
188 */ 232 */
189#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) 233#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
190 234
191#endif 235#endif
192 236
@@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
236#endif 280#endif
237 return false; 281 return false;
238} 282}
283
284/*
285 * Force syscall return via IRET by making it look as if there was
286 * some work pending. IRET is our most capable (but slowest) syscall
287 * return path, which is able to restore modified SS, CS and certain
288 * EFLAGS values that other (fast) syscall return instructions
289 * are not able to restore properly.
290 */
291#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
292
239#endif /* !__ASSEMBLY__ */ 293#endif /* !__ASSEMBLY__ */
240 294
241#ifndef __ASSEMBLY__ 295#ifndef __ASSEMBLY__
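
[Hypothetical caller of the force_iret() helper added above (illustration only, not from this patch): code that rewrites the user-visible CS/SS in pt_regs wants the slow IRET exit so the new selectors are actually loaded on return:

	static void set_user_segments(struct pt_regs *regs,
				      unsigned short cs, unsigned short ss)
	{
		regs->cs = cs;
		regs->ss = ss;

		/* TIF_NOTIFY_RESUME routes this task through the IRET path. */
		force_iret();
	}
]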
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 12a26b979bf1..f2f9b39b274a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
231} 231}
232 232
233unsigned long 233unsigned long
234copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); 234copy_user_handle_tail(char *to, char *from, unsigned len);
235 235
236#endif /* _ASM_X86_UACCESS_64_H */ 236#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b0988043a..ab456dc233b5 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -15,6 +15,7 @@
15 15
16/* loadflags */ 16/* loadflags */
17#define LOADED_HIGH (1<<0) 17#define LOADED_HIGH (1<<0)
18#define KASLR_FLAG (1<<1)
18#define QUIET_FLAG (1<<5) 19#define QUIET_FLAG (1<<5)
19#define KEEP_SEGMENTS (1<<6) 20#define KEEP_SEGMENTS (1<<6)
20#define CAN_USE_HEAP (1<<7) 21#define CAN_USE_HEAP (1<<7)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 3ce079136c11..1a4eae695ca8 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -74,6 +74,24 @@
74#define MSR_IA32_PERF_CAPABILITIES 0x00000345 74#define MSR_IA32_PERF_CAPABILITIES 0x00000345
75#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 75#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
76 76
77#define MSR_IA32_RTIT_CTL 0x00000570
78#define RTIT_CTL_TRACEEN BIT(0)
79#define RTIT_CTL_OS BIT(2)
80#define RTIT_CTL_USR BIT(3)
81#define RTIT_CTL_CR3EN BIT(7)
82#define RTIT_CTL_TOPA BIT(8)
83#define RTIT_CTL_TSC_EN BIT(10)
84#define RTIT_CTL_DISRETC BIT(11)
85#define RTIT_CTL_BRANCH_EN BIT(13)
86#define MSR_IA32_RTIT_STATUS 0x00000571
87#define RTIT_STATUS_CONTEXTEN BIT(1)
88#define RTIT_STATUS_TRIGGEREN BIT(2)
89#define RTIT_STATUS_ERROR BIT(4)
90#define RTIT_STATUS_STOPPED BIT(5)
91#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
92#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
93#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
94
77#define MSR_MTRRfix64K_00000 0x00000250 95#define MSR_MTRRfix64K_00000 0x00000250
78#define MSR_MTRRfix16K_80000 0x00000258 96#define MSR_MTRRfix16K_80000 0x00000258
79#define MSR_MTRRfix16K_A0000 0x00000259 97#define MSR_MTRRfix16K_A0000 0x00000259
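
[The new MSR_IA32_RTIT_* definitions cover Intel Processor Trace control and status. A plausible programming sketch — illustration only; the ToPA base/mask values and the wrmsrl() sequence here are assumptions, not from this patch:

	static void pt_start_sketch(u64 topa_base, u64 output_mask)
	{
		u64 ctl = RTIT_CTL_TRACEEN | RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN |
			  RTIT_CTL_TSC_EN | RTIT_CTL_OS | RTIT_CTL_USR;

		wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, topa_base);	/* where trace data goes */
		wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, output_mask);	/* output region/table size */
		wrmsrl(MSR_IA32_RTIT_CTL, ctl);			/* enable tracing last */
	}
]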
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a88851..580aee3072e0 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
25#else /* __i386__ */ 25#else /* __i386__ */
26 26
27#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) 27#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
28/*
29 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
30 * unless syscall needs a complete, fully filled "struct pt_regs".
31 */
28#define R15 0 32#define R15 0
29#define R14 8 33#define R14 8
30#define R13 16 34#define R13 16
31#define R12 24 35#define R12 24
32#define RBP 32 36#define RBP 32
33#define RBX 40 37#define RBX 40
34/* arguments: interrupts/non tracing syscalls only save up to here*/ 38/* These regs are callee-clobbered. Always saved on kernel entry. */
35#define R11 48 39#define R11 48
36#define R10 56 40#define R10 56
37#define R9 64 41#define R9 64
@@ -41,15 +45,17 @@
41#define RDX 96 45#define RDX 96
42#define RSI 104 46#define RSI 104
43#define RDI 112 47#define RDI 112
44#define ORIG_RAX 120 /* = ERROR */ 48/*
45/* end of arguments */ 49 * On syscall entry, this is syscall#. On CPU exception, this is error code.
46/* cpu exception frame or undefined in case of fast syscall. */ 50 * On hw interrupt, it's IRQ number:
51 */
52#define ORIG_RAX 120
53/* Return frame for iretq */
47#define RIP 128 54#define RIP 128
48#define CS 136 55#define CS 136
49#define EFLAGS 144 56#define EFLAGS 144
50#define RSP 152 57#define RSP 152
51#define SS 160 58#define SS 160
52#define ARGOFFSET R11
53#endif /* __ASSEMBLY__ */ 59#endif /* __ASSEMBLY__ */
54 60
55/* top of stack page */ 61/* top of stack page */
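
[The ORIG_RAX slot documented above is what a tracer reads to learn the syscall number at a syscall-entry stop. A small user-space sketch (illustration only):

	#include <sys/ptrace.h>
	#include <sys/types.h>

	static long syscall_nr(pid_t pid)
	{
		/* ORIG_RAX (byte offset 120 above) indexes the tracee's USER area. */
		return ptrace(PTRACE_PEEKUSER, pid, (void *)120, NULL);
	}
]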
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa4d999..bc16115af39b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
41#ifndef __KERNEL__ 41#ifndef __KERNEL__
42 42
43struct pt_regs { 43struct pt_regs {
44/*
45 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
46 * unless syscall needs a complete, fully filled "struct pt_regs".
47 */
44 unsigned long r15; 48 unsigned long r15;
45 unsigned long r14; 49 unsigned long r14;
46 unsigned long r13; 50 unsigned long r13;
47 unsigned long r12; 51 unsigned long r12;
48 unsigned long rbp; 52 unsigned long rbp;
49 unsigned long rbx; 53 unsigned long rbx;
50/* arguments: non interrupts/non tracing syscalls only save up to here*/ 54/* These regs are callee-clobbered. Always saved on kernel entry. */
51 unsigned long r11; 55 unsigned long r11;
52 unsigned long r10; 56 unsigned long r10;
53 unsigned long r9; 57 unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
57 unsigned long rdx; 61 unsigned long rdx;
58 unsigned long rsi; 62 unsigned long rsi;
59 unsigned long rdi; 63 unsigned long rdi;
64/*
65 * On syscall entry, this is syscall#. On CPU exception, this is error code.
66 * On hw interrupt, it's IRQ number:
67 */
60 unsigned long orig_rax; 68 unsigned long orig_rax;
61/* end of arguments */ 69/* Return frame for iretq */
62/* cpu exception frame or undefined */
63 unsigned long rip; 70 unsigned long rip;
64 unsigned long cs; 71 unsigned long cs;
65 unsigned long eflags; 72 unsigned long eflags;
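
[Equivalently, the whole frame described by this struct can be fetched in one call, and orig_rax then gives the syscall number (or error code / IRQ number, per the new comments). Sketch only:

	#include <sys/ptrace.h>
	#include <sys/user.h>
	#include <sys/types.h>

	static long syscall_nr_of(pid_t pid)
	{
		struct user_regs_struct regs;

		if (ptrace(PTRACE_GETREGS, pid, NULL, &regs) == -1)
			return -1;

		return regs.orig_rax;
	}
]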
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d8b9f9081e86..16dc4e8a2cd3 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,9 +177,24 @@ struct sigcontext {
177 __u64 rip; 177 __u64 rip;
178 __u64 eflags; /* RFLAGS */ 178 __u64 eflags; /* RFLAGS */
179 __u16 cs; 179 __u16 cs;
180 __u16 gs; 180
181 __u16 fs; 181 /*
182 __u16 __pad0; 182 * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
183 * Linux saved and restored fs and gs in these slots. This
184 * was counterproductive, as fsbase and gsbase were never
185 * saved, so arch_prctl was presumably unreliable.
186 *
187 * If these slots are ever needed for any other purpose, there
188 * is some risk that very old 64-bit binaries could get
189 * confused. I doubt that many such binaries still work,
190 * though, since the same patch in 2.5.64 also removed the
191 * 64-bit set_thread_area syscall, so it appears that there is
192 * no TLS API that works in both pre- and post-2.5.64 kernels.
193 */
194 __u16 __pad2; /* Was gs. */
195 __u16 __pad1; /* Was fs. */
196
197 __u16 ss;
183 __u64 err; 198 __u64 err;
184 __u64 trapno; 199 __u64 trapno;
185 __u64 oldmask; 200 __u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1deb91a..1fe92181ee9e 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
67#define EXIT_REASON_EPT_VIOLATION 48 67#define EXIT_REASON_EPT_VIOLATION 48
68#define EXIT_REASON_EPT_MISCONFIG 49 68#define EXIT_REASON_EPT_MISCONFIG 49
69#define EXIT_REASON_INVEPT 50 69#define EXIT_REASON_INVEPT 50
70#define EXIT_REASON_RDTSCP 51
70#define EXIT_REASON_PREEMPTION_TIMER 52 71#define EXIT_REASON_PREEMPTION_TIMER 52
71#define EXIT_REASON_INVVPID 53 72#define EXIT_REASON_INVVPID 53
72#define EXIT_REASON_WBINVD 54 73#define EXIT_REASON_WBINVD 54