diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-03 23:50:26 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-03 23:50:26 -0500 |
commit | ce4d72fac16a9540452957b526443b6080030bff (patch) | |
tree | 650abdc13edbc6150332c854a7467f06c2bf5ad9 /arch/x86/include | |
parent | 0f25f2c1b18f7e47279ec2cf1d24c11c3108873b (diff) | |
parent | 158ecc39185b885420e5136b803b29be2bbec7fb (diff) |
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu changes from Ingo Molnar:
"There are two main areas of changes:
- Rework of the extended FPU state code to robustify the kernel's
usage of cpuid provided xstate sizes - and related changes (Dave
Hansen)
- math emulation enhancements: new modern FPU instructions support,
with testcases, plus cleanups (Denys Vlasenko)"
* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
x86/fpu: Fixup uninitialized feature_name warning
x86/fpu/math-emu: Add support for FISTTP instructions
x86/fpu/math-emu, selftests: Add test for FISTTP instructions
x86/fpu/math-emu: Add support for FCMOVcc insns
x86/fpu/math-emu: Add support for F[U]COMI[P] insns
x86/fpu/math-emu: Remove define layer for undocumented opcodes
x86/fpu/math-emu, selftests: Add tests for FCMOV and FCOMI insns
x86/fpu/math-emu: Remove !NO_UNDOC_CODE
x86/fpu: Check CPU-provided sizes against struct declarations
x86/fpu: Check to ensure increasing-offset xstate offsets
x86/fpu: Correct and check XSAVE xstate size calculations
x86/fpu: Add C structures for AVX-512 state components
x86/fpu: Rework YMM definition
x86/fpu/mpx: Rework MPX 'xstate' types
x86/fpu: Add xfeature_enabled() helper instead of test_bit()
x86/fpu: Remove 'xfeature_nr'
x86/fpu: Rework XSTATE_* macros to remove magic '2'
x86/fpu: Rename XFEATURES_NR_MAX
x86/fpu: Rename XSAVE macros
x86/fpu: Remove partial LWP support definitions
...
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/fpu/types.h | 148 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu/xstate.h | 15 | ||||
-rw-r--r-- | arch/x86/include/asm/trace/mpx.h | 7 |
3 files changed, 117 insertions, 53 deletions
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c49c5173158e..1c6f6ac52ad0 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h | |||
@@ -95,63 +95,122 @@ struct swregs_state { | |||
95 | /* | 95 | /* |
96 | * List of XSAVE features Linux knows about: | 96 | * List of XSAVE features Linux knows about: |
97 | */ | 97 | */ |
98 | enum xfeature_bit { | 98 | enum xfeature { |
99 | XSTATE_BIT_FP, | 99 | XFEATURE_FP, |
100 | XSTATE_BIT_SSE, | 100 | XFEATURE_SSE, |
101 | XSTATE_BIT_YMM, | 101 | /* |
102 | XSTATE_BIT_BNDREGS, | 102 | * Values above here are "legacy states". |
103 | XSTATE_BIT_BNDCSR, | 103 | * Those below are "extended states". |
104 | XSTATE_BIT_OPMASK, | 104 | */ |
105 | XSTATE_BIT_ZMM_Hi256, | 105 | XFEATURE_YMM, |
106 | XSTATE_BIT_Hi16_ZMM, | 106 | XFEATURE_BNDREGS, |
107 | 107 | XFEATURE_BNDCSR, | |
108 | XFEATURES_NR_MAX, | 108 | XFEATURE_OPMASK, |
109 | XFEATURE_ZMM_Hi256, | ||
110 | XFEATURE_Hi16_ZMM, | ||
111 | |||
112 | XFEATURE_MAX, | ||
109 | }; | 113 | }; |
110 | 114 | ||
111 | #define XSTATE_FP (1 << XSTATE_BIT_FP) | 115 | #define XFEATURE_MASK_FP (1 << XFEATURE_FP) |
112 | #define XSTATE_SSE (1 << XSTATE_BIT_SSE) | 116 | #define XFEATURE_MASK_SSE (1 << XFEATURE_SSE) |
113 | #define XSTATE_YMM (1 << XSTATE_BIT_YMM) | 117 | #define XFEATURE_MASK_YMM (1 << XFEATURE_YMM) |
114 | #define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) | 118 | #define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS) |
115 | #define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) | 119 | #define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR) |
116 | #define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) | 120 | #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) |
117 | #define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) | 121 | #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) |
118 | #define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) | 122 | #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) |
123 | |||
124 | #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) | ||
125 | #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ | ||
126 | | XFEATURE_MASK_ZMM_Hi256 \ | ||
127 | | XFEATURE_MASK_Hi16_ZMM) | ||
128 | |||
129 | #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM | ||
119 | 130 | ||
120 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | 131 | struct reg_128_bit { |
121 | #define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | 132 | u8 regbytes[128/8]; |
133 | }; | ||
134 | struct reg_256_bit { | ||
135 | u8 regbytes[256/8]; | ||
136 | }; | ||
137 | struct reg_512_bit { | ||
138 | u8 regbytes[512/8]; | ||
139 | }; | ||
122 | 140 | ||
123 | /* | 141 | /* |
142 | * State component 2: | ||
143 | * | ||
124 | * There are 16x 256-bit AVX registers named YMM0-YMM15. | 144 | * There are 16x 256-bit AVX registers named YMM0-YMM15. |
125 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) | 145 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) |
126 | * and are stored in 'struct fxregs_state::xmm_space[]'. | 146 | * and are stored in 'struct fxregs_state::xmm_space[]' in the |
147 | * "legacy" area. | ||
127 | * | 148 | * |
128 | * The high 128 bits are stored here: | 149 | * The high 128 bits are stored here. |
129 | * 16x 128 bits == 256 bytes. | ||
130 | */ | 150 | */ |
131 | struct ymmh_struct { | 151 | struct ymmh_struct { |
132 | u8 ymmh_space[256]; | 152 | struct reg_128_bit hi_ymm[16]; |
133 | }; | 153 | } __packed; |
134 | |||
135 | /* We don't support LWP yet: */ | ||
136 | struct lwp_struct { | ||
137 | u8 reserved[128]; | ||
138 | }; | ||
139 | 154 | ||
140 | /* Intel MPX support: */ | 155 | /* Intel MPX support: */ |
141 | struct bndreg { | 156 | |
157 | struct mpx_bndreg { | ||
142 | u64 lower_bound; | 158 | u64 lower_bound; |
143 | u64 upper_bound; | 159 | u64 upper_bound; |
144 | } __packed; | 160 | } __packed; |
161 | /* | ||
162 | * State component 3 is used for the 4 128-bit bounds registers | ||
163 | */ | ||
164 | struct mpx_bndreg_state { | ||
165 | struct mpx_bndreg bndreg[4]; | ||
166 | } __packed; | ||
145 | 167 | ||
146 | struct bndcsr { | 168 | /* |
169 | * State component 4 is used for the 64-bit user-mode MPX | ||
170 | * configuration register BNDCFGU and the 64-bit MPX status | ||
171 | * register BNDSTATUS. We call the pair "BNDCSR". | ||
172 | */ | ||
173 | struct mpx_bndcsr { | ||
147 | u64 bndcfgu; | 174 | u64 bndcfgu; |
148 | u64 bndstatus; | 175 | u64 bndstatus; |
149 | } __packed; | 176 | } __packed; |
150 | 177 | ||
151 | struct mpx_struct { | 178 | /* |
152 | struct bndreg bndreg[4]; | 179 | * The BNDCSR state is padded out to be 64-bytes in size. |
153 | struct bndcsr bndcsr; | 180 | */ |
154 | }; | 181 | struct mpx_bndcsr_state { |
182 | union { | ||
183 | struct mpx_bndcsr bndcsr; | ||
184 | u8 pad_to_64_bytes[64]; | ||
185 | }; | ||
186 | } __packed; | ||
187 | |||
188 | /* AVX-512 Components: */ | ||
189 | |||
190 | /* | ||
191 | * State component 5 is used for the 8 64-bit opmask registers | ||
192 | * k0-k7 (opmask state). | ||
193 | */ | ||
194 | struct avx_512_opmask_state { | ||
195 | u64 opmask_reg[8]; | ||
196 | } __packed; | ||
197 | |||
198 | /* | ||
199 | * State component 6 is used for the upper 256 bits of the | ||
200 | * registers ZMM0-ZMM15. These 16 256-bit values are denoted | ||
201 | * ZMM0_H-ZMM15_H (ZMM_Hi256 state). | ||
202 | */ | ||
203 | struct avx_512_zmm_uppers_state { | ||
204 | struct reg_256_bit zmm_upper[16]; | ||
205 | } __packed; | ||
206 | |||
207 | /* | ||
208 | * State component 7 is used for the 16 512-bit registers | ||
209 | * ZMM16-ZMM31 (Hi16_ZMM state). | ||
210 | */ | ||
211 | struct avx_512_hi16_state { | ||
212 | struct reg_512_bit hi16_zmm[16]; | ||
213 | } __packed; | ||
155 | 214 | ||
156 | struct xstate_header { | 215 | struct xstate_header { |
157 | u64 xfeatures; | 216 | u64 xfeatures; |
@@ -159,22 +218,19 @@ struct xstate_header { | |||
159 | u64 reserved[6]; | 218 | u64 reserved[6]; |
160 | } __attribute__((packed)); | 219 | } __attribute__((packed)); |
161 | 220 | ||
162 | /* New processor state extensions should be added here: */ | ||
163 | #define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ | ||
164 | sizeof(struct lwp_struct) + \ | ||
165 | sizeof(struct mpx_struct) ) | ||
166 | /* | 221 | /* |
167 | * This is our most modern FPU state format, as saved by the XSAVE | 222 | * This is our most modern FPU state format, as saved by the XSAVE |
168 | * and restored by the XRSTOR instructions. | 223 | * and restored by the XRSTOR instructions. |
169 | * | 224 | * |
170 | * It consists of a legacy fxregs portion, an xstate header and | 225 | * It consists of a legacy fxregs portion, an xstate header and |
171 | * subsequent fixed size areas as defined by the xstate header. | 226 | * subsequent areas as defined by the xstate header. Not all CPUs |
172 | * Not all CPUs support all the extensions. | 227 | * support all the extensions, so the size of the extended area |
228 | * can vary quite a bit between CPUs. | ||
173 | */ | 229 | */ |
174 | struct xregs_state { | 230 | struct xregs_state { |
175 | struct fxregs_state i387; | 231 | struct fxregs_state i387; |
176 | struct xstate_header header; | 232 | struct xstate_header header; |
177 | u8 __reserved[XSTATE_RESERVE]; | 233 | u8 extended_state_area[0]; |
178 | } __attribute__ ((packed, aligned (64))); | 234 | } __attribute__ ((packed, aligned (64))); |
179 | 235 | ||
180 | /* | 236 | /* |
@@ -182,7 +238,9 @@ struct xregs_state { | |||
182 | * put together, so that we can pick the right one runtime. | 238 | * put together, so that we can pick the right one runtime. |
183 | * | 239 | * |
184 | * The size of the structure is determined by the largest | 240 | * The size of the structure is determined by the largest |
185 | * member - which is the xsave area: | 241 | * member - which is the xsave area. The padding is there |
242 | * to ensure that statically-allocated task_structs (just | ||
243 | * the init_task today) have enough space. | ||
186 | */ | 244 | */ |
187 | union fpregs_state { | 245 | union fpregs_state { |
188 | struct fregs_state fsave; | 246 | struct fregs_state fsave; |
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 4656b25bb9a7..3a6c89b70307 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h | |||
@@ -6,7 +6,7 @@ | |||
6 | #include <linux/uaccess.h> | 6 | #include <linux/uaccess.h> |
7 | 7 | ||
8 | /* Bit 63 of XCR0 is reserved for future expansion */ | 8 | /* Bit 63 of XCR0 is reserved for future expansion */ |
9 | #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) | 9 | #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) |
10 | 10 | ||
11 | #define XSTATE_CPUID 0x0000000d | 11 | #define XSTATE_CPUID 0x0000000d |
12 | 12 | ||
@@ -19,14 +19,18 @@ | |||
19 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | 19 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) |
20 | 20 | ||
21 | /* Supported features which support lazy state saving */ | 21 | /* Supported features which support lazy state saving */ |
22 | #define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | 22 | #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ |
23 | | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | 23 | XFEATURE_MASK_SSE | \ |
24 | XFEATURE_MASK_YMM | \ | ||
25 | XFEATURE_MASK_OPMASK | \ | ||
26 | XFEATURE_MASK_ZMM_Hi256 | \ | ||
27 | XFEATURE_MASK_Hi16_ZMM) | ||
24 | 28 | ||
25 | /* Supported features which require eager state saving */ | 29 | /* Supported features which require eager state saving */ |
26 | #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) | 30 | #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) |
27 | 31 | ||
28 | /* All currently supported features */ | 32 | /* All currently supported features */ |
29 | #define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) | 33 | #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) |
30 | 34 | ||
31 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
32 | #define REX_PREFIX "0x48, " | 36 | #define REX_PREFIX "0x48, " |
@@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | |||
40 | 44 | ||
41 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | 45 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); |
42 | 46 | ||
47 | void fpu__xstate_clear_all_cpu_caps(void); | ||
43 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); | 48 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); |
44 | const void *get_xsave_field_ptr(int xstate_field); | 49 | const void *get_xsave_field_ptr(int xstate_field); |
45 | 50 | ||
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 173dd3ba108c..0f492fc50bce 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h | |||
@@ -11,7 +11,7 @@ | |||
11 | TRACE_EVENT(mpx_bounds_register_exception, | 11 | TRACE_EVENT(mpx_bounds_register_exception, |
12 | 12 | ||
13 | TP_PROTO(void *addr_referenced, | 13 | TP_PROTO(void *addr_referenced, |
14 | const struct bndreg *bndreg), | 14 | const struct mpx_bndreg *bndreg), |
15 | TP_ARGS(addr_referenced, bndreg), | 15 | TP_ARGS(addr_referenced, bndreg), |
16 | 16 | ||
17 | TP_STRUCT__entry( | 17 | TP_STRUCT__entry( |
@@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception, | |||
44 | 44 | ||
45 | TRACE_EVENT(bounds_exception_mpx, | 45 | TRACE_EVENT(bounds_exception_mpx, |
46 | 46 | ||
47 | TP_PROTO(const struct bndcsr *bndcsr), | 47 | TP_PROTO(const struct mpx_bndcsr *bndcsr), |
48 | TP_ARGS(bndcsr), | 48 | TP_ARGS(bndcsr), |
49 | 49 | ||
50 | TP_STRUCT__entry( | 50 | TP_STRUCT__entry( |
@@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table, | |||
116 | /* | 116 | /* |
117 | * This gets used outside of MPX-specific code, so we need a stub. | 117 | * This gets used outside of MPX-specific code, so we need a stub. |
118 | */ | 118 | */ |
119 | static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) | 119 | static inline |
120 | void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) | ||
120 | { | 121 | { |
121 | } | 122 | } |
122 | 123 | ||