diff options
33 files changed, 1374 insertions, 243 deletions
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 4c65c70e628b..d84456924563 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c | |||
@@ -567,7 +567,8 @@ static int __init camellia_aesni_init(void) | |||
567 | return -ENODEV; | 567 | return -ENODEV; |
568 | } | 568 | } |
569 | 569 | ||
570 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 570 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, |
571 | &feature_name)) { | ||
571 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 572 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
572 | return -ENODEV; | 573 | return -ENODEV; |
573 | } | 574 | } |
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index bacaa13acac5..93d8f295784e 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c | |||
@@ -559,7 +559,8 @@ static int __init camellia_aesni_init(void) | |||
559 | return -ENODEV; | 559 | return -ENODEV; |
560 | } | 560 | } |
561 | 561 | ||
562 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 562 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, |
563 | &feature_name)) { | ||
563 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 564 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
564 | return -ENODEV; | 565 | return -ENODEV; |
565 | } | 566 | } |
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index be00aa48b2b5..8648158f3916 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c | |||
@@ -469,7 +469,8 @@ static int __init cast5_init(void) | |||
469 | { | 469 | { |
470 | const char *feature_name; | 470 | const char *feature_name; |
471 | 471 | ||
472 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 472 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, |
473 | &feature_name)) { | ||
473 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 474 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
474 | return -ENODEV; | 475 | return -ENODEV; |
475 | } | 476 | } |
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 5dbba7224221..fca459578c35 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c | |||
@@ -591,7 +591,8 @@ static int __init cast6_init(void) | |||
591 | { | 591 | { |
592 | const char *feature_name; | 592 | const char *feature_name; |
593 | 593 | ||
594 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 594 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, |
595 | &feature_name)) { | ||
595 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 596 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
596 | return -ENODEV; | 597 | return -ENODEV; |
597 | } | 598 | } |
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index effe2160b7c5..722bacea040e 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c | |||
@@ -130,7 +130,7 @@ static int __init chacha20_simd_mod_init(void) | |||
130 | 130 | ||
131 | #ifdef CONFIG_AS_AVX2 | 131 | #ifdef CONFIG_AS_AVX2 |
132 | chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 && | 132 | chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 && |
133 | cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL); | 133 | cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
134 | #endif | 134 | #endif |
135 | return crypto_register_alg(&alg); | 135 | return crypto_register_alg(&alg); |
136 | } | 136 | } |
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index f7170d764f32..4264a3d59589 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c | |||
@@ -184,7 +184,7 @@ static int __init poly1305_simd_mod_init(void) | |||
184 | 184 | ||
185 | #ifdef CONFIG_AS_AVX2 | 185 | #ifdef CONFIG_AS_AVX2 |
186 | poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 && | 186 | poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 && |
187 | cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL); | 187 | cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); |
188 | alg.descsize = sizeof(struct poly1305_simd_desc_ctx); | 188 | alg.descsize = sizeof(struct poly1305_simd_desc_ctx); |
189 | if (poly1305_use_avx2) | 189 | if (poly1305_use_avx2) |
190 | alg.descsize += 10 * sizeof(u32); | 190 | alg.descsize += 10 * sizeof(u32); |
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 7d838dc4d888..6d198342e2de 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c | |||
@@ -542,7 +542,8 @@ static int __init init(void) | |||
542 | pr_info("AVX2 instructions are not detected.\n"); | 542 | pr_info("AVX2 instructions are not detected.\n"); |
543 | return -ENODEV; | 543 | return -ENODEV; |
544 | } | 544 | } |
545 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 545 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, |
546 | &feature_name)) { | ||
546 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 547 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
547 | return -ENODEV; | 548 | return -ENODEV; |
548 | } | 549 | } |
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index da7dafc9b16d..5dc37026c7ce 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c | |||
@@ -597,7 +597,8 @@ static int __init serpent_init(void) | |||
597 | { | 597 | { |
598 | const char *feature_name; | 598 | const char *feature_name; |
599 | 599 | ||
600 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 600 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, |
601 | &feature_name)) { | ||
601 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 602 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
602 | return -ENODEV; | 603 | return -ENODEV; |
603 | } | 604 | } |
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7c48e8b20848..00212c32d4db 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -121,7 +121,7 @@ static struct shash_alg alg = { | |||
121 | #ifdef CONFIG_AS_AVX | 121 | #ifdef CONFIG_AS_AVX |
122 | static bool __init avx_usable(void) | 122 | static bool __init avx_usable(void) |
123 | { | 123 | { |
124 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { | 124 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
125 | if (cpu_has_avx) | 125 | if (cpu_has_avx) |
126 | pr_info("AVX detected but unusable.\n"); | 126 | pr_info("AVX detected but unusable.\n"); |
127 | return false; | 127 | return false; |
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index f8097fc0d1d1..0e0e85aea634 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c | |||
@@ -130,7 +130,7 @@ static struct shash_alg algs[] = { { | |||
130 | #ifdef CONFIG_AS_AVX | 130 | #ifdef CONFIG_AS_AVX |
131 | static bool __init avx_usable(void) | 131 | static bool __init avx_usable(void) |
132 | { | 132 | { |
133 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { | 133 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
134 | if (cpu_has_avx) | 134 | if (cpu_has_avx) |
135 | pr_info("AVX detected but unusable.\n"); | 135 | pr_info("AVX detected but unusable.\n"); |
136 | return false; | 136 | return false; |
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 2edad7b81870..0c8c38c101ac 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c | |||
@@ -129,7 +129,7 @@ static struct shash_alg algs[] = { { | |||
129 | #ifdef CONFIG_AS_AVX | 129 | #ifdef CONFIG_AS_AVX |
130 | static bool __init avx_usable(void) | 130 | static bool __init avx_usable(void) |
131 | { | 131 | { |
132 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { | 132 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { |
133 | if (cpu_has_avx) | 133 | if (cpu_has_avx) |
134 | pr_info("AVX detected but unusable.\n"); | 134 | pr_info("AVX detected but unusable.\n"); |
135 | return false; | 135 | return false; |
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index c2bd0ce718ee..b7a3904b953c 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c | |||
@@ -558,7 +558,7 @@ static int __init twofish_init(void) | |||
558 | { | 558 | { |
559 | const char *feature_name; | 559 | const char *feature_name; |
560 | 560 | ||
561 | if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { | 561 | if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) { |
562 | pr_info("CPU feature '%s' is not supported.\n", feature_name); | 562 | pr_info("CPU feature '%s' is not supported.\n", feature_name); |
563 | return -ENODEV; | 563 | return -ENODEV; |
564 | } | 564 | } |
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index c49c5173158e..1c6f6ac52ad0 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h | |||
@@ -95,63 +95,122 @@ struct swregs_state { | |||
95 | /* | 95 | /* |
96 | * List of XSAVE features Linux knows about: | 96 | * List of XSAVE features Linux knows about: |
97 | */ | 97 | */ |
98 | enum xfeature_bit { | 98 | enum xfeature { |
99 | XSTATE_BIT_FP, | 99 | XFEATURE_FP, |
100 | XSTATE_BIT_SSE, | 100 | XFEATURE_SSE, |
101 | XSTATE_BIT_YMM, | 101 | /* |
102 | XSTATE_BIT_BNDREGS, | 102 | * Values above here are "legacy states". |
103 | XSTATE_BIT_BNDCSR, | 103 | * Those below are "extended states". |
104 | XSTATE_BIT_OPMASK, | 104 | */ |
105 | XSTATE_BIT_ZMM_Hi256, | 105 | XFEATURE_YMM, |
106 | XSTATE_BIT_Hi16_ZMM, | 106 | XFEATURE_BNDREGS, |
107 | 107 | XFEATURE_BNDCSR, | |
108 | XFEATURES_NR_MAX, | 108 | XFEATURE_OPMASK, |
109 | XFEATURE_ZMM_Hi256, | ||
110 | XFEATURE_Hi16_ZMM, | ||
111 | |||
112 | XFEATURE_MAX, | ||
109 | }; | 113 | }; |
110 | 114 | ||
111 | #define XSTATE_FP (1 << XSTATE_BIT_FP) | 115 | #define XFEATURE_MASK_FP (1 << XFEATURE_FP) |
112 | #define XSTATE_SSE (1 << XSTATE_BIT_SSE) | 116 | #define XFEATURE_MASK_SSE (1 << XFEATURE_SSE) |
113 | #define XSTATE_YMM (1 << XSTATE_BIT_YMM) | 117 | #define XFEATURE_MASK_YMM (1 << XFEATURE_YMM) |
114 | #define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) | 118 | #define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS) |
115 | #define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) | 119 | #define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR) |
116 | #define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) | 120 | #define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK) |
117 | #define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) | 121 | #define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256) |
118 | #define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) | 122 | #define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM) |
123 | |||
124 | #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) | ||
125 | #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ | ||
126 | | XFEATURE_MASK_ZMM_Hi256 \ | ||
127 | | XFEATURE_MASK_Hi16_ZMM) | ||
128 | |||
129 | #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM | ||
119 | 130 | ||
120 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | 131 | struct reg_128_bit { |
121 | #define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | 132 | u8 regbytes[128/8]; |
133 | }; | ||
134 | struct reg_256_bit { | ||
135 | u8 regbytes[256/8]; | ||
136 | }; | ||
137 | struct reg_512_bit { | ||
138 | u8 regbytes[512/8]; | ||
139 | }; | ||
122 | 140 | ||
123 | /* | 141 | /* |
142 | * State component 2: | ||
143 | * | ||
124 | * There are 16x 256-bit AVX registers named YMM0-YMM15. | 144 | * There are 16x 256-bit AVX registers named YMM0-YMM15. |
125 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) | 145 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) |
126 | * and are stored in 'struct fxregs_state::xmm_space[]'. | 146 | * and are stored in 'struct fxregs_state::xmm_space[]' in the |
147 | * "legacy" area. | ||
127 | * | 148 | * |
128 | * The high 128 bits are stored here: | 149 | * The high 128 bits are stored here. |
129 | * 16x 128 bits == 256 bytes. | ||
130 | */ | 150 | */ |
131 | struct ymmh_struct { | 151 | struct ymmh_struct { |
132 | u8 ymmh_space[256]; | 152 | struct reg_128_bit hi_ymm[16]; |
133 | }; | 153 | } __packed; |
134 | |||
135 | /* We don't support LWP yet: */ | ||
136 | struct lwp_struct { | ||
137 | u8 reserved[128]; | ||
138 | }; | ||
139 | 154 | ||
140 | /* Intel MPX support: */ | 155 | /* Intel MPX support: */ |
141 | struct bndreg { | 156 | |
157 | struct mpx_bndreg { | ||
142 | u64 lower_bound; | 158 | u64 lower_bound; |
143 | u64 upper_bound; | 159 | u64 upper_bound; |
144 | } __packed; | 160 | } __packed; |
161 | /* | ||
162 | * State component 3 is used for the 4 128-bit bounds registers | ||
163 | */ | ||
164 | struct mpx_bndreg_state { | ||
165 | struct mpx_bndreg bndreg[4]; | ||
166 | } __packed; | ||
145 | 167 | ||
146 | struct bndcsr { | 168 | /* |
169 | * State component 4 is used for the 64-bit user-mode MPX | ||
170 | * configuration register BNDCFGU and the 64-bit MPX status | ||
171 | * register BNDSTATUS. We call the pair "BNDCSR". | ||
172 | */ | ||
173 | struct mpx_bndcsr { | ||
147 | u64 bndcfgu; | 174 | u64 bndcfgu; |
148 | u64 bndstatus; | 175 | u64 bndstatus; |
149 | } __packed; | 176 | } __packed; |
150 | 177 | ||
151 | struct mpx_struct { | 178 | /* |
152 | struct bndreg bndreg[4]; | 179 | * The BNDCSR state is padded out to be 64-bytes in size. |
153 | struct bndcsr bndcsr; | 180 | */ |
154 | }; | 181 | struct mpx_bndcsr_state { |
182 | union { | ||
183 | struct mpx_bndcsr bndcsr; | ||
184 | u8 pad_to_64_bytes[64]; | ||
185 | }; | ||
186 | } __packed; | ||
187 | |||
188 | /* AVX-512 Components: */ | ||
189 | |||
190 | /* | ||
191 | * State component 5 is used for the 8 64-bit opmask registers | ||
192 | * k0-k7 (opmask state). | ||
193 | */ | ||
194 | struct avx_512_opmask_state { | ||
195 | u64 opmask_reg[8]; | ||
196 | } __packed; | ||
197 | |||
198 | /* | ||
199 | * State component 6 is used for the upper 256 bits of the | ||
200 | * registers ZMM0-ZMM15. These 16 256-bit values are denoted | ||
201 | * ZMM0_H-ZMM15_H (ZMM_Hi256 state). | ||
202 | */ | ||
203 | struct avx_512_zmm_uppers_state { | ||
204 | struct reg_256_bit zmm_upper[16]; | ||
205 | } __packed; | ||
206 | |||
207 | /* | ||
208 | * State component 7 is used for the 16 512-bit registers | ||
209 | * ZMM16-ZMM31 (Hi16_ZMM state). | ||
210 | */ | ||
211 | struct avx_512_hi16_state { | ||
212 | struct reg_512_bit hi16_zmm[16]; | ||
213 | } __packed; | ||
155 | 214 | ||
156 | struct xstate_header { | 215 | struct xstate_header { |
157 | u64 xfeatures; | 216 | u64 xfeatures; |
@@ -159,22 +218,19 @@ struct xstate_header { | |||
159 | u64 reserved[6]; | 218 | u64 reserved[6]; |
160 | } __attribute__((packed)); | 219 | } __attribute__((packed)); |
161 | 220 | ||
162 | /* New processor state extensions should be added here: */ | ||
163 | #define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ | ||
164 | sizeof(struct lwp_struct) + \ | ||
165 | sizeof(struct mpx_struct) ) | ||
166 | /* | 221 | /* |
167 | * This is our most modern FPU state format, as saved by the XSAVE | 222 | * This is our most modern FPU state format, as saved by the XSAVE |
168 | * and restored by the XRSTOR instructions. | 223 | * and restored by the XRSTOR instructions. |
169 | * | 224 | * |
170 | * It consists of a legacy fxregs portion, an xstate header and | 225 | * It consists of a legacy fxregs portion, an xstate header and |
171 | * subsequent fixed size areas as defined by the xstate header. | 226 | * subsequent areas as defined by the xstate header. Not all CPUs |
172 | * Not all CPUs support all the extensions. | 227 | * support all the extensions, so the size of the extended area |
228 | * can vary quite a bit between CPUs. | ||
173 | */ | 229 | */ |
174 | struct xregs_state { | 230 | struct xregs_state { |
175 | struct fxregs_state i387; | 231 | struct fxregs_state i387; |
176 | struct xstate_header header; | 232 | struct xstate_header header; |
177 | u8 __reserved[XSTATE_RESERVE]; | 233 | u8 extended_state_area[0]; |
178 | } __attribute__ ((packed, aligned (64))); | 234 | } __attribute__ ((packed, aligned (64))); |
179 | 235 | ||
180 | /* | 236 | /* |
@@ -182,7 +238,9 @@ struct xregs_state { | |||
182 | * put together, so that we can pick the right one runtime. | 238 | * put together, so that we can pick the right one runtime. |
183 | * | 239 | * |
184 | * The size of the structure is determined by the largest | 240 | * The size of the structure is determined by the largest |
185 | * member - which is the xsave area: | 241 | * member - which is the xsave area. The padding is there |
242 | * to ensure that statically-allocated task_structs (just | ||
243 | * the init_task today) have enough space. | ||
186 | */ | 244 | */ |
187 | union fpregs_state { | 245 | union fpregs_state { |
188 | struct fregs_state fsave; | 246 | struct fregs_state fsave; |
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 4656b25bb9a7..3a6c89b70307 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h | |||
@@ -6,7 +6,7 @@ | |||
6 | #include <linux/uaccess.h> | 6 | #include <linux/uaccess.h> |
7 | 7 | ||
8 | /* Bit 63 of XCR0 is reserved for future expansion */ | 8 | /* Bit 63 of XCR0 is reserved for future expansion */ |
9 | #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) | 9 | #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) |
10 | 10 | ||
11 | #define XSTATE_CPUID 0x0000000d | 11 | #define XSTATE_CPUID 0x0000000d |
12 | 12 | ||
@@ -19,14 +19,18 @@ | |||
19 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | 19 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) |
20 | 20 | ||
21 | /* Supported features which support lazy state saving */ | 21 | /* Supported features which support lazy state saving */ |
22 | #define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | 22 | #define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \ |
23 | | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | 23 | XFEATURE_MASK_SSE | \ |
24 | XFEATURE_MASK_YMM | \ | ||
25 | XFEATURE_MASK_OPMASK | \ | ||
26 | XFEATURE_MASK_ZMM_Hi256 | \ | ||
27 | XFEATURE_MASK_Hi16_ZMM) | ||
24 | 28 | ||
25 | /* Supported features which require eager state saving */ | 29 | /* Supported features which require eager state saving */ |
26 | #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) | 30 | #define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) |
27 | 31 | ||
28 | /* All currently supported features */ | 32 | /* All currently supported features */ |
29 | #define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) | 33 | #define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER) |
30 | 34 | ||
31 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
32 | #define REX_PREFIX "0x48, " | 36 | #define REX_PREFIX "0x48, " |
@@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | |||
40 | 44 | ||
41 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | 45 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); |
42 | 46 | ||
47 | void fpu__xstate_clear_all_cpu_caps(void); | ||
43 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); | 48 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); |
44 | const void *get_xsave_field_ptr(int xstate_field); | 49 | const void *get_xsave_field_ptr(int xstate_field); |
45 | 50 | ||
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 173dd3ba108c..0f492fc50bce 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h | |||
@@ -11,7 +11,7 @@ | |||
11 | TRACE_EVENT(mpx_bounds_register_exception, | 11 | TRACE_EVENT(mpx_bounds_register_exception, |
12 | 12 | ||
13 | TP_PROTO(void *addr_referenced, | 13 | TP_PROTO(void *addr_referenced, |
14 | const struct bndreg *bndreg), | 14 | const struct mpx_bndreg *bndreg), |
15 | TP_ARGS(addr_referenced, bndreg), | 15 | TP_ARGS(addr_referenced, bndreg), |
16 | 16 | ||
17 | TP_STRUCT__entry( | 17 | TP_STRUCT__entry( |
@@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception, | |||
44 | 44 | ||
45 | TRACE_EVENT(bounds_exception_mpx, | 45 | TRACE_EVENT(bounds_exception_mpx, |
46 | 46 | ||
47 | TP_PROTO(const struct bndcsr *bndcsr), | 47 | TP_PROTO(const struct mpx_bndcsr *bndcsr), |
48 | TP_ARGS(bndcsr), | 48 | TP_ARGS(bndcsr), |
49 | 49 | ||
50 | TP_STRUCT__entry( | 50 | TP_STRUCT__entry( |
@@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table, | |||
116 | /* | 116 | /* |
117 | * This gets used outside of MPX-specific code, so we need a stub. | 117 | * This gets used outside of MPX-specific code, so we need a stub. |
118 | */ | 118 | */ |
119 | static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) | 119 | static inline |
120 | void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr) | ||
120 | { | 121 | { |
121 | } | 122 | } |
122 | 123 | ||
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d14e9ac3235a..be39b5fde4b9 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c | |||
@@ -290,11 +290,11 @@ static void __init fpu__init_system_ctx_switch(void) | |||
290 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | 290 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) |
291 | eagerfpu = ENABLE; | 291 | eagerfpu = ENABLE; |
292 | 292 | ||
293 | if (xfeatures_mask & XSTATE_EAGER) { | 293 | if (xfeatures_mask & XFEATURE_MASK_EAGER) { |
294 | if (eagerfpu == DISABLE) { | 294 | if (eagerfpu == DISABLE) { |
295 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", | 295 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", |
296 | xfeatures_mask & XSTATE_EAGER); | 296 | xfeatures_mask & XFEATURE_MASK_EAGER); |
297 | xfeatures_mask &= ~XSTATE_EAGER; | 297 | xfeatures_mask &= ~XFEATURE_MASK_EAGER; |
298 | } else { | 298 | } else { |
299 | eagerfpu = ENABLE; | 299 | eagerfpu = ENABLE; |
300 | } | 300 | } |
@@ -354,17 +354,7 @@ static int __init x86_noxsave_setup(char *s) | |||
354 | if (strlen(s)) | 354 | if (strlen(s)) |
355 | return 0; | 355 | return 0; |
356 | 356 | ||
357 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | 357 | fpu__xstate_clear_all_cpu_caps(); |
358 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
359 | setup_clear_cpu_cap(X86_FEATURE_XSAVEC); | ||
360 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
361 | setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
362 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
363 | setup_clear_cpu_cap(X86_FEATURE_AVX512F); | ||
364 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | ||
365 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | ||
366 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | ||
367 | setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
368 | 358 | ||
369 | return 1; | 359 | return 1; |
370 | } | 360 | } |
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index dc60810c1c74..0bc3490420c5 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c | |||
@@ -66,7 +66,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
66 | * presence of FP and SSE state. | 66 | * presence of FP and SSE state. |
67 | */ | 67 | */ |
68 | if (cpu_has_xsave) | 68 | if (cpu_has_xsave) |
69 | fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; | 69 | fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE; |
70 | 70 | ||
71 | return ret; | 71 | return ret; |
72 | } | 72 | } |
@@ -326,7 +326,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
326 | * presence of FP. | 326 | * presence of FP. |
327 | */ | 327 | */ |
328 | if (cpu_has_xsave) | 328 | if (cpu_has_xsave) |
329 | fpu->state.xsave.header.xfeatures |= XSTATE_FP; | 329 | fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP; |
330 | return ret; | 330 | return ret; |
331 | } | 331 | } |
332 | 332 | ||
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 50ec9af1bd51..eb032677f939 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c | |||
@@ -107,7 +107,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) | |||
107 | * header as well as change any contents in the memory layout. | 107 | * header as well as change any contents in the memory layout. |
108 | * xrestore as part of sigreturn will capture all the changes. | 108 | * xrestore as part of sigreturn will capture all the changes. |
109 | */ | 109 | */ |
110 | xfeatures |= XSTATE_FPSSE; | 110 | xfeatures |= XFEATURE_MASK_FPSSE; |
111 | 111 | ||
112 | err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); | 112 | err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); |
113 | 113 | ||
@@ -207,7 +207,7 @@ sanitize_restored_xstate(struct task_struct *tsk, | |||
207 | * layout and not enabled by the OS. | 207 | * layout and not enabled by the OS. |
208 | */ | 208 | */ |
209 | if (fx_only) | 209 | if (fx_only) |
210 | header->xfeatures = XSTATE_FPSSE; | 210 | header->xfeatures = XFEATURE_MASK_FPSSE; |
211 | else | 211 | else |
212 | header->xfeatures &= (xfeatures_mask & xfeatures); | 212 | header->xfeatures &= (xfeatures_mask & xfeatures); |
213 | } | 213 | } |
@@ -230,7 +230,7 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_ | |||
230 | { | 230 | { |
231 | if (use_xsave()) { | 231 | if (use_xsave()) { |
232 | if ((unsigned long)buf % 64 || fx_only) { | 232 | if ((unsigned long)buf % 64 || fx_only) { |
233 | u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; | 233 | u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; |
234 | copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); | 234 | copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); |
235 | return copy_user_to_fxregs(buf); | 235 | return copy_user_to_fxregs(buf); |
236 | } else { | 236 | } else { |
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 62fc001c7846..6454f2731b56 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c | |||
@@ -31,12 +31,28 @@ static const char *xfeature_names[] = | |||
31 | */ | 31 | */ |
32 | u64 xfeatures_mask __read_mostly; | 32 | u64 xfeatures_mask __read_mostly; |
33 | 33 | ||
34 | static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; | 34 | static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; |
35 | static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; | 35 | static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; |
36 | static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; | 36 | static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; |
37 | 37 | ||
38 | /* The number of supported xfeatures in xfeatures_mask: */ | 38 | /* |
39 | static unsigned int xfeatures_nr; | 39 | * Clear all of the X86_FEATURE_* bits that are unavailable |
40 | * when the CPU has no XSAVE support. | ||
41 | */ | ||
42 | void fpu__xstate_clear_all_cpu_caps(void) | ||
43 | { | ||
44 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
45 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
46 | setup_clear_cpu_cap(X86_FEATURE_XSAVEC); | ||
47 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
48 | setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
49 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
50 | setup_clear_cpu_cap(X86_FEATURE_AVX512F); | ||
51 | setup_clear_cpu_cap(X86_FEATURE_AVX512PF); | ||
52 | setup_clear_cpu_cap(X86_FEATURE_AVX512ER); | ||
53 | setup_clear_cpu_cap(X86_FEATURE_AVX512CD); | ||
54 | setup_clear_cpu_cap(X86_FEATURE_MPX); | ||
55 | } | ||
40 | 56 | ||
41 | /* | 57 | /* |
42 | * Return whether the system supports a given xfeature. | 58 | * Return whether the system supports a given xfeature. |
@@ -53,7 +69,7 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) | |||
53 | /* | 69 | /* |
54 | * So we use FLS here to be able to print the most advanced | 70 | * So we use FLS here to be able to print the most advanced |
55 | * feature that was requested but is missing. So if a driver | 71 | * feature that was requested but is missing. So if a driver |
56 | * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the | 72 | * asks about "XFEATURE_MASK_SSE | XFEATURE_MASK_YMM" we'll print the |
57 | * missing AVX feature - this is the most informative message | 73 | * missing AVX feature - this is the most informative message |
58 | * to users: | 74 | * to users: |
59 | */ | 75 | */ |
@@ -112,7 +128,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) | |||
112 | /* | 128 | /* |
113 | * FP is in init state | 129 | * FP is in init state |
114 | */ | 130 | */ |
115 | if (!(xfeatures & XSTATE_FP)) { | 131 | if (!(xfeatures & XFEATURE_MASK_FP)) { |
116 | fx->cwd = 0x37f; | 132 | fx->cwd = 0x37f; |
117 | fx->swd = 0; | 133 | fx->swd = 0; |
118 | fx->twd = 0; | 134 | fx->twd = 0; |
@@ -125,7 +141,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) | |||
125 | /* | 141 | /* |
126 | * SSE is in init state | 142 | * SSE is in init state |
127 | */ | 143 | */ |
128 | if (!(xfeatures & XSTATE_SSE)) | 144 | if (!(xfeatures & XFEATURE_MASK_SSE)) |
129 | memset(&fx->xmm_space[0], 0, 256); | 145 | memset(&fx->xmm_space[0], 0, 256); |
130 | 146 | ||
131 | /* | 147 | /* |
@@ -169,25 +185,43 @@ void fpu__init_cpu_xstate(void) | |||
169 | } | 185 | } |
170 | 186 | ||
171 | /* | 187 | /* |
188 | * Note that in the future we will likely need a pair of | ||
189 | * functions here: one for user xstates and the other for | ||
190 | * system xstates. For now, they are the same. | ||
191 | */ | ||
192 | static int xfeature_enabled(enum xfeature xfeature) | ||
193 | { | ||
194 | return !!(xfeatures_mask & (1UL << xfeature)); | ||
195 | } | ||
196 | |||
197 | /* | ||
172 | * Record the offsets and sizes of various xstates contained | 198 | * Record the offsets and sizes of various xstates contained |
173 | * in the XSAVE state memory layout. | 199 | * in the XSAVE state memory layout. |
174 | * | ||
175 | * ( Note that certain features might be non-present, for them | ||
176 | * we'll have 0 offset and 0 size. ) | ||
177 | */ | 200 | */ |
178 | static void __init setup_xstate_features(void) | 201 | static void __init setup_xstate_features(void) |
179 | { | 202 | { |
180 | u32 eax, ebx, ecx, edx, leaf; | 203 | u32 eax, ebx, ecx, edx, i; |
181 | 204 | /* start at the beginning of the "extended state" */ | |
182 | xfeatures_nr = fls64(xfeatures_mask); | 205 | unsigned int last_good_offset = offsetof(struct xregs_state, |
183 | 206 | extended_state_area); | |
184 | for (leaf = 2; leaf < xfeatures_nr; leaf++) { | 207 | |
185 | cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); | 208 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { |
186 | 209 | if (!xfeature_enabled(i)) | |
187 | xstate_offsets[leaf] = ebx; | 210 | continue; |
188 | xstate_sizes[leaf] = eax; | 211 | |
212 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); | ||
213 | xstate_offsets[i] = ebx; | ||
214 | xstate_sizes[i] = eax; | ||
215 | /* | ||
216 | * In our xstate size checks, we assume that the | ||
217 | * highest-numbered xstate feature has the | ||
218 | * highest offset in the buffer. Ensure it does. | ||
219 | */ | ||
220 | WARN_ONCE(last_good_offset > xstate_offsets[i], | ||
221 | "x86/fpu: misordered xstate at %d\n", last_good_offset); | ||
222 | last_good_offset = xstate_offsets[i]; | ||
189 | 223 | ||
190 | printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax); | 224 | printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax); |
191 | } | 225 | } |
192 | } | 226 | } |
193 | 227 | ||
@@ -204,14 +238,14 @@ static void __init print_xstate_feature(u64 xstate_mask) | |||
204 | */ | 238 | */ |
205 | static void __init print_xstate_features(void) | 239 | static void __init print_xstate_features(void) |
206 | { | 240 | { |
207 | print_xstate_feature(XSTATE_FP); | 241 | print_xstate_feature(XFEATURE_MASK_FP); |
208 | print_xstate_feature(XSTATE_SSE); | 242 | print_xstate_feature(XFEATURE_MASK_SSE); |
209 | print_xstate_feature(XSTATE_YMM); | 243 | print_xstate_feature(XFEATURE_MASK_YMM); |
210 | print_xstate_feature(XSTATE_BNDREGS); | 244 | print_xstate_feature(XFEATURE_MASK_BNDREGS); |
211 | print_xstate_feature(XSTATE_BNDCSR); | 245 | print_xstate_feature(XFEATURE_MASK_BNDCSR); |
212 | print_xstate_feature(XSTATE_OPMASK); | 246 | print_xstate_feature(XFEATURE_MASK_OPMASK); |
213 | print_xstate_feature(XSTATE_ZMM_Hi256); | 247 | print_xstate_feature(XFEATURE_MASK_ZMM_Hi256); |
214 | print_xstate_feature(XSTATE_Hi16_ZMM); | 248 | print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); |
215 | } | 249 | } |
216 | 250 | ||
217 | /* | 251 | /* |
@@ -233,8 +267,8 @@ static void __init setup_xstate_comp(void) | |||
233 | xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); | 267 | xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); |
234 | 268 | ||
235 | if (!cpu_has_xsaves) { | 269 | if (!cpu_has_xsaves) { |
236 | for (i = 2; i < xfeatures_nr; i++) { | 270 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { |
237 | if (test_bit(i, (unsigned long *)&xfeatures_mask)) { | 271 | if (xfeature_enabled(i)) { |
238 | xstate_comp_offsets[i] = xstate_offsets[i]; | 272 | xstate_comp_offsets[i] = xstate_offsets[i]; |
239 | xstate_comp_sizes[i] = xstate_sizes[i]; | 273 | xstate_comp_sizes[i] = xstate_sizes[i]; |
240 | } | 274 | } |
@@ -242,15 +276,16 @@ static void __init setup_xstate_comp(void) | |||
242 | return; | 276 | return; |
243 | } | 277 | } |
244 | 278 | ||
245 | xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; | 279 | xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] = |
280 | FXSAVE_SIZE + XSAVE_HDR_SIZE; | ||
246 | 281 | ||
247 | for (i = 2; i < xfeatures_nr; i++) { | 282 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { |
248 | if (test_bit(i, (unsigned long *)&xfeatures_mask)) | 283 | if (xfeature_enabled(i)) |
249 | xstate_comp_sizes[i] = xstate_sizes[i]; | 284 | xstate_comp_sizes[i] = xstate_sizes[i]; |
250 | else | 285 | else |
251 | xstate_comp_sizes[i] = 0; | 286 | xstate_comp_sizes[i] = 0; |
252 | 287 | ||
253 | if (i > 2) | 288 | if (i > FIRST_EXTENDED_XFEATURE) |
254 | xstate_comp_offsets[i] = xstate_comp_offsets[i-1] | 289 | xstate_comp_offsets[i] = xstate_comp_offsets[i-1] |
255 | + xstate_comp_sizes[i-1]; | 290 | + xstate_comp_sizes[i-1]; |
256 | 291 | ||
@@ -290,27 +325,280 @@ static void __init setup_init_fpu_buf(void) | |||
290 | copy_xregs_to_kernel_booting(&init_fpstate.xsave); | 325 | copy_xregs_to_kernel_booting(&init_fpstate.xsave); |
291 | } | 326 | } |
292 | 327 | ||
328 | static int xfeature_is_supervisor(int xfeature_nr) | ||
329 | { | ||
330 | /* | ||
331 | * We currently do not support supervisor states, but if | ||
332 | * we did, we could find out like this. | ||
333 | * | ||
334 | * SDM says: If state component i is a user state component, | ||
335 | * ECX[0] returns 0; if state component i is a supervisor | ||
336 | * state component, ECX[0] returns 1. | ||
337 | u32 eax, ebx, ecx, edx; | ||
338 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
339 | return !!(ecx & 1); | ||
340 | */ | ||
341 | return 0; | ||
342 | } | ||
343 | /* | ||
344 | static int xfeature_is_user(int xfeature_nr) | ||
345 | { | ||
346 | return !xfeature_is_supervisor(xfeature_nr); | ||
347 | } | ||
348 | */ | ||
349 | |||
350 | /* | ||
351 | * This check is important because it is easy to get XFEATURE_* | ||
352 | * (feature numbers) confused with XFEATURE_MASK_* (bit masks). | ||
353 | */ | ||
354 | #define CHECK_XFEATURE(nr) do { \ | ||
355 | WARN_ON(nr < FIRST_EXTENDED_XFEATURE); \ | ||
356 | WARN_ON(nr >= XFEATURE_MAX); \ | ||
357 | } while (0) | ||
358 | |||
359 | /* | ||
360 | * We could cache this like xstate_sizes[], but we only use | ||
361 | * it here, so it would be a waste of space. | ||
362 | */ | ||
363 | static int xfeature_is_aligned(int xfeature_nr) | ||
364 | { | ||
365 | u32 eax, ebx, ecx, edx; | ||
366 | |||
367 | CHECK_XFEATURE(xfeature_nr); | ||
368 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
369 | /* | ||
370 | * The value returned by ECX[1] indicates the alignment | ||
371 | * of state component i when the compacted format | ||
372 | * of the extended region of an XSAVE area is used | ||
373 | */ | ||
374 | return !!(ecx & 2); | ||
375 | } | ||
376 | |||
377 | static int xfeature_uncompacted_offset(int xfeature_nr) | ||
378 | { | ||
379 | u32 eax, ebx, ecx, edx; | ||
380 | |||
381 | CHECK_XFEATURE(xfeature_nr); | ||
382 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
383 | return ebx; | ||
384 | } | ||
385 | |||
386 | static int xfeature_size(int xfeature_nr) | ||
387 | { | ||
388 | u32 eax, ebx, ecx, edx; | ||
389 | |||
390 | CHECK_XFEATURE(xfeature_nr); | ||
391 | cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx); | ||
392 | return eax; | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * 'XSAVES' implies two different things: | ||
397 | * 1. saving of supervisor/system state | ||
398 | * 2. using the compacted format | ||
399 | * | ||
400 | * Use this function when dealing with the compacted format so | ||
401 | * that it is obvious which aspect of 'XSAVES' is being handled | ||
402 | * by the calling code. | ||
403 | */ | ||
404 | static int using_compacted_format(void) | ||
405 | { | ||
406 | return cpu_has_xsaves; | ||
407 | } | ||
408 | |||
409 | static void __xstate_dump_leaves(void) | ||
410 | { | ||
411 | int i; | ||
412 | u32 eax, ebx, ecx, edx; | ||
413 | static int should_dump = 1; | ||
414 | |||
415 | if (!should_dump) | ||
416 | return; | ||
417 | should_dump = 0; | ||
418 | /* | ||
419 | * Dump out a few leaves past the ones that we support | ||
420 | * just in case there are some goodies up there | ||
421 | */ | ||
422 | for (i = 0; i < XFEATURE_MAX + 10; i++) { | ||
423 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); | ||
424 | pr_warn("CPUID[%02x, %02x]: eax=%08x ebx=%08x ecx=%08x edx=%08x\n", | ||
425 | XSTATE_CPUID, i, eax, ebx, ecx, edx); | ||
426 | } | ||
427 | } | ||
428 | |||
429 | #define XSTATE_WARN_ON(x) do { \ | ||
430 | if (WARN_ONCE(x, "XSAVE consistency problem, dumping leaves")) { \ | ||
431 | __xstate_dump_leaves(); \ | ||
432 | } \ | ||
433 | } while (0) | ||
434 | |||
435 | #define XCHECK_SZ(sz, nr, nr_macro, __struct) do { \ | ||
436 | if ((nr == nr_macro) && \ | ||
437 | WARN_ONCE(sz != sizeof(__struct), \ | ||
438 | "%s: struct is %zu bytes, cpu state %d bytes\n", \ | ||
439 | __stringify(nr_macro), sizeof(__struct), sz)) { \ | ||
440 | __xstate_dump_leaves(); \ | ||
441 | } \ | ||
442 | } while (0) | ||
443 | |||
444 | /* | ||
445 | * We have a C struct for each 'xstate'. We need to ensure | ||
446 | * that our software representation matches what the CPU | ||
447 | * tells us about the state's size. | ||
448 | */ | ||
449 | static void check_xstate_against_struct(int nr) | ||
450 | { | ||
451 | /* | ||
452 | * Ask the CPU for the size of the state. | ||
453 | */ | ||
454 | int sz = xfeature_size(nr); | ||
455 | /* | ||
456 | * Match each CPU state with the corresponding software | ||
457 | * structure. | ||
458 | */ | ||
459 | XCHECK_SZ(sz, nr, XFEATURE_YMM, struct ymmh_struct); | ||
460 | XCHECK_SZ(sz, nr, XFEATURE_BNDREGS, struct mpx_bndreg_state); | ||
461 | XCHECK_SZ(sz, nr, XFEATURE_BNDCSR, struct mpx_bndcsr_state); | ||
462 | XCHECK_SZ(sz, nr, XFEATURE_OPMASK, struct avx_512_opmask_state); | ||
463 | XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state); | ||
464 | XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); | ||
465 | |||
466 | /* | ||
467 | * Make *SURE* to add any feature numbers in below if | ||
468 | * there are "holes" in the xsave state component | ||
469 | * numbers. | ||
470 | */ | ||
471 | if ((nr < XFEATURE_YMM) || | ||
472 | (nr >= XFEATURE_MAX)) { | ||
473 | WARN_ONCE(1, "no structure for xstate: %d\n", nr); | ||
474 | XSTATE_WARN_ON(1); | ||
475 | } | ||
476 | } | ||
477 | |||
478 | /* | ||
479 | * This essentially double-checks what the cpu told us about | ||
480 | * how large the XSAVE buffer needs to be. We are recalculating | ||
481 | * it to be safe. | ||
482 | */ | ||
483 | static void do_extra_xstate_size_checks(void) | ||
484 | { | ||
485 | int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; | ||
486 | int i; | ||
487 | |||
488 | for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { | ||
489 | if (!xfeature_enabled(i)) | ||
490 | continue; | ||
491 | |||
492 | check_xstate_against_struct(i); | ||
493 | /* | ||
494 | * Supervisor state components can be managed only by | ||
495 | * XSAVES, which is compacted-format only. | ||
496 | */ | ||
497 | if (!using_compacted_format()) | ||
498 | XSTATE_WARN_ON(xfeature_is_supervisor(i)); | ||
499 | |||
500 | /* Align from the end of the previous feature */ | ||
501 | if (xfeature_is_aligned(i)) | ||
502 | paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64); | ||
503 | /* | ||
504 | * The offset of a given state in the non-compacted | ||
505 | * format is given to us in a CPUID leaf. We check | ||
506 | * them for being ordered (increasing offsets) in | ||
507 | * setup_xstate_features(). | ||
508 | */ | ||
509 | if (!using_compacted_format()) | ||
510 | paranoid_xstate_size = xfeature_uncompacted_offset(i); | ||
511 | /* | ||
512 | * The compacted-format offset always depends on where | ||
513 | * the previous state ended. | ||
514 | */ | ||
515 | paranoid_xstate_size += xfeature_size(i); | ||
516 | } | ||
517 | XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); | ||
518 | } | ||
519 | |||
293 | /* | 520 | /* |
294 | * Calculate total size of enabled xstates in XCR0/xfeatures_mask. | 521 | * Calculate total size of enabled xstates in XCR0/xfeatures_mask. |
522 | * | ||
523 | * Note the SDM's wording here. "sub-function 0" only enumerates | ||
524 | * the size of the *user* states. If we use it to size a buffer | ||
525 | * that we use 'XSAVES' on, we could potentially overflow the | ||
526 | * buffer because 'XSAVES' saves system states too. | ||
527 | * | ||
528 | * Note that we do not currently set any bits on IA32_XSS so | ||
529 | * 'XCR0 | IA32_XSS == XCR0' for now. | ||
295 | */ | 530 | */ |
296 | static void __init init_xstate_size(void) | 531 | static unsigned int __init calculate_xstate_size(void) |
297 | { | 532 | { |
298 | unsigned int eax, ebx, ecx, edx; | 533 | unsigned int eax, ebx, ecx, edx; |
299 | int i; | 534 | unsigned int calculated_xstate_size; |
300 | 535 | ||
301 | if (!cpu_has_xsaves) { | 536 | if (!cpu_has_xsaves) { |
537 | /* | ||
538 | * - CPUID function 0DH, sub-function 0: | ||
539 | * EBX enumerates the size (in bytes) required by | ||
540 | * the XSAVE instruction for an XSAVE area | ||
541 | * containing all the *user* state components | ||
542 | * corresponding to bits currently set in XCR0. | ||
543 | */ | ||
302 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | 544 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); |
303 | xstate_size = ebx; | 545 | calculated_xstate_size = ebx; |
304 | return; | 546 | } else { |
547 | /* | ||
548 | * - CPUID function 0DH, sub-function 1: | ||
549 | * EBX enumerates the size (in bytes) required by | ||
550 | * the XSAVES instruction for an XSAVE area | ||
551 | * containing all the state components | ||
552 | * corresponding to bits currently set in | ||
553 | * XCR0 | IA32_XSS. | ||
554 | */ | ||
555 | cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); | ||
556 | calculated_xstate_size = ebx; | ||
305 | } | 557 | } |
558 | return calculated_xstate_size; | ||
559 | } | ||
306 | 560 | ||
307 | xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; | 561 | /* |
308 | for (i = 2; i < 64; i++) { | 562 | * Will the runtime-enumerated 'xstate_size' fit in the init |
309 | if (test_bit(i, (unsigned long *)&xfeatures_mask)) { | 563 | * task's statically-allocated buffer? |
310 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); | 564 | */ |
311 | xstate_size += eax; | 565 | static bool is_supported_xstate_size(unsigned int test_xstate_size) |
312 | } | 566 | { |
313 | } | 567 | if (test_xstate_size <= sizeof(union fpregs_state)) |
568 | return true; | ||
569 | |||
570 | pr_warn("x86/fpu: xstate buffer too small (%zu < %d), disabling xsave\n", | ||
571 | sizeof(union fpregs_state), test_xstate_size); | ||
572 | return false; | ||
573 | } | ||
574 | |||
575 | static int init_xstate_size(void) | ||
576 | { | ||
577 | /* Recompute the context size for enabled features: */ | ||
578 | unsigned int possible_xstate_size = calculate_xstate_size(); | ||
579 | |||
580 | /* Ensure we have the space to store all enabled features: */ | ||
581 | if (!is_supported_xstate_size(possible_xstate_size)) | ||
582 | return -EINVAL; | ||
583 | |||
584 | /* | ||
585 | * The size is OK, we are definitely going to use xsave, | ||
586 | * make it known to the world that we need more space. | ||
587 | */ | ||
588 | xstate_size = possible_xstate_size; | ||
589 | do_extra_xstate_size_checks(); | ||
590 | return 0; | ||
591 | } | ||
592 | |||
593 | /* | ||
594 | * We enabled the XSAVE hardware, but something went wrong and | ||
595 | * we can not use it. Disable it. | ||
596 | */ | ||
597 | static void fpu__init_disable_system_xstate(void) | ||
598 | { | ||
599 | xfeatures_mask = 0; | ||
600 | cr4_clear_bits(X86_CR4_OSXSAVE); | ||
601 | fpu__xstate_clear_all_cpu_caps(); | ||
314 | } | 602 | } |
315 | 603 | ||
316 | /* | 604 | /* |
@@ -321,6 +609,7 @@ void __init fpu__init_system_xstate(void) | |||
321 | { | 609 | { |
322 | unsigned int eax, ebx, ecx, edx; | 610 | unsigned int eax, ebx, ecx, edx; |
323 | static int on_boot_cpu = 1; | 611 | static int on_boot_cpu = 1; |
612 | int err; | ||
324 | 613 | ||
325 | WARN_ON_FPU(!on_boot_cpu); | 614 | WARN_ON_FPU(!on_boot_cpu); |
326 | on_boot_cpu = 0; | 615 | on_boot_cpu = 0; |
@@ -338,7 +627,7 @@ void __init fpu__init_system_xstate(void) | |||
338 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | 627 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); |
339 | xfeatures_mask = eax + ((u64)edx << 32); | 628 | xfeatures_mask = eax + ((u64)edx << 32); |
340 | 629 | ||
341 | if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | 630 | if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { |
342 | pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); | 631 | pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); |
343 | BUG(); | 632 | BUG(); |
344 | } | 633 | } |
@@ -348,16 +637,19 @@ void __init fpu__init_system_xstate(void) | |||
348 | 637 | ||
349 | /* Enable xstate instructions to be able to continue with initialization: */ | 638 | /* Enable xstate instructions to be able to continue with initialization: */ |
350 | fpu__init_cpu_xstate(); | 639 | fpu__init_cpu_xstate(); |
351 | 640 | err = init_xstate_size(); | |
352 | /* Recompute the context size for enabled features: */ | 641 | if (err) { |
353 | init_xstate_size(); | 642 | /* something went wrong, boot without any XSAVE support */ |
643 | fpu__init_disable_system_xstate(); | ||
644 | return; | ||
645 | } | ||
354 | 646 | ||
355 | update_regset_xstate_info(xstate_size, xfeatures_mask); | 647 | update_regset_xstate_info(xstate_size, xfeatures_mask); |
356 | fpu__init_prepare_fx_sw_frame(); | 648 | fpu__init_prepare_fx_sw_frame(); |
357 | setup_init_fpu_buf(); | 649 | setup_init_fpu_buf(); |
358 | setup_xstate_comp(); | 650 | setup_xstate_comp(); |
359 | 651 | ||
360 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", | 652 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", |
361 | xfeatures_mask, | 653 | xfeatures_mask, |
362 | xstate_size, | 654 | xstate_size, |
363 | cpu_has_xsaves ? "compacted" : "standard"); | 655 | cpu_has_xsaves ? "compacted" : "standard"); |
@@ -388,7 +680,7 @@ void fpu__resume_cpu(void) | |||
388 | * Inputs: | 680 | * Inputs: |
389 | * xstate: the thread's storage area for all FPU data | 681 | * xstate: the thread's storage area for all FPU data |
390 | * xstate_feature: state which is defined in xsave.h (e.g. | 682 | * xstate_feature: state which is defined in xsave.h (e.g. |
391 | * XSTATE_FP, XSTATE_SSE, etc...) | 683 | * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) |
392 | * Output: | 684 | * Output: |
393 | * address of the state in the xsave area, or NULL if the | 685 | * address of the state in the xsave area, or NULL if the |
394 | * field is not present in the xsave buffer. | 686 | * field is not present in the xsave buffer. |
@@ -439,8 +731,8 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); | |||
439 | * Note that this only works on the current task. | 731 | * Note that this only works on the current task. |
440 | * | 732 | * |
441 | * Inputs: | 733 | * Inputs: |
442 | * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP, | 734 | * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, |
443 | * XSTATE_SSE, etc...) | 735 | * XFEATURE_MASK_SSE, etc...) |
444 | * Output: | 736 | * Output: |
445 | * address of the state in the xsave area or NULL if the state | 737 | * address of the state in the xsave area or NULL if the state |
446 | * is not present or is in its 'init state'. | 738 | * is not present or is in its 'init state'. |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 346eec73f7db..ade185a46b1d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
361 | 361 | ||
362 | dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | 362 | dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) |
363 | { | 363 | { |
364 | const struct bndcsr *bndcsr; | 364 | const struct mpx_bndcsr *bndcsr; |
365 | siginfo_t *info; | 365 | siginfo_t *info; |
366 | 366 | ||
367 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); | 367 | RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); |
@@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |||
384 | * which is all zeros which indicates MPX was not | 384 | * which is all zeros which indicates MPX was not |
385 | * responsible for the exception. | 385 | * responsible for the exception. |
386 | */ | 386 | */ |
387 | bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); | 387 | bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); |
388 | if (!bndcsr) | 388 | if (!bndcsr) |
389 | goto exit_trap; | 389 | goto exit_trap; |
390 | 390 | ||
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2fbea2544f24..156441bcaac8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -30,7 +30,7 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted) | |||
30 | int feature_bit = 0; | 30 | int feature_bit = 0; |
31 | u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; | 31 | u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; |
32 | 32 | ||
33 | xstate_bv &= XSTATE_EXTEND_MASK; | 33 | xstate_bv &= XFEATURE_MASK_EXTEND; |
34 | while (xstate_bv) { | 34 | while (xstate_bv) { |
35 | if (xstate_bv & 0x1) { | 35 | if (xstate_bv & 0x1) { |
36 | u32 eax, ebx, ecx, edx, offset; | 36 | u32 eax, ebx, ecx, edx, offset; |
@@ -51,7 +51,7 @@ u64 kvm_supported_xcr0(void) | |||
51 | u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; | 51 | u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; |
52 | 52 | ||
53 | if (!kvm_x86_ops->mpx_supported()) | 53 | if (!kvm_x86_ops->mpx_supported()) |
54 | xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR); | 54 | xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); |
55 | 55 | ||
56 | return xcr0; | 56 | return xcr0; |
57 | } | 57 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a9a19830321..bda65690788e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -663,9 +663,9 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
663 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ | 663 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ |
664 | if (index != XCR_XFEATURE_ENABLED_MASK) | 664 | if (index != XCR_XFEATURE_ENABLED_MASK) |
665 | return 1; | 665 | return 1; |
666 | if (!(xcr0 & XSTATE_FP)) | 666 | if (!(xcr0 & XFEATURE_MASK_FP)) |
667 | return 1; | 667 | return 1; |
668 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | 668 | if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE)) |
669 | return 1; | 669 | return 1; |
670 | 670 | ||
671 | /* | 671 | /* |
@@ -673,23 +673,24 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
673 | * saving. However, xcr0 bit 0 is always set, even if the | 673 | * saving. However, xcr0 bit 0 is always set, even if the |
674 | * emulated CPU does not support XSAVE (see fx_init). | 674 | * emulated CPU does not support XSAVE (see fx_init). |
675 | */ | 675 | */ |
676 | valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP; | 676 | valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; |
677 | if (xcr0 & ~valid_bits) | 677 | if (xcr0 & ~valid_bits) |
678 | return 1; | 678 | return 1; |
679 | 679 | ||
680 | if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) | 680 | if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) != |
681 | (!(xcr0 & XFEATURE_MASK_BNDCSR))) | ||
681 | return 1; | 682 | return 1; |
682 | 683 | ||
683 | if (xcr0 & XSTATE_AVX512) { | 684 | if (xcr0 & XFEATURE_MASK_AVX512) { |
684 | if (!(xcr0 & XSTATE_YMM)) | 685 | if (!(xcr0 & XFEATURE_MASK_YMM)) |
685 | return 1; | 686 | return 1; |
686 | if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512) | 687 | if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512) |
687 | return 1; | 688 | return 1; |
688 | } | 689 | } |
689 | kvm_put_guest_xcr0(vcpu); | 690 | kvm_put_guest_xcr0(vcpu); |
690 | vcpu->arch.xcr0 = xcr0; | 691 | vcpu->arch.xcr0 = xcr0; |
691 | 692 | ||
692 | if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK) | 693 | if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) |
693 | kvm_update_cpuid(vcpu); | 694 | kvm_update_cpuid(vcpu); |
694 | return 0; | 695 | return 0; |
695 | } | 696 | } |
@@ -2905,7 +2906,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) | |||
2905 | * Copy each region from the possibly compacted offset to the | 2906 | * Copy each region from the possibly compacted offset to the |
2906 | * non-compacted offset. | 2907 | * non-compacted offset. |
2907 | */ | 2908 | */ |
2908 | valid = xstate_bv & ~XSTATE_FPSSE; | 2909 | valid = xstate_bv & ~XFEATURE_MASK_FPSSE; |
2909 | while (valid) { | 2910 | while (valid) { |
2910 | u64 feature = valid & -valid; | 2911 | u64 feature = valid & -valid; |
2911 | int index = fls64(feature) - 1; | 2912 | int index = fls64(feature) - 1; |
@@ -2943,7 +2944,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) | |||
2943 | * Copy each region from the non-compacted offset to the | 2944 | * Copy each region from the non-compacted offset to the |
2944 | * possibly compacted offset. | 2945 | * possibly compacted offset. |
2945 | */ | 2946 | */ |
2946 | valid = xstate_bv & ~XSTATE_FPSSE; | 2947 | valid = xstate_bv & ~XFEATURE_MASK_FPSSE; |
2947 | while (valid) { | 2948 | while (valid) { |
2948 | u64 feature = valid & -valid; | 2949 | u64 feature = valid & -valid; |
2949 | int index = fls64(feature) - 1; | 2950 | int index = fls64(feature) - 1; |
@@ -2971,7 +2972,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, | |||
2971 | &vcpu->arch.guest_fpu.state.fxsave, | 2972 | &vcpu->arch.guest_fpu.state.fxsave, |
2972 | sizeof(struct fxregs_state)); | 2973 | sizeof(struct fxregs_state)); |
2973 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = | 2974 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = |
2974 | XSTATE_FPSSE; | 2975 | XFEATURE_MASK_FPSSE; |
2975 | } | 2976 | } |
2976 | } | 2977 | } |
2977 | 2978 | ||
@@ -2991,7 +2992,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | |||
2991 | return -EINVAL; | 2992 | return -EINVAL; |
2992 | load_xsave(vcpu, (u8 *)guest_xsave->region); | 2993 | load_xsave(vcpu, (u8 *)guest_xsave->region); |
2993 | } else { | 2994 | } else { |
2994 | if (xstate_bv & ~XSTATE_FPSSE) | 2995 | if (xstate_bv & ~XFEATURE_MASK_FPSSE) |
2995 | return -EINVAL; | 2996 | return -EINVAL; |
2996 | memcpy(&vcpu->arch.guest_fpu.state.fxsave, | 2997 | memcpy(&vcpu->arch.guest_fpu.state.fxsave, |
2997 | guest_xsave->region, sizeof(struct fxregs_state)); | 2998 | guest_xsave->region, sizeof(struct fxregs_state)); |
@@ -7005,7 +7006,7 @@ static void fx_init(struct kvm_vcpu *vcpu) | |||
7005 | /* | 7006 | /* |
7006 | * Ensure guest xcr0 is valid for loading | 7007 | * Ensure guest xcr0 is valid for loading |
7007 | */ | 7008 | */ |
7008 | vcpu->arch.xcr0 = XSTATE_FP; | 7009 | vcpu->arch.xcr0 = XFEATURE_MASK_FP; |
7009 | 7010 | ||
7010 | vcpu->arch.cr0 |= X86_CR0_ET; | 7011 | vcpu->arch.cr0 |= X86_CR0_ET; |
7011 | } | 7012 | } |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2f822cd886c2..f2afa5fe48a6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -180,9 +180,9 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | |||
180 | bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, | 180 | bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, |
181 | int page_num); | 181 | int page_num); |
182 | 182 | ||
183 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | 183 | #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ |
184 | | XSTATE_BNDREGS | XSTATE_BNDCSR \ | 184 | | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \ |
185 | | XSTATE_AVX512) | 185 | | XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512) |
186 | extern u64 host_xcr0; | 186 | extern u64 host_xcr0; |
187 | 187 | ||
188 | extern u64 kvm_supported_xcr0(void); | 188 | extern u64 kvm_supported_xcr0(void); |
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index dd76a05729b0..024f6e971174 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c | |||
@@ -169,6 +169,76 @@ void fxch_i(void) | |||
169 | fpu_tag_word = tag_word; | 169 | fpu_tag_word = tag_word; |
170 | } | 170 | } |
171 | 171 | ||
172 | static void fcmovCC(void) | ||
173 | { | ||
174 | /* fcmovCC st(i) */ | ||
175 | int i = FPU_rm; | ||
176 | FPU_REG *st0_ptr = &st(0); | ||
177 | FPU_REG *sti_ptr = &st(i); | ||
178 | long tag_word = fpu_tag_word; | ||
179 | int regnr = top & 7; | ||
180 | int regnri = (top + i) & 7; | ||
181 | u_char sti_tag = (tag_word >> (regnri * 2)) & 3; | ||
182 | |||
183 | if (sti_tag == TAG_Empty) { | ||
184 | FPU_stack_underflow(); | ||
185 | clear_C1(); | ||
186 | return; | ||
187 | } | ||
188 | reg_copy(sti_ptr, st0_ptr); | ||
189 | tag_word &= ~(3 << (regnr * 2)); | ||
190 | tag_word |= (sti_tag << (regnr * 2)); | ||
191 | fpu_tag_word = tag_word; | ||
192 | } | ||
193 | |||
194 | void fcmovb(void) | ||
195 | { | ||
196 | if (FPU_EFLAGS & X86_EFLAGS_CF) | ||
197 | fcmovCC(); | ||
198 | } | ||
199 | |||
200 | void fcmove(void) | ||
201 | { | ||
202 | if (FPU_EFLAGS & X86_EFLAGS_ZF) | ||
203 | fcmovCC(); | ||
204 | } | ||
205 | |||
206 | void fcmovbe(void) | ||
207 | { | ||
208 | if (FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF)) | ||
209 | fcmovCC(); | ||
210 | } | ||
211 | |||
212 | void fcmovu(void) | ||
213 | { | ||
214 | if (FPU_EFLAGS & X86_EFLAGS_PF) | ||
215 | fcmovCC(); | ||
216 | } | ||
217 | |||
218 | void fcmovnb(void) | ||
219 | { | ||
220 | if (!(FPU_EFLAGS & X86_EFLAGS_CF)) | ||
221 | fcmovCC(); | ||
222 | } | ||
223 | |||
224 | void fcmovne(void) | ||
225 | { | ||
226 | if (!(FPU_EFLAGS & X86_EFLAGS_ZF)) | ||
227 | fcmovCC(); | ||
228 | } | ||
229 | |||
230 | void fcmovnbe(void) | ||
231 | { | ||
232 | if (!(FPU_EFLAGS & (X86_EFLAGS_CF|X86_EFLAGS_ZF))) | ||
233 | fcmovCC(); | ||
234 | } | ||
235 | |||
236 | void fcmovnu(void) | ||
237 | { | ||
238 | if (!(FPU_EFLAGS & X86_EFLAGS_PF)) | ||
239 | fcmovCC(); | ||
240 | } | ||
241 | |||
172 | void ffree_(void) | 242 | void ffree_(void) |
173 | { | 243 | { |
174 | /* ffree st(i) */ | 244 | /* ffree st(i) */ |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 3d8f2e421466..e945fedf1de2 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
@@ -40,49 +40,33 @@ | |||
40 | 40 | ||
41 | #define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ | 41 | #define __BAD__ FPU_illegal /* Illegal on an 80486, causes SIGILL */ |
42 | 42 | ||
43 | #ifndef NO_UNDOC_CODE /* Un-documented FPU op-codes supported by default. */ | 43 | /* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */ |
44 | 44 | ||
45 | /* WARNING: These codes are not documented by Intel in their 80486 manual | 45 | /* WARNING: "u" entries are not documented by Intel in their 80486 manual |
46 | and may not work on FPU clones or later Intel FPUs. */ | 46 | and may not work on FPU clones or later Intel FPUs. |
47 | 47 | Changes to support them provided by Linus Torvalds. */ | |
48 | /* Changes to support the un-doc codes provided by Linus Torvalds. */ | ||
49 | |||
50 | #define _d9_d8_ fstp_i /* unofficial code (19) */ | ||
51 | #define _dc_d0_ fcom_st /* unofficial code (14) */ | ||
52 | #define _dc_d8_ fcompst /* unofficial code (1c) */ | ||
53 | #define _dd_c8_ fxch_i /* unofficial code (0d) */ | ||
54 | #define _de_d0_ fcompst /* unofficial code (16) */ | ||
55 | #define _df_c0_ ffreep /* unofficial code (07) ffree + pop */ | ||
56 | #define _df_c8_ fxch_i /* unofficial code (0f) */ | ||
57 | #define _df_d0_ fstp_i /* unofficial code (17) */ | ||
58 | #define _df_d8_ fstp_i /* unofficial code (1f) */ | ||
59 | 48 | ||
60 | static FUNC const st_instr_table[64] = { | 49 | static FUNC const st_instr_table[64] = { |
61 | fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_, | 50 | /* Opcode: d8 d9 da db */ |
62 | fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_, | 51 | /* dc dd de df */ |
63 | fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_, | 52 | /* c0..7 */ fadd__, fld_i_, fcmovb, fcmovnb, |
64 | fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_, | 53 | /* c0..7 */ fadd_i, ffree_, faddp_, ffreep,/*u*/ |
65 | fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, | 54 | /* c8..f */ fmul__, fxch_i, fcmove, fcmovne, |
66 | fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, | 55 | /* c8..f */ fmul_i, fxch_i,/*u*/ fmulp_, fxch_i,/*u*/ |
67 | fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, | 56 | /* d0..7 */ fcom_st, fp_nop, fcmovbe, fcmovnbe, |
68 | fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, | 57 | /* d0..7 */ fcom_st,/*u*/ fst_i_, fcompst,/*u*/ fstp_i,/*u*/ |
58 | /* d8..f */ fcompst, fstp_i,/*u*/ fcmovu, fcmovnu, | ||
59 | /* d8..f */ fcompst,/*u*/ fstp_i, fcompp, fstp_i,/*u*/ | ||
60 | /* e0..7 */ fsub__, FPU_etc, __BAD__, finit_, | ||
61 | /* e0..7 */ fsubri, fucom_, fsubrp, fstsw_, | ||
62 | /* e8..f */ fsubr_, fconst, fucompp, fucomi_, | ||
63 | /* e8..f */ fsub_i, fucomp, fsubp_, fucomip, | ||
64 | /* f0..7 */ fdiv__, FPU_triga, __BAD__, fcomi_, | ||
65 | /* f0..7 */ fdivri, __BAD__, fdivrp, fcomip, | ||
66 | /* f8..f */ fdivr_, FPU_trigb, __BAD__, __BAD__, | ||
67 | /* f8..f */ fdiv_i, __BAD__, fdivp_, __BAD__, | ||
69 | }; | 68 | }; |
70 | 69 | ||
71 | #else /* Support only documented FPU op-codes */ | ||
72 | |||
73 | static FUNC const st_instr_table[64] = { | ||
74 | fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__, | ||
75 | fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__, | ||
76 | fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__, | ||
77 | fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__, | ||
78 | fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_, | ||
79 | fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__, | ||
80 | fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__, | ||
81 | fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__, | ||
82 | }; | ||
83 | |||
84 | #endif /* NO_UNDOC_CODE */ | ||
85 | |||
86 | #define _NONE_ 0 /* Take no special action */ | 70 | #define _NONE_ 0 /* Take no special action */ |
87 | #define _REG0_ 1 /* Need to check for not empty st(0) */ | 71 | #define _REG0_ 1 /* Need to check for not empty st(0) */ |
88 | #define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ | 72 | #define _REGI_ 2 /* Need to check for not empty st(0) and st(rm) */ |
@@ -94,36 +78,18 @@ static FUNC const st_instr_table[64] = { | |||
94 | #define _REGIc 0 /* Compare st(0) and st(rm) */ | 78 | #define _REGIc 0 /* Compare st(0) and st(rm) */ |
95 | #define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ | 79 | #define _REGIn 0 /* Uses st(0) and st(rm), but handle checks later */ |
96 | 80 | ||
97 | #ifndef NO_UNDOC_CODE | ||
98 | |||
99 | /* Un-documented FPU op-codes supported by default. (see above) */ | ||
100 | |||
101 | static u_char const type_table[64] = { | 81 | static u_char const type_table[64] = { |
102 | _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_, | 82 | /* Opcode: d8 d9 da db dc dd de df */ |
103 | _REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_, | 83 | /* c0..7 */ _REGI_, _NONE_, _REGIn, _REGIn, _REGIi, _REGi_, _REGIp, _REGi_, |
104 | _REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, | 84 | /* c8..f */ _REGI_, _REGIn, _REGIn, _REGIn, _REGIi, _REGI_, _REGIp, _REGI_, |
105 | _REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_, | 85 | /* d0..7 */ _REGIc, _NONE_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_, |
106 | _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, | 86 | /* d8..f */ _REGIc, _REG0_, _REGIn, _REGIn, _REGIc, _REG0_, _REGIc, _REG0_, |
107 | _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, | 87 | /* e0..7 */ _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, |
108 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | 88 | /* e8..f */ _REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc, |
109 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ | 89 | /* f0..7 */ _REGI_, _NONE_, _null_, _REGIc, _REGIi, _null_, _REGIp, _REGIc, |
90 | /* f8..f */ _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | ||
110 | }; | 91 | }; |
111 | 92 | ||
112 | #else /* Support only documented FPU op-codes */ | ||
113 | |||
114 | static u_char const type_table[64] = { | ||
115 | _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_, | ||
116 | _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | ||
117 | _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_, | ||
118 | _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_, | ||
119 | _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_, | ||
120 | _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_, | ||
121 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_, | ||
122 | _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_ | ||
123 | }; | ||
124 | |||
125 | #endif /* NO_UNDOC_CODE */ | ||
126 | |||
127 | #ifdef RE_ENTRANT_CHECKING | 93 | #ifdef RE_ENTRANT_CHECKING |
128 | u_char emulating = 0; | 94 | u_char emulating = 0; |
129 | #endif /* RE_ENTRANT_CHECKING */ | 95 | #endif /* RE_ENTRANT_CHECKING */ |
diff --git a/arch/x86/math-emu/fpu_proto.h b/arch/x86/math-emu/fpu_proto.h index 9779df436b7d..caff438b9c1d 100644 --- a/arch/x86/math-emu/fpu_proto.h +++ b/arch/x86/math-emu/fpu_proto.h | |||
@@ -46,6 +46,14 @@ extern void fstsw_(void); | |||
46 | extern void fp_nop(void); | 46 | extern void fp_nop(void); |
47 | extern void fld_i_(void); | 47 | extern void fld_i_(void); |
48 | extern void fxch_i(void); | 48 | extern void fxch_i(void); |
49 | extern void fcmovb(void); | ||
50 | extern void fcmove(void); | ||
51 | extern void fcmovbe(void); | ||
52 | extern void fcmovu(void); | ||
53 | extern void fcmovnb(void); | ||
54 | extern void fcmovne(void); | ||
55 | extern void fcmovnbe(void); | ||
56 | extern void fcmovnu(void); | ||
49 | extern void ffree_(void); | 57 | extern void ffree_(void); |
50 | extern void ffreep(void); | 58 | extern void ffreep(void); |
51 | extern void fst_i_(void); | 59 | extern void fst_i_(void); |
@@ -108,6 +116,10 @@ extern void fcompp(void); | |||
108 | extern void fucom_(void); | 116 | extern void fucom_(void); |
109 | extern void fucomp(void); | 117 | extern void fucomp(void); |
110 | extern void fucompp(void); | 118 | extern void fucompp(void); |
119 | extern void fcomi_(void); | ||
120 | extern void fcomip(void); | ||
121 | extern void fucomi_(void); | ||
122 | extern void fucomip(void); | ||
111 | /* reg_constant.c */ | 123 | /* reg_constant.c */ |
112 | extern void fconst(void); | 124 | extern void fconst(void); |
113 | /* reg_ld_str.c */ | 125 | /* reg_ld_str.c */ |
diff --git a/arch/x86/math-emu/load_store.c b/arch/x86/math-emu/load_store.c index 2931ff355218..95228ff042c0 100644 --- a/arch/x86/math-emu/load_store.c +++ b/arch/x86/math-emu/load_store.c | |||
@@ -33,11 +33,12 @@ | |||
33 | 33 | ||
34 | #define pop_0() { FPU_settag0(TAG_Empty); top++; } | 34 | #define pop_0() { FPU_settag0(TAG_Empty); top++; } |
35 | 35 | ||
36 | /* index is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */ | ||
36 | static u_char const type_table[32] = { | 37 | static u_char const type_table[32] = { |
37 | _PUSH_, _PUSH_, _PUSH_, _PUSH_, | 38 | _PUSH_, _PUSH_, _PUSH_, _PUSH_, /* /0: d9:fld f32, db:fild m32, dd:fld f64, df:fild m16 */ |
38 | _null_, _null_, _null_, _null_, | 39 | _null_, _REG0_, _REG0_, _REG0_, /* /1: d9:undef, db,dd,df:fisttp m32/64/16 */ |
39 | _REG0_, _REG0_, _REG0_, _REG0_, | 40 | _REG0_, _REG0_, _REG0_, _REG0_, /* /2: d9:fst f32, db:fist m32, dd:fst f64, df:fist m16 */ |
40 | _REG0_, _REG0_, _REG0_, _REG0_, | 41 | _REG0_, _REG0_, _REG0_, _REG0_, /* /3: d9:fstp f32, db:fistp m32, dd:fstp f64, df:fistp m16 */ |
41 | _NONE_, _null_, _NONE_, _PUSH_, | 42 | _NONE_, _null_, _NONE_, _PUSH_, |
42 | _NONE_, _PUSH_, _null_, _PUSH_, | 43 | _NONE_, _PUSH_, _null_, _PUSH_, |
43 | _NONE_, _null_, _NONE_, _REG0_, | 44 | _NONE_, _null_, _NONE_, _REG0_, |
@@ -45,15 +46,19 @@ static u_char const type_table[32] = { | |||
45 | }; | 46 | }; |
46 | 47 | ||
47 | u_char const data_sizes_16[32] = { | 48 | u_char const data_sizes_16[32] = { |
48 | 4, 4, 8, 2, 0, 0, 0, 0, | 49 | 4, 4, 8, 2, |
49 | 4, 4, 8, 2, 4, 4, 8, 2, | 50 | 0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */ |
51 | 4, 4, 8, 2, | ||
52 | 4, 4, 8, 2, | ||
50 | 14, 0, 94, 10, 2, 10, 0, 8, | 53 | 14, 0, 94, 10, 2, 10, 0, 8, |
51 | 14, 0, 94, 10, 2, 10, 2, 8 | 54 | 14, 0, 94, 10, 2, 10, 2, 8 |
52 | }; | 55 | }; |
53 | 56 | ||
54 | static u_char const data_sizes_32[32] = { | 57 | static u_char const data_sizes_32[32] = { |
55 | 4, 4, 8, 2, 0, 0, 0, 0, | 58 | 4, 4, 8, 2, |
56 | 4, 4, 8, 2, 4, 4, 8, 2, | 59 | 0, 4, 8, 2, /* /1: d9:undef, db,dd,df:fisttp */ |
60 | 4, 4, 8, 2, | ||
61 | 4, 4, 8, 2, | ||
57 | 28, 0, 108, 10, 2, 10, 0, 8, | 62 | 28, 0, 108, 10, 2, 10, 0, 8, |
58 | 28, 0, 108, 10, 2, 10, 2, 8 | 63 | 28, 0, 108, 10, 2, 10, 2, 8 |
59 | }; | 64 | }; |
@@ -65,6 +70,7 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, | |||
65 | FPU_REG *st0_ptr; | 70 | FPU_REG *st0_ptr; |
66 | u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ | 71 | u_char st0_tag = TAG_Empty; /* This is just to stop a gcc warning. */ |
67 | u_char loaded_tag; | 72 | u_char loaded_tag; |
73 | int sv_cw; | ||
68 | 74 | ||
69 | st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ | 75 | st0_ptr = NULL; /* Initialized just to stop compiler warnings. */ |
70 | 76 | ||
@@ -111,7 +117,8 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, | |||
111 | } | 117 | } |
112 | 118 | ||
113 | switch (type) { | 119 | switch (type) { |
114 | case 000: /* fld m32real */ | 120 | /* type is a 5-bit value: (3-bit FPU_modrm.reg field | opcode[2,1]) */ |
121 | case 000: /* fld m32real (d9 /0) */ | ||
115 | clear_C1(); | 122 | clear_C1(); |
116 | loaded_tag = | 123 | loaded_tag = |
117 | FPU_load_single((float __user *)data_address, &loaded_data); | 124 | FPU_load_single((float __user *)data_address, &loaded_data); |
@@ -123,13 +130,13 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, | |||
123 | } | 130 | } |
124 | FPU_copy_to_reg0(&loaded_data, loaded_tag); | 131 | FPU_copy_to_reg0(&loaded_data, loaded_tag); |
125 | break; | 132 | break; |
126 | case 001: /* fild m32int */ | 133 | case 001: /* fild m32int (db /0) */ |
127 | clear_C1(); | 134 | clear_C1(); |
128 | loaded_tag = | 135 | loaded_tag = |
129 | FPU_load_int32((long __user *)data_address, &loaded_data); | 136 | FPU_load_int32((long __user *)data_address, &loaded_data); |
130 | FPU_copy_to_reg0(&loaded_data, loaded_tag); | 137 | FPU_copy_to_reg0(&loaded_data, loaded_tag); |
131 | break; | 138 | break; |
132 | case 002: /* fld m64real */ | 139 | case 002: /* fld m64real (dd /0) */ |
133 | clear_C1(); | 140 | clear_C1(); |
134 | loaded_tag = | 141 | loaded_tag = |
135 | FPU_load_double((double __user *)data_address, | 142 | FPU_load_double((double __user *)data_address, |
@@ -142,12 +149,44 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes, | |||
142 | } | 149 | } |
143 | FPU_copy_to_reg0(&loaded_data, loaded_tag); | 150 | FPU_copy_to_reg0(&loaded_data, loaded_tag); |
144 | break; | 151 | break; |
145 | case 003: /* fild m16int */ | 152 | case 003: /* fild m16int (df /0) */ |
146 | clear_C1(); | 153 | clear_C1(); |
147 | loaded_tag = | 154 | loaded_tag = |
148 | FPU_load_int16((short __user *)data_address, &loaded_data); | 155 | FPU_load_int16((short __user *)data_address, &loaded_data); |
149 | FPU_copy_to_reg0(&loaded_data, loaded_tag); | 156 | FPU_copy_to_reg0(&loaded_data, loaded_tag); |
150 | break; | 157 | break; |
158 | /* case 004: undefined (d9 /1) */ | ||
159 | /* fisttp are enabled if CPUID(1).ECX(0) "sse3" is set */ | ||
160 | case 005: /* fisttp m32int (db /1) */ | ||
161 | clear_C1(); | ||
162 | sv_cw = control_word; | ||
163 | control_word |= RC_CHOP; | ||
164 | if (FPU_store_int32 | ||
165 | (st0_ptr, st0_tag, (long __user *)data_address)) | ||
166 | pop_0(); /* pop only if the number was actually stored | ||
167 | (see the 80486 manual p16-28) */ | ||
168 | control_word = sv_cw; | ||
169 | break; | ||
170 | case 006: /* fisttp m64int (dd /1) */ | ||
171 | clear_C1(); | ||
172 | sv_cw = control_word; | ||
173 | control_word |= RC_CHOP; | ||
174 | if (FPU_store_int64 | ||
175 | (st0_ptr, st0_tag, (long long __user *)data_address)) | ||
176 | pop_0(); /* pop only if the number was actually stored | ||
177 | (see the 80486 manual p16-28) */ | ||
178 | control_word = sv_cw; | ||
179 | break; | ||
180 | case 007: /* fisttp m16int (df /1) */ | ||
181 | clear_C1(); | ||
182 | sv_cw = control_word; | ||
183 | control_word |= RC_CHOP; | ||
184 | if (FPU_store_int16 | ||
185 | (st0_ptr, st0_tag, (short __user *)data_address)) | ||
186 | pop_0(); /* pop only if the number was actually stored | ||
187 | (see the 80486 manual p16-28) */ | ||
188 | control_word = sv_cw; | ||
189 | break; | ||
151 | case 010: /* fst m32real */ | 190 | case 010: /* fst m32real */ |
152 | clear_C1(); | 191 | clear_C1(); |
153 | FPU_store_single(st0_ptr, st0_tag, | 192 | FPU_store_single(st0_ptr, st0_tag, |
diff --git a/arch/x86/math-emu/reg_compare.c b/arch/x86/math-emu/reg_compare.c index ecce55fc2e2e..b77360fdbf4a 100644 --- a/arch/x86/math-emu/reg_compare.c +++ b/arch/x86/math-emu/reg_compare.c | |||
@@ -249,6 +249,54 @@ static int compare_st_st(int nr) | |||
249 | return 0; | 249 | return 0; |
250 | } | 250 | } |
251 | 251 | ||
252 | static int compare_i_st_st(int nr) | ||
253 | { | ||
254 | int f, c; | ||
255 | FPU_REG *st_ptr; | ||
256 | |||
257 | if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { | ||
258 | FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); | ||
259 | /* Stack fault */ | ||
260 | EXCEPTION(EX_StackUnder); | ||
261 | return !(control_word & CW_Invalid); | ||
262 | } | ||
263 | |||
264 | partial_status &= ~SW_C0; | ||
265 | st_ptr = &st(nr); | ||
266 | c = compare(st_ptr, FPU_gettagi(nr)); | ||
267 | if (c & COMP_NaN) { | ||
268 | FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); | ||
269 | EXCEPTION(EX_Invalid); | ||
270 | return !(control_word & CW_Invalid); | ||
271 | } | ||
272 | |||
273 | switch (c & 7) { | ||
274 | case COMP_A_lt_B: | ||
275 | f = X86_EFLAGS_CF; | ||
276 | break; | ||
277 | case COMP_A_eq_B: | ||
278 | f = X86_EFLAGS_ZF; | ||
279 | break; | ||
280 | case COMP_A_gt_B: | ||
281 | f = 0; | ||
282 | break; | ||
283 | case COMP_No_Comp: | ||
284 | f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF; | ||
285 | break; | ||
286 | #ifdef PARANOID | ||
287 | default: | ||
288 | EXCEPTION(EX_INTERNAL | 0x122); | ||
289 | f = 0; | ||
290 | break; | ||
291 | #endif /* PARANOID */ | ||
292 | } | ||
293 | FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f; | ||
294 | if (c & COMP_Denormal) { | ||
295 | return denormal_operand() < 0; | ||
296 | } | ||
297 | return 0; | ||
298 | } | ||
299 | |||
252 | static int compare_u_st_st(int nr) | 300 | static int compare_u_st_st(int nr) |
253 | { | 301 | { |
254 | int f = 0, c; | 302 | int f = 0, c; |
@@ -299,6 +347,58 @@ static int compare_u_st_st(int nr) | |||
299 | return 0; | 347 | return 0; |
300 | } | 348 | } |
301 | 349 | ||
350 | static int compare_ui_st_st(int nr) | ||
351 | { | ||
352 | int f = 0, c; | ||
353 | FPU_REG *st_ptr; | ||
354 | |||
355 | if (!NOT_EMPTY(0) || !NOT_EMPTY(nr)) { | ||
356 | FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); | ||
357 | /* Stack fault */ | ||
358 | EXCEPTION(EX_StackUnder); | ||
359 | return !(control_word & CW_Invalid); | ||
360 | } | ||
361 | |||
362 | partial_status &= ~SW_C0; | ||
363 | st_ptr = &st(nr); | ||
364 | c = compare(st_ptr, FPU_gettagi(nr)); | ||
365 | if (c & COMP_NaN) { | ||
366 | FPU_EFLAGS |= (X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF); | ||
367 | if (c & COMP_SNaN) { /* This is the only difference between | ||
368 | un-ordered and ordinary comparisons */ | ||
369 | EXCEPTION(EX_Invalid); | ||
370 | return !(control_word & CW_Invalid); | ||
371 | } | ||
372 | return 0; | ||
373 | } | ||
374 | |||
375 | switch (c & 7) { | ||
376 | case COMP_A_lt_B: | ||
377 | f = X86_EFLAGS_CF; | ||
378 | break; | ||
379 | case COMP_A_eq_B: | ||
380 | f = X86_EFLAGS_ZF; | ||
381 | break; | ||
382 | case COMP_A_gt_B: | ||
383 | f = 0; | ||
384 | break; | ||
385 | case COMP_No_Comp: | ||
386 | f = X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF; | ||
387 | break; | ||
388 | #ifdef PARANOID | ||
389 | default: | ||
390 | EXCEPTION(EX_INTERNAL | 0x123); | ||
391 | f = 0; | ||
392 | break; | ||
393 | #endif /* PARANOID */ | ||
394 | } | ||
395 | FPU_EFLAGS = (FPU_EFLAGS & ~(X86_EFLAGS_ZF | X86_EFLAGS_PF | X86_EFLAGS_CF)) | f; | ||
396 | if (c & COMP_Denormal) { | ||
397 | return denormal_operand() < 0; | ||
398 | } | ||
399 | return 0; | ||
400 | } | ||
401 | |||
302 | /*---------------------------------------------------------------------------*/ | 402 | /*---------------------------------------------------------------------------*/ |
303 | 403 | ||
304 | void fcom_st(void) | 404 | void fcom_st(void) |
@@ -348,3 +448,31 @@ void fucompp(void) | |||
348 | } else | 448 | } else |
349 | FPU_illegal(); | 449 | FPU_illegal(); |
350 | } | 450 | } |
451 | |||
452 | /* P6+ compare-to-EFLAGS ops */ | ||
453 | |||
454 | void fcomi_(void) | ||
455 | { | ||
456 | /* fcomi st(i) */ | ||
457 | compare_i_st_st(FPU_rm); | ||
458 | } | ||
459 | |||
460 | void fcomip(void) | ||
461 | { | ||
462 | /* fcomip st(i) */ | ||
463 | if (!compare_i_st_st(FPU_rm)) | ||
464 | FPU_pop(); | ||
465 | } | ||
466 | |||
467 | void fucomi_(void) | ||
468 | { | ||
469 | /* fucomi st(i) */ | ||
470 | compare_ui_st_st(FPU_rm); | ||
471 | } | ||
472 | |||
473 | void fucomip(void) | ||
474 | { | ||
475 | /* fucomip st(i) */ | ||
476 | if (!compare_ui_st_st(FPU_rm)) | ||
477 | FPU_pop(); | ||
478 | } | ||
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 134948b0926f..b0ae85f90f10 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c | |||
@@ -237,7 +237,8 @@ bad_opcode: | |||
237 | */ | 237 | */ |
238 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) | 238 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) |
239 | { | 239 | { |
240 | const struct bndreg *bndregs, *bndreg; | 240 | const struct mpx_bndreg_state *bndregs; |
241 | const struct mpx_bndreg *bndreg; | ||
241 | siginfo_t *info = NULL; | 242 | siginfo_t *info = NULL; |
242 | struct insn insn; | 243 | struct insn insn; |
243 | uint8_t bndregno; | 244 | uint8_t bndregno; |
@@ -258,13 +259,13 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) | |||
258 | goto err_out; | 259 | goto err_out; |
259 | } | 260 | } |
260 | /* get bndregs field from current task's xsave area */ | 261 | /* get bndregs field from current task's xsave area */ |
261 | bndregs = get_xsave_field_ptr(XSTATE_BNDREGS); | 262 | bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); |
262 | if (!bndregs) { | 263 | if (!bndregs) { |
263 | err = -EINVAL; | 264 | err = -EINVAL; |
264 | goto err_out; | 265 | goto err_out; |
265 | } | 266 | } |
266 | /* now go select the individual register in the set of 4 */ | 267 | /* now go select the individual register in the set of 4 */ |
267 | bndreg = &bndregs[bndregno]; | 268 | bndreg = &bndregs->bndreg[bndregno]; |
268 | 269 | ||
269 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 270 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
270 | if (!info) { | 271 | if (!info) { |
@@ -306,7 +307,7 @@ err_out: | |||
306 | 307 | ||
307 | static __user void *mpx_get_bounds_dir(void) | 308 | static __user void *mpx_get_bounds_dir(void) |
308 | { | 309 | { |
309 | const struct bndcsr *bndcsr; | 310 | const struct mpx_bndcsr *bndcsr; |
310 | 311 | ||
311 | if (!cpu_feature_enabled(X86_FEATURE_MPX)) | 312 | if (!cpu_feature_enabled(X86_FEATURE_MPX)) |
312 | return MPX_INVALID_BOUNDS_DIR; | 313 | return MPX_INVALID_BOUNDS_DIR; |
@@ -315,7 +316,7 @@ static __user void *mpx_get_bounds_dir(void) | |||
315 | * The bounds directory pointer is stored in a register | 316 | * The bounds directory pointer is stored in a register |
316 | * only accessible if we first do an xsave. | 317 | * only accessible if we first do an xsave. |
317 | */ | 318 | */ |
318 | bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); | 319 | bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); |
319 | if (!bndcsr) | 320 | if (!bndcsr) |
320 | return MPX_INVALID_BOUNDS_DIR; | 321 | return MPX_INVALID_BOUNDS_DIR; |
321 | 322 | ||
@@ -489,10 +490,10 @@ out_unmap: | |||
489 | static int do_mpx_bt_fault(void) | 490 | static int do_mpx_bt_fault(void) |
490 | { | 491 | { |
491 | unsigned long bd_entry, bd_base; | 492 | unsigned long bd_entry, bd_base; |
492 | const struct bndcsr *bndcsr; | 493 | const struct mpx_bndcsr *bndcsr; |
493 | struct mm_struct *mm = current->mm; | 494 | struct mm_struct *mm = current->mm; |
494 | 495 | ||
495 | bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); | 496 | bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); |
496 | if (!bndcsr) | 497 | if (!bndcsr) |
497 | return -EINVAL; | 498 | return -EINVAL; |
498 | /* | 499 | /* |
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 389701f59940..eabcff411984 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile | |||
@@ -5,7 +5,8 @@ include ../lib.mk | |||
5 | .PHONY: all all_32 all_64 warn_32bit_failure clean | 5 | .PHONY: all all_32 all_64 warn_32bit_failure clean |
6 | 6 | ||
7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall | 7 | TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs ldt_gdt syscall_nt ptrace_syscall |
8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso | 8 | TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ |
9 | test_FCMOV test_FCOMI test_FISTTP | ||
9 | 10 | ||
10 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) | 11 | TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) |
11 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) | 12 | BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) |
@@ -35,7 +36,7 @@ clean: | |||
35 | $(RM) $(BINARIES_32) $(BINARIES_64) | 36 | $(RM) $(BINARIES_32) $(BINARIES_64) |
36 | 37 | ||
37 | $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c | 38 | $(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c |
38 | $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl | 39 | $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm |
39 | 40 | ||
40 | $(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c | 41 | $(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c |
41 | $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl | 42 | $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl |
diff --git a/tools/testing/selftests/x86/test_FCMOV.c b/tools/testing/selftests/x86/test_FCMOV.c new file mode 100644 index 000000000000..4adcca0c80c4 --- /dev/null +++ b/tools/testing/selftests/x86/test_FCMOV.c | |||
@@ -0,0 +1,93 @@ | |||
1 | #undef _GNU_SOURCE | ||
2 | #define _GNU_SOURCE 1 | ||
3 | #undef __USE_GNU | ||
4 | #define __USE_GNU 1 | ||
5 | #include <unistd.h> | ||
6 | #include <stdlib.h> | ||
7 | #include <string.h> | ||
8 | #include <stdio.h> | ||
9 | #include <signal.h> | ||
10 | #include <sys/types.h> | ||
11 | #include <sys/select.h> | ||
12 | #include <sys/time.h> | ||
13 | #include <sys/wait.h> | ||
14 | |||
15 | #define TEST(insn) \ | ||
16 | long double __attribute__((noinline)) insn(long flags) \ | ||
17 | { \ | ||
18 | long double out; \ | ||
19 | asm ("\n" \ | ||
20 | " push %1""\n" \ | ||
21 | " popf""\n" \ | ||
22 | " fldpi""\n" \ | ||
23 | " fld1""\n" \ | ||
24 | " " #insn " %%st(1), %%st" "\n" \ | ||
25 | " ffree %%st(1)" "\n" \ | ||
26 | : "=t" (out) \ | ||
27 | : "r" (flags) \ | ||
28 | ); \ | ||
29 | return out; \ | ||
30 | } | ||
31 | |||
32 | TEST(fcmovb) | ||
33 | TEST(fcmove) | ||
34 | TEST(fcmovbe) | ||
35 | TEST(fcmovu) | ||
36 | TEST(fcmovnb) | ||
37 | TEST(fcmovne) | ||
38 | TEST(fcmovnbe) | ||
39 | TEST(fcmovnu) | ||
40 | |||
41 | enum { | ||
42 | CF = 1 << 0, | ||
43 | PF = 1 << 2, | ||
44 | ZF = 1 << 6, | ||
45 | }; | ||
46 | |||
47 | void sighandler(int sig) | ||
48 | { | ||
49 | printf("[FAIL]\tGot signal %d, exiting\n", sig); | ||
50 | exit(1); | ||
51 | } | ||
52 | |||
53 | int main(int argc, char **argv, char **envp) | ||
54 | { | ||
55 | int err = 0; | ||
56 | |||
57 | /* SIGILL triggers on 32-bit kernels w/o fcmovCC emulation | ||
58 | * when run with "no387 nofxsr". Other signals are caught | ||
59 | * just in case. | ||
60 | */ | ||
61 | signal(SIGILL, sighandler); | ||
62 | signal(SIGFPE, sighandler); | ||
63 | signal(SIGSEGV, sighandler); | ||
64 | |||
65 | printf("[RUN]\tTesting fcmovCC instructions\n"); | ||
66 | /* If fcmovCC() returns 1.0, the move wasn't done */ | ||
67 | err |= !(fcmovb(0) == 1.0); err |= !(fcmovnb(0) != 1.0); | ||
68 | err |= !(fcmove(0) == 1.0); err |= !(fcmovne(0) != 1.0); | ||
69 | err |= !(fcmovbe(0) == 1.0); err |= !(fcmovnbe(0) != 1.0); | ||
70 | err |= !(fcmovu(0) == 1.0); err |= !(fcmovnu(0) != 1.0); | ||
71 | |||
72 | err |= !(fcmovb(CF) != 1.0); err |= !(fcmovnb(CF) == 1.0); | ||
73 | err |= !(fcmove(CF) == 1.0); err |= !(fcmovne(CF) != 1.0); | ||
74 | err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0); | ||
75 | err |= !(fcmovu(CF) == 1.0); err |= !(fcmovnu(CF) != 1.0); | ||
76 | |||
77 | err |= !(fcmovb(ZF) == 1.0); err |= !(fcmovnb(ZF) != 1.0); | ||
78 | err |= !(fcmove(ZF) != 1.0); err |= !(fcmovne(ZF) == 1.0); | ||
79 | err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0); | ||
80 | err |= !(fcmovu(ZF) == 1.0); err |= !(fcmovnu(ZF) != 1.0); | ||
81 | |||
82 | err |= !(fcmovb(PF) == 1.0); err |= !(fcmovnb(PF) != 1.0); | ||
83 | err |= !(fcmove(PF) == 1.0); err |= !(fcmovne(PF) != 1.0); | ||
84 | err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0); | ||
85 | err |= !(fcmovu(PF) != 1.0); err |= !(fcmovnu(PF) == 1.0); | ||
86 | |||
87 | if (!err) | ||
88 | printf("[OK]\tfcmovCC\n"); | ||
89 | else | ||
90 | printf("[FAIL]\tfcmovCC errors: %d\n", err); | ||
91 | |||
92 | return err; | ||
93 | } | ||
diff --git a/tools/testing/selftests/x86/test_FCOMI.c b/tools/testing/selftests/x86/test_FCOMI.c new file mode 100644 index 000000000000..db4933e31af9 --- /dev/null +++ b/tools/testing/selftests/x86/test_FCOMI.c | |||
@@ -0,0 +1,331 @@ | |||
1 | #undef _GNU_SOURCE | ||
2 | #define _GNU_SOURCE 1 | ||
3 | #undef __USE_GNU | ||
4 | #define __USE_GNU 1 | ||
5 | #include <unistd.h> | ||
6 | #include <stdlib.h> | ||
7 | #include <string.h> | ||
8 | #include <stdio.h> | ||
9 | #include <signal.h> | ||
10 | #include <sys/types.h> | ||
11 | #include <sys/select.h> | ||
12 | #include <sys/time.h> | ||
13 | #include <sys/wait.h> | ||
14 | #include <fenv.h> | ||
15 | |||
16 | enum { | ||
17 | CF = 1 << 0, | ||
18 | PF = 1 << 2, | ||
19 | ZF = 1 << 6, | ||
20 | ARITH = CF | PF | ZF, | ||
21 | }; | ||
22 | |||
23 | long res_fcomi_pi_1; | ||
24 | long res_fcomi_1_pi; | ||
25 | long res_fcomi_1_1; | ||
26 | long res_fcomi_nan_1; | ||
27 | /* sNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx */ | ||
28 | /* qNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero) */ | ||
29 | int snan = 0x7fc11111; | ||
30 | int qnan = 0x7f811111; | ||
31 | unsigned short snan1[5]; | ||
32 | /* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */ | ||
33 | unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff }; | ||
34 | |||
35 | int test(long flags) | ||
36 | { | ||
37 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
38 | |||
39 | asm ("\n" | ||
40 | |||
41 | " push %0""\n" | ||
42 | " popf""\n" | ||
43 | " fld1""\n" | ||
44 | " fldpi""\n" | ||
45 | " fcomi %%st(1), %%st" "\n" | ||
46 | " ffree %%st(0)" "\n" | ||
47 | " ffree %%st(1)" "\n" | ||
48 | " pushf""\n" | ||
49 | " pop res_fcomi_1_pi""\n" | ||
50 | |||
51 | " push %0""\n" | ||
52 | " popf""\n" | ||
53 | " fldpi""\n" | ||
54 | " fld1""\n" | ||
55 | " fcomi %%st(1), %%st" "\n" | ||
56 | " ffree %%st(0)" "\n" | ||
57 | " ffree %%st(1)" "\n" | ||
58 | " pushf""\n" | ||
59 | " pop res_fcomi_pi_1""\n" | ||
60 | |||
61 | " push %0""\n" | ||
62 | " popf""\n" | ||
63 | " fld1""\n" | ||
64 | " fld1""\n" | ||
65 | " fcomi %%st(1), %%st" "\n" | ||
66 | " ffree %%st(0)" "\n" | ||
67 | " ffree %%st(1)" "\n" | ||
68 | " pushf""\n" | ||
69 | " pop res_fcomi_1_1""\n" | ||
70 | : | ||
71 | : "r" (flags) | ||
72 | ); | ||
73 | if ((res_fcomi_1_pi & ARITH) != (0)) { | ||
74 | printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags); | ||
75 | return 1; | ||
76 | } | ||
77 | if ((res_fcomi_pi_1 & ARITH) != (CF)) { | ||
78 | printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH); | ||
79 | return 1; | ||
80 | } | ||
81 | if ((res_fcomi_1_1 & ARITH) != (ZF)) { | ||
82 | printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags); | ||
83 | return 1; | ||
84 | } | ||
85 | if (fetestexcept(FE_INVALID) != 0) { | ||
86 | printf("[BAD]\tFE_INVALID is set in %s\n", __func__); | ||
87 | return 1; | ||
88 | } | ||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | int test_qnan(long flags) | ||
93 | { | ||
94 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
95 | |||
96 | asm ("\n" | ||
97 | " push %0""\n" | ||
98 | " popf""\n" | ||
99 | " flds qnan""\n" | ||
100 | " fld1""\n" | ||
101 | " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it | ||
102 | " fcomi %%st(1), %%st" "\n" | ||
103 | " ffree %%st(0)" "\n" | ||
104 | " ffree %%st(1)" "\n" | ||
105 | " pushf""\n" | ||
106 | " pop res_fcomi_nan_1""\n" | ||
107 | : | ||
108 | : "r" (flags) | ||
109 | ); | ||
110 | if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { | ||
111 | printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); | ||
112 | return 1; | ||
113 | } | ||
114 | if (fetestexcept(FE_INVALID) != FE_INVALID) { | ||
115 | printf("[BAD]\tFE_INVALID is not set in %s\n", __func__); | ||
116 | return 1; | ||
117 | } | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | int testu_qnan(long flags) | ||
122 | { | ||
123 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
124 | |||
125 | asm ("\n" | ||
126 | " push %0""\n" | ||
127 | " popf""\n" | ||
128 | " flds qnan""\n" | ||
129 | " fld1""\n" | ||
130 | " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it | ||
131 | " fucomi %%st(1), %%st" "\n" | ||
132 | " ffree %%st(0)" "\n" | ||
133 | " ffree %%st(1)" "\n" | ||
134 | " pushf""\n" | ||
135 | " pop res_fcomi_nan_1""\n" | ||
136 | : | ||
137 | : "r" (flags) | ||
138 | ); | ||
139 | if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { | ||
140 | printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); | ||
141 | return 1; | ||
142 | } | ||
143 | if (fetestexcept(FE_INVALID) != 0) { | ||
144 | printf("[BAD]\tFE_INVALID is set in %s\n", __func__); | ||
145 | return 1; | ||
146 | } | ||
147 | return 0; | ||
148 | } | ||
149 | |||
150 | int testu_snan(long flags) | ||
151 | { | ||
152 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
153 | |||
154 | asm ("\n" | ||
155 | " push %0""\n" | ||
156 | " popf""\n" | ||
157 | // " flds snan""\n" // WRONG, this will convert 32-bit fp snan to a *qnan* in 80-bit fp register! | ||
158 | // " fstpt snan1""\n" // if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111! | ||
159 | // " fnclex""\n" // flds of a snan raised FE_INVALID, clear it | ||
160 | " fldt snan80""\n" // fldt never raises FE_INVALID | ||
161 | " fld1""\n" | ||
162 | " fucomi %%st(1), %%st" "\n" | ||
163 | " ffree %%st(0)" "\n" | ||
164 | " ffree %%st(1)" "\n" | ||
165 | " pushf""\n" | ||
166 | " pop res_fcomi_nan_1""\n" | ||
167 | : | ||
168 | : "r" (flags) | ||
169 | ); | ||
170 | if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { | ||
171 | printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); | ||
172 | return 1; | ||
173 | } | ||
174 | // printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]); | ||
175 | if (fetestexcept(FE_INVALID) != FE_INVALID) { | ||
176 | printf("[BAD]\tFE_INVALID is not set in %s\n", __func__); | ||
177 | return 1; | ||
178 | } | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | int testp(long flags) | ||
183 | { | ||
184 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
185 | |||
186 | asm ("\n" | ||
187 | |||
188 | " push %0""\n" | ||
189 | " popf""\n" | ||
190 | " fld1""\n" | ||
191 | " fldpi""\n" | ||
192 | " fcomip %%st(1), %%st" "\n" | ||
193 | " ffree %%st(0)" "\n" | ||
194 | " pushf""\n" | ||
195 | " pop res_fcomi_1_pi""\n" | ||
196 | |||
197 | " push %0""\n" | ||
198 | " popf""\n" | ||
199 | " fldpi""\n" | ||
200 | " fld1""\n" | ||
201 | " fcomip %%st(1), %%st" "\n" | ||
202 | " ffree %%st(0)" "\n" | ||
203 | " pushf""\n" | ||
204 | " pop res_fcomi_pi_1""\n" | ||
205 | |||
206 | " push %0""\n" | ||
207 | " popf""\n" | ||
208 | " fld1""\n" | ||
209 | " fld1""\n" | ||
210 | " fcomip %%st(1), %%st" "\n" | ||
211 | " ffree %%st(0)" "\n" | ||
212 | " pushf""\n" | ||
213 | " pop res_fcomi_1_1""\n" | ||
214 | : | ||
215 | : "r" (flags) | ||
216 | ); | ||
217 | if ((res_fcomi_1_pi & ARITH) != (0)) { | ||
218 | printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags); | ||
219 | return 1; | ||
220 | } | ||
221 | if ((res_fcomi_pi_1 & ARITH) != (CF)) { | ||
222 | printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH); | ||
223 | return 1; | ||
224 | } | ||
225 | if ((res_fcomi_1_1 & ARITH) != (ZF)) { | ||
226 | printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags); | ||
227 | return 1; | ||
228 | } | ||
229 | if (fetestexcept(FE_INVALID) != 0) { | ||
230 | printf("[BAD]\tFE_INVALID is set in %s\n", __func__); | ||
231 | return 1; | ||
232 | } | ||
233 | return 0; | ||
234 | } | ||
235 | |||
236 | int testp_qnan(long flags) | ||
237 | { | ||
238 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
239 | |||
240 | asm ("\n" | ||
241 | " push %0""\n" | ||
242 | " popf""\n" | ||
243 | " flds qnan""\n" | ||
244 | " fld1""\n" | ||
245 | " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it | ||
246 | " fcomip %%st(1), %%st" "\n" | ||
247 | " ffree %%st(0)" "\n" | ||
248 | " pushf""\n" | ||
249 | " pop res_fcomi_nan_1""\n" | ||
250 | : | ||
251 | : "r" (flags) | ||
252 | ); | ||
253 | if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { | ||
254 | printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); | ||
255 | return 1; | ||
256 | } | ||
257 | if (fetestexcept(FE_INVALID) != FE_INVALID) { | ||
258 | printf("[BAD]\tFE_INVALID is not set in %s\n", __func__); | ||
259 | return 1; | ||
260 | } | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | int testup_qnan(long flags) | ||
265 | { | ||
266 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
267 | |||
268 | asm ("\n" | ||
269 | " push %0""\n" | ||
270 | " popf""\n" | ||
271 | " flds qnan""\n" | ||
272 | " fld1""\n" | ||
273 | " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it | ||
274 | " fucomip %%st(1), %%st" "\n" | ||
275 | " ffree %%st(0)" "\n" | ||
276 | " pushf""\n" | ||
277 | " pop res_fcomi_nan_1""\n" | ||
278 | : | ||
279 | : "r" (flags) | ||
280 | ); | ||
281 | if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) { | ||
282 | printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags); | ||
283 | return 1; | ||
284 | } | ||
285 | if (fetestexcept(FE_INVALID) != 0) { | ||
286 | printf("[BAD]\tFE_INVALID is set in %s\n", __func__); | ||
287 | return 1; | ||
288 | } | ||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | void sighandler(int sig) | ||
293 | { | ||
294 | printf("[FAIL]\tGot signal %d, exiting\n", sig); | ||
295 | exit(1); | ||
296 | } | ||
297 | |||
298 | int main(int argc, char **argv, char **envp) | ||
299 | { | ||
300 | int err = 0; | ||
301 | |||
302 | /* SIGILL triggers on 32-bit kernels w/o fcomi emulation | ||
303 | * when run with "no387 nofxsr". Other signals are caught | ||
304 | * just in case. | ||
305 | */ | ||
306 | signal(SIGILL, sighandler); | ||
307 | signal(SIGFPE, sighandler); | ||
308 | signal(SIGSEGV, sighandler); | ||
309 | |||
310 | printf("[RUN]\tTesting f[u]comi[p] instructions\n"); | ||
311 | err |= test(0); | ||
312 | err |= test_qnan(0); | ||
313 | err |= testu_qnan(0); | ||
314 | err |= testu_snan(0); | ||
315 | err |= test(CF|ZF|PF); | ||
316 | err |= test_qnan(CF|ZF|PF); | ||
317 | err |= testu_qnan(CF|ZF|PF); | ||
318 | err |= testu_snan(CF|ZF|PF); | ||
319 | err |= testp(0); | ||
320 | err |= testp_qnan(0); | ||
321 | err |= testup_qnan(0); | ||
322 | err |= testp(CF|ZF|PF); | ||
323 | err |= testp_qnan(CF|ZF|PF); | ||
324 | err |= testup_qnan(CF|ZF|PF); | ||
325 | if (!err) | ||
326 | printf("[OK]\tf[u]comi[p]\n"); | ||
327 | else | ||
328 | printf("[FAIL]\tf[u]comi[p] errors: %d\n", err); | ||
329 | |||
330 | return err; | ||
331 | } | ||
diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c new file mode 100644 index 000000000000..b8e61a047f6b --- /dev/null +++ b/tools/testing/selftests/x86/test_FISTTP.c | |||
@@ -0,0 +1,137 @@ | |||
1 | #undef _GNU_SOURCE | ||
2 | #define _GNU_SOURCE 1 | ||
3 | #undef __USE_GNU | ||
4 | #define __USE_GNU 1 | ||
5 | #include <unistd.h> | ||
6 | #include <stdlib.h> | ||
7 | #include <string.h> | ||
8 | #include <stdio.h> | ||
9 | #include <signal.h> | ||
10 | #include <sys/types.h> | ||
11 | #include <sys/select.h> | ||
12 | #include <sys/time.h> | ||
13 | #include <sys/wait.h> | ||
14 | #include <fenv.h> | ||
15 | |||
/*
 * Store targets for the fisttp instructions in test(). The inline assembly
 * refers to them by symbol name, so the names and external linkage must be
 * kept. They start out as all-ones (presumably so that a store that never
 * happened cannot be mistaken for an expected result).
 */
unsigned long long res64 = ~0ULL;	/* written by fisttpll */
unsigned int res32 = ~0U;		/* written by fisttpl */
unsigned short res16 = (unsigned short)~0U;	/* written by fisttp */
19 | |||
20 | int test(void) | ||
21 | { | ||
22 | int ex; | ||
23 | |||
24 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
25 | asm volatile ("\n" | ||
26 | " fld1""\n" | ||
27 | " fisttp res16""\n" | ||
28 | " fld1""\n" | ||
29 | " fisttpl res32""\n" | ||
30 | " fld1""\n" | ||
31 | " fisttpll res64""\n" | ||
32 | : : : "memory" | ||
33 | ); | ||
34 | if (res16 != 1 || res32 != 1 || res64 != 1) { | ||
35 | printf("[BAD]\tfisttp 1\n"); | ||
36 | return 1; | ||
37 | } | ||
38 | ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
39 | if (ex != 0) { | ||
40 | printf("[BAD]\tfisttp 1: wrong exception state\n"); | ||
41 | return 1; | ||
42 | } | ||
43 | |||
44 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
45 | asm volatile ("\n" | ||
46 | " fldpi""\n" | ||
47 | " fisttp res16""\n" | ||
48 | " fldpi""\n" | ||
49 | " fisttpl res32""\n" | ||
50 | " fldpi""\n" | ||
51 | " fisttpll res64""\n" | ||
52 | : : : "memory" | ||
53 | ); | ||
54 | if (res16 != 3 || res32 != 3 || res64 != 3) { | ||
55 | printf("[BAD]\tfisttp pi\n"); | ||
56 | return 1; | ||
57 | } | ||
58 | ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
59 | if (ex != FE_INEXACT) { | ||
60 | printf("[BAD]\tfisttp pi: wrong exception state\n"); | ||
61 | return 1; | ||
62 | } | ||
63 | |||
64 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
65 | asm volatile ("\n" | ||
66 | " fldpi""\n" | ||
67 | " fchs""\n" | ||
68 | " fisttp res16""\n" | ||
69 | " fldpi""\n" | ||
70 | " fchs""\n" | ||
71 | " fisttpl res32""\n" | ||
72 | " fldpi""\n" | ||
73 | " fchs""\n" | ||
74 | " fisttpll res64""\n" | ||
75 | : : : "memory" | ||
76 | ); | ||
77 | if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) { | ||
78 | printf("[BAD]\tfisttp -pi\n"); | ||
79 | return 1; | ||
80 | } | ||
81 | ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
82 | if (ex != FE_INEXACT) { | ||
83 | printf("[BAD]\tfisttp -pi: wrong exception state\n"); | ||
84 | return 1; | ||
85 | } | ||
86 | |||
87 | feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
88 | asm volatile ("\n" | ||
89 | " fldln2""\n" | ||
90 | " fisttp res16""\n" | ||
91 | " fldln2""\n" | ||
92 | " fisttpl res32""\n" | ||
93 | " fldln2""\n" | ||
94 | " fisttpll res64""\n" | ||
95 | : : : "memory" | ||
96 | ); | ||
97 | /* Test truncation to zero (round-to-nearest would give 1 here) */ | ||
98 | if (res16 != 0 || res32 != 0 || res64 != 0) { | ||
99 | printf("[BAD]\tfisttp ln2\n"); | ||
100 | return 1; | ||
101 | } | ||
102 | ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW); | ||
103 | if (ex != FE_INEXACT) { | ||
104 | printf("[BAD]\tfisttp ln2: wrong exception state\n"); | ||
105 | return 1; | ||
106 | } | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
/*
 * Handler installed by main() for SIGILL, SIGFPE and SIGSEGV.
 *
 * Prints a "[FAIL]" line naming the signal number and ends the process
 * with exit status 1, so an unexpected signal is reported as a test failure.
 */
void sighandler(int sig)
{
	fprintf(stdout, "[FAIL]\tGot signal %d, exiting\n", sig);
	exit(1);
}
116 | |||
117 | int main(int argc, char **argv, char **envp) | ||
118 | { | ||
119 | int err = 0; | ||
120 | |||
121 | /* SIGILL triggers on 32-bit kernels w/o fisttp emulation | ||
122 | * when run with "no387 nofxsr". Other signals are caught | ||
123 | * just in case. | ||
124 | */ | ||
125 | signal(SIGILL, sighandler); | ||
126 | signal(SIGFPE, sighandler); | ||
127 | signal(SIGSEGV, sighandler); | ||
128 | |||
129 | printf("[RUN]\tTesting fisttp instructions\n"); | ||
130 | err |= test(); | ||
131 | if (!err) | ||
132 | printf("[OK]\tfisttp\n"); | ||
133 | else | ||
134 | printf("[FAIL]\tfisttp errors: %d\n", err); | ||
135 | |||
136 | return err; | ||
137 | } | ||