author     Linus Torvalds <torvalds@linux-foundation.org>   2015-06-22 20:16:11 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-06-22 20:16:11 -0400
commit     e75c73ad64478c12b3a44b86a3e7f62a4f65b93e (patch)
tree       9dbb1a2a4e53b480df86c49d478751b203cdccd4
parent     cfe3eceb7a2eb91284d5605c5315249bb165e9d3 (diff)
parent     a8424003679e90b9952e20adcd1ff1560d9dd3e9 (diff)
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU updates from Ingo Molnar:
 "This tree contains two main changes:

   - The big FPU code rewrite: wide reaching cleanups and reorganization
     that pulls all the FPU code together into a clean base in
     arch/x86/fpu/.

     The resulting code is leaner and faster, and much easier to
     understand.  This enables future work to further simplify the FPU
     code (such as removing lazy FPU restores).

     By its nature these changes have a substantial regression risk:
     FPU code related bugs are long lived, because races are often
     subtle and bugs mask as user-space failures that are difficult to
     track back to kernel-side bugs.  I'm aware of no unfixed (or even
     suspected) FPU related regression so far.

   - MPX support rework/fixes.  As this is still not a released CPU
     feature, there were some buglets in the code - should be much more
     robust now (Dave Hansen)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (250 commits)
  x86/fpu: Fix double-increment in setup_xstate_features()
  x86/mpx: Allow 32-bit binaries on 64-bit kernels again
  x86/mpx: Do not count MPX VMAs as neighbors when unmapping
  x86/mpx: Rewrite the unmap code
  x86/mpx: Support 32-bit binaries on 64-bit kernels
  x86/mpx: Use 32-bit-only cmpxchg() for 32-bit apps
  x86/mpx: Introduce new 'directory entry' to 'addr' helper function
  x86/mpx: Add temporary variable to reduce masking
  x86: Make is_64bit_mm() widely available
  x86/mpx: Trace allocation of new bounds tables
  x86/mpx: Trace the attempts to find bounds tables
  x86/mpx: Trace entry to bounds exception paths
  x86/mpx: Trace #BR exceptions
  x86/mpx: Introduce a boot-time disable flag
  x86/mpx: Restrict the mmap() size check to bounds tables
  x86/mpx: Remove redundant MPX_BNDCFG_ADDR_MASK
  x86/mpx: Clean up the code by not passing a task pointer around when unnecessary
  x86/mpx: Use the new get_xsave_field_ptr() API
  x86/fpu/xstate: Wrap get_xsave_addr() to make it safer
  x86/fpu/xstate: Fix up bad get_xsave_addr() assumptions
  ...
-rw-r--r--  Documentation/kernel-parameters.txt | 4
-rw-r--r--  Documentation/preempt-locking.txt | 2
-rw-r--r--  arch/x86/Kconfig.debug | 12
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 2
-rw-r--r--  arch/x86/crypto/camellia_aesni_avx2_glue.c | 10
-rw-r--r--  arch/x86/crypto/camellia_aesni_avx_glue.c | 15
-rw-r--r--  arch/x86/crypto/cast5_avx_glue.c | 15
-rw-r--r--  arch/x86/crypto/cast6_avx_glue.c | 15
-rw-r--r--  arch/x86/crypto/crc32-pclmul_glue.c | 2
-rw-r--r--  arch/x86/crypto/crc32c-intel_glue.c | 3
-rw-r--r--  arch/x86/crypto/crct10dif-pclmul_glue.c | 2
-rw-r--r--  arch/x86/crypto/fpu.c | 2
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c | 2
-rw-r--r--  arch/x86/crypto/serpent_avx2_glue.c | 11
-rw-r--r--  arch/x86/crypto/serpent_avx_glue.c | 15
-rw-r--r--  arch/x86/crypto/sha-mb/sha1_mb.c | 5
-rw-r--r--  arch/x86/crypto/sha1_ssse3_glue.c | 16
-rw-r--r--  arch/x86/crypto/sha256_ssse3_glue.c | 16
-rw-r--r--  arch/x86/crypto/sha512_ssse3_glue.c | 16
-rw-r--r--  arch/x86/crypto/twofish_avx_glue.c | 16
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 13
-rw-r--r--  arch/x86/include/asm/alternative.h | 6
-rw-r--r--  arch/x86/include/asm/crypto/glue_helper.h | 2
-rw-r--r--  arch/x86/include/asm/efi.h | 2
-rw-r--r--  arch/x86/include/asm/fpu-internal.h | 626
-rw-r--r--  arch/x86/include/asm/fpu/api.h | 48
-rw-r--r--  arch/x86/include/asm/fpu/internal.h | 694
-rw-r--r--  arch/x86/include/asm/fpu/regset.h | 21
-rw-r--r--  arch/x86/include/asm/fpu/signal.h | 33
-rw-r--r--  arch/x86/include/asm/fpu/types.h | 293
-rw-r--r--  arch/x86/include/asm/fpu/xstate.h | 46
-rw-r--r--  arch/x86/include/asm/i387.h | 108
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 2
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 13
-rw-r--r--  arch/x86/include/asm/mpx.h | 74
-rw-r--r--  arch/x86/include/asm/processor.h | 160
-rw-r--r--  arch/x86/include/asm/simd.h | 2
-rw-r--r--  arch/x86/include/asm/stackprotector.h | 2
-rw-r--r--  arch/x86/include/asm/suspend_32.h | 2
-rw-r--r--  arch/x86/include/asm/suspend_64.h | 2
-rw-r--r--  arch/x86/include/asm/trace/mpx.h | 132
-rw-r--r--  arch/x86/include/asm/user.h | 12
-rw-r--r--  arch/x86/include/asm/xcr.h | 49
-rw-r--r--  arch/x86/include/asm/xor.h | 2
-rw-r--r--  arch/x86/include/asm/xor_32.h | 2
-rw-r--r--  arch/x86/include/asm/xor_avx.h | 2
-rw-r--r--  arch/x86/include/asm/xsave.h | 257
-rw-r--r--  arch/x86/include/uapi/asm/sigcontext.h | 8
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/alternative.c | 5
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 55
-rw-r--r--  arch/x86/kernel/cpu/common.c | 47
-rw-r--r--  arch/x86/kernel/fpu/Makefile | 5
-rw-r--r--  arch/x86/kernel/fpu/bugs.c | 71
-rw-r--r--  arch/x86/kernel/fpu/core.c | 523
-rw-r--r--  arch/x86/kernel/fpu/init.c | 354
-rw-r--r--  arch/x86/kernel/fpu/regset.c | 356
-rw-r--r--  arch/x86/kernel/fpu/signal.c | 404
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 461
-rw-r--r--  arch/x86/kernel/i387.c | 671
-rw-r--r--  arch/x86/kernel/process.c | 54
-rw-r--r--  arch/x86/kernel/process_32.c | 15
-rw-r--r--  arch/x86/kernel/process_64.c | 13
-rw-r--r--  arch/x86/kernel/ptrace.c | 12
-rw-r--r--  arch/x86/kernel/signal.c | 38
-rw-r--r--  arch/x86/kernel/smpboot.c | 3
-rw-r--r--  arch/x86/kernel/traps.c | 134
-rw-r--r--  arch/x86/kernel/uprobes.c | 10
-rw-r--r--  arch/x86/kernel/xsave.c | 724
-rw-r--r--  arch/x86/kvm/cpuid.c | 4
-rw-r--r--  arch/x86/kvm/vmx.c | 5
-rw-r--r--  arch/x86/kvm/x86.c | 68
-rw-r--r--  arch/x86/lguest/boot.c | 2
-rw-r--r--  arch/x86/lib/mmx_32.c | 2
-rw-r--r--  arch/x86/math-emu/fpu_aux.c | 4
-rw-r--r--  arch/x86/math-emu/fpu_entry.c | 20
-rw-r--r--  arch/x86/math-emu/fpu_system.h | 2
-rw-r--r--  arch/x86/mm/mpx.c | 519
-rw-r--r--  arch/x86/power/cpu.c | 11
-rw-r--r--  arch/x86/xen/enlighten.c | 2
-rw-r--r--  drivers/char/hw_random/via-rng.c | 2
-rw-r--r--  drivers/crypto/padlock-aes.c | 2
-rw-r--r--  drivers/crypto/padlock-sha.c | 2
-rw-r--r--  drivers/lguest/x86/core.c | 12
-rw-r--r--  kernel/sys.c | 8
-rw-r--r--  lib/raid6/x86.h | 2
86 files changed, 4082 insertions, 3336 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 60c9d6d0fd96..705ad8e66703 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -937,6 +937,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
937 Enable debug messages at boot time. See 937 Enable debug messages at boot time. See
938 Documentation/dynamic-debug-howto.txt for details. 938 Documentation/dynamic-debug-howto.txt for details.
939 939
940 nompx [X86] Disables Intel Memory Protection Extensions.
941 See Documentation/x86/intel_mpx.txt for more
942 information about the feature.
943
940 eagerfpu= [X86] 944 eagerfpu= [X86]
941 on enable eager fpu restore 945 on enable eager fpu restore
942 off disable eager fpu restore 946 off disable eager fpu restore
diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt
index 57883ca2498b..e89ce6624af2 100644
--- a/Documentation/preempt-locking.txt
+++ b/Documentation/preempt-locking.txt
@@ -48,7 +48,7 @@ preemption must be disabled around such regions.
48 48
49Note, some FPU functions are already explicitly preempt safe. For example, 49Note, some FPU functions are already explicitly preempt safe. For example,
50kernel_fpu_begin and kernel_fpu_end will disable and enable preemption. 50kernel_fpu_begin and kernel_fpu_end will disable and enable preemption.
51However, math_state_restore must be called with preemption disabled. 51However, fpu__restore() must be called with preemption disabled.
52 52
53 53
54RULE #3: Lock acquire and release must be performed by same task 54RULE #3: Lock acquire and release must be performed by same task
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 72484a645f05..2fd3ebbb4e33 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -332,4 +332,16 @@ config X86_DEBUG_STATIC_CPU_HAS
332 332
333 If unsure, say N. 333 If unsure, say N.
334 334
335config X86_DEBUG_FPU
336 bool "Debug the x86 FPU code"
337 depends on DEBUG_KERNEL
338 default y
339 ---help---
340 If this option is enabled then there will be extra sanity
341 checks and (boot time) debug printouts added to the kernel.
342 This debugging adds some small amount of runtime overhead
343 to the kernel.
344
345 If unsure, say N.
346
335endmenu 347endmenu
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 112cefacf2af..b419f43ce0c5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -32,7 +32,7 @@
32#include <crypto/lrw.h> 32#include <crypto/lrw.h>
33#include <crypto/xts.h> 33#include <crypto/xts.h>
34#include <asm/cpu_device_id.h> 34#include <asm/cpu_device_id.h>
35#include <asm/i387.h> 35#include <asm/fpu/api.h>
36#include <asm/crypto/aes.h> 36#include <asm/crypto/aes.h>
37#include <crypto/ablk_helper.h> 37#include <crypto/ablk_helper.h>
38#include <crypto/scatterwalk.h> 38#include <crypto/scatterwalk.h>
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index baf0ac21ace5..4c65c70e628b 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -19,8 +19,7 @@
19#include <crypto/ctr.h> 19#include <crypto/ctr.h>
20#include <crypto/lrw.h> 20#include <crypto/lrw.h>
21#include <crypto/xts.h> 21#include <crypto/xts.h>
22#include <asm/xcr.h> 22#include <asm/fpu/api.h>
23#include <asm/xsave.h>
24#include <asm/crypto/camellia.h> 23#include <asm/crypto/camellia.h>
25#include <asm/crypto/glue_helper.h> 24#include <asm/crypto/glue_helper.h>
26 25
@@ -561,16 +560,15 @@ static struct crypto_alg cmll_algs[10] = { {
561 560
562static int __init camellia_aesni_init(void) 561static int __init camellia_aesni_init(void)
563{ 562{
564 u64 xcr0; 563 const char *feature_name;
565 564
566 if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { 565 if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
567 pr_info("AVX2 or AES-NI instructions are not detected.\n"); 566 pr_info("AVX2 or AES-NI instructions are not detected.\n");
568 return -ENODEV; 567 return -ENODEV;
569 } 568 }
570 569
571 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 570 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
572 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { 571 pr_info("CPU feature '%s' is not supported.\n", feature_name);
573 pr_info("AVX2 detected but unusable.\n");
574 return -ENODEV; 572 return -ENODEV;
575 } 573 }
576 574
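The conversion above repeats across the other AVX glue modules in this series: each open-coded xgetbv(XCR_XFEATURE_ENABLED_MASK) probe is replaced by the new cpu_has_xfeatures() helper declared in <asm/fpu/api.h>. A minimal sketch of the resulting init-time check, assuming the XSTATE_SSE/XSTATE_YMM masks come from <asm/fpu/xstate.h>; the function name is illustrative and not taken from any single driver:

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <asm/fpu/api.h>	/* cpu_has_xfeatures() */
	#include <asm/fpu/xstate.h>	/* XSTATE_SSE, XSTATE_YMM (header placement assumed) */

	static int __init example_avx_glue_init(void)
	{
		const char *feature_name;

		/*
		 * One call now covers what each driver used to open-code:
		 * the OSXSAVE check plus the XCR0 read via xgetbv().
		 */
		if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
			pr_info("CPU feature '%s' is not supported.\n", feature_name);
			return -ENODEV;
		}

		/* ... register the crypto algorithms here ... */
		return 0;
	}
	module_init(example_avx_glue_init);

On failure the helper also reports which xfeature is missing via feature_name, so the drivers no longer need the separate "AVX detected but unusable" message.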
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 78818a1e73e3..80a0e4389c9a 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -19,8 +19,7 @@
19#include <crypto/ctr.h> 19#include <crypto/ctr.h>
20#include <crypto/lrw.h> 20#include <crypto/lrw.h>
21#include <crypto/xts.h> 21#include <crypto/xts.h>
22#include <asm/xcr.h> 22#include <asm/fpu/api.h>
23#include <asm/xsave.h>
24#include <asm/crypto/camellia.h> 23#include <asm/crypto/camellia.h>
25#include <asm/crypto/glue_helper.h> 24#include <asm/crypto/glue_helper.h>
26 25
@@ -553,16 +552,10 @@ static struct crypto_alg cmll_algs[10] = { {
553 552
554static int __init camellia_aesni_init(void) 553static int __init camellia_aesni_init(void)
555{ 554{
556 u64 xcr0; 555 const char *feature_name;
557 556
558 if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { 557 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
559 pr_info("AVX or AES-NI instructions are not detected.\n"); 558 pr_info("CPU feature '%s' is not supported.\n", feature_name);
560 return -ENODEV;
561 }
562
563 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
564 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
565 pr_info("AVX detected but unusable.\n");
566 return -ENODEV; 559 return -ENODEV;
567 } 560 }
568 561
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 236c80974457..be00aa48b2b5 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -31,8 +31,7 @@
31#include <crypto/cast5.h> 31#include <crypto/cast5.h>
32#include <crypto/cryptd.h> 32#include <crypto/cryptd.h>
33#include <crypto/ctr.h> 33#include <crypto/ctr.h>
34#include <asm/xcr.h> 34#include <asm/fpu/api.h>
35#include <asm/xsave.h>
36#include <asm/crypto/glue_helper.h> 35#include <asm/crypto/glue_helper.h>
37 36
38#define CAST5_PARALLEL_BLOCKS 16 37#define CAST5_PARALLEL_BLOCKS 16
@@ -468,16 +467,10 @@ static struct crypto_alg cast5_algs[6] = { {
468 467
469static int __init cast5_init(void) 468static int __init cast5_init(void)
470{ 469{
471 u64 xcr0; 470 const char *feature_name;
472 471
473 if (!cpu_has_avx || !cpu_has_osxsave) { 472 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
474 pr_info("AVX instructions are not detected.\n"); 473 pr_info("CPU feature '%s' is not supported.\n", feature_name);
475 return -ENODEV;
476 }
477
478 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
479 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
480 pr_info("AVX detected but unusable.\n");
481 return -ENODEV; 474 return -ENODEV;
482 } 475 }
483 476
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index f448810ca4ac..5dbba7224221 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -36,8 +36,7 @@
36#include <crypto/ctr.h> 36#include <crypto/ctr.h>
37#include <crypto/lrw.h> 37#include <crypto/lrw.h>
38#include <crypto/xts.h> 38#include <crypto/xts.h>
39#include <asm/xcr.h> 39#include <asm/fpu/api.h>
40#include <asm/xsave.h>
41#include <asm/crypto/glue_helper.h> 40#include <asm/crypto/glue_helper.h>
42 41
43#define CAST6_PARALLEL_BLOCKS 8 42#define CAST6_PARALLEL_BLOCKS 8
@@ -590,16 +589,10 @@ static struct crypto_alg cast6_algs[10] = { {
590 589
591static int __init cast6_init(void) 590static int __init cast6_init(void)
592{ 591{
593 u64 xcr0; 592 const char *feature_name;
594 593
595 if (!cpu_has_avx || !cpu_has_osxsave) { 594 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
596 pr_info("AVX instructions are not detected.\n"); 595 pr_info("CPU feature '%s' is not supported.\n", feature_name);
597 return -ENODEV;
598 }
599
600 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
601 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
602 pr_info("AVX detected but unusable.\n");
603 return -ENODEV; 596 return -ENODEV;
604 } 597 }
605 598
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 1937fc1d8763..07d2c6c86a54 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -35,7 +35,7 @@
35 35
36#include <asm/cpufeature.h> 36#include <asm/cpufeature.h>
37#include <asm/cpu_device_id.h> 37#include <asm/cpu_device_id.h>
38#include <asm/i387.h> 38#include <asm/fpu/api.h>
39 39
40#define CHKSUM_BLOCK_SIZE 1 40#define CHKSUM_BLOCK_SIZE 1
41#define CHKSUM_DIGEST_SIZE 4 41#define CHKSUM_DIGEST_SIZE 4
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 28640c3d6af7..81a595d75cf5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -32,8 +32,7 @@
32 32
33#include <asm/cpufeature.h> 33#include <asm/cpufeature.h>
34#include <asm/cpu_device_id.h> 34#include <asm/cpu_device_id.h>
35#include <asm/i387.h> 35#include <asm/fpu/internal.h>
36#include <asm/fpu-internal.h>
37 36
38#define CHKSUM_BLOCK_SIZE 1 37#define CHKSUM_BLOCK_SIZE 1
39#define CHKSUM_DIGEST_SIZE 4 38#define CHKSUM_DIGEST_SIZE 4
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index b6c67bf30fdf..a3fcfc97a311 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -29,7 +29,7 @@
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/string.h> 30#include <linux/string.h>
31#include <linux/kernel.h> 31#include <linux/kernel.h>
32#include <asm/i387.h> 32#include <asm/fpu/api.h>
33#include <asm/cpufeature.h> 33#include <asm/cpufeature.h>
34#include <asm/cpu_device_id.h> 34#include <asm/cpu_device_id.h>
35 35
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index f368ba261739..5a2f30f9f52d 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -18,7 +18,7 @@
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/crypto.h> 20#include <linux/crypto.h>
21#include <asm/i387.h> 21#include <asm/fpu/api.h>
22 22
23struct crypto_fpu_ctx { 23struct crypto_fpu_ctx {
24 struct crypto_blkcipher *child; 24 struct crypto_blkcipher *child;
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 2079baf06bdd..64d7cf1b50e1 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -19,7 +19,7 @@
19#include <crypto/cryptd.h> 19#include <crypto/cryptd.h>
20#include <crypto/gf128mul.h> 20#include <crypto/gf128mul.h>
21#include <crypto/internal/hash.h> 21#include <crypto/internal/hash.h>
22#include <asm/i387.h> 22#include <asm/fpu/api.h>
23#include <asm/cpu_device_id.h> 23#include <asm/cpu_device_id.h>
24 24
25#define GHASH_BLOCK_SIZE 16 25#define GHASH_BLOCK_SIZE 16
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 2f63dc89e7a9..7d838dc4d888 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -20,8 +20,7 @@
20#include <crypto/lrw.h> 20#include <crypto/lrw.h>
21#include <crypto/xts.h> 21#include <crypto/xts.h>
22#include <crypto/serpent.h> 22#include <crypto/serpent.h>
23#include <asm/xcr.h> 23#include <asm/fpu/api.h>
24#include <asm/xsave.h>
25#include <asm/crypto/serpent-avx.h> 24#include <asm/crypto/serpent-avx.h>
26#include <asm/crypto/glue_helper.h> 25#include <asm/crypto/glue_helper.h>
27 26
@@ -537,16 +536,14 @@ static struct crypto_alg srp_algs[10] = { {
537 536
538static int __init init(void) 537static int __init init(void)
539{ 538{
540 u64 xcr0; 539 const char *feature_name;
541 540
542 if (!cpu_has_avx2 || !cpu_has_osxsave) { 541 if (!cpu_has_avx2 || !cpu_has_osxsave) {
543 pr_info("AVX2 instructions are not detected.\n"); 542 pr_info("AVX2 instructions are not detected.\n");
544 return -ENODEV; 543 return -ENODEV;
545 } 544 }
546 545 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
547 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); 546 pr_info("CPU feature '%s' is not supported.\n", feature_name);
548 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
549 pr_info("AVX detected but unusable.\n");
550 return -ENODEV; 547 return -ENODEV;
551 } 548 }
552 549
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index c8d478af8456..da7dafc9b16d 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -36,8 +36,7 @@
36#include <crypto/ctr.h> 36#include <crypto/ctr.h>
37#include <crypto/lrw.h> 37#include <crypto/lrw.h>
38#include <crypto/xts.h> 38#include <crypto/xts.h>
39#include <asm/xcr.h> 39#include <asm/fpu/api.h>
40#include <asm/xsave.h>
41#include <asm/crypto/serpent-avx.h> 40#include <asm/crypto/serpent-avx.h>
42#include <asm/crypto/glue_helper.h> 41#include <asm/crypto/glue_helper.h>
43 42
@@ -596,16 +595,10 @@ static struct crypto_alg serpent_algs[10] = { {
596 595
597static int __init serpent_init(void) 596static int __init serpent_init(void)
598{ 597{
599 u64 xcr0; 598 const char *feature_name;
600 599
601 if (!cpu_has_avx || !cpu_has_osxsave) { 600 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
602 printk(KERN_INFO "AVX instructions are not detected.\n"); 601 pr_info("CPU feature '%s' is not supported.\n", feature_name);
603 return -ENODEV;
604 }
605
606 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
607 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
608 printk(KERN_INFO "AVX detected but unusable.\n");
609 return -ENODEV; 602 return -ENODEV;
610 } 603 }
611 604
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index e510b1c5d690..f53ed1dc88ea 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -65,11 +65,8 @@
65#include <crypto/mcryptd.h> 65#include <crypto/mcryptd.h>
66#include <crypto/crypto_wq.h> 66#include <crypto/crypto_wq.h>
67#include <asm/byteorder.h> 67#include <asm/byteorder.h>
68#include <asm/i387.h>
69#include <asm/xcr.h>
70#include <asm/xsave.h>
71#include <linux/hardirq.h> 68#include <linux/hardirq.h>
72#include <asm/fpu-internal.h> 69#include <asm/fpu/api.h>
73#include "sha_mb_ctx.h" 70#include "sha_mb_ctx.h"
74 71
75#define FLUSH_INTERVAL 1000 /* in usec */ 72#define FLUSH_INTERVAL 1000 /* in usec */
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 33d1b9dc14cc..7c48e8b20848 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -29,9 +29,7 @@
29#include <linux/types.h> 29#include <linux/types.h>
30#include <crypto/sha.h> 30#include <crypto/sha.h>
31#include <crypto/sha1_base.h> 31#include <crypto/sha1_base.h>
32#include <asm/i387.h> 32#include <asm/fpu/api.h>
33#include <asm/xcr.h>
34#include <asm/xsave.h>
35 33
36 34
37asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, 35asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
@@ -123,15 +121,9 @@ static struct shash_alg alg = {
123#ifdef CONFIG_AS_AVX 121#ifdef CONFIG_AS_AVX
124static bool __init avx_usable(void) 122static bool __init avx_usable(void)
125{ 123{
126 u64 xcr0; 124 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
127 125 if (cpu_has_avx)
128 if (!cpu_has_avx || !cpu_has_osxsave) 126 pr_info("AVX detected but unusable.\n");
129 return false;
130
131 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
132 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
133 pr_info("AVX detected but unusable.\n");
134
135 return false; 127 return false;
136 } 128 }
137 129
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index ccc338881ee8..f8097fc0d1d1 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -37,9 +37,7 @@
37#include <linux/types.h> 37#include <linux/types.h>
38#include <crypto/sha.h> 38#include <crypto/sha.h>
39#include <crypto/sha256_base.h> 39#include <crypto/sha256_base.h>
40#include <asm/i387.h> 40#include <asm/fpu/api.h>
41#include <asm/xcr.h>
42#include <asm/xsave.h>
43#include <linux/string.h> 41#include <linux/string.h>
44 42
45asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, 43asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
@@ -132,15 +130,9 @@ static struct shash_alg algs[] = { {
132#ifdef CONFIG_AS_AVX 130#ifdef CONFIG_AS_AVX
133static bool __init avx_usable(void) 131static bool __init avx_usable(void)
134{ 132{
135 u64 xcr0; 133 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
136 134 if (cpu_has_avx)
137 if (!cpu_has_avx || !cpu_has_osxsave) 135 pr_info("AVX detected but unusable.\n");
138 return false;
139
140 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
141 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
142 pr_info("AVX detected but unusable.\n");
143
144 return false; 136 return false;
145 } 137 }
146 138
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index d9fa4c1e063f..2edad7b81870 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -35,9 +35,7 @@
35#include <linux/types.h> 35#include <linux/types.h>
36#include <crypto/sha.h> 36#include <crypto/sha.h>
37#include <crypto/sha512_base.h> 37#include <crypto/sha512_base.h>
38#include <asm/i387.h> 38#include <asm/fpu/api.h>
39#include <asm/xcr.h>
40#include <asm/xsave.h>
41 39
42#include <linux/string.h> 40#include <linux/string.h>
43 41
@@ -131,15 +129,9 @@ static struct shash_alg algs[] = { {
131#ifdef CONFIG_AS_AVX 129#ifdef CONFIG_AS_AVX
132static bool __init avx_usable(void) 130static bool __init avx_usable(void)
133{ 131{
134 u64 xcr0; 132 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
135 133 if (cpu_has_avx)
136 if (!cpu_has_avx || !cpu_has_osxsave) 134 pr_info("AVX detected but unusable.\n");
137 return false;
138
139 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
140 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
141 pr_info("AVX detected but unusable.\n");
142
143 return false; 135 return false;
144 } 136 }
145 137
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b5e2d5651851..c2bd0ce718ee 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -36,9 +36,7 @@
36#include <crypto/ctr.h> 36#include <crypto/ctr.h>
37#include <crypto/lrw.h> 37#include <crypto/lrw.h>
38#include <crypto/xts.h> 38#include <crypto/xts.h>
39#include <asm/i387.h> 39#include <asm/fpu/api.h>
40#include <asm/xcr.h>
41#include <asm/xsave.h>
42#include <asm/crypto/twofish.h> 40#include <asm/crypto/twofish.h>
43#include <asm/crypto/glue_helper.h> 41#include <asm/crypto/glue_helper.h>
44#include <crypto/scatterwalk.h> 42#include <crypto/scatterwalk.h>
@@ -558,16 +556,10 @@ static struct crypto_alg twofish_algs[10] = { {
558 556
559static int __init twofish_init(void) 557static int __init twofish_init(void)
560{ 558{
561 u64 xcr0; 559 const char *feature_name;
562 560
563 if (!cpu_has_avx || !cpu_has_osxsave) { 561 if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
564 printk(KERN_INFO "AVX instructions are not detected.\n"); 562 pr_info("CPU feature '%s' is not supported.\n", feature_name);
565 return -ENODEV;
566 }
567
568 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
569 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
570 printk(KERN_INFO "AVX detected but unusable.\n");
571 return -ENODEV; 563 return -ENODEV;
572 } 564 }
573 565
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index c81d35e6c7f1..ae3a29ae875b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -21,8 +21,8 @@
21#include <linux/binfmts.h> 21#include <linux/binfmts.h>
22#include <asm/ucontext.h> 22#include <asm/ucontext.h>
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24#include <asm/i387.h> 24#include <asm/fpu/internal.h>
25#include <asm/fpu-internal.h> 25#include <asm/fpu/signal.h>
26#include <asm/ptrace.h> 26#include <asm/ptrace.h>
27#include <asm/ia32_unistd.h> 27#include <asm/ia32_unistd.h>
28#include <asm/user32.h> 28#include <asm/user32.h>
@@ -198,7 +198,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
198 buf = compat_ptr(tmp); 198 buf = compat_ptr(tmp);
199 } get_user_catch(err); 199 } get_user_catch(err);
200 200
201 err |= restore_xstate_sig(buf, 1); 201 err |= fpu__restore_sig(buf, 1);
202 202
203 force_iret(); 203 force_iret();
204 204
@@ -308,6 +308,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
308 size_t frame_size, 308 size_t frame_size,
309 void __user **fpstate) 309 void __user **fpstate)
310{ 310{
311 struct fpu *fpu = &current->thread.fpu;
311 unsigned long sp; 312 unsigned long sp;
312 313
313 /* Default to using normal stack */ 314 /* Default to using normal stack */
@@ -322,12 +323,12 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
322 ksig->ka.sa.sa_restorer) 323 ksig->ka.sa.sa_restorer)
323 sp = (unsigned long) ksig->ka.sa.sa_restorer; 324 sp = (unsigned long) ksig->ka.sa.sa_restorer;
324 325
325 if (used_math()) { 326 if (fpu->fpstate_active) {
326 unsigned long fx_aligned, math_size; 327 unsigned long fx_aligned, math_size;
327 328
328 sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); 329 sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
329 *fpstate = (struct _fpstate_ia32 __user *) sp; 330 *fpstate = (struct _fpstate_ia32 __user *) sp;
330 if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, 331 if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
331 math_size) < 0) 332 math_size) < 0)
332 return (void __user *) -1L; 333 return (void __user *) -1L;
333 } 334 }
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ba32af062f61..7bfc85bbb8ff 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -52,6 +52,12 @@ struct alt_instr {
52 u8 padlen; /* length of build-time padding */ 52 u8 padlen; /* length of build-time padding */
53} __packed; 53} __packed;
54 54
55/*
56 * Debug flag that can be tested to see whether alternative
57 * instructions were patched in already:
58 */
59extern int alternatives_patched;
60
55extern void alternative_instructions(void); 61extern void alternative_instructions(void);
56extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); 62extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
57 63
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 1eef55596e82..03bb1065c335 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -7,7 +7,7 @@
7 7
8#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/crypto.h> 9#include <linux/crypto.h>
10#include <asm/i387.h> 10#include <asm/fpu/api.h>
11#include <crypto/b128ops.h> 11#include <crypto/b128ops.h>
12 12
13typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); 13typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 3738b138b843..155162ea0e00 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,7 +1,7 @@
1#ifndef _ASM_X86_EFI_H 1#ifndef _ASM_X86_EFI_H
2#define _ASM_X86_EFI_H 2#define _ASM_X86_EFI_H
3 3
4#include <asm/i387.h> 4#include <asm/fpu/api.h>
5#include <asm/pgtable.h> 5#include <asm/pgtable.h>
6 6
7/* 7/*
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
deleted file mode 100644
index da5e96756570..000000000000
--- a/arch/x86/include/asm/fpu-internal.h
+++ /dev/null
@@ -1,626 +0,0 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002
8 */
9
10#ifndef _FPU_INTERNAL_H
11#define _FPU_INTERNAL_H
12
13#include <linux/kernel_stat.h>
14#include <linux/regset.h>
15#include <linux/compat.h>
16#include <linux/slab.h>
17#include <asm/asm.h>
18#include <asm/cpufeature.h>
19#include <asm/processor.h>
20#include <asm/sigcontext.h>
21#include <asm/user.h>
22#include <asm/uaccess.h>
23#include <asm/xsave.h>
24#include <asm/smap.h>
25
26#ifdef CONFIG_X86_64
27# include <asm/sigcontext32.h>
28# include <asm/user32.h>
29struct ksignal;
30int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
31 compat_sigset_t *set, struct pt_regs *regs);
32int ia32_setup_frame(int sig, struct ksignal *ksig,
33 compat_sigset_t *set, struct pt_regs *regs);
34#else
35# define user_i387_ia32_struct user_i387_struct
36# define user32_fxsr_struct user_fxsr_struct
37# define ia32_setup_frame __setup_frame
38# define ia32_setup_rt_frame __setup_rt_frame
39#endif
40
41extern unsigned int mxcsr_feature_mask;
42extern void fpu_init(void);
43extern void eager_fpu_init(void);
44
45DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
46
47extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
48 struct task_struct *tsk);
49extern void convert_to_fxsr(struct task_struct *tsk,
50 const struct user_i387_ia32_struct *env);
51
52extern user_regset_active_fn fpregs_active, xfpregs_active;
53extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
54 xstateregs_get;
55extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
56 xstateregs_set;
57
58/*
59 * xstateregs_active == fpregs_active. Please refer to the comment
60 * at the definition of fpregs_active.
61 */
62#define xstateregs_active fpregs_active
63
64#ifdef CONFIG_MATH_EMULATION
65extern void finit_soft_fpu(struct i387_soft_struct *soft);
66#else
67static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
68#endif
69
70/*
71 * Must be run with preemption disabled: this clears the fpu_owner_task,
72 * on this CPU.
73 *
74 * This will disable any lazy FPU state restore of the current FPU state,
75 * but if the current thread owns the FPU, it will still be saved by.
76 */
77static inline void __cpu_disable_lazy_restore(unsigned int cpu)
78{
79 per_cpu(fpu_owner_task, cpu) = NULL;
80}
81
82/*
83 * Used to indicate that the FPU state in memory is newer than the FPU
84 * state in registers, and the FPU state should be reloaded next time the
85 * task is run. Only safe on the current task, or non-running tasks.
86 */
87static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
88{
89 tsk->thread.fpu.last_cpu = ~0;
90}
91
92static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
93{
94 return new == this_cpu_read_stable(fpu_owner_task) &&
95 cpu == new->thread.fpu.last_cpu;
96}
97
98static inline int is_ia32_compat_frame(void)
99{
100 return config_enabled(CONFIG_IA32_EMULATION) &&
101 test_thread_flag(TIF_IA32);
102}
103
104static inline int is_ia32_frame(void)
105{
106 return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
107}
108
109static inline int is_x32_frame(void)
110{
111 return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
112}
113
114#define X87_FSW_ES (1 << 7) /* Exception Summary */
115
116static __always_inline __pure bool use_eager_fpu(void)
117{
118 return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
119}
120
121static __always_inline __pure bool use_xsaveopt(void)
122{
123 return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
124}
125
126static __always_inline __pure bool use_xsave(void)
127{
128 return static_cpu_has_safe(X86_FEATURE_XSAVE);
129}
130
131static __always_inline __pure bool use_fxsr(void)
132{
133 return static_cpu_has_safe(X86_FEATURE_FXSR);
134}
135
136static inline void fx_finit(struct i387_fxsave_struct *fx)
137{
138 fx->cwd = 0x37f;
139 fx->mxcsr = MXCSR_DEFAULT;
140}
141
142extern void __sanitize_i387_state(struct task_struct *);
143
144static inline void sanitize_i387_state(struct task_struct *tsk)
145{
146 if (!use_xsaveopt())
147 return;
148 __sanitize_i387_state(tsk);
149}
150
151#define user_insn(insn, output, input...) \
152({ \
153 int err; \
154 asm volatile(ASM_STAC "\n" \
155 "1:" #insn "\n\t" \
156 "2: " ASM_CLAC "\n" \
157 ".section .fixup,\"ax\"\n" \
158 "3: movl $-1,%[err]\n" \
159 " jmp 2b\n" \
160 ".previous\n" \
161 _ASM_EXTABLE(1b, 3b) \
162 : [err] "=r" (err), output \
163 : "0"(0), input); \
164 err; \
165})
166
167#define check_insn(insn, output, input...) \
168({ \
169 int err; \
170 asm volatile("1:" #insn "\n\t" \
171 "2:\n" \
172 ".section .fixup,\"ax\"\n" \
173 "3: movl $-1,%[err]\n" \
174 " jmp 2b\n" \
175 ".previous\n" \
176 _ASM_EXTABLE(1b, 3b) \
177 : [err] "=r" (err), output \
178 : "0"(0), input); \
179 err; \
180})
181
182static inline int fsave_user(struct i387_fsave_struct __user *fx)
183{
184 return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
185}
186
187static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
188{
189 if (config_enabled(CONFIG_X86_32))
190 return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
191 else if (config_enabled(CONFIG_AS_FXSAVEQ))
192 return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
193
194 /* See comment in fpu_fxsave() below. */
195 return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
196}
197
198static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
199{
200 if (config_enabled(CONFIG_X86_32))
201 return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
202 else if (config_enabled(CONFIG_AS_FXSAVEQ))
203 return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
204
205 /* See comment in fpu_fxsave() below. */
206 return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
207 "m" (*fx));
208}
209
210static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
211{
212 if (config_enabled(CONFIG_X86_32))
213 return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
214 else if (config_enabled(CONFIG_AS_FXSAVEQ))
215 return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
216
217 /* See comment in fpu_fxsave() below. */
218 return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
219 "m" (*fx));
220}
221
222static inline int frstor_checking(struct i387_fsave_struct *fx)
223{
224 return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
225}
226
227static inline int frstor_user(struct i387_fsave_struct __user *fx)
228{
229 return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
230}
231
232static inline void fpu_fxsave(struct fpu *fpu)
233{
234 if (config_enabled(CONFIG_X86_32))
235 asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
236 else if (config_enabled(CONFIG_AS_FXSAVEQ))
237 asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
238 else {
239 /* Using "rex64; fxsave %0" is broken because, if the memory
240 * operand uses any extended registers for addressing, a second
241 * REX prefix will be generated (to the assembler, rex64
242 * followed by semicolon is a separate instruction), and hence
243 * the 64-bitness is lost.
244 *
245 * Using "fxsaveq %0" would be the ideal choice, but is only
246 * supported starting with gas 2.16.
247 *
248 * Using, as a workaround, the properly prefixed form below
249 * isn't accepted by any binutils version so far released,
250 * complaining that the same type of prefix is used twice if
251 * an extended register is needed for addressing (fix submitted
252 * to mainline 2005-11-21).
253 *
254 * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
255 *
256 * This, however, we can work around by forcing the compiler to
257 * select an addressing mode that doesn't require extended
258 * registers.
259 */
260 asm volatile( "rex64/fxsave (%[fx])"
261 : "=m" (fpu->state->fxsave)
262 : [fx] "R" (&fpu->state->fxsave));
263 }
264}
265
266/*
267 * These must be called with preempt disabled. Returns
268 * 'true' if the FPU state is still intact.
269 */
270static inline int fpu_save_init(struct fpu *fpu)
271{
272 if (use_xsave()) {
273 fpu_xsave(fpu);
274
275 /*
276 * xsave header may indicate the init state of the FP.
277 */
278 if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
279 return 1;
280 } else if (use_fxsr()) {
281 fpu_fxsave(fpu);
282 } else {
283 asm volatile("fnsave %[fx]; fwait"
284 : [fx] "=m" (fpu->state->fsave));
285 return 0;
286 }
287
288 /*
289 * If exceptions are pending, we need to clear them so
290 * that we don't randomly get exceptions later.
291 *
292 * FIXME! Is this perhaps only true for the old-style
293 * irq13 case? Maybe we could leave the x87 state
294 * intact otherwise?
295 */
296 if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
297 asm volatile("fnclex");
298 return 0;
299 }
300 return 1;
301}
302
303static inline int __save_init_fpu(struct task_struct *tsk)
304{
305 return fpu_save_init(&tsk->thread.fpu);
306}
307
308static inline int fpu_restore_checking(struct fpu *fpu)
309{
310 if (use_xsave())
311 return fpu_xrstor_checking(&fpu->state->xsave);
312 else if (use_fxsr())
313 return fxrstor_checking(&fpu->state->fxsave);
314 else
315 return frstor_checking(&fpu->state->fsave);
316}
317
318static inline int restore_fpu_checking(struct task_struct *tsk)
319{
320 /*
321 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
322 * pending. Clear the x87 state here by setting it to fixed values.
323 * "m" is a random variable that should be in L1.
324 */
325 if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
326 asm volatile(
327 "fnclex\n\t"
328 "emms\n\t"
329 "fildl %P[addr]" /* set F?P to defined value */
330 : : [addr] "m" (tsk->thread.fpu.has_fpu));
331 }
332
333 return fpu_restore_checking(&tsk->thread.fpu);
334}
335
336/*
337 * Software FPU state helpers. Careful: these need to
338 * be preemption protection *and* they need to be
339 * properly paired with the CR0.TS changes!
340 */
341static inline int __thread_has_fpu(struct task_struct *tsk)
342{
343 return tsk->thread.fpu.has_fpu;
344}
345
346/* Must be paired with an 'stts' after! */
347static inline void __thread_clear_has_fpu(struct task_struct *tsk)
348{
349 tsk->thread.fpu.has_fpu = 0;
350 this_cpu_write(fpu_owner_task, NULL);
351}
352
353/* Must be paired with a 'clts' before! */
354static inline void __thread_set_has_fpu(struct task_struct *tsk)
355{
356 tsk->thread.fpu.has_fpu = 1;
357 this_cpu_write(fpu_owner_task, tsk);
358}
359
360/*
361 * Encapsulate the CR0.TS handling together with the
362 * software flag.
363 *
364 * These generally need preemption protection to work,
365 * do try to avoid using these on their own.
366 */
367static inline void __thread_fpu_end(struct task_struct *tsk)
368{
369 __thread_clear_has_fpu(tsk);
370 if (!use_eager_fpu())
371 stts();
372}
373
374static inline void __thread_fpu_begin(struct task_struct *tsk)
375{
376 if (!use_eager_fpu())
377 clts();
378 __thread_set_has_fpu(tsk);
379}
380
381static inline void drop_fpu(struct task_struct *tsk)
382{
383 /*
384 * Forget coprocessor state..
385 */
386 preempt_disable();
387 tsk->thread.fpu_counter = 0;
388
389 if (__thread_has_fpu(tsk)) {
390 /* Ignore delayed exceptions from user space */
391 asm volatile("1: fwait\n"
392 "2:\n"
393 _ASM_EXTABLE(1b, 2b));
394 __thread_fpu_end(tsk);
395 }
396
397 clear_stopped_child_used_math(tsk);
398 preempt_enable();
399}
400
401static inline void restore_init_xstate(void)
402{
403 if (use_xsave())
404 xrstor_state(init_xstate_buf, -1);
405 else
406 fxrstor_checking(&init_xstate_buf->i387);
407}
408
409/*
410 * Reset the FPU state in the eager case and drop it in the lazy case (later use
411 * will reinit it).
412 */
413static inline void fpu_reset_state(struct task_struct *tsk)
414{
415 if (!use_eager_fpu())
416 drop_fpu(tsk);
417 else
418 restore_init_xstate();
419}
420
421/*
422 * FPU state switching for scheduling.
423 *
424 * This is a two-stage process:
425 *
426 * - switch_fpu_prepare() saves the old state and
427 * sets the new state of the CR0.TS bit. This is
428 * done within the context of the old process.
429 *
430 * - switch_fpu_finish() restores the new state as
431 * necessary.
432 */
433typedef struct { int preload; } fpu_switch_t;
434
435static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
436{
437 fpu_switch_t fpu;
438
439 /*
440 * If the task has used the math, pre-load the FPU on xsave processors
441 * or if the past 5 consecutive context-switches used math.
442 */
443 fpu.preload = tsk_used_math(new) &&
444 (use_eager_fpu() || new->thread.fpu_counter > 5);
445
446 if (__thread_has_fpu(old)) {
447 if (!__save_init_fpu(old))
448 task_disable_lazy_fpu_restore(old);
449 else
450 old->thread.fpu.last_cpu = cpu;
451
452 /* But leave fpu_owner_task! */
453 old->thread.fpu.has_fpu = 0;
454
455 /* Don't change CR0.TS if we just switch! */
456 if (fpu.preload) {
457 new->thread.fpu_counter++;
458 __thread_set_has_fpu(new);
459 prefetch(new->thread.fpu.state);
460 } else if (!use_eager_fpu())
461 stts();
462 } else {
463 old->thread.fpu_counter = 0;
464 task_disable_lazy_fpu_restore(old);
465 if (fpu.preload) {
466 new->thread.fpu_counter++;
467 if (fpu_lazy_restore(new, cpu))
468 fpu.preload = 0;
469 else
470 prefetch(new->thread.fpu.state);
471 __thread_fpu_begin(new);
472 }
473 }
474 return fpu;
475}
476
477/*
478 * By the time this gets called, we've already cleared CR0.TS and
479 * given the process the FPU if we are going to preload the FPU
480 * state - all we need to do is to conditionally restore the register
481 * state itself.
482 */
483static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
484{
485 if (fpu.preload) {
486 if (unlikely(restore_fpu_checking(new)))
487 fpu_reset_state(new);
488 }
489}
490
491/*
492 * Signal frame handlers...
493 */
494extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
495extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
496
497static inline int xstate_sigframe_size(void)
498{
499 return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
500}
501
502static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
503{
504 void __user *buf_fx = buf;
505 int size = xstate_sigframe_size();
506
507 if (ia32_frame && use_fxsr()) {
508 buf_fx = buf + sizeof(struct i387_fsave_struct);
509 size += sizeof(struct i387_fsave_struct);
510 }
511
512 return __restore_xstate_sig(buf, buf_fx, size);
513}
514
515/*
516 * Needs to be preemption-safe.
517 *
518 * NOTE! user_fpu_begin() must be used only immediately before restoring
519 * the save state. It does not do any saving/restoring on its own. In
520 * lazy FPU mode, it is just an optimization to avoid a #NM exception,
521 * the task can lose the FPU right after preempt_enable().
522 */
523static inline void user_fpu_begin(void)
524{
525 preempt_disable();
526 if (!user_has_fpu())
527 __thread_fpu_begin(current);
528 preempt_enable();
529}
530
531static inline void __save_fpu(struct task_struct *tsk)
532{
533 if (use_xsave()) {
534 if (unlikely(system_state == SYSTEM_BOOTING))
535 xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
536 else
537 xsave_state(&tsk->thread.fpu.state->xsave, -1);
538 } else
539 fpu_fxsave(&tsk->thread.fpu);
540}
541
542/*
543 * i387 state interaction
544 */
545static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
546{
547 if (cpu_has_fxsr) {
548 return tsk->thread.fpu.state->fxsave.cwd;
549 } else {
550 return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
551 }
552}
553
554static inline unsigned short get_fpu_swd(struct task_struct *tsk)
555{
556 if (cpu_has_fxsr) {
557 return tsk->thread.fpu.state->fxsave.swd;
558 } else {
559 return (unsigned short)tsk->thread.fpu.state->fsave.swd;
560 }
561}
562
563static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
564{
565 if (cpu_has_xmm) {
566 return tsk->thread.fpu.state->fxsave.mxcsr;
567 } else {
568 return MXCSR_DEFAULT;
569 }
570}
571
572static bool fpu_allocated(struct fpu *fpu)
573{
574 return fpu->state != NULL;
575}
576
577static inline int fpu_alloc(struct fpu *fpu)
578{
579 if (fpu_allocated(fpu))
580 return 0;
581 fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
582 if (!fpu->state)
583 return -ENOMEM;
584 WARN_ON((unsigned long)fpu->state & 15);
585 return 0;
586}
587
588static inline void fpu_free(struct fpu *fpu)
589{
590 if (fpu->state) {
591 kmem_cache_free(task_xstate_cachep, fpu->state);
592 fpu->state = NULL;
593 }
594}
595
596static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
597{
598 if (use_eager_fpu()) {
599 memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
600 __save_fpu(dst);
601 } else {
602 struct fpu *dfpu = &dst->thread.fpu;
603 struct fpu *sfpu = &src->thread.fpu;
604
605 unlazy_fpu(src);
606 memcpy(dfpu->state, sfpu->state, xstate_size);
607 }
608}
609
610static inline unsigned long
611alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
612 unsigned long *size)
613{
614 unsigned long frame_size = xstate_sigframe_size();
615
616 *buf_fx = sp = round_down(sp - frame_size, 64);
617 if (ia32_frame && use_fxsr()) {
618 frame_size += sizeof(struct i387_fsave_struct);
619 sp -= sizeof(struct i387_fsave_struct);
620 }
621
622 *size = frame_size;
623 return sp;
624}
625
626#endif
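Most of the helpers from the deleted <asm/fpu-internal.h> above resurface under new names in the headers added later in this commit (<asm/fpu/api.h> and <asm/fpu/internal.h>). As far as the hunks shown here indicate, the renames include:

	fsave_user()           ->  copy_fregs_to_user()
	fxsave_user()          ->  copy_fxregs_to_user()
	fxrstor_checking()     ->  copy_kernel_to_fxregs()
	fxrstor_user()         ->  copy_user_to_fxregs()
	frstor_checking()      ->  copy_kernel_to_fregs()
	frstor_user()          ->  copy_user_to_fregs()
	fpu_fxsave()           ->  copy_fxregs_to_kernel()
	fx_finit()             ->  fpstate_init_fxstate()
	finit_soft_fpu()       ->  fpstate_init_soft()
	sanitize_i387_state()  ->  fpstate_sanitize_xstate()
	restore_xstate_sig()   ->  fpu__restore_sig()
	save_xstate_sig()      ->  copy_fpstate_to_sigframe()
	alloc_mathframe()      ->  fpu__alloc_mathframe()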
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
new file mode 100644
index 000000000000..1429a7c736db
--- /dev/null
+++ b/arch/x86/include/asm/fpu/api.h
@@ -0,0 +1,48 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002
8 */
9
10#ifndef _ASM_X86_FPU_API_H
11#define _ASM_X86_FPU_API_H
12
13/*
14 * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
15 * and they don't touch the preempt state on their own.
16 * If you enable preemption after __kernel_fpu_begin(), preempt notifier
17 * should call the __kernel_fpu_end() to prevent the kernel/user FPU
18 * state from getting corrupted. KVM for example uses this model.
19 *
20 * All other cases use kernel_fpu_begin/end() which disable preemption
21 * during kernel FPU usage.
22 */
23extern void __kernel_fpu_begin(void);
24extern void __kernel_fpu_end(void);
25extern void kernel_fpu_begin(void);
26extern void kernel_fpu_end(void);
27extern bool irq_fpu_usable(void);
28
29/*
30 * Some instructions like VIA's padlock instructions generate a spurious
31 * DNA fault but don't modify SSE registers. And these instructions
32 * get used from interrupt context as well. To prevent these kernel instructions
33 * in interrupt context interacting wrongly with other user/kernel fpu usage, we
34 * should use them only in the context of irq_ts_save/restore()
35 */
36extern int irq_ts_save(void);
37extern void irq_ts_restore(int TS_state);
38
39/*
40 * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
41 *
42 * If 'feature_name' is set then put a human-readable description of
43 * the feature there as well - this can be used to print error (or success)
44 * messages.
45 */
46extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
47
48#endif /* _ASM_X86_FPU_API_H */
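The comments in the new header describe the calling contract; a minimal usage sketch of the API it exports follows (the caller name is hypothetical, this is simply the kernel_fpu_begin()/kernel_fpu_end() pattern the header documents, not code from this commit):

	#include <linux/kernel.h>
	#include <asm/fpu/api.h>

	static void example_do_simd_work(void)
	{
		/*
		 * Code that can be reached from interrupt context must check
		 * irq_fpu_usable() first and fall back to a scalar path.
		 */
		if (!irq_fpu_usable())
			return;

		/*
		 * kernel_fpu_begin()/kernel_fpu_end() disable and re-enable
		 * preemption around in-kernel SSE/AVX usage; the __kernel_fpu_*
		 * variants (used e.g. by KVM) leave preemption handling to the
		 * caller, as the header comment above explains.
		 */
		kernel_fpu_begin();
		/* ... SSE/AVX instructions may be issued here ... */
		kernel_fpu_end();
	}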
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
new file mode 100644
index 000000000000..3c3550c3a4a3
--- /dev/null
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -0,0 +1,694 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002
8 */
9
10#ifndef _ASM_X86_FPU_INTERNAL_H
11#define _ASM_X86_FPU_INTERNAL_H
12
13#include <linux/compat.h>
14#include <linux/sched.h>
15#include <linux/slab.h>
16
17#include <asm/user.h>
18#include <asm/fpu/api.h>
19#include <asm/fpu/xstate.h>
20
21/*
22 * High level FPU state handling functions:
23 */
24extern void fpu__activate_curr(struct fpu *fpu);
25extern void fpu__activate_fpstate_read(struct fpu *fpu);
26extern void fpu__activate_fpstate_write(struct fpu *fpu);
27extern void fpu__save(struct fpu *fpu);
28extern void fpu__restore(struct fpu *fpu);
29extern int fpu__restore_sig(void __user *buf, int ia32_frame);
30extern void fpu__drop(struct fpu *fpu);
31extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
32extern void fpu__clear(struct fpu *fpu);
33extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
34extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
35
36/*
37 * Boot time FPU initialization functions:
38 */
39extern void fpu__init_cpu(void);
40extern void fpu__init_system_xstate(void);
41extern void fpu__init_cpu_xstate(void);
42extern void fpu__init_system(struct cpuinfo_x86 *c);
43extern void fpu__init_check_bugs(void);
44extern void fpu__resume_cpu(void);
45
46/*
47 * Debugging facility:
48 */
49#ifdef CONFIG_X86_DEBUG_FPU
50# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
51#else
52# define WARN_ON_FPU(x) ({ (void)(x); 0; })
53#endif
54
55/*
56 * FPU related CPU feature flag helper routines:
57 */
58static __always_inline __pure bool use_eager_fpu(void)
59{
60 return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
61}
62
63static __always_inline __pure bool use_xsaveopt(void)
64{
65 return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
66}
67
68static __always_inline __pure bool use_xsave(void)
69{
70 return static_cpu_has_safe(X86_FEATURE_XSAVE);
71}
72
73static __always_inline __pure bool use_fxsr(void)
74{
75 return static_cpu_has_safe(X86_FEATURE_FXSR);
76}
77
78/*
79 * fpstate handling functions:
80 */
81
82extern union fpregs_state init_fpstate;
83
84extern void fpstate_init(union fpregs_state *state);
85#ifdef CONFIG_MATH_EMULATION
86extern void fpstate_init_soft(struct swregs_state *soft);
87#else
88static inline void fpstate_init_soft(struct swregs_state *soft) {}
89#endif
90static inline void fpstate_init_fxstate(struct fxregs_state *fx)
91{
92 fx->cwd = 0x37f;
93 fx->mxcsr = MXCSR_DEFAULT;
94}
95extern void fpstate_sanitize_xstate(struct fpu *fpu);
96
97#define user_insn(insn, output, input...) \
98({ \
99 int err; \
100 asm volatile(ASM_STAC "\n" \
101 "1:" #insn "\n\t" \
102 "2: " ASM_CLAC "\n" \
103 ".section .fixup,\"ax\"\n" \
104 "3: movl $-1,%[err]\n" \
105 " jmp 2b\n" \
106 ".previous\n" \
107 _ASM_EXTABLE(1b, 3b) \
108 : [err] "=r" (err), output \
109 : "0"(0), input); \
110 err; \
111})
112
113#define check_insn(insn, output, input...) \
114({ \
115 int err; \
116 asm volatile("1:" #insn "\n\t" \
117 "2:\n" \
118 ".section .fixup,\"ax\"\n" \
119 "3: movl $-1,%[err]\n" \
120 " jmp 2b\n" \
121 ".previous\n" \
122 _ASM_EXTABLE(1b, 3b) \
123 : [err] "=r" (err), output \
124 : "0"(0), input); \
125 err; \
126})
127
128static inline int copy_fregs_to_user(struct fregs_state __user *fx)
129{
130 return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
131}
132
133static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
134{
135 if (config_enabled(CONFIG_X86_32))
136 return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
137 else if (config_enabled(CONFIG_AS_FXSAVEQ))
138 return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
139
140 /* See comment in copy_fxregs_to_kernel() below. */
141 return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
142}
143
144static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
145{
146 int err;
147
148 if (config_enabled(CONFIG_X86_32)) {
149 err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
150 } else {
151 if (config_enabled(CONFIG_AS_FXSAVEQ)) {
152 err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
153 } else {
154 /* See comment in copy_fxregs_to_kernel() below. */
155 err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
156 }
157 }
158 /* Copying from a kernel buffer to FPU registers should never fail: */
159 WARN_ON_FPU(err);
160}
161
162static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
163{
164 if (config_enabled(CONFIG_X86_32))
165 return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
166 else if (config_enabled(CONFIG_AS_FXSAVEQ))
167 return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
168
169 /* See comment in copy_fxregs_to_kernel() below. */
170 return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
171 "m" (*fx));
172}
173
174static inline void copy_kernel_to_fregs(struct fregs_state *fx)
175{
176 int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
177
178 WARN_ON_FPU(err);
179}
180
181static inline int copy_user_to_fregs(struct fregs_state __user *fx)
182{
183 return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
184}
185
186static inline void copy_fxregs_to_kernel(struct fpu *fpu)
187{
188 if (config_enabled(CONFIG_X86_32))
189 asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
190 else if (config_enabled(CONFIG_AS_FXSAVEQ))
191 asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
192 else {
193 /* Using "rex64; fxsave %0" is broken because, if the memory
194 * operand uses any extended registers for addressing, a second
195 * REX prefix will be generated (to the assembler, rex64
196 * followed by semicolon is a separate instruction), and hence
197 * the 64-bitness is lost.
198 *
199 * Using "fxsaveq %0" would be the ideal choice, but is only
200 * supported starting with gas 2.16.
201 *
202 * Using, as a workaround, the properly prefixed form below
203 * isn't accepted by any binutils version so far released,
204 * complaining that the same type of prefix is used twice if
205 * an extended register is needed for addressing (fix submitted
206 * to mainline 2005-11-21).
207 *
208 * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
209 *
210 * This, however, we can work around by forcing the compiler to
211 * select an addressing mode that doesn't require extended
212 * registers.
213 */
214 asm volatile( "rex64/fxsave (%[fx])"
215 : "=m" (fpu->state.fxsave)
216 : [fx] "R" (&fpu->state.fxsave));
217 }
218}
219
220/* These macros all use (%edi)/(%rdi) as the single memory argument. */
221#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
222#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
223#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
224#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
225#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
226
227/* xstate instruction fault handler: */
228#define xstate_fault(__err) \
229 \
230 ".section .fixup,\"ax\"\n" \
231 \
232 "3: movl $-2,%[_err]\n" \
233 " jmp 2b\n" \
234 \
235 ".previous\n" \
236 \
237 _ASM_EXTABLE(1b, 3b) \
238 : [_err] "=r" (__err)
239
240/*
241 * This function is called only during boot time, when the x86 caps are not
242 * set up yet and alternatives cannot be used.
243 */
244static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
245{
246 u64 mask = -1;
247 u32 lmask = mask;
248 u32 hmask = mask >> 32;
249 int err = 0;
250
251 WARN_ON(system_state != SYSTEM_BOOTING);
252
253 if (boot_cpu_has(X86_FEATURE_XSAVES))
254 asm volatile("1:"XSAVES"\n\t"
255 "2:\n\t"
256 xstate_fault(err)
257 : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
258 : "memory");
259 else
260 asm volatile("1:"XSAVE"\n\t"
261 "2:\n\t"
262 xstate_fault(err)
263 : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
264 : "memory");
265
266 /* We should never fault when copying to a kernel buffer: */
267 WARN_ON_FPU(err);
268}
269
270/*
271 * This function is called only during boot time, when the x86 caps are not
272 * set up yet and alternatives cannot be used.
273 */
274static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
275{
276 u64 mask = -1;
277 u32 lmask = mask;
278 u32 hmask = mask >> 32;
279 int err = 0;
280
281 WARN_ON(system_state != SYSTEM_BOOTING);
282
283 if (boot_cpu_has(X86_FEATURE_XSAVES))
284 asm volatile("1:"XRSTORS"\n\t"
285 "2:\n\t"
286 xstate_fault(err)
287 : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
288 : "memory");
289 else
290 asm volatile("1:"XRSTOR"\n\t"
291 "2:\n\t"
292 xstate_fault(err)
293 : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
294 : "memory");
295
296 /* We should never fault when copying from a kernel buffer: */
297 WARN_ON_FPU(err);
298}
299
300/*
301 * Save processor xstate to xsave area.
302 */
303static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
304{
305 u64 mask = -1;
306 u32 lmask = mask;
307 u32 hmask = mask >> 32;
308 int err = 0;
309
310 WARN_ON(!alternatives_patched);
311
312 /*
313 * If xsaves is enabled, xsaves replaces xsaveopt because
314 * it supports the compacted format and supervisor states in addition to
315 * the modified optimization of xsaveopt.
316 *
317 * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
318 * because xsaveopt supports modified optimization which is not
319 * supported by xsave.
320 *
321 * If neither xsaves nor xsaveopt is enabled, use xsave.
322 */
323 alternative_input_2(
324 "1:"XSAVE,
325 XSAVEOPT,
326 X86_FEATURE_XSAVEOPT,
327 XSAVES,
328 X86_FEATURE_XSAVES,
329 [xstate] "D" (xstate), "a" (lmask), "d" (hmask) :
330 "memory");
331 asm volatile("2:\n\t"
332 xstate_fault(err)
333 : "0" (err)
334 : "memory");
335
336 /* We should never fault when copying to a kernel buffer: */
337 WARN_ON_FPU(err);
338}
339
340/*
341 * Restore processor xstate from xsave area.
342 */
343static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
344{
345 u32 lmask = mask;
346 u32 hmask = mask >> 32;
347 int err = 0;
348
349 /*
350 * Use xrstors to restore the context if it is enabled: xrstors supports the
351 * compacted xsave area format, which is not supported by xrstor.
352 */
353 alternative_input(
354 "1: " XRSTOR,
355 XRSTORS,
356 X86_FEATURE_XSAVES,
357 "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask)
358 : "memory");
359
360 asm volatile("2:\n"
361 xstate_fault(err)
362 : "0" (err)
363 : "memory");
364
365 /* We should never fault when copying from a kernel buffer: */
366 WARN_ON_FPU(err);
367}
368
369/*
370 * Save xstate to user space xsave area.
371 *
372 * We don't use the modified optimization because xrstor/xrstors might track
373 * a different application.
374 *
375 * We don't use the compacted format of the xsave area, for backward
376 * compatibility with old applications which don't understand the
377 * compacted format of the xsave area.
378 */
379static inline int copy_xregs_to_user(struct xregs_state __user *buf)
380{
381 int err;
382
383 /*
384 * Clear the xsave header first, so that reserved fields are
385 * initialized to zero.
386 */
387 err = __clear_user(&buf->header, sizeof(buf->header));
388 if (unlikely(err))
389 return -EFAULT;
390
391 __asm__ __volatile__(ASM_STAC "\n"
392 "1:"XSAVE"\n"
393 "2: " ASM_CLAC "\n"
394 xstate_fault(err)
395 : "D" (buf), "a" (-1), "d" (-1), "0" (err)
396 : "memory");
397 return err;
398}
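A rough user-space analogue of the clear-header-then-XSAVE flow above, using the compiler's XSAVE intrinsic instead of the kernel's fixup machinery. This is a hypothetical sketch, not part of this patch; it assumes the _xsave intrinsic is available (gcc -O2 -mxsave) and that the kernel has enabled XSAVE.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	/* 4 KB is ample for the x87/SSE/AVX components requested below. */
	uint8_t *buf = aligned_alloc(64, 4096);	/* XSAVE needs 64-byte alignment */
	uint64_t xstate_bv;

	if (!buf)
		return 1;
	/* Zero the buffer so the reserved xstate header fields start out as 0: */
	memset(buf, 0, 4096);

	_xsave(buf, 0x7);			/* request x87 | SSE | AVX only */

	/* XSTATE_BV lives at byte offset 512, right after the legacy area: */
	memcpy(&xstate_bv, buf + 512, sizeof(xstate_bv));
	printf("XSTATE_BV = %#llx\n", (unsigned long long)xstate_bv);
	free(buf);
	return 0;
}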
399
400/*
401 * Restore xstate from user space xsave area.
402 */
403static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
404{
405 struct xregs_state *xstate = ((__force struct xregs_state *)buf);
406 u32 lmask = mask;
407 u32 hmask = mask >> 32;
408 int err = 0;
409
410 __asm__ __volatile__(ASM_STAC "\n"
411 "1:"XRSTOR"\n"
412 "2: " ASM_CLAC "\n"
413 xstate_fault(err)
414 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err)
415 : "memory"); /* memory required? */
416 return err;
417}
418
419/*
420 * This must be called with preemption disabled. It returns
421 * 'true' if the FPU state is still intact and we can
422 * keep the registers active.
423 *
424 * The legacy FNSAVE instruction clears all FPU state
425 * unconditionally, so the registers are essentially destroyed.
426 * Modern FPU state can be kept in registers, if there are
427 * no pending FP exceptions.
428 */
429static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
430{
431 if (likely(use_xsave())) {
432 copy_xregs_to_kernel(&fpu->state.xsave);
433 return 1;
434 }
435
436 if (likely(use_fxsr())) {
437 copy_fxregs_to_kernel(fpu);
438 return 1;
439 }
440
441 /*
442 * Legacy FPU register saving: FNSAVE always clears the FPU registers,
443 * so we have to mark them inactive:
444 */
445 asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
446
447 return 0;
448}
449
450static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
451{
452 if (use_xsave()) {
453 copy_kernel_to_xregs(&fpstate->xsave, -1);
454 } else {
455 if (use_fxsr())
456 copy_kernel_to_fxregs(&fpstate->fxsave);
457 else
458 copy_kernel_to_fregs(&fpstate->fsave);
459 }
460}
461
462static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
463{
464 /*
465 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
466 * pending. Clear the x87 state here by setting it to fixed values.
467 * "m" is a random variable that should be in L1.
468 */
469 if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
470 asm volatile(
471 "fnclex\n\t"
472 "emms\n\t"
473 "fildl %P[addr]" /* set F?P to defined value */
474 : : [addr] "m" (fpstate));
475 }
476
477 __copy_kernel_to_fpregs(fpstate);
478}
479
480extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
481
482/*
483 * FPU context switch related helper methods:
484 */
485
486DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
487
488/*
489 * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx
490 * on this CPU.
491 *
492 * This will disable any lazy FPU state restore of the current FPU state,
493 * but if the current thread owns the FPU, its state will still be saved.
494 */
495static inline void __cpu_disable_lazy_restore(unsigned int cpu)
496{
497 per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
498}
499
500static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
501{
502 return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
503}
504
505
506/*
507 * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
508 * idiom, which is then paired with the sw-flag (fpregs_active) later on:
509 */
510
511static inline void __fpregs_activate_hw(void)
512{
513 if (!use_eager_fpu())
514 clts();
515}
516
517static inline void __fpregs_deactivate_hw(void)
518{
519 if (!use_eager_fpu())
520 stts();
521}
522
523/* Must be paired with an 'stts' (__fpregs_deactivate_hw()) after! */
524static inline void __fpregs_deactivate(struct fpu *fpu)
525{
526 WARN_ON_FPU(!fpu->fpregs_active);
527
528 fpu->fpregs_active = 0;
529 this_cpu_write(fpu_fpregs_owner_ctx, NULL);
530}
531
532/* Must be paired with a 'clts' (__fpregs_activate_hw()) before! */
533static inline void __fpregs_activate(struct fpu *fpu)
534{
535 WARN_ON_FPU(fpu->fpregs_active);
536
537 fpu->fpregs_active = 1;
538 this_cpu_write(fpu_fpregs_owner_ctx, fpu);
539}
540
541/*
542 * The question "does this thread have fpu access?"
543 * is slightly racy, since preemption could come in
544 * and revoke it immediately after the test.
545 *
546 * However, even in that very unlikely scenario,
547 * we can just assume we have FPU access - typically
548 * to save the FP state - we'll just take a #NM
549 * fault and get the FPU access back.
550 */
551static inline int fpregs_active(void)
552{
553 return current->thread.fpu.fpregs_active;
554}
555
556/*
557 * Encapsulate the CR0.TS handling together with the
558 * software flag.
559 *
560 * These generally need preemption protection to work, so
561 * try to avoid using them on their own.
562 */
563static inline void fpregs_activate(struct fpu *fpu)
564{
565 __fpregs_activate_hw();
566 __fpregs_activate(fpu);
567}
568
569static inline void fpregs_deactivate(struct fpu *fpu)
570{
571 __fpregs_deactivate(fpu);
572 __fpregs_deactivate_hw();
573}
574
575/*
576 * FPU state switching for scheduling.
577 *
578 * This is a two-stage process:
579 *
580 * - switch_fpu_prepare() saves the old state and
581 * sets the new state of the CR0.TS bit. This is
582 * done within the context of the old process.
583 *
584 * - switch_fpu_finish() restores the new state as
585 * necessary.
586 */
587typedef struct { int preload; } fpu_switch_t;
588
589static inline fpu_switch_t
590switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
591{
592 fpu_switch_t fpu;
593
594 /*
595 * If the task has used the FPU, pre-load its state on xsave-capable
596 * processors, or if the past 5 consecutive context switches used the FPU.
597 */
598 fpu.preload = new_fpu->fpstate_active &&
599 (use_eager_fpu() || new_fpu->counter > 5);
600
601 if (old_fpu->fpregs_active) {
602 if (!copy_fpregs_to_fpstate(old_fpu))
603 old_fpu->last_cpu = -1;
604 else
605 old_fpu->last_cpu = cpu;
606
607 /* But leave fpu_fpregs_owner_ctx! */
608 old_fpu->fpregs_active = 0;
609
610 /* Don't change CR0.TS if we are just switching! */
611 if (fpu.preload) {
612 new_fpu->counter++;
613 __fpregs_activate(new_fpu);
614 prefetch(&new_fpu->state);
615 } else {
616 __fpregs_deactivate_hw();
617 }
618 } else {
619 old_fpu->counter = 0;
620 old_fpu->last_cpu = -1;
621 if (fpu.preload) {
622 new_fpu->counter++;
623 if (fpu_want_lazy_restore(new_fpu, cpu))
624 fpu.preload = 0;
625 else
626 prefetch(&new_fpu->state);
627 fpregs_activate(new_fpu);
628 }
629 }
630 return fpu;
631}
632
633/*
634 * Misc helper functions:
635 */
636
637/*
638 * By the time this gets called, we've already cleared CR0.TS and
639 * given the process the FPU if we are going to preload the FPU
640 * state - all we need to do is to conditionally restore the register
641 * state itself.
642 */
643static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
644{
645 if (fpu_switch.preload)
646 copy_kernel_to_fpregs(&new_fpu->state);
647}
648
649/*
650 * Needs to be preemption-safe.
651 *
652 * NOTE! user_fpu_begin() must be used only immediately before restoring
653 * the saved state. It does not do any saving/restoring on its own. In
654 * lazy FPU mode, it is just an optimization to avoid a #NM exception;
655 * the task can lose the FPU right after preempt_enable().
656 */
657static inline void user_fpu_begin(void)
658{
659 struct fpu *fpu = &current->thread.fpu;
660
661 preempt_disable();
662 if (!fpregs_active())
663 fpregs_activate(fpu);
664 preempt_enable();
665}
666
667/*
668 * MXCSR and XCR definitions:
669 */
670
671extern unsigned int mxcsr_feature_mask;
672
673#define XCR_XFEATURE_ENABLED_MASK 0x00000000
674
675static inline u64 xgetbv(u32 index)
676{
677 u32 eax, edx;
678
679 asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
680 : "=a" (eax), "=d" (edx)
681 : "c" (index));
682 return eax + ((u64)edx << 32);
683}
684
685static inline void xsetbv(u32 index, u64 value)
686{
687 u32 eax = value;
688 u32 edx = value >> 32;
689
690 asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
691 : : "a" (eax), "d" (edx), "c" (index));
692}
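XGETBV with index 0 is also legal from user space once the kernel has set CR4.OSXSAVE, so XCR0 can be inspected the same way outside the kernel. A hypothetical user-space sketch (not from this patch) that reuses the byte encoding above and decodes the low feature bits:

#include <stdio.h>

static unsigned long long read_xcr0(void)
{
	unsigned int eax, edx;

	/* Same encoding as the kernel helper above; %ecx selects XCR0 (index 0). */
	asm volatile(".byte 0x0f,0x01,0xd0" : "=a" (eax), "=d" (edx) : "c" (0));
	return eax | ((unsigned long long)edx << 32);
}

int main(void)
{
	unsigned long long xcr0 = read_xcr0();

	printf("XCR0 = %#llx\n", xcr0);
	printf("x87:%llu SSE:%llu YMM:%llu BNDREGS:%llu BNDCSR:%llu OPMASK:%llu\n",
	       xcr0 & 1, (xcr0 >> 1) & 1, (xcr0 >> 2) & 1,
	       (xcr0 >> 3) & 1, (xcr0 >> 4) & 1, (xcr0 >> 5) & 1);
	return 0;
}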
693
694#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
new file mode 100644
index 000000000000..39d3107ac6c7
--- /dev/null
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -0,0 +1,21 @@
1/*
2 * FPU regset handling methods:
3 */
4#ifndef _ASM_X86_FPU_REGSET_H
5#define _ASM_X86_FPU_REGSET_H
6
7#include <linux/regset.h>
8
9extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
10extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
11 xstateregs_get;
12extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
13 xstateregs_set;
14
15/*
16 * xstateregs_active == regset_fpregs_active. Please refer to the comment
17 * at the definition of regset_fpregs_active.
18 */
19#define xstateregs_active regset_fpregs_active
20
21#endif /* _ASM_X86_FPU_REGSET_H */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
new file mode 100644
index 000000000000..7358e9d61f1e
--- /dev/null
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -0,0 +1,33 @@
1/*
2 * x86 FPU signal frame handling methods:
3 */
4#ifndef _ASM_X86_FPU_SIGNAL_H
5#define _ASM_X86_FPU_SIGNAL_H
6
7#ifdef CONFIG_X86_64
8# include <asm/sigcontext32.h>
9# include <asm/user32.h>
10struct ksignal;
11int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
12 compat_sigset_t *set, struct pt_regs *regs);
13int ia32_setup_frame(int sig, struct ksignal *ksig,
14 compat_sigset_t *set, struct pt_regs *regs);
15#else
16# define user_i387_ia32_struct user_i387_struct
17# define user32_fxsr_struct user_fxsr_struct
18# define ia32_setup_frame __setup_frame
19# define ia32_setup_rt_frame __setup_rt_frame
20#endif
21
22extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
23 struct task_struct *tsk);
24extern void convert_to_fxsr(struct task_struct *tsk,
25 const struct user_i387_ia32_struct *env);
26
27unsigned long
28fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
29 unsigned long *buf_fx, unsigned long *size);
30
31extern void fpu__init_prepare_fx_sw_frame(void);
32
33#endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
new file mode 100644
index 000000000000..0637826292de
--- /dev/null
+++ b/arch/x86/include/asm/fpu/types.h
@@ -0,0 +1,293 @@
1/*
2 * FPU data structures:
3 */
4#ifndef _ASM_X86_FPU_H
5#define _ASM_X86_FPU_H
6
7/*
8 * The legacy x87 FPU state format, as saved by FSAVE and
9 * restored by the FRSTOR instructions:
10 */
11struct fregs_state {
12 u32 cwd; /* FPU Control Word */
13 u32 swd; /* FPU Status Word */
14 u32 twd; /* FPU Tag Word */
15 u32 fip; /* FPU IP Offset */
16 u32 fcs; /* FPU IP Selector */
17 u32 foo; /* FPU Operand Pointer Offset */
18 u32 fos; /* FPU Operand Pointer Selector */
19
20 /* 8*10 bytes for each FP-reg = 80 bytes: */
21 u32 st_space[20];
22
23 /* Software status information [not touched by FSAVE]: */
24 u32 status;
25};
26
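The structure above mirrors the hardware FNSAVE image: the first 108 bytes (seven 32-bit control/status words plus 80 bytes of register content) are what FNSAVE writes, and the trailing 'status' word is software-only, so the C structure is 112 bytes. A minimal compile-time check of that arithmetic, using a hypothetical stand-in type (not from this patch):

#include <assert.h>
#include <stdint.h>

/* Simplified stand-in for the layout above, for a compile-time size check. */
struct demo_fregs_state {
	uint32_t cwd, swd, twd, fip, fcs, foo, fos;	/*  28 bytes */
	uint32_t st_space[20];				/*  80 bytes */
	uint32_t status;				/* software-only */
};

/* 108-byte FNSAVE image plus the 4-byte software status word: */
static_assert(sizeof(struct demo_fregs_state) == 112, "unexpected FNSAVE layout");

int main(void) { return 0; }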
27/*
28 * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and
29 * restored by the FXRSTOR instructions. It's similar to the FSAVE
30 * format, but differs in some areas, plus has extensions at
31 * the end for the XMM registers.
32 */
33struct fxregs_state {
34 u16 cwd; /* Control Word */
35 u16 swd; /* Status Word */
36 u16 twd; /* Tag Word */
37 u16 fop; /* Last Instruction Opcode */
38 union {
39 struct {
40 u64 rip; /* Instruction Pointer */
41 u64 rdp; /* Data Pointer */
42 };
43 struct {
44 u32 fip; /* FPU IP Offset */
45 u32 fcs; /* FPU IP Selector */
46 u32 foo; /* FPU Operand Offset */
47 u32 fos; /* FPU Operand Selector */
48 };
49 };
50 u32 mxcsr; /* MXCSR Register State */
51 u32 mxcsr_mask; /* MXCSR Mask */
52
53 /* 8*16 bytes for each FP-reg = 128 bytes: */
54 u32 st_space[32];
55
56 /* 16*16 bytes for each XMM-reg = 256 bytes: */
57 u32 xmm_space[64];
58
59 u32 padding[12];
60
61 union {
62 u32 padding1[12];
63 u32 sw_reserved[12];
64 };
65
66} __attribute__((aligned(16)));
67
68/* Default value for fxregs_state.mxcsr: */
69#define MXCSR_DEFAULT 0x1f80
70
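For reference, the mxcsr field above sits at byte offset 24 of the 512-byte FXSAVE image (the four 16-bit control/status/tag/opcode words plus the 16-byte RIP/RDP union), and a freshly started process usually sees the 0x1f80 default. A hypothetical x86-64 user-space sketch (not from this patch) that checks this against _mm_getcsr():

#include <xmmintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* FXSAVE needs a 16-byte-aligned, 512-byte area: */
	static uint8_t buf[512] __attribute__((aligned(16)));
	uint32_t mxcsr;

	asm volatile("fxsave (%0)" : : "r" (buf) : "memory");

	memcpy(&mxcsr, buf + 24, sizeof(mxcsr));	/* fxregs_state::mxcsr */
	printf("mxcsr via FXSAVE: %#x, via _mm_getcsr(): %#x (default 0x1f80)\n",
	       mxcsr, _mm_getcsr());
	return 0;
}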
71/*
72 * Software-based FPU emulation state. The layout is arbitrary, really;
73 * it matches the x87 format to make it easier to understand:
74 */
75struct swregs_state {
76 u32 cwd;
77 u32 swd;
78 u32 twd;
79 u32 fip;
80 u32 fcs;
81 u32 foo;
82 u32 fos;
83 /* 8*10 bytes for each FP-reg = 80 bytes: */
84 u32 st_space[20];
85 u8 ftop;
86 u8 changed;
87 u8 lookahead;
88 u8 no_update;
89 u8 rm;
90 u8 alimit;
91 struct math_emu_info *info;
92 u32 entry_eip;
93};
94
95/*
96 * List of XSAVE features Linux knows about:
97 */
98enum xfeature_bit {
99 XSTATE_BIT_FP,
100 XSTATE_BIT_SSE,
101 XSTATE_BIT_YMM,
102 XSTATE_BIT_BNDREGS,
103 XSTATE_BIT_BNDCSR,
104 XSTATE_BIT_OPMASK,
105 XSTATE_BIT_ZMM_Hi256,
106 XSTATE_BIT_Hi16_ZMM,
107
108 XFEATURES_NR_MAX,
109};
110
111#define XSTATE_FP (1 << XSTATE_BIT_FP)
112#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
113#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
114#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
115#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
116#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
117#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
118#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
119
120#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
121#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
122
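On a live system, CPUID leaf 0xD reports where each of these components lives in the standard (non-compacted) xsave layout: sub-leaf N describes xfeature bit N, with the size in EAX and the offset in EBX. A hypothetical user-space sketch (not from this patch):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	int bit;

	/* Sub-leaves 2..7 cover YMM through Hi16_ZMM in the enum above. */
	for (bit = 2; bit < 8; bit++) {
		if (!__get_cpuid_count(0x0000000d, bit, &eax, &ebx, &ecx, &edx))
			break;
		if (!eax)	/* component not supported/enumerated here */
			continue;
		printf("xfeature bit %d: %u bytes at offset %u\n", bit, eax, ebx);
	}
	return 0;
}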
123/*
124 * There are 16x 256-bit AVX registers named YMM0-YMM15.
125 * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
126 * and are stored in 'struct fxregs_state::xmm_space[]'.
127 *
128 * The high 128 bits are stored here:
129 * 16x 128 bits == 256 bytes.
130 */
131struct ymmh_struct {
132 u8 ymmh_space[256];
133};
134
135/* We don't support LWP yet: */
136struct lwp_struct {
137 u8 reserved[128];
138};
139
140/* Intel MPX support: */
141struct bndreg {
142 u64 lower_bound;
143 u64 upper_bound;
144} __packed;
145
146struct bndcsr {
147 u64 bndcfgu;
148 u64 bndstatus;
149} __packed;
150
151struct mpx_struct {
152 struct bndreg bndreg[4];
153 struct bndcsr bndcsr;
154};
155
156struct xstate_header {
157 u64 xfeatures;
158 u64 xcomp_bv;
159 u64 reserved[6];
160} __attribute__((packed));
161
162/* New processor state extensions should be added here: */
163#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
164 sizeof(struct lwp_struct) + \
165 sizeof(struct mpx_struct) )
166/*
167 * This is our most modern FPU state format, as saved by the XSAVE
168 * and restored by the XRSTOR instructions.
169 *
170 * It consists of a legacy fxregs portion, an xstate header and
171 * subsequent fixed size areas as defined by the xstate header.
172 * Not all CPUs support all the extensions.
173 */
174struct xregs_state {
175 struct fxregs_state i387;
176 struct xstate_header header;
177 u8 __reserved[XSTATE_RESERVE];
178} __attribute__ ((packed, aligned (64)));
179
180/*
181 * This is a union of all the possible FPU state formats
182 * put together, so that we can pick the right one at runtime.
183 *
184 * The size of the structure is determined by the largest
185 * member - which is the xsave area:
186 */
187union fpregs_state {
188 struct fregs_state fsave;
189 struct fxregs_state fxsave;
190 struct swregs_state soft;
191 struct xregs_state xsave;
192};
193
194/*
195 * Highest level per task FPU state data structure that
196 * contains the FPU register state plus various FPU
197 * state fields:
198 */
199struct fpu {
200 /*
201 * @state:
202 *
203 * In-memory copy of all FPU registers that we save/restore
204 * over context switches. If the task is using the FPU then
205 * the registers in the FPU are more recent than this state
206 * copy. If the task context-switches away then they get
207 * saved here and represent the FPU state.
208 *
209 * After context switches there may be a (short) time period
210 * during which the in-FPU hardware registers are unchanged
211 * and still perfectly match this state, if the tasks
212 * scheduled afterwards are not using the FPU.
213 *
214 * This is the 'lazy restore' window of optimization, which
215 * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
216 *
217 * We detect whether a subsequent task uses the FPU by setting
218 * CR0.TS to 1, which causes any FPU use to raise a #NM fault.
219 *
220 * During this window, if the task gets scheduled again, we
221 * might be able to skip having to do a restore from this
222 * memory buffer to the hardware registers - at the cost of
223 * incurring the overhead of #NM fault traps.
224 *
225 * Note that on modern CPUs that support XSAVEOPT (or other
226 * optimized XSAVE instructions), we don't use #NM traps anymore,
227 * as the hardware can track whether FPU registers need saving
228 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
229 * logic, which unconditionally saves/restores all FPU state
230 * across context switches (if FPU state exists).
231 */
232 union fpregs_state state;
233
234 /*
235 * @last_cpu:
236 *
237 * Records the last CPU on which this context was loaded into
238 * FPU registers. (In the lazy-restore case we might be
239 * able to reuse FPU registers across multiple context switches
240 * this way, if no intermediate task used the FPU.)
241 *
242 * A value of -1 is used to indicate that the FPU state in context
243 * memory is newer than the FPU state in registers, and that the
244 * FPU state should be reloaded next time the task is run.
245 */
246 unsigned int last_cpu;
247
248 /*
249 * @fpstate_active:
250 *
251 * This flag indicates whether this context is active: if the task
252 * is not running then we can restore from this context; if the task
253 * is running then we should save into this context.
254 */
255 unsigned char fpstate_active;
256
257 /*
258 * @fpregs_active:
259 *
260 * This flag determines whether a given context is actively
261 * loaded into the FPU's registers and that those registers
262 * represent the task's current FPU state.
263 *
264 * Note the interaction with fpstate_active:
265 *
266 * # task does not use the FPU:
267 * fpstate_active == 0
268 *
269 * # task uses the FPU and regs are active:
270 * fpstate_active == 1 && fpregs_active == 1
271 *
272 * # the regs are inactive but still match fpstate:
273 * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
274 *
275 * The third state is what we use for the lazy restore optimization
276 * on lazy-switching CPUs.
277 */
278 unsigned char fpregs_active;
279
280 /*
281 * @counter:
282 *
283 * This counter contains the number of consecutive context switches
284 * during which the FPU stays used. If this is over a threshold, the
285 * lazy FPU restore logic becomes eager, to save the trap overhead.
286 * This is an unsigned char so that after 256 iterations the counter
287 * wraps and the context switch behavior turns lazy again; this is to
288 * deal with bursty apps that only use the FPU for a short time:
289 */
290 unsigned char counter;
291};
292
293#endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
new file mode 100644
index 000000000000..4656b25bb9a7
--- /dev/null
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -0,0 +1,46 @@
1#ifndef __ASM_X86_XSAVE_H
2#define __ASM_X86_XSAVE_H
3
4#include <linux/types.h>
5#include <asm/processor.h>
6#include <linux/uaccess.h>
7
8/* Bit 63 of XCR0 is reserved for future expansion */
9#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
10
11#define XSTATE_CPUID 0x0000000d
12
13#define FXSAVE_SIZE 512
14
15#define XSAVE_HDR_SIZE 64
16#define XSAVE_HDR_OFFSET FXSAVE_SIZE
17
18#define XSAVE_YMM_SIZE 256
19#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
20
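These fixed offsets only make sense for the standard (non-compacted) layout: a 512-byte legacy area, a 64-byte header at offset 512, then YMM state at offset 576. A hypothetical compile-time check (not from this patch), using simplified stand-ins for the structures from <asm/fpu/types.h>:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct demo_xstate_header {
	uint64_t xfeatures;
	uint64_t xcomp_bv;
	uint64_t reserved[6];
};

struct demo_xregs_state {
	uint8_t legacy[512];			/* FXSAVE_SIZE    */
	struct demo_xstate_header header;	/* XSAVE_HDR_SIZE */
	uint8_t ymmh[256];			/* XSAVE_YMM_SIZE */
} __attribute__((packed, aligned(64)));

static_assert(sizeof(struct demo_xstate_header) == 64, "XSAVE_HDR_SIZE");
static_assert(offsetof(struct demo_xregs_state, header) == 512, "XSAVE_HDR_OFFSET");
static_assert(offsetof(struct demo_xregs_state, ymmh) == 576, "XSAVE_YMM_OFFSET");

int main(void) { return 0; }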
21/* Supported features which support lazy state saving */
22#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
23 | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
24
25/* Supported features which require eager state saving */
26#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
27
28/* All currently supported features */
29#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
30
31#ifdef CONFIG_X86_64
32#define REX_PREFIX "0x48, "
33#else
34#define REX_PREFIX
35#endif
36
37extern unsigned int xstate_size;
38extern u64 xfeatures_mask;
39extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
40
41extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
42
43void *get_xsave_addr(struct xregs_state *xsave, int xstate);
44const void *get_xsave_field_ptr(int xstate_field);
45
46#endif
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
deleted file mode 100644
index 6eb6fcb83f63..000000000000
--- a/arch/x86/include/asm/i387.h
+++ /dev/null
@@ -1,108 +0,0 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 * x86-64 work by Andi Kleen 2002
8 */
9
10#ifndef _ASM_X86_I387_H
11#define _ASM_X86_I387_H
12
13#ifndef __ASSEMBLY__
14
15#include <linux/sched.h>
16#include <linux/hardirq.h>
17
18struct pt_regs;
19struct user_i387_struct;
20
21extern int init_fpu(struct task_struct *child);
22extern void fpu_finit(struct fpu *fpu);
23extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
24extern void math_state_restore(void);
25
26extern bool irq_fpu_usable(void);
27
28/*
29 * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
30 * and they don't touch the preempt state on their own.
31 * If you enable preemption after __kernel_fpu_begin(), preempt notifier
32 * should call the __kernel_fpu_end() to prevent the kernel/user FPU
33 * state from getting corrupted. KVM for example uses this model.
34 *
35 * All other cases use kernel_fpu_begin/end() which disable preemption
36 * during kernel FPU usage.
37 */
38extern void __kernel_fpu_begin(void);
39extern void __kernel_fpu_end(void);
40
41static inline void kernel_fpu_begin(void)
42{
43 preempt_disable();
44 WARN_ON_ONCE(!irq_fpu_usable());
45 __kernel_fpu_begin();
46}
47
48static inline void kernel_fpu_end(void)
49{
50 __kernel_fpu_end();
51 preempt_enable();
52}
53
54/* Must be called with preempt disabled */
55extern void kernel_fpu_disable(void);
56extern void kernel_fpu_enable(void);
57
58/*
59 * Some instructions like VIA's padlock instructions generate a spurious
60 * DNA fault but don't modify SSE registers. And these instructions
61 * get used from interrupt context as well. To prevent these kernel instructions
62 * in interrupt context interacting wrongly with other user/kernel fpu usage, we
63 * should use them only in the context of irq_ts_save/restore()
64 */
65static inline int irq_ts_save(void)
66{
67 /*
68 * If in process context and not atomic, we can take a spurious DNA fault.
69 * Otherwise, doing clts() in process context requires disabling preemption
70 * or some heavy lifting like kernel_fpu_begin()
71 */
72 if (!in_atomic())
73 return 0;
74
75 if (read_cr0() & X86_CR0_TS) {
76 clts();
77 return 1;
78 }
79
80 return 0;
81}
82
83static inline void irq_ts_restore(int TS_state)
84{
85 if (TS_state)
86 stts();
87}
88
89/*
90 * The question "does this thread have fpu access?"
91 * is slightly racy, since preemption could come in
92 * and revoke it immediately after the test.
93 *
94 * However, even in that very unlikely scenario,
95 * we can just assume we have FPU access - typically
96 * to save the FP state - we'll just take a #NM
97 * fault and get the FPU access back.
98 */
99static inline int user_has_fpu(void)
100{
101 return current->thread.fpu.has_fpu;
102}
103
104extern void unlazy_fpu(struct task_struct *tsk);
105
106#endif /* __ASSEMBLY__ */
107
108#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f4a555beef19..f8c0ec3a4a97 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1002,8 +1002,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
1002 1002
1003void kvm_inject_nmi(struct kvm_vcpu *vcpu); 1003void kvm_inject_nmi(struct kvm_vcpu *vcpu);
1004 1004
1005int fx_init(struct kvm_vcpu *vcpu);
1006
1007void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, 1005void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1008 const u8 *new, int bytes); 1006 const u8 *new, int bytes);
1009int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); 1007int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 883f6b933fa4..5e8daee7c5c9 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
142 paravirt_arch_exit_mmap(mm); 142 paravirt_arch_exit_mmap(mm);
143} 143}
144 144
145#ifdef CONFIG_X86_64
146static inline bool is_64bit_mm(struct mm_struct *mm)
147{
148 return !config_enabled(CONFIG_IA32_EMULATION) ||
149 !(mm->context.ia32_compat == TIF_IA32);
150}
151#else
152static inline bool is_64bit_mm(struct mm_struct *mm)
153{
154 return false;
155}
156#endif
157
145static inline void arch_bprm_mm_init(struct mm_struct *mm, 158static inline void arch_bprm_mm_init(struct mm_struct *mm,
146 struct vm_area_struct *vma) 159 struct vm_area_struct *vma)
147{ 160{
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index a952a13d59a7..7a35495275a9 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -13,55 +13,50 @@
13#define MPX_BNDCFG_ENABLE_FLAG 0x1 13#define MPX_BNDCFG_ENABLE_FLAG 0x1
14#define MPX_BD_ENTRY_VALID_FLAG 0x1 14#define MPX_BD_ENTRY_VALID_FLAG 0x1
15 15
16#ifdef CONFIG_X86_64 16/*
17 17 * The upper 28 bits [47:20] of the virtual address in 64-bit
18/* upper 28 bits [47:20] of the virtual address in 64-bit used to 18 * are used to index into bounds directory (BD).
19 * index into bounds directory (BD). 19 *
20 */ 20 * The directory is 2G (2^31) in size, and with 8-byte entries
21#define MPX_BD_ENTRY_OFFSET 28 21 * it has 2^28 entries.
22#define MPX_BD_ENTRY_SHIFT 3
23/* bits [19:3] of the virtual address in 64-bit used to index into
24 * bounds table (BT).
25 */ 22 */
26#define MPX_BT_ENTRY_OFFSET 17 23#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
27#define MPX_BT_ENTRY_SHIFT 5 24#define MPX_BD_ENTRY_BYTES_64 8
28#define MPX_IGN_BITS 3 25#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
29#define MPX_BD_ENTRY_TAIL 3
30 26
31#else 27/*
32 28 * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
33#define MPX_BD_ENTRY_OFFSET 20 29 * entries it has 2^20 entries.
34#define MPX_BD_ENTRY_SHIFT 2 30 */
35#define MPX_BT_ENTRY_OFFSET 10 31#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
36#define MPX_BT_ENTRY_SHIFT 4 32#define MPX_BD_ENTRY_BYTES_32 4
37#define MPX_IGN_BITS 2 33#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
38#define MPX_BD_ENTRY_TAIL 2
39 34
40#endif 35/*
36 * A 64-bit table is 4MB total in size, and an entry is
37 * 4 64-bit pointers in size.
38 */
39#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
40#define MPX_BT_ENTRY_BYTES_64 32
41#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
41 42
42#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) 43/*
43#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) 44 * A 32-bit table is 16kB total in size, and an entry is
45 * 4 32-bit pointers in size.
46 */
47#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
48#define MPX_BT_ENTRY_BYTES_32 16
49#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
44 50
45#define MPX_BNDSTA_TAIL 2 51#define MPX_BNDSTA_TAIL 2
46#define MPX_BNDCFG_TAIL 12 52#define MPX_BNDCFG_TAIL 12
47#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) 53#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
48#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) 54#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
49#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1))
50
51#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
52#define MPX_BNDSTA_ERROR_CODE 0x3 55#define MPX_BNDSTA_ERROR_CODE 0x3
53 56
54#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
55#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
56#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
57 MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
58#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
59 MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
60
61#ifdef CONFIG_X86_INTEL_MPX 57#ifdef CONFIG_X86_INTEL_MPX
62siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, 58siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
63 struct xsave_struct *xsave_buf); 59int mpx_handle_bd_fault(void);
64int mpx_handle_bd_fault(struct xsave_struct *xsave_buf);
65static inline int kernel_managing_mpx_tables(struct mm_struct *mm) 60static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
66{ 61{
67 return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); 62 return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
@@ -77,12 +72,11 @@ static inline void mpx_mm_init(struct mm_struct *mm)
77void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, 72void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
78 unsigned long start, unsigned long end); 73 unsigned long start, unsigned long end);
79#else 74#else
80static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, 75static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
81 struct xsave_struct *xsave_buf)
82{ 76{
83 return NULL; 77 return NULL;
84} 78}
85static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) 79static inline int mpx_handle_bd_fault(void)
86{ 80{
87 return -EINVAL; 81 return -EINVAL;
88} 82}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9aa52fd13a78..43e6519df0d5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -21,6 +21,7 @@ struct mm_struct;
21#include <asm/desc_defs.h> 21#include <asm/desc_defs.h>
22#include <asm/nops.h> 22#include <asm/nops.h>
23#include <asm/special_insns.h> 23#include <asm/special_insns.h>
24#include <asm/fpu/types.h>
24 25
25#include <linux/personality.h> 26#include <linux/personality.h>
26#include <linux/cpumask.h> 27#include <linux/cpumask.h>
@@ -52,11 +53,16 @@ static inline void *current_text_addr(void)
52 return pc; 53 return pc;
53} 54}
54 55
56/*
57 * These alignment constraints are for performance in the vSMP case,
58 * but in the task_struct case we must also meet hardware imposed
59 * alignment requirements of the FPU state:
60 */
55#ifdef CONFIG_X86_VSMP 61#ifdef CONFIG_X86_VSMP
56# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) 62# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
57# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) 63# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
58#else 64#else
59# define ARCH_MIN_TASKALIGN 16 65# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state)
60# define ARCH_MIN_MMSTRUCT_ALIGN 0 66# define ARCH_MIN_MMSTRUCT_ALIGN 0
61#endif 67#endif
62 68
@@ -166,7 +172,6 @@ extern const struct seq_operations cpuinfo_op;
166#define cache_line_size() (boot_cpu_data.x86_cache_alignment) 172#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
167 173
168extern void cpu_detect(struct cpuinfo_x86 *c); 174extern void cpu_detect(struct cpuinfo_x86 *c);
169extern void fpu_detect(struct cpuinfo_x86 *c);
170 175
171extern void early_cpu_init(void); 176extern void early_cpu_init(void);
172extern void identify_boot_cpu(void); 177extern void identify_boot_cpu(void);
@@ -313,128 +318,6 @@ struct orig_ist {
313 unsigned long ist[7]; 318 unsigned long ist[7];
314}; 319};
315 320
316#define MXCSR_DEFAULT 0x1f80
317
318struct i387_fsave_struct {
319 u32 cwd; /* FPU Control Word */
320 u32 swd; /* FPU Status Word */
321 u32 twd; /* FPU Tag Word */
322 u32 fip; /* FPU IP Offset */
323 u32 fcs; /* FPU IP Selector */
324 u32 foo; /* FPU Operand Pointer Offset */
325 u32 fos; /* FPU Operand Pointer Selector */
326
327 /* 8*10 bytes for each FP-reg = 80 bytes: */
328 u32 st_space[20];
329
330 /* Software status information [not touched by FSAVE ]: */
331 u32 status;
332};
333
334struct i387_fxsave_struct {
335 u16 cwd; /* Control Word */
336 u16 swd; /* Status Word */
337 u16 twd; /* Tag Word */
338 u16 fop; /* Last Instruction Opcode */
339 union {
340 struct {
341 u64 rip; /* Instruction Pointer */
342 u64 rdp; /* Data Pointer */
343 };
344 struct {
345 u32 fip; /* FPU IP Offset */
346 u32 fcs; /* FPU IP Selector */
347 u32 foo; /* FPU Operand Offset */
348 u32 fos; /* FPU Operand Selector */
349 };
350 };
351 u32 mxcsr; /* MXCSR Register State */
352 u32 mxcsr_mask; /* MXCSR Mask */
353
354 /* 8*16 bytes for each FP-reg = 128 bytes: */
355 u32 st_space[32];
356
357 /* 16*16 bytes for each XMM-reg = 256 bytes: */
358 u32 xmm_space[64];
359
360 u32 padding[12];
361
362 union {
363 u32 padding1[12];
364 u32 sw_reserved[12];
365 };
366
367} __attribute__((aligned(16)));
368
369struct i387_soft_struct {
370 u32 cwd;
371 u32 swd;
372 u32 twd;
373 u32 fip;
374 u32 fcs;
375 u32 foo;
376 u32 fos;
377 /* 8*10 bytes for each FP-reg = 80 bytes: */
378 u32 st_space[20];
379 u8 ftop;
380 u8 changed;
381 u8 lookahead;
382 u8 no_update;
383 u8 rm;
384 u8 alimit;
385 struct math_emu_info *info;
386 u32 entry_eip;
387};
388
389struct ymmh_struct {
390 /* 16 * 16 bytes for each YMMH-reg = 256 bytes */
391 u32 ymmh_space[64];
392};
393
394/* We don't support LWP yet: */
395struct lwp_struct {
396 u8 reserved[128];
397};
398
399struct bndreg {
400 u64 lower_bound;
401 u64 upper_bound;
402} __packed;
403
404struct bndcsr {
405 u64 bndcfgu;
406 u64 bndstatus;
407} __packed;
408
409struct xsave_hdr_struct {
410 u64 xstate_bv;
411 u64 xcomp_bv;
412 u64 reserved[6];
413} __attribute__((packed));
414
415struct xsave_struct {
416 struct i387_fxsave_struct i387;
417 struct xsave_hdr_struct xsave_hdr;
418 struct ymmh_struct ymmh;
419 struct lwp_struct lwp;
420 struct bndreg bndreg[4];
421 struct bndcsr bndcsr;
422 /* new processor state extensions will go here */
423} __attribute__ ((packed, aligned (64)));
424
425union thread_xstate {
426 struct i387_fsave_struct fsave;
427 struct i387_fxsave_struct fxsave;
428 struct i387_soft_struct soft;
429 struct xsave_struct xsave;
430};
431
432struct fpu {
433 unsigned int last_cpu;
434 unsigned int has_fpu;
435 union thread_xstate *state;
436};
437
438#ifdef CONFIG_X86_64 321#ifdef CONFIG_X86_64
439DECLARE_PER_CPU(struct orig_ist, orig_ist); 322DECLARE_PER_CPU(struct orig_ist, orig_ist);
440 323
@@ -483,8 +366,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
483#endif /* X86_64 */ 366#endif /* X86_64 */
484 367
485extern unsigned int xstate_size; 368extern unsigned int xstate_size;
486extern void free_thread_xstate(struct task_struct *);
487extern struct kmem_cache *task_xstate_cachep;
488 369
489struct perf_event; 370struct perf_event;
490 371
@@ -508,6 +389,10 @@ struct thread_struct {
508 unsigned long fs; 389 unsigned long fs;
509#endif 390#endif
510 unsigned long gs; 391 unsigned long gs;
392
393 /* Floating point and extended processor state */
394 struct fpu fpu;
395
511 /* Save middle states of ptrace breakpoints */ 396 /* Save middle states of ptrace breakpoints */
512 struct perf_event *ptrace_bps[HBP_NUM]; 397 struct perf_event *ptrace_bps[HBP_NUM];
513 /* Debug status used for traps, single steps, etc... */ 398 /* Debug status used for traps, single steps, etc... */
@@ -518,8 +403,6 @@ struct thread_struct {
518 unsigned long cr2; 403 unsigned long cr2;
519 unsigned long trap_nr; 404 unsigned long trap_nr;
520 unsigned long error_code; 405 unsigned long error_code;
521 /* floating point and extended processor state */
522 struct fpu fpu;
523#ifdef CONFIG_X86_32 406#ifdef CONFIG_X86_32
524 /* Virtual 86 mode info */ 407 /* Virtual 86 mode info */
525 struct vm86_struct __user *vm86_info; 408 struct vm86_struct __user *vm86_info;
@@ -535,15 +418,6 @@ struct thread_struct {
535 unsigned long iopl; 418 unsigned long iopl;
536 /* Max allowed port in the bitmap, in bytes: */ 419 /* Max allowed port in the bitmap, in bytes: */
537 unsigned io_bitmap_max; 420 unsigned io_bitmap_max;
538 /*
539 * fpu_counter contains the number of consecutive context switches
540 * that the FPU is used. If this is over a threshold, the lazy fpu
541 * saving becomes unlazy to save the trap. This is an unsigned char
542 * so that after 256 times the counter wraps and the behavior turns
543 * lazy again; this to deal with bursty apps that only use FPU for
544 * a short time
545 */
546 unsigned char fpu_counter;
547}; 421};
548 422
549/* 423/*
@@ -928,18 +802,18 @@ extern int get_tsc_mode(unsigned long adr);
928extern int set_tsc_mode(unsigned int val); 802extern int set_tsc_mode(unsigned int val);
929 803
930/* Register/unregister a process' MPX related resource */ 804/* Register/unregister a process' MPX related resource */
931#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) 805#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
932#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) 806#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
933 807
934#ifdef CONFIG_X86_INTEL_MPX 808#ifdef CONFIG_X86_INTEL_MPX
935extern int mpx_enable_management(struct task_struct *tsk); 809extern int mpx_enable_management(void);
936extern int mpx_disable_management(struct task_struct *tsk); 810extern int mpx_disable_management(void);
937#else 811#else
938static inline int mpx_enable_management(struct task_struct *tsk) 812static inline int mpx_enable_management(void)
939{ 813{
940 return -EINVAL; 814 return -EINVAL;
941} 815}
942static inline int mpx_disable_management(struct task_struct *tsk) 816static inline int mpx_disable_management(void)
943{ 817{
944 return -EINVAL; 818 return -EINVAL;
945} 819}
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index ee80b92f0096..6c8a7ed13365 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h
@@ -1,5 +1,5 @@
1 1
2#include <asm/i387.h> 2#include <asm/fpu/api.h>
3 3
4/* 4/*
5 * may_use_simd - whether it is allowable at this time to issue SIMD 5 * may_use_simd - whether it is allowable at this time to issue SIMD
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 6a998598f172..c2e00bb2a136 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -39,7 +39,9 @@
39#include <asm/processor.h> 39#include <asm/processor.h>
40#include <asm/percpu.h> 40#include <asm/percpu.h>
41#include <asm/desc.h> 41#include <asm/desc.h>
42
42#include <linux/random.h> 43#include <linux/random.h>
44#include <linux/sched.h>
43 45
44/* 46/*
45 * 24 byte read-only segment initializer for stack canary. Linker 47 * 24 byte read-only segment initializer for stack canary. Linker
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 552d6c90a6d4..d1793f06854d 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -7,7 +7,7 @@
7#define _ASM_X86_SUSPEND_32_H 7#define _ASM_X86_SUSPEND_32_H
8 8
9#include <asm/desc.h> 9#include <asm/desc.h>
10#include <asm/i387.h> 10#include <asm/fpu/api.h>
11 11
12/* image of the saved processor state */ 12/* image of the saved processor state */
13struct saved_context { 13struct saved_context {
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index bc6232834bab..7ebf0ebe4e68 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -7,7 +7,7 @@
7#define _ASM_X86_SUSPEND_64_H 7#define _ASM_X86_SUSPEND_64_H
8 8
9#include <asm/desc.h> 9#include <asm/desc.h>
10#include <asm/i387.h> 10#include <asm/fpu/api.h>
11 11
12/* 12/*
13 * Image of the saved processor state, used by the low level ACPI suspend to 13 * Image of the saved processor state, used by the low level ACPI suspend to
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
new file mode 100644
index 000000000000..173dd3ba108c
--- /dev/null
+++ b/arch/x86/include/asm/trace/mpx.h
@@ -0,0 +1,132 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM mpx
3
4#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_MPX_H
6
7#include <linux/tracepoint.h>
8
9#ifdef CONFIG_X86_INTEL_MPX
10
11TRACE_EVENT(mpx_bounds_register_exception,
12
13 TP_PROTO(void *addr_referenced,
14 const struct bndreg *bndreg),
15 TP_ARGS(addr_referenced, bndreg),
16
17 TP_STRUCT__entry(
18 __field(void *, addr_referenced)
19 __field(u64, lower_bound)
20 __field(u64, upper_bound)
21 ),
22
23 TP_fast_assign(
24 __entry->addr_referenced = addr_referenced;
25 __entry->lower_bound = bndreg->lower_bound;
26 __entry->upper_bound = bndreg->upper_bound;
27 ),
28 /*
29 * Note that we are printing out the '~' of the upper
30 * bounds register here. It is actually stored in its
31 * one's complement form so that its 'init' state
32 * corresponds to all 0's. But that looks like
33 * gibberish when printed out, so print out the one's
34 * complement instead of the actual value here. Note,
35 * though, that you still need to specify filters for the
36 * actual value, not the displayed one.
37 */
38 TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
39 __entry->addr_referenced,
40 __entry->lower_bound,
41 ~__entry->upper_bound
42 )
43);
44
45TRACE_EVENT(bounds_exception_mpx,
46
47 TP_PROTO(const struct bndcsr *bndcsr),
48 TP_ARGS(bndcsr),
49
50 TP_STRUCT__entry(
51 __field(u64, bndcfgu)
52 __field(u64, bndstatus)
53 ),
54
55 TP_fast_assign(
56 /* need to get rid of the 'const' on bndcsr */
57 __entry->bndcfgu = (u64)bndcsr->bndcfgu;
58 __entry->bndstatus = (u64)bndcsr->bndstatus;
59 ),
60
61 TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
62 __entry->bndcfgu,
63 __entry->bndstatus)
64);
65
66DECLARE_EVENT_CLASS(mpx_range_trace,
67
68 TP_PROTO(unsigned long start,
69 unsigned long end),
70 TP_ARGS(start, end),
71
72 TP_STRUCT__entry(
73 __field(unsigned long, start)
74 __field(unsigned long, end)
75 ),
76
77 TP_fast_assign(
78 __entry->start = start;
79 __entry->end = end;
80 ),
81
82 TP_printk("[0x%p:0x%p]",
83 (void *)__entry->start,
84 (void *)__entry->end
85 )
86);
87
88DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
89 TP_PROTO(unsigned long start, unsigned long end),
90 TP_ARGS(start, end)
91);
92
93DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
94 TP_PROTO(unsigned long start, unsigned long end),
95 TP_ARGS(start, end)
96);
97
98TRACE_EVENT(mpx_new_bounds_table,
99
100 TP_PROTO(unsigned long table_vaddr),
101 TP_ARGS(table_vaddr),
102
103 TP_STRUCT__entry(
104 __field(unsigned long, table_vaddr)
105 ),
106
107 TP_fast_assign(
108 __entry->table_vaddr = table_vaddr;
109 ),
110
111 TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
112);
113
114#else
115
116/*
117 * This gets used outside of MPX-specific code, so we need a stub.
118 */
119static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
120{
121}
122
123#endif /* CONFIG_X86_INTEL_MPX */
124
125#undef TRACE_INCLUDE_PATH
126#define TRACE_INCLUDE_PATH asm/trace/
127#undef TRACE_INCLUDE_FILE
128#define TRACE_INCLUDE_FILE mpx
129#endif /* _TRACE_MPX_H */
130
131/* This part must be outside protection */
132#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index ccab4af1646d..59a54e869f15 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -14,8 +14,8 @@ struct user_ymmh_regs {
14 __u32 ymmh_space[64]; 14 __u32 ymmh_space[64];
15}; 15};
16 16
17struct user_xsave_hdr { 17struct user_xstate_header {
18 __u64 xstate_bv; 18 __u64 xfeatures;
19 __u64 reserved1[2]; 19 __u64 reserved1[2];
20 __u64 reserved2[5]; 20 __u64 reserved2[5];
21}; 21};
@@ -41,11 +41,11 @@ struct user_xsave_hdr {
41 * particular process/thread. 41 * particular process/thread.
42 * 42 *
43 * Also when the user modifies certain state FP/SSE/etc through the 43 * Also when the user modifies certain state FP/SSE/etc through the
44 * ptrace interface, they must ensure that the xsave_hdr.xstate_bv 44 * ptrace interface, they must ensure that the header.xfeatures
45 * bytes[512..519] of the memory layout are updated correspondingly. 45 * bytes[512..519] of the memory layout are updated correspondingly.
46 * i.e., for example when FP state is modified to a non-init state, 46 * i.e., for example when FP state is modified to a non-init state,
47 * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to 47 * header.xfeatures's bit 0 must be set to '1', when SSE is modified to
48 * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc. 48 * non-init state, header.xfeatures's bit 1 must to be set to '1', etc.
49 */ 49 */
50#define USER_XSTATE_FX_SW_WORDS 6 50#define USER_XSTATE_FX_SW_WORDS 6
51#define USER_XSTATE_XCR0_WORD 0 51#define USER_XSTATE_XCR0_WORD 0
@@ -55,7 +55,7 @@ struct user_xstateregs {
55 __u64 fpx_space[58]; 55 __u64 fpx_space[58];
56 __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; 56 __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS];
57 } i387; 57 } i387;
58 struct user_xsave_hdr xsave_hdr; 58 struct user_xstate_header header;
59 struct user_ymmh_regs ymmh; 59 struct user_ymmh_regs ymmh;
60 /* further processor state extensions go here */ 60 /* further processor state extensions go here */
61}; 61};
diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h
deleted file mode 100644
index f2cba4e79a23..000000000000
--- a/arch/x86/include/asm/xcr.h
+++ /dev/null
@@ -1,49 +0,0 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2008 rPath, Inc. - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * asm-x86/xcr.h
13 *
14 * Definitions for the eXtended Control Register instructions
15 */
16
17#ifndef _ASM_X86_XCR_H
18#define _ASM_X86_XCR_H
19
20#define XCR_XFEATURE_ENABLED_MASK 0x00000000
21
22#ifdef __KERNEL__
23# ifndef __ASSEMBLY__
24
25#include <linux/types.h>
26
27static inline u64 xgetbv(u32 index)
28{
29 u32 eax, edx;
30
31 asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
32 : "=a" (eax), "=d" (edx)
33 : "c" (index));
34 return eax + ((u64)edx << 32);
35}
36
37static inline void xsetbv(u32 index, u64 value)
38{
39 u32 eax = value;
40 u32 edx = value >> 32;
41
42 asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
43 : : "a" (eax), "d" (edx), "c" (index));
44}
45
46# endif /* __ASSEMBLY__ */
47#endif /* __KERNEL__ */
48
49#endif /* _ASM_X86_XCR_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index d8829751b3f8..1f5c5161ead6 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -36,7 +36,7 @@
36 * no advantages to be gotten from x86-64 here anyways. 36 * no advantages to be gotten from x86-64 here anyways.
37 */ 37 */
38 38
39#include <asm/i387.h> 39#include <asm/fpu/api.h>
40 40
41#ifdef CONFIG_X86_32 41#ifdef CONFIG_X86_32
42/* reduce register pressure */ 42/* reduce register pressure */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index ce05722e3c68..5a08bc8bff33 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -26,7 +26,7 @@
26#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" 26#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"
27#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" 27#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"
28 28
29#include <asm/i387.h> 29#include <asm/fpu/api.h>
30 30
31static void 31static void
32xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) 32xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 492b29802f57..7c0a517ec751 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -18,7 +18,7 @@
18#ifdef CONFIG_AS_AVX 18#ifdef CONFIG_AS_AVX
19 19
20#include <linux/compiler.h> 20#include <linux/compiler.h>
21#include <asm/i387.h> 21#include <asm/fpu/api.h>
22 22
23#define BLOCK4(i) \ 23#define BLOCK4(i) \
24 BLOCK(32 * i, 0) \ 24 BLOCK(32 * i, 0) \
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
deleted file mode 100644
index c9a6d68b8d62..000000000000
--- a/arch/x86/include/asm/xsave.h
+++ /dev/null
@@ -1,257 +0,0 @@
1#ifndef __ASM_X86_XSAVE_H
2#define __ASM_X86_XSAVE_H
3
4#include <linux/types.h>
5#include <asm/processor.h>
6
7#define XSTATE_CPUID 0x0000000d
8
9#define XSTATE_FP 0x1
10#define XSTATE_SSE 0x2
11#define XSTATE_YMM 0x4
12#define XSTATE_BNDREGS 0x8
13#define XSTATE_BNDCSR 0x10
14#define XSTATE_OPMASK 0x20
15#define XSTATE_ZMM_Hi256 0x40
16#define XSTATE_Hi16_ZMM 0x80
17
18#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
19#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
20/* Bit 63 of XCR0 is reserved for future expansion */
21#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
22
23#define FXSAVE_SIZE 512
24
25#define XSAVE_HDR_SIZE 64
26#define XSAVE_HDR_OFFSET FXSAVE_SIZE
27
28#define XSAVE_YMM_SIZE 256
29#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
30
31/* Supported features which support lazy state saving */
32#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
33 | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
34
35/* Supported features which require eager state saving */
36#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
37
38/* All currently supported features */
39#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
40
41#ifdef CONFIG_X86_64
42#define REX_PREFIX "0x48, "
43#else
44#define REX_PREFIX
45#endif
46
47extern unsigned int xstate_size;
48extern u64 pcntxt_mask;
49extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
50extern struct xsave_struct *init_xstate_buf;
51
52extern void xsave_init(void);
53extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
54extern int init_fpu(struct task_struct *child);
55
56/* These macros all use (%edi)/(%rdi) as the single memory argument. */
57#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
58#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
59#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
60#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
61#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
62
63#define xstate_fault ".section .fixup,\"ax\"\n" \
64 "3: movl $-1,%[err]\n" \
65 " jmp 2b\n" \
66 ".previous\n" \
67 _ASM_EXTABLE(1b, 3b) \
68 : [err] "=r" (err)
69
70/*
71 * This function is called only during boot time when x86 caps are not set
72 * up and alternative can not be used yet.
73 */
74static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
75{
76 u32 lmask = mask;
77 u32 hmask = mask >> 32;
78 int err = 0;
79
80 WARN_ON(system_state != SYSTEM_BOOTING);
81
82 if (boot_cpu_has(X86_FEATURE_XSAVES))
83 asm volatile("1:"XSAVES"\n\t"
84 "2:\n\t"
85 xstate_fault
86 : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
87 : "memory");
88 else
89 asm volatile("1:"XSAVE"\n\t"
90 "2:\n\t"
91 xstate_fault
92 : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
93 : "memory");
94 return err;
95}
96
97/*
98 * This function is called only during boot time when x86 caps are not set
99 * up and alternative can not be used yet.
100 */
101static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
102{
103 u32 lmask = mask;
104 u32 hmask = mask >> 32;
105 int err = 0;
106
107 WARN_ON(system_state != SYSTEM_BOOTING);
108
109 if (boot_cpu_has(X86_FEATURE_XSAVES))
110 asm volatile("1:"XRSTORS"\n\t"
111 "2:\n\t"
112 xstate_fault
113 : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
114 : "memory");
115 else
116 asm volatile("1:"XRSTOR"\n\t"
117 "2:\n\t"
118 xstate_fault
119 : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
120 : "memory");
121 return err;
122}
123
124/*
125 * Save processor xstate to xsave area.
126 */
127static inline int xsave_state(struct xsave_struct *fx, u64 mask)
128{
129 u32 lmask = mask;
130 u32 hmask = mask >> 32;
131 int err = 0;
132
133 /*
134 * If xsaves is enabled, xsaves replaces xsaveopt because
135 * it supports compact format and supervisor states in addition to
136 * modified optimization in xsaveopt.
137 *
138 * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
139 * because xsaveopt supports modified optimization which is not
140 * supported by xsave.
141 *
142 * If none of xsaves and xsaveopt is enabled, use xsave.
143 */
144 alternative_input_2(
145 "1:"XSAVE,
146 XSAVEOPT,
147 X86_FEATURE_XSAVEOPT,
148 XSAVES,
149 X86_FEATURE_XSAVES,
150 [fx] "D" (fx), "a" (lmask), "d" (hmask) :
151 "memory");
152 asm volatile("2:\n\t"
153 xstate_fault
154 : "0" (0)
155 : "memory");
156
157 return err;
158}
159
160/*
161 * Restore processor xstate from xsave area.
162 */
163static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
164{
165 int err = 0;
166 u32 lmask = mask;
167 u32 hmask = mask >> 32;
168
169 /*
170 * Use xrstors to restore context if it is enabled. xrstors supports
171 * compacted format of xsave area which is not supported by xrstor.
172 */
173 alternative_input(
174 "1: " XRSTOR,
175 XRSTORS,
176 X86_FEATURE_XSAVES,
177 "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
178 : "memory");
179
180 asm volatile("2:\n"
181 xstate_fault
182 : "0" (0)
183 : "memory");
184
185 return err;
186}
187
188/*
189 * Save xstate context for old process during context switch.
190 */
191static inline void fpu_xsave(struct fpu *fpu)
192{
193 xsave_state(&fpu->state->xsave, -1);
194}
195
196/*
197 * Restore xstate context for new process during context switch.
198 */
199static inline int fpu_xrstor_checking(struct xsave_struct *fx)
200{
201 return xrstor_state(fx, -1);
202}
203
204/*
205 * Save xstate to user space xsave area.
206 *
207 * We don't use modified optimization because xrstor/xrstors might track
208 * a different application.
209 *
210 * We don't use compacted format xsave area for
211 * backward compatibility for old applications which don't understand
212 * compacted format of xsave area.
213 */
214static inline int xsave_user(struct xsave_struct __user *buf)
215{
216 int err;
217
218 /*
219 * Clear the xsave header first, so that reserved fields are
220 * initialized to zero.
221 */
222 err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
223 if (unlikely(err))
224 return -EFAULT;
225
226 __asm__ __volatile__(ASM_STAC "\n"
227 "1:"XSAVE"\n"
228 "2: " ASM_CLAC "\n"
229 xstate_fault
230 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
231 : "memory");
232 return err;
233}
234
235/*
236 * Restore xstate from user space xsave area.
237 */
238static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
239{
240 int err = 0;
241 struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
242 u32 lmask = mask;
243 u32 hmask = mask >> 32;
244
245 __asm__ __volatile__(ASM_STAC "\n"
246 "1:"XRSTOR"\n"
247 "2: " ASM_CLAC "\n"
248 xstate_fault
249 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
250 : "memory"); /* memory required? */
251 return err;
252}
253
254void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
255void setup_xstate_comp(void);
256
257#endif
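
Illustration (not part of the patch): XSAVE/XRSTOR take the 64-bit requested-feature bitmap in EDX:EAX, which is why every helper in the removed header splits 'mask' into a low and a high half before issuing the instruction. A minimal user-space sketch of that split, reusing the XSTATE_* bit values from the header above; any subset of bits behaves the same way.

#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP	0x1
#define XSTATE_SSE	0x2
#define XSTATE_YMM	0x4

int main(void)
{
	uint64_t mask  = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
	uint32_t lmask = (uint32_t)mask;		/* low  half -> EAX */
	uint32_t hmask = (uint32_t)(mask >> 32);	/* high half -> EDX */

	/* Reassembling EDX:EAX must give back the original bitmap: */
	uint64_t again = ((uint64_t)hmask << 32) | lmask;

	printf("mask=%#llx lmask=%#x hmask=%#x ok=%d\n",
	       (unsigned long long)mask, lmask, hmask, again == mask);
	return 0;
}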
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 16dc4e8a2cd3..0e8a973de9ee 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -25,7 +25,7 @@ struct _fpx_sw_bytes {
25 __u32 extended_size; /* total size of the layout referred by 25 __u32 extended_size; /* total size of the layout referred by
26 * fpstate pointer in the sigcontext. 26 * fpstate pointer in the sigcontext.
27 */ 27 */
28 __u64 xstate_bv; 28 __u64 xfeatures;
29 /* feature bit mask (including fp/sse/extended 29 /* feature bit mask (including fp/sse/extended
30 * state) that is present in the memory 30 * state) that is present in the memory
31 * layout. 31 * layout.
@@ -209,8 +209,8 @@ struct sigcontext {
209 209
210#endif /* !__i386__ */ 210#endif /* !__i386__ */
211 211
212struct _xsave_hdr { 212struct _header {
213 __u64 xstate_bv; 213 __u64 xfeatures;
214 __u64 reserved1[2]; 214 __u64 reserved1[2];
215 __u64 reserved2[5]; 215 __u64 reserved2[5];
216}; 216};
@@ -228,7 +228,7 @@ struct _ymmh_state {
228 */ 228 */
229struct _xstate { 229struct _xstate {
230 struct _fpstate fpstate; 230 struct _fpstate fpstate;
231 struct _xsave_hdr xstate_hdr; 231 struct _header xstate_hdr;
232 struct _ymmh_state ymmh; 232 struct _ymmh_state ymmh;
233 /* new processor state extensions go here */ 233 /* new processor state extensions go here */
234}; 234};
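
Illustration (not part of the patch): only the field names change in this hunk; the layout is still the architectural 64-byte XSAVE header that sits at offset 512 of the XSAVE area. A quick stand-alone size check, assuming a C11 compiler; the struct name is made up for the example.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same layout as the renamed 'struct _header' above. */
struct xsave_header_example {
	uint64_t xfeatures;		/* was 'xstate_bv' before the rename */
	uint64_t reserved1[2];
	uint64_t reserved2[5];
};

int main(void)
{
	/* The XSAVE header is architecturally 64 bytes, at offset 512 of the area. */
	static_assert(sizeof(struct xsave_header_example) == 64,
		      "XSAVE header must be 64 bytes");
	printf("XSAVE header size: %zu bytes\n", sizeof(struct xsave_header_example));
	return 0;
}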
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 9bcd0b56ca17..febaf180621b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -44,7 +44,7 @@ obj-y += pci-iommu_table.o
44obj-y += resource.o 44obj-y += resource.o
45 45
46obj-y += process.o 46obj-y += process.o
47obj-y += i387.o xsave.o 47obj-y += fpu/
48obj-y += ptrace.o 48obj-y += ptrace.o
49obj-$(CONFIG_X86_32) += tls.o 49obj-$(CONFIG_X86_32) += tls.o
50obj-$(CONFIG_IA32_EMULATION) += tls.o 50obj-$(CONFIG_IA32_EMULATION) += tls.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index aef653193160..7fe097235376 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -21,6 +21,10 @@
21#include <asm/io.h> 21#include <asm/io.h>
22#include <asm/fixmap.h> 22#include <asm/fixmap.h>
23 23
24int __read_mostly alternatives_patched;
25
26EXPORT_SYMBOL_GPL(alternatives_patched);
27
24#define MAX_PATCH_LEN (255-1) 28#define MAX_PATCH_LEN (255-1)
25 29
26static int __initdata_or_module debug_alternative; 30static int __initdata_or_module debug_alternative;
@@ -627,6 +631,7 @@ void __init alternative_instructions(void)
627 apply_paravirt(__parainstructions, __parainstructions_end); 631 apply_paravirt(__parainstructions, __parainstructions_end);
628 632
629 restart_nmi(); 633 restart_nmi();
634 alternatives_patched = 1;
630} 635}
631 636
632/** 637/**
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 03445346ee0a..bd17db15a2c1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -12,57 +12,11 @@
12#include <asm/bugs.h> 12#include <asm/bugs.h>
13#include <asm/processor.h> 13#include <asm/processor.h>
14#include <asm/processor-flags.h> 14#include <asm/processor-flags.h>
15#include <asm/i387.h> 15#include <asm/fpu/internal.h>
16#include <asm/msr.h> 16#include <asm/msr.h>
17#include <asm/paravirt.h> 17#include <asm/paravirt.h>
18#include <asm/alternative.h> 18#include <asm/alternative.h>
19 19
20static double __initdata x = 4195835.0;
21static double __initdata y = 3145727.0;
22
23/*
24 * This used to check for exceptions..
25 * However, it turns out that to support that,
26 * the XMM trap handlers basically had to
27 * be buggy. So let's have a correct XMM trap
28 * handler, and forget about printing out
29 * some status at boot.
30 *
31 * We should really only care about bugs here
32 * anyway. Not features.
33 */
34static void __init check_fpu(void)
35{
36 s32 fdiv_bug;
37
38 kernel_fpu_begin();
39
40 /*
41 * trap_init() enabled FXSR and company _before_ testing for FP
42 * problems here.
43 *
44 * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug
45 */
46 __asm__("fninit\n\t"
47 "fldl %1\n\t"
48 "fdivl %2\n\t"
49 "fmull %2\n\t"
50 "fldl %1\n\t"
51 "fsubp %%st,%%st(1)\n\t"
52 "fistpl %0\n\t"
53 "fwait\n\t"
54 "fninit"
55 : "=m" (*&fdiv_bug)
56 : "m" (*&x), "m" (*&y));
57
58 kernel_fpu_end();
59
60 if (fdiv_bug) {
61 set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
62 pr_warn("Hmm, FPU with FDIV bug\n");
63 }
64}
65
66void __init check_bugs(void) 20void __init check_bugs(void)
67{ 21{
68 identify_boot_cpu(); 22 identify_boot_cpu();
@@ -85,10 +39,5 @@ void __init check_bugs(void)
85 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); 39 '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
86 alternative_instructions(); 40 alternative_instructions();
87 41
88 /* 42 fpu__init_check_bugs();
89 * kernel_fpu_begin/end() in check_fpu() relies on the patched
90 * alternative instructions.
91 */
92 if (cpu_has_fpu)
93 check_fpu();
94} 43}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 351197cbbc8e..b28e5262a0a5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -32,8 +32,7 @@
32#include <asm/setup.h> 32#include <asm/setup.h>
33#include <asm/apic.h> 33#include <asm/apic.h>
34#include <asm/desc.h> 34#include <asm/desc.h>
35#include <asm/i387.h> 35#include <asm/fpu/internal.h>
36#include <asm/fpu-internal.h>
37#include <asm/mtrr.h> 36#include <asm/mtrr.h>
38#include <linux/numa.h> 37#include <linux/numa.h>
39#include <asm/asm.h> 38#include <asm/asm.h>
@@ -146,32 +145,21 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
146} }; 145} };
147EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 146EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
148 147
149static int __init x86_xsave_setup(char *s) 148static int __init x86_mpx_setup(char *s)
150{ 149{
150 /* require an exact match without trailing characters */
151 if (strlen(s)) 151 if (strlen(s))
152 return 0; 152 return 0;
153 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
154 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
155 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
156 setup_clear_cpu_cap(X86_FEATURE_AVX);
157 setup_clear_cpu_cap(X86_FEATURE_AVX2);
158 return 1;
159}
160__setup("noxsave", x86_xsave_setup);
161 153
162static int __init x86_xsaveopt_setup(char *s) 154 /* do not emit a message if the feature is not present */
163{ 155 if (!boot_cpu_has(X86_FEATURE_MPX))
164 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); 156 return 1;
165 return 1;
166}
167__setup("noxsaveopt", x86_xsaveopt_setup);
168 157
169static int __init x86_xsaves_setup(char *s) 158 setup_clear_cpu_cap(X86_FEATURE_MPX);
170{ 159 pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n");
171 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
172 return 1; 160 return 1;
173} 161}
174__setup("noxsaves", x86_xsaves_setup); 162__setup("nompx", x86_mpx_setup);
175 163
176#ifdef CONFIG_X86_32 164#ifdef CONFIG_X86_32
177static int cachesize_override = -1; 165static int cachesize_override = -1;
@@ -184,14 +172,6 @@ static int __init cachesize_setup(char *str)
184} 172}
185__setup("cachesize=", cachesize_setup); 173__setup("cachesize=", cachesize_setup);
186 174
187static int __init x86_fxsr_setup(char *s)
188{
189 setup_clear_cpu_cap(X86_FEATURE_FXSR);
190 setup_clear_cpu_cap(X86_FEATURE_XMM);
191 return 1;
192}
193__setup("nofxsr", x86_fxsr_setup);
194
195static int __init x86_sep_setup(char *s) 175static int __init x86_sep_setup(char *s)
196{ 176{
197 setup_clear_cpu_cap(X86_FEATURE_SEP); 177 setup_clear_cpu_cap(X86_FEATURE_SEP);
@@ -762,7 +742,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
762 cpu_detect(c); 742 cpu_detect(c);
763 get_cpu_vendor(c); 743 get_cpu_vendor(c);
764 get_cpu_cap(c); 744 get_cpu_cap(c);
765 fpu_detect(c); 745 fpu__init_system(c);
766 746
767 if (this_cpu->c_early_init) 747 if (this_cpu->c_early_init)
768 this_cpu->c_early_init(c); 748 this_cpu->c_early_init(c);
@@ -1186,8 +1166,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
1186DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1166DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1187EXPORT_PER_CPU_SYMBOL(__preempt_count); 1167EXPORT_PER_CPU_SYMBOL(__preempt_count);
1188 1168
1189DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1190
1191/* 1169/*
1192 * Special IST stacks which the CPU switches to when it calls 1170 * Special IST stacks which the CPU switches to when it calls
1193 * an IST-marked descriptor entry. Up to 7 stacks (hardware 1171 * an IST-marked descriptor entry. Up to 7 stacks (hardware
@@ -1278,7 +1256,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1278EXPORT_PER_CPU_SYMBOL(current_task); 1256EXPORT_PER_CPU_SYMBOL(current_task);
1279DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; 1257DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
1280EXPORT_PER_CPU_SYMBOL(__preempt_count); 1258EXPORT_PER_CPU_SYMBOL(__preempt_count);
1281DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
1282 1259
1283/* 1260/*
1284 * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find 1261 * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
@@ -1442,7 +1419,7 @@ void cpu_init(void)
1442 clear_all_debug_regs(); 1419 clear_all_debug_regs();
1443 dbg_restore_debug_regs(); 1420 dbg_restore_debug_regs();
1444 1421
1445 fpu_init(); 1422 fpu__init_cpu();
1446 1423
1447 if (is_uv_system()) 1424 if (is_uv_system())
1448 uv_cpu_init(); 1425 uv_cpu_init();
@@ -1498,7 +1475,7 @@ void cpu_init(void)
1498 clear_all_debug_regs(); 1475 clear_all_debug_regs();
1499 dbg_restore_debug_regs(); 1476 dbg_restore_debug_regs();
1500 1477
1501 fpu_init(); 1478 fpu__init_cpu();
1502} 1479}
1503#endif 1480#endif
1504 1481
diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile
new file mode 100644
index 000000000000..68279efb811a
--- /dev/null
+++ b/arch/x86/kernel/fpu/Makefile
@@ -0,0 +1,5 @@
1#
2# Build rules for the FPU support code:
3#
4
5obj-y += init.o bugs.o core.o regset.o signal.o xstate.o
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
new file mode 100644
index 000000000000..dd9ca9b60ff3
--- /dev/null
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -0,0 +1,71 @@
1/*
2 * x86 FPU bug checks:
3 */
4#include <asm/fpu/internal.h>
5
6/*
7 * Boot time CPU/FPU FDIV bug detection code:
8 */
9
10static double __initdata x = 4195835.0;
11static double __initdata y = 3145727.0;
12
13/*
14 * This used to check for exceptions..
15 * However, it turns out that to support that,
16 * the XMM trap handlers basically had to
17 * be buggy. So let's have a correct XMM trap
18 * handler, and forget about printing out
19 * some status at boot.
20 *
21 * We should really only care about bugs here
22 * anyway. Not features.
23 */
24static void __init check_fpu(void)
25{
26 u32 cr0_saved;
27 s32 fdiv_bug;
28
29 /* We might have CR0::TS set already, clear it: */
30 cr0_saved = read_cr0();
31 write_cr0(cr0_saved & ~X86_CR0_TS);
32
33 kernel_fpu_begin();
34
35 /*
36 * trap_init() enabled FXSR and company _before_ testing for FP
37 * problems here.
38 *
39 * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug
40 */
41 __asm__("fninit\n\t"
42 "fldl %1\n\t"
43 "fdivl %2\n\t"
44 "fmull %2\n\t"
45 "fldl %1\n\t"
46 "fsubp %%st,%%st(1)\n\t"
47 "fistpl %0\n\t"
48 "fwait\n\t"
49 "fninit"
50 : "=m" (*&fdiv_bug)
51 : "m" (*&x), "m" (*&y));
52
53 kernel_fpu_end();
54
55 write_cr0(cr0_saved);
56
57 if (fdiv_bug) {
58 set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
59 pr_warn("Hmm, FPU with FDIV bug\n");
60 }
61}
62
63void __init fpu__init_check_bugs(void)
64{
65 /*
66 * kernel_fpu_begin/end() in check_fpu() relies on the patched
67 * alternative instructions.
68 */
69 if (cpu_has_fpu)
70 check_fpu();
71}
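
Illustration (not part of the patch): check_fpu() is the classic Pentium FDIV probe; it computes x - (x/y)*y in x87 registers and treats a non-zero integer result as the bug being present. The same arithmetic in plain C is sketched below; the kernel uses raw x87 asm so the compiler cannot fold the division away, and 'volatile' serves that purpose here. On any modern CPU this prints "FPU ok".

#include <stdio.h>

int main(void)
{
	/* 'volatile' keeps the compiler from folding the division at build time. */
	volatile double x = 4195835.0;
	volatile double y = 3145727.0;

	/* A correct FPU leaves (essentially) zero; a flawed Pentium leaves ~256. */
	double residue = x - (x / y) * y;

	printf("fdiv residue: %f -> %s\n", residue,
	       residue > 1.0 ? "FDIV bug" : "FPU ok");
	return 0;
}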
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
new file mode 100644
index 000000000000..79de954626fd
--- /dev/null
+++ b/arch/x86/kernel/fpu/core.c
@@ -0,0 +1,523 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 */
8#include <asm/fpu/internal.h>
9#include <asm/fpu/regset.h>
10#include <asm/fpu/signal.h>
11#include <asm/traps.h>
12
13#include <linux/hardirq.h>
14
15/*
16 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
17 * depending on the FPU hardware format:
18 */
19union fpregs_state init_fpstate __read_mostly;
20
21/*
22 * Track whether the kernel is using the FPU state
23 * currently.
24 *
25 * This flag is used:
26 *
27 * - by IRQ context code to potentially use the FPU
28 * if it's unused.
29 *
30 * - to debug kernel_fpu_begin()/end() correctness
31 */
32static DEFINE_PER_CPU(bool, in_kernel_fpu);
33
34/*
35 * Track which context is using the FPU on the CPU:
36 */
37DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
38
39static void kernel_fpu_disable(void)
40{
41 WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
42 this_cpu_write(in_kernel_fpu, true);
43}
44
45static void kernel_fpu_enable(void)
46{
47 WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
48 this_cpu_write(in_kernel_fpu, false);
49}
50
51static bool kernel_fpu_disabled(void)
52{
53 return this_cpu_read(in_kernel_fpu);
54}
55
56/*
57 * Were we in an interrupt that interrupted kernel mode?
58 *
59 * In the lazy-FPU case we can do a kernel_fpu_begin/end() pair *ONLY* if that
60 * pair does nothing at all: the thread's FPU registers must not be active (so
59 * In the lazy-FPU case we can do a kernel_fpu_begin/end() pair *ONLY* if that
60 * pair does nothing at all: the thread's FPU registers must not be active (so
61 * that we don't try to save the FPU state), and TS must
62 * be set (so that the clts/stts pair does nothing that is
63 * visible in the interrupted kernel thread).
64 *
65 * Except for the eagerfpu case when we return true; in the likely case
66 * the thread has FPU but we are not going to set/clear TS.
67 */
68static bool interrupted_kernel_fpu_idle(void)
69{
70 if (kernel_fpu_disabled())
71 return false;
72
73 if (use_eager_fpu())
74 return true;
75
76 return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
77}
78
79/*
80 * Were we in user mode (or vm86 mode) when we were
81 * interrupted?
82 *
83 * Doing kernel_fpu_begin/end() is ok if we are running
84 * in an interrupt context from user mode - we'll just
85 * save the FPU state as required.
86 */
87static bool interrupted_user_mode(void)
88{
89 struct pt_regs *regs = get_irq_regs();
90 return regs && user_mode(regs);
91}
92
93/*
94 * Can we use the FPU in kernel mode with the
95 * whole "kernel_fpu_begin/end()" sequence?
96 *
97 * It's always ok in process context (ie "not interrupt")
98 * but it is sometimes ok even from an irq.
99 */
100bool irq_fpu_usable(void)
101{
102 return !in_interrupt() ||
103 interrupted_user_mode() ||
104 interrupted_kernel_fpu_idle();
105}
106EXPORT_SYMBOL(irq_fpu_usable);
107
108void __kernel_fpu_begin(void)
109{
110 struct fpu *fpu = &current->thread.fpu;
111
112 WARN_ON_FPU(!irq_fpu_usable());
113
114 kernel_fpu_disable();
115
116 if (fpu->fpregs_active) {
117 copy_fpregs_to_fpstate(fpu);
118 } else {
119 this_cpu_write(fpu_fpregs_owner_ctx, NULL);
120 __fpregs_activate_hw();
121 }
122}
123EXPORT_SYMBOL(__kernel_fpu_begin);
124
125void __kernel_fpu_end(void)
126{
127 struct fpu *fpu = &current->thread.fpu;
128
129 if (fpu->fpregs_active)
130 copy_kernel_to_fpregs(&fpu->state);
131 else
132 __fpregs_deactivate_hw();
133
134 kernel_fpu_enable();
135}
136EXPORT_SYMBOL(__kernel_fpu_end);
137
138void kernel_fpu_begin(void)
139{
140 preempt_disable();
141 __kernel_fpu_begin();
142}
143EXPORT_SYMBOL_GPL(kernel_fpu_begin);
144
145void kernel_fpu_end(void)
146{
147 __kernel_fpu_end();
148 preempt_enable();
149}
150EXPORT_SYMBOL_GPL(kernel_fpu_end);
151
152/*
153 * CR0::TS save/restore functions:
154 */
155int irq_ts_save(void)
156{
157 /*
158 * If in process context and not atomic, we can take a spurious DNA fault.
159 * Otherwise, doing clts() in process context requires disabling preemption
160 * or some heavy lifting like kernel_fpu_begin()
161 */
162 if (!in_atomic())
163 return 0;
164
165 if (read_cr0() & X86_CR0_TS) {
166 clts();
167 return 1;
168 }
169
170 return 0;
171}
172EXPORT_SYMBOL_GPL(irq_ts_save);
173
174void irq_ts_restore(int TS_state)
175{
176 if (TS_state)
177 stts();
178}
179EXPORT_SYMBOL_GPL(irq_ts_restore);
180
181/*
182 * Save the FPU state (mark it for reload if necessary):
183 *
184 * This only ever gets called for the current task.
185 */
186void fpu__save(struct fpu *fpu)
187{
188 WARN_ON_FPU(fpu != &current->thread.fpu);
189
190 preempt_disable();
191 if (fpu->fpregs_active) {
192 if (!copy_fpregs_to_fpstate(fpu))
193 fpregs_deactivate(fpu);
194 }
195 preempt_enable();
196}
197EXPORT_SYMBOL_GPL(fpu__save);
198
199/*
200 * Legacy x87 fpstate state init:
201 */
202static inline void fpstate_init_fstate(struct fregs_state *fp)
203{
204 fp->cwd = 0xffff037fu;
205 fp->swd = 0xffff0000u;
206 fp->twd = 0xffffffffu;
207 fp->fos = 0xffff0000u;
208}
209
210void fpstate_init(union fpregs_state *state)
211{
212 if (!cpu_has_fpu) {
213 fpstate_init_soft(&state->soft);
214 return;
215 }
216
217 memset(state, 0, xstate_size);
218
219 if (cpu_has_fxsr)
220 fpstate_init_fxstate(&state->fxsave);
221 else
222 fpstate_init_fstate(&state->fsave);
223}
224EXPORT_SYMBOL_GPL(fpstate_init);
225
226/*
227 * Copy the current task's FPU state to a new task's FPU context.
228 *
229 * In both the 'eager' and the 'lazy' case we save hardware registers
230 * directly to the destination buffer.
231 */
232static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
233{
234 WARN_ON_FPU(src_fpu != &current->thread.fpu);
235
236 /*
237 * Don't let 'init optimized' areas of the XSAVE area
238 * leak into the child task:
239 */
240 if (use_eager_fpu())
241 memset(&dst_fpu->state.xsave, 0, xstate_size);
242
243 /*
244 * Save current FPU registers directly into the child
245 * FPU context, without any memory-to-memory copying.
246 *
247 * If the FPU context got destroyed in the process (FNSAVE
248 * done on old CPUs) then copy it back into the source
249 * context and mark the current task for lazy restore.
250 *
251 * We have to do all this with preemption disabled,
252 * mostly because of the FNSAVE case, because in that
253 * case we must not allow preemption in the window
254 * between the FNSAVE and us marking the context lazy.
255 *
256 * It shouldn't be an issue as even FNSAVE is plenty
257 * fast in terms of critical section length.
258 */
259 preempt_disable();
260 if (!copy_fpregs_to_fpstate(dst_fpu)) {
261 memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
262 fpregs_deactivate(src_fpu);
263 }
264 preempt_enable();
265}
266
267int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
268{
269 dst_fpu->counter = 0;
270 dst_fpu->fpregs_active = 0;
271 dst_fpu->last_cpu = -1;
272
273 if (src_fpu->fpstate_active)
274 fpu_copy(dst_fpu, src_fpu);
275
276 return 0;
277}
278
279/*
280 * Activate the current task's in-memory FPU context,
281 * if it has not been used before:
282 */
283void fpu__activate_curr(struct fpu *fpu)
284{
285 WARN_ON_FPU(fpu != &current->thread.fpu);
286
287 if (!fpu->fpstate_active) {
288 fpstate_init(&fpu->state);
289
290 /* Safe to do for the current task: */
291 fpu->fpstate_active = 1;
292 }
293}
294EXPORT_SYMBOL_GPL(fpu__activate_curr);
295
296/*
297 * This function must be called before we read a task's fpstate.
298 *
299 * If the task has not used the FPU before then initialize its
300 * fpstate.
301 *
302 * If the task has used the FPU before then save it.
303 */
304void fpu__activate_fpstate_read(struct fpu *fpu)
305{
306 /*
307 * If fpregs are active (in the current CPU), then
308 * copy them to the fpstate:
309 */
310 if (fpu->fpregs_active) {
311 fpu__save(fpu);
312 } else {
313 if (!fpu->fpstate_active) {
314 fpstate_init(&fpu->state);
315
316 /* Safe to do for current and for stopped child tasks: */
317 fpu->fpstate_active = 1;
318 }
319 }
320}
321
322/*
323 * This function must be called before we write a task's fpstate.
324 *
325 * If the task has used the FPU before then unlazy it.
326 * If the task has not used the FPU before then initialize its fpstate.
327 *
328 * After this function call, once registers in the fpstate have been
329 * modified and the child task has woken up, the child task will
330 * restore the modified FPU state from the modified context. If we
331 * didn't clear its lazy status here then the lazy in-registers
332 * state pending on its former CPU could be restored, corrupting
333 * the modifications.
334 */
335void fpu__activate_fpstate_write(struct fpu *fpu)
336{
337 /*
338 * Only stopped child tasks can be used to modify the FPU
339 * state in the fpstate buffer:
340 */
341 WARN_ON_FPU(fpu == &current->thread.fpu);
342
343 if (fpu->fpstate_active) {
344 /* Invalidate any lazy state: */
345 fpu->last_cpu = -1;
346 } else {
347 fpstate_init(&fpu->state);
348
349 /* Safe to do for stopped child tasks: */
350 fpu->fpstate_active = 1;
351 }
352}
353
354/*
355 * 'fpu__restore()' is called to copy FPU registers from
356 * the FPU fpstate to the live hw registers and to activate
357 * access to the hardware registers, so that FPU instructions
358 * can be used afterwards.
359 *
360 * Must be called with kernel preemption disabled (for example
361 * with local interrupts disabled, as it is in the case of
362 * do_device_not_available()).
363 */
364void fpu__restore(struct fpu *fpu)
365{
366 fpu__activate_curr(fpu);
367
368 /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
369 kernel_fpu_disable();
370 fpregs_activate(fpu);
371 copy_kernel_to_fpregs(&fpu->state);
372 fpu->counter++;
373 kernel_fpu_enable();
374}
375EXPORT_SYMBOL_GPL(fpu__restore);
376
377/*
378 * Drops current FPU state: deactivates the fpregs and
379 * the fpstate. NOTE: it still leaves previous contents
380 * in the fpregs in the eager-FPU case.
381 *
382 * This function can be used in cases where we know that
383 * a state-restore is coming: either an explicit one,
384 * or a reschedule.
385 */
386void fpu__drop(struct fpu *fpu)
387{
388 preempt_disable();
389 fpu->counter = 0;
390
391 if (fpu->fpregs_active) {
392 /* Ignore delayed exceptions from user space */
393 asm volatile("1: fwait\n"
394 "2:\n"
395 _ASM_EXTABLE(1b, 2b));
396 fpregs_deactivate(fpu);
397 }
398
399 fpu->fpstate_active = 0;
400
401 preempt_enable();
402}
403
404/*
405 * Clear FPU registers by setting them up from
406 * the init fpstate:
407 */
408static inline void copy_init_fpstate_to_fpregs(void)
409{
410 if (use_xsave())
411 copy_kernel_to_xregs(&init_fpstate.xsave, -1);
412 else
413 copy_kernel_to_fxregs(&init_fpstate.fxsave);
414}
415
416/*
417 * Clear the FPU state back to init state.
418 *
419 * Called by sys_execve(), by the signal handler code and by various
420 * error paths.
421 */
422void fpu__clear(struct fpu *fpu)
423{
424 WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
425
426 if (!use_eager_fpu()) {
427 /* FPU state will be reallocated lazily at the first use. */
428 fpu__drop(fpu);
429 } else {
430 if (!fpu->fpstate_active) {
431 fpu__activate_curr(fpu);
432 user_fpu_begin();
433 }
434 copy_init_fpstate_to_fpregs();
435 }
436}
437
438/*
439 * x87 math exception handling:
440 */
441
442static inline unsigned short get_fpu_cwd(struct fpu *fpu)
443{
444 if (cpu_has_fxsr) {
445 return fpu->state.fxsave.cwd;
446 } else {
447 return (unsigned short)fpu->state.fsave.cwd;
448 }
449}
450
451static inline unsigned short get_fpu_swd(struct fpu *fpu)
452{
453 if (cpu_has_fxsr) {
454 return fpu->state.fxsave.swd;
455 } else {
456 return (unsigned short)fpu->state.fsave.swd;
457 }
458}
459
460static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
461{
462 if (cpu_has_xmm) {
463 return fpu->state.fxsave.mxcsr;
464 } else {
465 return MXCSR_DEFAULT;
466 }
467}
468
469int fpu__exception_code(struct fpu *fpu, int trap_nr)
470{
471 int err;
472
473 if (trap_nr == X86_TRAP_MF) {
474 unsigned short cwd, swd;
475 /*
476 * (~cwd & swd) will mask out exceptions that are not set to unmasked
477 * status. 0x3f covers the exception bits in these regs, 0x200 is the
478 * C1 flag you need in case of a stack fault, 0x040 is the stack
479 * fault bit. We should only be taking one exception at a time,
480 * so if this combination doesn't produce any single exception,
481 * then we have a bad program that isn't synchronizing its FPU usage
482 * and it will suffer the consequences since we won't be able to
483 * fully reproduce the context of the exception
484 */
485 cwd = get_fpu_cwd(fpu);
486 swd = get_fpu_swd(fpu);
487
488 err = swd & ~cwd;
489 } else {
490 /*
491 * The SIMD FPU exceptions are handled a little differently, as there
492 * is only a single status/control register. Thus, to determine which
493 * unmasked exception was caught we must mask the exception mask bits
494 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
495 */
496 unsigned short mxcsr = get_fpu_mxcsr(fpu);
497 err = ~(mxcsr >> 7) & mxcsr;
498 }
499
500 if (err & 0x001) { /* Invalid op */
501 /*
502 * swd & 0x240 == 0x040: Stack Underflow
503 * swd & 0x240 == 0x240: Stack Overflow
504 * User must clear the SF bit (0x40) if set
505 */
506 return FPE_FLTINV;
507 } else if (err & 0x004) { /* Divide by Zero */
508 return FPE_FLTDIV;
509 } else if (err & 0x008) { /* Overflow */
510 return FPE_FLTOVF;
511 } else if (err & 0x012) { /* Denormal, Underflow */
512 return FPE_FLTUND;
513 } else if (err & 0x020) { /* Precision */
514 return FPE_FLTRES;
515 }
516
517 /*
518 * If we're using IRQ 13, or supposedly even some trap
519 * X86_TRAP_MF implementations, it's possible
520 * we get a spurious trap, which is not an error.
521 */
522 return 0;
523}
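
Illustration (not part of the patch): the #MF decoding in fpu__exception_code() boils down to "unmasked exceptions are the status-word bits whose control-word mask bit is clear". A small user-space mirror of that logic; the FPE_* names are replaced by plain strings here, and the example cwd/swd values are made up.

#include <stdio.h>

/* Unmasked x87 exceptions: status-word bits whose control-word mask bit is clear. */
static const char *x87_exception_name(unsigned short cwd, unsigned short swd)
{
	unsigned short err = swd & ~cwd;

	if (err & 0x001)	/* Invalid operation (includes stack faults) */
		return "FLTINV";
	if (err & 0x004)	/* Divide by zero */
		return "FLTDIV";
	if (err & 0x008)	/* Overflow */
		return "FLTOVF";
	if (err & 0x012)	/* Denormal or underflow */
		return "FLTUND";
	if (err & 0x020)	/* Precision */
		return "FLTRES";
	return "none (possibly a spurious trap)";
}

int main(void)
{
	/* Hypothetical example: ZE set in the status word, ZM unmasked in the control word. */
	printf("cwd=0x037b swd=0x0004 -> %s\n", x87_exception_name(0x037b, 0x0004));
	/* All exceptions masked (default cwd 0x037f): nothing to report. */
	printf("cwd=0x037f swd=0x0004 -> %s\n", x87_exception_name(0x037f, 0x0004));
	return 0;
}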
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
new file mode 100644
index 000000000000..fc878fee6a51
--- /dev/null
+++ b/arch/x86/kernel/fpu/init.c
@@ -0,0 +1,354 @@
1/*
2 * x86 FPU boot time init code:
3 */
4#include <asm/fpu/internal.h>
5#include <asm/tlbflush.h>
6
7/*
8 * Initialize the TS bit in CR0 according to the style of context-switches
9 * we are using:
10 */
11static void fpu__init_cpu_ctx_switch(void)
12{
13 if (!cpu_has_eager_fpu)
14 stts();
15 else
16 clts();
17}
18
19/*
20 * Initialize the registers found in all CPUs, CR0 and CR4:
21 */
22static void fpu__init_cpu_generic(void)
23{
24 unsigned long cr0;
25 unsigned long cr4_mask = 0;
26
27 if (cpu_has_fxsr)
28 cr4_mask |= X86_CR4_OSFXSR;
29 if (cpu_has_xmm)
30 cr4_mask |= X86_CR4_OSXMMEXCPT;
31 if (cr4_mask)
32 cr4_set_bits(cr4_mask);
33
34 cr0 = read_cr0();
35 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
36 if (!cpu_has_fpu)
37 cr0 |= X86_CR0_EM;
38 write_cr0(cr0);
39
40 /* Flush out any pending x87 state: */
41 asm volatile ("fninit");
42}
43
44/*
45 * Enable all supported FPU features. Called when a CPU is brought online:
46 */
47void fpu__init_cpu(void)
48{
49 fpu__init_cpu_generic();
50 fpu__init_cpu_xstate();
51 fpu__init_cpu_ctx_switch();
52}
53
54/*
55 * The earliest FPU detection code.
56 *
57 * Set the X86_FEATURE_FPU CPU-capability bit based on
58 * trying to execute an actual sequence of FPU instructions:
59 */
60static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
61{
62 unsigned long cr0;
63 u16 fsw, fcw;
64
65 fsw = fcw = 0xffff;
66
67 cr0 = read_cr0();
68 cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
69 write_cr0(cr0);
70
71 asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
72 : "+m" (fsw), "+m" (fcw));
73
74 if (fsw == 0 && (fcw & 0x103f) == 0x003f)
75 set_cpu_cap(c, X86_FEATURE_FPU);
76 else
77 clear_cpu_cap(c, X86_FEATURE_FPU);
78
79#ifndef CONFIG_MATH_EMULATION
80 if (!cpu_has_fpu) {
81 pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
82 for (;;)
83 asm volatile("hlt");
84 }
85#endif
86}
87
88/*
89 * Boot time FPU feature detection code:
90 */
91unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
92
93static void __init fpu__init_system_mxcsr(void)
94{
95 unsigned int mask = 0;
96
97 if (cpu_has_fxsr) {
98 struct fxregs_state fx_tmp __aligned(32) = { };
99
100 asm volatile("fxsave %0" : "+m" (fx_tmp));
101
102 mask = fx_tmp.mxcsr_mask;
103
104 /*
105 * If zero then use the default features mask,
106 * which has all features set, except the
107 * denormals-are-zero feature bit:
108 */
109 if (mask == 0)
110 mask = 0x0000ffbf;
111 }
112 mxcsr_feature_mask &= mask;
113}
114
115/*
116 * Once per bootup FPU initialization sequences that will run on most x86 CPUs:
117 */
118static void __init fpu__init_system_generic(void)
119{
120 /*
121 * Set up the legacy init FPU context. (xstate init might overwrite this
122 * with a more modern format, if the CPU supports it.)
123 */
124 fpstate_init_fxstate(&init_fpstate.fxsave);
125
126 fpu__init_system_mxcsr();
127}
128
129/*
130 * Size of the FPU context state. All tasks in the system use the
131 * same context size, regardless of what portion they use.
132 * This is inherent to the XSAVE architecture which puts all state
133 * components into a single, contiguous memory block:
134 */
135unsigned int xstate_size;
136EXPORT_SYMBOL_GPL(xstate_size);
137
138/*
139 * Set up the xstate_size based on the legacy FPU context size.
140 *
141 * We set this up first, and later it will be overwritten by
142 * fpu__init_system_xstate() if the CPU knows about xstates.
143 */
144static void __init fpu__init_system_xstate_size_legacy(void)
145{
146 static int on_boot_cpu = 1;
147
148 WARN_ON_FPU(!on_boot_cpu);
149 on_boot_cpu = 0;
150
151 /*
152 * Note that xstate_size might be overwritten later during
153 * fpu__init_system_xstate().
154 */
155
156 if (!cpu_has_fpu) {
157 /*
158 * Disable xsave as we do not support it if i387
159 * emulation is enabled.
160 */
161 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
162 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
163 xstate_size = sizeof(struct swregs_state);
164 } else {
165 if (cpu_has_fxsr)
166 xstate_size = sizeof(struct fxregs_state);
167 else
168 xstate_size = sizeof(struct fregs_state);
169 }
170 /*
171 * Quirk: we don't yet handle the XSAVES* instructions
172 * correctly, as we don't correctly convert between
173 * standard and compacted format when interfacing
174 * with user-space - so disable it for now.
175 *
176 * The difference is small: with recent CPUs the
177 * compacted format is only marginally smaller than
178 * the standard FPU state format.
179 *
180 * ( This is easy to backport while we are fixing
181 * XSAVES* support. )
182 */
183 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
184}
185
186/*
187 * FPU context switching strategies:
188 *
189 * Contrary to popular belief, we don't do lazy FPU saves, due to the
190 * task migration complications it brings on SMP - we only do
191 * lazy FPU restores.
192 *
193 * 'lazy' is the traditional strategy, which is based on setting
194 * CR0::TS to 1 during context-switch (instead of doing a full
195 * restore of the FPU state), which causes the first FPU instruction
196 * after the context switch (whenever it is executed) to fault - at
197 * which point we lazily restore the FPU state into FPU registers.
198 *
199 * Tasks are of course under no obligation to execute FPU instructions,
200 * so it can easily happen that another context-switch occurs without
201 * a single FPU instruction being executed. If we eventually switch
202 * back to the original task (that still owns the FPU) then we have
203 * not only saved the restores along the way, but we also have the
204 * FPU ready to be used for the original task.
205 *
206 * 'eager' switching is used on modern CPUs, there we switch the FPU
207 * state during every context switch, regardless of whether the task
208 * has used FPU instructions in that time slice or not. This is done
209 * because modern FPU context saving instructions are able to optimize
210 * state saving and restoration in hardware: they can detect both
211 * unused and untouched FPU state and optimize accordingly.
212 *
213 * [ Note that even in 'lazy' mode we might optimize context switches
214 * to use 'eager' restores, if we detect that a task is using the FPU
215 * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
216 */
217static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
218
219static int __init eager_fpu_setup(char *s)
220{
221 if (!strcmp(s, "on"))
222 eagerfpu = ENABLE;
223 else if (!strcmp(s, "off"))
224 eagerfpu = DISABLE;
225 else if (!strcmp(s, "auto"))
226 eagerfpu = AUTO;
227 return 1;
228}
229__setup("eagerfpu=", eager_fpu_setup);
230
231/*
232 * Pick the FPU context switching strategy:
233 */
234static void __init fpu__init_system_ctx_switch(void)
235{
236 static bool on_boot_cpu = 1;
237
238 WARN_ON_FPU(!on_boot_cpu);
239 on_boot_cpu = 0;
240
241 WARN_ON_FPU(current->thread.fpu.fpstate_active);
242 current_thread_info()->status = 0;
243
244 /* Auto enable eagerfpu for xsaveopt */
245 if (cpu_has_xsaveopt && eagerfpu != DISABLE)
246 eagerfpu = ENABLE;
247
248 if (xfeatures_mask & XSTATE_EAGER) {
249 if (eagerfpu == DISABLE) {
250 pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
251 xfeatures_mask & XSTATE_EAGER);
252 xfeatures_mask &= ~XSTATE_EAGER;
253 } else {
254 eagerfpu = ENABLE;
255 }
256 }
257
258 if (eagerfpu == ENABLE)
259 setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
260
261 printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
262}
263
264/*
265 * Called on the boot CPU once per system bootup, to set up the initial
266 * FPU state that is later cloned into all processes:
267 */
268void __init fpu__init_system(struct cpuinfo_x86 *c)
269{
270 fpu__init_system_early_generic(c);
271
272 /*
273 * The FPU has to be operational for some of the
274 * later FPU init activities:
275 */
276 fpu__init_cpu();
277
278 /*
279 * But don't leave CR0::TS set yet, as some of the FPU setup
280 * methods depend on being able to execute FPU instructions
281 * that will fault on a set TS, such as the FXSAVE in
282 * fpu__init_system_mxcsr().
283 */
284 clts();
285
286 fpu__init_system_generic();
287 fpu__init_system_xstate_size_legacy();
288 fpu__init_system_xstate();
289
290 fpu__init_system_ctx_switch();
291}
292
293/*
294 * Boot parameter to turn off FPU support and fall back to math-emu:
295 */
296static int __init no_387(char *s)
297{
298 setup_clear_cpu_cap(X86_FEATURE_FPU);
299 return 1;
300}
301__setup("no387", no_387);
302
303/*
304 * Disable all xstate CPU features:
305 */
306static int __init x86_noxsave_setup(char *s)
307{
308 if (strlen(s))
309 return 0;
310
311 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
312 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
313 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
314 setup_clear_cpu_cap(X86_FEATURE_AVX);
315 setup_clear_cpu_cap(X86_FEATURE_AVX2);
316
317 return 1;
318}
319__setup("noxsave", x86_noxsave_setup);
320
321/*
322 * Disable the XSAVEOPT instruction specifically:
323 */
324static int __init x86_noxsaveopt_setup(char *s)
325{
326 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
327
328 return 1;
329}
330__setup("noxsaveopt", x86_noxsaveopt_setup);
331
332/*
333 * Disable the XSAVES instruction:
334 */
335static int __init x86_noxsaves_setup(char *s)
336{
337 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
338
339 return 1;
340}
341__setup("noxsaves", x86_noxsaves_setup);
342
343/*
344 * Disable FX save/restore and SSE support:
345 */
346static int __init x86_nofxsr_setup(char *s)
347{
348 setup_clear_cpu_cap(X86_FEATURE_FXSR);
349 setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
350 setup_clear_cpu_cap(X86_FEATURE_XMM);
351
352 return 1;
353}
354__setup("nofxsr", x86_nofxsr_setup);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
new file mode 100644
index 000000000000..dc60810c1c74
--- /dev/null
+++ b/arch/x86/kernel/fpu/regset.c
@@ -0,0 +1,356 @@
1/*
2 * FPU register's regset abstraction, for ptrace, core dumps, etc.
3 */
4#include <asm/fpu/internal.h>
5#include <asm/fpu/signal.h>
6#include <asm/fpu/regset.h>
7
8/*
9 * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
10 * as the "regset->n" for the xstate regset will be updated based on the feature
11 * capabilities supported by xsave.
12 */
13int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
14{
15 struct fpu *target_fpu = &target->thread.fpu;
16
17 return target_fpu->fpstate_active ? regset->n : 0;
18}
19
20int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
21{
22 struct fpu *target_fpu = &target->thread.fpu;
23
24 return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
25}
26
27int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
28 unsigned int pos, unsigned int count,
29 void *kbuf, void __user *ubuf)
30{
31 struct fpu *fpu = &target->thread.fpu;
32
33 if (!cpu_has_fxsr)
34 return -ENODEV;
35
36 fpu__activate_fpstate_read(fpu);
37 fpstate_sanitize_xstate(fpu);
38
39 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
40 &fpu->state.fxsave, 0, -1);
41}
42
43int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
44 unsigned int pos, unsigned int count,
45 const void *kbuf, const void __user *ubuf)
46{
47 struct fpu *fpu = &target->thread.fpu;
48 int ret;
49
50 if (!cpu_has_fxsr)
51 return -ENODEV;
52
53 fpu__activate_fpstate_write(fpu);
54 fpstate_sanitize_xstate(fpu);
55
56 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
57 &fpu->state.fxsave, 0, -1);
58
59 /*
60 * mxcsr reserved bits must be masked to zero for security reasons.
61 */
62 fpu->state.fxsave.mxcsr &= mxcsr_feature_mask;
63
64 /*
65 * update the header bits in the xsave header, indicating the
66 * presence of FP and SSE state.
67 */
68 if (cpu_has_xsave)
69 fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE;
70
71 return ret;
72}
73
74int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
75 unsigned int pos, unsigned int count,
76 void *kbuf, void __user *ubuf)
77{
78 struct fpu *fpu = &target->thread.fpu;
79 struct xregs_state *xsave;
80 int ret;
81
82 if (!cpu_has_xsave)
83 return -ENODEV;
84
85 fpu__activate_fpstate_read(fpu);
86
87 xsave = &fpu->state.xsave;
88
89 /*
90 * Copy the 48 bytes defined by the software first into the xstate
91 * memory layout in the thread struct, so that we can copy the entire
92 * xstateregs to the user using one user_regset_copyout().
93 */
94 memcpy(&xsave->i387.sw_reserved,
95 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
96 /*
97 * Copy the xstate memory layout.
98 */
99 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
100 return ret;
101}
102
103int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
104 unsigned int pos, unsigned int count,
105 const void *kbuf, const void __user *ubuf)
106{
107 struct fpu *fpu = &target->thread.fpu;
108 struct xregs_state *xsave;
109 int ret;
110
111 if (!cpu_has_xsave)
112 return -ENODEV;
113
114 fpu__activate_fpstate_write(fpu);
115
116 xsave = &fpu->state.xsave;
117
118 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
119 /*
120 * mxcsr reserved bits must be masked to zero for security reasons.
121 */
122 xsave->i387.mxcsr &= mxcsr_feature_mask;
123 xsave->header.xfeatures &= xfeatures_mask;
124 /*
125 * These bits must be zero.
126 */
127 memset(&xsave->header.reserved, 0, 48);
128
129 return ret;
130}
131
132#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
133
134/*
135 * FPU tag word conversions.
136 */
137
138static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
139{
140 unsigned int tmp; /* to avoid 16 bit prefixes in the code */
141
142 /* Transform each pair of bits into 01 (valid) or 00 (empty) */
143 tmp = ~twd;
144 tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
145 /* and move the valid bits to the lower byte. */
146 tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
147 tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
148 tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
149
150 return tmp;
151}
152
153#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
154#define FP_EXP_TAG_VALID 0
155#define FP_EXP_TAG_ZERO 1
156#define FP_EXP_TAG_SPECIAL 2
157#define FP_EXP_TAG_EMPTY 3
158
159static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave)
160{
161 struct _fpxreg *st;
162 u32 tos = (fxsave->swd >> 11) & 7;
163 u32 twd = (unsigned long) fxsave->twd;
164 u32 tag;
165 u32 ret = 0xffff0000u;
166 int i;
167
168 for (i = 0; i < 8; i++, twd >>= 1) {
169 if (twd & 0x1) {
170 st = FPREG_ADDR(fxsave, (i - tos) & 7);
171
172 switch (st->exponent & 0x7fff) {
173 case 0x7fff:
174 tag = FP_EXP_TAG_SPECIAL;
175 break;
176 case 0x0000:
177 if (!st->significand[0] &&
178 !st->significand[1] &&
179 !st->significand[2] &&
180 !st->significand[3])
181 tag = FP_EXP_TAG_ZERO;
182 else
183 tag = FP_EXP_TAG_SPECIAL;
184 break;
185 default:
186 if (st->significand[3] & 0x8000)
187 tag = FP_EXP_TAG_VALID;
188 else
189 tag = FP_EXP_TAG_SPECIAL;
190 break;
191 }
192 } else {
193 tag = FP_EXP_TAG_EMPTY;
194 }
195 ret |= tag << (2 * i);
196 }
197 return ret;
198}
199
200/*
201 * FXSR floating point environment conversions.
202 */
203
204void
205convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
206{
207 struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
208 struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
209 struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
210 int i;
211
212 env->cwd = fxsave->cwd | 0xffff0000u;
213 env->swd = fxsave->swd | 0xffff0000u;
214 env->twd = twd_fxsr_to_i387(fxsave);
215
216#ifdef CONFIG_X86_64
217 env->fip = fxsave->rip;
218 env->foo = fxsave->rdp;
219 /*
220 * These should actually be ds/cs at FPU exception time, but
221 * that information is not available in 64-bit mode.
222 */
223 env->fcs = task_pt_regs(tsk)->cs;
224 if (tsk == current) {
225 savesegment(ds, env->fos);
226 } else {
227 env->fos = tsk->thread.ds;
228 }
229 env->fos |= 0xffff0000;
230#else
231 env->fip = fxsave->fip;
232 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
233 env->foo = fxsave->foo;
234 env->fos = fxsave->fos;
235#endif
236
237 for (i = 0; i < 8; ++i)
238 memcpy(&to[i], &from[i], sizeof(to[0]));
239}
240
241void convert_to_fxsr(struct task_struct *tsk,
242 const struct user_i387_ia32_struct *env)
243
244{
245 struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
246 struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
247 struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
248 int i;
249
250 fxsave->cwd = env->cwd;
251 fxsave->swd = env->swd;
252 fxsave->twd = twd_i387_to_fxsr(env->twd);
253 fxsave->fop = (u16) ((u32) env->fcs >> 16);
254#ifdef CONFIG_X86_64
255 fxsave->rip = env->fip;
256 fxsave->rdp = env->foo;
257 /* cs and ds ignored */
258#else
259 fxsave->fip = env->fip;
260 fxsave->fcs = (env->fcs & 0xffff);
261 fxsave->foo = env->foo;
262 fxsave->fos = env->fos;
263#endif
264
265 for (i = 0; i < 8; ++i)
266 memcpy(&to[i], &from[i], sizeof(from[0]));
267}
268
269int fpregs_get(struct task_struct *target, const struct user_regset *regset,
270 unsigned int pos, unsigned int count,
271 void *kbuf, void __user *ubuf)
272{
273 struct fpu *fpu = &target->thread.fpu;
274 struct user_i387_ia32_struct env;
275
276 fpu__activate_fpstate_read(fpu);
277
278 if (!static_cpu_has(X86_FEATURE_FPU))
279 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
280
281 if (!cpu_has_fxsr)
282 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
283 &fpu->state.fsave, 0,
284 -1);
285
286 fpstate_sanitize_xstate(fpu);
287
288 if (kbuf && pos == 0 && count == sizeof(env)) {
289 convert_from_fxsr(kbuf, target);
290 return 0;
291 }
292
293 convert_from_fxsr(&env, target);
294
295 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
296}
297
298int fpregs_set(struct task_struct *target, const struct user_regset *regset,
299 unsigned int pos, unsigned int count,
300 const void *kbuf, const void __user *ubuf)
301{
302 struct fpu *fpu = &target->thread.fpu;
303 struct user_i387_ia32_struct env;
304 int ret;
305
306 fpu__activate_fpstate_write(fpu);
307 fpstate_sanitize_xstate(fpu);
308
309 if (!static_cpu_has(X86_FEATURE_FPU))
310 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
311
312 if (!cpu_has_fxsr)
313 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
314 &fpu->state.fsave, 0,
315 -1);
316
317 if (pos > 0 || count < sizeof(env))
318 convert_from_fxsr(&env, target);
319
320 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
321 if (!ret)
322 convert_to_fxsr(target, &env);
323
324 /*
325 * update the header bit in the xsave header, indicating the
326 * presence of FP.
327 */
328 if (cpu_has_xsave)
329 fpu->state.xsave.header.xfeatures |= XSTATE_FP;
330 return ret;
331}
332
333/*
334 * FPU state for core dumps.
335 * This is only used for a.out dumps now.
336 * It is declared generically using elf_fpregset_t (which is
337 * struct user_i387_struct) but is in fact only used for 32-bit
338 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
339 */
340int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
341{
342 struct task_struct *tsk = current;
343 struct fpu *fpu = &tsk->thread.fpu;
344 int fpvalid;
345
346 fpvalid = fpu->fpstate_active;
347 if (fpvalid)
348 fpvalid = !fpregs_get(tsk, NULL,
349 0, sizeof(struct user_i387_ia32_struct),
350 ufpu, NULL);
351
352 return fpvalid;
353}
354EXPORT_SYMBOL(dump_fpu);
355
356#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
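
Illustration (not part of the patch): the bit-parallel fold in twd_i387_to_fxsr() compresses each 2-bit i387 tag into one FXSR bit (set for any non-empty register). A stand-alone sketch that checks that fold against a naive per-tag loop for all 65536 possible tag words; the function names are made up for the example.

#include <stdio.h>

/* The same bit-parallel fold as twd_i387_to_fxsr() above. */
static unsigned short fold_tag_word(unsigned short twd)
{
	unsigned int tmp = ~twd;		/* 11 (empty) -> 00, everything else -> non-zero pair */

	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return tmp;
}

/* Naive reference: one output bit per 2-bit tag, set unless the tag is 3 (empty). */
static unsigned short fold_tag_word_slow(unsigned short twd)
{
	unsigned short out = 0;
	int i;

	for (i = 0; i < 8; i++)
		if (((twd >> (2 * i)) & 3) != 3)
			out |= 1 << i;
	return out;
}

int main(void)
{
	unsigned int twd;

	for (twd = 0; twd <= 0xffff; twd++) {
		if (fold_tag_word(twd) != fold_tag_word_slow(twd)) {
			printf("mismatch at %#x\n", twd);
			return 1;
		}
	}
	printf("all 65536 tag words fold identically\n");
	return 0;
}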
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
new file mode 100644
index 000000000000..50ec9af1bd51
--- /dev/null
+++ b/arch/x86/kernel/fpu/signal.c
@@ -0,0 +1,404 @@
1/*
2 * FPU signal frame handling routines.
3 */
4
5#include <linux/compat.h>
6#include <linux/cpu.h>
7
8#include <asm/fpu/internal.h>
9#include <asm/fpu/signal.h>
10#include <asm/fpu/regset.h>
11
12#include <asm/sigframe.h>
13
14static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
15
16/*
17 * Check for the presence of extended state information in the
18 * user fpstate pointer in the sigcontext.
19 */
20static inline int check_for_xstate(struct fxregs_state __user *buf,
21 void __user *fpstate,
22 struct _fpx_sw_bytes *fx_sw)
23{
24 int min_xstate_size = sizeof(struct fxregs_state) +
25 sizeof(struct xstate_header);
26 unsigned int magic2;
27
28 if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
29 return -1;
30
31 /* Check for the first magic field and other error scenarios. */
32 if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
33 fx_sw->xstate_size < min_xstate_size ||
34 fx_sw->xstate_size > xstate_size ||
35 fx_sw->xstate_size > fx_sw->extended_size)
36 return -1;
37
38 /*
39 * Check for the presence of the second magic word at the end of the memory
40 * layout. This detects the case where the user just copied the legacy
41 * fpstate layout without copying the extended state information
42 * in the memory layout.
43 */
44 if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
45 || magic2 != FP_XSTATE_MAGIC2)
46 return -1;
47
48 return 0;
49}
50
51/*
52 * Signal frame handlers.
53 */
54static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
55{
56 if (use_fxsr()) {
57 struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
58 struct user_i387_ia32_struct env;
59 struct _fpstate_ia32 __user *fp = buf;
60
61 convert_from_fxsr(&env, tsk);
62
63 if (__copy_to_user(buf, &env, sizeof(env)) ||
64 __put_user(xsave->i387.swd, &fp->status) ||
65 __put_user(X86_FXSR_MAGIC, &fp->magic))
66 return -1;
67 } else {
68 struct fregs_state __user *fp = buf;
69 u32 swd;
70 if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
71 return -1;
72 }
73
74 return 0;
75}
76
77static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
78{
79 struct xregs_state __user *x = buf;
80 struct _fpx_sw_bytes *sw_bytes;
81 u32 xfeatures;
82 int err;
83
84 /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
85 sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
86 err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
87
88 if (!use_xsave())
89 return err;
90
91 err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
92
93 /*
94 * Read the xfeatures which we copied (directly from the cpu or
95 * from the state in task struct) to the user buffers.
96 */
97 err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures);
98
99 /*
100 * For legacy compatibility, we always set the FP/SSE bits in the bit
101 * vector while saving the state to the user context. This will
102 * enable us to capture any changes (during sigreturn) to
103 * the FP/SSE bits made by legacy applications which don't touch
104 * xfeatures in the xsave header.
105 *
106 * xsave aware apps can change the xfeatures in the xsave
107 * header as well as change any contents in the memory layout.
108 * xrestore as part of sigreturn will capture all the changes.
109 */
110 xfeatures |= XSTATE_FPSSE;
111
112 err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
113
114 return err;
115}
116
117static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
118{
119 int err;
120
121 if (use_xsave())
122 err = copy_xregs_to_user(buf);
123 else if (use_fxsr())
124 err = copy_fxregs_to_user((struct fxregs_state __user *) buf);
125 else
126 err = copy_fregs_to_user((struct fregs_state __user *) buf);
127
128 if (unlikely(err) && __clear_user(buf, xstate_size))
129 err = -EFAULT;
130 return err;
131}
132
133/*
134 * Save the fpu, extended register state to the user signal frame.
135 *
136 * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
137 * state is copied.
138 * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
139 *
140 * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
141 * buf != buf_fx for 32-bit frames with fxstate.
142 *
143 * If the fpu, extended register state is live, save the state directly
144 * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
145 * copy the thread's fpu state to the user frame starting at 'buf_fx'.
146 *
147 * If this is a 32-bit frame with fxstate, put an fsave header before
148 * the aligned state at 'buf_fx'.
149 *
150 * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
151 * indicating the absence/presence of the extended state to the user.
152 */
153int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
154{
155 struct xregs_state *xsave = &current->thread.fpu.state.xsave;
156 struct task_struct *tsk = current;
157 int ia32_fxstate = (buf != buf_fx);
158
159 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
160 config_enabled(CONFIG_IA32_EMULATION));
161
162 if (!access_ok(VERIFY_WRITE, buf, size))
163 return -EACCES;
164
165 if (!static_cpu_has(X86_FEATURE_FPU))
166 return fpregs_soft_get(current, NULL, 0,
167 sizeof(struct user_i387_ia32_struct), NULL,
168 (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
169
170 if (fpregs_active()) {
171 /* Save the live register state to the user directly. */
172 if (copy_fpregs_to_sigframe(buf_fx))
173 return -1;
174 /* Update the thread's fxstate to save the fsave header. */
175 if (ia32_fxstate)
176 copy_fxregs_to_kernel(&tsk->thread.fpu);
177 } else {
178 fpstate_sanitize_xstate(&tsk->thread.fpu);
179 if (__copy_to_user(buf_fx, xsave, xstate_size))
180 return -1;
181 }
182
183 /* Save the fsave header for the 32-bit frames. */
184 if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
185 return -1;
186
187 if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
188 return -1;
189
190 return 0;
191}
192
193static inline void
194sanitize_restored_xstate(struct task_struct *tsk,
195 struct user_i387_ia32_struct *ia32_env,
196 u64 xfeatures, int fx_only)
197{
198 struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
199 struct xstate_header *header = &xsave->header;
200
201 if (use_xsave()) {
202 /* These bits must be zero. */
203 memset(header->reserved, 0, 48);
204
205 /*
206 * Init the state that is not present in the memory
207 * layout and not enabled by the OS.
208 */
209 if (fx_only)
210 header->xfeatures = XSTATE_FPSSE;
211 else
212 header->xfeatures &= (xfeatures_mask & xfeatures);
213 }
214
215 if (use_fxsr()) {
216 /*
217 * mxcsr reserved bits must be masked to zero for security
218 * reasons.
219 */
220 xsave->i387.mxcsr &= mxcsr_feature_mask;
221
222 convert_to_fxsr(tsk, ia32_env);
223 }
224}
225
226/*
227 * Restore the extended state if present. Otherwise, restore the FP/SSE state.
228 */
229static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
230{
231 if (use_xsave()) {
232 if ((unsigned long)buf % 64 || fx_only) {
233 u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE;
234 copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
235 return copy_user_to_fxregs(buf);
236 } else {
237 u64 init_bv = xfeatures_mask & ~xbv;
238 if (unlikely(init_bv))
239 copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
240 return copy_user_to_xregs(buf, xbv);
241 }
242 } else if (use_fxsr()) {
243 return copy_user_to_fxregs(buf);
244 } else
245 return copy_user_to_fregs(buf);
246}
247
248static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
249{
250 int ia32_fxstate = (buf != buf_fx);
251 struct task_struct *tsk = current;
252 struct fpu *fpu = &tsk->thread.fpu;
253 int state_size = xstate_size;
254 u64 xfeatures = 0;
255 int fx_only = 0;
256
257 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
258 config_enabled(CONFIG_IA32_EMULATION));
259
260 if (!buf) {
261 fpu__clear(fpu);
262 return 0;
263 }
264
265 if (!access_ok(VERIFY_READ, buf, size))
266 return -EACCES;
267
268 fpu__activate_curr(fpu);
269
270 if (!static_cpu_has(X86_FEATURE_FPU))
271 return fpregs_soft_set(current, NULL,
272 0, sizeof(struct user_i387_ia32_struct),
273 NULL, buf) != 0;
274
275 if (use_xsave()) {
276 struct _fpx_sw_bytes fx_sw_user;
277 if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
278 /*
279 * Couldn't find the extended state information in the
280 * memory layout. Restore just the FP/SSE and init all
281 * the other extended state.
282 */
283 state_size = sizeof(struct fxregs_state);
284 fx_only = 1;
285 } else {
286 state_size = fx_sw_user.xstate_size;
287 xfeatures = fx_sw_user.xfeatures;
288 }
289 }
290
291 if (ia32_fxstate) {
292 /*
293 * For 32-bit frames with fxstate, copy the user state to the
294 * thread's fpu state, reconstruct the fxstate from the fsave
295 * header and sanitize the copied state.
296 */
297 struct fpu *fpu = &tsk->thread.fpu;
298 struct user_i387_ia32_struct env;
299 int err = 0;
300
301 /*
302 * Drop the current fpu, which clears fpu->fpstate_active. This ensures
303 * that a context switch during the copy of the new state cannot
304 * save or restore the half-copied intermediate state, which would
305 * corrupt the freshly restored state. We are ready to save/restore
306 * the state again only after fpu->fpstate_active has been set
307 * once more.
308 */
309 fpu__drop(fpu);
310
311 if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) ||
312 __copy_from_user(&env, buf, sizeof(env))) {
313 fpstate_init(&fpu->state);
314 err = -1;
315 } else {
316 sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
317 }
318
319 fpu->fpstate_active = 1;
320 if (use_eager_fpu()) {
321 preempt_disable();
322 fpu__restore(fpu);
323 preempt_enable();
324 }
325
326 return err;
327 } else {
328 /*
329 * For 64-bit frames and 32-bit fsave frames, restore the user
330 * state to the registers directly (with exceptions handled).
331 */
332 user_fpu_begin();
333 if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) {
334 fpu__clear(fpu);
335 return -1;
336 }
337 }
338
339 return 0;
340}
341
342static inline int xstate_sigframe_size(void)
343{
344 return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
345}
346
347/*
348 * Restore FPU state from a sigframe:
349 */
350int fpu__restore_sig(void __user *buf, int ia32_frame)
351{
352 void __user *buf_fx = buf;
353 int size = xstate_sigframe_size();
354
355 if (ia32_frame && use_fxsr()) {
356 buf_fx = buf + sizeof(struct fregs_state);
357 size += sizeof(struct fregs_state);
358 }
359
360 return __fpu__restore_sig(buf, buf_fx, size);
361}
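The matching consumer is the sigreturn path: a short sketch, with 'buf' standing for the user-space fpstate pointer read out of the sigcontext (the restore_sigcontext() hunk later in this diff does exactly this):

	/* Sketch: hand the user frame back to the FPU core on sigreturn. */
	err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));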
362
363unsigned long
364fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
365 unsigned long *buf_fx, unsigned long *size)
366{
367 unsigned long frame_size = xstate_sigframe_size();
368
369 *buf_fx = sp = round_down(sp - frame_size, 64);
370 if (ia32_frame && use_fxsr()) {
371 frame_size += sizeof(struct fregs_state);
372 sp -= sizeof(struct fregs_state);
373 }
374
375 *size = frame_size;
376
377 return sp;
378}
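A worked example with illustrative numbers: assuming an xstate_size of 832 bytes (512-byte FXSAVE area + 64-byte xstate header + a 256-byte AVX component) and an incoming sp of 0x7ffd1000, frame_size starts at 832 + 4 (FP_XSTATE_MAGIC2) = 836 and buf_fx = round_down(0x7ffd1000 - 836, 64) = 0x7ffd0c80. For a 64-bit frame that is also the returned sp, with *size = 836; for an ia32 frame the extra fsave header (112 bytes, assuming the classic i387 fsave layout plus status word) is accounted for as well, so sp drops to 0x7ffd0c10 and *size becomes 948.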
379/*
380 * Prepare the SW reserved portion of the fxsave memory layout, indicating
381 * the presence of the extended state information in the memory layout
382 * pointed to by the fpstate pointer in the sigcontext.
383 * This will be saved whenever the FP and extended state context is
384 * saved on the user stack during signal delivery to user-space.
385 */
386void fpu__init_prepare_fx_sw_frame(void)
387{
388 int fsave_header_size = sizeof(struct fregs_state);
389 int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
390
391 if (config_enabled(CONFIG_X86_32))
392 size += fsave_header_size;
393
394 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
395 fx_sw_reserved.extended_size = size;
396 fx_sw_reserved.xfeatures = xfeatures_mask;
397 fx_sw_reserved.xstate_size = xstate_size;
398
399 if (config_enabled(CONFIG_IA32_EMULATION)) {
400 fx_sw_reserved_ia32 = fx_sw_reserved;
401 fx_sw_reserved_ia32.extended_size += fsave_header_size;
402 }
403}
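With the same illustrative 832-byte xstate_size, the bookkeeping works out as: fx_sw_reserved.xstate_size = 832 and fx_sw_reserved.extended_size = 832 + 4 = 836 on a 64-bit kernel (948 on CONFIG_X86_32, where the 112-byte fsave header is included); under CONFIG_IA32_EMULATION the separate fx_sw_reserved_ia32 copy likewise reports extended_size = 948, so that 32-bit frames built by a 64-bit kernel describe the extra fsave prefix.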
404
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
new file mode 100644
index 000000000000..62fc001c7846
--- /dev/null
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -0,0 +1,461 @@
1/*
2 * xsave/xrstor support.
3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */
6#include <linux/compat.h>
7#include <linux/cpu.h>
8
9#include <asm/fpu/api.h>
10#include <asm/fpu/internal.h>
11#include <asm/fpu/signal.h>
12#include <asm/fpu/regset.h>
13
14#include <asm/tlbflush.h>
15
16static const char *xfeature_names[] =
17{
18 "x87 floating point registers" ,
19 "SSE registers" ,
20 "AVX registers" ,
21 "MPX bounds registers" ,
22 "MPX CSR" ,
23 "AVX-512 opmask" ,
24 "AVX-512 Hi256" ,
25 "AVX-512 ZMM_Hi256" ,
26 "unknown xstate feature" ,
27};
28
29/*
30 * Mask of xstate features supported by the CPU and the kernel:
31 */
32u64 xfeatures_mask __read_mostly;
33
34static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
35static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
36static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
37
38/* The number of supported xfeatures in xfeatures_mask: */
39static unsigned int xfeatures_nr;
40
41/*
42 * Return whether the system supports a given xfeature.
43 *
44 * Also return the name of the (most advanced) feature that the caller requested:
45 */
46int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
47{
48 u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask;
49
50 if (unlikely(feature_name)) {
51 long xfeature_idx, max_idx;
52 u64 xfeatures_print;
53 /*
54 * We use fls() here so that we can print the most advanced
55 * feature that was requested but is missing. If a driver
56 * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the
57 * missing AVX feature - this is the most informative message
58 * to users:
59 */
60 if (xfeatures_missing)
61 xfeatures_print = xfeatures_missing;
62 else
63 xfeatures_print = xfeatures_needed;
64
65 xfeature_idx = fls64(xfeatures_print)-1;
66 max_idx = ARRAY_SIZE(xfeature_names)-1;
67 xfeature_idx = min(xfeature_idx, max_idx);
68
69 *feature_name = xfeature_names[xfeature_idx];
70 }
71
72 if (xfeatures_missing)
73 return 0;
74
75 return 1;
76}
77EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
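For reference, this is roughly how the crypto glue code elsewhere in this series consumes the new helper ('feature_name' is just a local in this sketch):

	/* Sketch: probe for SSE+AVX before registering an AVX-accelerated driver. */
	const char *feature_name;

	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}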
78
79/*
80 * When executing XSAVEOPT (or other optimized XSAVE instructions), if
81 * a processor implementation detects that an FPU state component is still
82 * (or is again) in its initialized state, it may clear the corresponding
83 * bit in the header.xfeatures field, and can skip the writeout of registers
84 * to the corresponding memory layout.
85 *
86 * This means that when the bit is zero, the state component might still contain
87 * some previous, non-initialized register state.
88 *
89 * Before writing xstate information to user-space we sanitize those components,
90 * to always ensure that the memory layout of a feature will be in the init state
91 * if the corresponding header bit is zero. This is to ensure that user-space doesn't
92 * see some stale state in the memory layout during signal handling, debugging etc.
93 */
94void fpstate_sanitize_xstate(struct fpu *fpu)
95{
96 struct fxregs_state *fx = &fpu->state.fxsave;
97 int feature_bit;
98 u64 xfeatures;
99
100 if (!use_xsaveopt())
101 return;
102
103 xfeatures = fpu->state.xsave.header.xfeatures;
104
105 /*
106 * None of the feature bits are in the init state, so there is
107 * nothing to do for us: the memory layout is already up to date.
108 */
109 if ((xfeatures & xfeatures_mask) == xfeatures_mask)
110 return;
111
112 /*
113 * FP is in init state
114 */
115 if (!(xfeatures & XSTATE_FP)) {
116 fx->cwd = 0x37f;
117 fx->swd = 0;
118 fx->twd = 0;
119 fx->fop = 0;
120 fx->rip = 0;
121 fx->rdp = 0;
122 memset(&fx->st_space[0], 0, 128);
123 }
124
125 /*
126 * SSE is in init state
127 */
128 if (!(xfeatures & XSTATE_SSE))
129 memset(&fx->xmm_space[0], 0, 256);
130
131 /*
132 * The first two features are FP and SSE, which we already
133 * handled above in a special way:
134 */
135 feature_bit = 0x2;
136 xfeatures = (xfeatures_mask & ~xfeatures) >> 2;
137
138 /*
139 * Update all the remaining memory layouts according to their
140 * standard xstate layout, if their header bit is in the init
141 * state:
142 */
143 while (xfeatures) {
144 if (xfeatures & 0x1) {
145 int offset = xstate_offsets[feature_bit];
146 int size = xstate_sizes[feature_bit];
147
148 memcpy((void *)fx + offset,
149 (void *)&init_fpstate.xsave + offset,
150 size);
151 }
152
153 xfeatures >>= 1;
154 feature_bit++;
155 }
156}
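An illustrative walk-through: assume xfeatures_mask is 0x7 (FP | SSE | YMM) and the last XSAVEOPT left header.xfeatures at 0x3. FP and SSE are marked present, so the two special cases above are skipped; the loop then sees (0x7 & ~0x3) >> 2 == 0x1, i.e. only the YMM component is in its init state, and copies that component (typically 256 bytes) from init_fpstate into the buffer at xstate_offsets[2].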
157
158/*
159 * Enable the extended processor state save/restore feature.
160 * Called once per CPU onlining.
161 */
162void fpu__init_cpu_xstate(void)
163{
164 if (!cpu_has_xsave || !xfeatures_mask)
165 return;
166
167 cr4_set_bits(X86_CR4_OSXSAVE);
168 xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
169}
170
171/*
172 * Record the offsets and sizes of various xstates contained
173 * in the XSAVE state memory layout.
174 *
175 * ( Note that certain features might be non-present, for them
176 * we'll have 0 offset and 0 size. )
177 */
178static void __init setup_xstate_features(void)
179{
180 u32 eax, ebx, ecx, edx, leaf;
181
182 xfeatures_nr = fls64(xfeatures_mask);
183
184 for (leaf = 2; leaf < xfeatures_nr; leaf++) {
185 cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
186
187 xstate_offsets[leaf] = ebx;
188 xstate_sizes[leaf] = eax;
189
190 printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax);
191 }
192}
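As an illustration, on a CPU whose only extended state beyond FP/SSE is AVX, xfeatures_nr = fls64(0x7) = 3, so the loop queries leaf 2 only; CPUID(0xd, 2) typically reports a 256-byte component at offset 576 (right after the 512-byte legacy area and the 64-byte xstate header), giving xstate_offsets[2] = 576 and xstate_sizes[2] = 256.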
193
194static void __init print_xstate_feature(u64 xstate_mask)
195{
196 const char *feature_name;
197
198 if (cpu_has_xfeatures(xstate_mask, &feature_name))
199 pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name);
200}
201
202/*
203 * Print out all the supported xstate features:
204 */
205static void __init print_xstate_features(void)
206{
207 print_xstate_feature(XSTATE_FP);
208 print_xstate_feature(XSTATE_SSE);
209 print_xstate_feature(XSTATE_YMM);
210 print_xstate_feature(XSTATE_BNDREGS);
211 print_xstate_feature(XSTATE_BNDCSR);
212 print_xstate_feature(XSTATE_OPMASK);
213 print_xstate_feature(XSTATE_ZMM_Hi256);
214 print_xstate_feature(XSTATE_Hi16_ZMM);
215}
216
217/*
218 * This function sets up offsets and sizes of all extended states in
219 * the xsave area. It supports both the standard format and the
220 * compacted format of the xsave area.
221 */
222static void __init setup_xstate_comp(void)
223{
224 unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
225 int i;
226
227 /*
228 * The FP xstates and SSE xstates are legacy states. They are always
229 * in the fixed offsets in the xsave area in either compacted form
230 * or standard form.
231 */
232 xstate_comp_offsets[0] = 0;
233 xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
234
235 if (!cpu_has_xsaves) {
236 for (i = 2; i < xfeatures_nr; i++) {
237 if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
238 xstate_comp_offsets[i] = xstate_offsets[i];
239 xstate_comp_sizes[i] = xstate_sizes[i];
240 }
241 }
242 return;
243 }
244
245 xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
246
247 for (i = 2; i < xfeatures_nr; i++) {
248 if (test_bit(i, (unsigned long *)&xfeatures_mask))
249 xstate_comp_sizes[i] = xstate_sizes[i];
250 else
251 xstate_comp_sizes[i] = 0;
252
253 if (i > 2)
254 xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
255 + xstate_comp_sizes[i-1];
256
257 }
258}
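A concrete (illustrative) comparison: with FP, SSE, YMM and the two MPX components enabled (xfeatures_mask = 0x1f), the standard-format offsets come straight from CPUID - typically YMM at 576, BNDREGS at 960, BNDCSR at 1024 - while the compacted offsets computed here pack the enabled components back to back after the 512 + 64 byte legacy-plus-header region: YMM at 576, BNDREGS at 576 + 256 = 832, BNDCSR at 832 + 64 = 896.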
259
260/*
261 * setup the xstate image representing the init state
262 */
263static void __init setup_init_fpu_buf(void)
264{
265 static int on_boot_cpu = 1;
266
267 WARN_ON_FPU(!on_boot_cpu);
268 on_boot_cpu = 0;
269
270 if (!cpu_has_xsave)
271 return;
272
273 setup_xstate_features();
274 print_xstate_features();
275
276 if (cpu_has_xsaves) {
277 init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
278 init_fpstate.xsave.header.xfeatures = xfeatures_mask;
279 }
280
281 /*
282 * Init all the feature states, with header.xfeatures being 0x0:
283 */
284 copy_kernel_to_xregs_booting(&init_fpstate.xsave);
285
286 /*
287 * Dump the init state again. This is to identify the init state
288 * of any feature which is not represented by all zeros.
289 */
290 copy_xregs_to_kernel_booting(&init_fpstate.xsave);
291}
292
293/*
294 * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
295 */
296static void __init init_xstate_size(void)
297{
298 unsigned int eax, ebx, ecx, edx;
299 int i;
300
301 if (!cpu_has_xsaves) {
302 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
303 xstate_size = ebx;
304 return;
305 }
306
307 xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
308 for (i = 2; i < 64; i++) {
309 if (test_bit(i, (unsigned long *)&xfeatures_mask)) {
310 cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
311 xstate_size += eax;
312 }
313 }
314}
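Continuing the illustrative FP + SSE + AVX configuration: without XSAVES, CPUID(0xd, 0).ebx directly reports the standard-format size of the currently enabled components (832 bytes here); with XSAVES the loop computes the compacted size itself, 512 + 64 for the legacy area and header plus each enabled component's size, again 832 for this feature set, since compaction only saves space once sparsely numbered features are enabled.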
315
316/*
317 * Enable and initialize the xsave feature.
318 * Called once per system bootup.
319 */
320void __init fpu__init_system_xstate(void)
321{
322 unsigned int eax, ebx, ecx, edx;
323 static int on_boot_cpu = 1;
324
325 WARN_ON_FPU(!on_boot_cpu);
326 on_boot_cpu = 0;
327
328 if (!cpu_has_xsave) {
329 pr_info("x86/fpu: Legacy x87 FPU detected.\n");
330 return;
331 }
332
333 if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
334 WARN_ON_FPU(1);
335 return;
336 }
337
338 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
339 xfeatures_mask = eax + ((u64)edx << 32);
340
341 if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
342 pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
343 BUG();
344 }
345
346 /* Support only the state known to the OS: */
347 xfeatures_mask = xfeatures_mask & XCNTXT_MASK;
348
349 /* Enable xstate instructions to be able to continue with initialization: */
350 fpu__init_cpu_xstate();
351
352 /* Recompute the context size for enabled features: */
353 init_xstate_size();
354
355 update_regset_xstate_info(xstate_size, xfeatures_mask);
356 fpu__init_prepare_fx_sw_frame();
357 setup_init_fpu_buf();
358 setup_xstate_comp();
359
360 pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n",
361 xfeatures_mask,
362 xstate_size,
363 cpu_has_xsaves ? "compacted" : "standard");
364}
365
366/*
367 * Restore minimal FPU state after suspend:
368 */
369void fpu__resume_cpu(void)
370{
371 /*
372 * Restore XCR0 on xsave capable CPUs:
373 */
374 if (cpu_has_xsave)
375 xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
376}
377
378/*
379 * Given the xsave area and a state inside, this function returns the
380 * address of the state.
381 *
382 * This is the API that is called to get the address of a given xstate in
383 * either the standard format or the compacted format of the xsave area.
384 *
385 * Note that if there is no data for the field in the xsave buffer
386 * this will return NULL.
387 *
388 * Inputs:
389 * xstate: the thread's storage area for all FPU data
390 * xstate_feature: state which is defined in xsave.h (e.g.
391 * XSTATE_FP, XSTATE_SSE, etc...)
392 * Output:
393 * address of the state in the xsave area, or NULL if the
394 * field is not present in the xsave buffer.
395 */
396void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
397{
398 int feature_nr = fls64(xstate_feature) - 1;
399 /*
400 * Do we even *have* xsave state?
401 */
402 if (!boot_cpu_has(X86_FEATURE_XSAVE))
403 return NULL;
404
406 /*
407 * We should not ever be requesting features that we
408 * have not enabled. Remember that pcntxt_mask is
409 * what we write to the XCR0 register.
410 */
411 WARN_ONCE(!(xfeatures_mask & xstate_feature),
412 "get of unsupported state");
413 /*
414 * This assumes the last 'xsave*' instruction to
415 * have requested that 'xstate_feature' be saved.
416 * If it did not, we might be seeing an old value
417 * of the field in the buffer.
418 *
419 * This can happen because the last 'xsave' did not
420 * request that this feature be saved (unlikely)
421 * or because the "init optimization" caused it
422 * to not be saved.
423 */
424 if (!(xsave->header.xfeatures & xstate_feature))
425 return NULL;
426
427 return (void *)xsave + xstate_comp_offsets[feature_nr];
428}
429EXPORT_SYMBOL_GPL(get_xsave_addr);
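A usage sketch, operating on a caller-supplied buffer ('fpu' here is an assumed pointer to some task's or guest's struct fpu, not necessarily current's):

	/* Sketch: read the MPX bounds-config/status component out of a buffer. */
	const struct bndcsr *bndcsr;

	bndcsr = get_xsave_addr(&fpu->state.xsave, XSTATE_BNDCSR);
	if (!bndcsr)
		return;		/* component is in its init state (all zeros) */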
430
431/*
432 * This wraps up the common operations that need to occur when retrieving
433 * data from xsave state. It first ensures that the current task was
434 * using the FPU and retrieves the data into a buffer. It then calculates
435 * the offset of the requested field in the buffer.
436 *
437 * This function is safe to call whether the FPU is in use or not.
438 *
439 * Note that this only works on the current task.
440 *
441 * Inputs:
442 * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP,
443 * XSTATE_SSE, etc...)
444 * Output:
445 * address of the state in the xsave area or NULL if the state
446 * is not present or is in its 'init state'.
447 */
448const void *get_xsave_field_ptr(int xsave_state)
449{
450 struct fpu *fpu = &current->thread.fpu;
451
452 if (!fpu->fpstate_active)
453 return NULL;
454 /*
455 * fpu__save() takes the CPU's xstate registers
456 * and saves them off to the fpu state memory buffer.
457 */
458 fpu__save(fpu);
459
460 return get_xsave_addr(&fpu->state.xsave, xsave_state);
461}
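This is exactly the pattern the reworked #BR handler uses further down in this diff:

	/* Sketch: do_bounds() resolves a bounds exception this way. */
	const struct bndcsr *bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);

	if (!bndcsr)
		goto exit_trap;	/* init state: MPX did not cause this #BR */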
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
deleted file mode 100644
index 6185d3141219..000000000000
--- a/arch/x86/kernel/i387.c
+++ /dev/null
@@ -1,671 +0,0 @@
1/*
2 * Copyright (C) 1994 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * General FPU state handling cleanups
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 */
8#include <linux/module.h>
9#include <linux/regset.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12
13#include <asm/sigcontext.h>
14#include <asm/processor.h>
15#include <asm/math_emu.h>
16#include <asm/tlbflush.h>
17#include <asm/uaccess.h>
18#include <asm/ptrace.h>
19#include <asm/i387.h>
20#include <asm/fpu-internal.h>
21#include <asm/user.h>
22
23static DEFINE_PER_CPU(bool, in_kernel_fpu);
24
25void kernel_fpu_disable(void)
26{
27 WARN_ON(this_cpu_read(in_kernel_fpu));
28 this_cpu_write(in_kernel_fpu, true);
29}
30
31void kernel_fpu_enable(void)
32{
33 this_cpu_write(in_kernel_fpu, false);
34}
35
36/*
37 * Were we in an interrupt that interrupted kernel mode?
38 *
39 * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
40 * pair does nothing at all: the thread must not have fpu (so
41 * that we don't try to save the FPU state), and TS must
42 * be set (so that the clts/stts pair does nothing that is
43 * visible in the interrupted kernel thread).
44 *
45 * Except for the eagerfpu case when we return true; in the likely case
46 * the thread has FPU but we are not going to set/clear TS.
47 */
48static inline bool interrupted_kernel_fpu_idle(void)
49{
50 if (this_cpu_read(in_kernel_fpu))
51 return false;
52
53 if (use_eager_fpu())
54 return true;
55
56 return !__thread_has_fpu(current) &&
57 (read_cr0() & X86_CR0_TS);
58}
59
60/*
61 * Were we in user mode (or vm86 mode) when we were
62 * interrupted?
63 *
64 * Doing kernel_fpu_begin/end() is ok if we are running
65 * in an interrupt context from user mode - we'll just
66 * save the FPU state as required.
67 */
68static inline bool interrupted_user_mode(void)
69{
70 struct pt_regs *regs = get_irq_regs();
71 return regs && user_mode(regs);
72}
73
74/*
75 * Can we use the FPU in kernel mode with the
76 * whole "kernel_fpu_begin/end()" sequence?
77 *
78 * It's always ok in process context (ie "not interrupt")
79 * but it is sometimes ok even from an irq.
80 */
81bool irq_fpu_usable(void)
82{
83 return !in_interrupt() ||
84 interrupted_user_mode() ||
85 interrupted_kernel_fpu_idle();
86}
87EXPORT_SYMBOL(irq_fpu_usable);
88
89void __kernel_fpu_begin(void)
90{
91 struct task_struct *me = current;
92
93 this_cpu_write(in_kernel_fpu, true);
94
95 if (__thread_has_fpu(me)) {
96 __save_init_fpu(me);
97 } else {
98 this_cpu_write(fpu_owner_task, NULL);
99 if (!use_eager_fpu())
100 clts();
101 }
102}
103EXPORT_SYMBOL(__kernel_fpu_begin);
104
105void __kernel_fpu_end(void)
106{
107 struct task_struct *me = current;
108
109 if (__thread_has_fpu(me)) {
110 if (WARN_ON(restore_fpu_checking(me)))
111 fpu_reset_state(me);
112 } else if (!use_eager_fpu()) {
113 stts();
114 }
115
116 this_cpu_write(in_kernel_fpu, false);
117}
118EXPORT_SYMBOL(__kernel_fpu_end);
119
120void unlazy_fpu(struct task_struct *tsk)
121{
122 preempt_disable();
123 if (__thread_has_fpu(tsk)) {
124 if (use_eager_fpu()) {
125 __save_fpu(tsk);
126 } else {
127 __save_init_fpu(tsk);
128 __thread_fpu_end(tsk);
129 }
130 }
131 preempt_enable();
132}
133EXPORT_SYMBOL(unlazy_fpu);
134
135unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
136unsigned int xstate_size;
137EXPORT_SYMBOL_GPL(xstate_size);
138static struct i387_fxsave_struct fx_scratch;
139
140static void mxcsr_feature_mask_init(void)
141{
142 unsigned long mask = 0;
143
144 if (cpu_has_fxsr) {
145 memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
146 asm volatile("fxsave %0" : "+m" (fx_scratch));
147 mask = fx_scratch.mxcsr_mask;
148 if (mask == 0)
149 mask = 0x0000ffbf;
150 }
151 mxcsr_feature_mask &= mask;
152}
153
154static void init_thread_xstate(void)
155{
156 /*
157 * Note that xstate_size might be overwritten later during
158 * xsave_init().
159 */
160
161 if (!cpu_has_fpu) {
162 /*
163 * Disable xsave as we do not support it if i387
164 * emulation is enabled.
165 */
166 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
167 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
168 xstate_size = sizeof(struct i387_soft_struct);
169 return;
170 }
171
172 if (cpu_has_fxsr)
173 xstate_size = sizeof(struct i387_fxsave_struct);
174 else
175 xstate_size = sizeof(struct i387_fsave_struct);
176
177 /*
178 * Quirk: we don't yet handle the XSAVES* instructions
179 * correctly, as we don't correctly convert between
180 * standard and compacted format when interfacing
181 * with user-space - so disable it for now.
182 *
183 * The difference is small: with recent CPUs the
184 * compacted format is only marginally smaller than
185 * the standard FPU state format.
186 *
187 * ( This is easy to backport while we are fixing
188 * XSAVES* support. )
189 */
190 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
191}
192
193/*
194 * Called at bootup to set up the initial FPU state that is later cloned
195 * into all processes.
196 */
197
198void fpu_init(void)
199{
200 unsigned long cr0;
201 unsigned long cr4_mask = 0;
202
203#ifndef CONFIG_MATH_EMULATION
204 if (!cpu_has_fpu) {
205 pr_emerg("No FPU found and no math emulation present\n");
206 pr_emerg("Giving up\n");
207 for (;;)
208 asm volatile("hlt");
209 }
210#endif
211 if (cpu_has_fxsr)
212 cr4_mask |= X86_CR4_OSFXSR;
213 if (cpu_has_xmm)
214 cr4_mask |= X86_CR4_OSXMMEXCPT;
215 if (cr4_mask)
216 cr4_set_bits(cr4_mask);
217
218 cr0 = read_cr0();
219 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
220 if (!cpu_has_fpu)
221 cr0 |= X86_CR0_EM;
222 write_cr0(cr0);
223
224 /*
225 * init_thread_xstate is only called once to avoid overriding
226 * xstate_size during boot time or during CPU hotplug.
227 */
228 if (xstate_size == 0)
229 init_thread_xstate();
230
231 mxcsr_feature_mask_init();
232 xsave_init();
233 eager_fpu_init();
234}
235
236void fpu_finit(struct fpu *fpu)
237{
238 if (!cpu_has_fpu) {
239 finit_soft_fpu(&fpu->state->soft);
240 return;
241 }
242
243 memset(fpu->state, 0, xstate_size);
244
245 if (cpu_has_fxsr) {
246 fx_finit(&fpu->state->fxsave);
247 } else {
248 struct i387_fsave_struct *fp = &fpu->state->fsave;
249 fp->cwd = 0xffff037fu;
250 fp->swd = 0xffff0000u;
251 fp->twd = 0xffffffffu;
252 fp->fos = 0xffff0000u;
253 }
254}
255EXPORT_SYMBOL_GPL(fpu_finit);
256
257/*
258 * The _current_ task is using the FPU for the first time
259 * so initialize it and set the mxcsr to its default
260 * value at reset if we support XMM instructions and then
261 * remember the current task has used the FPU.
262 */
263int init_fpu(struct task_struct *tsk)
264{
265 int ret;
266
267 if (tsk_used_math(tsk)) {
268 if (cpu_has_fpu && tsk == current)
269 unlazy_fpu(tsk);
270 task_disable_lazy_fpu_restore(tsk);
271 return 0;
272 }
273
274 /*
275 * Memory allocation at the first usage of the FPU and other state.
276 */
277 ret = fpu_alloc(&tsk->thread.fpu);
278 if (ret)
279 return ret;
280
281 fpu_finit(&tsk->thread.fpu);
282
283 set_stopped_child_used_math(tsk);
284 return 0;
285}
286EXPORT_SYMBOL_GPL(init_fpu);
287
288/*
289 * The xstateregs_active() routine is the same as the fpregs_active() routine,
290 * as the "regset->n" for the xstate regset will be updated based on the feature
291 * capabilities supported by the xsave.
292 */
293int fpregs_active(struct task_struct *target, const struct user_regset *regset)
294{
295 return tsk_used_math(target) ? regset->n : 0;
296}
297
298int xfpregs_active(struct task_struct *target, const struct user_regset *regset)
299{
300 return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0;
301}
302
303int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
304 unsigned int pos, unsigned int count,
305 void *kbuf, void __user *ubuf)
306{
307 int ret;
308
309 if (!cpu_has_fxsr)
310 return -ENODEV;
311
312 ret = init_fpu(target);
313 if (ret)
314 return ret;
315
316 sanitize_i387_state(target);
317
318 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
319 &target->thread.fpu.state->fxsave, 0, -1);
320}
321
322int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
323 unsigned int pos, unsigned int count,
324 const void *kbuf, const void __user *ubuf)
325{
326 int ret;
327
328 if (!cpu_has_fxsr)
329 return -ENODEV;
330
331 ret = init_fpu(target);
332 if (ret)
333 return ret;
334
335 sanitize_i387_state(target);
336
337 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
338 &target->thread.fpu.state->fxsave, 0, -1);
339
340 /*
341 * mxcsr reserved bits must be masked to zero for security reasons.
342 */
343 target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
344
345 /*
346 * update the header bits in the xsave header, indicating the
347 * presence of FP and SSE state.
348 */
349 if (cpu_has_xsave)
350 target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
351
352 return ret;
353}
354
355int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
356 unsigned int pos, unsigned int count,
357 void *kbuf, void __user *ubuf)
358{
359 struct xsave_struct *xsave;
360 int ret;
361
362 if (!cpu_has_xsave)
363 return -ENODEV;
364
365 ret = init_fpu(target);
366 if (ret)
367 return ret;
368
369 xsave = &target->thread.fpu.state->xsave;
370
371 /*
372 * Copy the 48 bytes defined by the software first into the xstate
373 * memory layout in the thread struct, so that we can copy the entire
374 * xstateregs to the user using one user_regset_copyout().
375 */
376 memcpy(&xsave->i387.sw_reserved,
377 xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
378 /*
379 * Copy the xstate memory layout.
380 */
381 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
382 return ret;
383}
384
385int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
386 unsigned int pos, unsigned int count,
387 const void *kbuf, const void __user *ubuf)
388{
389 struct xsave_struct *xsave;
390 int ret;
391
392 if (!cpu_has_xsave)
393 return -ENODEV;
394
395 ret = init_fpu(target);
396 if (ret)
397 return ret;
398
399 xsave = &target->thread.fpu.state->xsave;
400
401 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
402 /*
403 * mxcsr reserved bits must be masked to zero for security reasons.
404 */
405 xsave->i387.mxcsr &= mxcsr_feature_mask;
406 xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
407 /*
408 * These bits must be zero.
409 */
410 memset(&xsave->xsave_hdr.reserved, 0, 48);
411 return ret;
412}
413
414#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
415
416/*
417 * FPU tag word conversions.
418 */
419
420static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
421{
422 unsigned int tmp; /* to avoid 16 bit prefixes in the code */
423
424 /* Transform each pair of bits into 01 (valid) or 00 (empty) */
425 tmp = ~twd;
426 tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
427 /* and move the valid bits to the lower byte. */
428 tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
429 tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
430 tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
431
432 return tmp;
433}
434
435#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16)
436#define FP_EXP_TAG_VALID 0
437#define FP_EXP_TAG_ZERO 1
438#define FP_EXP_TAG_SPECIAL 2
439#define FP_EXP_TAG_EMPTY 3
440
441static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
442{
443 struct _fpxreg *st;
444 u32 tos = (fxsave->swd >> 11) & 7;
445 u32 twd = (unsigned long) fxsave->twd;
446 u32 tag;
447 u32 ret = 0xffff0000u;
448 int i;
449
450 for (i = 0; i < 8; i++, twd >>= 1) {
451 if (twd & 0x1) {
452 st = FPREG_ADDR(fxsave, (i - tos) & 7);
453
454 switch (st->exponent & 0x7fff) {
455 case 0x7fff:
456 tag = FP_EXP_TAG_SPECIAL;
457 break;
458 case 0x0000:
459 if (!st->significand[0] &&
460 !st->significand[1] &&
461 !st->significand[2] &&
462 !st->significand[3])
463 tag = FP_EXP_TAG_ZERO;
464 else
465 tag = FP_EXP_TAG_SPECIAL;
466 break;
467 default:
468 if (st->significand[3] & 0x8000)
469 tag = FP_EXP_TAG_VALID;
470 else
471 tag = FP_EXP_TAG_SPECIAL;
472 break;
473 }
474 } else {
475 tag = FP_EXP_TAG_EMPTY;
476 }
477 ret |= tag << (2 * i);
478 }
479 return ret;
480}
481
482/*
483 * FXSR floating point environment conversions.
484 */
485
486void
487convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
488{
489 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
490 struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
491 struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
492 int i;
493
494 env->cwd = fxsave->cwd | 0xffff0000u;
495 env->swd = fxsave->swd | 0xffff0000u;
496 env->twd = twd_fxsr_to_i387(fxsave);
497
498#ifdef CONFIG_X86_64
499 env->fip = fxsave->rip;
500 env->foo = fxsave->rdp;
501 /*
502 * should be actually ds/cs at fpu exception time, but
503 * that information is not available in 64bit mode.
504 */
505 env->fcs = task_pt_regs(tsk)->cs;
506 if (tsk == current) {
507 savesegment(ds, env->fos);
508 } else {
509 env->fos = tsk->thread.ds;
510 }
511 env->fos |= 0xffff0000;
512#else
513 env->fip = fxsave->fip;
514 env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16);
515 env->foo = fxsave->foo;
516 env->fos = fxsave->fos;
517#endif
518
519 for (i = 0; i < 8; ++i)
520 memcpy(&to[i], &from[i], sizeof(to[0]));
521}
522
523void convert_to_fxsr(struct task_struct *tsk,
524 const struct user_i387_ia32_struct *env)
525
526{
527 struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
528 struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
529 struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
530 int i;
531
532 fxsave->cwd = env->cwd;
533 fxsave->swd = env->swd;
534 fxsave->twd = twd_i387_to_fxsr(env->twd);
535 fxsave->fop = (u16) ((u32) env->fcs >> 16);
536#ifdef CONFIG_X86_64
537 fxsave->rip = env->fip;
538 fxsave->rdp = env->foo;
539 /* cs and ds ignored */
540#else
541 fxsave->fip = env->fip;
542 fxsave->fcs = (env->fcs & 0xffff);
543 fxsave->foo = env->foo;
544 fxsave->fos = env->fos;
545#endif
546
547 for (i = 0; i < 8; ++i)
548 memcpy(&to[i], &from[i], sizeof(from[0]));
549}
550
551int fpregs_get(struct task_struct *target, const struct user_regset *regset,
552 unsigned int pos, unsigned int count,
553 void *kbuf, void __user *ubuf)
554{
555 struct user_i387_ia32_struct env;
556 int ret;
557
558 ret = init_fpu(target);
559 if (ret)
560 return ret;
561
562 if (!static_cpu_has(X86_FEATURE_FPU))
563 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
564
565 if (!cpu_has_fxsr)
566 return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
567 &target->thread.fpu.state->fsave, 0,
568 -1);
569
570 sanitize_i387_state(target);
571
572 if (kbuf && pos == 0 && count == sizeof(env)) {
573 convert_from_fxsr(kbuf, target);
574 return 0;
575 }
576
577 convert_from_fxsr(&env, target);
578
579 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
580}
581
582int fpregs_set(struct task_struct *target, const struct user_regset *regset,
583 unsigned int pos, unsigned int count,
584 const void *kbuf, const void __user *ubuf)
585{
586 struct user_i387_ia32_struct env;
587 int ret;
588
589 ret = init_fpu(target);
590 if (ret)
591 return ret;
592
593 sanitize_i387_state(target);
594
595 if (!static_cpu_has(X86_FEATURE_FPU))
596 return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
597
598 if (!cpu_has_fxsr)
599 return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
600 &target->thread.fpu.state->fsave, 0,
601 -1);
602
603 if (pos > 0 || count < sizeof(env))
604 convert_from_fxsr(&env, target);
605
606 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
607 if (!ret)
608 convert_to_fxsr(target, &env);
609
610 /*
611 * update the header bit in the xsave header, indicating the
612 * presence of FP.
613 */
614 if (cpu_has_xsave)
615 target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
616 return ret;
617}
618
619/*
620 * FPU state for core dumps.
621 * This is only used for a.out dumps now.
622 * It is declared generically using elf_fpregset_t (which is
623 * struct user_i387_struct) but is in fact only used for 32-bit
624 * dumps, so on 64-bit it is really struct user_i387_ia32_struct.
625 */
626int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
627{
628 struct task_struct *tsk = current;
629 int fpvalid;
630
631 fpvalid = !!used_math();
632 if (fpvalid)
633 fpvalid = !fpregs_get(tsk, NULL,
634 0, sizeof(struct user_i387_ia32_struct),
635 fpu, NULL);
636
637 return fpvalid;
638}
639EXPORT_SYMBOL(dump_fpu);
640
641#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
642
643static int __init no_387(char *s)
644{
645 setup_clear_cpu_cap(X86_FEATURE_FPU);
646 return 1;
647}
648
649__setup("no387", no_387);
650
651void fpu_detect(struct cpuinfo_x86 *c)
652{
653 unsigned long cr0;
654 u16 fsw, fcw;
655
656 fsw = fcw = 0xffff;
657
658 cr0 = read_cr0();
659 cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
660 write_cr0(cr0);
661
662 asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
663 : "+m" (fsw), "+m" (fcw));
664
665 if (fsw == 0 && (fcw & 0x103f) == 0x003f)
666 set_cpu_cap(c, X86_FEATURE_FPU);
667 else
668 clear_cpu_cap(c, X86_FEATURE_FPU);
669
670 /* The final cr0 value is set in fpu_init() */
671}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c648139d68d7..9cad694ed7c4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -25,8 +25,7 @@
25#include <asm/idle.h> 25#include <asm/idle.h>
26#include <asm/uaccess.h> 26#include <asm/uaccess.h>
27#include <asm/mwait.h> 27#include <asm/mwait.h>
28#include <asm/i387.h> 28#include <asm/fpu/internal.h>
29#include <asm/fpu-internal.h>
30#include <asm/debugreg.h> 29#include <asm/debugreg.h>
31#include <asm/nmi.h> 30#include <asm/nmi.h>
32#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
@@ -76,9 +75,6 @@ void idle_notifier_unregister(struct notifier_block *n)
76EXPORT_SYMBOL_GPL(idle_notifier_unregister); 75EXPORT_SYMBOL_GPL(idle_notifier_unregister);
77#endif 76#endif
78 77
79struct kmem_cache *task_xstate_cachep;
80EXPORT_SYMBOL_GPL(task_xstate_cachep);
81
82/* 78/*
83 * this gets called so that we can store lazy state into memory and copy the 79 * this gets called so that we can store lazy state into memory and copy the
84 * current task into the new thread. 80 * current task into the new thread.
@@ -87,36 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
87{ 83{
88 *dst = *src; 84 *dst = *src;
89 85
90 dst->thread.fpu_counter = 0; 86 return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
91 dst->thread.fpu.has_fpu = 0;
92 dst->thread.fpu.state = NULL;
93 task_disable_lazy_fpu_restore(dst);
94 if (tsk_used_math(src)) {
95 int err = fpu_alloc(&dst->thread.fpu);
96 if (err)
97 return err;
98 fpu_copy(dst, src);
99 }
100 return 0;
101}
102
103void free_thread_xstate(struct task_struct *tsk)
104{
105 fpu_free(&tsk->thread.fpu);
106}
107
108void arch_release_task_struct(struct task_struct *tsk)
109{
110 free_thread_xstate(tsk);
111}
112
113void arch_task_cache_init(void)
114{
115 task_xstate_cachep =
116 kmem_cache_create("task_xstate", xstate_size,
117 __alignof__(union thread_xstate),
118 SLAB_PANIC | SLAB_NOTRACK, NULL);
119 setup_xstate_comp();
120} 87}
121 88
122/* 89/*
@@ -127,6 +94,7 @@ void exit_thread(void)
127 struct task_struct *me = current; 94 struct task_struct *me = current;
128 struct thread_struct *t = &me->thread; 95 struct thread_struct *t = &me->thread;
129 unsigned long *bp = t->io_bitmap_ptr; 96 unsigned long *bp = t->io_bitmap_ptr;
97 struct fpu *fpu = &t->fpu;
130 98
131 if (bp) { 99 if (bp) {
132 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); 100 struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
@@ -142,7 +110,7 @@ void exit_thread(void)
142 kfree(bp); 110 kfree(bp);
143 } 111 }
144 112
145 drop_fpu(me); 113 fpu__drop(fpu);
146} 114}
147 115
148void flush_thread(void) 116void flush_thread(void)
@@ -152,19 +120,7 @@ void flush_thread(void)
152 flush_ptrace_hw_breakpoint(tsk); 120 flush_ptrace_hw_breakpoint(tsk);
153 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); 121 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
154 122
155 if (!use_eager_fpu()) { 123 fpu__clear(&tsk->thread.fpu);
156 /* FPU state will be reallocated lazily at the first use. */
157 drop_fpu(tsk);
158 free_thread_xstate(tsk);
159 } else {
160 if (!tsk_used_math(tsk)) {
161 /* kthread execs. TODO: cleanup this horror. */
162 if (WARN_ON(init_fpu(tsk)))
163 force_sig(SIGKILL, tsk);
164 user_fpu_begin();
165 }
166 restore_init_xstate();
167 }
168} 124}
169 125
170static void hard_disable_TSC(void) 126static void hard_disable_TSC(void)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8ed2106b06da..deff651835b4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -39,8 +39,7 @@
39#include <asm/pgtable.h> 39#include <asm/pgtable.h>
40#include <asm/ldt.h> 40#include <asm/ldt.h>
41#include <asm/processor.h> 41#include <asm/processor.h>
42#include <asm/i387.h> 42#include <asm/fpu/internal.h>
43#include <asm/fpu-internal.h>
44#include <asm/desc.h> 43#include <asm/desc.h>
45#ifdef CONFIG_MATH_EMULATION 44#ifdef CONFIG_MATH_EMULATION
46#include <asm/math_emu.h> 45#include <asm/math_emu.h>
@@ -242,14 +241,16 @@ __visible __notrace_funcgraph struct task_struct *
242__switch_to(struct task_struct *prev_p, struct task_struct *next_p) 241__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
243{ 242{
244 struct thread_struct *prev = &prev_p->thread, 243 struct thread_struct *prev = &prev_p->thread,
245 *next = &next_p->thread; 244 *next = &next_p->thread;
245 struct fpu *prev_fpu = &prev->fpu;
246 struct fpu *next_fpu = &next->fpu;
246 int cpu = smp_processor_id(); 247 int cpu = smp_processor_id();
247 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 248 struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
248 fpu_switch_t fpu; 249 fpu_switch_t fpu_switch;
249 250
250 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 251 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
251 252
252 fpu = switch_fpu_prepare(prev_p, next_p, cpu); 253 fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
253 254
254 /* 255 /*
255 * Save away %gs. No need to save %fs, as it was saved on the 256 * Save away %gs. No need to save %fs, as it was saved on the
@@ -296,7 +297,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
296 * Leave lazy mode, flushing any hypercalls made here. 297 * Leave lazy mode, flushing any hypercalls made here.
297 * This must be done before restoring TLS segments so 298 * This must be done before restoring TLS segments so
298 * the GDT and LDT are properly updated, and must be 299 * the GDT and LDT are properly updated, and must be
299 * done before math_state_restore, so the TS bit is up 300 * done before fpu__restore(), so the TS bit is up
300 * to date. 301 * to date.
301 */ 302 */
302 arch_end_context_switch(next_p); 303 arch_end_context_switch(next_p);
@@ -319,7 +320,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
319 if (prev->gs | next->gs) 320 if (prev->gs | next->gs)
320 lazy_load_gs(next->gs); 321 lazy_load_gs(next->gs);
321 322
322 switch_fpu_finish(next_p, fpu); 323 switch_fpu_finish(next_fpu, fpu_switch);
323 324
324 this_cpu_write(current_task, next_p); 325 this_cpu_write(current_task, next_p);
325 326
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ddfdbf74f174..c50e013b57d2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -38,8 +38,7 @@
38 38
39#include <asm/pgtable.h> 39#include <asm/pgtable.h>
40#include <asm/processor.h> 40#include <asm/processor.h>
41#include <asm/i387.h> 41#include <asm/fpu/internal.h>
42#include <asm/fpu-internal.h>
43#include <asm/mmu_context.h> 42#include <asm/mmu_context.h>
44#include <asm/prctl.h> 43#include <asm/prctl.h>
45#include <asm/desc.h> 44#include <asm/desc.h>
@@ -274,12 +273,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
274{ 273{
275 struct thread_struct *prev = &prev_p->thread; 274 struct thread_struct *prev = &prev_p->thread;
276 struct thread_struct *next = &next_p->thread; 275 struct thread_struct *next = &next_p->thread;
276 struct fpu *prev_fpu = &prev->fpu;
277 struct fpu *next_fpu = &next->fpu;
277 int cpu = smp_processor_id(); 278 int cpu = smp_processor_id();
278 struct tss_struct *tss = &per_cpu(cpu_tss, cpu); 279 struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
279 unsigned fsindex, gsindex; 280 unsigned fsindex, gsindex;
280 fpu_switch_t fpu; 281 fpu_switch_t fpu_switch;
281 282
282 fpu = switch_fpu_prepare(prev_p, next_p, cpu); 283 fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
283 284
284 /* We must save %fs and %gs before load_TLS() because 285 /* We must save %fs and %gs before load_TLS() because
285 * %fs and %gs may be cleared by load_TLS(). 286 * %fs and %gs may be cleared by load_TLS().
@@ -299,7 +300,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
299 * Leave lazy mode, flushing any hypercalls made here. This 300 * Leave lazy mode, flushing any hypercalls made here. This
300 * must be done after loading TLS entries in the GDT but before 301 * must be done after loading TLS entries in the GDT but before
301 * loading segments that might reference them, and it must 302
302 * be done before math_state_restore, so the TS bit is up to 303 * be done before fpu__restore(), so the TS bit is up to
303 * date. 304 * date.
304 */ 305 */
305 arch_end_context_switch(next_p); 306 arch_end_context_switch(next_p);
@@ -391,7 +392,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
391 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 392 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
392 prev->gsindex = gsindex; 393 prev->gsindex = gsindex;
393 394
394 switch_fpu_finish(next_p, fpu); 395 switch_fpu_finish(next_fpu, fpu_switch);
395 396
396 /* 397 /*
397 * Switch the PDA and FPU contexts. 398 * Switch the PDA and FPU contexts.
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a7bc79480719..9be72bc3613f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -11,7 +11,6 @@
11#include <linux/errno.h> 11#include <linux/errno.h>
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/ptrace.h> 13#include <linux/ptrace.h>
14#include <linux/regset.h>
15#include <linux/tracehook.h> 14#include <linux/tracehook.h>
16#include <linux/user.h> 15#include <linux/user.h>
17#include <linux/elf.h> 16#include <linux/elf.h>
@@ -28,8 +27,9 @@
28#include <asm/uaccess.h> 27#include <asm/uaccess.h>
29#include <asm/pgtable.h> 28#include <asm/pgtable.h>
30#include <asm/processor.h> 29#include <asm/processor.h>
31#include <asm/i387.h> 30#include <asm/fpu/internal.h>
32#include <asm/fpu-internal.h> 31#include <asm/fpu/signal.h>
32#include <asm/fpu/regset.h>
33#include <asm/debugreg.h> 33#include <asm/debugreg.h>
34#include <asm/ldt.h> 34#include <asm/ldt.h>
35#include <asm/desc.h> 35#include <asm/desc.h>
@@ -1297,7 +1297,7 @@ static struct user_regset x86_64_regsets[] __read_mostly = {
1297 .core_note_type = NT_PRFPREG, 1297 .core_note_type = NT_PRFPREG,
1298 .n = sizeof(struct user_i387_struct) / sizeof(long), 1298 .n = sizeof(struct user_i387_struct) / sizeof(long),
1299 .size = sizeof(long), .align = sizeof(long), 1299 .size = sizeof(long), .align = sizeof(long),
1300 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1300 .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
1301 }, 1301 },
1302 [REGSET_XSTATE] = { 1302 [REGSET_XSTATE] = {
1303 .core_note_type = NT_X86_XSTATE, 1303 .core_note_type = NT_X86_XSTATE,
@@ -1338,13 +1338,13 @@ static struct user_regset x86_32_regsets[] __read_mostly = {
1338 .core_note_type = NT_PRFPREG, 1338 .core_note_type = NT_PRFPREG,
1339 .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), 1339 .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
1340 .size = sizeof(u32), .align = sizeof(u32), 1340 .size = sizeof(u32), .align = sizeof(u32),
1341 .active = fpregs_active, .get = fpregs_get, .set = fpregs_set 1341 .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set
1342 }, 1342 },
1343 [REGSET_XFP] = { 1343 [REGSET_XFP] = {
1344 .core_note_type = NT_PRXFPREG, 1344 .core_note_type = NT_PRXFPREG,
1345 .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), 1345 .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
1346 .size = sizeof(u32), .align = sizeof(u32), 1346 .size = sizeof(u32), .align = sizeof(u32),
1347 .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set 1347 .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set
1348 }, 1348 },
1349 [REGSET_XSTATE] = { 1349 [REGSET_XSTATE] = {
1350 .core_note_type = NT_X86_XSTATE, 1350 .core_note_type = NT_X86_XSTATE,
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 1ea14fd53933..206996c1669d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -26,8 +26,8 @@
26 26
27#include <asm/processor.h> 27#include <asm/processor.h>
28#include <asm/ucontext.h> 28#include <asm/ucontext.h>
29#include <asm/i387.h> 29#include <asm/fpu/internal.h>
30#include <asm/fpu-internal.h> 30#include <asm/fpu/signal.h>
31#include <asm/vdso.h> 31#include <asm/vdso.h>
32#include <asm/mce.h> 32#include <asm/mce.h>
33#include <asm/sighandling.h> 33#include <asm/sighandling.h>
@@ -103,7 +103,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
103 get_user_ex(buf, &sc->fpstate); 103 get_user_ex(buf, &sc->fpstate);
104 } get_user_catch(err); 104 } get_user_catch(err);
105 105
106 err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); 106 err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32));
107 107
108 force_iret(); 108 force_iret();
109 109
@@ -199,6 +199,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
199 unsigned long sp = regs->sp; 199 unsigned long sp = regs->sp;
200 unsigned long buf_fx = 0; 200 unsigned long buf_fx = 0;
201 int onsigstack = on_sig_stack(sp); 201 int onsigstack = on_sig_stack(sp);
202 struct fpu *fpu = &current->thread.fpu;
202 203
203 /* redzone */ 204 /* redzone */
204 if (config_enabled(CONFIG_X86_64)) 205 if (config_enabled(CONFIG_X86_64))
@@ -218,9 +219,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
218 } 219 }
219 } 220 }
220 221
221 if (used_math()) { 222 if (fpu->fpstate_active) {
222 sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), 223 sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
223 &buf_fx, &math_size); 224 &buf_fx, &math_size);
224 *fpstate = (void __user *)sp; 225 *fpstate = (void __user *)sp;
225 } 226 }
226 227
@@ -234,8 +235,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
234 return (void __user *)-1L; 235 return (void __user *)-1L;
235 236
236 /* save i387 and extended state */ 237 /* save i387 and extended state */
237 if (used_math() && 238 if (fpu->fpstate_active &&
238 save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) 239 copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
239 return (void __user *)-1L; 240 return (void __user *)-1L;
240 241
241 return (void __user *)sp; 242 return (void __user *)sp;
@@ -593,6 +594,22 @@ badframe:
593 return 0; 594 return 0;
594} 595}
595 596
597static inline int is_ia32_compat_frame(void)
598{
599 return config_enabled(CONFIG_IA32_EMULATION) &&
600 test_thread_flag(TIF_IA32);
601}
602
603static inline int is_ia32_frame(void)
604{
605 return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
606}
607
608static inline int is_x32_frame(void)
609{
610 return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
611}
612
596static int 613static int
597setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) 614setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
598{ 615{
@@ -617,6 +634,7 @@ static void
617handle_signal(struct ksignal *ksig, struct pt_regs *regs) 634handle_signal(struct ksignal *ksig, struct pt_regs *regs)
618{ 635{
619 bool stepping, failed; 636 bool stepping, failed;
637 struct fpu *fpu = &current->thread.fpu;
620 638
621 /* Are we from a system call? */ 639 /* Are we from a system call? */
622 if (syscall_get_nr(current, regs) >= 0) { 640 if (syscall_get_nr(current, regs) >= 0) {
@@ -665,8 +683,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
665 /* 683 /*
666 * Ensure the signal handler starts with the new fpu state. 684 * Ensure the signal handler starts with the new fpu state.
667 */ 685 */
668 if (used_math()) 686 if (fpu->fpstate_active)
669 fpu_reset_state(current); 687 fpu__clear(fpu);
670 } 688 }
671 signal_setup_done(failed, ksig, stepping); 689 signal_setup_done(failed, ksig, stepping);
672} 690}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0e8209619455..6d4bfea25874 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -68,8 +68,7 @@
68#include <asm/mwait.h> 68#include <asm/mwait.h>
69#include <asm/apic.h> 69#include <asm/apic.h>
70#include <asm/io_apic.h> 70#include <asm/io_apic.h>
71#include <asm/i387.h> 71#include <asm/fpu/internal.h>
72#include <asm/fpu-internal.h>
73#include <asm/setup.h> 72#include <asm/setup.h>
74#include <asm/uv/uv.h> 73#include <asm/uv/uv.h>
75#include <linux/mc146818rtc.h> 74#include <linux/mc146818rtc.h>
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 324ab5247687..36cb15b7b367 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,12 +54,13 @@
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/traps.h> 55#include <asm/traps.h>
56#include <asm/desc.h> 56#include <asm/desc.h>
57#include <asm/i387.h> 57#include <asm/fpu/internal.h>
58#include <asm/fpu-internal.h>
59#include <asm/mce.h> 58#include <asm/mce.h>
60#include <asm/fixmap.h> 59#include <asm/fixmap.h>
61#include <asm/mach_traps.h> 60#include <asm/mach_traps.h>
62#include <asm/alternative.h> 61#include <asm/alternative.h>
62#include <asm/fpu/xstate.h>
63#include <asm/trace/mpx.h>
63#include <asm/mpx.h> 64#include <asm/mpx.h>
64 65
65#ifdef CONFIG_X86_64 66#ifdef CONFIG_X86_64
@@ -371,10 +372,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
371 372
372dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) 373dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
373{ 374{
374 struct task_struct *tsk = current;
375 struct xsave_struct *xsave_buf;
376 enum ctx_state prev_state; 375 enum ctx_state prev_state;
377 struct bndcsr *bndcsr; 376 const struct bndcsr *bndcsr;
378 siginfo_t *info; 377 siginfo_t *info;
379 378
380 prev_state = exception_enter(); 379 prev_state = exception_enter();
@@ -393,15 +392,15 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
393 392
394 /* 393 /*
395 * We need to look at BNDSTATUS to resolve this exception. 394 * We need to look at BNDSTATUS to resolve this exception.
396 * It is not directly accessible, though, so we need to 395 * A NULL here might mean that it is in its 'init state',
397 * do an xsave and then pull it out of the xsave buffer. 396 * which is all zeros which indicates MPX was not
397 * responsible for the exception.
398 */ 398 */
399 fpu_save_init(&tsk->thread.fpu); 399 bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
400 xsave_buf = &(tsk->thread.fpu.state->xsave);
401 bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR);
402 if (!bndcsr) 400 if (!bndcsr)
403 goto exit_trap; 401 goto exit_trap;
404 402
403 trace_bounds_exception_mpx(bndcsr);
405 /* 404 /*
406 * The error code field of the BNDSTATUS register communicates status 405 * The error code field of the BNDSTATUS register communicates status
407 * information of a bound range exception #BR or operation involving 406 * information of a bound range exception #BR or operation involving
@@ -409,11 +408,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
409 */ 408 */
410 switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { 409 switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) {
411 case 2: /* Bound directory has invalid entry. */ 410 case 2: /* Bound directory has invalid entry. */
412 if (mpx_handle_bd_fault(xsave_buf)) 411 if (mpx_handle_bd_fault())
413 goto exit_trap; 412 goto exit_trap;
414 break; /* Success, it was handled */ 413 break; /* Success, it was handled */
415 case 1: /* Bound violation. */ 414 case 1: /* Bound violation. */
416 info = mpx_generate_siginfo(regs, xsave_buf); 415 info = mpx_generate_siginfo(regs);
417 if (IS_ERR(info)) { 416 if (IS_ERR(info)) {
418 /* 417 /*
419 * We failed to decode the MPX instruction. Act as if 418 * We failed to decode the MPX instruction. Act as if
@@ -709,8 +708,8 @@ NOKPROBE_SYMBOL(do_debug);
709static void math_error(struct pt_regs *regs, int error_code, int trapnr) 708static void math_error(struct pt_regs *regs, int error_code, int trapnr)
710{ 709{
711 struct task_struct *task = current; 710 struct task_struct *task = current;
711 struct fpu *fpu = &task->thread.fpu;
712 siginfo_t info; 712 siginfo_t info;
713 unsigned short err;
714 char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : 713 char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
715 "simd exception"; 714 "simd exception";
716 715
@@ -718,8 +717,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
718 return; 717 return;
719 conditional_sti(regs); 718 conditional_sti(regs);
720 719
721 if (!user_mode(regs)) 720 if (!user_mode(regs)) {
722 {
723 if (!fixup_exception(regs)) { 721 if (!fixup_exception(regs)) {
724 task->thread.error_code = error_code; 722 task->thread.error_code = error_code;
725 task->thread.trap_nr = trapnr; 723 task->thread.trap_nr = trapnr;
@@ -731,62 +729,20 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
731 /* 729 /*
732 * Save the info for the exception handler and clear the error. 730 * Save the info for the exception handler and clear the error.
733 */ 731 */
734 unlazy_fpu(task); 732 fpu__save(fpu);
735 task->thread.trap_nr = trapnr; 733
734 task->thread.trap_nr = trapnr;
736 task->thread.error_code = error_code; 735 task->thread.error_code = error_code;
737 info.si_signo = SIGFPE; 736 info.si_signo = SIGFPE;
738 info.si_errno = 0; 737 info.si_errno = 0;
739 info.si_addr = (void __user *)uprobe_get_trap_addr(regs); 738 info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
740 if (trapnr == X86_TRAP_MF) {
741 unsigned short cwd, swd;
742 /*
743 * (~cwd & swd) will mask out exceptions that are not set to unmasked
744 * status. 0x3f is the exception bits in these regs, 0x200 is the
745 * C1 reg you need in case of a stack fault, 0x040 is the stack
746 * fault bit. We should only be taking one exception at a time,
747 * so if this combination doesn't produce any single exception,
748 * then we have a bad program that isn't synchronizing its FPU usage
749 * and it will suffer the consequences since we won't be able to
750 * fully reproduce the context of the exception
751 */
752 cwd = get_fpu_cwd(task);
753 swd = get_fpu_swd(task);
754 739
755 err = swd & ~cwd; 740 info.si_code = fpu__exception_code(fpu, trapnr);
756 } else {
757 /*
758 * The SIMD FPU exceptions are handled a little differently, as there
759 * is only a single status/control register. Thus, to determine which
760 * unmasked exception was caught we must mask the exception mask bits
761 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
762 */
763 unsigned short mxcsr = get_fpu_mxcsr(task);
764 err = ~(mxcsr >> 7) & mxcsr;
765 }
766 741
767 if (err & 0x001) { /* Invalid op */ 742 /* Retry when we get spurious exceptions: */
768 /* 743 if (!info.si_code)
769 * swd & 0x240 == 0x040: Stack Underflow
770 * swd & 0x240 == 0x240: Stack Overflow
771 * User must clear the SF bit (0x40) if set
772 */
773 info.si_code = FPE_FLTINV;
774 } else if (err & 0x004) { /* Divide by Zero */
775 info.si_code = FPE_FLTDIV;
776 } else if (err & 0x008) { /* Overflow */
777 info.si_code = FPE_FLTOVF;
778 } else if (err & 0x012) { /* Denormal, Underflow */
779 info.si_code = FPE_FLTUND;
780 } else if (err & 0x020) { /* Precision */
781 info.si_code = FPE_FLTRES;
782 } else {
783 /*
784 * If we're using IRQ 13, or supposedly even some trap
785 * X86_TRAP_MF implementations, it's possible
786 * we get a spurious trap, which is not an error.
787 */
788 return; 744 return;
789 } 745
790 force_sig_info(SIGFPE, &info, task); 746 force_sig_info(SIGFPE, &info, task);
791} 747}
792 748
@@ -827,48 +783,6 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void)
827{ 783{
828} 784}
829 785
830/*
831 * 'math_state_restore()' saves the current math information in the
832 * old math state array, and gets the new ones from the current task
833 *
834 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
835 * Don't touch unless you *really* know how it works.
836 *
837 * Must be called with kernel preemption disabled (e.g. with local
838 * interrupts disabled, as in the case of do_device_not_available).
839 */
840void math_state_restore(void)
841{
842 struct task_struct *tsk = current;
843
844 if (!tsk_used_math(tsk)) {
845 local_irq_enable();
846 /*
847 * does a slab alloc which can sleep
848 */
849 if (init_fpu(tsk)) {
850 /*
851 * ran out of memory!
852 */
853 do_group_exit(SIGKILL);
854 return;
855 }
856 local_irq_disable();
857 }
858
859 /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */
860 kernel_fpu_disable();
861 __thread_fpu_begin(tsk);
862 if (unlikely(restore_fpu_checking(tsk))) {
863 fpu_reset_state(tsk);
864 force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
865 } else {
866 tsk->thread.fpu_counter++;
867 }
868 kernel_fpu_enable();
869}
870EXPORT_SYMBOL_GPL(math_state_restore);
871
872dotraplinkage void 786dotraplinkage void
873do_device_not_available(struct pt_regs *regs, long error_code) 787do_device_not_available(struct pt_regs *regs, long error_code)
874{ 788{
@@ -889,7 +803,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
889 return; 803 return;
890 } 804 }
891#endif 805#endif
892 math_state_restore(); /* interrupts still off */ 806 fpu__restore(&current->thread.fpu); /* interrupts still off */
893#ifdef CONFIG_X86_32 807#ifdef CONFIG_X86_32
894 conditional_sti(regs); 808 conditional_sti(regs);
895#endif 809#endif
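The math_error() hunk above replaces the open-coded #MF/#XF status-word decoding with a single fpu__exception_code() call plus a retry on spurious exceptions. As a rough user-space sketch of the masking that decoding performs (the helper names below are made up for illustration and are not the kernel's API; the FPE_* values come from <signal.h>):

#include <signal.h>
#include <stdio.h>

static int fpe_code_from_err(unsigned short err)
{
	if (err & 0x001)	/* Invalid operation (incl. stack fault) */
		return FPE_FLTINV;
	if (err & 0x004)	/* Divide by zero */
		return FPE_FLTDIV;
	if (err & 0x008)	/* Overflow */
		return FPE_FLTOVF;
	if (err & 0x012)	/* Denormal, underflow */
		return FPE_FLTUND;
	if (err & 0x020)	/* Precision (inexact) */
		return FPE_FLTRES;
	return 0;		/* spurious: caller just returns, as in the hunk */
}

/* #MF: unmasked x87 exceptions are "status word & ~control word". */
static int x87_si_code(unsigned short cwd, unsigned short swd)
{
	return fpe_code_from_err(swd & ~cwd);
}

/* #XF: MXCSR keeps the mask bits at bits 7..12 and the flag bits at 0..5. */
static int simd_si_code(unsigned int mxcsr)
{
	return fpe_code_from_err(~(mxcsr >> 7) & mxcsr & 0x3f);
}

int main(void)
{
	/* Divide-by-zero flag set, all exceptions unmasked: */
	printf("#MF -> %d, #XF -> %d\n",
	       x87_si_code(0x0000, 0x0004), simd_si_code(0x0004));
	return 0;
}

The zero return for an unrecognized combination mirrors the new "Retry when we get spurious exceptions" branch in the hunk.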
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0b81ad67da07..66476244731e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -29,6 +29,7 @@
29#include <linux/kdebug.h> 29#include <linux/kdebug.h>
30#include <asm/processor.h> 30#include <asm/processor.h>
31#include <asm/insn.h> 31#include <asm/insn.h>
32#include <asm/mmu_context.h>
32 33
33/* Post-execution fixups. */ 34/* Post-execution fixups. */
34 35
@@ -312,11 +313,6 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
312} 313}
313 314
314#ifdef CONFIG_X86_64 315#ifdef CONFIG_X86_64
315static inline bool is_64bit_mm(struct mm_struct *mm)
316{
317 return !config_enabled(CONFIG_IA32_EMULATION) ||
318 !(mm->context.ia32_compat == TIF_IA32);
319}
320/* 316/*
321 * If arch_uprobe->insn doesn't use rip-relative addressing, return 317 * If arch_uprobe->insn doesn't use rip-relative addressing, return
322 * immediately. Otherwise, rewrite the instruction so that it accesses 318 * immediately. Otherwise, rewrite the instruction so that it accesses
@@ -497,10 +493,6 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
497 } 493 }
498} 494}
499#else /* 32-bit: */ 495#else /* 32-bit: */
500static inline bool is_64bit_mm(struct mm_struct *mm)
501{
502 return false;
503}
504/* 496/*
505 * No RIP-relative addressing on 32-bit 497 * No RIP-relative addressing on 32-bit
506 */ 498 */
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
deleted file mode 100644
index 87a815b85f3e..000000000000
--- a/arch/x86/kernel/xsave.c
+++ /dev/null
@@ -1,724 +0,0 @@
1/*
2 * xsave/xrstor support.
3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <linux/bootmem.h>
10#include <linux/compat.h>
11#include <linux/cpu.h>
12#include <asm/i387.h>
13#include <asm/fpu-internal.h>
14#include <asm/sigframe.h>
15#include <asm/tlbflush.h>
16#include <asm/xcr.h>
17
18/*
19 * Supported feature mask by the CPU and the kernel.
20 */
21u64 pcntxt_mask;
22
23/*
24 * Represents init state for the supported extended state.
25 */
26struct xsave_struct *init_xstate_buf;
27
28static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
29static unsigned int *xstate_offsets, *xstate_sizes;
30static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
31static unsigned int xstate_features;
32
33/*
34 * If a processor implementation discerns that a processor state component is
35 * in its initialized state, it may clear the corresponding bit in
36 * xsave_hdr.xstate_bv to '0' without modifying the corresponding memory
37 * layout in the case of xsaveopt. While presenting the xstate information to
38 * the user, we always ensure that the memory layout of a feature will be in
39 * the init state if the corresponding header bit is zero. This is to ensure
40 * that the user doesn't see some stale state in the memory layout during
41 * signal handling, debugging etc.
42 */
43void __sanitize_i387_state(struct task_struct *tsk)
44{
45 struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
46 int feature_bit = 0x2;
47 u64 xstate_bv;
48
49 if (!fx)
50 return;
51
52 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
53
54 /*
55 * None of the feature bits are in init state. So nothing else
56 * to do for us, as the memory layout is up to date.
57 */
58 if ((xstate_bv & pcntxt_mask) == pcntxt_mask)
59 return;
60
61 /*
62 * FP is in init state
63 */
64 if (!(xstate_bv & XSTATE_FP)) {
65 fx->cwd = 0x37f;
66 fx->swd = 0;
67 fx->twd = 0;
68 fx->fop = 0;
69 fx->rip = 0;
70 fx->rdp = 0;
71 memset(&fx->st_space[0], 0, 128);
72 }
73
74 /*
75 * SSE is in init state
76 */
77 if (!(xstate_bv & XSTATE_SSE))
78 memset(&fx->xmm_space[0], 0, 256);
79
80 xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2;
81
82 /*
83 * Update all the other memory layouts for which the corresponding
84 * header bit is in the init state.
85 */
86 while (xstate_bv) {
87 if (xstate_bv & 0x1) {
88 int offset = xstate_offsets[feature_bit];
89 int size = xstate_sizes[feature_bit];
90
91 memcpy(((void *) fx) + offset,
92 ((void *) init_xstate_buf) + offset,
93 size);
94 }
95
96 xstate_bv >>= 1;
97 feature_bit++;
98 }
99}
100
101/*
102 * Check for the presence of extended state information in the
103 * user fpstate pointer in the sigcontext.
104 */
105static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
106 void __user *fpstate,
107 struct _fpx_sw_bytes *fx_sw)
108{
109 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
110 sizeof(struct xsave_hdr_struct);
111 unsigned int magic2;
112
113 if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
114 return -1;
115
116 /* Check for the first magic field and other error scenarios. */
117 if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
118 fx_sw->xstate_size < min_xstate_size ||
119 fx_sw->xstate_size > xstate_size ||
120 fx_sw->xstate_size > fx_sw->extended_size)
121 return -1;
122
123 /*
124 * Check for the presence of second magic word at the end of memory
125 * layout. This detects the case where the user just copied the legacy
126 * fpstate layout without copying the extended state information
127 * in the memory layout.
128 */
129 if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
130 || magic2 != FP_XSTATE_MAGIC2)
131 return -1;
132
133 return 0;
134}
135
136/*
137 * Signal frame handlers.
138 */
139static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
140{
141 if (use_fxsr()) {
142 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
143 struct user_i387_ia32_struct env;
144 struct _fpstate_ia32 __user *fp = buf;
145
146 convert_from_fxsr(&env, tsk);
147
148 if (__copy_to_user(buf, &env, sizeof(env)) ||
149 __put_user(xsave->i387.swd, &fp->status) ||
150 __put_user(X86_FXSR_MAGIC, &fp->magic))
151 return -1;
152 } else {
153 struct i387_fsave_struct __user *fp = buf;
154 u32 swd;
155 if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
156 return -1;
157 }
158
159 return 0;
160}
161
162static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
163{
164 struct xsave_struct __user *x = buf;
165 struct _fpx_sw_bytes *sw_bytes;
166 u32 xstate_bv;
167 int err;
168
169 /* Setup the bytes not touched by the [f]xsave and reserved for SW. */
170 sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
171 err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
172
173 if (!use_xsave())
174 return err;
175
176 err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
177
178 /*
179 * Read the xstate_bv which we copied (directly from the cpu or
180 * from the state in task struct) to the user buffers.
181 */
182 err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
183
184 /*
185 * For legacy compatibility, we always set the FP/SSE bits in the bit
186 * vector while saving the state to the user context. This will
187 * enable us to capture any changes (during sigreturn) to
188 * the FP/SSE bits by the legacy applications which don't touch
189 * xstate_bv in the xsave header.
190 *
191 * xsave aware apps can change the xstate_bv in the xsave
192 * header as well as change any contents in the memory layout.
193 * xrestore as part of sigreturn will capture all the changes.
194 */
195 xstate_bv |= XSTATE_FPSSE;
196
197 err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
198
199 return err;
200}
201
202static inline int save_user_xstate(struct xsave_struct __user *buf)
203{
204 int err;
205
206 if (use_xsave())
207 err = xsave_user(buf);
208 else if (use_fxsr())
209 err = fxsave_user((struct i387_fxsave_struct __user *) buf);
210 else
211 err = fsave_user((struct i387_fsave_struct __user *) buf);
212
213 if (unlikely(err) && __clear_user(buf, xstate_size))
214 err = -EFAULT;
215 return err;
216}
217
218/*
219 * Save the fpu, extended register state to the user signal frame.
220 *
221 * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
222 * state is copied.
223 * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
224 *
225 * buf == buf_fx for 64-bit frames and 32-bit fsave frame.
226 * buf != buf_fx for 32-bit frames with fxstate.
227 *
228 * If the fpu, extended register state is live, save the state directly
229 * to the user frame pointed to by the aligned pointer 'buf_fx'. Otherwise,
230 * copy the thread's fpu state to the user frame starting at 'buf_fx'.
231 *
232 * If this is a 32-bit frame with fxstate, put a fsave header before
233 * the aligned state at 'buf_fx'.
234 *
235 * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
236 * indicating the absence/presence of the extended state to the user.
237 */
238int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
239{
240 struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
241 struct task_struct *tsk = current;
242 int ia32_fxstate = (buf != buf_fx);
243
244 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
245 config_enabled(CONFIG_IA32_EMULATION));
246
247 if (!access_ok(VERIFY_WRITE, buf, size))
248 return -EACCES;
249
250 if (!static_cpu_has(X86_FEATURE_FPU))
251 return fpregs_soft_get(current, NULL, 0,
252 sizeof(struct user_i387_ia32_struct), NULL,
253 (struct _fpstate_ia32 __user *) buf) ? -1 : 1;
254
255 if (user_has_fpu()) {
256 /* Save the live register state to the user directly. */
257 if (save_user_xstate(buf_fx))
258 return -1;
259 /* Update the thread's fxstate to save the fsave header. */
260 if (ia32_fxstate)
261 fpu_fxsave(&tsk->thread.fpu);
262 } else {
263 sanitize_i387_state(tsk);
264 if (__copy_to_user(buf_fx, xsave, xstate_size))
265 return -1;
266 }
267
268 /* Save the fsave header for the 32-bit frames. */
269 if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
270 return -1;
271
272 if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
273 return -1;
274
275 return 0;
276}
277
278static inline void
279sanitize_restored_xstate(struct task_struct *tsk,
280 struct user_i387_ia32_struct *ia32_env,
281 u64 xstate_bv, int fx_only)
282{
283 struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
284 struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
285
286 if (use_xsave()) {
287 /* These bits must be zero. */
288 memset(xsave_hdr->reserved, 0, 48);
289
290 /*
291 * Init the state that is not present in the memory
292 * layout and not enabled by the OS.
293 */
294 if (fx_only)
295 xsave_hdr->xstate_bv = XSTATE_FPSSE;
296 else
297 xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
298 }
299
300 if (use_fxsr()) {
301 /*
302 * mxcsr reserved bits must be masked to zero for security
303 * reasons.
304 */
305 xsave->i387.mxcsr &= mxcsr_feature_mask;
306
307 convert_to_fxsr(tsk, ia32_env);
308 }
309}
310
311/*
312 * Restore the extended state if present. Otherwise, restore the FP/SSE state.
313 */
314static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
315{
316 if (use_xsave()) {
317 if ((unsigned long)buf % 64 || fx_only) {
318 u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
319 xrstor_state(init_xstate_buf, init_bv);
320 return fxrstor_user(buf);
321 } else {
322 u64 init_bv = pcntxt_mask & ~xbv;
323 if (unlikely(init_bv))
324 xrstor_state(init_xstate_buf, init_bv);
325 return xrestore_user(buf, xbv);
326 }
327 } else if (use_fxsr()) {
328 return fxrstor_user(buf);
329 } else
330 return frstor_user(buf);
331}
332
333int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
334{
335 int ia32_fxstate = (buf != buf_fx);
336 struct task_struct *tsk = current;
337 int state_size = xstate_size;
338 u64 xstate_bv = 0;
339 int fx_only = 0;
340
341 ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
342 config_enabled(CONFIG_IA32_EMULATION));
343
344 if (!buf) {
345 fpu_reset_state(tsk);
346 return 0;
347 }
348
349 if (!access_ok(VERIFY_READ, buf, size))
350 return -EACCES;
351
352 if (!used_math() && init_fpu(tsk))
353 return -1;
354
355 if (!static_cpu_has(X86_FEATURE_FPU))
356 return fpregs_soft_set(current, NULL,
357 0, sizeof(struct user_i387_ia32_struct),
358 NULL, buf) != 0;
359
360 if (use_xsave()) {
361 struct _fpx_sw_bytes fx_sw_user;
362 if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
363 /*
364 * Couldn't find the extended state information in the
365 * memory layout. Restore just the FP/SSE and init all
366 * the other extended state.
367 */
368 state_size = sizeof(struct i387_fxsave_struct);
369 fx_only = 1;
370 } else {
371 state_size = fx_sw_user.xstate_size;
372 xstate_bv = fx_sw_user.xstate_bv;
373 }
374 }
375
376 if (ia32_fxstate) {
377 /*
378 * For 32-bit frames with fxstate, copy the user state to the
379 * thread's fpu state, reconstruct fxstate from the fsave
380 * header. Sanitize the copied state etc.
381 */
382 struct fpu *fpu = &tsk->thread.fpu;
383 struct user_i387_ia32_struct env;
384 int err = 0;
385
386 /*
387 * Drop the current fpu, which clears used_math(). This ensures
388 * that a context switch during the copy of the new state cannot
389 * save or restore the intermediate state, and so cannot corrupt
390 * the newly restored state.
391 * We will be ready to restore/save the state only after
392 * set_used_math() is set again.
393 */
394 drop_fpu(tsk);
395
396 if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
397 __copy_from_user(&env, buf, sizeof(env))) {
398 fpu_finit(fpu);
399 err = -1;
400 } else {
401 sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
402 }
403
404 set_used_math();
405 if (use_eager_fpu()) {
406 preempt_disable();
407 math_state_restore();
408 preempt_enable();
409 }
410
411 return err;
412 } else {
413 /*
414 * For 64-bit frames and 32-bit fsave frames, restore the user
415 * state to the registers directly (with exceptions handled).
416 */
417 user_fpu_begin();
418 if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
419 fpu_reset_state(tsk);
420 return -1;
421 }
422 }
423
424 return 0;
425}
426
427/*
428 * Prepare the SW reserved portion of the fxsave memory layout, indicating
429 * the presence of the extended state information in the memory layout
430 * pointed to by the fpstate pointer in the sigcontext.
431 * This will be saved whenever the FP and extended state context is
432 * saved on the user stack during the signal handler delivery to the user.
433 */
434static void prepare_fx_sw_frame(void)
435{
436 int fsave_header_size = sizeof(struct i387_fsave_struct);
437 int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
438
439 if (config_enabled(CONFIG_X86_32))
440 size += fsave_header_size;
441
442 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
443 fx_sw_reserved.extended_size = size;
444 fx_sw_reserved.xstate_bv = pcntxt_mask;
445 fx_sw_reserved.xstate_size = xstate_size;
446
447 if (config_enabled(CONFIG_IA32_EMULATION)) {
448 fx_sw_reserved_ia32 = fx_sw_reserved;
449 fx_sw_reserved_ia32.extended_size += fsave_header_size;
450 }
451}
452
453/*
454 * Enable the extended processor state save/restore feature
455 */
456static inline void xstate_enable(void)
457{
458 cr4_set_bits(X86_CR4_OSXSAVE);
459 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
460}
461
462/*
463 * Record the offsets and sizes of different state managed by the xsave
464 * memory layout.
465 */
466static void __init setup_xstate_features(void)
467{
468 int eax, ebx, ecx, edx, leaf = 0x2;
469
470 xstate_features = fls64(pcntxt_mask);
471 xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
472 xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));
473
474 do {
475 cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
476
477 if (eax == 0)
478 break;
479
480 xstate_offsets[leaf] = ebx;
481 xstate_sizes[leaf] = eax;
482
483 leaf++;
484 } while (1);
485}
486
487/*
488 * This function sets up the offsets and sizes of all extended states in
489 * the xsave area. It supports both the standard and the compacted format
490 * of the xsave area.
491 *
492 * Input: void
493 * Output: void
494 */
495void setup_xstate_comp(void)
496{
497 unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
498 int i;
499
500 /*
501 * The FP xstates and SSE xstates are legacy states. They are always
502 * in the fixed offsets in the xsave area in either compacted form
503 * or standard form.
504 */
505 xstate_comp_offsets[0] = 0;
506 xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
507
508 if (!cpu_has_xsaves) {
509 for (i = 2; i < xstate_features; i++) {
510 if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
511 xstate_comp_offsets[i] = xstate_offsets[i];
512 xstate_comp_sizes[i] = xstate_sizes[i];
513 }
514 }
515 return;
516 }
517
518 xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
519
520 for (i = 2; i < xstate_features; i++) {
521 if (test_bit(i, (unsigned long *)&pcntxt_mask))
522 xstate_comp_sizes[i] = xstate_sizes[i];
523 else
524 xstate_comp_sizes[i] = 0;
525
526 if (i > 2)
527 xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
528 + xstate_comp_sizes[i-1];
529
530 }
531}
532
533/*
534 * setup the xstate image representing the init state
535 */
536static void __init setup_init_fpu_buf(void)
537{
538 /*
539 * Set up init_xstate_buf to represent the init state of
540 * all the features managed by xsave.
541 */
542 init_xstate_buf = alloc_bootmem_align(xstate_size,
543 __alignof__(struct xsave_struct));
544 fx_finit(&init_xstate_buf->i387);
545
546 if (!cpu_has_xsave)
547 return;
548
549 setup_xstate_features();
550
551 if (cpu_has_xsaves) {
552 init_xstate_buf->xsave_hdr.xcomp_bv =
553 (u64)1 << 63 | pcntxt_mask;
554 init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
555 }
556
557 /*
558 * Init all the feature states with header_bv set to 0x0
559 */
560 xrstor_state_booting(init_xstate_buf, -1);
561 /*
562 * Dump the init state again. This is to identify the init state
563 * of any feature which is not represented by all zeros.
564 */
565 xsave_state_booting(init_xstate_buf, -1);
566}
567
568static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
569static int __init eager_fpu_setup(char *s)
570{
571 if (!strcmp(s, "on"))
572 eagerfpu = ENABLE;
573 else if (!strcmp(s, "off"))
574 eagerfpu = DISABLE;
575 else if (!strcmp(s, "auto"))
576 eagerfpu = AUTO;
577 return 1;
578}
579__setup("eagerfpu=", eager_fpu_setup);
580
581
582/*
583 * Calculate total size of enabled xstates in XCR0/pcntxt_mask.
584 */
585static void __init init_xstate_size(void)
586{
587 unsigned int eax, ebx, ecx, edx;
588 int i;
589
590 if (!cpu_has_xsaves) {
591 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
592 xstate_size = ebx;
593 return;
594 }
595
596 xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
597 for (i = 2; i < 64; i++) {
598 if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
599 cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
600 xstate_size += eax;
601 }
602 }
603}
604
605/*
606 * Enable and initialize the xsave feature.
607 */
608static void __init xstate_enable_boot_cpu(void)
609{
610 unsigned int eax, ebx, ecx, edx;
611
612 if (boot_cpu_data.cpuid_level < XSTATE_CPUID) {
613 WARN(1, KERN_ERR "XSTATE_CPUID missing\n");
614 return;
615 }
616
617 cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
618 pcntxt_mask = eax + ((u64)edx << 32);
619
620 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
621 pr_err("FP/SSE not shown under xsave features 0x%llx\n",
622 pcntxt_mask);
623 BUG();
624 }
625
626 /*
627 * Support only the state known to OS.
628 */
629 pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
630
631 xstate_enable();
632
633 /*
634 * Recompute the context size for enabled features
635 */
636 init_xstate_size();
637
638 update_regset_xstate_info(xstate_size, pcntxt_mask);
639 prepare_fx_sw_frame();
640 setup_init_fpu_buf();
641
642 /* Auto enable eagerfpu for xsaveopt */
643 if (cpu_has_xsaveopt && eagerfpu != DISABLE)
644 eagerfpu = ENABLE;
645
646 if (pcntxt_mask & XSTATE_EAGER) {
647 if (eagerfpu == DISABLE) {
648 pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n",
649 pcntxt_mask & XSTATE_EAGER);
650 pcntxt_mask &= ~XSTATE_EAGER;
651 } else {
652 eagerfpu = ENABLE;
653 }
654 }
655
656 pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n",
657 pcntxt_mask, xstate_size,
658 cpu_has_xsaves ? "compacted form" : "standard form");
659}
660
661/*
662 * For the very first instance, this calls xstate_enable_boot_cpu();
663 * for all subsequent instances, this calls xstate_enable().
664 *
665 * This is somewhat obfuscated due to the lack of powerful enough
666 * overrides for the section checks.
667 */
668void xsave_init(void)
669{
670 static __refdata void (*next_func)(void) = xstate_enable_boot_cpu;
671 void (*this_func)(void);
672
673 if (!cpu_has_xsave)
674 return;
675
676 this_func = next_func;
677 next_func = xstate_enable;
678 this_func();
679}
680
681/*
682 * setup_init_fpu_buf() is __init and it is OK to call it here because
683 * init_xstate_buf will be unset only once during boot.
684 */
685void __init_refok eager_fpu_init(void)
686{
687 WARN_ON(used_math());
688 current_thread_info()->status = 0;
689
690 if (eagerfpu == ENABLE)
691 setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
692
693 if (!cpu_has_eager_fpu) {
694 stts();
695 return;
696 }
697
698 if (!init_xstate_buf)
699 setup_init_fpu_buf();
700}
701
702/*
703 * Given the xsave area and a state inside, this function returns the
704 * address of the state.
705 *
706 * This is the API that is called to get the address of a state in either
707 * the standard or the compacted format of the xsave area.
708 *
709 * Inputs:
710 * xsave: base address of the xsave area;
711 * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE,
712 * etc.)
713 * Output:
714 * address of the state in the xsave area.
715 */
716void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
717{
718 int feature = fls64(xstate) - 1;
719 if (!test_bit(feature, (unsigned long *)&pcntxt_mask))
720 return NULL;
721
722 return (void *)xsave + xstate_comp_offsets[feature];
723}
724EXPORT_SYMBOL_GPL(get_xsave_addr);
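The deleted setup_xstate_comp()/get_xsave_addr() pair above derives compacted-format offsets by keeping FP/SSE at their fixed legacy offsets and packing every enabled feature >= 2 immediately after the previous one. A self-contained sketch of that accumulation; the per-feature sizes are placeholder values, not real CPUID data:

#include <stdio.h>

#define NFEATURES	5
#define FXSAVE_SIZE	512
#define XSAVE_HDR_SIZE	64

int main(void)
{
	/* Per-feature sizes as CPUID would report them (made up here). */
	unsigned int size[NFEATURES] = { 160, 256, 256, 0, 64 };
	unsigned long long enabled = 0x17;	/* features 0, 1, 2, 4 */
	unsigned int offset[NFEATURES];
	int i;

	offset[0] = 0;		/* x87 state, fixed inside the fxsave area */
	offset[1] = 160;	/* SSE state, fixed inside the fxsave area */

	/* The first packed feature starts right after the xsave header. */
	offset[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
	for (i = 3; i < NFEATURES; i++) {
		unsigned int prev = (enabled >> (i - 1)) & 1 ? size[i - 1] : 0;

		offset[i] = offset[i - 1] + prev;
	}

	for (i = 0; i < NFEATURES; i++)
		printf("feature %d: enabled=%d compacted offset=%u\n",
		       i, (int)((enabled >> i) & 1), offset[i]);
	return 0;
}

The disabled feature 3 contributes no bytes, so feature 4 packs directly after feature 2, which is why get_xsave_addr() must consult the compacted offset table instead of the fixed CPUID offsets.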
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 1d08ad3582d0..9f705e618af5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,10 +16,8 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/vmalloc.h> 17#include <linux/vmalloc.h>
18#include <linux/uaccess.h> 18#include <linux/uaccess.h>
19#include <asm/i387.h> /* For use_eager_fpu. Ugh! */
20#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */
21#include <asm/user.h> 19#include <asm/user.h>
22#include <asm/xsave.h> 20#include <asm/fpu/xstate.h>
23#include "cpuid.h" 21#include "cpuid.h"
24#include "lapic.h" 22#include "lapic.h"
25#include "mmu.h" 23#include "mmu.h"
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2d73807f0d31..e11dd59398f1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -40,8 +40,7 @@
40#include <asm/vmx.h> 40#include <asm/vmx.h>
41#include <asm/virtext.h> 41#include <asm/virtext.h>
42#include <asm/mce.h> 42#include <asm/mce.h>
43#include <asm/i387.h> 43#include <asm/fpu/internal.h>
44#include <asm/xcr.h>
45#include <asm/perf_event.h> 44#include <asm/perf_event.h>
46#include <asm/debugreg.h> 45#include <asm/debugreg.h>
47#include <asm/kexec.h> 46#include <asm/kexec.h>
@@ -1883,7 +1882,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1883 * If the FPU is not active (through the host task or 1882 * If the FPU is not active (through the host task or
1884 * the guest vcpu), then restore the cr0.TS bit. 1883 * the guest vcpu), then restore the cr0.TS bit.
1885 */ 1884 */
1886 if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) 1885 if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded)
1887 stts(); 1886 stts();
1888 load_gdt(this_cpu_ptr(&host_gdt)); 1887 load_gdt(this_cpu_ptr(&host_gdt));
1889} 1888}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ea306adbbc13..26eaeb522cab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -59,9 +59,8 @@
59#include <asm/desc.h> 59#include <asm/desc.h>
60#include <asm/mtrr.h> 60#include <asm/mtrr.h>
61#include <asm/mce.h> 61#include <asm/mce.h>
62#include <asm/i387.h> 62#include <linux/kernel_stat.h>
63#include <asm/fpu-internal.h> /* Ugh! */ 63#include <asm/fpu/internal.h> /* Ugh! */
64#include <asm/xcr.h>
65#include <asm/pvclock.h> 64#include <asm/pvclock.h>
66#include <asm/div64.h> 65#include <asm/div64.h>
67 66
@@ -3194,8 +3193,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
3194 3193
3195static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) 3194static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3196{ 3195{
3197 struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; 3196 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3198 u64 xstate_bv = xsave->xsave_hdr.xstate_bv; 3197 u64 xstate_bv = xsave->header.xfeatures;
3199 u64 valid; 3198 u64 valid;
3200 3199
3201 /* 3200 /*
@@ -3230,7 +3229,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
3230 3229
3231static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) 3230static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3232{ 3231{
3233 struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; 3232 struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
3234 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); 3233 u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
3235 u64 valid; 3234 u64 valid;
3236 3235
@@ -3241,9 +3240,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
3241 memcpy(xsave, src, XSAVE_HDR_OFFSET); 3240 memcpy(xsave, src, XSAVE_HDR_OFFSET);
3242 3241
3243 /* Set XSTATE_BV and possibly XCOMP_BV. */ 3242 /* Set XSTATE_BV and possibly XCOMP_BV. */
3244 xsave->xsave_hdr.xstate_bv = xstate_bv; 3243 xsave->header.xfeatures = xstate_bv;
3245 if (cpu_has_xsaves) 3244 if (cpu_has_xsaves)
3246 xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; 3245 xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
3247 3246
3248 /* 3247 /*
3249 * Copy each region from the non-compacted offset to the 3248 * Copy each region from the non-compacted offset to the
@@ -3275,8 +3274,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
3275 fill_xsave((u8 *) guest_xsave->region, vcpu); 3274 fill_xsave((u8 *) guest_xsave->region, vcpu);
3276 } else { 3275 } else {
3277 memcpy(guest_xsave->region, 3276 memcpy(guest_xsave->region,
3278 &vcpu->arch.guest_fpu.state->fxsave, 3277 &vcpu->arch.guest_fpu.state.fxsave,
3279 sizeof(struct i387_fxsave_struct)); 3278 sizeof(struct fxregs_state));
3280 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = 3279 *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
3281 XSTATE_FPSSE; 3280 XSTATE_FPSSE;
3282 } 3281 }
@@ -3300,8 +3299,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3300 } else { 3299 } else {
3301 if (xstate_bv & ~XSTATE_FPSSE) 3300 if (xstate_bv & ~XSTATE_FPSSE)
3302 return -EINVAL; 3301 return -EINVAL;
3303 memcpy(&vcpu->arch.guest_fpu.state->fxsave, 3302 memcpy(&vcpu->arch.guest_fpu.state.fxsave,
3304 guest_xsave->region, sizeof(struct i387_fxsave_struct)); 3303 guest_xsave->region, sizeof(struct fxregs_state));
3305 } 3304 }
3306 return 0; 3305 return 0;
3307} 3306}
@@ -6597,11 +6596,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
6597 6596
6598int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 6597int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
6599{ 6598{
6599 struct fpu *fpu = &current->thread.fpu;
6600 int r; 6600 int r;
6601 sigset_t sigsaved; 6601 sigset_t sigsaved;
6602 6602
6603 if (!tsk_used_math(current) && init_fpu(current)) 6603 fpu__activate_curr(fpu);
6604 return -ENOMEM;
6605 6604
6606 if (vcpu->sigset_active) 6605 if (vcpu->sigset_active)
6607 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); 6606 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
@@ -6971,8 +6970,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
6971 6970
6972int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 6971int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6973{ 6972{
6974 struct i387_fxsave_struct *fxsave = 6973 struct fxregs_state *fxsave =
6975 &vcpu->arch.guest_fpu.state->fxsave; 6974 &vcpu->arch.guest_fpu.state.fxsave;
6976 6975
6977 memcpy(fpu->fpr, fxsave->st_space, 128); 6976 memcpy(fpu->fpr, fxsave->st_space, 128);
6978 fpu->fcw = fxsave->cwd; 6977 fpu->fcw = fxsave->cwd;
@@ -6988,8 +6987,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6988 6987
6989int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) 6988int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
6990{ 6989{
6991 struct i387_fxsave_struct *fxsave = 6990 struct fxregs_state *fxsave =
6992 &vcpu->arch.guest_fpu.state->fxsave; 6991 &vcpu->arch.guest_fpu.state.fxsave;
6993 6992
6994 memcpy(fxsave->st_space, fpu->fpr, 128); 6993 memcpy(fxsave->st_space, fpu->fpr, 128);
6995 fxsave->cwd = fpu->fcw; 6994 fxsave->cwd = fpu->fcw;
@@ -7003,17 +7002,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
7003 return 0; 7002 return 0;
7004} 7003}
7005 7004
7006int fx_init(struct kvm_vcpu *vcpu) 7005static void fx_init(struct kvm_vcpu *vcpu)
7007{ 7006{
7008 int err; 7007 fpstate_init(&vcpu->arch.guest_fpu.state);
7009
7010 err = fpu_alloc(&vcpu->arch.guest_fpu);
7011 if (err)
7012 return err;
7013
7014 fpu_finit(&vcpu->arch.guest_fpu);
7015 if (cpu_has_xsaves) 7008 if (cpu_has_xsaves)
7016 vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = 7009 vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
7017 host_xcr0 | XSTATE_COMPACTION_ENABLED; 7010 host_xcr0 | XSTATE_COMPACTION_ENABLED;
7018 7011
7019 /* 7012 /*
@@ -7022,14 +7015,6 @@ int fx_init(struct kvm_vcpu *vcpu)
7022 vcpu->arch.xcr0 = XSTATE_FP; 7015 vcpu->arch.xcr0 = XSTATE_FP;
7023 7016
7024 vcpu->arch.cr0 |= X86_CR0_ET; 7017 vcpu->arch.cr0 |= X86_CR0_ET;
7025
7026 return 0;
7027}
7028EXPORT_SYMBOL_GPL(fx_init);
7029
7030static void fx_free(struct kvm_vcpu *vcpu)
7031{
7032 fpu_free(&vcpu->arch.guest_fpu);
7033} 7018}
7034 7019
7035void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 7020void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
@@ -7045,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
7045 kvm_put_guest_xcr0(vcpu); 7030 kvm_put_guest_xcr0(vcpu);
7046 vcpu->guest_fpu_loaded = 1; 7031 vcpu->guest_fpu_loaded = 1;
7047 __kernel_fpu_begin(); 7032 __kernel_fpu_begin();
7048 fpu_restore_checking(&vcpu->arch.guest_fpu); 7033 __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
7049 trace_kvm_fpu(1); 7034 trace_kvm_fpu(1);
7050} 7035}
7051 7036
@@ -7057,7 +7042,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
7057 return; 7042 return;
7058 7043
7059 vcpu->guest_fpu_loaded = 0; 7044 vcpu->guest_fpu_loaded = 0;
7060 fpu_save_init(&vcpu->arch.guest_fpu); 7045 copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
7061 __kernel_fpu_end(); 7046 __kernel_fpu_end();
7062 ++vcpu->stat.fpu_reload; 7047 ++vcpu->stat.fpu_reload;
7063 if (!vcpu->arch.eager_fpu) 7048 if (!vcpu->arch.eager_fpu)
@@ -7071,7 +7056,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
7071 kvmclock_reset(vcpu); 7056 kvmclock_reset(vcpu);
7072 7057
7073 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); 7058 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
7074 fx_free(vcpu);
7075 kvm_x86_ops->vcpu_free(vcpu); 7059 kvm_x86_ops->vcpu_free(vcpu);
7076} 7060}
7077 7061
@@ -7137,7 +7121,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
7137 kvm_mmu_unload(vcpu); 7121 kvm_mmu_unload(vcpu);
7138 vcpu_put(vcpu); 7122 vcpu_put(vcpu);
7139 7123
7140 fx_free(vcpu);
7141 kvm_x86_ops->vcpu_free(vcpu); 7124 kvm_x86_ops->vcpu_free(vcpu);
7142} 7125}
7143 7126
@@ -7363,9 +7346,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7363 goto fail_free_mce_banks; 7346 goto fail_free_mce_banks;
7364 } 7347 }
7365 7348
7366 r = fx_init(vcpu); 7349 fx_init(vcpu);
7367 if (r)
7368 goto fail_free_wbinvd_dirty_mask;
7369 7350
7370 vcpu->arch.ia32_tsc_adjust_msr = 0x0; 7351 vcpu->arch.ia32_tsc_adjust_msr = 0x0;
7371 vcpu->arch.pv_time_enabled = false; 7352 vcpu->arch.pv_time_enabled = false;
@@ -7379,8 +7360,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
7379 kvm_pmu_init(vcpu); 7360 kvm_pmu_init(vcpu);
7380 7361
7381 return 0; 7362 return 0;
7382fail_free_wbinvd_dirty_mask: 7363
7383 free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
7384fail_free_mce_banks: 7364fail_free_mce_banks:
7385 kfree(vcpu->arch.mce_banks); 7365 kfree(vcpu->arch.mce_banks);
7386fail_free_lapic: 7366fail_free_lapic:
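fill_xsave()/load_xsave() in the hunks above copy each enabled region between the fixed non-compacted layout used for the KVM ABI and the possibly compacted kernel buffer, walking the feature bits in xstate_bv. A minimal sketch of that bit-walking copy; the offset and size tables are toy values standing in for the kernel's:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NFEAT 4
static const unsigned int std_off[NFEAT]  = { 0, 160, 576, 832 };
static const unsigned int comp_off[NFEAT] = { 0, 160, 576, 640 };
static const unsigned int size[NFEAT]     = { 160, 416, 64, 128 };

static void copy_enabled_regions(uint8_t *dst_std, const uint8_t *src_comp,
				 uint64_t xstate_bv)
{
	/* Features 0 and 1 (FP/SSE) sit in the fixed legacy area, which
	 * the real code copies wholesale before this loop. */
	uint64_t valid = xstate_bv & ~3ULL;

	while (valid) {
		int feature = __builtin_ctzll(valid);	/* lowest set bit */

		memcpy(dst_std + std_off[feature],
		       src_comp + comp_off[feature], size[feature]);
		valid &= valid - 1;			/* clear that bit */
	}
}

int main(void)
{
	uint8_t compacted[1024] = { 0 }, standard[1024] = { 0 };

	memset(compacted + comp_off[3], 0xab, size[3]);
	copy_enabled_regions(standard, compacted, 0x9);	/* features 0 and 3 */
	printf("feature 3 lands at std offset %u: 0x%02x\n",
	       std_off[3], standard[std_off[3]]);
	return 0;
}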
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 8f9a133cc099..27f8eea0d6eb 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -70,7 +70,7 @@
70#include <asm/e820.h> 70#include <asm/e820.h>
71#include <asm/mce.h> 71#include <asm/mce.h>
72#include <asm/io.h> 72#include <asm/io.h>
73#include <asm/i387.h> 73#include <asm/fpu/api.h>
74#include <asm/stackprotector.h> 74#include <asm/stackprotector.h>
75#include <asm/reboot.h> /* for struct machine_ops */ 75#include <asm/reboot.h> /* for struct machine_ops */
76#include <asm/kvm_para.h> 76#include <asm/kvm_para.h>
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index c9f2d9ba8dd8..e5e3ed8dc079 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -22,7 +22,7 @@
22#include <linux/sched.h> 22#include <linux/sched.h>
23#include <linux/types.h> 23#include <linux/types.h>
24 24
25#include <asm/i387.h> 25#include <asm/fpu/api.h>
26#include <asm/asm.h> 26#include <asm/asm.h>
27 27
28void *_mmx_memcpy(void *to, const void *from, size_t len) 28void *_mmx_memcpy(void *to, const void *from, size_t len)
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c
index dc8adad10a2f..dd76a05729b0 100644
--- a/arch/x86/math-emu/fpu_aux.c
+++ b/arch/x86/math-emu/fpu_aux.c
@@ -30,7 +30,7 @@ static void fclex(void)
30} 30}
31 31
32/* Needs to be externally visible */ 32/* Needs to be externally visible */
33void finit_soft_fpu(struct i387_soft_struct *soft) 33void fpstate_init_soft(struct swregs_state *soft)
34{ 34{
35 struct address *oaddr, *iaddr; 35 struct address *oaddr, *iaddr;
36 memset(soft, 0, sizeof(*soft)); 36 memset(soft, 0, sizeof(*soft));
@@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft)
52 52
53void finit(void) 53void finit(void)
54{ 54{
55 finit_soft_fpu(&current->thread.fpu.state->soft); 55 fpstate_init_soft(&current->thread.fpu.state.soft);
56} 56}
57 57
58/* 58/*
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 9b868124128d..f37e84ab49f3 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -31,7 +31,7 @@
31#include <asm/traps.h> 31#include <asm/traps.h>
32#include <asm/desc.h> 32#include <asm/desc.h>
33#include <asm/user.h> 33#include <asm/user.h>
34#include <asm/i387.h> 34#include <asm/fpu/internal.h>
35 35
36#include "fpu_system.h" 36#include "fpu_system.h"
37#include "fpu_emu.h" 37#include "fpu_emu.h"
@@ -147,13 +147,9 @@ void math_emulate(struct math_emu_info *info)
147 unsigned long code_base = 0; 147 unsigned long code_base = 0;
148 unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ 148 unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
149 struct desc_struct code_descriptor; 149 struct desc_struct code_descriptor;
150 struct fpu *fpu = &current->thread.fpu;
150 151
151 if (!used_math()) { 152 fpu__activate_curr(fpu);
152 if (init_fpu(current)) {
153 do_group_exit(SIGKILL);
154 return;
155 }
156 }
157 153
158#ifdef RE_ENTRANT_CHECKING 154#ifdef RE_ENTRANT_CHECKING
159 if (emulating) { 155 if (emulating) {
@@ -673,7 +669,7 @@ void math_abort(struct math_emu_info *info, unsigned int signal)
673#endif /* PARANOID */ 669#endif /* PARANOID */
674} 670}
675 671
676#define S387 ((struct i387_soft_struct *)s387) 672#define S387 ((struct swregs_state *)s387)
677#define sstatus_word() \ 673#define sstatus_word() \
678 ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) 674 ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top))
679 675
@@ -682,14 +678,14 @@ int fpregs_soft_set(struct task_struct *target,
682 unsigned int pos, unsigned int count, 678 unsigned int pos, unsigned int count,
683 const void *kbuf, const void __user *ubuf) 679 const void *kbuf, const void __user *ubuf)
684{ 680{
685 struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; 681 struct swregs_state *s387 = &target->thread.fpu.state.soft;
686 void *space = s387->st_space; 682 void *space = s387->st_space;
687 int ret; 683 int ret;
688 int offset, other, i, tags, regnr, tag, newtop; 684 int offset, other, i, tags, regnr, tag, newtop;
689 685
690 RE_ENTRANT_CHECK_OFF; 686 RE_ENTRANT_CHECK_OFF;
691 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, 687 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0,
692 offsetof(struct i387_soft_struct, st_space)); 688 offsetof(struct swregs_state, st_space));
693 RE_ENTRANT_CHECK_ON; 689 RE_ENTRANT_CHECK_ON;
694 690
695 if (ret) 691 if (ret)
@@ -734,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target,
734 unsigned int pos, unsigned int count, 730 unsigned int pos, unsigned int count,
735 void *kbuf, void __user *ubuf) 731 void *kbuf, void __user *ubuf)
736{ 732{
737 struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; 733 struct swregs_state *s387 = &target->thread.fpu.state.soft;
738 const void *space = s387->st_space; 734 const void *space = s387->st_space;
739 int ret; 735 int ret;
740 int offset = (S387->ftop & 7) * 10, other = 80 - offset; 736 int offset = (S387->ftop & 7) * 10, other = 80 - offset;
@@ -752,7 +748,7 @@ int fpregs_soft_get(struct task_struct *target,
752#endif /* PECULIAR_486 */ 748#endif /* PECULIAR_486 */
753 749
754 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, 750 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0,
755 offsetof(struct i387_soft_struct, st_space)); 751 offsetof(struct swregs_state, st_space));
756 752
757 /* Copy all registers in stack order. */ 753 /* Copy all registers in stack order. */
758 if (!ret) 754 if (!ret)
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index 2c614410a5f3..9ccecb61a4fa 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -31,7 +31,7 @@
31#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ 31#define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \
32 == (1 << 10)) 32 == (1 << 10))
33 33
34#define I387 (current->thread.fpu.state) 34#define I387 (&current->thread.fpu.state)
35#define FPU_info (I387->soft.info) 35#define FPU_info (I387->soft.info)
36 36
37#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) 37#define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs))
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index c439ec478216..7a657f58bbea 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -10,13 +10,15 @@
10#include <linux/syscalls.h> 10#include <linux/syscalls.h>
11#include <linux/sched/sysctl.h> 11#include <linux/sched/sysctl.h>
12 12
13#include <asm/i387.h>
14#include <asm/insn.h> 13#include <asm/insn.h>
15#include <asm/mman.h> 14#include <asm/mman.h>
16#include <asm/mmu_context.h> 15#include <asm/mmu_context.h>
17#include <asm/mpx.h> 16#include <asm/mpx.h>
18#include <asm/processor.h> 17#include <asm/processor.h>
19#include <asm/fpu-internal.h> 18#include <asm/fpu/internal.h>
19
20#define CREATE_TRACE_POINTS
21#include <asm/trace/mpx.h>
20 22
21static const char *mpx_mapping_name(struct vm_area_struct *vma) 23static const char *mpx_mapping_name(struct vm_area_struct *vma)
22{ 24{
@@ -32,6 +34,22 @@ static int is_mpx_vma(struct vm_area_struct *vma)
32 return (vma->vm_ops == &mpx_vma_ops); 34 return (vma->vm_ops == &mpx_vma_ops);
33} 35}
34 36
37static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
38{
39 if (is_64bit_mm(mm))
40 return MPX_BD_SIZE_BYTES_64;
41 else
42 return MPX_BD_SIZE_BYTES_32;
43}
44
45static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
46{
47 if (is_64bit_mm(mm))
48 return MPX_BT_SIZE_BYTES_64;
49 else
50 return MPX_BT_SIZE_BYTES_32;
51}
52
35/* 53/*
36 * This is really a simplified "vm_mmap". it only handles MPX 54 * This is really a simplified "vm_mmap". it only handles MPX
37 * bounds tables (the bounds directory is user-allocated). 55 * bounds tables (the bounds directory is user-allocated).
@@ -47,8 +65,8 @@ static unsigned long mpx_mmap(unsigned long len)
47 vm_flags_t vm_flags; 65 vm_flags_t vm_flags;
48 struct vm_area_struct *vma; 66 struct vm_area_struct *vma;
49 67
50 /* Only bounds table and bounds directory can be allocated here */ 68 /* Only bounds table can be allocated here */
51 if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) 69 if (len != mpx_bt_size_bytes(mm))
52 return -EINVAL; 70 return -EINVAL;
53 71
54 down_write(&mm->mmap_sem); 72 down_write(&mm->mmap_sem);
@@ -272,10 +290,9 @@ bad_opcode:
272 * 290 *
273 * The caller is expected to kfree() the returned siginfo_t. 291 * The caller is expected to kfree() the returned siginfo_t.
274 */ 292 */
275siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, 293siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
276 struct xsave_struct *xsave_buf)
277{ 294{
278 struct bndreg *bndregs, *bndreg; 295 const struct bndreg *bndregs, *bndreg;
279 siginfo_t *info = NULL; 296 siginfo_t *info = NULL;
280 struct insn insn; 297 struct insn insn;
281 uint8_t bndregno; 298 uint8_t bndregno;
@@ -295,8 +312,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
295 err = -EINVAL; 312 err = -EINVAL;
296 goto err_out; 313 goto err_out;
297 } 314 }
298 /* get the bndregs _area_ of the xsave structure */ 315 /* get bndregs field from current task's xsave area */
299 bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); 316 bndregs = get_xsave_field_ptr(XSTATE_BNDREGS);
300 if (!bndregs) { 317 if (!bndregs) {
301 err = -EINVAL; 318 err = -EINVAL;
302 goto err_out; 319 goto err_out;
@@ -334,6 +351,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
334 err = -EINVAL; 351 err = -EINVAL;
335 goto err_out; 352 goto err_out;
336 } 353 }
354 trace_mpx_bounds_register_exception(info->si_addr, bndreg);
337 return info; 355 return info;
338err_out: 356err_out:
339 /* info might be NULL, but kfree() handles that */ 357 /* info might be NULL, but kfree() handles that */
@@ -341,25 +359,18 @@ err_out:
341 return ERR_PTR(err); 359 return ERR_PTR(err);
342} 360}
343 361
344static __user void *task_get_bounds_dir(struct task_struct *tsk) 362static __user void *mpx_get_bounds_dir(void)
345{ 363{
346 struct bndcsr *bndcsr; 364 const struct bndcsr *bndcsr;
347 365
348 if (!cpu_feature_enabled(X86_FEATURE_MPX)) 366 if (!cpu_feature_enabled(X86_FEATURE_MPX))
349 return MPX_INVALID_BOUNDS_DIR; 367 return MPX_INVALID_BOUNDS_DIR;
350 368
351 /* 369 /*
352 * 32-bit binaries on 64-bit kernels are currently
353 * unsupported.
354 */
355 if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
356 return MPX_INVALID_BOUNDS_DIR;
357 /*
358 * The bounds directory pointer is stored in a register 370 * The bounds directory pointer is stored in a register
359 * only accessible if we first do an xsave. 371 * only accessible if we first do an xsave.
360 */ 372 */
361 fpu_save_init(&tsk->thread.fpu); 373 bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
362 bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR);
363 if (!bndcsr) 374 if (!bndcsr)
364 return MPX_INVALID_BOUNDS_DIR; 375 return MPX_INVALID_BOUNDS_DIR;
365 376
@@ -378,10 +389,10 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk)
378 (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); 389 (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
379} 390}
380 391
381int mpx_enable_management(struct task_struct *tsk) 392int mpx_enable_management(void)
382{ 393{
383 void __user *bd_base = MPX_INVALID_BOUNDS_DIR; 394 void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
384 struct mm_struct *mm = tsk->mm; 395 struct mm_struct *mm = current->mm;
385 int ret = 0; 396 int ret = 0;
386 397
387 /* 398 /*
@@ -390,11 +401,12 @@ int mpx_enable_management(struct task_struct *tsk)
390 * directory into XSAVE/XRSTOR Save Area and enable MPX through 401 * directory into XSAVE/XRSTOR Save Area and enable MPX through
391 * XRSTOR instruction. 402 * XRSTOR instruction.
392 * 403 *
393 * fpu_xsave() is expected to be very expensive. Storing the bounds 404 * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
394 * directory here means that we do not have to do xsave in the unmap 405 * expected to be relatively expensive. Storing the bounds
395 * path; we can just use mm->bd_addr instead. 406 * directory here means that we do not have to do xsave in the
407 * unmap path; we can just use mm->bd_addr instead.
396 */ 408 */
397 bd_base = task_get_bounds_dir(tsk); 409 bd_base = mpx_get_bounds_dir();
398 down_write(&mm->mmap_sem); 410 down_write(&mm->mmap_sem);
399 mm->bd_addr = bd_base; 411 mm->bd_addr = bd_base;
400 if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) 412 if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR)
@@ -404,7 +416,7 @@ int mpx_enable_management(struct task_struct *tsk)
404 return ret; 416 return ret;
405} 417}
406 418
407int mpx_disable_management(struct task_struct *tsk) 419int mpx_disable_management(void)
408{ 420{
409 struct mm_struct *mm = current->mm; 421 struct mm_struct *mm = current->mm;
410 422
@@ -417,29 +429,59 @@ int mpx_disable_management(struct task_struct *tsk)
417 return 0; 429 return 0;
418} 430}
419 431
432static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
433 unsigned long *curval,
434 unsigned long __user *addr,
435 unsigned long old_val, unsigned long new_val)
436{
437 int ret;
438 /*
439 * user_atomic_cmpxchg_inatomic() actually uses sizeof()
440 * the pointer that we pass to it to figure out how much
441 * data to cmpxchg. We have to be careful here not to
442 * pass a pointer to a 64-bit data type when we only want
443 * a 32-bit copy.
444 */
445 if (is_64bit_mm(mm)) {
446 ret = user_atomic_cmpxchg_inatomic(curval,
447 addr, old_val, new_val);
448 } else {
449 u32 uninitialized_var(curval_32);
450 u32 old_val_32 = old_val;
451 u32 new_val_32 = new_val;
452 u32 __user *addr_32 = (u32 __user *)addr;
453
454 ret = user_atomic_cmpxchg_inatomic(&curval_32,
455 addr_32, old_val_32, new_val_32);
456 *curval = curval_32;
457 }
458 return ret;
459}
460
420/* 461/*
421 * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each 462 * With 32-bit mode, a bounds directory is 4MB, and the size of each
422 * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, 463 * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB,
423 * and the size of each bounds table is 4MB. 464 * and the size of each bounds table is 4MB.
424 */ 465 */
425static int allocate_bt(long __user *bd_entry) 466static int allocate_bt(struct mm_struct *mm, long __user *bd_entry)
426{ 467{
427 unsigned long expected_old_val = 0; 468 unsigned long expected_old_val = 0;
428 unsigned long actual_old_val = 0; 469 unsigned long actual_old_val = 0;
429 unsigned long bt_addr; 470 unsigned long bt_addr;
471 unsigned long bd_new_entry;
430 int ret = 0; 472 int ret = 0;
431 473
432 /* 474 /*
433 * Carve the virtual space out of userspace for the new 475 * Carve the virtual space out of userspace for the new
434 * bounds table: 476 * bounds table:
435 */ 477 */
436 bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); 478 bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
437 if (IS_ERR((void *)bt_addr)) 479 if (IS_ERR((void *)bt_addr))
438 return PTR_ERR((void *)bt_addr); 480 return PTR_ERR((void *)bt_addr);
439 /* 481 /*
440 * Set the valid flag (kinda like _PAGE_PRESENT in a pte) 482 * Set the valid flag (kinda like _PAGE_PRESENT in a pte)
441 */ 483 */
442 bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; 484 bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
443 485
444 /* 486 /*
445 * Go poke the address of the new bounds table in to the 487 * Go poke the address of the new bounds table in to the
@@ -452,8 +494,8 @@ static int allocate_bt(long __user *bd_entry)
452 * mmap_sem at this point, unlike some of the other part 494 * mmap_sem at this point, unlike some of the other part
453 * of the MPX code that have to pagefault_disable(). 495 * of the MPX code that have to pagefault_disable().
454 */ 496 */
455 ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, 497 ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
456 expected_old_val, bt_addr); 498 expected_old_val, bd_new_entry);
457 if (ret) 499 if (ret)
458 goto out_unmap; 500 goto out_unmap;
459 501
@@ -481,9 +523,10 @@ static int allocate_bt(long __user *bd_entry)
481 ret = -EINVAL; 523 ret = -EINVAL;
482 goto out_unmap; 524 goto out_unmap;
483 } 525 }
526 trace_mpx_new_bounds_table(bt_addr);
484 return 0; 527 return 0;
485out_unmap: 528out_unmap:
486 vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); 529 vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
487 return ret; 530 return ret;
488} 531}
489 532
@@ -498,12 +541,13 @@ out_unmap:
498 * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, 541 * bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
499 * and the size of each bound table is 4MB. 542 * and the size of each bound table is 4MB.
500 */ 543 */
501static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) 544static int do_mpx_bt_fault(void)
502{ 545{
503 unsigned long bd_entry, bd_base; 546 unsigned long bd_entry, bd_base;
504 struct bndcsr *bndcsr; 547 const struct bndcsr *bndcsr;
548 struct mm_struct *mm = current->mm;
505 549
506 bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); 550 bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
507 if (!bndcsr) 551 if (!bndcsr)
508 return -EINVAL; 552 return -EINVAL;
509 /* 553 /*
@@ -520,13 +564,13 @@ static int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
520 * the directory is. 564 * the directory is.
521 */ 565 */
522 if ((bd_entry < bd_base) || 566 if ((bd_entry < bd_base) ||
523 (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) 567 (bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
524 return -EINVAL; 568 return -EINVAL;
525 569
526 return allocate_bt((long __user *)bd_entry); 570 return allocate_bt(mm, (long __user *)bd_entry);
527} 571}
528 572
529int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) 573int mpx_handle_bd_fault(void)
530{ 574{
531 /* 575 /*
532 * Userspace never asked us to manage the bounds tables, 576 * Userspace never asked us to manage the bounds tables,
@@ -535,7 +579,7 @@ int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
535 if (!kernel_managing_mpx_tables(current->mm)) 579 if (!kernel_managing_mpx_tables(current->mm))
536 return -EINVAL; 580 return -EINVAL;
537 581
538 if (do_mpx_bt_fault(xsave_buf)) { 582 if (do_mpx_bt_fault()) {
539 force_sig(SIGSEGV, current); 583 force_sig(SIGSEGV, current);
540 /* 584 /*
541 * The force_sig() is essentially "handling" this 585 * The force_sig() is essentially "handling" this
@@ -572,29 +616,55 @@ static int mpx_resolve_fault(long __user *addr, int write)
572 return 0; 616 return 0;
573} 617}
574 618
619static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
620 unsigned long bd_entry)
621{
622 unsigned long bt_addr = bd_entry;
623 int align_to_bytes;
624 /*
625 * Bit 0 in a bt_entry is always the valid bit.
626 */
627 bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
628 /*
629 * Tables are naturally aligned at 8-byte boundaries
630 * on 64-bit and 4-byte boundaries on 32-bit. The
631 * documentation makes it appear that the low bits
632 * are ignored by the hardware, so we do the same.
633 */
634 if (is_64bit_mm(mm))
635 align_to_bytes = 8;
636 else
637 align_to_bytes = 4;
638 bt_addr &= ~(align_to_bytes-1);
639 return bt_addr;
640}
641
575/* 642/*
576 * Get the base of bounds tables pointed by specific bounds 643 * Get the base of bounds tables pointed by specific bounds
577 * directory entry. 644 * directory entry.
578 */ 645 */
579static int get_bt_addr(struct mm_struct *mm, 646static int get_bt_addr(struct mm_struct *mm,
580 long __user *bd_entry, unsigned long *bt_addr) 647 long __user *bd_entry_ptr,
648 unsigned long *bt_addr_result)
581{ 649{
582 int ret; 650 int ret;
583 int valid_bit; 651 int valid_bit;
652 unsigned long bd_entry;
653 unsigned long bt_addr;
584 654
585 if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry))) 655 if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr)))
586 return -EFAULT; 656 return -EFAULT;
587 657
588 while (1) { 658 while (1) {
589 int need_write = 0; 659 int need_write = 0;
590 660
591 pagefault_disable(); 661 pagefault_disable();
592 ret = get_user(*bt_addr, bd_entry); 662 ret = get_user(bd_entry, bd_entry_ptr);
593 pagefault_enable(); 663 pagefault_enable();
594 if (!ret) 664 if (!ret)
595 break; 665 break;
596 if (ret == -EFAULT) 666 if (ret == -EFAULT)
597 ret = mpx_resolve_fault(bd_entry, need_write); 667 ret = mpx_resolve_fault(bd_entry_ptr, need_write);
598 /* 668 /*
599 * If we could not resolve the fault, consider it 669 * If we could not resolve the fault, consider it
600 * userspace's fault and error out. 670 * userspace's fault and error out.
@@ -603,8 +673,8 @@ static int get_bt_addr(struct mm_struct *mm,
603 return ret; 673 return ret;
604 } 674 }
605 675
606 valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG; 676 valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
607 *bt_addr &= MPX_BT_ADDR_MASK; 677 bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
608 678
609 /* 679 /*
610 * When the kernel is managing bounds tables, a bounds directory 680 * When the kernel is managing bounds tables, a bounds directory
@@ -613,7 +683,7 @@ static int get_bt_addr(struct mm_struct *mm,
613 * data in the address field, we know something is wrong. This 683 * data in the address field, we know something is wrong. This
614 * -EINVAL return will cause a SIGSEGV. 684 * -EINVAL return will cause a SIGSEGV.
615 */ 685 */
616 if (!valid_bit && *bt_addr) 686 if (!valid_bit && bt_addr)
617 return -EINVAL; 687 return -EINVAL;
618 /* 688 /*
619 * Do we have a completely zeroed bt entry? That is OK. It 689
@@ -624,19 +694,100 @@ static int get_bt_addr(struct mm_struct *mm,
624 if (!valid_bit) 694 if (!valid_bit)
625 return -ENOENT; 695 return -ENOENT;
626 696
697 *bt_addr_result = bt_addr;
627 return 0; 698 return 0;
628} 699}
629 700
701static inline int bt_entry_size_bytes(struct mm_struct *mm)
702{
703 if (is_64bit_mm(mm))
704 return MPX_BT_ENTRY_BYTES_64;
705 else
706 return MPX_BT_ENTRY_BYTES_32;
707}
708
709/*
710 * Takes a virtual address and turns it into the offset in bytes
711 * inside the bounds table where the bounds table entry
712 * controlling 'addr' can be found.
713 */
714static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
715 unsigned long addr)
716{
717 unsigned long bt_table_nr_entries;
718 unsigned long offset = addr;
719
720 if (is_64bit_mm(mm)) {
721 /* Bottom 3 bits are ignored on 64-bit */
722 offset >>= 3;
723 bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
724 } else {
725 /* Bottom 2 bits are ignored on 32-bit */
726 offset >>= 2;
727 bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
728 }
729 /*
730 * We know the size of the table into which we are
731 * indexing, and we have eliminated all the low bits
732 * which are ignored for indexing.
733 *
734 * Mask out all the high bits which we do not need
735 * to index in to the table. Note that the tables
736 * are always powers of two so this gives us a proper
737 * mask.
738 */
739 offset &= (bt_table_nr_entries-1);
740 /*
741 * We now have an entry offset in terms of *entries* in
742 * the table. We need to scale it back up to bytes.
743 */
744 offset *= bt_entry_size_bytes(mm);
745 return offset;
746}
747
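The table-offset arithmetic above can be checked in isolation. The following sketch mirrors it; the entry counts and entry sizes (2^17 entries of 32 bytes on 64-bit, 2^10 entries of 16 bytes on 32-bit) are assumptions about what MPX_BT_NR_ENTRIES_* and MPX_BT_ENTRY_BYTES_* expand to, while the 3-bit and 2-bit shifts are stated in the hunk itself.

#include <stdio.h>

/* Assumed geometry for the MPX_BT_* constants referenced above. */
#define BT_NR_ENTRIES_64	(1UL << 17)
#define BT_ENTRY_BYTES_64	32
#define BT_NR_ENTRIES_32	(1UL << 10)
#define BT_ENTRY_BYTES_32	16

static unsigned long bt_entry_offset_bytes(unsigned long addr, int is_64bit)
{
	unsigned long index;

	if (is_64bit)
		index = (addr >> 3) & (BT_NR_ENTRIES_64 - 1);	/* low 3 bits ignored */
	else
		index = (addr >> 2) & (BT_NR_ENTRIES_32 - 1);	/* low 2 bits ignored */

	return index * (is_64bit ? BT_ENTRY_BYTES_64 : BT_ENTRY_BYTES_32);
}

int main(void)
{
	unsigned long addr = 0x7f12345678a8UL;

	printf("64-bit table offset: %#lx\n", bt_entry_offset_bytes(addr, 1));
	printf("32-bit table offset: %#lx\n", bt_entry_offset_bytes(addr & 0xffffffffUL, 0));
	return 0;
}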
748/*
749 * How much virtual address space does a single bounds
750 * directory entry cover?
751 *
752 * Note, we need a long long because 4GB doesn't fit in
753 * to a long on 32-bit.
754 */
755static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
756{
757 unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
758 if (is_64bit_mm(mm))
759 return virt_space / MPX_BD_NR_ENTRIES_64;
760 else
761 return virt_space / MPX_BD_NR_ENTRIES_32;
762}
763
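Plugging plausible numbers into bd_entry_virt_space() makes its result concrete. The directory entry counts and the 48-bit virtual address width below are assumptions (they are not spelled out in this hunk); with them, each 64-bit directory entry covers 1 MB of virtual space and each 32-bit entry covers 4 KB.

#include <stdio.h>

int main(void)
{
	unsigned long long virt_space_64 = 1ULL << 48;		/* assumed x86_virt_bits == 48 */
	unsigned long long virt_space_32 = 1ULL << 32;
	unsigned long long bd_nr_entries_64 = 1ULL << 28;	/* assumed directory entry counts */
	unsigned long long bd_nr_entries_32 = 1ULL << 20;

	printf("64-bit: %llu bytes of virtual space per bd entry\n",
	       virt_space_64 / bd_nr_entries_64);		/* 1048576 = 1 MB */
	printf("32-bit: %llu bytes of virtual space per bd entry\n",
	       virt_space_32 / bd_nr_entries_32);		/* 4096 = 4 KB */
	return 0;
}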
630/* 764/*
631 * Free the backing physical pages of bounds table 'bt_addr'. 765 * Free the backing physical pages of bounds table 'bt_addr'.
632 * Assume start...end is within that bounds table. 766 * Assume start...end is within that bounds table.
633 */ 767 */
634static int zap_bt_entries(struct mm_struct *mm, 768static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
635 unsigned long bt_addr, 769 unsigned long bt_addr,
636 unsigned long start, unsigned long end) 770 unsigned long start_mapping, unsigned long end_mapping)
637{ 771{
638 struct vm_area_struct *vma; 772 struct vm_area_struct *vma;
639 unsigned long addr, len; 773 unsigned long addr, len;
774 unsigned long start;
775 unsigned long end;
776
777 /*
778 * if we 'end' on a boundary, the offset will be 0 which
779 * is not what we want. Back it up a byte to get the
780 * last bt entry. Then once we have the entry itself,
781 * move 'end' back up by the table entry size.
782 */
783 start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
784 end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
785 /*
786 * Move end back up by one entry. Among other things
787 * this ensures that it remains page-aligned and does
788 * not screw up zap_page_range()
789 */
790 end += bt_entry_size_bytes(mm);
640 791
641 /* 792 /*
642 * Find the first overlapping vma. If vma->vm_start > start, there 793 * Find the first overlapping vma. If vma->vm_start > start, there
@@ -648,7 +799,7 @@ static int zap_bt_entries(struct mm_struct *mm,
648 return -EINVAL; 799 return -EINVAL;
649 800
650 /* 801 /*
651 * A NUMA policy on a VM_MPX VMA could cause this bouds table to 802 * A NUMA policy on a VM_MPX VMA could cause this bounds table to
652 * be split. So we need to look across the entire 'start -> end' 803 * be split. So we need to look across the entire 'start -> end'
653 * range of this bounds table, find all of the VM_MPX VMAs, and 804 * range of this bounds table, find all of the VM_MPX VMAs, and
654 * zap only those. 805 * zap only those.
@@ -666,27 +817,65 @@ static int zap_bt_entries(struct mm_struct *mm,
666 817
667 len = min(vma->vm_end, end) - addr; 818 len = min(vma->vm_end, end) - addr;
668 zap_page_range(vma, addr, len, NULL); 819 zap_page_range(vma, addr, len, NULL);
820 trace_mpx_unmap_zap(addr, addr+len);
669 821
670 vma = vma->vm_next; 822 vma = vma->vm_next;
671 addr = vma->vm_start; 823 addr = vma->vm_start;
672 } 824 }
673
674 return 0; 825 return 0;
675} 826}
676 827
677static int unmap_single_bt(struct mm_struct *mm, 828static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
829 unsigned long addr)
830{
831 /*
832 * There are several ways to derive the bd offsets. We
833 * use the following approach here:
834 * 1. We know the size of the virtual address space
835 * 2. We know the number of entries in a bounds table
836 * 3. We know that each entry covers a fixed amount of
837 * virtual address space.
838 * So, we can just divide the virtual address by the
839 * virtual space used by one entry to determine which
840 * entry "controls" the given virtual address.
841 */
842 if (is_64bit_mm(mm)) {
843 int bd_entry_size = 8; /* 64-bit pointer */
844 /*
845 * Take the 64-bit addressing hole into account.
846 */
847 addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
848 return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
849 } else {
850 int bd_entry_size = 4; /* 32-bit pointer */
851 /*
852 * 32-bit has no hole so this case needs no mask
853 */
854 return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
855 }
856 /*
857 * The two return calls above are exact copies. If we
858 * pull out a single copy and put it in here, gcc won't
859 * realize that we're doing a power-of-2 divide and use
860 * shifts. It uses a real divide. If we put them up
861 * there, it manages to figure it out (gcc 4.8.3).
862 */
863}
864
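The directory-offset calculation above is a divide plus a scale. A compact user-space rendering follows; the per-entry coverage (1 MB on 64-bit, 4 KB on 32-bit) carries over from the assumptions in the previous sketch, while the 8-byte and 4-byte entry sizes come straight from the hunk. It assumes an LP64 build.

#include <stdio.h>

static unsigned long bd_entry_offset(unsigned long addr, int is_64bit)
{
	if (is_64bit) {
		addr &= (1UL << 48) - 1;	/* mask off the canonical hole (48 virt bits assumed) */
		return (addr >> 20) * 8;	/* 1 MB per entry (assumed), 8-byte entries */
	}
	return (addr >> 12) * 4;		/* 4 KB per entry (assumed), 4-byte entries */
}

int main(void)
{
	printf("bd offset for 0x7f0000100000: %#lx\n",
	       bd_entry_offset(0x7f0000100000UL, 1));
	return 0;
}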
865static int unmap_entire_bt(struct mm_struct *mm,
678 long __user *bd_entry, unsigned long bt_addr) 866 long __user *bd_entry, unsigned long bt_addr)
679{ 867{
680 unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; 868 unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG;
681 unsigned long actual_old_val = 0; 869 unsigned long uninitialized_var(actual_old_val);
682 int ret; 870 int ret;
683 871
684 while (1) { 872 while (1) {
685 int need_write = 1; 873 int need_write = 1;
874 unsigned long cleared_bd_entry = 0;
686 875
687 pagefault_disable(); 876 pagefault_disable();
688 ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, 877 ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
689 expected_old_val, 0); 878 bd_entry, expected_old_val, cleared_bd_entry);
690 pagefault_enable(); 879 pagefault_enable();
691 if (!ret) 880 if (!ret)
692 break; 881 break;
@@ -705,9 +894,8 @@ static int unmap_single_bt(struct mm_struct *mm,
705 if (actual_old_val != expected_old_val) { 894 if (actual_old_val != expected_old_val) {
706 /* 895 /*
707 * Someone else raced with us to unmap the table. 896 * Someone else raced with us to unmap the table.
708 * There was no bounds table pointed to by the 897 * That is OK, since we were both trying to do
709 * directory, so declare success. Somebody freed 898 * the same thing. Declare success.
710 * it.
711 */ 899 */
712 if (!actual_old_val) 900 if (!actual_old_val)
713 return 0; 901 return 0;
@@ -720,176 +908,113 @@ static int unmap_single_bt(struct mm_struct *mm,
720 */ 908 */
721 return -EINVAL; 909 return -EINVAL;
722 } 910 }
723
724 /* 911 /*
725 * Note, we are likely being called under do_munmap() already. To 912 * Note, we are likely being called under do_munmap() already. To
726 * avoid recursion, do_munmap() will check whether it comes 913 * avoid recursion, do_munmap() will check whether it comes
727 * from one bounds table through VM_MPX flag. 914 * from one bounds table through VM_MPX flag.
728 */ 915 */
729 return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); 916 return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm));
730} 917}
731 918
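The unmap_entire_bt() hunk above clears a directory entry with a compare-and-exchange and treats a lost race as success. The kernel goes through mpx_cmpxchg_bd_entry(), which also copes with faults on user memory; the sketch below reproduces only the value logic with C11 atomics, so it is an analogy rather than the real mechanism.

#include <stdatomic.h>
#include <stdio.h>

/* Clear *entry only if it still holds the expected value (bt_addr | valid bit).
 * Returns 0 if we cleared it, 0 if somebody else already cleared it (a benign
 * race, as the kernel code treats it), and -1 if the entry changed to
 * something else entirely. */
static int clear_bd_entry(_Atomic unsigned long *entry, unsigned long expected)
{
	unsigned long old = expected;

	if (atomic_compare_exchange_strong(entry, &old, 0UL))
		return 0;
	if (old == 0)
		return 0;
	return -1;
}

int main(void)
{
	unsigned long bt_addr = 0x7f1234400000UL;
	_Atomic unsigned long bd_entry = bt_addr | 0x1UL;

	printf("first clear:  %d\n", clear_bd_entry(&bd_entry, bt_addr | 0x1UL));
	printf("second clear: %d\n", clear_bd_entry(&bd_entry, bt_addr | 0x1UL));
	return 0;
}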
732/* 919static int try_unmap_single_bt(struct mm_struct *mm,
733 * If the bounds table pointed by bounds directory 'bd_entry' is 920 unsigned long start, unsigned long end)
734 * not shared, unmap this whole bounds table. Otherwise, only free
735 * those backing physical pages of bounds table entries covered
736 * in this virtual address region start...end.
737 */
738static int unmap_shared_bt(struct mm_struct *mm,
739 long __user *bd_entry, unsigned long start,
740 unsigned long end, bool prev_shared, bool next_shared)
741{ 921{
742 unsigned long bt_addr; 922 struct vm_area_struct *next;
743 int ret; 923 struct vm_area_struct *prev;
744
745 ret = get_bt_addr(mm, bd_entry, &bt_addr);
746 /* 924 /*
747 * We could see an "error" ret for not-present bounds 925 * "bta" == Bounds Table Area: the area controlled by the
748 * tables (not really an error), or actual errors, but 926 * bounds table that we are unmapping.
749 * stop unmapping either way.
750 */ 927 */
751 if (ret) 928 unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
752 return ret; 929 unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
753 930 unsigned long uninitialized_var(bt_addr);
754 if (prev_shared && next_shared) 931 void __user *bde_vaddr;
755 ret = zap_bt_entries(mm, bt_addr,
756 bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
757 bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
758 else if (prev_shared)
759 ret = zap_bt_entries(mm, bt_addr,
760 bt_addr+MPX_GET_BT_ENTRY_OFFSET(start),
761 bt_addr+MPX_BT_SIZE_BYTES);
762 else if (next_shared)
763 ret = zap_bt_entries(mm, bt_addr, bt_addr,
764 bt_addr+MPX_GET_BT_ENTRY_OFFSET(end));
765 else
766 ret = unmap_single_bt(mm, bd_entry, bt_addr);
767
768 return ret;
769}
770
771/*
772 * A virtual address region being munmap()ed might share bounds table
773 * with adjacent VMAs. We only need to free the backing physical
774 * memory of these shared bounds tables entries covered in this virtual
775 * address region.
776 */
777static int unmap_edge_bts(struct mm_struct *mm,
778 unsigned long start, unsigned long end)
779{
780 int ret; 932 int ret;
781 long __user *bde_start, *bde_end;
782 struct vm_area_struct *prev, *next;
783 bool prev_shared = false, next_shared = false;
784
785 bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
786 bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
787
788 /* 933 /*
789 * Check whether bde_start and bde_end are shared with adjacent 934 * We already unlinked the VMAs from the mm's rbtree so 'start'
790 * VMAs.
791 *
792 * We already unliked the VMAs from the mm's rbtree so 'start'
793 * is guaranteed to be in a hole. This gets us the first VMA 935 * is guaranteed to be in a hole. This gets us the first VMA
794 * before the hole in to 'prev' and the next VMA after the hole 936 * before the hole in to 'prev' and the next VMA after the hole
795 * in to 'next'. 937 * in to 'next'.
796 */ 938 */
797 next = find_vma_prev(mm, start, &prev); 939 next = find_vma_prev(mm, start, &prev);
798 if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1))
799 == bde_start)
800 prev_shared = true;
801 if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start))
802 == bde_end)
803 next_shared = true;
804
805 /* 940 /*
806 * This virtual address region being munmap()ed is only 941 * Do not count other MPX bounds table VMAs as neighbors.
807 * covered by one bounds table. 942 * Although theoretically possible, we do not allow bounds
808 * 943 * tables for bounds tables so our heads do not explode.
809 * In this case, if this table is also shared with adjacent 944 * If we count them as neighbors here, we may end up with
810 * VMAs, only part of the backing physical memory of the bounds 945 * lots of tables even though we have no actual table
811 * table need be freeed. Otherwise the whole bounds table need 946 * entries in use.
812 * be unmapped. 947 */
813 */ 948 while (next && is_mpx_vma(next))
814 if (bde_start == bde_end) { 949 next = next->vm_next;
815 return unmap_shared_bt(mm, bde_start, start, end, 950 while (prev && is_mpx_vma(prev))
816 prev_shared, next_shared); 951 prev = prev->vm_prev;
952 /*
953 * We know 'start' and 'end' lie within an area controlled
954 * by a single bounds table. See if there are any other
955 * VMAs controlled by that bounds table. If there are not
956 * then we can "expand" the area we are unmapping to possibly
957 * cover the entire table.
958 */
959 next = find_vma_prev(mm, start, &prev);
960 if ((!prev || prev->vm_end <= bta_start_vaddr) &&
961 (!next || next->vm_start >= bta_end_vaddr)) {
962 /*
963 * No neighbor VMAs controlled by same bounds
964 * table. Try to unmap the whole thing
965 */
966 start = bta_start_vaddr;
967 end = bta_end_vaddr;
817 } 968 }
818 969
970 bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start);
971 ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
819 /* 972 /*
820 * If more than one bounds tables are covered in this virtual 973 * No bounds table there, so nothing to unmap.
821 * address region being munmap()ed, we need to separately check
822 * whether bde_start and bde_end are shared with adjacent VMAs.
823 */ 974 */
824 ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false); 975 if (ret == -ENOENT) {
825 if (ret) 976 ret = 0;
826 return ret; 977 return 0;
827 ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared); 978 }
828 if (ret) 979 if (ret)
829 return ret; 980 return ret;
830 981 /*
831 return 0; 982 * We are unmapping an entire table. Either because the
983 * unmap that started this whole process was large enough
984 * to cover an entire table, or because the unmap was small
985 * but covered exactly the area of one bounds table.
986 */
987 if ((start == bta_start_vaddr) &&
988 (end == bta_end_vaddr))
989 return unmap_entire_bt(mm, bde_vaddr, bt_addr);
990 return zap_bt_entries_mapping(mm, bt_addr, start, end);
832} 991}
833 992
834static int mpx_unmap_tables(struct mm_struct *mm, 993static int mpx_unmap_tables(struct mm_struct *mm,
835 unsigned long start, unsigned long end) 994 unsigned long start, unsigned long end)
836{ 995{
837 int ret; 996 unsigned long one_unmap_start;
838 long __user *bd_entry, *bde_start, *bde_end; 997 trace_mpx_unmap_search(start, end);
839 unsigned long bt_addr; 998
840 999 one_unmap_start = start;
841 /* 1000 while (one_unmap_start < end) {
842 * "Edge" bounds tables are those which are being used by the region 1001 int ret;
843 * (start -> end), but that may be shared with adjacent areas. If they 1002 unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
844 * turn out to be completely unshared, they will be freed. If they are 1003 bd_entry_virt_space(mm));
845 * shared, we will free the backing store (like an MADV_DONTNEED) for 1004 unsigned long one_unmap_end = end;
846 * areas used by this region. 1005 /*
847 */ 1006 * if the end is beyond the current bounds table,
848 ret = unmap_edge_bts(mm, start, end); 1007 * move it back so we only deal with a single one
849 switch (ret) { 1008 * at a time
850 /* non-present tables are OK */ 1009 */
851 case 0: 1010 if (one_unmap_end > next_unmap_start)
852 case -ENOENT: 1011 one_unmap_end = next_unmap_start;
853 /* Success, or no tables to unmap */ 1012 ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
854 break;
855 case -EINVAL:
856 case -EFAULT:
857 default:
858 return ret;
859 }
860
861 /*
862 * Only unmap the bounds table that are
863 * 1. fully covered
864 * 2. not at the edges of the mapping, even if full aligned
865 */
866 bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start);
867 bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1);
868 for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) {
869 ret = get_bt_addr(mm, bd_entry, &bt_addr);
870 switch (ret) {
871 case 0:
872 break;
873 case -ENOENT:
874 /* No table here, try the next one */
875 continue;
876 case -EINVAL:
877 case -EFAULT:
878 default:
879 /*
880 * Note: we are being strict here.
881 * Any time we run in to an issue
882 * unmapping tables, we stop and
883 * SIGSEGV.
884 */
885 return ret;
886 }
887
888 ret = unmap_single_bt(mm, bd_entry, bt_addr);
889 if (ret) 1013 if (ret)
890 return ret; 1014 return ret;
891 }
892 1015
1016 one_unmap_start = next_unmap_start;
1017 }
893 return 0; 1018 return 0;
894} 1019}
895 1020
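The rewritten mpx_unmap_tables() walks the unmap range one bounds-table-sized window at a time. That loop structure can be exercised on its own; in the sketch below a 1 MB window stands in for bd_entry_virt_space() on 64-bit (an assumption carried over from the earlier sketches), and a printf stands in for try_unmap_single_bt().

#include <stdio.h>

#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define BD_ENTRY_VIRT_SPACE	(1UL << 20)	/* assumed: 1 MB per directory entry (64-bit) */

static void walk_unmap_windows(unsigned long start, unsigned long end)
{
	unsigned long one_unmap_start = start;

	while (one_unmap_start < end) {
		unsigned long next_unmap_start =
			ALIGN_UP(one_unmap_start + 1, BD_ENTRY_VIRT_SPACE);
		unsigned long one_unmap_end = end;

		/* Clamp the window so we deal with a single bounds table at a time. */
		if (one_unmap_end > next_unmap_start)
			one_unmap_end = next_unmap_start;

		/* The kernel calls try_unmap_single_bt() here. */
		printf("window: [%#lx, %#lx)\n", one_unmap_start, one_unmap_end);
		one_unmap_start = next_unmap_start;
	}
}

int main(void)
{
	/* A 2.5 MB unmap that straddles three bounds-table-sized windows. */
	walk_unmap_windows(0x100080000UL, 0x100300000UL);
	return 0;
}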
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 757678fb26e1..0d7dd1f5ac36 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,10 +18,9 @@
18#include <asm/mtrr.h> 18#include <asm/mtrr.h>
19#include <asm/page.h> 19#include <asm/page.h>
20#include <asm/mce.h> 20#include <asm/mce.h>
21#include <asm/xcr.h>
22#include <asm/suspend.h> 21#include <asm/suspend.h>
22#include <asm/fpu/internal.h>
23#include <asm/debugreg.h> 23#include <asm/debugreg.h>
24#include <asm/fpu-internal.h> /* pcntxt_mask */
25#include <asm/cpu.h> 24#include <asm/cpu.h>
26 25
27#ifdef CONFIG_X86_32 26#ifdef CONFIG_X86_32
@@ -155,6 +154,8 @@ static void fix_processor_context(void)
155#endif 154#endif
156 load_TR_desc(); /* This does ltr */ 155 load_TR_desc(); /* This does ltr */
157 load_LDT(&current->active_mm->context); /* This does lldt */ 156 load_LDT(&current->active_mm->context); /* This does lldt */
157
158 fpu__resume_cpu();
158} 159}
159 160
160/** 161/**
@@ -221,12 +222,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
221 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 222 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
222#endif 223#endif
223 224
224 /*
225 * restore XCR0 for xsave capable cpu's.
226 */
227 if (cpu_has_xsave)
228 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
229
230 fix_processor_context(); 225 fix_processor_context();
231 226
232 do_fpu_end(); 227 do_fpu_end();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 46957ead3060..98088bf5906a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu)
1423 return; 1423 return;
1424 /* 1424 /*
1425 * For BSP, PSE PGE are set in probe_page_size_mask(), for APs 1425 * For BSP, PSE PGE are set in probe_page_size_mask(), for APs
1426 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. 1426 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu().
1427 */ 1427 */
1428 if (cpu_has_pse) 1428 if (cpu_has_pse)
1429 cr4_set_bits_and_update_boot(X86_CR4_PSE); 1429 cr4_set_bits_and_update_boot(X86_CR4_PSE);
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c
index a3bebef255ad..0c98a9d51a24 100644
--- a/drivers/char/hw_random/via-rng.c
+++ b/drivers/char/hw_random/via-rng.c
@@ -33,7 +33,7 @@
33#include <asm/io.h> 33#include <asm/io.h>
34#include <asm/msr.h> 34#include <asm/msr.h>
35#include <asm/cpufeature.h> 35#include <asm/cpufeature.h>
36#include <asm/i387.h> 36#include <asm/fpu/api.h>
37 37
38 38
39 39
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index c178ed8c3908..da2d6777bd09 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -22,7 +22,7 @@
22#include <asm/cpu_device_id.h> 22#include <asm/cpu_device_id.h>
23#include <asm/byteorder.h> 23#include <asm/byteorder.h>
24#include <asm/processor.h> 24#include <asm/processor.h>
25#include <asm/i387.h> 25#include <asm/fpu/api.h>
26 26
27/* 27/*
28 * Number of data blocks actually fetched for each xcrypt insn. 28 * Number of data blocks actually fetched for each xcrypt insn.
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 95f7d27ce491..4e154c9b9206 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -23,7 +23,7 @@
23#include <linux/kernel.h> 23#include <linux/kernel.h>
24#include <linux/scatterlist.h> 24#include <linux/scatterlist.h>
25#include <asm/cpu_device_id.h> 25#include <asm/cpu_device_id.h>
26#include <asm/i387.h> 26#include <asm/fpu/api.h>
27 27
28struct padlock_sha_desc { 28struct padlock_sha_desc {
29 struct shash_desc fallback; 29 struct shash_desc fallback;
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 30f2aef69d78..6a4cd771a2be 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -46,7 +46,7 @@
46#include <asm/setup.h> 46#include <asm/setup.h>
47#include <asm/lguest.h> 47#include <asm/lguest.h>
48#include <asm/uaccess.h> 48#include <asm/uaccess.h>
49#include <asm/i387.h> 49#include <asm/fpu/internal.h>
50#include <asm/tlbflush.h> 50#include <asm/tlbflush.h>
51#include "../lg.h" 51#include "../lg.h"
52 52
@@ -251,7 +251,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
251 * we set it now, so we can trap and pass that trap to the Guest if it 251 * we set it now, so we can trap and pass that trap to the Guest if it
252 * uses the FPU. 252 * uses the FPU.
253 */ 253 */
254 if (cpu->ts && user_has_fpu()) 254 if (cpu->ts && fpregs_active())
255 stts(); 255 stts();
256 256
257 /* 257 /*
@@ -283,7 +283,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
283 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 283 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
284 284
285 /* Clear the host TS bit if it was set above. */ 285 /* Clear the host TS bit if it was set above. */
286 if (cpu->ts && user_has_fpu()) 286 if (cpu->ts && fpregs_active())
287 clts(); 287 clts();
288 288
289 /* 289 /*
@@ -297,12 +297,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
297 /* 297 /*
298 * Similarly, if we took a trap because the Guest used the FPU, 298 * Similarly, if we took a trap because the Guest used the FPU,
299 * we have to restore the FPU it expects to see. 299 * we have to restore the FPU it expects to see.
300 * math_state_restore() may sleep and we may even move off to 300 * fpu__restore() may sleep and we may even move off to
301 * a different CPU. So all the critical stuff should be done 301 * a different CPU. So all the critical stuff should be done
302 * before this. 302 * before this.
303 */ 303 */
304 else if (cpu->regs->trapnum == 7 && !user_has_fpu()) 304 else if (cpu->regs->trapnum == 7 && !fpregs_active())
305 math_state_restore(); 305 fpu__restore(&current->thread.fpu);
306} 306}
307 307
308/*H:130 308/*H:130
diff --git a/kernel/sys.c b/kernel/sys.c
index a4e372b798a5..8571296b7ddb 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -92,10 +92,10 @@
92# define SET_TSC_CTL(a) (-EINVAL) 92# define SET_TSC_CTL(a) (-EINVAL)
93#endif 93#endif
94#ifndef MPX_ENABLE_MANAGEMENT 94#ifndef MPX_ENABLE_MANAGEMENT
95# define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) 95# define MPX_ENABLE_MANAGEMENT() (-EINVAL)
96#endif 96#endif
97#ifndef MPX_DISABLE_MANAGEMENT 97#ifndef MPX_DISABLE_MANAGEMENT
98# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) 98# define MPX_DISABLE_MANAGEMENT() (-EINVAL)
99#endif 99#endif
100#ifndef GET_FP_MODE 100#ifndef GET_FP_MODE
101# define GET_FP_MODE(a) (-EINVAL) 101# define GET_FP_MODE(a) (-EINVAL)
@@ -2230,12 +2230,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
2230 case PR_MPX_ENABLE_MANAGEMENT: 2230 case PR_MPX_ENABLE_MANAGEMENT:
2231 if (arg2 || arg3 || arg4 || arg5) 2231 if (arg2 || arg3 || arg4 || arg5)
2232 return -EINVAL; 2232 return -EINVAL;
2233 error = MPX_ENABLE_MANAGEMENT(me); 2233 error = MPX_ENABLE_MANAGEMENT();
2234 break; 2234 break;
2235 case PR_MPX_DISABLE_MANAGEMENT: 2235 case PR_MPX_DISABLE_MANAGEMENT:
2236 if (arg2 || arg3 || arg4 || arg5) 2236 if (arg2 || arg3 || arg4 || arg5)
2237 return -EINVAL; 2237 return -EINVAL;
2238 error = MPX_DISABLE_MANAGEMENT(me); 2238 error = MPX_DISABLE_MANAGEMENT();
2239 break; 2239 break;
2240 case PR_SET_FP_MODE: 2240 case PR_SET_FP_MODE:
2241 error = SET_FP_MODE(me, arg2); 2241 error = SET_FP_MODE(me, arg2);
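Since the MPX management hooks now take no task argument, the matching prctl() calls from user space pass only zeros for the spare arguments, exactly as the check above requires. A minimal caller might look like the following; the PR_MPX_* request numbers are assumed to match the uapi prctl.h of this kernel series, with fallback defines in case older libc headers lack them.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/prctl.h>

/* Assumed values from include/uapi/linux/prctl.h of this era. */
#ifndef PR_MPX_ENABLE_MANAGEMENT
#define PR_MPX_ENABLE_MANAGEMENT  43
#endif
#ifndef PR_MPX_DISABLE_MANAGEMENT
#define PR_MPX_DISABLE_MANAGEMENT 44
#endif

int main(void)
{
	/* All four spare arguments must be zero, as kernel/sys.c checks. */
	if (prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0) != 0)
		printf("MPX management not enabled: %s\n", strerror(errno));
	else
		printf("kernel is now managing this task's bounds tables\n");
	return 0;
}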
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index b7595484a815..8fe9d9662abb 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -23,7 +23,7 @@
23 23
24#ifdef __KERNEL__ /* Real code */ 24#ifdef __KERNEL__ /* Real code */
25 25
26#include <asm/i387.h> 26#include <asm/fpu/api.h>
27 27
28#else /* Dummy code for user space testing */ 28#else /* Dummy code for user space testing */
29 29