 86 files changed, 4082 insertions(+), 3336 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 60c9d6d0fd96..705ad8e66703 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -937,6 +937,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Enable debug messages at boot time. See
 			Documentation/dynamic-debug-howto.txt for details.
 
+	nompx		[X86] Disables Intel Memory Protection Extensions.
+			See Documentation/x86/intel_mpx.txt for more
+			information about the feature.
+
 	eagerfpu=	[X86]
 			on	enable eager fpu restore
 			off	disable eager fpu restore
diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt
index 57883ca2498b..e89ce6624af2 100644
--- a/Documentation/preempt-locking.txt
+++ b/Documentation/preempt-locking.txt
@@ -48,7 +48,7 @@ preemption must be disabled around such regions.
 
 Note, some FPU functions are already explicitly preempt safe. For example,
 kernel_fpu_begin and kernel_fpu_end will disable and enable preemption.
-However, math_state_restore must be called with preemption disabled.
+However, fpu__restore() must be called with preemption disabled.
 
 
 RULE #3: Lock acquire and release must be performed by same task
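For reference, the kernel_fpu_begin()/kernel_fpu_end() bracketing that this rule
refers to looks like the following at a call site. This is a minimal sketch, not
part of the patch; the function name is hypothetical:

	#include <linux/string.h>
	#include <asm/fpu/api.h>

	static void example_simd_copy(void *dst, const void *src, size_t len)
	{
		kernel_fpu_begin();	/* disables preemption, makes FPU/SIMD regs usable */
		/* a vectorized (SSE/AVX) body would go here; memcpy() as a placeholder: */
		memcpy(dst, src, len);
		kernel_fpu_end();	/* ends kernel FPU use, re-enables preemption */
	}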
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 72484a645f05..2fd3ebbb4e33 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -332,4 +332,16 @@ config X86_DEBUG_STATIC_CPU_HAS
 
 	  If unsure, say N.
 
+config X86_DEBUG_FPU
+	bool "Debug the x86 FPU code"
+	depends on DEBUG_KERNEL
+	default y
+	---help---
+	  If this option is enabled then there will be extra sanity
+	  checks and (boot time) debug printouts added to the kernel.
+	  This debugging adds some small amount of runtime overhead
+	  to the kernel.
+
+	  If unsure, say N.
+
 endmenu
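The option is consumed at compile time, which is why the overhead disappears
entirely when it is off: the new arch/x86/include/asm/fpu/internal.h added later
in this patch defines the sanity-check macro this way:

	#ifdef CONFIG_X86_DEBUG_FPU
	# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
	#else
	# define WARN_ON_FPU(x) ({ (void)(x); 0; })
	#endif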
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 112cefacf2af..b419f43ce0c5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -32,7 +32,7 @@
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/aes.h>
 #include <crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index baf0ac21ace5..4c65c70e628b 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -19,8 +19,7 @@
 #include <crypto/ctr.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -561,16 +560,15 @@ static struct crypto_alg cmll_algs[10] = { {
 
 static int __init camellia_aesni_init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
 	if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
 		pr_info("AVX2 or AES-NI instructions are not detected.\n");
 		return -ENODEV;
 	}
 
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX2 detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 78818a1e73e3..80a0e4389c9a 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -19,8 +19,7 @@
 #include <crypto/ctr.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/camellia.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -553,16 +552,10 @@ static struct crypto_alg cmll_algs[10] = { {
 
 static int __init camellia_aesni_init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
-		pr_info("AVX or AES-NI instructions are not detected.\n");
-		return -ENODEV;
-	}
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 236c80974457..be00aa48b2b5 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -31,8 +31,7 @@
 #include <crypto/cast5.h>
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/glue_helper.h>
 
 #define CAST5_PARALLEL_BLOCKS 16
@@ -468,16 +467,10 @@ static struct crypto_alg cast5_algs[6] = { {
 
 static int __init cast5_init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_osxsave) {
-		pr_info("AVX instructions are not detected.\n");
-		return -ENODEV;
-	}
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index f448810ca4ac..5dbba7224221 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -36,8 +36,7 @@
 #include <crypto/ctr.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/glue_helper.h>
 
 #define CAST6_PARALLEL_BLOCKS 8
@@ -590,16 +589,10 @@ static struct crypto_alg cast6_algs[10] = { {
 
 static int __init cast6_init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_osxsave) {
-		pr_info("AVX instructions are not detected.\n");
-		return -ENODEV;
-	}
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 1937fc1d8763..07d2c6c86a54 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -35,7 +35,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/cpu_device_id.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 
 #define CHKSUM_BLOCK_SIZE	1
 #define CHKSUM_DIGEST_SIZE	4
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 28640c3d6af7..81a595d75cf5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -32,8 +32,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/cpu_device_id.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
 
 #define CHKSUM_BLOCK_SIZE	1
 #define CHKSUM_DIGEST_SIZE	4
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index b6c67bf30fdf..a3fcfc97a311 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_device_id.h>
 
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index f368ba261739..5a2f30f9f52d 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -18,7 +18,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/crypto.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 
 struct crypto_fpu_ctx {
 	struct crypto_blkcipher *child;
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 2079baf06bdd..64d7cf1b50e1 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -19,7 +19,7 @@
 #include <crypto/cryptd.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/hash.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 #include <asm/cpu_device_id.h>
 
 #define GHASH_BLOCK_SIZE	16
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 2f63dc89e7a9..7d838dc4d888 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -20,8 +20,7 @@
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <crypto/serpent.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/serpent-avx.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -537,16 +536,14 @@ static struct crypto_alg srp_algs[10] = { {
 
 static int __init init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
 	if (!cpu_has_avx2 || !cpu_has_osxsave) {
 		pr_info("AVX2 instructions are not detected.\n");
 		return -ENODEV;
 	}
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index c8d478af8456..da7dafc9b16d 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -36,8 +36,7 @@
 #include <crypto/ctr.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/serpent-avx.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -596,16 +595,10 @@ static struct crypto_alg serpent_algs[10] = { {
 
 static int __init serpent_init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_osxsave) {
-		printk(KERN_INFO "AVX instructions are not detected.\n");
-		return -ENODEV;
-	}
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		printk(KERN_INFO "AVX detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index e510b1c5d690..f53ed1dc88ea 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -65,11 +65,8 @@
 #include <crypto/mcryptd.h>
 #include <crypto/crypto_wq.h>
 #include <asm/byteorder.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
 #include <linux/hardirq.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/api.h>
 #include "sha_mb_ctx.h"
 
 #define FLUSH_INTERVAL 1000 /* in usec */
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 33d1b9dc14cc..7c48e8b20848 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -29,9 +29,7 @@
 #include <linux/types.h>
 #include <crypto/sha.h>
 #include <crypto/sha1_base.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 
 
 asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data,
@@ -123,15 +121,9 @@ static struct shash_alg alg = {
 #ifdef CONFIG_AS_AVX
 static bool __init avx_usable(void)
 {
-	u64 xcr0;
-
-	if (!cpu_has_avx || !cpu_has_osxsave)
-		return false;
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
-
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+		if (cpu_has_avx)
+			pr_info("AVX detected but unusable.\n");
 		return false;
 	}
 
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index ccc338881ee8..f8097fc0d1d1 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -37,9 +37,7 @@
 #include <linux/types.h>
 #include <crypto/sha.h>
 #include <crypto/sha256_base.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <linux/string.h>
 
 asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
@@ -132,15 +130,9 @@ static struct shash_alg algs[] = { {
 #ifdef CONFIG_AS_AVX
 static bool __init avx_usable(void)
 {
-	u64 xcr0;
-
-	if (!cpu_has_avx || !cpu_has_osxsave)
-		return false;
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
-
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+		if (cpu_has_avx)
+			pr_info("AVX detected but unusable.\n");
 		return false;
 	}
 
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index d9fa4c1e063f..2edad7b81870 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -35,9 +35,7 @@
 #include <linux/types.h>
 #include <crypto/sha.h>
 #include <crypto/sha512_base.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 
 #include <linux/string.h>
 
@@ -131,15 +129,9 @@ static struct shash_alg algs[] = { {
 #ifdef CONFIG_AS_AVX
 static bool __init avx_usable(void)
 {
-	u64 xcr0;
-
-	if (!cpu_has_avx || !cpu_has_osxsave)
-		return false;
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		pr_info("AVX detected but unusable.\n");
-
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
+		if (cpu_has_avx)
+			pr_info("AVX detected but unusable.\n");
 		return false;
 	}
 
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b5e2d5651851..c2bd0ce718ee 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -36,9 +36,7 @@
 #include <crypto/ctr.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
-#include <asm/i387.h>
-#include <asm/xcr.h>
-#include <asm/xsave.h>
+#include <asm/fpu/api.h>
 #include <asm/crypto/twofish.h>
 #include <asm/crypto/glue_helper.h>
 #include <crypto/scatterwalk.h>
@@ -558,16 +556,10 @@ static struct crypto_alg twofish_algs[10] = { {
 
 static int __init twofish_init(void)
 {
-	u64 xcr0;
+	const char *feature_name;
 
-	if (!cpu_has_avx || !cpu_has_osxsave) {
-		printk(KERN_INFO "AVX instructions are not detected.\n");
-		return -ENODEV;
-	}
-
-	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
-		printk(KERN_INFO "AVX detected but unusable.\n");
+	if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
+		pr_info("CPU feature '%s' is not supported.\n", feature_name);
 		return -ENODEV;
 	}
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index c81d35e6c7f1..ae3a29ae875b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -21,8 +21,8 @@
 #include <linux/binfmts.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
-#include <asm/i387.h>
-#include <asm/fpu-internal.h>
+#include <asm/fpu/internal.h>
+#include <asm/fpu/signal.h>
 #include <asm/ptrace.h>
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
@@ -198,7 +198,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 		buf = compat_ptr(tmp);
 	} get_user_catch(err);
 
-	err |= restore_xstate_sig(buf, 1);
+	err |= fpu__restore_sig(buf, 1);
 
 	force_iret();
 
@@ -308,6 +308,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 				 size_t frame_size,
 				 void __user **fpstate)
 {
+	struct fpu *fpu = &current->thread.fpu;
 	unsigned long sp;
 
 	/* Default to using normal stack */
@@ -322,12 +323,12 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
 	    ksig->ka.sa.sa_restorer)
 		sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-	if (used_math()) {
+	if (fpu->fpstate_active) {
 		unsigned long fx_aligned, math_size;
 
-		sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size);
+		sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
 		*fpstate = (struct _fpstate_ia32 __user *) sp;
-		if (save_xstate_sig(*fpstate, (void __user *)fx_aligned,
+		if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
 				    math_size) < 0)
 			return (void __user *) -1L;
 	}
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ba32af062f61..7bfc85bbb8ff 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -52,6 +52,12 @@ struct alt_instr {
 	u8  padlen;		/* length of build-time padding */
 } __packed;
 
+/*
+ * Debug flag that can be tested to see whether alternative
+ * instructions were patched in already:
+ */
+extern int alternatives_patched;
+
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 1eef55596e82..03bb1065c335 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -7,7 +7,7 @@
 
 #include <linux/kernel.h>
 #include <linux/crypto.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
 typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 3738b138b843..155162ea0e00 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_EFI_H
 #define _ASM_X86_EFI_H
 
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
 #include <asm/pgtable.h>
 
 /*
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
deleted file mode 100644
index da5e96756570..000000000000
--- a/arch/x86/include/asm/fpu-internal.h
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- *	Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _FPU_INTERNAL_H
-#define _FPU_INTERNAL_H
-
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
-#include <linux/compat.h>
-#include <linux/slab.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-#include <asm/xsave.h>
-#include <asm/smap.h>
-
-#ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
-# include <asm/user32.h>
-struct ksignal;
-int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
-			compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct ksignal *ksig,
-		     compat_sigset_t *set, struct pt_regs *regs);
-#else
-# define user_i387_ia32_struct	user_i387_struct
-# define user32_fxsr_struct	user_fxsr_struct
-# define ia32_setup_frame	__setup_frame
-# define ia32_setup_rt_frame	__setup_rt_frame
-#endif
-
-extern unsigned int mxcsr_feature_mask;
-extern void fpu_init(void);
-extern void eager_fpu_init(void);
-
-DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
-
-extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
-			      struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
-			    const struct user_i387_ia32_struct *env);
-
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
-				xstateregs_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
-				xstateregs_set;
-
-/*
- * xstateregs_active == fpregs_active. Please refer to the comment
- * at the definition of fpregs_active.
- */
-#define xstateregs_active	fpregs_active
-
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-	per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-/*
- * Used to indicate that the FPU state in memory is newer than the FPU
- * state in registers, and the FPU state should be reloaded next time the
- * task is run. Only safe on the current task, or non-running tasks.
- */
-static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
-{
-	tsk->thread.fpu.last_cpu = ~0;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-	return new == this_cpu_read_stable(fpu_owner_task) &&
-		cpu == new->thread.fpu.last_cpu;
-}
-
-static inline int is_ia32_compat_frame(void)
-{
-	return config_enabled(CONFIG_IA32_EMULATION) &&
-	       test_thread_flag(TIF_IA32);
-}
-
-static inline int is_ia32_frame(void)
-{
-	return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
-}
-
-static inline int is_x32_frame(void)
-{
-	return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
-}
-
-#define X87_FSW_ES (1 << 7)	/* Exception Summary */
-
-static __always_inline __pure bool use_eager_fpu(void)
-{
-	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
-}
-
-static __always_inline __pure bool use_xsaveopt(void)
-{
-	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
-	return static_cpu_has_safe(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
-	return static_cpu_has_safe(X86_FEATURE_FXSR);
-}
-
-static inline void fx_finit(struct i387_fxsave_struct *fx)
-{
-	fx->cwd = 0x37f;
-	fx->mxcsr = MXCSR_DEFAULT;
-}
-
-extern void __sanitize_i387_state(struct task_struct *);
-
-static inline void sanitize_i387_state(struct task_struct *tsk)
-{
-	if (!use_xsaveopt())
-		return;
-	__sanitize_i387_state(tsk);
-}
-
-#define user_insn(insn, output, input...)				\
-({									\
-	int err;							\
-	asm volatile(ASM_STAC "\n"					\
-		     "1:" #insn "\n\t"					\
-		     "2: " ASM_CLAC "\n"				\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3: movl $-1,%[err]\n"				\
-		     "   jmp 2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : [err] "=r" (err), output				\
-		     : "0"(0), input);					\
-	err;								\
-})
-
-#define check_insn(insn, output, input...)				\
-({									\
-	int err;							\
-	asm volatile("1:" #insn "\n\t"					\
-		     "2:\n"						\
-		     ".section .fixup,\"ax\"\n"				\
-		     "3: movl $-1,%[err]\n"				\
-		     "   jmp 2b\n"					\
-		     ".previous\n"					\
-		     _ASM_EXTABLE(1b, 3b)				\
-		     : [err] "=r" (err), output				\
-		     : "0"(0), input);					\
-	err;								\
-})
-
-static inline int fsave_user(struct i387_fsave_struct __user *fx)
-{
-	return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
-}
-
-static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
-{
-	if (config_enabled(CONFIG_X86_32))
-		return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
-		return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
-
-	/* See comment in fpu_fxsave() below. */
-	return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
-}
-
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
-	if (config_enabled(CONFIG_X86_32))
-		return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
-		return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	/* See comment in fpu_fxsave() below. */
-	return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
-			  "m" (*fx));
-}
-
-static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
-{
-	if (config_enabled(CONFIG_X86_32))
-		return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
-		return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-	/* See comment in fpu_fxsave() below. */
-	return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
-			  "m" (*fx));
-}
-
-static inline int frstor_checking(struct i387_fsave_struct *fx)
-{
-	return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int frstor_user(struct i387_fsave_struct __user *fx)
-{
-	return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
-	if (config_enabled(CONFIG_X86_32))
-		asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
-	else if (config_enabled(CONFIG_AS_FXSAVEQ))
-		asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
-	else {
-		/* Using "rex64; fxsave %0" is broken because, if the memory
-		 * operand uses any extended registers for addressing, a second
-		 * REX prefix will be generated (to the assembler, rex64
-		 * followed by semicolon is a separate instruction), and hence
-		 * the 64-bitness is lost.
-		 *
-		 * Using "fxsaveq %0" would be the ideal choice, but is only
-		 * supported starting with gas 2.16.
-		 *
-		 * Using, as a workaround, the properly prefixed form below
-		 * isn't accepted by any binutils version so far released,
-		 * complaining that the same type of prefix is used twice if
-		 * an extended register is needed for addressing (fix submitted
-		 * to mainline 2005-11-21).
-		 *
-		 *  asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
-		 *
-		 * This, however, we can work around by forcing the compiler to
-		 * select an addressing mode that doesn't require extended
-		 * registers.
-		 */
-		asm volatile( "rex64/fxsave (%[fx])"
-			     : "=m" (fpu->state->fxsave)
-			     : [fx] "R" (&fpu->state->fxsave));
-	}
-}
-
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact.
- */
-static inline int fpu_save_init(struct fpu *fpu)
-{
-	if (use_xsave()) {
-		fpu_xsave(fpu);
-
-		/*
-		 * xsave header may indicate the init state of the FP.
-		 */
-		if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
-			return 1;
-	} else if (use_fxsr()) {
-		fpu_fxsave(fpu);
-	} else {
-		asm volatile("fnsave %[fx]; fwait"
-			     : [fx] "=m" (fpu->state->fsave));
-		return 0;
-	}
-
-	/*
-	 * If exceptions are pending, we need to clear them so
-	 * that we don't randomly get exceptions later.
-	 *
-	 * FIXME! Is this perhaps only true for the old-style
-	 * irq13 case? Maybe we could leave the x87 state
-	 * intact otherwise?
-	 */
-	if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
-		asm volatile("fnclex");
-		return 0;
-	}
-	return 1;
-}
-
-static inline int __save_init_fpu(struct task_struct *tsk)
-{
-	return fpu_save_init(&tsk->thread.fpu);
-}
-
-static inline int fpu_restore_checking(struct fpu *fpu)
-{
-	if (use_xsave())
-		return fpu_xrstor_checking(&fpu->state->xsave);
-	else if (use_fxsr())
-		return fxrstor_checking(&fpu->state->fxsave);
-	else
-		return frstor_checking(&fpu->state->fsave);
-}
-
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
-	/*
-	 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
-	 * pending. Clear the x87 state here by setting it to fixed values.
-	 * "m" is a random variable that should be in L1.
-	 */
-	if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
-		asm volatile(
-			"fnclex\n\t"
-			"emms\n\t"
-			"fildl %P[addr]"	/* set F?P to defined value */
-			: : [addr] "m" (tsk->thread.fpu.has_fpu));
-	}
-
-	return fpu_restore_checking(&tsk->thread.fpu);
-}
-
-/*
- * Software FPU state helpers. Careful: these need to
- * be preemption protection *and* they need to be
- * properly paired with the CR0.TS changes!
- */
-static inline int __thread_has_fpu(struct task_struct *tsk)
-{
-	return tsk->thread.fpu.has_fpu;
-}
-
-/* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct task_struct *tsk)
-{
-	tsk->thread.fpu.has_fpu = 0;
-	this_cpu_write(fpu_owner_task, NULL);
-}
-
-/* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct task_struct *tsk)
-{
-	tsk->thread.fpu.has_fpu = 1;
-	this_cpu_write(fpu_owner_task, tsk);
-}
-
-/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void __thread_fpu_end(struct task_struct *tsk)
-{
-	__thread_clear_has_fpu(tsk);
-	if (!use_eager_fpu())
-		stts();
-}
-
-static inline void __thread_fpu_begin(struct task_struct *tsk)
-{
-	if (!use_eager_fpu())
-		clts();
-	__thread_set_has_fpu(tsk);
-}
-
-static inline void drop_fpu(struct task_struct *tsk)
-{
-	/*
-	 * Forget coprocessor state..
-	 */
-	preempt_disable();
-	tsk->thread.fpu_counter = 0;
-
-	if (__thread_has_fpu(tsk)) {
-		/* Ignore delayed exceptions from user space */
-		asm volatile("1: fwait\n"
-			     "2:\n"
-			     _ASM_EXTABLE(1b, 2b));
-		__thread_fpu_end(tsk);
-	}
-
-	clear_stopped_child_used_math(tsk);
-	preempt_enable();
-}
-
-static inline void restore_init_xstate(void)
-{
-	if (use_xsave())
-		xrstor_state(init_xstate_buf, -1);
-	else
-		fxrstor_checking(&init_xstate_buf->i387);
-}
-
-/*
- * Reset the FPU state in the eager case and drop it in the lazy case (later use
- * will reinit it).
- */
-static inline void fpu_reset_state(struct task_struct *tsk)
-{
-	if (!use_eager_fpu())
-		drop_fpu(tsk);
-	else
-		restore_init_xstate();
-}
-
-/*
- * FPU state switching for scheduling.
- *
- * This is a two-stage process:
- *
- *  - switch_fpu_prepare() saves the old state and
- *    sets the new state of the CR0.TS bit. This is
- *    done within the context of the old process.
- *
- *  - switch_fpu_finish() restores the new state as
- *    necessary.
- */
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
-{
-	fpu_switch_t fpu;
-
-	/*
-	 * If the task has used the math, pre-load the FPU on xsave processors
-	 * or if the past 5 consecutive context-switches used math.
-	 */
-	fpu.preload = tsk_used_math(new) &&
-		      (use_eager_fpu() || new->thread.fpu_counter > 5);
-
-	if (__thread_has_fpu(old)) {
-		if (!__save_init_fpu(old))
-			task_disable_lazy_fpu_restore(old);
-		else
-			old->thread.fpu.last_cpu = cpu;
-
-		/* But leave fpu_owner_task! */
-		old->thread.fpu.has_fpu = 0;
-
-		/* Don't change CR0.TS if we just switch! */
-		if (fpu.preload) {
-			new->thread.fpu_counter++;
-			__thread_set_has_fpu(new);
-			prefetch(new->thread.fpu.state);
-		} else if (!use_eager_fpu())
-			stts();
-	} else {
-		old->thread.fpu_counter = 0;
-		task_disable_lazy_fpu_restore(old);
-		if (fpu.preload) {
-			new->thread.fpu_counter++;
-			if (fpu_lazy_restore(new, cpu))
-				fpu.preload = 0;
-			else
-				prefetch(new->thread.fpu.state);
-			__thread_fpu_begin(new);
-		}
-	}
-	return fpu;
-}
-
-/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
- */
-static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
-{
-	if (fpu.preload) {
-		if (unlikely(restore_fpu_checking(new)))
-			fpu_reset_state(new);
-	}
-}
-
-/*
- * Signal frame handlers...
- */
-extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
-extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
-
-static inline int xstate_sigframe_size(void)
-{
-	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
-}
-
-static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
-{
-	void __user *buf_fx = buf;
-	int size = xstate_sigframe_size();
-
-	if (ia32_frame && use_fxsr()) {
-		buf_fx = buf + sizeof(struct i387_fsave_struct);
-		size += sizeof(struct i387_fsave_struct);
-	}
-
-	return __restore_xstate_sig(buf, buf_fx, size);
-}
-
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
-	preempt_disable();
-	if (!user_has_fpu())
-		__thread_fpu_begin(current);
-	preempt_enable();
-}
-
-static inline void __save_fpu(struct task_struct *tsk)
-{
-	if (use_xsave()) {
-		if (unlikely(system_state == SYSTEM_BOOTING))
-			xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
-		else
-			xsave_state(&tsk->thread.fpu.state->xsave, -1);
-	} else
-		fpu_fxsave(&tsk->thread.fpu);
-}
-
-/*
- * i387 state interaction
- */
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.fpu.state->fxsave.cwd;
-	} else {
-		return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
-	}
-}
-
-static inline unsigned short get_fpu_swd(struct task_struct *tsk)
-{
-	if (cpu_has_fxsr) {
-		return tsk->thread.fpu.state->fxsave.swd;
-	} else {
-		return (unsigned short)tsk->thread.fpu.state->fsave.swd;
-	}
-}
-
-static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
-	if (cpu_has_xmm) {
-		return tsk->thread.fpu.state->fxsave.mxcsr;
-	} else {
-		return MXCSR_DEFAULT;
-	}
-}
-
-static bool fpu_allocated(struct fpu *fpu)
-{
-	return fpu->state != NULL;
-}
-
-static inline int fpu_alloc(struct fpu *fpu)
-{
-	if (fpu_allocated(fpu))
-		return 0;
-	fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
-	if (!fpu->state)
-		return -ENOMEM;
-	WARN_ON((unsigned long)fpu->state & 15);
-	return 0;
-}
-
-static inline void fpu_free(struct fpu *fpu)
-{
-	if (fpu->state) {
-		kmem_cache_free(task_xstate_cachep, fpu->state);
-		fpu->state = NULL;
-	}
-}
-
-static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
-{
-	if (use_eager_fpu()) {
-		memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
-		__save_fpu(dst);
-	} else {
-		struct fpu *dfpu = &dst->thread.fpu;
-		struct fpu *sfpu = &src->thread.fpu;
-
-		unlazy_fpu(src);
-		memcpy(dfpu->state, sfpu->state, xstate_size);
-	}
-}
-
-static inline unsigned long
-alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
-		unsigned long *size)
-{
-	unsigned long frame_size = xstate_sigframe_size();
-
-	*buf_fx = sp = round_down(sp - frame_size, 64);
-	if (ia32_frame && use_fxsr()) {
-		frame_size += sizeof(struct i387_fsave_struct);
-		sp -= sizeof(struct i387_fsave_struct);
-	}
-
-	*size = frame_size;
-	return sp;
-}
-
-#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
new file mode 100644
index 000000000000..1429a7c736db
--- /dev/null
+++ b/arch/x86/include/asm/fpu/api.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_FPU_API_H
+#define _ASM_X86_FPU_API_H
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), preempt notifier
+ * should call the __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM for example uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+extern bool irq_fpu_usable(void);
+
+/*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
+ * should use them only in the context of irq_ts_save/restore()
+ */
+extern int  irq_ts_save(void);
+extern void irq_ts_restore(int TS_state);
+
+/*
+ * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
+ *
+ * If 'feature_name' is set then put a human-readable description of
+ * the feature there as well - this can be used to print error (or success)
+ * messages.
+ */
+extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
+
+#endif /* _ASM_X86_FPU_API_H */
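This header is the entire outward-facing FPU surface a driver needs: the
begin/end bracketing plus the xfeature query. A minimal sketch of a module
using both halves of the API (names containing 'example' are hypothetical, and
the XSTATE_* masks are assumed to live in <asm/fpu/xstate.h> after this
reorganization):

	#include <linux/kernel.h>
	#include <asm/fpu/api.h>
	#include <asm/fpu/xstate.h>	/* assumed home of the XSTATE_* masks */

	static int __init example_init(void)
	{
		const char *feature_name;

		if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
			pr_info("CPU feature '%s' is not supported.\n", feature_name);
			return -ENODEV;
		}

		if (!irq_fpu_usable())
			return -EBUSY;	/* illustrative: FPU not usable in this context */

		kernel_fpu_begin();
		/* vectorized work goes here */
		kernel_fpu_end();

		return 0;
	}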
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
new file mode 100644
index 000000000000..3c3550c3a4a3
--- /dev/null
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ *	Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_FPU_INTERNAL_H
+#define _ASM_X86_FPU_INTERNAL_H
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/user.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/xstate.h>
+
+/*
+ * High level FPU state handling functions:
+ */
+extern void fpu__activate_curr(struct fpu *fpu);
+extern void fpu__activate_fpstate_read(struct fpu *fpu);
+extern void fpu__activate_fpstate_write(struct fpu *fpu);
+extern void fpu__save(struct fpu *fpu);
+extern void fpu__restore(struct fpu *fpu);
+extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
+extern void fpu__drop(struct fpu *fpu);
+extern int  fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
+extern void fpu__clear(struct fpu *fpu);
+extern int  fpu__exception_code(struct fpu *fpu, int trap_nr);
+extern int  dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
+
+/*
+ * Boot time FPU initialization functions:
+ */
+extern void fpu__init_cpu(void);
+extern void fpu__init_system_xstate(void);
+extern void fpu__init_cpu_xstate(void);
+extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_check_bugs(void);
+extern void fpu__resume_cpu(void);
+
+/*
+ * Debugging facility:
+ */
+#ifdef CONFIG_X86_DEBUG_FPU
+# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
+#else
+# define WARN_ON_FPU(x) ({ (void)(x); 0; })
+#endif
+
+/*
+ * FPU related CPU feature flag helper routines:
+ */
+static __always_inline __pure bool use_eager_fpu(void)
+{
+	return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+}
+
+static __always_inline __pure bool use_xsaveopt(void)
+{
+	return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+}
+
+static __always_inline __pure bool use_xsave(void)
+{
+	return static_cpu_has_safe(X86_FEATURE_XSAVE);
+}
+
+static __always_inline __pure bool use_fxsr(void)
+{
+	return static_cpu_has_safe(X86_FEATURE_FXSR);
+}
+
+/*
+ * fpstate handling functions:
+ */
+
+extern union fpregs_state init_fpstate;
+
+extern void fpstate_init(union fpregs_state *state);
+#ifdef CONFIG_MATH_EMULATION
+extern void fpstate_init_soft(struct swregs_state *soft);
+#else
+static inline void fpstate_init_soft(struct swregs_state *soft) {}
+#endif
+static inline void fpstate_init_fxstate(struct fxregs_state *fx)
+{
+	fx->cwd = 0x37f;
+	fx->mxcsr = MXCSR_DEFAULT;
+}
+extern void fpstate_sanitize_xstate(struct fpu *fpu);
+
+#define user_insn(insn, output, input...)				\
+({									\
+	int err;							\
+	asm volatile(ASM_STAC "\n"					\
+		     "1:" #insn "\n\t"					\
+		     "2: " ASM_CLAC "\n"				\
+		     ".section .fixup,\"ax\"\n"				\
+		     "3: movl $-1,%[err]\n"				\
+		     "   jmp 2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 3b)				\
+		     : [err] "=r" (err), output				\
+		     : "0"(0), input);					\
+	err;								\
+})
+
+#define check_insn(insn, output, input...)				\
+({									\
+	int err;							\
+	asm volatile("1:" #insn "\n\t"					\
+		     "2:\n"						\
+		     ".section .fixup,\"ax\"\n"				\
+		     "3: movl $-1,%[err]\n"				\
+		     "   jmp 2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE(1b, 3b)				\
+		     : [err] "=r" (err), output				\
+		     : "0"(0), input);					\
+	err;								\
+})
+
+static inline int copy_fregs_to_user(struct fregs_state __user *fx)
+{
+	return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
+}
+
+static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
+{
+	if (config_enabled(CONFIG_X86_32))
+		return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+	else if (config_enabled(CONFIG_AS_FXSAVEQ))
+		return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
+
+	/* See comment in copy_fxregs_to_kernel() below. */
+	return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
+}
+
+static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
+{
+	int err;
+
+	if (config_enabled(CONFIG_X86_32)) {
+		err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+	} else {
| 151 | if (config_enabled(CONFIG_AS_FXSAVEQ)) { | ||
| 152 | err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); | ||
| 153 | } else { | ||
| 154 | /* See comment in copy_fxregs_to_kernel() below. */ | ||
| 155 | err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); | ||
| 156 | } | ||
| 157 | } | ||
| 158 | /* Copying from a kernel buffer to FPU registers should never fail: */ | ||
| 159 | WARN_ON_FPU(err); | ||
| 160 | } | ||
| 161 | |||
| 162 | static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) | ||
| 163 | { | ||
| 164 | if (config_enabled(CONFIG_X86_32)) | ||
| 165 | return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); | ||
| 166 | else if (config_enabled(CONFIG_AS_FXSAVEQ)) | ||
| 167 | return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); | ||
| 168 | |||
| 169 | /* See comment in copy_fxregs_to_kernel() below. */ | ||
| 170 | return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), | ||
| 171 | "m" (*fx)); | ||
| 172 | } | ||
| 173 | |||
| 174 | static inline void copy_kernel_to_fregs(struct fregs_state *fx) | ||
| 175 | { | ||
| 176 | int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); | ||
| 177 | |||
| 178 | WARN_ON_FPU(err); | ||
| 179 | } | ||
| 180 | |||
| 181 | static inline int copy_user_to_fregs(struct fregs_state __user *fx) | ||
| 182 | { | ||
| 183 | return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); | ||
| 184 | } | ||
| 185 | |||
| 186 | static inline void copy_fxregs_to_kernel(struct fpu *fpu) | ||
| 187 | { | ||
| 188 | if (config_enabled(CONFIG_X86_32)) | ||
| 189 | asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); | ||
| 190 | else if (config_enabled(CONFIG_AS_FXSAVEQ)) | ||
| 191 | asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); | ||
| 192 | else { | ||
| 193 | /* Using "rex64; fxsave %0" is broken because, if the memory | ||
| 194 | * operand uses any extended registers for addressing, a second | ||
| 195 | * REX prefix will be generated (to the assembler, rex64 | ||
| 196 | * followed by semicolon is a separate instruction), and hence | ||
| 197 | * the 64-bitness is lost. | ||
| 198 | * | ||
| 199 | * Using "fxsaveq %0" would be the ideal choice, but is only | ||
| 200 | * supported starting with gas 2.16. | ||
| 201 | * | ||
| 202 | * Using, as a workaround, the properly prefixed form below | ||
| 203 | * isn't accepted by any binutils version so far released, | ||
| 204 | * complaining that the same type of prefix is used twice if | ||
| 205 | * an extended register is needed for addressing (fix submitted | ||
| 206 | * to mainline 2005-11-21). | ||
| 207 | * | ||
| 208 | * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); | ||
| 209 | * | ||
| 210 | * This, however, we can work around by forcing the compiler to | ||
| 211 | * select an addressing mode that doesn't require extended | ||
| 212 | * registers. | ||
| 213 | */ | ||
| 214 | asm volatile( "rex64/fxsave (%[fx])" | ||
| 215 | : "=m" (fpu->state.fxsave) | ||
| 216 | : [fx] "R" (&fpu->state.fxsave)); | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | /* These macros all use (%edi)/(%rdi) as the single memory argument. */ | ||
| 221 | #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" | ||
| 222 | #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" | ||
| 223 | #define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" | ||
| 224 | #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" | ||
| 225 | #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" | ||
| 226 | |||
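[Editor's note: the final byte of each sequence is the ModRM byte: mod=00 and rm=111 select the (%rdi) memory operand, and the middle three bits carry the opcode extension (/reg) from the SDM. A quick sanity check of the encodings, as a sketch not in the patch:]

    /* ModRM = (mod << 6) | (reg << 3) | rm;  mod = 0, rm = 7 -> (%rdi): */
    #define XSTATE_MODRM(reg_ext)	((0 << 6) | ((reg_ext) << 3) | 7)

    /*
     * XSAVE   = 0F AE /4 -> 0x27	XSAVEOPT = 0F AE /6 -> 0x37
     * XRSTOR  = 0F AE /5 -> 0x2f	XSAVES   = 0F C7 /5 -> 0x2f
     * XRSTORS = 0F C7 /3 -> 0x1f
     */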
| 227 | /* xstate instruction fault handler: */ | ||
| 228 | #define xstate_fault(__err) \ | ||
| 229 | \ | ||
| 230 | ".section .fixup,\"ax\"\n" \ | ||
| 231 | \ | ||
| 232 | "3: movl $-2,%[_err]\n" \ | ||
| 233 | " jmp 2b\n" \ | ||
| 234 | \ | ||
| 235 | ".previous\n" \ | ||
| 236 | \ | ||
| 237 | _ASM_EXTABLE(1b, 3b) \ | ||
| 238 | : [_err] "=r" (__err) | ||
| 239 | |||
| 240 | /* | ||
| 241 | * This function is called only during boot, before the x86 capability | ||
| 242 | * bits are set up and alternatives can be used. | ||
| 243 | */ | ||
| 244 | static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) | ||
| 245 | { | ||
| 246 | u64 mask = -1; | ||
| 247 | u32 lmask = mask; | ||
| 248 | u32 hmask = mask >> 32; | ||
| 249 | int err = 0; | ||
| 250 | |||
| 251 | WARN_ON(system_state != SYSTEM_BOOTING); | ||
| 252 | |||
| 253 | if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||
| 254 | asm volatile("1:"XSAVES"\n\t" | ||
| 255 | "2:\n\t" | ||
| 256 | xstate_fault(err) | ||
| 257 | : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) | ||
| 258 | : "memory"); | ||
| 259 | else | ||
| 260 | asm volatile("1:"XSAVE"\n\t" | ||
| 261 | "2:\n\t" | ||
| 262 | xstate_fault(err) | ||
| 263 | : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) | ||
| 264 | : "memory"); | ||
| 265 | |||
| 266 | /* We should never fault when copying to a kernel buffer: */ | ||
| 267 | WARN_ON_FPU(err); | ||
| 268 | } | ||
| 269 | |||
| 270 | /* | ||
| 271 | * This function is called only during boot, before the x86 capability | ||
| 272 | * bits are set up and alternatives can be used. | ||
| 273 | */ | ||
| 274 | static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) | ||
| 275 | { | ||
| 276 | u64 mask = -1; | ||
| 277 | u32 lmask = mask; | ||
| 278 | u32 hmask = mask >> 32; | ||
| 279 | int err = 0; | ||
| 280 | |||
| 281 | WARN_ON(system_state != SYSTEM_BOOTING); | ||
| 282 | |||
| 283 | if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||
| 284 | asm volatile("1:"XRSTORS"\n\t" | ||
| 285 | "2:\n\t" | ||
| 286 | xstate_fault(err) | ||
| 287 | : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) | ||
| 288 | : "memory"); | ||
| 289 | else | ||
| 290 | asm volatile("1:"XRSTOR"\n\t" | ||
| 291 | "2:\n\t" | ||
| 292 | xstate_fault(err) | ||
| 293 | : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) | ||
| 294 | : "memory"); | ||
| 295 | |||
| 296 | /* We should never fault when copying from a kernel buffer: */ | ||
| 297 | WARN_ON_FPU(err); | ||
| 298 | } | ||
| 299 | |||
| 300 | /* | ||
| 301 | * Save processor xstate to xsave area. | ||
| 302 | */ | ||
| 303 | static inline void copy_xregs_to_kernel(struct xregs_state *xstate) | ||
| 304 | { | ||
| 305 | u64 mask = -1; | ||
| 306 | u32 lmask = mask; | ||
| 307 | u32 hmask = mask >> 32; | ||
| 308 | int err = 0; | ||
| 309 | |||
| 310 | WARN_ON(!alternatives_patched); | ||
| 311 | |||
| 312 | /* | ||
| 313 | * If XSAVES is enabled, it replaces XSAVEOPT because, in addition | ||
| 314 | * to XSAVEOPT's modified-state optimization, it supports the | ||
| 315 | * compacted format and supervisor states. | ||
| 316 | * | ||
| 317 | * Otherwise, if XSAVEOPT is enabled, it replaces XSAVE, because | ||
| 318 | * XSAVEOPT supports the modified-state optimization, which plain | ||
| 319 | * XSAVE does not. | ||
| 320 | * | ||
| 321 | * If neither XSAVES nor XSAVEOPT is enabled, use XSAVE. | ||
| 322 | */ | ||
| 323 | alternative_input_2( | ||
| 324 | "1:"XSAVE, | ||
| 325 | XSAVEOPT, | ||
| 326 | X86_FEATURE_XSAVEOPT, | ||
| 327 | XSAVES, | ||
| 328 | X86_FEATURE_XSAVES, | ||
| 329 | [xstate] "D" (xstate), "a" (lmask), "d" (hmask) : | ||
| 330 | "memory"); | ||
| 331 | asm volatile("2:\n\t" | ||
| 332 | xstate_fault(err) | ||
| 333 | : "0" (err) | ||
| 334 | : "memory"); | ||
| 335 | |||
| 336 | /* We should never fault when copying to a kernel buffer: */ | ||
| 337 | WARN_ON_FPU(err); | ||
| 338 | } | ||
| 339 | |||
| 340 | /* | ||
| 341 | * Restore processor xstate from xsave area. | ||
| 342 | */ | ||
| 343 | static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) | ||
| 344 | { | ||
| 345 | u32 lmask = mask; | ||
| 346 | u32 hmask = mask >> 32; | ||
| 347 | int err = 0; | ||
| 348 | |||
| 349 | /* | ||
| 350 | * Use XRSTORS to restore context if it is enabled: XRSTORS supports | ||
| 351 | * the compacted xsave area format, which plain XRSTOR does not. | ||
| 352 | */ | ||
| 353 | alternative_input( | ||
| 354 | "1: " XRSTOR, | ||
| 355 | XRSTORS, | ||
| 356 | X86_FEATURE_XSAVES, | ||
| 357 | "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) | ||
| 358 | : "memory"); | ||
| 359 | |||
| 360 | asm volatile("2:\n" | ||
| 361 | xstate_fault(err) | ||
| 362 | : "0" (err) | ||
| 363 | : "memory"); | ||
| 364 | |||
| 365 | /* We should never fault when copying from a kernel buffer: */ | ||
| 366 | WARN_ON_FPU(err); | ||
| 367 | } | ||
| 368 | |||
| 369 | /* | ||
| 370 | * Save xstate to user space xsave area. | ||
| 371 | * | ||
| 372 | * We don't use the modified-state optimization because the last | ||
| 373 | * xrstor/xrstors might have tracked a different application's state. | ||
| 374 | * | ||
| 375 | * We don't use the compacted xsave area format either, to stay | ||
| 376 | * backward compatible with old applications which don't understand | ||
| 377 | * the compacted format. | ||
| 378 | */ | ||
| 379 | static inline int copy_xregs_to_user(struct xregs_state __user *buf) | ||
| 380 | { | ||
| 381 | int err; | ||
| 382 | |||
| 383 | /* | ||
| 384 | * Clear the xsave header first, so that reserved fields are | ||
| 385 | * initialized to zero. | ||
| 386 | */ | ||
| 387 | err = __clear_user(&buf->header, sizeof(buf->header)); | ||
| 388 | if (unlikely(err)) | ||
| 389 | return -EFAULT; | ||
| 390 | |||
| 391 | __asm__ __volatile__(ASM_STAC "\n" | ||
| 392 | "1:"XSAVE"\n" | ||
| 393 | "2: " ASM_CLAC "\n" | ||
| 394 | xstate_fault(err) | ||
| 395 | : "D" (buf), "a" (-1), "d" (-1), "0" (err) | ||
| 396 | : "memory"); | ||
| 397 | return err; | ||
| 398 | } | ||
| 399 | |||
| 400 | /* | ||
| 401 | * Restore xstate from user space xsave area. | ||
| 402 | */ | ||
| 403 | static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) | ||
| 404 | { | ||
| 405 | struct xregs_state *xstate = ((__force struct xregs_state *)buf); | ||
| 406 | u32 lmask = mask; | ||
| 407 | u32 hmask = mask >> 32; | ||
| 408 | int err = 0; | ||
| 409 | |||
| 410 | __asm__ __volatile__(ASM_STAC "\n" | ||
| 411 | "1:"XRSTOR"\n" | ||
| 412 | "2: " ASM_CLAC "\n" | ||
| 413 | xstate_fault(err) | ||
| 414 | : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err) | ||
| 415 | : "memory"); /* memory required? */ | ||
| 416 | return err; | ||
| 417 | } | ||
| 418 | |||
| 419 | /* | ||
| 420 | * This must be called with preemption disabled. It returns | ||
| 421 | * 'true' if the FPU state is still intact and we can | ||
| 422 | * keep the registers active. | ||
| 423 | * | ||
| 424 | * The legacy FNSAVE instruction cleared all FPU state | ||
| 425 | * unconditionally, so registers are essentially destroyed. | ||
| 426 | * Modern FPU state can be kept in registers, if there are | ||
| 427 | * no pending FP exceptions. | ||
| 428 | */ | ||
| 429 | static inline int copy_fpregs_to_fpstate(struct fpu *fpu) | ||
| 430 | { | ||
| 431 | if (likely(use_xsave())) { | ||
| 432 | copy_xregs_to_kernel(&fpu->state.xsave); | ||
| 433 | return 1; | ||
| 434 | } | ||
| 435 | |||
| 436 | if (likely(use_fxsr())) { | ||
| 437 | copy_fxregs_to_kernel(fpu); | ||
| 438 | return 1; | ||
| 439 | } | ||
| 440 | |||
| 441 | /* | ||
| 442 | * Legacy FPU register saving: FNSAVE always clears the FPU registers, | ||
| 443 | * so we have to mark them as no longer intact: | ||
| 444 | */ | ||
| 445 | asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); | ||
| 446 | |||
| 447 | return 0; | ||
| 448 | } | ||
| 449 | |||
| 450 | static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) | ||
| 451 | { | ||
| 452 | if (use_xsave()) { | ||
| 453 | copy_kernel_to_xregs(&fpstate->xsave, -1); | ||
| 454 | } else { | ||
| 455 | if (use_fxsr()) | ||
| 456 | copy_kernel_to_fxregs(&fpstate->fxsave); | ||
| 457 | else | ||
| 458 | copy_kernel_to_fregs(&fpstate->fsave); | ||
| 459 | } | ||
| 460 | } | ||
| 461 | |||
| 462 | static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) | ||
| 463 | { | ||
| 464 | /* | ||
| 465 | * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is | ||
| 466 | * pending. Clear the x87 state here by setting it to fixed values. | ||
| 467 | * "m" is a random variable that should be in L1. | ||
| 468 | */ | ||
| 469 | if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { | ||
| 470 | asm volatile( | ||
| 471 | "fnclex\n\t" | ||
| 472 | "emms\n\t" | ||
| 473 | "fildl %P[addr]" /* set F?P to defined value */ | ||
| 474 | : : [addr] "m" (fpstate)); | ||
| 475 | } | ||
| 476 | |||
| 477 | __copy_kernel_to_fpregs(fpstate); | ||
| 478 | } | ||
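[Editor's sketch: these two helpers form the save/restore pair used by the context-switch and signal code below. A condensed round trip, assuming a hypothetical helper with preemption disabled and 'fpu' owning the live registers:]

    static void fpu_save_then_reload(struct fpu *fpu)
    {
    	/* A zero return means FNSAVE destroyed the register contents: */
    	if (!copy_fpregs_to_fpstate(fpu))
    		fpu->last_cpu = -1;

    	/* ... the registers may be clobbered here ... */

    	/* Reload, with the FXSAVE-leak workaround applied as needed: */
    	copy_kernel_to_fpregs(&fpu->state);
    }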
| 479 | |||
| 480 | extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); | ||
| 481 | |||
| 482 | /* | ||
| 483 | * FPU context switch related helper methods: | ||
| 484 | */ | ||
| 485 | |||
| 486 | DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); | ||
| 487 | |||
| 488 | /* | ||
| 489 | * Must be run with preemption disabled: this clears fpu_fpregs_owner_ctx | ||
| 490 | * on this CPU. | ||
| 491 | * | ||
| 492 | * This disables any lazy restore of the current FPU state: if the current | ||
| 493 | * thread owns the FPU, its state will still be saved at the next context switch. | ||
| 494 | */ | ||
| 495 | static inline void __cpu_disable_lazy_restore(unsigned int cpu) | ||
| 496 | { | ||
| 497 | per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; | ||
| 498 | } | ||
| 499 | |||
| 500 | static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) | ||
| 501 | { | ||
| 502 | return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; | ||
| 503 | } | ||
| 504 | |||
| 505 | |||
| 506 | /* | ||
| 507 | * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' | ||
| 508 | * idiom, which is then paired with the sw-flag (fpregs_active) later on: | ||
| 509 | */ | ||
| 510 | |||
| 511 | static inline void __fpregs_activate_hw(void) | ||
| 512 | { | ||
| 513 | if (!use_eager_fpu()) | ||
| 514 | clts(); | ||
| 515 | } | ||
| 516 | |||
| 517 | static inline void __fpregs_deactivate_hw(void) | ||
| 518 | { | ||
| 519 | if (!use_eager_fpu()) | ||
| 520 | stts(); | ||
| 521 | } | ||
| 522 | |||
| 523 | /* Must be paired with an 'stts' (__fpregs_deactivate_hw()) afterwards! */ | ||
| 524 | static inline void __fpregs_deactivate(struct fpu *fpu) | ||
| 525 | { | ||
| 526 | WARN_ON_FPU(!fpu->fpregs_active); | ||
| 527 | |||
| 528 | fpu->fpregs_active = 0; | ||
| 529 | this_cpu_write(fpu_fpregs_owner_ctx, NULL); | ||
| 530 | } | ||
| 531 | |||
| 532 | /* Must be paired with a 'clts' (__fpregs_activate_hw()) before! */ | ||
| 533 | static inline void __fpregs_activate(struct fpu *fpu) | ||
| 534 | { | ||
| 535 | WARN_ON_FPU(fpu->fpregs_active); | ||
| 536 | |||
| 537 | fpu->fpregs_active = 1; | ||
| 538 | this_cpu_write(fpu_fpregs_owner_ctx, fpu); | ||
| 539 | } | ||
| 540 | |||
| 541 | /* | ||
| 542 | * The question "does this thread have fpu access?" | ||
| 543 | * is slightly racy, since preemption could come in | ||
| 544 | * and revoke it immediately after the test. | ||
| 545 | * | ||
| 546 | * However, even in that very unlikely scenario, | ||
| 547 | * we can just assume we have FPU access - typically | ||
| 548 | * to save the FP state - we'll just take a #NM | ||
| 549 | * fault and get the FPU access back. | ||
| 550 | */ | ||
| 551 | static inline int fpregs_active(void) | ||
| 552 | { | ||
| 553 | return current->thread.fpu.fpregs_active; | ||
| 554 | } | ||
| 555 | |||
| 556 | /* | ||
| 557 | * Encapsulate the CR0.TS handling together with the | ||
| 558 | * software flag. | ||
| 559 | * | ||
| 560 | * These generally need preemption protection to work, | ||
| 561 | * do try to avoid using these on their own. | ||
| 562 | */ | ||
| 563 | static inline void fpregs_activate(struct fpu *fpu) | ||
| 564 | { | ||
| 565 | __fpregs_activate_hw(); | ||
| 566 | __fpregs_activate(fpu); | ||
| 567 | } | ||
| 568 | |||
| 569 | static inline void fpregs_deactivate(struct fpu *fpu) | ||
| 570 | { | ||
| 571 | __fpregs_deactivate(fpu); | ||
| 572 | __fpregs_deactivate_hw(); | ||
| 573 | } | ||
| 574 | |||
| 575 | /* | ||
| 576 | * FPU state switching for scheduling. | ||
| 577 | * | ||
| 578 | * This is a two-stage process: | ||
| 579 | * | ||
| 580 | * - switch_fpu_prepare() saves the old state and | ||
| 581 | * sets the new state of the CR0.TS bit. This is | ||
| 582 | * done within the context of the old process. | ||
| 583 | * | ||
| 584 | * - switch_fpu_finish() restores the new state as | ||
| 585 | * necessary. | ||
| 586 | */ | ||
| 587 | typedef struct { int preload; } fpu_switch_t; | ||
| 588 | |||
| 589 | static inline fpu_switch_t | ||
| 590 | switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) | ||
| 591 | { | ||
| 592 | fpu_switch_t fpu; | ||
| 593 | |||
| 594 | /* | ||
| 595 | * Preload the FPU if the task has used it before, and either this is | ||
| 596 | * an eager-FPU (xsave) CPU or over 5 consecutive context switches used it. | ||
| 597 | */ | ||
| 598 | fpu.preload = new_fpu->fpstate_active && | ||
| 599 | (use_eager_fpu() || new_fpu->counter > 5); | ||
| 600 | |||
| 601 | if (old_fpu->fpregs_active) { | ||
| 602 | if (!copy_fpregs_to_fpstate(old_fpu)) | ||
| 603 | old_fpu->last_cpu = -1; | ||
| 604 | else | ||
| 605 | old_fpu->last_cpu = cpu; | ||
| 606 | |||
| 607 | /* But leave fpu_fpregs_owner_ctx! */ | ||
| 608 | old_fpu->fpregs_active = 0; | ||
| 609 | |||
| 610 | /* Don't change CR0.TS when merely switching between FPU-using tasks! */ | ||
| 611 | if (fpu.preload) { | ||
| 612 | new_fpu->counter++; | ||
| 613 | __fpregs_activate(new_fpu); | ||
| 614 | prefetch(&new_fpu->state); | ||
| 615 | } else { | ||
| 616 | __fpregs_deactivate_hw(); | ||
| 617 | } | ||
| 618 | } else { | ||
| 619 | old_fpu->counter = 0; | ||
| 620 | old_fpu->last_cpu = -1; | ||
| 621 | if (fpu.preload) { | ||
| 622 | new_fpu->counter++; | ||
| 623 | if (fpu_want_lazy_restore(new_fpu, cpu)) | ||
| 624 | fpu.preload = 0; | ||
| 625 | else | ||
| 626 | prefetch(&new_fpu->state); | ||
| 627 | fpregs_activate(new_fpu); | ||
| 628 | } | ||
| 629 | } | ||
| 630 | return fpu; | ||
| 631 | } | ||
| 632 | |||
| 633 | /* | ||
| 634 | * Misc helper functions: | ||
| 635 | */ | ||
| 636 | |||
| 637 | /* | ||
| 638 | * By the time this gets called, we've already cleared CR0.TS and | ||
| 639 | * given the process the FPU if we are going to preload the FPU | ||
| 640 | * state - all we need to do is to conditionally restore the register | ||
| 641 | * state itself. | ||
| 642 | */ | ||
| 643 | static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) | ||
| 644 | { | ||
| 645 | if (fpu_switch.preload) | ||
| 646 | copy_kernel_to_fpregs(&new_fpu->state); | ||
| 647 | } | ||
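[Editor's sketch of how the arch context-switch path (__switch_to()) is expected to drive the pair; surrounding switch details elided, prev_p/next_p/cpu as in __switch_to():]

    	struct fpu *prev_fpu = &prev_p->thread.fpu;
    	struct fpu *next_fpu = &next_p->thread.fpu;
    	fpu_switch_t fpu_switch;

    	fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);

    	/* ... switch stacks, segments, TLS, I/O bitmap, ... */

    	switch_fpu_finish(next_fpu, fpu_switch);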
| 648 | |||
| 649 | /* | ||
| 650 | * Needs to be preemption-safe. | ||
| 651 | * | ||
| 652 | * NOTE! user_fpu_begin() must be used only immediately before restoring | ||
| 653 | * the saved state. It does not do any saving/restoring on its own. In | ||
| 654 | * lazy FPU mode it is just an optimization to avoid a #NM exception; | ||
| 655 | * the task can lose the FPU right after preempt_enable(). | ||
| 656 | */ | ||
| 657 | static inline void user_fpu_begin(void) | ||
| 658 | { | ||
| 659 | struct fpu *fpu = ¤t->thread.fpu; | ||
| 660 | |||
| 661 | preempt_disable(); | ||
| 662 | if (!fpregs_active()) | ||
| 663 | fpregs_activate(fpu); | ||
| 664 | preempt_enable(); | ||
| 665 | } | ||
| 666 | |||
| 667 | /* | ||
| 668 | * MXCSR and XCR definitions: | ||
| 669 | */ | ||
| 670 | |||
| 671 | extern unsigned int mxcsr_feature_mask; | ||
| 672 | |||
| 673 | #define XCR_XFEATURE_ENABLED_MASK 0x00000000 | ||
| 674 | |||
| 675 | static inline u64 xgetbv(u32 index) | ||
| 676 | { | ||
| 677 | u32 eax, edx; | ||
| 678 | |||
| 679 | asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ | ||
| 680 | : "=a" (eax), "=d" (edx) | ||
| 681 | : "c" (index)); | ||
| 682 | return eax + ((u64)edx << 32); | ||
| 683 | } | ||
| 684 | |||
| 685 | static inline void xsetbv(u32 index, u64 value) | ||
| 686 | { | ||
| 687 | u32 eax = value; | ||
| 688 | u32 edx = value >> 32; | ||
| 689 | |||
| 690 | asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ | ||
| 691 | : : "a" (eax), "d" (edx), "c" (index)); | ||
| 692 | } | ||
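[Editor's sketch of how the xstate boot code is expected to program XCR0 with these accessors; the function name is illustrative:]

    static void fpu__enable_xfeatures(u64 mask)
    {
    	/* XSETBV raises #GP if 'mask' contains unsupported bits: */
    	xsetbv(XCR_XFEATURE_ENABLED_MASK, mask);

    	/* Read XCR0 back as a sanity check: */
    	WARN_ON(xgetbv(XCR_XFEATURE_ENABLED_MASK) != mask);
    }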
| 693 | |||
| 694 | #endif /* _ASM_X86_FPU_INTERNAL_H */ | ||
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h new file mode 100644 index 000000000000..39d3107ac6c7 --- /dev/null +++ b/arch/x86/include/asm/fpu/regset.h | |||
| @@ -0,0 +1,21 @@ | |||
| 1 | /* | ||
| 2 | * FPU regset handling methods: | ||
| 3 | */ | ||
| 4 | #ifndef _ASM_X86_FPU_REGSET_H | ||
| 5 | #define _ASM_X86_FPU_REGSET_H | ||
| 6 | |||
| 7 | #include <linux/regset.h> | ||
| 8 | |||
| 9 | extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; | ||
| 10 | extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, | ||
| 11 | xstateregs_get; | ||
| 12 | extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, | ||
| 13 | xstateregs_set; | ||
| 14 | |||
| 15 | /* | ||
| 16 | * xstateregs_active == regset_fpregs_active. Please refer to the comment | ||
| 17 | * at the definition of regset_fpregs_active. | ||
| 18 | */ | ||
| 19 | #define xstateregs_active regset_fpregs_active | ||
| 20 | |||
| 21 | #endif /* _ASM_X86_FPU_REGSET_H */ | ||
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h new file mode 100644 index 000000000000..7358e9d61f1e --- /dev/null +++ b/arch/x86/include/asm/fpu/signal.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | /* | ||
| 2 | * x86 FPU signal frame handling methods: | ||
| 3 | */ | ||
| 4 | #ifndef _ASM_X86_FPU_SIGNAL_H | ||
| 5 | #define _ASM_X86_FPU_SIGNAL_H | ||
| 6 | |||
| 7 | #ifdef CONFIG_X86_64 | ||
| 8 | # include <asm/sigcontext32.h> | ||
| 9 | # include <asm/user32.h> | ||
| 10 | struct ksignal; | ||
| 11 | int ia32_setup_rt_frame(int sig, struct ksignal *ksig, | ||
| 12 | compat_sigset_t *set, struct pt_regs *regs); | ||
| 13 | int ia32_setup_frame(int sig, struct ksignal *ksig, | ||
| 14 | compat_sigset_t *set, struct pt_regs *regs); | ||
| 15 | #else | ||
| 16 | # define user_i387_ia32_struct user_i387_struct | ||
| 17 | # define user32_fxsr_struct user_fxsr_struct | ||
| 18 | # define ia32_setup_frame __setup_frame | ||
| 19 | # define ia32_setup_rt_frame __setup_rt_frame | ||
| 20 | #endif | ||
| 21 | |||
| 22 | extern void convert_from_fxsr(struct user_i387_ia32_struct *env, | ||
| 23 | struct task_struct *tsk); | ||
| 24 | extern void convert_to_fxsr(struct task_struct *tsk, | ||
| 25 | const struct user_i387_ia32_struct *env); | ||
| 26 | |||
| 27 | unsigned long | ||
| 28 | fpu__alloc_mathframe(unsigned long sp, int ia32_frame, | ||
| 29 | unsigned long *buf_fx, unsigned long *size); | ||
| 30 | |||
| 31 | extern void fpu__init_prepare_fx_sw_frame(void); | ||
| 32 | |||
| 33 | #endif /* _ASM_X86_FPU_SIGNAL_H */ | ||
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h new file mode 100644 index 000000000000..0637826292de --- /dev/null +++ b/arch/x86/include/asm/fpu/types.h | |||
| @@ -0,0 +1,293 @@ | |||
| 1 | /* | ||
| 2 | * FPU data structures: | ||
| 3 | */ | ||
| 4 | #ifndef _ASM_X86_FPU_H | ||
| 5 | #define _ASM_X86_FPU_H | ||
| 6 | |||
| 7 | /* | ||
| 8 | * The legacy x87 FPU state format, as saved by FSAVE and | ||
| 9 | * restored by the FRSTOR instruction: | ||
| 10 | */ | ||
| 11 | struct fregs_state { | ||
| 12 | u32 cwd; /* FPU Control Word */ | ||
| 13 | u32 swd; /* FPU Status Word */ | ||
| 14 | u32 twd; /* FPU Tag Word */ | ||
| 15 | u32 fip; /* FPU IP Offset */ | ||
| 16 | u32 fcs; /* FPU IP Selector */ | ||
| 17 | u32 foo; /* FPU Operand Pointer Offset */ | ||
| 18 | u32 fos; /* FPU Operand Pointer Selector */ | ||
| 19 | |||
| 20 | /* 8*10 bytes for each FP-reg = 80 bytes: */ | ||
| 21 | u32 st_space[20]; | ||
| 22 | |||
| 23 | /* Software status information [not touched by FSAVE]: */ | ||
| 24 | u32 status; | ||
| 25 | }; | ||
| 26 | |||
| 27 | /* | ||
| 28 | * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and | ||
| 29 | * restored by the FXRSTOR instruction. It's similar to the FSAVE | ||
| 30 | * format, but differs in some areas, plus has extensions at | ||
| 31 | * the end for the XMM registers. | ||
| 32 | */ | ||
| 33 | struct fxregs_state { | ||
| 34 | u16 cwd; /* Control Word */ | ||
| 35 | u16 swd; /* Status Word */ | ||
| 36 | u16 twd; /* Tag Word */ | ||
| 37 | u16 fop; /* Last Instruction Opcode */ | ||
| 38 | union { | ||
| 39 | struct { | ||
| 40 | u64 rip; /* Instruction Pointer */ | ||
| 41 | u64 rdp; /* Data Pointer */ | ||
| 42 | }; | ||
| 43 | struct { | ||
| 44 | u32 fip; /* FPU IP Offset */ | ||
| 45 | u32 fcs; /* FPU IP Selector */ | ||
| 46 | u32 foo; /* FPU Operand Offset */ | ||
| 47 | u32 fos; /* FPU Operand Selector */ | ||
| 48 | }; | ||
| 49 | }; | ||
| 50 | u32 mxcsr; /* MXCSR Register State */ | ||
| 51 | u32 mxcsr_mask; /* MXCSR Mask */ | ||
| 52 | |||
| 53 | /* 8*16 bytes for each FP-reg = 128 bytes: */ | ||
| 54 | u32 st_space[32]; | ||
| 55 | |||
| 56 | /* 16*16 bytes for each XMM-reg = 256 bytes: */ | ||
| 57 | u32 xmm_space[64]; | ||
| 58 | |||
| 59 | u32 padding[12]; | ||
| 60 | |||
| 61 | union { | ||
| 62 | u32 padding1[12]; | ||
| 63 | u32 sw_reserved[12]; | ||
| 64 | }; | ||
| 65 | |||
| 66 | } __attribute__((aligned(16))); | ||
| 67 | |||
| 68 | /* Default value for fxregs_state.mxcsr: */ | ||
| 69 | #define MXCSR_DEFAULT 0x1f80 | ||
| 70 | |||
| 71 | /* | ||
| 72 | * Software-based FPU emulation state. The layout is arbitrary really; | ||
| 73 | * it matches the x87 format to make it easier to understand: | ||
| 74 | */ | ||
| 75 | struct swregs_state { | ||
| 76 | u32 cwd; | ||
| 77 | u32 swd; | ||
| 78 | u32 twd; | ||
| 79 | u32 fip; | ||
| 80 | u32 fcs; | ||
| 81 | u32 foo; | ||
| 82 | u32 fos; | ||
| 83 | /* 8*10 bytes for each FP-reg = 80 bytes: */ | ||
| 84 | u32 st_space[20]; | ||
| 85 | u8 ftop; | ||
| 86 | u8 changed; | ||
| 87 | u8 lookahead; | ||
| 88 | u8 no_update; | ||
| 89 | u8 rm; | ||
| 90 | u8 alimit; | ||
| 91 | struct math_emu_info *info; | ||
| 92 | u32 entry_eip; | ||
| 93 | }; | ||
| 94 | |||
| 95 | /* | ||
| 96 | * List of XSAVE features Linux knows about: | ||
| 97 | */ | ||
| 98 | enum xfeature_bit { | ||
| 99 | XSTATE_BIT_FP, | ||
| 100 | XSTATE_BIT_SSE, | ||
| 101 | XSTATE_BIT_YMM, | ||
| 102 | XSTATE_BIT_BNDREGS, | ||
| 103 | XSTATE_BIT_BNDCSR, | ||
| 104 | XSTATE_BIT_OPMASK, | ||
| 105 | XSTATE_BIT_ZMM_Hi256, | ||
| 106 | XSTATE_BIT_Hi16_ZMM, | ||
| 107 | |||
| 108 | XFEATURES_NR_MAX, | ||
| 109 | }; | ||
| 110 | |||
| 111 | #define XSTATE_FP (1 << XSTATE_BIT_FP) | ||
| 112 | #define XSTATE_SSE (1 << XSTATE_BIT_SSE) | ||
| 113 | #define XSTATE_YMM (1 << XSTATE_BIT_YMM) | ||
| 114 | #define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) | ||
| 115 | #define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) | ||
| 116 | #define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) | ||
| 117 | #define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) | ||
| 118 | #define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) | ||
| 119 | |||
| 120 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | ||
| 121 | #define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | ||
| 122 | |||
| 123 | /* | ||
| 124 | * There are 16x 256-bit AVX registers named YMM0-YMM15. | ||
| 125 | * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) | ||
| 126 | * and are stored in 'struct fxregs_state::xmm_space[]'. | ||
| 127 | * | ||
| 128 | * The high 128 bits are stored here: | ||
| 129 | * 16x 128 bits == 256 bytes. | ||
| 130 | */ | ||
| 131 | struct ymmh_struct { | ||
| 132 | u8 ymmh_space[256]; | ||
| 133 | }; | ||
| 134 | |||
| 135 | /* We don't support LWP yet: */ | ||
| 136 | struct lwp_struct { | ||
| 137 | u8 reserved[128]; | ||
| 138 | }; | ||
| 139 | |||
| 140 | /* Intel MPX support: */ | ||
| 141 | struct bndreg { | ||
| 142 | u64 lower_bound; | ||
| 143 | u64 upper_bound; | ||
| 144 | } __packed; | ||
| 145 | |||
| 146 | struct bndcsr { | ||
| 147 | u64 bndcfgu; | ||
| 148 | u64 bndstatus; | ||
| 149 | } __packed; | ||
| 150 | |||
| 151 | struct mpx_struct { | ||
| 152 | struct bndreg bndreg[4]; | ||
| 153 | struct bndcsr bndcsr; | ||
| 154 | }; | ||
| 155 | |||
| 156 | struct xstate_header { | ||
| 157 | u64 xfeatures; | ||
| 158 | u64 xcomp_bv; | ||
| 159 | u64 reserved[6]; | ||
| 160 | } __attribute__((packed)); | ||
| 161 | |||
| 162 | /* New processor state extensions should be added here: */ | ||
| 163 | #define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ | ||
| 164 | sizeof(struct lwp_struct) + \ | ||
| 165 | sizeof(struct mpx_struct) ) | ||
| 166 | /* | ||
| 167 | * This is our most modern FPU state format, as saved by the XSAVE | ||
| 168 | * and restored by the XRSTOR instructions. | ||
| 169 | * | ||
| 170 | * It consists of a legacy fxregs portion, an xstate header and | ||
| 171 | * subsequent fixed size areas as defined by the xstate header. | ||
| 172 | * Not all CPUs support all the extensions. | ||
| 173 | */ | ||
| 174 | struct xregs_state { | ||
| 175 | struct fxregs_state i387; | ||
| 176 | struct xstate_header header; | ||
| 177 | u8 __reserved[XSTATE_RESERVE]; | ||
| 178 | } __attribute__ ((packed, aligned (64))); | ||
| 179 | |||
| 180 | /* | ||
| 181 | * This is a union of all the possible FPU state formats | ||
| 182 | * put together, so that we can pick the right one at runtime. | ||
| 183 | * | ||
| 184 | * The size of the structure is determined by the largest | ||
| 185 | * member - which is the xsave area: | ||
| 186 | */ | ||
| 187 | union fpregs_state { | ||
| 188 | struct fregs_state fsave; | ||
| 189 | struct fxregs_state fxsave; | ||
| 190 | struct swregs_state soft; | ||
| 191 | struct xregs_state xsave; | ||
| 192 | }; | ||
| 193 | |||
| 194 | /* | ||
| 195 | * Highest level per task FPU state data structure that | ||
| 196 | * contains the FPU register state plus various FPU | ||
| 197 | * state fields: | ||
| 198 | */ | ||
| 199 | struct fpu { | ||
| 200 | /* | ||
| 201 | * @state: | ||
| 202 | * | ||
| 203 | * In-memory copy of all FPU registers that we save/restore | ||
| 204 | * over context switches. If the task is using the FPU then | ||
| 205 | * the registers in the FPU are more recent than this state | ||
| 206 | * copy. If the task context-switches away then they get | ||
| 207 | * saved here and represent the FPU state. | ||
| 208 | * | ||
| 209 | * After context switches there may be a (short) time period | ||
| 210 | * during which the in-FPU hardware registers are unchanged | ||
| 211 | * and still perfectly match this state, if the tasks | ||
| 212 | * scheduled afterwards are not using the FPU. | ||
| 213 | * | ||
| 214 | * This is the 'lazy restore' window of optimization, which | ||
| 215 | * we track through 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. | ||
| 216 | * | ||
| 217 | * We detect whether a subsequent task uses the FPU by setting | ||
| 218 | * CR0.TS to 1, which causes any FPU use to raise a #NM fault. | ||
| 219 | * | ||
| 220 | * During this window, if the task gets scheduled again, we | ||
| 221 | * might be able to skip having to do a restore from this | ||
| 222 | * memory buffer to the hardware registers - at the cost of | ||
| 223 | * incurring the overhead of #NM fault traps. | ||
| 224 | * | ||
| 225 | * Note that on modern CPUs that support XSAVEOPT (or other | ||
| 226 | * optimized XSAVE instructions), we don't use #NM traps anymore, | ||
| 227 | * as the hardware can track whether FPU registers need saving | ||
| 228 | * or not. On such CPUs we activate the non-lazy ('eagerfpu') | ||
| 229 | * logic, which unconditionally saves/restores all FPU state | ||
| 230 | * across context switches (if FPU state exists). | ||
| 231 | */ | ||
| 232 | union fpregs_state state; | ||
| 233 | |||
| 234 | /* | ||
| 235 | * @last_cpu: | ||
| 236 | * | ||
| 237 | * Records the last CPU on which this context was loaded into | ||
| 238 | * FPU registers. (In the lazy-restore case we might be | ||
| 239 | * able to reuse FPU registers across multiple context switches | ||
| 240 | * this way, if no intermediate task used the FPU.) | ||
| 241 | * | ||
| 242 | * A value of -1 is used to indicate that the FPU state in context | ||
| 243 | * memory is newer than the FPU state in registers, and that the | ||
| 244 | * FPU state should be reloaded next time the task is run. | ||
| 245 | */ | ||
| 246 | unsigned int last_cpu; | ||
| 247 | |||
| 248 | /* | ||
| 249 | * @fpstate_active: | ||
| 250 | * | ||
| 251 | * This flag indicates whether this context is active: if the task | ||
| 252 | * is not running then we can restore from this context, if the task | ||
| 253 | * is running then we should save into this context. | ||
| 254 | */ | ||
| 255 | unsigned char fpstate_active; | ||
| 256 | |||
| 257 | /* | ||
| 258 | * @fpregs_active: | ||
| 259 | * | ||
| 260 | * This flag determines whether a given context is actively | ||
| 261 | * loaded into the FPU's registers, i.e. whether those registers | ||
| 262 | * represent the task's current FPU state. | ||
| 263 | * | ||
| 264 | * Note the interaction with fpstate_active: | ||
| 265 | * | ||
| 266 | * # task does not use the FPU: | ||
| 267 | * fpstate_active == 0 | ||
| 268 | * | ||
| 269 | * # task uses the FPU and regs are active: | ||
| 270 | * fpstate_active == 1 && fpregs_active == 1 | ||
| 271 | * | ||
| 272 | * # the regs are inactive but still match fpstate: | ||
| 273 | * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu | ||
| 274 | * | ||
| 275 | * The third state is what we use for the lazy restore optimization | ||
| 276 | * on lazy-switching CPUs. | ||
| 277 | */ | ||
| 278 | unsigned char fpregs_active; | ||
| 279 | |||
| 280 | /* | ||
| 281 | * @counter: | ||
| 282 | * | ||
| 283 | * This counter contains the number of consecutive context switches | ||
| 284 | * during which the FPU remains in use. If this exceeds a threshold, the | ||
| 285 | * lazy FPU restore logic becomes eager, to save the trap overhead. | ||
| 286 | * This is an unsigned char so that after 256 iterations the counter | ||
| 287 | * wraps and the context switch behavior turns lazy again; this is to | ||
| 288 | * deal with bursty apps that only use the FPU for a short time: | ||
| 289 | */ | ||
| 290 | unsigned char counter; | ||
| 291 | }; | ||
| 292 | |||
| 293 | #endif /* _ASM_X86_FPU_H */ | ||
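[Editor's sketch: the legal fpstate_active/fpregs_active combinations documented above lend themselves to a small invariant check. A hypothetical debug helper, not in this patch, assuming <asm/fpu/internal.h> for fpu_fpregs_owner_ctx:]

    static void fpu__check_invariants(struct fpu *fpu)
    {
    	/* Registers can only be active if there is valid fpstate at all: */
    	WARN_ON_ONCE(fpu->fpregs_active && !fpu->fpstate_active);

    	/* An active-regs context must also be the per-CPU register owner: */
    	if (fpu->fpregs_active)
    		WARN_ON_ONCE(this_cpu_read(fpu_fpregs_owner_ctx) != fpu);
    }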
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h new file mode 100644 index 000000000000..4656b25bb9a7 --- /dev/null +++ b/arch/x86/include/asm/fpu/xstate.h | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | #ifndef __ASM_X86_XSAVE_H | ||
| 2 | #define __ASM_X86_XSAVE_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <asm/processor.h> | ||
| 6 | #include <linux/uaccess.h> | ||
| 7 | |||
| 8 | /* Bit 63 of XCR0 is reserved for future expansion */ | ||
| 9 | #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) | ||
| 10 | |||
| 11 | #define XSTATE_CPUID 0x0000000d | ||
| 12 | |||
| 13 | #define FXSAVE_SIZE 512 | ||
| 14 | |||
| 15 | #define XSAVE_HDR_SIZE 64 | ||
| 16 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE | ||
| 17 | |||
| 18 | #define XSAVE_YMM_SIZE 256 | ||
| 19 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | ||
| 20 | |||
| 21 | /* Supported features which support lazy state saving */ | ||
| 22 | #define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | ||
| 23 | | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | ||
| 24 | |||
| 25 | /* Supported features which require eager state saving */ | ||
| 26 | #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) | ||
| 27 | |||
| 28 | /* All currently supported features */ | ||
| 29 | #define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) | ||
| 30 | |||
| 31 | #ifdef CONFIG_X86_64 | ||
| 32 | #define REX_PREFIX "0x48, " | ||
| 33 | #else | ||
| 34 | #define REX_PREFIX | ||
| 35 | #endif | ||
| 36 | |||
| 37 | extern unsigned int xstate_size; | ||
| 38 | extern u64 xfeatures_mask; | ||
| 39 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | ||
| 40 | |||
| 41 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | ||
| 42 | |||
| 43 | void *get_xsave_addr(struct xregs_state *xsave, int xstate); | ||
| 44 | const void *get_xsave_field_ptr(int xstate_field); | ||
| 45 | |||
| 46 | #endif | ||
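[Editor's sketch of a typical get_xsave_addr() consumer, such as the MPX fault path; error handling condensed, wrapper name illustrative:]

    static const struct bndcsr *get_bndcsr(struct xregs_state *xsave)
    {
    	/* NULL if the CPU never saved BNDCSR into this buffer: */
    	return get_xsave_addr(xsave, XSTATE_BNDCSR);
    }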
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h deleted file mode 100644 index 6eb6fcb83f63..000000000000 --- a/arch/x86/include/asm/i387.h +++ /dev/null | |||
| @@ -1,108 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1994 Linus Torvalds | ||
| 3 | * | ||
| 4 | * Pentium III FXSR, SSE support | ||
| 5 | * General FPU state handling cleanups | ||
| 6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
| 7 | * x86-64 work by Andi Kleen 2002 | ||
| 8 | */ | ||
| 9 | |||
| 10 | #ifndef _ASM_X86_I387_H | ||
| 11 | #define _ASM_X86_I387_H | ||
| 12 | |||
| 13 | #ifndef __ASSEMBLY__ | ||
| 14 | |||
| 15 | #include <linux/sched.h> | ||
| 16 | #include <linux/hardirq.h> | ||
| 17 | |||
| 18 | struct pt_regs; | ||
| 19 | struct user_i387_struct; | ||
| 20 | |||
| 21 | extern int init_fpu(struct task_struct *child); | ||
| 22 | extern void fpu_finit(struct fpu *fpu); | ||
| 23 | extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); | ||
| 24 | extern void math_state_restore(void); | ||
| 25 | |||
| 26 | extern bool irq_fpu_usable(void); | ||
| 27 | |||
| 28 | /* | ||
| 29 | * Careful: __kernel_fpu_begin/end() must be called with preempt disabled | ||
| 30 | * and they don't touch the preempt state on their own. | ||
| 31 | * If you enable preemption after __kernel_fpu_begin(), preempt notifier | ||
| 32 | * should call the __kernel_fpu_end() to prevent the kernel/user FPU | ||
| 33 | * state from getting corrupted. KVM for example uses this model. | ||
| 34 | * | ||
| 35 | * All other cases use kernel_fpu_begin/end() which disable preemption | ||
| 36 | * during kernel FPU usage. | ||
| 37 | */ | ||
| 38 | extern void __kernel_fpu_begin(void); | ||
| 39 | extern void __kernel_fpu_end(void); | ||
| 40 | |||
| 41 | static inline void kernel_fpu_begin(void) | ||
| 42 | { | ||
| 43 | preempt_disable(); | ||
| 44 | WARN_ON_ONCE(!irq_fpu_usable()); | ||
| 45 | __kernel_fpu_begin(); | ||
| 46 | } | ||
| 47 | |||
| 48 | static inline void kernel_fpu_end(void) | ||
| 49 | { | ||
| 50 | __kernel_fpu_end(); | ||
| 51 | preempt_enable(); | ||
| 52 | } | ||
| 53 | |||
| 54 | /* Must be called with preempt disabled */ | ||
| 55 | extern void kernel_fpu_disable(void); | ||
| 56 | extern void kernel_fpu_enable(void); | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Some instructions like VIA's padlock instructions generate a spurious | ||
| 60 | * DNA fault but don't modify SSE registers. And these instructions | ||
| 61 | * get used from interrupt context as well. To prevent these kernel instructions | ||
| 62 | * in interrupt context interacting wrongly with other user/kernel fpu usage, we | ||
| 63 | * should use them only in the context of irq_ts_save/restore() | ||
| 64 | */ | ||
| 65 | static inline int irq_ts_save(void) | ||
| 66 | { | ||
| 67 | /* | ||
| 68 | * If in process context and not atomic, we can take a spurious DNA fault. | ||
| 69 | * Otherwise, doing clts() in process context requires disabling preemption | ||
| 70 | * or some heavy lifting like kernel_fpu_begin() | ||
| 71 | */ | ||
| 72 | if (!in_atomic()) | ||
| 73 | return 0; | ||
| 74 | |||
| 75 | if (read_cr0() & X86_CR0_TS) { | ||
| 76 | clts(); | ||
| 77 | return 1; | ||
| 78 | } | ||
| 79 | |||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | static inline void irq_ts_restore(int TS_state) | ||
| 84 | { | ||
| 85 | if (TS_state) | ||
| 86 | stts(); | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * The question "does this thread have fpu access?" | ||
| 91 | * is slightly racy, since preemption could come in | ||
| 92 | * and revoke it immediately after the test. | ||
| 93 | * | ||
| 94 | * However, even in that very unlikely scenario, | ||
| 95 | * we can just assume we have FPU access - typically | ||
| 96 | * to save the FP state - we'll just take a #NM | ||
| 97 | * fault and get the FPU access back. | ||
| 98 | */ | ||
| 99 | static inline int user_has_fpu(void) | ||
| 100 | { | ||
| 101 | return current->thread.fpu.has_fpu; | ||
| 102 | } | ||
| 103 | |||
| 104 | extern void unlazy_fpu(struct task_struct *tsk); | ||
| 105 | |||
| 106 | #endif /* __ASSEMBLY__ */ | ||
| 107 | |||
| 108 | #endif /* _ASM_X86_I387_H */ | ||
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4a555beef19..f8c0ec3a4a97 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -1002,8 +1002,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | |||
| 1002 | 1002 | ||
| 1003 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 1003 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
| 1004 | 1004 | ||
| 1005 | int fx_init(struct kvm_vcpu *vcpu); | ||
| 1006 | |||
| 1007 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1005 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
| 1008 | const u8 *new, int bytes); | 1006 | const u8 *new, int bytes); |
| 1009 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); | 1007 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 883f6b933fa4..5e8daee7c5c9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h | |||
| @@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm) | |||
| 142 | paravirt_arch_exit_mmap(mm); | 142 | paravirt_arch_exit_mmap(mm); |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | #ifdef CONFIG_X86_64 | ||
| 146 | static inline bool is_64bit_mm(struct mm_struct *mm) | ||
| 147 | { | ||
| 148 | return !config_enabled(CONFIG_IA32_EMULATION) || | ||
| 149 | !(mm->context.ia32_compat == TIF_IA32); | ||
| 150 | } | ||
| 151 | #else | ||
| 152 | static inline bool is_64bit_mm(struct mm_struct *mm) | ||
| 153 | { | ||
| 154 | return false; | ||
| 155 | } | ||
| 156 | #endif | ||
| 157 | |||
| 145 | static inline void arch_bprm_mm_init(struct mm_struct *mm, | 158 | static inline void arch_bprm_mm_init(struct mm_struct *mm, |
| 146 | struct vm_area_struct *vma) | 159 | struct vm_area_struct *vma) |
| 147 | { | 160 | { |
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index a952a13d59a7..7a35495275a9 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h | |||
| @@ -13,55 +13,50 @@ | |||
| 13 | #define MPX_BNDCFG_ENABLE_FLAG 0x1 | 13 | #define MPX_BNDCFG_ENABLE_FLAG 0x1 |
| 14 | #define MPX_BD_ENTRY_VALID_FLAG 0x1 | 14 | #define MPX_BD_ENTRY_VALID_FLAG 0x1 |
| 15 | 15 | ||
| 16 | #ifdef CONFIG_X86_64 | 16 | /* |
| 17 | 17 | * The upper 28 bits [47:20] of the virtual address in 64-bit | |
| 18 | /* upper 28 bits [47:20] of the virtual address in 64-bit used to | 18 | * are used to index into bounds directory (BD). |
| 19 | * index into bounds directory (BD). | 19 | * |
| 20 | */ | 20 | * The directory is 2G (2^31) in size, and with 8-byte entries |
| 21 | #define MPX_BD_ENTRY_OFFSET 28 | 21 | * it has 2^28 entries. |
| 22 | #define MPX_BD_ENTRY_SHIFT 3 | ||
| 23 | /* bits [19:3] of the virtual address in 64-bit used to index into | ||
| 24 | * bounds table (BT). | ||
| 25 | */ | 22 | */ |
| 26 | #define MPX_BT_ENTRY_OFFSET 17 | 23 | #define MPX_BD_SIZE_BYTES_64 (1UL<<31) |
| 27 | #define MPX_BT_ENTRY_SHIFT 5 | 24 | #define MPX_BD_ENTRY_BYTES_64 8 |
| 28 | #define MPX_IGN_BITS 3 | 25 | #define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) |
| 29 | #define MPX_BD_ENTRY_TAIL 3 | ||
| 30 | 26 | ||
| 31 | #else | 27 | /* |
| 32 | 28 | * The 32-bit directory is 4MB (2^22) in size, and with 4-byte | |
| 33 | #define MPX_BD_ENTRY_OFFSET 20 | 29 | * entries it has 2^20 entries. |
| 34 | #define MPX_BD_ENTRY_SHIFT 2 | 30 | */ |
| 35 | #define MPX_BT_ENTRY_OFFSET 10 | 31 | #define MPX_BD_SIZE_BYTES_32 (1UL<<22) |
| 36 | #define MPX_BT_ENTRY_SHIFT 4 | 32 | #define MPX_BD_ENTRY_BYTES_32 4 |
| 37 | #define MPX_IGN_BITS 2 | 33 | #define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) |
| 38 | #define MPX_BD_ENTRY_TAIL 2 | ||
| 39 | 34 | ||
| 40 | #endif | 35 | /* |
| 36 | * A 64-bit table is 4MB total in size, and an entry is | ||
| 37 | * 4 64-bit pointers in size. | ||
| 38 | */ | ||
| 39 | #define MPX_BT_SIZE_BYTES_64 (1UL<<22) | ||
| 40 | #define MPX_BT_ENTRY_BYTES_64 32 | ||
| 41 | #define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) | ||
| 41 | 42 | ||
| 42 | #define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) | 43 | /* |
| 43 | #define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) | 44 | * A 32-bit table is 16kB total in size, and an entry is |
| 45 | * 4 32-bit pointers in size. | ||
| 46 | */ | ||
| 47 | #define MPX_BT_SIZE_BYTES_32 (1UL<<14) | ||
| 48 | #define MPX_BT_ENTRY_BYTES_32 16 | ||
| 49 | #define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) | ||
| 44 | 50 | ||
| 45 | #define MPX_BNDSTA_TAIL 2 | 51 | #define MPX_BNDSTA_TAIL 2 |
| 46 | #define MPX_BNDCFG_TAIL 12 | 52 | #define MPX_BNDCFG_TAIL 12 |
| 47 | #define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) | 53 | #define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) |
| 48 | #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) | 54 | #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) |
| 49 | #define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1)) | ||
| 50 | |||
| 51 | #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) | ||
| 52 | #define MPX_BNDSTA_ERROR_CODE 0x3 | 55 | #define MPX_BNDSTA_ERROR_CODE 0x3 |
| 53 | 56 | ||
| 54 | #define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1) | ||
| 55 | #define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1) | ||
| 56 | #define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \ | ||
| 57 | MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) | ||
| 58 | #define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ | ||
| 59 | MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) | ||
| 60 | |||
| 61 | #ifdef CONFIG_X86_INTEL_MPX | 57 | #ifdef CONFIG_X86_INTEL_MPX |
| 62 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | 58 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); |
| 63 | struct xsave_struct *xsave_buf); | 59 | int mpx_handle_bd_fault(void); |
| 64 | int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); | ||
| 65 | static inline int kernel_managing_mpx_tables(struct mm_struct *mm) | 60 | static inline int kernel_managing_mpx_tables(struct mm_struct *mm) |
| 66 | { | 61 | { |
| 67 | return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); | 62 | return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); |
| @@ -77,12 +72,11 @@ static inline void mpx_mm_init(struct mm_struct *mm) | |||
| 77 | void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, | 72 | void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, |
| 78 | unsigned long start, unsigned long end); | 73 | unsigned long start, unsigned long end); |
| 79 | #else | 74 | #else |
| 80 | static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | 75 | static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) |
| 81 | struct xsave_struct *xsave_buf) | ||
| 82 | { | 76 | { |
| 83 | return NULL; | 77 | return NULL; |
| 84 | } | 78 | } |
| 85 | static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) | 79 | static inline int mpx_handle_bd_fault(void) |
| 86 | { | 80 | { |
| 87 | return -EINVAL; | 81 | return -EINVAL; |
| 88 | } | 82 | } |
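[Editor's note: the entry counts above follow directly from the sizes. A worked check of the 64-bit constants (the 32-bit case is analogous), usable inside any function as compile-time assertions:]

    /* Directory: 2^31 bytes / 2^3 bytes per entry = 2^28 entries: */
    BUILD_BUG_ON(MPX_BD_NR_ENTRIES_64 != (1UL << 28));

    /* Table: 2^22 bytes / 2^5 bytes per entry = 2^17 entries: */
    BUILD_BUG_ON(MPX_BT_NR_ENTRIES_64 != (1UL << 17));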
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9aa52fd13a78..43e6519df0d5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
| @@ -21,6 +21,7 @@ struct mm_struct; | |||
| 21 | #include <asm/desc_defs.h> | 21 | #include <asm/desc_defs.h> |
| 22 | #include <asm/nops.h> | 22 | #include <asm/nops.h> |
| 23 | #include <asm/special_insns.h> | 23 | #include <asm/special_insns.h> |
| 24 | #include <asm/fpu/types.h> | ||
| 24 | 25 | ||
| 25 | #include <linux/personality.h> | 26 | #include <linux/personality.h> |
| 26 | #include <linux/cpumask.h> | 27 | #include <linux/cpumask.h> |
| @@ -52,11 +53,16 @@ static inline void *current_text_addr(void) | |||
| 52 | return pc; | 53 | return pc; |
| 53 | } | 54 | } |
| 54 | 55 | ||
| 56 | /* | ||
| 57 | * These alignment constraints are for performance in the vSMP case, | ||
| 58 | * but in the task_struct case we must also meet hardware imposed | ||
| 59 | * alignment requirements of the FPU state: | ||
| 60 | */ | ||
| 55 | #ifdef CONFIG_X86_VSMP | 61 | #ifdef CONFIG_X86_VSMP |
| 56 | # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) | 62 | # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) |
| 57 | # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) | 63 | # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) |
| 58 | #else | 64 | #else |
| 59 | # define ARCH_MIN_TASKALIGN 16 | 65 | # define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) |
| 60 | # define ARCH_MIN_MMSTRUCT_ALIGN 0 | 66 | # define ARCH_MIN_MMSTRUCT_ALIGN 0 |
| 61 | #endif | 67 | #endif |
| 62 | 68 | ||
| @@ -166,7 +172,6 @@ extern const struct seq_operations cpuinfo_op; | |||
| 166 | #define cache_line_size() (boot_cpu_data.x86_cache_alignment) | 172 | #define cache_line_size() (boot_cpu_data.x86_cache_alignment) |
| 167 | 173 | ||
| 168 | extern void cpu_detect(struct cpuinfo_x86 *c); | 174 | extern void cpu_detect(struct cpuinfo_x86 *c); |
| 169 | extern void fpu_detect(struct cpuinfo_x86 *c); | ||
| 170 | 175 | ||
| 171 | extern void early_cpu_init(void); | 176 | extern void early_cpu_init(void); |
| 172 | extern void identify_boot_cpu(void); | 177 | extern void identify_boot_cpu(void); |
| @@ -313,128 +318,6 @@ struct orig_ist { | |||
| 313 | unsigned long ist[7]; | 318 | unsigned long ist[7]; |
| 314 | }; | 319 | }; |
| 315 | 320 | ||
| 316 | #define MXCSR_DEFAULT 0x1f80 | ||
| 317 | |||
| 318 | struct i387_fsave_struct { | ||
| 319 | u32 cwd; /* FPU Control Word */ | ||
| 320 | u32 swd; /* FPU Status Word */ | ||
| 321 | u32 twd; /* FPU Tag Word */ | ||
| 322 | u32 fip; /* FPU IP Offset */ | ||
| 323 | u32 fcs; /* FPU IP Selector */ | ||
| 324 | u32 foo; /* FPU Operand Pointer Offset */ | ||
| 325 | u32 fos; /* FPU Operand Pointer Selector */ | ||
| 326 | |||
| 327 | /* 8*10 bytes for each FP-reg = 80 bytes: */ | ||
| 328 | u32 st_space[20]; | ||
| 329 | |||
| 330 | /* Software status information [not touched by FSAVE ]: */ | ||
| 331 | u32 status; | ||
| 332 | }; | ||
| 333 | |||
| 334 | struct i387_fxsave_struct { | ||
| 335 | u16 cwd; /* Control Word */ | ||
| 336 | u16 swd; /* Status Word */ | ||
| 337 | u16 twd; /* Tag Word */ | ||
| 338 | u16 fop; /* Last Instruction Opcode */ | ||
| 339 | union { | ||
| 340 | struct { | ||
| 341 | u64 rip; /* Instruction Pointer */ | ||
| 342 | u64 rdp; /* Data Pointer */ | ||
| 343 | }; | ||
| 344 | struct { | ||
| 345 | u32 fip; /* FPU IP Offset */ | ||
| 346 | u32 fcs; /* FPU IP Selector */ | ||
| 347 | u32 foo; /* FPU Operand Offset */ | ||
| 348 | u32 fos; /* FPU Operand Selector */ | ||
| 349 | }; | ||
| 350 | }; | ||
| 351 | u32 mxcsr; /* MXCSR Register State */ | ||
| 352 | u32 mxcsr_mask; /* MXCSR Mask */ | ||
| 353 | |||
| 354 | /* 8*16 bytes for each FP-reg = 128 bytes: */ | ||
| 355 | u32 st_space[32]; | ||
| 356 | |||
| 357 | /* 16*16 bytes for each XMM-reg = 256 bytes: */ | ||
| 358 | u32 xmm_space[64]; | ||
| 359 | |||
| 360 | u32 padding[12]; | ||
| 361 | |||
| 362 | union { | ||
| 363 | u32 padding1[12]; | ||
| 364 | u32 sw_reserved[12]; | ||
| 365 | }; | ||
| 366 | |||
| 367 | } __attribute__((aligned(16))); | ||
| 368 | |||
| 369 | struct i387_soft_struct { | ||
| 370 | u32 cwd; | ||
| 371 | u32 swd; | ||
| 372 | u32 twd; | ||
| 373 | u32 fip; | ||
| 374 | u32 fcs; | ||
| 375 | u32 foo; | ||
| 376 | u32 fos; | ||
| 377 | /* 8*10 bytes for each FP-reg = 80 bytes: */ | ||
| 378 | u32 st_space[20]; | ||
| 379 | u8 ftop; | ||
| 380 | u8 changed; | ||
| 381 | u8 lookahead; | ||
| 382 | u8 no_update; | ||
| 383 | u8 rm; | ||
| 384 | u8 alimit; | ||
| 385 | struct math_emu_info *info; | ||
| 386 | u32 entry_eip; | ||
| 387 | }; | ||
| 388 | |||
| 389 | struct ymmh_struct { | ||
| 390 | /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ | ||
| 391 | u32 ymmh_space[64]; | ||
| 392 | }; | ||
| 393 | |||
| 394 | /* We don't support LWP yet: */ | ||
| 395 | struct lwp_struct { | ||
| 396 | u8 reserved[128]; | ||
| 397 | }; | ||
| 398 | |||
| 399 | struct bndreg { | ||
| 400 | u64 lower_bound; | ||
| 401 | u64 upper_bound; | ||
| 402 | } __packed; | ||
| 403 | |||
| 404 | struct bndcsr { | ||
| 405 | u64 bndcfgu; | ||
| 406 | u64 bndstatus; | ||
| 407 | } __packed; | ||
| 408 | |||
| 409 | struct xsave_hdr_struct { | ||
| 410 | u64 xstate_bv; | ||
| 411 | u64 xcomp_bv; | ||
| 412 | u64 reserved[6]; | ||
| 413 | } __attribute__((packed)); | ||
| 414 | |||
| 415 | struct xsave_struct { | ||
| 416 | struct i387_fxsave_struct i387; | ||
| 417 | struct xsave_hdr_struct xsave_hdr; | ||
| 418 | struct ymmh_struct ymmh; | ||
| 419 | struct lwp_struct lwp; | ||
| 420 | struct bndreg bndreg[4]; | ||
| 421 | struct bndcsr bndcsr; | ||
| 422 | /* new processor state extensions will go here */ | ||
| 423 | } __attribute__ ((packed, aligned (64))); | ||
| 424 | |||
| 425 | union thread_xstate { | ||
| 426 | struct i387_fsave_struct fsave; | ||
| 427 | struct i387_fxsave_struct fxsave; | ||
| 428 | struct i387_soft_struct soft; | ||
| 429 | struct xsave_struct xsave; | ||
| 430 | }; | ||
| 431 | |||
| 432 | struct fpu { | ||
| 433 | unsigned int last_cpu; | ||
| 434 | unsigned int has_fpu; | ||
| 435 | union thread_xstate *state; | ||
| 436 | }; | ||
| 437 | |||
| 438 | #ifdef CONFIG_X86_64 | 321 | #ifdef CONFIG_X86_64 |
| 439 | DECLARE_PER_CPU(struct orig_ist, orig_ist); | 322 | DECLARE_PER_CPU(struct orig_ist, orig_ist); |
| 440 | 323 | ||
| @@ -483,8 +366,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); | |||
| 483 | #endif /* X86_64 */ | 366 | #endif /* X86_64 */ |
| 484 | 367 | ||
| 485 | extern unsigned int xstate_size; | 368 | extern unsigned int xstate_size; |
| 486 | extern void free_thread_xstate(struct task_struct *); | ||
| 487 | extern struct kmem_cache *task_xstate_cachep; | ||
| 488 | 369 | ||
| 489 | struct perf_event; | 370 | struct perf_event; |
| 490 | 371 | ||
| @@ -508,6 +389,10 @@ struct thread_struct { | |||
| 508 | unsigned long fs; | 389 | unsigned long fs; |
| 509 | #endif | 390 | #endif |
| 510 | unsigned long gs; | 391 | unsigned long gs; |
| 392 | |||
| 393 | /* Floating point and extended processor state */ | ||
| 394 | struct fpu fpu; | ||
| 395 | |||
| 511 | /* Save middle states of ptrace breakpoints */ | 396 | /* Save middle states of ptrace breakpoints */ |
| 512 | struct perf_event *ptrace_bps[HBP_NUM]; | 397 | struct perf_event *ptrace_bps[HBP_NUM]; |
| 513 | /* Debug status used for traps, single steps, etc... */ | 398 | /* Debug status used for traps, single steps, etc... */ |
| @@ -518,8 +403,6 @@ struct thread_struct { | |||
| 518 | unsigned long cr2; | 403 | unsigned long cr2; |
| 519 | unsigned long trap_nr; | 404 | unsigned long trap_nr; |
| 520 | unsigned long error_code; | 405 | unsigned long error_code; |
| 521 | /* floating point and extended processor state */ | ||
| 522 | struct fpu fpu; | ||
| 523 | #ifdef CONFIG_X86_32 | 406 | #ifdef CONFIG_X86_32 |
| 524 | /* Virtual 86 mode info */ | 407 | /* Virtual 86 mode info */ |
| 525 | struct vm86_struct __user *vm86_info; | 408 | struct vm86_struct __user *vm86_info; |
| @@ -535,15 +418,6 @@ struct thread_struct { | |||
| 535 | unsigned long iopl; | 418 | unsigned long iopl; |
| 536 | /* Max allowed port in the bitmap, in bytes: */ | 419 | /* Max allowed port in the bitmap, in bytes: */ |
| 537 | unsigned io_bitmap_max; | 420 | unsigned io_bitmap_max; |
| 538 | /* | ||
| 539 | * fpu_counter contains the number of consecutive context switches | ||
| 540 | * that the FPU is used. If this is over a threshold, the lazy fpu | ||
| 541 | * saving becomes unlazy to save the trap. This is an unsigned char | ||
| 542 | * so that after 256 times the counter wraps and the behavior turns | ||
| 543 | * lazy again; this to deal with bursty apps that only use FPU for | ||
| 544 | * a short time | ||
| 545 | */ | ||
| 546 | unsigned char fpu_counter; | ||
| 547 | }; | 421 | }; |
| 548 | 422 | ||
| 549 | /* | 423 | /* |
| @@ -928,18 +802,18 @@ extern int get_tsc_mode(unsigned long adr); | |||
| 928 | extern int set_tsc_mode(unsigned int val); | 802 | extern int set_tsc_mode(unsigned int val); |
| 929 | 803 | ||
| 930 | /* Register/unregister a process' MPX related resource */ | 804 | /* Register/unregister a process' MPX related resource */ |
| 931 | #define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) | 805 | #define MPX_ENABLE_MANAGEMENT() mpx_enable_management() |
| 932 | #define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) | 806 | #define MPX_DISABLE_MANAGEMENT() mpx_disable_management() |
| 933 | 807 | ||
| 934 | #ifdef CONFIG_X86_INTEL_MPX | 808 | #ifdef CONFIG_X86_INTEL_MPX |
| 935 | extern int mpx_enable_management(struct task_struct *tsk); | 809 | extern int mpx_enable_management(void); |
| 936 | extern int mpx_disable_management(struct task_struct *tsk); | 810 | extern int mpx_disable_management(void); |
| 937 | #else | 811 | #else |
| 938 | static inline int mpx_enable_management(struct task_struct *tsk) | 812 | static inline int mpx_enable_management(void) |
| 939 | { | 813 | { |
| 940 | return -EINVAL; | 814 | return -EINVAL; |
| 941 | } | 815 | } |
| 942 | static inline int mpx_disable_management(struct task_struct *tsk) | 816 | static inline int mpx_disable_management(void) |
| 943 | { | 817 | { |
| 944 | return -EINVAL; | 818 | return -EINVAL; |
| 945 | } | 819 | } |
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index ee80b92f0096..6c8a7ed13365 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | #include <asm/i387.h> | 2 | #include <asm/fpu/api.h> |
| 3 | 3 | ||
| 4 | /* | 4 | /* |
| 5 | * may_use_simd - whether it is allowable at this time to issue SIMD | 5 | * may_use_simd - whether it is allowable at this time to issue SIMD |
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a998598f172..c2e00bb2a136 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h | |||
| @@ -39,7 +39,9 @@ | |||
| 39 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
| 40 | #include <asm/percpu.h> | 40 | #include <asm/percpu.h> |
| 41 | #include <asm/desc.h> | 41 | #include <asm/desc.h> |
| 42 | |||
| 42 | #include <linux/random.h> | 43 | #include <linux/random.h> |
| 44 | #include <linux/sched.h> | ||
| 43 | 45 | ||
| 44 | /* | 46 | /* |
| 45 | * 24 byte read-only segment initializer for stack canary. Linker | 47 | * 24 byte read-only segment initializer for stack canary. Linker |
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 552d6c90a6d4..d1793f06854d 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #define _ASM_X86_SUSPEND_32_H | 7 | #define _ASM_X86_SUSPEND_32_H |
| 8 | 8 | ||
| 9 | #include <asm/desc.h> | 9 | #include <asm/desc.h> |
| 10 | #include <asm/i387.h> | 10 | #include <asm/fpu/api.h> |
| 11 | 11 | ||
| 12 | /* image of the saved processor state */ | 12 | /* image of the saved processor state */ |
| 13 | struct saved_context { | 13 | struct saved_context { |
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index bc6232834bab..7ebf0ebe4e68 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #define _ASM_X86_SUSPEND_64_H | 7 | #define _ASM_X86_SUSPEND_64_H |
| 8 | 8 | ||
| 9 | #include <asm/desc.h> | 9 | #include <asm/desc.h> |
| 10 | #include <asm/i387.h> | 10 | #include <asm/fpu/api.h> |
| 11 | 11 | ||
| 12 | /* | 12 | /* |
| 13 | * Image of the saved processor state, used by the low level ACPI suspend to | 13 | * Image of the saved processor state, used by the low level ACPI suspend to |
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h new file mode 100644 index 000000000000..173dd3ba108c --- /dev/null +++ b/arch/x86/include/asm/trace/mpx.h | |||
| @@ -0,0 +1,132 @@ | |||
| 1 | #undef TRACE_SYSTEM | ||
| 2 | #define TRACE_SYSTEM mpx | ||
| 3 | |||
| 4 | #if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) | ||
| 5 | #define _TRACE_MPX_H | ||
| 6 | |||
| 7 | #include <linux/tracepoint.h> | ||
| 8 | |||
| 9 | #ifdef CONFIG_X86_INTEL_MPX | ||
| 10 | |||
| 11 | TRACE_EVENT(mpx_bounds_register_exception, | ||
| 12 | |||
| 13 | TP_PROTO(void *addr_referenced, | ||
| 14 | const struct bndreg *bndreg), | ||
| 15 | TP_ARGS(addr_referenced, bndreg), | ||
| 16 | |||
| 17 | TP_STRUCT__entry( | ||
| 18 | __field(void *, addr_referenced) | ||
| 19 | __field(u64, lower_bound) | ||
| 20 | __field(u64, upper_bound) | ||
| 21 | ), | ||
| 22 | |||
| 23 | TP_fast_assign( | ||
| 24 | __entry->addr_referenced = addr_referenced; | ||
| 25 | __entry->lower_bound = bndreg->lower_bound; | ||
| 26 | __entry->upper_bound = bndreg->upper_bound; | ||
| 27 | ), | ||
| 28 | /* | ||
| 29 | * Note that we are printing out the '~' of the upper | ||
| 30 | * bounds register here. It is actually stored in its | ||
| 31 | * one's complement form so that its 'init' state | ||
| 32 | * corresponds to all 0's. But, that looks like | ||
| 33 | * gibberish when printed out, so print out the one's | ||
| 34 | * complement instead of the actual value here. Note | ||
| 35 | * though that you still need to specify filters for the | ||
| 36 | * actual value, not the displayed one. | ||
| 37 | */ | ||
| 38 | TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", | ||
| 39 | __entry->addr_referenced, | ||
| 40 | __entry->lower_bound, | ||
| 41 | ~__entry->upper_bound | ||
| 42 | ) | ||
| 43 | ); | ||
| 44 | |||
| 45 | TRACE_EVENT(bounds_exception_mpx, | ||
| 46 | |||
| 47 | TP_PROTO(const struct bndcsr *bndcsr), | ||
| 48 | TP_ARGS(bndcsr), | ||
| 49 | |||
| 50 | TP_STRUCT__entry( | ||
| 51 | __field(u64, bndcfgu) | ||
| 52 | __field(u64, bndstatus) | ||
| 53 | ), | ||
| 54 | |||
| 55 | TP_fast_assign( | ||
| 56 | /* need to get rid of the 'const' on bndcsr */ | ||
| 57 | __entry->bndcfgu = (u64)bndcsr->bndcfgu; | ||
| 58 | __entry->bndstatus = (u64)bndcsr->bndstatus; | ||
| 59 | ), | ||
| 60 | |||
| 61 | TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", | ||
| 62 | __entry->bndcfgu, | ||
| 63 | __entry->bndstatus) | ||
| 64 | ); | ||
| 65 | |||
| 66 | DECLARE_EVENT_CLASS(mpx_range_trace, | ||
| 67 | |||
| 68 | TP_PROTO(unsigned long start, | ||
| 69 | unsigned long end), | ||
| 70 | TP_ARGS(start, end), | ||
| 71 | |||
| 72 | TP_STRUCT__entry( | ||
| 73 | __field(unsigned long, start) | ||
| 74 | __field(unsigned long, end) | ||
| 75 | ), | ||
| 76 | |||
| 77 | TP_fast_assign( | ||
| 78 | __entry->start = start; | ||
| 79 | __entry->end = end; | ||
| 80 | ), | ||
| 81 | |||
| 82 | TP_printk("[0x%p:0x%p]", | ||
| 83 | (void *)__entry->start, | ||
| 84 | (void *)__entry->end | ||
| 85 | ) | ||
| 86 | ); | ||
| 87 | |||
| 88 | DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, | ||
| 89 | TP_PROTO(unsigned long start, unsigned long end), | ||
| 90 | TP_ARGS(start, end) | ||
| 91 | ); | ||
| 92 | |||
| 93 | DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, | ||
| 94 | TP_PROTO(unsigned long start, unsigned long end), | ||
| 95 | TP_ARGS(start, end) | ||
| 96 | ); | ||
| 97 | |||
| 98 | TRACE_EVENT(mpx_new_bounds_table, | ||
| 99 | |||
| 100 | TP_PROTO(unsigned long table_vaddr), | ||
| 101 | TP_ARGS(table_vaddr), | ||
| 102 | |||
| 103 | TP_STRUCT__entry( | ||
| 104 | __field(unsigned long, table_vaddr) | ||
| 105 | ), | ||
| 106 | |||
| 107 | TP_fast_assign( | ||
| 108 | __entry->table_vaddr = table_vaddr; | ||
| 109 | ), | ||
| 110 | |||
| 111 | TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) | ||
| 112 | ); | ||
| 113 | |||
| 114 | #else | ||
| 115 | |||
| 116 | /* | ||
| 117 | * This gets used outside of MPX-specific code, so we need a stub. | ||
| 118 | */ | ||
| 119 | static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) | ||
| 120 | { | ||
| 121 | } | ||
| 122 | |||
| 123 | #endif /* CONFIG_X86_INTEL_MPX */ | ||
| 124 | |||
| 125 | #undef TRACE_INCLUDE_PATH | ||
| 126 | #define TRACE_INCLUDE_PATH asm/trace/ | ||
| 127 | #undef TRACE_INCLUDE_FILE | ||
| 128 | #define TRACE_INCLUDE_FILE mpx | ||
| 129 | #endif /* _TRACE_MPX_H */ | ||
| 130 | |||
| 131 | /* This part must be outside protection */ | ||
| 132 | #include <trace/define_trace.h> | ||
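
Usage note: tracepoints declared with TRACE_EVENT() above are fired through the generated trace_<name>() helpers, and exactly one .c file must define CREATE_TRACE_POINTS before including the header. A minimal sketch of a call site (the function and allocation step are hypothetical; the real call sites live in the MPX code this series touches):

    /* Sketch: one compilation unit instantiates the tracepoints. */
    #define CREATE_TRACE_POINTS
    #include <asm/trace/mpx.h>

    static int example_allocate_bounds_table(unsigned long bt_addr)
    {
            /* ... allocate and map a bounds table at bt_addr ... */
            trace_mpx_new_bounds_table(bt_addr);
            return 0;
    }
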
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index ccab4af1646d..59a54e869f15 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h | |||
| @@ -14,8 +14,8 @@ struct user_ymmh_regs { | |||
| 14 | __u32 ymmh_space[64]; | 14 | __u32 ymmh_space[64]; |
| 15 | }; | 15 | }; |
| 16 | 16 | ||
| 17 | struct user_xsave_hdr { | 17 | struct user_xstate_header { |
| 18 | __u64 xstate_bv; | 18 | __u64 xfeatures; |
| 19 | __u64 reserved1[2]; | 19 | __u64 reserved1[2]; |
| 20 | __u64 reserved2[5]; | 20 | __u64 reserved2[5]; |
| 21 | }; | 21 | }; |
| @@ -41,11 +41,11 @@ struct user_xsave_hdr { | |||
| 41 | * particular process/thread. | 41 | * particular process/thread. |
| 42 | * | 42 | * |
| 43 | * Also when the user modifies certain state FP/SSE/etc through the | 43 | * Also when the user modifies certain state FP/SSE/etc through the |
| 44 | * ptrace interface, they must ensure that the xsave_hdr.xstate_bv | 44 | * ptrace interface, they must ensure that the header.xfeatures |
| 45 | * bytes[512..519] of the memory layout are updated correspondingly. | 45 | * bytes[512..519] of the memory layout are updated correspondingly. |
| 46 | * i.e., for example when FP state is modified to a non-init state, | 46 | * i.e., for example when FP state is modified to a non-init state, |
| 47 | * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to | 47 | * header.xfeatures's bit 0 must be set to '1', when SSE is modified to |
| 48 | * non-init state, xsave_hdr.xstate_bv's bit 1 must be set to '1', etc. | 48 | * non-init state, header.xfeatures's bit 1 must be set to '1', etc. |
| 49 | */ | 49 | */ |
| 50 | #define USER_XSTATE_FX_SW_WORDS 6 | 50 | #define USER_XSTATE_FX_SW_WORDS 6 |
| 51 | #define USER_XSTATE_XCR0_WORD 0 | 51 | #define USER_XSTATE_XCR0_WORD 0 |
| @@ -55,7 +55,7 @@ struct user_xstateregs { | |||
| 55 | __u64 fpx_space[58]; | 55 | __u64 fpx_space[58]; |
| 56 | __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; | 56 | __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; |
| 57 | } i387; | 57 | } i387; |
| 58 | struct user_xsave_hdr xsave_hdr; | 58 | struct user_xstate_header header; |
| 59 | struct user_ymmh_regs ymmh; | 59 | struct user_ymmh_regs ymmh; |
| 60 | /* further processor state extensions go here */ | 60 | /* further processor state extensions go here */ |
| 61 | }; | 61 | }; |
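
The comment above about keeping header.xfeatures in sync can be made concrete. A hedged userspace sketch of a ptrace user writing xmm0 and setting the matching xfeatures bit (the helper name is hypothetical; bytes 512..519 hold xfeatures per the comment, and xmm0 sits at offset 160 of the legacy FXSAVE region):

    #include <elf.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    /* Sketch only: tracee must be ptrace-stopped; error handling omitted. */
    static void poke_xmm0(pid_t pid, const uint8_t xmm0[16],
                          uint8_t *xbuf, size_t size)
    {
            struct iovec iov = { .iov_base = xbuf, .iov_len = size };

            ptrace(PTRACE_GETREGSET, pid, (void *)NT_X86_XSTATE, &iov);
            memcpy(xbuf + 160, xmm0, 16);   /* xmm0 in the FXSAVE area */
            xbuf[512] |= 0x2;               /* header.xfeatures bit 1: SSE non-init */
            ptrace(PTRACE_SETREGSET, pid, (void *)NT_X86_XSTATE, &iov);
    }
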
diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h deleted file mode 100644 index f2cba4e79a23..000000000000 --- a/arch/x86/include/asm/xcr.h +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | /* -*- linux-c -*- ------------------------------------------------------- * | ||
| 2 | * | ||
| 3 | * Copyright 2008 rPath, Inc. - All Rights Reserved | ||
| 4 | * | ||
| 5 | * This file is part of the Linux kernel, and is made available under | ||
| 6 | * the terms of the GNU General Public License version 2 or (at your | ||
| 7 | * option) any later version; incorporated herein by reference. | ||
| 8 | * | ||
| 9 | * ----------------------------------------------------------------------- */ | ||
| 10 | |||
| 11 | /* | ||
| 12 | * asm-x86/xcr.h | ||
| 13 | * | ||
| 14 | * Definitions for the eXtended Control Register instructions | ||
| 15 | */ | ||
| 16 | |||
| 17 | #ifndef _ASM_X86_XCR_H | ||
| 18 | #define _ASM_X86_XCR_H | ||
| 19 | |||
| 20 | #define XCR_XFEATURE_ENABLED_MASK 0x00000000 | ||
| 21 | |||
| 22 | #ifdef __KERNEL__ | ||
| 23 | # ifndef __ASSEMBLY__ | ||
| 24 | |||
| 25 | #include <linux/types.h> | ||
| 26 | |||
| 27 | static inline u64 xgetbv(u32 index) | ||
| 28 | { | ||
| 29 | u32 eax, edx; | ||
| 30 | |||
| 31 | asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ | ||
| 32 | : "=a" (eax), "=d" (edx) | ||
| 33 | : "c" (index)); | ||
| 34 | return eax + ((u64)edx << 32); | ||
| 35 | } | ||
| 36 | |||
| 37 | static inline void xsetbv(u32 index, u64 value) | ||
| 38 | { | ||
| 39 | u32 eax = value; | ||
| 40 | u32 edx = value >> 32; | ||
| 41 | |||
| 42 | asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ | ||
| 43 | : : "a" (eax), "d" (edx), "c" (index)); | ||
| 44 | } | ||
| 45 | |||
| 46 | # endif /* __ASSEMBLY__ */ | ||
| 47 | #endif /* __KERNEL__ */ | ||
| 48 | |||
| 49 | #endif /* _ASM_X86_XCR_H */ | ||
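
For reference, the removed xgetbv() read an extended control register; index 0 (XCR_XFEATURE_ENABLED_MASK) is XCR0, which enumerates the enabled xstate features. A userspace analogue using the compiler intrinsic, as an illustration only (assumes OSXSAVE is enabled and the compiler supports -mxsave):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void)
    {
            /* 0 == XCR_XFEATURE_ENABLED_MASK, i.e. XCR0 */
            unsigned long long xcr0 = _xgetbv(0);

            printf("XCR0: %#llx (x87:%llu SSE:%llu AVX:%llu)\n",
                   xcr0, xcr0 & 1, (xcr0 >> 1) & 1, (xcr0 >> 2) & 1);
            return 0;
    }
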
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index d8829751b3f8..1f5c5161ead6 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h | |||
| @@ -36,7 +36,7 @@ | |||
| 36 | * no advantages to be gotten from x86-64 here anyways. | 36 | * no advantages to be gotten from x86-64 here anyways. |
| 37 | */ | 37 | */ |
| 38 | 38 | ||
| 39 | #include <asm/i387.h> | 39 | #include <asm/fpu/api.h> |
| 40 | 40 | ||
| 41 | #ifdef CONFIG_X86_32 | 41 | #ifdef CONFIG_X86_32 |
| 42 | /* reduce register pressure */ | 42 | /* reduce register pressure */ |
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index ce05722e3c68..5a08bc8bff33 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" | 26 | #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" |
| 27 | #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" | 27 | #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" |
| 28 | 28 | ||
| 29 | #include <asm/i387.h> | 29 | #include <asm/fpu/api.h> |
| 30 | 30 | ||
| 31 | static void | 31 | static void |
| 32 | xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | 32 | xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) |
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 492b29802f57..7c0a517ec751 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h | |||
| @@ -18,7 +18,7 @@ | |||
| 18 | #ifdef CONFIG_AS_AVX | 18 | #ifdef CONFIG_AS_AVX |
| 19 | 19 | ||
| 20 | #include <linux/compiler.h> | 20 | #include <linux/compiler.h> |
| 21 | #include <asm/i387.h> | 21 | #include <asm/fpu/api.h> |
| 22 | 22 | ||
| 23 | #define BLOCK4(i) \ | 23 | #define BLOCK4(i) \ |
| 24 | BLOCK(32 * i, 0) \ | 24 | BLOCK(32 * i, 0) \ |
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h deleted file mode 100644 index c9a6d68b8d62..000000000000 --- a/arch/x86/include/asm/xsave.h +++ /dev/null | |||
| @@ -1,257 +0,0 @@ | |||
| 1 | #ifndef __ASM_X86_XSAVE_H | ||
| 2 | #define __ASM_X86_XSAVE_H | ||
| 3 | |||
| 4 | #include <linux/types.h> | ||
| 5 | #include <asm/processor.h> | ||
| 6 | |||
| 7 | #define XSTATE_CPUID 0x0000000d | ||
| 8 | |||
| 9 | #define XSTATE_FP 0x1 | ||
| 10 | #define XSTATE_SSE 0x2 | ||
| 11 | #define XSTATE_YMM 0x4 | ||
| 12 | #define XSTATE_BNDREGS 0x8 | ||
| 13 | #define XSTATE_BNDCSR 0x10 | ||
| 14 | #define XSTATE_OPMASK 0x20 | ||
| 15 | #define XSTATE_ZMM_Hi256 0x40 | ||
| 16 | #define XSTATE_Hi16_ZMM 0x80 | ||
| 17 | |||
| 18 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | ||
| 19 | #define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | ||
| 20 | /* Bit 63 of XCR0 is reserved for future expansion */ | ||
| 21 | #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) | ||
| 22 | |||
| 23 | #define FXSAVE_SIZE 512 | ||
| 24 | |||
| 25 | #define XSAVE_HDR_SIZE 64 | ||
| 26 | #define XSAVE_HDR_OFFSET FXSAVE_SIZE | ||
| 27 | |||
| 28 | #define XSAVE_YMM_SIZE 256 | ||
| 29 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | ||
| 30 | |||
| 31 | /* Supported features which support lazy state saving */ | ||
| 32 | #define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ | ||
| 33 | | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) | ||
| 34 | |||
| 35 | /* Supported features which require eager state saving */ | ||
| 36 | #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) | ||
| 37 | |||
| 38 | /* All currently supported features */ | ||
| 39 | #define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) | ||
| 40 | |||
| 41 | #ifdef CONFIG_X86_64 | ||
| 42 | #define REX_PREFIX "0x48, " | ||
| 43 | #else | ||
| 44 | #define REX_PREFIX | ||
| 45 | #endif | ||
| 46 | |||
| 47 | extern unsigned int xstate_size; | ||
| 48 | extern u64 pcntxt_mask; | ||
| 49 | extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | ||
| 50 | extern struct xsave_struct *init_xstate_buf; | ||
| 51 | |||
| 52 | extern void xsave_init(void); | ||
| 53 | extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); | ||
| 54 | extern int init_fpu(struct task_struct *child); | ||
| 55 | |||
| 56 | /* These macros all use (%edi)/(%rdi) as the single memory argument. */ | ||
| 57 | #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" | ||
| 58 | #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" | ||
| 59 | #define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" | ||
| 60 | #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" | ||
| 61 | #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" | ||
| 62 | |||
| 63 | #define xstate_fault ".section .fixup,\"ax\"\n" \ | ||
| 64 | "3: movl $-1,%[err]\n" \ | ||
| 65 | " jmp 2b\n" \ | ||
| 66 | ".previous\n" \ | ||
| 67 | _ASM_EXTABLE(1b, 3b) \ | ||
| 68 | : [err] "=r" (err) | ||
| 69 | |||
| 70 | /* | ||
| 71 | * This function is called only during boot time, when x86 caps are not yet | ||
| 72 | * set up and alternatives cannot be used. | ||
| 73 | */ | ||
| 74 | static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) | ||
| 75 | { | ||
| 76 | u32 lmask = mask; | ||
| 77 | u32 hmask = mask >> 32; | ||
| 78 | int err = 0; | ||
| 79 | |||
| 80 | WARN_ON(system_state != SYSTEM_BOOTING); | ||
| 81 | |||
| 82 | if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||
| 83 | asm volatile("1:"XSAVES"\n\t" | ||
| 84 | "2:\n\t" | ||
| 85 | xstate_fault | ||
| 86 | : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | ||
| 87 | : "memory"); | ||
| 88 | else | ||
| 89 | asm volatile("1:"XSAVE"\n\t" | ||
| 90 | "2:\n\t" | ||
| 91 | xstate_fault | ||
| 92 | : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | ||
| 93 | : "memory"); | ||
| 94 | return err; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * This function is called only during boot time, when x86 caps are not yet | ||
| 99 | * set up and alternatives cannot be used. | ||
| 100 | */ | ||
| 101 | static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) | ||
| 102 | { | ||
| 103 | u32 lmask = mask; | ||
| 104 | u32 hmask = mask >> 32; | ||
| 105 | int err = 0; | ||
| 106 | |||
| 107 | WARN_ON(system_state != SYSTEM_BOOTING); | ||
| 108 | |||
| 109 | if (boot_cpu_has(X86_FEATURE_XSAVES)) | ||
| 110 | asm volatile("1:"XRSTORS"\n\t" | ||
| 111 | "2:\n\t" | ||
| 112 | xstate_fault | ||
| 113 | : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | ||
| 114 | : "memory"); | ||
| 115 | else | ||
| 116 | asm volatile("1:"XRSTOR"\n\t" | ||
| 117 | "2:\n\t" | ||
| 118 | xstate_fault | ||
| 119 | : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | ||
| 120 | : "memory"); | ||
| 121 | return err; | ||
| 122 | } | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Save processor xstate to xsave area. | ||
| 126 | */ | ||
| 127 | static inline int xsave_state(struct xsave_struct *fx, u64 mask) | ||
| 128 | { | ||
| 129 | u32 lmask = mask; | ||
| 130 | u32 hmask = mask >> 32; | ||
| 131 | int err = 0; | ||
| 132 | |||
| 133 | /* | ||
| 134 | * If XSAVES is enabled, it replaces XSAVEOPT because it supports | ||
| 135 | * the compacted format and supervisor states in addition to the | ||
| 136 | * modified optimization of XSAVEOPT. | ||
| 137 | * | ||
| 138 | * Otherwise, if XSAVEOPT is enabled, it replaces XSAVE because | ||
| 139 | * XSAVEOPT supports the modified optimization, which XSAVE | ||
| 140 | * does not. | ||
| 141 | * | ||
| 142 | * If neither XSAVES nor XSAVEOPT is enabled, use XSAVE. | ||
| 143 | */ | ||
| 144 | alternative_input_2( | ||
| 145 | "1:"XSAVE, | ||
| 146 | XSAVEOPT, | ||
| 147 | X86_FEATURE_XSAVEOPT, | ||
| 148 | XSAVES, | ||
| 149 | X86_FEATURE_XSAVES, | ||
| 150 | [fx] "D" (fx), "a" (lmask), "d" (hmask) : | ||
| 151 | "memory"); | ||
| 152 | asm volatile("2:\n\t" | ||
| 153 | xstate_fault | ||
| 154 | : "0" (0) | ||
| 155 | : "memory"); | ||
| 156 | |||
| 157 | return err; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Restore processor xstate from xsave area. | ||
| 162 | */ | ||
| 163 | static inline int xrstor_state(struct xsave_struct *fx, u64 mask) | ||
| 164 | { | ||
| 165 | int err = 0; | ||
| 166 | u32 lmask = mask; | ||
| 167 | u32 hmask = mask >> 32; | ||
| 168 | |||
| 169 | /* | ||
| 170 | * Use XRSTORS to restore context if it is enabled. XRSTORS supports | ||
| 171 | * the compacted xsave area format, which XRSTOR does not. | ||
| 172 | */ | ||
| 173 | alternative_input( | ||
| 174 | "1: " XRSTOR, | ||
| 175 | XRSTORS, | ||
| 176 | X86_FEATURE_XSAVES, | ||
| 177 | "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) | ||
| 178 | : "memory"); | ||
| 179 | |||
| 180 | asm volatile("2:\n" | ||
| 181 | xstate_fault | ||
| 182 | : "0" (0) | ||
| 183 | : "memory"); | ||
| 184 | |||
| 185 | return err; | ||
| 186 | } | ||
| 187 | |||
| 188 | /* | ||
| 189 | * Save xstate context for old process during context switch. | ||
| 190 | */ | ||
| 191 | static inline void fpu_xsave(struct fpu *fpu) | ||
| 192 | { | ||
| 193 | xsave_state(&fpu->state->xsave, -1); | ||
| 194 | } | ||
| 195 | |||
| 196 | /* | ||
| 197 | * Restore xstate context for new process during context switch. | ||
| 198 | */ | ||
| 199 | static inline int fpu_xrstor_checking(struct xsave_struct *fx) | ||
| 200 | { | ||
| 201 | return xrstor_state(fx, -1); | ||
| 202 | } | ||
| 203 | |||
| 204 | /* | ||
| 205 | * Save xstate to user space xsave area. | ||
| 206 | * | ||
| 207 | * We don't use modified optimization because xrstor/xrstors might track | ||
| 208 | * a different application. | ||
| 209 | * | ||
| 210 | * We don't use the compacted xsave area format, for backward | ||
| 211 | * compatibility with old applications which don't understand | ||
| 212 | * the compacted format. | ||
| 213 | */ | ||
| 214 | static inline int xsave_user(struct xsave_struct __user *buf) | ||
| 215 | { | ||
| 216 | int err; | ||
| 217 | |||
| 218 | /* | ||
| 219 | * Clear the xsave header first, so that reserved fields are | ||
| 220 | * initialized to zero. | ||
| 221 | */ | ||
| 222 | err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); | ||
| 223 | if (unlikely(err)) | ||
| 224 | return -EFAULT; | ||
| 225 | |||
| 226 | __asm__ __volatile__(ASM_STAC "\n" | ||
| 227 | "1:"XSAVE"\n" | ||
| 228 | "2: " ASM_CLAC "\n" | ||
| 229 | xstate_fault | ||
| 230 | : "D" (buf), "a" (-1), "d" (-1), "0" (0) | ||
| 231 | : "memory"); | ||
| 232 | return err; | ||
| 233 | } | ||
| 234 | |||
| 235 | /* | ||
| 236 | * Restore xstate from user space xsave area. | ||
| 237 | */ | ||
| 238 | static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) | ||
| 239 | { | ||
| 240 | int err = 0; | ||
| 241 | struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); | ||
| 242 | u32 lmask = mask; | ||
| 243 | u32 hmask = mask >> 32; | ||
| 244 | |||
| 245 | __asm__ __volatile__(ASM_STAC "\n" | ||
| 246 | "1:"XRSTOR"\n" | ||
| 247 | "2: " ASM_CLAC "\n" | ||
| 248 | xstate_fault | ||
| 249 | : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) | ||
| 250 | : "memory"); /* memory required? */ | ||
| 251 | return err; | ||
| 252 | } | ||
| 253 | |||
| 254 | void *get_xsave_addr(struct xsave_struct *xsave, int xstate); | ||
| 255 | void setup_xstate_comp(void); | ||
| 256 | |||
| 257 | #endif | ||
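
One idiom worth spelling out from the removed helpers: the XSAVE-family instructions take the 64-bit requested-feature bitmap in EDX:EAX, which is why every helper above splits the mask into lmask/hmask the same way. A minimal sketch of that split (the helper name is illustrative):

    #include <linux/types.h>

    /* The mask's low half feeds EAX ("a"), the high half EDX ("d"). */
    static inline void xsave_mask_to_eax_edx(u64 mask, u32 *eax, u32 *edx)
    {
            *eax = mask;            /* lmask */
            *edx = mask >> 32;      /* hmask */
    }
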
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd3..0e8a973de9ee 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h | |||
| @@ -25,7 +25,7 @@ struct _fpx_sw_bytes { | |||
| 25 | __u32 extended_size; /* total size of the layout referred by | 25 | __u32 extended_size; /* total size of the layout referred by |
| 26 | * fpstate pointer in the sigcontext. | 26 | * fpstate pointer in the sigcontext. |
| 27 | */ | 27 | */ |
| 28 | __u64 xstate_bv; | 28 | __u64 xfeatures; |
| 29 | /* feature bit mask (including fp/sse/extended | 29 | /* feature bit mask (including fp/sse/extended |
| 30 | * state) that is present in the memory | 30 | * state) that is present in the memory |
| 31 | * layout. | 31 | * layout. |
| @@ -209,8 +209,8 @@ struct sigcontext { | |||
| 209 | 209 | ||
| 210 | #endif /* !__i386__ */ | 210 | #endif /* !__i386__ */ |
| 211 | 211 | ||
| 212 | struct _xsave_hdr { | 212 | struct _header { |
| 213 | __u64 xstate_bv; | 213 | __u64 xfeatures; |
| 214 | __u64 reserved1[2]; | 214 | __u64 reserved1[2]; |
| 215 | __u64 reserved2[5]; | 215 | __u64 reserved2[5]; |
| 216 | }; | 216 | }; |
| @@ -228,7 +228,7 @@ struct _ymmh_state { | |||
| 228 | */ | 228 | */ |
| 229 | struct _xstate { | 229 | struct _xstate { |
| 230 | struct _fpstate fpstate; | 230 | struct _fpstate fpstate; |
| 231 | struct _xsave_hdr xstate_hdr; | 231 | struct _header xstate_hdr; |
| 232 | struct _ymmh_state ymmh; | 232 | struct _ymmh_state ymmh; |
| 233 | /* new processor state extensions go here */ | 233 | /* new processor state extensions go here */ |
| 234 | }; | 234 | }; |
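
After this rename, a signal handler that walks the extended state reads xstate_hdr.xfeatures. A hedged x86-64 sketch, for illustration only: a robust handler must first validate the _fpx_sw_bytes magic in the fpstate's sw_reserved area before trusting the extended layout, and printf is not async-signal-safe.

    #include <asm/sigcontext.h>
    #include <signal.h>
    #include <stdio.h>
    #include <ucontext.h>

    static void handler(int sig, siginfo_t *si, void *uc_void)
    {
            ucontext_t *uc = uc_void;
            /* Field names follow the renamed uapi header above. */
            struct _xstate *xs = (struct _xstate *)uc->uc_mcontext.fpstate;

            if (xs)
                    printf("xfeatures: %#llx\n",
                           (unsigned long long)xs->xstate_hdr.xfeatures);
    }
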
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9bcd0b56ca17..febaf180621b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
| @@ -44,7 +44,7 @@ obj-y += pci-iommu_table.o | |||
| 44 | obj-y += resource.o | 44 | obj-y += resource.o |
| 45 | 45 | ||
| 46 | obj-y += process.o | 46 | obj-y += process.o |
| 47 | obj-y += i387.o xsave.o | 47 | obj-y += fpu/ |
| 48 | obj-y += ptrace.o | 48 | obj-y += ptrace.o |
| 49 | obj-$(CONFIG_X86_32) += tls.o | 49 | obj-$(CONFIG_X86_32) += tls.o |
| 50 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 50 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..7fe097235376 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
| @@ -21,6 +21,10 @@ | |||
| 21 | #include <asm/io.h> | 21 | #include <asm/io.h> |
| 22 | #include <asm/fixmap.h> | 22 | #include <asm/fixmap.h> |
| 23 | 23 | ||
| 24 | int __read_mostly alternatives_patched; | ||
| 25 | |||
| 26 | EXPORT_SYMBOL_GPL(alternatives_patched); | ||
| 27 | |||
| 24 | #define MAX_PATCH_LEN (255-1) | 28 | #define MAX_PATCH_LEN (255-1) |
| 25 | 29 | ||
| 26 | static int __initdata_or_module debug_alternative; | 30 | static int __initdata_or_module debug_alternative; |
| @@ -627,6 +631,7 @@ void __init alternative_instructions(void) | |||
| 627 | apply_paravirt(__parainstructions, __parainstructions_end); | 631 | apply_paravirt(__parainstructions, __parainstructions_end); |
| 628 | 632 | ||
| 629 | restart_nmi(); | 633 | restart_nmi(); |
| 634 | alternatives_patched = 1; | ||
| 630 | } | 635 | } |
| 631 | 636 | ||
| 632 | /** | 637 | /** |
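
The new alternatives_patched flag gives code a way to detect the pre-patching window. A hedged sketch of the kind of guard it enables (the function and payload are hypothetical; the actual consumer is the FPU code elsewhere in this series):

    #include <linux/bug.h>

    extern int alternatives_patched;

    static void example_patched_insn_user(void)
    {
            /* Too early: alternative instructions have not been applied yet. */
            if (WARN_ON_ONCE(!alternatives_patched))
                    return;

            /* ... code that relies on patched alternative instructions ... */
    }
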
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03445346ee0a..bd17db15a2c1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
| @@ -12,57 +12,11 @@ | |||
| 12 | #include <asm/bugs.h> | 12 | #include <asm/bugs.h> |
| 13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
| 14 | #include <asm/processor-flags.h> | 14 | #include <asm/processor-flags.h> |
| 15 | #include <asm/i387.h> | 15 | #include <asm/fpu/internal.h> |
| 16 | #include <asm/msr.h> | 16 | #include <asm/msr.h> |
| 17 | #include <asm/paravirt.h> | 17 | #include <asm/paravirt.h> |
| 18 | #include <asm/alternative.h> | 18 | #include <asm/alternative.h> |
| 19 | 19 | ||
| 20 | static double __initdata x = 4195835.0; | ||
| 21 | static double __initdata y = 3145727.0; | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This used to check for exceptions.. | ||
| 25 | * However, it turns out that to support that, | ||
| 26 | * the XMM trap handlers basically had to | ||
| 27 | * be buggy. So let's have a correct XMM trap | ||
| 28 | * handler, and forget about printing out | ||
| 29 | * some status at boot. | ||
| 30 | * | ||
| 31 | * We should really only care about bugs here | ||
| 32 | * anyway. Not features. | ||
| 33 | */ | ||
| 34 | static void __init check_fpu(void) | ||
| 35 | { | ||
| 36 | s32 fdiv_bug; | ||
| 37 | |||
| 38 | kernel_fpu_begin(); | ||
| 39 | |||
| 40 | /* | ||
| 41 | * trap_init() enabled FXSR and company _before_ testing for FP | ||
| 42 | * problems here. | ||
| 43 | * | ||
| 44 | * Test for the fdiv bug: http://en.wikipedia.org/wiki/Fdiv_bug | ||
| 45 | */ | ||
| 46 | __asm__("fninit\n\t" | ||
| 47 | "fldl %1\n\t" | ||
| 48 | "fdivl %2\n\t" | ||
| 49 | "fmull %2\n\t" | ||
| 50 | "fldl %1\n\t" | ||
| 51 | "fsubp %%st,%%st(1)\n\t" | ||
| 52 | "fistpl %0\n\t" | ||
| 53 | "fwait\n\t" | ||
| 54 | "fninit" | ||
| 55 | : "=m" (*&fdiv_bug) | ||
| 56 | : "m" (*&x), "m" (*&y)); | ||
| 57 | |||
| 58 | kernel_fpu_end(); | ||
| 59 | |||
| 60 | if (fdiv_bug) { | ||
| 61 | set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); | ||
| 62 | pr_warn("Hmm, FPU with FDIV bug\n"); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | void __init check_bugs(void) | 20 | void __init check_bugs(void) |
| 67 | { | 21 | { |
| 68 | identify_boot_cpu(); | 22 | identify_boot_cpu(); |
| @@ -85,10 +39,5 @@ void __init check_bugs(void) | |||
| 85 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | 39 | '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); |
| 86 | alternative_instructions(); | 40 | alternative_instructions(); |
| 87 | 41 | ||
| 88 | /* | 42 | fpu__init_check_bugs(); |
| 89 | * kernel_fpu_begin/end() in check_fpu() relies on the patched | ||
| 90 | * alternative instructions. | ||
| 91 | */ | ||
| 92 | if (cpu_has_fpu) | ||
| 93 | check_fpu(); | ||
| 94 | } | 43 | } |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 351197cbbc8e..b28e5262a0a5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
| @@ -32,8 +32,7 @@ | |||
| 32 | #include <asm/setup.h> | 32 | #include <asm/setup.h> |
| 33 | #include <asm/apic.h> | 33 | #include <asm/apic.h> |
| 34 | #include <asm/desc.h> | 34 | #include <asm/desc.h> |
| 35 | #include <asm/i387.h> | 35 | #include <asm/fpu/internal.h> |
| 36 | #include <asm/fpu-internal.h> | ||
| 37 | #include <asm/mtrr.h> | 36 | #include <asm/mtrr.h> |
| 38 | #include <linux/numa.h> | 37 | #include <linux/numa.h> |
| 39 | #include <asm/asm.h> | 38 | #include <asm/asm.h> |
| @@ -146,32 +145,21 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
| 146 | } }; | 145 | } }; |
| 147 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | 146 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
| 148 | 147 | ||
| 149 | static int __init x86_xsave_setup(char *s) | 148 | static int __init x86_mpx_setup(char *s) |
| 150 | { | 149 | { |
| 150 | /* require an exact match without trailing characters */ | ||
| 151 | if (strlen(s)) | 151 | if (strlen(s)) |
| 152 | return 0; | 152 | return 0; |
| 153 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
| 154 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 155 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
| 156 | setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
| 157 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
| 158 | return 1; | ||
| 159 | } | ||
| 160 | __setup("noxsave", x86_xsave_setup); | ||
| 161 | 153 | ||
| 162 | static int __init x86_xsaveopt_setup(char *s) | 154 | /* do not emit a message if the feature is not present */ |
| 163 | { | 155 | if (!boot_cpu_has(X86_FEATURE_MPX)) |
| 164 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | 156 | return 1; |
| 165 | return 1; | ||
| 166 | } | ||
| 167 | __setup("noxsaveopt", x86_xsaveopt_setup); | ||
| 168 | 157 | ||
| 169 | static int __init x86_xsaves_setup(char *s) | 158 | setup_clear_cpu_cap(X86_FEATURE_MPX); |
| 170 | { | 159 | pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); |
| 171 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
| 172 | return 1; | 160 | return 1; |
| 173 | } | 161 | } |
| 174 | __setup("noxsaves", x86_xsaves_setup); | 162 | __setup("nompx", x86_mpx_setup); |
| 175 | 163 | ||
| 176 | #ifdef CONFIG_X86_32 | 164 | #ifdef CONFIG_X86_32 |
| 177 | static int cachesize_override = -1; | 165 | static int cachesize_override = -1; |
| @@ -184,14 +172,6 @@ static int __init cachesize_setup(char *str) | |||
| 184 | } | 172 | } |
| 185 | __setup("cachesize=", cachesize_setup); | 173 | __setup("cachesize=", cachesize_setup); |
| 186 | 174 | ||
| 187 | static int __init x86_fxsr_setup(char *s) | ||
| 188 | { | ||
| 189 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | ||
| 190 | setup_clear_cpu_cap(X86_FEATURE_XMM); | ||
| 191 | return 1; | ||
| 192 | } | ||
| 193 | __setup("nofxsr", x86_fxsr_setup); | ||
| 194 | |||
| 195 | static int __init x86_sep_setup(char *s) | 175 | static int __init x86_sep_setup(char *s) |
| 196 | { | 176 | { |
| 197 | setup_clear_cpu_cap(X86_FEATURE_SEP); | 177 | setup_clear_cpu_cap(X86_FEATURE_SEP); |
| @@ -762,7 +742,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |||
| 762 | cpu_detect(c); | 742 | cpu_detect(c); |
| 763 | get_cpu_vendor(c); | 743 | get_cpu_vendor(c); |
| 764 | get_cpu_cap(c); | 744 | get_cpu_cap(c); |
| 765 | fpu_detect(c); | 745 | fpu__init_system(c); |
| 766 | 746 | ||
| 767 | if (this_cpu->c_early_init) | 747 | if (this_cpu->c_early_init) |
| 768 | this_cpu->c_early_init(c); | 748 | this_cpu->c_early_init(c); |
| @@ -1186,8 +1166,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; | |||
| 1186 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; | 1166 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; |
| 1187 | EXPORT_PER_CPU_SYMBOL(__preempt_count); | 1167 | EXPORT_PER_CPU_SYMBOL(__preempt_count); |
| 1188 | 1168 | ||
| 1189 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
| 1190 | |||
| 1191 | /* | 1169 | /* |
| 1192 | * Special IST stacks which the CPU switches to when it calls | 1170 | * Special IST stacks which the CPU switches to when it calls |
| 1193 | * an IST-marked descriptor entry. Up to 7 stacks (hardware | 1171 | * an IST-marked descriptor entry. Up to 7 stacks (hardware |
| @@ -1278,7 +1256,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | |||
| 1278 | EXPORT_PER_CPU_SYMBOL(current_task); | 1256 | EXPORT_PER_CPU_SYMBOL(current_task); |
| 1279 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; | 1257 | DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; |
| 1280 | EXPORT_PER_CPU_SYMBOL(__preempt_count); | 1258 | EXPORT_PER_CPU_SYMBOL(__preempt_count); |
| 1281 | DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); | ||
| 1282 | 1259 | ||
| 1283 | /* | 1260 | /* |
| 1284 | * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find | 1261 | * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find |
| @@ -1442,7 +1419,7 @@ void cpu_init(void) | |||
| 1442 | clear_all_debug_regs(); | 1419 | clear_all_debug_regs(); |
| 1443 | dbg_restore_debug_regs(); | 1420 | dbg_restore_debug_regs(); |
| 1444 | 1421 | ||
| 1445 | fpu_init(); | 1422 | fpu__init_cpu(); |
| 1446 | 1423 | ||
| 1447 | if (is_uv_system()) | 1424 | if (is_uv_system()) |
| 1448 | uv_cpu_init(); | 1425 | uv_cpu_init(); |
| @@ -1498,7 +1475,7 @@ void cpu_init(void) | |||
| 1498 | clear_all_debug_regs(); | 1475 | clear_all_debug_regs(); |
| 1499 | dbg_restore_debug_regs(); | 1476 | dbg_restore_debug_regs(); |
| 1500 | 1477 | ||
| 1501 | fpu_init(); | 1478 | fpu__init_cpu(); |
| 1502 | } | 1479 | } |
| 1503 | #endif | 1480 | #endif |
| 1504 | 1481 | ||
diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile new file mode 100644 index 000000000000..68279efb811a --- /dev/null +++ b/arch/x86/kernel/fpu/Makefile | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | # | ||
| 2 | # Build rules for the FPU support code: | ||
| 3 | # | ||
| 4 | |||
| 5 | obj-y += init.o bugs.o core.o regset.o signal.o xstate.o | ||
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c new file mode 100644 index 000000000000..dd9ca9b60ff3 --- /dev/null +++ b/arch/x86/kernel/fpu/bugs.c | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | /* | ||
| 2 | * x86 FPU bug checks: | ||
| 3 | */ | ||
| 4 | #include <asm/fpu/internal.h> | ||
| 5 | |||
| 6 | /* | ||
| 7 | * Boot time CPU/FPU FDIV bug detection code: | ||
| 8 | */ | ||
| 9 | |||
| 10 | static double __initdata x = 4195835.0; | ||
| 11 | static double __initdata y = 3145727.0; | ||
| 12 | |||
| 13 | /* | ||
| 14 | * This used to check for exceptions.. | ||
| 15 | * However, it turns out that to support that, | ||
| 16 | * the XMM trap handlers basically had to | ||
| 17 | * be buggy. So let's have a correct XMM trap | ||
| 18 | * handler, and forget about printing out | ||
| 19 | * some status at boot. | ||
| 20 | * | ||
| 21 | * We should really only care about bugs here | ||
| 22 | * anyway. Not features. | ||
| 23 | */ | ||
| 24 | static void __init check_fpu(void) | ||
| 25 | { | ||
| 26 | u32 cr0_saved; | ||
| 27 | s32 fdiv_bug; | ||
| 28 | |||
| 29 | /* We might have CR0::TS set already, clear it: */ | ||
| 30 | cr0_saved = read_cr0(); | ||
| 31 | write_cr0(cr0_saved & ~X86_CR0_TS); | ||
| 32 | |||
| 33 | kernel_fpu_begin(); | ||
| 34 | |||
| 35 | /* | ||
| 36 | * trap_init() enabled FXSR and company _before_ testing for FP | ||
| 37 | * problems here. | ||
| 38 | * | ||
| 39 | * Test for the fdiv bug: http://en.wikipedia.org/wiki/Fdiv_bug | ||
| 40 | */ | ||
| 41 | __asm__("fninit\n\t" | ||
| 42 | "fldl %1\n\t" | ||
| 43 | "fdivl %2\n\t" | ||
| 44 | "fmull %2\n\t" | ||
| 45 | "fldl %1\n\t" | ||
| 46 | "fsubp %%st,%%st(1)\n\t" | ||
| 47 | "fistpl %0\n\t" | ||
| 48 | "fwait\n\t" | ||
| 49 | "fninit" | ||
| 50 | : "=m" (*&fdiv_bug) | ||
| 51 | : "m" (*&x), "m" (*&y)); | ||
| 52 | |||
| 53 | kernel_fpu_end(); | ||
| 54 | |||
| 55 | write_cr0(cr0_saved); | ||
| 56 | |||
| 57 | if (fdiv_bug) { | ||
| 58 | set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); | ||
| 59 | pr_warn("Hmm, FPU with FDIV bug\n"); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | void __init fpu__init_check_bugs(void) | ||
| 64 | { | ||
| 65 | /* | ||
| 66 | * kernel_fpu_begin/end() in check_fpu() relies on the patched | ||
| 67 | * alternative instructions. | ||
| 68 | */ | ||
| 69 | if (cpu_has_fpu) | ||
| 70 | check_fpu(); | ||
| 71 | } | ||
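
The probe computes x - (x/y)*y with x = 4195835 and y = 3145727: a correct FPU yields 0, while an affected Pentium famously yields 256 for this operand pair. The same arithmetic in a standalone userspace sketch (volatile keeps the compiler from folding the division at build time):

    #include <stdio.h>

    int main(void)
    {
            volatile double x = 4195835.0, y = 3145727.0;
            double residue = x - (x / y) * y;

            printf("FDIV residue: %g (%s)\n", residue,
                   residue == 0.0 ? "FPU ok" : "FDIV bug!");
            return 0;
    }
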
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c new file mode 100644 index 000000000000..79de954626fd --- /dev/null +++ b/arch/x86/kernel/fpu/core.c | |||
| @@ -0,0 +1,523 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1994 Linus Torvalds | ||
| 3 | * | ||
| 4 | * Pentium III FXSR, SSE support | ||
| 5 | * General FPU state handling cleanups | ||
| 6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
| 7 | */ | ||
| 8 | #include <asm/fpu/internal.h> | ||
| 9 | #include <asm/fpu/regset.h> | ||
| 10 | #include <asm/fpu/signal.h> | ||
| 11 | #include <asm/traps.h> | ||
| 12 | |||
| 13 | #include <linux/hardirq.h> | ||
| 14 | |||
| 15 | /* | ||
| 16 | * Represents the initial FPU state. It's mostly (but not completely) zeroes, | ||
| 17 | * depending on the FPU hardware format: | ||
| 18 | */ | ||
| 19 | union fpregs_state init_fpstate __read_mostly; | ||
| 20 | |||
| 21 | /* | ||
| 22 | * Track whether the kernel is using the FPU state | ||
| 23 | * currently. | ||
| 24 | * | ||
| 25 | * This flag is used: | ||
| 26 | * | ||
| 27 | * - by IRQ context code to potentially use the FPU | ||
| 28 | * if it's unused. | ||
| 29 | * | ||
| 30 | * - to debug kernel_fpu_begin()/end() correctness | ||
| 31 | */ | ||
| 32 | static DEFINE_PER_CPU(bool, in_kernel_fpu); | ||
| 33 | |||
| 34 | /* | ||
| 35 | * Track which context is using the FPU on the CPU: | ||
| 36 | */ | ||
| 37 | DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); | ||
| 38 | |||
| 39 | static void kernel_fpu_disable(void) | ||
| 40 | { | ||
| 41 | WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); | ||
| 42 | this_cpu_write(in_kernel_fpu, true); | ||
| 43 | } | ||
| 44 | |||
| 45 | static void kernel_fpu_enable(void) | ||
| 46 | { | ||
| 47 | WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); | ||
| 48 | this_cpu_write(in_kernel_fpu, false); | ||
| 49 | } | ||
| 50 | |||
| 51 | static bool kernel_fpu_disabled(void) | ||
| 52 | { | ||
| 53 | return this_cpu_read(in_kernel_fpu); | ||
| 54 | } | ||
| 55 | |||
| 56 | /* | ||
| 57 | * Were we in an interrupt that interrupted kernel mode? | ||
| 58 | * | ||
| 59 | * In the lazy-FPU case we can do a kernel_fpu_begin/end() pair *ONLY* | ||
| 60 | * if that pair does nothing at all: the thread must not have the fpu (so | ||
| 61 | * that we don't try to save the FPU state), and TS must | ||
| 62 | * be set (so that the clts/stts pair does nothing that is | ||
| 63 | * visible in the interrupted kernel thread). | ||
| 64 | * | ||
| 65 | * The exception is the eagerfpu case, where we return true: in the | ||
| 66 | * likely case the thread has the FPU but we are not going to set/clear TS. | ||
| 67 | */ | ||
| 68 | static bool interrupted_kernel_fpu_idle(void) | ||
| 69 | { | ||
| 70 | if (kernel_fpu_disabled()) | ||
| 71 | return false; | ||
| 72 | |||
| 73 | if (use_eager_fpu()) | ||
| 74 | return true; | ||
| 75 | |||
| 76 | return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); | ||
| 77 | } | ||
| 78 | |||
| 79 | /* | ||
| 80 | * Were we in user mode (or vm86 mode) when we were | ||
| 81 | * interrupted? | ||
| 82 | * | ||
| 83 | * Doing kernel_fpu_begin/end() is ok if we are running | ||
| 84 | * in an interrupt context from user mode - we'll just | ||
| 85 | * save the FPU state as required. | ||
| 86 | */ | ||
| 87 | static bool interrupted_user_mode(void) | ||
| 88 | { | ||
| 89 | struct pt_regs *regs = get_irq_regs(); | ||
| 90 | return regs && user_mode(regs); | ||
| 91 | } | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Can we use the FPU in kernel mode with the | ||
| 95 | * whole "kernel_fpu_begin/end()" sequence? | ||
| 96 | * | ||
| 97 | * It's always ok in process context (ie "not interrupt") | ||
| 98 | * but it is sometimes ok even from an irq. | ||
| 99 | */ | ||
| 100 | bool irq_fpu_usable(void) | ||
| 101 | { | ||
| 102 | return !in_interrupt() || | ||
| 103 | interrupted_user_mode() || | ||
| 104 | interrupted_kernel_fpu_idle(); | ||
| 105 | } | ||
| 106 | EXPORT_SYMBOL(irq_fpu_usable); | ||
| 107 | |||
| 108 | void __kernel_fpu_begin(void) | ||
| 109 | { | ||
| 110 | struct fpu *fpu = ¤t->thread.fpu; | ||
| 111 | |||
| 112 | WARN_ON_FPU(!irq_fpu_usable()); | ||
| 113 | |||
| 114 | kernel_fpu_disable(); | ||
| 115 | |||
| 116 | if (fpu->fpregs_active) { | ||
| 117 | copy_fpregs_to_fpstate(fpu); | ||
| 118 | } else { | ||
| 119 | this_cpu_write(fpu_fpregs_owner_ctx, NULL); | ||
| 120 | __fpregs_activate_hw(); | ||
| 121 | } | ||
| 122 | } | ||
| 123 | EXPORT_SYMBOL(__kernel_fpu_begin); | ||
| 124 | |||
| 125 | void __kernel_fpu_end(void) | ||
| 126 | { | ||
| 127 | struct fpu *fpu = ¤t->thread.fpu; | ||
| 128 | |||
| 129 | if (fpu->fpregs_active) | ||
| 130 | copy_kernel_to_fpregs(&fpu->state); | ||
| 131 | else | ||
| 132 | __fpregs_deactivate_hw(); | ||
| 133 | |||
| 134 | kernel_fpu_enable(); | ||
| 135 | } | ||
| 136 | EXPORT_SYMBOL(__kernel_fpu_end); | ||
| 137 | |||
| 138 | void kernel_fpu_begin(void) | ||
| 139 | { | ||
| 140 | preempt_disable(); | ||
| 141 | __kernel_fpu_begin(); | ||
| 142 | } | ||
| 143 | EXPORT_SYMBOL_GPL(kernel_fpu_begin); | ||
| 144 | |||
| 145 | void kernel_fpu_end(void) | ||
| 146 | { | ||
| 147 | __kernel_fpu_end(); | ||
| 148 | preempt_enable(); | ||
| 149 | } | ||
| 150 | EXPORT_SYMBOL_GPL(kernel_fpu_end); | ||
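
Usage pattern: kernel_fpu_begin()/kernel_fpu_end() bracket any kernel-mode FPU/SIMD use, with irq_fpu_usable() checked first in code that may run in interrupt context; no sleeping is allowed in between, since preemption is disabled. A hedged sketch (the function and its SIMD payload are hypothetical):

    #include <asm/fpu/api.h>
    #include <linux/string.h>

    static void example_simd_copy(void *dst, const void *src, size_t len)
    {
            if (!irq_fpu_usable()) {
                    memcpy(dst, src, len);  /* scalar fallback */
                    return;
            }

            kernel_fpu_begin();
            /* ... SSE/AVX-accelerated copy; must not sleep here ... */
            kernel_fpu_end();
    }
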
| 151 | |||
| 152 | /* | ||
| 153 | * CR0::TS save/restore functions: | ||
| 154 | */ | ||
| 155 | int irq_ts_save(void) | ||
| 156 | { | ||
| 157 | /* | ||
| 158 | * If in process context and not atomic, we can take a spurious DNA fault. | ||
| 159 | * Otherwise, doing clts() in process context requires disabling preemption | ||
| 160 | * or some heavy lifting like kernel_fpu_begin() | ||
| 161 | */ | ||
| 162 | if (!in_atomic()) | ||
| 163 | return 0; | ||
| 164 | |||
| 165 | if (read_cr0() & X86_CR0_TS) { | ||
| 166 | clts(); | ||
| 167 | return 1; | ||
| 168 | } | ||
| 169 | |||
| 170 | return 0; | ||
| 171 | } | ||
| 172 | EXPORT_SYMBOL_GPL(irq_ts_save); | ||
| 173 | |||
| 174 | void irq_ts_restore(int TS_state) | ||
| 175 | { | ||
| 176 | if (TS_state) | ||
| 177 | stts(); | ||
| 178 | } | ||
| 179 | EXPORT_SYMBOL_GPL(irq_ts_restore); | ||
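
The CR0::TS helpers pair the same way: irq_ts_save() returns 1 only when it actually cleared TS (atomic context with TS set), and that value tells irq_ts_restore() whether to set it back. A hedged sketch of the pattern (the payload is hypothetical; the VIA PadLock driver is the classic user):

    #include <asm/fpu/api.h>

    static void example_ts_sensitive_op(void)
    {
            int ts_state = irq_ts_save();   /* clears TS if atomic and set */

            /* ... instruction(s) that would fault with CR0::TS set ... */

            irq_ts_restore(ts_state);       /* re-sets TS only if we cleared it */
    }
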
| 180 | |||
| 181 | /* | ||
| 182 | * Save the FPU state (mark it for reload if necessary): | ||
| 183 | * | ||
| 184 | * This only ever gets called for the current task. | ||
| 185 | */ | ||
| 186 | void fpu__save(struct fpu *fpu) | ||
| 187 | { | ||
| 188 | WARN_ON_FPU(fpu != ¤t->thread.fpu); | ||
| 189 | |||
| 190 | preempt_disable(); | ||
| 191 | if (fpu->fpregs_active) { | ||
| 192 | if (!copy_fpregs_to_fpstate(fpu)) | ||
| 193 | fpregs_deactivate(fpu); | ||
| 194 | } | ||
| 195 | preempt_enable(); | ||
| 196 | } | ||
| 197 | EXPORT_SYMBOL_GPL(fpu__save); | ||
| 198 | |||
| 199 | /* | ||
| 200 | * Legacy x87 fpstate state init: | ||
| 201 | */ | ||
| 202 | static inline void fpstate_init_fstate(struct fregs_state *fp) | ||
| 203 | { | ||
| 204 | fp->cwd = 0xffff037fu; | ||
| 205 | fp->swd = 0xffff0000u; | ||
| 206 | fp->twd = 0xffffffffu; | ||
| 207 | fp->fos = 0xffff0000u; | ||
| 208 | } | ||
| 209 | |||
| 210 | void fpstate_init(union fpregs_state *state) | ||
| 211 | { | ||
| 212 | if (!cpu_has_fpu) { | ||
| 213 | fpstate_init_soft(&state->soft); | ||
| 214 | return; | ||
| 215 | } | ||
| 216 | |||
| 217 | memset(state, 0, xstate_size); | ||
| 218 | |||
| 219 | if (cpu_has_fxsr) | ||
| 220 | fpstate_init_fxstate(&state->fxsave); | ||
| 221 | else | ||
| 222 | fpstate_init_fstate(&state->fsave); | ||
| 223 | } | ||
| 224 | EXPORT_SYMBOL_GPL(fpstate_init); | ||
| 225 | |||
| 226 | /* | ||
| 227 | * Copy the current task's FPU state to a new task's FPU context. | ||
| 228 | * | ||
| 229 | * In both the 'eager' and the 'lazy' case we save hardware registers | ||
| 230 | * directly to the destination buffer. | ||
| 231 | */ | ||
| 232 | static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) | ||
| 233 | { | ||
| 234 | WARN_ON_FPU(src_fpu != ¤t->thread.fpu); | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Don't let 'init optimized' areas of the XSAVE area | ||
| 238 | * leak into the child task: | ||
| 239 | */ | ||
| 240 | if (use_eager_fpu()) | ||
| 241 | memset(&dst_fpu->state.xsave, 0, xstate_size); | ||
| 242 | |||
| 243 | /* | ||
| 244 | * Save current FPU registers directly into the child | ||
| 245 | * FPU context, without any memory-to-memory copying. | ||
| 246 | * | ||
| 247 | * If the FPU context got destroyed in the process (FNSAVE | ||
| 248 | * done on old CPUs) then copy it back into the source | ||
| 249 | * context and mark the current task for lazy restore. | ||
| 250 | * | ||
| 251 | * We have to do all this with preemption disabled, | ||
| 252 | * mostly because of the FNSAVE case, because in that | ||
| 253 | * case we must not allow preemption in the window | ||
| 254 | * between the FNSAVE and us marking the context lazy. | ||
| 255 | * | ||
| 256 | * It shouldn't be an issue as even FNSAVE is plenty | ||
| 257 | * fast in terms of critical section length. | ||
| 258 | */ | ||
| 259 | preempt_disable(); | ||
| 260 | if (!copy_fpregs_to_fpstate(dst_fpu)) { | ||
| 261 | memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); | ||
| 262 | fpregs_deactivate(src_fpu); | ||
| 263 | } | ||
| 264 | preempt_enable(); | ||
| 265 | } | ||
| 266 | |||
| 267 | int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) | ||
| 268 | { | ||
| 269 | dst_fpu->counter = 0; | ||
| 270 | dst_fpu->fpregs_active = 0; | ||
| 271 | dst_fpu->last_cpu = -1; | ||
| 272 | |||
| 273 | if (src_fpu->fpstate_active) | ||
| 274 | fpu_copy(dst_fpu, src_fpu); | ||
| 275 | |||
| 276 | return 0; | ||
| 277 | } | ||
| 278 | |||
| 279 | /* | ||
| 280 | * Activate the current task's in-memory FPU context, | ||
| 281 | * if it has not been used before: | ||
| 282 | */ | ||
| 283 | void fpu__activate_curr(struct fpu *fpu) | ||
| 284 | { | ||
| 285 | WARN_ON_FPU(fpu != ¤t->thread.fpu); | ||
| 286 | |||
| 287 | if (!fpu->fpstate_active) { | ||
| 288 | fpstate_init(&fpu->state); | ||
| 289 | |||
| 290 | /* Safe to do for the current task: */ | ||
| 291 | fpu->fpstate_active = 1; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | EXPORT_SYMBOL_GPL(fpu__activate_curr); | ||
| 295 | |||
| 296 | /* | ||
| 297 | * This function must be called before we read a task's fpstate. | ||
| 298 | * | ||
| 299 | * If the task has not used the FPU before then initialize its | ||
| 300 | * fpstate. | ||
| 301 | * | ||
| 302 | * If the task has used the FPU before then save it. | ||
| 303 | */ | ||
| 304 | void fpu__activate_fpstate_read(struct fpu *fpu) | ||
| 305 | { | ||
| 306 | /* | ||
| 307 | * If fpregs are active (in the current CPU), then | ||
| 308 | * copy them to the fpstate: | ||
| 309 | */ | ||
| 310 | if (fpu->fpregs_active) { | ||
| 311 | fpu__save(fpu); | ||
| 312 | } else { | ||
| 313 | if (!fpu->fpstate_active) { | ||
| 314 | fpstate_init(&fpu->state); | ||
| 315 | |||
| 316 | /* Safe to do for current and for stopped child tasks: */ | ||
| 317 | fpu->fpstate_active = 1; | ||
| 318 | } | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 322 | /* | ||
| 323 | * This function must be called before we write a task's fpstate. | ||
| 324 | * | ||
| 325 | * If the task has used the FPU before then unlazy it. | ||
| 326 | * If the task has not used the FPU before then initialize its fpstate. | ||
| 327 | * | ||
| 328 | * After this function call, after registers in the fpstate are | ||
| 329 | * modified and the child task has woken up, the child task will | ||
| 330 | * restore the modified FPU state from the modified context. If we | ||
| 331 | * didn't clear its lazy status here then the lazy in-registers | ||
| 332 | * state pending on its former CPU could be restored, corrupting | ||
| 333 | * the modifications. | ||
| 334 | */ | ||
| 335 | void fpu__activate_fpstate_write(struct fpu *fpu) | ||
| 336 | { | ||
| 337 | /* | ||
| 338 | * Only stopped child tasks can be used to modify the FPU | ||
| 339 | * state in the fpstate buffer: | ||
| 340 | */ | ||
| 341 | WARN_ON_FPU(fpu == ¤t->thread.fpu); | ||
| 342 | |||
| 343 | if (fpu->fpstate_active) { | ||
| 344 | /* Invalidate any lazy state: */ | ||
| 345 | fpu->last_cpu = -1; | ||
| 346 | } else { | ||
| 347 | fpstate_init(&fpu->state); | ||
| 348 | |||
| 349 | /* Safe to do for stopped child tasks: */ | ||
| 350 | fpu->fpstate_active = 1; | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | /* | ||
| 355 | * 'fpu__restore()' is called to copy FPU registers from | ||
| 356 | * the FPU fpstate to the live hw registers and to activate | ||
| 357 | * access to the hardware registers, so that FPU instructions | ||
| 358 | * can be used afterwards. | ||
| 359 | * | ||
| 360 | * Must be called with kernel preemption disabled (for example | ||
| 361 | * with local interrupts disabled, as it is in the case of | ||
| 362 | * do_device_not_available()). | ||
| 363 | */ | ||
| 364 | void fpu__restore(struct fpu *fpu) | ||
| 365 | { | ||
| 366 | fpu__activate_curr(fpu); | ||
| 367 | |||
| 368 | /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ | ||
| 369 | kernel_fpu_disable(); | ||
| 370 | fpregs_activate(fpu); | ||
| 371 | copy_kernel_to_fpregs(&fpu->state); | ||
| 372 | fpu->counter++; | ||
| 373 | kernel_fpu_enable(); | ||
| 374 | } | ||
| 375 | EXPORT_SYMBOL_GPL(fpu__restore); | ||
| 376 | |||
| 377 | /* | ||
| 378 | * Drops current FPU state: deactivates the fpregs and | ||
| 379 | * the fpstate. NOTE: it still leaves previous contents | ||
| 380 | * in the fpregs in the eager-FPU case. | ||
| 381 | * | ||
| 382 | * This function can be used in cases where we know that | ||
| 383 | * a state-restore is coming: either an explicit one, | ||
| 384 | * or a reschedule. | ||
| 385 | */ | ||
| 386 | void fpu__drop(struct fpu *fpu) | ||
| 387 | { | ||
| 388 | preempt_disable(); | ||
| 389 | fpu->counter = 0; | ||
| 390 | |||
| 391 | if (fpu->fpregs_active) { | ||
| 392 | /* Ignore delayed exceptions from user space */ | ||
| 393 | asm volatile("1: fwait\n" | ||
| 394 | "2:\n" | ||
| 395 | _ASM_EXTABLE(1b, 2b)); | ||
| 396 | fpregs_deactivate(fpu); | ||
| 397 | } | ||
| 398 | |||
| 399 | fpu->fpstate_active = 0; | ||
| 400 | |||
| 401 | preempt_enable(); | ||
| 402 | } | ||
| 403 | |||
| 404 | /* | ||
| 405 | * Clear FPU registers by setting them up from | ||
| 406 | * the init fpstate: | ||
| 407 | */ | ||
| 408 | static inline void copy_init_fpstate_to_fpregs(void) | ||
| 409 | { | ||
| 410 | if (use_xsave()) | ||
| 411 | copy_kernel_to_xregs(&init_fpstate.xsave, -1); | ||
| 412 | else | ||
| 413 | copy_kernel_to_fxregs(&init_fpstate.fxsave); | ||
| 414 | } | ||
| 415 | |||
| 416 | /* | ||
| 417 | * Clear the FPU state back to init state. | ||
| 418 | * | ||
| 419 | * Called by sys_execve(), by the signal handler code and by various | ||
| 420 | * error paths. | ||
| 421 | */ | ||
| 422 | void fpu__clear(struct fpu *fpu) | ||
| 423 | { | ||
| 424 | WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ | ||
| 425 | |||
| 426 | if (!use_eager_fpu()) { | ||
| 427 | /* FPU state will be reallocated lazily at the first use. */ | ||
| 428 | fpu__drop(fpu); | ||
| 429 | } else { | ||
| 430 | if (!fpu->fpstate_active) { | ||
| 431 | fpu__activate_curr(fpu); | ||
| 432 | user_fpu_begin(); | ||
| 433 | } | ||
| 434 | copy_init_fpstate_to_fpregs(); | ||
| 435 | } | ||
| 436 | } | ||
| 437 | |||
| 438 | /* | ||
| 439 | * x87 math exception handling: | ||
| 440 | */ | ||
| 441 | |||
| 442 | static inline unsigned short get_fpu_cwd(struct fpu *fpu) | ||
| 443 | { | ||
| 444 | if (cpu_has_fxsr) { | ||
| 445 | return fpu->state.fxsave.cwd; | ||
| 446 | } else { | ||
| 447 | return (unsigned short)fpu->state.fsave.cwd; | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | static inline unsigned short get_fpu_swd(struct fpu *fpu) | ||
| 452 | { | ||
| 453 | if (cpu_has_fxsr) { | ||
| 454 | return fpu->state.fxsave.swd; | ||
| 455 | } else { | ||
| 456 | return (unsigned short)fpu->state.fsave.swd; | ||
| 457 | } | ||
| 458 | } | ||
| 459 | |||
| 460 | static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) | ||
| 461 | { | ||
| 462 | if (cpu_has_xmm) { | ||
| 463 | return fpu->state.fxsave.mxcsr; | ||
| 464 | } else { | ||
| 465 | return MXCSR_DEFAULT; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | |||
| 469 | int fpu__exception_code(struct fpu *fpu, int trap_nr) | ||
| 470 | { | ||
| 471 | int err; | ||
| 472 | |||
| 473 | if (trap_nr == X86_TRAP_MF) { | ||
| 474 | unsigned short cwd, swd; | ||
| 475 | /* | ||
| 476 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
| 477 | * status. 0x3f are the exception bits in these regs, 0x200 is the | ||
| 478 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
| 479 | * fault bit. We should only be taking one exception at a time, | ||
| 480 | * so if this combination doesn't produce any single exception, | ||
| 481 | * then we have a bad program that isn't synchronizing its FPU usage | ||
| 482 | * and it will suffer the consequences, since we won't be able to | ||
| 483 | * fully reproduce the context of the exception. | ||
| 484 | */ | ||
| 485 | cwd = get_fpu_cwd(fpu); | ||
| 486 | swd = get_fpu_swd(fpu); | ||
| 487 | |||
| 488 | err = swd & ~cwd; | ||
| 489 | } else { | ||
| 490 | /* | ||
| 491 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 492 | * is only a single status/control register. Thus, to determine which | ||
| 493 | * unmasked exception was caught we must mask the exception mask bits | ||
| 494 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 495 | */ | ||
| 496 | unsigned short mxcsr = get_fpu_mxcsr(fpu); | ||
| 497 | err = ~(mxcsr >> 7) & mxcsr; | ||
| 498 | } | ||
| 499 | |||
| 500 | if (err & 0x001) { /* Invalid op */ | ||
| 501 | /* | ||
| 502 | * swd & 0x240 == 0x040: Stack Underflow | ||
| 503 | * swd & 0x240 == 0x240: Stack Overflow | ||
| 504 | * User must clear the SF bit (0x40) if set | ||
| 505 | */ | ||
| 506 | return FPE_FLTINV; | ||
| 507 | } else if (err & 0x004) { /* Divide by Zero */ | ||
| 508 | return FPE_FLTDIV; | ||
| 509 | } else if (err & 0x008) { /* Overflow */ | ||
| 510 | return FPE_FLTOVF; | ||
| 511 | } else if (err & 0x012) { /* Denormal, Underflow */ | ||
| 512 | return FPE_FLTUND; | ||
| 513 | } else if (err & 0x020) { /* Precision */ | ||
| 514 | return FPE_FLTRES; | ||
| 515 | } | ||
| 516 | |||
| 517 | /* | ||
| 518 | * If we're using IRQ 13, or supposedly even some trap | ||
| 519 | * X86_TRAP_MF implementations, it's possible | ||
| 520 | * we get a spurious trap, which is not an error. | ||
| 521 | */ | ||
| 522 | return 0; | ||
| 523 | } | ||
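The FPE_* code computed above lands in siginfo.si_code of the SIGFPE delivered to user space. As a quick sanity check from user space (a hypothetical test program, not part of this patch), unmasking the divide-by-zero exception makes the FPE_FLTDIV branch observable:

    /* Build with: gcc -D_GNU_SOURCE fpe_test.c -lm -o fpe_test */
    #include <fenv.h>
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>

    static void handler(int sig, siginfo_t *si, void *ctx)
    {
            /* si_code comes from fpu__exception_code(): */
            printf("SIGFPE si_code=%d (FPE_FLTDIV=%d)\n", si->si_code, FPE_FLTDIV);
            exit(0);
    }

    int main(void)
    {
            struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };
            volatile double zero = 0.0;

            sigaction(SIGFPE, &sa, NULL);
            feenableexcept(FE_DIVBYZERO);   /* unmask divide-by-zero */
            return (int)(1.0 / zero);       /* traps -> SIGFPE */
    }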
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c new file mode 100644 index 000000000000..fc878fee6a51 --- /dev/null +++ b/arch/x86/kernel/fpu/init.c | |||
| @@ -0,0 +1,354 @@ | |||
| 1 | /* | ||
| 2 | * x86 FPU boot time init code: | ||
| 3 | */ | ||
| 4 | #include <asm/fpu/internal.h> | ||
| 5 | #include <asm/tlbflush.h> | ||
| 6 | |||
| 7 | /* | ||
| 8 | * Initialize the TS bit in CR0 according to the style of context-switches | ||
| 9 | * we are using: | ||
| 10 | */ | ||
| 11 | static void fpu__init_cpu_ctx_switch(void) | ||
| 12 | { | ||
| 13 | if (!cpu_has_eager_fpu) | ||
| 14 | stts(); | ||
| 15 | else | ||
| 16 | clts(); | ||
| 17 | } | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Initialize the registers found in all CPUs, CR0 and CR4: | ||
| 21 | */ | ||
| 22 | static void fpu__init_cpu_generic(void) | ||
| 23 | { | ||
| 24 | unsigned long cr0; | ||
| 25 | unsigned long cr4_mask = 0; | ||
| 26 | |||
| 27 | if (cpu_has_fxsr) | ||
| 28 | cr4_mask |= X86_CR4_OSFXSR; | ||
| 29 | if (cpu_has_xmm) | ||
| 30 | cr4_mask |= X86_CR4_OSXMMEXCPT; | ||
| 31 | if (cr4_mask) | ||
| 32 | cr4_set_bits(cr4_mask); | ||
| 33 | |||
| 34 | cr0 = read_cr0(); | ||
| 35 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | ||
| 36 | if (!cpu_has_fpu) | ||
| 37 | cr0 |= X86_CR0_EM; | ||
| 38 | write_cr0(cr0); | ||
| 39 | |||
| 40 | /* Flush out any pending x87 state: */ | ||
| 41 | asm volatile ("fninit"); | ||
| 42 | } | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Enable all supported FPU features. Called when a CPU is brought online: | ||
| 46 | */ | ||
| 47 | void fpu__init_cpu(void) | ||
| 48 | { | ||
| 49 | fpu__init_cpu_generic(); | ||
| 50 | fpu__init_cpu_xstate(); | ||
| 51 | fpu__init_cpu_ctx_switch(); | ||
| 52 | } | ||
| 53 | |||
| 54 | /* | ||
| 55 | * The earliest FPU detection code. | ||
| 56 | * | ||
| 57 | * Set the X86_FEATURE_FPU CPU-capability bit based on | ||
| 58 | * trying to execute an actual sequence of FPU instructions: | ||
| 59 | */ | ||
| 60 | static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) | ||
| 61 | { | ||
| 62 | unsigned long cr0; | ||
| 63 | u16 fsw, fcw; | ||
| 64 | |||
| 65 | fsw = fcw = 0xffff; | ||
| 66 | |||
| 67 | cr0 = read_cr0(); | ||
| 68 | cr0 &= ~(X86_CR0_TS | X86_CR0_EM); | ||
| 69 | write_cr0(cr0); | ||
| 70 | |||
| 71 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | ||
| 72 | : "+m" (fsw), "+m" (fcw)); | ||
| 73 | |||
| 74 | if (fsw == 0 && (fcw & 0x103f) == 0x003f) | ||
| 75 | set_cpu_cap(c, X86_FEATURE_FPU); | ||
| 76 | else | ||
| 77 | clear_cpu_cap(c, X86_FEATURE_FPU); | ||
| 78 | |||
| 79 | #ifndef CONFIG_MATH_EMULATION | ||
| 80 | if (!cpu_has_fpu) { | ||
| 81 | pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); | ||
| 82 | for (;;) | ||
| 83 | asm volatile("hlt"); | ||
| 84 | } | ||
| 85 | #endif | ||
| 86 | } | ||
| 87 | |||
| 88 | /* | ||
| 89 | * Boot time FPU feature detection code: | ||
| 90 | */ | ||
| 91 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | ||
| 92 | |||
| 93 | static void __init fpu__init_system_mxcsr(void) | ||
| 94 | { | ||
| 95 | unsigned int mask = 0; | ||
| 96 | |||
| 97 | if (cpu_has_fxsr) { | ||
| 98 | struct fxregs_state fx_tmp __aligned(32) = { }; | ||
| 99 | |||
| 100 | asm volatile("fxsave %0" : "+m" (fx_tmp)); | ||
| 101 | |||
| 102 | mask = fx_tmp.mxcsr_mask; | ||
| 103 | |||
| 104 | /* | ||
| 105 | * If zero then use the default features mask, | ||
| 106 | * which has all features set, except the | ||
| 107 | * denormals-are-zero feature bit: | ||
| 108 | */ | ||
| 109 | if (mask == 0) | ||
| 110 | mask = 0x0000ffbf; | ||
| 111 | } | ||
| 112 | mxcsr_feature_mask &= mask; | ||
| 113 | } | ||
| 114 | |||
| 115 | /* | ||
| 116 | * Once per bootup FPU initialization sequences that will run on most x86 CPUs: | ||
| 117 | */ | ||
| 118 | static void __init fpu__init_system_generic(void) | ||
| 119 | { | ||
| 120 | /* | ||
| 121 | * Set up the legacy init FPU context. (xstate init might overwrite this | ||
| 122 | * with a more modern format, if the CPU supports it.) | ||
| 123 | */ | ||
| 124 | fpstate_init_fxstate(&init_fpstate.fxsave); | ||
| 125 | |||
| 126 | fpu__init_system_mxcsr(); | ||
| 127 | } | ||
| 128 | |||
| 129 | /* | ||
| 130 | * Size of the FPU context state. All tasks in the system use the | ||
| 131 | * same context size, regardless of what portion they use. | ||
| 132 | * This is inherent to the XSAVE architecture which puts all state | ||
| 133 | * components into a single, contiguous memory block: | ||
| 134 | */ | ||
| 135 | unsigned int xstate_size; | ||
| 136 | EXPORT_SYMBOL_GPL(xstate_size); | ||
| 137 | |||
| 138 | /* | ||
| 139 | * Set up the xstate_size based on the legacy FPU context size. | ||
| 140 | * | ||
| 141 | * We set this up first, and later it will be overwritten by | ||
| 142 | * fpu__init_system_xstate() if the CPU knows about xstates. | ||
| 143 | */ | ||
| 144 | static void __init fpu__init_system_xstate_size_legacy(void) | ||
| 145 | { | ||
| 146 | static int on_boot_cpu = 1; | ||
| 147 | |||
| 148 | WARN_ON_FPU(!on_boot_cpu); | ||
| 149 | on_boot_cpu = 0; | ||
| 150 | |||
| 151 | /* | ||
| 152 | * Note that xstate_size might be overwritten later during | ||
| 153 | * fpu__init_system_xstate(). | ||
| 154 | */ | ||
| 155 | |||
| 156 | if (!cpu_has_fpu) { | ||
| 157 | /* | ||
| 158 | * Disable xsave as we do not support it if i387 | ||
| 159 | * emulation is enabled. | ||
| 160 | */ | ||
| 161 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
| 162 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 163 | xstate_size = sizeof(struct swregs_state); | ||
| 164 | } else { | ||
| 165 | if (cpu_has_fxsr) | ||
| 166 | xstate_size = sizeof(struct fxregs_state); | ||
| 167 | else | ||
| 168 | xstate_size = sizeof(struct fregs_state); | ||
| 169 | } | ||
| 170 | /* | ||
| 171 | * Quirk: we don't yet handle the XSAVES* instructions | ||
| 172 | * correctly, as we don't correctly convert between | ||
| 173 | * standard and compacted format when interfacing | ||
| 174 | * with user-space - so disable it for now. | ||
| 175 | * | ||
| 176 | * The difference is small: with recent CPUs the | ||
| 177 | * compacted format is only marginally smaller than | ||
| 178 | * the standard FPU state format. | ||
| 179 | * | ||
| 180 | * ( This is easy to backport while we are fixing | ||
| 181 | * XSAVES* support. ) | ||
| 182 | */ | ||
| 183 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
| 184 | } | ||
| 185 | |||
| 186 | /* | ||
| 187 | * FPU context switching strategies: | ||
| 188 | * | ||
| 189 | * Against popular belief, we don't do lazy FPU saves, due to the | ||
| 190 | * task migration complications it brings on SMP - we only do | ||
| 191 | * lazy FPU restores. | ||
| 192 | * | ||
| 193 | * 'lazy' is the traditional strategy, which is based on setting | ||
| 194 | * CR0::TS to 1 during context-switch (instead of doing a full | ||
| 195 | * restore of the FPU state), which causes the first FPU instruction | ||
| 196 | * after the context switch (whenever it is executed) to fault - at | ||
| 197 | * which point we lazily restore the FPU state into FPU registers. | ||
| 198 | * | ||
| 199 | * Tasks are of course under no obligation to execute FPU instructions, | ||
| 200 | * so it can easily happen that another context-switch occurs without | ||
| 201 | * a single FPU instruction being executed. If we eventually switch | ||
| 202 | * back to the original task (that still owns the FPU) then we have | ||
| 203 | * not only saved the restores along the way, but we also have the | ||
| 204 | * FPU ready to be used for the original task. | ||
| 205 | * | ||
| 206 | * 'eager' switching is used on modern CPUs, there we switch the FPU | ||
| 207 | * state during every context switch, regardless of whether the task | ||
| 208 | * has used FPU instructions in that time slice or not. This is done | ||
| 209 | * because modern FPU context saving instructions are able to optimize | ||
| 210 | * state saving and restoration in hardware: they can detect both | ||
| 211 | * unused and untouched FPU state and optimize accordingly. | ||
| 212 | * | ||
| 213 | * [ Note that even in 'lazy' mode we might optimize context switches | ||
| 214 | * to use 'eager' restores, if we detect that a task is using the FPU | ||
| 215 | * frequently. See the fpu->counter logic in fpu/internal.h for that. ] | ||
| 216 | */ | ||
| 217 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | ||
| 218 | |||
| 219 | static int __init eager_fpu_setup(char *s) | ||
| 220 | { | ||
| 221 | if (!strcmp(s, "on")) | ||
| 222 | eagerfpu = ENABLE; | ||
| 223 | else if (!strcmp(s, "off")) | ||
| 224 | eagerfpu = DISABLE; | ||
| 225 | else if (!strcmp(s, "auto")) | ||
| 226 | eagerfpu = AUTO; | ||
| 227 | return 1; | ||
| 228 | } | ||
| 229 | __setup("eagerfpu=", eager_fpu_setup); | ||
| 230 | |||
| 231 | /* | ||
| 232 | * Pick the FPU context switching strategy: | ||
| 233 | */ | ||
| 234 | static void __init fpu__init_system_ctx_switch(void) | ||
| 235 | { | ||
| 236 | static bool on_boot_cpu = 1; | ||
| 237 | |||
| 238 | WARN_ON_FPU(!on_boot_cpu); | ||
| 239 | on_boot_cpu = 0; | ||
| 240 | |||
| 241 | WARN_ON_FPU(current->thread.fpu.fpstate_active); | ||
| 242 | current_thread_info()->status = 0; | ||
| 243 | |||
| 244 | /* Auto enable eagerfpu for xsaveopt */ | ||
| 245 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | ||
| 246 | eagerfpu = ENABLE; | ||
| 247 | |||
| 248 | if (xfeatures_mask & XSTATE_EAGER) { | ||
| 249 | if (eagerfpu == DISABLE) { | ||
| 250 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", | ||
| 251 | xfeatures_mask & XSTATE_EAGER); | ||
| 252 | xfeatures_mask &= ~XSTATE_EAGER; | ||
| 253 | } else { | ||
| 254 | eagerfpu = ENABLE; | ||
| 255 | } | ||
| 256 | } | ||
| 257 | |||
| 258 | if (eagerfpu == ENABLE) | ||
| 259 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
| 260 | |||
| 261 | printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); | ||
| 262 | } | ||
| 263 | |||
| 264 | /* | ||
| 265 | * Called on the boot CPU once per system bootup, to set up the initial | ||
| 266 | * FPU state that is later cloned into all processes: | ||
| 267 | */ | ||
| 268 | void __init fpu__init_system(struct cpuinfo_x86 *c) | ||
| 269 | { | ||
| 270 | fpu__init_system_early_generic(c); | ||
| 271 | |||
| 272 | /* | ||
| 273 | * The FPU has to be operational for some of the | ||
| 274 | * later FPU init activities: | ||
| 275 | */ | ||
| 276 | fpu__init_cpu(); | ||
| 277 | |||
| 278 | /* | ||
| 279 | * But don't leave CR0::TS set yet, as some of the FPU setup | ||
| 280 | * methods depend on being able to execute FPU instructions | ||
| 281 | * that will fault on a set TS, such as the FXSAVE in | ||
| 282 | * fpu__init_system_mxcsr(). | ||
| 283 | */ | ||
| 284 | clts(); | ||
| 285 | |||
| 286 | fpu__init_system_generic(); | ||
| 287 | fpu__init_system_xstate_size_legacy(); | ||
| 288 | fpu__init_system_xstate(); | ||
| 289 | |||
| 290 | fpu__init_system_ctx_switch(); | ||
| 291 | } | ||
| 292 | |||
| 293 | /* | ||
| 294 | * Boot parameter to turn off FPU support and fall back to math-emu: | ||
| 295 | */ | ||
| 296 | static int __init no_387(char *s) | ||
| 297 | { | ||
| 298 | setup_clear_cpu_cap(X86_FEATURE_FPU); | ||
| 299 | return 1; | ||
| 300 | } | ||
| 301 | __setup("no387", no_387); | ||
| 302 | |||
| 303 | /* | ||
| 304 | * Disable all xstate CPU features: | ||
| 305 | */ | ||
| 306 | static int __init x86_noxsave_setup(char *s) | ||
| 307 | { | ||
| 308 | if (strlen(s)) | ||
| 309 | return 0; | ||
| 310 | |||
| 311 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
| 312 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 313 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
| 314 | setup_clear_cpu_cap(X86_FEATURE_AVX); | ||
| 315 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | ||
| 316 | |||
| 317 | return 1; | ||
| 318 | } | ||
| 319 | __setup("noxsave", x86_noxsave_setup); | ||
| 320 | |||
| 321 | /* | ||
| 322 | * Disable the XSAVEOPT instruction specifically: | ||
| 323 | */ | ||
| 324 | static int __init x86_noxsaveopt_setup(char *s) | ||
| 325 | { | ||
| 326 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 327 | |||
| 328 | return 1; | ||
| 329 | } | ||
| 330 | __setup("noxsaveopt", x86_noxsaveopt_setup); | ||
| 331 | |||
| 332 | /* | ||
| 333 | * Disable the XSAVES instruction: | ||
| 334 | */ | ||
| 335 | static int __init x86_noxsaves_setup(char *s) | ||
| 336 | { | ||
| 337 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
| 338 | |||
| 339 | return 1; | ||
| 340 | } | ||
| 341 | __setup("noxsaves", x86_noxsaves_setup); | ||
| 342 | |||
| 343 | /* | ||
| 344 | * Disable FX save/restore and SSE support: | ||
| 345 | */ | ||
| 346 | static int __init x86_nofxsr_setup(char *s) | ||
| 347 | { | ||
| 348 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | ||
| 349 | setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); | ||
| 350 | setup_clear_cpu_cap(X86_FEATURE_XMM); | ||
| 351 | |||
| 352 | return 1; | ||
| 353 | } | ||
| 354 | __setup("nofxsr", x86_nofxsr_setup); | ||
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c new file mode 100644 index 000000000000..dc60810c1c74 --- /dev/null +++ b/arch/x86/kernel/fpu/regset.c | |||
| @@ -0,0 +1,356 @@ | |||
| 1 | /* | ||
| 2 | * FPU register's regset abstraction, for ptrace, core dumps, etc. | ||
| 3 | */ | ||
| 4 | #include <asm/fpu/internal.h> | ||
| 5 | #include <asm/fpu/signal.h> | ||
| 6 | #include <asm/fpu/regset.h> | ||
| 7 | |||
| 8 | /* | ||
| 9 | * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, | ||
| 10 | * as the "regset->n" for the xstate regset will be updated based on the feature | ||
| 11 | * capabilities supported by XSAVE. | ||
| 12 | */ | ||
| 13 | int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) | ||
| 14 | { | ||
| 15 | struct fpu *target_fpu = &target->thread.fpu; | ||
| 16 | |||
| 17 | return target_fpu->fpstate_active ? regset->n : 0; | ||
| 18 | } | ||
| 19 | |||
| 20 | int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) | ||
| 21 | { | ||
| 22 | struct fpu *target_fpu = &target->thread.fpu; | ||
| 23 | |||
| 24 | return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; | ||
| 25 | } | ||
| 26 | |||
| 27 | int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | ||
| 28 | unsigned int pos, unsigned int count, | ||
| 29 | void *kbuf, void __user *ubuf) | ||
| 30 | { | ||
| 31 | struct fpu *fpu = &target->thread.fpu; | ||
| 32 | |||
| 33 | if (!cpu_has_fxsr) | ||
| 34 | return -ENODEV; | ||
| 35 | |||
| 36 | fpu__activate_fpstate_read(fpu); | ||
| 37 | fpstate_sanitize_xstate(fpu); | ||
| 38 | |||
| 39 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
| 40 | &fpu->state.fxsave, 0, -1); | ||
| 41 | } | ||
| 42 | |||
| 43 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | ||
| 44 | unsigned int pos, unsigned int count, | ||
| 45 | const void *kbuf, const void __user *ubuf) | ||
| 46 | { | ||
| 47 | struct fpu *fpu = &target->thread.fpu; | ||
| 48 | int ret; | ||
| 49 | |||
| 50 | if (!cpu_has_fxsr) | ||
| 51 | return -ENODEV; | ||
| 52 | |||
| 53 | fpu__activate_fpstate_write(fpu); | ||
| 54 | fpstate_sanitize_xstate(fpu); | ||
| 55 | |||
| 56 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
| 57 | &fpu->state.fxsave, 0, -1); | ||
| 58 | |||
| 59 | /* | ||
| 60 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
| 61 | */ | ||
| 62 | fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * update the header bits in the xsave header, indicating the | ||
| 66 | * presence of FP and SSE state. | ||
| 67 | */ | ||
| 68 | if (cpu_has_xsave) | ||
| 69 | fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; | ||
| 70 | |||
| 71 | return ret; | ||
| 72 | } | ||
| 73 | |||
| 74 | int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | ||
| 75 | unsigned int pos, unsigned int count, | ||
| 76 | void *kbuf, void __user *ubuf) | ||
| 77 | { | ||
| 78 | struct fpu *fpu = &target->thread.fpu; | ||
| 79 | struct xregs_state *xsave; | ||
| 80 | int ret; | ||
| 81 | |||
| 82 | if (!cpu_has_xsave) | ||
| 83 | return -ENODEV; | ||
| 84 | |||
| 85 | fpu__activate_fpstate_read(fpu); | ||
| 86 | |||
| 87 | xsave = &fpu->state.xsave; | ||
| 88 | |||
| 89 | /* | ||
| 90 | * Copy the 48 bytes defined by the software first into the xstate | ||
| 91 | * memory layout in the thread struct, so that we can copy the entire | ||
| 92 | * xstateregs to the user using one user_regset_copyout(). | ||
| 93 | */ | ||
| 94 | memcpy(&xsave->i387.sw_reserved, | ||
| 95 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | ||
| 96 | /* | ||
| 97 | * Copy the xstate memory layout. | ||
| 98 | */ | ||
| 99 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | ||
| 100 | return ret; | ||
| 101 | } | ||
| 102 | |||
| 103 | int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | ||
| 104 | unsigned int pos, unsigned int count, | ||
| 105 | const void *kbuf, const void __user *ubuf) | ||
| 106 | { | ||
| 107 | struct fpu *fpu = &target->thread.fpu; | ||
| 108 | struct xregs_state *xsave; | ||
| 109 | int ret; | ||
| 110 | |||
| 111 | if (!cpu_has_xsave) | ||
| 112 | return -ENODEV; | ||
| 113 | |||
| 114 | fpu__activate_fpstate_write(fpu); | ||
| 115 | |||
| 116 | xsave = &fpu->state.xsave; | ||
| 117 | |||
| 118 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | ||
| 119 | /* | ||
| 120 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
| 121 | */ | ||
| 122 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
| 123 | xsave->header.xfeatures &= xfeatures_mask; | ||
| 124 | /* | ||
| 125 | * These bits must be zero. | ||
| 126 | */ | ||
| 127 | memset(&xsave->header.reserved, 0, 48); | ||
| 128 | |||
| 129 | return ret; | ||
| 130 | } | ||
| 131 | |||
| 132 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
| 133 | |||
| 134 | /* | ||
| 135 | * FPU tag word conversions. | ||
| 136 | */ | ||
| 137 | |||
| 138 | static inline unsigned short twd_i387_to_fxsr(unsigned short twd) | ||
| 139 | { | ||
| 140 | unsigned int tmp; /* to avoid 16 bit prefixes in the code */ | ||
| 141 | |||
| 142 | /* Transform each pair of bits into 01 (valid) or 00 (empty) */ | ||
| 143 | tmp = ~twd; | ||
| 144 | tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ | ||
| 145 | /* and move the valid bits to the lower byte. */ | ||
| 146 | tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ | ||
| 147 | tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ | ||
| 148 | tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ | ||
| 149 | |||
| 150 | return tmp; | ||
| 151 | } | ||
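A worked example of the bit-folding above, as a standalone harness (illustrative, not part of the patch). The i387 tag word carries 2 bits per register (11 = empty), the FXSR tag byte 1 bit per register (1 = in use):

    #include <assert.h>
    #include <stdio.h>

    /* Same folding as twd_i387_to_fxsr(): */
    static unsigned short fold(unsigned short twd)
    {
            unsigned int tmp = ~twd;
            tmp = (tmp | (tmp >> 1)) & 0x5555; /* pair 11 (empty) -> 0, else 1 */
            tmp = (tmp | (tmp >> 1)) & 0x3333;
            tmp = (tmp | (tmp >> 2)) & 0x0f0f;
            tmp = (tmp | (tmp >> 4)) & 0x00ff; /* valid bits -> low byte */
            return tmp;
    }

    int main(void)
    {
            assert(fold(0xffff) == 0x00);  /* all registers empty */
            assert(fold(0xfffe) == 0x01);  /* ST0 'special', rest empty */
            assert(fold(0x0000) == 0xff);  /* all registers valid */
            printf("tag word folding OK\n");
            return 0;
    }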
| 152 | |||
| 153 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) | ||
| 154 | #define FP_EXP_TAG_VALID 0 | ||
| 155 | #define FP_EXP_TAG_ZERO 1 | ||
| 156 | #define FP_EXP_TAG_SPECIAL 2 | ||
| 157 | #define FP_EXP_TAG_EMPTY 3 | ||
| 158 | |||
| 159 | static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave) | ||
| 160 | { | ||
| 161 | struct _fpxreg *st; | ||
| 162 | u32 tos = (fxsave->swd >> 11) & 7; | ||
| 163 | u32 twd = (unsigned long) fxsave->twd; | ||
| 164 | u32 tag; | ||
| 165 | u32 ret = 0xffff0000u; | ||
| 166 | int i; | ||
| 167 | |||
| 168 | for (i = 0; i < 8; i++, twd >>= 1) { | ||
| 169 | if (twd & 0x1) { | ||
| 170 | st = FPREG_ADDR(fxsave, (i - tos) & 7); | ||
| 171 | |||
| 172 | switch (st->exponent & 0x7fff) { | ||
| 173 | case 0x7fff: | ||
| 174 | tag = FP_EXP_TAG_SPECIAL; | ||
| 175 | break; | ||
| 176 | case 0x0000: | ||
| 177 | if (!st->significand[0] && | ||
| 178 | !st->significand[1] && | ||
| 179 | !st->significand[2] && | ||
| 180 | !st->significand[3]) | ||
| 181 | tag = FP_EXP_TAG_ZERO; | ||
| 182 | else | ||
| 183 | tag = FP_EXP_TAG_SPECIAL; | ||
| 184 | break; | ||
| 185 | default: | ||
| 186 | if (st->significand[3] & 0x8000) | ||
| 187 | tag = FP_EXP_TAG_VALID; | ||
| 188 | else | ||
| 189 | tag = FP_EXP_TAG_SPECIAL; | ||
| 190 | break; | ||
| 191 | } | ||
| 192 | } else { | ||
| 193 | tag = FP_EXP_TAG_EMPTY; | ||
| 194 | } | ||
| 195 | ret |= tag << (2 * i); | ||
| 196 | } | ||
| 197 | return ret; | ||
| 198 | } | ||
| 199 | |||
| 200 | /* | ||
| 201 | * FXSR floating point environment conversions. | ||
| 202 | */ | ||
| 203 | |||
| 204 | void | ||
| 205 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | ||
| 206 | { | ||
| 207 | struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; | ||
| 208 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | ||
| 209 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | ||
| 210 | int i; | ||
| 211 | |||
| 212 | env->cwd = fxsave->cwd | 0xffff0000u; | ||
| 213 | env->swd = fxsave->swd | 0xffff0000u; | ||
| 214 | env->twd = twd_fxsr_to_i387(fxsave); | ||
| 215 | |||
| 216 | #ifdef CONFIG_X86_64 | ||
| 217 | env->fip = fxsave->rip; | ||
| 218 | env->foo = fxsave->rdp; | ||
| 219 | /* | ||
| 220 | * This should actually be ds/cs at FPU exception time, but | ||
| 221 | * that information is not available in 64-bit mode. | ||
| 222 | */ | ||
| 223 | env->fcs = task_pt_regs(tsk)->cs; | ||
| 224 | if (tsk == current) { | ||
| 225 | savesegment(ds, env->fos); | ||
| 226 | } else { | ||
| 227 | env->fos = tsk->thread.ds; | ||
| 228 | } | ||
| 229 | env->fos |= 0xffff0000; | ||
| 230 | #else | ||
| 231 | env->fip = fxsave->fip; | ||
| 232 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); | ||
| 233 | env->foo = fxsave->foo; | ||
| 234 | env->fos = fxsave->fos; | ||
| 235 | #endif | ||
| 236 | |||
| 237 | for (i = 0; i < 8; ++i) | ||
| 238 | memcpy(&to[i], &from[i], sizeof(to[0])); | ||
| 239 | } | ||
| 240 | |||
| 241 | void convert_to_fxsr(struct task_struct *tsk, | ||
| 242 | const struct user_i387_ia32_struct *env) | ||
| 243 | |||
| 244 | { | ||
| 245 | struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; | ||
| 246 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | ||
| 247 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | ||
| 248 | int i; | ||
| 249 | |||
| 250 | fxsave->cwd = env->cwd; | ||
| 251 | fxsave->swd = env->swd; | ||
| 252 | fxsave->twd = twd_i387_to_fxsr(env->twd); | ||
| 253 | fxsave->fop = (u16) ((u32) env->fcs >> 16); | ||
| 254 | #ifdef CONFIG_X86_64 | ||
| 255 | fxsave->rip = env->fip; | ||
| 256 | fxsave->rdp = env->foo; | ||
| 257 | /* cs and ds ignored */ | ||
| 258 | #else | ||
| 259 | fxsave->fip = env->fip; | ||
| 260 | fxsave->fcs = (env->fcs & 0xffff); | ||
| 261 | fxsave->foo = env->foo; | ||
| 262 | fxsave->fos = env->fos; | ||
| 263 | #endif | ||
| 264 | |||
| 265 | for (i = 0; i < 8; ++i) | ||
| 266 | memcpy(&to[i], &from[i], sizeof(from[0])); | ||
| 267 | } | ||
| 268 | |||
| 269 | int fpregs_get(struct task_struct *target, const struct user_regset *regset, | ||
| 270 | unsigned int pos, unsigned int count, | ||
| 271 | void *kbuf, void __user *ubuf) | ||
| 272 | { | ||
| 273 | struct fpu *fpu = &target->thread.fpu; | ||
| 274 | struct user_i387_ia32_struct env; | ||
| 275 | |||
| 276 | fpu__activate_fpstate_read(fpu); | ||
| 277 | |||
| 278 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 279 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
| 280 | |||
| 281 | if (!cpu_has_fxsr) | ||
| 282 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
| 283 | &fpu->state.fsave, 0, | ||
| 284 | -1); | ||
| 285 | |||
| 286 | fpstate_sanitize_xstate(fpu); | ||
| 287 | |||
| 288 | if (kbuf && pos == 0 && count == sizeof(env)) { | ||
| 289 | convert_from_fxsr(kbuf, target); | ||
| 290 | return 0; | ||
| 291 | } | ||
| 292 | |||
| 293 | convert_from_fxsr(&env, target); | ||
| 294 | |||
| 295 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); | ||
| 296 | } | ||
| 297 | |||
| 298 | int fpregs_set(struct task_struct *target, const struct user_regset *regset, | ||
| 299 | unsigned int pos, unsigned int count, | ||
| 300 | const void *kbuf, const void __user *ubuf) | ||
| 301 | { | ||
| 302 | struct fpu *fpu = &target->thread.fpu; | ||
| 303 | struct user_i387_ia32_struct env; | ||
| 304 | int ret; | ||
| 305 | |||
| 306 | fpu__activate_fpstate_write(fpu); | ||
| 307 | fpstate_sanitize_xstate(fpu); | ||
| 308 | |||
| 309 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 310 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
| 311 | |||
| 312 | if (!cpu_has_fxsr) | ||
| 313 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
| 314 | &fpu->state.fsave, 0, | ||
| 315 | -1); | ||
| 316 | |||
| 317 | if (pos > 0 || count < sizeof(env)) | ||
| 318 | convert_from_fxsr(&env, target); | ||
| 319 | |||
| 320 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); | ||
| 321 | if (!ret) | ||
| 322 | convert_to_fxsr(target, &env); | ||
| 323 | |||
| 324 | /* | ||
| 325 | * update the header bit in the xsave header, indicating the | ||
| 326 | * presence of FP. | ||
| 327 | */ | ||
| 328 | if (cpu_has_xsave) | ||
| 329 | fpu->state.xsave.header.xfeatures |= XSTATE_FP; | ||
| 330 | return ret; | ||
| 331 | } | ||
| 332 | |||
| 333 | /* | ||
| 334 | * FPU state for core dumps. | ||
| 335 | * This is only used for a.out dumps now. | ||
| 336 | * It is declared generically using elf_fpregset_t (which is | ||
| 337 | * struct user_i387_struct) but is in fact only used for 32-bit | ||
| 338 | * dumps, so on 64-bit it is really struct user_i387_ia32_struct. | ||
| 339 | */ | ||
| 340 | int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) | ||
| 341 | { | ||
| 342 | struct task_struct *tsk = current; | ||
| 343 | struct fpu *fpu = &tsk->thread.fpu; | ||
| 344 | int fpvalid; | ||
| 345 | |||
| 346 | fpvalid = fpu->fpstate_active; | ||
| 347 | if (fpvalid) | ||
| 348 | fpvalid = !fpregs_get(tsk, NULL, | ||
| 349 | 0, sizeof(struct user_i387_ia32_struct), | ||
| 350 | ufpu, NULL); | ||
| 351 | |||
| 352 | return fpvalid; | ||
| 353 | } | ||
| 354 | EXPORT_SYMBOL(dump_fpu); | ||
| 355 | |||
| 356 | #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ | ||
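These regset handlers back the ptrace() FPU requests that debuggers issue. A minimal user-space consumer of the xfpregs regset (hypothetical x86-64 demo, error handling elided):

    #include <signal.h>
    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/user.h>   /* struct user_fpregs_struct: the fxsave layout */
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
            pid_t child = fork();

            if (child == 0) {
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    raise(SIGSTOP);          /* stop so the parent can inspect */
                    _exit(0);
            }

            waitpid(child, NULL, 0);         /* child is now a stopped tracee */

            struct user_fpregs_struct fp;
            ptrace(PTRACE_GETFPREGS, child, NULL, &fp);  /* -> xfpregs_get() */
            printf("cwd=%#hx swd=%#hx mxcsr=%#x\n", fp.cwd, fp.swd, fp.mxcsr);

            ptrace(PTRACE_DETACH, child, NULL, NULL);
            return 0;
    }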
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c new file mode 100644 index 000000000000..50ec9af1bd51 --- /dev/null +++ b/arch/x86/kernel/fpu/signal.c | |||
| @@ -0,0 +1,404 @@ | |||
| 1 | /* | ||
| 2 | * FPU signal frame handling routines. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #include <linux/compat.h> | ||
| 6 | #include <linux/cpu.h> | ||
| 7 | |||
| 8 | #include <asm/fpu/internal.h> | ||
| 9 | #include <asm/fpu/signal.h> | ||
| 10 | #include <asm/fpu/regset.h> | ||
| 11 | |||
| 12 | #include <asm/sigframe.h> | ||
| 13 | |||
| 14 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | ||
| 15 | |||
| 16 | /* | ||
| 17 | * Check for the presence of extended state information in the | ||
| 18 | * user fpstate pointer in the sigcontext. | ||
| 19 | */ | ||
| 20 | static inline int check_for_xstate(struct fxregs_state __user *buf, | ||
| 21 | void __user *fpstate, | ||
| 22 | struct _fpx_sw_bytes *fx_sw) | ||
| 23 | { | ||
| 24 | int min_xstate_size = sizeof(struct fxregs_state) + | ||
| 25 | sizeof(struct xstate_header); | ||
| 26 | unsigned int magic2; | ||
| 27 | |||
| 28 | if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) | ||
| 29 | return -1; | ||
| 30 | |||
| 31 | /* Check for the first magic field and other error scenarios. */ | ||
| 32 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || | ||
| 33 | fx_sw->xstate_size < min_xstate_size || | ||
| 34 | fx_sw->xstate_size > xstate_size || | ||
| 35 | fx_sw->xstate_size > fx_sw->extended_size) | ||
| 36 | return -1; | ||
| 37 | |||
| 38 | /* | ||
| 39 | * Check for the presence of second magic word at the end of memory | ||
| 40 | * layout. This detects the case where the user just copied the legacy | ||
| 41 | * fpstate layout without copying the extended state information | ||
| 42 | * in the memory layout. | ||
| 43 | */ | ||
| 44 | if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) | ||
| 45 | || magic2 != FP_XSTATE_MAGIC2) | ||
| 46 | return -1; | ||
| 47 | |||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Signal frame handlers. | ||
| 53 | */ | ||
| 54 | static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) | ||
| 55 | { | ||
| 56 | if (use_fxsr()) { | ||
| 57 | struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; | ||
| 58 | struct user_i387_ia32_struct env; | ||
| 59 | struct _fpstate_ia32 __user *fp = buf; | ||
| 60 | |||
| 61 | convert_from_fxsr(&env, tsk); | ||
| 62 | |||
| 63 | if (__copy_to_user(buf, &env, sizeof(env)) || | ||
| 64 | __put_user(xsave->i387.swd, &fp->status) || | ||
| 65 | __put_user(X86_FXSR_MAGIC, &fp->magic)) | ||
| 66 | return -1; | ||
| 67 | } else { | ||
| 68 | struct fregs_state __user *fp = buf; | ||
| 69 | u32 swd; | ||
| 70 | if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) | ||
| 71 | return -1; | ||
| 72 | } | ||
| 73 | |||
| 74 | return 0; | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline int save_xstate_epilog(void __user *buf, int ia32_frame) | ||
| 78 | { | ||
| 79 | struct xregs_state __user *x = buf; | ||
| 80 | struct _fpx_sw_bytes *sw_bytes; | ||
| 81 | u32 xfeatures; | ||
| 82 | int err; | ||
| 83 | |||
| 84 | /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ | ||
| 85 | sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; | ||
| 86 | err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); | ||
| 87 | |||
| 88 | if (!use_xsave()) | ||
| 89 | return err; | ||
| 90 | |||
| 91 | err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Read the xfeatures which we copied (directly from the cpu or | ||
| 95 | * from the state in task struct) to the user buffers. | ||
| 96 | */ | ||
| 97 | err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); | ||
| 98 | |||
| 99 | /* | ||
| 100 | * For legacy compatibility, we always set the FP/SSE bits in the bit | ||
| 101 | * vector while saving the state to the user context. This lets | ||
| 102 | * us capture any changes (made during sigreturn) to | ||
| 103 | * the FP/SSE bits by legacy applications which don't touch | ||
| 104 | * xfeatures in the xsave header. | ||
| 105 | * | ||
| 106 | * xsave aware apps can change the xfeatures in the xsave | ||
| 107 | * header as well as change any contents in the memory layout. | ||
| 108 | * xrestore as part of sigreturn will capture all the changes. | ||
| 109 | */ | ||
| 110 | xfeatures |= XSTATE_FPSSE; | ||
| 111 | |||
| 112 | err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); | ||
| 113 | |||
| 114 | return err; | ||
| 115 | } | ||
| 116 | |||
| 117 | static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) | ||
| 118 | { | ||
| 119 | int err; | ||
| 120 | |||
| 121 | if (use_xsave()) | ||
| 122 | err = copy_xregs_to_user(buf); | ||
| 123 | else if (use_fxsr()) | ||
| 124 | err = copy_fxregs_to_user((struct fxregs_state __user *) buf); | ||
| 125 | else | ||
| 126 | err = copy_fregs_to_user((struct fregs_state __user *) buf); | ||
| 127 | |||
| 128 | if (unlikely(err) && __clear_user(buf, xstate_size)) | ||
| 129 | err = -EFAULT; | ||
| 130 | return err; | ||
| 131 | } | ||
| 132 | |||
| 133 | /* | ||
| 134 | * Save the fpu, extended register state to the user signal frame. | ||
| 135 | * | ||
| 136 | * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save | ||
| 137 | * state is copied. | ||
| 138 | * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. | ||
| 139 | * | ||
| 140 | * buf == buf_fx for 64-bit frames and 32-bit fsave frame. | ||
| 141 | * buf != buf_fx for 32-bit frames with fxstate. | ||
| 142 | * | ||
| 143 | * If the fpu, extended register state is live, save the state directly | ||
| 144 | * to the user frame pointed to by the aligned pointer 'buf_fx'. Otherwise, | ||
| 145 | * copy the thread's fpu state to the user frame starting at 'buf_fx'. | ||
| 146 | * | ||
| 147 | * If this is a 32-bit frame with fxstate, put a fsave header before | ||
| 148 | * the aligned state at 'buf_fx'. | ||
| 149 | * | ||
| 150 | * For [f]xsave state, update the SW reserved fields in the [f]xsave frame | ||
| 151 | * indicating the absence/presence of the extended state to the user. | ||
| 152 | */ | ||
| 153 | int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) | ||
| 154 | { | ||
| 155 | struct xregs_state *xsave = &current->thread.fpu.state.xsave; | ||
| 156 | struct task_struct *tsk = current; | ||
| 157 | int ia32_fxstate = (buf != buf_fx); | ||
| 158 | |||
| 159 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
| 160 | config_enabled(CONFIG_IA32_EMULATION)); | ||
| 161 | |||
| 162 | if (!access_ok(VERIFY_WRITE, buf, size)) | ||
| 163 | return -EACCES; | ||
| 164 | |||
| 165 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 166 | return fpregs_soft_get(current, NULL, 0, | ||
| 167 | sizeof(struct user_i387_ia32_struct), NULL, | ||
| 168 | (struct _fpstate_ia32 __user *) buf) ? -1 : 1; | ||
| 169 | |||
| 170 | if (fpregs_active()) { | ||
| 171 | /* Save the live register state to the user directly. */ | ||
| 172 | if (copy_fpregs_to_sigframe(buf_fx)) | ||
| 173 | return -1; | ||
| 174 | /* Update the thread's fxstate to save the fsave header. */ | ||
| 175 | if (ia32_fxstate) | ||
| 176 | copy_fxregs_to_kernel(&tsk->thread.fpu); | ||
| 177 | } else { | ||
| 178 | fpstate_sanitize_xstate(&tsk->thread.fpu); | ||
| 179 | if (__copy_to_user(buf_fx, xsave, xstate_size)) | ||
| 180 | return -1; | ||
| 181 | } | ||
| 182 | |||
| 183 | /* Save the fsave header for the 32-bit frames. */ | ||
| 184 | if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) | ||
| 185 | return -1; | ||
| 186 | |||
| 187 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) | ||
| 188 | return -1; | ||
| 189 | |||
| 190 | return 0; | ||
| 191 | } | ||
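From user space, the FP_XSTATE_MAGIC1 word written into sw_reserved by save_xstate_epilog() is directly observable in a signal handler. A hypothetical probe (x86-64, XSAVE-capable kernel assumed; 464 is the byte offset of sw_reserved inside the 512-byte fxsave image):

    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <ucontext.h>

    static void handler(int sig, siginfo_t *si, void *uc_v)
    {
            ucontext_t *uc = uc_v;
            unsigned char *fpstate = (unsigned char *)uc->uc_mcontext.fpregs;
            unsigned int magic1;

            /* sw_reserved occupies bytes 464..511 of the fxsave image,
               and its first field is magic1 (FP_XSTATE_MAGIC1): */
            memcpy(&magic1, fpstate + 464, sizeof(magic1));
            printf("sigframe sw_reserved magic1: %#x\n", magic1);
    }

    int main(void)
    {
            struct sigaction sa = { .sa_sigaction = handler, .sa_flags = SA_SIGINFO };

            sigaction(SIGUSR1, &sa, NULL);
            raise(SIGUSR1);
            return 0;
    }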
| 192 | |||
| 193 | static inline void | ||
| 194 | sanitize_restored_xstate(struct task_struct *tsk, | ||
| 195 | struct user_i387_ia32_struct *ia32_env, | ||
| 196 | u64 xfeatures, int fx_only) | ||
| 197 | { | ||
| 198 | struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; | ||
| 199 | struct xstate_header *header = &xsave->header; | ||
| 200 | |||
| 201 | if (use_xsave()) { | ||
| 202 | /* These bits must be zero. */ | ||
| 203 | memset(header->reserved, 0, 48); | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Init the state that is not present in the memory | ||
| 207 | * layout and not enabled by the OS. | ||
| 208 | */ | ||
| 209 | if (fx_only) | ||
| 210 | header->xfeatures = XSTATE_FPSSE; | ||
| 211 | else | ||
| 212 | header->xfeatures &= (xfeatures_mask & xfeatures); | ||
| 213 | } | ||
| 214 | |||
| 215 | if (use_fxsr()) { | ||
| 216 | /* | ||
| 217 | * mxcsr reserved bits must be masked to zero for security | ||
| 218 | * reasons. | ||
| 219 | */ | ||
| 220 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
| 221 | |||
| 222 | convert_to_fxsr(tsk, ia32_env); | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | /* | ||
| 227 | * Restore the extended state if present. Otherwise, restore the FP/SSE state. | ||
| 228 | */ | ||
| 229 | static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) | ||
| 230 | { | ||
| 231 | if (use_xsave()) { | ||
| 232 | if ((unsigned long)buf % 64 || fx_only) { | ||
| 233 | u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; | ||
| 234 | copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); | ||
| 235 | return copy_user_to_fxregs(buf); | ||
| 236 | } else { | ||
| 237 | u64 init_bv = xfeatures_mask & ~xbv; | ||
| 238 | if (unlikely(init_bv)) | ||
| 239 | copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); | ||
| 240 | return copy_user_to_xregs(buf, xbv); | ||
| 241 | } | ||
| 242 | } else if (use_fxsr()) { | ||
| 243 | return copy_user_to_fxregs(buf); | ||
| 244 | } else | ||
| 245 | return copy_user_to_fregs(buf); | ||
| 246 | } | ||
| 247 | |||
| 248 | static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) | ||
| 249 | { | ||
| 250 | int ia32_fxstate = (buf != buf_fx); | ||
| 251 | struct task_struct *tsk = current; | ||
| 252 | struct fpu *fpu = &tsk->thread.fpu; | ||
| 253 | int state_size = xstate_size; | ||
| 254 | u64 xfeatures = 0; | ||
| 255 | int fx_only = 0; | ||
| 256 | |||
| 257 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
| 258 | config_enabled(CONFIG_IA32_EMULATION)); | ||
| 259 | |||
| 260 | if (!buf) { | ||
| 261 | fpu__clear(fpu); | ||
| 262 | return 0; | ||
| 263 | } | ||
| 264 | |||
| 265 | if (!access_ok(VERIFY_READ, buf, size)) | ||
| 266 | return -EACCES; | ||
| 267 | |||
| 268 | fpu__activate_curr(fpu); | ||
| 269 | |||
| 270 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 271 | return fpregs_soft_set(current, NULL, | ||
| 272 | 0, sizeof(struct user_i387_ia32_struct), | ||
| 273 | NULL, buf) != 0; | ||
| 274 | |||
| 275 | if (use_xsave()) { | ||
| 276 | struct _fpx_sw_bytes fx_sw_user; | ||
| 277 | if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { | ||
| 278 | /* | ||
| 279 | * Couldn't find the extended state information in the | ||
| 280 | * memory layout. Restore just the FP/SSE and init all | ||
| 281 | * the other extended state. | ||
| 282 | */ | ||
| 283 | state_size = sizeof(struct fxregs_state); | ||
| 284 | fx_only = 1; | ||
| 285 | } else { | ||
| 286 | state_size = fx_sw_user.xstate_size; | ||
| 287 | xfeatures = fx_sw_user.xfeatures; | ||
| 288 | } | ||
| 289 | } | ||
| 290 | |||
| 291 | if (ia32_fxstate) { | ||
| 292 | /* | ||
| 293 | * For 32-bit frames with fxstate, copy the user state to the | ||
| 294 | * thread's fpu state, reconstruct fxstate from the fsave | ||
| 295 | * header. Sanitize the copied state etc. | ||
| 296 | */ | ||
| 297 | struct fpu *fpu = &tsk->thread.fpu; | ||
| 298 | struct user_i387_ia32_struct env; | ||
| 299 | int err = 0; | ||
| 300 | |||
| 301 | /* | ||
| 302 | * Drop the current fpu, which clears fpu->fpstate_active. This ensures | ||
| 303 | * that a context switch during the copy of the new state | ||
| 304 | * does not save or restore the intermediate state, | ||
| 305 | * which would corrupt the freshly restored state. | ||
| 306 | * We will be ready to restore/save the state only after | ||
| 307 | * fpu->fpstate_active is set again. | ||
| 308 | */ | ||
| 309 | fpu__drop(fpu); | ||
| 310 | |||
| 311 | if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || | ||
| 312 | __copy_from_user(&env, buf, sizeof(env))) { | ||
| 313 | fpstate_init(&fpu->state); | ||
| 314 | err = -1; | ||
| 315 | } else { | ||
| 316 | sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); | ||
| 317 | } | ||
| 318 | |||
| 319 | fpu->fpstate_active = 1; | ||
| 320 | if (use_eager_fpu()) { | ||
| 321 | preempt_disable(); | ||
| 322 | fpu__restore(fpu); | ||
| 323 | preempt_enable(); | ||
| 324 | } | ||
| 325 | |||
| 326 | return err; | ||
| 327 | } else { | ||
| 328 | /* | ||
| 329 | * For 64-bit frames and 32-bit fsave frames, restore the user | ||
| 330 | * state to the registers directly (with exceptions handled). | ||
| 331 | */ | ||
| 332 | user_fpu_begin(); | ||
| 333 | if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { | ||
| 334 | fpu__clear(fpu); | ||
| 335 | return -1; | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
| 339 | return 0; | ||
| 340 | } | ||
| 341 | |||
| 342 | static inline int xstate_sigframe_size(void) | ||
| 343 | { | ||
| 344 | return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; | ||
| 345 | } | ||
| 346 | |||
| 347 | /* | ||
| 348 | * Restore FPU state from a sigframe: | ||
| 349 | */ | ||
| 350 | int fpu__restore_sig(void __user *buf, int ia32_frame) | ||
| 351 | { | ||
| 352 | void __user *buf_fx = buf; | ||
| 353 | int size = xstate_sigframe_size(); | ||
| 354 | |||
| 355 | if (ia32_frame && use_fxsr()) { | ||
| 356 | buf_fx = buf + sizeof(struct fregs_state); | ||
| 357 | size += sizeof(struct fregs_state); | ||
| 358 | } | ||
| 359 | |||
| 360 | return __fpu__restore_sig(buf, buf_fx, size); | ||
| 361 | } | ||
| 362 | |||
| 363 | unsigned long | ||
| 364 | fpu__alloc_mathframe(unsigned long sp, int ia32_frame, | ||
| 365 | unsigned long *buf_fx, unsigned long *size) | ||
| 366 | { | ||
| 367 | unsigned long frame_size = xstate_sigframe_size(); | ||
| 368 | |||
| 369 | *buf_fx = sp = round_down(sp - frame_size, 64); | ||
| 370 | if (ia32_frame && use_fxsr()) { | ||
| 371 | frame_size += sizeof(struct fregs_state); | ||
| 372 | sp -= sizeof(struct fregs_state); | ||
| 373 | } | ||
| 374 | |||
| 375 | *size = frame_size; | ||
| 376 | |||
| 377 | return sp; | ||
| 378 | } | ||
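The round_down() above is what guarantees the 64-byte alignment that XSAVE/XRSTOR demand for 'buf_fx'. A worked example of the arithmetic, with made-up numbers:

    #include <assert.h>

    /* Local stand-in for the kernel's round_down() helper: */
    #define round_down(x, a)  ((x) & ~((unsigned long)(a) - 1))

    int main(void)
    {
            unsigned long sp = 0x7fffffffe4c8;  /* incoming user stack pointer */
            unsigned long frame_size = 832;     /* e.g. xstate_size + magic2 */
            unsigned long buf_fx = round_down(sp - frame_size, 64);

            assert((buf_fx & 63) == 0);         /* 64-byte aligned for XSAVE */
            assert(buf_fx <= sp - frame_size);  /* sits below the old frame */
            return 0;
    }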
| 379 | /* | ||
| 380 | * Prepare the SW reserved portion of the fxsave memory layout, indicating | ||
| 381 | * the presence of the extended state information in the memory layout | ||
| 382 | * pointed to by the fpstate pointer in the sigcontext. | ||
| 383 | * This will be saved whenever the FP and extended state context is | ||
| 384 | * saved on the user stack during the signal handler delivery to the user. | ||
| 385 | */ | ||
| 386 | void fpu__init_prepare_fx_sw_frame(void) | ||
| 387 | { | ||
| 388 | int fsave_header_size = sizeof(struct fregs_state); | ||
| 389 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; | ||
| 390 | |||
| 391 | if (config_enabled(CONFIG_X86_32)) | ||
| 392 | size += fsave_header_size; | ||
| 393 | |||
| 394 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | ||
| 395 | fx_sw_reserved.extended_size = size; | ||
| 396 | fx_sw_reserved.xfeatures = xfeatures_mask; | ||
| 397 | fx_sw_reserved.xstate_size = xstate_size; | ||
| 398 | |||
| 399 | if (config_enabled(CONFIG_IA32_EMULATION)) { | ||
| 400 | fx_sw_reserved_ia32 = fx_sw_reserved; | ||
| 401 | fx_sw_reserved_ia32.extended_size += fsave_header_size; | ||
| 402 | } | ||
| 403 | } | ||
| 404 | |||
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c new file mode 100644 index 000000000000..62fc001c7846 --- /dev/null +++ b/arch/x86/kernel/fpu/xstate.c | |||
| @@ -0,0 +1,461 @@ | |||
| 1 | /* | ||
| 2 | * xsave/xrstor support. | ||
| 3 | * | ||
| 4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> | ||
| 5 | */ | ||
| 6 | #include <linux/compat.h> | ||
| 7 | #include <linux/cpu.h> | ||
| 8 | |||
| 9 | #include <asm/fpu/api.h> | ||
| 10 | #include <asm/fpu/internal.h> | ||
| 11 | #include <asm/fpu/signal.h> | ||
| 12 | #include <asm/fpu/regset.h> | ||
| 13 | |||
| 14 | #include <asm/tlbflush.h> | ||
| 15 | |||
| 16 | static const char *xfeature_names[] = | ||
| 17 | { | ||
| 18 | "x87 floating point registers" , | ||
| 19 | "SSE registers" , | ||
| 20 | "AVX registers" , | ||
| 21 | "MPX bounds registers" , | ||
| 22 | "MPX CSR" , | ||
| 23 | "AVX-512 opmask" , | ||
| 24 | "AVX-512 Hi256" , | ||
| 25 | "AVX-512 ZMM_Hi256" , | ||
| 26 | "unknown xstate feature" , | ||
| 27 | }; | ||
| 28 | |||
| 29 | /* | ||
| 30 | * Mask of xstate features supported by the CPU and the kernel: | ||
| 31 | */ | ||
| 32 | u64 xfeatures_mask __read_mostly; | ||
| 33 | |||
| 34 | static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; | ||
| 35 | static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; | ||
| 36 | static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; | ||
| 37 | |||
| 38 | /* The number of supported xfeatures in xfeatures_mask: */ | ||
| 39 | static unsigned int xfeatures_nr; | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Return whether the system supports a given xfeature. | ||
| 43 | * | ||
| 44 | * Also return the name of the (most advanced) feature that the caller requested: | ||
| 45 | */ | ||
| 46 | int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) | ||
| 47 | { | ||
| 48 | u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask; | ||
| 49 | |||
| 50 | if (unlikely(feature_name)) { | ||
| 51 | long xfeature_idx, max_idx; | ||
| 52 | u64 xfeatures_print; | ||
| 53 | /* | ||
| 54 | * We use fls64() here so that we can print the most advanced | ||
| 55 | * feature that was requested but is missing. If a driver | ||
| 56 | * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the | ||
| 57 | * missing AVX feature - this is the most informative message | ||
| 58 | * to users: | ||
| 59 | */ | ||
| 60 | if (xfeatures_missing) | ||
| 61 | xfeatures_print = xfeatures_missing; | ||
| 62 | else | ||
| 63 | xfeatures_print = xfeatures_needed; | ||
| 64 | |||
| 65 | xfeature_idx = fls64(xfeatures_print)-1; | ||
| 66 | max_idx = ARRAY_SIZE(xfeature_names)-1; | ||
| 67 | xfeature_idx = min(xfeature_idx, max_idx); | ||
| 68 | |||
| 69 | *feature_name = xfeature_names[xfeature_idx]; | ||
| 70 | } | ||
| 71 | |||
| 72 | if (xfeatures_missing) | ||
| 73 | return 0; | ||
| 74 | |||
| 75 | return 1; | ||
| 76 | } | ||
| 77 | EXPORT_SYMBOL_GPL(cpu_has_xfeatures); | ||
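cpu_has_xfeatures() is the exported probe that drivers (the AVX crypto glue code, for instance) call before enabling xstate-dependent fast paths. A minimal sketch of a caller, using the signature above (the driver name is hypothetical):

    /* Hypothetical module init: require SSE and YMM state support. */
    static int __init my_avx_driver_init(void)
    {
            const char *feature_name;

            if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
                    pr_info("CPU feature '%s' missing, skipping AVX path\n",
                            feature_name);
                    return -ENODEV;
            }
            return 0;
    }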
| 78 | |||
| 79 | /* | ||
| 80 | * When executing XSAVEOPT (or other optimized XSAVE instructions), if | ||
| 81 | * a processor implementation detects that an FPU state component is still | ||
| 82 | * (or is again) in its initialized state, it may clear the corresponding | ||
| 83 | * bit in the header.xfeatures field, and can skip the writeout of registers | ||
| 84 | * to the corresponding memory layout. | ||
| 85 | * | ||
| 86 | * This means that when the bit is zero, the state component might still contain | ||
| 87 | * some previous - non-initialized register state. | ||
| 88 | * | ||
| 89 | * Before writing xstate information to user-space we sanitize those components, | ||
| 90 | * to always ensure that the memory layout of a feature will be in the init state | ||
| 91 | * if the corresponding header bit is zero. This is to ensure that user-space doesn't | ||
| 92 | * see some stale state in the memory layout during signal handling, debugging etc. | ||
| 93 | */ | ||
| 94 | void fpstate_sanitize_xstate(struct fpu *fpu) | ||
| 95 | { | ||
| 96 | struct fxregs_state *fx = &fpu->state.fxsave; | ||
| 97 | int feature_bit; | ||
| 98 | u64 xfeatures; | ||
| 99 | |||
| 100 | if (!use_xsaveopt()) | ||
| 101 | return; | ||
| 102 | |||
| 103 | xfeatures = fpu->state.xsave.header.xfeatures; | ||
| 104 | |||
| 105 | /* | ||
| 106 | * None of the feature bits are in init state, so there is nothing | ||
| 107 | * else for us to do: the memory layout is already up to date. | ||
| 108 | */ | ||
| 109 | if ((xfeatures & xfeatures_mask) == xfeatures_mask) | ||
| 110 | return; | ||
| 111 | |||
| 112 | /* | ||
| 113 | * FP is in init state | ||
| 114 | */ | ||
| 115 | if (!(xfeatures & XSTATE_FP)) { | ||
| 116 | fx->cwd = 0x37f; | ||
| 117 | fx->swd = 0; | ||
| 118 | fx->twd = 0; | ||
| 119 | fx->fop = 0; | ||
| 120 | fx->rip = 0; | ||
| 121 | fx->rdp = 0; | ||
| 122 | memset(&fx->st_space[0], 0, 128); | ||
| 123 | } | ||
| 124 | |||
| 125 | /* | ||
| 126 | * SSE is in init state | ||
| 127 | */ | ||
| 128 | if (!(xfeatures & XSTATE_SSE)) | ||
| 129 | memset(&fx->xmm_space[0], 0, 256); | ||
| 130 | |||
| 131 | /* | ||
| 132 | * The first two features are FPU and SSE, which we already handled | ||
| 133 | * above in a special way: | ||
| 134 | */ | ||
| 135 | feature_bit = 0x2; | ||
| 136 | xfeatures = (xfeatures_mask & ~xfeatures) >> 2; | ||
| 137 | |||
| 138 | /* | ||
| 139 | * Update all the remaining memory layouts according to their | ||
| 140 | * standard xstate layout, if their header bit is in the init | ||
| 141 | * state: | ||
| 142 | */ | ||
| 143 | while (xfeatures) { | ||
| 144 | if (xfeatures & 0x1) { | ||
| 145 | int offset = xstate_offsets[feature_bit]; | ||
| 146 | int size = xstate_sizes[feature_bit]; | ||
| 147 | |||
| 148 | memcpy((void *)fx + offset, | ||
| 149 | (void *)&init_fpstate.xsave + offset, | ||
| 150 | size); | ||
| 151 | } | ||
| 152 | |||
| 153 | xfeatures >>= 1; | ||
| 154 | feature_bit++; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | /* | ||
| 159 | * Enable the extended processor state save/restore feature. | ||
| 160 | * Called once per CPU onlining. | ||
| 161 | */ | ||
| 162 | void fpu__init_cpu_xstate(void) | ||
| 163 | { | ||
| 164 | if (!cpu_has_xsave || !xfeatures_mask) | ||
| 165 | return; | ||
| 166 | |||
| 167 | cr4_set_bits(X86_CR4_OSXSAVE); | ||
| 168 | xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); | ||
| 169 | } | ||
| 170 | |||
| 171 | /* | ||
| 172 | * Record the offsets and sizes of various xstates contained | ||
| 173 | * in the XSAVE state memory layout. | ||
| 174 | * | ||
| 175 | * ( Note that certain features might be non-present, for them | ||
| 176 | * we'll have 0 offset and 0 size. ) | ||
| 177 | */ | ||
| 178 | static void __init setup_xstate_features(void) | ||
| 179 | { | ||
| 180 | u32 eax, ebx, ecx, edx, leaf; | ||
| 181 | |||
| 182 | xfeatures_nr = fls64(xfeatures_mask); | ||
| 183 | |||
| 184 | for (leaf = 2; leaf < xfeatures_nr; leaf++) { | ||
| 185 | cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); | ||
| 186 | |||
| 187 | xstate_offsets[leaf] = ebx; | ||
| 188 | xstate_sizes[leaf] = eax; | ||
| 189 | |||
| 190 | printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax); | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | static void __init print_xstate_feature(u64 xstate_mask) | ||
| 195 | { | ||
| 196 | const char *feature_name; | ||
| 197 | |||
| 198 | if (cpu_has_xfeatures(xstate_mask, &feature_name)) | ||
| 199 | pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name); | ||
| 200 | } | ||
| 201 | |||
| 202 | /* | ||
| 203 | * Print out all the supported xstate features: | ||
| 204 | */ | ||
| 205 | static void __init print_xstate_features(void) | ||
| 206 | { | ||
| 207 | print_xstate_feature(XSTATE_FP); | ||
| 208 | print_xstate_feature(XSTATE_SSE); | ||
| 209 | print_xstate_feature(XSTATE_YMM); | ||
| 210 | print_xstate_feature(XSTATE_BNDREGS); | ||
| 211 | print_xstate_feature(XSTATE_BNDCSR); | ||
| 212 | print_xstate_feature(XSTATE_OPMASK); | ||
| 213 | print_xstate_feature(XSTATE_ZMM_Hi256); | ||
| 214 | print_xstate_feature(XSTATE_Hi16_ZMM); | ||
| 215 | } | ||
| 216 | |||
| 217 | /* | ||
| 218 | * This function sets up the offsets and sizes of all extended states in | ||
| 219 | * the xsave area. It supports both the standard format and the compacted | ||
| 220 | * format of the xsave area. | ||
| 221 | */ | ||
| 222 | static void __init setup_xstate_comp(void) | ||
| 223 | { | ||
| 224 | unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; | ||
| 225 | int i; | ||
| 226 | |||
| 227 | /* | ||
| 228 | * The FP xstates and SSE xstates are legacy states. They are always | ||
| 229 | * in the fixed offsets in the xsave area in either compacted form | ||
| 230 | * or standard form. | ||
| 231 | */ | ||
| 232 | xstate_comp_offsets[0] = 0; | ||
| 233 | xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); | ||
| 234 | |||
| 235 | if (!cpu_has_xsaves) { | ||
| 236 | for (i = 2; i < xfeatures_nr; i++) { | ||
| 237 | if (test_bit(i, (unsigned long *)&xfeatures_mask)) { | ||
| 238 | xstate_comp_offsets[i] = xstate_offsets[i]; | ||
| 239 | xstate_comp_sizes[i] = xstate_sizes[i]; | ||
| 240 | } | ||
| 241 | } | ||
| 242 | return; | ||
| 243 | } | ||
| 244 | |||
| 245 | xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; | ||
| 246 | |||
| 247 | for (i = 2; i < xfeatures_nr; i++) { | ||
| 248 | if (test_bit(i, (unsigned long *)&xfeatures_mask)) | ||
| 249 | xstate_comp_sizes[i] = xstate_sizes[i]; | ||
| 250 | else | ||
| 251 | xstate_comp_sizes[i] = 0; | ||
| 252 | |||
| 253 | if (i > 2) | ||
| 254 | xstate_comp_offsets[i] = xstate_comp_offsets[i-1] | ||
| 255 | + xstate_comp_sizes[i-1]; | ||
| 256 | |||
| 257 | } | ||
| 258 | } | ||
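In the compacted format, each enabled state simply packs right behind the previous one, starting after the 512-byte fxsave image plus the 64-byte xsave header. A toy recomputation of the running-sum offsets (illustrative sizes for YMM and the two MPX states):

    #include <stdio.h>

    #define FXSAVE_SIZE    512
    #define XSAVE_HDR_SIZE  64

    int main(void)
    {
            /* Per-feature sizes for features 2..4 (YMM, BNDREGS, BNDCSR): */
            unsigned int size[5] = { 0, 0, 256, 64, 64 };
            unsigned int offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;  /* 576 */
            int i;

            for (i = 2; i < 5; i++) {
                    printf("feature %d: offset %u, size %u\n", i, offset, size[i]);
                    offset += size[i];   /* next state packs right behind */
            }
            return 0;
    }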
| 259 | |||
| 260 | /* | ||
| 261 | * setup the xstate image representing the init state | ||
| 262 | */ | ||
| 263 | static void __init setup_init_fpu_buf(void) | ||
| 264 | { | ||
| 265 | static int on_boot_cpu = 1; | ||
| 266 | |||
| 267 | WARN_ON_FPU(!on_boot_cpu); | ||
| 268 | on_boot_cpu = 0; | ||
| 269 | |||
| 270 | if (!cpu_has_xsave) | ||
| 271 | return; | ||
| 272 | |||
| 273 | setup_xstate_features(); | ||
| 274 | print_xstate_features(); | ||
| 275 | |||
| 276 | if (cpu_has_xsaves) { | ||
| 277 | init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; | ||
| 278 | init_fpstate.xsave.header.xfeatures = xfeatures_mask; | ||
| 279 | } | ||
| 280 | |||
| 281 | /* | ||
| 282 | * Init all the features state with header.xfeatures being 0x0 | ||
| 283 | */ | ||
| 284 | copy_kernel_to_xregs_booting(&init_fpstate.xsave); | ||
| 285 | |||
| 286 | /* | ||
| 287 | * Dump the init state again. This is to identify the init state | ||
| 288 | * of any feature which is not represented by all zero's. | ||
| 289 | */ | ||
| 290 | copy_xregs_to_kernel_booting(&init_fpstate.xsave); | ||
| 291 | } | ||
| 292 | |||
| 293 | /* | ||
| 294 | * Calculate total size of enabled xstates in XCR0/xfeatures_mask. | ||
| 295 | */ | ||
| 296 | static void __init init_xstate_size(void) | ||
| 297 | { | ||
| 298 | unsigned int eax, ebx, ecx, edx; | ||
| 299 | int i; | ||
| 300 | |||
| 301 | if (!cpu_has_xsaves) { | ||
| 302 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | ||
| 303 | xstate_size = ebx; | ||
| 304 | return; | ||
| 305 | } | ||
| 306 | |||
| 307 | xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; | ||
| 308 | for (i = 2; i < 64; i++) { | ||
| 309 | if (test_bit(i, (unsigned long *)&xfeatures_mask)) { | ||
| 310 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); | ||
| 311 | xstate_size += eax; | ||
| 312 | } | ||
| 313 | } | ||
| 314 | } | ||
| 315 | |||
| 316 | /* | ||
| 317 | * Enable and initialize the xsave feature. | ||
| 318 | * Called once per system bootup. | ||
| 319 | */ | ||
| 320 | void __init fpu__init_system_xstate(void) | ||
| 321 | { | ||
| 322 | unsigned int eax, ebx, ecx, edx; | ||
| 323 | static int on_boot_cpu = 1; | ||
| 324 | |||
| 325 | WARN_ON_FPU(!on_boot_cpu); | ||
| 326 | on_boot_cpu = 0; | ||
| 327 | |||
| 328 | if (!cpu_has_xsave) { | ||
| 329 | pr_info("x86/fpu: Legacy x87 FPU detected.\n"); | ||
| 330 | return; | ||
| 331 | } | ||
| 332 | |||
| 333 | if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { | ||
| 334 | WARN_ON_FPU(1); | ||
| 335 | return; | ||
| 336 | } | ||
| 337 | |||
| 338 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | ||
| 339 | xfeatures_mask = eax + ((u64)edx << 32); | ||
| 340 | |||
| 341 | if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | ||
| 342 | pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); | ||
| 343 | BUG(); | ||
| 344 | } | ||
| 345 | |||
| 346 | /* Support only the state known to the OS: */ | ||
| 347 | xfeatures_mask = xfeatures_mask & XCNTXT_MASK; | ||
| 348 | |||
| 349 | /* Enable xstate instructions to be able to continue with initialization: */ | ||
| 350 | fpu__init_cpu_xstate(); | ||
| 351 | |||
| 352 | /* Recompute the context size for enabled features: */ | ||
| 353 | init_xstate_size(); | ||
| 354 | |||
| 355 | update_regset_xstate_info(xstate_size, xfeatures_mask); | ||
| 356 | fpu__init_prepare_fx_sw_frame(); | ||
| 357 | setup_init_fpu_buf(); | ||
| 358 | setup_xstate_comp(); | ||
| 359 | |||
| 360 | pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", | ||
| 361 | xfeatures_mask, | ||
| 362 | xstate_size, | ||
| 363 | cpu_has_xsaves ? "compacted" : "standard"); | ||
| 364 | } | ||
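The CPUID arithmetic in fpu__init_system_xstate() can be checked from user space. A hedged sketch using the GCC/Clang cpuid.h wrapper (the reported values vary by CPU):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0xD, sub-leaf 0: XSAVE feature enumeration */
	if (!__get_cpuid_count(0x0d, 0, &eax, &ebx, &ecx, &edx))
		return 1;

	/* same eax + (edx << 32) composition as the kernel code above */
	printf("supported xfeatures: 0x%016llx\n",
	       eax + ((unsigned long long)edx << 32));
	printf("xsave area size for XCR0-enabled features: %u bytes\n", ebx);
	return 0;
}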
| 365 | |||
| 366 | /* | ||
| 367 | * Restore minimal FPU state after suspend: | ||
| 368 | */ | ||
| 369 | void fpu__resume_cpu(void) | ||
| 370 | { | ||
| 371 | /* | ||
| 372 | * Restore XCR0 on xsave capable CPUs: | ||
| 373 | */ | ||
| 374 | if (cpu_has_xsave) | ||
| 375 | xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); | ||
| 376 | } | ||
| 377 | |||
| 378 | /* | ||
| 379 | * Given the xsave area and a state inside, this function returns the | ||
| 380 | * address of the state. | ||
| 381 | * | ||
| 382 | * This is the API used to get the address of an xstate field in | ||
| 383 | * either the standard or the compacted format of the xsave area. | ||
| 384 | * | ||
| 385 | * Note that if there is no data for the field in the xsave buffer | ||
| 386 | * this will return NULL. | ||
| 387 | * | ||
| 388 | * Inputs: | ||
| 389 | * xstate: the thread's storage area for all FPU data | ||
| 390 | * xstate_feature: state which is defined in xsave.h (e.g. | ||
| 391 | * XSTATE_FP, XSTATE_SSE, etc...) | ||
| 392 | * Output: | ||
| 393 | * address of the state in the xsave area, or NULL if the | ||
| 394 | * field is not present in the xsave buffer. | ||
| 395 | */ | ||
| 396 | void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) | ||
| 397 | { | ||
| 398 | int feature_nr = fls64(xstate_feature) - 1; | ||
| 399 | /* | ||
| 400 | * Do we even *have* xsave state? | ||
| 401 | */ | ||
| 402 | if (!boot_cpu_has(X86_FEATURE_XSAVE)) | ||
| 403 | return NULL; | ||
| 404 | |||
| 405 | xsave = &current->thread.fpu.state.xsave; | ||
| 406 | /* | ||
| 407 | * We should not ever be requesting features that we | ||
| 408 | * have not enabled. Remember that xfeatures_mask is | ||
| 409 | * what we write to the XCR0 register. | ||
| 410 | */ | ||
| 411 | WARN_ONCE(!(xfeatures_mask & xstate_feature), | ||
| 412 | "get of unsupported state"); | ||
| 413 | /* | ||
| 414 | * This assumes that the last 'xsave*' instruction | ||
| 415 | * requested that 'xstate_feature' be saved. | ||
| 416 | * If it did not, we might be seeing an old value | ||
| 417 | * of the field in the buffer. | ||
| 418 | * | ||
| 419 | * This can happen because the last 'xsave' did not | ||
| 420 | * request that this feature be saved (unlikely) | ||
| 421 | * or because the "init optimization" caused it | ||
| 422 | * to not be saved. | ||
| 423 | */ | ||
| 424 | if (!(xsave->header.xfeatures & xstate_feature)) | ||
| 425 | return NULL; | ||
| 426 | |||
| 427 | return (void *)xsave + xstate_comp_offsets[feature_nr]; | ||
| 428 | } | ||
| 429 | EXPORT_SYMBOL_GPL(get_xsave_addr); | ||
| 430 | |||
| 431 | /* | ||
| 432 | * This wraps up the common operations that need to occur when retrieving | ||
| 433 | * data from xsave state. It first ensures that the current task was | ||
| 434 | * using the FPU and retrieves the data into a buffer. It then calculates | ||
| 435 | * the offset of the requested field in the buffer. | ||
| 436 | * | ||
| 437 | * This function is safe to call whether the FPU is in use or not. | ||
| 438 | * | ||
| 439 | * Note that this only works on the current task. | ||
| 440 | * | ||
| 441 | * Inputs: | ||
| 442 | * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP, | ||
| 443 | * XSTATE_SSE, etc...) | ||
| 444 | * Output: | ||
| 445 | * address of the state in the xsave area or NULL if the state | ||
| 446 | * is not present or is in its 'init state'. | ||
| 447 | */ | ||
| 448 | const void *get_xsave_field_ptr(int xsave_state) | ||
| 449 | { | ||
| 450 | struct fpu *fpu = &current->thread.fpu; | ||
| 451 | |||
| 452 | if (!fpu->fpstate_active) | ||
| 453 | return NULL; | ||
| 454 | /* | ||
| 455 | * fpu__save() takes the CPU's xstate registers | ||
| 456 | * and saves them off to the 'fpu' memory buffer. | ||
| 457 | */ | ||
| 458 | fpu__save(fpu); | ||
| 459 | |||
| 460 | return get_xsave_addr(&fpu->state.xsave, xsave_state); | ||
| 461 | } | ||
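The intended calling pattern shows up in the do_bounds() change later in this patch; in sketch form (kernel context, hypothetical function name):

/* Sketch only: mirrors the do_bounds() usage of get_xsave_field_ptr(). */
static void example_read_bndcsr(void)
{
	const struct bndcsr *bndcsr;

	bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
	if (!bndcsr)
		return;	/* MPX state not enabled, or still in its init state */

	pr_info("x86/fpu: BNDSTATUS: 0x%llx\n", bndcsr->bndstatus);
}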
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c deleted file mode 100644 index 6185d3141219..000000000000 --- a/arch/x86/kernel/i387.c +++ /dev/null | |||
| @@ -1,671 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) 1994 Linus Torvalds | ||
| 3 | * | ||
| 4 | * Pentium III FXSR, SSE support | ||
| 5 | * General FPU state handling cleanups | ||
| 6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
| 7 | */ | ||
| 8 | #include <linux/module.h> | ||
| 9 | #include <linux/regset.h> | ||
| 10 | #include <linux/sched.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | |||
| 13 | #include <asm/sigcontext.h> | ||
| 14 | #include <asm/processor.h> | ||
| 15 | #include <asm/math_emu.h> | ||
| 16 | #include <asm/tlbflush.h> | ||
| 17 | #include <asm/uaccess.h> | ||
| 18 | #include <asm/ptrace.h> | ||
| 19 | #include <asm/i387.h> | ||
| 20 | #include <asm/fpu-internal.h> | ||
| 21 | #include <asm/user.h> | ||
| 22 | |||
| 23 | static DEFINE_PER_CPU(bool, in_kernel_fpu); | ||
| 24 | |||
| 25 | void kernel_fpu_disable(void) | ||
| 26 | { | ||
| 27 | WARN_ON(this_cpu_read(in_kernel_fpu)); | ||
| 28 | this_cpu_write(in_kernel_fpu, true); | ||
| 29 | } | ||
| 30 | |||
| 31 | void kernel_fpu_enable(void) | ||
| 32 | { | ||
| 33 | this_cpu_write(in_kernel_fpu, false); | ||
| 34 | } | ||
| 35 | |||
| 36 | /* | ||
| 37 | * Were we in an interrupt that interrupted kernel mode? | ||
| 38 | * | ||
| 39 | * We can do a kernel_fpu_begin/end() pair *ONLY* if that | ||
| 40 | * pair does nothing at all: the thread must not have fpu (so | ||
| 41 | * that we don't try to save the FPU state), and TS must | ||
| 42 | * be set (so that the clts/stts pair does nothing that is | ||
| 43 | * visible in the interrupted kernel thread). | ||
| 44 | * | ||
| 45 | * The exception is the eagerfpu case, where we return true; in the likely | ||
| 46 | * case the thread has FPU state but we are not going to set/clear TS. | ||
| 47 | */ | ||
| 48 | static inline bool interrupted_kernel_fpu_idle(void) | ||
| 49 | { | ||
| 50 | if (this_cpu_read(in_kernel_fpu)) | ||
| 51 | return false; | ||
| 52 | |||
| 53 | if (use_eager_fpu()) | ||
| 54 | return true; | ||
| 55 | |||
| 56 | return !__thread_has_fpu(current) && | ||
| 57 | (read_cr0() & X86_CR0_TS); | ||
| 58 | } | ||
| 59 | |||
| 60 | /* | ||
| 61 | * Were we in user mode (or vm86 mode) when we were | ||
| 62 | * interrupted? | ||
| 63 | * | ||
| 64 | * Doing kernel_fpu_begin/end() is ok if we are running | ||
| 65 | * in an interrupt context from user mode - we'll just | ||
| 66 | * save the FPU state as required. | ||
| 67 | */ | ||
| 68 | static inline bool interrupted_user_mode(void) | ||
| 69 | { | ||
| 70 | struct pt_regs *regs = get_irq_regs(); | ||
| 71 | return regs && user_mode(regs); | ||
| 72 | } | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Can we use the FPU in kernel mode with the | ||
| 76 | * whole "kernel_fpu_begin/end()" sequence? | ||
| 77 | * | ||
| 78 | * It's always ok in process context (ie "not interrupt") | ||
| 79 | * but it is sometimes ok even from an irq. | ||
| 80 | */ | ||
| 81 | bool irq_fpu_usable(void) | ||
| 82 | { | ||
| 83 | return !in_interrupt() || | ||
| 84 | interrupted_user_mode() || | ||
| 85 | interrupted_kernel_fpu_idle(); | ||
| 86 | } | ||
| 87 | EXPORT_SYMBOL(irq_fpu_usable); | ||
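For context, a sketch of the caller pattern these checks protect. The helper name is hypothetical; kernel_fpu_begin()/kernel_fpu_end() are the public wrappers around the __kernel_fpu_begin()/__kernel_fpu_end() pair below:

/* Hypothetical example: use SIMD in kernel code only when it is safe. */
static void copy_with_simd_if_possible(void *dst, const void *src, size_t len)
{
	if (!irq_fpu_usable()) {
		memcpy(dst, src, len);	/* plain integer fallback */
		return;
	}

	kernel_fpu_begin();
	/* ... an SSE/AVX-accelerated copy would go here ... */
	memcpy(dst, src, len);
	kernel_fpu_end();
}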
| 88 | |||
| 89 | void __kernel_fpu_begin(void) | ||
| 90 | { | ||
| 91 | struct task_struct *me = current; | ||
| 92 | |||
| 93 | this_cpu_write(in_kernel_fpu, true); | ||
| 94 | |||
| 95 | if (__thread_has_fpu(me)) { | ||
| 96 | __save_init_fpu(me); | ||
| 97 | } else { | ||
| 98 | this_cpu_write(fpu_owner_task, NULL); | ||
| 99 | if (!use_eager_fpu()) | ||
| 100 | clts(); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | EXPORT_SYMBOL(__kernel_fpu_begin); | ||
| 104 | |||
| 105 | void __kernel_fpu_end(void) | ||
| 106 | { | ||
| 107 | struct task_struct *me = current; | ||
| 108 | |||
| 109 | if (__thread_has_fpu(me)) { | ||
| 110 | if (WARN_ON(restore_fpu_checking(me))) | ||
| 111 | fpu_reset_state(me); | ||
| 112 | } else if (!use_eager_fpu()) { | ||
| 113 | stts(); | ||
| 114 | } | ||
| 115 | |||
| 116 | this_cpu_write(in_kernel_fpu, false); | ||
| 117 | } | ||
| 118 | EXPORT_SYMBOL(__kernel_fpu_end); | ||
| 119 | |||
| 120 | void unlazy_fpu(struct task_struct *tsk) | ||
| 121 | { | ||
| 122 | preempt_disable(); | ||
| 123 | if (__thread_has_fpu(tsk)) { | ||
| 124 | if (use_eager_fpu()) { | ||
| 125 | __save_fpu(tsk); | ||
| 126 | } else { | ||
| 127 | __save_init_fpu(tsk); | ||
| 128 | __thread_fpu_end(tsk); | ||
| 129 | } | ||
| 130 | } | ||
| 131 | preempt_enable(); | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL(unlazy_fpu); | ||
| 134 | |||
| 135 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; | ||
| 136 | unsigned int xstate_size; | ||
| 137 | EXPORT_SYMBOL_GPL(xstate_size); | ||
| 138 | static struct i387_fxsave_struct fx_scratch; | ||
| 139 | |||
| 140 | static void mxcsr_feature_mask_init(void) | ||
| 141 | { | ||
| 142 | unsigned long mask = 0; | ||
| 143 | |||
| 144 | if (cpu_has_fxsr) { | ||
| 145 | memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); | ||
| 146 | asm volatile("fxsave %0" : "+m" (fx_scratch)); | ||
| 147 | mask = fx_scratch.mxcsr_mask; | ||
| 148 | if (mask == 0) | ||
| 149 | mask = 0x0000ffbf; | ||
| 150 | } | ||
| 151 | mxcsr_feature_mask &= mask; | ||
| 152 | } | ||
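The mask read here sits at byte offset 28 of the 512-byte FXSAVE image, and a value of zero means the architectural default of 0x0000ffbf. The same probe can be replayed from user space; a minimal sketch, x86 only and purely illustrative:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	/* FXSAVE requires a 16-byte aligned, 512-byte memory operand */
	static struct {
		uint8_t bytes[512];
	} __attribute__((aligned(16))) fx;
	uint32_t mask;

	asm volatile("fxsave %0" : "+m" (fx));

	memcpy(&mask, fx.bytes + 28, sizeof(mask));	/* MXCSR_MASK field */
	if (!mask)
		mask = 0x0000ffbf;	/* documented default */
	printf("MXCSR feature mask: 0x%08x\n", mask);
	return 0;
}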
| 153 | |||
| 154 | static void init_thread_xstate(void) | ||
| 155 | { | ||
| 156 | /* | ||
| 157 | * Note that xstate_size might be overwritten later during | ||
| 158 | * xsave_init(). | ||
| 159 | */ | ||
| 160 | |||
| 161 | if (!cpu_has_fpu) { | ||
| 162 | /* | ||
| 163 | * Disable xsave as we do not support it if i387 | ||
| 164 | * emulation is enabled. | ||
| 165 | */ | ||
| 166 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | ||
| 167 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | ||
| 168 | xstate_size = sizeof(struct i387_soft_struct); | ||
| 169 | return; | ||
| 170 | } | ||
| 171 | |||
| 172 | if (cpu_has_fxsr) | ||
| 173 | xstate_size = sizeof(struct i387_fxsave_struct); | ||
| 174 | else | ||
| 175 | xstate_size = sizeof(struct i387_fsave_struct); | ||
| 176 | |||
| 177 | /* | ||
| 178 | * Quirk: we don't yet handle the XSAVES* instructions | ||
| 179 | * correctly, as we don't correctly convert between | ||
| 180 | * standard and compacted format when interfacing | ||
| 181 | * with user-space - so disable it for now. | ||
| 182 | * | ||
| 183 | * The difference is small: with recent CPUs the | ||
| 184 | * compacted format is only marginally smaller than | ||
| 185 | * the standard FPU state format. | ||
| 186 | * | ||
| 187 | * ( This is easy to backport while we are fixing | ||
| 188 | * XSAVES* support. ) | ||
| 189 | */ | ||
| 190 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | ||
| 191 | } | ||
| 192 | |||
| 193 | /* | ||
| 194 | * Called at bootup to set up the initial FPU state that is later cloned | ||
| 195 | * into all processes. | ||
| 196 | */ | ||
| 197 | |||
| 198 | void fpu_init(void) | ||
| 199 | { | ||
| 200 | unsigned long cr0; | ||
| 201 | unsigned long cr4_mask = 0; | ||
| 202 | |||
| 203 | #ifndef CONFIG_MATH_EMULATION | ||
| 204 | if (!cpu_has_fpu) { | ||
| 205 | pr_emerg("No FPU found and no math emulation present\n"); | ||
| 206 | pr_emerg("Giving up\n"); | ||
| 207 | for (;;) | ||
| 208 | asm volatile("hlt"); | ||
| 209 | } | ||
| 210 | #endif | ||
| 211 | if (cpu_has_fxsr) | ||
| 212 | cr4_mask |= X86_CR4_OSFXSR; | ||
| 213 | if (cpu_has_xmm) | ||
| 214 | cr4_mask |= X86_CR4_OSXMMEXCPT; | ||
| 215 | if (cr4_mask) | ||
| 216 | cr4_set_bits(cr4_mask); | ||
| 217 | |||
| 218 | cr0 = read_cr0(); | ||
| 219 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | ||
| 220 | if (!cpu_has_fpu) | ||
| 221 | cr0 |= X86_CR0_EM; | ||
| 222 | write_cr0(cr0); | ||
| 223 | |||
| 224 | /* | ||
| 225 | * init_thread_xstate is only called once to avoid overriding | ||
| 226 | * xstate_size during boot time or during CPU hotplug. | ||
| 227 | */ | ||
| 228 | if (xstate_size == 0) | ||
| 229 | init_thread_xstate(); | ||
| 230 | |||
| 231 | mxcsr_feature_mask_init(); | ||
| 232 | xsave_init(); | ||
| 233 | eager_fpu_init(); | ||
| 234 | } | ||
| 235 | |||
| 236 | void fpu_finit(struct fpu *fpu) | ||
| 237 | { | ||
| 238 | if (!cpu_has_fpu) { | ||
| 239 | finit_soft_fpu(&fpu->state->soft); | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | |||
| 243 | memset(fpu->state, 0, xstate_size); | ||
| 244 | |||
| 245 | if (cpu_has_fxsr) { | ||
| 246 | fx_finit(&fpu->state->fxsave); | ||
| 247 | } else { | ||
| 248 | struct i387_fsave_struct *fp = &fpu->state->fsave; | ||
| 249 | fp->cwd = 0xffff037fu; | ||
| 250 | fp->swd = 0xffff0000u; | ||
| 251 | fp->twd = 0xffffffffu; | ||
| 252 | fp->fos = 0xffff0000u; | ||
| 253 | } | ||
| 254 | } | ||
| 255 | EXPORT_SYMBOL_GPL(fpu_finit); | ||
| 256 | |||
| 257 | /* | ||
| 258 | * The _current_ task is using the FPU for the first time, | ||
| 259 | * so initialize it, set the mxcsr to its default value at | ||
| 260 | * reset if we support XMM instructions, and then remember | ||
| 261 | * that the current task has used the FPU. | ||
| 262 | */ | ||
| 263 | int init_fpu(struct task_struct *tsk) | ||
| 264 | { | ||
| 265 | int ret; | ||
| 266 | |||
| 267 | if (tsk_used_math(tsk)) { | ||
| 268 | if (cpu_has_fpu && tsk == current) | ||
| 269 | unlazy_fpu(tsk); | ||
| 270 | task_disable_lazy_fpu_restore(tsk); | ||
| 271 | return 0; | ||
| 272 | } | ||
| 273 | |||
| 274 | /* | ||
| 275 | * Memory allocation at the first usage of the FPU and other state. | ||
| 276 | */ | ||
| 277 | ret = fpu_alloc(&tsk->thread.fpu); | ||
| 278 | if (ret) | ||
| 279 | return ret; | ||
| 280 | |||
| 281 | fpu_finit(&tsk->thread.fpu); | ||
| 282 | |||
| 283 | set_stopped_child_used_math(tsk); | ||
| 284 | return 0; | ||
| 285 | } | ||
| 286 | EXPORT_SYMBOL_GPL(init_fpu); | ||
| 287 | |||
| 288 | /* | ||
| 289 | * The xstateregs_active() routine is the same as the fpregs_active() routine, | ||
| 290 | * as the "regset->n" for the xstate regset will be updated based on the feature | ||
| 291 | * capabilities supported by xsave. | ||
| 292 | */ | ||
| 293 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) | ||
| 294 | { | ||
| 295 | return tsk_used_math(target) ? regset->n : 0; | ||
| 296 | } | ||
| 297 | |||
| 298 | int xfpregs_active(struct task_struct *target, const struct user_regset *regset) | ||
| 299 | { | ||
| 300 | return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; | ||
| 301 | } | ||
| 302 | |||
| 303 | int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | ||
| 304 | unsigned int pos, unsigned int count, | ||
| 305 | void *kbuf, void __user *ubuf) | ||
| 306 | { | ||
| 307 | int ret; | ||
| 308 | |||
| 309 | if (!cpu_has_fxsr) | ||
| 310 | return -ENODEV; | ||
| 311 | |||
| 312 | ret = init_fpu(target); | ||
| 313 | if (ret) | ||
| 314 | return ret; | ||
| 315 | |||
| 316 | sanitize_i387_state(target); | ||
| 317 | |||
| 318 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
| 319 | &target->thread.fpu.state->fxsave, 0, -1); | ||
| 320 | } | ||
| 321 | |||
| 322 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | ||
| 323 | unsigned int pos, unsigned int count, | ||
| 324 | const void *kbuf, const void __user *ubuf) | ||
| 325 | { | ||
| 326 | int ret; | ||
| 327 | |||
| 328 | if (!cpu_has_fxsr) | ||
| 329 | return -ENODEV; | ||
| 330 | |||
| 331 | ret = init_fpu(target); | ||
| 332 | if (ret) | ||
| 333 | return ret; | ||
| 334 | |||
| 335 | sanitize_i387_state(target); | ||
| 336 | |||
| 337 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
| 338 | &target->thread.fpu.state->fxsave, 0, -1); | ||
| 339 | |||
| 340 | /* | ||
| 341 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
| 342 | */ | ||
| 343 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; | ||
| 344 | |||
| 345 | /* | ||
| 346 | * update the header bits in the xsave header, indicating the | ||
| 347 | * presence of FP and SSE state. | ||
| 348 | */ | ||
| 349 | if (cpu_has_xsave) | ||
| 350 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | ||
| 351 | |||
| 352 | return ret; | ||
| 353 | } | ||
| 354 | |||
| 355 | int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | ||
| 356 | unsigned int pos, unsigned int count, | ||
| 357 | void *kbuf, void __user *ubuf) | ||
| 358 | { | ||
| 359 | struct xsave_struct *xsave; | ||
| 360 | int ret; | ||
| 361 | |||
| 362 | if (!cpu_has_xsave) | ||
| 363 | return -ENODEV; | ||
| 364 | |||
| 365 | ret = init_fpu(target); | ||
| 366 | if (ret) | ||
| 367 | return ret; | ||
| 368 | |||
| 369 | xsave = &target->thread.fpu.state->xsave; | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Copy the 48 bytes defined by the software first into the xstate | ||
| 373 | * memory layout in the thread struct, so that we can copy the entire | ||
| 374 | * xstateregs to the user using one user_regset_copyout(). | ||
| 375 | */ | ||
| 376 | memcpy(&xsave->i387.sw_reserved, | ||
| 377 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | ||
| 378 | /* | ||
| 379 | * Copy the xstate memory layout. | ||
| 380 | */ | ||
| 381 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | ||
| 382 | return ret; | ||
| 383 | } | ||
| 384 | |||
| 385 | int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | ||
| 386 | unsigned int pos, unsigned int count, | ||
| 387 | const void *kbuf, const void __user *ubuf) | ||
| 388 | { | ||
| 389 | struct xsave_struct *xsave; | ||
| 390 | int ret; | ||
| 391 | |||
| 392 | if (!cpu_has_xsave) | ||
| 393 | return -ENODEV; | ||
| 394 | |||
| 395 | ret = init_fpu(target); | ||
| 396 | if (ret) | ||
| 397 | return ret; | ||
| 398 | |||
| 399 | xsave = &target->thread.fpu.state->xsave; | ||
| 400 | |||
| 401 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); | ||
| 402 | /* | ||
| 403 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
| 404 | */ | ||
| 405 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
| 406 | xsave->xsave_hdr.xstate_bv &= pcntxt_mask; | ||
| 407 | /* | ||
| 408 | * These bits must be zero. | ||
| 409 | */ | ||
| 410 | memset(&xsave->xsave_hdr.reserved, 0, 48); | ||
| 411 | return ret; | ||
| 412 | } | ||
| 413 | |||
| 414 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
| 415 | |||
| 416 | /* | ||
| 417 | * FPU tag word conversions. | ||
| 418 | */ | ||
| 419 | |||
| 420 | static inline unsigned short twd_i387_to_fxsr(unsigned short twd) | ||
| 421 | { | ||
| 422 | unsigned int tmp; /* to avoid 16 bit prefixes in the code */ | ||
| 423 | |||
| 424 | /* Transform each pair of bits into 01 (valid) or 00 (empty) */ | ||
| 425 | tmp = ~twd; | ||
| 426 | tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ | ||
| 427 | /* and move the valid bits to the lower byte. */ | ||
| 428 | tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ | ||
| 429 | tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ | ||
| 430 | tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ | ||
| 431 | |||
| 432 | return tmp; | ||
| 433 | } | ||
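A stand-alone worked example of the tag-word compression above; the input value is arbitrary:

#include <stdio.h>

/* same transformation as twd_i387_to_fxsr(), outside the kernel */
static unsigned short compress_twd(unsigned short twd)
{
	unsigned int tmp = ~twd;

	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return tmp;
}

int main(void)
{
	/* ST0 valid (00), ST1 zero (01), ST2..ST7 empty (11) */
	unsigned short twd = 0xfff4;

	printf("0x%04x -> 0x%02x\n", twd, compress_twd(twd));	/* prints 0x03 */
	return 0;
}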
| 434 | |||
| 435 | #define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) | ||
| 436 | #define FP_EXP_TAG_VALID 0 | ||
| 437 | #define FP_EXP_TAG_ZERO 1 | ||
| 438 | #define FP_EXP_TAG_SPECIAL 2 | ||
| 439 | #define FP_EXP_TAG_EMPTY 3 | ||
| 440 | |||
| 441 | static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | ||
| 442 | { | ||
| 443 | struct _fpxreg *st; | ||
| 444 | u32 tos = (fxsave->swd >> 11) & 7; | ||
| 445 | u32 twd = (unsigned long) fxsave->twd; | ||
| 446 | u32 tag; | ||
| 447 | u32 ret = 0xffff0000u; | ||
| 448 | int i; | ||
| 449 | |||
| 450 | for (i = 0; i < 8; i++, twd >>= 1) { | ||
| 451 | if (twd & 0x1) { | ||
| 452 | st = FPREG_ADDR(fxsave, (i - tos) & 7); | ||
| 453 | |||
| 454 | switch (st->exponent & 0x7fff) { | ||
| 455 | case 0x7fff: | ||
| 456 | tag = FP_EXP_TAG_SPECIAL; | ||
| 457 | break; | ||
| 458 | case 0x0000: | ||
| 459 | if (!st->significand[0] && | ||
| 460 | !st->significand[1] && | ||
| 461 | !st->significand[2] && | ||
| 462 | !st->significand[3]) | ||
| 463 | tag = FP_EXP_TAG_ZERO; | ||
| 464 | else | ||
| 465 | tag = FP_EXP_TAG_SPECIAL; | ||
| 466 | break; | ||
| 467 | default: | ||
| 468 | if (st->significand[3] & 0x8000) | ||
| 469 | tag = FP_EXP_TAG_VALID; | ||
| 470 | else | ||
| 471 | tag = FP_EXP_TAG_SPECIAL; | ||
| 472 | break; | ||
| 473 | } | ||
| 474 | } else { | ||
| 475 | tag = FP_EXP_TAG_EMPTY; | ||
| 476 | } | ||
| 477 | ret |= tag << (2 * i); | ||
| 478 | } | ||
| 479 | return ret; | ||
| 480 | } | ||
| 481 | |||
| 482 | /* | ||
| 483 | * FXSR floating point environment conversions. | ||
| 484 | */ | ||
| 485 | |||
| 486 | void | ||
| 487 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | ||
| 488 | { | ||
| 489 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | ||
| 490 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | ||
| 491 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | ||
| 492 | int i; | ||
| 493 | |||
| 494 | env->cwd = fxsave->cwd | 0xffff0000u; | ||
| 495 | env->swd = fxsave->swd | 0xffff0000u; | ||
| 496 | env->twd = twd_fxsr_to_i387(fxsave); | ||
| 497 | |||
| 498 | #ifdef CONFIG_X86_64 | ||
| 499 | env->fip = fxsave->rip; | ||
| 500 | env->foo = fxsave->rdp; | ||
| 501 | /* | ||
| 502 | * These should actually be ds/cs at FPU exception time, but | ||
| 503 | * that information is not available in 64-bit mode. | ||
| 504 | */ | ||
| 505 | env->fcs = task_pt_regs(tsk)->cs; | ||
| 506 | if (tsk == current) { | ||
| 507 | savesegment(ds, env->fos); | ||
| 508 | } else { | ||
| 509 | env->fos = tsk->thread.ds; | ||
| 510 | } | ||
| 511 | env->fos |= 0xffff0000; | ||
| 512 | #else | ||
| 513 | env->fip = fxsave->fip; | ||
| 514 | env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); | ||
| 515 | env->foo = fxsave->foo; | ||
| 516 | env->fos = fxsave->fos; | ||
| 517 | #endif | ||
| 518 | |||
| 519 | for (i = 0; i < 8; ++i) | ||
| 520 | memcpy(&to[i], &from[i], sizeof(to[0])); | ||
| 521 | } | ||
| 522 | |||
| 523 | void convert_to_fxsr(struct task_struct *tsk, | ||
| 524 | const struct user_i387_ia32_struct *env) | ||
| 525 | |||
| 526 | { | ||
| 527 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; | ||
| 528 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | ||
| 529 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | ||
| 530 | int i; | ||
| 531 | |||
| 532 | fxsave->cwd = env->cwd; | ||
| 533 | fxsave->swd = env->swd; | ||
| 534 | fxsave->twd = twd_i387_to_fxsr(env->twd); | ||
| 535 | fxsave->fop = (u16) ((u32) env->fcs >> 16); | ||
| 536 | #ifdef CONFIG_X86_64 | ||
| 537 | fxsave->rip = env->fip; | ||
| 538 | fxsave->rdp = env->foo; | ||
| 539 | /* cs and ds ignored */ | ||
| 540 | #else | ||
| 541 | fxsave->fip = env->fip; | ||
| 542 | fxsave->fcs = (env->fcs & 0xffff); | ||
| 543 | fxsave->foo = env->foo; | ||
| 544 | fxsave->fos = env->fos; | ||
| 545 | #endif | ||
| 546 | |||
| 547 | for (i = 0; i < 8; ++i) | ||
| 548 | memcpy(&to[i], &from[i], sizeof(from[0])); | ||
| 549 | } | ||
| 550 | |||
| 551 | int fpregs_get(struct task_struct *target, const struct user_regset *regset, | ||
| 552 | unsigned int pos, unsigned int count, | ||
| 553 | void *kbuf, void __user *ubuf) | ||
| 554 | { | ||
| 555 | struct user_i387_ia32_struct env; | ||
| 556 | int ret; | ||
| 557 | |||
| 558 | ret = init_fpu(target); | ||
| 559 | if (ret) | ||
| 560 | return ret; | ||
| 561 | |||
| 562 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 563 | return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); | ||
| 564 | |||
| 565 | if (!cpu_has_fxsr) | ||
| 566 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
| 567 | &target->thread.fpu.state->fsave, 0, | ||
| 568 | -1); | ||
| 569 | |||
| 570 | sanitize_i387_state(target); | ||
| 571 | |||
| 572 | if (kbuf && pos == 0 && count == sizeof(env)) { | ||
| 573 | convert_from_fxsr(kbuf, target); | ||
| 574 | return 0; | ||
| 575 | } | ||
| 576 | |||
| 577 | convert_from_fxsr(&env, target); | ||
| 578 | |||
| 579 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); | ||
| 580 | } | ||
| 581 | |||
| 582 | int fpregs_set(struct task_struct *target, const struct user_regset *regset, | ||
| 583 | unsigned int pos, unsigned int count, | ||
| 584 | const void *kbuf, const void __user *ubuf) | ||
| 585 | { | ||
| 586 | struct user_i387_ia32_struct env; | ||
| 587 | int ret; | ||
| 588 | |||
| 589 | ret = init_fpu(target); | ||
| 590 | if (ret) | ||
| 591 | return ret; | ||
| 592 | |||
| 593 | sanitize_i387_state(target); | ||
| 594 | |||
| 595 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 596 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | ||
| 597 | |||
| 598 | if (!cpu_has_fxsr) | ||
| 599 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
| 600 | &target->thread.fpu.state->fsave, 0, | ||
| 601 | -1); | ||
| 602 | |||
| 603 | if (pos > 0 || count < sizeof(env)) | ||
| 604 | convert_from_fxsr(&env, target); | ||
| 605 | |||
| 606 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); | ||
| 607 | if (!ret) | ||
| 608 | convert_to_fxsr(target, &env); | ||
| 609 | |||
| 610 | /* | ||
| 611 | * update the header bit in the xsave header, indicating the | ||
| 612 | * presence of FP. | ||
| 613 | */ | ||
| 614 | if (cpu_has_xsave) | ||
| 615 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; | ||
| 616 | return ret; | ||
| 617 | } | ||
| 618 | |||
| 619 | /* | ||
| 620 | * FPU state for core dumps. | ||
| 621 | * This is only used for a.out dumps now. | ||
| 622 | * It is declared generically using elf_fpregset_t (which is | ||
| 623 | * struct user_i387_struct) but is in fact only used for 32-bit | ||
| 624 | * dumps, so on 64-bit it is really struct user_i387_ia32_struct. | ||
| 625 | */ | ||
| 626 | int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) | ||
| 627 | { | ||
| 628 | struct task_struct *tsk = current; | ||
| 629 | int fpvalid; | ||
| 630 | |||
| 631 | fpvalid = !!used_math(); | ||
| 632 | if (fpvalid) | ||
| 633 | fpvalid = !fpregs_get(tsk, NULL, | ||
| 634 | 0, sizeof(struct user_i387_ia32_struct), | ||
| 635 | fpu, NULL); | ||
| 636 | |||
| 637 | return fpvalid; | ||
| 638 | } | ||
| 639 | EXPORT_SYMBOL(dump_fpu); | ||
| 640 | |||
| 641 | #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ | ||
| 642 | |||
| 643 | static int __init no_387(char *s) | ||
| 644 | { | ||
| 645 | setup_clear_cpu_cap(X86_FEATURE_FPU); | ||
| 646 | return 1; | ||
| 647 | } | ||
| 648 | |||
| 649 | __setup("no387", no_387); | ||
| 650 | |||
| 651 | void fpu_detect(struct cpuinfo_x86 *c) | ||
| 652 | { | ||
| 653 | unsigned long cr0; | ||
| 654 | u16 fsw, fcw; | ||
| 655 | |||
| 656 | fsw = fcw = 0xffff; | ||
| 657 | |||
| 658 | cr0 = read_cr0(); | ||
| 659 | cr0 &= ~(X86_CR0_TS | X86_CR0_EM); | ||
| 660 | write_cr0(cr0); | ||
| 661 | |||
| 662 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | ||
| 663 | : "+m" (fsw), "+m" (fcw)); | ||
| 664 | |||
| 665 | if (fsw == 0 && (fcw & 0x103f) == 0x003f) | ||
| 666 | set_cpu_cap(c, X86_FEATURE_FPU); | ||
| 667 | else | ||
| 668 | clear_cpu_cap(c, X86_FEATURE_FPU); | ||
| 669 | |||
| 670 | /* The final cr0 value is set in fpu_init() */ | ||
| 671 | } | ||
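The probe above can be replayed from user space on x86; on any CPU modern enough to run current kernels it always reports the FPU as present, but it makes the fsw/fcw test concrete (after FNINIT the status word is 0 and the control word is 0x037f):

#include <stdio.h>

int main(void)
{
	unsigned short fsw = 0xffff, fcw = 0xffff;

	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
		     : "+m" (fsw), "+m" (fcw));

	printf("fsw=0x%04x fcw=0x%04x -> FPU %s\n", fsw, fcw,
	       (fsw == 0 && (fcw & 0x103f) == 0x003f) ? "present" : "absent");
	return 0;
}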
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c648139d68d7..9cad694ed7c4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -25,8 +25,7 @@ | |||
| 25 | #include <asm/idle.h> | 25 | #include <asm/idle.h> |
| 26 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
| 27 | #include <asm/mwait.h> | 27 | #include <asm/mwait.h> |
| 28 | #include <asm/i387.h> | 28 | #include <asm/fpu/internal.h> |
| 29 | #include <asm/fpu-internal.h> | ||
| 30 | #include <asm/debugreg.h> | 29 | #include <asm/debugreg.h> |
| 31 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
| 32 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
| @@ -76,9 +75,6 @@ void idle_notifier_unregister(struct notifier_block *n) | |||
| 76 | EXPORT_SYMBOL_GPL(idle_notifier_unregister); | 75 | EXPORT_SYMBOL_GPL(idle_notifier_unregister); |
| 77 | #endif | 76 | #endif |
| 78 | 77 | ||
| 79 | struct kmem_cache *task_xstate_cachep; | ||
| 80 | EXPORT_SYMBOL_GPL(task_xstate_cachep); | ||
| 81 | |||
| 82 | /* | 78 | /* |
| 83 | * this gets called so that we can store lazy state into memory and copy the | 79 | * this gets called so that we can store lazy state into memory and copy the |
| 84 | * current task into the new thread. | 80 | * current task into the new thread. |
| @@ -87,36 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | |||
| 87 | { | 83 | { |
| 88 | *dst = *src; | 84 | *dst = *src; |
| 89 | 85 | ||
| 90 | dst->thread.fpu_counter = 0; | 86 | return fpu__copy(&dst->thread.fpu, &src->thread.fpu); |
| 91 | dst->thread.fpu.has_fpu = 0; | ||
| 92 | dst->thread.fpu.state = NULL; | ||
| 93 | task_disable_lazy_fpu_restore(dst); | ||
| 94 | if (tsk_used_math(src)) { | ||
| 95 | int err = fpu_alloc(&dst->thread.fpu); | ||
| 96 | if (err) | ||
| 97 | return err; | ||
| 98 | fpu_copy(dst, src); | ||
| 99 | } | ||
| 100 | return 0; | ||
| 101 | } | ||
| 102 | |||
| 103 | void free_thread_xstate(struct task_struct *tsk) | ||
| 104 | { | ||
| 105 | fpu_free(&tsk->thread.fpu); | ||
| 106 | } | ||
| 107 | |||
| 108 | void arch_release_task_struct(struct task_struct *tsk) | ||
| 109 | { | ||
| 110 | free_thread_xstate(tsk); | ||
| 111 | } | ||
| 112 | |||
| 113 | void arch_task_cache_init(void) | ||
| 114 | { | ||
| 115 | task_xstate_cachep = | ||
| 116 | kmem_cache_create("task_xstate", xstate_size, | ||
| 117 | __alignof__(union thread_xstate), | ||
| 118 | SLAB_PANIC | SLAB_NOTRACK, NULL); | ||
| 119 | setup_xstate_comp(); | ||
| 120 | } | 87 | } |
| 121 | 88 | ||
| 122 | /* | 89 | /* |
| @@ -127,6 +94,7 @@ void exit_thread(void) | |||
| 127 | struct task_struct *me = current; | 94 | struct task_struct *me = current; |
| 128 | struct thread_struct *t = &me->thread; | 95 | struct thread_struct *t = &me->thread; |
| 129 | unsigned long *bp = t->io_bitmap_ptr; | 96 | unsigned long *bp = t->io_bitmap_ptr; |
| 97 | struct fpu *fpu = &t->fpu; | ||
| 130 | 98 | ||
| 131 | if (bp) { | 99 | if (bp) { |
| 132 | struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); | 100 | struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); |
| @@ -142,7 +110,7 @@ void exit_thread(void) | |||
| 142 | kfree(bp); | 110 | kfree(bp); |
| 143 | } | 111 | } |
| 144 | 112 | ||
| 145 | drop_fpu(me); | 113 | fpu__drop(fpu); |
| 146 | } | 114 | } |
| 147 | 115 | ||
| 148 | void flush_thread(void) | 116 | void flush_thread(void) |
| @@ -152,19 +120,7 @@ void flush_thread(void) | |||
| 152 | flush_ptrace_hw_breakpoint(tsk); | 120 | flush_ptrace_hw_breakpoint(tsk); |
| 153 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 121 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
| 154 | 122 | ||
| 155 | if (!use_eager_fpu()) { | 123 | fpu__clear(&tsk->thread.fpu); |
| 156 | /* FPU state will be reallocated lazily at the first use. */ | ||
| 157 | drop_fpu(tsk); | ||
| 158 | free_thread_xstate(tsk); | ||
| 159 | } else { | ||
| 160 | if (!tsk_used_math(tsk)) { | ||
| 161 | /* kthread execs. TODO: cleanup this horror. */ | ||
| 162 | if (WARN_ON(init_fpu(tsk))) | ||
| 163 | force_sig(SIGKILL, tsk); | ||
| 164 | user_fpu_begin(); | ||
| 165 | } | ||
| 166 | restore_init_xstate(); | ||
| 167 | } | ||
| 168 | } | 124 | } |
| 169 | 125 | ||
| 170 | static void hard_disable_TSC(void) | 126 | static void hard_disable_TSC(void) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..deff651835b4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -39,8 +39,7 @@ | |||
| 39 | #include <asm/pgtable.h> | 39 | #include <asm/pgtable.h> |
| 40 | #include <asm/ldt.h> | 40 | #include <asm/ldt.h> |
| 41 | #include <asm/processor.h> | 41 | #include <asm/processor.h> |
| 42 | #include <asm/i387.h> | 42 | #include <asm/fpu/internal.h> |
| 43 | #include <asm/fpu-internal.h> | ||
| 44 | #include <asm/desc.h> | 43 | #include <asm/desc.h> |
| 45 | #ifdef CONFIG_MATH_EMULATION | 44 | #ifdef CONFIG_MATH_EMULATION |
| 46 | #include <asm/math_emu.h> | 45 | #include <asm/math_emu.h> |
| @@ -242,14 +241,16 @@ __visible __notrace_funcgraph struct task_struct * | |||
| 242 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 241 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
| 243 | { | 242 | { |
| 244 | struct thread_struct *prev = &prev_p->thread, | 243 | struct thread_struct *prev = &prev_p->thread, |
| 245 | *next = &next_p->thread; | 244 | *next = &next_p->thread; |
| 245 | struct fpu *prev_fpu = &prev->fpu; | ||
| 246 | struct fpu *next_fpu = &next->fpu; | ||
| 246 | int cpu = smp_processor_id(); | 247 | int cpu = smp_processor_id(); |
| 247 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | 248 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
| 248 | fpu_switch_t fpu; | 249 | fpu_switch_t fpu_switch; |
| 249 | 250 | ||
| 250 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 251 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
| 251 | 252 | ||
| 252 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); | 253 | fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); |
| 253 | 254 | ||
| 254 | /* | 255 | /* |
| 255 | * Save away %gs. No need to save %fs, as it was saved on the | 256 | * Save away %gs. No need to save %fs, as it was saved on the |
| @@ -296,7 +297,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 296 | * Leave lazy mode, flushing any hypercalls made here. | 297 | * Leave lazy mode, flushing any hypercalls made here. |
| 297 | * This must be done before restoring TLS segments so | 298 | * This must be done before restoring TLS segments so |
| 298 | * the GDT and LDT are properly updated, and must be | 299 | * the GDT and LDT are properly updated, and must be |
| 299 | * done before math_state_restore, so the TS bit is up | 300 | * done before fpu__restore(), so the TS bit is up |
| 300 | * to date. | 301 | * to date. |
| 301 | */ | 302 | */ |
| 302 | arch_end_context_switch(next_p); | 303 | arch_end_context_switch(next_p); |
| @@ -319,7 +320,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 319 | if (prev->gs | next->gs) | 320 | if (prev->gs | next->gs) |
| 320 | lazy_load_gs(next->gs); | 321 | lazy_load_gs(next->gs); |
| 321 | 322 | ||
| 322 | switch_fpu_finish(next_p, fpu); | 323 | switch_fpu_finish(next_fpu, fpu_switch); |
| 323 | 324 | ||
| 324 | this_cpu_write(current_task, next_p); | 325 | this_cpu_write(current_task, next_p); |
| 325 | 326 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..c50e013b57d2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -38,8 +38,7 @@ | |||
| 38 | 38 | ||
| 39 | #include <asm/pgtable.h> | 39 | #include <asm/pgtable.h> |
| 40 | #include <asm/processor.h> | 40 | #include <asm/processor.h> |
| 41 | #include <asm/i387.h> | 41 | #include <asm/fpu/internal.h> |
| 42 | #include <asm/fpu-internal.h> | ||
| 43 | #include <asm/mmu_context.h> | 42 | #include <asm/mmu_context.h> |
| 44 | #include <asm/prctl.h> | 43 | #include <asm/prctl.h> |
| 45 | #include <asm/desc.h> | 44 | #include <asm/desc.h> |
| @@ -274,12 +273,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 274 | { | 273 | { |
| 275 | struct thread_struct *prev = &prev_p->thread; | 274 | struct thread_struct *prev = &prev_p->thread; |
| 276 | struct thread_struct *next = &next_p->thread; | 275 | struct thread_struct *next = &next_p->thread; |
| 276 | struct fpu *prev_fpu = &prev->fpu; | ||
| 277 | struct fpu *next_fpu = &next->fpu; | ||
| 277 | int cpu = smp_processor_id(); | 278 | int cpu = smp_processor_id(); |
| 278 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); | 279 | struct tss_struct *tss = &per_cpu(cpu_tss, cpu); |
| 279 | unsigned fsindex, gsindex; | 280 | unsigned fsindex, gsindex; |
| 280 | fpu_switch_t fpu; | 281 | fpu_switch_t fpu_switch; |
| 281 | 282 | ||
| 282 | fpu = switch_fpu_prepare(prev_p, next_p, cpu); | 283 | fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); |
| 283 | 284 | ||
| 284 | /* We must save %fs and %gs before load_TLS() because | 285 | /* We must save %fs and %gs before load_TLS() because |
| 285 | * %fs and %gs may be cleared by load_TLS(). | 286 | * %fs and %gs may be cleared by load_TLS(). |
| @@ -299,7 +300,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 299 | * Leave lazy mode, flushing any hypercalls made here. This | 300 | * Leave lazy mode, flushing any hypercalls made here. This |
| 300 | * must be done after loading TLS entries in the GDT but before | 301 | * must be done after loading TLS entries in the GDT but before |
| 301 | * loading segments that might reference them, and it must | 302 | * loading segments that might reference them, and it must |
| 302 | * be done before math_state_restore, so the TS bit is up to | 303 | * be done before fpu__restore(), so the TS bit is up to |
| 303 | * date. | 304 | * date. |
| 304 | */ | 305 | */ |
| 305 | arch_end_context_switch(next_p); | 306 | arch_end_context_switch(next_p); |
| @@ -391,7 +392,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
| 391 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 392 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); |
| 392 | prev->gsindex = gsindex; | 393 | prev->gsindex = gsindex; |
| 393 | 394 | ||
| 394 | switch_fpu_finish(next_p, fpu); | 395 | switch_fpu_finish(next_fpu, fpu_switch); |
| 395 | 396 | ||
| 396 | /* | 397 | /* |
| 397 | * Switch the PDA and FPU contexts. | 398 | * Switch the PDA and FPU contexts. |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a7bc79480719..9be72bc3613f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -11,7 +11,6 @@ | |||
| 11 | #include <linux/errno.h> | 11 | #include <linux/errno.h> |
| 12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/ptrace.h> | 13 | #include <linux/ptrace.h> |
| 14 | #include <linux/regset.h> | ||
| 15 | #include <linux/tracehook.h> | 14 | #include <linux/tracehook.h> |
| 16 | #include <linux/user.h> | 15 | #include <linux/user.h> |
| 17 | #include <linux/elf.h> | 16 | #include <linux/elf.h> |
| @@ -28,8 +27,9 @@ | |||
| 28 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
| 29 | #include <asm/pgtable.h> | 28 | #include <asm/pgtable.h> |
| 30 | #include <asm/processor.h> | 29 | #include <asm/processor.h> |
| 31 | #include <asm/i387.h> | 30 | #include <asm/fpu/internal.h> |
| 32 | #include <asm/fpu-internal.h> | 31 | #include <asm/fpu/signal.h> |
| 32 | #include <asm/fpu/regset.h> | ||
| 33 | #include <asm/debugreg.h> | 33 | #include <asm/debugreg.h> |
| 34 | #include <asm/ldt.h> | 34 | #include <asm/ldt.h> |
| 35 | #include <asm/desc.h> | 35 | #include <asm/desc.h> |
| @@ -1297,7 +1297,7 @@ static struct user_regset x86_64_regsets[] __read_mostly = { | |||
| 1297 | .core_note_type = NT_PRFPREG, | 1297 | .core_note_type = NT_PRFPREG, |
| 1298 | .n = sizeof(struct user_i387_struct) / sizeof(long), | 1298 | .n = sizeof(struct user_i387_struct) / sizeof(long), |
| 1299 | .size = sizeof(long), .align = sizeof(long), | 1299 | .size = sizeof(long), .align = sizeof(long), |
| 1300 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1300 | .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set |
| 1301 | }, | 1301 | }, |
| 1302 | [REGSET_XSTATE] = { | 1302 | [REGSET_XSTATE] = { |
| 1303 | .core_note_type = NT_X86_XSTATE, | 1303 | .core_note_type = NT_X86_XSTATE, |
| @@ -1338,13 +1338,13 @@ static struct user_regset x86_32_regsets[] __read_mostly = { | |||
| 1338 | .core_note_type = NT_PRFPREG, | 1338 | .core_note_type = NT_PRFPREG, |
| 1339 | .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), | 1339 | .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), |
| 1340 | .size = sizeof(u32), .align = sizeof(u32), | 1340 | .size = sizeof(u32), .align = sizeof(u32), |
| 1341 | .active = fpregs_active, .get = fpregs_get, .set = fpregs_set | 1341 | .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set |
| 1342 | }, | 1342 | }, |
| 1343 | [REGSET_XFP] = { | 1343 | [REGSET_XFP] = { |
| 1344 | .core_note_type = NT_PRXFPREG, | 1344 | .core_note_type = NT_PRXFPREG, |
| 1345 | .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), | 1345 | .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), |
| 1346 | .size = sizeof(u32), .align = sizeof(u32), | 1346 | .size = sizeof(u32), .align = sizeof(u32), |
| 1347 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1347 | .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set |
| 1348 | }, | 1348 | }, |
| 1349 | [REGSET_XSTATE] = { | 1349 | [REGSET_XSTATE] = { |
| 1350 | .core_note_type = NT_X86_XSTATE, | 1350 | .core_note_type = NT_X86_XSTATE, |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd53933..206996c1669d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -26,8 +26,8 @@ | |||
| 26 | 26 | ||
| 27 | #include <asm/processor.h> | 27 | #include <asm/processor.h> |
| 28 | #include <asm/ucontext.h> | 28 | #include <asm/ucontext.h> |
| 29 | #include <asm/i387.h> | 29 | #include <asm/fpu/internal.h> |
| 30 | #include <asm/fpu-internal.h> | 30 | #include <asm/fpu/signal.h> |
| 31 | #include <asm/vdso.h> | 31 | #include <asm/vdso.h> |
| 32 | #include <asm/mce.h> | 32 | #include <asm/mce.h> |
| 33 | #include <asm/sighandling.h> | 33 | #include <asm/sighandling.h> |
| @@ -103,7 +103,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) | |||
| 103 | get_user_ex(buf, &sc->fpstate); | 103 | get_user_ex(buf, &sc->fpstate); |
| 104 | } get_user_catch(err); | 104 | } get_user_catch(err); |
| 105 | 105 | ||
| 106 | err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); | 106 | err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32)); |
| 107 | 107 | ||
| 108 | force_iret(); | 108 | force_iret(); |
| 109 | 109 | ||
| @@ -199,6 +199,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
| 199 | unsigned long sp = regs->sp; | 199 | unsigned long sp = regs->sp; |
| 200 | unsigned long buf_fx = 0; | 200 | unsigned long buf_fx = 0; |
| 201 | int onsigstack = on_sig_stack(sp); | 201 | int onsigstack = on_sig_stack(sp); |
| 202 | struct fpu *fpu = &current->thread.fpu; | ||
| 202 | 203 | ||
| 203 | /* redzone */ | 204 | /* redzone */ |
| 204 | if (config_enabled(CONFIG_X86_64)) | 205 | if (config_enabled(CONFIG_X86_64)) |
| @@ -218,9 +219,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
| 218 | } | 219 | } |
| 219 | } | 220 | } |
| 220 | 221 | ||
| 221 | if (used_math()) { | 222 | if (fpu->fpstate_active) { |
| 222 | sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), | 223 | sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32), |
| 223 | &buf_fx, &math_size); | 224 | &buf_fx, &math_size); |
| 224 | *fpstate = (void __user *)sp; | 225 | *fpstate = (void __user *)sp; |
| 225 | } | 226 | } |
| 226 | 227 | ||
| @@ -234,8 +235,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
| 234 | return (void __user *)-1L; | 235 | return (void __user *)-1L; |
| 235 | 236 | ||
| 236 | /* save i387 and extended state */ | 237 | /* save i387 and extended state */ |
| 237 | if (used_math() && | 238 | if (fpu->fpstate_active && |
| 238 | save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) | 239 | copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) |
| 239 | return (void __user *)-1L; | 240 | return (void __user *)-1L; |
| 240 | 241 | ||
| 241 | return (void __user *)sp; | 242 | return (void __user *)sp; |
| @@ -593,6 +594,22 @@ badframe: | |||
| 593 | return 0; | 594 | return 0; |
| 594 | } | 595 | } |
| 595 | 596 | ||
| 597 | static inline int is_ia32_compat_frame(void) | ||
| 598 | { | ||
| 599 | return config_enabled(CONFIG_IA32_EMULATION) && | ||
| 600 | test_thread_flag(TIF_IA32); | ||
| 601 | } | ||
| 602 | |||
| 603 | static inline int is_ia32_frame(void) | ||
| 604 | { | ||
| 605 | return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); | ||
| 606 | } | ||
| 607 | |||
| 608 | static inline int is_x32_frame(void) | ||
| 609 | { | ||
| 610 | return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); | ||
| 611 | } | ||
| 612 | |||
| 596 | static int | 613 | static int |
| 597 | setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) | 614 | setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) |
| 598 | { | 615 | { |
| @@ -617,6 +634,7 @@ static void | |||
| 617 | handle_signal(struct ksignal *ksig, struct pt_regs *regs) | 634 | handle_signal(struct ksignal *ksig, struct pt_regs *regs) |
| 618 | { | 635 | { |
| 619 | bool stepping, failed; | 636 | bool stepping, failed; |
| 637 | struct fpu *fpu = &current->thread.fpu; | ||
| 620 | 638 | ||
| 621 | /* Are we from a system call? */ | 639 | /* Are we from a system call? */ |
| 622 | if (syscall_get_nr(current, regs) >= 0) { | 640 | if (syscall_get_nr(current, regs) >= 0) { |
| @@ -665,8 +683,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) | |||
| 665 | /* | 683 | /* |
| 666 | * Ensure the signal handler starts with the new fpu state. | 684 | * Ensure the signal handler starts with the new fpu state. |
| 667 | */ | 685 | */ |
| 668 | if (used_math()) | 686 | if (fpu->fpstate_active) |
| 669 | fpu_reset_state(current); | 687 | fpu__clear(fpu); |
| 670 | } | 688 | } |
| 671 | signal_setup_done(failed, ksig, stepping); | 689 | signal_setup_done(failed, ksig, stepping); |
| 672 | } | 690 | } |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0e8209619455..6d4bfea25874 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -68,8 +68,7 @@ | |||
| 68 | #include <asm/mwait.h> | 68 | #include <asm/mwait.h> |
| 69 | #include <asm/apic.h> | 69 | #include <asm/apic.h> |
| 70 | #include <asm/io_apic.h> | 70 | #include <asm/io_apic.h> |
| 71 | #include <asm/i387.h> | 71 | #include <asm/fpu/internal.h> |
| 72 | #include <asm/fpu-internal.h> | ||
| 73 | #include <asm/setup.h> | 72 | #include <asm/setup.h> |
| 74 | #include <asm/uv/uv.h> | 73 | #include <asm/uv/uv.h> |
| 75 | #include <linux/mc146818rtc.h> | 74 | #include <linux/mc146818rtc.h> |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..36cb15b7b367 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -54,12 +54,13 @@ | |||
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/traps.h> | 55 | #include <asm/traps.h> |
| 56 | #include <asm/desc.h> | 56 | #include <asm/desc.h> |
| 57 | #include <asm/i387.h> | 57 | #include <asm/fpu/internal.h> |
| 58 | #include <asm/fpu-internal.h> | ||
| 59 | #include <asm/mce.h> | 58 | #include <asm/mce.h> |
| 60 | #include <asm/fixmap.h> | 59 | #include <asm/fixmap.h> |
| 61 | #include <asm/mach_traps.h> | 60 | #include <asm/mach_traps.h> |
| 62 | #include <asm/alternative.h> | 61 | #include <asm/alternative.h> |
| 62 | #include <asm/fpu/xstate.h> | ||
| 63 | #include <asm/trace/mpx.h> | ||
| 63 | #include <asm/mpx.h> | 64 | #include <asm/mpx.h> |
| 64 | 65 | ||
| 65 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
| @@ -371,10 +372,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
| 371 | 372 | ||
| 372 | dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | 373 | dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) |
| 373 | { | 374 | { |
| 374 | struct task_struct *tsk = current; | ||
| 375 | struct xsave_struct *xsave_buf; | ||
| 376 | enum ctx_state prev_state; | 375 | enum ctx_state prev_state; |
| 377 | struct bndcsr *bndcsr; | 376 | const struct bndcsr *bndcsr; |
| 378 | siginfo_t *info; | 377 | siginfo_t *info; |
| 379 | 378 | ||
| 380 | prev_state = exception_enter(); | 379 | prev_state = exception_enter(); |
| @@ -393,15 +392,15 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |||
| 393 | 392 | ||
| 394 | /* | 393 | /* |
| 395 | * We need to look at BNDSTATUS to resolve this exception. | 394 | * We need to look at BNDSTATUS to resolve this exception. |
| 396 | * It is not directly accessible, though, so we need to | 395 | * A NULL here might mean that it is in its 'init state', |
| 397 | * do an xsave and then pull it out of the xsave buffer. | 396 | * which is all zeros, which indicates MPX was not |
| 397 | * responsible for the exception. | ||
| 398 | */ | 398 | */ |
| 399 | fpu_save_init(&tsk->thread.fpu); | 399 | bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); |
| 400 | xsave_buf = &(tsk->thread.fpu.state->xsave); | ||
| 401 | bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); | ||
| 402 | if (!bndcsr) | 400 | if (!bndcsr) |
| 403 | goto exit_trap; | 401 | goto exit_trap; |
| 404 | 402 | ||
| 403 | trace_bounds_exception_mpx(bndcsr); | ||
| 405 | /* | 404 | /* |
| 406 | * The error code field of the BNDSTATUS register communicates status | 405 | * The error code field of the BNDSTATUS register communicates status |
| 407 | * information of a bound range exception #BR or operation involving | 406 | * information of a bound range exception #BR or operation involving |
| @@ -409,11 +408,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) | |||
| 409 | */ | 408 | */ |
| 410 | switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { | 409 | switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { |
| 411 | case 2: /* Bound directory has invalid entry. */ | 410 | case 2: /* Bound directory has invalid entry. */ |
| 412 | if (mpx_handle_bd_fault(xsave_buf)) | 411 | if (mpx_handle_bd_fault()) |
| 413 | goto exit_trap; | 412 | goto exit_trap; |
| 414 | break; /* Success, it was handled */ | 413 | break; /* Success, it was handled */ |
| 415 | case 1: /* Bound violation. */ | 414 | case 1: /* Bound violation. */ |
| 416 | info = mpx_generate_siginfo(regs, xsave_buf); | 415 | info = mpx_generate_siginfo(regs); |
| 417 | if (IS_ERR(info)) { | 416 | if (IS_ERR(info)) { |
| 418 | /* | 417 | /* |
| 419 | * We failed to decode the MPX instruction. Act as if | 418 | * We failed to decode the MPX instruction. Act as if |
| @@ -709,8 +708,8 @@ NOKPROBE_SYMBOL(do_debug); | |||
| 709 | static void math_error(struct pt_regs *regs, int error_code, int trapnr) | 708 | static void math_error(struct pt_regs *regs, int error_code, int trapnr) |
| 710 | { | 709 | { |
| 711 | struct task_struct *task = current; | 710 | struct task_struct *task = current; |
| 711 | struct fpu *fpu = &task->thread.fpu; | ||
| 712 | siginfo_t info; | 712 | siginfo_t info; |
| 713 | unsigned short err; | ||
| 714 | char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : | 713 | char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : |
| 715 | "simd exception"; | 714 | "simd exception"; |
| 716 | 715 | ||
| @@ -718,8 +717,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
| 718 | return; | 717 | return; |
| 719 | conditional_sti(regs); | 718 | conditional_sti(regs); |
| 720 | 719 | ||
| 721 | if (!user_mode(regs)) | 720 | if (!user_mode(regs)) { |
| 722 | { | ||
| 723 | if (!fixup_exception(regs)) { | 721 | if (!fixup_exception(regs)) { |
| 724 | task->thread.error_code = error_code; | 722 | task->thread.error_code = error_code; |
| 725 | task->thread.trap_nr = trapnr; | 723 | task->thread.trap_nr = trapnr; |
| @@ -731,62 +729,20 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) | |||
| 731 | /* | 729 | /* |
| 732 | * Save the info for the exception handler and clear the error. | 730 | * Save the info for the exception handler and clear the error. |
| 733 | */ | 731 | */ |
| 734 | unlazy_fpu(task); | 732 | fpu__save(fpu); |
| 735 | task->thread.trap_nr = trapnr; | 733 | |
| 734 | task->thread.trap_nr = trapnr; | ||
| 736 | task->thread.error_code = error_code; | 735 | task->thread.error_code = error_code; |
| 737 | info.si_signo = SIGFPE; | 736 | info.si_signo = SIGFPE; |
| 738 | info.si_errno = 0; | 737 | info.si_errno = 0; |
| 739 | info.si_addr = (void __user *)uprobe_get_trap_addr(regs); | 738 | info.si_addr = (void __user *)uprobe_get_trap_addr(regs); |
| 740 | if (trapnr == X86_TRAP_MF) { | ||
| 741 | unsigned short cwd, swd; | ||
| 742 | /* | ||
| 743 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
| 744 | * status. 0x3f is the exception bits in these regs, 0x200 is the | ||
| 745 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
| 746 | * fault bit. We should only be taking one exception at a time, | ||
| 747 | * so if this combination doesn't produce any single exception, | ||
| 748 | * then we have a bad program that isn't synchronizing its FPU usage | ||
| 749 | * and it will suffer the consequences since we won't be able to | ||
| 750 | * fully reproduce the context of the exception | ||
| 751 | */ | ||
| 752 | cwd = get_fpu_cwd(task); | ||
| 753 | swd = get_fpu_swd(task); | ||
| 754 | 739 | ||
| 755 | err = swd & ~cwd; | 740 | info.si_code = fpu__exception_code(fpu, trapnr); |
| 756 | } else { | ||
| 757 | /* | ||
| 758 | * The SIMD FPU exceptions are handled a little differently, as there | ||
| 759 | * is only a single status/control register. Thus, to determine which | ||
| 760 | * unmasked exception was caught we must mask the exception mask bits | ||
| 761 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
| 762 | */ | ||
| 763 | unsigned short mxcsr = get_fpu_mxcsr(task); | ||
| 764 | err = ~(mxcsr >> 7) & mxcsr; | ||
| 765 | } | ||
| 766 | 741 | ||
| 767 | if (err & 0x001) { /* Invalid op */ | 742 | /* Retry when we get spurious exceptions: */ |
| 768 | /* | 743 | if (!info.si_code) |
| 769 | * swd & 0x240 == 0x040: Stack Underflow | ||
| 770 | * swd & 0x240 == 0x240: Stack Overflow | ||
| 771 | * User must clear the SF bit (0x40) if set | ||
| 772 | */ | ||
| 773 | info.si_code = FPE_FLTINV; | ||
| 774 | } else if (err & 0x004) { /* Divide by Zero */ | ||
| 775 | info.si_code = FPE_FLTDIV; | ||
| 776 | } else if (err & 0x008) { /* Overflow */ | ||
| 777 | info.si_code = FPE_FLTOVF; | ||
| 778 | } else if (err & 0x012) { /* Denormal, Underflow */ | ||
| 779 | info.si_code = FPE_FLTUND; | ||
| 780 | } else if (err & 0x020) { /* Precision */ | ||
| 781 | info.si_code = FPE_FLTRES; | ||
| 782 | } else { | ||
| 783 | /* | ||
| 784 | * If we're using IRQ 13, or supposedly even some | ||
| 785 | * X86_TRAP_MF trap implementations, it's possible | ||
| 786 | * we get a spurious trap, which is not an error. | ||
| 787 | */ | ||
| 788 | return; | 744 | return; |
| 789 | } | 745 | |
| 790 | force_sig_info(SIGFPE, &info, task); | 746 | force_sig_info(SIGFPE, &info, task); |
| 791 | } | 747 | } |
| 792 | 748 | ||
| @@ -827,48 +783,6 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) | |||
| 827 | { | 783 | { |
| 828 | } | 784 | } |
| 829 | 785 | ||
| 830 | /* | ||
| 831 | * 'math_state_restore()' saves the current math information in the | ||
| 832 | * old math state array, and gets the new ones from the current task | ||
| 833 | * | ||
| 834 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | ||
| 835 | * Don't touch unless you *really* know how it works. | ||
| 836 | * | ||
| 837 | * Must be called with kernel preemption disabled (e.g. with local | ||
| 838 | * interrupts disabled, as in the case of do_device_not_available). | ||
| 839 | */ | ||
| 840 | void math_state_restore(void) | ||
| 841 | { | ||
| 842 | struct task_struct *tsk = current; | ||
| 843 | |||
| 844 | if (!tsk_used_math(tsk)) { | ||
| 845 | local_irq_enable(); | ||
| 846 | /* | ||
| 847 | * does a slab alloc which can sleep | ||
| 848 | */ | ||
| 849 | if (init_fpu(tsk)) { | ||
| 850 | /* | ||
| 851 | * ran out of memory! | ||
| 852 | */ | ||
| 853 | do_group_exit(SIGKILL); | ||
| 854 | return; | ||
| 855 | } | ||
| 856 | local_irq_disable(); | ||
| 857 | } | ||
| 858 | |||
| 859 | /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ | ||
| 860 | kernel_fpu_disable(); | ||
| 861 | __thread_fpu_begin(tsk); | ||
| 862 | if (unlikely(restore_fpu_checking(tsk))) { | ||
| 863 | fpu_reset_state(tsk); | ||
| 864 | force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); | ||
| 865 | } else { | ||
| 866 | tsk->thread.fpu_counter++; | ||
| 867 | } | ||
| 868 | kernel_fpu_enable(); | ||
| 869 | } | ||
| 870 | EXPORT_SYMBOL_GPL(math_state_restore); | ||
| 871 | |||
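math_state_restore() does not simply vanish: its caller below switches to fpu__restore(), so the logic presumably moves into the new FPU core under that name. A sketch under that assumption, built from the body removed above; fpu__activate_curr() is taken from other call sites in this series, while the remaining helper names are kept from the old code and may have been renamed:

	/* Hedged sketch of fpu__restore(), reconstructed from the removed body. */
	void fpu__restore(struct fpu *fpu)
	{
		/* Allocate and init the fpstate if this task never used math: */
		fpu__activate_curr(fpu);

		/* Avoid __kernel_fpu_begin() right after activating the FPU: */
		kernel_fpu_disable();
		__thread_fpu_begin(current);
		if (unlikely(restore_fpu_checking(current))) {
			fpu_reset_state(current);
			force_sig_info(SIGSEGV, SEND_SIG_PRIV, current);
		} else {
			current->thread.fpu_counter++;
		}
		kernel_fpu_enable();
	}

As with the old function, this must run with preemption disabled, which do_device_not_available() guarantees by keeping interrupts off.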
| 872 | dotraplinkage void | 786 | dotraplinkage void |
| 873 | do_device_not_available(struct pt_regs *regs, long error_code) | 787 | do_device_not_available(struct pt_regs *regs, long error_code) |
| 874 | { | 788 | { |
| @@ -889,7 +803,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) | |||
| 889 | return; | 803 | return; |
| 890 | } | 804 | } |
| 891 | #endif | 805 | #endif |
| 892 | math_state_restore(); /* interrupts still off */ | 806 | fpu__restore(¤t->thread.fpu); /* interrupts still off */ |
| 893 | #ifdef CONFIG_X86_32 | 807 | #ifdef CONFIG_X86_32 |
| 894 | conditional_sti(regs); | 808 | conditional_sti(regs); |
| 895 | #endif | 809 | #endif |
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0b81ad67da07..66476244731e 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/kdebug.h> | 29 | #include <linux/kdebug.h> |
| 30 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
| 31 | #include <asm/insn.h> | 31 | #include <asm/insn.h> |
| 32 | #include <asm/mmu_context.h> | ||
| 32 | 33 | ||
| 33 | /* Post-execution fixups. */ | 34 | /* Post-execution fixups. */ |
| 34 | 35 | ||
| @@ -312,11 +313,6 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool | |||
| 312 | } | 313 | } |
| 313 | 314 | ||
| 314 | #ifdef CONFIG_X86_64 | 315 | #ifdef CONFIG_X86_64 |
| 315 | static inline bool is_64bit_mm(struct mm_struct *mm) | ||
| 316 | { | ||
| 317 | return !config_enabled(CONFIG_IA32_EMULATION) || | ||
| 318 | !(mm->context.ia32_compat == TIF_IA32); | ||
| 319 | } | ||
| 320 | /* | 316 | /* |
| 321 | * If arch_uprobe->insn doesn't use rip-relative addressing, return | 317 | * If arch_uprobe->insn doesn't use rip-relative addressing, return |
| 322 | * immediately. Otherwise, rewrite the instruction so that it accesses | 318 | * immediately. Otherwise, rewrite the instruction so that it accesses |
| @@ -497,10 +493,6 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) | |||
| 497 | } | 493 | } |
| 498 | } | 494 | } |
| 499 | #else /* 32-bit: */ | 495 | #else /* 32-bit: */ |
| 500 | static inline bool is_64bit_mm(struct mm_struct *mm) | ||
| 501 | { | ||
| 502 | return false; | ||
| 503 | } | ||
| 504 | /* | 496 | /* |
| 505 | * No RIP-relative addressing on 32-bit | 497 | * No RIP-relative addressing on 32-bit |
| 506 | */ | 498 | */ |
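Both the 64-bit copy and the 32-bit stub of is_64bit_mm() are dropped here in favor of the new <asm/mmu_context.h> include, so the helper presumably now lives there for common use (the mpx.c hunks below start calling it). Assuming it moved verbatim:

	/* Sketch: is_64bit_mm() as it presumably appears in <asm/mmu_context.h>. */
	#ifdef CONFIG_X86_64
	static inline bool is_64bit_mm(struct mm_struct *mm)
	{
		return	!config_enabled(CONFIG_IA32_EMULATION) ||
			!(mm->context.ia32_compat == TIF_IA32);
	}
	#else
	static inline bool is_64bit_mm(struct mm_struct *mm)
	{
		return false;
	}
	#endif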
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c deleted file mode 100644 index 87a815b85f3e..000000000000 --- a/arch/x86/kernel/xsave.c +++ /dev/null | |||
| @@ -1,724 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * xsave/xrstor support. | ||
| 3 | * | ||
| 4 | * Author: Suresh Siddha <suresh.b.siddha@intel.com> | ||
| 5 | */ | ||
| 6 | |||
| 7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 8 | |||
| 9 | #include <linux/bootmem.h> | ||
| 10 | #include <linux/compat.h> | ||
| 11 | #include <linux/cpu.h> | ||
| 12 | #include <asm/i387.h> | ||
| 13 | #include <asm/fpu-internal.h> | ||
| 14 | #include <asm/sigframe.h> | ||
| 15 | #include <asm/tlbflush.h> | ||
| 16 | #include <asm/xcr.h> | ||
| 17 | |||
| 18 | /* | ||
| 19 | * Mask of features supported by the CPU and the kernel. | ||
| 20 | */ | ||
| 21 | u64 pcntxt_mask; | ||
| 22 | |||
| 23 | /* | ||
| 24 | * Represents init state for the supported extended state. | ||
| 25 | */ | ||
| 26 | struct xsave_struct *init_xstate_buf; | ||
| 27 | |||
| 28 | static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; | ||
| 29 | static unsigned int *xstate_offsets, *xstate_sizes; | ||
| 30 | static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; | ||
| 31 | static unsigned int xstate_features; | ||
| 32 | |||
| 33 | /* | ||
| 34 | * If a processor implementation discerns that a processor state component is | ||
| 35 | * in its initialized state, it may clear the corresponding bit in the | ||
| 36 | * xsave_hdr.xstate_bv to '0', without modifying the corresponding memory | ||
| 37 | * layout in the case of xsaveopt. While presenting the xstate information to | ||
| 38 | * the user, we always ensure that the memory layout of a feature will be in | ||
| 39 | * the init state if the corresponding header bit is zero. This is to ensure | ||
| 40 | * that the user doesn't see some stale state in the memory layout during | ||
| 41 | * signal handling, debugging etc. | ||
| 42 | */ | ||
| 43 | void __sanitize_i387_state(struct task_struct *tsk) | ||
| 44 | { | ||
| 45 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; | ||
| 46 | int feature_bit = 0x2; | ||
| 47 | u64 xstate_bv; | ||
| 48 | |||
| 49 | if (!fx) | ||
| 50 | return; | ||
| 51 | |||
| 52 | xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; | ||
| 53 | |||
| 54 | /* | ||
| 55 | * None of the feature bits are in init state. So nothing else | ||
| 56 | * to do for us, as the memory layout is up to date. | ||
| 57 | */ | ||
| 58 | if ((xstate_bv & pcntxt_mask) == pcntxt_mask) | ||
| 59 | return; | ||
| 60 | |||
| 61 | /* | ||
| 62 | * FP is in init state | ||
| 63 | */ | ||
| 64 | if (!(xstate_bv & XSTATE_FP)) { | ||
| 65 | fx->cwd = 0x37f; | ||
| 66 | fx->swd = 0; | ||
| 67 | fx->twd = 0; | ||
| 68 | fx->fop = 0; | ||
| 69 | fx->rip = 0; | ||
| 70 | fx->rdp = 0; | ||
| 71 | memset(&fx->st_space[0], 0, 128); | ||
| 72 | } | ||
| 73 | |||
| 74 | /* | ||
| 75 | * SSE is in init state | ||
| 76 | */ | ||
| 77 | if (!(xstate_bv & XSTATE_SSE)) | ||
| 78 | memset(&fx->xmm_space[0], 0, 256); | ||
| 79 | |||
| 80 | xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Update all the other memory layouts for which the corresponding | ||
| 84 | * header bit is in the init state. | ||
| 85 | */ | ||
| 86 | while (xstate_bv) { | ||
| 87 | if (xstate_bv & 0x1) { | ||
| 88 | int offset = xstate_offsets[feature_bit]; | ||
| 89 | int size = xstate_sizes[feature_bit]; | ||
| 90 | |||
| 91 | memcpy(((void *) fx) + offset, | ||
| 92 | ((void *) init_xstate_buf) + offset, | ||
| 93 | size); | ||
| 94 | } | ||
| 95 | |||
| 96 | xstate_bv >>= 1; | ||
| 97 | feature_bit++; | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | /* | ||
| 102 | * Check for the presence of extended state information in the | ||
| 103 | * user fpstate pointer in the sigcontext. | ||
| 104 | */ | ||
| 105 | static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, | ||
| 106 | void __user *fpstate, | ||
| 107 | struct _fpx_sw_bytes *fx_sw) | ||
| 108 | { | ||
| 109 | int min_xstate_size = sizeof(struct i387_fxsave_struct) + | ||
| 110 | sizeof(struct xsave_hdr_struct); | ||
| 111 | unsigned int magic2; | ||
| 112 | |||
| 113 | if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) | ||
| 114 | return -1; | ||
| 115 | |||
| 116 | /* Check for the first magic field and other error scenarios. */ | ||
| 117 | if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || | ||
| 118 | fx_sw->xstate_size < min_xstate_size || | ||
| 119 | fx_sw->xstate_size > xstate_size || | ||
| 120 | fx_sw->xstate_size > fx_sw->extended_size) | ||
| 121 | return -1; | ||
| 122 | |||
| 123 | /* | ||
| 124 | * Check for the presence of second magic word at the end of memory | ||
| 125 | * layout. This detects the case where the user just copied the legacy | ||
| 126 | * fpstate layout without copying the extended state information | ||
| 127 | * in the memory layout. | ||
| 128 | */ | ||
| 129 | if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) | ||
| 130 | || magic2 != FP_XSTATE_MAGIC2) | ||
| 131 | return -1; | ||
| 132 | |||
| 133 | return 0; | ||
| 134 | } | ||
| 135 | |||
| 136 | /* | ||
| 137 | * Signal frame handlers. | ||
| 138 | */ | ||
| 139 | static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) | ||
| 140 | { | ||
| 141 | if (use_fxsr()) { | ||
| 142 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; | ||
| 143 | struct user_i387_ia32_struct env; | ||
| 144 | struct _fpstate_ia32 __user *fp = buf; | ||
| 145 | |||
| 146 | convert_from_fxsr(&env, tsk); | ||
| 147 | |||
| 148 | if (__copy_to_user(buf, &env, sizeof(env)) || | ||
| 149 | __put_user(xsave->i387.swd, &fp->status) || | ||
| 150 | __put_user(X86_FXSR_MAGIC, &fp->magic)) | ||
| 151 | return -1; | ||
| 152 | } else { | ||
| 153 | struct i387_fsave_struct __user *fp = buf; | ||
| 154 | u32 swd; | ||
| 155 | if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) | ||
| 156 | return -1; | ||
| 157 | } | ||
| 158 | |||
| 159 | return 0; | ||
| 160 | } | ||
| 161 | |||
| 162 | static inline int save_xstate_epilog(void __user *buf, int ia32_frame) | ||
| 163 | { | ||
| 164 | struct xsave_struct __user *x = buf; | ||
| 165 | struct _fpx_sw_bytes *sw_bytes; | ||
| 166 | u32 xstate_bv; | ||
| 167 | int err; | ||
| 168 | |||
| 169 | /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ | ||
| 170 | sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; | ||
| 171 | err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); | ||
| 172 | |||
| 173 | if (!use_xsave()) | ||
| 174 | return err; | ||
| 175 | |||
| 176 | err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); | ||
| 177 | |||
| 178 | /* | ||
| 179 | * Read the xstate_bv which we copied (directly from the cpu or | ||
| 180 | * from the state in task struct) to the user buffers. | ||
| 181 | */ | ||
| 182 | err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); | ||
| 183 | |||
| 184 | /* | ||
| 185 | * For legacy compatibility, we always set the FP/SSE bits in the bit | ||
| 186 | * vector while saving the state to the user context. This lets us | ||
| 187 | * capture any changes (made during sigreturn) to | ||
| 188 | * the FP/SSE bits by legacy applications which don't touch | ||
| 189 | * xstate_bv in the xsave header. | ||
| 190 | * | ||
| 191 | * xsave aware apps can change the xstate_bv in the xsave | ||
| 192 | * header as well as change any contents in the memory layout. | ||
| 193 | * XRSTOR as part of sigreturn will capture all the changes. | ||
| 194 | */ | ||
| 195 | xstate_bv |= XSTATE_FPSSE; | ||
| 196 | |||
| 197 | err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); | ||
| 198 | |||
| 199 | return err; | ||
| 200 | } | ||
| 201 | |||
| 202 | static inline int save_user_xstate(struct xsave_struct __user *buf) | ||
| 203 | { | ||
| 204 | int err; | ||
| 205 | |||
| 206 | if (use_xsave()) | ||
| 207 | err = xsave_user(buf); | ||
| 208 | else if (use_fxsr()) | ||
| 209 | err = fxsave_user((struct i387_fxsave_struct __user *) buf); | ||
| 210 | else | ||
| 211 | err = fsave_user((struct i387_fsave_struct __user *) buf); | ||
| 212 | |||
| 213 | if (unlikely(err) && __clear_user(buf, xstate_size)) | ||
| 214 | err = -EFAULT; | ||
| 215 | return err; | ||
| 216 | } | ||
| 217 | |||
| 218 | /* | ||
| 219 | * Save the fpu, extended register state to the user signal frame. | ||
| 220 | * | ||
| 221 | * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save | ||
| 222 | * state is copied. | ||
| 223 | * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. | ||
| 224 | * | ||
| 225 | * buf == buf_fx for 64-bit frames and 32-bit fsave frame. | ||
| 226 | * buf != buf_fx for 32-bit frames with fxstate. | ||
| 227 | * | ||
| 228 | * If the fpu, extended register state is live, save the state directly | ||
| 229 | * to the user frame pointed to by the aligned pointer 'buf_fx'. Otherwise, | ||
| 230 | * copy the thread's fpu state to the user frame starting at 'buf_fx'. | ||
| 231 | * | ||
| 232 | * If this is a 32-bit frame with fxstate, put a fsave header before | ||
| 233 | * the aligned state at 'buf_fx'. | ||
| 234 | * | ||
| 235 | * For [f]xsave state, update the SW reserved fields in the [f]xsave frame | ||
| 236 | * indicating the absence/presence of the extended state to the user. | ||
| 237 | */ | ||
| 238 | int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) | ||
| 239 | { | ||
| 240 | struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; | ||
| 241 | struct task_struct *tsk = current; | ||
| 242 | int ia32_fxstate = (buf != buf_fx); | ||
| 243 | |||
| 244 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
| 245 | config_enabled(CONFIG_IA32_EMULATION)); | ||
| 246 | |||
| 247 | if (!access_ok(VERIFY_WRITE, buf, size)) | ||
| 248 | return -EACCES; | ||
| 249 | |||
| 250 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 251 | return fpregs_soft_get(current, NULL, 0, | ||
| 252 | sizeof(struct user_i387_ia32_struct), NULL, | ||
| 253 | (struct _fpstate_ia32 __user *) buf) ? -1 : 1; | ||
| 254 | |||
| 255 | if (user_has_fpu()) { | ||
| 256 | /* Save the live register state to the user directly. */ | ||
| 257 | if (save_user_xstate(buf_fx)) | ||
| 258 | return -1; | ||
| 259 | /* Update the thread's fxstate to save the fsave header. */ | ||
| 260 | if (ia32_fxstate) | ||
| 261 | fpu_fxsave(&tsk->thread.fpu); | ||
| 262 | } else { | ||
| 263 | sanitize_i387_state(tsk); | ||
| 264 | if (__copy_to_user(buf_fx, xsave, xstate_size)) | ||
| 265 | return -1; | ||
| 266 | } | ||
| 267 | |||
| 268 | /* Save the fsave header for the 32-bit frames. */ | ||
| 269 | if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) | ||
| 270 | return -1; | ||
| 271 | |||
| 272 | if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) | ||
| 273 | return -1; | ||
| 274 | |||
| 275 | return 0; | ||
| 276 | } | ||
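For readers tracing the buf/buf_fx split, a layout sketch of the frames this function produces (an editorial illustration; the placements follow save_xstate_epilog() above and prepare_fx_sw_frame() below):

	/*
	 * 32-bit frame with fxstate (buf != buf_fx):
	 *
	 *   buf                  -> fsave header (struct i387_fsave_struct)
	 *   buf_fx               -> [f]xsave image, 64-byte aligned,
	 *                           xstate_size bytes, sw_reserved filled
	 *                           from fx_sw_reserved_ia32
	 *   buf_fx + xstate_size -> FP_XSTATE_MAGIC2 sentinel
	 *
	 * 64-bit frames and 32-bit fsave frames: buf == buf_fx, no header.
	 */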
| 277 | |||
| 278 | static inline void | ||
| 279 | sanitize_restored_xstate(struct task_struct *tsk, | ||
| 280 | struct user_i387_ia32_struct *ia32_env, | ||
| 281 | u64 xstate_bv, int fx_only) | ||
| 282 | { | ||
| 283 | struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; | ||
| 284 | struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; | ||
| 285 | |||
| 286 | if (use_xsave()) { | ||
| 287 | /* These bits must be zero. */ | ||
| 288 | memset(xsave_hdr->reserved, 0, 48); | ||
| 289 | |||
| 290 | /* | ||
| 291 | * Init the state that is not present in the memory | ||
| 292 | * layout and not enabled by the OS. | ||
| 293 | */ | ||
| 294 | if (fx_only) | ||
| 295 | xsave_hdr->xstate_bv = XSTATE_FPSSE; | ||
| 296 | else | ||
| 297 | xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); | ||
| 298 | } | ||
| 299 | |||
| 300 | if (use_fxsr()) { | ||
| 301 | /* | ||
| 302 | * mxcsr reserved bits must be masked to zero for security | ||
| 303 | * reasons. | ||
| 304 | */ | ||
| 305 | xsave->i387.mxcsr &= mxcsr_feature_mask; | ||
| 306 | |||
| 307 | convert_to_fxsr(tsk, ia32_env); | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 311 | /* | ||
| 312 | * Restore the extended state if present. Otherwise, restore the FP/SSE state. | ||
| 313 | */ | ||
| 314 | static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) | ||
| 315 | { | ||
| 316 | if (use_xsave()) { | ||
| 317 | if ((unsigned long)buf % 64 || fx_only) { | ||
| 318 | u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; | ||
| 319 | xrstor_state(init_xstate_buf, init_bv); | ||
| 320 | return fxrstor_user(buf); | ||
| 321 | } else { | ||
| 322 | u64 init_bv = pcntxt_mask & ~xbv; | ||
| 323 | if (unlikely(init_bv)) | ||
| 324 | xrstor_state(init_xstate_buf, init_bv); | ||
| 325 | return xrestore_user(buf, xbv); | ||
| 326 | } | ||
| 327 | } else if (use_fxsr()) { | ||
| 328 | return fxrstor_user(buf); | ||
| 329 | } else | ||
| 330 | return frstor_user(buf); | ||
| 331 | } | ||
| 332 | |||
| 333 | int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) | ||
| 334 | { | ||
| 335 | int ia32_fxstate = (buf != buf_fx); | ||
| 336 | struct task_struct *tsk = current; | ||
| 337 | int state_size = xstate_size; | ||
| 338 | u64 xstate_bv = 0; | ||
| 339 | int fx_only = 0; | ||
| 340 | |||
| 341 | ia32_fxstate &= (config_enabled(CONFIG_X86_32) || | ||
| 342 | config_enabled(CONFIG_IA32_EMULATION)); | ||
| 343 | |||
| 344 | if (!buf) { | ||
| 345 | fpu_reset_state(tsk); | ||
| 346 | return 0; | ||
| 347 | } | ||
| 348 | |||
| 349 | if (!access_ok(VERIFY_READ, buf, size)) | ||
| 350 | return -EACCES; | ||
| 351 | |||
| 352 | if (!used_math() && init_fpu(tsk)) | ||
| 353 | return -1; | ||
| 354 | |||
| 355 | if (!static_cpu_has(X86_FEATURE_FPU)) | ||
| 356 | return fpregs_soft_set(current, NULL, | ||
| 357 | 0, sizeof(struct user_i387_ia32_struct), | ||
| 358 | NULL, buf) != 0; | ||
| 359 | |||
| 360 | if (use_xsave()) { | ||
| 361 | struct _fpx_sw_bytes fx_sw_user; | ||
| 362 | if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { | ||
| 363 | /* | ||
| 364 | * Couldn't find the extended state information in the | ||
| 365 | * memory layout. Restore just the FP/SSE and init all | ||
| 366 | * the other extended state. | ||
| 367 | */ | ||
| 368 | state_size = sizeof(struct i387_fxsave_struct); | ||
| 369 | fx_only = 1; | ||
| 370 | } else { | ||
| 371 | state_size = fx_sw_user.xstate_size; | ||
| 372 | xstate_bv = fx_sw_user.xstate_bv; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | if (ia32_fxstate) { | ||
| 377 | /* | ||
| 378 | * For 32-bit frames with fxstate, copy the user state to the | ||
| 379 | * thread's fpu state, reconstruct fxstate from the fsave | ||
| 380 | * header. Sanitize the copied state etc. | ||
| 381 | */ | ||
| 382 | struct fpu *fpu = &tsk->thread.fpu; | ||
| 383 | struct user_i387_ia32_struct env; | ||
| 384 | int err = 0; | ||
| 385 | |||
| 386 | /* | ||
| 387 | * Drop the current fpu, which clears used_math(). This ensures | ||
| 388 | * that a context switch during the copy of the new state cannot | ||
| 389 | * save or restore the intermediate state, which would otherwise | ||
| 390 | * corrupt the newly restored state. | ||
| 391 | * We will be ready to restore/save the state only after | ||
| 392 | * set_used_math() is set again. | ||
| 393 | */ | ||
| 394 | drop_fpu(tsk); | ||
| 395 | |||
| 396 | if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || | ||
| 397 | __copy_from_user(&env, buf, sizeof(env))) { | ||
| 398 | fpu_finit(fpu); | ||
| 399 | err = -1; | ||
| 400 | } else { | ||
| 401 | sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); | ||
| 402 | } | ||
| 403 | |||
| 404 | set_used_math(); | ||
| 405 | if (use_eager_fpu()) { | ||
| 406 | preempt_disable(); | ||
| 407 | math_state_restore(); | ||
| 408 | preempt_enable(); | ||
| 409 | } | ||
| 410 | |||
| 411 | return err; | ||
| 412 | } else { | ||
| 413 | /* | ||
| 414 | * For 64-bit frames and 32-bit fsave frames, restore the user | ||
| 415 | * state to the registers directly (with exceptions handled). | ||
| 416 | */ | ||
| 417 | user_fpu_begin(); | ||
| 418 | if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { | ||
| 419 | fpu_reset_state(tsk); | ||
| 420 | return -1; | ||
| 421 | } | ||
| 422 | } | ||
| 423 | |||
| 424 | return 0; | ||
| 425 | } | ||
| 426 | |||
| 427 | /* | ||
| 428 | * Prepare the SW reserved portion of the fxsave memory layout, indicating | ||
| 429 | * the presence of the extended state information in the memory layout | ||
| 430 | * pointed by the fpstate pointer in the sigcontext. | ||
| 431 | * This will be saved when ever the FP and extended state context is | ||
| 432 | * saved on the user stack during the signal handler delivery to the user. | ||
| 433 | */ | ||
| 434 | static void prepare_fx_sw_frame(void) | ||
| 435 | { | ||
| 436 | int fsave_header_size = sizeof(struct i387_fsave_struct); | ||
| 437 | int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; | ||
| 438 | |||
| 439 | if (config_enabled(CONFIG_X86_32)) | ||
| 440 | size += fsave_header_size; | ||
| 441 | |||
| 442 | fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; | ||
| 443 | fx_sw_reserved.extended_size = size; | ||
| 444 | fx_sw_reserved.xstate_bv = pcntxt_mask; | ||
| 445 | fx_sw_reserved.xstate_size = xstate_size; | ||
| 446 | |||
| 447 | if (config_enabled(CONFIG_IA32_EMULATION)) { | ||
| 448 | fx_sw_reserved_ia32 = fx_sw_reserved; | ||
| 449 | fx_sw_reserved_ia32.extended_size += fsave_header_size; | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | /* | ||
| 454 | * Enable the extended processor state save/restore feature | ||
| 455 | */ | ||
| 456 | static inline void xstate_enable(void) | ||
| 457 | { | ||
| 458 | cr4_set_bits(X86_CR4_OSXSAVE); | ||
| 459 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | ||
| 460 | } | ||
| 461 | |||
| 462 | /* | ||
| 463 | * Record the offsets and sizes of different state managed by the xsave | ||
| 464 | * memory layout. | ||
| 465 | */ | ||
| 466 | static void __init setup_xstate_features(void) | ||
| 467 | { | ||
| 468 | int eax, ebx, ecx, edx, leaf = 0x2; | ||
| 469 | |||
| 470 | xstate_features = fls64(pcntxt_mask); | ||
| 471 | xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); | ||
| 472 | xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); | ||
| 473 | |||
| 474 | do { | ||
| 475 | cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); | ||
| 476 | |||
| 477 | if (eax == 0) | ||
| 478 | break; | ||
| 479 | |||
| 480 | xstate_offsets[leaf] = ebx; | ||
| 481 | xstate_sizes[leaf] = eax; | ||
| 482 | |||
| 483 | leaf++; | ||
| 484 | } while (1); | ||
| 485 | } | ||
| 486 | |||
| 487 | /* | ||
| 488 | * This function sets up offsets and sizes of all extended states in | ||
| 489 | * xsave area. This supports both standard format and compacted format | ||
| 490 | * of the xsave area. | ||
| 491 | * | ||
| 492 | * Input: void | ||
| 493 | * Output: void | ||
| 494 | */ | ||
| 495 | void setup_xstate_comp(void) | ||
| 496 | { | ||
| 497 | unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; | ||
| 498 | int i; | ||
| 499 | |||
| 500 | /* | ||
| 501 | * The FP xstates and SSE xstates are legacy states. They are always | ||
| 502 | * in the fixed offsets in the xsave area in either compacted form | ||
| 503 | * or standard form. | ||
| 504 | */ | ||
| 505 | xstate_comp_offsets[0] = 0; | ||
| 506 | xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); | ||
| 507 | |||
| 508 | if (!cpu_has_xsaves) { | ||
| 509 | for (i = 2; i < xstate_features; i++) { | ||
| 510 | if (test_bit(i, (unsigned long *)&pcntxt_mask)) { | ||
| 511 | xstate_comp_offsets[i] = xstate_offsets[i]; | ||
| 512 | xstate_comp_sizes[i] = xstate_sizes[i]; | ||
| 513 | } | ||
| 514 | } | ||
| 515 | return; | ||
| 516 | } | ||
| 517 | |||
| 518 | xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; | ||
| 519 | |||
| 520 | for (i = 2; i < xstate_features; i++) { | ||
| 521 | if (test_bit(i, (unsigned long *)&pcntxt_mask)) | ||
| 522 | xstate_comp_sizes[i] = xstate_sizes[i]; | ||
| 523 | else | ||
| 524 | xstate_comp_sizes[i] = 0; | ||
| 525 | |||
| 526 | if (i > 2) | ||
| 527 | xstate_comp_offsets[i] = xstate_comp_offsets[i-1] | ||
| 528 | + xstate_comp_sizes[i-1]; | ||
| 529 | |||
| 530 | } | ||
| 531 | } | ||
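A worked example of the compacted-offset computation may help; assume a hypothetical XSAVES-capable CPU whose pcntxt_mask enables only features 0 (FP), 1 (SSE) and 2:

	/*
	 * Hypothetical compacted layout with feature bits 0, 1 and 2 set:
	 *
	 *   xstate_comp_offsets[0] = 0                         (x87 state)
	 *   xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct,
	 *                                     xmm_space)       (SSE state)
	 *   xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE
	 *                          = 512 + 64 = 576  (first slot past header)
	 *
	 * Any further enabled features are then packed back-to-back:
	 * offsets[i] = offsets[i-1] + sizes[i-1].
	 */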
| 532 | |||
| 533 | /* | ||
| 534 | * Set up the xstate image representing the init state. | ||
| 535 | */ | ||
| 536 | static void __init setup_init_fpu_buf(void) | ||
| 537 | { | ||
| 538 | /* | ||
| 539 | * Set up init_xstate_buf to represent the init state of | ||
| 540 | * all the features managed by xsave. | ||
| 541 | */ | ||
| 542 | init_xstate_buf = alloc_bootmem_align(xstate_size, | ||
| 543 | __alignof__(struct xsave_struct)); | ||
| 544 | fx_finit(&init_xstate_buf->i387); | ||
| 545 | |||
| 546 | if (!cpu_has_xsave) | ||
| 547 | return; | ||
| 548 | |||
| 549 | setup_xstate_features(); | ||
| 550 | |||
| 551 | if (cpu_has_xsaves) { | ||
| 552 | init_xstate_buf->xsave_hdr.xcomp_bv = | ||
| 553 | (u64)1 << 63 | pcntxt_mask; | ||
| 554 | init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; | ||
| 555 | } | ||
| 556 | |||
| 557 | /* | ||
| 558 | * Init all the feature states with header_bv being 0x0. | ||
| 559 | */ | ||
| 560 | xrstor_state_booting(init_xstate_buf, -1); | ||
| 561 | /* | ||
| 562 | * Dump the init state again. This is to identify the init state | ||
| 563 | * of any feature which is not represented by all zero's. | ||
| 564 | */ | ||
| 565 | xsave_state_booting(init_xstate_buf, -1); | ||
| 566 | } | ||
| 567 | |||
| 568 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; | ||
| 569 | static int __init eager_fpu_setup(char *s) | ||
| 570 | { | ||
| 571 | if (!strcmp(s, "on")) | ||
| 572 | eagerfpu = ENABLE; | ||
| 573 | else if (!strcmp(s, "off")) | ||
| 574 | eagerfpu = DISABLE; | ||
| 575 | else if (!strcmp(s, "auto")) | ||
| 576 | eagerfpu = AUTO; | ||
| 577 | return 1; | ||
| 578 | } | ||
| 579 | __setup("eagerfpu=", eager_fpu_setup); | ||
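The matching usage is the eagerfpu= boot parameter: eagerfpu=on, eagerfpu=off and eagerfpu=auto map to ENABLE, DISABLE and AUTO above. Note that xstate_enable_boot_cpu() further down can still promote AUTO to ENABLE when xsaveopt or an eager xstate feature is present.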
| 580 | |||
| 581 | |||
| 582 | /* | ||
| 583 | * Calculate total size of enabled xstates in XCR0/pcntxt_mask. | ||
| 584 | */ | ||
| 585 | static void __init init_xstate_size(void) | ||
| 586 | { | ||
| 587 | unsigned int eax, ebx, ecx, edx; | ||
| 588 | int i; | ||
| 589 | |||
| 590 | if (!cpu_has_xsaves) { | ||
| 591 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | ||
| 592 | xstate_size = ebx; | ||
| 593 | return; | ||
| 594 | } | ||
| 595 | |||
| 596 | xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; | ||
| 597 | for (i = 2; i < 64; i++) { | ||
| 598 | if (test_bit(i, (unsigned long *)&pcntxt_mask)) { | ||
| 599 | cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); | ||
| 600 | xstate_size += eax; | ||
| 601 | } | ||
| 602 | } | ||
| 603 | } | ||
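A hedged numeric example of the XSAVES branch, assuming a hypothetical CPU whose only extended state past FP/SSE is feature bit 2, reporting 256 bytes in CPUID leaf 0xD:

	/*
	 * xstate_size = FXSAVE_SIZE (512) + XSAVE_HDR_SIZE (64)
	 *             + 256 (CPUID(0xD, 2).EAX)
	 *             = 832 bytes, compacted format.
	 *
	 * Without XSAVES, CPUID(0xD, 0).EBX already reports the total
	 * standard-format size and the per-feature loop is skipped.
	 */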
| 604 | |||
| 605 | /* | ||
| 606 | * Enable and initialize the xsave feature. | ||
| 607 | */ | ||
| 608 | static void __init xstate_enable_boot_cpu(void) | ||
| 609 | { | ||
| 610 | unsigned int eax, ebx, ecx, edx; | ||
| 611 | |||
| 612 | if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { | ||
| 613 | WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); | ||
| 614 | return; | ||
| 615 | } | ||
| 616 | |||
| 617 | cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); | ||
| 618 | pcntxt_mask = eax + ((u64)edx << 32); | ||
| 619 | |||
| 620 | if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { | ||
| 621 | pr_err("FP/SSE not shown under xsave features 0x%llx\n", | ||
| 622 | pcntxt_mask); | ||
| 623 | BUG(); | ||
| 624 | } | ||
| 625 | |||
| 626 | /* | ||
| 627 | * Support only the state known to the OS. | ||
| 628 | */ | ||
| 629 | pcntxt_mask = pcntxt_mask & XCNTXT_MASK; | ||
| 630 | |||
| 631 | xstate_enable(); | ||
| 632 | |||
| 633 | /* | ||
| 634 | * Recompute the context size for enabled features | ||
| 635 | */ | ||
| 636 | init_xstate_size(); | ||
| 637 | |||
| 638 | update_regset_xstate_info(xstate_size, pcntxt_mask); | ||
| 639 | prepare_fx_sw_frame(); | ||
| 640 | setup_init_fpu_buf(); | ||
| 641 | |||
| 642 | /* Auto enable eagerfpu for xsaveopt */ | ||
| 643 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | ||
| 644 | eagerfpu = ENABLE; | ||
| 645 | |||
| 646 | if (pcntxt_mask & XSTATE_EAGER) { | ||
| 647 | if (eagerfpu == DISABLE) { | ||
| 648 | pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", | ||
| 649 | pcntxt_mask & XSTATE_EAGER); | ||
| 650 | pcntxt_mask &= ~XSTATE_EAGER; | ||
| 651 | } else { | ||
| 652 | eagerfpu = ENABLE; | ||
| 653 | } | ||
| 654 | } | ||
| 655 | |||
| 656 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", | ||
| 657 | pcntxt_mask, xstate_size, | ||
| 658 | cpu_has_xsaves ? "compacted form" : "standard form"); | ||
| 659 | } | ||
| 660 | |||
| 661 | /* | ||
| 662 | * For the very first instance, this calls xstate_enable_boot_cpu(); | ||
| 663 | * for all subsequent instances, this calls xstate_enable(). | ||
| 664 | * | ||
| 665 | * This is somewhat obfuscated due to the lack of powerful enough | ||
| 666 | * overrides for the section checks. | ||
| 667 | */ | ||
| 668 | void xsave_init(void) | ||
| 669 | { | ||
| 670 | static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; | ||
| 671 | void (*this_func)(void); | ||
| 672 | |||
| 673 | if (!cpu_has_xsave) | ||
| 674 | return; | ||
| 675 | |||
| 676 | this_func = next_func; | ||
| 677 | next_func = xstate_enable; | ||
| 678 | this_func(); | ||
| 679 | } | ||
| 680 | |||
| 681 | /* | ||
| 682 | * setup_init_fpu_buf() is __init and it is OK to call it here because | ||
| 683 | * init_xstate_buf will be unset only once during boot. | ||
| 684 | */ | ||
| 685 | void __init_refok eager_fpu_init(void) | ||
| 686 | { | ||
| 687 | WARN_ON(used_math()); | ||
| 688 | current_thread_info()->status = 0; | ||
| 689 | |||
| 690 | if (eagerfpu == ENABLE) | ||
| 691 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | ||
| 692 | |||
| 693 | if (!cpu_has_eager_fpu) { | ||
| 694 | stts(); | ||
| 695 | return; | ||
| 696 | } | ||
| 697 | |||
| 698 | if (!init_xstate_buf) | ||
| 699 | setup_init_fpu_buf(); | ||
| 700 | } | ||
| 701 | |||
| 702 | /* | ||
| 703 | * Given the xsave area and a state inside, this function returns the | ||
| 704 | * address of the state. | ||
| 705 | * | ||
| 706 | * This is the API that is called to get xstate address in either | ||
| 707 | * standard format or compacted format of xsave area. | ||
| 708 | * | ||
| 709 | * Inputs: | ||
| 710 | * xsave: base address of the xsave area; | ||
| 711 | * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, | ||
| 712 | * etc.) | ||
| 713 | * Output: | ||
| 714 | * address of the state in the xsave area. | ||
| 715 | */ | ||
| 716 | void *get_xsave_addr(struct xsave_struct *xsave, int xstate) | ||
| 717 | { | ||
| 718 | int feature = fls64(xstate) - 1; | ||
| 719 | if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) | ||
| 720 | return NULL; | ||
| 721 | |||
| 722 | return (void *)xsave + xstate_comp_offsets[feature]; | ||
| 723 | } | ||
| 724 | EXPORT_SYMBOL_GPL(get_xsave_addr); | ||
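A usage sketch, mirroring the MPX call site that this series rewrites further down (tsk, the bndreg type and the error value are illustrative):

	/* Sketch: look up the MPX bounds registers in a task's xsave area. */
	struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
	struct bndreg *bndregs;

	bndregs = get_xsave_addr(xsave, XSTATE_BNDREGS);
	if (!bndregs)
		return -EINVAL;	/* XSTATE_BNDREGS not enabled in pcntxt_mask */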
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1d08ad3582d0..9f705e618af5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
| @@ -16,10 +16,8 @@ | |||
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/vmalloc.h> | 17 | #include <linux/vmalloc.h> |
| 18 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
| 19 | #include <asm/i387.h> /* For use_eager_fpu. Ugh! */ | ||
| 20 | #include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */ | ||
| 21 | #include <asm/user.h> | 19 | #include <asm/user.h> |
| 22 | #include <asm/xsave.h> | 20 | #include <asm/fpu/xstate.h> |
| 23 | #include "cpuid.h" | 21 | #include "cpuid.h" |
| 24 | #include "lapic.h" | 22 | #include "lapic.h" |
| 25 | #include "mmu.h" | 23 | #include "mmu.h" |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d73807f0d31..e11dd59398f1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -40,8 +40,7 @@ | |||
| 40 | #include <asm/vmx.h> | 40 | #include <asm/vmx.h> |
| 41 | #include <asm/virtext.h> | 41 | #include <asm/virtext.h> |
| 42 | #include <asm/mce.h> | 42 | #include <asm/mce.h> |
| 43 | #include <asm/i387.h> | 43 | #include <asm/fpu/internal.h> |
| 44 | #include <asm/xcr.h> | ||
| 45 | #include <asm/perf_event.h> | 44 | #include <asm/perf_event.h> |
| 46 | #include <asm/debugreg.h> | 45 | #include <asm/debugreg.h> |
| 47 | #include <asm/kexec.h> | 46 | #include <asm/kexec.h> |
| @@ -1883,7 +1882,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
| 1883 | * If the FPU is not active (through the host task or | 1882 | * If the FPU is not active (through the host task or |
| 1884 | * the guest vcpu), then restore the cr0.TS bit. | 1883 | * the guest vcpu), then restore the cr0.TS bit. |
| 1885 | */ | 1884 | */ |
| 1886 | if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) | 1885 | if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) |
| 1887 | stts(); | 1886 | stts(); |
| 1888 | load_gdt(this_cpu_ptr(&host_gdt)); | 1887 | load_gdt(this_cpu_ptr(&host_gdt)); |
| 1889 | } | 1888 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea306adbbc13..26eaeb522cab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -59,9 +59,8 @@ | |||
| 59 | #include <asm/desc.h> | 59 | #include <asm/desc.h> |
| 60 | #include <asm/mtrr.h> | 60 | #include <asm/mtrr.h> |
| 61 | #include <asm/mce.h> | 61 | #include <asm/mce.h> |
| 62 | #include <asm/i387.h> | 62 | #include <linux/kernel_stat.h> |
| 63 | #include <asm/fpu-internal.h> /* Ugh! */ | 63 | #include <asm/fpu/internal.h> /* Ugh! */ |
| 64 | #include <asm/xcr.h> | ||
| 65 | #include <asm/pvclock.h> | 64 | #include <asm/pvclock.h> |
| 66 | #include <asm/div64.h> | 65 | #include <asm/div64.h> |
| 67 | 66 | ||
| @@ -3194,8 +3193,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
| 3194 | 3193 | ||
| 3195 | static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) | 3194 | static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) |
| 3196 | { | 3195 | { |
| 3197 | struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; | 3196 | struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; |
| 3198 | u64 xstate_bv = xsave->xsave_hdr.xstate_bv; | 3197 | u64 xstate_bv = xsave->header.xfeatures; |
| 3199 | u64 valid; | 3198 | u64 valid; |
| 3200 | 3199 | ||
| 3201 | /* | 3200 | /* |
| @@ -3230,7 +3229,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) | |||
| 3230 | 3229 | ||
| 3231 | static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) | 3230 | static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) |
| 3232 | { | 3231 | { |
| 3233 | struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; | 3232 | struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; |
| 3234 | u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); | 3233 | u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); |
| 3235 | u64 valid; | 3234 | u64 valid; |
| 3236 | 3235 | ||
| @@ -3241,9 +3240,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) | |||
| 3241 | memcpy(xsave, src, XSAVE_HDR_OFFSET); | 3240 | memcpy(xsave, src, XSAVE_HDR_OFFSET); |
| 3242 | 3241 | ||
| 3243 | /* Set XSTATE_BV and possibly XCOMP_BV. */ | 3242 | /* Set XSTATE_BV and possibly XCOMP_BV. */ |
| 3244 | xsave->xsave_hdr.xstate_bv = xstate_bv; | 3243 | xsave->header.xfeatures = xstate_bv; |
| 3245 | if (cpu_has_xsaves) | 3244 | if (cpu_has_xsaves) |
| 3246 | xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; | 3245 | xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; |
| 3247 | 3246 | ||
| 3248 | /* | 3247 | /* |
| 3249 | * Copy each region from the non-compacted offset to the | 3248 | * Copy each region from the non-compacted offset to the |
| @@ -3275,8 +3274,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, | |||
| 3275 | fill_xsave((u8 *) guest_xsave->region, vcpu); | 3274 | fill_xsave((u8 *) guest_xsave->region, vcpu); |
| 3276 | } else { | 3275 | } else { |
| 3277 | memcpy(guest_xsave->region, | 3276 | memcpy(guest_xsave->region, |
| 3278 | &vcpu->arch.guest_fpu.state->fxsave, | 3277 | &vcpu->arch.guest_fpu.state.fxsave, |
| 3279 | sizeof(struct i387_fxsave_struct)); | 3278 | sizeof(struct fxregs_state)); |
| 3280 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = | 3279 | *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = |
| 3281 | XSTATE_FPSSE; | 3280 | XSTATE_FPSSE; |
| 3282 | } | 3281 | } |
| @@ -3300,8 +3299,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | |||
| 3300 | } else { | 3299 | } else { |
| 3301 | if (xstate_bv & ~XSTATE_FPSSE) | 3300 | if (xstate_bv & ~XSTATE_FPSSE) |
| 3302 | return -EINVAL; | 3301 | return -EINVAL; |
| 3303 | memcpy(&vcpu->arch.guest_fpu.state->fxsave, | 3302 | memcpy(&vcpu->arch.guest_fpu.state.fxsave, |
| 3304 | guest_xsave->region, sizeof(struct i387_fxsave_struct)); | 3303 | guest_xsave->region, sizeof(struct fxregs_state)); |
| 3305 | } | 3304 | } |
| 3306 | return 0; | 3305 | return 0; |
| 3307 | } | 3306 | } |
| @@ -6597,11 +6596,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) | |||
| 6597 | 6596 | ||
| 6598 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 6597 | int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 6599 | { | 6598 | { |
| 6599 | struct fpu *fpu = ¤t->thread.fpu; | ||
| 6600 | int r; | 6600 | int r; |
| 6601 | sigset_t sigsaved; | 6601 | sigset_t sigsaved; |
| 6602 | 6602 | ||
| 6603 | if (!tsk_used_math(current) && init_fpu(current)) | 6603 | fpu__activate_curr(fpu); |
| 6604 | return -ENOMEM; | ||
| 6605 | 6604 | ||
| 6606 | if (vcpu->sigset_active) | 6605 | if (vcpu->sigset_active) |
| 6607 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | 6606 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); |
| @@ -6971,8 +6970,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
| 6971 | 6970 | ||
| 6972 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 6971 | int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 6973 | { | 6972 | { |
| 6974 | struct i387_fxsave_struct *fxsave = | 6973 | struct fxregs_state *fxsave = |
| 6975 | &vcpu->arch.guest_fpu.state->fxsave; | 6974 | &vcpu->arch.guest_fpu.state.fxsave; |
| 6976 | 6975 | ||
| 6977 | memcpy(fpu->fpr, fxsave->st_space, 128); | 6976 | memcpy(fpu->fpr, fxsave->st_space, 128); |
| 6978 | fpu->fcw = fxsave->cwd; | 6977 | fpu->fcw = fxsave->cwd; |
| @@ -6988,8 +6987,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 6988 | 6987 | ||
| 6989 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | 6988 | int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) |
| 6990 | { | 6989 | { |
| 6991 | struct i387_fxsave_struct *fxsave = | 6990 | struct fxregs_state *fxsave = |
| 6992 | &vcpu->arch.guest_fpu.state->fxsave; | 6991 | &vcpu->arch.guest_fpu.state.fxsave; |
| 6993 | 6992 | ||
| 6994 | memcpy(fxsave->st_space, fpu->fpr, 128); | 6993 | memcpy(fxsave->st_space, fpu->fpr, 128); |
| 6995 | fxsave->cwd = fpu->fcw; | 6994 | fxsave->cwd = fpu->fcw; |
| @@ -7003,17 +7002,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) | |||
| 7003 | return 0; | 7002 | return 0; |
| 7004 | } | 7003 | } |
| 7005 | 7004 | ||
| 7006 | int fx_init(struct kvm_vcpu *vcpu) | 7005 | static void fx_init(struct kvm_vcpu *vcpu) |
| 7007 | { | 7006 | { |
| 7008 | int err; | 7007 | fpstate_init(&vcpu->arch.guest_fpu.state); |
| 7009 | |||
| 7010 | err = fpu_alloc(&vcpu->arch.guest_fpu); | ||
| 7011 | if (err) | ||
| 7012 | return err; | ||
| 7013 | |||
| 7014 | fpu_finit(&vcpu->arch.guest_fpu); | ||
| 7015 | if (cpu_has_xsaves) | 7008 | if (cpu_has_xsaves) |
| 7016 | vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = | 7009 | vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = |
| 7017 | host_xcr0 | XSTATE_COMPACTION_ENABLED; | 7010 | host_xcr0 | XSTATE_COMPACTION_ENABLED; |
| 7018 | 7011 | ||
| 7019 | /* | 7012 | /* |
| @@ -7022,14 +7015,6 @@ int fx_init(struct kvm_vcpu *vcpu) | |||
| 7022 | vcpu->arch.xcr0 = XSTATE_FP; | 7015 | vcpu->arch.xcr0 = XSTATE_FP; |
| 7023 | 7016 | ||
| 7024 | vcpu->arch.cr0 |= X86_CR0_ET; | 7017 | vcpu->arch.cr0 |= X86_CR0_ET; |
| 7025 | |||
| 7026 | return 0; | ||
| 7027 | } | ||
| 7028 | EXPORT_SYMBOL_GPL(fx_init); | ||
| 7029 | |||
| 7030 | static void fx_free(struct kvm_vcpu *vcpu) | ||
| 7031 | { | ||
| 7032 | fpu_free(&vcpu->arch.guest_fpu); | ||
| 7033 | } | 7018 | } |
| 7034 | 7019 | ||
| 7035 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 7020 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
| @@ -7045,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | |||
| 7045 | kvm_put_guest_xcr0(vcpu); | 7030 | kvm_put_guest_xcr0(vcpu); |
| 7046 | vcpu->guest_fpu_loaded = 1; | 7031 | vcpu->guest_fpu_loaded = 1; |
| 7047 | __kernel_fpu_begin(); | 7032 | __kernel_fpu_begin(); |
| 7048 | fpu_restore_checking(&vcpu->arch.guest_fpu); | 7033 | __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); |
| 7049 | trace_kvm_fpu(1); | 7034 | trace_kvm_fpu(1); |
| 7050 | } | 7035 | } |
| 7051 | 7036 | ||
| @@ -7057,7 +7042,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
| 7057 | return; | 7042 | return; |
| 7058 | 7043 | ||
| 7059 | vcpu->guest_fpu_loaded = 0; | 7044 | vcpu->guest_fpu_loaded = 0; |
| 7060 | fpu_save_init(&vcpu->arch.guest_fpu); | 7045 | copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); |
| 7061 | __kernel_fpu_end(); | 7046 | __kernel_fpu_end(); |
| 7062 | ++vcpu->stat.fpu_reload; | 7047 | ++vcpu->stat.fpu_reload; |
| 7063 | if (!vcpu->arch.eager_fpu) | 7048 | if (!vcpu->arch.eager_fpu) |
| @@ -7071,7 +7056,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | |||
| 7071 | kvmclock_reset(vcpu); | 7056 | kvmclock_reset(vcpu); |
| 7072 | 7057 | ||
| 7073 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | 7058 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); |
| 7074 | fx_free(vcpu); | ||
| 7075 | kvm_x86_ops->vcpu_free(vcpu); | 7059 | kvm_x86_ops->vcpu_free(vcpu); |
| 7076 | } | 7060 | } |
| 7077 | 7061 | ||
| @@ -7137,7 +7121,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
| 7137 | kvm_mmu_unload(vcpu); | 7121 | kvm_mmu_unload(vcpu); |
| 7138 | vcpu_put(vcpu); | 7122 | vcpu_put(vcpu); |
| 7139 | 7123 | ||
| 7140 | fx_free(vcpu); | ||
| 7141 | kvm_x86_ops->vcpu_free(vcpu); | 7124 | kvm_x86_ops->vcpu_free(vcpu); |
| 7142 | } | 7125 | } |
| 7143 | 7126 | ||
| @@ -7363,9 +7346,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 7363 | goto fail_free_mce_banks; | 7346 | goto fail_free_mce_banks; |
| 7364 | } | 7347 | } |
| 7365 | 7348 | ||
| 7366 | r = fx_init(vcpu); | 7349 | fx_init(vcpu); |
| 7367 | if (r) | ||
| 7368 | goto fail_free_wbinvd_dirty_mask; | ||
| 7369 | 7350 | ||
| 7370 | vcpu->arch.ia32_tsc_adjust_msr = 0x0; | 7351 | vcpu->arch.ia32_tsc_adjust_msr = 0x0; |
| 7371 | vcpu->arch.pv_time_enabled = false; | 7352 | vcpu->arch.pv_time_enabled = false; |
| @@ -7379,8 +7360,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) | |||
| 7379 | kvm_pmu_init(vcpu); | 7360 | kvm_pmu_init(vcpu); |
| 7380 | 7361 | ||
| 7381 | return 0; | 7362 | return 0; |
| 7382 | fail_free_wbinvd_dirty_mask: | 7363 | |
| 7383 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | ||
| 7384 | fail_free_mce_banks: | 7364 | fail_free_mce_banks: |
| 7385 | kfree(vcpu->arch.mce_banks); | 7365 | kfree(vcpu->arch.mce_banks); |
| 7386 | fail_free_lapic: | 7366 | fail_free_lapic: |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..27f8eea0d6eb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
| @@ -70,7 +70,7 @@ | |||
| 70 | #include <asm/e820.h> | 70 | #include <asm/e820.h> |
| 71 | #include <asm/mce.h> | 71 | #include <asm/mce.h> |
| 72 | #include <asm/io.h> | 72 | #include <asm/io.h> |
| 73 | #include <asm/i387.h> | 73 | #include <asm/fpu/api.h> |
| 74 | #include <asm/stackprotector.h> | 74 | #include <asm/stackprotector.h> |
| 75 | #include <asm/reboot.h> /* for struct machine_ops */ | 75 | #include <asm/reboot.h> /* for struct machine_ops */ |
| 76 | #include <asm/kvm_para.h> | 76 | #include <asm/kvm_para.h> |
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9ba8dd8..e5e3ed8dc079 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
| 23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
| 24 | 24 | ||
| 25 | #include <asm/i387.h> | 25 | #include <asm/fpu/api.h> |
| 26 | #include <asm/asm.h> | 26 | #include <asm/asm.h> |
| 27 | 27 | ||
| 28 | void *_mmx_memcpy(void *to, const void *from, size_t len) | 28 | void *_mmx_memcpy(void *to, const void *from, size_t len) |
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index dc8adad10a2f..dd76a05729b0 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c | |||
| @@ -30,7 +30,7 @@ static void fclex(void) | |||
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | /* Needs to be externally visible */ | 32 | /* Needs to be externally visible */ |
| 33 | void finit_soft_fpu(struct i387_soft_struct *soft) | 33 | void fpstate_init_soft(struct swregs_state *soft) |
| 34 | { | 34 | { |
| 35 | struct address *oaddr, *iaddr; | 35 | struct address *oaddr, *iaddr; |
| 36 | memset(soft, 0, sizeof(*soft)); | 36 | memset(soft, 0, sizeof(*soft)); |
| @@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft) | |||
| 52 | 52 | ||
| 53 | void finit(void) | 53 | void finit(void) |
| 54 | { | 54 | { |
| 55 | finit_soft_fpu(¤t->thread.fpu.state->soft); | 55 | fpstate_init_soft(¤t->thread.fpu.state.soft); |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | /* | 58 | /* |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d..f37e84ab49f3 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <asm/traps.h> | 31 | #include <asm/traps.h> |
| 32 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
| 33 | #include <asm/user.h> | 33 | #include <asm/user.h> |
| 34 | #include <asm/i387.h> | 34 | #include <asm/fpu/internal.h> |
| 35 | 35 | ||
| 36 | #include "fpu_system.h" | 36 | #include "fpu_system.h" |
| 37 | #include "fpu_emu.h" | 37 | #include "fpu_emu.h" |
| @@ -147,13 +147,9 @@ void math_emulate(struct math_emu_info *info) | |||
| 147 | unsigned long code_base = 0; | 147 | unsigned long code_base = 0; |
| 148 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ | 148 | unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ |
| 149 | struct desc_struct code_descriptor; | 149 | struct desc_struct code_descriptor; |
| 150 | struct fpu *fpu = ¤t->thread.fpu; | ||
| 150 | 151 | ||
| 151 | if (!used_math()) { | 152 | fpu__activate_curr(fpu); |
| 152 | if (init_fpu(current)) { | ||
| 153 | do_group_exit(SIGKILL); | ||
| 154 | return; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | 153 | ||
| 158 | #ifdef RE_ENTRANT_CHECKING | 154 | #ifdef RE_ENTRANT_CHECKING |
| 159 | if (emulating) { | 155 | if (emulating) { |
| @@ -673,7 +669,7 @@ void math_abort(struct math_emu_info *info, unsigned int signal) | |||
| 673 | #endif /* PARANOID */ | 669 | #endif /* PARANOID */ |
| 674 | } | 670 | } |
| 675 | 671 | ||
| 676 | #define S387 ((struct i387_soft_struct *)s387) | 672 | #define S387 ((struct swregs_state *)s387) |
| 677 | #define sstatus_word() \ | 673 | #define sstatus_word() \ |
| 678 | ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) | 674 | ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) |
| 679 | 675 | ||
| @@ -682,14 +678,14 @@ int fpregs_soft_set(struct task_struct *target, | |||
| 682 | unsigned int pos, unsigned int count, | 678 | unsigned int pos, unsigned int count, |
| 683 | const void *kbuf, const void __user *ubuf) | 679 | const void *kbuf, const void __user *ubuf) |
| 684 | { | 680 | { |
| 685 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; | 681 | struct swregs_state *s387 = &target->thread.fpu.state.soft; |
| 686 | void *space = s387->st_space; | 682 | void *space = s387->st_space; |
| 687 | int ret; | 683 | int ret; |
| 688 | int offset, other, i, tags, regnr, tag, newtop; | 684 | int offset, other, i, tags, regnr, tag, newtop; |
| 689 | 685 | ||
| 690 | RE_ENTRANT_CHECK_OFF; | 686 | RE_ENTRANT_CHECK_OFF; |
| 691 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, | 687 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, |
| 692 | offsetof(struct i387_soft_struct, st_space)); | 688 | offsetof(struct swregs_state, st_space)); |
| 693 | RE_ENTRANT_CHECK_ON; | 689 | RE_ENTRANT_CHECK_ON; |
| 694 | 690 | ||
| 695 | if (ret) | 691 | if (ret) |
| @@ -734,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, | |||
| 734 | unsigned int pos, unsigned int count, | 730 | unsigned int pos, unsigned int count, |
| 735 | void *kbuf, void __user *ubuf) | 731 | void *kbuf, void __user *ubuf) |
| 736 | { | 732 | { |
| 737 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; | 733 | struct swregs_state *s387 = &target->thread.fpu.state.soft; |
| 738 | const void *space = s387->st_space; | 734 | const void *space = s387->st_space; |
| 739 | int ret; | 735 | int ret; |
| 740 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; | 736 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; |
| @@ -752,7 +748,7 @@ int fpregs_soft_get(struct task_struct *target, | |||
| 752 | #endif /* PECULIAR_486 */ | 748 | #endif /* PECULIAR_486 */ |
| 753 | 749 | ||
| 754 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, | 750 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, |
| 755 | offsetof(struct i387_soft_struct, st_space)); | 751 | offsetof(struct swregs_state, st_space)); |
| 756 | 752 | ||
| 757 | /* Copy all registers in stack order. */ | 753 | /* Copy all registers in stack order. */ |
| 758 | if (!ret) | 754 | if (!ret) |
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c614410a5f3..9ccecb61a4fa 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ | 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ |
| 32 | == (1 << 10)) | 32 | == (1 << 10)) |
| 33 | 33 | ||
| 34 | #define I387 (current->thread.fpu.state) | 34 | #define I387 (¤t->thread.fpu.state) |
| 35 | #define FPU_info (I387->soft.info) | 35 | #define FPU_info (I387->soft.info) |
| 36 | 36 | ||
| 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) | 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) |
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c439ec478216..7a657f58bbea 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c | |||
| @@ -10,13 +10,15 @@ | |||
| 10 | #include <linux/syscalls.h> | 10 | #include <linux/syscalls.h> |
| 11 | #include <linux/sched/sysctl.h> | 11 | #include <linux/sched/sysctl.h> |
| 12 | 12 | ||
| 13 | #include <asm/i387.h> | ||
| 14 | #include <asm/insn.h> | 13 | #include <asm/insn.h> |
| 15 | #include <asm/mman.h> | 14 | #include <asm/mman.h> |
| 16 | #include <asm/mmu_context.h> | 15 | #include <asm/mmu_context.h> |
| 17 | #include <asm/mpx.h> | 16 | #include <asm/mpx.h> |
| 18 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
| 19 | #include <asm/fpu-internal.h> | 18 | #include <asm/fpu/internal.h> |
| 19 | |||
| 20 | #define CREATE_TRACE_POINTS | ||
| 21 | #include <asm/trace/mpx.h> | ||
| 20 | 22 | ||
| 21 | static const char *mpx_mapping_name(struct vm_area_struct *vma) | 23 | static const char *mpx_mapping_name(struct vm_area_struct *vma) |
| 22 | { | 24 | { |
| @@ -32,6 +34,22 @@ static int is_mpx_vma(struct vm_area_struct *vma) | |||
| 32 | return (vma->vm_ops == &mpx_vma_ops); | 34 | return (vma->vm_ops == &mpx_vma_ops); |
| 33 | } | 35 | } |
| 34 | 36 | ||
| 37 | static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) | ||
| 38 | { | ||
| 39 | if (is_64bit_mm(mm)) | ||
| 40 | return MPX_BD_SIZE_BYTES_64; | ||
| 41 | else | ||
| 42 | return MPX_BD_SIZE_BYTES_32; | ||
| 43 | } | ||
| 44 | |||
| 45 | static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) | ||
| 46 | { | ||
| 47 | if (is_64bit_mm(mm)) | ||
| 48 | return MPX_BT_SIZE_BYTES_64; | ||
| 49 | else | ||
| 50 | return MPX_BT_SIZE_BYTES_32; | ||
| 51 | } | ||
| 52 | |||
| 35 | /* | 53 | /* |
| 36 | * This is really a simplified "vm_mmap". It only handles MPX | 54 | * This is really a simplified "vm_mmap". It only handles MPX |
| 37 | * bounds tables (the bounds directory is user-allocated). | 55 | * bounds tables (the bounds directory is user-allocated). |
| @@ -47,8 +65,8 @@ static unsigned long mpx_mmap(unsigned long len) | |||
| 47 | vm_flags_t vm_flags; | 65 | vm_flags_t vm_flags; |
| 48 | struct vm_area_struct *vma; | 66 | struct vm_area_struct *vma; |
| 49 | 67 | ||
| 50 | /* Only bounds table and bounds directory can be allocated here */ | 68 | /* Only bounds table can be allocated here */ |
| 51 | if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) | 69 | if (len != mpx_bt_size_bytes(mm)) |
| 52 | return -EINVAL; | 70 | return -EINVAL; |
| 53 | 71 | ||
| 54 | down_write(&mm->mmap_sem); | 72 | down_write(&mm->mmap_sem); |
| @@ -272,10 +290,9 @@ bad_opcode: | |||
| 272 | * | 290 | * |
| 273 | * The caller is expected to kfree() the returned siginfo_t. | 291 | * The caller is expected to kfree() the returned siginfo_t. |
| 274 | */ | 292 | */ |
| 275 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | 293 | siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) |
| 276 | struct xsave_struct *xsave_buf) | ||
| 277 | { | 294 | { |
| 278 | struct bndreg *bndregs, *bndreg; | 295 | const struct bndreg *bndregs, *bndreg; |
| 279 | siginfo_t *info = NULL; | 296 | siginfo_t *info = NULL; |
| 280 | struct insn insn; | 297 | struct insn insn; |
| 281 | uint8_t bndregno; | 298 | uint8_t bndregno; |
| @@ -295,8 +312,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | |||
| 295 | err = -EINVAL; | 312 | err = -EINVAL; |
| 296 | goto err_out; | 313 | goto err_out; |
| 297 | } | 314 | } |
| 298 | /* get the bndregs _area_ of the xsave structure */ | 315 | /* get bndregs field from current task's xsave area */ |
| 299 | bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); | 316 | bndregs = get_xsave_field_ptr(XSTATE_BNDREGS); |
| 300 | if (!bndregs) { | 317 | if (!bndregs) { |
| 301 | err = -EINVAL; | 318 | err = -EINVAL; |
| 302 | goto err_out; | 319 | goto err_out; |
| @@ -334,6 +351,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, | |||
| 334 | err = -EINVAL; | 351 | err = -EINVAL; |
| 335 | goto err_out; | 352 | goto err_out; |
| 336 | } | 353 | } |
| 354 | trace_mpx_bounds_register_exception(info->si_addr, bndreg); | ||
| 337 | return info; | 355 | return info; |
| 338 | err_out: | 356 | err_out: |
| 339 | /* info might be NULL, but kfree() handles that */ | 357 | /* info might be NULL, but kfree() handles that */ |
| @@ -341,25 +359,18 @@ err_out: | |||
| 341 | return ERR_PTR(err); | 359 | return ERR_PTR(err); |
| 342 | } | 360 | } |
| 343 | 361 | ||
| 344 | static __user void *task_get_bounds_dir(struct task_struct *tsk) | 362 | static __user void *mpx_get_bounds_dir(void) |
| 345 | { | 363 | { |
| 346 | struct bndcsr *bndcsr; | 364 | const struct bndcsr *bndcsr; |
| 347 | 365 | ||
| 348 | if (!cpu_feature_enabled(X86_FEATURE_MPX)) | 366 | if (!cpu_feature_enabled(X86_FEATURE_MPX)) |
| 349 | return MPX_INVALID_BOUNDS_DIR; | 367 | return MPX_INVALID_BOUNDS_DIR; |
| 350 | 368 | ||
| 351 | /* | 369 | /* |
| 352 | * 32-bit binaries on 64-bit kernels are currently | ||
| 353 | * unsupported. | ||
| 354 | */ | ||
| 355 | if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) | ||
| 356 | return MPX_INVALID_BOUNDS_DIR; | ||
| 357 | /* | ||
| 358 | * The bounds directory pointer is stored in a register | 370 | * The bounds directory pointer is stored in a register |
| 359 | * only accessible if we first do an xsave. | 371 | * only accessible if we first do an xsave. |
| 360 | */ | 372 | */ |
| 361 | fpu_save_init(&tsk->thread.fpu); | 373 | bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); |
| 362 | bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); | ||
| 363 | if (!bndcsr) | 374 | if (!bndcsr) |
| 364 | return MPX_INVALID_BOUNDS_DIR; | 375 | return MPX_INVALID_BOUNDS_DIR; |
| 365 | 376 | ||
| @@ -378,10 +389,10 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) | |||
| 378 | (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); | 389 | (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); |
| 379 | } | 390 | } |
| 380 | 391 | ||
| 381 | int mpx_enable_management(struct task_struct *tsk) | 392 | int mpx_enable_management(void) |
| 382 | { | 393 | { |
| 383 | void __user *bd_base = MPX_INVALID_BOUNDS_DIR; | 394 | void __user *bd_base = MPX_INVALID_BOUNDS_DIR; |
| 384 | struct mm_struct *mm = tsk->mm; | 395 | struct mm_struct *mm = current->mm; |
| 385 | int ret = 0; | 396 | int ret = 0; |
| 386 | 397 | ||
| 387 | /* | 398 | /* |
| @@ -390,11 +401,12 @@ int mpx_enable_management(struct task_struct *tsk) | |||
| 390 | * directory into XSAVE/XRSTOR Save Area and enable MPX through | 401 | * directory into XSAVE/XRSTOR Save Area and enable MPX through |
| 391 | * XRSTOR instruction. | 402 | * XRSTOR instruction. |
| 392 | * | 403 | * |
| 393 | * fpu_xsave() is expected to be very expensive. Storing the bounds | 404 | * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is |
| 394 | * directory here means that we do not have to do xsave in the unmap | 405 | * expected to be relatively expensive. Storing the bounds |
| 395 | * path; we can just use mm->bd_addr instead. | 406 | * directory here means that we do not have to do xsave in the |
| 407 | * unmap path; we can just use mm->bd_addr instead. | ||
| 396 | */ | 408 | */ |
| 397 | bd_base = task_get_bounds_dir(tsk); | 409 | bd_base = mpx_get_bounds_dir(); |
| 398 | down_write(&mm->mmap_sem); | 410 | down_write(&mm->mmap_sem); |
| 399 | mm->bd_addr = bd_base; | 411 | mm->bd_addr = bd_base; |
| 400 | if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) | 412 | if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) |
| @@ -404,7 +416,7 @@ int mpx_enable_management(struct task_struct *tsk) | |||
| 404 | return ret; | 416 | return ret; |
| 405 | } | 417 | } |
| 406 | 418 | ||
| 407 | int mpx_disable_management(struct task_struct *tsk) | 419 | int mpx_disable_management(void) |
| 408 | { | 420 | { |
| 409 | struct mm_struct *mm = current->mm; | 421 | struct mm_struct *mm = current->mm; |
| 410 | 422 | ||
| @@ -417,29 +429,59 @@ int mpx_disable_management(struct task_struct *tsk) | |||
| 417 | return 0; | 429 | return 0; |
| 418 | } | 430 | } |
| 419 | 431 | ||
| 432 | static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, | ||
| 433 | unsigned long *curval, | ||
| 434 | unsigned long __user *addr, | ||
| 435 | unsigned long old_val, unsigned long new_val) | ||
| 436 | { | ||
| 437 | int ret; | ||
| 438 | /* | ||
| 439 | * user_atomic_cmpxchg_inatomic() actually uses sizeof() | ||
| 440 | * the pointer that we pass to it to figure out how much | ||
| 441 | * data to cmpxchg. We have to be careful here not to | ||
| 442 | * pass a pointer to a 64-bit data type when we only want | ||
| 443 | * a 32-bit copy. | ||
| 444 | */ | ||
| 445 | if (is_64bit_mm(mm)) { | ||
| 446 | ret = user_atomic_cmpxchg_inatomic(curval, | ||
| 447 | addr, old_val, new_val); | ||
| 448 | } else { | ||
| 449 | u32 uninitialized_var(curval_32); | ||
| 450 | u32 old_val_32 = old_val; | ||
| 451 | u32 new_val_32 = new_val; | ||
| 452 | u32 __user *addr_32 = (u32 __user *)addr; | ||
| 453 | |||
| 454 | ret = user_atomic_cmpxchg_inatomic(&curval_32, | ||
| 455 | addr_32, old_val_32, new_val_32); | ||
| 456 | *curval = curval_32; | ||
| 457 | } | ||
| 458 | return ret; | ||
| 459 | } | ||
| 460 | |||
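mpx_cmpxchg_bd_entry() exists because user_atomic_cmpxchg_inatomic() derives its operand width from sizeof(*ptr); an 8-byte cmpxchg against a 32-bit task's 4-byte directory entry would clobber the neighboring entry. A hedged usage sketch, mirroring how allocate_bt() below installs a new table into an empty entry:

	unsigned long cur;
	int err;

	/* 4-byte cmpxchg for a 32-bit mm, 8-byte for a 64-bit mm */
	err = mpx_cmpxchg_bd_entry(mm, &cur, bd_entry,
				   0, bt_addr | MPX_BD_ENTRY_VALID_FLAG);
	if (!err && cur != 0) {
		/* lost the race: someone else installed a table first */
	}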
| 420 | /* | 461 | /* |
| 421 | * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each | 462 | * With 32-bit mode, a bounds directory is 4MB, and the size of each |
| 422 | * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, | 463 | * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, |
| 423 | * and the size of each bounds table is 4MB. | 464 | * and the size of each bounds table is 4MB. |
| 424 | */ | 465 | */ |
| 425 | static int allocate_bt(long __user *bd_entry) | 466 | static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) |
| 426 | { | 467 | { |
| 427 | unsigned long expected_old_val = 0; | 468 | unsigned long expected_old_val = 0; |
| 428 | unsigned long actual_old_val = 0; | 469 | unsigned long actual_old_val = 0; |
| 429 | unsigned long bt_addr; | 470 | unsigned long bt_addr; |
| 471 | unsigned long bd_new_entry; | ||
| 430 | int ret = 0; | 472 | int ret = 0; |
| 431 | 473 | ||
| 432 | /* | 474 | /* |
| 433 | * Carve the virtual space out of userspace for the new | 475 | * Carve the virtual space out of userspace for the new |
| 434 | * bounds table: | 476 | * bounds table: |
| 435 | */ | 477 | */ |
| 436 | bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); | 478 | bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); |
| 437 | if (IS_ERR((void *)bt_addr)) | 479 | if (IS_ERR((void *)bt_addr)) |
| 438 | return PTR_ERR((void *)bt_addr); | 480 | return PTR_ERR((void *)bt_addr); |
| 439 | /* | 481 | /* |
| 440 | * Set the valid flag (kinda like _PAGE_PRESENT in a pte) | 482 | * Set the valid flag (kinda like _PAGE_PRESENT in a pte) |
| 441 | */ | 483 | */ |
| 442 | bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; | 484 | bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG; |
| 443 | 485 | ||
| 444 | /* | 486 | /* |
| 445 | * Go poke the address of the new bounds table in to the | 487 | * Go poke the address of the new bounds table in to the |
| @@ -452,8 +494,8 @@ static int allocate_bt(long __user *bd_entry) | |||
| 452 | * mmap_sem at this point, unlike some of the other part | 494 | * mmap_sem at this point, unlike some of the other part |
| 453 | * of the MPX code that have to pagefault_disable(). | 495 | * of the MPX code that have to pagefault_disable(). |
| 454 | */ | 496 | */ |
| 455 | ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, | 497 | ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry, |
| 456 | expected_old_val, bt_addr); | 498 | expected_old_val, bd_new_entry); |
| 457 | if (ret) | 499 | if (ret) |
| 458 | goto out_unmap; | 500 | goto out_unmap; |
| 459 | 501 | ||
| @@ -481,9 +523,10 @@ static int allocate_bt(long __user *bd_entry) | |||
| 481 | ret = -EINVAL; | 523 | ret = -EINVAL; |
| 482 | goto out_unmap; | 524 | goto out_unmap; |
| 483 | } | 525 | } |
| 526 | trace_mpx_new_bounds_table(bt_addr); | ||
| 484 | return 0; | 527 | return 0; |
| 485 | out_unmap: | 528 | out_unmap: |
| 486 | vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); | 529 | vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); |
| 487 | return ret; | 530 | return ret; |
| 488 | } | 531 | } |
| 489 | 532 | ||
| @@ -498,12 +541,13 @@ out_unmap: | |||
| 498 | * bounds table is 16KB. With 64-bit mode, the size of BD is 2GB, | 541 | * bounds table is 16KB. With 64-bit mode, the size of BD is 2GB, |
| 499 | * and the size of each bounds table is 4MB. | 542 | * and the size of each bounds table is 4MB. |
| 500 | */ | 543 | */ |
| 501 | static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) | 544 | static int do_mpx_bt_fault(void) |
| 502 | { | 545 | { |
| 503 | unsigned long bd_entry, bd_base; | 546 | unsigned long bd_entry, bd_base; |
| 504 | struct bndcsr *bndcsr; | 547 | const struct bndcsr *bndcsr; |
| 548 | struct mm_struct *mm = current->mm; | ||
| 505 | 549 | ||
| 506 | bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); | 550 | bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); |
| 507 | if (!bndcsr) | 551 | if (!bndcsr) |
| 508 | return -EINVAL; | 552 | return -EINVAL; |
| 509 | /* | 553 | /* |
| @@ -520,13 +564,13 @@ static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) | |||
| 520 | * the directory is. | 564 | * the directory is. |
| 521 | */ | 565 | */ |
| 522 | if ((bd_entry < bd_base) || | 566 | if ((bd_entry < bd_base) || |
| 523 | (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) | 567 | (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) |
| 524 | return -EINVAL; | 568 | return -EINVAL; |
| 525 | 569 | ||
| 526 | return allocate_bt((long __user *)bd_entry); | 570 | return allocate_bt(mm, (long __user *)bd_entry); |
| 527 | } | 571 | } |
| 528 | 572 | ||
| 529 | int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) | 573 | int mpx_handle_bd_fault(void) |
| 530 | { | 574 | { |
| 531 | /* | 575 | /* |
| 532 | * Userspace never asked us to manage the bounds tables, | 576 | * Userspace never asked us to manage the bounds tables, |
| @@ -535,7 +579,7 @@ int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) | |||
| 535 | if (!kernel_managing_mpx_tables(current->mm)) | 579 | if (!kernel_managing_mpx_tables(current->mm)) |
| 536 | return -EINVAL; | 580 | return -EINVAL; |
| 537 | 581 | ||
| 538 | if (do_mpx_bt_fault(xsave_buf)) { | 582 | if (do_mpx_bt_fault()) { |
| 539 | force_sig(SIGSEGV, current); | 583 | force_sig(SIGSEGV, current); |
| 540 | /* | 584 | /* |
| 541 | * The force_sig() is essentially "handling" this | 585 | * The force_sig() is essentially "handling" this |
| @@ -572,29 +616,55 @@ static int mpx_resolve_fault(long __user *addr, int write) | |||
| 572 | return 0; | 616 | return 0; |
| 573 | } | 617 | } |
| 574 | 618 | ||
| 619 | static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, | ||
| 620 | unsigned long bd_entry) | ||
| 621 | { | ||
| 622 | unsigned long bt_addr = bd_entry; | ||
| 623 | int align_to_bytes; | ||
| 624 | /* | ||
| 625 | * Bit 0 in a bd_entry is always the valid bit. | ||
| 626 | */ | ||
| 627 | bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG; | ||
| 628 | /* | ||
| 629 | * Tables are naturally aligned at 8-byte boundaries | ||
| 630 | * on 64-bit and 4-byte boundaries on 32-bit. The | ||
| 631 | * documentation makes it appear that the low bits | ||
| 632 | * are ignored by the hardware, so we do the same. | ||
| 633 | */ | ||
| 634 | if (is_64bit_mm(mm)) | ||
| 635 | align_to_bytes = 8; | ||
| 636 | else | ||
| 637 | align_to_bytes = 4; | ||
| 638 | bt_addr &= ~(align_to_bytes-1); | ||
| 639 | return bt_addr; | ||
| 640 | } | ||
| 641 | |||
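A worked example of the decoding above for a 64-bit mm (the entry value is invented for illustration):

	unsigned long bd_entry = 0x7f800400000dUL;	/* valid bit set */
	unsigned long bt_addr;

	bt_addr  = bd_entry & ~MPX_BD_ENTRY_VALID_FLAG; /* ...0d -> ...0c */
	bt_addr &= ~(8UL - 1);				/* ...0c -> ...08 */
	/* bt_addr == 0x7f8004000008: the 8-byte-aligned table address */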
| 575 | /* | 642 | /* |
| 576 | * Get the base of bounds tables pointed by specific bounds | 643 | * Get the base of bounds tables pointed by specific bounds |
| 577 | * directory entry. | 644 | * directory entry. |
| 578 | */ | 645 | */ |
| 579 | static int get_bt_addr(struct mm_struct *mm, | 646 | static int get_bt_addr(struct mm_struct *mm, |
| 580 | long __user *bd_entry, unsigned long *bt_addr) | 647 | long __user *bd_entry_ptr, |
| 648 | unsigned long *bt_addr_result) | ||
| 581 | { | 649 | { |
| 582 | int ret; | 650 | int ret; |
| 583 | int valid_bit; | 651 | int valid_bit; |
| 652 | unsigned long bd_entry; | ||
| 653 | unsigned long bt_addr; | ||
| 584 | 654 | ||
| 585 | if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry))) | 655 | if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr))) |
| 586 | return -EFAULT; | 656 | return -EFAULT; |
| 587 | 657 | ||
| 588 | while (1) { | 658 | while (1) { |
| 589 | int need_write = 0; | 659 | int need_write = 0; |
| 590 | 660 | ||
| 591 | pagefault_disable(); | 661 | pagefault_disable(); |
| 592 | ret = get_user(*bt_addr, bd_entry); | 662 | ret = get_user(bd_entry, bd_entry_ptr); |
| 593 | pagefault_enable(); | 663 | pagefault_enable(); |
| 594 | if (!ret) | 664 | if (!ret) |
| 595 | break; | 665 | break; |
| 596 | if (ret == -EFAULT) | 666 | if (ret == -EFAULT) |
| 597 | ret = mpx_resolve_fault(bd_entry, need_write); | 667 | ret = mpx_resolve_fault(bd_entry_ptr, need_write); |
| 598 | /* | 668 | /* |
| 599 | * If we could not resolve the fault, consider it | 669 | * If we could not resolve the fault, consider it |
| 600 | * userspace's fault and error out. | 670 | * userspace's fault and error out. |
| @@ -603,8 +673,8 @@ static int get_bt_addr(struct mm_struct *mm, | |||
| 603 | return ret; | 673 | return ret; |
| 604 | } | 674 | } |
| 605 | 675 | ||
| 606 | valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG; | 676 | valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG; |
| 607 | *bt_addr &= MPX_BT_ADDR_MASK; | 677 | bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry); |
| 608 | 678 | ||
| 609 | /* | 679 | /* |
| 610 | * When the kernel is managing bounds tables, a bounds directory | 680 | * When the kernel is managing bounds tables, a bounds directory |
| @@ -613,7 +683,7 @@ static int get_bt_addr(struct mm_struct *mm, | |||
| 613 | * data in the address field, we know something is wrong. This | 683 | * data in the address field, we know something is wrong. This |
| 614 | * -EINVAL return will cause a SIGSEGV. | 684 | * -EINVAL return will cause a SIGSEGV. |
| 615 | */ | 685 | */ |
| 616 | if (!valid_bit && *bt_addr) | 686 | if (!valid_bit && bt_addr) |
| 617 | return -EINVAL; | 687 | return -EINVAL; |
| 618 | /* | 688 | /* |
| 619 | * Do we have a completely zeroed bt entry? That is OK. It | 689 | * Do we have a completely zeroed bt entry? That is OK. It |
| @@ -624,19 +694,100 @@ static int get_bt_addr(struct mm_struct *mm, | |||
| 624 | if (!valid_bit) | 694 | if (!valid_bit) |
| 625 | return -ENOENT; | 695 | return -ENOENT; |
| 626 | 696 | ||
| 697 | *bt_addr_result = bt_addr; | ||
| 627 | return 0; | 698 | return 0; |
| 628 | } | 699 | } |
| 629 | 700 | ||
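A hedged summary of the three directory-entry states that get_bt_addr() distinguishes above:

	/*
	 * valid bit clear, address == 0 -> -ENOENT (empty slot: no table
	 *					was ever allocated here)
	 * valid bit clear, address != 0 -> -EINVAL (corrupt: the kernel
	 *					never leaves junk behind)
	 * valid bit set		 -> 0, *bt_addr_result filled in
	 */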
| 701 | static inline int bt_entry_size_bytes(struct mm_struct *mm) | ||
| 702 | { | ||
| 703 | if (is_64bit_mm(mm)) | ||
| 704 | return MPX_BT_ENTRY_BYTES_64; | ||
| 705 | else | ||
| 706 | return MPX_BT_ENTRY_BYTES_32; | ||
| 707 | } | ||
| 708 | |||
| 709 | /* | ||
| 710 | * Takes a virtual address and turns it into the offset in bytes | ||
| 711 | * inside the bounds table where the bounds table entry | ||
| 712 | * controlling 'addr' can be found. | ||
| 713 | */ | ||
| 714 | static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, | ||
| 715 | unsigned long addr) | ||
| 716 | { | ||
| 717 | unsigned long bt_table_nr_entries; | ||
| 718 | unsigned long offset = addr; | ||
| 719 | |||
| 720 | if (is_64bit_mm(mm)) { | ||
| 721 | /* Bottom 3 bits are ignored on 64-bit */ | ||
| 722 | offset >>= 3; | ||
| 723 | bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; | ||
| 724 | } else { | ||
| 725 | /* Bottom 2 bits are ignored on 32-bit */ | ||
| 726 | offset >>= 2; | ||
| 727 | bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; | ||
| 728 | } | ||
| 729 | /* | ||
| 730 | * We know the size of the table into which we are | ||
| 731 | * indexing, and we have eliminated all the low bits | ||
| 732 | * which are ignored for indexing. | ||
| 733 | * | ||
| 734 | * Mask out all the high bits which we do not need | ||
| 735 | * to index into the table. Note that the tables | ||
| 736 | * are always powers of two so this gives us a proper | ||
| 737 | * mask. | ||
| 738 | */ | ||
| 739 | offset &= (bt_table_nr_entries-1); | ||
| 740 | /* | ||
| 741 | * We now have an entry offset in terms of *entries* in | ||
| 742 | * the table. We need to scale it back up to bytes. | ||
| 743 | */ | ||
| 744 | offset *= bt_entry_size_bytes(mm); | ||
| 745 | return offset; | ||
| 746 | } | ||
| 747 | |||
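Worked numbers for the offset calculation above, for a 64-bit mm and assuming MPX_BT_NR_ENTRIES_64 == (1 << 17) (a 4MB table holding 32-byte entries):

	/*
	 * addr		  = 0x04001008
	 * addr >> 3	  = 0x00800201	(drop the ignored low bits)
	 * ... & 0x1ffff  = 0x00000201	(entry index within one table)
	 * ... * 32	  = 0x00004020	(byte offset of the entry)
	 */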
| 748 | /* | ||
| 749 | * How much virtual address space does a single bounds | ||
| 750 | * directory entry cover? | ||
| 751 | * | ||
| 752 | * Note, we need a long long because 4GB doesn't fit | ||
| 753 | * into a long on 32-bit. | ||
| 754 | */ | ||
| 755 | static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) | ||
| 756 | { | ||
| 757 | unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); | ||
| 758 | if (is_64bit_mm(mm)) | ||
| 759 | return virt_space / MPX_BD_NR_ENTRIES_64; | ||
| 760 | else | ||
| 761 | return virt_space / MPX_BD_NR_ENTRIES_32; | ||
| 762 | } | ||
| 763 | |||
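Because the entry counts are powers of two, the divides above compile down to shifts (see the gcc note in mpx_get_bd_entry_offset() below). Plugging in assumed values, x86_virt_bits of 48 on 64-bit and 32 on a 32-bit kernel, with MPX_BD_NR_ENTRIES_64 == (1 << 28) and MPX_BD_NR_ENTRIES_32 == (1 << 20):

	/*
	 * 64-bit: 2^48 / 2^28 = 2^20 -> each bd entry covers 1MB
	 * 32-bit: 2^32 / 2^20 = 2^12 -> each bd entry covers 4KB
	 *
	 * Cross-check: a table holds 2^17 entries at 8-byte granularity
	 * (1MB) on 64-bit, and 2^10 entries at 4-byte granularity (4KB)
	 * on 32-bit.
	 */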
| 630 | /* | 764 | /* |
| 631 | * Free the backing physical pages of bounds table 'bt_addr'. | 765 | * Free the backing physical pages of bounds table 'bt_addr'. |
| 632 | * Assume start...end is within that bounds table. | 766 | * Assume start...end is within that bounds table. |
| 633 | */ | 767 | */ |
| 634 | static int zap_bt_entries(struct mm_struct *mm, | 768 | static noinline int zap_bt_entries_mapping(struct mm_struct *mm, |
| 635 | unsigned long bt_addr, | 769 | unsigned long bt_addr, |
| 636 | unsigned long start, unsigned long end) | 770 | unsigned long start_mapping, unsigned long end_mapping) |
| 637 | { | 771 | { |
| 638 | struct vm_area_struct *vma; | 772 | struct vm_area_struct *vma; |
| 639 | unsigned long addr, len; | 773 | unsigned long addr, len; |
| 774 | unsigned long start; | ||
| 775 | unsigned long end; | ||
| 776 | |||
| 777 | /* | ||
| 778 | * If we 'end' on a boundary, the offset will be 0 which | ||
| 779 | * is not what we want. Back it up a byte to get the | ||
| 780 | * last bt entry. Then once we have the entry itself, | ||
| 781 | * move 'end' back up by the table entry size. | ||
| 782 | */ | ||
| 783 | start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping); | ||
| 784 | end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1); | ||
| 785 | /* | ||
| 786 | * Move end back up by one entry. Among other things | ||
| 787 | * this ensures that it remains page-aligned and does | ||
| 788 | * not screw up zap_page_range() | ||
| 789 | */ | ||
| 790 | end += bt_entry_size_bytes(mm); | ||
| 640 | 791 | ||
| 641 | /* | 792 | /* |
| 642 | * Find the first overlapping vma. If vma->vm_start > start, there | 793 | * Find the first overlapping vma. If vma->vm_start > start, there |
| @@ -648,7 +799,7 @@ static int zap_bt_entries(struct mm_struct *mm, | |||
| 648 | return -EINVAL; | 799 | return -EINVAL; |
| 649 | 800 | ||
| 650 | /* | 801 | /* |
| 651 | * A NUMA policy on a VM_MPX VMA could cause this bouds table to | 802 | * A NUMA policy on a VM_MPX VMA could cause this bounds table to |
| 652 | * be split. So we need to look across the entire 'start -> end' | 803 | * be split. So we need to look across the entire 'start -> end' |
| 653 | * range of this bounds table, find all of the VM_MPX VMAs, and | 804 | * range of this bounds table, find all of the VM_MPX VMAs, and |
| 654 | * zap only those. | 805 | * zap only those. |
| @@ -666,27 +817,65 @@ static int zap_bt_entries(struct mm_struct *mm, | |||
| 666 | 817 | ||
| 667 | len = min(vma->vm_end, end) - addr; | 818 | len = min(vma->vm_end, end) - addr; |
| 668 | zap_page_range(vma, addr, len, NULL); | 819 | zap_page_range(vma, addr, len, NULL); |
| 820 | trace_mpx_unmap_zap(addr, addr+len); | ||
| 669 | 821 | ||
| 670 | vma = vma->vm_next; | 822 | vma = vma->vm_next; |
| 671 | addr = vma->vm_start; | 823 | addr = vma->vm_start; |
| 672 | } | 824 | } |
| 673 | |||
| 674 | return 0; | 825 | return 0; |
| 675 | } | 826 | } |
| 676 | 827 | ||
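The end adjustment above matters when end_mapping falls exactly on a table boundary. Hedged worked numbers for a 64-bit mm (4MB tables, 32-byte entries):

	/*
	 * offset(end_mapping)	   = 0x000000  (wraps to entry 0: wrong)
	 * offset(end_mapping - 1) = 0x3fffe0  (the last entry)
	 * + bt_entry_size_bytes() = 0x400000  (4MB: an exclusive, page-
	 *			     aligned end for zap_page_range())
	 */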
| 677 | static int unmap_single_bt(struct mm_struct *mm, | 828 | static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, |
| 829 | unsigned long addr) | ||
| 830 | { | ||
| 831 | /* | ||
| 832 | * There are several ways to derive the bd offsets. We | ||
| 833 | * use the following approach here: | ||
| 834 | * 1. We know the size of the virtual address space | ||
| 835 | * 2. We know the number of entries in a bounds table | ||
| 836 | * 3. We know that each entry covers a fixed amount of | ||
| 837 | * virtual address space. | ||
| 838 | * So, we can just divide the virtual address by the | ||
| 839 | * virtual space used by one entry to determine which | ||
| 840 | * entry "controls" the given virtual address. | ||
| 841 | */ | ||
| 842 | if (is_64bit_mm(mm)) { | ||
| 843 | int bd_entry_size = 8; /* 64-bit pointer */ | ||
| 844 | /* | ||
| 845 | * Take the 64-bit addressing hole into account. | ||
| 846 | */ | ||
| 847 | addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); | ||
| 848 | return (addr / bd_entry_virt_space(mm)) * bd_entry_size; | ||
| 849 | } else { | ||
| 850 | int bd_entry_size = 4; /* 32-bit pointer */ | ||
| 851 | /* | ||
| 852 | * 32-bit has no hole so this case needs no mask. | ||
| 853 | */ | ||
| 854 | return (addr / bd_entry_virt_space(mm)) * bd_entry_size; | ||
| 855 | } | ||
| 856 | /* | ||
| 857 | * The two return calls above are exact copies. If we | ||
| 858 | * pull out a single copy and put it in here, gcc won't | ||
| 859 | * realize that we're doing a power-of-2 divide and use | ||
| 860 | * shifts. It uses a real divide. If we put them up | ||
| 861 | * there, it manages to figure it out (gcc 4.8.3). | ||
| 862 | */ | ||
| 863 | } | ||
| 864 | |||
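Worked numbers for the computation above, 64-bit mm, assuming 1MB of virtual space per directory entry (see bd_entry_virt_space()):

	/*
	 * addr		      = 0x7f1234567890	(already below 2^48, so
	 *					 the hole mask is a no-op)
	 * addr / (1UL << 20) = 0x7f12345	(which entry covers it)
	 * ... * 8	      = 0x3f891a28	(byte offset within the
	 *					 2GB bounds directory)
	 */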
| 865 | static int unmap_entire_bt(struct mm_struct *mm, | ||
| 678 | long __user *bd_entry, unsigned long bt_addr) | 866 | long __user *bd_entry, unsigned long bt_addr) |
| 679 | { | 867 | { |
| 680 | unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; | 868 | unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; |
| 681 | unsigned long actual_old_val = 0; | 869 | unsigned long uninitialized_var(actual_old_val); |
| 682 | int ret; | 870 | int ret; |
| 683 | 871 | ||
| 684 | while (1) { | 872 | while (1) { |
| 685 | int need_write = 1; | 873 | int need_write = 1; |
| 874 | unsigned long cleared_bd_entry = 0; | ||
| 686 | 875 | ||
| 687 | pagefault_disable(); | 876 | pagefault_disable(); |
| 688 | ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, | 877 | ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, |
| 689 | expected_old_val, 0); | 878 | bd_entry, expected_old_val, cleared_bd_entry); |
| 690 | pagefault_enable(); | 879 | pagefault_enable(); |
| 691 | if (!ret) | 880 | if (!ret) |
| 692 | break; | 881 | break; |
| @@ -705,9 +894,8 @@ static int unmap_single_bt(struct mm_struct *mm, | |||
| 705 | if (actual_old_val != expected_old_val) { | 894 | if (actual_old_val != expected_old_val) { |
| 706 | /* | 895 | /* |
| 707 | * Someone else raced with us to unmap the table. | 896 | * Someone else raced with us to unmap the table. |
| 708 | * There was no bounds table pointed to by the | 897 | * That is OK, since we were both trying to do |
| 709 | * directory, so declare success. Somebody freed | 898 | * the same thing. Declare success. |
| 710 | * it. | ||
| 711 | */ | 899 | */ |
| 712 | if (!actual_old_val) | 900 | if (!actual_old_val) |
| 713 | return 0; | 901 | return 0; |
| @@ -720,176 +908,113 @@ static int unmap_single_bt(struct mm_struct *mm, | |||
| 720 | */ | 908 | */ |
| 721 | return -EINVAL; | 909 | return -EINVAL; |
| 722 | } | 910 | } |
| 723 | |||
| 724 | /* | 911 | /* |
| 725 | * Note, we are likely being called under do_munmap() already. To | 912 | * Note, we are likely being called under do_munmap() already. To |
| 726 | * avoid recursion, do_munmap() will check whether it comes | 913 | * avoid recursion, do_munmap() will check whether it comes |
| 727 | * from one bounds table through VM_MPX flag. | 914 | * from one bounds table through VM_MPX flag. |
| 728 | */ | 915 | */ |
| 729 | return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); | 916 | return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); |
| 730 | } | 917 | } |
| 731 | 918 | ||
| 732 | /* | 919 | static int try_unmap_single_bt(struct mm_struct *mm, |
| 733 | * If the bounds table pointed by bounds directory 'bd_entry' is | 920 | unsigned long start, unsigned long end) |
| 734 | * not shared, unmap this whole bounds table. Otherwise, only free | ||
| 735 | * those backing physical pages of bounds table entries covered | ||
| 736 | * in this virtual address region start...end. | ||
| 737 | */ | ||
| 738 | static int unmap_shared_bt(struct mm_struct *mm, | ||
| 739 | long __user *bd_entry, unsigned long start, | ||
| 740 | unsigned long end, bool prev_shared, bool next_shared) | ||
| 741 | { | 921 | { |
| 742 | unsigned long bt_addr; | 922 | struct vm_area_struct *next; |
| 743 | int ret; | 923 | struct vm_area_struct *prev; |
| 744 | |||
| 745 | ret = get_bt_addr(mm, bd_entry, &bt_addr); | ||
| 746 | /* | 924 | /* |
| 747 | * We could see an "error" ret for not-present bounds | 925 | * "bta" == Bounds Table Area: the area controlled by the |
| 748 | * tables (not really an error), or actual errors, but | 926 | * bounds table that we are unmapping. |
| 749 | * stop unmapping either way. | ||
| 750 | */ | 927 | */ |
| 751 | if (ret) | 928 | unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1); |
| 752 | return ret; | 929 | unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm); |
| 753 | 930 | unsigned long uninitialized_var(bt_addr); | |
| 754 | if (prev_shared && next_shared) | 931 | void __user *bde_vaddr; |
| 755 | ret = zap_bt_entries(mm, bt_addr, | ||
| 756 | bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), | ||
| 757 | bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); | ||
| 758 | else if (prev_shared) | ||
| 759 | ret = zap_bt_entries(mm, bt_addr, | ||
| 760 | bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), | ||
| 761 | bt_addr+MPX_BT_SIZE_BYTES); | ||
| 762 | else if (next_shared) | ||
| 763 | ret = zap_bt_entries(mm, bt_addr, bt_addr, | ||
| 764 | bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); | ||
| 765 | else | ||
| 766 | ret = unmap_single_bt(mm, bd_entry, bt_addr); | ||
| 767 | |||
| 768 | return ret; | ||
| 769 | } | ||
| 770 | |||
| 771 | /* | ||
| 772 | * A virtual address region being munmap()ed might share bounds table | ||
| 773 | * with adjacent VMAs. We only need to free the backing physical | ||
| 774 | * memory of these shared bounds tables entries covered in this virtual | ||
| 775 | * address region. | ||
| 776 | */ | ||
| 777 | static int unmap_edge_bts(struct mm_struct *mm, | ||
| 778 | unsigned long start, unsigned long end) | ||
| 779 | { | ||
| 780 | int ret; | 932 | int ret; |
| 781 | long __user *bde_start, *bde_end; | ||
| 782 | struct vm_area_struct *prev, *next; | ||
| 783 | bool prev_shared = false, next_shared = false; | ||
| 784 | |||
| 785 | bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); | ||
| 786 | bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); | ||
| 787 | |||
| 788 | /* | 933 | /* |
| 789 | * Check whether bde_start and bde_end are shared with adjacent | 934 | * We already unlinked the VMAs from the mm's rbtree so 'start' |
| 790 | * VMAs. | ||
| 791 | * | ||
| 792 | * We already unliked the VMAs from the mm's rbtree so 'start' | ||
| 793 | * is guaranteed to be in a hole. This gets us the first VMA | 935 | * is guaranteed to be in a hole. This gets us the first VMA |
| 794 | * before the hole in to 'prev' and the next VMA after the hole | 936 | * before the hole in to 'prev' and the next VMA after the hole |
| 795 | * in to 'next'. | 937 | * in to 'next'. |
| 796 | */ | 938 | */ |
| 797 | next = find_vma_prev(mm, start, &prev); | 939 | next = find_vma_prev(mm, start, &prev); |
| 798 | if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) | ||
| 799 | == bde_start) | ||
| 800 | prev_shared = true; | ||
| 801 | if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) | ||
| 802 | == bde_end) | ||
| 803 | next_shared = true; | ||
| 804 | |||
| 805 | /* | 940 | /* |
| 806 | * This virtual address region being munmap()ed is only | 941 | * Do not count other MPX bounds table VMAs as neighbors. |
| 807 | * covered by one bounds table. | 942 | * Although theoretically possible, we do not allow bounds |
| 808 | * | 943 | * tables for bounds tables so our heads do not explode. |
| 809 | * In this case, if this table is also shared with adjacent | 944 | * If we count them as neighbors here, we may end up with |
| 810 | * VMAs, only part of the backing physical memory of the bounds | 945 | * lots of tables even though we have no actual table |
| 811 | * table need be freeed. Otherwise the whole bounds table need | 946 | * entries in use. |
| 812 | * be unmapped. | 947 | */ |
| 813 | */ | 948 | while (next && is_mpx_vma(next)) |
| 814 | if (bde_start == bde_end) { | 949 | next = next->vm_next; |
| 815 | return unmap_shared_bt(mm, bde_start, start, end, | 950 | while (prev && is_mpx_vma(prev)) |
| 816 | prev_shared, next_shared); | 951 | prev = prev->vm_prev; |
| 952 | /* | ||
| 953 | * We know 'start' and 'end' lie within an area controlled | ||
| 954 | * by a single bounds table. See if there are any other | ||
| 955 | * VMAs controlled by that bounds table. If there are not | ||
| 956 | * then we can "expand" the are we are unmapping to possibly | ||
| 957 | * cover the entire table. | ||
| 958 | */ | ||
| 959 | next = find_vma_prev(mm, start, &prev); | ||
| 960 | if ((!prev || prev->vm_end <= bta_start_vaddr) && | ||
| 961 | (!next || next->vm_start >= bta_end_vaddr)) { | ||
| 962 | /* | ||
| 963 | * No neighbor VMAs controlled by the same bounds | ||
| 964 | * table. Try to unmap the whole thing. | ||
| 965 | */ | ||
| 966 | start = bta_start_vaddr; | ||
| 967 | end = bta_end_vaddr; | ||
| 817 | } | 968 | } |
| 818 | 969 | ||
| 970 | bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); | ||
| 971 | ret = get_bt_addr(mm, bde_vaddr, &bt_addr); | ||
| 819 | /* | 972 | /* |
| 820 | * If more than one bounds tables are covered in this virtual | 973 | * No bounds table there, so nothing to unmap. |
| 821 | * address region being munmap()ed, we need to separately check | ||
| 822 | * whether bde_start and bde_end are shared with adjacent VMAs. | ||
| 823 | */ | 974 | */ |
| 824 | ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false); | 975 | if (ret == -ENOENT) { |
| 825 | if (ret) | 976 | ret = 0; |
| 826 | return ret; | 977 | return 0; |
| 827 | ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared); | 978 | } |
| 828 | if (ret) | 979 | if (ret) |
| 829 | return ret; | 980 | return ret; |
| 830 | 981 | /* | |
| 831 | return 0; | 982 | * We are unmapping an entire table. Either because the |
| 983 | * unmap that started this whole process was large enough | ||
| 984 | * to cover an entire table, or because the unmap was small | ||
| 985 | * but covered the whole area managed by a bounds table. | ||
| 986 | */ | ||
| 987 | if ((start == bta_start_vaddr) && | ||
| 988 | (end == bta_end_vaddr)) | ||
| 989 | return unmap_entire_bt(mm, bde_vaddr, bt_addr); | ||
| 990 | return zap_bt_entries_mapping(mm, bt_addr, start, end); | ||
| 832 | } | 991 | } |
| 833 | 992 | ||
| 834 | static int mpx_unmap_tables(struct mm_struct *mm, | 993 | static int mpx_unmap_tables(struct mm_struct *mm, |
| 835 | unsigned long start, unsigned long end) | 994 | unsigned long start, unsigned long end) |
| 836 | { | 995 | { |
| 837 | int ret; | 996 | unsigned long one_unmap_start; |
| 838 | long __user *bd_entry, *bde_start, *bde_end; | 997 | trace_mpx_unmap_search(start, end); |
| 839 | unsigned long bt_addr; | 998 | |
| 840 | 999 | one_unmap_start = start; | |
| 841 | /* | 1000 | while (one_unmap_start < end) { |
| 842 | * "Edge" bounds tables are those which are being used by the region | 1001 | int ret; |
| 843 | * (start -> end), but that may be shared with adjacent areas. If they | 1002 | unsigned long next_unmap_start = ALIGN(one_unmap_start+1, |
| 844 | * turn out to be completely unshared, they will be freed. If they are | 1003 | bd_entry_virt_space(mm)); |
| 845 | * shared, we will free the backing store (like an MADV_DONTNEED) for | 1004 | unsigned long one_unmap_end = end; |
| 846 | * areas used by this region. | 1005 | /* |
| 847 | */ | 1006 | * If the end is beyond the current bounds table, |
| 848 | ret = unmap_edge_bts(mm, start, end); | 1007 | * move it back so we only deal with a single one |
| 849 | switch (ret) { | 1008 | * at a time. |
| 850 | /* non-present tables are OK */ | 1009 | */ |
| 851 | case 0: | 1010 | if (one_unmap_end > next_unmap_start) |
| 852 | case -ENOENT: | 1011 | one_unmap_end = next_unmap_start; |
| 853 | /* Success, or no tables to unmap */ | 1012 | ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end); |
| 854 | break; | ||
| 855 | case -EINVAL: | ||
| 856 | case -EFAULT: | ||
| 857 | default: | ||
| 858 | return ret; | ||
| 859 | } | ||
| 860 | |||
| 861 | /* | ||
| 862 | * Only unmap the bounds table that are | ||
| 863 | * 1. fully covered | ||
| 864 | * 2. not at the edges of the mapping, even if full aligned | ||
| 865 | */ | ||
| 866 | bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); | ||
| 867 | bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); | ||
| 868 | for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { | ||
| 869 | ret = get_bt_addr(mm, bd_entry, &bt_addr); | ||
| 870 | switch (ret) { | ||
| 871 | case 0: | ||
| 872 | break; | ||
| 873 | case -ENOENT: | ||
| 874 | /* No table here, try the next one */ | ||
| 875 | continue; | ||
| 876 | case -EINVAL: | ||
| 877 | case -EFAULT: | ||
| 878 | default: | ||
| 879 | /* | ||
| 880 | * Note: we are being strict here. | ||
| 881 | * Any time we run in to an issue | ||
| 882 | * unmapping tables, we stop and | ||
| 883 | * SIGSEGV. | ||
| 884 | */ | ||
| 885 | return ret; | ||
| 886 | } | ||
| 887 | |||
| 888 | ret = unmap_single_bt(mm, bd_entry, bt_addr); | ||
| 889 | if (ret) | 1013 | if (ret) |
| 890 | return ret; | 1014 | return ret; |
| 891 | } | ||
| 892 | 1015 | ||
| 1016 | one_unmap_start = next_unmap_start; | ||
| 1017 | } | ||
| 893 | return 0; | 1018 | return 0; |
| 894 | } | 1019 | } |
| 895 | 1020 | ||
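The rewritten mpx_unmap_tables() above walks the range one bounds-table-sized chunk at a time. Hedged worked numbers for a 64-bit mm where each directory entry covers 1MB:

	/*
	 * Unmapping [0x500800, 0x710000) becomes three calls to
	 * try_unmap_single_bt():
	 *   [0x500800, 0x600000)  partial chunk
	 *   [0x600000, 0x700000)  covers one whole table
	 *   [0x700000, 0x710000)  partial chunk
	 * with each boundary from ALIGN(one_unmap_start + 1,
	 * bd_entry_virt_space(mm)). Partial chunks may still be
	 * expanded to a whole table if no neighboring VMA shares it.
	 */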
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678fb26e1..0d7dd1f5ac36 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c | |||
| @@ -18,10 +18,9 @@ | |||
| 18 | #include <asm/mtrr.h> | 18 | #include <asm/mtrr.h> |
| 19 | #include <asm/page.h> | 19 | #include <asm/page.h> |
| 20 | #include <asm/mce.h> | 20 | #include <asm/mce.h> |
| 21 | #include <asm/xcr.h> | ||
| 22 | #include <asm/suspend.h> | 21 | #include <asm/suspend.h> |
| 22 | #include <asm/fpu/internal.h> | ||
| 23 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
| 24 | #include <asm/fpu-internal.h> /* pcntxt_mask */ | ||
| 25 | #include <asm/cpu.h> | 24 | #include <asm/cpu.h> |
| 26 | 25 | ||
| 27 | #ifdef CONFIG_X86_32 | 26 | #ifdef CONFIG_X86_32 |
| @@ -155,6 +154,8 @@ static void fix_processor_context(void) | |||
| 155 | #endif | 154 | #endif |
| 156 | load_TR_desc(); /* This does ltr */ | 155 | load_TR_desc(); /* This does ltr */ |
| 157 | load_LDT(¤t->active_mm->context); /* This does lldt */ | 156 | load_LDT(¤t->active_mm->context); /* This does lldt */ |
| 157 | |||
| 158 | fpu__resume_cpu(); | ||
| 158 | } | 159 | } |
| 159 | 160 | ||
| 160 | /** | 161 | /** |
| @@ -221,12 +222,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) | |||
| 221 | wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); | 222 | wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); |
| 222 | #endif | 223 | #endif |
| 223 | 224 | ||
| 224 | /* | ||
| 225 | * restore XCR0 for xsave capable cpu's. | ||
| 226 | */ | ||
| 227 | if (cpu_has_xsave) | ||
| 228 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | ||
| 229 | |||
| 230 | fix_processor_context(); | 225 | fix_processor_context(); |
| 231 | 226 | ||
| 232 | do_fpu_end(); | 227 | do_fpu_end(); |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..98088bf5906a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
| @@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu) | |||
| 1423 | return; | 1423 | return; |
| 1424 | /* | 1424 | /* |
| 1425 | * For BSP, PSE PGE are set in probe_page_size_mask(), for APs | 1425 | * For BSP, PSE PGE are set in probe_page_size_mask(), for APs |
| 1426 | * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. | 1426 | * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). |
| 1427 | */ | 1427 | */ |
| 1428 | if (cpu_has_pse) | 1428 | if (cpu_has_pse) |
| 1429 | cr4_set_bits_and_update_boot(X86_CR4_PSE); | 1429 | cr4_set_bits_and_update_boot(X86_CR4_PSE); |
diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index a3bebef255ad..0c98a9d51a24 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c | |||
| @@ -33,7 +33,7 @@ | |||
| 33 | #include <asm/io.h> | 33 | #include <asm/io.h> |
| 34 | #include <asm/msr.h> | 34 | #include <asm/msr.h> |
| 35 | #include <asm/cpufeature.h> | 35 | #include <asm/cpufeature.h> |
| 36 | #include <asm/i387.h> | 36 | #include <asm/fpu/api.h> |
| 37 | 37 | ||
| 38 | 38 | ||
| 39 | 39 | ||
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index c178ed8c3908..da2d6777bd09 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include <asm/cpu_device_id.h> | 22 | #include <asm/cpu_device_id.h> |
| 23 | #include <asm/byteorder.h> | 23 | #include <asm/byteorder.h> |
| 24 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
| 25 | #include <asm/i387.h> | 25 | #include <asm/fpu/api.h> |
| 26 | 26 | ||
| 27 | /* | 27 | /* |
| 28 | * Number of data blocks actually fetched for each xcrypt insn. | 28 | * Number of data blocks actually fetched for each xcrypt insn. |
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 95f7d27ce491..4e154c9b9206 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
| 24 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
| 25 | #include <asm/cpu_device_id.h> | 25 | #include <asm/cpu_device_id.h> |
| 26 | #include <asm/i387.h> | 26 | #include <asm/fpu/api.h> |
| 27 | 27 | ||
| 28 | struct padlock_sha_desc { | 28 | struct padlock_sha_desc { |
| 29 | struct shash_desc fallback; | 29 | struct shash_desc fallback; |
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 30f2aef69d78..6a4cd771a2be 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c | |||
| @@ -46,7 +46,7 @@ | |||
| 46 | #include <asm/setup.h> | 46 | #include <asm/setup.h> |
| 47 | #include <asm/lguest.h> | 47 | #include <asm/lguest.h> |
| 48 | #include <asm/uaccess.h> | 48 | #include <asm/uaccess.h> |
| 49 | #include <asm/i387.h> | 49 | #include <asm/fpu/internal.h> |
| 50 | #include <asm/tlbflush.h> | 50 | #include <asm/tlbflush.h> |
| 51 | #include "../lg.h" | 51 | #include "../lg.h" |
| 52 | 52 | ||
| @@ -251,7 +251,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
| 251 | * we set it now, so we can trap and pass that trap to the Guest if it | 251 | * we set it now, so we can trap and pass that trap to the Guest if it |
| 252 | * uses the FPU. | 252 | * uses the FPU. |
| 253 | */ | 253 | */ |
| 254 | if (cpu->ts && user_has_fpu()) | 254 | if (cpu->ts && fpregs_active()) |
| 255 | stts(); | 255 | stts(); |
| 256 | 256 | ||
| 257 | /* | 257 | /* |
| @@ -283,7 +283,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
| 283 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 283 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
| 284 | 284 | ||
| 285 | /* Clear the host TS bit if it was set above. */ | 285 | /* Clear the host TS bit if it was set above. */ |
| 286 | if (cpu->ts && user_has_fpu()) | 286 | if (cpu->ts && fpregs_active()) |
| 287 | clts(); | 287 | clts(); |
| 288 | 288 | ||
| 289 | /* | 289 | /* |
| @@ -297,12 +297,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) | |||
| 297 | /* | 297 | /* |
| 298 | * Similarly, if we took a trap because the Guest used the FPU, | 298 | * Similarly, if we took a trap because the Guest used the FPU, |
| 299 | * we have to restore the FPU it expects to see. | 299 | * we have to restore the FPU it expects to see. |
| 300 | * math_state_restore() may sleep and we may even move off to | 300 | * fpu__restore() may sleep and we may even move off to |
| 301 | * a different CPU. So all the critical stuff should be done | 301 | * a different CPU. So all the critical stuff should be done |
| 302 | * before this. | 302 | * before this. |
| 303 | */ | 303 | */ |
| 304 | else if (cpu->regs->trapnum == 7 && !user_has_fpu()) | 304 | else if (cpu->regs->trapnum == 7 && !fpregs_active()) |
| 305 | math_state_restore(); | 305 | fpu__restore(¤t->thread.fpu); |
| 306 | } | 306 | } |
| 307 | 307 | ||
| 308 | /*H:130 | 308 | /*H:130 |
diff --git a/kernel/sys.c b/kernel/sys.c index a4e372b798a5..8571296b7ddb 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -92,10 +92,10 @@ | |||
| 92 | # define SET_TSC_CTL(a) (-EINVAL) | 92 | # define SET_TSC_CTL(a) (-EINVAL) |
| 93 | #endif | 93 | #endif |
| 94 | #ifndef MPX_ENABLE_MANAGEMENT | 94 | #ifndef MPX_ENABLE_MANAGEMENT |
| 95 | # define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) | 95 | # define MPX_ENABLE_MANAGEMENT() (-EINVAL) |
| 96 | #endif | 96 | #endif |
| 97 | #ifndef MPX_DISABLE_MANAGEMENT | 97 | #ifndef MPX_DISABLE_MANAGEMENT |
| 98 | # define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) | 98 | # define MPX_DISABLE_MANAGEMENT() (-EINVAL) |
| 99 | #endif | 99 | #endif |
| 100 | #ifndef GET_FP_MODE | 100 | #ifndef GET_FP_MODE |
| 101 | # define GET_FP_MODE(a) (-EINVAL) | 101 | # define GET_FP_MODE(a) (-EINVAL) |
| @@ -2230,12 +2230,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2230 | case PR_MPX_ENABLE_MANAGEMENT: | 2230 | case PR_MPX_ENABLE_MANAGEMENT: |
| 2231 | if (arg2 || arg3 || arg4 || arg5) | 2231 | if (arg2 || arg3 || arg4 || arg5) |
| 2232 | return -EINVAL; | 2232 | return -EINVAL; |
| 2233 | error = MPX_ENABLE_MANAGEMENT(me); | 2233 | error = MPX_ENABLE_MANAGEMENT(); |
| 2234 | break; | 2234 | break; |
| 2235 | case PR_MPX_DISABLE_MANAGEMENT: | 2235 | case PR_MPX_DISABLE_MANAGEMENT: |
| 2236 | if (arg2 || arg3 || arg4 || arg5) | 2236 | if (arg2 || arg3 || arg4 || arg5) |
| 2237 | return -EINVAL; | 2237 | return -EINVAL; |
| 2238 | error = MPX_DISABLE_MANAGEMENT(me); | 2238 | error = MPX_DISABLE_MANAGEMENT(); |
| 2239 | break; | 2239 | break; |
| 2240 | case PR_SET_FP_MODE: | 2240 | case PR_SET_FP_MODE: |
| 2241 | error = SET_FP_MODE(me, arg2); | 2241 | error = SET_FP_MODE(me, arg2); |
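With the task argument gone from mpx_enable_management() and mpx_disable_management(), the prctl() ABI itself is unchanged. A minimal hedged userspace sketch (PR_MPX_ENABLE_MANAGEMENT is assumed to be 43, per include/uapi/linux/prctl.h):

	#include <stdio.h>
	#include <sys/prctl.h>

	#ifndef PR_MPX_ENABLE_MANAGEMENT
	# define PR_MPX_ENABLE_MANAGEMENT 43	/* assumed value */
	#endif

	int main(void)
	{
		/* arg2..arg5 must be zero or the kernel returns -EINVAL */
		if (prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0))
			perror("PR_MPX_ENABLE_MANAGEMENT");
		return 0;
	}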
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index b7595484a815..8fe9d9662abb 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | 23 | ||
| 24 | #ifdef __KERNEL__ /* Real code */ | 24 | #ifdef __KERNEL__ /* Real code */ |
| 25 | 25 | ||
| 26 | #include <asm/i387.h> | 26 | #include <asm/fpu/api.h> |
| 27 | 27 | ||
| 28 | #else /* Dummy code for user space testing */ | 28 | #else /* Dummy code for user space testing */ |
| 29 | 29 | ||
