diff options
author | Radim Krčmář <rkrcmar@redhat.com> | 2016-11-09 13:07:06 -0500 |
---|---|---|
committer | Radim Krčmář <rkrcmar@redhat.com> | 2016-11-16 16:09:46 -0500 |
commit | 283c95d0e3891b64087706b344a4b545d04a6e62 (patch) | |
tree | ed7d530132e7ef7607fc07fc70139d23e697203c | |
parent | aabba3c6abd50b05b1fc2c6ec44244aa6bcda576 (diff) |
KVM: x86: emulate FXSAVE and FXRSTOR
Internal errors were reported on 16 bit fxsave and fxrstor with ipxe.
Old Intels don't have unrestricted_guest, so KVM has to emulate these instructions.
The patch takes advantage of the hardware implementation.
AMD and Intel differ in how they save and restore the other fields in the first 32
bytes. A test wrote 0xff to the fxsave area, wrote 0 to the upper bits of MXCSR
in the fxsave area, executed fxrstor, rewrote the fxsave area to 0xee,
and executed fxsave:
Intel (Nehalem):
7f 1f 7f 7f ff 00 ff 07 ff ff ff ff ff ff 00 00
ff ff ff ff ff ff 00 00 ff ff 00 00 ff ff 00 00
Intel (Haswell -- deprecated FPU CS and FPU DS):
7f 1f 7f 7f ff 00 ff 07 ff ff ff ff 00 00 00 00
ff ff ff ff 00 00 00 00 ff ff 00 00 ff ff 00 00
AMD (Opteron 2300-series):
7f 1f 7f 7f ff 00 ee ee ee ee ee ee ee ee ee ee
ee ee ee ee ee ee ee ee ff ff 00 00 ff ff 02 00
fxsave/fxrstor will only be emulated on early Intels, so KVM can't do
much to improve the situation.
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
-rw-r--r-- | arch/x86/kvm/emulate.c | 129 |
1 files changed, 128 insertions, 1 deletions
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6af3cac6ec89..7d4f9b7f06ee 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -3883,6 +3883,131 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt) | |||
3883 | return X86EMUL_CONTINUE; | 3883 | return X86EMUL_CONTINUE; |
3884 | } | 3884 | } |
3885 | 3885 | ||
3886 | static int check_fxsr(struct x86_emulate_ctxt *ctxt) | ||
3887 | { | ||
3888 | u32 eax = 1, ebx, ecx = 0, edx; | ||
3889 | |||
3890 | ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); | ||
3891 | if (!(edx & FFL(FXSR))) | ||
3892 | return emulate_ud(ctxt); | ||
3893 | |||
3894 | if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) | ||
3895 | return emulate_nm(ctxt); | ||
3896 | |||
3897 | /* | ||
3898 | * Don't emulate a case that should never be hit, instead of working | ||
3899 | * around a lack of fxsave64/fxrstor64 on old compilers. | ||
3900 | */ | ||
3901 | if (ctxt->mode >= X86EMUL_MODE_PROT64) | ||
3902 | return X86EMUL_UNHANDLEABLE; | ||
3903 | |||
3904 | return X86EMUL_CONTINUE; | ||
3905 | } | ||
3906 | |||
3907 | /* | ||
3908 | * FXSAVE and FXRSTOR have 4 different formats depending on execution mode, | ||
3909 | * 1) 16 bit mode | ||
3910 | * 2) 32 bit mode | ||
3911 | * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs | ||
3912 | * preserve whole 32 bit values, though, so (1) and (2) are the same wrt. | ||
3913 | * save and restore | ||
3914 | * 3) 64-bit mode with REX.W prefix | ||
3915 | * - like (2), but XMM 8-15 are being saved and restored | ||
3916 | * 4) 64-bit mode without REX.W prefix | ||
3917 | * - like (3), but FIP and FDP are 64 bit | ||
3918 | * | ||
3919 | * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the | ||
3920 | * desired result. (4) is not emulated. | ||
3921 | * | ||
3922 | * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS | ||
3923 | * and FPU DS) should match. | ||
3924 | */ | ||
3925 | static int em_fxsave(struct x86_emulate_ctxt *ctxt) | ||
3926 | { | ||
3927 | struct fxregs_state fx_state; | ||
3928 | size_t size; | ||
3929 | int rc; | ||
3930 | |||
3931 | rc = check_fxsr(ctxt); | ||
3932 | if (rc != X86EMUL_CONTINUE) | ||
3933 | return rc; | ||
3934 | |||
3935 | ctxt->ops->get_fpu(ctxt); | ||
3936 | |||
3937 | rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); | ||
3938 | |||
3939 | ctxt->ops->put_fpu(ctxt); | ||
3940 | |||
3941 | if (rc != X86EMUL_CONTINUE) | ||
3942 | return rc; | ||
3943 | |||
3944 | if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR) | ||
3945 | size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]); | ||
3946 | else | ||
3947 | size = offsetof(struct fxregs_state, xmm_space[0]); | ||
3948 | |||
3949 | return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size); | ||
3950 | } | ||
3951 | |||
3952 | static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt, | ||
3953 | struct fxregs_state *new) | ||
3954 | { | ||
3955 | int rc = X86EMUL_CONTINUE; | ||
3956 | struct fxregs_state old; | ||
3957 | |||
3958 | rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old)); | ||
3959 | if (rc != X86EMUL_CONTINUE) | ||
3960 | return rc; | ||
3961 | |||
3962 | /* | ||
3963 | * 64 bit host will restore XMM 8-15, which is not correct on non-64 | ||
3964 | * bit guests. Load the current values in order to preserve 64 bit | ||
3965 | * XMMs after fxrstor. | ||
3966 | */ | ||
3967 | #ifdef CONFIG_X86_64 | ||
3968 | /* XXX: accessing XMM 8-15 very awkwardly */ | ||
3969 | memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16); | ||
3970 | #endif | ||
3971 | |||
3972 | /* | ||
3973 | * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but | ||
3974 | * does save and restore MXCSR. | ||
3975 | */ | ||
3976 | if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)) | ||
3977 | memcpy(new->xmm_space, old.xmm_space, 8 * 16); | ||
3978 | |||
3979 | return rc; | ||
3980 | } | ||
3981 | |||
3982 | static int em_fxrstor(struct x86_emulate_ctxt *ctxt) | ||
3983 | { | ||
3984 | struct fxregs_state fx_state; | ||
3985 | int rc; | ||
3986 | |||
3987 | rc = check_fxsr(ctxt); | ||
3988 | if (rc != X86EMUL_CONTINUE) | ||
3989 | return rc; | ||
3990 | |||
3991 | rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512); | ||
3992 | if (rc != X86EMUL_CONTINUE) | ||
3993 | return rc; | ||
3994 | |||
3995 | if (fx_state.mxcsr >> 16) | ||
3996 | return emulate_gp(ctxt, 0); | ||
3997 | |||
3998 | ctxt->ops->get_fpu(ctxt); | ||
3999 | |||
4000 | if (ctxt->mode < X86EMUL_MODE_PROT64) | ||
4001 | rc = fxrstor_fixup(ctxt, &fx_state); | ||
4002 | |||
4003 | if (rc == X86EMUL_CONTINUE) | ||
4004 | rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state)); | ||
4005 | |||
4006 | ctxt->ops->put_fpu(ctxt); | ||
4007 | |||
4008 | return rc; | ||
4009 | } | ||
4010 | |||
3886 | static bool valid_cr(int nr) | 4011 | static bool valid_cr(int nr) |
3887 | { | 4012 | { |
3888 | switch (nr) { | 4013 | switch (nr) { |
@@ -4235,7 +4360,9 @@ static const struct gprefix pfx_0f_ae_7 = { | |||
4235 | }; | 4360 | }; |
4236 | 4361 | ||
4237 | static const struct group_dual group15 = { { | 4362 | static const struct group_dual group15 = { { |
4238 | N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7), | 4363 | I(ModRM | Aligned16, em_fxsave), |
4364 | I(ModRM | Aligned16, em_fxrstor), | ||
4365 | N, N, N, N, N, GP(0, &pfx_0f_ae_7), | ||
4239 | }, { | 4366 | }, { |
4240 | N, N, N, N, N, N, N, N, | 4367 | N, N, N, N, N, N, N, N, |
4241 | } }; | 4368 | } }; |