diff options
author | Wanpeng Li <wanpengli@tencent.com> | 2018-03-12 07:53:02 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2018-03-16 17:03:51 -0400 |
commit | 4d5422cea3b61f158d58924cbb43feada456ba5c (patch) | |
tree | 0657ac655c54892dd6498d8e20b89380255d592a | |
parent | 4956aa3b8b59e336b77f1df0ac7ce1a94c0265f2 (diff) |
KVM: X86: Provide a capability to disable MWAIT intercepts
Allowing a guest to execute MWAIT without interception enables the guest
to put a (physical) CPU into a power saving state from which it may take
longer to return than the host desires.
Don't give a guest that power over a host by default. (Especially since
nothing prevents a guest from using MWAIT even when it is not
advertised via CPUID.)
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 27 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 9 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 24 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 10 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 2 | ||||
-rw-r--r-- | tools/include/uapi/linux/kvm.h | 2 |
8 files changed, 53 insertions, 25 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 786c1b4ecb59..744a202cca31 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt | |||
@@ -4358,6 +4358,24 @@ enables QEMU to build error log and branch to guest kernel registered | |||
4358 | machine check handling routine. Without this capability KVM will | 4358 | machine check handling routine. Without this capability KVM will |
4359 | branch to guests' 0x200 interrupt vector. | 4359 | branch to guests' 0x200 interrupt vector. |
4360 | 4360 | ||
4361 | 7.13 KVM_CAP_X86_DISABLE_EXITS | ||
4362 | |||
4363 | Architectures: x86 | ||
4364 | Parameters: args[0] defines which exits are disabled | ||
4365 | Returns: 0 on success, -EINVAL when args[0] contains invalid exits | ||
4366 | |||
4367 | Valid bits in args[0] are | ||
4368 | |||
4369 | #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) | ||
4370 | |||
4371 | Enabling this capability on a VM provides userspace with a way to no | ||
4372 | longer intercept some instructions for improved latency in some | ||
4373 | workloads, and is suggested when vCPUs are associated to dedicated | ||
4374 | physical CPUs. More bits can be added in the future; userspace can | ||
4375 | just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable | ||
4376 | all such vmexits. | ||
4377 | |||
4378 | |||
4361 | 8. Other capabilities. | 4379 | 8. Other capabilities. |
4362 | ---------------------- | 4380 | ---------------------- |
4363 | 4381 | ||
@@ -4470,15 +4488,6 @@ reserved. | |||
4470 | Both registers and addresses are 64-bits wide. | 4488 | Both registers and addresses are 64-bits wide. |
4471 | It will be possible to run 64-bit or 32-bit guest code. | 4489 | It will be possible to run 64-bit or 32-bit guest code. |
4472 | 4490 | ||
4473 | 8.8 KVM_CAP_X86_GUEST_MWAIT | ||
4474 | |||
4475 | Architectures: x86 | ||
4476 | |||
4477 | This capability indicates that guest using memory monotoring instructions | ||
4478 | (MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit. As such time | ||
4479 | spent while virtual CPU is halted in this way will then be accounted for as | ||
4480 | guest running time on the host (as opposed to e.g. HLT). | ||
4481 | |||
4482 | 8.9 KVM_CAP_ARM_USER_IRQ | 4491 | 8.9 KVM_CAP_ARM_USER_IRQ |
4483 | 4492 | ||
4484 | Architectures: arm, arm64 | 4493 | Architectures: arm, arm64 |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 480a75b22b69..a85b640aee1e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -811,6 +811,8 @@ struct kvm_arch { | |||
811 | 811 | ||
812 | gpa_t wall_clock; | 812 | gpa_t wall_clock; |
813 | 813 | ||
814 | bool mwait_in_guest; | ||
815 | |||
814 | bool ept_identity_pagetable_done; | 816 | bool ept_identity_pagetable_done; |
815 | gpa_t ept_identity_map_addr; | 817 | gpa_t ept_identity_map_addr; |
816 | 818 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fa1c4977e1c2..f6578cee6bb6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -1398,7 +1398,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1398 | set_intercept(svm, INTERCEPT_XSETBV); | 1398 | set_intercept(svm, INTERCEPT_XSETBV); |
1399 | set_intercept(svm, INTERCEPT_RSM); | 1399 | set_intercept(svm, INTERCEPT_RSM); |
1400 | 1400 | ||
1401 | if (!kvm_mwait_in_guest()) { | 1401 | if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { |
1402 | set_intercept(svm, INTERCEPT_MONITOR); | 1402 | set_intercept(svm, INTERCEPT_MONITOR); |
1403 | set_intercept(svm, INTERCEPT_MWAIT); | 1403 | set_intercept(svm, INTERCEPT_MWAIT); |
1404 | } | 1404 | } |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b4d8da6c62c8..7cef183993ba 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -3746,13 +3746,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
3746 | CPU_BASED_UNCOND_IO_EXITING | | 3746 | CPU_BASED_UNCOND_IO_EXITING | |
3747 | CPU_BASED_MOV_DR_EXITING | | 3747 | CPU_BASED_MOV_DR_EXITING | |
3748 | CPU_BASED_USE_TSC_OFFSETING | | 3748 | CPU_BASED_USE_TSC_OFFSETING | |
3749 | CPU_BASED_MWAIT_EXITING | | ||
3750 | CPU_BASED_MONITOR_EXITING | | ||
3749 | CPU_BASED_INVLPG_EXITING | | 3751 | CPU_BASED_INVLPG_EXITING | |
3750 | CPU_BASED_RDPMC_EXITING; | 3752 | CPU_BASED_RDPMC_EXITING; |
3751 | 3753 | ||
3752 | if (!kvm_mwait_in_guest()) | ||
3753 | min |= CPU_BASED_MWAIT_EXITING | | ||
3754 | CPU_BASED_MONITOR_EXITING; | ||
3755 | |||
3756 | opt = CPU_BASED_TPR_SHADOW | | 3754 | opt = CPU_BASED_TPR_SHADOW | |
3757 | CPU_BASED_USE_MSR_BITMAPS | | 3755 | CPU_BASED_USE_MSR_BITMAPS | |
3758 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 3756 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
@@ -5544,6 +5542,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) | |||
5544 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | 5542 | exec_control |= CPU_BASED_CR3_STORE_EXITING | |
5545 | CPU_BASED_CR3_LOAD_EXITING | | 5543 | CPU_BASED_CR3_LOAD_EXITING | |
5546 | CPU_BASED_INVLPG_EXITING; | 5544 | CPU_BASED_INVLPG_EXITING; |
5545 | if (kvm_mwait_in_guest(vmx->vcpu.kvm)) | ||
5546 | exec_control &= ~(CPU_BASED_MWAIT_EXITING | | ||
5547 | CPU_BASED_MONITOR_EXITING); | ||
5547 | return exec_control; | 5548 | return exec_control; |
5548 | } | 5549 | } |
5549 | 5550 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9e1496cb2345..db95d4d6f57b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -2813,9 +2813,15 @@ out: | |||
2813 | return r; | 2813 | return r; |
2814 | } | 2814 | } |
2815 | 2815 | ||
2816 | static inline bool kvm_can_mwait_in_guest(void) | ||
2817 | { | ||
2818 | return boot_cpu_has(X86_FEATURE_MWAIT) && | ||
2819 | !boot_cpu_has_bug(X86_BUG_MONITOR); | ||
2820 | } | ||
2821 | |||
2816 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | 2822 | int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) |
2817 | { | 2823 | { |
2818 | int r; | 2824 | int r = 0; |
2819 | 2825 | ||
2820 | switch (ext) { | 2826 | switch (ext) { |
2821 | case KVM_CAP_IRQCHIP: | 2827 | case KVM_CAP_IRQCHIP: |
@@ -2871,8 +2877,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2871 | case KVM_CAP_ADJUST_CLOCK: | 2877 | case KVM_CAP_ADJUST_CLOCK: |
2872 | r = KVM_CLOCK_TSC_STABLE; | 2878 | r = KVM_CLOCK_TSC_STABLE; |
2873 | break; | 2879 | break; |
2874 | case KVM_CAP_X86_GUEST_MWAIT: | 2880 | case KVM_CAP_X86_DISABLE_EXITS: |
2875 | r = kvm_mwait_in_guest(); | 2881 | if(kvm_can_mwait_in_guest()) |
2882 | r |= KVM_X86_DISABLE_EXITS_MWAIT; | ||
2876 | break; | 2883 | break; |
2877 | case KVM_CAP_X86_SMM: | 2884 | case KVM_CAP_X86_SMM: |
2878 | /* SMBASE is usually relocated above 1M on modern chipsets, | 2885 | /* SMBASE is usually relocated above 1M on modern chipsets, |
@@ -2913,7 +2920,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) | |||
2913 | r = KVM_X2APIC_API_VALID_FLAGS; | 2920 | r = KVM_X2APIC_API_VALID_FLAGS; |
2914 | break; | 2921 | break; |
2915 | default: | 2922 | default: |
2916 | r = 0; | ||
2917 | break; | 2923 | break; |
2918 | } | 2924 | } |
2919 | return r; | 2925 | return r; |
@@ -4218,6 +4224,16 @@ split_irqchip_unlock: | |||
4218 | 4224 | ||
4219 | r = 0; | 4225 | r = 0; |
4220 | break; | 4226 | break; |
4227 | case KVM_CAP_X86_DISABLE_EXITS: | ||
4228 | r = -EINVAL; | ||
4229 | if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) | ||
4230 | break; | ||
4231 | |||
4232 | if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && | ||
4233 | kvm_can_mwait_in_guest()) | ||
4234 | kvm->arch.mwait_in_guest = true; | ||
4235 | r = 0; | ||
4236 | break; | ||
4221 | default: | 4237 | default: |
4222 | r = -EINVAL; | 4238 | r = -EINVAL; |
4223 | break; | 4239 | break; |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 18e2e0a91edc..026b239bf058 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -2,8 +2,6 @@ | |||
2 | #ifndef ARCH_X86_KVM_X86_H | 2 | #ifndef ARCH_X86_KVM_X86_H |
3 | #define ARCH_X86_KVM_X86_H | 3 | #define ARCH_X86_KVM_X86_H |
4 | 4 | ||
5 | #include <asm/processor.h> | ||
6 | #include <asm/mwait.h> | ||
7 | #include <linux/kvm_host.h> | 5 | #include <linux/kvm_host.h> |
8 | #include <asm/pvclock.h> | 6 | #include <asm/pvclock.h> |
9 | #include "kvm_cache_regs.h" | 7 | #include "kvm_cache_regs.h" |
@@ -266,10 +264,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) | |||
266 | __rem; \ | 264 | __rem; \ |
267 | }) | 265 | }) |
268 | 266 | ||
269 | static inline bool kvm_mwait_in_guest(void) | 267 | #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) |
268 | #define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT) | ||
269 | |||
270 | static inline bool kvm_mwait_in_guest(struct kvm *kvm) | ||
270 | { | 271 | { |
271 | return boot_cpu_has(X86_FEATURE_MWAIT) && | 272 | return kvm->arch.mwait_in_guest; |
272 | !boot_cpu_has_bug(X86_BUG_MONITOR); | ||
273 | } | 273 | } |
274 | 274 | ||
275 | #endif | 275 | #endif |
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 088c2c92db55..1065006c9bf5 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h | |||
@@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt { | |||
929 | #define KVM_CAP_S390_GS 140 | 929 | #define KVM_CAP_S390_GS 140 |
930 | #define KVM_CAP_S390_AIS 141 | 930 | #define KVM_CAP_S390_AIS 141 |
931 | #define KVM_CAP_SPAPR_TCE_VFIO 142 | 931 | #define KVM_CAP_SPAPR_TCE_VFIO 142 |
932 | #define KVM_CAP_X86_GUEST_MWAIT 143 | 932 | #define KVM_CAP_X86_DISABLE_EXITS 143 |
933 | #define KVM_CAP_ARM_USER_IRQ 144 | 933 | #define KVM_CAP_ARM_USER_IRQ 144 |
934 | #define KVM_CAP_S390_CMMA_MIGRATION 145 | 934 | #define KVM_CAP_S390_CMMA_MIGRATION 145 |
935 | #define KVM_CAP_PPC_FWNMI 146 | 935 | #define KVM_CAP_PPC_FWNMI 146 |
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 0fb5ef939732..b13c257261af 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h | |||
@@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt { | |||
924 | #define KVM_CAP_S390_GS 140 | 924 | #define KVM_CAP_S390_GS 140 |
925 | #define KVM_CAP_S390_AIS 141 | 925 | #define KVM_CAP_S390_AIS 141 |
926 | #define KVM_CAP_SPAPR_TCE_VFIO 142 | 926 | #define KVM_CAP_SPAPR_TCE_VFIO 142 |
927 | #define KVM_CAP_X86_GUEST_MWAIT 143 | 927 | #define KVM_CAP_X86_DISABLE_EXITS 143 |
928 | #define KVM_CAP_ARM_USER_IRQ 144 | 928 | #define KVM_CAP_ARM_USER_IRQ 144 |
929 | #define KVM_CAP_S390_CMMA_MIGRATION 145 | 929 | #define KVM_CAP_S390_CMMA_MIGRATION 145 |
930 | #define KVM_CAP_PPC_FWNMI 146 | 930 | #define KVM_CAP_PPC_FWNMI 146 |