aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorKai Huang <kai.huang@linux.intel.com>2015-01-27 21:54:28 -0500
committerPaolo Bonzini <pbonzini@redhat.com>2015-01-30 03:39:54 -0500
commit843e4330573cc5261ae260ce0b83dc570d8cdc05 (patch)
treed0f12d56f8f4c0abc1c4c204468d6061b4ba0be3 /arch/x86
parent88178fd4f7187bbe290c5d373fd44aabec891934 (diff)
KVM: VMX: Add PML support in VMX
This patch adds PML support in VMX. A new module parameter 'enable_pml' is added to allow user to enable/disable it manually. Signed-off-by: Kai Huang <kai.huang@linux.intel.com> Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kvm/trace.h18
-rw-r--r--arch/x86/kvm/vmx.c195
-rw-r--r--arch/x86/kvm/x86.c1
5 files changed, 218 insertions, 1 deletions
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 45afaee9555c..da772edd19ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
69#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 69#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
70#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 70#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
71#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 71#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
72#define SECONDARY_EXEC_ENABLE_PML 0x00020000
72#define SECONDARY_EXEC_XSAVES 0x00100000 73#define SECONDARY_EXEC_XSAVES 0x00100000
73 74
74 75
@@ -121,6 +122,7 @@ enum vmcs_field {
121 GUEST_LDTR_SELECTOR = 0x0000080c, 122 GUEST_LDTR_SELECTOR = 0x0000080c,
122 GUEST_TR_SELECTOR = 0x0000080e, 123 GUEST_TR_SELECTOR = 0x0000080e,
123 GUEST_INTR_STATUS = 0x00000810, 124 GUEST_INTR_STATUS = 0x00000810,
125 GUEST_PML_INDEX = 0x00000812,
124 HOST_ES_SELECTOR = 0x00000c00, 126 HOST_ES_SELECTOR = 0x00000c00,
125 HOST_CS_SELECTOR = 0x00000c02, 127 HOST_CS_SELECTOR = 0x00000c02,
126 HOST_SS_SELECTOR = 0x00000c04, 128 HOST_SS_SELECTOR = 0x00000c04,
@@ -140,6 +142,8 @@ enum vmcs_field {
140 VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, 142 VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
141 VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, 143 VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
142 VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, 144 VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
145 PML_ADDRESS = 0x0000200e,
146 PML_ADDRESS_HIGH = 0x0000200f,
143 TSC_OFFSET = 0x00002010, 147 TSC_OFFSET = 0x00002010,
144 TSC_OFFSET_HIGH = 0x00002011, 148 TSC_OFFSET_HIGH = 0x00002011,
145 VIRTUAL_APIC_PAGE_ADDR = 0x00002012, 149 VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index ff2b8e28883e..c5f1a1deb91a 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -73,6 +73,7 @@
73#define EXIT_REASON_XSETBV 55 73#define EXIT_REASON_XSETBV 55
74#define EXIT_REASON_APIC_WRITE 56 74#define EXIT_REASON_APIC_WRITE 56
75#define EXIT_REASON_INVPCID 58 75#define EXIT_REASON_INVPCID 58
76#define EXIT_REASON_PML_FULL 62
76#define EXIT_REASON_XSAVES 63 77#define EXIT_REASON_XSAVES 63
77#define EXIT_REASON_XRSTORS 64 78#define EXIT_REASON_XRSTORS 64
78 79
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 587149bd6f76..7c7bc8bef21f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
848 848
849#endif /* CONFIG_X86_64 */ 849#endif /* CONFIG_X86_64 */
850 850
851/*
852 * Tracepoint for PML full VMEXIT.
853 */
854TRACE_EVENT(kvm_pml_full,
855 TP_PROTO(unsigned int vcpu_id),
856 TP_ARGS(vcpu_id),
857
858 TP_STRUCT__entry(
859 __field( unsigned int, vcpu_id )
860 ),
861
862 TP_fast_assign(
863 __entry->vcpu_id = vcpu_id;
864 ),
865
866 TP_printk("vcpu %d: PML full", __entry->vcpu_id)
867);
868
851TRACE_EVENT(kvm_ple_window, 869TRACE_EVENT(kvm_ple_window,
852 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), 870 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
853 TP_ARGS(grow, vcpu_id, new, old), 871 TP_ARGS(grow, vcpu_id, new, old),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c987374d92c1..de5ce82b2436 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -101,6 +101,9 @@ module_param(nested, bool, S_IRUGO);
101 101
102static u64 __read_mostly host_xss; 102static u64 __read_mostly host_xss;
103 103
104static bool __read_mostly enable_pml = 1;
105module_param_named(pml, enable_pml, bool, S_IRUGO);
106
104#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) 107#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
105#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) 108#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
106#define KVM_VM_CR0_ALWAYS_ON \ 109#define KVM_VM_CR0_ALWAYS_ON \
@@ -516,6 +519,10 @@ struct vcpu_vmx {
516 /* Dynamic PLE window. */ 519 /* Dynamic PLE window. */
517 int ple_window; 520 int ple_window;
518 bool ple_window_dirty; 521 bool ple_window_dirty;
522
523 /* Support for PML */
524#define PML_ENTITY_NUM 512
525 struct page *pml_pg;
519}; 526};
520 527
521enum segment_cache_field { 528enum segment_cache_field {
@@ -1068,6 +1075,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
1068 SECONDARY_EXEC_SHADOW_VMCS; 1075 SECONDARY_EXEC_SHADOW_VMCS;
1069} 1076}
1070 1077
1078static inline bool cpu_has_vmx_pml(void)
1079{
1080 return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1081}
1082
1071static inline bool report_flexpriority(void) 1083static inline bool report_flexpriority(void)
1072{ 1084{
1073 return flexpriority_enabled; 1085 return flexpriority_enabled;
@@ -2924,7 +2936,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2924 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2936 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2925 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2937 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2926 SECONDARY_EXEC_SHADOW_VMCS | 2938 SECONDARY_EXEC_SHADOW_VMCS |
2927 SECONDARY_EXEC_XSAVES; 2939 SECONDARY_EXEC_XSAVES |
2940 SECONDARY_EXEC_ENABLE_PML;
2928 if (adjust_vmx_controls(min2, opt2, 2941 if (adjust_vmx_controls(min2, opt2,
2929 MSR_IA32_VMX_PROCBASED_CTLS2, 2942 MSR_IA32_VMX_PROCBASED_CTLS2,
2930 &_cpu_based_2nd_exec_control) < 0) 2943 &_cpu_based_2nd_exec_control) < 0)
@@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
4355 a current VMCS12 4368 a current VMCS12
4356 */ 4369 */
4357 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 4370 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 4371 /* PML is enabled/disabled in creating/destroying vcpu */
4372 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4373
4358 return exec_control; 4374 return exec_control;
4359} 4375}
4360 4376
@@ -5942,6 +5958,20 @@ static __init int hardware_setup(void)
5942 5958
5943 update_ple_window_actual_max(); 5959 update_ple_window_actual_max();
5944 5960
5961 /*
5962 * Only enable PML when hardware supports PML feature, and both EPT
5963 * and EPT A/D bit features are enabled -- PML depends on them to work.
5964 */
5965 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
5966 enable_pml = 0;
5967
5968 if (!enable_pml) {
5969 kvm_x86_ops->slot_enable_log_dirty = NULL;
5970 kvm_x86_ops->slot_disable_log_dirty = NULL;
5971 kvm_x86_ops->flush_log_dirty = NULL;
5972 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
5973 }
5974
5945 return alloc_kvm_area(); 5975 return alloc_kvm_area();
5946 5976
5947out7: 5977out7:
@@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
6971 return pi_test_pir(vector, &vmx->pi_desc); 7001 return pi_test_pir(vector, &vmx->pi_desc);
6972} 7002}
6973 7003
7004static int handle_pml_full(struct kvm_vcpu *vcpu)
7005{
7006 unsigned long exit_qualification;
7007
7008 trace_kvm_pml_full(vcpu->vcpu_id);
7009
7010 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7011
7012 /*
7013 * PML buffer FULL happened while executing iret from NMI,
7014 * "blocked by NMI" bit has to be set before next VM entry.
7015 */
7016 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
7017 cpu_has_virtual_nmis() &&
7018 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
7019 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7020 GUEST_INTR_STATE_NMI);
7021
7022 /*
7023 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
 7024 * here, and there's no userspace involvement needed for PML.
7025 */
7026 return 1;
7027}
7028
6974/* 7029/*
6975 * The exit handlers return 1 if the exit was handled fully and guest execution 7030 * The exit handlers return 1 if the exit was handled fully and guest execution
6976 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 7031 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7019,6 +7074,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
7019 [EXIT_REASON_INVVPID] = handle_invvpid, 7074 [EXIT_REASON_INVVPID] = handle_invvpid,
7020 [EXIT_REASON_XSAVES] = handle_xsaves, 7075 [EXIT_REASON_XSAVES] = handle_xsaves,
7021 [EXIT_REASON_XRSTORS] = handle_xrstors, 7076 [EXIT_REASON_XRSTORS] = handle_xrstors,
7077 [EXIT_REASON_PML_FULL] = handle_pml_full,
7022}; 7078};
7023 7079
7024static const int kvm_vmx_max_exit_handlers = 7080static const int kvm_vmx_max_exit_handlers =
@@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
7325 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 7381 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
7326} 7382}
7327 7383
7384static int vmx_enable_pml(struct vcpu_vmx *vmx)
7385{
7386 struct page *pml_pg;
7387 u32 exec_control;
7388
7389 pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
7390 if (!pml_pg)
7391 return -ENOMEM;
7392
7393 vmx->pml_pg = pml_pg;
7394
7395 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
7396 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
7397
7398 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7399 exec_control |= SECONDARY_EXEC_ENABLE_PML;
7400 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
7401
7402 return 0;
7403}
7404
7405static void vmx_disable_pml(struct vcpu_vmx *vmx)
7406{
7407 u32 exec_control;
7408
7409 ASSERT(vmx->pml_pg);
7410 __free_page(vmx->pml_pg);
7411 vmx->pml_pg = NULL;
7412
7413 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7414 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
7415 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
7416}
7417
7418static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
7419{
7420 struct kvm *kvm = vmx->vcpu.kvm;
7421 u64 *pml_buf;
7422 u16 pml_idx;
7423
7424 pml_idx = vmcs_read16(GUEST_PML_INDEX);
7425
7426 /* Do nothing if PML buffer is empty */
7427 if (pml_idx == (PML_ENTITY_NUM - 1))
7428 return;
7429
7430 /* PML index always points to next available PML buffer entity */
7431 if (pml_idx >= PML_ENTITY_NUM)
7432 pml_idx = 0;
7433 else
7434 pml_idx++;
7435
7436 pml_buf = page_address(vmx->pml_pg);
7437 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
7438 u64 gpa;
7439
7440 gpa = pml_buf[pml_idx];
7441 WARN_ON(gpa & (PAGE_SIZE - 1));
7442 mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
7443 }
7444
7445 /* reset PML index */
7446 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
7447}
7448
7449/*
7450 * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
7451 * Called before reporting dirty_bitmap to userspace.
7452 */
7453static void kvm_flush_pml_buffers(struct kvm *kvm)
7454{
7455 int i;
7456 struct kvm_vcpu *vcpu;
7457 /*
 7458 * We only need to kick each vcpu out of guest mode here, as the
 7459 * PML buffer is flushed at the beginning of every VMEXIT, so clearly
 7460 * only vcpus currently running in guest mode can have unflushed GPAs
 7461 * in their PML buffers.
7462 */
7463 kvm_for_each_vcpu(i, vcpu, kvm)
7464 kvm_vcpu_kick(vcpu);
7465}
7466
7328/* 7467/*
7329 * The guest has exited. See if we can fix it or if we need userspace 7468 * The guest has exited. See if we can fix it or if we need userspace
7330 * assistance. 7469 * assistance.
@@ -7335,6 +7474,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7335 u32 exit_reason = vmx->exit_reason; 7474 u32 exit_reason = vmx->exit_reason;
7336 u32 vectoring_info = vmx->idt_vectoring_info; 7475 u32 vectoring_info = vmx->idt_vectoring_info;
7337 7476
7477 /*
 7478 * Flush logged GPAs from the PML buffer, keeping dirty_bitmap more
 7479 * up to date. A further benefit is that, in kvm_vm_ioctl_get_dirty_log,
 7480 * before querying dirty_bitmap we only need to kick all vcpus out of
 7481 * guest mode, since once a vcpu is back in root mode its PML buffer
 7482 * must already have been flushed.
7483 */
7484 if (enable_pml)
7485 vmx_flush_pml_buffer(vmx);
7486
7338 /* If guest state is invalid, start emulating */ 7487 /* If guest state is invalid, start emulating */
7339 if (vmx->emulation_required) 7488 if (vmx->emulation_required)
7340 return handle_invalid_guest_state(vcpu); 7489 return handle_invalid_guest_state(vcpu);
@@ -7981,6 +8130,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7981{ 8130{
7982 struct vcpu_vmx *vmx = to_vmx(vcpu); 8131 struct vcpu_vmx *vmx = to_vmx(vcpu);
7983 8132
8133 if (enable_pml)
8134 vmx_disable_pml(vmx);
7984 free_vpid(vmx); 8135 free_vpid(vmx);
7985 leave_guest_mode(vcpu); 8136 leave_guest_mode(vcpu);
7986 vmx_load_vmcs01(vcpu); 8137 vmx_load_vmcs01(vcpu);
@@ -8051,6 +8202,18 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
8051 vmx->nested.current_vmptr = -1ull; 8202 vmx->nested.current_vmptr = -1ull;
8052 vmx->nested.current_vmcs12 = NULL; 8203 vmx->nested.current_vmcs12 = NULL;
8053 8204
8205 /*
 8206 * If PML is turned on, failure to enable PML simply results in
 8207 * failure to create the vcpu, which lets us simplify the PML logic
 8208 * (by avoiding cases such as PML being enabled on only some of the
 8209 * guest's vcpus, etc.).
8210 */
8211 if (enable_pml) {
8212 err = vmx_enable_pml(vmx);
8213 if (err)
8214 goto free_vmcs;
8215 }
8216
8054 return &vmx->vcpu; 8217 return &vmx->vcpu;
8055 8218
8056free_vmcs: 8219free_vmcs:
@@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
9492 shrink_ple_window(vcpu); 9655 shrink_ple_window(vcpu);
9493} 9656}
9494 9657
9658static void vmx_slot_enable_log_dirty(struct kvm *kvm,
9659 struct kvm_memory_slot *slot)
9660{
9661 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
9662 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
9663}
9664
9665static void vmx_slot_disable_log_dirty(struct kvm *kvm,
9666 struct kvm_memory_slot *slot)
9667{
9668 kvm_mmu_slot_set_dirty(kvm, slot);
9669}
9670
9671static void vmx_flush_log_dirty(struct kvm *kvm)
9672{
9673 kvm_flush_pml_buffers(kvm);
9674}
9675
9676static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
9677 struct kvm_memory_slot *memslot,
9678 gfn_t offset, unsigned long mask)
9679{
9680 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
9681}
9682
9495static struct kvm_x86_ops vmx_x86_ops = { 9683static struct kvm_x86_ops vmx_x86_ops = {
9496 .cpu_has_kvm_support = cpu_has_kvm_support, 9684 .cpu_has_kvm_support = cpu_has_kvm_support,
9497 .disabled_by_bios = vmx_disabled_by_bios, 9685 .disabled_by_bios = vmx_disabled_by_bios,
@@ -9601,6 +9789,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
9601 .check_nested_events = vmx_check_nested_events, 9789 .check_nested_events = vmx_check_nested_events,
9602 9790
9603 .sched_in = vmx_sched_in, 9791 .sched_in = vmx_sched_in,
9792
9793 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
9794 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
9795 .flush_log_dirty = vmx_flush_log_dirty,
9796 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
9604}; 9797};
9605 9798
9606static int __init vmx_init(void) 9799static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 442ee7d90946..1373e04e1f19 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7880,3 +7880,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7880EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); 7880EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7881EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); 7881EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
7882EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); 7882EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
7883EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);