aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorKai Huang <kai.huang@linux.intel.com>2015-01-27 21:54:28 -0500
committerPaolo Bonzini <pbonzini@redhat.com>2015-01-30 03:39:54 -0500
commit843e4330573cc5261ae260ce0b83dc570d8cdc05 (patch)
treed0f12d56f8f4c0abc1c4c204468d6061b4ba0be3 /arch/x86
parent88178fd4f7187bbe290c5d373fd44aabec891934 (diff)
KVM: VMX: Add PML support in VMX
This patch adds PML support in VMX. A new module parameter 'enable_pml' is added to allow user to enable/disable it manually. Signed-off-by: Kai Huang <kai.huang@linux.intel.com> Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kvm/trace.h18
-rw-r--r--arch/x86/kvm/vmx.c195
-rw-r--r--arch/x86/kvm/x86.c1
5 files changed, 218 insertions, 1 deletions
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 45afaee9555c..da772edd19ab 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
69#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 69#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
70#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 70#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
71#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 71#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
72#define SECONDARY_EXEC_ENABLE_PML 0x00020000
72#define SECONDARY_EXEC_XSAVES 0x00100000 73#define SECONDARY_EXEC_XSAVES 0x00100000
73 74
74 75
@@ -121,6 +122,7 @@ enum vmcs_field {
121 GUEST_LDTR_SELECTOR = 0x0000080c, 122 GUEST_LDTR_SELECTOR = 0x0000080c,
122 GUEST_TR_SELECTOR = 0x0000080e, 123 GUEST_TR_SELECTOR = 0x0000080e,
123 GUEST_INTR_STATUS = 0x00000810, 124 GUEST_INTR_STATUS = 0x00000810,
125 GUEST_PML_INDEX = 0x00000812,
124 HOST_ES_SELECTOR = 0x00000c00, 126 HOST_ES_SELECTOR = 0x00000c00,
125 HOST_CS_SELECTOR = 0x00000c02, 127 HOST_CS_SELECTOR = 0x00000c02,
126 HOST_SS_SELECTOR = 0x00000c04, 128 HOST_SS_SELECTOR = 0x00000c04,
@@ -140,6 +142,8 @@ enum vmcs_field {
140 VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, 142 VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
141 VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, 143 VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
142 VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, 144 VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
145 PML_ADDRESS = 0x0000200e,
146 PML_ADDRESS_HIGH = 0x0000200f,
143 TSC_OFFSET = 0x00002010, 147 TSC_OFFSET = 0x00002010,
144 TSC_OFFSET_HIGH = 0x00002011, 148 TSC_OFFSET_HIGH = 0x00002011,
145 VIRTUAL_APIC_PAGE_ADDR = 0x00002012, 149 VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index ff2b8e28883e..c5f1a1deb91a 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -73,6 +73,7 @@
73#define EXIT_REASON_XSETBV 55 73#define EXIT_REASON_XSETBV 55
74#define EXIT_REASON_APIC_WRITE 56 74#define EXIT_REASON_APIC_WRITE 56
75#define EXIT_REASON_INVPCID 58 75#define EXIT_REASON_INVPCID 58
76#define EXIT_REASON_PML_FULL 62
76#define EXIT_REASON_XSAVES 63 77#define EXIT_REASON_XSAVES 63
77#define EXIT_REASON_XRSTORS 64 78#define EXIT_REASON_XRSTORS 64
78 79
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 587149bd6f76..7c7bc8bef21f 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -848,6 +848,24 @@ TRACE_EVENT(kvm_track_tsc,
848 848
849#endif /* CONFIG_X86_64 */ 849#endif /* CONFIG_X86_64 */
850 850
851/*
852 * Tracepoint for PML full VMEXIT.
853 */
854TRACE_EVENT(kvm_pml_full,
855 TP_PROTO(unsigned int vcpu_id),
856 TP_ARGS(vcpu_id),
857
858 TP_STRUCT__entry(
859 __field( unsigned int, vcpu_id )
860 ),
861
862 TP_fast_assign(
863 __entry->vcpu_id = vcpu_id;
864 ),
865
866 TP_printk("vcpu %d: PML full", __entry->vcpu_id)
867);
868
851TRACE_EVENT(kvm_ple_window, 869TRACE_EVENT(kvm_ple_window,
852 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old), 870 TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
853 TP_ARGS(grow, vcpu_id, new, old), 871 TP_ARGS(grow, vcpu_id, new, old),
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c987374d92c1..de5ce82b2436 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -101,6 +101,9 @@ module_param(nested, bool, S_IRUGO);
101 101
102static u64 __read_mostly host_xss; 102static u64 __read_mostly host_xss;
103 103
104static bool __read_mostly enable_pml = 1;
105module_param_named(pml, enable_pml, bool, S_IRUGO);
106
104#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) 107#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
105#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) 108#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
106#define KVM_VM_CR0_ALWAYS_ON \ 109#define KVM_VM_CR0_ALWAYS_ON \
@@ -516,6 +519,10 @@ struct vcpu_vmx {
516 /* Dynamic PLE window. */ 519 /* Dynamic PLE window. */
517 int ple_window; 520 int ple_window;
518 bool ple_window_dirty; 521 bool ple_window_dirty;
522
523 /* Support for PML */
524#define PML_ENTITY_NUM 512
525 struct page *pml_pg;
519}; 526};
520 527
521enum segment_cache_field { 528enum segment_cache_field {
@@ -1068,6 +1075,11 @@ static inline bool cpu_has_vmx_shadow_vmcs(void)
1068 SECONDARY_EXEC_SHADOW_VMCS; 1075 SECONDARY_EXEC_SHADOW_VMCS;
1069} 1076}
1070 1077
1078static inline bool cpu_has_vmx_pml(void)
1079{
1080 return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
1081}
1082
1071static inline bool report_flexpriority(void) 1083static inline bool report_flexpriority(void)
1072{ 1084{
1073 return flexpriority_enabled; 1085 return flexpriority_enabled;
@@ -2924,7 +2936,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2924 SECONDARY_EXEC_APIC_REGISTER_VIRT | 2936 SECONDARY_EXEC_APIC_REGISTER_VIRT |
2925 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 2937 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
2926 SECONDARY_EXEC_SHADOW_VMCS | 2938 SECONDARY_EXEC_SHADOW_VMCS |
2927 SECONDARY_EXEC_XSAVES; 2939 SECONDARY_EXEC_XSAVES |
2940 SECONDARY_EXEC_ENABLE_PML;
2928 if (adjust_vmx_controls(min2, opt2, 2941 if (adjust_vmx_controls(min2, opt2,
2929 MSR_IA32_VMX_PROCBASED_CTLS2, 2942 MSR_IA32_VMX_PROCBASED_CTLS2,
2930 &_cpu_based_2nd_exec_control) < 0) 2943 &_cpu_based_2nd_exec_control) < 0)
@@ -4355,6 +4368,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
4355 a current VMCS12 4368 a current VMCS12
4356 */ 4369 */
4357 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 4370 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 4371 /* PML is enabled/disabled in creating/destroying vcpu */
4372 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
4373
4358 return exec_control; 4374 return exec_control;
4359} 4375}
4360 4376
@@ -5942,6 +5958,20 @@ static __init int hardware_setup(void)
5942 5958
5943 update_ple_window_actual_max(); 5959 update_ple_window_actual_max();
5944 5960
5961 /*
5962 * Only enable PML when hardware supports PML feature, and both EPT
5963 * and EPT A/D bit features are enabled -- PML depends on them to work.
5964 */
5965 if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
5966 enable_pml = 0;
5967
5968 if (!enable_pml) {
5969 kvm_x86_ops->slot_enable_log_dirty = NULL;
5970 kvm_x86_ops->slot_disable_log_dirty = NULL;
5971 kvm_x86_ops->flush_log_dirty = NULL;
5972 kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
5973 }
5974
5945 return alloc_kvm_area(); 5975 return alloc_kvm_area();
5946 5976
5947out7: 5977out7:
@@ -6971,6 +7001,31 @@ static bool vmx_test_pir(struct kvm_vcpu *vcpu, int vector)
6971 return pi_test_pir(vector, &vmx->pi_desc); 7001 return pi_test_pir(vector, &vmx->pi_desc);
6972} 7002}
6973 7003
7004static int handle_pml_full(struct kvm_vcpu *vcpu)
7005{
7006 unsigned long exit_qualification;
7007
7008 trace_kvm_pml_full(vcpu->vcpu_id);
7009
7010 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
7011
7012 /*
7013 * PML buffer FULL happened while executing iret from NMI,
7014 * "blocked by NMI" bit has to be set before next VM entry.
7015 */
7016 if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
7017 cpu_has_virtual_nmis() &&
7018 (exit_qualification & INTR_INFO_UNBLOCK_NMI))
7019 vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
7020 GUEST_INTR_STATE_NMI);
7021
7022 /*
7023 * PML buffer already flushed at beginning of VMEXIT. Nothing to do
 7024 * here, and there's no userspace involvement needed for PML.
7025 */
7026 return 1;
7027}
7028
6974/* 7029/*
6975 * The exit handlers return 1 if the exit was handled fully and guest execution 7030 * The exit handlers return 1 if the exit was handled fully and guest execution
6976 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 7031 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7019,6 +7074,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
7019 [EXIT_REASON_INVVPID] = handle_invvpid, 7074 [EXIT_REASON_INVVPID] = handle_invvpid,
7020 [EXIT_REASON_XSAVES] = handle_xsaves, 7075 [EXIT_REASON_XSAVES] = handle_xsaves,
7021 [EXIT_REASON_XRSTORS] = handle_xrstors, 7076 [EXIT_REASON_XRSTORS] = handle_xrstors,
7077 [EXIT_REASON_PML_FULL] = handle_pml_full,
7022}; 7078};
7023 7079
7024static const int kvm_vmx_max_exit_handlers = 7080static const int kvm_vmx_max_exit_handlers =
@@ -7325,6 +7381,89 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
7325 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 7381 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
7326} 7382}
7327 7383
7384static int vmx_enable_pml(struct vcpu_vmx *vmx)
7385{
7386 struct page *pml_pg;
7387 u32 exec_control;
7388
7389 pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
7390 if (!pml_pg)
7391 return -ENOMEM;
7392
7393 vmx->pml_pg = pml_pg;
7394
7395 vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
7396 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
7397
7398 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7399 exec_control |= SECONDARY_EXEC_ENABLE_PML;
7400 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
7401
7402 return 0;
7403}
7404
7405static void vmx_disable_pml(struct vcpu_vmx *vmx)
7406{
7407 u32 exec_control;
7408
7409 ASSERT(vmx->pml_pg);
7410 __free_page(vmx->pml_pg);
7411 vmx->pml_pg = NULL;
7412
7413 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
7414 exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
7415 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
7416}
7417
7418static void vmx_flush_pml_buffer(struct vcpu_vmx *vmx)
7419{
7420 struct kvm *kvm = vmx->vcpu.kvm;
7421 u64 *pml_buf;
7422 u16 pml_idx;
7423
7424 pml_idx = vmcs_read16(GUEST_PML_INDEX);
7425
7426 /* Do nothing if PML buffer is empty */
7427 if (pml_idx == (PML_ENTITY_NUM - 1))
7428 return;
7429
7430 /* PML index always points to next available PML buffer entity */
7431 if (pml_idx >= PML_ENTITY_NUM)
7432 pml_idx = 0;
7433 else
7434 pml_idx++;
7435
7436 pml_buf = page_address(vmx->pml_pg);
7437 for (; pml_idx < PML_ENTITY_NUM; pml_idx++) {
7438 u64 gpa;
7439
7440 gpa = pml_buf[pml_idx];
7441 WARN_ON(gpa & (PAGE_SIZE - 1));
7442 mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
7443 }
7444
7445 /* reset PML index */
7446 vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
7447}
7448
7449/*
7450 * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
7451 * Called before reporting dirty_bitmap to userspace.
7452 */
7453static void kvm_flush_pml_buffers(struct kvm *kvm)
7454{
7455 int i;
7456 struct kvm_vcpu *vcpu;
7457 /*
 7458 * We only need to kick each vcpu out of guest mode here, as the
 7459 * PML buffer is flushed at the beginning of every VMEXIT, so clearly
 7460 * only vcpus currently running in guest mode can have unflushed GPAs
 7461 * in their PML buffers.
7462 */
7463 kvm_for_each_vcpu(i, vcpu, kvm)
7464 kvm_vcpu_kick(vcpu);
7465}
7466
7328/* 7467/*
7329 * The guest has exited. See if we can fix it or if we need userspace 7468 * The guest has exited. See if we can fix it or if we need userspace
7330 * assistance. 7469 * assistance.
@@ -7335,6 +7474,16 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7335 u32 exit_reason = vmx->exit_reason; 7474 u32 exit_reason = vmx->exit_reason;
7336 u32 vectoring_info = vmx->idt_vectoring_info; 7475 u32 vectoring_info = vmx->idt_vectoring_info;
7337 7476
7477 /*
 7478 * Flush logged GPAs from the PML buffer, keeping dirty_bitmap more
 7479 * up to date. A further benefit is that, in kvm_vm_ioctl_get_dirty_log,
 7480 * before querying dirty_bitmap we only need to kick all vcpus out of
 7481 * guest mode, since once a vcpu is back in root mode its PML buffer
 7482 * must already have been flushed.
7483 */
7484 if (enable_pml)
7485 vmx_flush_pml_buffer(vmx);
7486
7338 /* If guest state is invalid, start emulating */ 7487 /* If guest state is invalid, start emulating */
7339 if (vmx->emulation_required) 7488 if (vmx->emulation_required)
7340 return handle_invalid_guest_state(vcpu); 7489 return handle_invalid_guest_state(vcpu);
@@ -7981,6 +8130,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7981{ 8130{
7982 struct vcpu_vmx *vmx = to_vmx(vcpu); 8131 struct vcpu_vmx *vmx = to_vmx(vcpu);
7983 8132
8133 if (enable_pml)
8134 vmx_disable_pml(vmx);
7984 free_vpid(vmx); 8135 free_vpid(vmx);
7985 leave_guest_mode(vcpu); 8136 leave_guest_mode(vcpu);
7986 vmx_load_vmcs01(vcpu); 8137 vmx_load_vmcs01(vcpu);
@@ -8051,6 +8202,18 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
8051 vmx->nested.current_vmptr = -1ull; 8202 vmx->nested.current_vmptr = -1ull;
8052 vmx->nested.current_vmcs12 = NULL; 8203 vmx->nested.current_vmcs12 = NULL;
8053 8204
8205 /*
 8206 * If PML is turned on, failure to enable PML simply results in
 8207 * failure to create the vcpu, which lets us simplify the PML logic
 8208 * (by avoiding cases such as PML being enabled on only some of the
 8209 * guest's vcpus, etc.).
8210 */
8211 if (enable_pml) {
8212 err = vmx_enable_pml(vmx);
8213 if (err)
8214 goto free_vmcs;
8215 }
8216
8054 return &vmx->vcpu; 8217 return &vmx->vcpu;
8055 8218
8056free_vmcs: 8219free_vmcs:
@@ -9492,6 +9655,31 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
9492 shrink_ple_window(vcpu); 9655 shrink_ple_window(vcpu);
9493} 9656}
9494 9657
9658static void vmx_slot_enable_log_dirty(struct kvm *kvm,
9659 struct kvm_memory_slot *slot)
9660{
9661 kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
9662 kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
9663}
9664
9665static void vmx_slot_disable_log_dirty(struct kvm *kvm,
9666 struct kvm_memory_slot *slot)
9667{
9668 kvm_mmu_slot_set_dirty(kvm, slot);
9669}
9670
9671static void vmx_flush_log_dirty(struct kvm *kvm)
9672{
9673 kvm_flush_pml_buffers(kvm);
9674}
9675
9676static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
9677 struct kvm_memory_slot *memslot,
9678 gfn_t offset, unsigned long mask)
9679{
9680 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
9681}
9682
9495static struct kvm_x86_ops vmx_x86_ops = { 9683static struct kvm_x86_ops vmx_x86_ops = {
9496 .cpu_has_kvm_support = cpu_has_kvm_support, 9684 .cpu_has_kvm_support = cpu_has_kvm_support,
9497 .disabled_by_bios = vmx_disabled_by_bios, 9685 .disabled_by_bios = vmx_disabled_by_bios,
@@ -9601,6 +9789,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
9601 .check_nested_events = vmx_check_nested_events, 9789 .check_nested_events = vmx_check_nested_events,
9602 9790
9603 .sched_in = vmx_sched_in, 9791 .sched_in = vmx_sched_in,
9792
9793 .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
9794 .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
9795 .flush_log_dirty = vmx_flush_log_dirty,
9796 .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
9604}; 9797};
9605 9798
9606static int __init vmx_init(void) 9799static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 442ee7d90946..1373e04e1f19 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7880,3 +7880,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
7880EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); 7880EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
7881EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset); 7881EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
7882EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); 7882EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
7883EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);