KVM: VMX: Execute WBINVD to keep data consistency with assigned devices

Some guest device drivers may use "Non-Snoop" I/O and explicitly execute WBINVD or CLFLUSH on a region of RAM. Since migration (of the vCPU to another physical CPU) may occur before the WBINVD or CLFLUSH, we need to maintain data consistency by either:
1: flushing the cache (wbinvd) when the guest is scheduled out, if there is no wbinvd exit, or
2: executing wbinvd on all dirty physical CPUs when the guest's wbinvd exits.

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
author: Sheng Yang <sheng@linux.intel.com> 2010-06-30 00:25:15 -0400
committer: Avi Kivity <avi@redhat.com> 2010-08-01 03:47:21 -0400
commit: f5f48ee15c2ee3e44cf429e34b16c6fa9b900246 (patch)
tree: 75496197219d9aeedd3317fa007cc3b2e414c5da /arch/x86/kvm/x86.c
parent: cf3e3d3e19868ca01da163200bbfc687523df0fc (diff)
1 files changed, 41 insertions, 0 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 27322d341232..3d72fc067059 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1783,8 +1783,28 @@ out:
        return r;
 }
+static void wbinvd_ipi(void *garbage)
+{
+        wbinvd();
+}
+static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+        return vcpu->kvm->arch.iommu_domain &&
+                !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
+}
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+        /* Address WBINVD may be executed by guest */
+        if (need_emulate_wbinvd(vcpu)) {
+                if (kvm_x86_ops->has_wbinvd_exit())
+                        cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
+                else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
+                        smp_call_function_single(vcpu->cpu,
+                                        wbinvd_ipi, NULL, 1);
+        }
        kvm_x86_ops->vcpu_load(vcpu, cpu);
        if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
                unsigned long khz = cpufreq_quick_get(cpu);
@@ -3660,6 +3680,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
        return X86EMUL_CONTINUE;
 }
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+        if (!need_emulate_wbinvd(vcpu))
+                return X86EMUL_CONTINUE;
+        if (kvm_x86_ops->has_wbinvd_exit()) {
+                smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+                                wbinvd_ipi, NULL, 1);
+                cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
+        }
+        wbinvd();
+        return X86EMUL_CONTINUE;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 int emulate_clts(struct kvm_vcpu *vcpu)
 {
        kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
@@ -5263,6 +5298,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
                vcpu->arch.time_page = NULL;
        }
+        free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        fx_free(vcpu);
        kvm_x86_ops->vcpu_free(vcpu);
 }
@@ -5392,7 +5428,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        }
        vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
+        if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
+                goto fail_free_mce_banks;
        return 0;
+fail_free_mce_banks:
+        kfree(vcpu->arch.mce_banks);
 fail_free_lapic:
        kvm_free_lapic(vcpu);
 fail_mmu_destroy:
author	Sheng Yang <sheng@linux.intel.com>	2010-06-30 00:25:15 -0400
committer	Avi Kivity <avi@redhat.com>	2010-08-01 03:47:21 -0400
commit	f5f48ee15c2ee3e44cf429e34b16c6fa9b900246 (patch)
tree	75496197219d9aeedd3317fa007cc3b2e414c5da /arch/x86/kvm/x86.c
parent	cf3e3d3e19868ca01da163200bbfc687523df0fc (diff)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 27322d341232..3d72fc067059 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1783,8 +1783,28 @@ out:
1783	return r;	1783	return r;
1784	}	1784	}
1785		1785
		1786	static void wbinvd_ipi(void *garbage)
		1787	{
		1788	wbinvd();
		1789	}
		1790
		1791	static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
		1792	{
		1793	return vcpu->kvm->arch.iommu_domain &&
		1794	!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
		1795	}
		1796
1786	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)	1797	void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1787	{	1798	{
		1799	/* Address WBINVD may be executed by guest */
		1800	if (need_emulate_wbinvd(vcpu)) {
		1801	if (kvm_x86_ops->has_wbinvd_exit())
		1802	cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
		1803	else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
		1804	smp_call_function_single(vcpu->cpu,
		1805	wbinvd_ipi, NULL, 1);
		1806	}
		1807
1788	kvm_x86_ops->vcpu_load(vcpu, cpu);	1808	kvm_x86_ops->vcpu_load(vcpu, cpu);
1789	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {	1809	if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) {
1790	unsigned long khz = cpufreq_quick_get(cpu);	1810	unsigned long khz = cpufreq_quick_get(cpu);
@@ -3660,6 +3680,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
3660	return X86EMUL_CONTINUE;	3680	return X86EMUL_CONTINUE;
3661	}	3681	}
3662		3682
		3683	int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
		3684	{
		3685	if (!need_emulate_wbinvd(vcpu))
		3686	return X86EMUL_CONTINUE;
		3687
		3688	if (kvm_x86_ops->has_wbinvd_exit()) {
		3689	smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
		3690	wbinvd_ipi, NULL, 1);
		3691	cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
		3692	}
		3693	wbinvd();
		3694	return X86EMUL_CONTINUE;
		3695	}
		3696	EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
		3697
3663	int emulate_clts(struct kvm_vcpu *vcpu)	3698	int emulate_clts(struct kvm_vcpu *vcpu)
3664	{	3699	{
3665	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));	3700	kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
@@ -5263,6 +5298,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
5263	vcpu->arch.time_page = NULL;	5298	vcpu->arch.time_page = NULL;
5264	}	5299	}
5265		5300
		5301	free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
5266	fx_free(vcpu);	5302	fx_free(vcpu);
5267	kvm_x86_ops->vcpu_free(vcpu);	5303	kvm_x86_ops->vcpu_free(vcpu);
5268	}	5304	}
@@ -5392,7 +5428,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
5392	}	5428	}
5393	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;	5429	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
5394		5430
		5431	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
		5432	goto fail_free_mce_banks;
		5433
5395	return 0;	5434	return 0;
		5435	fail_free_mce_banks:
		5436	kfree(vcpu->arch.mce_banks);
5396	fail_free_lapic:	5437	fail_free_lapic:
5397	kvm_free_lapic(vcpu);	5438	kvm_free_lapic(vcpu);
5398	fail_mmu_destroy:	5439	fail_mmu_destroy: