From c41ef344de212bd918f7765af21b5008628c03e0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 28 Oct 2008 18:16:58 -0200 Subject: KVM: MMU: increase per-vcpu rmap cache alloc size The page fault path can use two rmap_desc structures, if: - walk_addr's dirty pte update allocates one rmap_desc. - mmu_lock is dropped, sptes are zapped resulting in rmap_desc being freed. - fetch->mmu_set_spte allocates another rmap_desc. Increase to 4 for safety. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a5e64881d9b..f1983d9477cd 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) if (r) goto out; r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, - rmap_desc_cache, 1); + rmap_desc_cache, 4); if (r) goto out; r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); -- cgit v1.2.2 From ca93e992fdfdc6569ac2845d7560eeb5de4a4e0b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 4 Nov 2008 11:25:17 +0200 Subject: KVM: Require the PCI subsystem PCI device assignment makes calls to pci code, so require it to be built into the kernel. Signed-off-by: Avi Kivity --- arch/x86/kvm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ce3251ce5504..b81125f0bdee 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,6 +20,8 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + # for device assignment: + depends on PCI select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES -- cgit v1.2.2 From 928d4bf747e9c290b690ff515d8f81e8ee226d97 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 6 Nov 2008 14:55:45 +0800 Subject: KVM: VMX: Set IGMT bit in EPT entry There is a potential issue that, when guest using pagetable without vmexit when EPT enabled, guest would use PAT/PCD/PWT bits to index PAT msr for it's memory, which would be inconsistent with host side and would cause host MCE due to inconsistent cache attribute. The patch set IGMT bit in EPT entry to ignore guest PAT and use WB as default memory type to protect host (notice that all memory mapped by KVM should be WB). Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 3 ++- arch/x86/kvm/vmx.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2643b430d83a..d06b4dc0e2ea 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3564,7 +3564,8 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | + VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 3e010d21fdd7..ec5edc339da6 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IGMT_BIT (1ull << 6) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul -- cgit v1.2.2 From e17d1dc0863767bab8fde4ba9be92c7f79e7fe50 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 11 Nov 2008 13:09:36 +0200 Subject: KVM: Fix pit memory leak if unable to allocate irq source id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-By: Daniel Marjamäki Signed-off-by: Avi Kivity --- arch/x86/kvm/i8254.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 8772dc946823..59ebd37ad79e 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) mutex_lock(&kvm->lock); pit->irq_source_id = kvm_request_irq_source_id(kvm); mutex_unlock(&kvm->lock); - if (pit->irq_source_id < 0) + if (pit->irq_source_id < 0) { + kfree(pit); return NULL; + } mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); -- cgit v1.2.2 From bd2b3ca7686d9470b1b58df631daa03179486182 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 20 Nov 2008 11:47:18 +0200 Subject: KVM: VMX: Fix interrupt loss during race with NMI If an interrupt cannot be injected for some reason (say, page fault when fetching the IDT descriptor), the interrupt is marked for reinjection. However, if an NMI is queued at this time, the NMI will be injected instead and the NMI will be lost. Fix by deferring the NMI injection until the interrupt has been injected successfully. Analyzed by Jan Kiszka. Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d06b4dc0e2ea..a4018b01e1f9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (cpu_has_virtual_nmis()) { if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vmx_nmi_enabled(vcpu)) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = true; } else { -- cgit v1.2.2 From 0c0f40bdbe4ddb48ebecfb5c2b56eeb175a57c45 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 21 Nov 2008 19:13:58 +0100 Subject: KVM: MMU: fix sync of ptes addressed at owner pagetable During page sync, if a pagetable contains a self referencing pte (that points to the pagetable), the corresponding spte may be marked as writable even though all mappings are supposed to be write protected. Fix by clearing page unsync before syncing individual sptes. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f1983d9477cd..410ddbc1aa2e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) } rmap_write_protect(vcpu->kvm, sp->gfn); + kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } kvm_mmu_flush_tlb(vcpu); - kvm_unlink_unsync_page(vcpu->kvm, sp); return 0; } -- cgit v1.2.2 From 6c475352e87224a8f0b8cc6f6cc96b30563dc5b4 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 25 Nov 2008 15:33:10 +0100 Subject: KVM: MMU: avoid creation of unreachable pages in the shadow It is possible for a shadow page to have a parent link pointing to a freed page. When zapping a high level table, kvm_mmu_page_unlink_children fails to remove the parent_pte link. For that to happen, the child must be unreachable via the shadow tree, which can happen in shadow_walk_entry if the guest pte was modified in between walk() and fetch(). Remove the parent pte reference in such case. Possible cause for oops in bug #2217430. Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kvm') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 613ec9aa674a..84eee43bbe74 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); kvm_release_pfn_clean(sw->pfn); sw->sptep = NULL; return 1; -- cgit v1.2.2