From 7567cae105e435b53e5a3e778546dd3ec53e3204 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 9 Mar 2010 12:01:10 +0200 Subject: KVM: take srcu lock before call to complete_pio() complete_pio() may use slot table which is protected by srcu. Signed-off-by: Gleb Natapov Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 24cd0ee896e9..2eb999dc9774 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4483,7 +4483,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_set_cr8(vcpu, kvm_run->cr8); if (vcpu->arch.pio.cur_count) { + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = complete_pio(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r) goto out; } -- cgit v1.2.2 From b7af40433870aa0636932ad39b0c48a0cb319057 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 9 Mar 2010 14:55:19 +0900 Subject: KVM: SVM: Fix memory leaks that happen when svm_create_vcpu() fails svm_create_vcpu() does not free the pages allocated during the creation when it fails to complete the allocations. This patch fixes it. Signed-off-by: Takuya Yoshikawa Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 445c59411ed0..2ba58206812a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -706,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_svm; + err = -ENOMEM; page = alloc_page(GFP_KERNEL); - if (!page) { - err = -ENOMEM; + if (!page) goto uninit; - } - err = -ENOMEM; msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) - goto uninit; + goto free_page1; nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!nested_msrpm_pages) - goto uninit; - - svm->msrpm = page_address(msrpm_pages); - svm_vcpu_init_msrpm(svm->msrpm); + goto free_page2; hsave_page = alloc_page(GFP_KERNEL); if (!hsave_page) - goto uninit; + goto free_page3; + svm->nested.hsave = page_address(hsave_page); + svm->msrpm = page_address(msrpm_pages); + svm_vcpu_init_msrpm(svm->msrpm); + svm->nested.msrpm = page_address(nested_msrpm_pages); svm->vmcb = page_address(page); @@ -744,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) return &svm->vcpu; +free_page3: + __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); +free_page2: + __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); +free_page1: + __free_page(page); uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: -- cgit v1.2.2 From d6a23895aa82353788a1cc5a1d9a1c963465463e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 11 Mar 2010 12:20:03 +0200 Subject: KVM: Don't spam kernel log when injecting exceptions due to bad cr writes These are guest-triggerable. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2eb999dc9774..8f9b08d72c4d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -433,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #ifdef CONFIG_X86_64 if (cr0 & 0xffffffff00000000UL) { - printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", - cr0, kvm_read_cr0(vcpu)); kvm_inject_gp(vcpu, 0); return; } @@ -443,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) cr0 &= ~CR0_RESERVED_BITS; if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { - printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); kvm_inject_gp(vcpu, 0); return; } if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { - printk(KERN_DEBUG "set_cr0: #GP, set PG flag " - "and a clear PE flag\n"); kvm_inject_gp(vcpu, 0); return; } @@ -461,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) int cs_db, cs_l; if (!is_pae(vcpu)) { - printk(KERN_DEBUG "set_cr0: #GP, start paging " - "in long mode while PAE is disabled\n"); kvm_inject_gp(vcpu, 0); return; } kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); if (cs_l) { - printk(KERN_DEBUG "set_cr0: #GP, start paging " - "in long mode while CS.L == 1\n"); kvm_inject_gp(vcpu, 0); return; @@ -477,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } else #endif if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { - printk(KERN_DEBUG "set_cr0: #GP, pdptrs " - "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -505,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; if (cr4 & CR4_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) { - printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " - "in long mode\n"); kvm_inject_gp(vcpu, 0); return; } } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { - printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (cr4 & X86_CR4_VMXE) { - printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); kvm_inject_gp(vcpu, 0); return; } @@ -547,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu)) { if (cr3 & CR3_L_MODE_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } } else { if (is_pae(vcpu)) { if (cr3 & CR3_PAE_RESERVED_BITS) { - printk(KERN_DEBUG - "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { - printk(KERN_DEBUG "set_cr3: #GP, pdptrs " - "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -593,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); kvm_inject_gp(vcpu, 0); return; } @@ -649,15 +627,12 @@ static u32 emulated_msrs[] = { static void set_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) { - printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", - efer); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { - printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); kvm_inject_gp(vcpu, 0); return; } @@ -667,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { - printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); kvm_inject_gp(vcpu, 0); return; } @@ -678,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { - printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); kvm_inject_gp(vcpu, 0); return; } -- cgit v1.2.2 From 114be429c8cd44e57f312af2bbd6734e5a185b0d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 24 Mar 2010 17:46:42 +0100 Subject: KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL There is a quirk for AMD K8 CPUs in many Linux kernels (see arch/x86/kernel/cpu/mcheck/mce.c:__mcheck_cpu_apply_quirks()) that clears bit 10 in that MCE related MSR. KVM can only cope with all zeros or all ones, so it will inject a #GP into the guest, which will let it panic. So lets add a quirk to the quirk and ignore this single cleared bit. This fixes -cpu kvm64 on all machines and -cpu host on K8 machines with some guest Linux kernels. Signed-off-by: Andre Przywara Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f9b08d72c4d..9ad3d064c781 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -940,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MC0_CTL + 4 * bank_num) { u32 offset = msr - MSR_IA32_MC0_CTL; - /* only 0 or all 1s can be written to IA32_MCi_CTL */ + /* only 0 or all 1s can be written to IA32_MCi_CTL + * some Linux kernels though clear bit 10 in bank 4 to + * workaround a BIOS/GART TBL issue on AMD K8s, ignore + * this to avoid an uncatched #GP in the guest + */ if ((offset & 0x3) == 0 && - data != 0 && data != ~(u64)0) + data != 0 && (data | (1 << 10)) != ~(u64)0) return -1; vcpu->arch.mce_banks[offset] = data; break; -- cgit v1.2.2 From 78ac8b47c566dd6177a3b9b291b756ccb70670b7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 8 Apr 2010 18:19:35 +0300 Subject: KVM: VMX: Save/restore rflags.vm correctly in real mode Currently we set eflags.vm unconditionally when entering real mode emulation through virtual-8086 mode, and clear it unconditionally when we enter protected mode. The means that the following sequence KVM_SET_REGS (rflags.vm=1) KVM_SET_SREGS (cr0.pe=1) Ends up with rflags.vm clear due to KVM_SET_SREGS triggering enter_pmode(). Fix by shadowing rflags.vm (and rflags.iopl) correctly while in real mode: reads and writes to those bits access a shadow register instead of the actual register. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 686492ed3079..bc933cfb4e66 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) +#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) + /* * These 2 parameters are used to config the controls for Pause-Loop Exiting: * ple_gap: upper bound on the amount of time between two successive @@ -131,7 +133,7 @@ struct vcpu_vmx { } host_state; struct { int vm86_active; - u8 save_iopl; + ulong save_rflags; struct kvm_save_segment { u16 selector; unsigned long base; @@ -818,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { - unsigned long rflags; + unsigned long rflags, save_rflags; rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) - rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } return rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - if (to_vmx(vcpu)->rmode.vm86_active) + if (to_vmx(vcpu)->rmode.vm86_active) { + to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; + } vmcs_writel(GUEST_RFLAGS, rflags); } @@ -1483,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); flags = vmcs_readl(GUEST_RFLAGS); - flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); - flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); + flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; vmcs_writel(GUEST_RFLAGS, flags); vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | @@ -1557,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); - vmx->rmode.save_iopl - = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + vmx->rmode.save_rflags = flags; flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; -- cgit v1.2.2 From 77662e0028c7c63e34257fda03ff9625c59d939d Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 16 Apr 2010 16:34:42 +0800 Subject: KVM: MMU: fix kvm_mmu_zap_page() and its calling path This patch fix: - calculate zapped page number properly in mmu_zap_unsync_children() - calculate freeed page number properly kvm_mmu_change_mmu_pages() - if zapped children page it shoud restart hlist walking KVM-Stable-Tag. Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 48aeee8eefb0..19a8906bcaa2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1490,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm, for_each_sp(pages, sp, parents, i) { kvm_mmu_zap_page(kvm, sp); mmu_pages_clear_parents(&parents); + zapped++; } - zapped += pages.nr; kvm_mmu_pages_init(parent, &parents, &pages); } @@ -1542,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) */ if (used_pages > kvm_nr_mmu_pages) { - while (used_pages > kvm_nr_mmu_pages) { + while (used_pages > kvm_nr_mmu_pages && + !list_empty(&kvm->arch.active_mmu_pages)) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); - kvm_mmu_zap_page(kvm, page); + used_pages -= kvm_mmu_zap_page(kvm, page); used_pages--; } + kvm_nr_mmu_pages = used_pages; kvm->arch.n_free_mmu_pages = 0; } else @@ -1596,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) && !sp->role.invalid) { pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); - kvm_mmu_zap_page(kvm, sp); + if (kvm_mmu_zap_page(kvm, sp)) + nn = bucket->first; } } } -- cgit v1.2.2 From 87bf6e7de1134f48681fd2ce4b7c1ec45458cb6d Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Mon, 12 Apr 2010 19:35:35 +0900 Subject: KVM: fix the handling of dirty bitmaps to avoid overflows Int is not long enough to store the size of a dirty bitmap. This patch fixes this problem with the introduction of a wrapper function to calculate the sizes of dirty bitmaps. Note: in mark_page_dirty(), we have to consider the fact that __set_bit() takes the offset as int, not long. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 9 +++++---- arch/powerpc/kvm/book3s.c | 5 +++-- arch/x86/kvm/x86.c | 5 +++-- include/linux/kvm_host.h | 5 +++++ virt/kvm/kvm_main.c | 13 ++++++++----- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 73c5c2b05f64..7f3c0a2e60cd 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1802,7 +1802,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - long n, base; + long base; + unsigned long n; unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); @@ -1815,7 +1816,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); base = memslot->base_gfn / BITS_PER_LONG; for (i = 0; i < n/sizeof(long); ++i) { @@ -1831,7 +1832,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { int r; - int n; + unsigned long n; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -1850,7 +1851,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (is_dirty) { kvm_flush_remote_tlbs(kvm); memslot = &kvm->memslots->memslots[log->slot]; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } r = 0; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 25da07fd9f77..604af29b71ed 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1004,7 +1004,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_vcpu *vcpu; ulong ga, ga_end; int is_dirty = 0; - int r, n; + int r; + unsigned long n; mutex_lock(&kvm->slots_lock); @@ -1022,7 +1023,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, kvm_for_each_vcpu(n, vcpu, kvm) kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ad3d064c781..45aa90f8cc57 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2612,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r, n, i; + int r, i; struct kvm_memory_slot *memslot; + unsigned long n; unsigned long is_dirty = 0; unsigned long *dirty_bitmap = NULL; @@ -2628,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); r = -ENOMEM; dirty_bitmap = vmalloc(n); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a3fd0f91d943..9ad825e1c79b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -119,6 +119,11 @@ struct kvm_memory_slot { int user_alloc; }; +static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) +{ + return ALIGN(memslot->npages, BITS_PER_LONG) / 8; +} + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0cd194dce0..364daacafb58 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -648,7 +648,7 @@ skip_lpage: /* Allocate page dirty bitmap if needed */ if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { - unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; + unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new); new.dirty_bitmap = vmalloc(dirty_bytes); if (!new.dirty_bitmap) @@ -768,7 +768,7 @@ int kvm_get_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - int n; + unsigned long n; unsigned long any = 0; r = -EINVAL; @@ -780,7 +780,7 @@ int kvm_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); for (i = 0; !any && i < n/sizeof(long); ++i) any = memslot->dirty_bitmap[i]; @@ -1186,10 +1186,13 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) memslot = gfn_to_memslot_unaliased(kvm, gfn); if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; + unsigned long *p = memslot->dirty_bitmap + + rel_gfn / BITS_PER_LONG; + int offset = rel_gfn % BITS_PER_LONG; /* avoid RMW */ - if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) - generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); + if (!generic_test_le_bit(offset, p)) + generic___set_le_bit(offset, p); } } -- cgit v1.2.2 From e80e2a60ff7914dae691345a976c80bbbff3ec74 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 30 Mar 2010 16:48:25 -0700 Subject: KVM: Increase NR_IOBUS_DEVS limit to 200 This patch increases the current hardcoded limit of NR_IOBUS_DEVS from 6 to 200. We are hitting this limit when creating a guest with more than 1 virtio-net device using vhost-net backend. Each virtio-net device requires 2 such devices to service notifications from rx/tx queues. Signed-off-by: Sridhar Samudrala Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9ad825e1c79b..169d07758ee5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,7 @@ extern struct kmem_cache *kvm_vcpu_cache; */ struct kvm_io_bus { int dev_count; -#define NR_IOBUS_DEVS 6 +#define NR_IOBUS_DEVS 200 struct kvm_io_device *devs[NR_IOBUS_DEVS]; }; -- cgit v1.2.2 From eda2beda835697878e309b6049aa0a3bad051590 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 20 Apr 2010 14:29:29 +0800 Subject: KVM: Add missing srcu_read_lock() for kvm_mmu_notifier_release() I got this dmesg due to srcu_read_lock() is missing in kvm_mmu_notifier_release(). =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- arch/x86/kvm/x86.h:72 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by qemu-system-x86/3100: #0: (rcu_read_lock){.+.+..}, at: [] __mmu_notifier_release+0x38/0xdf #1: (&(&kvm->mmu_lock)->rlock){+.+...}, at: [] kvm_mmu_zap_all+0x21/0x5e [kvm] stack backtrace: Pid: 3100, comm: qemu-system-x86 Not tainted 2.6.34-rc3-22949-gbc8a97a-dirty #2 Call Trace: [] lockdep_rcu_dereference+0xaa/0xb3 [] unalias_gfn+0x56/0xab [kvm] [] gfn_to_memslot+0x16/0x25 [kvm] [] gfn_to_rmap+0x17/0x6e [kvm] [] rmap_remove+0xa0/0x19d [kvm] [] kvm_mmu_zap_page+0x109/0x34d [kvm] [] kvm_mmu_zap_all+0x35/0x5e [kvm] [] kvm_arch_flush_shadow+0x16/0x22 [kvm] [] kvm_mmu_notifier_release+0x15/0x17 [kvm] [] __mmu_notifier_release+0x88/0xdf [] ? __mmu_notifier_release+0x38/0xdf [] ? exit_mm+0xe0/0x115 [] exit_mmap+0x2c/0x17e [] mmput+0x2d/0xd4 [] exit_mm+0x108/0x115 [...] Signed-off-by: Lai Jiangshan Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 364daacafb58..c82ae2492634 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -341,7 +341,11 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + + idx = srcu_read_lock(&kvm->srcu); kvm_arch_flush_shadow(kvm); + srcu_read_unlock(&kvm->srcu, idx); } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { -- cgit v1.2.2 From e8861cfe2c75bdce36655b64d7ce02c2b31b604d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 14 Apr 2010 16:57:11 +0200 Subject: KVM: x86: Fix TSS size check for 16-bit tasks A 16-bit TSS is only 44 bytes long. So make sure to test for the correct size on task switch. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 45aa90f8cc57..3c4ca98ad27f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5126,6 +5126,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) int ret = 0; u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); + u32 desc_limit; old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); @@ -5148,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } } - if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { + desc_limit = get_desc_limit(&nseg_desc); + if (!nseg_desc.p || + ((desc_limit < 0x67 && (nseg_desc.type & 8)) || + desc_limit < 0x2b)) { kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); return 1; } -- cgit v1.2.2