aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-04-21 15:29:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-04-21 15:29:46 -0400
commita486b0af797e19ff79c6943cc8986111f2569fe8 (patch)
tree64c840c3d13ef83e774fa68356324e3544ac0bac
parent1519ae4dc785995c548bca98b75190b9f0667dde (diff)
parente8861cfe2c75bdce36655b64d7ce02c2b31b604d (diff)
Merge branch 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: Fix TSS size check for 16-bit tasks KVM: Add missing srcu_read_lock() for kvm_mmu_notifier_release() KVM: Increase NR_IOBUS_DEVS limit to 200 KVM: fix the handling of dirty bitmaps to avoid overflows KVM: MMU: fix kvm_mmu_zap_page() and its calling path KVM: VMX: Save/restore rflags.vm correctly in real mode KVM: allow bit 10 to be cleared in MSR_IA32_MC4_CTL KVM: Don't spam kernel log when injecting exceptions due to bad cr writes KVM: SVM: Fix memory leaks that happen when svm_create_vcpu() fails KVM: take srcu lock before call to complete_pio()
-rw-r--r--arch/ia64/kvm/kvm-ia64.c9
-rw-r--r--arch/powerpc/kvm/book3s.c5
-rw-r--r--arch/x86/kvm/mmu.c11
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c24
-rw-r--r--arch/x86/kvm/x86.c48
-rw-r--r--include/linux/kvm_host.h7
-rw-r--r--virt/kvm/kvm_main.c17
8 files changed, 79 insertions, 67 deletions
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 73c5c2b05f64..7f3c0a2e60cd 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1802,7 +1802,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1802{ 1802{
1803 struct kvm_memory_slot *memslot; 1803 struct kvm_memory_slot *memslot;
1804 int r, i; 1804 int r, i;
1805 long n, base; 1805 long base;
1806 unsigned long n;
1806 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + 1807 unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
1807 offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); 1808 offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
1808 1809
@@ -1815,7 +1816,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
1815 if (!memslot->dirty_bitmap) 1816 if (!memslot->dirty_bitmap)
1816 goto out; 1817 goto out;
1817 1818
1818 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1819 n = kvm_dirty_bitmap_bytes(memslot);
1819 base = memslot->base_gfn / BITS_PER_LONG; 1820 base = memslot->base_gfn / BITS_PER_LONG;
1820 1821
1821 for (i = 0; i < n/sizeof(long); ++i) { 1822 for (i = 0; i < n/sizeof(long); ++i) {
@@ -1831,7 +1832,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1831 struct kvm_dirty_log *log) 1832 struct kvm_dirty_log *log)
1832{ 1833{
1833 int r; 1834 int r;
1834 int n; 1835 unsigned long n;
1835 struct kvm_memory_slot *memslot; 1836 struct kvm_memory_slot *memslot;
1836 int is_dirty = 0; 1837 int is_dirty = 0;
1837 1838
@@ -1850,7 +1851,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1850 if (is_dirty) { 1851 if (is_dirty) {
1851 kvm_flush_remote_tlbs(kvm); 1852 kvm_flush_remote_tlbs(kvm);
1852 memslot = &kvm->memslots->memslots[log->slot]; 1853 memslot = &kvm->memslots->memslots[log->slot];
1853 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1854 n = kvm_dirty_bitmap_bytes(memslot);
1854 memset(memslot->dirty_bitmap, 0, n); 1855 memset(memslot->dirty_bitmap, 0, n);
1855 } 1856 }
1856 r = 0; 1857 r = 0;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 25da07fd9f77..604af29b71ed 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1004,7 +1004,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1004 struct kvm_vcpu *vcpu; 1004 struct kvm_vcpu *vcpu;
1005 ulong ga, ga_end; 1005 ulong ga, ga_end;
1006 int is_dirty = 0; 1006 int is_dirty = 0;
1007 int r, n; 1007 int r;
1008 unsigned long n;
1008 1009
1009 mutex_lock(&kvm->slots_lock); 1010 mutex_lock(&kvm->slots_lock);
1010 1011
@@ -1022,7 +1023,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
1022 kvm_for_each_vcpu(n, vcpu, kvm) 1023 kvm_for_each_vcpu(n, vcpu, kvm)
1023 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); 1024 kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
1024 1025
1025 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 1026 n = kvm_dirty_bitmap_bytes(memslot);
1026 memset(memslot->dirty_bitmap, 0, n); 1027 memset(memslot->dirty_bitmap, 0, n);
1027 } 1028 }
1028 1029
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 48aeee8eefb0..19a8906bcaa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1490,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
1490 for_each_sp(pages, sp, parents, i) { 1490 for_each_sp(pages, sp, parents, i) {
1491 kvm_mmu_zap_page(kvm, sp); 1491 kvm_mmu_zap_page(kvm, sp);
1492 mmu_pages_clear_parents(&parents); 1492 mmu_pages_clear_parents(&parents);
1493 zapped++;
1493 } 1494 }
1494 zapped += pages.nr;
1495 kvm_mmu_pages_init(parent, &parents, &pages); 1495 kvm_mmu_pages_init(parent, &parents, &pages);
1496 } 1496 }
1497 1497
@@ -1542,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1542 */ 1542 */
1543 1543
1544 if (used_pages > kvm_nr_mmu_pages) { 1544 if (used_pages > kvm_nr_mmu_pages) {
1545 while (used_pages > kvm_nr_mmu_pages) { 1545 while (used_pages > kvm_nr_mmu_pages &&
1546 !list_empty(&kvm->arch.active_mmu_pages)) {
1546 struct kvm_mmu_page *page; 1547 struct kvm_mmu_page *page;
1547 1548
1548 page = container_of(kvm->arch.active_mmu_pages.prev, 1549 page = container_of(kvm->arch.active_mmu_pages.prev,
1549 struct kvm_mmu_page, link); 1550 struct kvm_mmu_page, link);
1550 kvm_mmu_zap_page(kvm, page); 1551 used_pages -= kvm_mmu_zap_page(kvm, page);
1551 used_pages--; 1552 used_pages--;
1552 } 1553 }
1554 kvm_nr_mmu_pages = used_pages;
1553 kvm->arch.n_free_mmu_pages = 0; 1555 kvm->arch.n_free_mmu_pages = 0;
1554 } 1556 }
1555 else 1557 else
@@ -1596,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1596 && !sp->role.invalid) { 1598 && !sp->role.invalid) {
1597 pgprintk("%s: zap %lx %x\n", 1599 pgprintk("%s: zap %lx %x\n",
1598 __func__, gfn, sp->role.word); 1600 __func__, gfn, sp->role.word);
1599 kvm_mmu_zap_page(kvm, sp); 1601 if (kvm_mmu_zap_page(kvm, sp))
1602 nn = bucket->first;
1600 } 1603 }
1601 } 1604 }
1602} 1605}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 445c59411ed0..2ba58206812a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -706,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
706 if (err) 706 if (err)
707 goto free_svm; 707 goto free_svm;
708 708
709 err = -ENOMEM;
709 page = alloc_page(GFP_KERNEL); 710 page = alloc_page(GFP_KERNEL);
710 if (!page) { 711 if (!page)
711 err = -ENOMEM;
712 goto uninit; 712 goto uninit;
713 }
714 713
715 err = -ENOMEM;
716 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 714 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
717 if (!msrpm_pages) 715 if (!msrpm_pages)
718 goto uninit; 716 goto free_page1;
719 717
720 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 718 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
721 if (!nested_msrpm_pages) 719 if (!nested_msrpm_pages)
722 goto uninit; 720 goto free_page2;
723
724 svm->msrpm = page_address(msrpm_pages);
725 svm_vcpu_init_msrpm(svm->msrpm);
726 721
727 hsave_page = alloc_page(GFP_KERNEL); 722 hsave_page = alloc_page(GFP_KERNEL);
728 if (!hsave_page) 723 if (!hsave_page)
729 goto uninit; 724 goto free_page3;
725
730 svm->nested.hsave = page_address(hsave_page); 726 svm->nested.hsave = page_address(hsave_page);
731 727
728 svm->msrpm = page_address(msrpm_pages);
729 svm_vcpu_init_msrpm(svm->msrpm);
730
732 svm->nested.msrpm = page_address(nested_msrpm_pages); 731 svm->nested.msrpm = page_address(nested_msrpm_pages);
733 732
734 svm->vmcb = page_address(page); 733 svm->vmcb = page_address(page);
@@ -744,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
744 743
745 return &svm->vcpu; 744 return &svm->vcpu;
746 745
746free_page3:
747 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
748free_page2:
749 __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
750free_page1:
751 __free_page(page);
747uninit: 752uninit:
748 kvm_vcpu_uninit(&svm->vcpu); 753 kvm_vcpu_uninit(&svm->vcpu);
749free_svm: 754free_svm:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 686492ed3079..bc933cfb4e66 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
77#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) 77#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
78#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) 78#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
79 79
80#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
81
80/* 82/*
81 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 83 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
82 * ple_gap: upper bound on the amount of time between two successive 84 * ple_gap: upper bound on the amount of time between two successive
@@ -131,7 +133,7 @@ struct vcpu_vmx {
131 } host_state; 133 } host_state;
132 struct { 134 struct {
133 int vm86_active; 135 int vm86_active;
134 u8 save_iopl; 136 ulong save_rflags;
135 struct kvm_save_segment { 137 struct kvm_save_segment {
136 u16 selector; 138 u16 selector;
137 unsigned long base; 139 unsigned long base;
@@ -818,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
818 820
819static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 821static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
820{ 822{
821 unsigned long rflags; 823 unsigned long rflags, save_rflags;
822 824
823 rflags = vmcs_readl(GUEST_RFLAGS); 825 rflags = vmcs_readl(GUEST_RFLAGS);
824 if (to_vmx(vcpu)->rmode.vm86_active) 826 if (to_vmx(vcpu)->rmode.vm86_active) {
825 rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 827 rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
828 save_rflags = to_vmx(vcpu)->rmode.save_rflags;
829 rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
830 }
826 return rflags; 831 return rflags;
827} 832}
828 833
829static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 834static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
830{ 835{
831 if (to_vmx(vcpu)->rmode.vm86_active) 836 if (to_vmx(vcpu)->rmode.vm86_active) {
837 to_vmx(vcpu)->rmode.save_rflags = rflags;
832 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 838 rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
839 }
833 vmcs_writel(GUEST_RFLAGS, rflags); 840 vmcs_writel(GUEST_RFLAGS, rflags);
834} 841}
835 842
@@ -1483,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1483 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); 1490 vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
1484 1491
1485 flags = vmcs_readl(GUEST_RFLAGS); 1492 flags = vmcs_readl(GUEST_RFLAGS);
1486 flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); 1493 flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
1487 flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); 1494 flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;
1488 vmcs_writel(GUEST_RFLAGS, flags); 1495 vmcs_writel(GUEST_RFLAGS, flags);
1489 1496
1490 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | 1497 vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) |
@@ -1557,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
1557 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); 1564 vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
1558 1565
1559 flags = vmcs_readl(GUEST_RFLAGS); 1566 flags = vmcs_readl(GUEST_RFLAGS);
1560 vmx->rmode.save_iopl 1567 vmx->rmode.save_rflags = flags;
1561 = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1562 1568
1563 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; 1569 flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM;
1564 1570
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 24cd0ee896e9..3c4ca98ad27f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -433,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
433 433
434#ifdef CONFIG_X86_64 434#ifdef CONFIG_X86_64
435 if (cr0 & 0xffffffff00000000UL) { 435 if (cr0 & 0xffffffff00000000UL) {
436 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
437 cr0, kvm_read_cr0(vcpu));
438 kvm_inject_gp(vcpu, 0); 436 kvm_inject_gp(vcpu, 0);
439 return; 437 return;
440 } 438 }
@@ -443,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
443 cr0 &= ~CR0_RESERVED_BITS; 441 cr0 &= ~CR0_RESERVED_BITS;
444 442
445 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 443 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
446 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
447 kvm_inject_gp(vcpu, 0); 444 kvm_inject_gp(vcpu, 0);
448 return; 445 return;
449 } 446 }
450 447
451 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { 448 if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
452 printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
453 "and a clear PE flag\n");
454 kvm_inject_gp(vcpu, 0); 449 kvm_inject_gp(vcpu, 0);
455 return; 450 return;
456 } 451 }
@@ -461,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
461 int cs_db, cs_l; 456 int cs_db, cs_l;
462 457
463 if (!is_pae(vcpu)) { 458 if (!is_pae(vcpu)) {
464 printk(KERN_DEBUG "set_cr0: #GP, start paging "
465 "in long mode while PAE is disabled\n");
466 kvm_inject_gp(vcpu, 0); 459 kvm_inject_gp(vcpu, 0);
467 return; 460 return;
468 } 461 }
469 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 462 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
470 if (cs_l) { 463 if (cs_l) {
471 printk(KERN_DEBUG "set_cr0: #GP, start paging "
472 "in long mode while CS.L == 1\n");
473 kvm_inject_gp(vcpu, 0); 464 kvm_inject_gp(vcpu, 0);
474 return; 465 return;
475 466
@@ -477,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
477 } else 468 } else
478#endif 469#endif
479 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 470 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
480 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
481 "reserved bits\n");
482 kvm_inject_gp(vcpu, 0); 471 kvm_inject_gp(vcpu, 0);
483 return; 472 return;
484 } 473 }
@@ -505,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
505 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 494 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
506 495
507 if (cr4 & CR4_RESERVED_BITS) { 496 if (cr4 & CR4_RESERVED_BITS) {
508 printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
509 kvm_inject_gp(vcpu, 0); 497 kvm_inject_gp(vcpu, 0);
510 return; 498 return;
511 } 499 }
512 500
513 if (is_long_mode(vcpu)) { 501 if (is_long_mode(vcpu)) {
514 if (!(cr4 & X86_CR4_PAE)) { 502 if (!(cr4 & X86_CR4_PAE)) {
515 printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
516 "in long mode\n");
517 kvm_inject_gp(vcpu, 0); 503 kvm_inject_gp(vcpu, 0);
518 return; 504 return;
519 } 505 }
520 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) 506 } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
521 && ((cr4 ^ old_cr4) & pdptr_bits) 507 && ((cr4 ^ old_cr4) & pdptr_bits)
522 && !load_pdptrs(vcpu, vcpu->arch.cr3)) { 508 && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
523 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
524 kvm_inject_gp(vcpu, 0); 509 kvm_inject_gp(vcpu, 0);
525 return; 510 return;
526 } 511 }
527 512
528 if (cr4 & X86_CR4_VMXE) { 513 if (cr4 & X86_CR4_VMXE) {
529 printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
530 kvm_inject_gp(vcpu, 0); 514 kvm_inject_gp(vcpu, 0);
531 return; 515 return;
532 } 516 }
@@ -547,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
547 531
548 if (is_long_mode(vcpu)) { 532 if (is_long_mode(vcpu)) {
549 if (cr3 & CR3_L_MODE_RESERVED_BITS) { 533 if (cr3 & CR3_L_MODE_RESERVED_BITS) {
550 printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
551 kvm_inject_gp(vcpu, 0); 534 kvm_inject_gp(vcpu, 0);
552 return; 535 return;
553 } 536 }
554 } else { 537 } else {
555 if (is_pae(vcpu)) { 538 if (is_pae(vcpu)) {
556 if (cr3 & CR3_PAE_RESERVED_BITS) { 539 if (cr3 & CR3_PAE_RESERVED_BITS) {
557 printk(KERN_DEBUG
558 "set_cr3: #GP, reserved bits\n");
559 kvm_inject_gp(vcpu, 0); 540 kvm_inject_gp(vcpu, 0);
560 return; 541 return;
561 } 542 }
562 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { 543 if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
563 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
564 "reserved bits\n");
565 kvm_inject_gp(vcpu, 0); 544 kvm_inject_gp(vcpu, 0);
566 return; 545 return;
567 } 546 }
@@ -593,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);
593void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) 572void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
594{ 573{
595 if (cr8 & CR8_RESERVED_BITS) { 574 if (cr8 & CR8_RESERVED_BITS) {
596 printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
597 kvm_inject_gp(vcpu, 0); 575 kvm_inject_gp(vcpu, 0);
598 return; 576 return;
599 } 577 }
@@ -649,15 +627,12 @@ static u32 emulated_msrs[] = {
649static void set_efer(struct kvm_vcpu *vcpu, u64 efer) 627static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
650{ 628{
651 if (efer & efer_reserved_bits) { 629 if (efer & efer_reserved_bits) {
652 printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
653 efer);
654 kvm_inject_gp(vcpu, 0); 630 kvm_inject_gp(vcpu, 0);
655 return; 631 return;
656 } 632 }
657 633
658 if (is_paging(vcpu) 634 if (is_paging(vcpu)
659 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { 635 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
660 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
661 kvm_inject_gp(vcpu, 0); 636 kvm_inject_gp(vcpu, 0);
662 return; 637 return;
663 } 638 }
@@ -667,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
667 642
668 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 643 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
669 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { 644 if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) {
670 printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");
671 kvm_inject_gp(vcpu, 0); 645 kvm_inject_gp(vcpu, 0);
672 return; 646 return;
673 } 647 }
@@ -678,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
678 652
679 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); 653 feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
680 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { 654 if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
681 printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
682 kvm_inject_gp(vcpu, 0); 655 kvm_inject_gp(vcpu, 0);
683 return; 656 return;
684 } 657 }
@@ -967,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)
967 if (msr >= MSR_IA32_MC0_CTL && 940 if (msr >= MSR_IA32_MC0_CTL &&
968 msr < MSR_IA32_MC0_CTL + 4 * bank_num) { 941 msr < MSR_IA32_MC0_CTL + 4 * bank_num) {
969 u32 offset = msr - MSR_IA32_MC0_CTL; 942 u32 offset = msr - MSR_IA32_MC0_CTL;
970 /* only 0 or all 1s can be written to IA32_MCi_CTL */ 943 /* only 0 or all 1s can be written to IA32_MCi_CTL
944 * some Linux kernels though clear bit 10 in bank 4 to
945 * workaround a BIOS/GART TBL issue on AMD K8s, ignore
946 * this to avoid an uncatched #GP in the guest
947 */
971 if ((offset & 0x3) == 0 && 948 if ((offset & 0x3) == 0 &&
972 data != 0 && data != ~(u64)0) 949 data != 0 && (data | (1 << 10)) != ~(u64)0)
973 return -1; 950 return -1;
974 vcpu->arch.mce_banks[offset] = data; 951 vcpu->arch.mce_banks[offset] = data;
975 break; 952 break;
@@ -2635,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2635int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2612int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2636 struct kvm_dirty_log *log) 2613 struct kvm_dirty_log *log)
2637{ 2614{
2638 int r, n, i; 2615 int r, i;
2639 struct kvm_memory_slot *memslot; 2616 struct kvm_memory_slot *memslot;
2617 unsigned long n;
2640 unsigned long is_dirty = 0; 2618 unsigned long is_dirty = 0;
2641 unsigned long *dirty_bitmap = NULL; 2619 unsigned long *dirty_bitmap = NULL;
2642 2620
@@ -2651,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2651 if (!memslot->dirty_bitmap) 2629 if (!memslot->dirty_bitmap)
2652 goto out; 2630 goto out;
2653 2631
2654 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2632 n = kvm_dirty_bitmap_bytes(memslot);
2655 2633
2656 r = -ENOMEM; 2634 r = -ENOMEM;
2657 dirty_bitmap = vmalloc(n); 2635 dirty_bitmap = vmalloc(n);
@@ -4483,7 +4461,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4483 kvm_set_cr8(vcpu, kvm_run->cr8); 4461 kvm_set_cr8(vcpu, kvm_run->cr8);
4484 4462
4485 if (vcpu->arch.pio.cur_count) { 4463 if (vcpu->arch.pio.cur_count) {
4464 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4486 r = complete_pio(vcpu); 4465 r = complete_pio(vcpu);
4466 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4487 if (r) 4467 if (r)
4488 goto out; 4468 goto out;
4489 } 4469 }
@@ -5146,6 +5126,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5146 int ret = 0; 5126 int ret = 0;
5147 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5127 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
5148 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5128 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
5129 u32 desc_limit;
5149 5130
5150 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); 5131 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
5151 5132
@@ -5168,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
5168 } 5149 }
5169 } 5150 }
5170 5151
5171 if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { 5152 desc_limit = get_desc_limit(&nseg_desc);
5153 if (!nseg_desc.p ||
5154 ((desc_limit < 0x67 && (nseg_desc.type & 8)) ||
5155 desc_limit < 0x2b)) {
5172 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); 5156 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
5173 return 1; 5157 return 1;
5174 } 5158 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a3fd0f91d943..169d07758ee5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -54,7 +54,7 @@ extern struct kmem_cache *kvm_vcpu_cache;
54 */ 54 */
55struct kvm_io_bus { 55struct kvm_io_bus {
56 int dev_count; 56 int dev_count;
57#define NR_IOBUS_DEVS 6 57#define NR_IOBUS_DEVS 200
58 struct kvm_io_device *devs[NR_IOBUS_DEVS]; 58 struct kvm_io_device *devs[NR_IOBUS_DEVS];
59}; 59};
60 60
@@ -119,6 +119,11 @@ struct kvm_memory_slot {
119 int user_alloc; 119 int user_alloc;
120}; 120};
121 121
122static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
123{
124 return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
125}
126
122struct kvm_kernel_irq_routing_entry { 127struct kvm_kernel_irq_routing_entry {
123 u32 gsi; 128 u32 gsi;
124 u32 type; 129 u32 type;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5a0cd194dce0..c82ae2492634 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -341,7 +341,11 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
341 struct mm_struct *mm) 341 struct mm_struct *mm)
342{ 342{
343 struct kvm *kvm = mmu_notifier_to_kvm(mn); 343 struct kvm *kvm = mmu_notifier_to_kvm(mn);
344 int idx;
345
346 idx = srcu_read_lock(&kvm->srcu);
344 kvm_arch_flush_shadow(kvm); 347 kvm_arch_flush_shadow(kvm);
348 srcu_read_unlock(&kvm->srcu, idx);
345} 349}
346 350
347static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { 351static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
@@ -648,7 +652,7 @@ skip_lpage:
648 652
649 /* Allocate page dirty bitmap if needed */ 653 /* Allocate page dirty bitmap if needed */
650 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { 654 if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
651 unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; 655 unsigned long dirty_bytes = kvm_dirty_bitmap_bytes(&new);
652 656
653 new.dirty_bitmap = vmalloc(dirty_bytes); 657 new.dirty_bitmap = vmalloc(dirty_bytes);
654 if (!new.dirty_bitmap) 658 if (!new.dirty_bitmap)
@@ -768,7 +772,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
768{ 772{
769 struct kvm_memory_slot *memslot; 773 struct kvm_memory_slot *memslot;
770 int r, i; 774 int r, i;
771 int n; 775 unsigned long n;
772 unsigned long any = 0; 776 unsigned long any = 0;
773 777
774 r = -EINVAL; 778 r = -EINVAL;
@@ -780,7 +784,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
780 if (!memslot->dirty_bitmap) 784 if (!memslot->dirty_bitmap)
781 goto out; 785 goto out;
782 786
783 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 787 n = kvm_dirty_bitmap_bytes(memslot);
784 788
785 for (i = 0; !any && i < n/sizeof(long); ++i) 789 for (i = 0; !any && i < n/sizeof(long); ++i)
786 any = memslot->dirty_bitmap[i]; 790 any = memslot->dirty_bitmap[i];
@@ -1186,10 +1190,13 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
1186 memslot = gfn_to_memslot_unaliased(kvm, gfn); 1190 memslot = gfn_to_memslot_unaliased(kvm, gfn);
1187 if (memslot && memslot->dirty_bitmap) { 1191 if (memslot && memslot->dirty_bitmap) {
1188 unsigned long rel_gfn = gfn - memslot->base_gfn; 1192 unsigned long rel_gfn = gfn - memslot->base_gfn;
1193 unsigned long *p = memslot->dirty_bitmap +
1194 rel_gfn / BITS_PER_LONG;
1195 int offset = rel_gfn % BITS_PER_LONG;
1189 1196
1190 /* avoid RMW */ 1197 /* avoid RMW */
1191 if (!generic_test_le_bit(rel_gfn, memslot->dirty_bitmap)) 1198 if (!generic_test_le_bit(offset, p))
1192 generic___set_le_bit(rel_gfn, memslot->dirty_bitmap); 1199 generic___set_le_bit(offset, p);
1193 } 1200 }
1194} 1201}
1195 1202