author     Linus Torvalds <torvalds@linux-foundation.org>  2010-03-05 16:12:34 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-03-05 16:12:34 -0500
commit     c812a51d11bbe983f4c24e32b59b265705ddd3c2 (patch)
tree       d454f518db51a4de700cf3dcd4c3c71ee7288b47
parent     9467c4fdd66f6810cecef0f1173330f3c6e67d45 (diff)
parent     d2be1651b736002e0c76d7095d6c0ba77b4a897c (diff)
Merge branch 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (145 commits)
  KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP
  KVM: VMX: Update instruction length on intercepted BP
  KVM: Fix emulate_sys[call, enter, exit]()'s fault handling
  KVM: Fix segment descriptor loading
  KVM: Fix load_guest_segment_descriptor() to inject page fault
  KVM: x86 emulator: Forbid modifying CS segment register by mov instruction
  KVM: Convert kvm->requests_lock to raw_spinlock_t
  KVM: Convert i8254/i8259 locks to raw_spinlocks
  KVM: x86 emulator: disallow opcode 82 in 64-bit mode
  KVM: x86 emulator: code style cleanup
  KVM: Plan obsolescence of kernel allocated slots, paravirt mmu
  KVM: x86 emulator: Add LOCK prefix validity checking
  KVM: x86 emulator: Check CPL level during privilege instruction emulation
  KVM: x86 emulator: Fix popf emulation
  KVM: x86 emulator: Check IOPL level during io instruction emulation
  KVM: x86 emulator: fix memory access during x86 emulation
  KVM: x86 emulator: Add Virtual-8086 mode of emulation
  KVM: x86 emulator: Add group9 instruction decoding
  KVM: x86 emulator: Add group8 instruction decoding
  KVM: do not store wqh in irqfd
  ...

Trivial conflicts in Documentation/feature-removal-schedule.txt
-rw-r--r--  Documentation/feature-removal-schedule.txt  |   32
-rw-r--r--  Documentation/kvm/api.txt                    |   12
-rw-r--r--  MAINTAINERS                                  |    2
-rw-r--r--  arch/ia64/kvm/Kconfig                        |    1
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c                     |   50
-rw-r--r--  arch/ia64/kvm/kvm_fw.c                       |   28
-rw-r--r--  arch/ia64/kvm/mmio.c                         |    4
-rw-r--r--  arch/ia64/kvm/vcpu.c                         |    4
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h           |    6
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h        |   11
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64_asm.h |   18
-rw-r--r--  arch/powerpc/include/asm/kvm_e500.h          |    3
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h          |   23
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h           |   83
-rw-r--r--  arch/powerpc/include/asm/paca.h              |    5
-rw-r--r--  arch/powerpc/include/asm/reg.h               |    4
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c            |   33
-rw-r--r--  arch/powerpc/kernel/ppc_ksyms.c              |    1
-rw-r--r--  arch/powerpc/kvm/44x_emulate.c               |   25
-rw-r--r--  arch/powerpc/kvm/44x_tlb.c                   |   20
-rw-r--r--  arch/powerpc/kvm/Kconfig                     |    1
-rw-r--r--  arch/powerpc/kvm/book3s.c                    |  309
-rw-r--r--  arch/powerpc/kvm/book3s_64_emulate.c         |   77
-rw-r--r--  arch/powerpc/kvm/book3s_64_exports.c         |    8
-rw-r--r--  arch/powerpc/kvm/book3s_64_interrupts.S      |  336
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c             |   10
-rw-r--r--  arch/powerpc/kvm/book3s_64_rmhandlers.S      |  119
-rw-r--r--  arch/powerpc/kvm/book3s_64_slb.S             |  160
-rw-r--r--  arch/powerpc/kvm/booke.c                     |   87
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c             |  107
-rw-r--r--  arch/powerpc/kvm/e500.c                      |    6
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c              |   93
-rw-r--r--  arch/powerpc/kvm/e500_tlb.c                  |   10
-rw-r--r--  arch/powerpc/kvm/emulate.c                   |  118
-rw-r--r--  arch/powerpc/kvm/powerpc.c                   |   40
-rw-r--r--  arch/s390/kvm/kvm-s390.c                     |   26
-rw-r--r--  arch/s390/kvm/kvm-s390.h                     |   10
-rw-r--r--  arch/x86/include/asm/Kbuild                  |    1
-rw-r--r--  arch/x86/include/asm/hyperv.h                |  186
-rw-r--r--  arch/x86/include/asm/kvm_emulate.h           |   17
-rw-r--r--  arch/x86/include/asm/kvm_host.h              |   60
-rw-r--r--  arch/x86/include/asm/kvm_para.h              |    1
-rw-r--r--  arch/x86/include/asm/svm.h                   |    2
-rw-r--r--  arch/x86/include/asm/vmx.h                   |    5
-rw-r--r--  arch/x86/kernel/vsyscall_64.c                |    3
-rw-r--r--  arch/x86/kvm/Kconfig                         |    1
-rw-r--r--  arch/x86/kvm/emulate.c                       |  440
-rw-r--r--  arch/x86/kvm/i8254.c                         |   23
-rw-r--r--  arch/x86/kvm/i8254.h                         |    2
-rw-r--r--  arch/x86/kvm/i8259.c                         |   46
-rw-r--r--  arch/x86/kvm/irq.h                           |    3
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h                |   31
-rw-r--r--  arch/x86/kvm/lapic.c                         |   31
-rw-r--r--  arch/x86/kvm/lapic.h                         |    8
-rw-r--r--  arch/x86/kvm/mmu.c                           |  137
-rw-r--r--  arch/x86/kvm/mmu.h                           |   35
-rw-r--r--  arch/x86/kvm/paging_tmpl.h                   |   13
-rw-r--r--  arch/x86/kvm/svm.c                           |  237
-rw-r--r--  arch/x86/kvm/trace.h                         |   59
-rw-r--r--  arch/x86/kvm/vmx.c                           |  396
-rw-r--r--  arch/x86/kvm/x86.c                           | 1098
-rw-r--r--  arch/x86/kvm/x86.h                           |   30
-rw-r--r--  include/linux/kvm.h                          |   10
-rw-r--r--  include/linux/kvm_host.h                     |   71
-rw-r--r--  include/trace/events/kvm.h                   |   41
-rw-r--r--  virt/kvm/Kconfig                             |    3
-rw-r--r--  virt/kvm/assigned-dev.c                      |   12
-rw-r--r--  virt/kvm/coalesced_mmio.c                    |   43
-rw-r--r--  virt/kvm/coalesced_mmio.h                    |   15
-rw-r--r--  virt/kvm/eventfd.c                           |   21
-rw-r--r--  virt/kvm/ioapic.c                            |   38
-rw-r--r--  virt/kvm/ioapic.h                            |    2
-rw-r--r--  virt/kvm/iommu.c                             |   36
-rw-r--r--  virt/kvm/kvm_main.c                          |  392
74 files changed, 3770 insertions(+), 1661 deletions(-)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 03497909539e..31575e220f3b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -556,3 +556,35 @@ Why:	udev fully replaces this special file system that only contains CAPI
 	NCCI TTY device nodes. User space (pppdcapiplugin) works without
 	noticing the difference.
 Who:	Jan Kiszka <jan.kiszka@web.de>
+
+----------------------------
+
+What:	KVM memory aliases support
+When:	July 2010
+Why:	Memory aliasing support is used for speeding up guest vga access
+	through the vga windows.
+
+	Modern userspace no longer uses this feature, so it's just bitrotted
+	code and can be removed with no impact.
+Who:	Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What:	KVM kernel-allocated memory slots
+When:	July 2010
+Why:	Since 2.6.25, kvm supports user-allocated memory slots, which are
+	much more flexible than kernel-allocated slots. All current userspace
+	supports the newer interface and this code can be removed with no
+	impact.
+Who:	Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What:	KVM paravirt mmu host support
+When:	January 2011
+Why:	The paravirt mmu host support is slower than non-paravirt mmu, both
+	on newer and older hardware. It is already not exposed to the guest,
+	and kept only for live migration purposes.
+Who:	Avi Kivity <avi@redhat.com>
+
+----------------------------
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index 2811e452f756..c6416a398163 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -23,12 +23,12 @@ of a virtual machine. The ioctls belong to three classes
 Only run vcpu ioctls from the same thread that was used to create the
 vcpu.
 
-2. File descritpors
+2. File descriptors
 
 The kvm API is centered around file descriptors. An initial
 open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
 can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
-handle will create a VM file descripror which can be used to issue VM
+handle will create a VM file descriptor which can be used to issue VM
 ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
 and return a file descriptor pointing to it. Finally, ioctls on a vcpu
 fd can be used to control the vcpu, including the important task of
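(Context for the hunk above, not part of the patch: the fd hierarchy api.txt describes is three nested handles, each created by an ioctl on its parent. A minimal userspace sketch in C, with error handling omitted and the vcpu index hard-coded:)

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int open_vcpu0(void)
	{
		int sys_fd  = open("/dev/kvm", O_RDWR);          /* system ioctls */
		int vm_fd   = ioctl(sys_fd, KVM_CREATE_VM, 0);   /* VM ioctls */
		int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);  /* vcpu ioctls */

		/* per the text above: issue vcpu ioctls only from this thread */
		return vcpu_fd;
	}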
@@ -643,7 +643,7 @@ Type: vm ioctl
 Parameters: struct kvm_clock_data (in)
 Returns: 0 on success, -1 on error
 
-Sets the current timestamp of kvmclock to the valued specific in its parameter.
+Sets the current timestamp of kvmclock to the value specified in its parameter.
 In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.
 
@@ -795,11 +795,11 @@ Unused.
 		__u64 data_offset; /* relative to kvm_run start */
 	} io;
 
-If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_IO, then the vcpu has
 executed a port I/O instruction which could not be satisfied by kvm.
 data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
 where kvm expects application code to place the data for the next
-KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a patcked array.
+KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
 
 	struct {
 		struct kvm_debug_exit_arch arch;
@@ -815,7 +815,7 @@ Unused.
 		__u8  is_write;
 	} mmio;
 
-If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
 executed a memory-mapped I/O instruction which could not be satisfied
 by kvm. The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
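(Aside, not part of the patch: both exit reasons above are serviced from the userspace run loop. A rough sketch, where 'run' is the mmap'ed struct kvm_run for the vcpu and handle_pio()/handle_mmio() are hypothetical VMM helpers:)

	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
		switch (run->exit_reason) {
		case KVM_EXIT_IO:
			/* the data sits inside the kvm_run mapping, at data_offset */
			handle_pio(run->io.port, run->io.direction, run->io.size,
				   run->io.count, (__u8 *)run + run->io.data_offset);
			break;
		case KVM_EXIT_MMIO:
			/* 'data' carries the write; fill it in for a read */
			handle_mmio(run->mmio.phys_addr, run->mmio.data,
				    run->mmio.len, run->mmio.is_write);
			break;
		}
	}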
diff --git a/MAINTAINERS b/MAINTAINERS
index c6591bca646b..51d8b5221dd8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3173,7 +3173,7 @@ F:	arch/x86/include/asm/svm.h
 F:	arch/x86/kvm/svm.c
 
 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
-M:	Hollis Blanchard <hollisb@us.ibm.com>
+M:	Alexander Graf <agraf@suse.de>
 L:	kvm-ppc@vger.kernel.org
 W:	http://kvm.qumranet.com
 S:	Supported
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 01c75797119c..fa4d1e59deb0 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
 	select ANON_INODES
 	select HAVE_KVM_IRQCHIP
 	select KVM_APIC_ARCHITECTURE
+	select KVM_MMIO
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions. You will need a fairly recent
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 5fdeec5fddcf..26e0e089bfe7 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 0;
 mmio:
 	if (p->dir)
-		r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				    p->size, &p->data);
 	else
-		r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
 				     p->size, &p->data);
 	if (r)
 		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
@@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	union context *host_ctx, *guest_ctx;
-	int r;
+	int r, idx;
 
-	/*
-	 * down_read() may sleep and return with interrupts enabled
-	 */
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 again:
 	if (signal_pending(current)) {
@@ -663,7 +660,7 @@ again:
 	if (r < 0)
 		goto vcpu_run_fail;
 
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	kvm_guest_enter();
 
 	/*
@@ -687,7 +684,7 @@ again:
 	kvm_guest_exit();
 	preempt_enable();
 
-	down_read(&vcpu->kvm->slots_lock);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 	r = kvm_handle_exit(kvm_run, vcpu);
 
@@ -697,10 +694,10 @@ again:
 	}
 
 out:
-	up_read(&vcpu->kvm->slots_lock);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
 		kvm_resched(vcpu);
-		down_read(&vcpu->kvm->slots_lock);
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
 
@@ -971,7 +968,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		r = kvm_setup_default_irq_routing(kvm);
 		if (r) {
-			kfree(kvm->arch.vioapic);
+			kvm_ioapic_destroy(kvm);
 			goto out;
 		}
 		break;
@@ -1377,12 +1374,14 @@ static void free_kvm(struct kvm *kvm)
 
 static void kvm_release_vm_pages(struct kvm *kvm)
 {
+	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 	int i, j;
 	unsigned long base_gfn;
 
-	for (i = 0; i < kvm->nmemslots; i++) {
-		memslot = &kvm->memslots[i];
+	slots = rcu_dereference(kvm->memslots);
+	for (i = 0; i < slots->nmemslots; i++) {
+		memslot = &slots->memslots[i];
 		base_gfn = memslot->base_gfn;
 
 		for (j = 0; j < memslot->npages; j++) {
@@ -1405,6 +1404,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kfree(kvm->arch.vioapic);
 	kvm_release_vm_pages(kvm);
 	kvm_free_physmem(kvm);
+	cleanup_srcu_struct(&kvm->srcu);
 	free_kvm(kvm);
 }
 
@@ -1576,15 +1576,15 @@ out:
 	return r;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+		struct kvm_memory_slot *memslot,
 		struct kvm_memory_slot old,
+		struct kvm_userspace_memory_region *mem,
 		int user_alloc)
 {
 	unsigned long i;
 	unsigned long pfn;
-	int npages = mem->memory_size >> PAGE_SHIFT;
-	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	int npages = memslot->npages;
 	unsigned long base_gfn = memslot->base_gfn;
 
 	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1608,6 +1608,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	return;
+}
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
@@ -1802,7 +1810,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
 	if (log->slot >= KVM_MEMORY_SLOTS)
 		goto out;
 
-	memslot = &kvm->memslots[log->slot];
+	memslot = &kvm->memslots->memslots[log->slot];
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -1827,6 +1835,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
+	mutex_lock(&kvm->slots_lock);
 	spin_lock(&kvm->arch.dirty_log_lock);
 
 	r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1840,12 +1849,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	/* If nothing is dirty, don't bother messing with page tables. */
 	if (is_dirty) {
 		kvm_flush_remote_tlbs(kvm);
-		memslot = &kvm->memslots[log->slot];
+		memslot = &kvm->memslots->memslots[log->slot];
 		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
 		memset(memslot->dirty_bitmap, 0, n);
 	}
 	r = 0;
 out:
+	mutex_unlock(&kvm->slots_lock);
 	spin_unlock(&kvm->arch.dirty_log_lock);
 	return r;
 }
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
index e4b82319881d..cb548ee9fcae 100644
--- a/arch/ia64/kvm/kvm_fw.c
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -75,7 +75,7 @@ static void set_pal_result(struct kvm_vcpu *vcpu,
 	struct exit_ctl_data *p;
 
 	p = kvm_get_exit_data(vcpu);
-	if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
 		p->u.pal_data.ret = result;
 		return ;
 	}
@@ -87,7 +87,7 @@ static void set_sal_result(struct kvm_vcpu *vcpu,
 	struct exit_ctl_data *p;
 
 	p = kvm_get_exit_data(vcpu);
-	if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
 		p->u.sal_data.ret = result;
 		return ;
 	}
@@ -322,7 +322,7 @@ static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
 	struct exit_ctl_data *p;
 
 	p = kvm_get_exit_data(vcpu);
-	if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
 		index = p->u.pal_data.gr28;
 
 	return index;
@@ -646,18 +646,16 @@ static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
 
 	p = kvm_get_exit_data(vcpu);
 
-	if (p) {
-		if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-			*in0 = p->u.sal_data.in0;
-			*in1 = p->u.sal_data.in1;
-			*in2 = p->u.sal_data.in2;
-			*in3 = p->u.sal_data.in3;
-			*in4 = p->u.sal_data.in4;
-			*in5 = p->u.sal_data.in5;
-			*in6 = p->u.sal_data.in6;
-			*in7 = p->u.sal_data.in7;
-			return ;
-		}
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		*in0 = p->u.sal_data.in0;
+		*in1 = p->u.sal_data.in1;
+		*in2 = p->u.sal_data.in2;
+		*in3 = p->u.sal_data.in3;
+		*in4 = p->u.sal_data.in4;
+		*in5 = p->u.sal_data.in5;
+		*in6 = p->u.sal_data.in6;
+		*in7 = p->u.sal_data.in7;
+		return ;
 	}
 	*in0 = 0;
 }
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 9bf55afd08d0..fb8f9f59a1ed 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -316,8 +316,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
 		return;
 	} else {
 		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! \
-				Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+		panic_vm(vcpu, "Unsupported MMIO access instruction! "
+				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
 			bundle.i64[0], bundle.i64[1]);
 	}
 
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index dce75b70cdd5..958815c9787d 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -1639,8 +1639,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
 	 * Otherwise panic
 	 */
 	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
-				& vpsr.is=0\n");
+		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
+				"& vpsr.is=0\n");
 
 	/*
 	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index af2abe74f544..aadf2dd6f84e 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -97,4 +97,10 @@
 #define RESUME_HOST             RESUME_FLAG_HOST
 #define RESUME_HOST_NV          (RESUME_FLAG_HOST|RESUME_FLAG_NV)
 
+#define KVM_GUEST_MODE_NONE	0
+#define KVM_GUEST_MODE_GUEST	1
+#define KVM_GUEST_MODE_SKIP	2
+
+#define KVM_INST_FETCH_FAILED	-1
+
 #endif /* __POWERPC_KVM_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 74b7369770d0..db7db0a96967 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -22,7 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
-#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_64_asm.h>
 
 struct kvmppc_slb {
 	u64 esid;
@@ -33,7 +33,8 @@ struct kvmppc_slb {
 	bool Ks;
 	bool Kp;
 	bool nx;
-	bool large;
+	bool large;	/* PTEs are 16MB */
+	bool tb;	/* 1TB segment */
 	bool class;
 };
 
@@ -69,6 +70,7 @@ struct kvmppc_sid_map {
 
 struct kvmppc_vcpu_book3s {
 	struct kvm_vcpu vcpu;
+	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 	struct kvmppc_sid_map sid_map[SID_MAP_NUM];
 	struct kvmppc_slb slb[64];
 	struct {
@@ -89,6 +91,7 @@ struct kvmppc_vcpu_book3s {
 	u64 vsid_next;
 	u64 vsid_max;
 	int context_id;
+	ulong prog_flags; /* flags to inject when giving a 700 trap */
 };
 
 #define CONTEXT_HOST		0
@@ -119,6 +122,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 
 extern u32 kvmppc_trampoline_lowmem;
 extern u32 kvmppc_trampoline_enter;
+extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_load_up_fpu(void);
+extern void kvmppc_load_up_altivec(void);
+extern void kvmppc_load_up_vsx(void);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
index 2e06ee8184ef..183461b48407 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64_asm.h
@@ -20,6 +20,8 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+#ifdef __ASSEMBLY__
+
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 
 #include <asm/kvm_asm.h>
@@ -55,4 +57,20 @@ kvmppc_resume_\intno:
 
 #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
 
+#else  /*__ASSEMBLY__ */
+
+struct kvmppc_book3s_shadow_vcpu {
+	ulong gpr[14];
+	u32 cr;
+	u32 xer;
+	ulong host_r1;
+	ulong host_r2;
+	ulong handler;
+	ulong scratch0;
+	ulong scratch1;
+	ulong vmhandler;
+};
+
+#endif /*__ASSEMBLY__ */
+
 #endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
index 9d497ce49726..7fea26fffb25 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -52,9 +52,12 @@ struct kvmppc_vcpu_e500 {
 	u32 mas5;
 	u32 mas6;
 	u32 mas7;
+	u32 l1csr0;
 	u32 l1csr1;
 	u32 hid0;
 	u32 hid1;
+	u32 tlb0cfg;
+	u32 tlb1cfg;
 
 	struct kvm_vcpu vcpu;
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1201f62d0d73..5e5bae7e152f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -167,23 +167,40 @@ struct kvm_vcpu_arch {
 	ulong trampoline_lowmem;
 	ulong trampoline_enter;
 	ulong highmem_handler;
+	ulong rmcall;
 	ulong host_paca_phys;
 	struct kvmppc_mmu mmu;
 #endif
 
-	u64 fpr[32];
 	ulong gpr[32];
 
+	u64 fpr[32];
+	u32 fpscr;
+
+#ifdef CONFIG_ALTIVEC
+	vector128 vr[32];
+	vector128 vscr;
+#endif
+
+#ifdef CONFIG_VSX
+	u64 vsr[32];
+#endif
+
 	ulong pc;
-	u32 cr;
 	ulong ctr;
 	ulong lr;
+
+#ifdef CONFIG_BOOKE
 	ulong xer;
+	u32 cr;
+#endif
 
 	ulong msr;
 #ifdef CONFIG_PPC64
 	ulong shadow_msr;
+	ulong shadow_srr1;
 	ulong hflags;
+	ulong guest_owned_ext;
 #endif
 	u32 mmucr;
 	ulong sprg0;
@@ -242,6 +259,8 @@ struct kvm_vcpu_arch {
 #endif
 	ulong fault_dear;
 	ulong fault_esr;
+	ulong queued_dear;
+	ulong queued_esr;
 	gpa_t paddr_accessed;
 
 	u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 269ee46ab028..e2642829e435 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -28,6 +28,9 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/kvm_book3s.h>
+#endif
 
 enum emulation_result {
 	EMULATE_DONE,         /* no further processing */
@@ -80,8 +83,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 
@@ -95,4 +99,81 @@ extern void kvmppc_booke_exit(void);
 
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_PPC_BOOK3S
+
+/* We assume we're always acting on the current vcpu */
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+	if ( num < 14 ) {
+		get_paca()->shadow_vcpu.gpr[num] = val;
+		to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
+	} else
+		vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+	if ( num < 14 )
+		return get_paca()->shadow_vcpu.gpr[num];
+	else
+		return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+	get_paca()->shadow_vcpu.cr = val;
+	to_book3s(vcpu)->shadow_vcpu.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+	return get_paca()->shadow_vcpu.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+	get_paca()->shadow_vcpu.xer = val;
+	to_book3s(vcpu)->shadow_vcpu.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+	return get_paca()->shadow_vcpu.xer;
+}
+
+#else
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+	vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+	return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+	vcpu->arch.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+	vcpu->arch.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.xer;
+}
+
+#endif
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 5e9b4ef71415..d8a693109c82 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -19,6 +19,9 @@
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/exception-64e.h>
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/kvm_book3s_64_asm.h>
+#endif
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -135,6 +138,8 @@ struct paca_struct {
 		u64 esid;
 		u64 vsid;
 	} kvm_slb[64];			/* guest SLB */
+	/* We use this to store guest state in */
+	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 	u8 kvm_slb_max;			/* highest used guest slb entry */
 	u8 kvm_in_guest;		/* are we inside the guest? */
 #endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index bc8dd53f718a..5572e86223f4 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -426,6 +426,10 @@
 #define   SRR1_WAKEMT		0x00280000 /* mtctrl */
 #define   SRR1_WAKEDEC		0x00180000 /* Decrementer interrupt */
 #define   SRR1_WAKETHERM	0x00100000 /* Thermal management interrupt */
+#define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
+#define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
+#define   SRR1_PROGTRAP		0x00020000 /* Trap */
+#define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
 #define SPRN_HSRR0	0x13A	/* Save/Restore Register 0 */
 #define SPRN_HSRR1	0x13B	/* Save/Restore Register 1 */
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a6c2b63227b3..957ceb7059c5 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -194,6 +194,30 @@ int main(void)
 	DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
 	DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
 	DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
+	DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
+	DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
+	DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
+	DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
+	DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
+	DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
+	DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
+	DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
+	DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
+	DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
+	DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
+	DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
+	DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
+	DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
+	DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
+	DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
+	DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
+	DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
+	DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
+					    shadow_vcpu.vmhandler));
+	DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
+					   shadow_vcpu.scratch0));
+	DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
+					   shadow_vcpu.scratch1));
 #endif
 #endif /* CONFIG_PPC64 */
 
@@ -389,8 +413,6 @@ int main(void)
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
 	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
-	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
-	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
 	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
 	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
 	DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
@@ -411,11 +433,16 @@ int main(void)
 	DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
 	DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
 	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 	DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
 	DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
 	DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
+	DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
 	DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
-#endif
+#else
+	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+#endif /* CONFIG_PPC64 */
 #endif
 #ifdef CONFIG_44x
 	DEFINE(PGD_T_LOG2, PGD_T_LOG2);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 425451453e96..ab3e392ac63c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -107,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec);
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
 EXPORT_SYMBOL(giveup_vsx);
+EXPORT_SYMBOL_GPL(__giveup_vsx);
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 EXPORT_SYMBOL(giveup_spe);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 61af58fcecee..65ea083a5b27 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -65,13 +65,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		 */
 		switch (dcrn) {
 		case DCRN_CPR0_CONFIG_ADDR:
-			vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+			kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
 			break;
 		case DCRN_CPR0_CONFIG_DATA:
 			local_irq_disable();
 			mtdcr(DCRN_CPR0_CONFIG_ADDR,
 			      vcpu->arch.cpr0_cfgaddr);
-			vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+			kvmppc_set_gpr(vcpu, rt,
+					mfdcr(DCRN_CPR0_CONFIG_DATA));
 			local_irq_enable();
 			break;
 		default:
@@ -93,11 +94,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		/* emulate some access in kernel */
 		switch (dcrn) {
 		case DCRN_CPR0_CONFIG_ADDR:
-			vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+			vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
 			break;
 		default:
 			run->dcr.dcrn = dcrn;
-			run->dcr.data = vcpu->arch.gpr[rs];
+			run->dcr.data = kvmppc_get_gpr(vcpu, rs);
 			run->dcr.is_write = 1;
 			vcpu->arch.dcr_needed = 1;
 			kvmppc_account_exit(vcpu, DCR_EXITS);
@@ -146,13 +147,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 
 	switch (sprn) {
 	case SPRN_PID:
-		kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+		kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
 	case SPRN_MMUCR:
-		vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
 	case SPRN_CCR0:
-		vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
 	case SPRN_CCR1:
-		vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+		vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
 	default:
 		emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
 	}
@@ -167,13 +168,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
 	switch (sprn) {
 	case SPRN_PID:
-		vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
 	case SPRN_MMUCR:
-		vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
 	case SPRN_CCR0:
-		vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
 	case SPRN_CCR1:
-		vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
 	default:
 		emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
 	}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index ff3cb63b8117..2570fcc7665d 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -439,7 +439,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	struct kvmppc_44x_tlbe *tlbe;
 	unsigned int gtlb_index;
 
-	gtlb_index = vcpu->arch.gpr[ra];
+	gtlb_index = kvmppc_get_gpr(vcpu, ra);
 	if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
 		printk("%s: index %d\n", __func__, gtlb_index);
 		kvmppc_dump_vcpu(vcpu);
@@ -455,15 +455,15 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
 	switch (ws) {
 	case PPC44x_TLB_PAGEID:
 		tlbe->tid = get_mmucr_stid(vcpu);
-		tlbe->word0 = vcpu->arch.gpr[rs];
+		tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
 		break;
 
 	case PPC44x_TLB_XLAT:
-		tlbe->word1 = vcpu->arch.gpr[rs];
+		tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
 		break;
 
 	case PPC44x_TLB_ATTRIB:
-		tlbe->word2 = vcpu->arch.gpr[rs];
+		tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
 		break;
 
 	default:
@@ -500,18 +500,20 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
 	unsigned int as = get_mmucr_sts(vcpu);
 	unsigned int pid = get_mmucr_stid(vcpu);
 
-	ea = vcpu->arch.gpr[rb];
+	ea = kvmppc_get_gpr(vcpu, rb);
 	if (ra)
-		ea += vcpu->arch.gpr[ra];
+		ea += kvmppc_get_gpr(vcpu, ra);
 
 	gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
 	if (rc) {
+		u32 cr = kvmppc_get_cr(vcpu);
+
 		if (gtlb_index < 0)
-			vcpu->arch.cr &= ~0x20000000;
+			kvmppc_set_cr(vcpu, cr & ~0x20000000);
 		else
-			vcpu->arch.cr |= 0x20000000;
+			kvmppc_set_cr(vcpu, cr | 0x20000000);
 	}
-	vcpu->arch.gpr[rt] = gtlb_index;
+	kvmppc_set_gpr(vcpu, rt, gtlb_index);
 
 	kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
 	return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index fe037fdaf1b3..60624cc9f4d4 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
 	bool
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select KVM_MMIO
 
 config KVM_BOOK3S_64_HANDLER
 	bool
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3e294bd9b8c6..9a271f0929c7 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -33,12 +33,9 @@
33 33
34/* #define EXIT_DEBUG */ 34/* #define EXIT_DEBUG */
35/* #define EXIT_DEBUG_SIMPLE */ 35/* #define EXIT_DEBUG_SIMPLE */
36/* #define DEBUG_EXT */
36 37
37/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0. 38static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
38 * When set, we retrigger a DEC interrupt after that if DEC <= 0.
39 * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
40
41/* #define AGGRESSIVE_DEC */
42 39
43struct kvm_stats_debugfs_item debugfs_entries[] = { 40struct kvm_stats_debugfs_item debugfs_entries[] = {
44 { "exits", VCPU_STAT(sum_exits) }, 41 { "exits", VCPU_STAT(sum_exits) },
@@ -72,16 +69,24 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
72void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 69void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
73{ 70{
74 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); 71 memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
72 memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
73 sizeof(get_paca()->shadow_vcpu));
75 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; 74 get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
76} 75}
77 76
78void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) 77void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
79{ 78{
80 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); 79 memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
80 memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
81 sizeof(get_paca()->shadow_vcpu));
81 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; 82 to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
83
84 kvmppc_giveup_ext(vcpu, MSR_FP);
85 kvmppc_giveup_ext(vcpu, MSR_VEC);
86 kvmppc_giveup_ext(vcpu, MSR_VSX);
82} 87}
83 88
84#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG) 89#if defined(EXIT_DEBUG)
85static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) 90static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
86{ 91{
87 u64 jd = mftb() - vcpu->arch.dec_jiffies; 92 u64 jd = mftb() - vcpu->arch.dec_jiffies;
@@ -89,6 +94,23 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
89} 94}
90#endif 95#endif
91 96
97static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
98{
99 vcpu->arch.shadow_msr = vcpu->arch.msr;
100 /* Guest MSR values */
101 vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
102 MSR_BE | MSR_DE;
103 /* Process MSR values */
104 vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
105 MSR_EE;
106 /* External providers the guest reserved */
107 vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
108 /* 64-bit Process MSR values */
109#ifdef CONFIG_PPC_BOOK3S_64
110 vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
111#endif
112}
113
92void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) 114void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
93{ 115{
94 ulong old_msr = vcpu->arch.msr; 116 ulong old_msr = vcpu->arch.msr;
@@ -96,12 +118,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
96#ifdef EXIT_DEBUG 118#ifdef EXIT_DEBUG
97 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); 119 printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
98#endif 120#endif
121
99 msr &= to_book3s(vcpu)->msr_mask; 122 msr &= to_book3s(vcpu)->msr_mask;
100 vcpu->arch.msr = msr; 123 vcpu->arch.msr = msr;
101 vcpu->arch.shadow_msr = msr | MSR_USER32; 124 kvmppc_recalc_shadow_msr(vcpu);
102 vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
103 MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
104 MSR_FE1);
105 125
106 if (msr & (MSR_WE|MSR_POW)) { 126 if (msr & (MSR_WE|MSR_POW)) {
107 if (!vcpu->arch.pending_exceptions) { 127 if (!vcpu->arch.pending_exceptions) {
@@ -125,11 +145,10 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
125 vcpu->arch.mmu.reset_msr(vcpu); 145 vcpu->arch.mmu.reset_msr(vcpu);
126} 146}
127 147
128void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) 148static int kvmppc_book3s_vec2irqprio(unsigned int vec)
129{ 149{
130 unsigned int prio; 150 unsigned int prio;
131 151
132 vcpu->stat.queue_intr++;
133 switch (vec) { 152 switch (vec) {
134 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; 153 case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
135 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; 154 case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
@@ -149,15 +168,31 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
149 default: prio = BOOK3S_IRQPRIO_MAX; break; 168 default: prio = BOOK3S_IRQPRIO_MAX; break;
150 } 169 }
151 170
152 set_bit(prio, &vcpu->arch.pending_exceptions); 171 return prio;
172}
173
174static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
175 unsigned int vec)
176{
177 clear_bit(kvmppc_book3s_vec2irqprio(vec),
178 &vcpu->arch.pending_exceptions);
179}
180
181void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
182{
183 vcpu->stat.queue_intr++;
184
185 set_bit(kvmppc_book3s_vec2irqprio(vec),
186 &vcpu->arch.pending_exceptions);
153#ifdef EXIT_DEBUG 187#ifdef EXIT_DEBUG
154 printk(KERN_INFO "Queueing interrupt %x\n", vec); 188 printk(KERN_INFO "Queueing interrupt %x\n", vec);
155#endif 189#endif
156} 190}
157 191
158 192
159void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 193void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
160{ 194{
195 to_book3s(vcpu)->prog_flags = flags;
161 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); 196 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
162} 197}
163 198
@@ -171,6 +206,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
171 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); 206 return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
172} 207}
173 208
209void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
210{
211 kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
212}
213
174void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 214void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
175 struct kvm_interrupt *irq) 215 struct kvm_interrupt *irq)
176{ 216{
@@ -181,6 +221,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
181{ 221{
182 int deliver = 1; 222 int deliver = 1;
183 int vec = 0; 223 int vec = 0;
224 ulong flags = 0ULL;
184 225
185 switch (priority) { 226 switch (priority) {
186 case BOOK3S_IRQPRIO_DECREMENTER: 227 case BOOK3S_IRQPRIO_DECREMENTER:
@@ -214,6 +255,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
214 break; 255 break;
215 case BOOK3S_IRQPRIO_PROGRAM: 256 case BOOK3S_IRQPRIO_PROGRAM:
216 vec = BOOK3S_INTERRUPT_PROGRAM; 257 vec = BOOK3S_INTERRUPT_PROGRAM;
258 flags = to_book3s(vcpu)->prog_flags;
217 break; 259 break;
218 case BOOK3S_IRQPRIO_VSX: 260 case BOOK3S_IRQPRIO_VSX:
219 vec = BOOK3S_INTERRUPT_VSX; 261 vec = BOOK3S_INTERRUPT_VSX;
@@ -244,7 +286,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
244#endif 286#endif
245 287
246 if (deliver) 288 if (deliver)
247 kvmppc_inject_interrupt(vcpu, vec, 0ULL); 289 kvmppc_inject_interrupt(vcpu, vec, flags);
248 290
249 return deliver; 291 return deliver;
250} 292}
@@ -254,21 +296,15 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
254 unsigned long *pending = &vcpu->arch.pending_exceptions; 296 unsigned long *pending = &vcpu->arch.pending_exceptions;
255 unsigned int priority; 297 unsigned int priority;
256 298
257 /* XXX be more clever here - no need to mftb() on every entry */
258 /* Issue DEC again if it's still active */
259#ifdef AGGRESSIVE_DEC
260 if (vcpu->arch.msr & MSR_EE)
261 if (kvmppc_get_dec(vcpu) & 0x80000000)
262 kvmppc_core_queue_dec(vcpu);
263#endif
264
265#ifdef EXIT_DEBUG 299#ifdef EXIT_DEBUG
266 if (vcpu->arch.pending_exceptions) 300 if (vcpu->arch.pending_exceptions)
267 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); 301 printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
268#endif 302#endif
269 priority = __ffs(*pending); 303 priority = __ffs(*pending);
270 while (priority <= (sizeof(unsigned int) * 8)) { 304 while (priority <= (sizeof(unsigned int) * 8)) {
271 if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) { 305 if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
306 (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
307 /* DEC interrupts get cleared by mtdec */
272 clear_bit(priority, &vcpu->arch.pending_exceptions); 308 clear_bit(priority, &vcpu->arch.pending_exceptions);
273 break; 309 break;
274 } 310 }
@@ -503,14 +539,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
503 /* Page not found in guest PTE entries */ 539 /* Page not found in guest PTE entries */
504 vcpu->arch.dear = vcpu->arch.fault_dear; 540 vcpu->arch.dear = vcpu->arch.fault_dear;
505 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; 541 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
506 vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); 542 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
507 kvmppc_book3s_queue_irqprio(vcpu, vec); 543 kvmppc_book3s_queue_irqprio(vcpu, vec);
508 } else if (page_found == -EPERM) { 544 } else if (page_found == -EPERM) {
509 /* Storage protection */ 545 /* Storage protection */
510 vcpu->arch.dear = vcpu->arch.fault_dear; 546 vcpu->arch.dear = vcpu->arch.fault_dear;
511 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; 547 to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
512 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; 548 to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
513 vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); 549 vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
514 kvmppc_book3s_queue_irqprio(vcpu, vec); 550 kvmppc_book3s_queue_irqprio(vcpu, vec);
515 } else if (page_found == -EINVAL) { 551 } else if (page_found == -EINVAL) {
516 /* Page not found in guest SLB */ 552 /* Page not found in guest SLB */
@@ -532,13 +568,122 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
532 r = kvmppc_emulate_mmio(run, vcpu); 568 r = kvmppc_emulate_mmio(run, vcpu);
533 if ( r == RESUME_HOST_NV ) 569 if ( r == RESUME_HOST_NV )
534 r = RESUME_HOST; 570 r = RESUME_HOST;
535 if ( r == RESUME_GUEST_NV )
536 r = RESUME_GUEST;
537 } 571 }
538 572
539 return r; 573 return r;
540} 574}
541 575
576static inline int get_fpr_index(int i)
577{
578#ifdef CONFIG_VSX
579 i *= 2;
580#endif
581 return i;
582}
583
584/* Give up external provider (FPU, Altivec, VSX) */
585static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
586{
587 struct thread_struct *t = &current->thread;
588 u64 *vcpu_fpr = vcpu->arch.fpr;
589 u64 *vcpu_vsx = vcpu->arch.vsr;
590 u64 *thread_fpr = (u64*)t->fpr;
591 int i;
592
593 if (!(vcpu->arch.guest_owned_ext & msr))
594 return;
595
596#ifdef DEBUG_EXT
597 printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
598#endif
599
600 switch (msr) {
601 case MSR_FP:
602 giveup_fpu(current);
603 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
604 vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
605
606 vcpu->arch.fpscr = t->fpscr.val;
607 break;
608 case MSR_VEC:
609#ifdef CONFIG_ALTIVEC
610 giveup_altivec(current);
611 memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
612 vcpu->arch.vscr = t->vscr;
613#endif
614 break;
615 case MSR_VSX:
616#ifdef CONFIG_VSX
617 __giveup_vsx(current);
618 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
619 vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
620#endif
621 break;
622 default:
623 BUG();
624 }
625
626 vcpu->arch.guest_owned_ext &= ~msr;
627 current->thread.regs->msr &= ~msr;
628 kvmppc_recalc_shadow_msr(vcpu);
629}
630
631/* Handle external providers (FPU, Altivec, VSX) */
632static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
633 ulong msr)
634{
635 struct thread_struct *t = &current->thread;
636 u64 *vcpu_fpr = vcpu->arch.fpr;
637 u64 *vcpu_vsx = vcpu->arch.vsr;
638 u64 *thread_fpr = (u64*)t->fpr;
639 int i;
640
641 if (!(vcpu->arch.msr & msr)) {
642 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
643 return RESUME_GUEST;
644 }
645
646#ifdef DEBUG_EXT
647 printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
648#endif
649
650 current->thread.regs->msr |= msr;
651
652 switch (msr) {
653 case MSR_FP:
654 for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
655 thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
656
657 t->fpscr.val = vcpu->arch.fpscr;
658 t->fpexc_mode = 0;
659 kvmppc_load_up_fpu();
660 break;
661 case MSR_VEC:
662#ifdef CONFIG_ALTIVEC
663 memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
664 t->vscr = vcpu->arch.vscr;
665 t->vrsave = -1;
666 kvmppc_load_up_altivec();
667#endif
668 break;
669 case MSR_VSX:
670#ifdef CONFIG_VSX
671 for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
672 thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
673 kvmppc_load_up_vsx();
674#endif
675 break;
676 default:
677 BUG();
678 }
679
680 vcpu->arch.guest_owned_ext |= msr;
681
682 kvmppc_recalc_shadow_msr(vcpu);
683
684 return RESUME_GUEST;
685}
686
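kvmppc_giveup_ext() and kvmppc_handle_ext() above implement lazy switching of the FPU/Altivec/VSX register files: a facility-unavailable exit loads the guest state into the real registers and marks the facility guest-owned in guest_owned_ext, and giving the facility up copies the live registers back into the vcpu. A compressed userspace model of that ownership dance, with the register file shrunk to a few doubles and the MSR bits reduced to plain flags (the real code copies through current->thread and calls the kvmppc_load_up_* helpers):

#include <stdio.h>
#include <string.h>

#define MSR_FP  0x1UL
#define MSR_VEC 0x2UL

struct vcpu {
    unsigned long guest_owned_ext;  /* facilities currently holding guest state */
    double fpr[4];                  /* saved guest FP registers */
};

static double live_fpr[4];          /* stand-in for the CPU's FP register file */

/* Guest trapped on an FP instruction: load its state and hand the
 * facility over (what kvmppc_handle_ext does). */
static void handle_ext(struct vcpu *v, unsigned long msr)
{
    if (msr & MSR_FP)
        memcpy(live_fpr, v->fpr, sizeof(live_fpr));
    v->guest_owned_ext |= msr;
}

/* The host (or a heavyweight exit) wants the facility back: flush the
 * live registers into the vcpu first (what kvmppc_giveup_ext does). */
static void giveup_ext(struct vcpu *v, unsigned long msr)
{
    if (!(v->guest_owned_ext & msr))
        return;                     /* guest never owned it, nothing to save */
    if (msr & MSR_FP)
        memcpy(v->fpr, live_fpr, sizeof(v->fpr));
    v->guest_owned_ext &= ~msr;
}

int main(void)
{
    struct vcpu v = { .fpr = { 1.0, 2.0, 3.0, 4.0 } };

    handle_ext(&v, MSR_FP);         /* first guest FP instruction */
    live_fpr[0] = 42.0;             /* guest computes something */
    giveup_ext(&v, MSR_FP);         /* heavyweight exit: save it back */
    printf("guest f0 = %g\n", v.fpr[0]);
    return 0;
}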
542int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, 687int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
543 unsigned int exit_nr) 688 unsigned int exit_nr)
544{ 689{
@@ -563,7 +708,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
563 case BOOK3S_INTERRUPT_INST_STORAGE: 708 case BOOK3S_INTERRUPT_INST_STORAGE:
564 vcpu->stat.pf_instruc++; 709 vcpu->stat.pf_instruc++;
565 /* only care about PTEG not found errors, but leave NX alone */ 710 /* only care about PTEG not found errors, but leave NX alone */
566 if (vcpu->arch.shadow_msr & 0x40000000) { 711 if (vcpu->arch.shadow_srr1 & 0x40000000) {
567 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); 712 r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
568 vcpu->stat.sp_instruc++; 713 vcpu->stat.sp_instruc++;
569 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && 714 } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -575,7 +720,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
575 */ 720 */
576 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 721 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
577 } else { 722 } else {
578 vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000); 723 vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
579 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 724 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
580 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); 725 kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
581 r = RESUME_GUEST; 726 r = RESUME_GUEST;
@@ -621,6 +766,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
621 case BOOK3S_INTERRUPT_PROGRAM: 766 case BOOK3S_INTERRUPT_PROGRAM:
622 { 767 {
623 enum emulation_result er; 768 enum emulation_result er;
769 ulong flags;
770
771 flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
624 772
625 if (vcpu->arch.msr & MSR_PR) { 773 if (vcpu->arch.msr & MSR_PR) {
626#ifdef EXIT_DEBUG 774#ifdef EXIT_DEBUG
@@ -628,7 +776,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
628#endif 776#endif
629 if ((vcpu->arch.last_inst & 0xff0007ff) != 777 if ((vcpu->arch.last_inst & 0xff0007ff) !=
630 (INS_DCBZ & 0xfffffff7)) { 778 (INS_DCBZ & 0xfffffff7)) {
631 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 779 kvmppc_core_queue_program(vcpu, flags);
632 r = RESUME_GUEST; 780 r = RESUME_GUEST;
633 break; 781 break;
634 } 782 }
@@ -638,12 +786,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
638 er = kvmppc_emulate_instruction(run, vcpu); 786 er = kvmppc_emulate_instruction(run, vcpu);
639 switch (er) { 787 switch (er) {
640 case EMULATE_DONE: 788 case EMULATE_DONE:
641 r = RESUME_GUEST; 789 r = RESUME_GUEST_NV;
642 break; 790 break;
643 case EMULATE_FAIL: 791 case EMULATE_FAIL:
644 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", 792 printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
645 __func__, vcpu->arch.pc, vcpu->arch.last_inst); 793 __func__, vcpu->arch.pc, vcpu->arch.last_inst);
646 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 794 kvmppc_core_queue_program(vcpu, flags);
647 r = RESUME_GUEST; 795 r = RESUME_GUEST;
648 break; 796 break;
649 default: 797 default:
@@ -653,23 +801,30 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
653 } 801 }
654 case BOOK3S_INTERRUPT_SYSCALL: 802 case BOOK3S_INTERRUPT_SYSCALL:
655#ifdef EXIT_DEBUG 803#ifdef EXIT_DEBUG
656 printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]); 804 printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
657#endif 805#endif
658 vcpu->stat.syscall_exits++; 806 vcpu->stat.syscall_exits++;
659 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 807 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
660 r = RESUME_GUEST; 808 r = RESUME_GUEST;
661 break; 809 break;
662 case BOOK3S_INTERRUPT_MACHINE_CHECK:
663 case BOOK3S_INTERRUPT_FP_UNAVAIL: 810 case BOOK3S_INTERRUPT_FP_UNAVAIL:
664 case BOOK3S_INTERRUPT_TRACE: 811 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
812 break;
665 case BOOK3S_INTERRUPT_ALTIVEC: 813 case BOOK3S_INTERRUPT_ALTIVEC:
814 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
815 break;
666 case BOOK3S_INTERRUPT_VSX: 816 case BOOK3S_INTERRUPT_VSX:
817 r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
818 break;
819 case BOOK3S_INTERRUPT_MACHINE_CHECK:
820 case BOOK3S_INTERRUPT_TRACE:
667 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 821 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
668 r = RESUME_GUEST; 822 r = RESUME_GUEST;
669 break; 823 break;
670 default: 824 default:
671 /* Ugh - bork here! What did we get? */ 825 /* Ugh - bork here! What did we get? */
672 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr); 826 printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
827 exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
673 r = RESUME_HOST; 828 r = RESUME_HOST;
674 BUG(); 829 BUG();
675 break; 830 break;
@@ -712,10 +867,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
712 int i; 867 int i;
713 868
714 regs->pc = vcpu->arch.pc; 869 regs->pc = vcpu->arch.pc;
715 regs->cr = vcpu->arch.cr; 870 regs->cr = kvmppc_get_cr(vcpu);
716 regs->ctr = vcpu->arch.ctr; 871 regs->ctr = vcpu->arch.ctr;
717 regs->lr = vcpu->arch.lr; 872 regs->lr = vcpu->arch.lr;
718 regs->xer = vcpu->arch.xer; 873 regs->xer = kvmppc_get_xer(vcpu);
719 regs->msr = vcpu->arch.msr; 874 regs->msr = vcpu->arch.msr;
720 regs->srr0 = vcpu->arch.srr0; 875 regs->srr0 = vcpu->arch.srr0;
721 regs->srr1 = vcpu->arch.srr1; 876 regs->srr1 = vcpu->arch.srr1;
@@ -729,7 +884,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
729 regs->sprg7 = vcpu->arch.sprg6; 884 regs->sprg7 = vcpu->arch.sprg6;
730 885
731 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 886 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
732 regs->gpr[i] = vcpu->arch.gpr[i]; 887 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
733 888
734 return 0; 889 return 0;
735} 890}
@@ -739,10 +894,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
739 int i; 894 int i;
740 895
741 vcpu->arch.pc = regs->pc; 896 vcpu->arch.pc = regs->pc;
742 vcpu->arch.cr = regs->cr; 897 kvmppc_set_cr(vcpu, regs->cr);
743 vcpu->arch.ctr = regs->ctr; 898 vcpu->arch.ctr = regs->ctr;
744 vcpu->arch.lr = regs->lr; 899 vcpu->arch.lr = regs->lr;
745 vcpu->arch.xer = regs->xer; 900 kvmppc_set_xer(vcpu, regs->xer);
746 kvmppc_set_msr(vcpu, regs->msr); 901 kvmppc_set_msr(vcpu, regs->msr);
747 vcpu->arch.srr0 = regs->srr0; 902 vcpu->arch.srr0 = regs->srr0;
748 vcpu->arch.srr1 = regs->srr1; 903 vcpu->arch.srr1 = regs->srr1;
@@ -754,8 +909,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
754 vcpu->arch.sprg6 = regs->sprg5; 909 vcpu->arch.sprg6 = regs->sprg5;
755 vcpu->arch.sprg7 = regs->sprg6; 910 vcpu->arch.sprg7 = regs->sprg6;
756 911
757 for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) 912 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
758 vcpu->arch.gpr[i] = regs->gpr[i]; 913 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
759 914
760 return 0; 915 return 0;
761} 916}
@@ -850,7 +1005,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
850 int is_dirty = 0; 1005 int is_dirty = 0;
851 int r, n; 1006 int r, n;
852 1007
853 down_write(&kvm->slots_lock); 1008 mutex_lock(&kvm->slots_lock);
854 1009
855 r = kvm_get_dirty_log(kvm, log, &is_dirty); 1010 r = kvm_get_dirty_log(kvm, log, &is_dirty);
856 if (r) 1011 if (r)
@@ -858,7 +1013,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
858 1013
859 /* If nothing is dirty, don't bother messing with page tables. */ 1014 /* If nothing is dirty, don't bother messing with page tables. */
860 if (is_dirty) { 1015 if (is_dirty) {
861 memslot = &kvm->memslots[log->slot]; 1016 memslot = &kvm->memslots->memslots[log->slot];
862 1017
863 ga = memslot->base_gfn << PAGE_SHIFT; 1018 ga = memslot->base_gfn << PAGE_SHIFT;
864 ga_end = ga + (memslot->npages << PAGE_SHIFT); 1019 ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -872,7 +1027,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
872 1027
873 r = 0; 1028 r = 0;
874out: 1029out:
875 up_write(&kvm->slots_lock); 1030 mutex_unlock(&kvm->slots_lock);
876 return r; 1031 return r;
877} 1032}
878 1033
@@ -910,6 +1065,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
910 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; 1065 vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
911 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; 1066 vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
912 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; 1067 vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
1068 vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
913 1069
914 vcpu->arch.shadow_msr = MSR_USER64; 1070 vcpu->arch.shadow_msr = MSR_USER64;
915 1071
@@ -943,6 +1099,10 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
943int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) 1099int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
944{ 1100{
945 int ret; 1101 int ret;
1102 struct thread_struct ext_bkp;
1103 bool save_vec = current->thread.used_vr;
1104 bool save_vsx = current->thread.used_vsr;
1105 ulong ext_msr;
946 1106
947 /* No need to go into the guest when all we do is going out */ 1107 /* No need to go into the guest when all we do is going out */
948 if (signal_pending(current)) { 1108 if (signal_pending(current)) {
@@ -950,6 +1110,35 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
950 return -EINTR; 1110 return -EINTR;
951 } 1111 }
952 1112
1113 /* Save FPU state in stack */
1114 if (current->thread.regs->msr & MSR_FP)
1115 giveup_fpu(current);
1116 memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
1117 ext_bkp.fpscr = current->thread.fpscr;
1118 ext_bkp.fpexc_mode = current->thread.fpexc_mode;
1119
1120#ifdef CONFIG_ALTIVEC
1121 /* Save Altivec state in stack */
1122 if (save_vec) {
1123 if (current->thread.regs->msr & MSR_VEC)
1124 giveup_altivec(current);
1125 memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
1126 ext_bkp.vscr = current->thread.vscr;
1127 ext_bkp.vrsave = current->thread.vrsave;
1128 }
1129 ext_bkp.used_vr = current->thread.used_vr;
1130#endif
1131
1132#ifdef CONFIG_VSX
1133 /* Save VSX state in stack */
1134 if (save_vsx && (current->thread.regs->msr & MSR_VSX))
1135 __giveup_vsx(current);
1136 ext_bkp.used_vsr = current->thread.used_vsr;
1137#endif
1138
1139 /* Remember the MSR with disabled extensions */
1140 ext_msr = current->thread.regs->msr;
1141
953 /* XXX we get called with irq disabled - change that! */ 1142 /* XXX we get called with irq disabled - change that! */
954 local_irq_enable(); 1143 local_irq_enable();
955 1144
@@ -957,6 +1146,32 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
957 1146
958 local_irq_disable(); 1147 local_irq_disable();
959 1148
1149 current->thread.regs->msr = ext_msr;
1150
1151 /* Make sure we save the guest FPU/Altivec/VSX state */
1152 kvmppc_giveup_ext(vcpu, MSR_FP);
1153 kvmppc_giveup_ext(vcpu, MSR_VEC);
1154 kvmppc_giveup_ext(vcpu, MSR_VSX);
1155
1156 /* Restore FPU state from stack */
1157 memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
1158 current->thread.fpscr = ext_bkp.fpscr;
1159 current->thread.fpexc_mode = ext_bkp.fpexc_mode;
1160
1161#ifdef CONFIG_ALTIVEC
1162 /* Restore Altivec state from stack */
1163 if (save_vec && current->thread.used_vr) {
1164 memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
1165 current->thread.vscr = ext_bkp.vscr;
1166 current->thread.vrsave= ext_bkp.vrsave;
1167 }
1168 current->thread.used_vr = ext_bkp.used_vr;
1169#endif
1170
1171#ifdef CONFIG_VSX
1172 current->thread.used_vsr = ext_bkp.used_vsr;
1173#endif
1174
960 return ret; 1175 return ret;
961} 1176}
962 1177
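The __kvmppc_vcpu_run() changes above wrap guest execution with a backup and restore of the host's own FPU/Altivec/VSX state, since the lazy scheme just introduced lets the guest dirty those registers. A minimal sketch of that sandwich pattern, with ext_bkp reduced to a tiny struct and the giveup step folded into a comment (in the real function ext_bkp is a stack-local struct thread_struct and kvmppc_giveup_ext() flushes the guest state first):

#include <stdio.h>
#include <string.h>

struct thread_state {
    double fpr[4];
    int fpexc_mode;
};

static struct thread_state current_thread;  /* stand-in for current->thread */

static void run_guest(void)
{
    /* the guest runs and dirties the FP registers via the lazy scheme */
    current_thread.fpr[0] = -1.0;
}

static int vcpu_run(void)
{
    struct thread_state ext_bkp;

    /* Save host FPU state on the stack before the guest can touch it */
    memcpy(&ext_bkp, &current_thread, sizeof(ext_bkp));

    run_guest();

    /* Guest state would be flushed with kvmppc_giveup_ext() here, then
     * the host state comes back from the stack backup */
    memcpy(&current_thread, &ext_bkp, sizeof(current_thread));
    return 0;
}

int main(void)
{
    current_thread.fpr[0] = 3.14;
    vcpu_run();
    printf("host f0 preserved: %g\n", current_thread.fpr[0]);
    return 0;
}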
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c
index 1027eac6d474..2b0ee7e040c9 100644
--- a/arch/powerpc/kvm/book3s_64_emulate.c
+++ b/arch/powerpc/kvm/book3s_64_emulate.c
@@ -65,11 +65,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
65 case 31: 65 case 31:
66 switch (get_xop(inst)) { 66 switch (get_xop(inst)) {
67 case OP_31_XOP_MFMSR: 67 case OP_31_XOP_MFMSR:
68 vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr; 68 kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
69 break; 69 break;
70 case OP_31_XOP_MTMSRD: 70 case OP_31_XOP_MTMSRD:
71 { 71 {
72 ulong rs = vcpu->arch.gpr[get_rs(inst)]; 72 ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
73 if (inst & 0x10000) { 73 if (inst & 0x10000) {
74 vcpu->arch.msr &= ~(MSR_RI | MSR_EE); 74 vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
75 vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); 75 vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
@@ -78,30 +78,30 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
78 break; 78 break;
79 } 79 }
80 case OP_31_XOP_MTMSR: 80 case OP_31_XOP_MTMSR:
81 kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]); 81 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
82 break; 82 break;
83 case OP_31_XOP_MFSRIN: 83 case OP_31_XOP_MFSRIN:
84 { 84 {
85 int srnum; 85 int srnum;
86 86
87 srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf; 87 srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
88 if (vcpu->arch.mmu.mfsrin) { 88 if (vcpu->arch.mmu.mfsrin) {
89 u32 sr; 89 u32 sr;
90 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); 90 sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
91 vcpu->arch.gpr[get_rt(inst)] = sr; 91 kvmppc_set_gpr(vcpu, get_rt(inst), sr);
92 } 92 }
93 break; 93 break;
94 } 94 }
95 case OP_31_XOP_MTSRIN: 95 case OP_31_XOP_MTSRIN:
96 vcpu->arch.mmu.mtsrin(vcpu, 96 vcpu->arch.mmu.mtsrin(vcpu,
97 (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf, 97 (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
98 vcpu->arch.gpr[get_rs(inst)]); 98 kvmppc_get_gpr(vcpu, get_rs(inst)));
99 break; 99 break;
100 case OP_31_XOP_TLBIE: 100 case OP_31_XOP_TLBIE:
101 case OP_31_XOP_TLBIEL: 101 case OP_31_XOP_TLBIEL:
102 { 102 {
103 bool large = (inst & 0x00200000) ? true : false; 103 bool large = (inst & 0x00200000) ? true : false;
104 ulong addr = vcpu->arch.gpr[get_rb(inst)]; 104 ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
105 vcpu->arch.mmu.tlbie(vcpu, addr, large); 105 vcpu->arch.mmu.tlbie(vcpu, addr, large);
106 break; 106 break;
107 } 107 }
@@ -111,14 +111,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
111 if (!vcpu->arch.mmu.slbmte) 111 if (!vcpu->arch.mmu.slbmte)
112 return EMULATE_FAIL; 112 return EMULATE_FAIL;
113 113
114 vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)], 114 vcpu->arch.mmu.slbmte(vcpu,
115 vcpu->arch.gpr[get_rb(inst)]); 115 kvmppc_get_gpr(vcpu, get_rs(inst)),
116 kvmppc_get_gpr(vcpu, get_rb(inst)));
116 break; 117 break;
117 case OP_31_XOP_SLBIE: 118 case OP_31_XOP_SLBIE:
118 if (!vcpu->arch.mmu.slbie) 119 if (!vcpu->arch.mmu.slbie)
119 return EMULATE_FAIL; 120 return EMULATE_FAIL;
120 121
121 vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]); 122 vcpu->arch.mmu.slbie(vcpu,
123 kvmppc_get_gpr(vcpu, get_rb(inst)));
122 break; 124 break;
123 case OP_31_XOP_SLBIA: 125 case OP_31_XOP_SLBIA:
124 if (!vcpu->arch.mmu.slbia) 126 if (!vcpu->arch.mmu.slbia)
@@ -132,9 +134,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
132 } else { 134 } else {
133 ulong t, rb; 135 ulong t, rb;
134 136
135 rb = vcpu->arch.gpr[get_rb(inst)]; 137 rb = kvmppc_get_gpr(vcpu, get_rb(inst));
136 t = vcpu->arch.mmu.slbmfee(vcpu, rb); 138 t = vcpu->arch.mmu.slbmfee(vcpu, rb);
137 vcpu->arch.gpr[get_rt(inst)] = t; 139 kvmppc_set_gpr(vcpu, get_rt(inst), t);
138 } 140 }
139 break; 141 break;
140 case OP_31_XOP_SLBMFEV: 142 case OP_31_XOP_SLBMFEV:
@@ -143,20 +145,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
143 } else { 145 } else {
144 ulong t, rb; 146 ulong t, rb;
145 147
146 rb = vcpu->arch.gpr[get_rb(inst)]; 148 rb = kvmppc_get_gpr(vcpu, get_rb(inst));
147 t = vcpu->arch.mmu.slbmfev(vcpu, rb); 149 t = vcpu->arch.mmu.slbmfev(vcpu, rb);
148 vcpu->arch.gpr[get_rt(inst)] = t; 150 kvmppc_set_gpr(vcpu, get_rt(inst), t);
149 } 151 }
150 break; 152 break;
151 case OP_31_XOP_DCBZ: 153 case OP_31_XOP_DCBZ:
152 { 154 {
153 ulong rb = vcpu->arch.gpr[get_rb(inst)]; 155 ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
154 ulong ra = 0; 156 ulong ra = 0;
155 ulong addr; 157 ulong addr;
156 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; 158 u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
157 159
158 if (get_ra(inst)) 160 if (get_ra(inst))
159 ra = vcpu->arch.gpr[get_ra(inst)]; 161 ra = kvmppc_get_gpr(vcpu, get_ra(inst));
160 162
161 addr = (ra + rb) & ~31ULL; 163 addr = (ra + rb) & ~31ULL;
162 if (!(vcpu->arch.msr & MSR_SF)) 164 if (!(vcpu->arch.msr & MSR_SF))
@@ -233,43 +235,44 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
233int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 235int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
234{ 236{
235 int emulated = EMULATE_DONE; 237 int emulated = EMULATE_DONE;
238 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
236 239
237 switch (sprn) { 240 switch (sprn) {
238 case SPRN_SDR1: 241 case SPRN_SDR1:
239 to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs]; 242 to_book3s(vcpu)->sdr1 = spr_val;
240 break; 243 break;
241 case SPRN_DSISR: 244 case SPRN_DSISR:
242 to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs]; 245 to_book3s(vcpu)->dsisr = spr_val;
243 break; 246 break;
244 case SPRN_DAR: 247 case SPRN_DAR:
245 vcpu->arch.dear = vcpu->arch.gpr[rs]; 248 vcpu->arch.dear = spr_val;
246 break; 249 break;
247 case SPRN_HIOR: 250 case SPRN_HIOR:
248 to_book3s(vcpu)->hior = vcpu->arch.gpr[rs]; 251 to_book3s(vcpu)->hior = spr_val;
249 break; 252 break;
250 case SPRN_IBAT0U ... SPRN_IBAT3L: 253 case SPRN_IBAT0U ... SPRN_IBAT3L:
251 case SPRN_IBAT4U ... SPRN_IBAT7L: 254 case SPRN_IBAT4U ... SPRN_IBAT7L:
252 case SPRN_DBAT0U ... SPRN_DBAT3L: 255 case SPRN_DBAT0U ... SPRN_DBAT3L:
253 case SPRN_DBAT4U ... SPRN_DBAT7L: 256 case SPRN_DBAT4U ... SPRN_DBAT7L:
254 kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]); 257 kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
255 /* BAT writes happen so rarely that we're ok to flush 258 /* BAT writes happen so rarely that we're ok to flush
256 * everything here */ 259 * everything here */
257 kvmppc_mmu_pte_flush(vcpu, 0, 0); 260 kvmppc_mmu_pte_flush(vcpu, 0, 0);
258 break; 261 break;
259 case SPRN_HID0: 262 case SPRN_HID0:
260 to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs]; 263 to_book3s(vcpu)->hid[0] = spr_val;
261 break; 264 break;
262 case SPRN_HID1: 265 case SPRN_HID1:
263 to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs]; 266 to_book3s(vcpu)->hid[1] = spr_val;
264 break; 267 break;
265 case SPRN_HID2: 268 case SPRN_HID2:
266 to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs]; 269 to_book3s(vcpu)->hid[2] = spr_val;
267 break; 270 break;
268 case SPRN_HID4: 271 case SPRN_HID4:
269 to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs]; 272 to_book3s(vcpu)->hid[4] = spr_val;
270 break; 273 break;
271 case SPRN_HID5: 274 case SPRN_HID5:
272 to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs]; 275 to_book3s(vcpu)->hid[5] = spr_val;
273 /* guest HID5 set can change is_dcbz32 */ 276 /* guest HID5 set can change is_dcbz32 */
274 if (vcpu->arch.mmu.is_dcbz32(vcpu) && 277 if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
275 (mfmsr() & MSR_HV)) 278 (mfmsr() & MSR_HV))
@@ -299,38 +302,38 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
299 302
300 switch (sprn) { 303 switch (sprn) {
301 case SPRN_SDR1: 304 case SPRN_SDR1:
302 vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1; 305 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
303 break; 306 break;
304 case SPRN_DSISR: 307 case SPRN_DSISR:
305 vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr; 308 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
306 break; 309 break;
307 case SPRN_DAR: 310 case SPRN_DAR:
308 vcpu->arch.gpr[rt] = vcpu->arch.dear; 311 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
309 break; 312 break;
310 case SPRN_HIOR: 313 case SPRN_HIOR:
311 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior; 314 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
312 break; 315 break;
313 case SPRN_HID0: 316 case SPRN_HID0:
314 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0]; 317 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
315 break; 318 break;
316 case SPRN_HID1: 319 case SPRN_HID1:
317 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1]; 320 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
318 break; 321 break;
319 case SPRN_HID2: 322 case SPRN_HID2:
320 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2]; 323 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
321 break; 324 break;
322 case SPRN_HID4: 325 case SPRN_HID4:
323 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4]; 326 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
324 break; 327 break;
325 case SPRN_HID5: 328 case SPRN_HID5:
326 vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5]; 329 kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
327 break; 330 break;
328 case SPRN_THRM1: 331 case SPRN_THRM1:
329 case SPRN_THRM2: 332 case SPRN_THRM2:
330 case SPRN_THRM3: 333 case SPRN_THRM3:
331 case SPRN_CTRLF: 334 case SPRN_CTRLF:
332 case SPRN_CTRLT: 335 case SPRN_CTRLT:
333 vcpu->arch.gpr[rt] = 0; 336 kvmppc_set_gpr(vcpu, rt, 0);
334 break; 337 break;
335 default: 338 default:
336 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); 339 printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
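Every direct vcpu->arch.gpr[...] access in this emulation code is replaced by kvmppc_get_gpr()/kvmppc_set_gpr(). The accessors themselves are defined elsewhere in the series and are not shown in this hunk; presumably they are thin inlines along these lines, which keeps every call site working if the GPR storage later moves (for example partially into the PACA for lightweight exits):

#include <stdio.h>

typedef unsigned long ulong;

/* Reduced stand-in for struct kvm_vcpu from asm/kvm_host.h */
struct kvm_vcpu {
    struct {
        ulong gpr[32];
    } arch;
};

static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
    return vcpu->arch.gpr[num];
}

static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
    vcpu->arch.gpr[num] = val;
}

int main(void)
{
    struct kvm_vcpu vcpu;

    kvmppc_set_gpr(&vcpu, 3, 0x1234);   /* e.g. an emulated mfspr result into r3 */
    printf("r3 = %#lx\n", kvmppc_get_gpr(&vcpu, 3));
    return 0;
}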
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_64_exports.c
index 5b2db38ed86c..1dd5a1ddfd0d 100644
--- a/arch/powerpc/kvm/book3s_64_exports.c
+++ b/arch/powerpc/kvm/book3s_64_exports.c
@@ -22,3 +22,11 @@
22 22
23EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter); 23EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
24EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem); 24EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
25EXPORT_SYMBOL_GPL(kvmppc_rmcall);
26EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
27#ifdef CONFIG_ALTIVEC
28EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
29#endif
30#ifdef CONFIG_VSX
31EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
32#endif
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S
index 7b55d8094c8b..c1584d0cbce8 100644
--- a/arch/powerpc/kvm/book3s_64_interrupts.S
+++ b/arch/powerpc/kvm/book3s_64_interrupts.S
@@ -28,11 +28,6 @@
28#define ULONG_SIZE 8 28#define ULONG_SIZE 8
29#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) 29#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
30 30
31.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
32 ld \tmp_reg, (PACA_EXMC+\offset)(r13)
33 std \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
34.endm
35
36.macro DISABLE_INTERRUPTS 31.macro DISABLE_INTERRUPTS
37 mfmsr r0 32 mfmsr r0
38 rldicl r0,r0,48,1 33 rldicl r0,r0,48,1
@@ -40,6 +35,26 @@
40 mtmsrd r0,1 35 mtmsrd r0,1
41.endm 36.endm
42 37
38#define VCPU_LOAD_NVGPRS(vcpu) \
39 ld r14, VCPU_GPR(r14)(vcpu); \
40 ld r15, VCPU_GPR(r15)(vcpu); \
41 ld r16, VCPU_GPR(r16)(vcpu); \
42 ld r17, VCPU_GPR(r17)(vcpu); \
43 ld r18, VCPU_GPR(r18)(vcpu); \
44 ld r19, VCPU_GPR(r19)(vcpu); \
45 ld r20, VCPU_GPR(r20)(vcpu); \
46 ld r21, VCPU_GPR(r21)(vcpu); \
47 ld r22, VCPU_GPR(r22)(vcpu); \
48 ld r23, VCPU_GPR(r23)(vcpu); \
49 ld r24, VCPU_GPR(r24)(vcpu); \
50 ld r25, VCPU_GPR(r25)(vcpu); \
51 ld r26, VCPU_GPR(r26)(vcpu); \
52 ld r27, VCPU_GPR(r27)(vcpu); \
53 ld r28, VCPU_GPR(r28)(vcpu); \
54 ld r29, VCPU_GPR(r29)(vcpu); \
55 ld r30, VCPU_GPR(r30)(vcpu); \
56 ld r31, VCPU_GPR(r31)(vcpu); \
57
43/***************************************************************************** 58/*****************************************************************************
44 * * 59 * *
45 * Guest entry / exit code that is in kernel module memory (highmem) * 60 * Guest entry / exit code that is in kernel module memory (highmem) *
@@ -67,61 +82,32 @@ kvm_start_entry:
67 SAVE_NVGPRS(r1) 82 SAVE_NVGPRS(r1)
68 83
69 /* Save LR */ 84 /* Save LR */
70 mflr r14 85 std r0, _LINK(r1)
71 std r14, _LINK(r1)
72
73/* XXX optimize non-volatile loading away */
74kvm_start_lightweight:
75 86
76 DISABLE_INTERRUPTS 87 /* Load non-volatile guest state from the vcpu */
88 VCPU_LOAD_NVGPRS(r4)
77 89
78 /* Save R1/R2 in the PACA */ 90 /* Save R1/R2 in the PACA */
79 std r1, PACAR1(r13) 91 std r1, PACA_KVM_HOST_R1(r13)
80 std r2, (PACA_EXMC+EX_SRR0)(r13) 92 std r2, PACA_KVM_HOST_R2(r13)
93
94 /* XXX swap in/out on load? */
81 ld r3, VCPU_HIGHMEM_HANDLER(r4) 95 ld r3, VCPU_HIGHMEM_HANDLER(r4)
82 std r3, PACASAVEDMSR(r13) 96 std r3, PACA_KVM_VMHANDLER(r13)
83 97
84 /* Load non-volatile guest state from the vcpu */ 98kvm_start_lightweight:
85 ld r14, VCPU_GPR(r14)(r4)
86 ld r15, VCPU_GPR(r15)(r4)
87 ld r16, VCPU_GPR(r16)(r4)
88 ld r17, VCPU_GPR(r17)(r4)
89 ld r18, VCPU_GPR(r18)(r4)
90 ld r19, VCPU_GPR(r19)(r4)
91 ld r20, VCPU_GPR(r20)(r4)
92 ld r21, VCPU_GPR(r21)(r4)
93 ld r22, VCPU_GPR(r22)(r4)
94 ld r23, VCPU_GPR(r23)(r4)
95 ld r24, VCPU_GPR(r24)(r4)
96 ld r25, VCPU_GPR(r25)(r4)
97 ld r26, VCPU_GPR(r26)(r4)
98 ld r27, VCPU_GPR(r27)(r4)
99 ld r28, VCPU_GPR(r28)(r4)
100 ld r29, VCPU_GPR(r29)(r4)
101 ld r30, VCPU_GPR(r30)(r4)
102 ld r31, VCPU_GPR(r31)(r4)
103 99
104 ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ 100 ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */
105 ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ 101 ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
106 102
107 ld r3, VCPU_TRAMPOLINE_ENTER(r4) 103 /* Load some guest state in the respective registers */
108 mtsrr0 r3 104 ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */
109 105 /* will be swapped in by rmcall */
110 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
111 mtsrr1 r3
112
113 /* Load guest state in the respective registers */
114 lwz r3, VCPU_CR(r4) /* r3 = vcpu->arch.cr */
115 stw r3, (PACA_EXMC + EX_CCR)(r13)
116
117 ld r3, VCPU_CTR(r4) /* r3 = vcpu->arch.ctr */
118 mtctr r3 /* CTR = r3 */
119 106
120 ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ 107 ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */
121 mtlr r3 /* LR = r3 */ 108 mtlr r3 /* LR = r3 */
122 109
123 ld r3, VCPU_XER(r4) /* r3 = vcpu->arch.xer */ 110 DISABLE_INTERRUPTS
124 std r3, (PACA_EXMC + EX_R3)(r13)
125 111
126 /* Some guests may need to have dcbz set to 32 byte length. 112 /* Some guests may need to have dcbz set to 32 byte length.
127 * 113 *
@@ -141,36 +127,15 @@ kvm_start_lightweight:
141 mtspr SPRN_HID5,r3 127 mtspr SPRN_HID5,r3
142 128
143no_dcbz32_on: 129no_dcbz32_on:
144 /* Load guest GPRs */ 130
145 131 ld r6, VCPU_RMCALL(r4)
146 ld r3, VCPU_GPR(r9)(r4) 132 mtctr r6
147 std r3, (PACA_EXMC + EX_R9)(r13) 133
148 ld r3, VCPU_GPR(r10)(r4) 134 ld r3, VCPU_TRAMPOLINE_ENTER(r4)
149 std r3, (PACA_EXMC + EX_R10)(r13) 135 LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
150 ld r3, VCPU_GPR(r11)(r4)
151 std r3, (PACA_EXMC + EX_R11)(r13)
152 ld r3, VCPU_GPR(r12)(r4)
153 std r3, (PACA_EXMC + EX_R12)(r13)
154 ld r3, VCPU_GPR(r13)(r4)
155 std r3, (PACA_EXMC + EX_R13)(r13)
156
157 ld r0, VCPU_GPR(r0)(r4)
158 ld r1, VCPU_GPR(r1)(r4)
159 ld r2, VCPU_GPR(r2)(r4)
160 ld r3, VCPU_GPR(r3)(r4)
161 ld r5, VCPU_GPR(r5)(r4)
162 ld r6, VCPU_GPR(r6)(r4)
163 ld r7, VCPU_GPR(r7)(r4)
164 ld r8, VCPU_GPR(r8)(r4)
165 ld r4, VCPU_GPR(r4)(r4)
166
167 /* This sets the Magic value for the trampoline */
168
169 li r11, 1
170 stb r11, PACA_KVM_IN_GUEST(r13)
171 136
172 /* Jump to SLB patching handlder and into our guest */ 137 /* Jump to SLB patching handlder and into our guest */
173 RFI 138 bctr
174 139
175/* 140/*
176 * This is the handler in module memory. It gets jumped at from the 141 * This is the handler in module memory. It gets jumped at from the
@@ -184,125 +149,70 @@ kvmppc_handler_highmem:
184 /* 149 /*
185 * Register usage at this point: 150 * Register usage at this point:
186 * 151 *
187 * R00 = guest R13 152 * R0 = guest last inst
188 * R01 = host R1 153 * R1 = host R1
189 * R02 = host R2 154 * R2 = host R2
190 * R10 = guest PC 155 * R3 = guest PC
191 * R11 = guest MSR 156 * R4 = guest MSR
192 * R12 = exit handler id 157 * R5 = guest DAR
193 * R13 = PACA 158 * R6 = guest DSISR
194 * PACA.exmc.R9 = guest R1 159 * R13 = PACA
195 * PACA.exmc.R10 = guest R10 160 * PACA.KVM.* = guest *
196 * PACA.exmc.R11 = guest R11
197 * PACA.exmc.R12 = guest R12
198 * PACA.exmc.R13 = guest R2
199 * PACA.exmc.DAR = guest DAR
200 * PACA.exmc.DSISR = guest DSISR
201 * PACA.exmc.LR = guest instruction
202 * PACA.exmc.CCR = guest CR
203 * PACA.exmc.SRR0 = guest R0
204 * 161 *
205 */ 162 */
206 163
207 std r3, (PACA_EXMC+EX_R3)(r13) 164 /* R7 = vcpu */
165 ld r7, GPR4(r1)
208 166
209 /* save the exit id in R3 */ 167 /* Now save the guest state */
210 mr r3, r12
211 168
212 /* R12 = vcpu */ 169 stw r0, VCPU_LAST_INST(r7)
213 ld r12, GPR4(r1)
214 170
215 /* Now save the guest state */ 171 std r3, VCPU_PC(r7)
172 std r4, VCPU_SHADOW_SRR1(r7)
173 std r5, VCPU_FAULT_DEAR(r7)
174 std r6, VCPU_FAULT_DSISR(r7)
216 175
217 std r0, VCPU_GPR(r13)(r12) 176 ld r5, VCPU_HFLAGS(r7)
218 std r4, VCPU_GPR(r4)(r12)
219 std r5, VCPU_GPR(r5)(r12)
220 std r6, VCPU_GPR(r6)(r12)
221 std r7, VCPU_GPR(r7)(r12)
222 std r8, VCPU_GPR(r8)(r12)
223 std r9, VCPU_GPR(r9)(r12)
224
225 /* get registers from PACA */
226 mfpaca r5, r0, EX_SRR0, r12
227 mfpaca r5, r3, EX_R3, r12
228 mfpaca r5, r1, EX_R9, r12
229 mfpaca r5, r10, EX_R10, r12
230 mfpaca r5, r11, EX_R11, r12
231 mfpaca r5, r12, EX_R12, r12
232 mfpaca r5, r2, EX_R13, r12
233
234 lwz r5, (PACA_EXMC+EX_LR)(r13)
235 stw r5, VCPU_LAST_INST(r12)
236
237 lwz r5, (PACA_EXMC+EX_CCR)(r13)
238 stw r5, VCPU_CR(r12)
239
240 ld r5, VCPU_HFLAGS(r12)
241 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ 177 rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
242 beq no_dcbz32_off 178 beq no_dcbz32_off
243 179
180 li r4, 0
244 mfspr r5,SPRN_HID5 181 mfspr r5,SPRN_HID5
245 rldimi r5,r5,6,56 182 rldimi r5,r4,6,56
246 mtspr SPRN_HID5,r5 183 mtspr SPRN_HID5,r5
247 184
248no_dcbz32_off: 185no_dcbz32_off:
249 186
250 /* XXX maybe skip on lightweight? */ 187 std r14, VCPU_GPR(r14)(r7)
251 std r14, VCPU_GPR(r14)(r12) 188 std r15, VCPU_GPR(r15)(r7)
252 std r15, VCPU_GPR(r15)(r12) 189 std r16, VCPU_GPR(r16)(r7)
253 std r16, VCPU_GPR(r16)(r12) 190 std r17, VCPU_GPR(r17)(r7)
254 std r17, VCPU_GPR(r17)(r12) 191 std r18, VCPU_GPR(r18)(r7)
255 std r18, VCPU_GPR(r18)(r12) 192 std r19, VCPU_GPR(r19)(r7)
256 std r19, VCPU_GPR(r19)(r12) 193 std r20, VCPU_GPR(r20)(r7)
257 std r20, VCPU_GPR(r20)(r12) 194 std r21, VCPU_GPR(r21)(r7)
258 std r21, VCPU_GPR(r21)(r12) 195 std r22, VCPU_GPR(r22)(r7)
259 std r22, VCPU_GPR(r22)(r12) 196 std r23, VCPU_GPR(r23)(r7)
260 std r23, VCPU_GPR(r23)(r12) 197 std r24, VCPU_GPR(r24)(r7)
261 std r24, VCPU_GPR(r24)(r12) 198 std r25, VCPU_GPR(r25)(r7)
262 std r25, VCPU_GPR(r25)(r12) 199 std r26, VCPU_GPR(r26)(r7)
263 std r26, VCPU_GPR(r26)(r12) 200 std r27, VCPU_GPR(r27)(r7)
264 std r27, VCPU_GPR(r27)(r12) 201 std r28, VCPU_GPR(r28)(r7)
265 std r28, VCPU_GPR(r28)(r12) 202 std r29, VCPU_GPR(r29)(r7)
266 std r29, VCPU_GPR(r29)(r12) 203 std r30, VCPU_GPR(r30)(r7)
267 std r30, VCPU_GPR(r30)(r12) 204 std r31, VCPU_GPR(r31)(r7)
268 std r31, VCPU_GPR(r31)(r12) 205
269 206 /* Save guest CTR */
270 /* Restore non-volatile host registers (r14 - r31) */
271 REST_NVGPRS(r1)
272
273 /* Save guest PC (R10) */
274 std r10, VCPU_PC(r12)
275
276 /* Save guest msr (R11) */
277 std r11, VCPU_SHADOW_MSR(r12)
278
279 /* Save guest CTR (in R12) */
280 mfctr r5 207 mfctr r5
281 std r5, VCPU_CTR(r12) 208 std r5, VCPU_CTR(r7)
282 209
283 /* Save guest LR */ 210 /* Save guest LR */
284 mflr r5 211 mflr r5
285 std r5, VCPU_LR(r12) 212 std r5, VCPU_LR(r7)
286
287 /* Save guest XER */
288 mfxer r5
289 std r5, VCPU_XER(r12)
290
291 /* Save guest DAR */
292 ld r5, (PACA_EXMC+EX_DAR)(r13)
293 std r5, VCPU_FAULT_DEAR(r12)
294
295 /* Save guest DSISR */
296 lwz r5, (PACA_EXMC+EX_DSISR)(r13)
297 std r5, VCPU_FAULT_DSISR(r12)
298 213
299 /* Restore host msr -> SRR1 */ 214 /* Restore host msr -> SRR1 */
300 ld r7, VCPU_HOST_MSR(r12) 215 ld r6, VCPU_HOST_MSR(r7)
301 mtsrr1 r7
302
303 /* Restore host IP -> SRR0 */
304 ld r6, VCPU_HOST_RETIP(r12)
305 mtsrr0 r6
306 216
307 /* 217 /*
308 * For some interrupts, we need to call the real Linux 218 * For some interrupts, we need to call the real Linux
@@ -314,13 +224,14 @@ no_dcbz32_off:
314 * r3 = address of interrupt handler (exit reason) 224 * r3 = address of interrupt handler (exit reason)
315 */ 225 */
316 226
317 cmpwi r3, BOOK3S_INTERRUPT_EXTERNAL 227 cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
318 beq call_linux_handler 228 beq call_linux_handler
319 cmpwi r3, BOOK3S_INTERRUPT_DECREMENTER 229 cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
320 beq call_linux_handler 230 beq call_linux_handler
321 231
322 /* Back to Interruptable Mode! (goto kvm_return_point) */ 232 /* Back to EE=1 */
323 RFI 233 mtmsr r6
234 b kvm_return_point
324 235
325call_linux_handler: 236call_linux_handler:
326 237
@@ -333,16 +244,22 @@ call_linux_handler:
333 * interrupt handler! 244 * interrupt handler!
334 * 245 *
335 * R3 still contains the exit code, 246 * R3 still contains the exit code,
336 * R6 VCPU_HOST_RETIP and 247 * R5 VCPU_HOST_RETIP and
337 * R7 VCPU_HOST_MSR 248 * R6 VCPU_HOST_MSR
338 */ 249 */
339 250
340 mtlr r3 251 /* Restore host IP -> SRR0 */
252 ld r5, VCPU_HOST_RETIP(r7)
253
254 /* XXX Better move to a safe function?
255 * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
341 256
342 ld r5, VCPU_TRAMPOLINE_LOWMEM(r12) 257 mtlr r12
343 mtsrr0 r5 258
344 LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR)) 259 ld r4, VCPU_TRAMPOLINE_LOWMEM(r7)
345 mtsrr1 r5 260 mtsrr0 r4
261 LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
262 mtsrr1 r3
346 263
347 RFI 264 RFI
348 265
@@ -351,42 +268,51 @@ kvm_return_point:
351 268
352 /* Jump back to lightweight entry if we're supposed to */ 269 /* Jump back to lightweight entry if we're supposed to */
353 /* go back into the guest */ 270 /* go back into the guest */
354 mr r5, r3 271
272 /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
273 mr r5, r12
274
355 /* Restore r3 (kvm_run) and r4 (vcpu) */ 275 /* Restore r3 (kvm_run) and r4 (vcpu) */
356 REST_2GPRS(3, r1) 276 REST_2GPRS(3, r1)
357 bl KVMPPC_HANDLE_EXIT 277 bl KVMPPC_HANDLE_EXIT
358 278
359#if 0 /* XXX get lightweight exits back */ 279 /* If RESUME_GUEST, get back in the loop */
360 cmpwi r3, RESUME_GUEST 280 cmpwi r3, RESUME_GUEST
361 bne kvm_exit_heavyweight 281 beq kvm_loop_lightweight
362 282
363 /* put VCPU and KVM_RUN back into place and roll again! */ 283 cmpwi r3, RESUME_GUEST_NV
364 REST_2GPRS(3, r1) 284 beq kvm_loop_heavyweight
365 b kvm_start_lightweight
366 285
367kvm_exit_heavyweight: 286kvm_exit_loop:
368 /* Restore non-volatile host registers */
369 ld r14, _LINK(r1)
370 mtlr r14
371 REST_NVGPRS(r1)
372 287
373 addi r1, r1, SWITCH_FRAME_SIZE
374#else
375 ld r4, _LINK(r1) 288 ld r4, _LINK(r1)
376 mtlr r4 289 mtlr r4
377 290
378 cmpwi r3, RESUME_GUEST 291 /* Restore non-volatile host registers (r14 - r31) */
379 bne kvm_exit_heavyweight 292 REST_NVGPRS(r1)
293
294 addi r1, r1, SWITCH_FRAME_SIZE
295 blr
296
297kvm_loop_heavyweight:
298
299 ld r4, _LINK(r1)
300 std r4, (16 + SWITCH_FRAME_SIZE)(r1)
380 301
302 /* Load vcpu and cpu_run */
381 REST_2GPRS(3, r1) 303 REST_2GPRS(3, r1)
382 304
383 addi r1, r1, SWITCH_FRAME_SIZE 305 /* Load non-volatile guest state from the vcpu */
306 VCPU_LOAD_NVGPRS(r4)
384 307
385 b kvm_start_entry 308 /* Jump back into the beginning of this function */
309 b kvm_start_lightweight
386 310
387kvm_exit_heavyweight: 311kvm_loop_lightweight:
388 312
389 addi r1, r1, SWITCH_FRAME_SIZE 313 /* We'll need the vcpu pointer */
390#endif 314 REST_GPR(4, r1)
315
316 /* Jump back into the beginning of this function */
317 b kvm_start_lightweight
391 318
392 blr
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index e4beeb371a73..512dcff77554 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -54,7 +54,7 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
54 if (!vcpu_book3s->slb[i].valid) 54 if (!vcpu_book3s->slb[i].valid)
55 continue; 55 continue;
56 56
57 if (vcpu_book3s->slb[i].large) 57 if (vcpu_book3s->slb[i].tb)
58 cmp_esid = esid_1t; 58 cmp_esid = esid_1t;
59 59
60 if (vcpu_book3s->slb[i].esid == cmp_esid) 60 if (vcpu_book3s->slb[i].esid == cmp_esid)
@@ -65,9 +65,10 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
65 eaddr, esid, esid_1t); 65 eaddr, esid, esid_1t);
66 for (i = 0; i < vcpu_book3s->slb_nr; i++) { 66 for (i = 0; i < vcpu_book3s->slb_nr; i++) {
67 if (vcpu_book3s->slb[i].vsid) 67 if (vcpu_book3s->slb[i].vsid)
68 dprintk(" %d: %c%c %llx %llx\n", i, 68 dprintk(" %d: %c%c%c %llx %llx\n", i,
69 vcpu_book3s->slb[i].valid ? 'v' : ' ', 69 vcpu_book3s->slb[i].valid ? 'v' : ' ',
70 vcpu_book3s->slb[i].large ? 'l' : ' ', 70 vcpu_book3s->slb[i].large ? 'l' : ' ',
71 vcpu_book3s->slb[i].tb ? 't' : ' ',
71 vcpu_book3s->slb[i].esid, 72 vcpu_book3s->slb[i].esid,
72 vcpu_book3s->slb[i].vsid); 73 vcpu_book3s->slb[i].vsid);
73 } 74 }
@@ -84,7 +85,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
84 if (!slb) 85 if (!slb)
85 return 0; 86 return 0;
86 87
87 if (slb->large) 88 if (slb->tb)
88 return (((u64)eaddr >> 12) & 0xfffffff) | 89 return (((u64)eaddr >> 12) & 0xfffffff) |
89 (((u64)slb->vsid) << 28); 90 (((u64)slb->vsid) << 28);
90 91
@@ -309,7 +310,8 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
309 slbe = &vcpu_book3s->slb[slb_nr]; 310 slbe = &vcpu_book3s->slb[slb_nr];
310 311
311 slbe->large = (rs & SLB_VSID_L) ? 1 : 0; 312 slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
312 slbe->esid = slbe->large ? esid_1t : esid; 313 slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
314 slbe->esid = slbe->tb ? esid_1t : esid;
313 slbe->vsid = rs >> 12; 315 slbe->vsid = rs >> 12;
314 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; 316 slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
315 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; 317 slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S
index fb7dd2e9ac88..c83c60ad96c5 100644
--- a/arch/powerpc/kvm/book3s_64_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S
@@ -45,36 +45,25 @@ kvmppc_trampoline_\intno:
45 * To distinguish, we check a magic byte in the PACA 45 * To distinguish, we check a magic byte in the PACA
46 */ 46 */
47 mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ 47 mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */
48 std r12, (PACA_EXMC + EX_R12)(r13) 48 std r12, PACA_KVM_SCRATCH0(r13)
49 mfcr r12 49 mfcr r12
50 stw r12, (PACA_EXMC + EX_CCR)(r13) 50 stw r12, PACA_KVM_SCRATCH1(r13)
51 lbz r12, PACA_KVM_IN_GUEST(r13) 51 lbz r12, PACA_KVM_IN_GUEST(r13)
52 cmpwi r12, 0 52 cmpwi r12, KVM_GUEST_MODE_NONE
53 bne ..kvmppc_handler_hasmagic_\intno 53 bne ..kvmppc_handler_hasmagic_\intno
54 /* No KVM guest? Then jump back to the Linux handler! */ 54 /* No KVM guest? Then jump back to the Linux handler! */
55 lwz r12, (PACA_EXMC + EX_CCR)(r13) 55 lwz r12, PACA_KVM_SCRATCH1(r13)
56 mtcr r12 56 mtcr r12
57 ld r12, (PACA_EXMC + EX_R12)(r13) 57 ld r12, PACA_KVM_SCRATCH0(r13)
58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ 58 mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
59 b kvmppc_resume_\intno /* Get back original handler */ 59 b kvmppc_resume_\intno /* Get back original handler */
60 60
61 /* Now we know we're handling a KVM guest */ 61 /* Now we know we're handling a KVM guest */
62..kvmppc_handler_hasmagic_\intno: 62..kvmppc_handler_hasmagic_\intno:
63 /* Unset guest state */
64 li r12, 0
65 stb r12, PACA_KVM_IN_GUEST(r13)
66 63
67 std r1, (PACA_EXMC+EX_R9)(r13) 64 /* Should we just skip the faulting instruction? */
68 std r10, (PACA_EXMC+EX_R10)(r13) 65 cmpwi r12, KVM_GUEST_MODE_SKIP
69 std r11, (PACA_EXMC+EX_R11)(r13) 66 beq kvmppc_handler_skip_ins
70 std r2, (PACA_EXMC+EX_R13)(r13)
71
72 mfsrr0 r10
73 mfsrr1 r11
74
75 /* Restore R1/R2 so we can handle faults */
76 ld r1, PACAR1(r13)
77 ld r2, (PACA_EXMC+EX_SRR0)(r13)
78 67
79 /* Let's store which interrupt we're handling */ 68 /* Let's store which interrupt we're handling */
80 li r12, \intno 69 li r12, \intno
@@ -102,23 +91,107 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
102INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX 91INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
103 92
104/* 93/*
94 * Bring us back to the faulting code, but skip the
95 * faulting instruction.
96 *
97 * This is a generic exit path from the interrupt
98 * trampolines above.
99 *
100 * Input Registers:
101 *
102 * R12 = free
103 * R13 = PACA
104 * PACA.KVM.SCRATCH0 = guest R12
105 * PACA.KVM.SCRATCH1 = guest CR
106 * SPRG_SCRATCH0 = guest R13
107 *
108 */
109kvmppc_handler_skip_ins:
110
111 /* Patch the IP to the next instruction */
112 mfsrr0 r12
113 addi r12, r12, 4
114 mtsrr0 r12
115
116 /* Clean up all state */
117 lwz r12, PACA_KVM_SCRATCH1(r13)
118 mtcr r12
119 ld r12, PACA_KVM_SCRATCH0(r13)
120 mfspr r13, SPRN_SPRG_SCRATCH0
121
122 /* And get back into the code */
123 RFI
124
125/*
105 * This trampoline brings us back to a real mode handler 126 * This trampoline brings us back to a real mode handler
106 * 127 *
107 * Input Registers: 128 * Input Registers:
108 * 129 *
109 * R6 = SRR0 130 * R5 = SRR0
110 * R7 = SRR1 131 * R6 = SRR1
111 * LR = real-mode IP 132 * LR = real-mode IP
112 * 133 *
113 */ 134 */
114.global kvmppc_handler_lowmem_trampoline 135.global kvmppc_handler_lowmem_trampoline
115kvmppc_handler_lowmem_trampoline: 136kvmppc_handler_lowmem_trampoline:
116 137
117 mtsrr0 r6 138 mtsrr0 r5
118 mtsrr1 r7 139 mtsrr1 r6
119 blr 140 blr
120kvmppc_handler_lowmem_trampoline_end: 141kvmppc_handler_lowmem_trampoline_end:
121 142
143/*
144 * Call a function in real mode
145 *
146 * Input Registers:
147 *
148 * R3 = function
149 * R4 = MSR
150 * R5 = CTR
151 *
152 */
153_GLOBAL(kvmppc_rmcall)
154 mtmsr r4 /* Disable relocation, so mtsrr
155 doesn't get interrupted */
156 mtctr r5
157 mtsrr0 r3
158 mtsrr1 r4
159 RFI
160
161/*
162 * Activate current's external feature (FPU/Altivec/VSX)
163 */
164#define define_load_up(what) \
165 \
166_GLOBAL(kvmppc_load_up_ ## what); \
167 subi r1, r1, INT_FRAME_SIZE; \
168 mflr r3; \
169 std r3, _LINK(r1); \
170 mfmsr r4; \
171 std r31, GPR3(r1); \
172 mr r31, r4; \
173 li r5, MSR_DR; \
174 oris r5, r5, MSR_EE@h; \
175 andc r4, r4, r5; \
176 mtmsr r4; \
177 \
178 bl .load_up_ ## what; \
179 \
180 mtmsr r31; \
181 ld r3, _LINK(r1); \
182 ld r31, GPR3(r1); \
183 addi r1, r1, INT_FRAME_SIZE; \
184 mtlr r3; \
185 blr
186
187define_load_up(fpu)
188#ifdef CONFIG_ALTIVEC
189define_load_up(altivec)
190#endif
191#ifdef CONFIG_VSX
192define_load_up(vsx)
193#endif
194
122.global kvmppc_trampoline_lowmem 195.global kvmppc_trampoline_lowmem
123kvmppc_trampoline_lowmem: 196kvmppc_trampoline_lowmem:
124 .long kvmppc_handler_lowmem_trampoline - _stext 197 .long kvmppc_handler_lowmem_trampoline - _stext
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index ecd237a03fd0..35b762722187 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -31,7 +31,7 @@
31#define REBOLT_SLB_ENTRY(num) \ 31#define REBOLT_SLB_ENTRY(num) \
32 ld r10, SHADOW_SLB_ESID(num)(r11); \ 32 ld r10, SHADOW_SLB_ESID(num)(r11); \
33 cmpdi r10, 0; \ 33 cmpdi r10, 0; \
34 beq slb_exit_skip_1; \ 34 beq slb_exit_skip_ ## num; \
35 oris r10, r10, SLB_ESID_V@h; \ 35 oris r10, r10, SLB_ESID_V@h; \
36 ld r9, SHADOW_SLB_VSID(num)(r11); \ 36 ld r9, SHADOW_SLB_VSID(num)(r11); \
37 slbmte r9, r10; \ 37 slbmte r9, r10; \
@@ -51,23 +51,21 @@ kvmppc_handler_trampoline_enter:
51 * 51 *
52 * MSR = ~IR|DR 52 * MSR = ~IR|DR
53 * R13 = PACA 53 * R13 = PACA
54 * R1 = host R1
55 * R2 = host R2
54 * R9 = guest IP 56 * R9 = guest IP
55 * R10 = guest MSR 57 * R10 = guest MSR
56 * R11 = free 58 * all other GPRS = free
57 * R12 = free 59 * PACA[KVM_CR] = guest CR
58 * PACA[PACA_EXMC + EX_R9] = guest R9 60 * PACA[KVM_XER] = guest XER
59 * PACA[PACA_EXMC + EX_R10] = guest R10
60 * PACA[PACA_EXMC + EX_R11] = guest R11
61 * PACA[PACA_EXMC + EX_R12] = guest R12
62 * PACA[PACA_EXMC + EX_R13] = guest R13
63 * PACA[PACA_EXMC + EX_CCR] = guest CR
64 * PACA[PACA_EXMC + EX_R3] = guest XER
65 */ 61 */
66 62
67 mtsrr0 r9 63 mtsrr0 r9
68 mtsrr1 r10 64 mtsrr1 r10
69 65
70 mtspr SPRN_SPRG_SCRATCH0, r0 66 /* Activate guest mode, so faults get handled by KVM */
67 li r11, KVM_GUEST_MODE_GUEST
68 stb r11, PACA_KVM_IN_GUEST(r13)
71 69
72 /* Remove LPAR shadow entries */ 70 /* Remove LPAR shadow entries */
73 71
@@ -131,20 +129,27 @@ slb_do_enter:
131 129
132 /* Enter guest */ 130 /* Enter guest */
133 131
134 mfspr r0, SPRN_SPRG_SCRATCH0 132 ld r0, (PACA_KVM_R0)(r13)
135 133 ld r1, (PACA_KVM_R1)(r13)
136 ld r9, (PACA_EXMC+EX_R9)(r13) 134 ld r2, (PACA_KVM_R2)(r13)
137 ld r10, (PACA_EXMC+EX_R10)(r13) 135 ld r3, (PACA_KVM_R3)(r13)
138 ld r12, (PACA_EXMC+EX_R12)(r13) 136 ld r4, (PACA_KVM_R4)(r13)
139 137 ld r5, (PACA_KVM_R5)(r13)
140 lwz r11, (PACA_EXMC+EX_CCR)(r13) 138 ld r6, (PACA_KVM_R6)(r13)
139 ld r7, (PACA_KVM_R7)(r13)
140 ld r8, (PACA_KVM_R8)(r13)
141 ld r9, (PACA_KVM_R9)(r13)
142 ld r10, (PACA_KVM_R10)(r13)
143 ld r12, (PACA_KVM_R12)(r13)
144
145 lwz r11, (PACA_KVM_CR)(r13)
141 mtcr r11 146 mtcr r11
142 147
143 ld r11, (PACA_EXMC+EX_R3)(r13) 148 ld r11, (PACA_KVM_XER)(r13)
144 mtxer r11 149 mtxer r11
145 150
146 ld r11, (PACA_EXMC+EX_R11)(r13) 151 ld r11, (PACA_KVM_R11)(r13)
147 ld r13, (PACA_EXMC+EX_R13)(r13) 152 ld r13, (PACA_KVM_R13)(r13)
148 153
149 RFI 154 RFI
150kvmppc_handler_trampoline_enter_end: 155kvmppc_handler_trampoline_enter_end:
@@ -162,28 +167,54 @@ kvmppc_handler_trampoline_exit:
162 167
163 /* Register usage at this point: 168 /* Register usage at this point:
164 * 169 *
165 * SPRG_SCRATCH0 = guest R13 170 * SPRG_SCRATCH0 = guest R13
166 * R01 = host R1 171 * R12 = exit handler id
167 * R02 = host R2 172 * R13 = PACA
168 * R10 = guest PC 173 * PACA.KVM.SCRATCH0 = guest R12
169 * R11 = guest MSR 174 * PACA.KVM.SCRATCH1 = guest CR
170 * R12 = exit handler id
171 * R13 = PACA
172 * PACA.exmc.CCR = guest CR
173 * PACA.exmc.R9 = guest R1
174 * PACA.exmc.R10 = guest R10
175 * PACA.exmc.R11 = guest R11
176 * PACA.exmc.R12 = guest R12
177 * PACA.exmc.R13 = guest R2
178 * 175 *
179 */ 176 */
180 177
181 /* Save registers */ 178 /* Save registers */
182 179
183 std r0, (PACA_EXMC+EX_SRR0)(r13) 180 std r0, PACA_KVM_R0(r13)
184 std r9, (PACA_EXMC+EX_R3)(r13) 181 std r1, PACA_KVM_R1(r13)
185 std r10, (PACA_EXMC+EX_LR)(r13) 182 std r2, PACA_KVM_R2(r13)
186 std r11, (PACA_EXMC+EX_DAR)(r13) 183 std r3, PACA_KVM_R3(r13)
184 std r4, PACA_KVM_R4(r13)
185 std r5, PACA_KVM_R5(r13)
186 std r6, PACA_KVM_R6(r13)
187 std r7, PACA_KVM_R7(r13)
188 std r8, PACA_KVM_R8(r13)
189 std r9, PACA_KVM_R9(r13)
190 std r10, PACA_KVM_R10(r13)
191 std r11, PACA_KVM_R11(r13)
192
193 /* Restore R1/R2 so we can handle faults */
194 ld r1, PACA_KVM_HOST_R1(r13)
195 ld r2, PACA_KVM_HOST_R2(r13)
196
197 /* Save guest PC and MSR in GPRs */
198 mfsrr0 r3
199 mfsrr1 r4
200
201 /* Get scratch'ed off registers */
202 mfspr r9, SPRN_SPRG_SCRATCH0
203 std r9, PACA_KVM_R13(r13)
204
205 ld r8, PACA_KVM_SCRATCH0(r13)
206 std r8, PACA_KVM_R12(r13)
207
208 lwz r7, PACA_KVM_SCRATCH1(r13)
209 stw r7, PACA_KVM_CR(r13)
210
211 /* Save more register state */
212
213 mfxer r6
214 stw r6, PACA_KVM_XER(r13)
215
216 mfdar r5
217 mfdsisr r6
187 218
188 /* 219 /*
189 * In order for us to easily get the last instruction, 220 * In order for us to easily get the last instruction,
@@ -202,17 +233,28 @@ kvmppc_handler_trampoline_exit:
202 233
203ld_last_inst: 234ld_last_inst:
204 /* Save off the guest instruction we're at */ 235 /* Save off the guest instruction we're at */
236
237 /* Set guest mode to 'jump over instruction' so if lwz faults
238 * we'll just continue at the next IP. */
239 li r9, KVM_GUEST_MODE_SKIP
240 stb r9, PACA_KVM_IN_GUEST(r13)
241
205 /* 1) enable paging for data */ 242 /* 1) enable paging for data */
206 mfmsr r9 243 mfmsr r9
207 ori r11, r9, MSR_DR /* Enable paging for data */ 244 ori r11, r9, MSR_DR /* Enable paging for data */
208 mtmsr r11 245 mtmsr r11
209 /* 2) fetch the instruction */ 246 /* 2) fetch the instruction */
210 lwz r0, 0(r10) 247 li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */
248 lwz r0, 0(r3)
211 /* 3) disable paging again */ 249 /* 3) disable paging again */
212 mtmsr r9 250 mtmsr r9
213 251
214no_ld_last_inst: 252no_ld_last_inst:
215 253
254 /* Unset guest mode */
255 li r9, KVM_GUEST_MODE_NONE
256 stb r9, PACA_KVM_IN_GUEST(r13)
257
216 /* Restore bolted entries from the shadow and fix it along the way */ 258 /* Restore bolted entries from the shadow and fix it along the way */
217 259
218 /* We don't store anything in entry 0, so we don't need to take care of it */ 260 /* We don't store anything in entry 0, so we don't need to take care of it */
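The ld_last_inst path above pairs with the new kvmppc_handler_skip_ins trampoline: r0 is preloaded with KVM_INST_FETCH_FAILED, the PACA byte is set to KVM_GUEST_MODE_SKIP so a fault on the lwz merely advances SRR0 past the load, and the byte is cleared again afterwards. A loose C analogy of that "optional load" pattern, purely illustrative and not how the kernel code is structured; the real mechanism is the trampoline in book3s_64_rmhandlers.S cooperating with this assembly:

#include <stdio.h>
#include <stdint.h>

#define KVM_INST_FETCH_FAILED 0xffffffffu

enum guest_mode { GUEST_MODE_NONE, GUEST_MODE_GUEST, GUEST_MODE_SKIP };

static enum guest_mode in_guest = GUEST_MODE_NONE;

/* Stand-in for the trampoline's fault entry: in SKIP mode the "fault" is
 * resolved by stepping the saved return address over the load. */
static int fixup_fault(unsigned long *srr0)
{
    if (in_guest != GUEST_MODE_SKIP)
        return 0;           /* a real guest fault, take the long path */
    *srr0 += 4;
    return 1;
}

/* Fetch the instruction the guest trapped on; the translation may be gone,
 * in which case the preloaded failure marker is kept. */
static uint32_t fetch_last_inst(const uint32_t *page, unsigned long pc)
{
    uint32_t inst = KVM_INST_FETCH_FAILED;  /* "li r0, KVM_INST_FETCH_FAILED" */

    in_guest = GUEST_MODE_SKIP;
    if (page)                   /* simulated: is the mapping still present? */
        inst = page[(pc & 0xfff) / 4];
    else
        fixup_fault(&pc);       /* the lwz faulted, just skip over it */
    in_guest = GUEST_MODE_NONE;

    return inst;
}

int main(void)
{
    uint32_t text[2] = { 0x38600001, 0x4e800020 };  /* two arbitrary instruction words */

    printf("mapped:   %08x\n", fetch_last_inst(text, 0x0));
    printf("unmapped: %08x\n", fetch_last_inst(NULL, 0x0));
    return 0;
}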
@@ -233,29 +275,27 @@ no_ld_last_inst:
233 275
234slb_do_exit: 276slb_do_exit:
235 277
236 /* Restore registers */ 278 /* Register usage at this point:
237 279 *
238 ld r11, (PACA_EXMC+EX_DAR)(r13) 280 * R0 = guest last inst
239 ld r10, (PACA_EXMC+EX_LR)(r13) 281 * R1 = host R1
240 ld r9, (PACA_EXMC+EX_R3)(r13) 282 * R2 = host R2
241 283 * R3 = guest PC
242 /* Save last inst */ 284 * R4 = guest MSR
243 stw r0, (PACA_EXMC+EX_LR)(r13) 285 * R5 = guest DAR
244 286 * R6 = guest DSISR
245 /* Save DAR and DSISR before going to paged mode */ 287 * R12 = exit handler id
246 mfdar r0 288 * R13 = PACA
247 std r0, (PACA_EXMC+EX_DAR)(r13) 289 * PACA.KVM.* = guest *
248 mfdsisr r0 290 *
249 stw r0, (PACA_EXMC+EX_DSISR)(r13) 291 */
250 292
251 /* RFI into the highmem handler */ 293 /* RFI into the highmem handler */
252 mfmsr r0 294 mfmsr r7
253 ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ 295 ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
254 mtsrr1 r0 296 mtsrr1 r7
255 ld r0, PACASAVEDMSR(r13) /* Highmem handler address */ 297 ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */
256 mtsrr0 r0 298 mtsrr0 r8
257
258 mfspr r0, SPRN_SPRG_SCRATCH0
259 299
260 RFI 300 RFI
261kvmppc_handler_trampoline_exit_end: 301kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 06f5a9ecc42c..4d686cc6b260 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -69,10 +69,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
69 69
70 for (i = 0; i < 32; i += 4) { 70 for (i = 0; i < 32; i += 4) {
71 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, 71 printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
72 vcpu->arch.gpr[i], 72 kvmppc_get_gpr(vcpu, i),
73 vcpu->arch.gpr[i+1], 73 kvmppc_get_gpr(vcpu, i+1),
74 vcpu->arch.gpr[i+2], 74 kvmppc_get_gpr(vcpu, i+2),
75 vcpu->arch.gpr[i+3]); 75 kvmppc_get_gpr(vcpu, i+3));
76 } 76 }
77} 77}
78 78
@@ -82,8 +82,32 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
82 set_bit(priority, &vcpu->arch.pending_exceptions); 82 set_bit(priority, &vcpu->arch.pending_exceptions);
83} 83}
84 84
85void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) 85static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
86 ulong dear_flags, ulong esr_flags)
86{ 87{
88 vcpu->arch.queued_dear = dear_flags;
89 vcpu->arch.queued_esr = esr_flags;
90 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
91}
92
93static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
94 ulong dear_flags, ulong esr_flags)
95{
96 vcpu->arch.queued_dear = dear_flags;
97 vcpu->arch.queued_esr = esr_flags;
98 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
99}
100
101static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
102 ulong esr_flags)
103{
104 vcpu->arch.queued_esr = esr_flags;
105 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
106}
107
108void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
109{
110 vcpu->arch.queued_esr = esr_flags;
87 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); 111 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
88} 112}
89 113
@@ -97,6 +121,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
97 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); 121 return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
98} 122}
99 123
124void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
125{
126 clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
127}
128
100void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, 129void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
101 struct kvm_interrupt *irq) 130 struct kvm_interrupt *irq)
102{ 131{
@@ -109,14 +138,19 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
109{ 138{
110 int allowed = 0; 139 int allowed = 0;
111 ulong msr_mask; 140 ulong msr_mask;
141 bool update_esr = false, update_dear = false;
112 142
113 switch (priority) { 143 switch (priority) {
114 case BOOKE_IRQPRIO_PROGRAM:
115 case BOOKE_IRQPRIO_DTLB_MISS: 144 case BOOKE_IRQPRIO_DTLB_MISS:
116 case BOOKE_IRQPRIO_ITLB_MISS:
117 case BOOKE_IRQPRIO_SYSCALL:
118 case BOOKE_IRQPRIO_DATA_STORAGE: 145 case BOOKE_IRQPRIO_DATA_STORAGE:
146 update_dear = true;
147 /* fall through */
119 case BOOKE_IRQPRIO_INST_STORAGE: 148 case BOOKE_IRQPRIO_INST_STORAGE:
149 case BOOKE_IRQPRIO_PROGRAM:
150 update_esr = true;
151 /* fall through */
152 case BOOKE_IRQPRIO_ITLB_MISS:
153 case BOOKE_IRQPRIO_SYSCALL:
120 case BOOKE_IRQPRIO_FP_UNAVAIL: 154 case BOOKE_IRQPRIO_FP_UNAVAIL:
121 case BOOKE_IRQPRIO_SPE_UNAVAIL: 155 case BOOKE_IRQPRIO_SPE_UNAVAIL:
122 case BOOKE_IRQPRIO_SPE_FP_DATA: 156 case BOOKE_IRQPRIO_SPE_FP_DATA:
@@ -151,6 +185,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
151 vcpu->arch.srr0 = vcpu->arch.pc; 185 vcpu->arch.srr0 = vcpu->arch.pc;
152 vcpu->arch.srr1 = vcpu->arch.msr; 186 vcpu->arch.srr1 = vcpu->arch.msr;
153 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; 187 vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
188 if (update_esr == true)
189 vcpu->arch.esr = vcpu->arch.queued_esr;
190 if (update_dear == true)
191 vcpu->arch.dear = vcpu->arch.queued_dear;
154 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); 192 kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
155 193
156 clear_bit(priority, &vcpu->arch.pending_exceptions); 194 clear_bit(priority, &vcpu->arch.pending_exceptions);
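On the booke side the queueing helpers now capture the fault's side information (DEAR/ESR) at queue time and only commit it to the architected registers when the exception is actually delivered, so a later exit can no longer clobber it before delivery. A small sketch of that two-phase pattern, with the register set trimmed down and delivery always permitted (the real code keys update_esr/update_dear off the priority being delivered):

#include <stdio.h>

#define IRQPRIO_DTLB_MISS 0

struct vcpu {
    unsigned long pending;      /* one bit per exception priority */
    unsigned long queued_dear;  /* captured at queue time */
    unsigned long queued_esr;
    unsigned long dear;         /* architected, guest-visible registers */
    unsigned long esr;
};

static void queue_dtlb_miss(struct vcpu *v, unsigned long dear, unsigned long esr)
{
    v->queued_dear = dear;
    v->queued_esr = esr;
    v->pending |= 1UL << IRQPRIO_DTLB_MISS;
}

static void deliver(struct vcpu *v, int prio)
{
    /* Only now do the queued values become architecturally visible */
    if (prio == IRQPRIO_DTLB_MISS) {
        v->dear = v->queued_dear;
        v->esr = v->queued_esr;
    }
    v->pending &= ~(1UL << prio);
}

int main(void)
{
    struct vcpu v = { 0 };

    queue_dtlb_miss(&v, 0xc0000000, 0x00800000);
    deliver(&v, IRQPRIO_DTLB_MISS);
    printf("DEAR=%#lx ESR=%#lx\n", v.dear, v.esr);
    return 0;
}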
@@ -223,8 +261,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
223 if (vcpu->arch.msr & MSR_PR) { 261 if (vcpu->arch.msr & MSR_PR) {
224 /* Program traps generated by user-level software must be handled 262 /* Program traps generated by user-level software must be handled
225 * by the guest kernel. */ 263 * by the guest kernel. */
226 vcpu->arch.esr = vcpu->arch.fault_esr; 264 kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
227 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
228 r = RESUME_GUEST; 265 r = RESUME_GUEST;
229 kvmppc_account_exit(vcpu, USR_PR_INST); 266 kvmppc_account_exit(vcpu, USR_PR_INST);
230 break; 267 break;
@@ -280,16 +317,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
280 break; 317 break;
281 318
282 case BOOKE_INTERRUPT_DATA_STORAGE: 319 case BOOKE_INTERRUPT_DATA_STORAGE:
283 vcpu->arch.dear = vcpu->arch.fault_dear; 320 kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
284 vcpu->arch.esr = vcpu->arch.fault_esr; 321 vcpu->arch.fault_esr);
285 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
286 kvmppc_account_exit(vcpu, DSI_EXITS); 322 kvmppc_account_exit(vcpu, DSI_EXITS);
287 r = RESUME_GUEST; 323 r = RESUME_GUEST;
288 break; 324 break;
289 325
290 case BOOKE_INTERRUPT_INST_STORAGE: 326 case BOOKE_INTERRUPT_INST_STORAGE:
291 vcpu->arch.esr = vcpu->arch.fault_esr; 327 kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
292 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
293 kvmppc_account_exit(vcpu, ISI_EXITS); 328 kvmppc_account_exit(vcpu, ISI_EXITS);
294 r = RESUME_GUEST; 329 r = RESUME_GUEST;
295 break; 330 break;
@@ -310,9 +345,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
310 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); 345 gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
311 if (gtlb_index < 0) { 346 if (gtlb_index < 0) {
312 /* The guest didn't have a mapping for it. */ 347 /* The guest didn't have a mapping for it. */
313 kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); 348 kvmppc_core_queue_dtlb_miss(vcpu,
314 vcpu->arch.dear = vcpu->arch.fault_dear; 349 vcpu->arch.fault_dear,
315 vcpu->arch.esr = vcpu->arch.fault_esr; 350 vcpu->arch.fault_esr);
316 kvmppc_mmu_dtlb_miss(vcpu); 351 kvmppc_mmu_dtlb_miss(vcpu);
317 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); 352 kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
318 r = RESUME_GUEST; 353 r = RESUME_GUEST;
@@ -426,7 +461,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
426{ 461{
427 vcpu->arch.pc = 0; 462 vcpu->arch.pc = 0;
428 vcpu->arch.msr = 0; 463 vcpu->arch.msr = 0;
429 vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ 464 kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
430 465
431 vcpu->arch.shadow_pid = 1; 466 vcpu->arch.shadow_pid = 1;
432 467
@@ -444,10 +479,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
444 int i; 479 int i;
445 480
446 regs->pc = vcpu->arch.pc; 481 regs->pc = vcpu->arch.pc;
447 regs->cr = vcpu->arch.cr; 482 regs->cr = kvmppc_get_cr(vcpu);
448 regs->ctr = vcpu->arch.ctr; 483 regs->ctr = vcpu->arch.ctr;
449 regs->lr = vcpu->arch.lr; 484 regs->lr = vcpu->arch.lr;
450 regs->xer = vcpu->arch.xer; 485 regs->xer = kvmppc_get_xer(vcpu);
451 regs->msr = vcpu->arch.msr; 486 regs->msr = vcpu->arch.msr;
452 regs->srr0 = vcpu->arch.srr0; 487 regs->srr0 = vcpu->arch.srr0;
453 regs->srr1 = vcpu->arch.srr1; 488 regs->srr1 = vcpu->arch.srr1;
@@ -461,7 +496,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
461 regs->sprg7 = vcpu->arch.sprg6; 496 regs->sprg7 = vcpu->arch.sprg6;
462 497
463 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) 498 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
464 regs->gpr[i] = vcpu->arch.gpr[i]; 499 regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
465 500
466 return 0; 501 return 0;
467} 502}
@@ -471,10 +506,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
471 int i; 506 int i;
472 507
473 vcpu->arch.pc = regs->pc; 508 vcpu->arch.pc = regs->pc;
474 vcpu->arch.cr = regs->cr; 509 kvmppc_set_cr(vcpu, regs->cr);
475 vcpu->arch.ctr = regs->ctr; 510 vcpu->arch.ctr = regs->ctr;
476 vcpu->arch.lr = regs->lr; 511 vcpu->arch.lr = regs->lr;
477 vcpu->arch.xer = regs->xer; 512 kvmppc_set_xer(vcpu, regs->xer);
478 kvmppc_set_msr(vcpu, regs->msr); 513 kvmppc_set_msr(vcpu, regs->msr);
479 vcpu->arch.srr0 = regs->srr0; 514 vcpu->arch.srr0 = regs->srr0;
480 vcpu->arch.srr1 = regs->srr1; 515 vcpu->arch.srr1 = regs->srr1;
@@ -486,8 +521,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
486 vcpu->arch.sprg6 = regs->sprg5; 521 vcpu->arch.sprg6 = regs->sprg5;
487 vcpu->arch.sprg7 = regs->sprg6; 522 vcpu->arch.sprg7 = regs->sprg6;
488 523
489 for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) 524 for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
490 vcpu->arch.gpr[i] = regs->gpr[i]; 525 kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
491 526
492 return 0; 527 return 0;
493} 528}
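Note on the booke.c hunks above: fault state for program, data-storage and DTLB-miss interrupts is now captured in vcpu->arch.queued_esr / queued_dear at queue time and copied into the architected ESR/DEAR inside kvmppc_booke_irqprio_deliver(), so the guest-visible registers are only modified when the interrupt is actually delivered; a kvmppc_core_dequeue_dec() helper is added (used by emulate.c further down); and all GPR/CR/XER accesses go through kvmppc_get_gpr()/kvmppc_set_gpr() and the matching CR/XER helpers instead of poking vcpu->arch.gpr[] directly. A minimal sketch of what those accessors are assumed to look like at this point (the real definitions live in asm/kvm_ppc.h and may differ):

    static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
    {
            vcpu->arch.gpr[num] = val;
    }

    static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
    {
            return vcpu->arch.gpr[num];
    }

Routing every access through one helper presumably leaves room for a backend (Book3S in particular) to keep live GPRs somewhere other than a plain array without touching each caller again.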
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index aebc65e93f4b..cbc790ee1928 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -62,20 +62,20 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
62 62
63 case OP_31_XOP_MFMSR: 63 case OP_31_XOP_MFMSR:
64 rt = get_rt(inst); 64 rt = get_rt(inst);
65 vcpu->arch.gpr[rt] = vcpu->arch.msr; 65 kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 66 kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
67 break; 67 break;
68 68
69 case OP_31_XOP_MTMSR: 69 case OP_31_XOP_MTMSR:
70 rs = get_rs(inst); 70 rs = get_rs(inst);
71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); 71 kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
72 kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); 72 kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
73 break; 73 break;
74 74
75 case OP_31_XOP_WRTEE: 75 case OP_31_XOP_WRTEE:
76 rs = get_rs(inst); 76 rs = get_rs(inst);
77 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) 77 vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
78 | (vcpu->arch.gpr[rs] & MSR_EE); 78 | (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); 79 kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
80 break; 80 break;
81 81
@@ -101,22 +101,23 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
101int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) 101int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
102{ 102{
103 int emulated = EMULATE_DONE; 103 int emulated = EMULATE_DONE;
104 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
104 105
105 switch (sprn) { 106 switch (sprn) {
106 case SPRN_DEAR: 107 case SPRN_DEAR:
107 vcpu->arch.dear = vcpu->arch.gpr[rs]; break; 108 vcpu->arch.dear = spr_val; break;
108 case SPRN_ESR: 109 case SPRN_ESR:
109 vcpu->arch.esr = vcpu->arch.gpr[rs]; break; 110 vcpu->arch.esr = spr_val; break;
110 case SPRN_DBCR0: 111 case SPRN_DBCR0:
111 vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; 112 vcpu->arch.dbcr0 = spr_val; break;
112 case SPRN_DBCR1: 113 case SPRN_DBCR1:
113 vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; 114 vcpu->arch.dbcr1 = spr_val; break;
114 case SPRN_DBSR: 115 case SPRN_DBSR:
115 vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break; 116 vcpu->arch.dbsr &= ~spr_val; break;
116 case SPRN_TSR: 117 case SPRN_TSR:
117 vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; 118 vcpu->arch.tsr &= ~spr_val; break;
118 case SPRN_TCR: 119 case SPRN_TCR:
119 vcpu->arch.tcr = vcpu->arch.gpr[rs]; 120 vcpu->arch.tcr = spr_val;
120 kvmppc_emulate_dec(vcpu); 121 kvmppc_emulate_dec(vcpu);
121 break; 122 break;
122 123
@@ -124,64 +125,64 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
124 * loaded into the real SPRGs when resuming the 125 * loaded into the real SPRGs when resuming the
125 * guest. */ 126 * guest. */
126 case SPRN_SPRG4: 127 case SPRN_SPRG4:
127 vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; 128 vcpu->arch.sprg4 = spr_val; break;
128 case SPRN_SPRG5: 129 case SPRN_SPRG5:
129 vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; 130 vcpu->arch.sprg5 = spr_val; break;
130 case SPRN_SPRG6: 131 case SPRN_SPRG6:
131 vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; 132 vcpu->arch.sprg6 = spr_val; break;
132 case SPRN_SPRG7: 133 case SPRN_SPRG7:
133 vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; 134 vcpu->arch.sprg7 = spr_val; break;
134 135
135 case SPRN_IVPR: 136 case SPRN_IVPR:
136 vcpu->arch.ivpr = vcpu->arch.gpr[rs]; 137 vcpu->arch.ivpr = spr_val;
137 break; 138 break;
138 case SPRN_IVOR0: 139 case SPRN_IVOR0:
139 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; 140 vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
140 break; 141 break;
141 case SPRN_IVOR1: 142 case SPRN_IVOR1:
142 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; 143 vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
143 break; 144 break;
144 case SPRN_IVOR2: 145 case SPRN_IVOR2:
145 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; 146 vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
146 break; 147 break;
147 case SPRN_IVOR3: 148 case SPRN_IVOR3:
148 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; 149 vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
149 break; 150 break;
150 case SPRN_IVOR4: 151 case SPRN_IVOR4:
151 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; 152 vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
152 break; 153 break;
153 case SPRN_IVOR5: 154 case SPRN_IVOR5:
154 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; 155 vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
155 break; 156 break;
156 case SPRN_IVOR6: 157 case SPRN_IVOR6:
157 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; 158 vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
158 break; 159 break;
159 case SPRN_IVOR7: 160 case SPRN_IVOR7:
160 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; 161 vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
161 break; 162 break;
162 case SPRN_IVOR8: 163 case SPRN_IVOR8:
163 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; 164 vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
164 break; 165 break;
165 case SPRN_IVOR9: 166 case SPRN_IVOR9:
166 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; 167 vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
167 break; 168 break;
168 case SPRN_IVOR10: 169 case SPRN_IVOR10:
169 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; 170 vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
170 break; 171 break;
171 case SPRN_IVOR11: 172 case SPRN_IVOR11:
172 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; 173 vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
173 break; 174 break;
174 case SPRN_IVOR12: 175 case SPRN_IVOR12:
175 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; 176 vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
176 break; 177 break;
177 case SPRN_IVOR13: 178 case SPRN_IVOR13:
178 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; 179 vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
179 break; 180 break;
180 case SPRN_IVOR14: 181 case SPRN_IVOR14:
181 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; 182 vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
182 break; 183 break;
183 case SPRN_IVOR15: 184 case SPRN_IVOR15:
184 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; 185 vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
185 break; 186 break;
186 187
187 default: 188 default:
@@ -197,65 +198,65 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
197 198
198 switch (sprn) { 199 switch (sprn) {
199 case SPRN_IVPR: 200 case SPRN_IVPR:
200 vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; 201 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
201 case SPRN_DEAR: 202 case SPRN_DEAR:
202 vcpu->arch.gpr[rt] = vcpu->arch.dear; break; 203 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
203 case SPRN_ESR: 204 case SPRN_ESR:
204 vcpu->arch.gpr[rt] = vcpu->arch.esr; break; 205 kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
205 case SPRN_DBCR0: 206 case SPRN_DBCR0:
206 vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; 207 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
207 case SPRN_DBCR1: 208 case SPRN_DBCR1:
208 vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; 209 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
209 case SPRN_DBSR: 210 case SPRN_DBSR:
210 vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break; 211 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
211 212
212 case SPRN_IVOR0: 213 case SPRN_IVOR0:
213 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; 214 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
214 break; 215 break;
215 case SPRN_IVOR1: 216 case SPRN_IVOR1:
216 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; 217 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
217 break; 218 break;
218 case SPRN_IVOR2: 219 case SPRN_IVOR2:
219 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; 220 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
220 break; 221 break;
221 case SPRN_IVOR3: 222 case SPRN_IVOR3:
222 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; 223 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
223 break; 224 break;
224 case SPRN_IVOR4: 225 case SPRN_IVOR4:
225 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; 226 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
226 break; 227 break;
227 case SPRN_IVOR5: 228 case SPRN_IVOR5:
228 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; 229 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
229 break; 230 break;
230 case SPRN_IVOR6: 231 case SPRN_IVOR6:
231 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; 232 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
232 break; 233 break;
233 case SPRN_IVOR7: 234 case SPRN_IVOR7:
234 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; 235 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
235 break; 236 break;
236 case SPRN_IVOR8: 237 case SPRN_IVOR8:
237 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; 238 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
238 break; 239 break;
239 case SPRN_IVOR9: 240 case SPRN_IVOR9:
240 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; 241 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
241 break; 242 break;
242 case SPRN_IVOR10: 243 case SPRN_IVOR10:
243 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; 244 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
244 break; 245 break;
245 case SPRN_IVOR11: 246 case SPRN_IVOR11:
246 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; 247 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
247 break; 248 break;
248 case SPRN_IVOR12: 249 case SPRN_IVOR12:
249 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; 250 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
250 break; 251 break;
251 case SPRN_IVOR13: 252 case SPRN_IVOR13:
252 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; 253 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
253 break; 254 break;
254 case SPRN_IVOR14: 255 case SPRN_IVOR14:
255 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; 256 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
256 break; 257 break;
257 case SPRN_IVOR15: 258 case SPRN_IVOR15:
258 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; 259 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
259 break; 260 break;
260 261
261 default: 262 default:
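Note on the booke_emulate.c hunks above: kvmppc_booke_emulate_mtspr() now reads the source register once into a local spr_val and every case uses that value, and the mfspr side stores results through kvmppc_set_gpr(). Reading the GPR once matters if the accessor ever becomes more than an array load. The resulting shape, as a small self-contained sketch (only two SPRs shown, function name illustrative):

    static int booke_mtspr_sketch(struct kvm_vcpu *vcpu, int sprn, int rs)
    {
            ulong spr_val = kvmppc_get_gpr(vcpu, rs);  /* read the GPR once */

            switch (sprn) {
            case SPRN_DEAR:
                    vcpu->arch.dear = spr_val; break;
            case SPRN_ESR:
                    vcpu->arch.esr = spr_val; break;
            default:
                    return EMULATE_FAIL;
            }
            return EMULATE_DONE;
    }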
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 64949eef43f1..efa1198940ab 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -60,6 +60,12 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
60 60
61 kvmppc_e500_tlb_setup(vcpu_e500); 61 kvmppc_e500_tlb_setup(vcpu_e500);
62 62
63 /* Registers init */
64 vcpu->arch.pvr = mfspr(SPRN_PVR);
65
66 /* Since booke kvm only support one core, update all vcpus' PIR to 0 */
67 vcpu->vcpu_id = 0;
68
63 return 0; 69 return 0;
64} 70}
65 71
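Note on the e500.c hunk above: vcpu setup now latches the host PVR into vcpu->arch.pvr and pins vcpu_id to 0, since booke KVM models only a single core. Both values are what the guest reads back through the mfspr emulation shown in the emulate.c hunks below, roughly:

    case SPRN_PVR:
            kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;   /* host part number */
    case SPRN_PIR:
            kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id);  break;   /* always reads as 0 */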
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index be95b8d8e3b7..8e3edfbc9634 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -74,54 +74,59 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
74{ 74{
75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); 75 struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
76 int emulated = EMULATE_DONE; 76 int emulated = EMULATE_DONE;
77 ulong spr_val = kvmppc_get_gpr(vcpu, rs);
77 78
78 switch (sprn) { 79 switch (sprn) {
79 case SPRN_PID: 80 case SPRN_PID:
80 vcpu_e500->pid[0] = vcpu->arch.shadow_pid = 81 vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
81 vcpu->arch.pid = vcpu->arch.gpr[rs]; 82 vcpu->arch.pid = spr_val;
82 break; 83 break;
83 case SPRN_PID1: 84 case SPRN_PID1:
84 vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break; 85 vcpu_e500->pid[1] = spr_val; break;
85 case SPRN_PID2: 86 case SPRN_PID2:
86 vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break; 87 vcpu_e500->pid[2] = spr_val; break;
87 case SPRN_MAS0: 88 case SPRN_MAS0:
88 vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break; 89 vcpu_e500->mas0 = spr_val; break;
89 case SPRN_MAS1: 90 case SPRN_MAS1:
90 vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break; 91 vcpu_e500->mas1 = spr_val; break;
91 case SPRN_MAS2: 92 case SPRN_MAS2:
92 vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break; 93 vcpu_e500->mas2 = spr_val; break;
93 case SPRN_MAS3: 94 case SPRN_MAS3:
94 vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break; 95 vcpu_e500->mas3 = spr_val; break;
95 case SPRN_MAS4: 96 case SPRN_MAS4:
96 vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break; 97 vcpu_e500->mas4 = spr_val; break;
97 case SPRN_MAS6: 98 case SPRN_MAS6:
98 vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break; 99 vcpu_e500->mas6 = spr_val; break;
99 case SPRN_MAS7: 100 case SPRN_MAS7:
100 vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break; 101 vcpu_e500->mas7 = spr_val; break;
102 case SPRN_L1CSR0:
103 vcpu_e500->l1csr0 = spr_val;
104 vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
105 break;
101 case SPRN_L1CSR1: 106 case SPRN_L1CSR1:
102 vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break; 107 vcpu_e500->l1csr1 = spr_val; break;
103 case SPRN_HID0: 108 case SPRN_HID0:
104 vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break; 109 vcpu_e500->hid0 = spr_val; break;
105 case SPRN_HID1: 110 case SPRN_HID1:
106 vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; 111 vcpu_e500->hid1 = spr_val; break;
107 112
108 case SPRN_MMUCSR0: 113 case SPRN_MMUCSR0:
109 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, 114 emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
110 vcpu->arch.gpr[rs]); 115 spr_val);
111 break; 116 break;
112 117
113 /* extra exceptions */ 118 /* extra exceptions */
114 case SPRN_IVOR32: 119 case SPRN_IVOR32:
115 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; 120 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
116 break; 121 break;
117 case SPRN_IVOR33: 122 case SPRN_IVOR33:
118 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs]; 123 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
119 break; 124 break;
120 case SPRN_IVOR34: 125 case SPRN_IVOR34:
121 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs]; 126 vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
122 break; 127 break;
123 case SPRN_IVOR35: 128 case SPRN_IVOR35:
124 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs]; 129 vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
125 break; 130 break;
126 131
127 default: 132 default:
@@ -138,63 +143,57 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
138 143
139 switch (sprn) { 144 switch (sprn) {
140 case SPRN_PID: 145 case SPRN_PID:
141 vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break; 146 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
142 case SPRN_PID1: 147 case SPRN_PID1:
143 vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break; 148 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
144 case SPRN_PID2: 149 case SPRN_PID2:
145 vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break; 150 kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
146 case SPRN_MAS0: 151 case SPRN_MAS0:
147 vcpu->arch.gpr[rt] = vcpu_e500->mas0; break; 152 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
148 case SPRN_MAS1: 153 case SPRN_MAS1:
149 vcpu->arch.gpr[rt] = vcpu_e500->mas1; break; 154 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
150 case SPRN_MAS2: 155 case SPRN_MAS2:
151 vcpu->arch.gpr[rt] = vcpu_e500->mas2; break; 156 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
152 case SPRN_MAS3: 157 case SPRN_MAS3:
153 vcpu->arch.gpr[rt] = vcpu_e500->mas3; break; 158 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
154 case SPRN_MAS4: 159 case SPRN_MAS4:
155 vcpu->arch.gpr[rt] = vcpu_e500->mas4; break; 160 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
156 case SPRN_MAS6: 161 case SPRN_MAS6:
157 vcpu->arch.gpr[rt] = vcpu_e500->mas6; break; 162 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
158 case SPRN_MAS7: 163 case SPRN_MAS7:
159 vcpu->arch.gpr[rt] = vcpu_e500->mas7; break; 164 kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
160 165
161 case SPRN_TLB0CFG: 166 case SPRN_TLB0CFG:
162 vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG); 167 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
163 vcpu->arch.gpr[rt] &= ~0xfffUL;
164 vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
165 break;
166
167 case SPRN_TLB1CFG: 168 case SPRN_TLB1CFG:
168 vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG); 169 kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
169 vcpu->arch.gpr[rt] &= ~0xfffUL; 170 case SPRN_L1CSR0:
170 vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1]; 171 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
171 break;
172
173 case SPRN_L1CSR1: 172 case SPRN_L1CSR1:
174 vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break; 173 kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
175 case SPRN_HID0: 174 case SPRN_HID0:
176 vcpu->arch.gpr[rt] = vcpu_e500->hid0; break; 175 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
177 case SPRN_HID1: 176 case SPRN_HID1:
178 vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; 177 kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
179 178
180 case SPRN_MMUCSR0: 179 case SPRN_MMUCSR0:
181 vcpu->arch.gpr[rt] = 0; break; 180 kvmppc_set_gpr(vcpu, rt, 0); break;
182 181
183 case SPRN_MMUCFG: 182 case SPRN_MMUCFG:
184 vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break; 183 kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
185 184
186 /* extra exceptions */ 185 /* extra exceptions */
187 case SPRN_IVOR32: 186 case SPRN_IVOR32:
188 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; 187 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
189 break; 188 break;
190 case SPRN_IVOR33: 189 case SPRN_IVOR33:
191 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; 190 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
192 break; 191 break;
193 case SPRN_IVOR34: 192 case SPRN_IVOR34:
194 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; 193 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
195 break; 194 break;
196 case SPRN_IVOR35: 195 case SPRN_IVOR35:
197 vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; 196 kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
198 break; 197 break;
199 default: 198 default:
200 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); 199 emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
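Note on the e500_emulate.c hunks above: writes to SPRN_L1CSR0 are now accepted into a shadow register with the flash-invalidate and lock-clear command bits (L1CSR0_DCFI, L1CSR0_CLFC) masked off, so the guest reads those operations back as already completed, and TLB0CFG/TLB1CFG reads return values cached in the vcpu instead of being rebuilt from the host SPR on every access. A sketch of the shadow-and-mask idea (helper name illustrative):

    /* Keep a software copy for the guest to read back, but strip bits
     * whose only purpose is to trigger a one-shot hardware operation. */
    static inline void set_guest_l1csr0(struct kvmppc_vcpu_e500 *vcpu_e500,
                                        ulong val)
    {
            vcpu_e500->l1csr0 = val & ~(L1CSR0_DCFI | L1CSR0_CLFC);
    }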
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index fb1e1dc11ba5..0d772e6b6318 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
417 int esel, tlbsel; 417 int esel, tlbsel;
418 gva_t ea; 418 gva_t ea;
419 419
420 ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb]; 420 ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
421 421
422 ia = (ea >> 2) & 0x1; 422 ia = (ea >> 2) & 0x1;
423 423
@@ -470,7 +470,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
470 struct tlbe *gtlbe = NULL; 470 struct tlbe *gtlbe = NULL;
471 gva_t ea; 471 gva_t ea;
472 472
473 ea = vcpu->arch.gpr[rb]; 473 ea = kvmppc_get_gpr(vcpu, rb);
474 474
475 for (tlbsel = 0; tlbsel < 2; tlbsel++) { 475 for (tlbsel = 0; tlbsel < 2; tlbsel++) {
476 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); 476 esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -728,6 +728,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
728 if (vcpu_e500->shadow_pages[1] == NULL) 728 if (vcpu_e500->shadow_pages[1] == NULL)
729 goto err_out_page0; 729 goto err_out_page0;
730 730
731 /* Init TLB configuration register */
732 vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
733 vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
734 vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
735 vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
736
731 return 0; 737 return 0;
732 738
733err_out_page0: 739err_out_page0:
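Note on the e500_tlb.c hunk above: the per-vcpu TLB0CFG/TLB1CFG words are composed once at TLB-init time by taking the host register and replacing its low 12 bits (which hold the entry count) with the size of the guest-visible TLB array; the mfspr path then simply returns the cached value. Worked example, with a hypothetical guest TLB0 of 128 entries:

    /* If the host TLB0 has, say, 512 entries but the guest array holds
     * 128, the guest's mfspr(TLB0CFG) reports 128 in the count field: */
    vcpu_e500->tlb0cfg = (mfspr(SPRN_TLB0CFG) & ~0xfffUL)
                       | vcpu_e500->guest_tlb_size[0];   /* e.g. 128 */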
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4a9ac6640fad..cb72a65f4ecc 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -83,6 +83,9 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
83 83
84 pr_debug("mtDEC: %x\n", vcpu->arch.dec); 84 pr_debug("mtDEC: %x\n", vcpu->arch.dec);
85#ifdef CONFIG_PPC64 85#ifdef CONFIG_PPC64
86 /* mtdec lowers the interrupt line when positive. */
87 kvmppc_core_dequeue_dec(vcpu);
88
86 /* POWER4+ triggers a dec interrupt if the value is < 0 */ 89 /* POWER4+ triggers a dec interrupt if the value is < 0 */
87 if (vcpu->arch.dec & 0x80000000) { 90 if (vcpu->arch.dec & 0x80000000) {
88 hrtimer_try_to_cancel(&vcpu->arch.dec_timer); 91 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
@@ -140,14 +143,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
140 143
141 pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); 144 pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
142 145
146 /* Try again next time */
147 if (inst == KVM_INST_FETCH_FAILED)
148 return EMULATE_DONE;
149
143 switch (get_op(inst)) { 150 switch (get_op(inst)) {
144 case OP_TRAP: 151 case OP_TRAP:
145#ifdef CONFIG_PPC64 152#ifdef CONFIG_PPC64
146 case OP_TRAP_64: 153 case OP_TRAP_64:
154 kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
147#else 155#else
148 vcpu->arch.esr |= ESR_PTR; 156 kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
149#endif 157#endif
150 kvmppc_core_queue_program(vcpu);
151 advance = 0; 158 advance = 0;
152 break; 159 break;
153 160
@@ -167,14 +174,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
167 case OP_31_XOP_STWX: 174 case OP_31_XOP_STWX:
168 rs = get_rs(inst); 175 rs = get_rs(inst);
169 emulated = kvmppc_handle_store(run, vcpu, 176 emulated = kvmppc_handle_store(run, vcpu,
170 vcpu->arch.gpr[rs], 177 kvmppc_get_gpr(vcpu, rs),
171 4, 1); 178 4, 1);
172 break; 179 break;
173 180
174 case OP_31_XOP_STBX: 181 case OP_31_XOP_STBX:
175 rs = get_rs(inst); 182 rs = get_rs(inst);
176 emulated = kvmppc_handle_store(run, vcpu, 183 emulated = kvmppc_handle_store(run, vcpu,
177 vcpu->arch.gpr[rs], 184 kvmppc_get_gpr(vcpu, rs),
178 1, 1); 185 1, 1);
179 break; 186 break;
180 187
@@ -183,14 +190,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
183 ra = get_ra(inst); 190 ra = get_ra(inst);
184 rb = get_rb(inst); 191 rb = get_rb(inst);
185 192
186 ea = vcpu->arch.gpr[rb]; 193 ea = kvmppc_get_gpr(vcpu, rb);
187 if (ra) 194 if (ra)
188 ea += vcpu->arch.gpr[ra]; 195 ea += kvmppc_get_gpr(vcpu, ra);
189 196
190 emulated = kvmppc_handle_store(run, vcpu, 197 emulated = kvmppc_handle_store(run, vcpu,
191 vcpu->arch.gpr[rs], 198 kvmppc_get_gpr(vcpu, rs),
192 1, 1); 199 1, 1);
193 vcpu->arch.gpr[rs] = ea; 200 kvmppc_set_gpr(vcpu, rs, ea);
194 break; 201 break;
195 202
196 case OP_31_XOP_LHZX: 203 case OP_31_XOP_LHZX:
@@ -203,12 +210,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
203 ra = get_ra(inst); 210 ra = get_ra(inst);
204 rb = get_rb(inst); 211 rb = get_rb(inst);
205 212
206 ea = vcpu->arch.gpr[rb]; 213 ea = kvmppc_get_gpr(vcpu, rb);
207 if (ra) 214 if (ra)
208 ea += vcpu->arch.gpr[ra]; 215 ea += kvmppc_get_gpr(vcpu, ra);
209 216
210 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 217 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
211 vcpu->arch.gpr[ra] = ea; 218 kvmppc_set_gpr(vcpu, ra, ea);
212 break; 219 break;
213 220
214 case OP_31_XOP_MFSPR: 221 case OP_31_XOP_MFSPR:
@@ -217,47 +224,49 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
217 224
218 switch (sprn) { 225 switch (sprn) {
219 case SPRN_SRR0: 226 case SPRN_SRR0:
220 vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; 227 kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
221 case SPRN_SRR1: 228 case SPRN_SRR1:
222 vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; 229 kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
223 case SPRN_PVR: 230 case SPRN_PVR:
224 vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; 231 kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
225 case SPRN_PIR: 232 case SPRN_PIR:
226 vcpu->arch.gpr[rt] = vcpu->vcpu_id; break; 233 kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
227 case SPRN_MSSSR0: 234 case SPRN_MSSSR0:
228 vcpu->arch.gpr[rt] = 0; break; 235 kvmppc_set_gpr(vcpu, rt, 0); break;
229 236
230 /* Note: mftb and TBRL/TBWL are user-accessible, so 237 /* Note: mftb and TBRL/TBWL are user-accessible, so
231 * the guest can always access the real TB anyways. 238 * the guest can always access the real TB anyways.
232 * In fact, we probably will never see these traps. */ 239 * In fact, we probably will never see these traps. */
233 case SPRN_TBWL: 240 case SPRN_TBWL:
234 vcpu->arch.gpr[rt] = get_tb() >> 32; break; 241 kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
235 case SPRN_TBWU: 242 case SPRN_TBWU:
236 vcpu->arch.gpr[rt] = get_tb(); break; 243 kvmppc_set_gpr(vcpu, rt, get_tb()); break;
237 244
238 case SPRN_SPRG0: 245 case SPRN_SPRG0:
239 vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; 246 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
240 case SPRN_SPRG1: 247 case SPRN_SPRG1:
241 vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; 248 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
242 case SPRN_SPRG2: 249 case SPRN_SPRG2:
243 vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; 250 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
244 case SPRN_SPRG3: 251 case SPRN_SPRG3:
245 vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; 252 kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
246 /* Note: SPRG4-7 are user-readable, so we don't get 253 /* Note: SPRG4-7 are user-readable, so we don't get
247 * a trap. */ 254 * a trap. */
248 255
249 case SPRN_DEC: 256 case SPRN_DEC:
250 { 257 {
251 u64 jd = get_tb() - vcpu->arch.dec_jiffies; 258 u64 jd = get_tb() - vcpu->arch.dec_jiffies;
252 vcpu->arch.gpr[rt] = vcpu->arch.dec - jd; 259 kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
253 pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]); 260 pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n",
261 vcpu->arch.dec, jd,
262 kvmppc_get_gpr(vcpu, rt));
254 break; 263 break;
255 } 264 }
256 default: 265 default:
257 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); 266 emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
258 if (emulated == EMULATE_FAIL) { 267 if (emulated == EMULATE_FAIL) {
259 printk("mfspr: unknown spr %x\n", sprn); 268 printk("mfspr: unknown spr %x\n", sprn);
260 vcpu->arch.gpr[rt] = 0; 269 kvmppc_set_gpr(vcpu, rt, 0);
261 } 270 }
262 break; 271 break;
263 } 272 }
@@ -269,7 +278,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
269 rb = get_rb(inst); 278 rb = get_rb(inst);
270 279
271 emulated = kvmppc_handle_store(run, vcpu, 280 emulated = kvmppc_handle_store(run, vcpu,
272 vcpu->arch.gpr[rs], 281 kvmppc_get_gpr(vcpu, rs),
273 2, 1); 282 2, 1);
274 break; 283 break;
275 284
@@ -278,14 +287,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
278 ra = get_ra(inst); 287 ra = get_ra(inst);
279 rb = get_rb(inst); 288 rb = get_rb(inst);
280 289
281 ea = vcpu->arch.gpr[rb]; 290 ea = kvmppc_get_gpr(vcpu, rb);
282 if (ra) 291 if (ra)
283 ea += vcpu->arch.gpr[ra]; 292 ea += kvmppc_get_gpr(vcpu, ra);
284 293
285 emulated = kvmppc_handle_store(run, vcpu, 294 emulated = kvmppc_handle_store(run, vcpu,
286 vcpu->arch.gpr[rs], 295 kvmppc_get_gpr(vcpu, rs),
287 2, 1); 296 2, 1);
288 vcpu->arch.gpr[ra] = ea; 297 kvmppc_set_gpr(vcpu, ra, ea);
289 break; 298 break;
290 299
291 case OP_31_XOP_MTSPR: 300 case OP_31_XOP_MTSPR:
@@ -293,9 +302,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
293 rs = get_rs(inst); 302 rs = get_rs(inst);
294 switch (sprn) { 303 switch (sprn) {
295 case SPRN_SRR0: 304 case SPRN_SRR0:
296 vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; 305 vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
297 case SPRN_SRR1: 306 case SPRN_SRR1:
298 vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; 307 vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
299 308
300 /* XXX We need to context-switch the timebase for 309 /* XXX We need to context-switch the timebase for
301 * watchdog and FIT. */ 310 * watchdog and FIT. */
@@ -305,18 +314,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
305 case SPRN_MSSSR0: break; 314 case SPRN_MSSSR0: break;
306 315
307 case SPRN_DEC: 316 case SPRN_DEC:
308 vcpu->arch.dec = vcpu->arch.gpr[rs]; 317 vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
309 kvmppc_emulate_dec(vcpu); 318 kvmppc_emulate_dec(vcpu);
310 break; 319 break;
311 320
312 case SPRN_SPRG0: 321 case SPRN_SPRG0:
313 vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; 322 vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
314 case SPRN_SPRG1: 323 case SPRN_SPRG1:
315 vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; 324 vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
316 case SPRN_SPRG2: 325 case SPRN_SPRG2:
317 vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; 326 vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
318 case SPRN_SPRG3: 327 case SPRN_SPRG3:
319 vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; 328 vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
320 329
321 default: 330 default:
322 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); 331 emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
@@ -348,7 +357,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
348 rb = get_rb(inst); 357 rb = get_rb(inst);
349 358
350 emulated = kvmppc_handle_store(run, vcpu, 359 emulated = kvmppc_handle_store(run, vcpu,
351 vcpu->arch.gpr[rs], 360 kvmppc_get_gpr(vcpu, rs),
352 4, 0); 361 4, 0);
353 break; 362 break;
354 363
@@ -363,7 +372,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
363 rb = get_rb(inst); 372 rb = get_rb(inst);
364 373
365 emulated = kvmppc_handle_store(run, vcpu, 374 emulated = kvmppc_handle_store(run, vcpu,
366 vcpu->arch.gpr[rs], 375 kvmppc_get_gpr(vcpu, rs),
367 2, 0); 376 2, 0);
368 break; 377 break;
369 378
@@ -382,7 +391,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
382 ra = get_ra(inst); 391 ra = get_ra(inst);
383 rt = get_rt(inst); 392 rt = get_rt(inst);
384 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); 393 emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
385 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 394 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
386 break; 395 break;
387 396
388 case OP_LBZ: 397 case OP_LBZ:
@@ -394,35 +403,39 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
394 ra = get_ra(inst); 403 ra = get_ra(inst);
395 rt = get_rt(inst); 404 rt = get_rt(inst);
396 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); 405 emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
397 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 406 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
398 break; 407 break;
399 408
400 case OP_STW: 409 case OP_STW:
401 rs = get_rs(inst); 410 rs = get_rs(inst);
402 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 411 emulated = kvmppc_handle_store(run, vcpu,
412 kvmppc_get_gpr(vcpu, rs),
403 4, 1); 413 4, 1);
404 break; 414 break;
405 415
406 case OP_STWU: 416 case OP_STWU:
407 ra = get_ra(inst); 417 ra = get_ra(inst);
408 rs = get_rs(inst); 418 rs = get_rs(inst);
409 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 419 emulated = kvmppc_handle_store(run, vcpu,
420 kvmppc_get_gpr(vcpu, rs),
410 4, 1); 421 4, 1);
411 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 422 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
412 break; 423 break;
413 424
414 case OP_STB: 425 case OP_STB:
415 rs = get_rs(inst); 426 rs = get_rs(inst);
416 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 427 emulated = kvmppc_handle_store(run, vcpu,
428 kvmppc_get_gpr(vcpu, rs),
417 1, 1); 429 1, 1);
418 break; 430 break;
419 431
420 case OP_STBU: 432 case OP_STBU:
421 ra = get_ra(inst); 433 ra = get_ra(inst);
422 rs = get_rs(inst); 434 rs = get_rs(inst);
423 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 435 emulated = kvmppc_handle_store(run, vcpu,
436 kvmppc_get_gpr(vcpu, rs),
424 1, 1); 437 1, 1);
425 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 438 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
426 break; 439 break;
427 440
428 case OP_LHZ: 441 case OP_LHZ:
@@ -434,21 +447,23 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
434 ra = get_ra(inst); 447 ra = get_ra(inst);
435 rt = get_rt(inst); 448 rt = get_rt(inst);
436 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); 449 emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
437 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 450 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
438 break; 451 break;
439 452
440 case OP_STH: 453 case OP_STH:
441 rs = get_rs(inst); 454 rs = get_rs(inst);
442 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 455 emulated = kvmppc_handle_store(run, vcpu,
456 kvmppc_get_gpr(vcpu, rs),
443 2, 1); 457 2, 1);
444 break; 458 break;
445 459
446 case OP_STHU: 460 case OP_STHU:
447 ra = get_ra(inst); 461 ra = get_ra(inst);
448 rs = get_rs(inst); 462 rs = get_rs(inst);
449 emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], 463 emulated = kvmppc_handle_store(run, vcpu,
464 kvmppc_get_gpr(vcpu, rs),
450 2, 1); 465 2, 1);
451 vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; 466 kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
452 break; 467 break;
453 468
454 default: 469 default:
@@ -461,6 +476,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
461 advance = 0; 476 advance = 0;
462 printk(KERN_ERR "Couldn't emulate instruction 0x%08x " 477 printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
463 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); 478 "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
479 kvmppc_core_queue_program(vcpu, 0);
464 } 480 }
465 } 481 }
466 482
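Note on the emulate.c hunks above: on PPC64 a mtdec now also dequeues any pending decrementer interrupt, since writing a positive value lowers the line; an instruction word of KVM_INST_FETCH_FAILED returns EMULATE_DONE so the fetch is retried on the next guest entry; trap instructions queue the program interrupt together with its cause (SRR1_PROGTRAP on 64-bit, ESR_PTR on booke) instead of patching vcpu->arch.esr first; and an instruction the emulator cannot handle now raises a program interrupt in the guest as well. The new trap path, shown in isolation:

    case OP_TRAP:
    #ifdef CONFIG_PPC64
    case OP_TRAP_64:
            kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
    #else
            kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
    #endif
            advance = 0;
            break;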
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f06cf93b178e..51aedd7f16bc 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -137,6 +137,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
137{ 137{
138 kvmppc_free_vcpus(kvm); 138 kvmppc_free_vcpus(kvm);
139 kvm_free_physmem(kvm); 139 kvm_free_physmem(kvm);
140 cleanup_srcu_struct(&kvm->srcu);
140 kfree(kvm); 141 kfree(kvm);
141} 142}
142 143
@@ -165,14 +166,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
165 return -EINVAL; 166 return -EINVAL;
166} 167}
167 168
168int kvm_arch_set_memory_region(struct kvm *kvm, 169int kvm_arch_prepare_memory_region(struct kvm *kvm,
169 struct kvm_userspace_memory_region *mem, 170 struct kvm_memory_slot *memslot,
170 struct kvm_memory_slot old, 171 struct kvm_memory_slot old,
171 int user_alloc) 172 struct kvm_userspace_memory_region *mem,
173 int user_alloc)
172{ 174{
173 return 0; 175 return 0;
174} 176}
175 177
178void kvm_arch_commit_memory_region(struct kvm *kvm,
179 struct kvm_userspace_memory_region *mem,
180 struct kvm_memory_slot old,
181 int user_alloc)
182{
183 return;
184}
185
186
176void kvm_arch_flush_shadow(struct kvm *kvm) 187void kvm_arch_flush_shadow(struct kvm *kvm)
177{ 188{
178} 189}
@@ -260,34 +271,35 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
260static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, 271static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
261 struct kvm_run *run) 272 struct kvm_run *run)
262{ 273{
263 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 274 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
264 *gpr = run->dcr.data;
265} 275}
266 276
267static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, 277static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
268 struct kvm_run *run) 278 struct kvm_run *run)
269{ 279{
270 ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; 280 ulong gpr;
271 281
272 if (run->mmio.len > sizeof(*gpr)) { 282 if (run->mmio.len > sizeof(gpr)) {
273 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); 283 printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
274 return; 284 return;
275 } 285 }
276 286
277 if (vcpu->arch.mmio_is_bigendian) { 287 if (vcpu->arch.mmio_is_bigendian) {
278 switch (run->mmio.len) { 288 switch (run->mmio.len) {
279 case 4: *gpr = *(u32 *)run->mmio.data; break; 289 case 4: gpr = *(u32 *)run->mmio.data; break;
280 case 2: *gpr = *(u16 *)run->mmio.data; break; 290 case 2: gpr = *(u16 *)run->mmio.data; break;
281 case 1: *gpr = *(u8 *)run->mmio.data; break; 291 case 1: gpr = *(u8 *)run->mmio.data; break;
282 } 292 }
283 } else { 293 } else {
284 /* Convert BE data from userland back to LE. */ 294 /* Convert BE data from userland back to LE. */
285 switch (run->mmio.len) { 295 switch (run->mmio.len) {
286 case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; 296 case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
287 case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; 297 case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
288 case 1: *gpr = *(u8 *)run->mmio.data; break; 298 case 1: gpr = *(u8 *)run->mmio.data; break;
289 } 299 }
290 } 300 }
301
302 kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
291} 303}
292 304
293int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 305int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
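Note on the powerpc.c hunks above: the architecture hook kvm_arch_set_memory_region() is split into a prepare step, which may fail and runs before the new memslot array is published, and a commit step, which runs afterwards and must not fail; on ppc both are no-ops. kvm_arch_destroy_vm() now also cleans up the per-VM SRCU state that the reworked memslot handling relies on, and kvmppc_complete_mmio_load() assembles the loaded value in a local and stores it with kvmppc_set_gpr() rather than writing through a pointer into vcpu->arch.gpr[], which only worked while the GPR file was guaranteed to be a plain array. A rough sketch of how the generic code is assumed to drive the two hooks (the real sequencing lives in virt/kvm/kvm_main.c and may differ):

    r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
    if (r)
            goto out_free;

    /* ... publish the new kvm_memslots under SRCU and synchronize ... */

    kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);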
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3fa0a10e4668..49292869a5cd 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -242,6 +242,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
242 kvm_free_physmem(kvm); 242 kvm_free_physmem(kvm);
243 free_page((unsigned long)(kvm->arch.sca)); 243 free_page((unsigned long)(kvm->arch.sca));
244 debug_unregister(kvm->arch.dbf); 244 debug_unregister(kvm->arch.dbf);
245 cleanup_srcu_struct(&kvm->srcu);
245 kfree(kvm); 246 kfree(kvm);
246} 247}
247 248
@@ -690,14 +691,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
690} 691}
691 692
692/* Section: memory related */ 693/* Section: memory related */
693int kvm_arch_set_memory_region(struct kvm *kvm, 694int kvm_arch_prepare_memory_region(struct kvm *kvm,
694 struct kvm_userspace_memory_region *mem, 695 struct kvm_memory_slot *memslot,
695 struct kvm_memory_slot old, 696 struct kvm_memory_slot old,
696 int user_alloc) 697 struct kvm_userspace_memory_region *mem,
698 int user_alloc)
697{ 699{
698 int i;
699 struct kvm_vcpu *vcpu;
700
701 /* A few sanity checks. We can have exactly one memory slot which has 700 /* A few sanity checks. We can have exactly one memory slot which has
702 to start at guest virtual zero and which has to be located at a 701 to start at guest virtual zero and which has to be located at a
703 page boundary in userland and which has to end at a page boundary. 702 page boundary in userland and which has to end at a page boundary.
@@ -720,14 +719,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
720 if (!user_alloc) 719 if (!user_alloc)
721 return -EINVAL; 720 return -EINVAL;
722 721
722 return 0;
723}
724
725void kvm_arch_commit_memory_region(struct kvm *kvm,
726 struct kvm_userspace_memory_region *mem,
727 struct kvm_memory_slot old,
728 int user_alloc)
729{
730 int i;
731 struct kvm_vcpu *vcpu;
732
723 /* request update of sie control block for all available vcpus */ 733 /* request update of sie control block for all available vcpus */
724 kvm_for_each_vcpu(i, vcpu, kvm) { 734 kvm_for_each_vcpu(i, vcpu, kvm) {
725 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) 735 if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
726 continue; 736 continue;
727 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); 737 kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
728 } 738 }
729
730 return 0;
731} 739}
732 740
733void kvm_arch_flush_shadow(struct kvm *kvm) 741void kvm_arch_flush_shadow(struct kvm *kvm)
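Note on the kvm-s390.c hunks above: the same prepare/commit split is applied here; the sanity checks that can still reject a slot stay in the prepare step, while the per-vcpu reload request and SIGP stop move into the commit step, so they are only issued once the new slot layout is actually in place:

    kvm_for_each_vcpu(i, vcpu, kvm) {
            if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
                    continue;   /* a reload is already pending for this vcpu */
            kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
    }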
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 06cce8285ba0..60f09ab3672c 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
67 67
68static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) 68static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
69{ 69{
70 int idx;
70 struct kvm_memory_slot *mem; 71 struct kvm_memory_slot *mem;
72 struct kvm_memslots *memslots;
71 73
72 down_read(&vcpu->kvm->slots_lock); 74 idx = srcu_read_lock(&vcpu->kvm->srcu);
73 mem = &vcpu->kvm->memslots[0]; 75 memslots = rcu_dereference(vcpu->kvm->memslots);
76
77 mem = &memslots->memslots[0];
74 78
75 vcpu->arch.sie_block->gmsor = mem->userspace_addr; 79 vcpu->arch.sie_block->gmsor = mem->userspace_addr;
76 vcpu->arch.sie_block->gmslm = 80 vcpu->arch.sie_block->gmslm =
@@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
78 (mem->npages << PAGE_SHIFT) + 82 (mem->npages << PAGE_SHIFT) +
79 VIRTIODESCSPACE - 1ul; 83 VIRTIODESCSPACE - 1ul;
80 84
81 up_read(&vcpu->kvm->slots_lock); 85 srcu_read_unlock(&vcpu->kvm->srcu, idx);
82} 86}
83 87
84/* implemented in priv.c */ 88/* implemented in priv.c */
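Note on the kvm-s390.h hunk above: kvm_s390_vcpu_set_mem() reads the memslot array under SRCU instead of taking the old slots_lock rwsem, matching the memslot rework elsewhere in this merge. The read-side pattern it follows:

    int idx;
    struct kvm_memslots *slots;

    idx = srcu_read_lock(&kvm->srcu);
    slots = rcu_dereference(kvm->memslots);
    /* ... read fields of slots->memslots[...] ... */
    srcu_read_unlock(&kvm->srcu, idx);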
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 9f828f87ca35..493092efaa3b 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,6 +11,7 @@ header-y += sigcontext32.h
11header-y += ucontext.h 11header-y += ucontext.h
12header-y += processor-flags.h 12header-y += processor-flags.h
13header-y += hw_breakpoint.h 13header-y += hw_breakpoint.h
14header-y += hyperv.h
14 15
15unifdef-y += e820.h 16unifdef-y += e820.h
16unifdef-y += ist.h 17unifdef-y += ist.h
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h
new file mode 100644
index 000000000000..e153a2b3889a
--- /dev/null
+++ b/arch/x86/include/asm/hyperv.h
@@ -0,0 +1,186 @@
1#ifndef _ASM_X86_KVM_HYPERV_H
2#define _ASM_X86_KVM_HYPERV_H
3
4#include <linux/types.h>
5
6/*
7 * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
8 * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
9 */
10#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
11#define HYPERV_CPUID_INTERFACE 0x40000001
12#define HYPERV_CPUID_VERSION 0x40000002
13#define HYPERV_CPUID_FEATURES 0x40000003
14#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
15#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
16
17/*
18 * Feature identification. EAX indicates which features are available
19 * to the partition based upon the current partition privileges.
20 */
21
22/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
23#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
24/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
25#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
26/*
27 * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
28 * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
29 */
30#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2)
31/*
32 * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
33 * HV_X64_MSR_STIMER3_COUNT) available
34 */
35#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
36/*
37 * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
38 * are available
39 */
40#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4)
41/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
42#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5)
43/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
44#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6)
45/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
46#define HV_X64_MSR_RESET_AVAILABLE (1 << 7)
47 /*
48 * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
49 * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
50 * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
51 */
52#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
53
54/*
55 * Feature identification: EBX indicates which flags were specified at
56 * partition creation. The format is the same as the partition creation
57 * flag structure defined in section Partition Creation Flags.
58 */
59#define HV_X64_CREATE_PARTITIONS (1 << 0)
60#define HV_X64_ACCESS_PARTITION_ID (1 << 1)
61#define HV_X64_ACCESS_MEMORY_POOL (1 << 2)
62#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3)
63#define HV_X64_POST_MESSAGES (1 << 4)
64#define HV_X64_SIGNAL_EVENTS (1 << 5)
65#define HV_X64_CREATE_PORT (1 << 6)
66#define HV_X64_CONNECT_PORT (1 << 7)
67#define HV_X64_ACCESS_STATS (1 << 8)
68#define HV_X64_DEBUGGING (1 << 11)
69#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12)
70#define HV_X64_CONFIGURE_PROFILER (1 << 13)
71
72/*
73 * Feature identification. EDX indicates which miscellaneous features
74 * are available to the partition.
75 */
76/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
77#define HV_X64_MWAIT_AVAILABLE (1 << 0)
78/* Guest debugging support is available */
79#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1)
80/* Performance Monitor support is available*/
81#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2)
82/* Support for physical CPU dynamic partitioning events is available*/
83#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3)
84/*
85 * Support for passing hypercall input parameter block via XMM
86 * registers is available
87 */
88#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
89/* Support for a virtual guest idle state is available */
90#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
91
92/*
93 * Implementation recommendations. Indicates which behaviors the hypervisor
94 * recommends the OS implement for optimal performance.
95 */
96 /*
97 * Recommend using hypercall for address space switches rather
98 * than MOV to CR3 instruction
99 */
100#define HV_X64_MWAIT_RECOMMENDED (1 << 0)
101/* Recommend using hypercall for local TLB flushes rather
102 * than INVLPG or MOV to CR3 instructions */
103#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
104/*
105 * Recommend using hypercall for remote TLB flushes rather
106 * than inter-processor interrupts
107 */
108#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2)
109/*
110 * Recommend using MSRs for accessing APIC registers
111 * EOI, ICR and TPR rather than their memory-mapped counterparts
112 */
113#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3)
114/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
115#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4)
116/*
117 * Recommend using relaxed timing for this partition. If used,
118 * the VM should disable any watchdog timeouts that rely on the
119 * timely delivery of external interrupts
120 */
121#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
122
123/* MSR used to identify the guest OS. */
124#define HV_X64_MSR_GUEST_OS_ID 0x40000000
125
126/* MSR used to setup pages used to communicate with the hypervisor. */
127#define HV_X64_MSR_HYPERCALL 0x40000001
128
129/* MSR used to provide vcpu index */
130#define HV_X64_MSR_VP_INDEX 0x40000002
131
132/* Define the virtual APIC registers */
133#define HV_X64_MSR_EOI 0x40000070
134#define HV_X64_MSR_ICR 0x40000071
135#define HV_X64_MSR_TPR 0x40000072
136#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
137
138/* Define synthetic interrupt controller model specific registers. */
139#define HV_X64_MSR_SCONTROL 0x40000080
140#define HV_X64_MSR_SVERSION 0x40000081
141#define HV_X64_MSR_SIEFP 0x40000082
142#define HV_X64_MSR_SIMP 0x40000083
143#define HV_X64_MSR_EOM 0x40000084
144#define HV_X64_MSR_SINT0 0x40000090
145#define HV_X64_MSR_SINT1 0x40000091
146#define HV_X64_MSR_SINT2 0x40000092
147#define HV_X64_MSR_SINT3 0x40000093
148#define HV_X64_MSR_SINT4 0x40000094
149#define HV_X64_MSR_SINT5 0x40000095
150#define HV_X64_MSR_SINT6 0x40000096
151#define HV_X64_MSR_SINT7 0x40000097
152#define HV_X64_MSR_SINT8 0x40000098
153#define HV_X64_MSR_SINT9 0x40000099
154#define HV_X64_MSR_SINT10 0x4000009A
155#define HV_X64_MSR_SINT11 0x4000009B
156#define HV_X64_MSR_SINT12 0x4000009C
157#define HV_X64_MSR_SINT13 0x4000009D
158#define HV_X64_MSR_SINT14 0x4000009E
159#define HV_X64_MSR_SINT15 0x4000009F
160
161
162#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
163#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
164#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
165 (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
166
167/* Declare the various hypercall operations. */
168#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008
169
170#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
171#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
172#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
173 (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
174
175#define HV_PROCESSOR_POWER_STATE_C0 0
176#define HV_PROCESSOR_POWER_STATE_C1 1
177#define HV_PROCESSOR_POWER_STATE_C2 2
178#define HV_PROCESSOR_POWER_STATE_C3 3
179
180/* hypercall status code */
181#define HV_STATUS_SUCCESS 0
182#define HV_STATUS_INVALID_HYPERCALL_CODE 2
183#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
184#define HV_STATUS_INVALID_ALIGNMENT 4
185
186#endif
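Note on the new asm/hyperv.h above: it carries only constants from the Hyper-V specification, namely the 0x4000xxxx CPUID leaves, the feature and recommendation bits, and the synthetic MSR numbers, and is exported to userspace via the Kbuild change earlier in this diff. As one example of how the hypercall MSR layout is meant to be decoded (sketch, helper names are illustrative):

    static inline bool hv_hypercall_enabled(u64 msr)
    {
            return msr & HV_X64_MSR_HYPERCALL_ENABLE;        /* bit 0 */
    }

    static inline u64 hv_hypercall_page_gpa(u64 msr)
    {
            /* bits 63:12 hold the guest-physical address of the page */
            return msr & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
    }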
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7c18e1230f54..7a6f54fa13ba 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -54,13 +54,23 @@ struct x86_emulate_ctxt;
54struct x86_emulate_ops { 54struct x86_emulate_ops {
55 /* 55 /*
56 * read_std: Read bytes of standard (non-emulated/special) memory. 56 * read_std: Read bytes of standard (non-emulated/special) memory.
57 * Used for instruction fetch, stack operations, and others. 57 * Used for descriptor reading.
58 * @addr: [IN ] Linear address from which to read. 58 * @addr: [IN ] Linear address from which to read.
59 * @val: [OUT] Value read from memory, zero-extended to 'u_long'. 59 * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
60 * @bytes: [IN ] Number of bytes to read from memory. 60 * @bytes: [IN ] Number of bytes to read from memory.
61 */ 61 */
62 int (*read_std)(unsigned long addr, void *val, 62 int (*read_std)(unsigned long addr, void *val,
63 unsigned int bytes, struct kvm_vcpu *vcpu); 63 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
64
65 /*
66 * fetch: Read bytes of standard (non-emulated/special) memory.
67 * Used for instruction fetch.
68 * @addr: [IN ] Linear address from which to read.
69 * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
70 * @bytes: [IN ] Number of bytes to read from memory.
71 */
72 int (*fetch)(unsigned long addr, void *val,
73 unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
64 74
65 /* 75 /*
66 * read_emulated: Read bytes from emulated/special memory area. 76 * read_emulated: Read bytes from emulated/special memory area.
@@ -74,7 +84,7 @@ struct x86_emulate_ops {
74 struct kvm_vcpu *vcpu); 84 struct kvm_vcpu *vcpu);
75 85
76 /* 86 /*
77 * write_emulated: Read bytes from emulated/special memory area. 87 * write_emulated: Write bytes to emulated/special memory area.
78 * @addr: [IN ] Linear address to which to write. 88 * @addr: [IN ] Linear address to which to write.
79 * @val: [IN ] Value to write to memory (low-order bytes used as 89 * @val: [IN ] Value to write to memory (low-order bytes used as
80 * required). 90 * required).
@@ -168,6 +178,7 @@ struct x86_emulate_ctxt {
168 178
169/* Execution mode, passed to the emulator. */ 179/* Execution mode, passed to the emulator. */
170#define X86EMUL_MODE_REAL 0 /* Real mode. */ 180#define X86EMUL_MODE_REAL 0 /* Real mode. */
181#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */
171#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ 182#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
172#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ 183#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
173#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ 184#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
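Note on the kvm_emulate.h hunks above: read_std() is narrowed to descriptor reads, a separate fetch() callback now handles instruction fetch, and both gain a u32 *error out-parameter so a failing guest memory access can report its page-fault error code back to the emulator instead of being dropped; a Virtual-8086 execution mode is also added alongside the existing real and protected modes. A sketch of how a caller is assumed to use the new signature (variable names illustrative, the concrete return-code handling lives in the emulator proper and may differ):

    u32 err;
    int rc;

    rc = ops->fetch(rip, insn, sizeof(insn), vcpu, &err);
    if (rc != X86EMUL_CONTINUE)
            return rc;   /* err carries the #PF error code to inject */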
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4f865e8b8540..06d9e79ca37d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -25,7 +25,7 @@
25#include <asm/mtrr.h> 25#include <asm/mtrr.h>
26#include <asm/msr-index.h> 26#include <asm/msr-index.h>
27 27
28#define KVM_MAX_VCPUS 16 28#define KVM_MAX_VCPUS 64
29#define KVM_MEMORY_SLOTS 32 29#define KVM_MEMORY_SLOTS 32
30/* memory slots that does not exposed to userspace */ 30/* memory slots that does not exposed to userspace */
31#define KVM_PRIVATE_MEM_SLOTS 4 31#define KVM_PRIVATE_MEM_SLOTS 4
@@ -38,19 +38,6 @@
38#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ 38#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
39 0xFFFFFF0000000000ULL) 39 0xFFFFFF0000000000ULL)
40 40
41#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
42 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
43#define KVM_GUEST_CR0_MASK \
44 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
45#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
46 (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
47#define KVM_VM_CR0_ALWAYS_ON \
48 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
49#define KVM_GUEST_CR4_MASK \
50 (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
51#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
52#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
53
54#define INVALID_PAGE (~(hpa_t)0) 41#define INVALID_PAGE (~(hpa_t)0)
55#define UNMAPPED_GVA (~(gpa_t)0) 42#define UNMAPPED_GVA (~(gpa_t)0)
56 43
@@ -256,7 +243,8 @@ struct kvm_mmu {
256 void (*new_cr3)(struct kvm_vcpu *vcpu); 243 void (*new_cr3)(struct kvm_vcpu *vcpu);
257 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); 244 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
258 void (*free)(struct kvm_vcpu *vcpu); 245 void (*free)(struct kvm_vcpu *vcpu);
259 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); 246 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
247 u32 *error);
260 void (*prefetch_page)(struct kvm_vcpu *vcpu, 248 void (*prefetch_page)(struct kvm_vcpu *vcpu,
261 struct kvm_mmu_page *page); 249 struct kvm_mmu_page *page);
262 int (*sync_page)(struct kvm_vcpu *vcpu, 250 int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -282,13 +270,15 @@ struct kvm_vcpu_arch {
282 u32 regs_dirty; 270 u32 regs_dirty;
283 271
284 unsigned long cr0; 272 unsigned long cr0;
273 unsigned long cr0_guest_owned_bits;
285 unsigned long cr2; 274 unsigned long cr2;
286 unsigned long cr3; 275 unsigned long cr3;
287 unsigned long cr4; 276 unsigned long cr4;
277 unsigned long cr4_guest_owned_bits;
288 unsigned long cr8; 278 unsigned long cr8;
289 u32 hflags; 279 u32 hflags;
290 u64 pdptrs[4]; /* pae */ 280 u64 pdptrs[4]; /* pae */
291 u64 shadow_efer; 281 u64 efer;
292 u64 apic_base; 282 u64 apic_base;
293 struct kvm_lapic *apic; /* kernel irqchip context */ 283 struct kvm_lapic *apic; /* kernel irqchip context */
294 int32_t apic_arb_prio; 284 int32_t apic_arb_prio;
@@ -374,17 +364,27 @@ struct kvm_vcpu_arch {
374 /* used for guest single stepping over the given code position */ 364 /* used for guest single stepping over the given code position */
375 u16 singlestep_cs; 365 u16 singlestep_cs;
376 unsigned long singlestep_rip; 366 unsigned long singlestep_rip;
367 /* fields used by HYPER-V emulation */
368 u64 hv_vapic;
377}; 369};
378 370
379struct kvm_mem_alias { 371struct kvm_mem_alias {
380 gfn_t base_gfn; 372 gfn_t base_gfn;
381 unsigned long npages; 373 unsigned long npages;
382 gfn_t target_gfn; 374 gfn_t target_gfn;
375#define KVM_ALIAS_INVALID 1UL
376 unsigned long flags;
383}; 377};
384 378
385struct kvm_arch{ 379#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
386 int naliases; 380
381struct kvm_mem_aliases {
387 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; 382 struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
383 int naliases;
384};
385
386struct kvm_arch {
387 struct kvm_mem_aliases *aliases;
388 388
389 unsigned int n_free_mmu_pages; 389 unsigned int n_free_mmu_pages;
390 unsigned int n_requested_mmu_pages; 390 unsigned int n_requested_mmu_pages;
@@ -416,6 +416,10 @@ struct kvm_arch{
416 s64 kvmclock_offset; 416 s64 kvmclock_offset;
417 417
418 struct kvm_xen_hvm_config xen_hvm_config; 418 struct kvm_xen_hvm_config xen_hvm_config;
419
420 /* fields used by HYPER-V emulation */
421 u64 hv_guest_os_id;
422 u64 hv_hypercall;
419}; 423};
420 424
421struct kvm_vm_stat { 425struct kvm_vm_stat {
@@ -471,6 +475,7 @@ struct kvm_x86_ops {
471 int (*hardware_setup)(void); /* __init */ 475 int (*hardware_setup)(void); /* __init */
472 void (*hardware_unsetup)(void); /* __exit */ 476 void (*hardware_unsetup)(void); /* __exit */
473 bool (*cpu_has_accelerated_tpr)(void); 477 bool (*cpu_has_accelerated_tpr)(void);
478 void (*cpuid_update)(struct kvm_vcpu *vcpu);
474 479
475 /* Create, but do not attach this VCPU */ 480 /* Create, but do not attach this VCPU */
476 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); 481 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
@@ -492,6 +497,7 @@ struct kvm_x86_ops {
492 void (*set_segment)(struct kvm_vcpu *vcpu, 497 void (*set_segment)(struct kvm_vcpu *vcpu,
493 struct kvm_segment *var, int seg); 498 struct kvm_segment *var, int seg);
494 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); 499 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
500 void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
495 void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); 501 void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
496 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); 502 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
497 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); 503 void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -501,12 +507,13 @@ struct kvm_x86_ops {
501 void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 507 void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
502 void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 508 void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
503 void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); 509 void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
504 unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); 510 int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
505 void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, 511 int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
506 int *exception);
507 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 512 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
508 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 513 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
509 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); 514 void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
515 void (*fpu_activate)(struct kvm_vcpu *vcpu);
516 void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
510 517
511 void (*tlb_flush)(struct kvm_vcpu *vcpu); 518 void (*tlb_flush)(struct kvm_vcpu *vcpu);
512 519
@@ -531,7 +538,8 @@ struct kvm_x86_ops {
531 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); 538 int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
532 int (*get_tdp_level)(void); 539 int (*get_tdp_level)(void);
533 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); 540 u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
534 bool (*gb_page_enable)(void); 541 int (*get_lpage_level)(void);
542 bool (*rdtscp_supported)(void);
535 543
536 const struct trace_print_flags *exit_reasons_str; 544 const struct trace_print_flags *exit_reasons_str;
537}; 545};
@@ -606,8 +614,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
606 unsigned long value); 614 unsigned long value);
607 615
608void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); 616void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
609int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 617int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
610 int type_bits, int seg);
611 618
612int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); 619int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
613 620
@@ -653,6 +660,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
653int kvm_mmu_load(struct kvm_vcpu *vcpu); 660int kvm_mmu_load(struct kvm_vcpu *vcpu);
654void kvm_mmu_unload(struct kvm_vcpu *vcpu); 661void kvm_mmu_unload(struct kvm_vcpu *vcpu);
655void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); 662void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
663gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
664gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
665gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
666gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
656 667
657int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); 668int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
658 669
@@ -666,6 +677,7 @@ void kvm_disable_tdp(void);
666 677
667int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); 678int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
668int complete_pio(struct kvm_vcpu *vcpu); 679int complete_pio(struct kvm_vcpu *vcpu);
680bool kvm_check_iopl(struct kvm_vcpu *vcpu);
669 681
670struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); 682struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
671 683
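Note: an illustrative sketch (not part of the patch) of the intended calling pattern for the access-qualified translators declared above. UNMAPPED_GVA and X86EMUL_PROPAGATE_FAULT come from this tree; kvm_inject_page_fault() is assumed to keep its existing (vcpu, addr, error_code) signature:

	u32 error;
	gpa_t gpa;

	/* translate for a read access; error is filled in on failure */
	gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, &error);
	if (gpa == UNMAPPED_GVA) {
		kvm_inject_page_fault(vcpu, gva, error);
		return X86EMUL_PROPAGATE_FAULT;
	}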
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index c584076a47f4..ffae1420e7d7 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -2,6 +2,7 @@
2#define _ASM_X86_KVM_PARA_H 2#define _ASM_X86_KVM_PARA_H
3 3
4#include <linux/types.h> 4#include <linux/types.h>
5#include <asm/hyperv.h>
5 6
6/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It 7/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
7 * should be used to determine that a VM is running under KVM. 8 * should be used to determine that a VM is running under KVM.
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1fecb7e61130..38638cd2fa4c 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -313,7 +313,7 @@ struct __attribute__ ((__packed__)) vmcb {
313 313
314#define SVM_EXIT_ERR -1 314#define SVM_EXIT_ERR -1
315 315
316#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ 316#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
317 317
318#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" 318#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
319#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" 319#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
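Note: the rewrite above is more than a cosmetic rename. The old literal (1 << 3 | 1) evaluates to 0x9, which is X86_CR0_TS | X86_CR0_PE and therefore did not match its own "TS and MP" comment; X86_CR0_TS | X86_CR0_MP is 0x8 | 0x2 = 0xa, so the mask now really covers TS and MP as intended.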
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2b4945419a84..fb9a080740ec 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -53,6 +53,7 @@
53 */ 53 */
54#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 54#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
55#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 55#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
56#define SECONDARY_EXEC_RDTSCP 0x00000008
56#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 57#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
57#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 58#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
58#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 59#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
@@ -251,6 +252,7 @@ enum vmcs_field {
251#define EXIT_REASON_MSR_READ 31 252#define EXIT_REASON_MSR_READ 31
252#define EXIT_REASON_MSR_WRITE 32 253#define EXIT_REASON_MSR_WRITE 32
253#define EXIT_REASON_MWAIT_INSTRUCTION 36 254#define EXIT_REASON_MWAIT_INSTRUCTION 36
255#define EXIT_REASON_MONITOR_INSTRUCTION 39
254#define EXIT_REASON_PAUSE_INSTRUCTION 40 256#define EXIT_REASON_PAUSE_INSTRUCTION 40
255#define EXIT_REASON_MCE_DURING_VMENTRY 41 257#define EXIT_REASON_MCE_DURING_VMENTRY 41
256#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 258#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
@@ -362,6 +364,7 @@ enum vmcs_field {
362#define VMX_EPTP_UC_BIT (1ull << 8) 364#define VMX_EPTP_UC_BIT (1ull << 8)
363#define VMX_EPTP_WB_BIT (1ull << 14) 365#define VMX_EPTP_WB_BIT (1ull << 14)
364#define VMX_EPT_2MB_PAGE_BIT (1ull << 16) 366#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
367#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
365#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) 368#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
366#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) 369#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
367#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) 370#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
@@ -374,7 +377,7 @@ enum vmcs_field {
374#define VMX_EPT_READABLE_MASK 0x1ull 377#define VMX_EPT_READABLE_MASK 0x1ull
375#define VMX_EPT_WRITABLE_MASK 0x2ull 378#define VMX_EPT_WRITABLE_MASK 0x2ull
376#define VMX_EPT_EXECUTABLE_MASK 0x4ull 379#define VMX_EPT_EXECUTABLE_MASK 0x4ull
377#define VMX_EPT_IGMT_BIT (1ull << 6) 380#define VMX_EPT_IPAT_BIT (1ull << 6)
378 381
379#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul 382#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
380 383
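Note: two of the additions above track the architectural definitions directly: bit 6 of an EPT entry is the SDM's "ignore PAT memory type" flag, so VMX_EPT_IGMT_BIT becomes VMX_EPT_IPAT_BIT, and bit 17 of the EPT/VPID capability MSR (VMX_EPT_1GB_PAGE_BIT) advertises 1 GB EPT page support alongside the existing 2 MB bit.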
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9055e5872ff0..1c0c6ab9c60f 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void)
301 register_sysctl_table(kernel_root_table2); 301 register_sysctl_table(kernel_root_table2);
302#endif 302#endif
303 on_each_cpu(cpu_vsyscall_init, NULL, 1); 303 on_each_cpu(cpu_vsyscall_init, NULL, 1);
304 hotcpu_notifier(cpu_vsyscall_notifier, 0); 304 /* notifier priority > KVM */
305 hotcpu_notifier(cpu_vsyscall_notifier, 30);
305 return 0; 306 return 0;
306} 307}
307 308
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3c4d0109ad20..970bbd479516 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
29 select HAVE_KVM_EVENTFD 29 select HAVE_KVM_EVENTFD
30 select KVM_APIC_ARCHITECTURE 30 select KVM_APIC_ARCHITECTURE
31 select USER_RETURN_NOTIFIER 31 select USER_RETURN_NOTIFIER
32 select KVM_MMIO
32 ---help--- 33 ---help---
33 Support hosting fully virtualized guest machines using hardware 34 Support hosting fully virtualized guest machines using hardware
34 virtualization extensions. You will need a fairly recent 35 virtualization extensions. You will need a fairly recent
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7e8faea4651e..4dade6ac0827 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -32,7 +32,7 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <asm/kvm_emulate.h> 33#include <asm/kvm_emulate.h>
34 34
35#include "mmu.h" /* for is_long_mode() */ 35#include "x86.h"
36 36
37/* 37/*
38 * Opcode effective-address decode tables. 38 * Opcode effective-address decode tables.
@@ -76,6 +76,8 @@
76#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ 76#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
77#define GroupMask 0xff /* Group number stored in bits 0:7 */ 77#define GroupMask 0xff /* Group number stored in bits 0:7 */
78/* Misc flags */ 78/* Misc flags */
79#define Lock (1<<26) /* lock prefix is allowed for the instruction */
80#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
79#define No64 (1<<28) 81#define No64 (1<<28)
80/* Source 2 operand type */ 82/* Source 2 operand type */
81#define Src2None (0<<29) 83#define Src2None (0<<29)
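Note: these two decode flags are enforced near the end of this patch in x86_emulate_insn(): a LOCK prefix on an instruction whose table entry lacks Lock now raises #UD, and an instruction marked Priv raises #GP(0) when executed at CPL > 0.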
@@ -88,39 +90,40 @@
88enum { 90enum {
89 Group1_80, Group1_81, Group1_82, Group1_83, 91 Group1_80, Group1_81, Group1_82, Group1_83,
90 Group1A, Group3_Byte, Group3, Group4, Group5, Group7, 92 Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
93 Group8, Group9,
91}; 94};
92 95
93static u32 opcode_table[256] = { 96static u32 opcode_table[256] = {
94 /* 0x00 - 0x07 */ 97 /* 0x00 - 0x07 */
95 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 98 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
96 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 99 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
97 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 100 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
98 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 101 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
99 /* 0x08 - 0x0F */ 102 /* 0x08 - 0x0F */
100 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 103 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
101 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 104 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
102 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 105 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
103 ImplicitOps | Stack | No64, 0, 106 ImplicitOps | Stack | No64, 0,
104 /* 0x10 - 0x17 */ 107 /* 0x10 - 0x17 */
105 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 108 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
106 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 109 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
107 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 110 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
108 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 111 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
109 /* 0x18 - 0x1F */ 112 /* 0x18 - 0x1F */
110 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 113 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
111 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 114 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
112 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 115 ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
113 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, 116 ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
114 /* 0x20 - 0x27 */ 117 /* 0x20 - 0x27 */
115 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 118 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
116 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 119 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
117 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, 120 DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
118 /* 0x28 - 0x2F */ 121 /* 0x28 - 0x2F */
119 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 122 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
120 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 123 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
121 0, 0, 0, 0, 124 0, 0, 0, 0,
122 /* 0x30 - 0x37 */ 125 /* 0x30 - 0x37 */
123 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 126 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
124 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, 127 ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
125 0, 0, 0, 0, 128 0, 0, 0, 0,
126 /* 0x38 - 0x3F */ 129 /* 0x38 - 0x3F */
@@ -156,7 +159,7 @@ static u32 opcode_table[256] = {
156 Group | Group1_80, Group | Group1_81, 159 Group | Group1_80, Group | Group1_81,
157 Group | Group1_82, Group | Group1_83, 160 Group | Group1_82, Group | Group1_83,
158 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 161 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
159 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 162 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
160 /* 0x88 - 0x8F */ 163 /* 0x88 - 0x8F */
161 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, 164 ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
162 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 165 ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -210,7 +213,7 @@ static u32 opcode_table[256] = {
210 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, 213 SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
211 /* 0xF0 - 0xF7 */ 214 /* 0xF0 - 0xF7 */
212 0, 0, 0, 0, 215 0, 0, 0, 0,
213 ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, 216 ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
214 /* 0xF8 - 0xFF */ 217 /* 0xF8 - 0xFF */
215 ImplicitOps, 0, ImplicitOps, ImplicitOps, 218 ImplicitOps, 0, ImplicitOps, ImplicitOps,
216 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, 219 ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
@@ -218,16 +221,20 @@ static u32 opcode_table[256] = {
218 221
219static u32 twobyte_table[256] = { 222static u32 twobyte_table[256] = {
220 /* 0x00 - 0x0F */ 223 /* 0x00 - 0x0F */
221 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 224 0, Group | GroupDual | Group7, 0, 0,
222 ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 225 0, ImplicitOps, ImplicitOps | Priv, 0,
226 ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
227 0, ImplicitOps | ModRM, 0, 0,
223 /* 0x10 - 0x1F */ 228 /* 0x10 - 0x1F */
224 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 229 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
225 /* 0x20 - 0x2F */ 230 /* 0x20 - 0x2F */
226 ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, 231 ModRM | ImplicitOps | Priv, ModRM | Priv,
232 ModRM | ImplicitOps | Priv, ModRM | Priv,
233 0, 0, 0, 0,
227 0, 0, 0, 0, 0, 0, 0, 0, 234 0, 0, 0, 0, 0, 0, 0, 0,
228 /* 0x30 - 0x3F */ 235 /* 0x30 - 0x3F */
229 ImplicitOps, 0, ImplicitOps, 0, 236 ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
230 ImplicitOps, ImplicitOps, 0, 0, 237 ImplicitOps, ImplicitOps | Priv, 0, 0,
231 0, 0, 0, 0, 0, 0, 0, 0, 238 0, 0, 0, 0, 0, 0, 0, 0,
232 /* 0x40 - 0x47 */ 239 /* 0x40 - 0x47 */
233 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 240 DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -257,21 +264,23 @@ static u32 twobyte_table[256] = {
257 DstMem | SrcReg | Src2CL | ModRM, 0, 0, 264 DstMem | SrcReg | Src2CL | ModRM, 0, 0,
258 /* 0xA8 - 0xAF */ 265 /* 0xA8 - 0xAF */
259 ImplicitOps | Stack, ImplicitOps | Stack, 266 ImplicitOps | Stack, ImplicitOps | Stack,
260 0, DstMem | SrcReg | ModRM | BitOp, 267 0, DstMem | SrcReg | ModRM | BitOp | Lock,
261 DstMem | SrcReg | Src2ImmByte | ModRM, 268 DstMem | SrcReg | Src2ImmByte | ModRM,
262 DstMem | SrcReg | Src2CL | ModRM, 269 DstMem | SrcReg | Src2CL | ModRM,
263 ModRM, 0, 270 ModRM, 0,
264 /* 0xB0 - 0xB7 */ 271 /* 0xB0 - 0xB7 */
265 ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, 272 ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
266 DstMem | SrcReg | ModRM | BitOp, 273 0, DstMem | SrcReg | ModRM | BitOp | Lock,
267 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 274 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
268 DstReg | SrcMem16 | ModRM | Mov, 275 DstReg | SrcMem16 | ModRM | Mov,
269 /* 0xB8 - 0xBF */ 276 /* 0xB8 - 0xBF */
270 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, 277 0, 0,
278 Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
271 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, 279 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
272 DstReg | SrcMem16 | ModRM | Mov, 280 DstReg | SrcMem16 | ModRM | Mov,
273 /* 0xC0 - 0xCF */ 281 /* 0xC0 - 0xCF */
274 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, 282 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
283 0, 0, 0, Group | GroupDual | Group9,
275 0, 0, 0, 0, 0, 0, 0, 0, 284 0, 0, 0, 0, 0, 0, 0, 0,
276 /* 0xD0 - 0xDF */ 285 /* 0xD0 - 0xDF */
277 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 286 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -283,25 +292,41 @@ static u32 twobyte_table[256] = {
283 292
284static u32 group_table[] = { 293static u32 group_table[] = {
285 [Group1_80*8] = 294 [Group1_80*8] =
286 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 295 ByteOp | DstMem | SrcImm | ModRM | Lock,
287 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 296 ByteOp | DstMem | SrcImm | ModRM | Lock,
288 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 297 ByteOp | DstMem | SrcImm | ModRM | Lock,
289 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 298 ByteOp | DstMem | SrcImm | ModRM | Lock,
299 ByteOp | DstMem | SrcImm | ModRM | Lock,
300 ByteOp | DstMem | SrcImm | ModRM | Lock,
301 ByteOp | DstMem | SrcImm | ModRM | Lock,
302 ByteOp | DstMem | SrcImm | ModRM,
290 [Group1_81*8] = 303 [Group1_81*8] =
291 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 304 DstMem | SrcImm | ModRM | Lock,
292 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 305 DstMem | SrcImm | ModRM | Lock,
293 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 306 DstMem | SrcImm | ModRM | Lock,
294 DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, 307 DstMem | SrcImm | ModRM | Lock,
308 DstMem | SrcImm | ModRM | Lock,
309 DstMem | SrcImm | ModRM | Lock,
310 DstMem | SrcImm | ModRM | Lock,
311 DstMem | SrcImm | ModRM,
295 [Group1_82*8] = 312 [Group1_82*8] =
296 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 313 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
297 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 314 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
298 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 315 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
299 ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, 316 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
317 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
318 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
319 ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
320 ByteOp | DstMem | SrcImm | ModRM | No64,
300 [Group1_83*8] = 321 [Group1_83*8] =
301 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 322 DstMem | SrcImmByte | ModRM | Lock,
302 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 323 DstMem | SrcImmByte | ModRM | Lock,
303 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 324 DstMem | SrcImmByte | ModRM | Lock,
304 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, 325 DstMem | SrcImmByte | ModRM | Lock,
326 DstMem | SrcImmByte | ModRM | Lock,
327 DstMem | SrcImmByte | ModRM | Lock,
328 DstMem | SrcImmByte | ModRM | Lock,
329 DstMem | SrcImmByte | ModRM,
305 [Group1A*8] = 330 [Group1A*8] =
306 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, 331 DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
307 [Group3_Byte*8] = 332 [Group3_Byte*8] =
@@ -320,24 +345,39 @@ static u32 group_table[] = {
320 SrcMem | ModRM | Stack, 0, 345 SrcMem | ModRM | Stack, 0,
321 SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, 346 SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
322 [Group7*8] = 347 [Group7*8] =
323 0, 0, ModRM | SrcMem, ModRM | SrcMem, 348 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
324 SrcNone | ModRM | DstMem | Mov, 0, 349 SrcNone | ModRM | DstMem | Mov, 0,
325 SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, 350 SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
351 [Group8*8] =
352 0, 0, 0, 0,
353 DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
354 DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
355 [Group9*8] =
356 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
326}; 357};
327 358
328static u32 group2_table[] = { 359static u32 group2_table[] = {
329 [Group7*8] = 360 [Group7*8] =
330 SrcNone | ModRM, 0, 0, SrcNone | ModRM, 361 SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
331 SrcNone | ModRM | DstMem | Mov, 0, 362 SrcNone | ModRM | DstMem | Mov, 0,
332 SrcMem16 | ModRM | Mov, 0, 363 SrcMem16 | ModRM | Mov, 0,
364 [Group9*8] =
365 0, 0, 0, 0, 0, 0, 0, 0,
333}; 366};
334 367
335/* EFLAGS bit definitions. */ 368/* EFLAGS bit definitions. */
369#define EFLG_ID (1<<21)
370#define EFLG_VIP (1<<20)
371#define EFLG_VIF (1<<19)
372#define EFLG_AC (1<<18)
336#define EFLG_VM (1<<17) 373#define EFLG_VM (1<<17)
337#define EFLG_RF (1<<16) 374#define EFLG_RF (1<<16)
375#define EFLG_IOPL (3<<12)
376#define EFLG_NT (1<<14)
338#define EFLG_OF (1<<11) 377#define EFLG_OF (1<<11)
339#define EFLG_DF (1<<10) 378#define EFLG_DF (1<<10)
340#define EFLG_IF (1<<9) 379#define EFLG_IF (1<<9)
380#define EFLG_TF (1<<8)
341#define EFLG_SF (1<<7) 381#define EFLG_SF (1<<7)
342#define EFLG_ZF (1<<6) 382#define EFLG_ZF (1<<6)
343#define EFLG_AF (1<<4) 383#define EFLG_AF (1<<4)
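Note: IOPL occupies EFLAGS bits 12 and 13, hence the two-bit (3 << 12) mask. The popf, cli/sti and port I/O changes later in this patch all reduce to the comparison sketched below (illustrative only, mirroring emulator_bad_iopl() further down; IOPL_SHIFT is the shift this tree uses for those bits):

	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;	/* 0..3 */
	int cpl  = kvm_x86_ops->get_cpl(ctxt->vcpu);

	/* e.g. cli/sti: raise #GP(0) unless CPL <= IOPL */
	if (cpl > iopl)
		kvm_inject_gp(ctxt->vcpu, 0);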
@@ -606,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
606 646
607 if (linear < fc->start || linear >= fc->end) { 647 if (linear < fc->start || linear >= fc->end) {
608 size = min(15UL, PAGE_SIZE - offset_in_page(linear)); 648 size = min(15UL, PAGE_SIZE - offset_in_page(linear));
609 rc = ops->read_std(linear, fc->data, size, ctxt->vcpu); 649 rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
610 if (rc) 650 if (rc)
611 return rc; 651 return rc;
612 fc->start = linear; 652 fc->start = linear;
@@ -661,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
661 op_bytes = 3; 701 op_bytes = 3;
662 *address = 0; 702 *address = 0;
663 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, 703 rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
664 ctxt->vcpu); 704 ctxt->vcpu, NULL);
665 if (rc) 705 if (rc)
666 return rc; 706 return rc;
667 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, 707 rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
668 ctxt->vcpu); 708 ctxt->vcpu, NULL);
669 return rc; 709 return rc;
670} 710}
671 711
@@ -889,6 +929,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
889 929
890 switch (mode) { 930 switch (mode) {
891 case X86EMUL_MODE_REAL: 931 case X86EMUL_MODE_REAL:
932 case X86EMUL_MODE_VM86:
892 case X86EMUL_MODE_PROT16: 933 case X86EMUL_MODE_PROT16:
893 def_op_bytes = def_ad_bytes = 2; 934 def_op_bytes = def_ad_bytes = 2;
894 break; 935 break;
@@ -975,7 +1016,7 @@ done_prefixes:
975 } 1016 }
976 1017
977 if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { 1018 if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
978 kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; 1019 kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
979 return -1; 1020 return -1;
980 } 1021 }
981 1022
@@ -1196,13 +1237,56 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1196 rc = ops->read_emulated(register_address(c, ss_base(ctxt), 1237 rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1197 c->regs[VCPU_REGS_RSP]), 1238 c->regs[VCPU_REGS_RSP]),
1198 dest, len, ctxt->vcpu); 1239 dest, len, ctxt->vcpu);
1199 if (rc != 0) 1240 if (rc != X86EMUL_CONTINUE)
1200 return rc; 1241 return rc;
1201 1242
1202 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); 1243 register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1203 return rc; 1244 return rc;
1204} 1245}
1205 1246
1247static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1248 struct x86_emulate_ops *ops,
1249 void *dest, int len)
1250{
1251 int rc;
1252 unsigned long val, change_mask;
1253 int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1254 int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
1255
1256 rc = emulate_pop(ctxt, ops, &val, len);
1257 if (rc != X86EMUL_CONTINUE)
1258 return rc;
1259
1260 change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1261 | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1262
1263 switch(ctxt->mode) {
1264 case X86EMUL_MODE_PROT64:
1265 case X86EMUL_MODE_PROT32:
1266 case X86EMUL_MODE_PROT16:
1267 if (cpl == 0)
1268 change_mask |= EFLG_IOPL;
1269 if (cpl <= iopl)
1270 change_mask |= EFLG_IF;
1271 break;
1272 case X86EMUL_MODE_VM86:
1273 if (iopl < 3) {
1274 kvm_inject_gp(ctxt->vcpu, 0);
1275 return X86EMUL_PROPAGATE_FAULT;
1276 }
1277 change_mask |= EFLG_IF;
1278 break;
1279 default: /* real mode */
1280 change_mask |= (EFLG_IOPL | EFLG_IF);
1281 break;
1282 }
1283
1284 *(unsigned long *)dest =
1285 (ctxt->eflags & ~change_mask) | (val & change_mask);
1286
1287 return rc;
1288}
1289
1206static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) 1290static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
1207{ 1291{
1208 struct decode_cache *c = &ctxt->decode; 1292 struct decode_cache *c = &ctxt->decode;
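Note: a worked example of the popf masking above, with illustrative values: in 32-bit protected mode at CPL 3 with IOPL 0, change_mask contains neither EFLG_IOPL nor EFLG_IF, so popping 0xffffffff updates the arithmetic and direction flags but leaves IF and IOPL exactly as they were; at CPL 0 both bits are added to change_mask and the popped value takes full effect.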
@@ -1225,7 +1309,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1225 if (rc != 0) 1309 if (rc != 0)
1226 return rc; 1310 return rc;
1227 1311
1228 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); 1312 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
1229 return rc; 1313 return rc;
1230} 1314}
1231 1315
@@ -1370,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1370 int rc; 1454 int rc;
1371 1455
1372 rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); 1456 rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
1373 if (rc != 0) 1457 if (rc != X86EMUL_CONTINUE)
1374 return rc; 1458 return rc;
1375 1459
1376 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || 1460 if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
@@ -1385,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1385 (u32) c->regs[VCPU_REGS_RBX]; 1469 (u32) c->regs[VCPU_REGS_RBX];
1386 1470
1387 rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); 1471 rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
1388 if (rc != 0) 1472 if (rc != X86EMUL_CONTINUE)
1389 return rc; 1473 return rc;
1390 ctxt->eflags |= EFLG_ZF; 1474 ctxt->eflags |= EFLG_ZF;
1391 } 1475 }
@@ -1407,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1407 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); 1491 rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1408 if (rc) 1492 if (rc)
1409 return rc; 1493 return rc;
1410 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); 1494 rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
1411 return rc; 1495 return rc;
1412} 1496}
1413 1497
@@ -1451,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
1451 &c->dst.val, 1535 &c->dst.val,
1452 c->dst.bytes, 1536 c->dst.bytes,
1453 ctxt->vcpu); 1537 ctxt->vcpu);
1454 if (rc != 0) 1538 if (rc != X86EMUL_CONTINUE)
1455 return rc; 1539 return rc;
1456 break; 1540 break;
1457 case OP_NONE: 1541 case OP_NONE:
@@ -1514,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1514 u64 msr_data; 1598 u64 msr_data;
1515 1599
1516 /* syscall is not available in real mode */ 1600 /* syscall is not available in real mode */
1517 if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL 1601 if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
1518 || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) 1602 return X86EMUL_UNHANDLEABLE;
1519 return -1;
1520 1603
1521 setup_syscalls_segments(ctxt, &cs, &ss); 1604 setup_syscalls_segments(ctxt, &cs, &ss);
1522 1605
@@ -1553,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
1553 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); 1636 ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1554 } 1637 }
1555 1638
1556 return 0; 1639 return X86EMUL_CONTINUE;
1557} 1640}
1558 1641
1559static int 1642static int
@@ -1563,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1563 struct kvm_segment cs, ss; 1646 struct kvm_segment cs, ss;
1564 u64 msr_data; 1647 u64 msr_data;
1565 1648
1566 /* inject #UD if LOCK prefix is used */ 1649 /* inject #GP if in real mode */
1567 if (c->lock_prefix) 1650 if (ctxt->mode == X86EMUL_MODE_REAL) {
1568 return -1;
1569
1570 /* inject #GP if in real mode or paging is disabled */
1571 if (ctxt->mode == X86EMUL_MODE_REAL ||
1572 !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
1573 kvm_inject_gp(ctxt->vcpu, 0); 1651 kvm_inject_gp(ctxt->vcpu, 0);
1574 return -1; 1652 return X86EMUL_UNHANDLEABLE;
1575 } 1653 }
1576 1654
1577 /* XXX sysenter/sysexit have not been tested in 64bit mode. 1655 /* XXX sysenter/sysexit have not been tested in 64bit mode.
1578 * Therefore, we inject an #UD. 1656 * Therefore, we inject an #UD.
1579 */ 1657 */
1580 if (ctxt->mode == X86EMUL_MODE_PROT64) 1658 if (ctxt->mode == X86EMUL_MODE_PROT64)
1581 return -1; 1659 return X86EMUL_UNHANDLEABLE;
1582 1660
1583 setup_syscalls_segments(ctxt, &cs, &ss); 1661 setup_syscalls_segments(ctxt, &cs, &ss);
1584 1662
@@ -1587,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1587 case X86EMUL_MODE_PROT32: 1665 case X86EMUL_MODE_PROT32:
1588 if ((msr_data & 0xfffc) == 0x0) { 1666 if ((msr_data & 0xfffc) == 0x0) {
1589 kvm_inject_gp(ctxt->vcpu, 0); 1667 kvm_inject_gp(ctxt->vcpu, 0);
1590 return -1; 1668 return X86EMUL_PROPAGATE_FAULT;
1591 } 1669 }
1592 break; 1670 break;
1593 case X86EMUL_MODE_PROT64: 1671 case X86EMUL_MODE_PROT64:
1594 if (msr_data == 0x0) { 1672 if (msr_data == 0x0) {
1595 kvm_inject_gp(ctxt->vcpu, 0); 1673 kvm_inject_gp(ctxt->vcpu, 0);
1596 return -1; 1674 return X86EMUL_PROPAGATE_FAULT;
1597 } 1675 }
1598 break; 1676 break;
1599 } 1677 }
@@ -1618,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1618 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); 1696 kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1619 c->regs[VCPU_REGS_RSP] = msr_data; 1697 c->regs[VCPU_REGS_RSP] = msr_data;
1620 1698
1621 return 0; 1699 return X86EMUL_CONTINUE;
1622} 1700}
1623 1701
1624static int 1702static int
@@ -1629,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1629 u64 msr_data; 1707 u64 msr_data;
1630 int usermode; 1708 int usermode;
1631 1709
1632 /* inject #UD if LOCK prefix is used */ 1710 /* inject #GP if in real mode or Virtual 8086 mode */
1633 if (c->lock_prefix) 1711 if (ctxt->mode == X86EMUL_MODE_REAL ||
1634 return -1; 1712 ctxt->mode == X86EMUL_MODE_VM86) {
1635
1636 /* inject #GP if in real mode or paging is disabled */
1637 if (ctxt->mode == X86EMUL_MODE_REAL
1638 || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
1639 kvm_inject_gp(ctxt->vcpu, 0);
1640 return -1;
1641 }
1642
1643 /* sysexit must be called from CPL 0 */
1644 if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
1645 kvm_inject_gp(ctxt->vcpu, 0); 1713 kvm_inject_gp(ctxt->vcpu, 0);
1646 return -1; 1714 return X86EMUL_UNHANDLEABLE;
1647 } 1715 }
1648 1716
1649 setup_syscalls_segments(ctxt, &cs, &ss); 1717 setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1661,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1661 cs.selector = (u16)(msr_data + 16); 1729 cs.selector = (u16)(msr_data + 16);
1662 if ((msr_data & 0xfffc) == 0x0) { 1730 if ((msr_data & 0xfffc) == 0x0) {
1663 kvm_inject_gp(ctxt->vcpu, 0); 1731 kvm_inject_gp(ctxt->vcpu, 0);
1664 return -1; 1732 return X86EMUL_PROPAGATE_FAULT;
1665 } 1733 }
1666 ss.selector = (u16)(msr_data + 24); 1734 ss.selector = (u16)(msr_data + 24);
1667 break; 1735 break;
@@ -1669,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1669 cs.selector = (u16)(msr_data + 32); 1737 cs.selector = (u16)(msr_data + 32);
1670 if (msr_data == 0x0) { 1738 if (msr_data == 0x0) {
1671 kvm_inject_gp(ctxt->vcpu, 0); 1739 kvm_inject_gp(ctxt->vcpu, 0);
1672 return -1; 1740 return X86EMUL_PROPAGATE_FAULT;
1673 } 1741 }
1674 ss.selector = cs.selector + 8; 1742 ss.selector = cs.selector + 8;
1675 cs.db = 0; 1743 cs.db = 0;
@@ -1685,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1685 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; 1753 c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
1686 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; 1754 c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
1687 1755
1688 return 0; 1756 return X86EMUL_CONTINUE;
1757}
1758
1759static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
1760{
1761 int iopl;
1762 if (ctxt->mode == X86EMUL_MODE_REAL)
1763 return false;
1764 if (ctxt->mode == X86EMUL_MODE_VM86)
1765 return true;
1766 iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1767 return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
1768}
1769
1770static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1771 struct x86_emulate_ops *ops,
1772 u16 port, u16 len)
1773{
1774 struct kvm_segment tr_seg;
1775 int r;
1776 u16 io_bitmap_ptr;
1777 u8 perm, bit_idx = port & 0x7;
1778 unsigned mask = (1 << len) - 1;
1779
1780 kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
1781 if (tr_seg.unusable)
1782 return false;
1783 if (tr_seg.limit < 103)
1784 return false;
1785 r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
1786 NULL);
1787 if (r != X86EMUL_CONTINUE)
1788 return false;
1789 if (io_bitmap_ptr + port/8 > tr_seg.limit)
1790 return false;
1791 r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
1792 ctxt->vcpu, NULL);
1793 if (r != X86EMUL_CONTINUE)
1794 return false;
1795 if ((perm >> bit_idx) & mask)
1796 return false;
1797 return true;
1798}
1799
1800static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
1801 struct x86_emulate_ops *ops,
1802 u16 port, u16 len)
1803{
1804 if (emulator_bad_iopl(ctxt))
1805 if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1806 return false;
1807 return true;
1689} 1808}
1690 1809
1691int 1810int
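Note: a worked example of the TSS I/O bitmap walk above, with illustrative numbers: for a one-byte access to port 0x3f9, io_bitmap_ptr is read from offset 102 of the TR segment, the permission byte is fetched from io_bitmap_ptr + 0x3f9/8 = io_bitmap_ptr + 127, bit_idx is 0x3f9 & 7 = 1 and mask is (1 << 1) - 1 = 1, so the access is permitted only if bit 1 of that byte is clear.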
@@ -1709,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1709 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); 1828 memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
1710 saved_eip = c->eip; 1829 saved_eip = c->eip;
1711 1830
1831 /* LOCK prefix is allowed only with some instructions */
1832 if (c->lock_prefix && !(c->d & Lock)) {
1833 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1834 goto done;
1835 }
1836
1837 /* Privileged instruction can be executed only in CPL=0 */
1838 if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
1839 kvm_inject_gp(ctxt->vcpu, 0);
1840 goto done;
1841 }
1842
1712 if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) 1843 if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
1713 memop = c->modrm_ea; 1844 memop = c->modrm_ea;
1714 1845
@@ -1749,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1749 &c->src.val, 1880 &c->src.val,
1750 c->src.bytes, 1881 c->src.bytes,
1751 ctxt->vcpu); 1882 ctxt->vcpu);
1752 if (rc != 0) 1883 if (rc != X86EMUL_CONTINUE)
1753 goto done; 1884 goto done;
1754 c->src.orig_val = c->src.val; 1885 c->src.orig_val = c->src.val;
1755 } 1886 }
@@ -1768,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1768 c->dst.ptr = (void *)c->dst.ptr + 1899 c->dst.ptr = (void *)c->dst.ptr +
1769 (c->src.val & mask) / 8; 1900 (c->src.val & mask) / 8;
1770 } 1901 }
1771 if (!(c->d & Mov) && 1902 if (!(c->d & Mov)) {
1772 /* optimisation - avoid slow emulated read */ 1903 /* optimisation - avoid slow emulated read */
1773 ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 1904 rc = ops->read_emulated((unsigned long)c->dst.ptr,
1774 &c->dst.val, 1905 &c->dst.val,
1775 c->dst.bytes, ctxt->vcpu)) != 0)) 1906 c->dst.bytes,
1776 goto done; 1907 ctxt->vcpu);
1908 if (rc != X86EMUL_CONTINUE)
1909 goto done;
1910 }
1777 } 1911 }
1778 c->dst.orig_val = c->dst.val; 1912 c->dst.orig_val = c->dst.val;
1779 1913
@@ -1876,7 +2010,12 @@ special_insn:
1876 break; 2010 break;
1877 case 0x6c: /* insb */ 2011 case 0x6c: /* insb */
1878 case 0x6d: /* insw/insd */ 2012 case 0x6d: /* insw/insd */
1879 if (kvm_emulate_pio_string(ctxt->vcpu, 2013 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2014 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2015 kvm_inject_gp(ctxt->vcpu, 0);
2016 goto done;
2017 }
2018 if (kvm_emulate_pio_string(ctxt->vcpu,
1880 1, 2019 1,
1881 (c->d & ByteOp) ? 1 : c->op_bytes, 2020 (c->d & ByteOp) ? 1 : c->op_bytes,
1882 c->rep_prefix ? 2021 c->rep_prefix ?
@@ -1892,6 +2031,11 @@ special_insn:
1892 return 0; 2031 return 0;
1893 case 0x6e: /* outsb */ 2032 case 0x6e: /* outsb */
1894 case 0x6f: /* outsw/outsd */ 2033 case 0x6f: /* outsw/outsd */
2034 if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
2035 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2036 kvm_inject_gp(ctxt->vcpu, 0);
2037 goto done;
2038 }
1895 if (kvm_emulate_pio_string(ctxt->vcpu, 2039 if (kvm_emulate_pio_string(ctxt->vcpu,
1896 0, 2040 0,
1897 (c->d & ByteOp) ? 1 : c->op_bytes, 2041 (c->d & ByteOp) ? 1 : c->op_bytes,
@@ -1978,25 +2122,19 @@ special_insn:
1978 break; 2122 break;
1979 case 0x8e: { /* mov seg, r/m16 */ 2123 case 0x8e: { /* mov seg, r/m16 */
1980 uint16_t sel; 2124 uint16_t sel;
1981 int type_bits;
1982 int err;
1983 2125
1984 sel = c->src.val; 2126 sel = c->src.val;
1985 if (c->modrm_reg == VCPU_SREG_SS)
1986 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
1987 2127
1988 if (c->modrm_reg <= 5) { 2128 if (c->modrm_reg == VCPU_SREG_CS ||
1989 type_bits = (c->modrm_reg == 1) ? 9 : 1; 2129 c->modrm_reg > VCPU_SREG_GS) {
1990 err = kvm_load_segment_descriptor(ctxt->vcpu, sel, 2130 kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1991 type_bits, c->modrm_reg); 2131 goto done;
1992 } else {
1993 printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
1994 c->modrm);
1995 goto cannot_emulate;
1996 } 2132 }
1997 2133
1998 if (err < 0) 2134 if (c->modrm_reg == VCPU_SREG_SS)
1999 goto cannot_emulate; 2135 toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
2136
2137 rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
2000 2138
2001 c->dst.type = OP_NONE; /* Disable writeback. */ 2139 c->dst.type = OP_NONE; /* Disable writeback. */
2002 break; 2140 break;
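Note: with the rewrite above, a mov to a segment register first rejects CS and any encoding beyond GS with #UD (matching real hardware, where CS changes only via far control transfers), and the MOV-SS interrupt shadow is set only after the destination register has been validated.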
@@ -2025,7 +2163,10 @@ special_insn:
2025 c->dst.type = OP_REG; 2163 c->dst.type = OP_REG;
2026 c->dst.ptr = (unsigned long *) &ctxt->eflags; 2164 c->dst.ptr = (unsigned long *) &ctxt->eflags;
2027 c->dst.bytes = c->op_bytes; 2165 c->dst.bytes = c->op_bytes;
2028 goto pop_instruction; 2166 rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2167 if (rc != X86EMUL_CONTINUE)
2168 goto done;
2169 break;
2029 case 0xa0 ... 0xa1: /* mov */ 2170 case 0xa0 ... 0xa1: /* mov */
2030 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2171 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2031 c->dst.val = c->src.val; 2172 c->dst.val = c->src.val;
@@ -2039,11 +2180,12 @@ special_insn:
2039 c->dst.ptr = (unsigned long *)register_address(c, 2180 c->dst.ptr = (unsigned long *)register_address(c,
2040 es_base(ctxt), 2181 es_base(ctxt),
2041 c->regs[VCPU_REGS_RDI]); 2182 c->regs[VCPU_REGS_RDI]);
2042 if ((rc = ops->read_emulated(register_address(c, 2183 rc = ops->read_emulated(register_address(c,
2043 seg_override_base(ctxt, c), 2184 seg_override_base(ctxt, c),
2044 c->regs[VCPU_REGS_RSI]), 2185 c->regs[VCPU_REGS_RSI]),
2045 &c->dst.val, 2186 &c->dst.val,
2046 c->dst.bytes, ctxt->vcpu)) != 0) 2187 c->dst.bytes, ctxt->vcpu);
2188 if (rc != X86EMUL_CONTINUE)
2047 goto done; 2189 goto done;
2048 register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2190 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2049 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2191 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2058,10 +2200,11 @@ special_insn:
2058 c->src.ptr = (unsigned long *)register_address(c, 2200 c->src.ptr = (unsigned long *)register_address(c,
2059 seg_override_base(ctxt, c), 2201 seg_override_base(ctxt, c),
2060 c->regs[VCPU_REGS_RSI]); 2202 c->regs[VCPU_REGS_RSI]);
2061 if ((rc = ops->read_emulated((unsigned long)c->src.ptr, 2203 rc = ops->read_emulated((unsigned long)c->src.ptr,
2062 &c->src.val, 2204 &c->src.val,
2063 c->src.bytes, 2205 c->src.bytes,
2064 ctxt->vcpu)) != 0) 2206 ctxt->vcpu);
2207 if (rc != X86EMUL_CONTINUE)
2065 goto done; 2208 goto done;
2066 2209
2067 c->dst.type = OP_NONE; /* Disable writeback. */ 2210 c->dst.type = OP_NONE; /* Disable writeback. */
@@ -2069,10 +2212,11 @@ special_insn:
2069 c->dst.ptr = (unsigned long *)register_address(c, 2212 c->dst.ptr = (unsigned long *)register_address(c,
2070 es_base(ctxt), 2213 es_base(ctxt),
2071 c->regs[VCPU_REGS_RDI]); 2214 c->regs[VCPU_REGS_RDI]);
2072 if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, 2215 rc = ops->read_emulated((unsigned long)c->dst.ptr,
2073 &c->dst.val, 2216 &c->dst.val,
2074 c->dst.bytes, 2217 c->dst.bytes,
2075 ctxt->vcpu)) != 0) 2218 ctxt->vcpu);
2219 if (rc != X86EMUL_CONTINUE)
2076 goto done; 2220 goto done;
2077 2221
2078 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); 2222 DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
@@ -2102,12 +2246,13 @@ special_insn:
2102 c->dst.type = OP_REG; 2246 c->dst.type = OP_REG;
2103 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; 2247 c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2104 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; 2248 c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2105 if ((rc = ops->read_emulated(register_address(c, 2249 rc = ops->read_emulated(register_address(c,
2106 seg_override_base(ctxt, c), 2250 seg_override_base(ctxt, c),
2107 c->regs[VCPU_REGS_RSI]), 2251 c->regs[VCPU_REGS_RSI]),
2108 &c->dst.val, 2252 &c->dst.val,
2109 c->dst.bytes, 2253 c->dst.bytes,
2110 ctxt->vcpu)) != 0) 2254 ctxt->vcpu);
2255 if (rc != X86EMUL_CONTINUE)
2111 goto done; 2256 goto done;
2112 register_address_increment(c, &c->regs[VCPU_REGS_RSI], 2257 register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2113 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes 2258 (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2163,11 +2308,9 @@ special_insn:
2163 case 0xe9: /* jmp rel */ 2308 case 0xe9: /* jmp rel */
2164 goto jmp; 2309 goto jmp;
2165 case 0xea: /* jmp far */ 2310 case 0xea: /* jmp far */
2166 if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, 2311 if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
2167 VCPU_SREG_CS) < 0) { 2312 VCPU_SREG_CS))
2168 DPRINTF("jmp far: Failed to load CS descriptor\n"); 2313 goto done;
2169 goto cannot_emulate;
2170 }
2171 2314
2172 c->eip = c->src.val; 2315 c->eip = c->src.val;
2173 break; 2316 break;
@@ -2185,7 +2328,13 @@ special_insn:
2185 case 0xef: /* out (e/r)ax,dx */ 2328 case 0xef: /* out (e/r)ax,dx */
2186 port = c->regs[VCPU_REGS_RDX]; 2329 port = c->regs[VCPU_REGS_RDX];
2187 io_dir_in = 0; 2330 io_dir_in = 0;
2188 do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, 2331 do_io:
2332 if (!emulator_io_permited(ctxt, ops, port,
2333 (c->d & ByteOp) ? 1 : c->op_bytes)) {
2334 kvm_inject_gp(ctxt->vcpu, 0);
2335 goto done;
2336 }
2337 if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
2189 (c->d & ByteOp) ? 1 : c->op_bytes, 2338 (c->d & ByteOp) ? 1 : c->op_bytes,
2190 port) != 0) { 2339 port) != 0) {
2191 c->eip = saved_eip; 2340 c->eip = saved_eip;
@@ -2210,13 +2359,21 @@ special_insn:
2210 c->dst.type = OP_NONE; /* Disable writeback. */ 2359 c->dst.type = OP_NONE; /* Disable writeback. */
2211 break; 2360 break;
2212 case 0xfa: /* cli */ 2361 case 0xfa: /* cli */
2213 ctxt->eflags &= ~X86_EFLAGS_IF; 2362 if (emulator_bad_iopl(ctxt))
2214 c->dst.type = OP_NONE; /* Disable writeback. */ 2363 kvm_inject_gp(ctxt->vcpu, 0);
2364 else {
2365 ctxt->eflags &= ~X86_EFLAGS_IF;
2366 c->dst.type = OP_NONE; /* Disable writeback. */
2367 }
2215 break; 2368 break;
2216 case 0xfb: /* sti */ 2369 case 0xfb: /* sti */
2217 toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); 2370 if (emulator_bad_iopl(ctxt))
2218 ctxt->eflags |= X86_EFLAGS_IF; 2371 kvm_inject_gp(ctxt->vcpu, 0);
2219 c->dst.type = OP_NONE; /* Disable writeback. */ 2372 else {
2373 toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
2374 ctxt->eflags |= X86_EFLAGS_IF;
2375 c->dst.type = OP_NONE; /* Disable writeback. */
2376 }
2220 break; 2377 break;
2221 case 0xfc: /* cld */ 2378 case 0xfc: /* cld */
2222 ctxt->eflags &= ~EFLG_DF; 2379 ctxt->eflags &= ~EFLG_DF;
@@ -2319,8 +2476,9 @@ twobyte_insn:
2319 } 2476 }
2320 break; 2477 break;
2321 case 0x05: /* syscall */ 2478 case 0x05: /* syscall */
2322 if (emulate_syscall(ctxt) == -1) 2479 rc = emulate_syscall(ctxt);
2323 goto cannot_emulate; 2480 if (rc != X86EMUL_CONTINUE)
2481 goto done;
2324 else 2482 else
2325 goto writeback; 2483 goto writeback;
2326 break; 2484 break;
@@ -2391,14 +2549,16 @@ twobyte_insn:
2391 c->dst.type = OP_NONE; 2549 c->dst.type = OP_NONE;
2392 break; 2550 break;
2393 case 0x34: /* sysenter */ 2551 case 0x34: /* sysenter */
2394 if (emulate_sysenter(ctxt) == -1) 2552 rc = emulate_sysenter(ctxt);
2395 goto cannot_emulate; 2553 if (rc != X86EMUL_CONTINUE)
2554 goto done;
2396 else 2555 else
2397 goto writeback; 2556 goto writeback;
2398 break; 2557 break;
2399 case 0x35: /* sysexit */ 2558 case 0x35: /* sysexit */
2400 if (emulate_sysexit(ctxt) == -1) 2559 rc = emulate_sysexit(ctxt);
2401 goto cannot_emulate; 2560 if (rc != X86EMUL_CONTINUE)
2561 goto done;
2402 else 2562 else
2403 goto writeback; 2563 goto writeback;
2404 break; 2564 break;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 15578f180e59..294698b6daff 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -242,11 +242,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
242{ 242{
243 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, 243 struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
244 irq_ack_notifier); 244 irq_ack_notifier);
245 spin_lock(&ps->inject_lock); 245 raw_spin_lock(&ps->inject_lock);
246 if (atomic_dec_return(&ps->pit_timer.pending) < 0) 246 if (atomic_dec_return(&ps->pit_timer.pending) < 0)
247 atomic_inc(&ps->pit_timer.pending); 247 atomic_inc(&ps->pit_timer.pending);
248 ps->irq_ack = 1; 248 ps->irq_ack = 1;
249 spin_unlock(&ps->inject_lock); 249 raw_spin_unlock(&ps->inject_lock);
250} 250}
251 251
252void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) 252void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
605 .write = speaker_ioport_write, 605 .write = speaker_ioport_write,
606}; 606};
607 607
608/* Caller must have writers lock on slots_lock */ 608/* Caller must hold slots_lock */
609struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) 609struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
610{ 610{
611 struct kvm_pit *pit; 611 struct kvm_pit *pit;
@@ -624,7 +624,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
624 624
625 mutex_init(&pit->pit_state.lock); 625 mutex_init(&pit->pit_state.lock);
626 mutex_lock(&pit->pit_state.lock); 626 mutex_lock(&pit->pit_state.lock);
627 spin_lock_init(&pit->pit_state.inject_lock); 627 raw_spin_lock_init(&pit->pit_state.inject_lock);
628 628
629 kvm->arch.vpit = pit; 629 kvm->arch.vpit = pit;
630 pit->kvm = kvm; 630 pit->kvm = kvm;
@@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
645 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); 645 kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
646 646
647 kvm_iodevice_init(&pit->dev, &pit_dev_ops); 647 kvm_iodevice_init(&pit->dev, &pit_dev_ops);
648 ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); 648 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
649 if (ret < 0) 649 if (ret < 0)
650 goto fail; 650 goto fail;
651 651
652 if (flags & KVM_PIT_SPEAKER_DUMMY) { 652 if (flags & KVM_PIT_SPEAKER_DUMMY) {
653 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); 653 kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
654 ret = __kvm_io_bus_register_dev(&kvm->pio_bus, 654 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
655 &pit->speaker_dev); 655 &pit->speaker_dev);
656 if (ret < 0) 656 if (ret < 0)
657 goto fail_unregister; 657 goto fail_unregister;
@@ -660,11 +660,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
660 return pit; 660 return pit;
661 661
662fail_unregister: 662fail_unregister:
663 __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); 663 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
664 664
665fail: 665fail:
666 if (pit->irq_source_id >= 0) 666 kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
667 kvm_free_irq_source_id(kvm, pit->irq_source_id); 667 kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
668 kvm_free_irq_source_id(kvm, pit->irq_source_id);
668 669
669 kfree(pit); 670 kfree(pit);
670 return NULL; 671 return NULL;
@@ -723,12 +724,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
723 /* Try to inject pending interrupts when 724 /* Try to inject pending interrupts when
724 * last one has been acked. 725 * last one has been acked.
725 */ 726 */
726 spin_lock(&ps->inject_lock); 727 raw_spin_lock(&ps->inject_lock);
727 if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { 728 if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
728 ps->irq_ack = 0; 729 ps->irq_ack = 0;
729 inject = 1; 730 inject = 1;
730 } 731 }
731 spin_unlock(&ps->inject_lock); 732 raw_spin_unlock(&ps->inject_lock);
732 if (inject) 733 if (inject)
733 __inject_pit_timer_intr(kvm); 734 __inject_pit_timer_intr(kvm);
734 } 735 }
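The i8254 hunks above make two related changes. First, the PIT's inject_lock becomes a raw_spinlock_t: it is taken on the interrupt ack/injection path, which must never sleep, and unlike spinlock_t a raw_spinlock_t stays a busy-wait lock even on preempt-rt style kernels. Second, device registration goes through the generic kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, ...) helper instead of poking kvm->pio_bus directly, which is why the comment now only requires slots_lock. The locking pattern, condensed from the hunks (not new code, just the two diff columns merged for readability):

    raw_spinlock_t inject_lock;              /* in struct kvm_kpit_state */

    raw_spin_lock_init(&pit->pit_state.inject_lock);   /* once, in kvm_create_pit() */

    raw_spin_lock(&ps->inject_lock);         /* ack notifier / injection paths */
    if (atomic_dec_return(&ps->pit_timer.pending) < 0)
        atomic_inc(&ps->pit_timer.pending);
    ps->irq_ack = 1;
    raw_spin_unlock(&ps->inject_lock);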
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index d4c1c7ffdc09..900d6b0ba7c2 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -27,7 +27,7 @@ struct kvm_kpit_state {
27 u32 speaker_data_on; 27 u32 speaker_data_on;
28 struct mutex lock; 28 struct mutex lock;
29 struct kvm_pit *pit; 29 struct kvm_pit *pit;
30 spinlock_t inject_lock; 30 raw_spinlock_t inject_lock;
31 unsigned long irq_ack; 31 unsigned long irq_ack;
32 struct kvm_irq_ack_notifier irq_ack_notifier; 32 struct kvm_irq_ack_notifier irq_ack_notifier;
33}; 33};
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index d057c0cbd245..07771da85de5 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -44,18 +44,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
44 * Other interrupt may be delivered to PIC while lock is dropped but 44 * Other interrupt may be delivered to PIC while lock is dropped but
45 * it should be safe since PIC state is already updated at this stage. 45 * it should be safe since PIC state is already updated at this stage.
46 */ 46 */
47 spin_unlock(&s->pics_state->lock); 47 raw_spin_unlock(&s->pics_state->lock);
48 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); 48 kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
49 spin_lock(&s->pics_state->lock); 49 raw_spin_lock(&s->pics_state->lock);
50} 50}
51 51
52void kvm_pic_clear_isr_ack(struct kvm *kvm) 52void kvm_pic_clear_isr_ack(struct kvm *kvm)
53{ 53{
54 struct kvm_pic *s = pic_irqchip(kvm); 54 struct kvm_pic *s = pic_irqchip(kvm);
55 spin_lock(&s->lock); 55
56 raw_spin_lock(&s->lock);
56 s->pics[0].isr_ack = 0xff; 57 s->pics[0].isr_ack = 0xff;
57 s->pics[1].isr_ack = 0xff; 58 s->pics[1].isr_ack = 0xff;
58 spin_unlock(&s->lock); 59 raw_spin_unlock(&s->lock);
59} 60}
60 61
61/* 62/*
@@ -156,9 +157,9 @@ static void pic_update_irq(struct kvm_pic *s)
156 157
157void kvm_pic_update_irq(struct kvm_pic *s) 158void kvm_pic_update_irq(struct kvm_pic *s)
158{ 159{
159 spin_lock(&s->lock); 160 raw_spin_lock(&s->lock);
160 pic_update_irq(s); 161 pic_update_irq(s);
161 spin_unlock(&s->lock); 162 raw_spin_unlock(&s->lock);
162} 163}
163 164
164int kvm_pic_set_irq(void *opaque, int irq, int level) 165int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -166,14 +167,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
166 struct kvm_pic *s = opaque; 167 struct kvm_pic *s = opaque;
167 int ret = -1; 168 int ret = -1;
168 169
169 spin_lock(&s->lock); 170 raw_spin_lock(&s->lock);
170 if (irq >= 0 && irq < PIC_NUM_PINS) { 171 if (irq >= 0 && irq < PIC_NUM_PINS) {
171 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); 172 ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
172 pic_update_irq(s); 173 pic_update_irq(s);
173 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, 174 trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
174 s->pics[irq >> 3].imr, ret == 0); 175 s->pics[irq >> 3].imr, ret == 0);
175 } 176 }
176 spin_unlock(&s->lock); 177 raw_spin_unlock(&s->lock);
177 178
178 return ret; 179 return ret;
179} 180}
@@ -203,7 +204,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
203 int irq, irq2, intno; 204 int irq, irq2, intno;
204 struct kvm_pic *s = pic_irqchip(kvm); 205 struct kvm_pic *s = pic_irqchip(kvm);
205 206
206 spin_lock(&s->lock); 207 raw_spin_lock(&s->lock);
207 irq = pic_get_irq(&s->pics[0]); 208 irq = pic_get_irq(&s->pics[0]);
208 if (irq >= 0) { 209 if (irq >= 0) {
209 pic_intack(&s->pics[0], irq); 210 pic_intack(&s->pics[0], irq);
@@ -228,7 +229,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
228 intno = s->pics[0].irq_base + irq; 229 intno = s->pics[0].irq_base + irq;
229 } 230 }
230 pic_update_irq(s); 231 pic_update_irq(s);
231 spin_unlock(&s->lock); 232 raw_spin_unlock(&s->lock);
232 233
233 return intno; 234 return intno;
234} 235}
@@ -442,7 +443,7 @@ static int picdev_write(struct kvm_io_device *this,
442 printk(KERN_ERR "PIC: non byte write\n"); 443 printk(KERN_ERR "PIC: non byte write\n");
443 return 0; 444 return 0;
444 } 445 }
445 spin_lock(&s->lock); 446 raw_spin_lock(&s->lock);
446 switch (addr) { 447 switch (addr) {
447 case 0x20: 448 case 0x20:
448 case 0x21: 449 case 0x21:
@@ -455,7 +456,7 @@ static int picdev_write(struct kvm_io_device *this,
455 elcr_ioport_write(&s->pics[addr & 1], addr, data); 456 elcr_ioport_write(&s->pics[addr & 1], addr, data);
456 break; 457 break;
457 } 458 }
458 spin_unlock(&s->lock); 459 raw_spin_unlock(&s->lock);
459 return 0; 460 return 0;
460} 461}
461 462
@@ -472,7 +473,7 @@ static int picdev_read(struct kvm_io_device *this,
472 printk(KERN_ERR "PIC: non byte read\n"); 473 printk(KERN_ERR "PIC: non byte read\n");
473 return 0; 474 return 0;
474 } 475 }
475 spin_lock(&s->lock); 476 raw_spin_lock(&s->lock);
476 switch (addr) { 477 switch (addr) {
477 case 0x20: 478 case 0x20:
478 case 0x21: 479 case 0x21:
@@ -486,7 +487,7 @@ static int picdev_read(struct kvm_io_device *this,
486 break; 487 break;
487 } 488 }
488 *(unsigned char *)val = data; 489 *(unsigned char *)val = data;
489 spin_unlock(&s->lock); 490 raw_spin_unlock(&s->lock);
490 return 0; 491 return 0;
491} 492}
492 493
@@ -520,7 +521,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
520 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); 521 s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
521 if (!s) 522 if (!s)
522 return NULL; 523 return NULL;
523 spin_lock_init(&s->lock); 524 raw_spin_lock_init(&s->lock);
524 s->kvm = kvm; 525 s->kvm = kvm;
525 s->pics[0].elcr_mask = 0xf8; 526 s->pics[0].elcr_mask = 0xf8;
526 s->pics[1].elcr_mask = 0xde; 527 s->pics[1].elcr_mask = 0xde;
@@ -533,7 +534,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
533 * Initialize PIO device 534 * Initialize PIO device
534 */ 535 */
535 kvm_iodevice_init(&s->dev, &picdev_ops); 536 kvm_iodevice_init(&s->dev, &picdev_ops);
536 ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); 537 mutex_lock(&kvm->slots_lock);
538 ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
539 mutex_unlock(&kvm->slots_lock);
537 if (ret < 0) { 540 if (ret < 0) {
538 kfree(s); 541 kfree(s);
539 return NULL; 542 return NULL;
@@ -541,3 +544,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
541 544
542 return s; 545 return s;
543} 546}
547
548void kvm_destroy_pic(struct kvm *kvm)
549{
550 struct kvm_pic *vpic = kvm->arch.vpic;
551
552 if (vpic) {
553 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
554 kvm->arch.vpic = NULL;
555 kfree(vpic);
556 }
557}
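The i8259 conversion is the same raw_spinlock_t swap; the pre-existing unlock/notify/lock dance in pic_clear_isr() is unchanged apart from the primitive, and the in-code comment already explains why dropping the lock around kvm_notify_acked_irq() is safe. The genuinely new piece is kvm_destroy_pic(), which gives the x86 teardown path (presumably kvm_arch_destroy_vm(), outside this excerpt) a way to unregister the PIC from the PIO bus before freeing it, mirroring creation:

    /* creation, under slots_lock */
    mutex_lock(&kvm->slots_lock);
    ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
    mutex_unlock(&kvm->slots_lock);

    /* destruction: unregister first, then clear the pointer and free */
    kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
    kvm->arch.vpic = NULL;
    kfree(vpic);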
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index be399e207d57..34b15915754d 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -62,7 +62,7 @@ struct kvm_kpic_state {
62}; 62};
63 63
64struct kvm_pic { 64struct kvm_pic {
65 spinlock_t lock; 65 raw_spinlock_t lock;
66 unsigned pending_acks; 66 unsigned pending_acks;
67 struct kvm *kvm; 67 struct kvm *kvm;
68 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ 68 struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
@@ -75,6 +75,7 @@ struct kvm_pic {
75}; 75};
76 76
77struct kvm_pic *kvm_create_pic(struct kvm *kvm); 77struct kvm_pic *kvm_create_pic(struct kvm *kvm);
78void kvm_destroy_pic(struct kvm *kvm);
78int kvm_pic_read_irq(struct kvm *kvm); 79int kvm_pic_read_irq(struct kvm *kvm);
79void kvm_pic_update_irq(struct kvm_pic *s); 80void kvm_pic_update_irq(struct kvm_pic *s);
80void kvm_pic_clear_isr_ack(struct kvm *kvm); 81void kvm_pic_clear_isr_ack(struct kvm *kvm);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 7bcc5b6a4403..cff851cf5322 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -1,6 +1,11 @@
1#ifndef ASM_KVM_CACHE_REGS_H 1#ifndef ASM_KVM_CACHE_REGS_H
2#define ASM_KVM_CACHE_REGS_H 2#define ASM_KVM_CACHE_REGS_H
3 3
4#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
5#define KVM_POSSIBLE_CR4_GUEST_BITS \
6 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
7 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
8
4static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, 9static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
5 enum kvm_reg reg) 10 enum kvm_reg reg)
6{ 11{
@@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
38 return vcpu->arch.pdptrs[index]; 43 return vcpu->arch.pdptrs[index];
39} 44}
40 45
46static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
47{
48 ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
49 if (tmask & vcpu->arch.cr0_guest_owned_bits)
50 kvm_x86_ops->decache_cr0_guest_bits(vcpu);
51 return vcpu->arch.cr0 & mask;
52}
53
54static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
55{
56 return kvm_read_cr0_bits(vcpu, ~0UL);
57}
58
59static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
60{
61 ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
62 if (tmask & vcpu->arch.cr4_guest_owned_bits)
63 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
64 return vcpu->arch.cr4 & mask;
65}
66
67static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
68{
69 return kvm_read_cr4_bits(vcpu, ~0UL);
70}
71
41#endif 72#endif
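kvm_cache_regs.h gains accessors for CR0/CR4 because some of their bits can now be guest-owned: hardware is configured not to exit when the guest flips them, so the cached vcpu->arch.cr0/cr4 may be stale for exactly those bits. KVM_POSSIBLE_CR0_GUEST_BITS and KVM_POSSIBLE_CR4_GUEST_BITS bound which bits may ever be handed to the guest; if a read overlaps a currently guest-owned bit, the helper first asks the vendor code to decache it from the VMCB/VMCS. Callers are therefore expected to stop reading vcpu->arch.cr0 directly and go through the helpers, as the mmu.c hunk further down does:

    static int is_write_protection(struct kvm_vcpu *vcpu)
    {
        return kvm_read_cr0_bits(vcpu, X86_CR0_WP);   /* safe even when CR0 bits are guest-owned */
    }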
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ba8c045da782..4b224f90087b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
1246 1246
1247 return 0; 1247 return 0;
1248} 1248}
1249
1250int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
1251{
1252 struct kvm_lapic *apic = vcpu->arch.apic;
1253
1254 if (!irqchip_in_kernel(vcpu->kvm))
1255 return 1;
1256
1257 /* if this is ICR write vector before command */
1258 if (reg == APIC_ICR)
1259 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1260 return apic_reg_write(apic, reg, (u32)data);
1261}
1262
1263int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
1264{
1265 struct kvm_lapic *apic = vcpu->arch.apic;
1266 u32 low, high = 0;
1267
1268 if (!irqchip_in_kernel(vcpu->kvm))
1269 return 1;
1270
1271 if (apic_reg_read(apic, reg, 4, &low))
1272 return 1;
1273 if (reg == APIC_ICR)
1274 apic_reg_read(apic, APIC_ICR2, 4, &high);
1275
1276 *data = (((u64)high) << 32) | low;
1277
1278 return 0;
1279}
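lapic.c grows kvm_hv_vapic_msr_write()/read(), the glue that lets the new Hyper-V MSR emulation drive the local APIC. The one subtlety handled here is the ICR: it is architecturally a 64-bit register split across APIC_ICR (low half, the command that actually sends the IPI) and APIC_ICR2 (high half, the destination), so the high half must be written before the command. A sketch of the intended caller, the Hyper-V MSR dispatcher in x86.c (not part of this excerpt, so treat the exact wiring as an assumption; the HV_X64_MSR_* constants come from the hyperv.h header added elsewhere in this merge):

    case HV_X64_MSR_ICR:
        return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
    case HV_X64_MSR_TPR:
        return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);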
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 40010b09c4aa..f5fe32c5edad 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
48 48
49int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); 49int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
50int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); 50int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
51
52int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
53int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
54
55static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
56{
57 return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
58}
51#endif 59#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 89a49fb46a27..741373e8ca77 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include "mmu.h" 20#include "mmu.h"
21#include "x86.h"
21#include "kvm_cache_regs.h" 22#include "kvm_cache_regs.h"
22 23
23#include <linux/kvm_host.h> 24#include <linux/kvm_host.h>
@@ -29,6 +30,7 @@
29#include <linux/swap.h> 30#include <linux/swap.h>
30#include <linux/hugetlb.h> 31#include <linux/hugetlb.h>
31#include <linux/compiler.h> 32#include <linux/compiler.h>
33#include <linux/srcu.h>
32 34
33#include <asm/page.h> 35#include <asm/page.h>
34#include <asm/cmpxchg.h> 36#include <asm/cmpxchg.h>
@@ -136,16 +138,6 @@ module_param(oos_shadow, bool, 0644);
136#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ 138#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
137 | PT64_NX_MASK) 139 | PT64_NX_MASK)
138 140
139#define PFERR_PRESENT_MASK (1U << 0)
140#define PFERR_WRITE_MASK (1U << 1)
141#define PFERR_USER_MASK (1U << 2)
142#define PFERR_RSVD_MASK (1U << 3)
143#define PFERR_FETCH_MASK (1U << 4)
144
145#define PT_PDPE_LEVEL 3
146#define PT_DIRECTORY_LEVEL 2
147#define PT_PAGE_TABLE_LEVEL 1
148
149#define RMAP_EXT 4 141#define RMAP_EXT 4
150 142
151#define ACC_EXEC_MASK 1 143#define ACC_EXEC_MASK 1
@@ -153,6 +145,9 @@ module_param(oos_shadow, bool, 0644);
153#define ACC_USER_MASK PT_USER_MASK 145#define ACC_USER_MASK PT_USER_MASK
154#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) 146#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
155 147
148#include <trace/events/kvm.h>
149
150#undef TRACE_INCLUDE_FILE
156#define CREATE_TRACE_POINTS 151#define CREATE_TRACE_POINTS
157#include "mmutrace.h" 152#include "mmutrace.h"
158 153
@@ -229,7 +224,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
229 224
230static int is_write_protection(struct kvm_vcpu *vcpu) 225static int is_write_protection(struct kvm_vcpu *vcpu)
231{ 226{
232 return vcpu->arch.cr0 & X86_CR0_WP; 227 return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
233} 228}
234 229
235static int is_cpuid_PSE36(void) 230static int is_cpuid_PSE36(void)
@@ -239,7 +234,7 @@ static int is_cpuid_PSE36(void)
239 234
240static int is_nx(struct kvm_vcpu *vcpu) 235static int is_nx(struct kvm_vcpu *vcpu)
241{ 236{
242 return vcpu->arch.shadow_efer & EFER_NX; 237 return vcpu->arch.efer & EFER_NX;
243} 238}
244 239
245static int is_shadow_present_pte(u64 pte) 240static int is_shadow_present_pte(u64 pte)
@@ -253,7 +248,7 @@ static int is_large_pte(u64 pte)
253 return pte & PT_PAGE_SIZE_MASK; 248 return pte & PT_PAGE_SIZE_MASK;
254} 249}
255 250
256static int is_writeble_pte(unsigned long pte) 251static int is_writable_pte(unsigned long pte)
257{ 252{
258 return pte & PT_WRITABLE_MASK; 253 return pte & PT_WRITABLE_MASK;
259} 254}
@@ -470,24 +465,10 @@ static int has_wrprotected_page(struct kvm *kvm,
470 465
471static int host_mapping_level(struct kvm *kvm, gfn_t gfn) 466static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
472{ 467{
473 unsigned long page_size = PAGE_SIZE; 468 unsigned long page_size;
474 struct vm_area_struct *vma;
475 unsigned long addr;
476 int i, ret = 0; 469 int i, ret = 0;
477 470
478 addr = gfn_to_hva(kvm, gfn); 471 page_size = kvm_host_page_size(kvm, gfn);
479 if (kvm_is_error_hva(addr))
480 return PT_PAGE_TABLE_LEVEL;
481
482 down_read(&current->mm->mmap_sem);
483 vma = find_vma(current->mm, addr);
484 if (!vma)
485 goto out;
486
487 page_size = vma_kernel_pagesize(vma);
488
489out:
490 up_read(&current->mm->mmap_sem);
491 472
492 for (i = PT_PAGE_TABLE_LEVEL; 473 for (i = PT_PAGE_TABLE_LEVEL;
493 i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { 474 i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -503,8 +484,7 @@ out:
503static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) 484static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
504{ 485{
505 struct kvm_memory_slot *slot; 486 struct kvm_memory_slot *slot;
506 int host_level; 487 int host_level, level, max_level;
507 int level = PT_PAGE_TABLE_LEVEL;
508 488
509 slot = gfn_to_memslot(vcpu->kvm, large_gfn); 489 slot = gfn_to_memslot(vcpu->kvm, large_gfn);
510 if (slot && slot->dirty_bitmap) 490 if (slot && slot->dirty_bitmap)
@@ -515,7 +495,10 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
515 if (host_level == PT_PAGE_TABLE_LEVEL) 495 if (host_level == PT_PAGE_TABLE_LEVEL)
516 return host_level; 496 return host_level;
517 497
518 for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) 498 max_level = kvm_x86_ops->get_lpage_level() < host_level ?
499 kvm_x86_ops->get_lpage_level() : host_level;
500
501 for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
519 if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) 502 if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
520 break; 503 break;
521 504
@@ -633,7 +616,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
633 pfn = spte_to_pfn(*spte); 616 pfn = spte_to_pfn(*spte);
634 if (*spte & shadow_accessed_mask) 617 if (*spte & shadow_accessed_mask)
635 kvm_set_pfn_accessed(pfn); 618 kvm_set_pfn_accessed(pfn);
636 if (is_writeble_pte(*spte)) 619 if (is_writable_pte(*spte))
637 kvm_set_pfn_dirty(pfn); 620 kvm_set_pfn_dirty(pfn);
638 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); 621 rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
639 if (!*rmapp) { 622 if (!*rmapp) {
@@ -662,6 +645,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
662 prev_desc = desc; 645 prev_desc = desc;
663 desc = desc->more; 646 desc = desc->more;
664 } 647 }
648 pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
665 BUG(); 649 BUG();
666 } 650 }
667} 651}
@@ -708,7 +692,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
708 BUG_ON(!spte); 692 BUG_ON(!spte);
709 BUG_ON(!(*spte & PT_PRESENT_MASK)); 693 BUG_ON(!(*spte & PT_PRESENT_MASK));
710 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); 694 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
711 if (is_writeble_pte(*spte)) { 695 if (is_writable_pte(*spte)) {
712 __set_spte(spte, *spte & ~PT_WRITABLE_MASK); 696 __set_spte(spte, *spte & ~PT_WRITABLE_MASK);
713 write_protected = 1; 697 write_protected = 1;
714 } 698 }
@@ -732,7 +716,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
732 BUG_ON(!(*spte & PT_PRESENT_MASK)); 716 BUG_ON(!(*spte & PT_PRESENT_MASK));
733 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); 717 BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
734 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); 718 pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
735 if (is_writeble_pte(*spte)) { 719 if (is_writable_pte(*spte)) {
736 rmap_remove(kvm, spte); 720 rmap_remove(kvm, spte);
737 --kvm->stat.lpages; 721 --kvm->stat.lpages;
738 __set_spte(spte, shadow_trap_nonpresent_pte); 722 __set_spte(spte, shadow_trap_nonpresent_pte);
@@ -787,7 +771,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
787 771
788 new_spte &= ~PT_WRITABLE_MASK; 772 new_spte &= ~PT_WRITABLE_MASK;
789 new_spte &= ~SPTE_HOST_WRITEABLE; 773 new_spte &= ~SPTE_HOST_WRITEABLE;
790 if (is_writeble_pte(*spte)) 774 if (is_writable_pte(*spte))
791 kvm_set_pfn_dirty(spte_to_pfn(*spte)); 775 kvm_set_pfn_dirty(spte_to_pfn(*spte));
792 __set_spte(spte, new_spte); 776 __set_spte(spte, new_spte);
793 spte = rmap_next(kvm, rmapp, spte); 777 spte = rmap_next(kvm, rmapp, spte);
@@ -805,35 +789,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
805 unsigned long data)) 789 unsigned long data))
806{ 790{
807 int i, j; 791 int i, j;
792 int ret;
808 int retval = 0; 793 int retval = 0;
794 struct kvm_memslots *slots;
809 795
810 /* 796 slots = rcu_dereference(kvm->memslots);
811 * If mmap_sem isn't taken, we can look the memslots with only 797
812 * the mmu_lock by skipping over the slots with userspace_addr == 0. 798 for (i = 0; i < slots->nmemslots; i++) {
813 */ 799 struct kvm_memory_slot *memslot = &slots->memslots[i];
814 for (i = 0; i < kvm->nmemslots; i++) {
815 struct kvm_memory_slot *memslot = &kvm->memslots[i];
816 unsigned long start = memslot->userspace_addr; 800 unsigned long start = memslot->userspace_addr;
817 unsigned long end; 801 unsigned long end;
818 802
819 /* mmu_lock protects userspace_addr */
820 if (!start)
821 continue;
822
823 end = start + (memslot->npages << PAGE_SHIFT); 803 end = start + (memslot->npages << PAGE_SHIFT);
824 if (hva >= start && hva < end) { 804 if (hva >= start && hva < end) {
825 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 805 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
826 806
827 retval |= handler(kvm, &memslot->rmap[gfn_offset], 807 ret = handler(kvm, &memslot->rmap[gfn_offset], data);
828 data);
829 808
830 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { 809 for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
831 int idx = gfn_offset; 810 int idx = gfn_offset;
832 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); 811 idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
833 retval |= handler(kvm, 812 ret |= handler(kvm,
834 &memslot->lpage_info[j][idx].rmap_pde, 813 &memslot->lpage_info[j][idx].rmap_pde,
835 data); 814 data);
836 } 815 }
816 trace_kvm_age_page(hva, memslot, ret);
817 retval |= ret;
837 } 818 }
838 } 819 }
839 820
@@ -856,9 +837,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
856 u64 *spte; 837 u64 *spte;
857 int young = 0; 838 int young = 0;
858 839
859 /* always return old for EPT */ 840 /*
841 * Emulate the accessed bit for EPT, by checking if this page has
842 * an EPT mapping, and clearing it if it does. On the next access,
843 * a new EPT mapping will be established.
844 * This has some overhead, but not as much as the cost of swapping
845 * out actively used pages or breaking up actively used hugepages.
846 */
860 if (!shadow_accessed_mask) 847 if (!shadow_accessed_mask)
861 return 0; 848 return kvm_unmap_rmapp(kvm, rmapp, data);
862 849
863 spte = rmap_next(kvm, rmapp, NULL); 850 spte = rmap_next(kvm, rmapp, NULL);
864 while (spte) { 851 while (spte) {
@@ -1615,7 +1602,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
1615 1602
1616static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) 1603static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
1617{ 1604{
1618 int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); 1605 int slot = memslot_id(kvm, gfn);
1619 struct kvm_mmu_page *sp = page_header(__pa(pte)); 1606 struct kvm_mmu_page *sp = page_header(__pa(pte));
1620 1607
1621 __set_bit(slot, sp->slot_bitmap); 1608 __set_bit(slot, sp->slot_bitmap);
@@ -1639,7 +1626,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
1639{ 1626{
1640 struct page *page; 1627 struct page *page;
1641 1628
1642 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 1629 gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
1643 1630
1644 if (gpa == UNMAPPED_GVA) 1631 if (gpa == UNMAPPED_GVA)
1645 return NULL; 1632 return NULL;
@@ -1852,7 +1839,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1852 * is responsibility of mmu_get_page / kvm_sync_page. 1839 * is responsibility of mmu_get_page / kvm_sync_page.
1853 * Same reasoning can be applied to dirty page accounting. 1840 * Same reasoning can be applied to dirty page accounting.
1854 */ 1841 */
1855 if (!can_unsync && is_writeble_pte(*sptep)) 1842 if (!can_unsync && is_writable_pte(*sptep))
1856 goto set_pte; 1843 goto set_pte;
1857 1844
1858 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { 1845 if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1860,7 +1847,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1860 __func__, gfn); 1847 __func__, gfn);
1861 ret = 1; 1848 ret = 1;
1862 pte_access &= ~ACC_WRITE_MASK; 1849 pte_access &= ~ACC_WRITE_MASK;
1863 if (is_writeble_pte(spte)) 1850 if (is_writable_pte(spte))
1864 spte &= ~PT_WRITABLE_MASK; 1851 spte &= ~PT_WRITABLE_MASK;
1865 } 1852 }
1866 } 1853 }
@@ -1881,7 +1868,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1881 bool reset_host_protection) 1868 bool reset_host_protection)
1882{ 1869{
1883 int was_rmapped = 0; 1870 int was_rmapped = 0;
1884 int was_writeble = is_writeble_pte(*sptep); 1871 int was_writable = is_writable_pte(*sptep);
1885 int rmap_count; 1872 int rmap_count;
1886 1873
1887 pgprintk("%s: spte %llx access %x write_fault %d" 1874 pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1932,7 +1919,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
1932 if (rmap_count > RMAP_RECYCLE_THRESHOLD) 1919 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1933 rmap_recycle(vcpu, sptep, gfn); 1920 rmap_recycle(vcpu, sptep, gfn);
1934 } else { 1921 } else {
1935 if (was_writeble) 1922 if (was_writable)
1936 kvm_release_pfn_dirty(pfn); 1923 kvm_release_pfn_dirty(pfn);
1937 else 1924 else
1938 kvm_release_pfn_clean(pfn); 1925 kvm_release_pfn_clean(pfn);
@@ -2162,8 +2149,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
2162 spin_unlock(&vcpu->kvm->mmu_lock); 2149 spin_unlock(&vcpu->kvm->mmu_lock);
2163} 2150}
2164 2151
2165static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 2152static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
2153 u32 access, u32 *error)
2166{ 2154{
2155 if (error)
2156 *error = 0;
2167 return vaddr; 2157 return vaddr;
2168} 2158}
2169 2159
@@ -2747,7 +2737,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
2747 if (tdp_enabled) 2737 if (tdp_enabled)
2748 return 0; 2738 return 0;
2749 2739
2750 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); 2740 gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
2751 2741
2752 spin_lock(&vcpu->kvm->mmu_lock); 2742 spin_lock(&vcpu->kvm->mmu_lock);
2753 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); 2743 r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -2847,16 +2837,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
2847 */ 2837 */
2848 page = alloc_page(GFP_KERNEL | __GFP_DMA32); 2838 page = alloc_page(GFP_KERNEL | __GFP_DMA32);
2849 if (!page) 2839 if (!page)
2850 goto error_1; 2840 return -ENOMEM;
2841
2851 vcpu->arch.mmu.pae_root = page_address(page); 2842 vcpu->arch.mmu.pae_root = page_address(page);
2852 for (i = 0; i < 4; ++i) 2843 for (i = 0; i < 4; ++i)
2853 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; 2844 vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
2854 2845
2855 return 0; 2846 return 0;
2856
2857error_1:
2858 free_mmu_pages(vcpu);
2859 return -ENOMEM;
2860} 2847}
2861 2848
2862int kvm_mmu_create(struct kvm_vcpu *vcpu) 2849int kvm_mmu_create(struct kvm_vcpu *vcpu)
@@ -2936,10 +2923,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
2936 spin_lock(&kvm_lock); 2923 spin_lock(&kvm_lock);
2937 2924
2938 list_for_each_entry(kvm, &vm_list, vm_list) { 2925 list_for_each_entry(kvm, &vm_list, vm_list) {
2939 int npages; 2926 int npages, idx;
2940 2927
2941 if (!down_read_trylock(&kvm->slots_lock)) 2928 idx = srcu_read_lock(&kvm->srcu);
2942 continue;
2943 spin_lock(&kvm->mmu_lock); 2929 spin_lock(&kvm->mmu_lock);
2944 npages = kvm->arch.n_alloc_mmu_pages - 2930 npages = kvm->arch.n_alloc_mmu_pages -
2945 kvm->arch.n_free_mmu_pages; 2931 kvm->arch.n_free_mmu_pages;
@@ -2952,7 +2938,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
2952 nr_to_scan--; 2938 nr_to_scan--;
2953 2939
2954 spin_unlock(&kvm->mmu_lock); 2940 spin_unlock(&kvm->mmu_lock);
2955 up_read(&kvm->slots_lock); 2941 srcu_read_unlock(&kvm->srcu, idx);
2956 } 2942 }
2957 if (kvm_freed) 2943 if (kvm_freed)
2958 list_move_tail(&kvm_freed->vm_list, &vm_list); 2944 list_move_tail(&kvm_freed->vm_list, &vm_list);
@@ -3019,9 +3005,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
3019 int i; 3005 int i;
3020 unsigned int nr_mmu_pages; 3006 unsigned int nr_mmu_pages;
3021 unsigned int nr_pages = 0; 3007 unsigned int nr_pages = 0;
3008 struct kvm_memslots *slots;
3022 3009
3023 for (i = 0; i < kvm->nmemslots; i++) 3010 slots = rcu_dereference(kvm->memslots);
3024 nr_pages += kvm->memslots[i].npages; 3011 for (i = 0; i < slots->nmemslots; i++)
3012 nr_pages += slots->memslots[i].npages;
3025 3013
3026 nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; 3014 nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
3027 nr_mmu_pages = max(nr_mmu_pages, 3015 nr_mmu_pages = max(nr_mmu_pages,
@@ -3246,7 +3234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
3246 if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) 3234 if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
3247 audit_mappings_page(vcpu, ent, va, level - 1); 3235 audit_mappings_page(vcpu, ent, va, level - 1);
3248 else { 3236 else {
3249 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); 3237 gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
3250 gfn_t gfn = gpa >> PAGE_SHIFT; 3238 gfn_t gfn = gpa >> PAGE_SHIFT;
3251 pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); 3239 pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
3252 hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; 3240 hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
@@ -3291,10 +3279,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
3291static int count_rmaps(struct kvm_vcpu *vcpu) 3279static int count_rmaps(struct kvm_vcpu *vcpu)
3292{ 3280{
3293 int nmaps = 0; 3281 int nmaps = 0;
3294 int i, j, k; 3282 int i, j, k, idx;
3295 3283
3284 idx = srcu_read_lock(&kvm->srcu);
3285 slots = rcu_dereference(kvm->memslots);
3296 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { 3286 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
3297 struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; 3287 struct kvm_memory_slot *m = &slots->memslots[i];
3298 struct kvm_rmap_desc *d; 3288 struct kvm_rmap_desc *d;
3299 3289
3300 for (j = 0; j < m->npages; ++j) { 3290 for (j = 0; j < m->npages; ++j) {
@@ -3317,6 +3307,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
3317 } 3307 }
3318 } 3308 }
3319 } 3309 }
3310 srcu_read_unlock(&kvm->srcu, idx);
3320 return nmaps; 3311 return nmaps;
3321} 3312}
3322 3313
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 61a1b3884b49..be66759321a5 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -2,6 +2,7 @@
2#define __KVM_X86_MMU_H 2#define __KVM_X86_MMU_H
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5#include "kvm_cache_regs.h"
5 6
6#define PT64_PT_BITS 9 7#define PT64_PT_BITS 9
7#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) 8#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@@ -37,6 +38,16 @@
37#define PT32_ROOT_LEVEL 2 38#define PT32_ROOT_LEVEL 2
38#define PT32E_ROOT_LEVEL 3 39#define PT32E_ROOT_LEVEL 3
39 40
41#define PT_PDPE_LEVEL 3
42#define PT_DIRECTORY_LEVEL 2
43#define PT_PAGE_TABLE_LEVEL 1
44
45#define PFERR_PRESENT_MASK (1U << 0)
46#define PFERR_WRITE_MASK (1U << 1)
47#define PFERR_USER_MASK (1U << 2)
48#define PFERR_RSVD_MASK (1U << 3)
49#define PFERR_FETCH_MASK (1U << 4)
50
40int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); 51int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
41 52
42static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) 53static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
53 return kvm_mmu_load(vcpu); 64 return kvm_mmu_load(vcpu);
54} 65}
55 66
56static inline int is_long_mode(struct kvm_vcpu *vcpu)
57{
58#ifdef CONFIG_X86_64
59 return vcpu->arch.shadow_efer & EFER_LMA;
60#else
61 return 0;
62#endif
63}
64
65static inline int is_pae(struct kvm_vcpu *vcpu)
66{
67 return vcpu->arch.cr4 & X86_CR4_PAE;
68}
69
70static inline int is_pse(struct kvm_vcpu *vcpu)
71{
72 return vcpu->arch.cr4 & X86_CR4_PSE;
73}
74
75static inline int is_paging(struct kvm_vcpu *vcpu)
76{
77 return vcpu->arch.cr0 & X86_CR0_PG;
78}
79
80static inline int is_present_gpte(unsigned long pte) 67static inline int is_present_gpte(unsigned long pte)
81{ 68{
82 return pte & PT_PRESENT_MASK; 69 return pte & PT_PRESENT_MASK;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ede2131a9225..81eab9a50e6a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -162,7 +162,7 @@ walk:
162 if (rsvd_fault) 162 if (rsvd_fault)
163 goto access_error; 163 goto access_error;
164 164
165 if (write_fault && !is_writeble_pte(pte)) 165 if (write_fault && !is_writable_pte(pte))
166 if (user_fault || is_write_protection(vcpu)) 166 if (user_fault || is_write_protection(vcpu))
167 goto access_error; 167 goto access_error;
168 168
@@ -490,18 +490,23 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
490 spin_unlock(&vcpu->kvm->mmu_lock); 490 spin_unlock(&vcpu->kvm->mmu_lock);
491} 491}
492 492
493static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 493static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
494 u32 *error)
494{ 495{
495 struct guest_walker walker; 496 struct guest_walker walker;
496 gpa_t gpa = UNMAPPED_GVA; 497 gpa_t gpa = UNMAPPED_GVA;
497 int r; 498 int r;
498 499
499 r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); 500 r = FNAME(walk_addr)(&walker, vcpu, vaddr,
501 !!(access & PFERR_WRITE_MASK),
502 !!(access & PFERR_USER_MASK),
503 !!(access & PFERR_FETCH_MASK));
500 504
501 if (r) { 505 if (r) {
502 gpa = gfn_to_gpa(walker.gfn); 506 gpa = gfn_to_gpa(walker.gfn);
503 gpa |= vaddr & ~PAGE_MASK; 507 gpa |= vaddr & ~PAGE_MASK;
504 } 508 } else if (error)
509 *error = walker.error_code;
505 510
506 return gpa; 511 return gpa;
507} 512}
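paging_tmpl.h changes the gva_to_gpa() callback to take the required access rights as a PFERR_* mask and to return the hardware-format page-fault error code through *error when the walk fails. That lets emulation code translate an address exactly as the guest would (write/user/fetch checks included) and inject a precise #PF on failure. The kvm_mmu_gva_to_gpa_read() wrapper used by the mmu.c hunks lives in x86.c, outside this excerpt; its expected shape is roughly the following (a sketch, the exact helper body is an assumption):

    u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
    u32 error;
    gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, &error);

    if (gpa == UNMAPPED_GVA)
        kvm_inject_page_fault(vcpu, gva, error);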
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1d9b33843c80..52f78dd03010 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -231,7 +231,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
231 efer &= ~EFER_LME; 231 efer &= ~EFER_LME;
232 232
233 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 233 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
234 vcpu->arch.shadow_efer = efer; 234 vcpu->arch.efer = efer;
235} 235}
236 236
237static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 237static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -540,6 +540,8 @@ static void init_vmcb(struct vcpu_svm *svm)
540 struct vmcb_control_area *control = &svm->vmcb->control; 540 struct vmcb_control_area *control = &svm->vmcb->control;
541 struct vmcb_save_area *save = &svm->vmcb->save; 541 struct vmcb_save_area *save = &svm->vmcb->save;
542 542
543 svm->vcpu.fpu_active = 1;
544
543 control->intercept_cr_read = INTERCEPT_CR0_MASK | 545 control->intercept_cr_read = INTERCEPT_CR0_MASK |
544 INTERCEPT_CR3_MASK | 546 INTERCEPT_CR3_MASK |
545 INTERCEPT_CR4_MASK; 547 INTERCEPT_CR4_MASK;
@@ -552,13 +554,19 @@ static void init_vmcb(struct vcpu_svm *svm)
552 control->intercept_dr_read = INTERCEPT_DR0_MASK | 554 control->intercept_dr_read = INTERCEPT_DR0_MASK |
553 INTERCEPT_DR1_MASK | 555 INTERCEPT_DR1_MASK |
554 INTERCEPT_DR2_MASK | 556 INTERCEPT_DR2_MASK |
555 INTERCEPT_DR3_MASK; 557 INTERCEPT_DR3_MASK |
558 INTERCEPT_DR4_MASK |
559 INTERCEPT_DR5_MASK |
560 INTERCEPT_DR6_MASK |
561 INTERCEPT_DR7_MASK;
556 562
557 control->intercept_dr_write = INTERCEPT_DR0_MASK | 563 control->intercept_dr_write = INTERCEPT_DR0_MASK |
558 INTERCEPT_DR1_MASK | 564 INTERCEPT_DR1_MASK |
559 INTERCEPT_DR2_MASK | 565 INTERCEPT_DR2_MASK |
560 INTERCEPT_DR3_MASK | 566 INTERCEPT_DR3_MASK |
567 INTERCEPT_DR4_MASK |
561 INTERCEPT_DR5_MASK | 568 INTERCEPT_DR5_MASK |
569 INTERCEPT_DR6_MASK |
562 INTERCEPT_DR7_MASK; 570 INTERCEPT_DR7_MASK;
563 571
564 control->intercept_exceptions = (1 << PF_VECTOR) | 572 control->intercept_exceptions = (1 << PF_VECTOR) |
@@ -569,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm)
569 control->intercept = (1ULL << INTERCEPT_INTR) | 577 control->intercept = (1ULL << INTERCEPT_INTR) |
570 (1ULL << INTERCEPT_NMI) | 578 (1ULL << INTERCEPT_NMI) |
571 (1ULL << INTERCEPT_SMI) | 579 (1ULL << INTERCEPT_SMI) |
580 (1ULL << INTERCEPT_SELECTIVE_CR0) |
572 (1ULL << INTERCEPT_CPUID) | 581 (1ULL << INTERCEPT_CPUID) |
573 (1ULL << INTERCEPT_INVD) | 582 (1ULL << INTERCEPT_INVD) |
574 (1ULL << INTERCEPT_HLT) | 583 (1ULL << INTERCEPT_HLT) |
@@ -641,10 +650,8 @@ static void init_vmcb(struct vcpu_svm *svm)
641 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | 650 control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
642 (1ULL << INTERCEPT_INVLPG)); 651 (1ULL << INTERCEPT_INVLPG));
643 control->intercept_exceptions &= ~(1 << PF_VECTOR); 652 control->intercept_exceptions &= ~(1 << PF_VECTOR);
644 control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| 653 control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
645 INTERCEPT_CR3_MASK); 654 control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
646 control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
647 INTERCEPT_CR3_MASK);
648 save->g_pat = 0x0007040600070406ULL; 655 save->g_pat = 0x0007040600070406ULL;
649 save->cr3 = 0; 656 save->cr3 = 0;
650 save->cr4 = 0; 657 save->cr4 = 0;
@@ -730,7 +737,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
730 init_vmcb(svm); 737 init_vmcb(svm);
731 738
732 fx_init(&svm->vcpu); 739 fx_init(&svm->vcpu);
733 svm->vcpu.fpu_active = 1;
734 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; 740 svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
735 if (kvm_vcpu_is_bsp(&svm->vcpu)) 741 if (kvm_vcpu_is_bsp(&svm->vcpu))
736 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; 742 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -765,14 +771,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
765 if (unlikely(cpu != vcpu->cpu)) { 771 if (unlikely(cpu != vcpu->cpu)) {
766 u64 delta; 772 u64 delta;
767 773
768 /* 774 if (check_tsc_unstable()) {
769 * Make sure that the guest sees a monotonically 775 /*
770 * increasing TSC. 776 * Make sure that the guest sees a monotonically
771 */ 777 * increasing TSC.
772 delta = vcpu->arch.host_tsc - native_read_tsc(); 778 */
773 svm->vmcb->control.tsc_offset += delta; 779 delta = vcpu->arch.host_tsc - native_read_tsc();
774 if (is_nested(svm)) 780 svm->vmcb->control.tsc_offset += delta;
775 svm->nested.hsave->control.tsc_offset += delta; 781 if (is_nested(svm))
782 svm->nested.hsave->control.tsc_offset += delta;
783 }
776 vcpu->cpu = cpu; 784 vcpu->cpu = cpu;
777 kvm_migrate_timers(vcpu); 785 kvm_migrate_timers(vcpu);
778 svm->asid_generation = 0; 786 svm->asid_generation = 0;
@@ -954,42 +962,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
954 svm->vmcb->save.gdtr.base = dt->base ; 962 svm->vmcb->save.gdtr.base = dt->base ;
955} 963}
956 964
965static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
966{
967}
968
957static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 969static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
958{ 970{
959} 971}
960 972
973static void update_cr0_intercept(struct vcpu_svm *svm)
974{
975 ulong gcr0 = svm->vcpu.arch.cr0;
976 u64 *hcr0 = &svm->vmcb->save.cr0;
977
978 if (!svm->vcpu.fpu_active)
979 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
980 else
981 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
982 | (gcr0 & SVM_CR0_SELECTIVE_MASK);
983
984
985 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
986 svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
987 svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
988 } else {
989 svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
990 svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
991 }
992}
993
961static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 994static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
962{ 995{
963 struct vcpu_svm *svm = to_svm(vcpu); 996 struct vcpu_svm *svm = to_svm(vcpu);
964 997
965#ifdef CONFIG_X86_64 998#ifdef CONFIG_X86_64
966 if (vcpu->arch.shadow_efer & EFER_LME) { 999 if (vcpu->arch.efer & EFER_LME) {
967 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1000 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
968 vcpu->arch.shadow_efer |= EFER_LMA; 1001 vcpu->arch.efer |= EFER_LMA;
969 svm->vmcb->save.efer |= EFER_LMA | EFER_LME; 1002 svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
970 } 1003 }
971 1004
972 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { 1005 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
973 vcpu->arch.shadow_efer &= ~EFER_LMA; 1006 vcpu->arch.efer &= ~EFER_LMA;
974 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); 1007 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
975 } 1008 }
976 } 1009 }
977#endif 1010#endif
978 if (npt_enabled) 1011 vcpu->arch.cr0 = cr0;
979 goto set;
980 1012
981 if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { 1013 if (!npt_enabled)
982 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1014 cr0 |= X86_CR0_PG | X86_CR0_WP;
983 vcpu->fpu_active = 1;
984 }
985 1015
986 vcpu->arch.cr0 = cr0; 1016 if (!vcpu->fpu_active)
987 cr0 |= X86_CR0_PG | X86_CR0_WP;
988 if (!vcpu->fpu_active) {
989 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
990 cr0 |= X86_CR0_TS; 1017 cr0 |= X86_CR0_TS;
991 }
992set:
993 /* 1018 /*
994 * re-enable caching here because the QEMU bios 1019 * re-enable caching here because the QEMU bios
995 * does not do it - this results in some delay at 1020 * does not do it - this results in some delay at
@@ -997,6 +1022,7 @@ set:
997 */ 1022 */
998 cr0 &= ~(X86_CR0_CD | X86_CR0_NW); 1023 cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
999 svm->vmcb->save.cr0 = cr0; 1024 svm->vmcb->save.cr0 = cr0;
1025 update_cr0_intercept(svm);
1000} 1026}
1001 1027
1002static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1028static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1102,76 +1128,70 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1102 svm->vmcb->control.asid = sd->next_asid++; 1128 svm->vmcb->control.asid = sd->next_asid++;
1103} 1129}
1104 1130
1105static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) 1131static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
1106{ 1132{
1107 struct vcpu_svm *svm = to_svm(vcpu); 1133 struct vcpu_svm *svm = to_svm(vcpu);
1108 unsigned long val;
1109 1134
1110 switch (dr) { 1135 switch (dr) {
1111 case 0 ... 3: 1136 case 0 ... 3:
1112 val = vcpu->arch.db[dr]; 1137 *dest = vcpu->arch.db[dr];
1113 break; 1138 break;
1139 case 4:
1140 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1141 return EMULATE_FAIL; /* will re-inject UD */
1142 /* fall through */
1114 case 6: 1143 case 6:
1115 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1144 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1116 val = vcpu->arch.dr6; 1145 *dest = vcpu->arch.dr6;
1117 else 1146 else
1118 val = svm->vmcb->save.dr6; 1147 *dest = svm->vmcb->save.dr6;
1119 break; 1148 break;
1149 case 5:
1150 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1151 return EMULATE_FAIL; /* will re-inject UD */
1152 /* fall through */
1120 case 7: 1153 case 7:
1121 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) 1154 if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1122 val = vcpu->arch.dr7; 1155 *dest = vcpu->arch.dr7;
1123 else 1156 else
1124 val = svm->vmcb->save.dr7; 1157 *dest = svm->vmcb->save.dr7;
1125 break; 1158 break;
1126 default:
1127 val = 0;
1128 } 1159 }
1129 1160
1130 return val; 1161 return EMULATE_DONE;
1131} 1162}
1132 1163
1133static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, 1164static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
1134 int *exception)
1135{ 1165{
1136 struct vcpu_svm *svm = to_svm(vcpu); 1166 struct vcpu_svm *svm = to_svm(vcpu);
1137 1167
1138 *exception = 0;
1139
1140 switch (dr) { 1168 switch (dr) {
1141 case 0 ... 3: 1169 case 0 ... 3:
1142 vcpu->arch.db[dr] = value; 1170 vcpu->arch.db[dr] = value;
1143 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 1171 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
1144 vcpu->arch.eff_db[dr] = value; 1172 vcpu->arch.eff_db[dr] = value;
1145 return; 1173 break;
1146 case 4 ... 5: 1174 case 4:
1147 if (vcpu->arch.cr4 & X86_CR4_DE) 1175 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1148 *exception = UD_VECTOR; 1176 return EMULATE_FAIL; /* will re-inject UD */
1149 return; 1177 /* fall through */
1150 case 6: 1178 case 6:
1151 if (value & 0xffffffff00000000ULL) {
1152 *exception = GP_VECTOR;
1153 return;
1154 }
1155 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; 1179 vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
1156 return; 1180 break;
1181 case 5:
1182 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
1183 return EMULATE_FAIL; /* will re-inject UD */
1184 /* fall through */
1157 case 7: 1185 case 7:
1158 if (value & 0xffffffff00000000ULL) {
1159 *exception = GP_VECTOR;
1160 return;
1161 }
1162 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; 1186 vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
1163 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 1187 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
1164 svm->vmcb->save.dr7 = vcpu->arch.dr7; 1188 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1165 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); 1189 vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
1166 } 1190 }
1167 return; 1191 break;
1168 default:
1169 /* FIXME: Possible case? */
1170 printk(KERN_DEBUG "%s: unexpected dr %u\n",
1171 __func__, dr);
1172 *exception = UD_VECTOR;
1173 return;
1174 } 1192 }
1193
1194 return EMULATE_DONE;
1175} 1195}
1176 1196
1177static int pf_interception(struct vcpu_svm *svm) 1197static int pf_interception(struct vcpu_svm *svm)
@@ -1239,13 +1259,17 @@ static int ud_interception(struct vcpu_svm *svm)
1239 return 1; 1259 return 1;
1240} 1260}
1241 1261
1242static int nm_interception(struct vcpu_svm *svm) 1262static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1243{ 1263{
1264 struct vcpu_svm *svm = to_svm(vcpu);
1244 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); 1265 svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
1245 if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
1246 svm->vmcb->save.cr0 &= ~X86_CR0_TS;
1247 svm->vcpu.fpu_active = 1; 1266 svm->vcpu.fpu_active = 1;
1267 update_cr0_intercept(svm);
1268}
1248 1269
1270static int nm_interception(struct vcpu_svm *svm)
1271{
1272 svm_fpu_activate(&svm->vcpu);
1249 return 1; 1273 return 1;
1250} 1274}
1251 1275
@@ -1337,7 +1361,7 @@ static int vmmcall_interception(struct vcpu_svm *svm)
1337 1361
1338static int nested_svm_check_permissions(struct vcpu_svm *svm) 1362static int nested_svm_check_permissions(struct vcpu_svm *svm)
1339{ 1363{
1340 if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) 1364 if (!(svm->vcpu.arch.efer & EFER_SVME)
1341 || !is_paging(&svm->vcpu)) { 1365 || !is_paging(&svm->vcpu)) {
1342 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 1366 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1343 return 1; 1367 return 1;
@@ -1740,8 +1764,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
1740 hsave->save.ds = vmcb->save.ds; 1764 hsave->save.ds = vmcb->save.ds;
1741 hsave->save.gdtr = vmcb->save.gdtr; 1765 hsave->save.gdtr = vmcb->save.gdtr;
1742 hsave->save.idtr = vmcb->save.idtr; 1766 hsave->save.idtr = vmcb->save.idtr;
1743 hsave->save.efer = svm->vcpu.arch.shadow_efer; 1767 hsave->save.efer = svm->vcpu.arch.efer;
1744 hsave->save.cr0 = svm->vcpu.arch.cr0; 1768 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
1745 hsave->save.cr4 = svm->vcpu.arch.cr4; 1769 hsave->save.cr4 = svm->vcpu.arch.cr4;
1746 hsave->save.rflags = vmcb->save.rflags; 1770 hsave->save.rflags = vmcb->save.rflags;
1747 hsave->save.rip = svm->next_rip; 1771 hsave->save.rip = svm->next_rip;
@@ -2153,9 +2177,10 @@ static int rdmsr_interception(struct vcpu_svm *svm)
2153 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 2177 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2154 u64 data; 2178 u64 data;
2155 2179
2156 if (svm_get_msr(&svm->vcpu, ecx, &data)) 2180 if (svm_get_msr(&svm->vcpu, ecx, &data)) {
2181 trace_kvm_msr_read_ex(ecx);
2157 kvm_inject_gp(&svm->vcpu, 0); 2182 kvm_inject_gp(&svm->vcpu, 0);
2158 else { 2183 } else {
2159 trace_kvm_msr_read(ecx, data); 2184 trace_kvm_msr_read(ecx, data);
2160 2185
2161 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; 2186 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
@@ -2247,13 +2272,15 @@ static int wrmsr_interception(struct vcpu_svm *svm)
2247 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) 2272 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
2248 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); 2273 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
2249 2274
2250 trace_kvm_msr_write(ecx, data);
2251 2275
2252 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2276 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2253 if (svm_set_msr(&svm->vcpu, ecx, data)) 2277 if (svm_set_msr(&svm->vcpu, ecx, data)) {
2278 trace_kvm_msr_write_ex(ecx, data);
2254 kvm_inject_gp(&svm->vcpu, 0); 2279 kvm_inject_gp(&svm->vcpu, 0);
2255 else 2280 } else {
2281 trace_kvm_msr_write(ecx, data);
2256 skip_emulated_instruction(&svm->vcpu); 2282 skip_emulated_instruction(&svm->vcpu);
2283 }
2257 return 1; 2284 return 1;
2258} 2285}
2259 2286
@@ -2297,7 +2324,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2297 [SVM_EXIT_READ_CR3] = emulate_on_interception, 2324 [SVM_EXIT_READ_CR3] = emulate_on_interception,
2298 [SVM_EXIT_READ_CR4] = emulate_on_interception, 2325 [SVM_EXIT_READ_CR4] = emulate_on_interception,
2299 [SVM_EXIT_READ_CR8] = emulate_on_interception, 2326 [SVM_EXIT_READ_CR8] = emulate_on_interception,
2300 /* for now: */ 2327 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
2301 [SVM_EXIT_WRITE_CR0] = emulate_on_interception, 2328 [SVM_EXIT_WRITE_CR0] = emulate_on_interception,
2302 [SVM_EXIT_WRITE_CR3] = emulate_on_interception, 2329 [SVM_EXIT_WRITE_CR3] = emulate_on_interception,
2303 [SVM_EXIT_WRITE_CR4] = emulate_on_interception, 2330 [SVM_EXIT_WRITE_CR4] = emulate_on_interception,
@@ -2306,11 +2333,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
2306 [SVM_EXIT_READ_DR1] = emulate_on_interception, 2333 [SVM_EXIT_READ_DR1] = emulate_on_interception,
2307 [SVM_EXIT_READ_DR2] = emulate_on_interception, 2334 [SVM_EXIT_READ_DR2] = emulate_on_interception,
2308 [SVM_EXIT_READ_DR3] = emulate_on_interception, 2335 [SVM_EXIT_READ_DR3] = emulate_on_interception,
2336 [SVM_EXIT_READ_DR4] = emulate_on_interception,
2337 [SVM_EXIT_READ_DR5] = emulate_on_interception,
2338 [SVM_EXIT_READ_DR6] = emulate_on_interception,
2339 [SVM_EXIT_READ_DR7] = emulate_on_interception,
2309 [SVM_EXIT_WRITE_DR0] = emulate_on_interception, 2340 [SVM_EXIT_WRITE_DR0] = emulate_on_interception,
2310 [SVM_EXIT_WRITE_DR1] = emulate_on_interception, 2341 [SVM_EXIT_WRITE_DR1] = emulate_on_interception,
2311 [SVM_EXIT_WRITE_DR2] = emulate_on_interception, 2342 [SVM_EXIT_WRITE_DR2] = emulate_on_interception,
2312 [SVM_EXIT_WRITE_DR3] = emulate_on_interception, 2343 [SVM_EXIT_WRITE_DR3] = emulate_on_interception,
2344 [SVM_EXIT_WRITE_DR4] = emulate_on_interception,
2313 [SVM_EXIT_WRITE_DR5] = emulate_on_interception, 2345 [SVM_EXIT_WRITE_DR5] = emulate_on_interception,
2346 [SVM_EXIT_WRITE_DR6] = emulate_on_interception,
2314 [SVM_EXIT_WRITE_DR7] = emulate_on_interception, 2347 [SVM_EXIT_WRITE_DR7] = emulate_on_interception,
2315 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, 2348 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
2316 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 2349 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
@@ -2383,20 +2416,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
2383 2416
2384 svm_complete_interrupts(svm); 2417 svm_complete_interrupts(svm);
2385 2418
2386 if (npt_enabled) { 2419 if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
2387 int mmu_reload = 0;
2388 if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
2389 svm_set_cr0(vcpu, svm->vmcb->save.cr0);
2390 mmu_reload = 1;
2391 }
2392 vcpu->arch.cr0 = svm->vmcb->save.cr0; 2420 vcpu->arch.cr0 = svm->vmcb->save.cr0;
2421 if (npt_enabled)
2393 vcpu->arch.cr3 = svm->vmcb->save.cr3; 2422 vcpu->arch.cr3 = svm->vmcb->save.cr3;
2394 if (mmu_reload) {
2395 kvm_mmu_reset_context(vcpu);
2396 kvm_mmu_load(vcpu);
2397 }
2398 }
2399
2400 2423
2401 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 2424 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
2402 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 2425 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2798,12 +2821,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
2798 2821
2799 svm->vmcb->save.cr3 = root; 2822 svm->vmcb->save.cr3 = root;
2800 force_new_asid(vcpu); 2823 force_new_asid(vcpu);
2801
2802 if (vcpu->fpu_active) {
2803 svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
2804 svm->vmcb->save.cr0 |= X86_CR0_TS;
2805 vcpu->fpu_active = 0;
2806 }
2807} 2824}
2808 2825
2809static int is_disabled(void) 2826static int is_disabled(void)
@@ -2852,6 +2869,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
2852 return 0; 2869 return 0;
2853} 2870}
2854 2871
2872static void svm_cpuid_update(struct kvm_vcpu *vcpu)
2873{
2874}
2875
2855static const struct trace_print_flags svm_exit_reasons_str[] = { 2876static const struct trace_print_flags svm_exit_reasons_str[] = {
2856 { SVM_EXIT_READ_CR0, "read_cr0" }, 2877 { SVM_EXIT_READ_CR0, "read_cr0" },
2857 { SVM_EXIT_READ_CR3, "read_cr3" }, 2878 { SVM_EXIT_READ_CR3, "read_cr3" },
@@ -2905,9 +2926,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = {
2905 { -1, NULL } 2926 { -1, NULL }
2906}; 2927};
2907 2928
2908static bool svm_gb_page_enable(void) 2929static int svm_get_lpage_level(void)
2909{ 2930{
2910 return true; 2931 return PT_PDPE_LEVEL;
2932}
2933
2934static bool svm_rdtscp_supported(void)
2935{
2936 return false;
2937}
2938
2939static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
2940{
2941 struct vcpu_svm *svm = to_svm(vcpu);
2942
2943 update_cr0_intercept(svm);
2944 svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
2911} 2945}
2912 2946
2913static struct kvm_x86_ops svm_x86_ops = { 2947static struct kvm_x86_ops svm_x86_ops = {
@@ -2936,6 +2970,7 @@ static struct kvm_x86_ops svm_x86_ops = {
2936 .set_segment = svm_set_segment, 2970 .set_segment = svm_set_segment,
2937 .get_cpl = svm_get_cpl, 2971 .get_cpl = svm_get_cpl,
2938 .get_cs_db_l_bits = kvm_get_cs_db_l_bits, 2972 .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
2973 .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
2939 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, 2974 .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
2940 .set_cr0 = svm_set_cr0, 2975 .set_cr0 = svm_set_cr0,
2941 .set_cr3 = svm_set_cr3, 2976 .set_cr3 = svm_set_cr3,
@@ -2950,6 +2985,8 @@ static struct kvm_x86_ops svm_x86_ops = {
2950 .cache_reg = svm_cache_reg, 2985 .cache_reg = svm_cache_reg,
2951 .get_rflags = svm_get_rflags, 2986 .get_rflags = svm_get_rflags,
2952 .set_rflags = svm_set_rflags, 2987 .set_rflags = svm_set_rflags,
2988 .fpu_activate = svm_fpu_activate,
2989 .fpu_deactivate = svm_fpu_deactivate,
2953 2990
2954 .tlb_flush = svm_flush_tlb, 2991 .tlb_flush = svm_flush_tlb,
2955 2992
@@ -2975,7 +3012,11 @@ static struct kvm_x86_ops svm_x86_ops = {
2975 .get_mt_mask = svm_get_mt_mask, 3012 .get_mt_mask = svm_get_mt_mask,
2976 3013
2977 .exit_reasons_str = svm_exit_reasons_str, 3014 .exit_reasons_str = svm_exit_reasons_str,
2978 .gb_page_enable = svm_gb_page_enable, 3015 .get_lpage_level = svm_get_lpage_level,
3016
3017 .cpuid_update = svm_cpuid_update,
3018
3019 .rdtscp_supported = svm_rdtscp_supported,
2979}; 3020};
2980 3021
2981static int __init svm_init(void) 3022static int __init svm_init(void)
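The bulk of the svm.c rework implements lazy FPU switching plus selective CR0 interception. Instead of unconditionally intercepting CR0 writes and re-arming the #NM intercept on every CR3 load, update_cr0_intercept() compares the guest's view of CR0 with the value in the VMCB and only keeps the CR0 read/write intercepts while the two differ or while the guest FPU is deactivated (in which case CR0.TS is faked so the first FPU use traps). Condensed from the hunks above:

    /* fpu_deactivate: fake CR0.TS and trap the first FPU instruction */
    svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
    update_cr0_intercept(svm);

    /* #NM exit: hand the FPU back and drop the intercepts again */
    static int nm_interception(struct vcpu_svm *svm)
    {
        svm_fpu_activate(&svm->vcpu);
        return 1;
    }

The debug-register accessors follow a similar pattern: DR4/DR5 accesses are treated as aliases of DR6/DR7 unless CR4.DE is set, in which case EMULATE_FAIL is returned so the caller re-injects #UD, as the in-line comments note.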
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 816e0449db0b..6ad30a29f044 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -56,6 +56,38 @@ TRACE_EVENT(kvm_hypercall,
56); 56);
57 57
58/* 58/*
59 * Tracepoint for hypercall.
60 */
61TRACE_EVENT(kvm_hv_hypercall,
62 TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
63 __u64 ingpa, __u64 outgpa),
64 TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
65
66 TP_STRUCT__entry(
67 __field( __u16, code )
68 __field( bool, fast )
69 __field( __u16, rep_cnt )
70 __field( __u16, rep_idx )
71 __field( __u64, ingpa )
72 __field( __u64, outgpa )
73 ),
74
75 TP_fast_assign(
76 __entry->code = code;
77 __entry->fast = fast;
78 __entry->rep_cnt = rep_cnt;
79 __entry->rep_idx = rep_idx;
80 __entry->ingpa = ingpa;
81 __entry->outgpa = outgpa;
82 ),
83
84 TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
85 __entry->code, __entry->fast ? "fast" : "slow",
86 __entry->rep_cnt, __entry->rep_idx, __entry->ingpa,
87 __entry->outgpa)
88);
89
90/*
59 * Tracepoint for PIO. 91 * Tracepoint for PIO.
60 */ 92 */
61TRACE_EVENT(kvm_pio, 93TRACE_EVENT(kvm_pio,
@@ -214,28 +246,33 @@ TRACE_EVENT(kvm_page_fault,
214 * Tracepoint for guest MSR access. 246 * Tracepoint for guest MSR access.
215 */ 247 */
216TRACE_EVENT(kvm_msr, 248TRACE_EVENT(kvm_msr,
217 TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data), 249 TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception),
218 TP_ARGS(rw, ecx, data), 250 TP_ARGS(write, ecx, data, exception),
219 251
220 TP_STRUCT__entry( 252 TP_STRUCT__entry(
221 __field( unsigned int, rw ) 253 __field( unsigned, write )
222 __field( unsigned int, ecx ) 254 __field( u32, ecx )
223 __field( unsigned long, data ) 255 __field( u64, data )
256 __field( u8, exception )
224 ), 257 ),
225 258
226 TP_fast_assign( 259 TP_fast_assign(
227 __entry->rw = rw; 260 __entry->write = write;
228 __entry->ecx = ecx; 261 __entry->ecx = ecx;
229 __entry->data = data; 262 __entry->data = data;
263 __entry->exception = exception;
230 ), 264 ),
231 265
232 TP_printk("msr_%s %x = 0x%lx", 266 TP_printk("msr_%s %x = 0x%llx%s",
233 __entry->rw ? "write" : "read", 267 __entry->write ? "write" : "read",
234 __entry->ecx, __entry->data) 268 __entry->ecx, __entry->data,
269 __entry->exception ? " (#GP)" : "")
235); 270);
236 271
237#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data) 272#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false)
238#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data) 273#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false)
274#define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true)
275#define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true)
239 276
240/* 277/*
241 * Tracepoint for guest CR access. 278 * Tracepoint for guest CR access.
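Note on the trace.h changes above: the kvm_msr tracepoint now records the write direction, the full 64-bit data and an exception flag, and the four wrapper macros let faulting RDMSR/WRMSR attempts be traced instead of silently dropped. The plain-C sketch below mirrors only the resulting formatting behaviour; the demo_* names are illustrative and this is not the ftrace machinery.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* mirrors TP_printk("msr_%s %x = 0x%llx%s", ...) from the tracepoint */
static void demo_trace_kvm_msr(unsigned write, uint32_t ecx, uint64_t data,
			       bool exception)
{
	printf("msr_%s %x = 0x%llx%s\n",
	       write ? "write" : "read", ecx,
	       (unsigned long long)data,
	       exception ? " (#GP)" : "");
}

/* the four wrappers added by the patch all map onto the one event */
#define demo_trace_kvm_msr_read(ecx, data)     demo_trace_kvm_msr(0, ecx, data, false)
#define demo_trace_kvm_msr_write(ecx, data)    demo_trace_kvm_msr(1, ecx, data, false)
#define demo_trace_kvm_msr_read_ex(ecx)        demo_trace_kvm_msr(0, ecx, 0, true)
#define demo_trace_kvm_msr_write_ex(ecx, data) demo_trace_kvm_msr(1, ecx, data, true)

int main(void)
{
	demo_trace_kvm_msr_write(0xc0000080, 0x500);	/* successful write */
	demo_trace_kvm_msr_read_ex(0x12345678);		/* faulting read, no data */
	return 0;
}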
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d4918d6fc924..14873b9f8430 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -61,6 +61,21 @@ module_param_named(unrestricted_guest,
61static int __read_mostly emulate_invalid_guest_state = 0; 61static int __read_mostly emulate_invalid_guest_state = 0;
62module_param(emulate_invalid_guest_state, bool, S_IRUGO); 62module_param(emulate_invalid_guest_state, bool, S_IRUGO);
63 63
64#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
65 (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
66#define KVM_GUEST_CR0_MASK \
67 (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
68#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
69 (X86_CR0_WP | X86_CR0_NE)
70#define KVM_VM_CR0_ALWAYS_ON \
71 (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
72#define KVM_CR4_GUEST_OWNED_BITS \
73 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
74 | X86_CR4_OSXMMEXCPT)
75
76#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
77#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
78
64/* 79/*
65 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 80 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
66 * ple_gap: upper bound on the amount of time between two successive 81 * ple_gap: upper bound on the amount of time between two successive
@@ -136,6 +151,8 @@ struct vcpu_vmx {
136 ktime_t entry_time; 151 ktime_t entry_time;
137 s64 vnmi_blocked_time; 152 s64 vnmi_blocked_time;
138 u32 exit_reason; 153 u32 exit_reason;
154
155 bool rdtscp_enabled;
139}; 156};
140 157
141static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) 158static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -210,7 +227,7 @@ static const u32 vmx_msr_index[] = {
210#ifdef CONFIG_X86_64 227#ifdef CONFIG_X86_64
211 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, 228 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
212#endif 229#endif
213 MSR_EFER, MSR_K6_STAR, 230 MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
214}; 231};
215#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) 232#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
216 233
@@ -301,6 +318,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
301 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); 318 return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
302} 319}
303 320
321static inline bool cpu_has_vmx_ept_1g_page(void)
322{
323 return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
324}
325
304static inline int cpu_has_vmx_invept_individual_addr(void) 326static inline int cpu_has_vmx_invept_individual_addr(void)
305{ 327{
306 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); 328 return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -336,9 +358,7 @@ static inline int cpu_has_vmx_ple(void)
336 358
337static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) 359static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
338{ 360{
339 return flexpriority_enabled && 361 return flexpriority_enabled && irqchip_in_kernel(kvm);
340 (cpu_has_vmx_virtualize_apic_accesses()) &&
341 (irqchip_in_kernel(kvm));
342} 362}
343 363
344static inline int cpu_has_vmx_vpid(void) 364static inline int cpu_has_vmx_vpid(void)
@@ -347,6 +367,12 @@ static inline int cpu_has_vmx_vpid(void)
347 SECONDARY_EXEC_ENABLE_VPID; 367 SECONDARY_EXEC_ENABLE_VPID;
348} 368}
349 369
370static inline int cpu_has_vmx_rdtscp(void)
371{
372 return vmcs_config.cpu_based_2nd_exec_ctrl &
373 SECONDARY_EXEC_RDTSCP;
374}
375
350static inline int cpu_has_virtual_nmis(void) 376static inline int cpu_has_virtual_nmis(void)
351{ 377{
352 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; 378 return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@@ -551,22 +577,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
551{ 577{
552 u32 eb; 578 u32 eb;
553 579
554 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); 580 eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
555 if (!vcpu->fpu_active) 581 (1u << NM_VECTOR) | (1u << DB_VECTOR);
556 eb |= 1u << NM_VECTOR; 582 if ((vcpu->guest_debug &
557 /* 583 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
558 * Unconditionally intercept #DB so we can maintain dr6 without 584 (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
559 * reading it every exit. 585 eb |= 1u << BP_VECTOR;
560 */
561 eb |= 1u << DB_VECTOR;
562 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
563 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
564 eb |= 1u << BP_VECTOR;
565 }
566 if (to_vmx(vcpu)->rmode.vm86_active) 586 if (to_vmx(vcpu)->rmode.vm86_active)
567 eb = ~0; 587 eb = ~0;
568 if (enable_ept) 588 if (enable_ept)
569 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ 589 eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
590 if (vcpu->fpu_active)
591 eb &= ~(1u << NM_VECTOR);
570 vmcs_write32(EXCEPTION_BITMAP, eb); 592 vmcs_write32(EXCEPTION_BITMAP, eb);
571} 593}
572 594
@@ -589,7 +611,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
589 u64 guest_efer; 611 u64 guest_efer;
590 u64 ignore_bits; 612 u64 ignore_bits;
591 613
592 guest_efer = vmx->vcpu.arch.shadow_efer; 614 guest_efer = vmx->vcpu.arch.efer;
593 615
594 /* 616 /*
595 * NX is emulated; LMA and LME handled by hardware; SCE meaninless 617 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
@@ -767,22 +789,30 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
767 789
768static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 790static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
769{ 791{
792 ulong cr0;
793
770 if (vcpu->fpu_active) 794 if (vcpu->fpu_active)
771 return; 795 return;
772 vcpu->fpu_active = 1; 796 vcpu->fpu_active = 1;
773 vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); 797 cr0 = vmcs_readl(GUEST_CR0);
774 if (vcpu->arch.cr0 & X86_CR0_TS) 798 cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
775 vmcs_set_bits(GUEST_CR0, X86_CR0_TS); 799 cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
800 vmcs_writel(GUEST_CR0, cr0);
776 update_exception_bitmap(vcpu); 801 update_exception_bitmap(vcpu);
802 vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
803 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
777} 804}
778 805
806static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
807
779static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) 808static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
780{ 809{
781 if (!vcpu->fpu_active) 810 vmx_decache_cr0_guest_bits(vcpu);
782 return; 811 vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
783 vcpu->fpu_active = 0;
784 vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
785 update_exception_bitmap(vcpu); 812 update_exception_bitmap(vcpu);
813 vcpu->arch.cr0_guest_owned_bits = 0;
814 vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
815 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
786} 816}
787 817
788static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) 818static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@@ -878,6 +908,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
878 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); 908 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
879} 909}
880 910
911static bool vmx_rdtscp_supported(void)
912{
913 return cpu_has_vmx_rdtscp();
914}
915
881/* 916/*
882 * Swap MSR entry in host/guest MSR entry array. 917 * Swap MSR entry in host/guest MSR entry array.
883 */ 918 */
@@ -913,12 +948,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
913 index = __find_msr_index(vmx, MSR_CSTAR); 948 index = __find_msr_index(vmx, MSR_CSTAR);
914 if (index >= 0) 949 if (index >= 0)
915 move_msr_up(vmx, index, save_nmsrs++); 950 move_msr_up(vmx, index, save_nmsrs++);
951 index = __find_msr_index(vmx, MSR_TSC_AUX);
952 if (index >= 0 && vmx->rdtscp_enabled)
953 move_msr_up(vmx, index, save_nmsrs++);
916 /* 954 /*
917 * MSR_K6_STAR is only needed on long mode guests, and only 955 * MSR_K6_STAR is only needed on long mode guests, and only
918 * if efer.sce is enabled. 956 * if efer.sce is enabled.
919 */ 957 */
920 index = __find_msr_index(vmx, MSR_K6_STAR); 958 index = __find_msr_index(vmx, MSR_K6_STAR);
921 if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) 959 if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
922 move_msr_up(vmx, index, save_nmsrs++); 960 move_msr_up(vmx, index, save_nmsrs++);
923 } 961 }
924#endif 962#endif
@@ -1002,6 +1040,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
1002 case MSR_IA32_SYSENTER_ESP: 1040 case MSR_IA32_SYSENTER_ESP:
1003 data = vmcs_readl(GUEST_SYSENTER_ESP); 1041 data = vmcs_readl(GUEST_SYSENTER_ESP);
1004 break; 1042 break;
1043 case MSR_TSC_AUX:
1044 if (!to_vmx(vcpu)->rdtscp_enabled)
1045 return 1;
1046 /* Otherwise falls through */
1005 default: 1047 default:
1006 vmx_load_host_state(to_vmx(vcpu)); 1048 vmx_load_host_state(to_vmx(vcpu));
1007 msr = find_msr_entry(to_vmx(vcpu), msr_index); 1049 msr = find_msr_entry(to_vmx(vcpu), msr_index);
@@ -1065,7 +1107,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
1065 vcpu->arch.pat = data; 1107 vcpu->arch.pat = data;
1066 break; 1108 break;
1067 } 1109 }
1068 /* Otherwise falls through to kvm_set_msr_common */ 1110 ret = kvm_set_msr_common(vcpu, msr_index, data);
1111 break;
1112 case MSR_TSC_AUX:
1113 if (!vmx->rdtscp_enabled)
1114 return 1;
1115 /* Check reserved bit, higher 32 bits should be zero */
1116 if ((data >> 32) != 0)
1117 return 1;
1118 /* Otherwise falls through */
1069 default: 1119 default:
1070 msr = find_msr_entry(vmx, msr_index); 1120 msr = find_msr_entry(vmx, msr_index);
1071 if (msr) { 1121 if (msr) {
@@ -1224,6 +1274,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1224 CPU_BASED_USE_IO_BITMAPS | 1274 CPU_BASED_USE_IO_BITMAPS |
1225 CPU_BASED_MOV_DR_EXITING | 1275 CPU_BASED_MOV_DR_EXITING |
1226 CPU_BASED_USE_TSC_OFFSETING | 1276 CPU_BASED_USE_TSC_OFFSETING |
1277 CPU_BASED_MWAIT_EXITING |
1278 CPU_BASED_MONITOR_EXITING |
1227 CPU_BASED_INVLPG_EXITING; 1279 CPU_BASED_INVLPG_EXITING;
1228 opt = CPU_BASED_TPR_SHADOW | 1280 opt = CPU_BASED_TPR_SHADOW |
1229 CPU_BASED_USE_MSR_BITMAPS | 1281 CPU_BASED_USE_MSR_BITMAPS |
@@ -1243,7 +1295,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
1243 SECONDARY_EXEC_ENABLE_VPID | 1295 SECONDARY_EXEC_ENABLE_VPID |
1244 SECONDARY_EXEC_ENABLE_EPT | 1296 SECONDARY_EXEC_ENABLE_EPT |
1245 SECONDARY_EXEC_UNRESTRICTED_GUEST | 1297 SECONDARY_EXEC_UNRESTRICTED_GUEST |
1246 SECONDARY_EXEC_PAUSE_LOOP_EXITING; 1298 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
1299 SECONDARY_EXEC_RDTSCP;
1247 if (adjust_vmx_controls(min2, opt2, 1300 if (adjust_vmx_controls(min2, opt2,
1248 MSR_IA32_VMX_PROCBASED_CTLS2, 1301 MSR_IA32_VMX_PROCBASED_CTLS2,
1249 &_cpu_based_2nd_exec_control) < 0) 1302 &_cpu_based_2nd_exec_control) < 0)
@@ -1457,8 +1510,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
1457static gva_t rmode_tss_base(struct kvm *kvm) 1510static gva_t rmode_tss_base(struct kvm *kvm)
1458{ 1511{
1459 if (!kvm->arch.tss_addr) { 1512 if (!kvm->arch.tss_addr) {
1460 gfn_t base_gfn = kvm->memslots[0].base_gfn + 1513 struct kvm_memslots *slots;
1461 kvm->memslots[0].npages - 3; 1514 gfn_t base_gfn;
1515
1516 slots = rcu_dereference(kvm->memslots);
1517 base_gfn = kvm->memslots->memslots[0].base_gfn +
1518 kvm->memslots->memslots[0].npages - 3;
1462 return base_gfn << PAGE_SHIFT; 1519 return base_gfn << PAGE_SHIFT;
1463 } 1520 }
1464 return kvm->arch.tss_addr; 1521 return kvm->arch.tss_addr;
@@ -1544,9 +1601,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
1544 * of this msr depends on is_long_mode(). 1601 * of this msr depends on is_long_mode().
1545 */ 1602 */
1546 vmx_load_host_state(to_vmx(vcpu)); 1603 vmx_load_host_state(to_vmx(vcpu));
1547 vcpu->arch.shadow_efer = efer; 1604 vcpu->arch.efer = efer;
1548 if (!msr)
1549 return;
1550 if (efer & EFER_LMA) { 1605 if (efer & EFER_LMA) {
1551 vmcs_write32(VM_ENTRY_CONTROLS, 1606 vmcs_write32(VM_ENTRY_CONTROLS,
1552 vmcs_read32(VM_ENTRY_CONTROLS) | 1607 vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1576,13 +1631,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
1576 (guest_tr_ar & ~AR_TYPE_MASK) 1631 (guest_tr_ar & ~AR_TYPE_MASK)
1577 | AR_TYPE_BUSY_64_TSS); 1632 | AR_TYPE_BUSY_64_TSS);
1578 } 1633 }
1579 vcpu->arch.shadow_efer |= EFER_LMA; 1634 vcpu->arch.efer |= EFER_LMA;
1580 vmx_set_efer(vcpu, vcpu->arch.shadow_efer); 1635 vmx_set_efer(vcpu, vcpu->arch.efer);
1581} 1636}
1582 1637
1583static void exit_lmode(struct kvm_vcpu *vcpu) 1638static void exit_lmode(struct kvm_vcpu *vcpu)
1584{ 1639{
1585 vcpu->arch.shadow_efer &= ~EFER_LMA; 1640 vcpu->arch.efer &= ~EFER_LMA;
1586 1641
1587 vmcs_write32(VM_ENTRY_CONTROLS, 1642 vmcs_write32(VM_ENTRY_CONTROLS,
1588 vmcs_read32(VM_ENTRY_CONTROLS) 1643 vmcs_read32(VM_ENTRY_CONTROLS)
@@ -1598,10 +1653,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
1598 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); 1653 ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
1599} 1654}
1600 1655
1656static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1657{
1658 ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
1659
1660 vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
1661 vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
1662}
1663
1601static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1664static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1602{ 1665{
1603 vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; 1666 ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
1604 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; 1667
1668 vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
1669 vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
1605} 1670}
1606 1671
1607static void ept_load_pdptrs(struct kvm_vcpu *vcpu) 1672static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -1646,7 +1711,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1646 (CPU_BASED_CR3_LOAD_EXITING | 1711 (CPU_BASED_CR3_LOAD_EXITING |
1647 CPU_BASED_CR3_STORE_EXITING)); 1712 CPU_BASED_CR3_STORE_EXITING));
1648 vcpu->arch.cr0 = cr0; 1713 vcpu->arch.cr0 = cr0;
1649 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1714 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
1650 } else if (!is_paging(vcpu)) { 1715 } else if (!is_paging(vcpu)) {
1651 /* From nonpaging to paging */ 1716 /* From nonpaging to paging */
1652 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1717 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@@ -1654,23 +1719,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
1654 ~(CPU_BASED_CR3_LOAD_EXITING | 1719 ~(CPU_BASED_CR3_LOAD_EXITING |
1655 CPU_BASED_CR3_STORE_EXITING)); 1720 CPU_BASED_CR3_STORE_EXITING));
1656 vcpu->arch.cr0 = cr0; 1721 vcpu->arch.cr0 = cr0;
1657 vmx_set_cr4(vcpu, vcpu->arch.cr4); 1722 vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
1658 } 1723 }
1659 1724
1660 if (!(cr0 & X86_CR0_WP)) 1725 if (!(cr0 & X86_CR0_WP))
1661 *hw_cr0 &= ~X86_CR0_WP; 1726 *hw_cr0 &= ~X86_CR0_WP;
1662} 1727}
1663 1728
1664static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
1665 struct kvm_vcpu *vcpu)
1666{
1667 if (!is_paging(vcpu)) {
1668 *hw_cr4 &= ~X86_CR4_PAE;
1669 *hw_cr4 |= X86_CR4_PSE;
1670 } else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
1671 *hw_cr4 &= ~X86_CR4_PAE;
1672}
1673
1674static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1729static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1675{ 1730{
1676 struct vcpu_vmx *vmx = to_vmx(vcpu); 1731 struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1682,8 +1737,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1682 else 1737 else
1683 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; 1738 hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
1684 1739
1685 vmx_fpu_deactivate(vcpu);
1686
1687 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) 1740 if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
1688 enter_pmode(vcpu); 1741 enter_pmode(vcpu);
1689 1742
@@ -1691,7 +1744,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1691 enter_rmode(vcpu); 1744 enter_rmode(vcpu);
1692 1745
1693#ifdef CONFIG_X86_64 1746#ifdef CONFIG_X86_64
1694 if (vcpu->arch.shadow_efer & EFER_LME) { 1747 if (vcpu->arch.efer & EFER_LME) {
1695 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) 1748 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
1696 enter_lmode(vcpu); 1749 enter_lmode(vcpu);
1697 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) 1750 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@@ -1702,12 +1755,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1702 if (enable_ept) 1755 if (enable_ept)
1703 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); 1756 ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
1704 1757
1758 if (!vcpu->fpu_active)
1759 hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
1760
1705 vmcs_writel(CR0_READ_SHADOW, cr0); 1761 vmcs_writel(CR0_READ_SHADOW, cr0);
1706 vmcs_writel(GUEST_CR0, hw_cr0); 1762 vmcs_writel(GUEST_CR0, hw_cr0);
1707 vcpu->arch.cr0 = cr0; 1763 vcpu->arch.cr0 = cr0;
1708
1709 if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
1710 vmx_fpu_activate(vcpu);
1711} 1764}
1712 1765
1713static u64 construct_eptp(unsigned long root_hpa) 1766static u64 construct_eptp(unsigned long root_hpa)
@@ -1738,8 +1791,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
1738 1791
1739 vmx_flush_tlb(vcpu); 1792 vmx_flush_tlb(vcpu);
1740 vmcs_writel(GUEST_CR3, guest_cr3); 1793 vmcs_writel(GUEST_CR3, guest_cr3);
1741 if (vcpu->arch.cr0 & X86_CR0_PE)
1742 vmx_fpu_deactivate(vcpu);
1743} 1794}
1744 1795
1745static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1796static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1748,8 +1799,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1748 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); 1799 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
1749 1800
1750 vcpu->arch.cr4 = cr4; 1801 vcpu->arch.cr4 = cr4;
1751 if (enable_ept) 1802 if (enable_ept) {
1752 ept_update_paging_mode_cr4(&hw_cr4, vcpu); 1803 if (!is_paging(vcpu)) {
1804 hw_cr4 &= ~X86_CR4_PAE;
1805 hw_cr4 |= X86_CR4_PSE;
1806 } else if (!(cr4 & X86_CR4_PAE)) {
1807 hw_cr4 &= ~X86_CR4_PAE;
1808 }
1809 }
1753 1810
1754 vmcs_writel(CR4_READ_SHADOW, cr4); 1811 vmcs_writel(CR4_READ_SHADOW, cr4);
1755 vmcs_writel(GUEST_CR4, hw_cr4); 1812 vmcs_writel(GUEST_CR4, hw_cr4);
@@ -1787,7 +1844,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
1787 1844
1788static int vmx_get_cpl(struct kvm_vcpu *vcpu) 1845static int vmx_get_cpl(struct kvm_vcpu *vcpu)
1789{ 1846{
1790 if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ 1847 if (!is_protmode(vcpu))
1791 return 0; 1848 return 0;
1792 1849
1793 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ 1850 if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@@ -2042,7 +2099,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
2042static bool guest_state_valid(struct kvm_vcpu *vcpu) 2099static bool guest_state_valid(struct kvm_vcpu *vcpu)
2043{ 2100{
2044 /* real mode guest state checks */ 2101 /* real mode guest state checks */
2045 if (!(vcpu->arch.cr0 & X86_CR0_PE)) { 2102 if (!is_protmode(vcpu)) {
2046 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) 2103 if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
2047 return false; 2104 return false;
2048 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) 2105 if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@@ -2175,7 +2232,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2175 struct kvm_userspace_memory_region kvm_userspace_mem; 2232 struct kvm_userspace_memory_region kvm_userspace_mem;
2176 int r = 0; 2233 int r = 0;
2177 2234
2178 down_write(&kvm->slots_lock); 2235 mutex_lock(&kvm->slots_lock);
2179 if (kvm->arch.apic_access_page) 2236 if (kvm->arch.apic_access_page)
2180 goto out; 2237 goto out;
2181 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; 2238 kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -2188,7 +2245,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
2188 2245
2189 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); 2246 kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
2190out: 2247out:
2191 up_write(&kvm->slots_lock); 2248 mutex_unlock(&kvm->slots_lock);
2192 return r; 2249 return r;
2193} 2250}
2194 2251
@@ -2197,7 +2254,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2197 struct kvm_userspace_memory_region kvm_userspace_mem; 2254 struct kvm_userspace_memory_region kvm_userspace_mem;
2198 int r = 0; 2255 int r = 0;
2199 2256
2200 down_write(&kvm->slots_lock); 2257 mutex_lock(&kvm->slots_lock);
2201 if (kvm->arch.ept_identity_pagetable) 2258 if (kvm->arch.ept_identity_pagetable)
2202 goto out; 2259 goto out;
2203 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; 2260 kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@@ -2212,7 +2269,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
2212 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, 2269 kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
2213 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); 2270 kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
2214out: 2271out:
2215 up_write(&kvm->slots_lock); 2272 mutex_unlock(&kvm->slots_lock);
2216 return r; 2273 return r;
2217} 2274}
2218 2275
@@ -2384,14 +2441,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2384 for (i = 0; i < NR_VMX_MSR; ++i) { 2441 for (i = 0; i < NR_VMX_MSR; ++i) {
2385 u32 index = vmx_msr_index[i]; 2442 u32 index = vmx_msr_index[i];
2386 u32 data_low, data_high; 2443 u32 data_low, data_high;
2387 u64 data;
2388 int j = vmx->nmsrs; 2444 int j = vmx->nmsrs;
2389 2445
2390 if (rdmsr_safe(index, &data_low, &data_high) < 0) 2446 if (rdmsr_safe(index, &data_low, &data_high) < 0)
2391 continue; 2447 continue;
2392 if (wrmsr_safe(index, data_low, data_high) < 0) 2448 if (wrmsr_safe(index, data_low, data_high) < 0)
2393 continue; 2449 continue;
2394 data = data_low | ((u64)data_high << 32);
2395 vmx->guest_msrs[j].index = i; 2450 vmx->guest_msrs[j].index = i;
2396 vmx->guest_msrs[j].data = 0; 2451 vmx->guest_msrs[j].data = 0;
2397 vmx->guest_msrs[j].mask = -1ull; 2452 vmx->guest_msrs[j].mask = -1ull;
@@ -2404,7 +2459,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
2404 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); 2459 vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
2405 2460
2406 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); 2461 vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
2407 vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); 2462 vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
2463 if (enable_ept)
2464 vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
2465 vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
2408 2466
2409 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; 2467 tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
2410 rdtscll(tsc_this); 2468 rdtscll(tsc_this);
@@ -2429,10 +2487,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2429{ 2487{
2430 struct vcpu_vmx *vmx = to_vmx(vcpu); 2488 struct vcpu_vmx *vmx = to_vmx(vcpu);
2431 u64 msr; 2489 u64 msr;
2432 int ret; 2490 int ret, idx;
2433 2491
2434 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); 2492 vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
2435 down_read(&vcpu->kvm->slots_lock); 2493 idx = srcu_read_lock(&vcpu->kvm->srcu);
2436 if (!init_rmode(vmx->vcpu.kvm)) { 2494 if (!init_rmode(vmx->vcpu.kvm)) {
2437 ret = -ENOMEM; 2495 ret = -ENOMEM;
2438 goto out; 2496 goto out;
@@ -2526,7 +2584,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2526 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 2584 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
2527 2585
2528 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 2586 vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
2529 vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ 2587 vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
2530 vmx_set_cr4(&vmx->vcpu, 0); 2588 vmx_set_cr4(&vmx->vcpu, 0);
2531 vmx_set_efer(&vmx->vcpu, 0); 2589 vmx_set_efer(&vmx->vcpu, 0);
2532 vmx_fpu_activate(&vmx->vcpu); 2590 vmx_fpu_activate(&vmx->vcpu);
@@ -2540,7 +2598,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
2540 vmx->emulation_required = 0; 2598 vmx->emulation_required = 0;
2541 2599
2542out: 2600out:
2543 up_read(&vcpu->kvm->slots_lock); 2601 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2544 return ret; 2602 return ret;
2545} 2603}
2546 2604
@@ -2717,6 +2775,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
2717 kvm_queue_exception(vcpu, vec); 2775 kvm_queue_exception(vcpu, vec);
2718 return 1; 2776 return 1;
2719 case BP_VECTOR: 2777 case BP_VECTOR:
2778 /*
2779 * Update instruction length as we may reinject the exception
2780 * from user space while in guest debugging mode.
2781 */
2782 to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
2783 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2720 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 2784 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
2721 return 0; 2785 return 0;
2722 /* fall through */ 2786 /* fall through */
@@ -2839,6 +2903,13 @@ static int handle_exception(struct kvm_vcpu *vcpu)
2839 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); 2903 kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
2840 /* fall through */ 2904 /* fall through */
2841 case BP_VECTOR: 2905 case BP_VECTOR:
2906 /*
2907 * Update instruction length as we may reinject #BP from
2908 * user space while in guest debugging mode. Reading it for
2909 * #DB as well causes no harm, it is not used in that case.
2910 */
2911 vmx->vcpu.arch.event_exit_inst_len =
2912 vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
2842 kvm_run->exit_reason = KVM_EXIT_DEBUG; 2913 kvm_run->exit_reason = KVM_EXIT_DEBUG;
2843 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; 2914 kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
2844 kvm_run->debug.arch.exception = ex_no; 2915 kvm_run->debug.arch.exception = ex_no;
@@ -2940,11 +3011,10 @@ static int handle_cr(struct kvm_vcpu *vcpu)
2940 }; 3011 };
2941 break; 3012 break;
2942 case 2: /* clts */ 3013 case 2: /* clts */
2943 vmx_fpu_deactivate(vcpu); 3014 vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
2944 vcpu->arch.cr0 &= ~X86_CR0_TS; 3015 trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
2945 vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
2946 vmx_fpu_activate(vcpu);
2947 skip_emulated_instruction(vcpu); 3016 skip_emulated_instruction(vcpu);
3017 vmx_fpu_activate(vcpu);
2948 return 1; 3018 return 1;
2949 case 1: /*mov from cr*/ 3019 case 1: /*mov from cr*/
2950 switch (cr) { 3020 switch (cr) {
@@ -2962,7 +3032,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
2962 } 3032 }
2963 break; 3033 break;
2964 case 3: /* lmsw */ 3034 case 3: /* lmsw */
2965 kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); 3035 val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
3036 trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
3037 kvm_lmsw(vcpu, val);
2966 3038
2967 skip_emulated_instruction(vcpu); 3039 skip_emulated_instruction(vcpu);
2968 return 1; 3040 return 1;
@@ -2975,12 +3047,22 @@ static int handle_cr(struct kvm_vcpu *vcpu)
2975 return 0; 3047 return 0;
2976} 3048}
2977 3049
3050static int check_dr_alias(struct kvm_vcpu *vcpu)
3051{
3052 if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
3053 kvm_queue_exception(vcpu, UD_VECTOR);
3054 return -1;
3055 }
3056 return 0;
3057}
3058
2978static int handle_dr(struct kvm_vcpu *vcpu) 3059static int handle_dr(struct kvm_vcpu *vcpu)
2979{ 3060{
2980 unsigned long exit_qualification; 3061 unsigned long exit_qualification;
2981 unsigned long val; 3062 unsigned long val;
2982 int dr, reg; 3063 int dr, reg;
2983 3064
3065 /* Do not handle if the CPL > 0, will trigger GP on re-entry */
2984 if (!kvm_require_cpl(vcpu, 0)) 3066 if (!kvm_require_cpl(vcpu, 0))
2985 return 1; 3067 return 1;
2986 dr = vmcs_readl(GUEST_DR7); 3068 dr = vmcs_readl(GUEST_DR7);
@@ -3016,14 +3098,20 @@ static int handle_dr(struct kvm_vcpu *vcpu)
3016 case 0 ... 3: 3098 case 0 ... 3:
3017 val = vcpu->arch.db[dr]; 3099 val = vcpu->arch.db[dr];
3018 break; 3100 break;
3101 case 4:
3102 if (check_dr_alias(vcpu) < 0)
3103 return 1;
3104 /* fall through */
3019 case 6: 3105 case 6:
3020 val = vcpu->arch.dr6; 3106 val = vcpu->arch.dr6;
3021 break; 3107 break;
3022 case 7: 3108 case 5:
3109 if (check_dr_alias(vcpu) < 0)
3110 return 1;
3111 /* fall through */
3112 default: /* 7 */
3023 val = vcpu->arch.dr7; 3113 val = vcpu->arch.dr7;
3024 break; 3114 break;
3025 default:
3026 val = 0;
3027 } 3115 }
3028 kvm_register_write(vcpu, reg, val); 3116 kvm_register_write(vcpu, reg, val);
3029 } else { 3117 } else {
@@ -3034,21 +3122,25 @@ static int handle_dr(struct kvm_vcpu *vcpu)
3034 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) 3122 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
3035 vcpu->arch.eff_db[dr] = val; 3123 vcpu->arch.eff_db[dr] = val;
3036 break; 3124 break;
3037 case 4 ... 5: 3125 case 4:
3038 if (vcpu->arch.cr4 & X86_CR4_DE) 3126 if (check_dr_alias(vcpu) < 0)
3039 kvm_queue_exception(vcpu, UD_VECTOR); 3127 return 1;
3040 break; 3128 /* fall through */
3041 case 6: 3129 case 6:
3042 if (val & 0xffffffff00000000ULL) { 3130 if (val & 0xffffffff00000000ULL) {
3043 kvm_queue_exception(vcpu, GP_VECTOR); 3131 kvm_inject_gp(vcpu, 0);
3044 break; 3132 return 1;
3045 } 3133 }
3046 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 3134 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
3047 break; 3135 break;
3048 case 7: 3136 case 5:
3137 if (check_dr_alias(vcpu) < 0)
3138 return 1;
3139 /* fall through */
3140 default: /* 7 */
3049 if (val & 0xffffffff00000000ULL) { 3141 if (val & 0xffffffff00000000ULL) {
3050 kvm_queue_exception(vcpu, GP_VECTOR); 3142 kvm_inject_gp(vcpu, 0);
3051 break; 3143 return 1;
3052 } 3144 }
3053 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; 3145 vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
3054 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { 3146 if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
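Note on the handle_dr() hunks above: DR4/DR5 accesses are now routed through check_dr_alias(), so with CR4.DE set they raise #UD and otherwise alias DR6/DR7, while writes with the reserved upper 32 bits set inject #GP and stop emulation immediately. A hedged sketch of just the aliasing rule follows; demo_* names are illustrative and this is not KVM code.

#include <stdbool.h>
#include <stdio.h>

/*
 * DR4/DR5 alias DR6/DR7 unless CR4.DE is set, in which case the access
 * must raise #UD. Returns the effective register, or -1 when the
 * caller should inject #UD.
 */
static int demo_resolve_dr(int dr, bool cr4_de)
{
	switch (dr) {
	case 4:
	case 5:
		if (cr4_de)
			return -1;	/* debug extensions on: #UD */
		return dr + 2;		/* 4 -> 6, 5 -> 7 */
	default:
		return dr;
	}
}

int main(void)
{
	printf("dr4, CR4.DE=0 -> dr%d\n", demo_resolve_dr(4, false));
	printf("dr5, CR4.DE=1 -> %d (#UD)\n", demo_resolve_dr(5, true));
	return 0;
}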
@@ -3075,6 +3167,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
3075 u64 data; 3167 u64 data;
3076 3168
3077 if (vmx_get_msr(vcpu, ecx, &data)) { 3169 if (vmx_get_msr(vcpu, ecx, &data)) {
3170 trace_kvm_msr_read_ex(ecx);
3078 kvm_inject_gp(vcpu, 0); 3171 kvm_inject_gp(vcpu, 0);
3079 return 1; 3172 return 1;
3080 } 3173 }
@@ -3094,13 +3187,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
3094 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) 3187 u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
3095 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3188 | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3096 3189
3097 trace_kvm_msr_write(ecx, data);
3098
3099 if (vmx_set_msr(vcpu, ecx, data) != 0) { 3190 if (vmx_set_msr(vcpu, ecx, data) != 0) {
3191 trace_kvm_msr_write_ex(ecx, data);
3100 kvm_inject_gp(vcpu, 0); 3192 kvm_inject_gp(vcpu, 0);
3101 return 1; 3193 return 1;
3102 } 3194 }
3103 3195
3196 trace_kvm_msr_write(ecx, data);
3104 skip_emulated_instruction(vcpu); 3197 skip_emulated_instruction(vcpu);
3105 return 1; 3198 return 1;
3106} 3199}
@@ -3385,7 +3478,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
3385 } 3478 }
3386 3479
3387 if (err != EMULATE_DONE) { 3480 if (err != EMULATE_DONE) {
3388 kvm_report_emulation_failure(vcpu, "emulation failure");
3389 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 3481 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3390 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 3482 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
3391 vcpu->run->internal.ndata = 0; 3483 vcpu->run->internal.ndata = 0;
@@ -3416,6 +3508,12 @@ static int handle_pause(struct kvm_vcpu *vcpu)
3416 return 1; 3508 return 1;
3417} 3509}
3418 3510
3511static int handle_invalid_op(struct kvm_vcpu *vcpu)
3512{
3513 kvm_queue_exception(vcpu, UD_VECTOR);
3514 return 1;
3515}
3516
3419/* 3517/*
3420 * The exit handlers return 1 if the exit was handled fully and guest execution 3518 * The exit handlers return 1 if the exit was handled fully and guest execution
3421 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 3519 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -3453,6 +3551,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
3453 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, 3551 [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
3454 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, 3552 [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
3455 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, 3553 [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
3554 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
3555 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
3456}; 3556};
3457 3557
3458static const int kvm_vmx_max_exit_handlers = 3558static const int kvm_vmx_max_exit_handlers =
@@ -3686,9 +3786,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3686 */ 3786 */
3687 vmcs_writel(HOST_CR0, read_cr0()); 3787 vmcs_writel(HOST_CR0, read_cr0());
3688 3788
3689 if (vcpu->arch.switch_db_regs)
3690 set_debugreg(vcpu->arch.dr6, 6);
3691
3692 asm( 3789 asm(
3693 /* Store host registers */ 3790 /* Store host registers */
3694 "push %%"R"dx; push %%"R"bp;" 3791 "push %%"R"dx; push %%"R"bp;"
@@ -3789,9 +3886,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
3789 | (1 << VCPU_EXREG_PDPTR)); 3886 | (1 << VCPU_EXREG_PDPTR));
3790 vcpu->arch.regs_dirty = 0; 3887 vcpu->arch.regs_dirty = 0;
3791 3888
3792 if (vcpu->arch.switch_db_regs)
3793 get_debugreg(vcpu->arch.dr6, 6);
3794
3795 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 3889 vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
3796 if (vmx->rmode.irq.pending) 3890 if (vmx->rmode.irq.pending)
3797 fixup_rmode_irq(vmx); 3891 fixup_rmode_irq(vmx);
@@ -3920,7 +4014,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3920 * b. VT-d with snooping control feature: snooping control feature of 4014 * b. VT-d with snooping control feature: snooping control feature of
3921 * VT-d engine can guarantee the cache correctness. Just set it 4015 * VT-d engine can guarantee the cache correctness. Just set it
3922 * to WB to keep consistent with host. So the same as item 3. 4016 * to WB to keep consistent with host. So the same as item 3.
3923 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep 4017 * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
3924 * consistent with host MTRR 4018 * consistent with host MTRR
3925 */ 4019 */
3926 if (is_mmio) 4020 if (is_mmio)
@@ -3931,37 +4025,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3931 VMX_EPT_MT_EPTE_SHIFT; 4025 VMX_EPT_MT_EPTE_SHIFT;
3932 else 4026 else
3933 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) 4027 ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
3934 | VMX_EPT_IGMT_BIT; 4028 | VMX_EPT_IPAT_BIT;
3935 4029
3936 return ret; 4030 return ret;
3937} 4031}
3938 4032
4033#define _ER(x) { EXIT_REASON_##x, #x }
4034
3939static const struct trace_print_flags vmx_exit_reasons_str[] = { 4035static const struct trace_print_flags vmx_exit_reasons_str[] = {
3940 { EXIT_REASON_EXCEPTION_NMI, "exception" }, 4036 _ER(EXCEPTION_NMI),
3941 { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, 4037 _ER(EXTERNAL_INTERRUPT),
3942 { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, 4038 _ER(TRIPLE_FAULT),
3943 { EXIT_REASON_NMI_WINDOW, "nmi_window" }, 4039 _ER(PENDING_INTERRUPT),
3944 { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, 4040 _ER(NMI_WINDOW),
3945 { EXIT_REASON_CR_ACCESS, "cr_access" }, 4041 _ER(TASK_SWITCH),
3946 { EXIT_REASON_DR_ACCESS, "dr_access" }, 4042 _ER(CPUID),
3947 { EXIT_REASON_CPUID, "cpuid" }, 4043 _ER(HLT),
3948 { EXIT_REASON_MSR_READ, "rdmsr" }, 4044 _ER(INVLPG),
3949 { EXIT_REASON_MSR_WRITE, "wrmsr" }, 4045 _ER(RDPMC),
3950 { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, 4046 _ER(RDTSC),
3951 { EXIT_REASON_HLT, "halt" }, 4047 _ER(VMCALL),
3952 { EXIT_REASON_INVLPG, "invlpg" }, 4048 _ER(VMCLEAR),
3953 { EXIT_REASON_VMCALL, "hypercall" }, 4049 _ER(VMLAUNCH),
3954 { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, 4050 _ER(VMPTRLD),
3955 { EXIT_REASON_APIC_ACCESS, "apic_access" }, 4051 _ER(VMPTRST),
3956 { EXIT_REASON_WBINVD, "wbinvd" }, 4052 _ER(VMREAD),
3957 { EXIT_REASON_TASK_SWITCH, "task_switch" }, 4053 _ER(VMRESUME),
3958 { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, 4054 _ER(VMWRITE),
4055 _ER(VMOFF),
4056 _ER(VMON),
4057 _ER(CR_ACCESS),
4058 _ER(DR_ACCESS),
4059 _ER(IO_INSTRUCTION),
4060 _ER(MSR_READ),
4061 _ER(MSR_WRITE),
4062 _ER(MWAIT_INSTRUCTION),
4063 _ER(MONITOR_INSTRUCTION),
4064 _ER(PAUSE_INSTRUCTION),
4065 _ER(MCE_DURING_VMENTRY),
4066 _ER(TPR_BELOW_THRESHOLD),
4067 _ER(APIC_ACCESS),
4068 _ER(EPT_VIOLATION),
4069 _ER(EPT_MISCONFIG),
4070 _ER(WBINVD),
3959 { -1, NULL } 4071 { -1, NULL }
3960}; 4072};
3961 4073
3962static bool vmx_gb_page_enable(void) 4074#undef _ER
4075
4076static int vmx_get_lpage_level(void)
4077{
4078 if (enable_ept && !cpu_has_vmx_ept_1g_page())
4079 return PT_DIRECTORY_LEVEL;
4080 else
4081 /* For shadow and EPT supported 1GB page */
4082 return PT_PDPE_LEVEL;
4083}
4084
4085static inline u32 bit(int bitno)
4086{
4087 return 1 << (bitno & 31);
4088}
4089
4090static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
3963{ 4091{
3964 return false; 4092 struct kvm_cpuid_entry2 *best;
4093 struct vcpu_vmx *vmx = to_vmx(vcpu);
4094 u32 exec_control;
4095
4096 vmx->rdtscp_enabled = false;
4097 if (vmx_rdtscp_supported()) {
4098 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
4099 if (exec_control & SECONDARY_EXEC_RDTSCP) {
4100 best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
4101 if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
4102 vmx->rdtscp_enabled = true;
4103 else {
4104 exec_control &= ~SECONDARY_EXEC_RDTSCP;
4105 vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
4106 exec_control);
4107 }
4108 }
4109 }
3965} 4110}
3966 4111
3967static struct kvm_x86_ops vmx_x86_ops = { 4112static struct kvm_x86_ops vmx_x86_ops = {
@@ -3990,6 +4135,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
3990 .set_segment = vmx_set_segment, 4135 .set_segment = vmx_set_segment,
3991 .get_cpl = vmx_get_cpl, 4136 .get_cpl = vmx_get_cpl,
3992 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 4137 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
4138 .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
3993 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, 4139 .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
3994 .set_cr0 = vmx_set_cr0, 4140 .set_cr0 = vmx_set_cr0,
3995 .set_cr3 = vmx_set_cr3, 4141 .set_cr3 = vmx_set_cr3,
@@ -4002,6 +4148,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
4002 .cache_reg = vmx_cache_reg, 4148 .cache_reg = vmx_cache_reg,
4003 .get_rflags = vmx_get_rflags, 4149 .get_rflags = vmx_get_rflags,
4004 .set_rflags = vmx_set_rflags, 4150 .set_rflags = vmx_set_rflags,
4151 .fpu_activate = vmx_fpu_activate,
4152 .fpu_deactivate = vmx_fpu_deactivate,
4005 4153
4006 .tlb_flush = vmx_flush_tlb, 4154 .tlb_flush = vmx_flush_tlb,
4007 4155
@@ -4027,7 +4175,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
4027 .get_mt_mask = vmx_get_mt_mask, 4175 .get_mt_mask = vmx_get_mt_mask,
4028 4176
4029 .exit_reasons_str = vmx_exit_reasons_str, 4177 .exit_reasons_str = vmx_exit_reasons_str,
4030 .gb_page_enable = vmx_gb_page_enable, 4178 .get_lpage_level = vmx_get_lpage_level,
4179
4180 .cpuid_update = vmx_cpuid_update,
4181
4182 .rdtscp_supported = vmx_rdtscp_supported,
4031}; 4183};
4032 4184
4033static int __init vmx_init(void) 4185static int __init vmx_init(void)
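Note on the vmx.c changes above: a recurring theme is letting the guest own CR0.TS (plus the CR4 bits in KVM_CR4_GUEST_OWNED_BITS) while the FPU is active, so toggling them no longer forces an exit; vmx_decache_cr0_guest_bits() then folds the hardware value back into the cached copy on demand. Below is a self-contained sketch of that merge step, with the hardware read stubbed out and demo_* names used for illustration only.

#include <stdio.h>

typedef unsigned long ulong;

#define X86_CR0_TS 0x8UL

struct demo_vcpu {
	ulong cr0;			/* cached guest view of CR0 */
	ulong cr0_guest_owned_bits;	/* bits the guest may change freely */
};

/* stand-in for vmcs_readl(GUEST_CR0) */
static ulong demo_read_hw_guest_cr0(void)
{
	return 0x80000033UL | X86_CR0_TS;	/* pretend the guest set TS */
}

/*
 * Same merge as vmx_decache_cr0_guest_bits(): take the guest-owned
 * bits from hardware, keep everything else from the cached copy.
 */
static void demo_decache_cr0_guest_bits(struct demo_vcpu *vcpu)
{
	ulong owned = vcpu->cr0_guest_owned_bits;

	vcpu->cr0 &= ~owned;
	vcpu->cr0 |= demo_read_hw_guest_cr0() & owned;
}

int main(void)
{
	struct demo_vcpu vcpu = {
		.cr0 = 0x80000033UL,		/* stale cache: TS clear */
		.cr0_guest_owned_bits = X86_CR0_TS,
	};

	demo_decache_cr0_guest_bits(&vcpu);
	printf("cr0 = %#lx (TS %s)\n", vcpu.cr0,
	       (vcpu.cr0 & X86_CR0_TS) ? "set" : "clear");
	return 0;
}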
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1e1bc9d412d..e46282a56565 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -38,6 +38,7 @@
38#include <linux/intel-iommu.h> 38#include <linux/intel-iommu.h>
39#include <linux/cpufreq.h> 39#include <linux/cpufreq.h>
40#include <linux/user-return-notifier.h> 40#include <linux/user-return-notifier.h>
41#include <linux/srcu.h>
41#include <trace/events/kvm.h> 42#include <trace/events/kvm.h>
42#undef TRACE_INCLUDE_FILE 43#undef TRACE_INCLUDE_FILE
43#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
@@ -93,16 +94,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
93 94
94struct kvm_shared_msrs_global { 95struct kvm_shared_msrs_global {
95 int nr; 96 int nr;
96 struct kvm_shared_msr { 97 u32 msrs[KVM_NR_SHARED_MSRS];
97 u32 msr;
98 u64 value;
99 } msrs[KVM_NR_SHARED_MSRS];
100}; 98};
101 99
102struct kvm_shared_msrs { 100struct kvm_shared_msrs {
103 struct user_return_notifier urn; 101 struct user_return_notifier urn;
104 bool registered; 102 bool registered;
105 u64 current_value[KVM_NR_SHARED_MSRS]; 103 struct kvm_shared_msr_values {
104 u64 host;
105 u64 curr;
106 } values[KVM_NR_SHARED_MSRS];
106}; 107};
107 108
108static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; 109static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
@@ -147,53 +148,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
147static void kvm_on_user_return(struct user_return_notifier *urn) 148static void kvm_on_user_return(struct user_return_notifier *urn)
148{ 149{
149 unsigned slot; 150 unsigned slot;
150 struct kvm_shared_msr *global;
151 struct kvm_shared_msrs *locals 151 struct kvm_shared_msrs *locals
152 = container_of(urn, struct kvm_shared_msrs, urn); 152 = container_of(urn, struct kvm_shared_msrs, urn);
153 struct kvm_shared_msr_values *values;
153 154
154 for (slot = 0; slot < shared_msrs_global.nr; ++slot) { 155 for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
155 global = &shared_msrs_global.msrs[slot]; 156 values = &locals->values[slot];
156 if (global->value != locals->current_value[slot]) { 157 if (values->host != values->curr) {
157 wrmsrl(global->msr, global->value); 158 wrmsrl(shared_msrs_global.msrs[slot], values->host);
158 locals->current_value[slot] = global->value; 159 values->curr = values->host;
159 } 160 }
160 } 161 }
161 locals->registered = false; 162 locals->registered = false;
162 user_return_notifier_unregister(urn); 163 user_return_notifier_unregister(urn);
163} 164}
164 165
165void kvm_define_shared_msr(unsigned slot, u32 msr) 166static void shared_msr_update(unsigned slot, u32 msr)
166{ 167{
167 int cpu; 168 struct kvm_shared_msrs *smsr;
168 u64 value; 169 u64 value;
169 170
171 smsr = &__get_cpu_var(shared_msrs);
172 /* only read, and nobody should modify it at this time,
173 * so don't need lock */
174 if (slot >= shared_msrs_global.nr) {
175 printk(KERN_ERR "kvm: invalid MSR slot!");
176 return;
177 }
178 rdmsrl_safe(msr, &value);
179 smsr->values[slot].host = value;
180 smsr->values[slot].curr = value;
181}
182
183void kvm_define_shared_msr(unsigned slot, u32 msr)
184{
170 if (slot >= shared_msrs_global.nr) 185 if (slot >= shared_msrs_global.nr)
171 shared_msrs_global.nr = slot + 1; 186 shared_msrs_global.nr = slot + 1;
172 shared_msrs_global.msrs[slot].msr = msr; 187 shared_msrs_global.msrs[slot] = msr;
173 rdmsrl_safe(msr, &value); 188 /* we need ensured the shared_msr_global have been updated */
174 shared_msrs_global.msrs[slot].value = value; 189 smp_wmb();
175 for_each_online_cpu(cpu)
176 per_cpu(shared_msrs, cpu).current_value[slot] = value;
177} 190}
178EXPORT_SYMBOL_GPL(kvm_define_shared_msr); 191EXPORT_SYMBOL_GPL(kvm_define_shared_msr);
179 192
180static void kvm_shared_msr_cpu_online(void) 193static void kvm_shared_msr_cpu_online(void)
181{ 194{
182 unsigned i; 195 unsigned i;
183 struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs);
184 196
185 for (i = 0; i < shared_msrs_global.nr; ++i) 197 for (i = 0; i < shared_msrs_global.nr; ++i)
186 locals->current_value[i] = shared_msrs_global.msrs[i].value; 198 shared_msr_update(i, shared_msrs_global.msrs[i]);
187} 199}
188 200
189void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 201void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
190{ 202{
191 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); 203 struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
192 204
193 if (((value ^ smsr->current_value[slot]) & mask) == 0) 205 if (((value ^ smsr->values[slot].curr) & mask) == 0)
194 return; 206 return;
195 smsr->current_value[slot] = value; 207 smsr->values[slot].curr = value;
196 wrmsrl(shared_msrs_global.msrs[slot].msr, value); 208 wrmsrl(shared_msrs_global.msrs[slot], value);
197 if (!smsr->registered) { 209 if (!smsr->registered) {
198 smsr->urn.on_user_return = kvm_on_user_return; 210 smsr->urn.on_user_return = kvm_on_user_return;
199 user_return_notifier_register(&smsr->urn); 211 user_return_notifier_register(&smsr->urn);
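Note on the shared-MSR rework above: the per-CPU bookkeeping now keeps a host value and a current value per slot, so kvm_set_shared_msr() only writes hardware when the value actually changes and kvm_on_user_return() restores exactly the MSRs that were touched. A rough single-CPU sketch of that bookkeeping follows, with wrmsrl replaced by a stub; demo_* names are illustrative and this is not the kernel's per-CPU code.

#include <stdint.h>
#include <stdio.h>

#define DEMO_NR_SHARED_MSRS 4

static uint32_t demo_msr_index[DEMO_NR_SHARED_MSRS];
static unsigned demo_nr_msrs;

static struct {
	uint64_t host;	/* value to restore for userspace */
	uint64_t curr;	/* value currently in the register */
} demo_values[DEMO_NR_SHARED_MSRS];

static void demo_wrmsrl(uint32_t msr, uint64_t value)
{
	printf("wrmsr %#x <- %#llx\n", msr, (unsigned long long)value);
}

/* guest wants a different value in a shared MSR: write only on change */
static void demo_set_shared_msr(unsigned slot, uint64_t value, uint64_t mask)
{
	if (((value ^ demo_values[slot].curr) & mask) == 0)
		return;
	demo_values[slot].curr = value;
	demo_wrmsrl(demo_msr_index[slot], value);
}

/* on return to userspace, put the host values back */
static void demo_on_user_return(void)
{
	unsigned slot;

	for (slot = 0; slot < demo_nr_msrs; ++slot)
		if (demo_values[slot].host != demo_values[slot].curr) {
			demo_wrmsrl(demo_msr_index[slot], demo_values[slot].host);
			demo_values[slot].curr = demo_values[slot].host;
		}
}

int main(void)
{
	demo_msr_index[0] = 0xc0000081;
	demo_values[0].host = demo_values[0].curr = 0x1111;
	demo_nr_msrs = 1;

	demo_set_shared_msr(0, 0x2222, ~0ULL);	/* guest value loaded */
	demo_set_shared_msr(0, 0x2222, ~0ULL);	/* unchanged: no wrmsr */
	demo_on_user_return();			/* host value restored */
	return 0;
}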
@@ -257,12 +269,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
257} 269}
258EXPORT_SYMBOL_GPL(kvm_set_apic_base); 270EXPORT_SYMBOL_GPL(kvm_set_apic_base);
259 271
272#define EXCPT_BENIGN 0
273#define EXCPT_CONTRIBUTORY 1
274#define EXCPT_PF 2
275
276static int exception_class(int vector)
277{
278 switch (vector) {
279 case PF_VECTOR:
280 return EXCPT_PF;
281 case DE_VECTOR:
282 case TS_VECTOR:
283 case NP_VECTOR:
284 case SS_VECTOR:
285 case GP_VECTOR:
286 return EXCPT_CONTRIBUTORY;
287 default:
288 break;
289 }
290 return EXCPT_BENIGN;
291}
292
293static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
294 unsigned nr, bool has_error, u32 error_code)
295{
296 u32 prev_nr;
297 int class1, class2;
298
299 if (!vcpu->arch.exception.pending) {
300 queue:
301 vcpu->arch.exception.pending = true;
302 vcpu->arch.exception.has_error_code = has_error;
303 vcpu->arch.exception.nr = nr;
304 vcpu->arch.exception.error_code = error_code;
305 return;
306 }
307
308 /* to check exception */
309 prev_nr = vcpu->arch.exception.nr;
310 if (prev_nr == DF_VECTOR) {
311 /* triple fault -> shutdown */
312 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
313 return;
314 }
315 class1 = exception_class(prev_nr);
316 class2 = exception_class(nr);
317 if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY)
318 || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) {
319 /* generate double fault per SDM Table 5-5 */
320 vcpu->arch.exception.pending = true;
321 vcpu->arch.exception.has_error_code = true;
322 vcpu->arch.exception.nr = DF_VECTOR;
323 vcpu->arch.exception.error_code = 0;
324 } else
325 /* replace previous exception with a new one in a hope
326 that instruction re-execution will regenerate lost
327 exception */
328 goto queue;
329}
330
260void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) 331void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
261{ 332{
262 WARN_ON(vcpu->arch.exception.pending); 333 kvm_multiple_exception(vcpu, nr, false, 0);
263 vcpu->arch.exception.pending = true;
264 vcpu->arch.exception.has_error_code = false;
265 vcpu->arch.exception.nr = nr;
266} 334}
267EXPORT_SYMBOL_GPL(kvm_queue_exception); 335EXPORT_SYMBOL_GPL(kvm_queue_exception);
268 336
@@ -270,25 +338,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
270 u32 error_code) 338 u32 error_code)
271{ 339{
272 ++vcpu->stat.pf_guest; 340 ++vcpu->stat.pf_guest;
273
274 if (vcpu->arch.exception.pending) {
275 switch(vcpu->arch.exception.nr) {
276 case DF_VECTOR:
277 /* triple fault -> shutdown */
278 set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
279 return;
280 case PF_VECTOR:
281 vcpu->arch.exception.nr = DF_VECTOR;
282 vcpu->arch.exception.error_code = 0;
283 return;
284 default:
285 /* replace previous exception with a new one in a hope
286 that instruction re-execution will regenerate lost
287 exception */
288 vcpu->arch.exception.pending = false;
289 break;
290 }
291 }
292 vcpu->arch.cr2 = addr; 341 vcpu->arch.cr2 = addr;
293 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); 342 kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
294} 343}
@@ -301,11 +350,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi);
301 350
302void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) 351void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
303{ 352{
304 WARN_ON(vcpu->arch.exception.pending); 353 kvm_multiple_exception(vcpu, nr, true, error_code);
305 vcpu->arch.exception.pending = true;
306 vcpu->arch.exception.has_error_code = true;
307 vcpu->arch.exception.nr = nr;
308 vcpu->arch.exception.error_code = error_code;
309} 354}
310EXPORT_SYMBOL_GPL(kvm_queue_exception_e); 355EXPORT_SYMBOL_GPL(kvm_queue_exception_e);
311 356
@@ -383,12 +428,18 @@ out:
383 428
384void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 429void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
385{ 430{
386 if (cr0 & CR0_RESERVED_BITS) { 431 cr0 |= X86_CR0_ET;
432
433#ifdef CONFIG_X86_64
434 if (cr0 & 0xffffffff00000000UL) {
387 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", 435 printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
388 cr0, vcpu->arch.cr0); 436 cr0, kvm_read_cr0(vcpu));
389 kvm_inject_gp(vcpu, 0); 437 kvm_inject_gp(vcpu, 0);
390 return; 438 return;
391 } 439 }
440#endif
441
442 cr0 &= ~CR0_RESERVED_BITS;
392 443
393 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { 444 if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
394 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); 445 printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
@@ -405,7 +456,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
405 456
406 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 457 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
407#ifdef CONFIG_X86_64 458#ifdef CONFIG_X86_64
408 if ((vcpu->arch.shadow_efer & EFER_LME)) { 459 if ((vcpu->arch.efer & EFER_LME)) {
409 int cs_db, cs_l; 460 int cs_db, cs_l;
410 461
411 if (!is_pae(vcpu)) { 462 if (!is_pae(vcpu)) {
@@ -443,13 +494,13 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0);
443 494
444void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 495void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
445{ 496{
446 kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); 497 kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f));
447} 498}
448EXPORT_SYMBOL_GPL(kvm_lmsw); 499EXPORT_SYMBOL_GPL(kvm_lmsw);
449 500
450void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 501void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
451{ 502{
452 unsigned long old_cr4 = vcpu->arch.cr4; 503 unsigned long old_cr4 = kvm_read_cr4(vcpu);
453 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; 504 unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;
454 505
455 if (cr4 & CR4_RESERVED_BITS) { 506 if (cr4 & CR4_RESERVED_BITS) {
@@ -575,9 +626,11 @@ static inline u32 bit(int bitno)
575 * kvm-specific. Those are put in the beginning of the list. 626 * kvm-specific. Those are put in the beginning of the list.
576 */ 627 */
577 628
578#define KVM_SAVE_MSRS_BEGIN 2 629#define KVM_SAVE_MSRS_BEGIN 5
579static u32 msrs_to_save[] = { 630static u32 msrs_to_save[] = {
580 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, 631 MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
632 HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
633 HV_X64_MSR_APIC_ASSIST_PAGE,
581 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, 634 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
582 MSR_K6_STAR, 635 MSR_K6_STAR,
583#ifdef CONFIG_X86_64 636#ifdef CONFIG_X86_64
@@ -602,7 +655,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
602 } 655 }
603 656
604 if (is_paging(vcpu) 657 if (is_paging(vcpu)
605 && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { 658 && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) {
606 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); 659 printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
607 kvm_inject_gp(vcpu, 0); 660 kvm_inject_gp(vcpu, 0);
608 return; 661 return;
@@ -633,9 +686,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
633 kvm_x86_ops->set_efer(vcpu, efer); 686 kvm_x86_ops->set_efer(vcpu, efer);
634 687
635 efer &= ~EFER_LMA; 688 efer &= ~EFER_LMA;
636 efer |= vcpu->arch.shadow_efer & EFER_LMA; 689 efer |= vcpu->arch.efer & EFER_LMA;
637 690
638 vcpu->arch.shadow_efer = efer; 691 vcpu->arch.efer = efer;
639 692
640 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; 693 vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
641 kvm_mmu_reset_context(vcpu); 694 kvm_mmu_reset_context(vcpu);
@@ -957,6 +1010,100 @@ out:
957 return r; 1010 return r;
958} 1011}
959 1012
1013static bool kvm_hv_hypercall_enabled(struct kvm *kvm)
1014{
1015 return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
1016}
1017
1018static bool kvm_hv_msr_partition_wide(u32 msr)
1019{
1020 bool r = false;
1021 switch (msr) {
1022 case HV_X64_MSR_GUEST_OS_ID:
1023 case HV_X64_MSR_HYPERCALL:
1024 r = true;
1025 break;
1026 }
1027
1028 return r;
1029}
1030
1031static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1032{
1033 struct kvm *kvm = vcpu->kvm;
1034
1035 switch (msr) {
1036 case HV_X64_MSR_GUEST_OS_ID:
1037 kvm->arch.hv_guest_os_id = data;
1038 /* setting guest os id to zero disables hypercall page */
1039 if (!kvm->arch.hv_guest_os_id)
1040 kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
1041 break;
1042 case HV_X64_MSR_HYPERCALL: {
1043 u64 gfn;
1044 unsigned long addr;
1045 u8 instructions[4];
1046
1047 /* if guest os id is not set hypercall should remain disabled */
1048 if (!kvm->arch.hv_guest_os_id)
1049 break;
1050 if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
1051 kvm->arch.hv_hypercall = data;
1052 break;
1053 }
1054 gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
1055 addr = gfn_to_hva(kvm, gfn);
1056 if (kvm_is_error_hva(addr))
1057 return 1;
1058 kvm_x86_ops->patch_hypercall(vcpu, instructions);
1059 ((unsigned char *)instructions)[3] = 0xc3; /* ret */
1060 if (copy_to_user((void __user *)addr, instructions, 4))
1061 return 1;
1062 kvm->arch.hv_hypercall = data;
1063 break;
1064 }
1065 default:
1066 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1067 "data 0x%llx\n", msr, data);
1068 return 1;
1069 }
1070 return 0;
1071}
1072
1073static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1074{
1075 switch (msr) {
1076 case HV_X64_MSR_APIC_ASSIST_PAGE: {
1077 unsigned long addr;
1078
1079 if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
1080 vcpu->arch.hv_vapic = data;
1081 break;
1082 }
1083 addr = gfn_to_hva(vcpu->kvm, data >>
1084 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
1085 if (kvm_is_error_hva(addr))
1086 return 1;
1087 if (clear_user((void __user *)addr, PAGE_SIZE))
1088 return 1;
1089 vcpu->arch.hv_vapic = data;
1090 break;
1091 }
1092 case HV_X64_MSR_EOI:
1093 return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
1094 case HV_X64_MSR_ICR:
1095 return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
1096 case HV_X64_MSR_TPR:
1097 return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
1098 default:
1099 pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
1100 "data 0x%llx\n", msr, data);
1101 return 1;
1102 }
1103
1104 return 0;
1105}
1106
960int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) 1107int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
961{ 1108{
962 switch (msr) { 1109 switch (msr) {
@@ -1071,6 +1218,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1071 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " 1218 pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
1072 "0x%x data 0x%llx\n", msr, data); 1219 "0x%x data 0x%llx\n", msr, data);
1073 break; 1220 break;
1221 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1222 if (kvm_hv_msr_partition_wide(msr)) {
1223 int r;
1224 mutex_lock(&vcpu->kvm->lock);
1225 r = set_msr_hyperv_pw(vcpu, msr, data);
1226 mutex_unlock(&vcpu->kvm->lock);
1227 return r;
1228 } else
1229 return set_msr_hyperv(vcpu, msr, data);
1230 break;
1074 default: 1231 default:
1075 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) 1232 if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
1076 return xen_hvm_config(vcpu, data); 1233 return xen_hvm_config(vcpu, data);
@@ -1170,6 +1327,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1170 return 0; 1327 return 0;
1171} 1328}
1172 1329
1330static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1331{
1332 u64 data = 0;
1333 struct kvm *kvm = vcpu->kvm;
1334
1335 switch (msr) {
1336 case HV_X64_MSR_GUEST_OS_ID:
1337 data = kvm->arch.hv_guest_os_id;
1338 break;
1339 case HV_X64_MSR_HYPERCALL:
1340 data = kvm->arch.hv_hypercall;
1341 break;
1342 default:
1343 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1344 return 1;
1345 }
1346
1347 *pdata = data;
1348 return 0;
1349}
1350
1351static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1352{
1353 u64 data = 0;
1354
1355 switch (msr) {
1356 case HV_X64_MSR_VP_INDEX: {
1357 int r;
1358 struct kvm_vcpu *v;
1359 kvm_for_each_vcpu(r, v, vcpu->kvm)
1360 if (v == vcpu)
1361 data = r;
1362 break;
1363 }
1364 case HV_X64_MSR_EOI:
1365 return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
1366 case HV_X64_MSR_ICR:
1367 return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
1368 case HV_X64_MSR_TPR:
1369 return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
1370 default:
1371 pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
1372 return 1;
1373 }
1374 *pdata = data;
1375 return 0;
1376}
1377
1173int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) 1378int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1174{ 1379{
1175 u64 data; 1380 u64 data;
@@ -1221,7 +1426,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1221 data |= (((uint64_t)4ULL) << 40); 1426 data |= (((uint64_t)4ULL) << 40);
1222 break; 1427 break;
1223 case MSR_EFER: 1428 case MSR_EFER:
1224 data = vcpu->arch.shadow_efer; 1429 data = vcpu->arch.efer;
1225 break; 1430 break;
1226 case MSR_KVM_WALL_CLOCK: 1431 case MSR_KVM_WALL_CLOCK:
1227 data = vcpu->kvm->arch.wall_clock; 1432 data = vcpu->kvm->arch.wall_clock;
@@ -1236,6 +1441,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
1236 case MSR_IA32_MCG_STATUS: 1441 case MSR_IA32_MCG_STATUS:
1237 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: 1442 case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1:
1238 return get_msr_mce(vcpu, msr, pdata); 1443 return get_msr_mce(vcpu, msr, pdata);
1444 case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15:
1445 if (kvm_hv_msr_partition_wide(msr)) {
1446 int r;
1447 mutex_lock(&vcpu->kvm->lock);
1448 r = get_msr_hyperv_pw(vcpu, msr, pdata);
1449 mutex_unlock(&vcpu->kvm->lock);
1450 return r;
1451 } else
1452 return get_msr_hyperv(vcpu, msr, pdata);
1453 break;
1239 default: 1454 default:
1240 if (!ignore_msrs) { 1455 if (!ignore_msrs) {
1241 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); 1456 pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
@@ -1261,15 +1476,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
1261 int (*do_msr)(struct kvm_vcpu *vcpu, 1476 int (*do_msr)(struct kvm_vcpu *vcpu,
1262 unsigned index, u64 *data)) 1477 unsigned index, u64 *data))
1263{ 1478{
1264 int i; 1479 int i, idx;
1265 1480
1266 vcpu_load(vcpu); 1481 vcpu_load(vcpu);
1267 1482
1268 down_read(&vcpu->kvm->slots_lock); 1483 idx = srcu_read_lock(&vcpu->kvm->srcu);
1269 for (i = 0; i < msrs->nmsrs; ++i) 1484 for (i = 0; i < msrs->nmsrs; ++i)
1270 if (do_msr(vcpu, entries[i].index, &entries[i].data)) 1485 if (do_msr(vcpu, entries[i].index, &entries[i].data))
1271 break; 1486 break;
1272 up_read(&vcpu->kvm->slots_lock); 1487 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1273 1488
1274 vcpu_put(vcpu); 1489 vcpu_put(vcpu);
1275 1490
@@ -1351,6 +1566,11 @@ int kvm_dev_ioctl_check_extension(long ext)
1351 case KVM_CAP_XEN_HVM: 1566 case KVM_CAP_XEN_HVM:
1352 case KVM_CAP_ADJUST_CLOCK: 1567 case KVM_CAP_ADJUST_CLOCK:
1353 case KVM_CAP_VCPU_EVENTS: 1568 case KVM_CAP_VCPU_EVENTS:
1569 case KVM_CAP_HYPERV:
1570 case KVM_CAP_HYPERV_VAPIC:
1571 case KVM_CAP_HYPERV_SPIN:
1572 case KVM_CAP_PCI_SEGMENT:
1573 case KVM_CAP_X86_ROBUST_SINGLESTEP:
1354 r = 1; 1574 r = 1;
1355 break; 1575 break;
1356 case KVM_CAP_COALESCED_MMIO: 1576 case KVM_CAP_COALESCED_MMIO:
@@ -1464,8 +1684,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1464 1684
1465void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) 1685void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1466{ 1686{
1467 kvm_x86_ops->vcpu_put(vcpu);
1468 kvm_put_guest_fpu(vcpu); 1687 kvm_put_guest_fpu(vcpu);
1688 kvm_x86_ops->vcpu_put(vcpu);
1469} 1689}
1470 1690
1471static int is_efer_nx(void) 1691static int is_efer_nx(void)
@@ -1530,6 +1750,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
1530 cpuid_fix_nx_cap(vcpu); 1750 cpuid_fix_nx_cap(vcpu);
1531 r = 0; 1751 r = 0;
1532 kvm_apic_set_version(vcpu); 1752 kvm_apic_set_version(vcpu);
1753 kvm_x86_ops->cpuid_update(vcpu);
1533 1754
1534out_free: 1755out_free:
1535 vfree(cpuid_entries); 1756 vfree(cpuid_entries);
@@ -1552,6 +1773,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
1552 goto out; 1773 goto out;
1553 vcpu->arch.cpuid_nent = cpuid->nent; 1774 vcpu->arch.cpuid_nent = cpuid->nent;
1554 kvm_apic_set_version(vcpu); 1775 kvm_apic_set_version(vcpu);
1776 kvm_x86_ops->cpuid_update(vcpu);
1555 return 0; 1777 return 0;
1556 1778
1557out: 1779out:
@@ -1594,12 +1816,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1594 u32 index, int *nent, int maxnent) 1816 u32 index, int *nent, int maxnent)
1595{ 1817{
1596 unsigned f_nx = is_efer_nx() ? F(NX) : 0; 1818 unsigned f_nx = is_efer_nx() ? F(NX) : 0;
1597 unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0;
1598#ifdef CONFIG_X86_64 1819#ifdef CONFIG_X86_64
1820 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
1821 ? F(GBPAGES) : 0;
1599 unsigned f_lm = F(LM); 1822 unsigned f_lm = F(LM);
1600#else 1823#else
1824 unsigned f_gbpages = 0;
1601 unsigned f_lm = 0; 1825 unsigned f_lm = 0;
1602#endif 1826#endif
1827 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
1603 1828
1604 /* cpuid 1.edx */ 1829 /* cpuid 1.edx */
1605 const u32 kvm_supported_word0_x86_features = 1830 const u32 kvm_supported_word0_x86_features =
@@ -1619,7 +1844,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
1619 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | 1844 F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
1620 F(PAT) | F(PSE36) | 0 /* Reserved */ | 1845 F(PAT) | F(PSE36) | 0 /* Reserved */ |
1621 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | 1846 f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
1622 F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | 1847 F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
1623 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); 1848 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
1624 /* cpuid 1.ecx */ 1849 /* cpuid 1.ecx */
1625 const u32 kvm_supported_word4_x86_features = 1850 const u32 kvm_supported_word4_x86_features =
@@ -1866,7 +2091,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu,
1866 return 0; 2091 return 0;
1867 if (mce->status & MCI_STATUS_UC) { 2092 if (mce->status & MCI_STATUS_UC) {
1868 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || 2093 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
1869 !(vcpu->arch.cr4 & X86_CR4_MCE)) { 2094 !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
1870 printk(KERN_DEBUG "kvm: set_mce: " 2095 printk(KERN_DEBUG "kvm: set_mce: "
1871 "injects mce exception while " 2096 "injects mce exception while "
1872 "previous one is in progress!\n"); 2097 "previous one is in progress!\n");
@@ -2160,14 +2385,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
2160 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) 2385 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
2161 return -EINVAL; 2386 return -EINVAL;
2162 2387
2163 down_write(&kvm->slots_lock); 2388 mutex_lock(&kvm->slots_lock);
2164 spin_lock(&kvm->mmu_lock); 2389 spin_lock(&kvm->mmu_lock);
2165 2390
2166 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); 2391 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
2167 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; 2392 kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
2168 2393
2169 spin_unlock(&kvm->mmu_lock); 2394 spin_unlock(&kvm->mmu_lock);
2170 up_write(&kvm->slots_lock); 2395 mutex_unlock(&kvm->slots_lock);
2171 return 0; 2396 return 0;
2172} 2397}
2173 2398
@@ -2176,13 +2401,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
2176 return kvm->arch.n_alloc_mmu_pages; 2401 return kvm->arch.n_alloc_mmu_pages;
2177} 2402}
2178 2403
2404gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn)
2405{
2406 int i;
2407 struct kvm_mem_alias *alias;
2408 struct kvm_mem_aliases *aliases;
2409
2410 aliases = rcu_dereference(kvm->arch.aliases);
2411
2412 for (i = 0; i < aliases->naliases; ++i) {
2413 alias = &aliases->aliases[i];
2414 if (alias->flags & KVM_ALIAS_INVALID)
2415 continue;
2416 if (gfn >= alias->base_gfn
2417 && gfn < alias->base_gfn + alias->npages)
2418 return alias->target_gfn + gfn - alias->base_gfn;
2419 }
2420 return gfn;
2421}
2422
2179gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) 2423gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
2180{ 2424{
2181 int i; 2425 int i;
2182 struct kvm_mem_alias *alias; 2426 struct kvm_mem_alias *alias;
2427 struct kvm_mem_aliases *aliases;
2183 2428
2184 for (i = 0; i < kvm->arch.naliases; ++i) { 2429 aliases = rcu_dereference(kvm->arch.aliases);
2185 alias = &kvm->arch.aliases[i]; 2430
2431 for (i = 0; i < aliases->naliases; ++i) {
2432 alias = &aliases->aliases[i];
2186 if (gfn >= alias->base_gfn 2433 if (gfn >= alias->base_gfn
2187 && gfn < alias->base_gfn + alias->npages) 2434 && gfn < alias->base_gfn + alias->npages)
2188 return alias->target_gfn + gfn - alias->base_gfn; 2435 return alias->target_gfn + gfn - alias->base_gfn;
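
Both unalias_gfn_instantiation() and unalias_gfn() perform the same linear remap once a covering alias slot is found; only the handling of KVM_ALIAS_INVALID differs. The translation itself, as a stand-alone sketch with illustrative types and without the RCU plumbing:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t gfn_t;

struct mem_alias {
	gfn_t base_gfn;
	gfn_t npages;
	gfn_t target_gfn;
};

/* remap gfn through the first alias that covers it, else return it unchanged */
static gfn_t unalias(const struct mem_alias *a, int n, gfn_t gfn)
{
	for (int i = 0; i < n; i++)
		if (gfn >= a[i].base_gfn && gfn < a[i].base_gfn + a[i].npages)
			return a[i].target_gfn + gfn - a[i].base_gfn;
	return gfn;
}

int main(void)
{
	struct mem_alias a = { .base_gfn = 0x100, .npages = 0x10, .target_gfn = 0x800 };

	printf("0x105 -> 0x%llx\n", (unsigned long long)unalias(&a, 1, 0x105)); /* 0x805 */
	printf("0x200 -> 0x%llx\n", (unsigned long long)unalias(&a, 1, 0x200)); /* unchanged */
	return 0;
}
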
@@ -2200,6 +2447,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2200{ 2447{
2201 int r, n; 2448 int r, n;
2202 struct kvm_mem_alias *p; 2449 struct kvm_mem_alias *p;
2450 struct kvm_mem_aliases *aliases, *old_aliases;
2203 2451
2204 r = -EINVAL; 2452 r = -EINVAL;
2205 /* General sanity checks */ 2453 /* General sanity checks */
@@ -2216,26 +2464,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
2216 < alias->target_phys_addr) 2464 < alias->target_phys_addr)
2217 goto out; 2465 goto out;
2218 2466
2219 down_write(&kvm->slots_lock); 2467 r = -ENOMEM;
2220 spin_lock(&kvm->mmu_lock); 2468 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2469 if (!aliases)
2470 goto out;
2471
2472 mutex_lock(&kvm->slots_lock);
2221 2473
2222 p = &kvm->arch.aliases[alias->slot]; 2474 /* invalidate any gfn reference in case of deletion/shrinking */
2475 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2476 aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID;
2477 old_aliases = kvm->arch.aliases;
2478 rcu_assign_pointer(kvm->arch.aliases, aliases);
2479 synchronize_srcu_expedited(&kvm->srcu);
2480 kvm_mmu_zap_all(kvm);
2481 kfree(old_aliases);
2482
2483 r = -ENOMEM;
2484 aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
2485 if (!aliases)
2486 goto out_unlock;
2487
2488 memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases));
2489
2490 p = &aliases->aliases[alias->slot];
2223 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; 2491 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
2224 p->npages = alias->memory_size >> PAGE_SHIFT; 2492 p->npages = alias->memory_size >> PAGE_SHIFT;
2225 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; 2493 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
2494 p->flags &= ~(KVM_ALIAS_INVALID);
2226 2495
2227 for (n = KVM_ALIAS_SLOTS; n > 0; --n) 2496 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
2228 if (kvm->arch.aliases[n - 1].npages) 2497 if (aliases->aliases[n - 1].npages)
2229 break; 2498 break;
2230 kvm->arch.naliases = n; 2499 aliases->naliases = n;
2231 2500
2232 spin_unlock(&kvm->mmu_lock); 2501 old_aliases = kvm->arch.aliases;
2233 kvm_mmu_zap_all(kvm); 2502 rcu_assign_pointer(kvm->arch.aliases, aliases);
2234 2503 synchronize_srcu_expedited(&kvm->srcu);
2235 up_write(&kvm->slots_lock); 2504 kfree(old_aliases);
2236 2505 r = 0;
2237 return 0;
2238 2506
2507out_unlock:
2508 mutex_unlock(&kvm->slots_lock);
2239out: 2509out:
2240 return r; 2510 return r;
2241} 2511}
@@ -2273,18 +2543,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
2273 r = 0; 2543 r = 0;
2274 switch (chip->chip_id) { 2544 switch (chip->chip_id) {
2275 case KVM_IRQCHIP_PIC_MASTER: 2545 case KVM_IRQCHIP_PIC_MASTER:
2276 spin_lock(&pic_irqchip(kvm)->lock); 2546 raw_spin_lock(&pic_irqchip(kvm)->lock);
2277 memcpy(&pic_irqchip(kvm)->pics[0], 2547 memcpy(&pic_irqchip(kvm)->pics[0],
2278 &chip->chip.pic, 2548 &chip->chip.pic,
2279 sizeof(struct kvm_pic_state)); 2549 sizeof(struct kvm_pic_state));
2280 spin_unlock(&pic_irqchip(kvm)->lock); 2550 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2281 break; 2551 break;
2282 case KVM_IRQCHIP_PIC_SLAVE: 2552 case KVM_IRQCHIP_PIC_SLAVE:
2283 spin_lock(&pic_irqchip(kvm)->lock); 2553 raw_spin_lock(&pic_irqchip(kvm)->lock);
2284 memcpy(&pic_irqchip(kvm)->pics[1], 2554 memcpy(&pic_irqchip(kvm)->pics[1],
2285 &chip->chip.pic, 2555 &chip->chip.pic,
2286 sizeof(struct kvm_pic_state)); 2556 sizeof(struct kvm_pic_state));
2287 spin_unlock(&pic_irqchip(kvm)->lock); 2557 raw_spin_unlock(&pic_irqchip(kvm)->lock);
2288 break; 2558 break;
2289 case KVM_IRQCHIP_IOAPIC: 2559 case KVM_IRQCHIP_IOAPIC:
2290 r = kvm_set_ioapic(kvm, &chip->chip.ioapic); 2560 r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
@@ -2364,29 +2634,62 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
2364int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 2634int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
2365 struct kvm_dirty_log *log) 2635 struct kvm_dirty_log *log)
2366{ 2636{
2367 int r; 2637 int r, n, i;
2368 int n;
2369 struct kvm_memory_slot *memslot; 2638 struct kvm_memory_slot *memslot;
2370 int is_dirty = 0; 2639 unsigned long is_dirty = 0;
2640 unsigned long *dirty_bitmap = NULL;
2371 2641
2372 down_write(&kvm->slots_lock); 2642 mutex_lock(&kvm->slots_lock);
2373 2643
2374 r = kvm_get_dirty_log(kvm, log, &is_dirty); 2644 r = -EINVAL;
2375 if (r) 2645 if (log->slot >= KVM_MEMORY_SLOTS)
2646 goto out;
2647
2648 memslot = &kvm->memslots->memslots[log->slot];
2649 r = -ENOENT;
2650 if (!memslot->dirty_bitmap)
2651 goto out;
2652
2653 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
2654
2655 r = -ENOMEM;
2656 dirty_bitmap = vmalloc(n);
2657 if (!dirty_bitmap)
2376 goto out; 2658 goto out;
2659 memset(dirty_bitmap, 0, n);
2660
2661 for (i = 0; !is_dirty && i < n/sizeof(long); i++)
2662 is_dirty = memslot->dirty_bitmap[i];
2377 2663
2378 /* If nothing is dirty, don't bother messing with page tables. */ 2664 /* If nothing is dirty, don't bother messing with page tables. */
2379 if (is_dirty) { 2665 if (is_dirty) {
2666 struct kvm_memslots *slots, *old_slots;
2667
2380 spin_lock(&kvm->mmu_lock); 2668 spin_lock(&kvm->mmu_lock);
2381 kvm_mmu_slot_remove_write_access(kvm, log->slot); 2669 kvm_mmu_slot_remove_write_access(kvm, log->slot);
2382 spin_unlock(&kvm->mmu_lock); 2670 spin_unlock(&kvm->mmu_lock);
2383 memslot = &kvm->memslots[log->slot]; 2671
2384 n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; 2672 slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
2385 memset(memslot->dirty_bitmap, 0, n); 2673 if (!slots)
2674 goto out_free;
2675
2676 memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
2677 slots->memslots[log->slot].dirty_bitmap = dirty_bitmap;
2678
2679 old_slots = kvm->memslots;
2680 rcu_assign_pointer(kvm->memslots, slots);
2681 synchronize_srcu_expedited(&kvm->srcu);
2682 dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap;
2683 kfree(old_slots);
2386 } 2684 }
2685
2387 r = 0; 2686 r = 0;
2687 if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n))
2688 r = -EFAULT;
2689out_free:
2690 vfree(dirty_bitmap);
2388out: 2691out:
2389 up_write(&kvm->slots_lock); 2692 mutex_unlock(&kvm->slots_lock);
2390 return r; 2693 return r;
2391} 2694}
2392 2695
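
The reworked dirty-log path above sizes its private bitmap as ALIGN(npages, BITS_PER_LONG) / 8 bytes and scans it one long at a time to decide whether anything is dirty before swapping memslots. A user-space sketch of just that arithmetic, assuming 64-bit longs:

#include <stdio.h>
#include <string.h>

#define BITS_PER_LONG (8 * sizeof(long))
#define ALIGN(x, a)   (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long npages = 100;                    /* pages in the memslot */
	size_t n = ALIGN(npages, BITS_PER_LONG) / 8;   /* bitmap size in bytes */

	unsigned long bitmap[ALIGN(100, 64) / 64];     /* 64 == BITS_PER_LONG on LP64 */
	memset(bitmap, 0, sizeof(bitmap));
	bitmap[1] |= 1UL << 3;                         /* pretend page 67 is dirty */

	unsigned long is_dirty = 0;
	for (size_t i = 0; !is_dirty && i < n / sizeof(long); i++)
		is_dirty = bitmap[i];

	printf("bitmap bytes=%zu dirty=%lu\n", n, is_dirty);
	return 0;
}
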
@@ -2469,6 +2772,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2469 if (vpic) { 2772 if (vpic) {
2470 r = kvm_ioapic_init(kvm); 2773 r = kvm_ioapic_init(kvm);
2471 if (r) { 2774 if (r) {
2775 kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
2776 &vpic->dev);
2472 kfree(vpic); 2777 kfree(vpic);
2473 goto create_irqchip_unlock; 2778 goto create_irqchip_unlock;
2474 } 2779 }
@@ -2480,10 +2785,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
2480 r = kvm_setup_default_irq_routing(kvm); 2785 r = kvm_setup_default_irq_routing(kvm);
2481 if (r) { 2786 if (r) {
2482 mutex_lock(&kvm->irq_lock); 2787 mutex_lock(&kvm->irq_lock);
2483 kfree(kvm->arch.vpic); 2788 kvm_ioapic_destroy(kvm);
2484 kfree(kvm->arch.vioapic); 2789 kvm_destroy_pic(kvm);
2485 kvm->arch.vpic = NULL;
2486 kvm->arch.vioapic = NULL;
2487 mutex_unlock(&kvm->irq_lock); 2790 mutex_unlock(&kvm->irq_lock);
2488 } 2791 }
2489 create_irqchip_unlock: 2792 create_irqchip_unlock:
@@ -2499,7 +2802,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2499 sizeof(struct kvm_pit_config))) 2802 sizeof(struct kvm_pit_config)))
2500 goto out; 2803 goto out;
2501 create_pit: 2804 create_pit:
2502 down_write(&kvm->slots_lock); 2805 mutex_lock(&kvm->slots_lock);
2503 r = -EEXIST; 2806 r = -EEXIST;
2504 if (kvm->arch.vpit) 2807 if (kvm->arch.vpit)
2505 goto create_pit_unlock; 2808 goto create_pit_unlock;
@@ -2508,7 +2811,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
2508 if (kvm->arch.vpit) 2811 if (kvm->arch.vpit)
2509 r = 0; 2812 r = 0;
2510 create_pit_unlock: 2813 create_pit_unlock:
2511 up_write(&kvm->slots_lock); 2814 mutex_unlock(&kvm->slots_lock);
2512 break; 2815 break;
2513 case KVM_IRQ_LINE_STATUS: 2816 case KVM_IRQ_LINE_STATUS:
2514 case KVM_IRQ_LINE: { 2817 case KVM_IRQ_LINE: {
@@ -2725,7 +3028,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
2725 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) 3028 !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v))
2726 return 0; 3029 return 0;
2727 3030
2728 return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); 3031 return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2729} 3032}
2730 3033
2731static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) 3034static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
@@ -2734,17 +3037,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
2734 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) 3037 !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v))
2735 return 0; 3038 return 0;
2736 3039
2737 return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); 3040 return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v);
2738} 3041}
2739 3042
2740static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, 3043gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
2741 struct kvm_vcpu *vcpu) 3044{
3045 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3046 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3047}
3048
3049 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3050{
3051 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3052 access |= PFERR_FETCH_MASK;
3053 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3054}
3055
3056gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3057{
3058 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3059 access |= PFERR_WRITE_MASK;
3060 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
3061}
3062
3063/* used to access the guest's mapped memory without checking CPL */
3064gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
3065{
3066 return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
3067}
3068
3069static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
3070 struct kvm_vcpu *vcpu, u32 access,
3071 u32 *error)
2742{ 3072{
2743 void *data = val; 3073 void *data = val;
2744 int r = X86EMUL_CONTINUE; 3074 int r = X86EMUL_CONTINUE;
2745 3075
2746 while (bytes) { 3076 while (bytes) {
2747 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3077 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
2748 unsigned offset = addr & (PAGE_SIZE-1); 3078 unsigned offset = addr & (PAGE_SIZE-1);
2749 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); 3079 unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
2750 int ret; 3080 int ret;
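
kvm_read_guest_virt_helper() above copies the buffer page by page: each pass translates addr and copies at most up to the next page boundary. The chunking on its own, assuming 4 KiB pages:

#include <stdio.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned long addr = 0x1ff0;   /* 16 bytes below a page boundary */
	unsigned int bytes = 100;

	while (bytes) {
		unsigned offset = addr & (PAGE_SIZE - 1);
		unsigned toread = bytes < PAGE_SIZE - offset ? bytes : PAGE_SIZE - offset;

		printf("copy %u bytes at 0x%lx (page offset 0x%x)\n", toread, addr, offset);

		bytes -= toread;
		addr  += toread;
	}
	return 0;
}
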
@@ -2767,14 +3097,37 @@ out:
2767 return r; 3097 return r;
2768} 3098}
2769 3099
3100/* used for instruction fetching */
3101static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
3102 struct kvm_vcpu *vcpu, u32 *error)
3103{
3104 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3105 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
3106 access | PFERR_FETCH_MASK, error);
3107}
3108
3109static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
3110 struct kvm_vcpu *vcpu, u32 *error)
3111{
3112 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
3113 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
3114 error);
3115}
3116
3117static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
3118 struct kvm_vcpu *vcpu, u32 *error)
3119{
3120 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
3121}
3122
2770static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, 3123static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
2771 struct kvm_vcpu *vcpu) 3124 struct kvm_vcpu *vcpu, u32 *error)
2772{ 3125{
2773 void *data = val; 3126 void *data = val;
2774 int r = X86EMUL_CONTINUE; 3127 int r = X86EMUL_CONTINUE;
2775 3128
2776 while (bytes) { 3129 while (bytes) {
2777 gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3130 gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
2778 unsigned offset = addr & (PAGE_SIZE-1); 3131 unsigned offset = addr & (PAGE_SIZE-1);
2779 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); 3132 unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
2780 int ret; 3133 int ret;
@@ -2804,6 +3157,7 @@ static int emulator_read_emulated(unsigned long addr,
2804 struct kvm_vcpu *vcpu) 3157 struct kvm_vcpu *vcpu)
2805{ 3158{
2806 gpa_t gpa; 3159 gpa_t gpa;
3160 u32 error_code;
2807 3161
2808 if (vcpu->mmio_read_completed) { 3162 if (vcpu->mmio_read_completed) {
2809 memcpy(val, vcpu->mmio_data, bytes); 3163 memcpy(val, vcpu->mmio_data, bytes);
@@ -2813,17 +3167,20 @@ static int emulator_read_emulated(unsigned long addr,
2813 return X86EMUL_CONTINUE; 3167 return X86EMUL_CONTINUE;
2814 } 3168 }
2815 3169
2816 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3170 gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
3171
3172 if (gpa == UNMAPPED_GVA) {
3173 kvm_inject_page_fault(vcpu, addr, error_code);
3174 return X86EMUL_PROPAGATE_FAULT;
3175 }
2817 3176
2818 /* For APIC access vmexit */ 3177 /* For APIC access vmexit */
2819 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3178 if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
2820 goto mmio; 3179 goto mmio;
2821 3180
2822 if (kvm_read_guest_virt(addr, val, bytes, vcpu) 3181 if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
2823 == X86EMUL_CONTINUE) 3182 == X86EMUL_CONTINUE)
2824 return X86EMUL_CONTINUE; 3183 return X86EMUL_CONTINUE;
2825 if (gpa == UNMAPPED_GVA)
2826 return X86EMUL_PROPAGATE_FAULT;
2827 3184
2828mmio: 3185mmio:
2829 /* 3186 /*
@@ -2862,11 +3219,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
2862 struct kvm_vcpu *vcpu) 3219 struct kvm_vcpu *vcpu)
2863{ 3220{
2864 gpa_t gpa; 3221 gpa_t gpa;
3222 u32 error_code;
2865 3223
2866 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3224 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
2867 3225
2868 if (gpa == UNMAPPED_GVA) { 3226 if (gpa == UNMAPPED_GVA) {
2869 kvm_inject_page_fault(vcpu, addr, 2); 3227 kvm_inject_page_fault(vcpu, addr, error_code);
2870 return X86EMUL_PROPAGATE_FAULT; 3228 return X86EMUL_PROPAGATE_FAULT;
2871 } 3229 }
2872 3230
@@ -2930,7 +3288,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
2930 char *kaddr; 3288 char *kaddr;
2931 u64 val; 3289 u64 val;
2932 3290
2933 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); 3291 gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
2934 3292
2935 if (gpa == UNMAPPED_GVA || 3293 if (gpa == UNMAPPED_GVA ||
2936 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) 3294 (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2967,35 +3325,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
2967 3325
2968int emulate_clts(struct kvm_vcpu *vcpu) 3326int emulate_clts(struct kvm_vcpu *vcpu)
2969{ 3327{
2970 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); 3328 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
3329 kvm_x86_ops->fpu_activate(vcpu);
2971 return X86EMUL_CONTINUE; 3330 return X86EMUL_CONTINUE;
2972} 3331}
2973 3332
2974int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) 3333int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
2975{ 3334{
2976 struct kvm_vcpu *vcpu = ctxt->vcpu; 3335 return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest);
2977
2978 switch (dr) {
2979 case 0 ... 3:
2980 *dest = kvm_x86_ops->get_dr(vcpu, dr);
2981 return X86EMUL_CONTINUE;
2982 default:
2983 pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
2984 return X86EMUL_UNHANDLEABLE;
2985 }
2986} 3336}
2987 3337
2988int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) 3338int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
2989{ 3339{
2990 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; 3340 unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
2991 int exception;
2992 3341
2993 kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); 3342 return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask);
2994 if (exception) {
2995 /* FIXME: better handling */
2996 return X86EMUL_UNHANDLEABLE;
2997 }
2998 return X86EMUL_CONTINUE;
2999} 3343}
3000 3344
3001void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) 3345void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
@@ -3009,7 +3353,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3009 3353
3010 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); 3354 rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
3011 3355
3012 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); 3356 kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
3013 3357
3014 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", 3358 printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
3015 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); 3359 context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -3017,7 +3361,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
3017EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); 3361EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
3018 3362
3019static struct x86_emulate_ops emulate_ops = { 3363static struct x86_emulate_ops emulate_ops = {
3020 .read_std = kvm_read_guest_virt, 3364 .read_std = kvm_read_guest_virt_system,
3365 .fetch = kvm_fetch_guest_virt,
3021 .read_emulated = emulator_read_emulated, 3366 .read_emulated = emulator_read_emulated,
3022 .write_emulated = emulator_write_emulated, 3367 .write_emulated = emulator_write_emulated,
3023 .cmpxchg_emulated = emulator_cmpxchg_emulated, 3368 .cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -3060,8 +3405,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
3060 vcpu->arch.emulate_ctxt.vcpu = vcpu; 3405 vcpu->arch.emulate_ctxt.vcpu = vcpu;
3061 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); 3406 vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu);
3062 vcpu->arch.emulate_ctxt.mode = 3407 vcpu->arch.emulate_ctxt.mode =
3408 (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
3063 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) 3409 (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
3064 ? X86EMUL_MODE_REAL : cs_l 3410 ? X86EMUL_MODE_VM86 : cs_l
3065 ? X86EMUL_MODE_PROT64 : cs_db 3411 ? X86EMUL_MODE_PROT64 : cs_db
3066 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 3412 ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
3067 3413
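
The hunk above adds a VM86 case in front of the existing mode chain: real mode when the CPU is not in protected mode, VM86 when EFLAGS.VM is set, otherwise 64/32/16-bit protected mode from the CS descriptor's L and D bits. The same decision chain as a small stand-alone function (enum names are illustrative):

#include <stdbool.h>
#include <stdio.h>

enum emul_mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64 };

static enum emul_mode pick_mode(bool protmode, bool eflags_vm, bool cs_l, bool cs_db)
{
	return !protmode ? MODE_REAL :
	       eflags_vm ? MODE_VM86 :
	       cs_l      ? MODE_PROT64 :
	       cs_db     ? MODE_PROT32 : MODE_PROT16;
}

int main(void)
{
	printf("%d\n", pick_mode(false, false, false, false)); /* MODE_REAL   */
	printf("%d\n", pick_mode(true,  true,  false, false)); /* MODE_VM86   */
	printf("%d\n", pick_mode(true,  false, true,  false)); /* MODE_PROT64 */
	return 0;
}
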
@@ -3153,12 +3499,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
3153 gva_t q = vcpu->arch.pio.guest_gva; 3499 gva_t q = vcpu->arch.pio.guest_gva;
3154 unsigned bytes; 3500 unsigned bytes;
3155 int ret; 3501 int ret;
3502 u32 error_code;
3156 3503
3157 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; 3504 bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
3158 if (vcpu->arch.pio.in) 3505 if (vcpu->arch.pio.in)
3159 ret = kvm_write_guest_virt(q, p, bytes, vcpu); 3506 ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
3160 else 3507 else
3161 ret = kvm_read_guest_virt(q, p, bytes, vcpu); 3508 ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
3509
3510 if (ret == X86EMUL_PROPAGATE_FAULT)
3511 kvm_inject_page_fault(vcpu, q, error_code);
3512
3162 return ret; 3513 return ret;
3163} 3514}
3164 3515
@@ -3179,7 +3530,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
3179 if (io->in) { 3530 if (io->in) {
3180 r = pio_copy_data(vcpu); 3531 r = pio_copy_data(vcpu);
3181 if (r) 3532 if (r)
3182 return r; 3533 goto out;
3183 } 3534 }
3184 3535
3185 delta = 1; 3536 delta = 1;
@@ -3206,7 +3557,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
3206 kvm_register_write(vcpu, VCPU_REGS_RSI, val); 3557 kvm_register_write(vcpu, VCPU_REGS_RSI, val);
3207 } 3558 }
3208 } 3559 }
3209 3560out:
3210 io->count -= io->cur_count; 3561 io->count -= io->cur_count;
3211 io->cur_count = 0; 3562 io->cur_count = 0;
3212 3563
@@ -3219,11 +3570,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
3219 int r; 3570 int r;
3220 3571
3221 if (vcpu->arch.pio.in) 3572 if (vcpu->arch.pio.in)
3222 r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3573 r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
3223 vcpu->arch.pio.size, pd); 3574 vcpu->arch.pio.size, pd);
3224 else 3575 else
3225 r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, 3576 r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3226 vcpu->arch.pio.size, pd); 3577 vcpu->arch.pio.port, vcpu->arch.pio.size,
3578 pd);
3227 return r; 3579 return r;
3228} 3580}
3229 3581
@@ -3234,7 +3586,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
3234 int i, r = 0; 3586 int i, r = 0;
3235 3587
3236 for (i = 0; i < io->cur_count; i++) { 3588 for (i = 0; i < io->cur_count; i++) {
3237 if (kvm_io_bus_write(&vcpu->kvm->pio_bus, 3589 if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
3238 io->port, io->size, pd)) { 3590 io->port, io->size, pd)) {
3239 r = -EOPNOTSUPP; 3591 r = -EOPNOTSUPP;
3240 break; 3592 break;
@@ -3248,6 +3600,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3248{ 3600{
3249 unsigned long val; 3601 unsigned long val;
3250 3602
3603 trace_kvm_pio(!in, port, size, 1);
3604
3251 vcpu->run->exit_reason = KVM_EXIT_IO; 3605 vcpu->run->exit_reason = KVM_EXIT_IO;
3252 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3606 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3253 vcpu->run->io.size = vcpu->arch.pio.size = size; 3607 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3259,11 +3613,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port)
3259 vcpu->arch.pio.down = 0; 3613 vcpu->arch.pio.down = 0;
3260 vcpu->arch.pio.rep = 0; 3614 vcpu->arch.pio.rep = 0;
3261 3615
3262 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, 3616 if (!vcpu->arch.pio.in) {
3263 size, 1); 3617 val = kvm_register_read(vcpu, VCPU_REGS_RAX);
3264 3618 memcpy(vcpu->arch.pio_data, &val, 4);
3265 val = kvm_register_read(vcpu, VCPU_REGS_RAX); 3619 }
3266 memcpy(vcpu->arch.pio_data, &val, 4);
3267 3620
3268 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { 3621 if (!kernel_pio(vcpu, vcpu->arch.pio_data)) {
3269 complete_pio(vcpu); 3622 complete_pio(vcpu);
@@ -3280,6 +3633,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3280 unsigned now, in_page; 3633 unsigned now, in_page;
3281 int ret = 0; 3634 int ret = 0;
3282 3635
3636 trace_kvm_pio(!in, port, size, count);
3637
3283 vcpu->run->exit_reason = KVM_EXIT_IO; 3638 vcpu->run->exit_reason = KVM_EXIT_IO;
3284 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; 3639 vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
3285 vcpu->run->io.size = vcpu->arch.pio.size = size; 3640 vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3291,9 +3646,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3291 vcpu->arch.pio.down = down; 3646 vcpu->arch.pio.down = down;
3292 vcpu->arch.pio.rep = rep; 3647 vcpu->arch.pio.rep = rep;
3293 3648
3294 trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
3295 size, count);
3296
3297 if (!count) { 3649 if (!count) {
3298 kvm_x86_ops->skip_emulated_instruction(vcpu); 3650 kvm_x86_ops->skip_emulated_instruction(vcpu);
3299 return 1; 3651 return 1;
@@ -3325,10 +3677,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in,
3325 if (!vcpu->arch.pio.in) { 3677 if (!vcpu->arch.pio.in) {
3326 /* string PIO write */ 3678 /* string PIO write */
3327 ret = pio_copy_data(vcpu); 3679 ret = pio_copy_data(vcpu);
3328 if (ret == X86EMUL_PROPAGATE_FAULT) { 3680 if (ret == X86EMUL_PROPAGATE_FAULT)
3329 kvm_inject_gp(vcpu, 0);
3330 return 1; 3681 return 1;
3331 }
3332 if (ret == 0 && !pio_string_write(vcpu)) { 3682 if (ret == 0 && !pio_string_write(vcpu)) {
3333 complete_pio(vcpu); 3683 complete_pio(vcpu);
3334 if (vcpu->arch.pio.count == 0) 3684 if (vcpu->arch.pio.count == 0)
@@ -3487,11 +3837,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
3487 return a0 | ((gpa_t)a1 << 32); 3837 return a0 | ((gpa_t)a1 << 32);
3488} 3838}
3489 3839
3840int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
3841{
3842 u64 param, ingpa, outgpa, ret;
3843 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
3844 bool fast, longmode;
3845 int cs_db, cs_l;
3846
3847 /*
3848 * a hypercall generates #UD from non-zero CPL or from real mode,
3849 * per the Hyper-V spec
3850 */
3851 if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
3852 kvm_queue_exception(vcpu, UD_VECTOR);
3853 return 0;
3854 }
3855
3856 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
3857 longmode = is_long_mode(vcpu) && cs_l == 1;
3858
3859 if (!longmode) {
3860 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
3861 (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
3862 ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
3863 (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
3864 outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
3865 (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
3866 }
3867#ifdef CONFIG_X86_64
3868 else {
3869 param = kvm_register_read(vcpu, VCPU_REGS_RCX);
3870 ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
3871 outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
3872 }
3873#endif
3874
3875 code = param & 0xffff;
3876 fast = (param >> 16) & 0x1;
3877 rep_cnt = (param >> 32) & 0xfff;
3878 rep_idx = (param >> 48) & 0xfff;
3879
3880 trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
3881
3882 switch (code) {
3883 case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT:
3884 kvm_vcpu_on_spin(vcpu);
3885 break;
3886 default:
3887 res = HV_STATUS_INVALID_HYPERCALL_CODE;
3888 break;
3889 }
3890
3891 ret = res | (((u64)rep_done & 0xfff) << 32);
3892 if (longmode) {
3893 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
3894 } else {
3895 kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32);
3896 kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff);
3897 }
3898
3899 return 1;
3900}
3901
3490int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 3902int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
3491{ 3903{
3492 unsigned long nr, a0, a1, a2, a3, ret; 3904 unsigned long nr, a0, a1, a2, a3, ret;
3493 int r = 1; 3905 int r = 1;
3494 3906
3907 if (kvm_hv_hypercall_enabled(vcpu->kvm))
3908 return kvm_hv_hypercall(vcpu);
3909
3495 nr = kvm_register_read(vcpu, VCPU_REGS_RAX); 3910 nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
3496 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); 3911 a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
3497 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); 3912 a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
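
kvm_hv_hypercall() above splits the hypercall input into a code, a fast-call flag, and rep counts, and returns status in the low bits with the completed rep count at bit 32. A user-space sketch of that packing, using the same field widths as the code above (the hypercall code value is illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t param = ((uint64_t)5 << 48) |   /* rep_idx   */
			 ((uint64_t)8 << 32) |   /* rep_cnt   */
			 (1ULL << 16)        |   /* fast flag */
			 0x0008;                 /* illustrative hypercall code */

	uint16_t code    = param & 0xffff;
	int      fast    = (param >> 16) & 0x1;
	uint16_t rep_cnt = (param >> 32) & 0xfff;
	uint16_t rep_idx = (param >> 48) & 0xfff;

	/* result: status in the low bits (0 for success here), reps completed at bit 32 */
	uint64_t rep_done = rep_cnt;
	uint64_t ret = 0 | ((rep_done & 0xfff) << 32);

	printf("code=0x%x fast=%d rep_cnt=%u rep_idx=%u ret=0x%llx\n",
	       code, fast, rep_cnt, rep_idx, (unsigned long long)ret);
	return 0;
}
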
@@ -3534,10 +3949,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
3534int kvm_fix_hypercall(struct kvm_vcpu *vcpu) 3949int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3535{ 3950{
3536 char instruction[3]; 3951 char instruction[3];
3537 int ret = 0;
3538 unsigned long rip = kvm_rip_read(vcpu); 3952 unsigned long rip = kvm_rip_read(vcpu);
3539 3953
3540
3541 /* 3954 /*
3542 * Blow out the MMU to ensure that no other VCPU has an active mapping 3955 * Blow out the MMU to ensure that no other VCPU has an active mapping
3543 * to ensure that the updated hypercall appears atomically across all 3956 * to ensure that the updated hypercall appears atomically across all
@@ -3546,11 +3959,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
3546 kvm_mmu_zap_all(vcpu->kvm); 3959 kvm_mmu_zap_all(vcpu->kvm);
3547 3960
3548 kvm_x86_ops->patch_hypercall(vcpu, instruction); 3961 kvm_x86_ops->patch_hypercall(vcpu, instruction);
3549 if (emulator_write_emulated(rip, instruction, 3, vcpu)
3550 != X86EMUL_CONTINUE)
3551 ret = -EFAULT;
3552 3962
3553 return ret; 3963 return emulator_write_emulated(rip, instruction, 3, vcpu);
3554} 3964}
3555 3965
3556static u64 mk_cr_64(u64 curr_cr, u32 new_val) 3966static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -3583,10 +3993,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3583{ 3993{
3584 unsigned long value; 3994 unsigned long value;
3585 3995
3586 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
3587 switch (cr) { 3996 switch (cr) {
3588 case 0: 3997 case 0:
3589 value = vcpu->arch.cr0; 3998 value = kvm_read_cr0(vcpu);
3590 break; 3999 break;
3591 case 2: 4000 case 2:
3592 value = vcpu->arch.cr2; 4001 value = vcpu->arch.cr2;
@@ -3595,7 +4004,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
3595 value = vcpu->arch.cr3; 4004 value = vcpu->arch.cr3;
3596 break; 4005 break;
3597 case 4: 4006 case 4:
3598 value = vcpu->arch.cr4; 4007 value = kvm_read_cr4(vcpu);
3599 break; 4008 break;
3600 case 8: 4009 case 8:
3601 value = kvm_get_cr8(vcpu); 4010 value = kvm_get_cr8(vcpu);
@@ -3613,7 +4022,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3613{ 4022{
3614 switch (cr) { 4023 switch (cr) {
3615 case 0: 4024 case 0:
3616 kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); 4025 kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val));
3617 *rflags = kvm_get_rflags(vcpu); 4026 *rflags = kvm_get_rflags(vcpu);
3618 break; 4027 break;
3619 case 2: 4028 case 2:
@@ -3623,7 +4032,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
3623 kvm_set_cr3(vcpu, val); 4032 kvm_set_cr3(vcpu, val);
3624 break; 4033 break;
3625 case 4: 4034 case 4:
3626 kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); 4035 kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val));
3627 break; 4036 break;
3628 case 8: 4037 case 8:
3629 kvm_set_cr8(vcpu, val & 0xfUL); 4038 kvm_set_cr8(vcpu, val & 0xfUL);
@@ -3690,6 +4099,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
3690 } 4099 }
3691 return best; 4100 return best;
3692} 4101}
4102EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
3693 4103
3694int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) 4104int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
3695{ 4105{
@@ -3773,14 +4183,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
3773static void vapic_exit(struct kvm_vcpu *vcpu) 4183static void vapic_exit(struct kvm_vcpu *vcpu)
3774{ 4184{
3775 struct kvm_lapic *apic = vcpu->arch.apic; 4185 struct kvm_lapic *apic = vcpu->arch.apic;
4186 int idx;
3776 4187
3777 if (!apic || !apic->vapic_addr) 4188 if (!apic || !apic->vapic_addr)
3778 return; 4189 return;
3779 4190
3780 down_read(&vcpu->kvm->slots_lock); 4191 idx = srcu_read_lock(&vcpu->kvm->srcu);
3781 kvm_release_page_dirty(apic->vapic_page); 4192 kvm_release_page_dirty(apic->vapic_page);
3782 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); 4193 mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
3783 up_read(&vcpu->kvm->slots_lock); 4194 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3784} 4195}
3785 4196
3786static void update_cr8_intercept(struct kvm_vcpu *vcpu) 4197static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -3876,12 +4287,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3876 r = 0; 4287 r = 0;
3877 goto out; 4288 goto out;
3878 } 4289 }
4290 if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
4291 vcpu->fpu_active = 0;
4292 kvm_x86_ops->fpu_deactivate(vcpu);
4293 }
3879 } 4294 }
3880 4295
3881 preempt_disable(); 4296 preempt_disable();
3882 4297
3883 kvm_x86_ops->prepare_guest_switch(vcpu); 4298 kvm_x86_ops->prepare_guest_switch(vcpu);
3884 kvm_load_guest_fpu(vcpu); 4299 if (vcpu->fpu_active)
4300 kvm_load_guest_fpu(vcpu);
3885 4301
3886 local_irq_disable(); 4302 local_irq_disable();
3887 4303
@@ -3909,7 +4325,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3909 kvm_lapic_sync_to_vapic(vcpu); 4325 kvm_lapic_sync_to_vapic(vcpu);
3910 } 4326 }
3911 4327
3912 up_read(&vcpu->kvm->slots_lock); 4328 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3913 4329
3914 kvm_guest_enter(); 4330 kvm_guest_enter();
3915 4331
@@ -3951,7 +4367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
3951 4367
3952 preempt_enable(); 4368 preempt_enable();
3953 4369
3954 down_read(&vcpu->kvm->slots_lock); 4370 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3955 4371
3956 /* 4372 /*
3957 * Profile KVM exit RIPs: 4373 * Profile KVM exit RIPs:
@@ -3973,6 +4389,7 @@ out:
3973static int __vcpu_run(struct kvm_vcpu *vcpu) 4389static int __vcpu_run(struct kvm_vcpu *vcpu)
3974{ 4390{
3975 int r; 4391 int r;
4392 struct kvm *kvm = vcpu->kvm;
3976 4393
3977 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { 4394 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
3978 pr_debug("vcpu %d received sipi with vector # %x\n", 4395 pr_debug("vcpu %d received sipi with vector # %x\n",
@@ -3984,7 +4401,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
3984 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 4401 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3985 } 4402 }
3986 4403
3987 down_read(&vcpu->kvm->slots_lock); 4404 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3988 vapic_enter(vcpu); 4405 vapic_enter(vcpu);
3989 4406
3990 r = 1; 4407 r = 1;
@@ -3992,9 +4409,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
3992 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) 4409 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
3993 r = vcpu_enter_guest(vcpu); 4410 r = vcpu_enter_guest(vcpu);
3994 else { 4411 else {
3995 up_read(&vcpu->kvm->slots_lock); 4412 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
3996 kvm_vcpu_block(vcpu); 4413 kvm_vcpu_block(vcpu);
3997 down_read(&vcpu->kvm->slots_lock); 4414 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
3998 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) 4415 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
3999 { 4416 {
4000 switch(vcpu->arch.mp_state) { 4417 switch(vcpu->arch.mp_state) {
@@ -4029,13 +4446,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
4029 ++vcpu->stat.signal_exits; 4446 ++vcpu->stat.signal_exits;
4030 } 4447 }
4031 if (need_resched()) { 4448 if (need_resched()) {
4032 up_read(&vcpu->kvm->slots_lock); 4449 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4033 kvm_resched(vcpu); 4450 kvm_resched(vcpu);
4034 down_read(&vcpu->kvm->slots_lock); 4451 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
4035 } 4452 }
4036 } 4453 }
4037 4454
4038 up_read(&vcpu->kvm->slots_lock); 4455 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
4039 post_kvm_run_save(vcpu); 4456 post_kvm_run_save(vcpu);
4040 4457
4041 vapic_exit(vcpu); 4458 vapic_exit(vcpu);
@@ -4074,10 +4491,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4074 vcpu->mmio_read_completed = 1; 4491 vcpu->mmio_read_completed = 1;
4075 vcpu->mmio_needed = 0; 4492 vcpu->mmio_needed = 0;
4076 4493
4077 down_read(&vcpu->kvm->slots_lock); 4494 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4078 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, 4495 r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0,
4079 EMULTYPE_NO_DECODE); 4496 EMULTYPE_NO_DECODE);
4080 up_read(&vcpu->kvm->slots_lock); 4497 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4081 if (r == EMULATE_DO_MMIO) { 4498 if (r == EMULATE_DO_MMIO) {
4082 /* 4499 /*
4083 * Read-modify-write. Back to userspace. 4500 * Read-modify-write. Back to userspace.
@@ -4204,13 +4621,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
4204 sregs->gdt.limit = dt.limit; 4621 sregs->gdt.limit = dt.limit;
4205 sregs->gdt.base = dt.base; 4622 sregs->gdt.base = dt.base;
4206 4623
4207 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 4624 sregs->cr0 = kvm_read_cr0(vcpu);
4208 sregs->cr0 = vcpu->arch.cr0;
4209 sregs->cr2 = vcpu->arch.cr2; 4625 sregs->cr2 = vcpu->arch.cr2;
4210 sregs->cr3 = vcpu->arch.cr3; 4626 sregs->cr3 = vcpu->arch.cr3;
4211 sregs->cr4 = vcpu->arch.cr4; 4627 sregs->cr4 = kvm_read_cr4(vcpu);
4212 sregs->cr8 = kvm_get_cr8(vcpu); 4628 sregs->cr8 = kvm_get_cr8(vcpu);
4213 sregs->efer = vcpu->arch.shadow_efer; 4629 sregs->efer = vcpu->arch.efer;
4214 sregs->apic_base = kvm_get_apic_base(vcpu); 4630 sregs->apic_base = kvm_get_apic_base(vcpu);
4215 4631
4216 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); 4632 memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
@@ -4298,14 +4714,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4298{ 4714{
4299 struct descriptor_table dtable; 4715 struct descriptor_table dtable;
4300 u16 index = selector >> 3; 4716 u16 index = selector >> 3;
4717 int ret;
4718 u32 err;
4719 gva_t addr;
4301 4720
4302 get_segment_descriptor_dtable(vcpu, selector, &dtable); 4721 get_segment_descriptor_dtable(vcpu, selector, &dtable);
4303 4722
4304 if (dtable.limit < index * 8 + 7) { 4723 if (dtable.limit < index * 8 + 7) {
4305 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); 4724 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
4306 return 1; 4725 return X86EMUL_PROPAGATE_FAULT;
4307 } 4726 }
4308 return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4727 addr = dtable.base + index * 8;
4728 ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc),
4729 vcpu, &err);
4730 if (ret == X86EMUL_PROPAGATE_FAULT)
4731 kvm_inject_page_fault(vcpu, addr, err);
4732
4733 return ret;
4309} 4734}
4310 4735
4311/* allowed just for 8 bytes segments */ 4736/* allowed just for 8 bytes segments */
@@ -4319,15 +4744,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
4319 4744
4320 if (dtable.limit < index * 8 + 7) 4745 if (dtable.limit < index * 8 + 7)
4321 return 1; 4746 return 1;
4322 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); 4747 return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
4748}
4749
4750static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
4751 struct desc_struct *seg_desc)
4752{
4753 u32 base_addr = get_desc_base(seg_desc);
4754
4755 return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
4323} 4756}
4324 4757
4325static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, 4758static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
4326 struct desc_struct *seg_desc) 4759 struct desc_struct *seg_desc)
4327{ 4760{
4328 u32 base_addr = get_desc_base(seg_desc); 4761 u32 base_addr = get_desc_base(seg_desc);
4329 4762
4330 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); 4763 return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
4331} 4764}
4332 4765
4333static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) 4766static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4338,18 +4771,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
4338 return kvm_seg.selector; 4771 return kvm_seg.selector;
4339} 4772}
4340 4773
4341static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
4342 u16 selector,
4343 struct kvm_segment *kvm_seg)
4344{
4345 struct desc_struct seg_desc;
4346
4347 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
4348 return 1;
4349 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
4350 return 0;
4351}
4352
4353static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) 4774static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
4354{ 4775{
4355 struct kvm_segment segvar = { 4776 struct kvm_segment segvar = {
@@ -4367,7 +4788,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se
4367 .unusable = 0, 4788 .unusable = 0,
4368 }; 4789 };
4369 kvm_x86_ops->set_segment(vcpu, &segvar, seg); 4790 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
4370 return 0; 4791 return X86EMUL_CONTINUE;
4371} 4792}
4372 4793
4373static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) 4794static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
@@ -4377,24 +4798,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg)
4377 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); 4798 (kvm_get_rflags(vcpu) & X86_EFLAGS_VM);
4378} 4799}
4379 4800
4380int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, 4801int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg)
4381 int type_bits, int seg)
4382{ 4802{
4383 struct kvm_segment kvm_seg; 4803 struct kvm_segment kvm_seg;
4804 struct desc_struct seg_desc;
4805 u8 dpl, rpl, cpl;
4806 unsigned err_vec = GP_VECTOR;
4807 u32 err_code = 0;
4808 bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
4809 int ret;
4384 4810
4385 if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) 4811 if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu))
4386 return kvm_load_realmode_segment(vcpu, selector, seg); 4812 return kvm_load_realmode_segment(vcpu, selector, seg);
4387 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
4388 return 1;
4389 kvm_seg.type |= type_bits;
4390 4813
4391 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && 4814 /* NULL selector is not valid for TR, CS and SS */
4392 seg != VCPU_SREG_LDTR) 4815 if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
4393 if (!kvm_seg.s) 4816 && null_selector)
4394 kvm_seg.unusable = 1; 4817 goto exception;
4818
4819 /* TR should be in GDT only */
4820 if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
4821 goto exception;
4822
4823 ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc);
4824 if (ret)
4825 return ret;
4826
4827 seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg);
4828
4829 if (null_selector) { /* for NULL selector skip all following checks */
4830 kvm_seg.unusable = 1;
4831 goto load;
4832 }
4833
4834 err_code = selector & 0xfffc;
4835 err_vec = GP_VECTOR;
4395 4836
4837 /* can't load a system descriptor into a segment selector */
4838 if (seg <= VCPU_SREG_GS && !kvm_seg.s)
4839 goto exception;
4840
4841 if (!kvm_seg.present) {
4842 err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
4843 goto exception;
4844 }
4845
4846 rpl = selector & 3;
4847 dpl = kvm_seg.dpl;
4848 cpl = kvm_x86_ops->get_cpl(vcpu);
4849
4850 switch (seg) {
4851 case VCPU_SREG_SS:
4852 /*
4853 * fault if the segment is not a writable data segment, or the
4854 * selector's RPL != CPL, or the descriptor's DPL != CPL
4855 */
4856 if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl)
4857 goto exception;
4858 break;
4859 case VCPU_SREG_CS:
4860 if (!(kvm_seg.type & 8))
4861 goto exception;
4862
4863 if (kvm_seg.type & 4) {
4864 /* conforming */
4865 if (dpl > cpl)
4866 goto exception;
4867 } else {
4868 /* nonconforming */
4869 if (rpl > cpl || dpl != cpl)
4870 goto exception;
4871 }
4872 /* CS(RPL) <- CPL */
4873 selector = (selector & 0xfffc) | cpl;
4874 break;
4875 case VCPU_SREG_TR:
4876 if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9))
4877 goto exception;
4878 break;
4879 case VCPU_SREG_LDTR:
4880 if (kvm_seg.s || kvm_seg.type != 2)
4881 goto exception;
4882 break;
4883 default: /* DS, ES, FS, or GS */
4884 /*
4885 * segment is not a data or readable code segment or
4886 * ((segment is a data or nonconforming code segment)
4887 * and (both RPL and CPL > DPL))
4888 */
4889 if ((kvm_seg.type & 0xa) == 0x8 ||
4890 (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl)))
4891 goto exception;
4892 break;
4893 }
4894
4895 if (!kvm_seg.unusable && kvm_seg.s) {
4896 /* mark segment as accessed */
4897 kvm_seg.type |= 1;
4898 seg_desc.type |= 1;
4899 save_guest_segment_descriptor(vcpu, selector, &seg_desc);
4900 }
4901load:
4396 kvm_set_segment(vcpu, &kvm_seg, seg); 4902 kvm_set_segment(vcpu, &kvm_seg, seg);
4397 return 0; 4903 return X86EMUL_CONTINUE;
4904exception:
4905 kvm_queue_exception_e(vcpu, err_vec, err_code);
4906 return X86EMUL_PROPAGATE_FAULT;
4398} 4907}
4399 4908
4400static void save_state_to_tss32(struct kvm_vcpu *vcpu, 4909static void save_state_to_tss32(struct kvm_vcpu *vcpu,
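
The rewritten kvm_load_segment_descriptor() above works directly from the raw selector: bits 0-1 hold the RPL, bit 2 is the table indicator (LDT vs GDT), bits 3-15 index the descriptor table, and values 0000-0003 count as null. Those extractions in isolation:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t selector = 0x002b;                /* example: index 5, GDT, RPL 3 */

	unsigned rpl     = selector & 3;           /* requested privilege level */
	bool     in_ldt  = selector & (1 << 2);    /* table indicator bit */
	unsigned index   = selector >> 3;          /* descriptor table index */
	bool     is_null = !(selector & ~0x3);     /* 0000-0003 are null selectors */

	printf("rpl=%u ldt=%d index=%u null=%d\n", rpl, in_ldt, index, is_null);
	return 0;
}
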
@@ -4420,6 +4929,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu,
4420 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); 4929 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
4421} 4930}
4422 4931
4932static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg)
4933{
4934 struct kvm_segment kvm_seg;
4935 kvm_get_segment(vcpu, &kvm_seg, seg);
4936 kvm_seg.selector = sel;
4937 kvm_set_segment(vcpu, &kvm_seg, seg);
4938}
4939
4423static int load_state_from_tss32(struct kvm_vcpu *vcpu, 4940static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4424 struct tss_segment_32 *tss) 4941 struct tss_segment_32 *tss)
4425{ 4942{
@@ -4437,25 +4954,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu,
4437 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); 4954 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
4438 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); 4955 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
4439 4956
4440 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) 4957 /*
4958 * SDM says that segment selectors are loaded before segment
4959 * descriptors
4960 */
4961 kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR);
4962 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
4963 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
4964 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
4965 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
4966 kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS);
4967 kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS);
4968
4969 /*
4970 * Now load the segment descriptors. If a fault happens at this stage
4971 * it is handled in the context of the new task
4972 */
4973 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR))
4441 return 1; 4974 return 1;
4442 4975
4443 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 4976 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4444 return 1; 4977 return 1;
4445 4978
4446 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 4979 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4447 return 1; 4980 return 1;
4448 4981
4449 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 4982 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4450 return 1; 4983 return 1;
4451 4984
4452 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 4985 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4453 return 1; 4986 return 1;
4454 4987
4455 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) 4988 if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS))
4456 return 1; 4989 return 1;
4457 4990
4458 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) 4991 if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS))
4459 return 1; 4992 return 1;
4460 return 0; 4993 return 0;
4461} 4994}
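The new comments spell out the ordering the patch enforces: selectors are written back first (as the SDM requires), and only then are the descriptors loaded, so any fault raised while validating a descriptor is delivered in the context of the incoming task. A purely illustrative sketch of that two-phase order, expressed as a table-driven loop rather than the open-coded calls above (the seg_map table is hypothetical; the two helpers are the ones added/changed by this patch):

    /* Illustrative only: same two-phase order, driven by a small table. */
    struct seg_map { u16 sel; int seg; };

    static int load_tss_segments(struct kvm_vcpu *vcpu,
    				 const struct seg_map *map, int n)
    {
    	int i;

    	/* Phase 1: publish the raw selectors (SDM-mandated order). */
    	for (i = 0; i < n; i++)
    		kvm_load_segment_selector(vcpu, map[i].sel, map[i].seg);

    	/* Phase 2: load and validate descriptors; faults hit the new task. */
    	for (i = 0; i < n; i++)
    		if (kvm_load_segment_descriptor(vcpu, map[i].sel, map[i].seg))
    			return 1;
    	return 0;
    }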
@@ -4495,19 +5028,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu,
4495 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); 5028 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
4496 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); 5029 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
4497 5030
4498 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) 5031 /*
5032 * SDM says that segment selectors are loaded before segment
5033 * descriptors
5034 */
5035 kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR);
5036 kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES);
5037 kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS);
5038 kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS);
5039 kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS);
5040
5041 /*
 5042	 * Now load segment descriptors. If a fault happens at this stage,
 5043	 * it is handled in the context of the new task.
5044 */
5045 if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR))
4499 return 1; 5046 return 1;
4500 5047
4501 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) 5048 if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES))
4502 return 1; 5049 return 1;
4503 5050
4504 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) 5051 if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS))
4505 return 1; 5052 return 1;
4506 5053
4507 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) 5054 if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS))
4508 return 1; 5055 return 1;
4509 5056
4510 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) 5057 if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS))
4511 return 1; 5058 return 1;
4512 return 0; 5059 return 0;
4513} 5060}
@@ -4529,7 +5076,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4529 sizeof tss_segment_16)) 5076 sizeof tss_segment_16))
4530 goto out; 5077 goto out;
4531 5078
4532 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5079 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4533 &tss_segment_16, sizeof tss_segment_16)) 5080 &tss_segment_16, sizeof tss_segment_16))
4534 goto out; 5081 goto out;
4535 5082
@@ -4537,7 +5084,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
4537 tss_segment_16.prev_task_link = old_tss_sel; 5084 tss_segment_16.prev_task_link = old_tss_sel;
4538 5085
4539 if (kvm_write_guest(vcpu->kvm, 5086 if (kvm_write_guest(vcpu->kvm,
4540 get_tss_base_addr(vcpu, nseg_desc), 5087 get_tss_base_addr_write(vcpu, nseg_desc),
4541 &tss_segment_16.prev_task_link, 5088 &tss_segment_16.prev_task_link,
4542 sizeof tss_segment_16.prev_task_link)) 5089 sizeof tss_segment_16.prev_task_link))
4543 goto out; 5090 goto out;
@@ -4568,7 +5115,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4568 sizeof tss_segment_32)) 5115 sizeof tss_segment_32))
4569 goto out; 5116 goto out;
4570 5117
4571 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), 5118 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
4572 &tss_segment_32, sizeof tss_segment_32)) 5119 &tss_segment_32, sizeof tss_segment_32))
4573 goto out; 5120 goto out;
4574 5121
@@ -4576,7 +5123,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
4576 tss_segment_32.prev_task_link = old_tss_sel; 5123 tss_segment_32.prev_task_link = old_tss_sel;
4577 5124
4578 if (kvm_write_guest(vcpu->kvm, 5125 if (kvm_write_guest(vcpu->kvm,
4579 get_tss_base_addr(vcpu, nseg_desc), 5126 get_tss_base_addr_write(vcpu, nseg_desc),
4580 &tss_segment_32.prev_task_link, 5127 &tss_segment_32.prev_task_link,
4581 sizeof tss_segment_32.prev_task_link)) 5128 sizeof tss_segment_32.prev_task_link))
4582 goto out; 5129 goto out;
@@ -4599,7 +5146,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4599 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); 5146 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
4600 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); 5147 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
4601 5148
4602 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); 5149 old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
4603 5150
4604 /* FIXME: Handle errors. Failure to read either TSS or their 5151 /* FIXME: Handle errors. Failure to read either TSS or their
4605 * descriptors should generate a pagefault. 5152 * descriptors should generate a pagefault.
@@ -4658,7 +5205,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
4658 &nseg_desc); 5205 &nseg_desc);
4659 } 5206 }
4660 5207
4661 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); 5208 kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS);
4662 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); 5209 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
4663 tr_seg.type = 11; 5210 tr_seg.type = 11;
4664 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); 5211 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
@@ -4689,17 +5236,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4689 5236
4690 kvm_set_cr8(vcpu, sregs->cr8); 5237 kvm_set_cr8(vcpu, sregs->cr8);
4691 5238
4692 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; 5239 mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
4693 kvm_x86_ops->set_efer(vcpu, sregs->efer); 5240 kvm_x86_ops->set_efer(vcpu, sregs->efer);
4694 kvm_set_apic_base(vcpu, sregs->apic_base); 5241 kvm_set_apic_base(vcpu, sregs->apic_base);
4695 5242
4696 kvm_x86_ops->decache_cr4_guest_bits(vcpu); 5243 mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
4697
4698 mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
4699 kvm_x86_ops->set_cr0(vcpu, sregs->cr0); 5244 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
4700 vcpu->arch.cr0 = sregs->cr0; 5245 vcpu->arch.cr0 = sregs->cr0;
4701 5246
4702 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; 5247 mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
4703 kvm_x86_ops->set_cr4(vcpu, sregs->cr4); 5248 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
4704 if (!is_long_mode(vcpu) && is_pae(vcpu)) { 5249 if (!is_long_mode(vcpu) && is_pae(vcpu)) {
4705 load_pdptrs(vcpu, vcpu->arch.cr3); 5250 load_pdptrs(vcpu, vcpu->arch.cr3);
@@ -4734,7 +5279,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
4734 /* Older userspace won't unhalt the vcpu on reset. */ 5279 /* Older userspace won't unhalt the vcpu on reset. */
4735 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && 5280 if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 &&
4736 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && 5281 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
4737 !(vcpu->arch.cr0 & X86_CR0_PE)) 5282 !is_protmode(vcpu))
4738 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 5283 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
4739 5284
4740 vcpu_put(vcpu); 5285 vcpu_put(vcpu);
@@ -4832,11 +5377,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
4832{ 5377{
4833 unsigned long vaddr = tr->linear_address; 5378 unsigned long vaddr = tr->linear_address;
4834 gpa_t gpa; 5379 gpa_t gpa;
5380 int idx;
4835 5381
4836 vcpu_load(vcpu); 5382 vcpu_load(vcpu);
4837 down_read(&vcpu->kvm->slots_lock); 5383 idx = srcu_read_lock(&vcpu->kvm->srcu);
4838 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); 5384 gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
4839 up_read(&vcpu->kvm->slots_lock); 5385 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4840 tr->physical_address = gpa; 5386 tr->physical_address = gpa;
4841 tr->valid = gpa != UNMAPPED_GVA; 5387 tr->valid = gpa != UNMAPPED_GVA;
4842 tr->writeable = 1; 5388 tr->writeable = 1;
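This hunk is representative of the locking conversion that runs through the series: the read side of slots_lock becomes an SRCU read-side critical section, with the index returned by srcu_read_lock() handed back to srcu_read_unlock(). Writers are then expected to publish a new memslots pointer and wait with synchronize_srcu() instead of excluding readers. A condensed before/after sketch of the reader side:

    /* Before: reader blocks memslot writers via the rw-semaphore. */
    down_read(&vcpu->kvm->slots_lock);
    gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
    up_read(&vcpu->kvm->slots_lock);

    /* After: readers run lock-free under SRCU. */
    idx = srcu_read_lock(&vcpu->kvm->srcu);
    gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
    srcu_read_unlock(&vcpu->kvm->srcu, idx);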
@@ -4917,14 +5463,14 @@ EXPORT_SYMBOL_GPL(fx_init);
4917 5463
4918void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 5464void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
4919{ 5465{
4920 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) 5466 if (vcpu->guest_fpu_loaded)
4921 return; 5467 return;
4922 5468
4923 vcpu->guest_fpu_loaded = 1; 5469 vcpu->guest_fpu_loaded = 1;
4924 kvm_fx_save(&vcpu->arch.host_fx_image); 5470 kvm_fx_save(&vcpu->arch.host_fx_image);
4925 kvm_fx_restore(&vcpu->arch.guest_fx_image); 5471 kvm_fx_restore(&vcpu->arch.guest_fx_image);
5472 trace_kvm_fpu(1);
4926} 5473}
4927EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
4928 5474
4929void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 5475void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4930{ 5476{
@@ -4935,8 +5481,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
4935 kvm_fx_save(&vcpu->arch.guest_fx_image); 5481 kvm_fx_save(&vcpu->arch.guest_fx_image);
4936 kvm_fx_restore(&vcpu->arch.host_fx_image); 5482 kvm_fx_restore(&vcpu->arch.host_fx_image);
4937 ++vcpu->stat.fpu_reload; 5483 ++vcpu->stat.fpu_reload;
5484 set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests);
5485 trace_kvm_fpu(0);
4938} 5486}
4939EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
4940 5487
4941void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) 5488void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
4942{ 5489{
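Instead of deactivating the FPU synchronously, kvm_put_guest_fpu() now only queues KVM_REQ_DEACTIVATE_FPU; the actual deactivation happens lazily on the next entry into the guest. A sketch of how the request is presumably consumed there, assuming the fpu_deactivate() callback added to kvm_x86_ops elsewhere in this series:

    /* Hypothetical fragment of vcpu_enter_guest(); callback name assumed. */
    if (vcpu->requests) {
    	if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) {
    		vcpu->fpu_active = 0;
    		kvm_x86_ops->fpu_deactivate(vcpu);
    	}
    }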
@@ -5088,11 +5635,13 @@ fail:
5088 5635
5089void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) 5636void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
5090{ 5637{
5638 int idx;
5639
5091 kfree(vcpu->arch.mce_banks); 5640 kfree(vcpu->arch.mce_banks);
5092 kvm_free_lapic(vcpu); 5641 kvm_free_lapic(vcpu);
5093 down_read(&vcpu->kvm->slots_lock); 5642 idx = srcu_read_lock(&vcpu->kvm->srcu);
5094 kvm_mmu_destroy(vcpu); 5643 kvm_mmu_destroy(vcpu);
5095 up_read(&vcpu->kvm->slots_lock); 5644 srcu_read_unlock(&vcpu->kvm->srcu, idx);
5096 free_page((unsigned long)vcpu->arch.pio_data); 5645 free_page((unsigned long)vcpu->arch.pio_data);
5097} 5646}
5098 5647
@@ -5103,6 +5652,12 @@ struct kvm *kvm_arch_create_vm(void)
5103 if (!kvm) 5652 if (!kvm)
5104 return ERR_PTR(-ENOMEM); 5653 return ERR_PTR(-ENOMEM);
5105 5654
5655 kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL);
5656 if (!kvm->arch.aliases) {
5657 kfree(kvm);
5658 return ERR_PTR(-ENOMEM);
5659 }
5660
5106 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); 5661 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
5107 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); 5662 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
5108 5663
@@ -5159,16 +5714,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
5159 put_page(kvm->arch.apic_access_page); 5714 put_page(kvm->arch.apic_access_page);
5160 if (kvm->arch.ept_identity_pagetable) 5715 if (kvm->arch.ept_identity_pagetable)
5161 put_page(kvm->arch.ept_identity_pagetable); 5716 put_page(kvm->arch.ept_identity_pagetable);
5717 cleanup_srcu_struct(&kvm->srcu);
5718 kfree(kvm->arch.aliases);
5162 kfree(kvm); 5719 kfree(kvm);
5163} 5720}
5164 5721
5165int kvm_arch_set_memory_region(struct kvm *kvm, 5722int kvm_arch_prepare_memory_region(struct kvm *kvm,
5166 struct kvm_userspace_memory_region *mem, 5723 struct kvm_memory_slot *memslot,
5167 struct kvm_memory_slot old, 5724 struct kvm_memory_slot old,
5725 struct kvm_userspace_memory_region *mem,
5168 int user_alloc) 5726 int user_alloc)
5169{ 5727{
5170 int npages = mem->memory_size >> PAGE_SHIFT; 5728 int npages = memslot->npages;
5171 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
5172 5729
 5173	/* To keep backward compatibility with older userspace,	 5730	/* To keep backward compatibility with older userspace,
 5174	 * x86 needs to handle the !user_alloc case.	 5731	 * x86 needs to handle the !user_alloc case.
@@ -5188,26 +5745,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
5188 if (IS_ERR((void *)userspace_addr)) 5745 if (IS_ERR((void *)userspace_addr))
5189 return PTR_ERR((void *)userspace_addr); 5746 return PTR_ERR((void *)userspace_addr);
5190 5747
5191 /* set userspace_addr atomically for kvm_hva_to_rmapp */
5192 spin_lock(&kvm->mmu_lock);
5193 memslot->userspace_addr = userspace_addr; 5748 memslot->userspace_addr = userspace_addr;
5194 spin_unlock(&kvm->mmu_lock);
5195 } else {
5196 if (!old.user_alloc && old.rmap) {
5197 int ret;
5198
5199 down_write(&current->mm->mmap_sem);
5200 ret = do_munmap(current->mm, old.userspace_addr,
5201 old.npages * PAGE_SIZE);
5202 up_write(&current->mm->mmap_sem);
5203 if (ret < 0)
5204 printk(KERN_WARNING
5205 "kvm_vm_ioctl_set_memory_region: "
5206 "failed to munmap memory\n");
5207 }
5208 } 5749 }
5209 } 5750 }
5210 5751
5752
5753 return 0;
5754}
5755
5756void kvm_arch_commit_memory_region(struct kvm *kvm,
5757 struct kvm_userspace_memory_region *mem,
5758 struct kvm_memory_slot old,
5759 int user_alloc)
5760{
5761
5762 int npages = mem->memory_size >> PAGE_SHIFT;
5763
5764 if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
5765 int ret;
5766
5767 down_write(&current->mm->mmap_sem);
5768 ret = do_munmap(current->mm, old.userspace_addr,
5769 old.npages * PAGE_SIZE);
5770 up_write(&current->mm->mmap_sem);
5771 if (ret < 0)
5772 printk(KERN_WARNING
5773 "kvm_vm_ioctl_set_memory_region: "
5774 "failed to munmap memory\n");
5775 }
5776
5211 spin_lock(&kvm->mmu_lock); 5777 spin_lock(&kvm->mmu_lock);
5212 if (!kvm->arch.n_requested_mmu_pages) { 5778 if (!kvm->arch.n_requested_mmu_pages) {
5213 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); 5779 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -5216,8 +5782,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
5216 5782
5217 kvm_mmu_slot_remove_write_access(kvm, mem->slot); 5783 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
5218 spin_unlock(&kvm->mmu_lock); 5784 spin_unlock(&kvm->mmu_lock);
5219
5220 return 0;
5221} 5785}
5222 5786
5223void kvm_arch_flush_shadow(struct kvm *kvm) 5787void kvm_arch_flush_shadow(struct kvm *kvm)
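The single kvm_arch_set_memory_region() hook is split in two: a prepare step that may fail and runs before the new memslot array is published, and a commit step that runs afterwards and must not fail. An illustrative sketch of how the generic __kvm_set_memory_region() path is presumably expected to drive the pair (slot bookkeeping and error unwinding elided; variable names assumed):

    r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
    if (r)
    	goto out_free;

    /* Publish the new slot array, then wait out readers of the old one. */
    old_memslots = kvm->memslots;
    rcu_assign_pointer(kvm->memslots, slots);
    synchronize_srcu_expedited(&kvm->srcu);

    kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
    kfree(old_memslots);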
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5eadea585d2a..2d101639bd8d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,6 +2,7 @@
2#define ARCH_X86_KVM_X86_H 2#define ARCH_X86_KVM_X86_H
3 3
4#include <linux/kvm_host.h> 4#include <linux/kvm_host.h>
5#include "kvm_cache_regs.h"
5 6
6static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) 7static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
7{ 8{
@@ -35,4 +36,33 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
35struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, 36struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
36 u32 function, u32 index); 37 u32 function, u32 index);
37 38
39static inline bool is_protmode(struct kvm_vcpu *vcpu)
40{
41 return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
42}
43
44static inline int is_long_mode(struct kvm_vcpu *vcpu)
45{
46#ifdef CONFIG_X86_64
47 return vcpu->arch.efer & EFER_LMA;
48#else
49 return 0;
50#endif
51}
52
53static inline int is_pae(struct kvm_vcpu *vcpu)
54{
55 return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
56}
57
58static inline int is_pse(struct kvm_vcpu *vcpu)
59{
60 return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
61}
62
63static inline int is_paging(struct kvm_vcpu *vcpu)
64{
65 return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
66}
67
38#endif 68#endif
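These helpers read the control-register bits through the register cache (kvm_read_cr0_bits()/kvm_read_cr4_bits() from kvm_cache_regs.h) rather than poking vcpu->arch directly, which is what lets call sites elsewhere in this patch shrink, for example:

    /* Before */
    if (!(vcpu->arch.cr0 & X86_CR0_PE))
    	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;

    /* After */
    if (!is_protmode(vcpu))
    	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;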
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a24de0b1858e..60df9c84ecae 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -103,7 +103,7 @@ struct kvm_userspace_memory_region {
103 103
104/* for kvm_memory_region::flags */ 104/* for kvm_memory_region::flags */
105#define KVM_MEM_LOG_DIRTY_PAGES 1UL 105#define KVM_MEM_LOG_DIRTY_PAGES 1UL
106 106#define KVM_MEMSLOT_INVALID (1UL << 1)
107 107
108/* for KVM_IRQ_LINE */ 108/* for KVM_IRQ_LINE */
109struct kvm_irq_level { 109struct kvm_irq_level {
@@ -497,6 +497,11 @@ struct kvm_ioeventfd {
497#endif 497#endif
498#define KVM_CAP_S390_PSW 42 498#define KVM_CAP_S390_PSW 42
499#define KVM_CAP_PPC_SEGSTATE 43 499#define KVM_CAP_PPC_SEGSTATE 43
500#define KVM_CAP_HYPERV 44
501#define KVM_CAP_HYPERV_VAPIC 45
502#define KVM_CAP_HYPERV_SPIN 46
503#define KVM_CAP_PCI_SEGMENT 47
504#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
500 505
501#ifdef KVM_CAP_IRQ_ROUTING 506#ifdef KVM_CAP_IRQ_ROUTING
502 507
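Userspace discovers the new capabilities through the usual KVM_CHECK_EXTENSION ioctl on the /dev/kvm fd. A minimal probe for the singlestep capability added above might look like this (error handling abbreviated):

    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>
    #include <stdio.h>

    int main(void)
    {
    	int kvm = open("/dev/kvm", O_RDWR);
    	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_ROBUST_SINGLESTEP);

    	printf("robust singlestep: %s\n", r > 0 ? "yes" : "no");
    	close(kvm);
    	return 0;
    }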
@@ -691,8 +696,9 @@ struct kvm_assigned_pci_dev {
691 __u32 busnr; 696 __u32 busnr;
692 __u32 devfn; 697 __u32 devfn;
693 __u32 flags; 698 __u32 flags;
699 __u32 segnr;
694 union { 700 union {
695 __u32 reserved[12]; 701 __u32 reserved[11];
696 }; 702 };
697}; 703};
698 704
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index bd5a616d9373..a3fd0f91d943 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
38#define KVM_REQ_MMU_SYNC 7 38#define KVM_REQ_MMU_SYNC 7
39#define KVM_REQ_KVMCLOCK_UPDATE 8 39#define KVM_REQ_KVMCLOCK_UPDATE 8
40#define KVM_REQ_KICK 9 40#define KVM_REQ_KICK 9
41#define KVM_REQ_DEACTIVATE_FPU 10
41 42
42#define KVM_USERSPACE_IRQ_SOURCE_ID 0 43#define KVM_USERSPACE_IRQ_SOURCE_ID 0
43 44
@@ -57,20 +58,20 @@ struct kvm_io_bus {
57 struct kvm_io_device *devs[NR_IOBUS_DEVS]; 58 struct kvm_io_device *devs[NR_IOBUS_DEVS];
58}; 59};
59 60
60void kvm_io_bus_init(struct kvm_io_bus *bus); 61enum kvm_bus {
61void kvm_io_bus_destroy(struct kvm_io_bus *bus); 62 KVM_MMIO_BUS,
62int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len, 63 KVM_PIO_BUS,
63 const void *val); 64 KVM_NR_BUSES
64int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, 65};
66
67int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
68 int len, const void *val);
69int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
65 void *val); 70 void *val);
66int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, 71int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
67 struct kvm_io_device *dev);
68int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
69 struct kvm_io_device *dev); 72 struct kvm_io_device *dev);
70void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, 73int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
71 struct kvm_io_device *dev); 74 struct kvm_io_device *dev);
72void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
73 struct kvm_io_device *dev);
74 75
75struct kvm_vcpu { 76struct kvm_vcpu {
76 struct kvm *kvm; 77 struct kvm *kvm;
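With the buses now owned by struct kvm and addressed by an enum kvm_bus index, in-kernel devices register under slots_lock instead of manipulating a bus embedded in the VM. A small sketch of the registration pattern the later hunks (coalesced MMIO, ioapic) follow:

    static int register_mmio_dev(struct kvm *kvm, struct kvm_io_device *dev)
    {
    	int ret;

    	/* dev must already have been set up with kvm_iodevice_init(). */
    	mutex_lock(&kvm->slots_lock);
    	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dev);
    	mutex_unlock(&kvm->slots_lock);
    	return ret;	/* < 0 if the bus is full or allocation failed */
    }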
@@ -83,6 +84,8 @@ struct kvm_vcpu {
83 struct kvm_run *run; 84 struct kvm_run *run;
84 unsigned long requests; 85 unsigned long requests;
85 unsigned long guest_debug; 86 unsigned long guest_debug;
87 int srcu_idx;
88
86 int fpu_active; 89 int fpu_active;
87 int guest_fpu_loaded; 90 int guest_fpu_loaded;
88 wait_queue_head_t wq; 91 wait_queue_head_t wq;
@@ -150,14 +153,19 @@ struct kvm_irq_routing_table {};
150 153
151#endif 154#endif
152 155
153struct kvm { 156struct kvm_memslots {
154 spinlock_t mmu_lock;
155 spinlock_t requests_lock;
156 struct rw_semaphore slots_lock;
157 struct mm_struct *mm; /* userspace tied to this vm */
158 int nmemslots; 157 int nmemslots;
159 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + 158 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
160 KVM_PRIVATE_MEM_SLOTS]; 159 KVM_PRIVATE_MEM_SLOTS];
160};
161
162struct kvm {
163 spinlock_t mmu_lock;
164 raw_spinlock_t requests_lock;
165 struct mutex slots_lock;
166 struct mm_struct *mm; /* userspace tied to this vm */
167 struct kvm_memslots *memslots;
168 struct srcu_struct srcu;
161#ifdef CONFIG_KVM_APIC_ARCHITECTURE 169#ifdef CONFIG_KVM_APIC_ARCHITECTURE
162 u32 bsp_vcpu_id; 170 u32 bsp_vcpu_id;
163 struct kvm_vcpu *bsp_vcpu; 171 struct kvm_vcpu *bsp_vcpu;
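struct kvm_memslots is now a separately allocated object reached through an RCU-protected pointer, so readers dereference it inside an SRCU section rather than holding slots_lock. The iommu changes later in this patch show the pattern; sketched here in isolation (the helper name is made up, and the caller is assumed to hold srcu_read_lock(&kvm->srcu)):

    /* Caller is assumed to hold srcu_read_lock(&kvm->srcu). */
    static unsigned long count_guest_pages(struct kvm *kvm)
    {
    	struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
    	unsigned long pages = 0;
    	int i;

    	for (i = 0; i < slots->nmemslots; i++)
    		pages += slots->memslots[i].npages;
    	return pages;
    }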
@@ -166,8 +174,7 @@ struct kvm {
166 atomic_t online_vcpus; 174 atomic_t online_vcpus;
167 struct list_head vm_list; 175 struct list_head vm_list;
168 struct mutex lock; 176 struct mutex lock;
169 struct kvm_io_bus mmio_bus; 177 struct kvm_io_bus *buses[KVM_NR_BUSES];
170 struct kvm_io_bus pio_bus;
171#ifdef CONFIG_HAVE_KVM_EVENTFD 178#ifdef CONFIG_HAVE_KVM_EVENTFD
172 struct { 179 struct {
173 spinlock_t lock; 180 spinlock_t lock;
@@ -249,13 +256,20 @@ int kvm_set_memory_region(struct kvm *kvm,
249int __kvm_set_memory_region(struct kvm *kvm, 256int __kvm_set_memory_region(struct kvm *kvm,
250 struct kvm_userspace_memory_region *mem, 257 struct kvm_userspace_memory_region *mem,
251 int user_alloc); 258 int user_alloc);
252int kvm_arch_set_memory_region(struct kvm *kvm, 259int kvm_arch_prepare_memory_region(struct kvm *kvm,
260 struct kvm_memory_slot *memslot,
261 struct kvm_memory_slot old,
262 struct kvm_userspace_memory_region *mem,
263 int user_alloc);
264void kvm_arch_commit_memory_region(struct kvm *kvm,
253 struct kvm_userspace_memory_region *mem, 265 struct kvm_userspace_memory_region *mem,
254 struct kvm_memory_slot old, 266 struct kvm_memory_slot old,
255 int user_alloc); 267 int user_alloc);
256void kvm_disable_largepages(void); 268void kvm_disable_largepages(void);
257void kvm_arch_flush_shadow(struct kvm *kvm); 269void kvm_arch_flush_shadow(struct kvm *kvm);
258gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); 270gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
271gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
272
259struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); 273struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
260unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); 274unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
261void kvm_release_page_clean(struct page *page); 275void kvm_release_page_clean(struct page *page);
@@ -264,6 +278,9 @@ void kvm_set_page_dirty(struct page *page);
264void kvm_set_page_accessed(struct page *page); 278void kvm_set_page_accessed(struct page *page);
265 279
266pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); 280pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
281pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
282 struct kvm_memory_slot *slot, gfn_t gfn);
283int memslot_id(struct kvm *kvm, gfn_t gfn);
267void kvm_release_pfn_dirty(pfn_t); 284void kvm_release_pfn_dirty(pfn_t);
268void kvm_release_pfn_clean(pfn_t pfn); 285void kvm_release_pfn_clean(pfn_t pfn);
269void kvm_set_pfn_dirty(pfn_t pfn); 286void kvm_set_pfn_dirty(pfn_t pfn);
@@ -283,6 +300,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
283int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); 300int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
284struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 301struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
285int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); 302int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
303unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
286void mark_page_dirty(struct kvm *kvm, gfn_t gfn); 304void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
287 305
288void kvm_vcpu_block(struct kvm_vcpu *vcpu); 306void kvm_vcpu_block(struct kvm_vcpu *vcpu);
@@ -383,6 +401,7 @@ struct kvm_assigned_dev_kernel {
383 struct work_struct interrupt_work; 401 struct work_struct interrupt_work;
384 struct list_head list; 402 struct list_head list;
385 int assigned_dev_id; 403 int assigned_dev_id;
404 int host_segnr;
386 int host_busnr; 405 int host_busnr;
387 int host_devfn; 406 int host_devfn;
388 unsigned int entries_nr; 407 unsigned int entries_nr;
@@ -429,8 +448,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
429#define KVM_IOMMU_CACHE_COHERENCY 0x1 448#define KVM_IOMMU_CACHE_COHERENCY 0x1
430 449
431#ifdef CONFIG_IOMMU_API 450#ifdef CONFIG_IOMMU_API
432int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, 451int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
433 unsigned long npages);
434int kvm_iommu_map_guest(struct kvm *kvm); 452int kvm_iommu_map_guest(struct kvm *kvm);
435int kvm_iommu_unmap_guest(struct kvm *kvm); 453int kvm_iommu_unmap_guest(struct kvm *kvm);
436int kvm_assign_device(struct kvm *kvm, 454int kvm_assign_device(struct kvm *kvm,
@@ -480,11 +498,6 @@ static inline void kvm_guest_exit(void)
480 current->flags &= ~PF_VCPU; 498 current->flags &= ~PF_VCPU;
481} 499}
482 500
483static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
484{
485 return slot - kvm->memslots;
486}
487
488static inline gpa_t gfn_to_gpa(gfn_t gfn) 501static inline gpa_t gfn_to_gpa(gfn_t gfn)
489{ 502{
490 return (gpa_t)gfn << PAGE_SHIFT; 503 return (gpa_t)gfn << PAGE_SHIFT;
@@ -532,6 +545,10 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
532} 545}
533#endif 546#endif
534 547
548#ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION
549#define unalias_gfn_instantiation unalias_gfn
550#endif
551
535#ifdef CONFIG_HAVE_KVM_IRQCHIP 552#ifdef CONFIG_HAVE_KVM_IRQCHIP
536 553
537#define KVM_MAX_IRQ_ROUTES 1024 554#define KVM_MAX_IRQ_ROUTES 1024
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index dbe108455275..b17d49dfc3ef 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -145,6 +145,47 @@ TRACE_EVENT(kvm_mmio,
145 __entry->len, __entry->gpa, __entry->val) 145 __entry->len, __entry->gpa, __entry->val)
146); 146);
147 147
148#define kvm_fpu_load_symbol \
149 {0, "unload"}, \
150 {1, "load"}
151
152TRACE_EVENT(kvm_fpu,
153 TP_PROTO(int load),
154 TP_ARGS(load),
155
156 TP_STRUCT__entry(
157 __field( u32, load )
158 ),
159
160 TP_fast_assign(
161 __entry->load = load;
162 ),
163
164 TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
165);
166
167TRACE_EVENT(kvm_age_page,
168 TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
169 TP_ARGS(hva, slot, ref),
170
171 TP_STRUCT__entry(
172 __field( u64, hva )
173 __field( u64, gfn )
174 __field( u8, referenced )
175 ),
176
177 TP_fast_assign(
178 __entry->hva = hva;
179 __entry->gfn =
180 slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
181 __entry->referenced = ref;
182 ),
183
184 TP_printk("hva %llx gfn %llx %s",
185 __entry->hva, __entry->gfn,
186 __entry->referenced ? "YOUNG" : "OLD")
187);
188
148#endif /* _TRACE_KVM_MAIN_H */ 189#endif /* _TRACE_KVM_MAIN_H */
149 190
150/* This part must be outside protection */ 191/* This part must be outside protection */
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index daece36c0a57..7f1178f6b839 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD
12 12
13config KVM_APIC_ARCHITECTURE 13config KVM_APIC_ARCHITECTURE
14 bool 14 bool
15
16config KVM_MMIO
17 bool
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index f73de631e3ee..057e2cca6af5 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -504,12 +504,12 @@ out:
504static int kvm_vm_ioctl_assign_device(struct kvm *kvm, 504static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
505 struct kvm_assigned_pci_dev *assigned_dev) 505 struct kvm_assigned_pci_dev *assigned_dev)
506{ 506{
507 int r = 0; 507 int r = 0, idx;
508 struct kvm_assigned_dev_kernel *match; 508 struct kvm_assigned_dev_kernel *match;
509 struct pci_dev *dev; 509 struct pci_dev *dev;
510 510
511 mutex_lock(&kvm->lock); 511 mutex_lock(&kvm->lock);
512 down_read(&kvm->slots_lock); 512 idx = srcu_read_lock(&kvm->srcu);
513 513
514 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, 514 match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
515 assigned_dev->assigned_dev_id); 515 assigned_dev->assigned_dev_id);
@@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
526 r = -ENOMEM; 526 r = -ENOMEM;
527 goto out; 527 goto out;
528 } 528 }
529 dev = pci_get_bus_and_slot(assigned_dev->busnr, 529 dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
530 assigned_dev->busnr,
530 assigned_dev->devfn); 531 assigned_dev->devfn);
531 if (!dev) { 532 if (!dev) {
532 printk(KERN_INFO "%s: host device not found\n", __func__); 533 printk(KERN_INFO "%s: host device not found\n", __func__);
@@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
548 pci_reset_function(dev); 549 pci_reset_function(dev);
549 550
550 match->assigned_dev_id = assigned_dev->assigned_dev_id; 551 match->assigned_dev_id = assigned_dev->assigned_dev_id;
552 match->host_segnr = assigned_dev->segnr;
551 match->host_busnr = assigned_dev->busnr; 553 match->host_busnr = assigned_dev->busnr;
552 match->host_devfn = assigned_dev->devfn; 554 match->host_devfn = assigned_dev->devfn;
553 match->flags = assigned_dev->flags; 555 match->flags = assigned_dev->flags;
@@ -573,7 +575,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
573 } 575 }
574 576
575out: 577out:
576 up_read(&kvm->slots_lock); 578 srcu_read_unlock(&kvm->srcu, idx);
577 mutex_unlock(&kvm->lock); 579 mutex_unlock(&kvm->lock);
578 return r; 580 return r;
579out_list_del: 581out_list_del:
@@ -585,7 +587,7 @@ out_put:
585 pci_dev_put(dev); 587 pci_dev_put(dev);
586out_free: 588out_free:
587 kfree(match); 589 kfree(match);
588 up_read(&kvm->slots_lock); 590 srcu_read_unlock(&kvm->srcu, idx);
589 mutex_unlock(&kvm->lock); 591 mutex_unlock(&kvm->lock);
590 return r; 592 return r;
591} 593}
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 04d69cd7049b..5169736377a3 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -92,41 +92,64 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
92int kvm_coalesced_mmio_init(struct kvm *kvm) 92int kvm_coalesced_mmio_init(struct kvm *kvm)
93{ 93{
94 struct kvm_coalesced_mmio_dev *dev; 94 struct kvm_coalesced_mmio_dev *dev;
95 struct page *page;
95 int ret; 96 int ret;
96 97
98 ret = -ENOMEM;
99 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
100 if (!page)
101 goto out_err;
102 kvm->coalesced_mmio_ring = page_address(page);
103
104 ret = -ENOMEM;
97 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); 105 dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
98 if (!dev) 106 if (!dev)
99 return -ENOMEM; 107 goto out_free_page;
100 spin_lock_init(&dev->lock); 108 spin_lock_init(&dev->lock);
101 kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); 109 kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
102 dev->kvm = kvm; 110 dev->kvm = kvm;
103 kvm->coalesced_mmio_dev = dev; 111 kvm->coalesced_mmio_dev = dev;
104 112
105 ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); 113 mutex_lock(&kvm->slots_lock);
114 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
115 mutex_unlock(&kvm->slots_lock);
106 if (ret < 0) 116 if (ret < 0)
107 kfree(dev); 117 goto out_free_dev;
118
119 return ret;
108 120
121out_free_dev:
122 kfree(dev);
123out_free_page:
124 __free_page(page);
125out_err:
109 return ret; 126 return ret;
110} 127}
111 128
129void kvm_coalesced_mmio_free(struct kvm *kvm)
130{
131 if (kvm->coalesced_mmio_ring)
132 free_page((unsigned long)kvm->coalesced_mmio_ring);
133}
134
112int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, 135int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
113 struct kvm_coalesced_mmio_zone *zone) 136 struct kvm_coalesced_mmio_zone *zone)
114{ 137{
115 struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; 138 struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
116 139
117 if (dev == NULL) 140 if (dev == NULL)
118 return -EINVAL; 141 return -EINVAL;
119 142
120 down_write(&kvm->slots_lock); 143 mutex_lock(&kvm->slots_lock);
121 if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { 144 if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
122 up_write(&kvm->slots_lock); 145 mutex_unlock(&kvm->slots_lock);
123 return -ENOBUFS; 146 return -ENOBUFS;
124 } 147 }
125 148
126 dev->zone[dev->nb_zones] = *zone; 149 dev->zone[dev->nb_zones] = *zone;
127 dev->nb_zones++; 150 dev->nb_zones++;
128 151
129 up_write(&kvm->slots_lock); 152 mutex_unlock(&kvm->slots_lock);
130 return 0; 153 return 0;
131} 154}
132 155
@@ -140,10 +163,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
140 if (dev == NULL) 163 if (dev == NULL)
141 return -EINVAL; 164 return -EINVAL;
142 165
143 down_write(&kvm->slots_lock); 166 mutex_lock(&kvm->slots_lock);
144 167
145 i = dev->nb_zones; 168 i = dev->nb_zones;
146 while(i) { 169 while (i) {
147 z = &dev->zone[i - 1]; 170 z = &dev->zone[i - 1];
148 171
149 /* unregister all zones 172 /* unregister all zones
@@ -158,7 +181,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
158 i--; 181 i--;
159 } 182 }
160 183
161 up_write(&kvm->slots_lock); 184 mutex_unlock(&kvm->slots_lock);
162 185
163 return 0; 186 return 0;
164} 187}
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 4b49f27fa31e..8a5959e3535f 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -1,3 +1,6 @@
1#ifndef __KVM_COALESCED_MMIO_H__
2#define __KVM_COALESCED_MMIO_H__
3
1/* 4/*
2 * KVM coalesced MMIO 5 * KVM coalesced MMIO
3 * 6 *
@@ -7,6 +10,8 @@
7 * 10 *
8 */ 11 */
9 12
13#ifdef CONFIG_KVM_MMIO
14
10#define KVM_COALESCED_MMIO_ZONE_MAX 100 15#define KVM_COALESCED_MMIO_ZONE_MAX 100
11 16
12struct kvm_coalesced_mmio_dev { 17struct kvm_coalesced_mmio_dev {
@@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev {
18}; 23};
19 24
20int kvm_coalesced_mmio_init(struct kvm *kvm); 25int kvm_coalesced_mmio_init(struct kvm *kvm);
26void kvm_coalesced_mmio_free(struct kvm *kvm);
21int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, 27int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
22 struct kvm_coalesced_mmio_zone *zone); 28 struct kvm_coalesced_mmio_zone *zone);
23int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, 29int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
24 struct kvm_coalesced_mmio_zone *zone); 30 struct kvm_coalesced_mmio_zone *zone);
31
32#else
33
34static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
35static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
36
37#endif
38
39#endif
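Because the header now supplies empty static inline stubs when CONFIG_KVM_MMIO is not set, generic code can call the hooks unconditionally instead of wrapping each call site in preprocessor guards, as the kvm_destroy_vm() hunk later in this patch does:

    /* kvm_main.c can now say simply: */
    kvm_coalesced_mmio_free(kvm);
    /*
     * ...instead of the old open-coded, ifdef-guarded form:
     * #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
     *	if (kvm->coalesced_mmio_ring != NULL)
     *		free_page((unsigned long)kvm->coalesced_mmio_ring);
     * #endif
     */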
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a9d3fc6c681c..7016319b1ec0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -47,7 +47,6 @@ struct _irqfd {
47 int gsi; 47 int gsi;
48 struct list_head list; 48 struct list_head list;
49 poll_table pt; 49 poll_table pt;
50 wait_queue_head_t *wqh;
51 wait_queue_t wait; 50 wait_queue_t wait;
52 struct work_struct inject; 51 struct work_struct inject;
53 struct work_struct shutdown; 52 struct work_struct shutdown;
@@ -159,8 +158,6 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
159 poll_table *pt) 158 poll_table *pt)
160{ 159{
161 struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); 160 struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
162
163 irqfd->wqh = wqh;
164 add_wait_queue(wqh, &irqfd->wait); 161 add_wait_queue(wqh, &irqfd->wait);
165} 162}
166 163
@@ -463,7 +460,7 @@ static int
463kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 460kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
464{ 461{
465 int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; 462 int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
466 struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; 463 enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
467 struct _ioeventfd *p; 464 struct _ioeventfd *p;
468 struct eventfd_ctx *eventfd; 465 struct eventfd_ctx *eventfd;
469 int ret; 466 int ret;
@@ -508,7 +505,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
508 else 505 else
509 p->wildcard = true; 506 p->wildcard = true;
510 507
511 down_write(&kvm->slots_lock); 508 mutex_lock(&kvm->slots_lock);
512 509
 513	 /* Verify that there isn't a match already */	 510	 /* Verify that there isn't a match already */
514 if (ioeventfd_check_collision(kvm, p)) { 511 if (ioeventfd_check_collision(kvm, p)) {
@@ -518,18 +515,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
518 515
519 kvm_iodevice_init(&p->dev, &ioeventfd_ops); 516 kvm_iodevice_init(&p->dev, &ioeventfd_ops);
520 517
521 ret = __kvm_io_bus_register_dev(bus, &p->dev); 518 ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
522 if (ret < 0) 519 if (ret < 0)
523 goto unlock_fail; 520 goto unlock_fail;
524 521
525 list_add_tail(&p->list, &kvm->ioeventfds); 522 list_add_tail(&p->list, &kvm->ioeventfds);
526 523
527 up_write(&kvm->slots_lock); 524 mutex_unlock(&kvm->slots_lock);
528 525
529 return 0; 526 return 0;
530 527
531unlock_fail: 528unlock_fail:
532 up_write(&kvm->slots_lock); 529 mutex_unlock(&kvm->slots_lock);
533 530
534fail: 531fail:
535 kfree(p); 532 kfree(p);
@@ -542,7 +539,7 @@ static int
542kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) 539kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
543{ 540{
544 int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; 541 int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
545 struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; 542 enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
546 struct _ioeventfd *p, *tmp; 543 struct _ioeventfd *p, *tmp;
547 struct eventfd_ctx *eventfd; 544 struct eventfd_ctx *eventfd;
548 int ret = -ENOENT; 545 int ret = -ENOENT;
@@ -551,7 +548,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
551 if (IS_ERR(eventfd)) 548 if (IS_ERR(eventfd))
552 return PTR_ERR(eventfd); 549 return PTR_ERR(eventfd);
553 550
554 down_write(&kvm->slots_lock); 551 mutex_lock(&kvm->slots_lock);
555 552
556 list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { 553 list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
557 bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); 554 bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
@@ -565,13 +562,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
565 if (!p->wildcard && p->datamatch != args->datamatch) 562 if (!p->wildcard && p->datamatch != args->datamatch)
566 continue; 563 continue;
567 564
568 __kvm_io_bus_unregister_dev(bus, &p->dev); 565 kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
569 ioeventfd_release(p); 566 ioeventfd_release(p);
570 ret = 0; 567 ret = 0;
571 break; 568 break;
572 } 569 }
573 570
574 up_write(&kvm->slots_lock); 571 mutex_unlock(&kvm->slots_lock);
575 572
576 eventfd_ctx_put(eventfd); 573 eventfd_ctx_put(eventfd);
577 574
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 38a2d20b89de..3db15a807f80 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -100,6 +100,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
100 return injected; 100 return injected;
101} 101}
102 102
103static void update_handled_vectors(struct kvm_ioapic *ioapic)
104{
105 DECLARE_BITMAP(handled_vectors, 256);
106 int i;
107
108 memset(handled_vectors, 0, sizeof(handled_vectors));
109 for (i = 0; i < IOAPIC_NUM_PINS; ++i)
110 __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
111 memcpy(ioapic->handled_vectors, handled_vectors,
112 sizeof(handled_vectors));
113 smp_wmb();
114}
115
103static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) 116static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
104{ 117{
105 unsigned index; 118 unsigned index;
@@ -134,6 +147,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
134 e->bits |= (u32) val; 147 e->bits |= (u32) val;
135 e->fields.remote_irr = 0; 148 e->fields.remote_irr = 0;
136 } 149 }
150 update_handled_vectors(ioapic);
137 mask_after = e->fields.mask; 151 mask_after = e->fields.mask;
138 if (mask_before != mask_after) 152 if (mask_before != mask_after)
139 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); 153 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
@@ -241,6 +255,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
241{ 255{
242 struct kvm_ioapic *ioapic = kvm->arch.vioapic; 256 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
243 257
258 smp_rmb();
259 if (!test_bit(vector, ioapic->handled_vectors))
260 return;
244 mutex_lock(&ioapic->lock); 261 mutex_lock(&ioapic->lock);
245 __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); 262 __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
246 mutex_unlock(&ioapic->lock); 263 mutex_unlock(&ioapic->lock);
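update_handled_vectors() publishes the bitmap with smp_wmb() so that this EOI fast path can test it without taking ioapic->lock; the smp_rmb() above pairs with that write barrier. A condensed sketch of the publish/consume pairing, with the surrounding code elided:

    /* Writer (redirection table update), under ioapic->lock: */
    memcpy(ioapic->handled_vectors, handled_vectors, sizeof(handled_vectors));
    smp_wmb();	/* publish the bitmap before readers can see the new routing */

    /* Reader (EOI fast path), lock-free: */
    smp_rmb();	/* pairs with the writer's smp_wmb() */
    if (!test_bit(vector, ioapic->handled_vectors))
    	return;	/* vector not routed through this ioapic; skip the lock */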
@@ -352,6 +369,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
352 ioapic->ioregsel = 0; 369 ioapic->ioregsel = 0;
353 ioapic->irr = 0; 370 ioapic->irr = 0;
354 ioapic->id = 0; 371 ioapic->id = 0;
372 update_handled_vectors(ioapic);
355} 373}
356 374
357static const struct kvm_io_device_ops ioapic_mmio_ops = { 375static const struct kvm_io_device_ops ioapic_mmio_ops = {
@@ -372,13 +390,28 @@ int kvm_ioapic_init(struct kvm *kvm)
372 kvm_ioapic_reset(ioapic); 390 kvm_ioapic_reset(ioapic);
373 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); 391 kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
374 ioapic->kvm = kvm; 392 ioapic->kvm = kvm;
375 ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); 393 mutex_lock(&kvm->slots_lock);
376 if (ret < 0) 394 ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
395 mutex_unlock(&kvm->slots_lock);
396 if (ret < 0) {
397 kvm->arch.vioapic = NULL;
377 kfree(ioapic); 398 kfree(ioapic);
399 }
378 400
379 return ret; 401 return ret;
380} 402}
381 403
404void kvm_ioapic_destroy(struct kvm *kvm)
405{
406 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
407
408 if (ioapic) {
409 kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
410 kvm->arch.vioapic = NULL;
411 kfree(ioapic);
412 }
413}
414
382int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) 415int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
383{ 416{
384 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); 417 struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
@@ -399,6 +432,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
399 432
400 mutex_lock(&ioapic->lock); 433 mutex_lock(&ioapic->lock);
401 memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); 434 memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
435 update_handled_vectors(ioapic);
402 mutex_unlock(&ioapic->lock); 436 mutex_unlock(&ioapic->lock);
403 return 0; 437 return 0;
404} 438}
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 419c43b667ab..8a751b78a430 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -46,6 +46,7 @@ struct kvm_ioapic {
46 struct kvm *kvm; 46 struct kvm *kvm;
47 void (*ack_notifier)(void *opaque, int irq); 47 void (*ack_notifier)(void *opaque, int irq);
48 struct mutex lock; 48 struct mutex lock;
49 DECLARE_BITMAP(handled_vectors, 256);
49}; 50};
50 51
51#ifdef DEBUG 52#ifdef DEBUG
@@ -71,6 +72,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
71int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); 72int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
72void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); 73void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
73int kvm_ioapic_init(struct kvm *kvm); 74int kvm_ioapic_init(struct kvm *kvm);
75void kvm_ioapic_destroy(struct kvm *kvm);
74int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); 76int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
75void kvm_ioapic_reset(struct kvm_ioapic *ioapic); 77void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
76int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, 78int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 15147583abd1..80fd3ad3b2de 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
32static void kvm_iommu_put_pages(struct kvm *kvm, 32static void kvm_iommu_put_pages(struct kvm *kvm,
33 gfn_t base_gfn, unsigned long npages); 33 gfn_t base_gfn, unsigned long npages);
34 34
35int kvm_iommu_map_pages(struct kvm *kvm, 35int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
36 gfn_t base_gfn, unsigned long npages)
37{ 36{
38 gfn_t gfn = base_gfn; 37 gfn_t gfn = slot->base_gfn;
38 unsigned long npages = slot->npages;
39 pfn_t pfn; 39 pfn_t pfn;
40 int i, r = 0; 40 int i, r = 0;
41 struct iommu_domain *domain = kvm->arch.iommu_domain; 41 struct iommu_domain *domain = kvm->arch.iommu_domain;
@@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) 54 if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
55 continue; 55 continue;
56 56
57 pfn = gfn_to_pfn(kvm, gfn); 57 pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
58 r = iommu_map_range(domain, 58 r = iommu_map_range(domain,
59 gfn_to_gpa(gfn), 59 gfn_to_gpa(gfn),
60 pfn_to_hpa(pfn), 60 pfn_to_hpa(pfn),
@@ -69,17 +69,19 @@ int kvm_iommu_map_pages(struct kvm *kvm,
69 return 0; 69 return 0;
70 70
71unmap_pages: 71unmap_pages:
72 kvm_iommu_put_pages(kvm, base_gfn, i); 72 kvm_iommu_put_pages(kvm, slot->base_gfn, i);
73 return r; 73 return r;
74} 74}
75 75
76static int kvm_iommu_map_memslots(struct kvm *kvm) 76static int kvm_iommu_map_memslots(struct kvm *kvm)
77{ 77{
78 int i, r = 0; 78 int i, r = 0;
79 struct kvm_memslots *slots;
80
81 slots = rcu_dereference(kvm->memslots);
79 82
80 for (i = 0; i < kvm->nmemslots; i++) { 83 for (i = 0; i < slots->nmemslots; i++) {
81 r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, 84 r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
82 kvm->memslots[i].npages);
83 if (r) 85 if (r)
84 break; 86 break;
85 } 87 }
@@ -104,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm,
104 106
105 r = iommu_attach_device(domain, &pdev->dev); 107 r = iommu_attach_device(domain, &pdev->dev);
106 if (r) { 108 if (r) {
107 printk(KERN_ERR "assign device %x:%x.%x failed", 109 printk(KERN_ERR "assign device %x:%x:%x.%x failed",
110 pci_domain_nr(pdev->bus),
108 pdev->bus->number, 111 pdev->bus->number,
109 PCI_SLOT(pdev->devfn), 112 PCI_SLOT(pdev->devfn),
110 PCI_FUNC(pdev->devfn)); 113 PCI_FUNC(pdev->devfn));
@@ -125,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm,
125 goto out_unmap; 128 goto out_unmap;
126 } 129 }
127 130
128 printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", 131 printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
132 assigned_dev->host_segnr,
129 assigned_dev->host_busnr, 133 assigned_dev->host_busnr,
130 PCI_SLOT(assigned_dev->host_devfn), 134 PCI_SLOT(assigned_dev->host_devfn),
131 PCI_FUNC(assigned_dev->host_devfn)); 135 PCI_FUNC(assigned_dev->host_devfn));
@@ -152,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm,
152 156
153 iommu_detach_device(domain, &pdev->dev); 157 iommu_detach_device(domain, &pdev->dev);
154 158
155 printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", 159 printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
160 assigned_dev->host_segnr,
156 assigned_dev->host_busnr, 161 assigned_dev->host_busnr,
157 PCI_SLOT(assigned_dev->host_devfn), 162 PCI_SLOT(assigned_dev->host_devfn),
158 PCI_FUNC(assigned_dev->host_devfn)); 163 PCI_FUNC(assigned_dev->host_devfn));
@@ -210,10 +215,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
210static int kvm_iommu_unmap_memslots(struct kvm *kvm) 215static int kvm_iommu_unmap_memslots(struct kvm *kvm)
211{ 216{
212 int i; 217 int i;
218 struct kvm_memslots *slots;
219
220 slots = rcu_dereference(kvm->memslots);
213 221
214 for (i = 0; i < kvm->nmemslots; i++) { 222 for (i = 0; i < slots->nmemslots; i++) {
215 kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, 223 kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
216 kvm->memslots[i].npages); 224 slots->memslots[i].npages);
217 } 225 }
218 226
219 return 0; 227 return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a944be392d6e..548f9253c195 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -44,6 +44,8 @@
44#include <linux/bitops.h> 44#include <linux/bitops.h>
45#include <linux/spinlock.h> 45#include <linux/spinlock.h>
46#include <linux/compat.h> 46#include <linux/compat.h>
47#include <linux/srcu.h>
48#include <linux/hugetlb.h>
47 49
48#include <asm/processor.h> 50#include <asm/processor.h>
49#include <asm/io.h> 51#include <asm/io.h>
@@ -51,9 +53,7 @@
51#include <asm/pgtable.h> 53#include <asm/pgtable.h>
52#include <asm-generic/bitops/le.h> 54#include <asm-generic/bitops/le.h>
53 55
54#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
55#include "coalesced_mmio.h" 56#include "coalesced_mmio.h"
56#endif
57 57
58#define CREATE_TRACE_POINTS 58#define CREATE_TRACE_POINTS
59#include <trace/events/kvm.h> 59#include <trace/events/kvm.h>
@@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
86static int hardware_enable_all(void); 86static int hardware_enable_all(void);
87static void hardware_disable_all(void); 87static void hardware_disable_all(void);
88 88
89static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
90
89static bool kvm_rebooting; 91static bool kvm_rebooting;
90 92
91static bool largepages_enabled = true; 93static bool largepages_enabled = true;
@@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
136 138
137 zalloc_cpumask_var(&cpus, GFP_ATOMIC); 139 zalloc_cpumask_var(&cpus, GFP_ATOMIC);
138 140
139 spin_lock(&kvm->requests_lock); 141 raw_spin_lock(&kvm->requests_lock);
140 me = smp_processor_id(); 142 me = smp_processor_id();
141 kvm_for_each_vcpu(i, vcpu, kvm) { 143 kvm_for_each_vcpu(i, vcpu, kvm) {
142 if (test_and_set_bit(req, &vcpu->requests)) 144 if (test_and_set_bit(req, &vcpu->requests))
@@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
151 smp_call_function_many(cpus, ack_flush, NULL, 1); 153 smp_call_function_many(cpus, ack_flush, NULL, 1);
152 else 154 else
153 called = false; 155 called = false;
154 spin_unlock(&kvm->requests_lock); 156 raw_spin_unlock(&kvm->requests_lock);
155 free_cpumask_var(cpus); 157 free_cpumask_var(cpus);
156 return called; 158 return called;
157} 159}
@@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
215 unsigned long address) 217 unsigned long address)
216{ 218{
217 struct kvm *kvm = mmu_notifier_to_kvm(mn); 219 struct kvm *kvm = mmu_notifier_to_kvm(mn);
218 int need_tlb_flush; 220 int need_tlb_flush, idx;
219 221
220 /* 222 /*
221 * When ->invalidate_page runs, the linux pte has been zapped 223 * When ->invalidate_page runs, the linux pte has been zapped
@@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
235 * pte after kvm_unmap_hva returned, without noticing the page 237 * pte after kvm_unmap_hva returned, without noticing the page
236 * is going to be freed. 238 * is going to be freed.
237 */ 239 */
240 idx = srcu_read_lock(&kvm->srcu);
238 spin_lock(&kvm->mmu_lock); 241 spin_lock(&kvm->mmu_lock);
239 kvm->mmu_notifier_seq++; 242 kvm->mmu_notifier_seq++;
240 need_tlb_flush = kvm_unmap_hva(kvm, address); 243 need_tlb_flush = kvm_unmap_hva(kvm, address);
241 spin_unlock(&kvm->mmu_lock); 244 spin_unlock(&kvm->mmu_lock);
245 srcu_read_unlock(&kvm->srcu, idx);
242 246
243 /* we've to flush the tlb before the pages can be freed */ 247 /* we've to flush the tlb before the pages can be freed */
244 if (need_tlb_flush) 248 if (need_tlb_flush)
@@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
252 pte_t pte) 256 pte_t pte)
253{ 257{
254 struct kvm *kvm = mmu_notifier_to_kvm(mn); 258 struct kvm *kvm = mmu_notifier_to_kvm(mn);
259 int idx;
255 260
261 idx = srcu_read_lock(&kvm->srcu);
256 spin_lock(&kvm->mmu_lock); 262 spin_lock(&kvm->mmu_lock);
257 kvm->mmu_notifier_seq++; 263 kvm->mmu_notifier_seq++;
258 kvm_set_spte_hva(kvm, address, pte); 264 kvm_set_spte_hva(kvm, address, pte);
259 spin_unlock(&kvm->mmu_lock); 265 spin_unlock(&kvm->mmu_lock);
266 srcu_read_unlock(&kvm->srcu, idx);
260} 267}
261 268
262static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, 269static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
265 unsigned long end) 272 unsigned long end)
266{ 273{
267 struct kvm *kvm = mmu_notifier_to_kvm(mn); 274 struct kvm *kvm = mmu_notifier_to_kvm(mn);
268 int need_tlb_flush = 0; 275 int need_tlb_flush = 0, idx;
269 276
277 idx = srcu_read_lock(&kvm->srcu);
270 spin_lock(&kvm->mmu_lock); 278 spin_lock(&kvm->mmu_lock);
271 /* 279 /*
272 * The count increase must become visible at unlock time as no 280 * The count increase must become visible at unlock time as no
@@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
277 for (; start < end; start += PAGE_SIZE) 285 for (; start < end; start += PAGE_SIZE)
278 need_tlb_flush |= kvm_unmap_hva(kvm, start); 286 need_tlb_flush |= kvm_unmap_hva(kvm, start);
279 spin_unlock(&kvm->mmu_lock); 287 spin_unlock(&kvm->mmu_lock);
288 srcu_read_unlock(&kvm->srcu, idx);
280 289
281 /* we've to flush the tlb before the pages can be freed */ 290 /* we've to flush the tlb before the pages can be freed */
282 if (need_tlb_flush) 291 if (need_tlb_flush)
@@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
314 unsigned long address) 323 unsigned long address)
315{ 324{
316 struct kvm *kvm = mmu_notifier_to_kvm(mn); 325 struct kvm *kvm = mmu_notifier_to_kvm(mn);
317 int young; 326 int young, idx;
318 327
328 idx = srcu_read_lock(&kvm->srcu);
319 spin_lock(&kvm->mmu_lock); 329 spin_lock(&kvm->mmu_lock);
320 young = kvm_age_hva(kvm, address); 330 young = kvm_age_hva(kvm, address);
321 spin_unlock(&kvm->mmu_lock); 331 spin_unlock(&kvm->mmu_lock);
332 srcu_read_unlock(&kvm->srcu, idx);
322 333
323 if (young) 334 if (young)
324 kvm_flush_remote_tlbs(kvm); 335 kvm_flush_remote_tlbs(kvm);
@@ -341,15 +352,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
341 .change_pte = kvm_mmu_notifier_change_pte, 352 .change_pte = kvm_mmu_notifier_change_pte,
342 .release = kvm_mmu_notifier_release, 353 .release = kvm_mmu_notifier_release,
343}; 354};
355
356static int kvm_init_mmu_notifier(struct kvm *kvm)
357{
358 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
359 return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
360}
361
362#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
363
364static int kvm_init_mmu_notifier(struct kvm *kvm)
365{
366 return 0;
367}
368
344#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ 369#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
345 370
346static struct kvm *kvm_create_vm(void) 371static struct kvm *kvm_create_vm(void)
347{ 372{
348 int r = 0; 373 int r = 0, i;
349 struct kvm *kvm = kvm_arch_create_vm(); 374 struct kvm *kvm = kvm_arch_create_vm();
350#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
351 struct page *page;
352#endif
353 375
354 if (IS_ERR(kvm)) 376 if (IS_ERR(kvm))
355 goto out; 377 goto out;
@@ -363,39 +385,35 @@ static struct kvm *kvm_create_vm(void)
363 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); 385 INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
364#endif 386#endif
365 387
366#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET 388 r = -ENOMEM;
367 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 389 kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
368 if (!page) { 390 if (!kvm->memslots)
369 r = -ENOMEM;
370 goto out_err; 391 goto out_err;
371 } 392 if (init_srcu_struct(&kvm->srcu))
372 kvm->coalesced_mmio_ring = 393 goto out_err;
373 (struct kvm_coalesced_mmio_ring *)page_address(page); 394 for (i = 0; i < KVM_NR_BUSES; i++) {
374#endif 395 kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
375 396 GFP_KERNEL);
376#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) 397 if (!kvm->buses[i]) {
377 { 398 cleanup_srcu_struct(&kvm->srcu);
378 kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
379 r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
380 if (r) {
381#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
382 put_page(page);
383#endif
384 goto out_err; 399 goto out_err;
385 } 400 }
386 } 401 }
387#endif 402
403 r = kvm_init_mmu_notifier(kvm);
404 if (r) {
405 cleanup_srcu_struct(&kvm->srcu);
406 goto out_err;
407 }
388 408
389 kvm->mm = current->mm; 409 kvm->mm = current->mm;
390 atomic_inc(&kvm->mm->mm_count); 410 atomic_inc(&kvm->mm->mm_count);
391 spin_lock_init(&kvm->mmu_lock); 411 spin_lock_init(&kvm->mmu_lock);
392 spin_lock_init(&kvm->requests_lock); 412 raw_spin_lock_init(&kvm->requests_lock);
393 kvm_io_bus_init(&kvm->pio_bus);
394 kvm_eventfd_init(kvm); 413 kvm_eventfd_init(kvm);
395 mutex_init(&kvm->lock); 414 mutex_init(&kvm->lock);
396 mutex_init(&kvm->irq_lock); 415 mutex_init(&kvm->irq_lock);
397 kvm_io_bus_init(&kvm->mmio_bus); 416 mutex_init(&kvm->slots_lock);
398 init_rwsem(&kvm->slots_lock);
399 atomic_set(&kvm->users_count, 1); 417 atomic_set(&kvm->users_count, 1);
400 spin_lock(&kvm_lock); 418 spin_lock(&kvm_lock);
401 list_add(&kvm->vm_list, &vm_list); 419 list_add(&kvm->vm_list, &vm_list);
@@ -406,12 +424,12 @@ static struct kvm *kvm_create_vm(void)
 out:
         return kvm;
 
-#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
-    (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
 out_err:
         hardware_disable_all();
-#endif
 out_err_nodisable:
+        for (i = 0; i < KVM_NR_BUSES; i++)
+                kfree(kvm->buses[i]);
+        kfree(kvm->memslots);
         kfree(kvm);
         return ERR_PTR(r);
 }
@@ -446,13 +464,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 void kvm_free_physmem(struct kvm *kvm)
 {
         int i;
+        struct kvm_memslots *slots = kvm->memslots;
+
+        for (i = 0; i < slots->nmemslots; ++i)
+                kvm_free_physmem_slot(&slots->memslots[i], NULL);
 
-        for (i = 0; i < kvm->nmemslots; ++i)
-                kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+        kfree(kvm->memslots);
 }
 
 static void kvm_destroy_vm(struct kvm *kvm)
 {
+        int i;
         struct mm_struct *mm = kvm->mm;
 
         kvm_arch_sync_events(kvm);
@@ -460,12 +482,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
         list_del(&kvm->vm_list);
         spin_unlock(&kvm_lock);
         kvm_free_irq_routing(kvm);
-        kvm_io_bus_destroy(&kvm->pio_bus);
-        kvm_io_bus_destroy(&kvm->mmio_bus);
-#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
-        if (kvm->coalesced_mmio_ring != NULL)
-                free_page((unsigned long)kvm->coalesced_mmio_ring);
-#endif
+        for (i = 0; i < KVM_NR_BUSES; i++)
+                kvm_io_bus_destroy(kvm->buses[i]);
+        kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
         mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
@@ -512,12 +531,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
                             struct kvm_userspace_memory_region *mem,
                             int user_alloc)
 {
-        int r;
+        int r, flush_shadow = 0;
         gfn_t base_gfn;
         unsigned long npages;
         unsigned long i;
         struct kvm_memory_slot *memslot;
         struct kvm_memory_slot old, new;
+        struct kvm_memslots *slots, *old_memslots;
 
         r = -EINVAL;
         /* General sanity checks */
@@ -532,7 +552,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
         if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
                 goto out;
 
-        memslot = &kvm->memslots[mem->slot];
+        memslot = &kvm->memslots->memslots[mem->slot];
         base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
         npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -553,7 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
         /* Check for overlaps */
         r = -EEXIST;
         for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-                struct kvm_memory_slot *s = &kvm->memslots[i];
+                struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
 
                 if (s == memslot || !s->npages)
                         continue;
@@ -579,15 +599,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
                 new.user_alloc = user_alloc;
-                /*
-                 * hva_to_rmmap() serialzies with the mmu_lock and to be
-                 * safe it has to ignore memslots with !user_alloc &&
-                 * !userspace_addr.
-                 */
-                if (user_alloc)
-                        new.userspace_addr = mem->userspace_addr;
-                else
-                        new.userspace_addr = 0;
+                new.userspace_addr = mem->userspace_addr;
         }
         if (!npages)
                 goto skip_lpage;
@@ -642,8 +654,9 @@ skip_lpage:
                 if (!new.dirty_bitmap)
                         goto out_free;
                 memset(new.dirty_bitmap, 0, dirty_bytes);
+                /* destroy any largepage mappings for dirty tracking */
                 if (old.npages)
-                        kvm_arch_flush_shadow(kvm);
+                        flush_shadow = 1;
         }
 #else  /* not defined CONFIG_S390 */
         new.user_alloc = user_alloc;
@@ -651,36 +664,72 @@ skip_lpage:
         new.userspace_addr = mem->userspace_addr;
 #endif /* not defined CONFIG_S390 */
 
-        if (!npages)
+        if (!npages) {
+                r = -ENOMEM;
+                slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+                if (!slots)
+                        goto out_free;
+                memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+                if (mem->slot >= slots->nmemslots)
+                        slots->nmemslots = mem->slot + 1;
+                slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+
+                old_memslots = kvm->memslots;
+                rcu_assign_pointer(kvm->memslots, slots);
+                synchronize_srcu_expedited(&kvm->srcu);
+                /* From this point no new shadow pages pointing to a deleted
+                 * memslot will be created.
+                 *
+                 * validation of sp->gfn happens in:
+                 * - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+                 * - kvm_is_visible_gfn (mmu_check_roots)
+                 */
                 kvm_arch_flush_shadow(kvm);
+                kfree(old_memslots);
+        }
 
-        spin_lock(&kvm->mmu_lock);
-        if (mem->slot >= kvm->nmemslots)
-                kvm->nmemslots = mem->slot + 1;
-
-        *memslot = new;
-        spin_unlock(&kvm->mmu_lock);
-
-        r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
-        if (r) {
-                spin_lock(&kvm->mmu_lock);
-                *memslot = old;
-                spin_unlock(&kvm->mmu_lock);
+        r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
+        if (r)
                 goto out_free;
-        }
 
-        kvm_free_physmem_slot(&old, npages ? &new : NULL);
-        /* Slot deletion case: we have to update the current slot */
-        spin_lock(&kvm->mmu_lock);
-        if (!npages)
-                *memslot = old;
-        spin_unlock(&kvm->mmu_lock);
 #ifdef CONFIG_DMAR
         /* map the pages in iommu page table */
-        r = kvm_iommu_map_pages(kvm, base_gfn, npages);
-        if (r)
-                goto out;
+        if (npages) {
+                r = kvm_iommu_map_pages(kvm, &new);
+                if (r)
+                        goto out_free;
+        }
 #endif
+
+        r = -ENOMEM;
+        slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+        if (!slots)
+                goto out_free;
+        memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
+        if (mem->slot >= slots->nmemslots)
+                slots->nmemslots = mem->slot + 1;
+
+        /* actual memory is freed via old in kvm_free_physmem_slot below */
+        if (!npages) {
+                new.rmap = NULL;
+                new.dirty_bitmap = NULL;
+                for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
+                        new.lpage_info[i] = NULL;
+        }
+
+        slots->memslots[mem->slot] = new;
+        old_memslots = kvm->memslots;
+        rcu_assign_pointer(kvm->memslots, slots);
+        synchronize_srcu_expedited(&kvm->srcu);
+
+        kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
+
+        kvm_free_physmem_slot(&old, &new);
+        kfree(old_memslots);
+
+        if (flush_shadow)
+                kvm_arch_flush_shadow(kvm);
+
         return 0;
 
 out_free:
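The update path above is the writer half of the SRCU conversion: build a private copy of the memslots, publish it with rcu_assign_pointer(), wait for in-flight readers with synchronize_srcu_expedited(), and only then free the old copy (shadow pages are flushed after the swap when dirty tracking requires it). A condensed sketch of that sequence, with the slot bookkeeping and arch hooks stripped out; it assumes the caller holds kvm->slots_lock:

#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/string.h>

/* Sketch: writer-side replacement of the memslots array (slots_lock held). */
static int example_commit_slot(struct kvm *kvm, int slot_id,
                               struct kvm_memory_slot *new)
{
        struct kvm_memslots *slots, *old;

        slots = kzalloc(sizeof(*slots), GFP_KERNEL);
        if (!slots)
                return -ENOMEM;
        memcpy(slots, kvm->memslots, sizeof(*slots));   /* start from the live copy */
        slots->memslots[slot_id] = *new;                /* edit the private copy only */

        old = kvm->memslots;
        rcu_assign_pointer(kvm->memslots, slots);       /* publish the new array */
        synchronize_srcu_expedited(&kvm->srcu);         /* wait for SRCU readers */
        kfree(old);                                     /* old copy is now unreachable */
        return 0;
}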
@@ -697,9 +746,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
         int r;
 
-        down_write(&kvm->slots_lock);
+        mutex_lock(&kvm->slots_lock);
         r = __kvm_set_memory_region(kvm, mem, user_alloc);
-        up_write(&kvm->slots_lock);
+        mutex_unlock(&kvm->slots_lock);
         return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@@ -726,7 +775,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
         if (log->slot >= KVM_MEMORY_SLOTS)
                 goto out;
 
-        memslot = &kvm->memslots[log->slot];
+        memslot = &kvm->memslots->memslots[log->slot];
         r = -ENOENT;
         if (!memslot->dirty_bitmap)
                 goto out;
@@ -780,9 +829,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
 {
         int i;
+        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-        for (i = 0; i < kvm->nmemslots; ++i) {
-                struct kvm_memory_slot *memslot = &kvm->memslots[i];
+        for (i = 0; i < slots->nmemslots; ++i) {
+                struct kvm_memory_slot *memslot = &slots->memslots[i];
 
                 if (gfn >= memslot->base_gfn
                     && gfn < memslot->base_gfn + memslot->npages)
@@ -801,10 +851,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 {
         int i;
+        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
 
-        gfn = unalias_gfn(kvm, gfn);
+        gfn = unalias_gfn_instantiation(kvm, gfn);
         for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-                struct kvm_memory_slot *memslot = &kvm->memslots[i];
+                struct kvm_memory_slot *memslot = &slots->memslots[i];
+
+                if (memslot->flags & KVM_MEMSLOT_INVALID)
+                        continue;
 
                 if (gfn >= memslot->base_gfn
                     && gfn < memslot->base_gfn + memslot->npages)
@@ -814,33 +868,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
+{
+        struct vm_area_struct *vma;
+        unsigned long addr, size;
+
+        size = PAGE_SIZE;
+
+        addr = gfn_to_hva(kvm, gfn);
+        if (kvm_is_error_hva(addr))
+                return PAGE_SIZE;
+
+        down_read(&current->mm->mmap_sem);
+        vma = find_vma(current->mm, addr);
+        if (!vma)
+                goto out;
+
+        size = vma_kernel_pagesize(vma);
+
+out:
+        up_read(&current->mm->mmap_sem);
+
+        return size;
+}
+
+int memslot_id(struct kvm *kvm, gfn_t gfn)
+{
+        int i;
+        struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
+        struct kvm_memory_slot *memslot = NULL;
+
+        gfn = unalias_gfn(kvm, gfn);
+        for (i = 0; i < slots->nmemslots; ++i) {
+                memslot = &slots->memslots[i];
+
+                if (gfn >= memslot->base_gfn
+                    && gfn < memslot->base_gfn + memslot->npages)
+                        break;
+        }
+
+        return memslot - slots->memslots;
+}
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
         struct kvm_memory_slot *slot;
 
-        gfn = unalias_gfn(kvm, gfn);
+        gfn = unalias_gfn_instantiation(kvm, gfn);
         slot = gfn_to_memslot_unaliased(kvm, gfn);
-        if (!slot)
+        if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                 return bad_hva();
         return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
 {
         struct page *page[1];
-        unsigned long addr;
         int npages;
         pfn_t pfn;
 
         might_sleep();
 
-        addr = gfn_to_hva(kvm, gfn);
-        if (kvm_is_error_hva(addr)) {
-                get_page(bad_page);
-                return page_to_pfn(bad_page);
-        }
-
         npages = get_user_pages_fast(addr, 1, 1, page);
 
         if (unlikely(npages != 1)) {
@@ -865,8 +954,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
         return pfn;
 }
 
+pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
+{
+        unsigned long addr;
+
+        addr = gfn_to_hva(kvm, gfn);
+        if (kvm_is_error_hva(addr)) {
+                get_page(bad_page);
+                return page_to_pfn(bad_page);
+        }
+
+        return hva_to_pfn(kvm, addr);
+}
 EXPORT_SYMBOL_GPL(gfn_to_pfn);
 
+static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+        return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
+pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
+                         struct kvm_memory_slot *slot, gfn_t gfn)
+{
+        unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+        return hva_to_pfn(kvm, addr);
+}
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
         pfn_t pfn;
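Factoring gfn_to_pfn() into hva_to_pfn() plus thin wrappers lets callers that already hold a memslot (for instance while iterating slots->memslots) translate pages without repeating the gfn-to-memslot walk. A hypothetical caller, sketched on the assumption that it runs inside an SRCU read-side section so the slot stays valid:

/* Sketch: touch every page of a known memslot without per-page slot lookups. */
static void example_walk_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
        gfn_t gfn;

        for (gfn = slot->base_gfn; gfn < slot->base_gfn + slot->npages; gfn++) {
                pfn_t pfn = gfn_to_pfn_memslot(kvm, slot, gfn);

                /* ... use pfn, then drop the reference as gfn_to_pfn() callers do ... */
                kvm_release_pfn_clean(pfn);
        }
}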
@@ -1854,12 +1967,7 @@ static struct notifier_block kvm_reboot_notifier = {
         .priority = 0,
 };
 
-void kvm_io_bus_init(struct kvm_io_bus *bus)
-{
-        memset(bus, 0, sizeof(*bus));
-}
-
-void kvm_io_bus_destroy(struct kvm_io_bus *bus)
+static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 {
         int i;
 
@@ -1868,13 +1976,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 
                 kvm_iodevice_destructor(pos);
         }
+        kfree(bus);
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
+int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                      int len, const void *val)
 {
         int i;
+        struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
         for (i = 0; i < bus->dev_count; i++)
                 if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
                         return 0;
@@ -1882,59 +1992,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
 }
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
+int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+                    int len, void *val)
 {
         int i;
+        struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
+
         for (i = 0; i < bus->dev_count; i++)
                 if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
                         return 0;
         return -EOPNOTSUPP;
 }
 
-int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
-                            struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                            struct kvm_io_device *dev)
 {
-        int ret;
-
-        down_write(&kvm->slots_lock);
-        ret = __kvm_io_bus_register_dev(bus, dev);
-        up_write(&kvm->slots_lock);
+        struct kvm_io_bus *new_bus, *bus;
 
-        return ret;
-}
-
-/* An unlocked version. Caller must have write lock on slots_lock. */
-int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
-                              struct kvm_io_device *dev)
-{
+        bus = kvm->buses[bus_idx];
         if (bus->dev_count > NR_IOBUS_DEVS-1)
                 return -ENOSPC;
 
-        bus->devs[bus->dev_count++] = dev;
+        new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+        if (!new_bus)
+                return -ENOMEM;
+        memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+        new_bus->devs[new_bus->dev_count++] = dev;
+        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+        synchronize_srcu_expedited(&kvm->srcu);
+        kfree(bus);
 
         return 0;
 }
 
-void kvm_io_bus_unregister_dev(struct kvm *kvm,
-                               struct kvm_io_bus *bus,
-                               struct kvm_io_device *dev)
+/* Caller must hold slots_lock. */
+int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+                              struct kvm_io_device *dev)
 {
-        down_write(&kvm->slots_lock);
-        __kvm_io_bus_unregister_dev(bus, dev);
-        up_write(&kvm->slots_lock);
-}
+        int i, r;
+        struct kvm_io_bus *new_bus, *bus;
 
-/* An unlocked version. Caller must have write lock on slots_lock. */
-void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
-                                 struct kvm_io_device *dev)
-{
-        int i;
+        new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+        if (!new_bus)
+                return -ENOMEM;
 
-        for (i = 0; i < bus->dev_count; i++)
-                if (bus->devs[i] == dev) {
-                        bus->devs[i] = bus->devs[--bus->dev_count];
+        bus = kvm->buses[bus_idx];
+        memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
+
+        r = -ENOENT;
+        for (i = 0; i < new_bus->dev_count; i++)
+                if (new_bus->devs[i] == dev) {
+                        r = 0;
+                        new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
                         break;
                 }
+
+        if (r) {
+                kfree(new_bus);
+                return r;
+        }
+
+        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
+        synchronize_srcu_expedited(&kvm->srcu);
+        kfree(bus);
+        return r;
 }
 
 static struct notifier_block kvm_cpu_notifier = {
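Device registration and unregistration now follow the same copy/publish/wait/free discipline as the memslot update, which is why kvm_io_bus_read() and kvm_io_bus_write() can simply rcu_dereference() the bus with no per-access lock: the dispatcher is expected to be inside an SRCU read-side section already. A sketch of such a caller, assuming (as on x86 in this series) that the vcpu run path has taken srcu_read_lock(&kvm->srcu) before emulation reaches MMIO dispatch; example_mmio_store() itself is illustrative, not part of the patch:

/* Sketch: MMIO dispatch from a context that already holds kvm->srcu. */
static int example_mmio_store(struct kvm_vcpu *vcpu, gpa_t addr,
                              const void *data, int len)
{
        /* No extra locking: the bus copy seen by kvm_io_bus_write() stays
         * valid until the enclosing srcu_read_unlock(), even if another
         * thread registers or unregisters a device concurrently. */
        return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, data);
}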