diff options
74 files changed, 3770 insertions, 1661 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 03497909539e..31575e220f3b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt | |||
@@ -556,3 +556,35 @@ Why: udev fully replaces this special file system that only contains CAPI | |||
556 | NCCI TTY device nodes. User space (pppdcapiplugin) works without | 556 | NCCI TTY device nodes. User space (pppdcapiplugin) works without |
557 | noticing the difference. | 557 | noticing the difference. |
558 | Who: Jan Kiszka <jan.kiszka@web.de> | 558 | Who: Jan Kiszka <jan.kiszka@web.de> |
559 | |||
560 | ---------------------------- | ||
561 | |||
562 | What: KVM memory aliases support | ||
563 | When: July 2010 | ||
564 | Why: Memory aliasing support is used for speeding up guest vga access | ||
565 | through the vga windows. | ||
566 | |||
567 | Modern userspace no longer uses this feature, so it's just bitrotted | ||
568 | code and can be removed with no impact. | ||
569 | Who: Avi Kivity <avi@redhat.com> | ||
570 | |||
571 | ---------------------------- | ||
572 | |||
573 | What: KVM kernel-allocated memory slots | ||
574 | When: July 2010 | ||
575 | Why: Since 2.6.25, kvm supports user-allocated memory slots, which are | ||
576 | much more flexible than kernel-allocated slots. All current userspace | ||
577 | supports the newer interface and this code can be removed with no | ||
578 | impact. | ||
579 | Who: Avi Kivity <avi@redhat.com> | ||
580 | |||
581 | ---------------------------- | ||
582 | |||
583 | What: KVM paravirt mmu host support | ||
584 | When: January 2011 | ||
585 | Why: The paravirt mmu host support is slower than non-paravirt mmu, both | ||
586 | on newer and older hardware. It is already not exposed to the guest, | ||
587 | and kept only for live migration purposes. | ||
588 | Who: Avi Kivity <avi@redhat.com> | ||
589 | |||
590 | ---------------------------- | ||
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 2811e452f756..c6416a398163 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt | |||
@@ -23,12 +23,12 @@ of a virtual machine. The ioctls belong to three classes | |||
23 | Only run vcpu ioctls from the same thread that was used to create the | 23 | Only run vcpu ioctls from the same thread that was used to create the |
24 | vcpu. | 24 | vcpu. |
25 | 25 | ||
26 | 2. File descritpors | 26 | 2. File descriptors |
27 | 27 | ||
28 | The kvm API is centered around file descriptors. An initial | 28 | The kvm API is centered around file descriptors. An initial |
29 | open("/dev/kvm") obtains a handle to the kvm subsystem; this handle | 29 | open("/dev/kvm") obtains a handle to the kvm subsystem; this handle |
30 | can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this | 30 | can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this |
31 | handle will create a VM file descripror which can be used to issue VM | 31 | handle will create a VM file descriptor which can be used to issue VM |
32 | ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu | 32 | ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu |
33 | and return a file descriptor pointing to it. Finally, ioctls on a vcpu | 33 | and return a file descriptor pointing to it. Finally, ioctls on a vcpu |
34 | fd can be used to control the vcpu, including the important task of | 34 | fd can be used to control the vcpu, including the important task of |
@@ -643,7 +643,7 @@ Type: vm ioctl | |||
643 | Parameters: struct kvm_clock_data (in) | 643 | Parameters: struct kvm_clock_data (in) |
644 | Returns: 0 on success, -1 on error | 644 | Returns: 0 on success, -1 on error |
645 | 645 | ||
646 | Sets the current timestamp of kvmclock to the valued specific in its parameter. | 646 | Sets the current timestamp of kvmclock to the value specified in its parameter. |
647 | In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios | 647 | In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios |
648 | such as migration. | 648 | such as migration. |
649 | 649 | ||
@@ -795,11 +795,11 @@ Unused. | |||
795 | __u64 data_offset; /* relative to kvm_run start */ | 795 | __u64 data_offset; /* relative to kvm_run start */ |
796 | } io; | 796 | } io; |
797 | 797 | ||
798 | If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has | 798 | If exit_reason is KVM_EXIT_IO, then the vcpu has |
799 | executed a port I/O instruction which could not be satisfied by kvm. | 799 | executed a port I/O instruction which could not be satisfied by kvm. |
800 | data_offset describes where the data is located (KVM_EXIT_IO_OUT) or | 800 | data_offset describes where the data is located (KVM_EXIT_IO_OUT) or |
801 | where kvm expects application code to place the data for the next | 801 | where kvm expects application code to place the data for the next |
802 | KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a patcked array. | 802 | KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array. |
803 | 803 | ||
804 | struct { | 804 | struct { |
805 | struct kvm_debug_exit_arch arch; | 805 | struct kvm_debug_exit_arch arch; |
@@ -815,7 +815,7 @@ Unused. | |||
815 | __u8 is_write; | 815 | __u8 is_write; |
816 | } mmio; | 816 | } mmio; |
817 | 817 | ||
818 | If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has | 818 | If exit_reason is KVM_EXIT_MMIO, then the vcpu has |
819 | executed a memory-mapped I/O instruction which could not be satisfied | 819 | executed a memory-mapped I/O instruction which could not be satisfied |
820 | by kvm. The 'data' member contains the written data if 'is_write' is | 820 | by kvm. The 'data' member contains the written data if 'is_write' is |
821 | true, and should be filled by application code otherwise. | 821 | true, and should be filled by application code otherwise. |
diff --git a/MAINTAINERS b/MAINTAINERS index c6591bca646b..51d8b5221dd8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -3173,7 +3173,7 @@ F: arch/x86/include/asm/svm.h | |||
3173 | F: arch/x86/kvm/svm.c | 3173 | F: arch/x86/kvm/svm.c |
3174 | 3174 | ||
3175 | KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC | 3175 | KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC |
3176 | M: Hollis Blanchard <hollisb@us.ibm.com> | 3176 | M: Alexander Graf <agraf@suse.de> |
3177 | L: kvm-ppc@vger.kernel.org | 3177 | L: kvm-ppc@vger.kernel.org |
3178 | W: http://kvm.qumranet.com | 3178 | W: http://kvm.qumranet.com |
3179 | S: Supported | 3179 | S: Supported |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 01c75797119c..fa4d1e59deb0 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig | |||
@@ -26,6 +26,7 @@ config KVM | |||
26 | select ANON_INODES | 26 | select ANON_INODES |
27 | select HAVE_KVM_IRQCHIP | 27 | select HAVE_KVM_IRQCHIP |
28 | select KVM_APIC_ARCHITECTURE | 28 | select KVM_APIC_ARCHITECTURE |
29 | select KVM_MMIO | ||
29 | ---help--- | 30 | ---help--- |
30 | Support hosting fully virtualized guest machines using hardware | 31 | Support hosting fully virtualized guest machines using hardware |
31 | virtualization extensions. You will need a fairly recent | 32 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5fdeec5fddcf..26e0e089bfe7 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
@@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
241 | return 0; | 241 | return 0; |
242 | mmio: | 242 | mmio: |
243 | if (p->dir) | 243 | if (p->dir) |
244 | r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr, | 244 | r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr, |
245 | p->size, &p->data); | 245 | p->size, &p->data); |
246 | else | 246 | else |
247 | r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr, | 247 | r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr, |
248 | p->size, &p->data); | 248 | p->size, &p->data); |
249 | if (r) | 249 | if (r) |
250 | printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); | 250 | printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); |
@@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) | |||
636 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 636 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
637 | { | 637 | { |
638 | union context *host_ctx, *guest_ctx; | 638 | union context *host_ctx, *guest_ctx; |
639 | int r; | 639 | int r, idx; |
640 | 640 | ||
641 | /* | 641 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
642 | * down_read() may sleep and return with interrupts enabled | ||
643 | */ | ||
644 | down_read(&vcpu->kvm->slots_lock); | ||
645 | 642 | ||
646 | again: | 643 | again: |
647 | if (signal_pending(current)) { | 644 | if (signal_pending(current)) { |
@@ -663,7 +660,7 @@ again: | |||
663 | if (r < 0) | 660 | if (r < 0) |
664 | goto vcpu_run_fail; | 661 | goto vcpu_run_fail; |
665 | 662 | ||
666 | up_read(&vcpu->kvm->slots_lock); | 663 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
667 | kvm_guest_enter(); | 664 | kvm_guest_enter(); |
668 | 665 | ||
669 | /* | 666 | /* |
@@ -687,7 +684,7 @@ again: | |||
687 | kvm_guest_exit(); | 684 | kvm_guest_exit(); |
688 | preempt_enable(); | 685 | preempt_enable(); |
689 | 686 | ||
690 | down_read(&vcpu->kvm->slots_lock); | 687 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
691 | 688 | ||
692 | r = kvm_handle_exit(kvm_run, vcpu); | 689 | r = kvm_handle_exit(kvm_run, vcpu); |
693 | 690 | ||
@@ -697,10 +694,10 @@ again: | |||
697 | } | 694 | } |
698 | 695 | ||
699 | out: | 696 | out: |
700 | up_read(&vcpu->kvm->slots_lock); | 697 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
701 | if (r > 0) { | 698 | if (r > 0) { |
702 | kvm_resched(vcpu); | 699 | kvm_resched(vcpu); |
703 | down_read(&vcpu->kvm->slots_lock); | 700 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
704 | goto again; | 701 | goto again; |
705 | } | 702 | } |
706 | 703 | ||
@@ -971,7 +968,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
971 | goto out; | 968 | goto out; |
972 | r = kvm_setup_default_irq_routing(kvm); | 969 | r = kvm_setup_default_irq_routing(kvm); |
973 | if (r) { | 970 | if (r) { |
974 | kfree(kvm->arch.vioapic); | 971 | kvm_ioapic_destroy(kvm); |
975 | goto out; | 972 | goto out; |
976 | } | 973 | } |
977 | break; | 974 | break; |
@@ -1377,12 +1374,14 @@ static void free_kvm(struct kvm *kvm) | |||
1377 | 1374 | ||
1378 | static void kvm_release_vm_pages(struct kvm *kvm) | 1375 | static void kvm_release_vm_pages(struct kvm *kvm) |
1379 | { | 1376 | { |
1377 | struct kvm_memslots *slots; | ||
1380 | struct kvm_memory_slot *memslot; | 1378 | struct kvm_memory_slot *memslot; |
1381 | int i, j; | 1379 | int i, j; |
1382 | unsigned long base_gfn; | 1380 | unsigned long base_gfn; |
1383 | 1381 | ||
1384 | for (i = 0; i < kvm->nmemslots; i++) { | 1382 | slots = rcu_dereference(kvm->memslots); |
1385 | memslot = &kvm->memslots[i]; | 1383 | for (i = 0; i < slots->nmemslots; i++) { |
1384 | memslot = &slots->memslots[i]; | ||
1386 | base_gfn = memslot->base_gfn; | 1385 | base_gfn = memslot->base_gfn; |
1387 | 1386 | ||
1388 | for (j = 0; j < memslot->npages; j++) { | 1387 | for (j = 0; j < memslot->npages; j++) { |
@@ -1405,6 +1404,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
1405 | kfree(kvm->arch.vioapic); | 1404 | kfree(kvm->arch.vioapic); |
1406 | kvm_release_vm_pages(kvm); | 1405 | kvm_release_vm_pages(kvm); |
1407 | kvm_free_physmem(kvm); | 1406 | kvm_free_physmem(kvm); |
1407 | cleanup_srcu_struct(&kvm->srcu); | ||
1408 | free_kvm(kvm); | 1408 | free_kvm(kvm); |
1409 | } | 1409 | } |
1410 | 1410 | ||
@@ -1576,15 +1576,15 @@ out: | |||
1576 | return r; | 1576 | return r; |
1577 | } | 1577 | } |
1578 | 1578 | ||
1579 | int kvm_arch_set_memory_region(struct kvm *kvm, | 1579 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
1580 | struct kvm_userspace_memory_region *mem, | 1580 | struct kvm_memory_slot *memslot, |
1581 | struct kvm_memory_slot old, | 1581 | struct kvm_memory_slot old, |
1582 | struct kvm_userspace_memory_region *mem, | ||
1582 | int user_alloc) | 1583 | int user_alloc) |
1583 | { | 1584 | { |
1584 | unsigned long i; | 1585 | unsigned long i; |
1585 | unsigned long pfn; | 1586 | unsigned long pfn; |
1586 | int npages = mem->memory_size >> PAGE_SHIFT; | 1587 | int npages = memslot->npages; |
1587 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | ||
1588 | unsigned long base_gfn = memslot->base_gfn; | 1588 | unsigned long base_gfn = memslot->base_gfn; |
1589 | 1589 | ||
1590 | if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) | 1590 | if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT)) |
@@ -1608,6 +1608,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
1608 | return 0; | 1608 | return 0; |
1609 | } | 1609 | } |
1610 | 1610 | ||
1611 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
1612 | struct kvm_userspace_memory_region *mem, | ||
1613 | struct kvm_memory_slot old, | ||
1614 | int user_alloc) | ||
1615 | { | ||
1616 | return; | ||
1617 | } | ||
1618 | |||
1611 | void kvm_arch_flush_shadow(struct kvm *kvm) | 1619 | void kvm_arch_flush_shadow(struct kvm *kvm) |
1612 | { | 1620 | { |
1613 | kvm_flush_remote_tlbs(kvm); | 1621 | kvm_flush_remote_tlbs(kvm); |
@@ -1802,7 +1810,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, | |||
1802 | if (log->slot >= KVM_MEMORY_SLOTS) | 1810 | if (log->slot >= KVM_MEMORY_SLOTS) |
1803 | goto out; | 1811 | goto out; |
1804 | 1812 | ||
1805 | memslot = &kvm->memslots[log->slot]; | 1813 | memslot = &kvm->memslots->memslots[log->slot]; |
1806 | r = -ENOENT; | 1814 | r = -ENOENT; |
1807 | if (!memslot->dirty_bitmap) | 1815 | if (!memslot->dirty_bitmap) |
1808 | goto out; | 1816 | goto out; |
@@ -1827,6 +1835,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1827 | struct kvm_memory_slot *memslot; | 1835 | struct kvm_memory_slot *memslot; |
1828 | int is_dirty = 0; | 1836 | int is_dirty = 0; |
1829 | 1837 | ||
1838 | mutex_lock(&kvm->slots_lock); | ||
1830 | spin_lock(&kvm->arch.dirty_log_lock); | 1839 | spin_lock(&kvm->arch.dirty_log_lock); |
1831 | 1840 | ||
1832 | r = kvm_ia64_sync_dirty_log(kvm, log); | 1841 | r = kvm_ia64_sync_dirty_log(kvm, log); |
@@ -1840,12 +1849,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1840 | /* If nothing is dirty, don't bother messing with page tables. */ | 1849 | /* If nothing is dirty, don't bother messing with page tables. */ |
1841 | if (is_dirty) { | 1850 | if (is_dirty) { |
1842 | kvm_flush_remote_tlbs(kvm); | 1851 | kvm_flush_remote_tlbs(kvm); |
1843 | memslot = &kvm->memslots[log->slot]; | 1852 | memslot = &kvm->memslots->memslots[log->slot]; |
1844 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 1853 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; |
1845 | memset(memslot->dirty_bitmap, 0, n); | 1854 | memset(memslot->dirty_bitmap, 0, n); |
1846 | } | 1855 | } |
1847 | r = 0; | 1856 | r = 0; |
1848 | out: | 1857 | out: |
1858 | mutex_unlock(&kvm->slots_lock); | ||
1849 | spin_unlock(&kvm->arch.dirty_log_lock); | 1859 | spin_unlock(&kvm->arch.dirty_log_lock); |
1850 | return r; | 1860 | return r; |
1851 | } | 1861 | } |
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c index e4b82319881d..cb548ee9fcae 100644 --- a/arch/ia64/kvm/kvm_fw.c +++ b/arch/ia64/kvm/kvm_fw.c | |||
@@ -75,7 +75,7 @@ static void set_pal_result(struct kvm_vcpu *vcpu, | |||
75 | struct exit_ctl_data *p; | 75 | struct exit_ctl_data *p; |
76 | 76 | ||
77 | p = kvm_get_exit_data(vcpu); | 77 | p = kvm_get_exit_data(vcpu); |
78 | if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { | 78 | if (p->exit_reason == EXIT_REASON_PAL_CALL) { |
79 | p->u.pal_data.ret = result; | 79 | p->u.pal_data.ret = result; |
80 | return ; | 80 | return ; |
81 | } | 81 | } |
@@ -87,7 +87,7 @@ static void set_sal_result(struct kvm_vcpu *vcpu, | |||
87 | struct exit_ctl_data *p; | 87 | struct exit_ctl_data *p; |
88 | 88 | ||
89 | p = kvm_get_exit_data(vcpu); | 89 | p = kvm_get_exit_data(vcpu); |
90 | if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { | 90 | if (p->exit_reason == EXIT_REASON_SAL_CALL) { |
91 | p->u.sal_data.ret = result; | 91 | p->u.sal_data.ret = result; |
92 | return ; | 92 | return ; |
93 | } | 93 | } |
@@ -322,7 +322,7 @@ static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) | |||
322 | struct exit_ctl_data *p; | 322 | struct exit_ctl_data *p; |
323 | 323 | ||
324 | p = kvm_get_exit_data(vcpu); | 324 | p = kvm_get_exit_data(vcpu); |
325 | if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) | 325 | if (p->exit_reason == EXIT_REASON_PAL_CALL) |
326 | index = p->u.pal_data.gr28; | 326 | index = p->u.pal_data.gr28; |
327 | 327 | ||
328 | return index; | 328 | return index; |
@@ -646,18 +646,16 @@ static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, | |||
646 | 646 | ||
647 | p = kvm_get_exit_data(vcpu); | 647 | p = kvm_get_exit_data(vcpu); |
648 | 648 | ||
649 | if (p) { | 649 | if (p->exit_reason == EXIT_REASON_SAL_CALL) { |
650 | if (p->exit_reason == EXIT_REASON_SAL_CALL) { | 650 | *in0 = p->u.sal_data.in0; |
651 | *in0 = p->u.sal_data.in0; | 651 | *in1 = p->u.sal_data.in1; |
652 | *in1 = p->u.sal_data.in1; | 652 | *in2 = p->u.sal_data.in2; |
653 | *in2 = p->u.sal_data.in2; | 653 | *in3 = p->u.sal_data.in3; |
654 | *in3 = p->u.sal_data.in3; | 654 | *in4 = p->u.sal_data.in4; |
655 | *in4 = p->u.sal_data.in4; | 655 | *in5 = p->u.sal_data.in5; |
656 | *in5 = p->u.sal_data.in5; | 656 | *in6 = p->u.sal_data.in6; |
657 | *in6 = p->u.sal_data.in6; | 657 | *in7 = p->u.sal_data.in7; |
658 | *in7 = p->u.sal_data.in7; | 658 | return ; |
659 | return ; | ||
660 | } | ||
661 | } | 659 | } |
662 | *in0 = 0; | 660 | *in0 = 0; |
663 | } | 661 | } |
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c index 9bf55afd08d0..fb8f9f59a1ed 100644 --- a/arch/ia64/kvm/mmio.c +++ b/arch/ia64/kvm/mmio.c | |||
@@ -316,8 +316,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) | |||
316 | return; | 316 | return; |
317 | } else { | 317 | } else { |
318 | inst_type = -1; | 318 | inst_type = -1; |
319 | panic_vm(vcpu, "Unsupported MMIO access instruction! \ | 319 | panic_vm(vcpu, "Unsupported MMIO access instruction! " |
320 | Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", | 320 | "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n", |
321 | bundle.i64[0], bundle.i64[1]); | 321 | bundle.i64[0], bundle.i64[1]); |
322 | } | 322 | } |
323 | 323 | ||
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index dce75b70cdd5..958815c9787d 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c | |||
@@ -1639,8 +1639,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) | |||
1639 | * Otherwise panic | 1639 | * Otherwise panic |
1640 | */ | 1640 | */ |
1641 | if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) | 1641 | if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) |
1642 | panic_vm(vcpu, "Only support guests with vpsr.pk =0 \ | 1642 | panic_vm(vcpu, "Only support guests with vpsr.pk =0 " |
1643 | & vpsr.is=0\n"); | 1643 | "& vpsr.is=0\n"); |
1644 | 1644 | ||
1645 | /* | 1645 | /* |
1646 | * For those IA64_PSR bits: id/da/dd/ss/ed/ia | 1646 | * For those IA64_PSR bits: id/da/dd/ss/ed/ia |
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index af2abe74f544..aadf2dd6f84e 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h | |||
@@ -97,4 +97,10 @@ | |||
97 | #define RESUME_HOST RESUME_FLAG_HOST | 97 | #define RESUME_HOST RESUME_FLAG_HOST |
98 | #define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV) | 98 | #define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV) |
99 | 99 | ||
100 | #define KVM_GUEST_MODE_NONE 0 | ||
101 | #define KVM_GUEST_MODE_GUEST 1 | ||
102 | #define KVM_GUEST_MODE_SKIP 2 | ||
103 | |||
104 | #define KVM_INST_FETCH_FAILED -1 | ||
105 | |||
100 | #endif /* __POWERPC_KVM_ASM_H__ */ | 106 | #endif /* __POWERPC_KVM_ASM_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 74b7369770d0..db7db0a96967 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h | |||
@@ -22,7 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/types.h> | 23 | #include <linux/types.h> |
24 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
25 | #include <asm/kvm_ppc.h> | 25 | #include <asm/kvm_book3s_64_asm.h> |
26 | 26 | ||
27 | struct kvmppc_slb { | 27 | struct kvmppc_slb { |
28 | u64 esid; | 28 | u64 esid; |
@@ -33,7 +33,8 @@ struct kvmppc_slb { | |||
33 | bool Ks; | 33 | bool Ks; |
34 | bool Kp; | 34 | bool Kp; |
35 | bool nx; | 35 | bool nx; |
36 | bool large; | 36 | bool large; /* PTEs are 16MB */ |
37 | bool tb; /* 1TB segment */ | ||
37 | bool class; | 38 | bool class; |
38 | }; | 39 | }; |
39 | 40 | ||
@@ -69,6 +70,7 @@ struct kvmppc_sid_map { | |||
69 | 70 | ||
70 | struct kvmppc_vcpu_book3s { | 71 | struct kvmppc_vcpu_book3s { |
71 | struct kvm_vcpu vcpu; | 72 | struct kvm_vcpu vcpu; |
73 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; | ||
72 | struct kvmppc_sid_map sid_map[SID_MAP_NUM]; | 74 | struct kvmppc_sid_map sid_map[SID_MAP_NUM]; |
73 | struct kvmppc_slb slb[64]; | 75 | struct kvmppc_slb slb[64]; |
74 | struct { | 76 | struct { |
@@ -89,6 +91,7 @@ struct kvmppc_vcpu_book3s { | |||
89 | u64 vsid_next; | 91 | u64 vsid_next; |
90 | u64 vsid_max; | 92 | u64 vsid_max; |
91 | int context_id; | 93 | int context_id; |
94 | ulong prog_flags; /* flags to inject when giving a 700 trap */ | ||
92 | }; | 95 | }; |
93 | 96 | ||
94 | #define CONTEXT_HOST 0 | 97 | #define CONTEXT_HOST 0 |
@@ -119,6 +122,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, | |||
119 | 122 | ||
120 | extern u32 kvmppc_trampoline_lowmem; | 123 | extern u32 kvmppc_trampoline_lowmem; |
121 | extern u32 kvmppc_trampoline_enter; | 124 | extern u32 kvmppc_trampoline_enter; |
125 | extern void kvmppc_rmcall(ulong srr0, ulong srr1); | ||
126 | extern void kvmppc_load_up_fpu(void); | ||
127 | extern void kvmppc_load_up_altivec(void); | ||
128 | extern void kvmppc_load_up_vsx(void); | ||
122 | 129 | ||
123 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) | 130 | static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) |
124 | { | 131 | { |
diff --git a/arch/powerpc/include/asm/kvm_book3s_64_asm.h b/arch/powerpc/include/asm/kvm_book3s_64_asm.h index 2e06ee8184ef..183461b48407 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_64_asm.h | |||
@@ -20,6 +20,8 @@ | |||
20 | #ifndef __ASM_KVM_BOOK3S_ASM_H__ | 20 | #ifndef __ASM_KVM_BOOK3S_ASM_H__ |
21 | #define __ASM_KVM_BOOK3S_ASM_H__ | 21 | #define __ASM_KVM_BOOK3S_ASM_H__ |
22 | 22 | ||
23 | #ifdef __ASSEMBLY__ | ||
24 | |||
23 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | 25 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER |
24 | 26 | ||
25 | #include <asm/kvm_asm.h> | 27 | #include <asm/kvm_asm.h> |
@@ -55,4 +57,20 @@ kvmppc_resume_\intno: | |||
55 | 57 | ||
56 | #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ | 58 | #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */ |
57 | 59 | ||
60 | #else /*__ASSEMBLY__ */ | ||
61 | |||
62 | struct kvmppc_book3s_shadow_vcpu { | ||
63 | ulong gpr[14]; | ||
64 | u32 cr; | ||
65 | u32 xer; | ||
66 | ulong host_r1; | ||
67 | ulong host_r2; | ||
68 | ulong handler; | ||
69 | ulong scratch0; | ||
70 | ulong scratch1; | ||
71 | ulong vmhandler; | ||
72 | }; | ||
73 | |||
74 | #endif /*__ASSEMBLY__ */ | ||
75 | |||
58 | #endif /* __ASM_KVM_BOOK3S_ASM_H__ */ | 76 | #endif /* __ASM_KVM_BOOK3S_ASM_H__ */ |
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h index 9d497ce49726..7fea26fffb25 100644 --- a/arch/powerpc/include/asm/kvm_e500.h +++ b/arch/powerpc/include/asm/kvm_e500.h | |||
@@ -52,9 +52,12 @@ struct kvmppc_vcpu_e500 { | |||
52 | u32 mas5; | 52 | u32 mas5; |
53 | u32 mas6; | 53 | u32 mas6; |
54 | u32 mas7; | 54 | u32 mas7; |
55 | u32 l1csr0; | ||
55 | u32 l1csr1; | 56 | u32 l1csr1; |
56 | u32 hid0; | 57 | u32 hid0; |
57 | u32 hid1; | 58 | u32 hid1; |
59 | u32 tlb0cfg; | ||
60 | u32 tlb1cfg; | ||
58 | 61 | ||
59 | struct kvm_vcpu vcpu; | 62 | struct kvm_vcpu vcpu; |
60 | }; | 63 | }; |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1201f62d0d73..5e5bae7e152f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h | |||
@@ -167,23 +167,40 @@ struct kvm_vcpu_arch { | |||
167 | ulong trampoline_lowmem; | 167 | ulong trampoline_lowmem; |
168 | ulong trampoline_enter; | 168 | ulong trampoline_enter; |
169 | ulong highmem_handler; | 169 | ulong highmem_handler; |
170 | ulong rmcall; | ||
170 | ulong host_paca_phys; | 171 | ulong host_paca_phys; |
171 | struct kvmppc_mmu mmu; | 172 | struct kvmppc_mmu mmu; |
172 | #endif | 173 | #endif |
173 | 174 | ||
174 | u64 fpr[32]; | ||
175 | ulong gpr[32]; | 175 | ulong gpr[32]; |
176 | 176 | ||
177 | u64 fpr[32]; | ||
178 | u32 fpscr; | ||
179 | |||
180 | #ifdef CONFIG_ALTIVEC | ||
181 | vector128 vr[32]; | ||
182 | vector128 vscr; | ||
183 | #endif | ||
184 | |||
185 | #ifdef CONFIG_VSX | ||
186 | u64 vsr[32]; | ||
187 | #endif | ||
188 | |||
177 | ulong pc; | 189 | ulong pc; |
178 | u32 cr; | ||
179 | ulong ctr; | 190 | ulong ctr; |
180 | ulong lr; | 191 | ulong lr; |
192 | |||
193 | #ifdef CONFIG_BOOKE | ||
181 | ulong xer; | 194 | ulong xer; |
195 | u32 cr; | ||
196 | #endif | ||
182 | 197 | ||
183 | ulong msr; | 198 | ulong msr; |
184 | #ifdef CONFIG_PPC64 | 199 | #ifdef CONFIG_PPC64 |
185 | ulong shadow_msr; | 200 | ulong shadow_msr; |
201 | ulong shadow_srr1; | ||
186 | ulong hflags; | 202 | ulong hflags; |
203 | ulong guest_owned_ext; | ||
187 | #endif | 204 | #endif |
188 | u32 mmucr; | 205 | u32 mmucr; |
189 | ulong sprg0; | 206 | ulong sprg0; |
@@ -242,6 +259,8 @@ struct kvm_vcpu_arch { | |||
242 | #endif | 259 | #endif |
243 | ulong fault_dear; | 260 | ulong fault_dear; |
244 | ulong fault_esr; | 261 | ulong fault_esr; |
262 | ulong queued_dear; | ||
263 | ulong queued_esr; | ||
245 | gpa_t paddr_accessed; | 264 | gpa_t paddr_accessed; |
246 | 265 | ||
247 | u8 io_gpr; /* GPR used as IO source/target */ | 266 | u8 io_gpr; /* GPR used as IO source/target */ |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 269ee46ab028..e2642829e435 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h | |||
@@ -28,6 +28,9 @@ | |||
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/kvm_types.h> | 29 | #include <linux/kvm_types.h> |
30 | #include <linux/kvm_host.h> | 30 | #include <linux/kvm_host.h> |
31 | #ifdef CONFIG_PPC_BOOK3S | ||
32 | #include <asm/kvm_book3s.h> | ||
33 | #endif | ||
31 | 34 | ||
32 | enum emulation_result { | 35 | enum emulation_result { |
33 | EMULATE_DONE, /* no further processing */ | 36 | EMULATE_DONE, /* no further processing */ |
@@ -80,8 +83,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); | |||
80 | 83 | ||
81 | extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); | 84 | extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu); |
82 | extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); | 85 | extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); |
83 | extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu); | 86 | extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); |
84 | extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); | 87 | extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); |
88 | extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); | ||
85 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 89 | extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
86 | struct kvm_interrupt *irq); | 90 | struct kvm_interrupt *irq); |
87 | 91 | ||
@@ -95,4 +99,81 @@ extern void kvmppc_booke_exit(void); | |||
95 | 99 | ||
96 | extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); | 100 | extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); |
97 | 101 | ||
102 | #ifdef CONFIG_PPC_BOOK3S | ||
103 | |||
104 | /* We assume we're always acting on the current vcpu */ | ||
105 | |||
106 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | ||
107 | { | ||
108 | if ( num < 14 ) { | ||
109 | get_paca()->shadow_vcpu.gpr[num] = val; | ||
110 | to_book3s(vcpu)->shadow_vcpu.gpr[num] = val; | ||
111 | } else | ||
112 | vcpu->arch.gpr[num] = val; | ||
113 | } | ||
114 | |||
115 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
116 | { | ||
117 | if ( num < 14 ) | ||
118 | return get_paca()->shadow_vcpu.gpr[num]; | ||
119 | else | ||
120 | return vcpu->arch.gpr[num]; | ||
121 | } | ||
122 | |||
123 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
124 | { | ||
125 | get_paca()->shadow_vcpu.cr = val; | ||
126 | to_book3s(vcpu)->shadow_vcpu.cr = val; | ||
127 | } | ||
128 | |||
129 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
130 | { | ||
131 | return get_paca()->shadow_vcpu.cr; | ||
132 | } | ||
133 | |||
134 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
135 | { | ||
136 | get_paca()->shadow_vcpu.xer = val; | ||
137 | to_book3s(vcpu)->shadow_vcpu.xer = val; | ||
138 | } | ||
139 | |||
140 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
141 | { | ||
142 | return get_paca()->shadow_vcpu.xer; | ||
143 | } | ||
144 | |||
145 | #else | ||
146 | |||
147 | static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val) | ||
148 | { | ||
149 | vcpu->arch.gpr[num] = val; | ||
150 | } | ||
151 | |||
152 | static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num) | ||
153 | { | ||
154 | return vcpu->arch.gpr[num]; | ||
155 | } | ||
156 | |||
157 | static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val) | ||
158 | { | ||
159 | vcpu->arch.cr = val; | ||
160 | } | ||
161 | |||
162 | static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu) | ||
163 | { | ||
164 | return vcpu->arch.cr; | ||
165 | } | ||
166 | |||
167 | static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val) | ||
168 | { | ||
169 | vcpu->arch.xer = val; | ||
170 | } | ||
171 | |||
172 | static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu) | ||
173 | { | ||
174 | return vcpu->arch.xer; | ||
175 | } | ||
176 | |||
177 | #endif | ||
178 | |||
98 | #endif /* __POWERPC_KVM_PPC_H__ */ | 179 | #endif /* __POWERPC_KVM_PPC_H__ */ |
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 5e9b4ef71415..d8a693109c82 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -19,6 +19,9 @@ | |||
19 | #include <asm/mmu.h> | 19 | #include <asm/mmu.h> |
20 | #include <asm/page.h> | 20 | #include <asm/page.h> |
21 | #include <asm/exception-64e.h> | 21 | #include <asm/exception-64e.h> |
22 | #ifdef CONFIG_KVM_BOOK3S_64_HANDLER | ||
23 | #include <asm/kvm_book3s_64_asm.h> | ||
24 | #endif | ||
22 | 25 | ||
23 | register struct paca_struct *local_paca asm("r13"); | 26 | register struct paca_struct *local_paca asm("r13"); |
24 | 27 | ||
@@ -135,6 +138,8 @@ struct paca_struct { | |||
135 | u64 esid; | 138 | u64 esid; |
136 | u64 vsid; | 139 | u64 vsid; |
137 | } kvm_slb[64]; /* guest SLB */ | 140 | } kvm_slb[64]; /* guest SLB */ |
141 | /* We use this to store guest state in */ | ||
142 | struct kvmppc_book3s_shadow_vcpu shadow_vcpu; | ||
138 | u8 kvm_slb_max; /* highest used guest slb entry */ | 143 | u8 kvm_slb_max; /* highest used guest slb entry */ |
139 | u8 kvm_in_guest; /* are we inside the guest? */ | 144 | u8 kvm_in_guest; /* are we inside the guest? */ |
140 | #endif | 145 | #endif |
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bc8dd53f718a..5572e86223f4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -426,6 +426,10 @@ | |||
426 | #define SRR1_WAKEMT 0x00280000 /* mtctrl */ | 426 | #define SRR1_WAKEMT 0x00280000 /* mtctrl */ |
427 | #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ | 427 | #define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */ |
428 | #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ | 428 | #define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */ |
429 | #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ | ||
430 | #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ | ||
431 | #define SRR1_PROGTRAP 0x00020000 /* Trap */ | ||
432 | #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ | ||
429 | #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ | 433 | #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ |
430 | #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ | 434 | #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ |
431 | 435 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a6c2b63227b3..957ceb7059c5 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -194,6 +194,30 @@ int main(void) | |||
194 | DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); | 194 | DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); |
195 | DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); | 195 | DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); |
196 | DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); | 196 | DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); |
197 | DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr)); | ||
198 | DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer)); | ||
199 | DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0])); | ||
200 | DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1])); | ||
201 | DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2])); | ||
202 | DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3])); | ||
203 | DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4])); | ||
204 | DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5])); | ||
205 | DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6])); | ||
206 | DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7])); | ||
207 | DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8])); | ||
208 | DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9])); | ||
209 | DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10])); | ||
210 | DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11])); | ||
211 | DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12])); | ||
212 | DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13])); | ||
213 | DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1)); | ||
214 | DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2)); | ||
215 | DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct, | ||
216 | shadow_vcpu.vmhandler)); | ||
217 | DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct, | ||
218 | shadow_vcpu.scratch0)); | ||
219 | DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct, | ||
220 | shadow_vcpu.scratch1)); | ||
197 | #endif | 221 | #endif |
198 | #endif /* CONFIG_PPC64 */ | 222 | #endif /* CONFIG_PPC64 */ |
199 | 223 | ||
@@ -389,8 +413,6 @@ int main(void) | |||
389 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); | 413 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); |
390 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); | 414 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); |
391 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); | 415 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); |
392 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); | ||
393 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); | ||
394 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); | 416 | DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); |
395 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); | 417 | DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); |
396 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); | 418 | DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); |
@@ -411,11 +433,16 @@ int main(void) | |||
411 | DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); | 433 | DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); |
412 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); | 434 | DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); |
413 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); | 435 | DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); |
436 | DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); | ||
414 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); | 437 | DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); |
415 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); | 438 | DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); |
416 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); | 439 | DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); |
440 | DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall)); | ||
417 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); | 441 | DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); |
418 | #endif | 442 | #else |
443 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); | ||
444 | DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); | ||
445 | #endif /* CONFIG_PPC64 */ | ||
419 | #endif | 446 | #endif |
420 | #ifdef CONFIG_44x | 447 | #ifdef CONFIG_44x |
421 | DEFINE(PGD_T_LOG2, PGD_T_LOG2); | 448 | DEFINE(PGD_T_LOG2, PGD_T_LOG2); |
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 425451453e96..ab3e392ac63c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c | |||
@@ -107,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec); | |||
107 | #endif /* CONFIG_ALTIVEC */ | 107 | #endif /* CONFIG_ALTIVEC */ |
108 | #ifdef CONFIG_VSX | 108 | #ifdef CONFIG_VSX |
109 | EXPORT_SYMBOL(giveup_vsx); | 109 | EXPORT_SYMBOL(giveup_vsx); |
110 | EXPORT_SYMBOL_GPL(__giveup_vsx); | ||
110 | #endif /* CONFIG_VSX */ | 111 | #endif /* CONFIG_VSX */ |
111 | #ifdef CONFIG_SPE | 112 | #ifdef CONFIG_SPE |
112 | EXPORT_SYMBOL(giveup_spe); | 113 | EXPORT_SYMBOL(giveup_spe); |
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 61af58fcecee..65ea083a5b27 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c | |||
@@ -65,13 +65,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
65 | */ | 65 | */ |
66 | switch (dcrn) { | 66 | switch (dcrn) { |
67 | case DCRN_CPR0_CONFIG_ADDR: | 67 | case DCRN_CPR0_CONFIG_ADDR: |
68 | vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; | 68 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); |
69 | break; | 69 | break; |
70 | case DCRN_CPR0_CONFIG_DATA: | 70 | case DCRN_CPR0_CONFIG_DATA: |
71 | local_irq_disable(); | 71 | local_irq_disable(); |
72 | mtdcr(DCRN_CPR0_CONFIG_ADDR, | 72 | mtdcr(DCRN_CPR0_CONFIG_ADDR, |
73 | vcpu->arch.cpr0_cfgaddr); | 73 | vcpu->arch.cpr0_cfgaddr); |
74 | vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); | 74 | kvmppc_set_gpr(vcpu, rt, |
75 | mfdcr(DCRN_CPR0_CONFIG_DATA)); | ||
75 | local_irq_enable(); | 76 | local_irq_enable(); |
76 | break; | 77 | break; |
77 | default: | 78 | default: |
@@ -93,11 +94,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
93 | /* emulate some access in kernel */ | 94 | /* emulate some access in kernel */ |
94 | switch (dcrn) { | 95 | switch (dcrn) { |
95 | case DCRN_CPR0_CONFIG_ADDR: | 96 | case DCRN_CPR0_CONFIG_ADDR: |
96 | vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; | 97 | vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); |
97 | break; | 98 | break; |
98 | default: | 99 | default: |
99 | run->dcr.dcrn = dcrn; | 100 | run->dcr.dcrn = dcrn; |
100 | run->dcr.data = vcpu->arch.gpr[rs]; | 101 | run->dcr.data = kvmppc_get_gpr(vcpu, rs); |
101 | run->dcr.is_write = 1; | 102 | run->dcr.is_write = 1; |
102 | vcpu->arch.dcr_needed = 1; | 103 | vcpu->arch.dcr_needed = 1; |
103 | kvmppc_account_exit(vcpu, DCR_EXITS); | 104 | kvmppc_account_exit(vcpu, DCR_EXITS); |
@@ -146,13 +147,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
146 | 147 | ||
147 | switch (sprn) { | 148 | switch (sprn) { |
148 | case SPRN_PID: | 149 | case SPRN_PID: |
149 | kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; | 150 | kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break; |
150 | case SPRN_MMUCR: | 151 | case SPRN_MMUCR: |
151 | vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; | 152 | vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break; |
152 | case SPRN_CCR0: | 153 | case SPRN_CCR0: |
153 | vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; | 154 | vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break; |
154 | case SPRN_CCR1: | 155 | case SPRN_CCR1: |
155 | vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; | 156 | vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break; |
156 | default: | 157 | default: |
157 | emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); | 158 | emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs); |
158 | } | 159 | } |
@@ -167,13 +168,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
167 | 168 | ||
168 | switch (sprn) { | 169 | switch (sprn) { |
169 | case SPRN_PID: | 170 | case SPRN_PID: |
170 | vcpu->arch.gpr[rt] = vcpu->arch.pid; break; | 171 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break; |
171 | case SPRN_MMUCR: | 172 | case SPRN_MMUCR: |
172 | vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; | 173 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break; |
173 | case SPRN_CCR0: | 174 | case SPRN_CCR0: |
174 | vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; | 175 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break; |
175 | case SPRN_CCR1: | 176 | case SPRN_CCR1: |
176 | vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; | 177 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break; |
177 | default: | 178 | default: |
178 | emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); | 179 | emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); |
179 | } | 180 | } |
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index ff3cb63b8117..2570fcc7665d 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c | |||
@@ -439,7 +439,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) | |||
439 | struct kvmppc_44x_tlbe *tlbe; | 439 | struct kvmppc_44x_tlbe *tlbe; |
440 | unsigned int gtlb_index; | 440 | unsigned int gtlb_index; |
441 | 441 | ||
442 | gtlb_index = vcpu->arch.gpr[ra]; | 442 | gtlb_index = kvmppc_get_gpr(vcpu, ra); |
443 | if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { | 443 | if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { |
444 | printk("%s: index %d\n", __func__, gtlb_index); | 444 | printk("%s: index %d\n", __func__, gtlb_index); |
445 | kvmppc_dump_vcpu(vcpu); | 445 | kvmppc_dump_vcpu(vcpu); |
@@ -455,15 +455,15 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) | |||
455 | switch (ws) { | 455 | switch (ws) { |
456 | case PPC44x_TLB_PAGEID: | 456 | case PPC44x_TLB_PAGEID: |
457 | tlbe->tid = get_mmucr_stid(vcpu); | 457 | tlbe->tid = get_mmucr_stid(vcpu); |
458 | tlbe->word0 = vcpu->arch.gpr[rs]; | 458 | tlbe->word0 = kvmppc_get_gpr(vcpu, rs); |
459 | break; | 459 | break; |
460 | 460 | ||
461 | case PPC44x_TLB_XLAT: | 461 | case PPC44x_TLB_XLAT: |
462 | tlbe->word1 = vcpu->arch.gpr[rs]; | 462 | tlbe->word1 = kvmppc_get_gpr(vcpu, rs); |
463 | break; | 463 | break; |
464 | 464 | ||
465 | case PPC44x_TLB_ATTRIB: | 465 | case PPC44x_TLB_ATTRIB: |
466 | tlbe->word2 = vcpu->arch.gpr[rs]; | 466 | tlbe->word2 = kvmppc_get_gpr(vcpu, rs); |
467 | break; | 467 | break; |
468 | 468 | ||
469 | default: | 469 | default: |
@@ -500,18 +500,20 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) | |||
500 | unsigned int as = get_mmucr_sts(vcpu); | 500 | unsigned int as = get_mmucr_sts(vcpu); |
501 | unsigned int pid = get_mmucr_stid(vcpu); | 501 | unsigned int pid = get_mmucr_stid(vcpu); |
502 | 502 | ||
503 | ea = vcpu->arch.gpr[rb]; | 503 | ea = kvmppc_get_gpr(vcpu, rb); |
504 | if (ra) | 504 | if (ra) |
505 | ea += vcpu->arch.gpr[ra]; | 505 | ea += kvmppc_get_gpr(vcpu, ra); |
506 | 506 | ||
507 | gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); | 507 | gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); |
508 | if (rc) { | 508 | if (rc) { |
509 | u32 cr = kvmppc_get_cr(vcpu); | ||
510 | |||
509 | if (gtlb_index < 0) | 511 | if (gtlb_index < 0) |
510 | vcpu->arch.cr &= ~0x20000000; | 512 | kvmppc_set_cr(vcpu, cr & ~0x20000000); |
511 | else | 513 | else |
512 | vcpu->arch.cr |= 0x20000000; | 514 | kvmppc_set_cr(vcpu, cr | 0x20000000); |
513 | } | 515 | } |
514 | vcpu->arch.gpr[rt] = gtlb_index; | 516 | kvmppc_set_gpr(vcpu, rt, gtlb_index); |
515 | 517 | ||
516 | kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); | 518 | kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); |
517 | return EMULATE_DONE; | 519 | return EMULATE_DONE; |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index fe037fdaf1b3..60624cc9f4d4 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig | |||
@@ -20,6 +20,7 @@ config KVM | |||
20 | bool | 20 | bool |
21 | select PREEMPT_NOTIFIERS | 21 | select PREEMPT_NOTIFIERS |
22 | select ANON_INODES | 22 | select ANON_INODES |
23 | select KVM_MMIO | ||
23 | 24 | ||
24 | config KVM_BOOK3S_64_HANDLER | 25 | config KVM_BOOK3S_64_HANDLER |
25 | bool | 26 | bool |
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3e294bd9b8c6..9a271f0929c7 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c | |||
@@ -33,12 +33,9 @@ | |||
33 | 33 | ||
34 | /* #define EXIT_DEBUG */ | 34 | /* #define EXIT_DEBUG */ |
35 | /* #define EXIT_DEBUG_SIMPLE */ | 35 | /* #define EXIT_DEBUG_SIMPLE */ |
36 | /* #define DEBUG_EXT */ | ||
36 | 37 | ||
37 | /* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0. | 38 | static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); |
38 | * When set, we retrigger a DEC interrupt after that if DEC <= 0. | ||
39 | * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */ | ||
40 | |||
41 | /* #define AGGRESSIVE_DEC */ | ||
42 | 39 | ||
43 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 40 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
44 | { "exits", VCPU_STAT(sum_exits) }, | 41 | { "exits", VCPU_STAT(sum_exits) }, |
@@ -72,16 +69,24 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) | |||
72 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 69 | void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
73 | { | 70 | { |
74 | memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); | 71 | memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb)); |
72 | memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu, | ||
73 | sizeof(get_paca()->shadow_vcpu)); | ||
75 | get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; | 74 | get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max; |
76 | } | 75 | } |
77 | 76 | ||
78 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) | 77 | void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) |
79 | { | 78 | { |
80 | memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); | 79 | memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb)); |
80 | memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu, | ||
81 | sizeof(get_paca()->shadow_vcpu)); | ||
81 | to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; | 82 | to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max; |
83 | |||
84 | kvmppc_giveup_ext(vcpu, MSR_FP); | ||
85 | kvmppc_giveup_ext(vcpu, MSR_VEC); | ||
86 | kvmppc_giveup_ext(vcpu, MSR_VSX); | ||
82 | } | 87 | } |
83 | 88 | ||
84 | #if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG) | 89 | #if defined(EXIT_DEBUG) |
85 | static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) | 90 | static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) |
86 | { | 91 | { |
87 | u64 jd = mftb() - vcpu->arch.dec_jiffies; | 92 | u64 jd = mftb() - vcpu->arch.dec_jiffies; |
@@ -89,6 +94,23 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) | |||
89 | } | 94 | } |
90 | #endif | 95 | #endif |
91 | 96 | ||
97 | static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) | ||
98 | { | ||
99 | vcpu->arch.shadow_msr = vcpu->arch.msr; | ||
100 | /* Guest MSR values */ | ||
101 | vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | | ||
102 | MSR_BE | MSR_DE; | ||
103 | /* Process MSR values */ | ||
104 | vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | | ||
105 | MSR_EE; | ||
106 | /* External providers the guest reserved */ | ||
107 | vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); | ||
108 | /* 64-bit Process MSR values */ | ||
109 | #ifdef CONFIG_PPC_BOOK3S_64 | ||
110 | vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; | ||
111 | #endif | ||
112 | } | ||
113 | |||
92 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | 114 | void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) |
93 | { | 115 | { |
94 | ulong old_msr = vcpu->arch.msr; | 116 | ulong old_msr = vcpu->arch.msr; |
@@ -96,12 +118,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) | |||
96 | #ifdef EXIT_DEBUG | 118 | #ifdef EXIT_DEBUG |
97 | printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); | 119 | printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); |
98 | #endif | 120 | #endif |
121 | |||
99 | msr &= to_book3s(vcpu)->msr_mask; | 122 | msr &= to_book3s(vcpu)->msr_mask; |
100 | vcpu->arch.msr = msr; | 123 | vcpu->arch.msr = msr; |
101 | vcpu->arch.shadow_msr = msr | MSR_USER32; | 124 | kvmppc_recalc_shadow_msr(vcpu); |
102 | vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 | | ||
103 | MSR_USER64 | MSR_SE | MSR_BE | MSR_DE | | ||
104 | MSR_FE1); | ||
105 | 125 | ||
106 | if (msr & (MSR_WE|MSR_POW)) { | 126 | if (msr & (MSR_WE|MSR_POW)) { |
107 | if (!vcpu->arch.pending_exceptions) { | 127 | if (!vcpu->arch.pending_exceptions) { |
@@ -125,11 +145,10 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) | |||
125 | vcpu->arch.mmu.reset_msr(vcpu); | 145 | vcpu->arch.mmu.reset_msr(vcpu); |
126 | } | 146 | } |
127 | 147 | ||
128 | void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) | 148 | static int kvmppc_book3s_vec2irqprio(unsigned int vec) |
129 | { | 149 | { |
130 | unsigned int prio; | 150 | unsigned int prio; |
131 | 151 | ||
132 | vcpu->stat.queue_intr++; | ||
133 | switch (vec) { | 152 | switch (vec) { |
134 | case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; | 153 | case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break; |
135 | case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; | 154 | case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break; |
@@ -149,15 +168,31 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) | |||
149 | default: prio = BOOK3S_IRQPRIO_MAX; break; | 168 | default: prio = BOOK3S_IRQPRIO_MAX; break; |
150 | } | 169 | } |
151 | 170 | ||
152 | set_bit(prio, &vcpu->arch.pending_exceptions); | 171 | return prio; |
172 | } | ||
173 | |||
174 | static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, | ||
175 | unsigned int vec) | ||
176 | { | ||
177 | clear_bit(kvmppc_book3s_vec2irqprio(vec), | ||
178 | &vcpu->arch.pending_exceptions); | ||
179 | } | ||
180 | |||
181 | void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) | ||
182 | { | ||
183 | vcpu->stat.queue_intr++; | ||
184 | |||
185 | set_bit(kvmppc_book3s_vec2irqprio(vec), | ||
186 | &vcpu->arch.pending_exceptions); | ||
153 | #ifdef EXIT_DEBUG | 187 | #ifdef EXIT_DEBUG |
154 | printk(KERN_INFO "Queueing interrupt %x\n", vec); | 188 | printk(KERN_INFO "Queueing interrupt %x\n", vec); |
155 | #endif | 189 | #endif |
156 | } | 190 | } |
157 | 191 | ||
158 | 192 | ||
159 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) | 193 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) |
160 | { | 194 | { |
195 | to_book3s(vcpu)->prog_flags = flags; | ||
161 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); | 196 | kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM); |
162 | } | 197 | } |
163 | 198 | ||
@@ -171,6 +206,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) | |||
171 | return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); | 206 | return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions); |
172 | } | 207 | } |
173 | 208 | ||
209 | void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) | ||
210 | { | ||
211 | kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); | ||
212 | } | ||
213 | |||
174 | void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 214 | void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
175 | struct kvm_interrupt *irq) | 215 | struct kvm_interrupt *irq) |
176 | { | 216 | { |
@@ -181,6 +221,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) | |||
181 | { | 221 | { |
182 | int deliver = 1; | 222 | int deliver = 1; |
183 | int vec = 0; | 223 | int vec = 0; |
224 | ulong flags = 0ULL; | ||
184 | 225 | ||
185 | switch (priority) { | 226 | switch (priority) { |
186 | case BOOK3S_IRQPRIO_DECREMENTER: | 227 | case BOOK3S_IRQPRIO_DECREMENTER: |
@@ -214,6 +255,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) | |||
214 | break; | 255 | break; |
215 | case BOOK3S_IRQPRIO_PROGRAM: | 256 | case BOOK3S_IRQPRIO_PROGRAM: |
216 | vec = BOOK3S_INTERRUPT_PROGRAM; | 257 | vec = BOOK3S_INTERRUPT_PROGRAM; |
258 | flags = to_book3s(vcpu)->prog_flags; | ||
217 | break; | 259 | break; |
218 | case BOOK3S_IRQPRIO_VSX: | 260 | case BOOK3S_IRQPRIO_VSX: |
219 | vec = BOOK3S_INTERRUPT_VSX; | 261 | vec = BOOK3S_INTERRUPT_VSX; |
@@ -244,7 +286,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) | |||
244 | #endif | 286 | #endif |
245 | 287 | ||
246 | if (deliver) | 288 | if (deliver) |
247 | kvmppc_inject_interrupt(vcpu, vec, 0ULL); | 289 | kvmppc_inject_interrupt(vcpu, vec, flags); |
248 | 290 | ||
249 | return deliver; | 291 | return deliver; |
250 | } | 292 | } |
@@ -254,21 +296,15 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) | |||
254 | unsigned long *pending = &vcpu->arch.pending_exceptions; | 296 | unsigned long *pending = &vcpu->arch.pending_exceptions; |
255 | unsigned int priority; | 297 | unsigned int priority; |
256 | 298 | ||
257 | /* XXX be more clever here - no need to mftb() on every entry */ | ||
258 | /* Issue DEC again if it's still active */ | ||
259 | #ifdef AGGRESSIVE_DEC | ||
260 | if (vcpu->arch.msr & MSR_EE) | ||
261 | if (kvmppc_get_dec(vcpu) & 0x80000000) | ||
262 | kvmppc_core_queue_dec(vcpu); | ||
263 | #endif | ||
264 | |||
265 | #ifdef EXIT_DEBUG | 299 | #ifdef EXIT_DEBUG |
266 | if (vcpu->arch.pending_exceptions) | 300 | if (vcpu->arch.pending_exceptions) |
267 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); | 301 | printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions); |
268 | #endif | 302 | #endif |
269 | priority = __ffs(*pending); | 303 | priority = __ffs(*pending); |
270 | while (priority <= (sizeof(unsigned int) * 8)) { | 304 | while (priority <= (sizeof(unsigned int) * 8)) { |
271 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) { | 305 | if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && |
306 | (priority != BOOK3S_IRQPRIO_DECREMENTER)) { | ||
307 | /* DEC interrupts get cleared by mtdec */ | ||
272 | clear_bit(priority, &vcpu->arch.pending_exceptions); | 308 | clear_bit(priority, &vcpu->arch.pending_exceptions); |
273 | break; | 309 | break; |
274 | } | 310 | } |
@@ -503,14 +539,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
503 | /* Page not found in guest PTE entries */ | 539 | /* Page not found in guest PTE entries */ |
504 | vcpu->arch.dear = vcpu->arch.fault_dear; | 540 | vcpu->arch.dear = vcpu->arch.fault_dear; |
505 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; | 541 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr; |
506 | vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); | 542 | vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); |
507 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 543 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
508 | } else if (page_found == -EPERM) { | 544 | } else if (page_found == -EPERM) { |
509 | /* Storage protection */ | 545 | /* Storage protection */ |
510 | vcpu->arch.dear = vcpu->arch.fault_dear; | 546 | vcpu->arch.dear = vcpu->arch.fault_dear; |
511 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; | 547 | to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE; |
512 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; | 548 | to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; |
513 | vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL); | 549 | vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL); |
514 | kvmppc_book3s_queue_irqprio(vcpu, vec); | 550 | kvmppc_book3s_queue_irqprio(vcpu, vec); |
515 | } else if (page_found == -EINVAL) { | 551 | } else if (page_found == -EINVAL) { |
516 | /* Page not found in guest SLB */ | 552 | /* Page not found in guest SLB */ |
@@ -532,13 +568,122 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
532 | r = kvmppc_emulate_mmio(run, vcpu); | 568 | r = kvmppc_emulate_mmio(run, vcpu); |
533 | if ( r == RESUME_HOST_NV ) | 569 | if ( r == RESUME_HOST_NV ) |
534 | r = RESUME_HOST; | 570 | r = RESUME_HOST; |
535 | if ( r == RESUME_GUEST_NV ) | ||
536 | r = RESUME_GUEST; | ||
537 | } | 571 | } |
538 | 572 | ||
539 | return r; | 573 | return r; |
540 | } | 574 | } |
541 | 575 | ||
576 | static inline int get_fpr_index(int i) | ||
577 | { | ||
578 | #ifdef CONFIG_VSX | ||
579 | i *= 2; | ||
580 | #endif | ||
581 | return i; | ||
582 | } | ||
583 | |||
584 | /* Give up external provider (FPU, Altivec, VSX) */ | ||
585 | static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) | ||
586 | { | ||
587 | struct thread_struct *t = ¤t->thread; | ||
588 | u64 *vcpu_fpr = vcpu->arch.fpr; | ||
589 | u64 *vcpu_vsx = vcpu->arch.vsr; | ||
590 | u64 *thread_fpr = (u64*)t->fpr; | ||
591 | int i; | ||
592 | |||
593 | if (!(vcpu->arch.guest_owned_ext & msr)) | ||
594 | return; | ||
595 | |||
596 | #ifdef DEBUG_EXT | ||
597 | printk(KERN_INFO "Giving up ext 0x%lx\n", msr); | ||
598 | #endif | ||
599 | |||
600 | switch (msr) { | ||
601 | case MSR_FP: | ||
602 | giveup_fpu(current); | ||
603 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) | ||
604 | vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; | ||
605 | |||
606 | vcpu->arch.fpscr = t->fpscr.val; | ||
607 | break; | ||
608 | case MSR_VEC: | ||
609 | #ifdef CONFIG_ALTIVEC | ||
610 | giveup_altivec(current); | ||
611 | memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); | ||
612 | vcpu->arch.vscr = t->vscr; | ||
613 | #endif | ||
614 | break; | ||
615 | case MSR_VSX: | ||
616 | #ifdef CONFIG_VSX | ||
617 | __giveup_vsx(current); | ||
618 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) | ||
619 | vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; | ||
620 | #endif | ||
621 | break; | ||
622 | default: | ||
623 | BUG(); | ||
624 | } | ||
625 | |||
626 | vcpu->arch.guest_owned_ext &= ~msr; | ||
627 | current->thread.regs->msr &= ~msr; | ||
628 | kvmppc_recalc_shadow_msr(vcpu); | ||
629 | } | ||
630 | |||
631 | /* Handle external providers (FPU, Altivec, VSX) */ | ||
632 | static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, | ||
633 | ulong msr) | ||
634 | { | ||
635 | struct thread_struct *t = ¤t->thread; | ||
636 | u64 *vcpu_fpr = vcpu->arch.fpr; | ||
637 | u64 *vcpu_vsx = vcpu->arch.vsr; | ||
638 | u64 *thread_fpr = (u64*)t->fpr; | ||
639 | int i; | ||
640 | |||
641 | if (!(vcpu->arch.msr & msr)) { | ||
642 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | ||
643 | return RESUME_GUEST; | ||
644 | } | ||
645 | |||
646 | #ifdef DEBUG_EXT | ||
647 | printk(KERN_INFO "Loading up ext 0x%lx\n", msr); | ||
648 | #endif | ||
649 | |||
650 | current->thread.regs->msr |= msr; | ||
651 | |||
652 | switch (msr) { | ||
653 | case MSR_FP: | ||
654 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) | ||
655 | thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; | ||
656 | |||
657 | t->fpscr.val = vcpu->arch.fpscr; | ||
658 | t->fpexc_mode = 0; | ||
659 | kvmppc_load_up_fpu(); | ||
660 | break; | ||
661 | case MSR_VEC: | ||
662 | #ifdef CONFIG_ALTIVEC | ||
663 | memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); | ||
664 | t->vscr = vcpu->arch.vscr; | ||
665 | t->vrsave = -1; | ||
666 | kvmppc_load_up_altivec(); | ||
667 | #endif | ||
668 | break; | ||
669 | case MSR_VSX: | ||
670 | #ifdef CONFIG_VSX | ||
671 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) | ||
672 | thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; | ||
673 | kvmppc_load_up_vsx(); | ||
674 | #endif | ||
675 | break; | ||
676 | default: | ||
677 | BUG(); | ||
678 | } | ||
679 | |||
680 | vcpu->arch.guest_owned_ext |= msr; | ||
681 | |||
682 | kvmppc_recalc_shadow_msr(vcpu); | ||
683 | |||
684 | return RESUME_GUEST; | ||
685 | } | ||
686 | |||
542 | int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | 687 | int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, |
543 | unsigned int exit_nr) | 688 | unsigned int exit_nr) |
544 | { | 689 | { |
@@ -563,7 +708,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
563 | case BOOK3S_INTERRUPT_INST_STORAGE: | 708 | case BOOK3S_INTERRUPT_INST_STORAGE: |
564 | vcpu->stat.pf_instruc++; | 709 | vcpu->stat.pf_instruc++; |
565 | /* only care about PTEG not found errors, but leave NX alone */ | 710 | /* only care about PTEG not found errors, but leave NX alone */ |
566 | if (vcpu->arch.shadow_msr & 0x40000000) { | 711 | if (vcpu->arch.shadow_srr1 & 0x40000000) { |
567 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); | 712 | r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr); |
568 | vcpu->stat.sp_instruc++; | 713 | vcpu->stat.sp_instruc++; |
569 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 714 | } else if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
@@ -575,7 +720,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
575 | */ | 720 | */ |
576 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | 721 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); |
577 | } else { | 722 | } else { |
578 | vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000); | 723 | vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; |
579 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 724 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
580 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); | 725 | kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); |
581 | r = RESUME_GUEST; | 726 | r = RESUME_GUEST; |
@@ -621,6 +766,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
621 | case BOOK3S_INTERRUPT_PROGRAM: | 766 | case BOOK3S_INTERRUPT_PROGRAM: |
622 | { | 767 | { |
623 | enum emulation_result er; | 768 | enum emulation_result er; |
769 | ulong flags; | ||
770 | |||
771 | flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; | ||
624 | 772 | ||
625 | if (vcpu->arch.msr & MSR_PR) { | 773 | if (vcpu->arch.msr & MSR_PR) { |
626 | #ifdef EXIT_DEBUG | 774 | #ifdef EXIT_DEBUG |
@@ -628,7 +776,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
628 | #endif | 776 | #endif |
629 | if ((vcpu->arch.last_inst & 0xff0007ff) != | 777 | if ((vcpu->arch.last_inst & 0xff0007ff) != |
630 | (INS_DCBZ & 0xfffffff7)) { | 778 | (INS_DCBZ & 0xfffffff7)) { |
631 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 779 | kvmppc_core_queue_program(vcpu, flags); |
632 | r = RESUME_GUEST; | 780 | r = RESUME_GUEST; |
633 | break; | 781 | break; |
634 | } | 782 | } |
@@ -638,12 +786,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
638 | er = kvmppc_emulate_instruction(run, vcpu); | 786 | er = kvmppc_emulate_instruction(run, vcpu); |
639 | switch (er) { | 787 | switch (er) { |
640 | case EMULATE_DONE: | 788 | case EMULATE_DONE: |
641 | r = RESUME_GUEST; | 789 | r = RESUME_GUEST_NV; |
642 | break; | 790 | break; |
643 | case EMULATE_FAIL: | 791 | case EMULATE_FAIL: |
644 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", | 792 | printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", |
645 | __func__, vcpu->arch.pc, vcpu->arch.last_inst); | 793 | __func__, vcpu->arch.pc, vcpu->arch.last_inst); |
646 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 794 | kvmppc_core_queue_program(vcpu, flags); |
647 | r = RESUME_GUEST; | 795 | r = RESUME_GUEST; |
648 | break; | 796 | break; |
649 | default: | 797 | default: |
@@ -653,23 +801,30 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
653 | } | 801 | } |
654 | case BOOK3S_INTERRUPT_SYSCALL: | 802 | case BOOK3S_INTERRUPT_SYSCALL: |
655 | #ifdef EXIT_DEBUG | 803 | #ifdef EXIT_DEBUG |
656 | printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]); | 804 | printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); |
657 | #endif | 805 | #endif |
658 | vcpu->stat.syscall_exits++; | 806 | vcpu->stat.syscall_exits++; |
659 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 807 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
660 | r = RESUME_GUEST; | 808 | r = RESUME_GUEST; |
661 | break; | 809 | break; |
662 | case BOOK3S_INTERRUPT_MACHINE_CHECK: | ||
663 | case BOOK3S_INTERRUPT_FP_UNAVAIL: | 810 | case BOOK3S_INTERRUPT_FP_UNAVAIL: |
664 | case BOOK3S_INTERRUPT_TRACE: | 811 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); |
812 | break; | ||
665 | case BOOK3S_INTERRUPT_ALTIVEC: | 813 | case BOOK3S_INTERRUPT_ALTIVEC: |
814 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); | ||
815 | break; | ||
666 | case BOOK3S_INTERRUPT_VSX: | 816 | case BOOK3S_INTERRUPT_VSX: |
817 | r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); | ||
818 | break; | ||
819 | case BOOK3S_INTERRUPT_MACHINE_CHECK: | ||
820 | case BOOK3S_INTERRUPT_TRACE: | ||
667 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); | 821 | kvmppc_book3s_queue_irqprio(vcpu, exit_nr); |
668 | r = RESUME_GUEST; | 822 | r = RESUME_GUEST; |
669 | break; | 823 | break; |
670 | default: | 824 | default: |
671 | /* Ugh - bork here! What did we get? */ | 825 | /* Ugh - bork here! What did we get? */ |
672 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr); | 826 | printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", |
827 | exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1); | ||
673 | r = RESUME_HOST; | 828 | r = RESUME_HOST; |
674 | BUG(); | 829 | BUG(); |
675 | break; | 830 | break; |
@@ -712,10 +867,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
712 | int i; | 867 | int i; |
713 | 868 | ||
714 | regs->pc = vcpu->arch.pc; | 869 | regs->pc = vcpu->arch.pc; |
715 | regs->cr = vcpu->arch.cr; | 870 | regs->cr = kvmppc_get_cr(vcpu); |
716 | regs->ctr = vcpu->arch.ctr; | 871 | regs->ctr = vcpu->arch.ctr; |
717 | regs->lr = vcpu->arch.lr; | 872 | regs->lr = vcpu->arch.lr; |
718 | regs->xer = vcpu->arch.xer; | 873 | regs->xer = kvmppc_get_xer(vcpu); |
719 | regs->msr = vcpu->arch.msr; | 874 | regs->msr = vcpu->arch.msr; |
720 | regs->srr0 = vcpu->arch.srr0; | 875 | regs->srr0 = vcpu->arch.srr0; |
721 | regs->srr1 = vcpu->arch.srr1; | 876 | regs->srr1 = vcpu->arch.srr1; |
@@ -729,7 +884,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
729 | regs->sprg7 = vcpu->arch.sprg6; | 884 | regs->sprg7 = vcpu->arch.sprg6; |
730 | 885 | ||
731 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 886 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
732 | regs->gpr[i] = vcpu->arch.gpr[i]; | 887 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
733 | 888 | ||
734 | return 0; | 889 | return 0; |
735 | } | 890 | } |
@@ -739,10 +894,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
739 | int i; | 894 | int i; |
740 | 895 | ||
741 | vcpu->arch.pc = regs->pc; | 896 | vcpu->arch.pc = regs->pc; |
742 | vcpu->arch.cr = regs->cr; | 897 | kvmppc_set_cr(vcpu, regs->cr); |
743 | vcpu->arch.ctr = regs->ctr; | 898 | vcpu->arch.ctr = regs->ctr; |
744 | vcpu->arch.lr = regs->lr; | 899 | vcpu->arch.lr = regs->lr; |
745 | vcpu->arch.xer = regs->xer; | 900 | kvmppc_set_xer(vcpu, regs->xer); |
746 | kvmppc_set_msr(vcpu, regs->msr); | 901 | kvmppc_set_msr(vcpu, regs->msr); |
747 | vcpu->arch.srr0 = regs->srr0; | 902 | vcpu->arch.srr0 = regs->srr0; |
748 | vcpu->arch.srr1 = regs->srr1; | 903 | vcpu->arch.srr1 = regs->srr1; |
@@ -754,8 +909,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
754 | vcpu->arch.sprg6 = regs->sprg5; | 909 | vcpu->arch.sprg6 = regs->sprg5; |
755 | vcpu->arch.sprg7 = regs->sprg6; | 910 | vcpu->arch.sprg7 = regs->sprg6; |
756 | 911 | ||
757 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) | 912 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
758 | vcpu->arch.gpr[i] = regs->gpr[i]; | 913 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
759 | 914 | ||
760 | return 0; | 915 | return 0; |
761 | } | 916 | } |
@@ -850,7 +1005,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
850 | int is_dirty = 0; | 1005 | int is_dirty = 0; |
851 | int r, n; | 1006 | int r, n; |
852 | 1007 | ||
853 | down_write(&kvm->slots_lock); | 1008 | mutex_lock(&kvm->slots_lock); |
854 | 1009 | ||
855 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 1010 | r = kvm_get_dirty_log(kvm, log, &is_dirty); |
856 | if (r) | 1011 | if (r) |
@@ -858,7 +1013,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
858 | 1013 | ||
859 | /* If nothing is dirty, don't bother messing with page tables. */ | 1014 | /* If nothing is dirty, don't bother messing with page tables. */ |
860 | if (is_dirty) { | 1015 | if (is_dirty) { |
861 | memslot = &kvm->memslots[log->slot]; | 1016 | memslot = &kvm->memslots->memslots[log->slot]; |
862 | 1017 | ||
863 | ga = memslot->base_gfn << PAGE_SHIFT; | 1018 | ga = memslot->base_gfn << PAGE_SHIFT; |
864 | ga_end = ga + (memslot->npages << PAGE_SHIFT); | 1019 | ga_end = ga + (memslot->npages << PAGE_SHIFT); |
@@ -872,7 +1027,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
872 | 1027 | ||
873 | r = 0; | 1028 | r = 0; |
874 | out: | 1029 | out: |
875 | up_write(&kvm->slots_lock); | 1030 | mutex_unlock(&kvm->slots_lock); |
876 | return r; | 1031 | return r; |
877 | } | 1032 | } |
878 | 1033 | ||
@@ -910,6 +1065,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) | |||
910 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; | 1065 | vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem; |
911 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; | 1066 | vcpu->arch.trampoline_enter = kvmppc_trampoline_enter; |
912 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; | 1067 | vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem; |
1068 | vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall; | ||
913 | 1069 | ||
914 | vcpu->arch.shadow_msr = MSR_USER64; | 1070 | vcpu->arch.shadow_msr = MSR_USER64; |
915 | 1071 | ||
@@ -943,6 +1099,10 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); | |||
943 | int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 1099 | int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
944 | { | 1100 | { |
945 | int ret; | 1101 | int ret; |
1102 | struct thread_struct ext_bkp; | ||
1103 | bool save_vec = current->thread.used_vr; | ||
1104 | bool save_vsx = current->thread.used_vsr; | ||
1105 | ulong ext_msr; | ||
946 | 1106 | ||
947 | /* No need to go into the guest when all we do is going out */ | 1107 | /* No need to go into the guest when all we do is going out */ |
948 | if (signal_pending(current)) { | 1108 | if (signal_pending(current)) { |
@@ -950,6 +1110,35 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
950 | return -EINTR; | 1110 | return -EINTR; |
951 | } | 1111 | } |
952 | 1112 | ||
1113 | /* Save FPU state in stack */ | ||
1114 | if (current->thread.regs->msr & MSR_FP) | ||
1115 | giveup_fpu(current); | ||
1116 | memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr)); | ||
1117 | ext_bkp.fpscr = current->thread.fpscr; | ||
1118 | ext_bkp.fpexc_mode = current->thread.fpexc_mode; | ||
1119 | |||
1120 | #ifdef CONFIG_ALTIVEC | ||
1121 | /* Save Altivec state in stack */ | ||
1122 | if (save_vec) { | ||
1123 | if (current->thread.regs->msr & MSR_VEC) | ||
1124 | giveup_altivec(current); | ||
1125 | memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr)); | ||
1126 | ext_bkp.vscr = current->thread.vscr; | ||
1127 | ext_bkp.vrsave = current->thread.vrsave; | ||
1128 | } | ||
1129 | ext_bkp.used_vr = current->thread.used_vr; | ||
1130 | #endif | ||
1131 | |||
1132 | #ifdef CONFIG_VSX | ||
1133 | /* Save VSX state in stack */ | ||
1134 | if (save_vsx && (current->thread.regs->msr & MSR_VSX)) | ||
1135 | __giveup_vsx(current); | ||
1136 | ext_bkp.used_vsr = current->thread.used_vsr; | ||
1137 | #endif | ||
1138 | |||
1139 | /* Remember the MSR with disabled extensions */ | ||
1140 | ext_msr = current->thread.regs->msr; | ||
1141 | |||
953 | /* XXX we get called with irq disabled - change that! */ | 1142 | /* XXX we get called with irq disabled - change that! */ |
954 | local_irq_enable(); | 1143 | local_irq_enable(); |
955 | 1144 | ||
@@ -957,6 +1146,32 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
957 | 1146 | ||
958 | local_irq_disable(); | 1147 | local_irq_disable(); |
959 | 1148 | ||
1149 | current->thread.regs->msr = ext_msr; | ||
1150 | |||
1151 | /* Make sure we save the guest FPU/Altivec/VSX state */ | ||
1152 | kvmppc_giveup_ext(vcpu, MSR_FP); | ||
1153 | kvmppc_giveup_ext(vcpu, MSR_VEC); | ||
1154 | kvmppc_giveup_ext(vcpu, MSR_VSX); | ||
1155 | |||
1156 | /* Restore FPU state from stack */ | ||
1157 | memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr)); | ||
1158 | current->thread.fpscr = ext_bkp.fpscr; | ||
1159 | current->thread.fpexc_mode = ext_bkp.fpexc_mode; | ||
1160 | |||
1161 | #ifdef CONFIG_ALTIVEC | ||
1162 | /* Restore Altivec state from stack */ | ||
1163 | if (save_vec && current->thread.used_vr) { | ||
1164 | memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr)); | ||
1165 | current->thread.vscr = ext_bkp.vscr; | ||
1166 | current->thread.vrsave= ext_bkp.vrsave; | ||
1167 | } | ||
1168 | current->thread.used_vr = ext_bkp.used_vr; | ||
1169 | #endif | ||
1170 | |||
1171 | #ifdef CONFIG_VSX | ||
1172 | current->thread.used_vsr = ext_bkp.used_vsr; | ||
1173 | #endif | ||
1174 | |||
960 | return ret; | 1175 | return ret; |
961 | } | 1176 | } |
962 | 1177 | ||
diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c index 1027eac6d474..2b0ee7e040c9 100644 --- a/arch/powerpc/kvm/book3s_64_emulate.c +++ b/arch/powerpc/kvm/book3s_64_emulate.c | |||
@@ -65,11 +65,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
65 | case 31: | 65 | case 31: |
66 | switch (get_xop(inst)) { | 66 | switch (get_xop(inst)) { |
67 | case OP_31_XOP_MFMSR: | 67 | case OP_31_XOP_MFMSR: |
68 | vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr; | 68 | kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr); |
69 | break; | 69 | break; |
70 | case OP_31_XOP_MTMSRD: | 70 | case OP_31_XOP_MTMSRD: |
71 | { | 71 | { |
72 | ulong rs = vcpu->arch.gpr[get_rs(inst)]; | 72 | ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); |
73 | if (inst & 0x10000) { | 73 | if (inst & 0x10000) { |
74 | vcpu->arch.msr &= ~(MSR_RI | MSR_EE); | 74 | vcpu->arch.msr &= ~(MSR_RI | MSR_EE); |
75 | vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); | 75 | vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); |
@@ -78,30 +78,30 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
78 | break; | 78 | break; |
79 | } | 79 | } |
80 | case OP_31_XOP_MTMSR: | 80 | case OP_31_XOP_MTMSR: |
81 | kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]); | 81 | kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); |
82 | break; | 82 | break; |
83 | case OP_31_XOP_MFSRIN: | 83 | case OP_31_XOP_MFSRIN: |
84 | { | 84 | { |
85 | int srnum; | 85 | int srnum; |
86 | 86 | ||
87 | srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf; | 87 | srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf; |
88 | if (vcpu->arch.mmu.mfsrin) { | 88 | if (vcpu->arch.mmu.mfsrin) { |
89 | u32 sr; | 89 | u32 sr; |
90 | sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); | 90 | sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); |
91 | vcpu->arch.gpr[get_rt(inst)] = sr; | 91 | kvmppc_set_gpr(vcpu, get_rt(inst), sr); |
92 | } | 92 | } |
93 | break; | 93 | break; |
94 | } | 94 | } |
95 | case OP_31_XOP_MTSRIN: | 95 | case OP_31_XOP_MTSRIN: |
96 | vcpu->arch.mmu.mtsrin(vcpu, | 96 | vcpu->arch.mmu.mtsrin(vcpu, |
97 | (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf, | 97 | (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, |
98 | vcpu->arch.gpr[get_rs(inst)]); | 98 | kvmppc_get_gpr(vcpu, get_rs(inst))); |
99 | break; | 99 | break; |
100 | case OP_31_XOP_TLBIE: | 100 | case OP_31_XOP_TLBIE: |
101 | case OP_31_XOP_TLBIEL: | 101 | case OP_31_XOP_TLBIEL: |
102 | { | 102 | { |
103 | bool large = (inst & 0x00200000) ? true : false; | 103 | bool large = (inst & 0x00200000) ? true : false; |
104 | ulong addr = vcpu->arch.gpr[get_rb(inst)]; | 104 | ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst)); |
105 | vcpu->arch.mmu.tlbie(vcpu, addr, large); | 105 | vcpu->arch.mmu.tlbie(vcpu, addr, large); |
106 | break; | 106 | break; |
107 | } | 107 | } |
@@ -111,14 +111,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
111 | if (!vcpu->arch.mmu.slbmte) | 111 | if (!vcpu->arch.mmu.slbmte) |
112 | return EMULATE_FAIL; | 112 | return EMULATE_FAIL; |
113 | 113 | ||
114 | vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)], | 114 | vcpu->arch.mmu.slbmte(vcpu, |
115 | vcpu->arch.gpr[get_rb(inst)]); | 115 | kvmppc_get_gpr(vcpu, get_rs(inst)), |
116 | kvmppc_get_gpr(vcpu, get_rb(inst))); | ||
116 | break; | 117 | break; |
117 | case OP_31_XOP_SLBIE: | 118 | case OP_31_XOP_SLBIE: |
118 | if (!vcpu->arch.mmu.slbie) | 119 | if (!vcpu->arch.mmu.slbie) |
119 | return EMULATE_FAIL; | 120 | return EMULATE_FAIL; |
120 | 121 | ||
121 | vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]); | 122 | vcpu->arch.mmu.slbie(vcpu, |
123 | kvmppc_get_gpr(vcpu, get_rb(inst))); | ||
122 | break; | 124 | break; |
123 | case OP_31_XOP_SLBIA: | 125 | case OP_31_XOP_SLBIA: |
124 | if (!vcpu->arch.mmu.slbia) | 126 | if (!vcpu->arch.mmu.slbia) |
@@ -132,9 +134,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
132 | } else { | 134 | } else { |
133 | ulong t, rb; | 135 | ulong t, rb; |
134 | 136 | ||
135 | rb = vcpu->arch.gpr[get_rb(inst)]; | 137 | rb = kvmppc_get_gpr(vcpu, get_rb(inst)); |
136 | t = vcpu->arch.mmu.slbmfee(vcpu, rb); | 138 | t = vcpu->arch.mmu.slbmfee(vcpu, rb); |
137 | vcpu->arch.gpr[get_rt(inst)] = t; | 139 | kvmppc_set_gpr(vcpu, get_rt(inst), t); |
138 | } | 140 | } |
139 | break; | 141 | break; |
140 | case OP_31_XOP_SLBMFEV: | 142 | case OP_31_XOP_SLBMFEV: |
@@ -143,20 +145,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
143 | } else { | 145 | } else { |
144 | ulong t, rb; | 146 | ulong t, rb; |
145 | 147 | ||
146 | rb = vcpu->arch.gpr[get_rb(inst)]; | 148 | rb = kvmppc_get_gpr(vcpu, get_rb(inst)); |
147 | t = vcpu->arch.mmu.slbmfev(vcpu, rb); | 149 | t = vcpu->arch.mmu.slbmfev(vcpu, rb); |
148 | vcpu->arch.gpr[get_rt(inst)] = t; | 150 | kvmppc_set_gpr(vcpu, get_rt(inst), t); |
149 | } | 151 | } |
150 | break; | 152 | break; |
151 | case OP_31_XOP_DCBZ: | 153 | case OP_31_XOP_DCBZ: |
152 | { | 154 | { |
153 | ulong rb = vcpu->arch.gpr[get_rb(inst)]; | 155 | ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); |
154 | ulong ra = 0; | 156 | ulong ra = 0; |
155 | ulong addr; | 157 | ulong addr; |
156 | u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; | 158 | u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; |
157 | 159 | ||
158 | if (get_ra(inst)) | 160 | if (get_ra(inst)) |
159 | ra = vcpu->arch.gpr[get_ra(inst)]; | 161 | ra = kvmppc_get_gpr(vcpu, get_ra(inst)); |
160 | 162 | ||
161 | addr = (ra + rb) & ~31ULL; | 163 | addr = (ra + rb) & ~31ULL; |
162 | if (!(vcpu->arch.msr & MSR_SF)) | 164 | if (!(vcpu->arch.msr & MSR_SF)) |
@@ -233,43 +235,44 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) | |||
233 | int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | 235 | int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) |
234 | { | 236 | { |
235 | int emulated = EMULATE_DONE; | 237 | int emulated = EMULATE_DONE; |
238 | ulong spr_val = kvmppc_get_gpr(vcpu, rs); | ||
236 | 239 | ||
237 | switch (sprn) { | 240 | switch (sprn) { |
238 | case SPRN_SDR1: | 241 | case SPRN_SDR1: |
239 | to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs]; | 242 | to_book3s(vcpu)->sdr1 = spr_val; |
240 | break; | 243 | break; |
241 | case SPRN_DSISR: | 244 | case SPRN_DSISR: |
242 | to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs]; | 245 | to_book3s(vcpu)->dsisr = spr_val; |
243 | break; | 246 | break; |
244 | case SPRN_DAR: | 247 | case SPRN_DAR: |
245 | vcpu->arch.dear = vcpu->arch.gpr[rs]; | 248 | vcpu->arch.dear = spr_val; |
246 | break; | 249 | break; |
247 | case SPRN_HIOR: | 250 | case SPRN_HIOR: |
248 | to_book3s(vcpu)->hior = vcpu->arch.gpr[rs]; | 251 | to_book3s(vcpu)->hior = spr_val; |
249 | break; | 252 | break; |
250 | case SPRN_IBAT0U ... SPRN_IBAT3L: | 253 | case SPRN_IBAT0U ... SPRN_IBAT3L: |
251 | case SPRN_IBAT4U ... SPRN_IBAT7L: | 254 | case SPRN_IBAT4U ... SPRN_IBAT7L: |
252 | case SPRN_DBAT0U ... SPRN_DBAT3L: | 255 | case SPRN_DBAT0U ... SPRN_DBAT3L: |
253 | case SPRN_DBAT4U ... SPRN_DBAT7L: | 256 | case SPRN_DBAT4U ... SPRN_DBAT7L: |
254 | kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]); | 257 | kvmppc_write_bat(vcpu, sprn, (u32)spr_val); |
255 | /* BAT writes happen so rarely that we're ok to flush | 258 | /* BAT writes happen so rarely that we're ok to flush |
256 | * everything here */ | 259 | * everything here */ |
257 | kvmppc_mmu_pte_flush(vcpu, 0, 0); | 260 | kvmppc_mmu_pte_flush(vcpu, 0, 0); |
258 | break; | 261 | break; |
259 | case SPRN_HID0: | 262 | case SPRN_HID0: |
260 | to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs]; | 263 | to_book3s(vcpu)->hid[0] = spr_val; |
261 | break; | 264 | break; |
262 | case SPRN_HID1: | 265 | case SPRN_HID1: |
263 | to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs]; | 266 | to_book3s(vcpu)->hid[1] = spr_val; |
264 | break; | 267 | break; |
265 | case SPRN_HID2: | 268 | case SPRN_HID2: |
266 | to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs]; | 269 | to_book3s(vcpu)->hid[2] = spr_val; |
267 | break; | 270 | break; |
268 | case SPRN_HID4: | 271 | case SPRN_HID4: |
269 | to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs]; | 272 | to_book3s(vcpu)->hid[4] = spr_val; |
270 | break; | 273 | break; |
271 | case SPRN_HID5: | 274 | case SPRN_HID5: |
272 | to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs]; | 275 | to_book3s(vcpu)->hid[5] = spr_val; |
273 | /* guest HID5 set can change is_dcbz32 */ | 276 | /* guest HID5 set can change is_dcbz32 */ |
274 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && | 277 | if (vcpu->arch.mmu.is_dcbz32(vcpu) && |
275 | (mfmsr() & MSR_HV)) | 278 | (mfmsr() & MSR_HV)) |
@@ -299,38 +302,38 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
299 | 302 | ||
300 | switch (sprn) { | 303 | switch (sprn) { |
301 | case SPRN_SDR1: | 304 | case SPRN_SDR1: |
302 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1; | 305 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); |
303 | break; | 306 | break; |
304 | case SPRN_DSISR: | 307 | case SPRN_DSISR: |
305 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr; | 308 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr); |
306 | break; | 309 | break; |
307 | case SPRN_DAR: | 310 | case SPRN_DAR: |
308 | vcpu->arch.gpr[rt] = vcpu->arch.dear; | 311 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); |
309 | break; | 312 | break; |
310 | case SPRN_HIOR: | 313 | case SPRN_HIOR: |
311 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior; | 314 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); |
312 | break; | 315 | break; |
313 | case SPRN_HID0: | 316 | case SPRN_HID0: |
314 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0]; | 317 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]); |
315 | break; | 318 | break; |
316 | case SPRN_HID1: | 319 | case SPRN_HID1: |
317 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1]; | 320 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); |
318 | break; | 321 | break; |
319 | case SPRN_HID2: | 322 | case SPRN_HID2: |
320 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2]; | 323 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); |
321 | break; | 324 | break; |
322 | case SPRN_HID4: | 325 | case SPRN_HID4: |
323 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4]; | 326 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); |
324 | break; | 327 | break; |
325 | case SPRN_HID5: | 328 | case SPRN_HID5: |
326 | vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5]; | 329 | kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); |
327 | break; | 330 | break; |
328 | case SPRN_THRM1: | 331 | case SPRN_THRM1: |
329 | case SPRN_THRM2: | 332 | case SPRN_THRM2: |
330 | case SPRN_THRM3: | 333 | case SPRN_THRM3: |
331 | case SPRN_CTRLF: | 334 | case SPRN_CTRLF: |
332 | case SPRN_CTRLT: | 335 | case SPRN_CTRLT: |
333 | vcpu->arch.gpr[rt] = 0; | 336 | kvmppc_set_gpr(vcpu, rt, 0); |
334 | break; | 337 | break; |
335 | default: | 338 | default: |
336 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); | 339 | printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); |
diff --git a/arch/powerpc/kvm/book3s_64_exports.c b/arch/powerpc/kvm/book3s_64_exports.c index 5b2db38ed86c..1dd5a1ddfd0d 100644 --- a/arch/powerpc/kvm/book3s_64_exports.c +++ b/arch/powerpc/kvm/book3s_64_exports.c | |||
@@ -22,3 +22,11 @@ | |||
22 | 22 | ||
23 | EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter); | 23 | EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter); |
24 | EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem); | 24 | EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem); |
25 | EXPORT_SYMBOL_GPL(kvmppc_rmcall); | ||
26 | EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); | ||
27 | #ifdef CONFIG_ALTIVEC | ||
28 | EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); | ||
29 | #endif | ||
30 | #ifdef CONFIG_VSX | ||
31 | EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx); | ||
32 | #endif | ||
diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S index 7b55d8094c8b..c1584d0cbce8 100644 --- a/arch/powerpc/kvm/book3s_64_interrupts.S +++ b/arch/powerpc/kvm/book3s_64_interrupts.S | |||
@@ -28,11 +28,6 @@ | |||
28 | #define ULONG_SIZE 8 | 28 | #define ULONG_SIZE 8 |
29 | #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) | 29 | #define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE)) |
30 | 30 | ||
31 | .macro mfpaca tmp_reg, src_reg, offset, vcpu_reg | ||
32 | ld \tmp_reg, (PACA_EXMC+\offset)(r13) | ||
33 | std \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg) | ||
34 | .endm | ||
35 | |||
36 | .macro DISABLE_INTERRUPTS | 31 | .macro DISABLE_INTERRUPTS |
37 | mfmsr r0 | 32 | mfmsr r0 |
38 | rldicl r0,r0,48,1 | 33 | rldicl r0,r0,48,1 |
@@ -40,6 +35,26 @@ | |||
40 | mtmsrd r0,1 | 35 | mtmsrd r0,1 |
41 | .endm | 36 | .endm |
42 | 37 | ||
38 | #define VCPU_LOAD_NVGPRS(vcpu) \ | ||
39 | ld r14, VCPU_GPR(r14)(vcpu); \ | ||
40 | ld r15, VCPU_GPR(r15)(vcpu); \ | ||
41 | ld r16, VCPU_GPR(r16)(vcpu); \ | ||
42 | ld r17, VCPU_GPR(r17)(vcpu); \ | ||
43 | ld r18, VCPU_GPR(r18)(vcpu); \ | ||
44 | ld r19, VCPU_GPR(r19)(vcpu); \ | ||
45 | ld r20, VCPU_GPR(r20)(vcpu); \ | ||
46 | ld r21, VCPU_GPR(r21)(vcpu); \ | ||
47 | ld r22, VCPU_GPR(r22)(vcpu); \ | ||
48 | ld r23, VCPU_GPR(r23)(vcpu); \ | ||
49 | ld r24, VCPU_GPR(r24)(vcpu); \ | ||
50 | ld r25, VCPU_GPR(r25)(vcpu); \ | ||
51 | ld r26, VCPU_GPR(r26)(vcpu); \ | ||
52 | ld r27, VCPU_GPR(r27)(vcpu); \ | ||
53 | ld r28, VCPU_GPR(r28)(vcpu); \ | ||
54 | ld r29, VCPU_GPR(r29)(vcpu); \ | ||
55 | ld r30, VCPU_GPR(r30)(vcpu); \ | ||
56 | ld r31, VCPU_GPR(r31)(vcpu); \ | ||
57 | |||
43 | /***************************************************************************** | 58 | /***************************************************************************** |
44 | * * | 59 | * * |
45 | * Guest entry / exit code that is in kernel module memory (highmem) * | 60 | * Guest entry / exit code that is in kernel module memory (highmem) * |
@@ -67,61 +82,32 @@ kvm_start_entry: | |||
67 | SAVE_NVGPRS(r1) | 82 | SAVE_NVGPRS(r1) |
68 | 83 | ||
69 | /* Save LR */ | 84 | /* Save LR */ |
70 | mflr r14 | 85 | std r0, _LINK(r1) |
71 | std r14, _LINK(r1) | ||
72 | |||
73 | /* XXX optimize non-volatile loading away */ | ||
74 | kvm_start_lightweight: | ||
75 | 86 | ||
76 | DISABLE_INTERRUPTS | 87 | /* Load non-volatile guest state from the vcpu */ |
88 | VCPU_LOAD_NVGPRS(r4) | ||
77 | 89 | ||
78 | /* Save R1/R2 in the PACA */ | 90 | /* Save R1/R2 in the PACA */ |
79 | std r1, PACAR1(r13) | 91 | std r1, PACA_KVM_HOST_R1(r13) |
80 | std r2, (PACA_EXMC+EX_SRR0)(r13) | 92 | std r2, PACA_KVM_HOST_R2(r13) |
93 | |||
94 | /* XXX swap in/out on load? */ | ||
81 | ld r3, VCPU_HIGHMEM_HANDLER(r4) | 95 | ld r3, VCPU_HIGHMEM_HANDLER(r4) |
82 | std r3, PACASAVEDMSR(r13) | 96 | std r3, PACA_KVM_VMHANDLER(r13) |
83 | 97 | ||
84 | /* Load non-volatile guest state from the vcpu */ | 98 | kvm_start_lightweight: |
85 | ld r14, VCPU_GPR(r14)(r4) | ||
86 | ld r15, VCPU_GPR(r15)(r4) | ||
87 | ld r16, VCPU_GPR(r16)(r4) | ||
88 | ld r17, VCPU_GPR(r17)(r4) | ||
89 | ld r18, VCPU_GPR(r18)(r4) | ||
90 | ld r19, VCPU_GPR(r19)(r4) | ||
91 | ld r20, VCPU_GPR(r20)(r4) | ||
92 | ld r21, VCPU_GPR(r21)(r4) | ||
93 | ld r22, VCPU_GPR(r22)(r4) | ||
94 | ld r23, VCPU_GPR(r23)(r4) | ||
95 | ld r24, VCPU_GPR(r24)(r4) | ||
96 | ld r25, VCPU_GPR(r25)(r4) | ||
97 | ld r26, VCPU_GPR(r26)(r4) | ||
98 | ld r27, VCPU_GPR(r27)(r4) | ||
99 | ld r28, VCPU_GPR(r28)(r4) | ||
100 | ld r29, VCPU_GPR(r29)(r4) | ||
101 | ld r30, VCPU_GPR(r30)(r4) | ||
102 | ld r31, VCPU_GPR(r31)(r4) | ||
103 | 99 | ||
104 | ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ | 100 | ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */ |
105 | ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ | 101 | ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */ |
106 | 102 | ||
107 | ld r3, VCPU_TRAMPOLINE_ENTER(r4) | 103 | /* Load some guest state in the respective registers */ |
108 | mtsrr0 r3 | 104 | ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */ |
109 | 105 | /* will be swapped in by rmcall */ | |
110 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | ||
111 | mtsrr1 r3 | ||
112 | |||
113 | /* Load guest state in the respective registers */ | ||
114 | lwz r3, VCPU_CR(r4) /* r3 = vcpu->arch.cr */ | ||
115 | stw r3, (PACA_EXMC + EX_CCR)(r13) | ||
116 | |||
117 | ld r3, VCPU_CTR(r4) /* r3 = vcpu->arch.ctr */ | ||
118 | mtctr r3 /* CTR = r3 */ | ||
119 | 106 | ||
120 | ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ | 107 | ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */ |
121 | mtlr r3 /* LR = r3 */ | 108 | mtlr r3 /* LR = r3 */ |
122 | 109 | ||
123 | ld r3, VCPU_XER(r4) /* r3 = vcpu->arch.xer */ | 110 | DISABLE_INTERRUPTS |
124 | std r3, (PACA_EXMC + EX_R3)(r13) | ||
125 | 111 | ||
126 | /* Some guests may need to have dcbz set to 32 byte length. | 112 | /* Some guests may need to have dcbz set to 32 byte length. |
127 | * | 113 | * |
@@ -141,36 +127,15 @@ kvm_start_lightweight: | |||
141 | mtspr SPRN_HID5,r3 | 127 | mtspr SPRN_HID5,r3 |
142 | 128 | ||
143 | no_dcbz32_on: | 129 | no_dcbz32_on: |
144 | /* Load guest GPRs */ | 130 | |
145 | 131 | ld r6, VCPU_RMCALL(r4) | |
146 | ld r3, VCPU_GPR(r9)(r4) | 132 | mtctr r6 |
147 | std r3, (PACA_EXMC + EX_R9)(r13) | 133 | |
148 | ld r3, VCPU_GPR(r10)(r4) | 134 | ld r3, VCPU_TRAMPOLINE_ENTER(r4) |
149 | std r3, (PACA_EXMC + EX_R10)(r13) | 135 | LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR)) |
150 | ld r3, VCPU_GPR(r11)(r4) | ||
151 | std r3, (PACA_EXMC + EX_R11)(r13) | ||
152 | ld r3, VCPU_GPR(r12)(r4) | ||
153 | std r3, (PACA_EXMC + EX_R12)(r13) | ||
154 | ld r3, VCPU_GPR(r13)(r4) | ||
155 | std r3, (PACA_EXMC + EX_R13)(r13) | ||
156 | |||
157 | ld r0, VCPU_GPR(r0)(r4) | ||
158 | ld r1, VCPU_GPR(r1)(r4) | ||
159 | ld r2, VCPU_GPR(r2)(r4) | ||
160 | ld r3, VCPU_GPR(r3)(r4) | ||
161 | ld r5, VCPU_GPR(r5)(r4) | ||
162 | ld r6, VCPU_GPR(r6)(r4) | ||
163 | ld r7, VCPU_GPR(r7)(r4) | ||
164 | ld r8, VCPU_GPR(r8)(r4) | ||
165 | ld r4, VCPU_GPR(r4)(r4) | ||
166 | |||
167 | /* This sets the Magic value for the trampoline */ | ||
168 | |||
169 | li r11, 1 | ||
170 | stb r11, PACA_KVM_IN_GUEST(r13) | ||
171 | 136 | ||
172 | /* Jump to SLB patching handler and into our guest */ | 137 | /* Jump to SLB patching handler and into our guest */ |
173 | RFI | 138 | bctr |
174 | 139 | ||
175 | /* | 140 | /* |
176 | * This is the handler in module memory. It gets jumped at from the | 141 | * This is the handler in module memory. It gets jumped at from the |
@@ -184,125 +149,70 @@ kvmppc_handler_highmem: | |||
184 | /* | 149 | /* |
185 | * Register usage at this point: | 150 | * Register usage at this point: |
186 | * | 151 | * |
187 | * R00 = guest R13 | 152 | * R0 = guest last inst |
188 | * R01 = host R1 | 153 | * R1 = host R1 |
189 | * R02 = host R2 | 154 | * R2 = host R2 |
190 | * R10 = guest PC | 155 | * R3 = guest PC |
191 | * R11 = guest MSR | 156 | * R4 = guest MSR |
192 | * R12 = exit handler id | 157 | * R5 = guest DAR |
193 | * R13 = PACA | 158 | * R6 = guest DSISR |
194 | * PACA.exmc.R9 = guest R1 | 159 | * R13 = PACA |
195 | * PACA.exmc.R10 = guest R10 | 160 | * PACA.KVM.* = guest * |
196 | * PACA.exmc.R11 = guest R11 | ||
197 | * PACA.exmc.R12 = guest R12 | ||
198 | * PACA.exmc.R13 = guest R2 | ||
199 | * PACA.exmc.DAR = guest DAR | ||
200 | * PACA.exmc.DSISR = guest DSISR | ||
201 | * PACA.exmc.LR = guest instruction | ||
202 | * PACA.exmc.CCR = guest CR | ||
203 | * PACA.exmc.SRR0 = guest R0 | ||
204 | * | 161 | * |
205 | */ | 162 | */ |
206 | 163 | ||
207 | std r3, (PACA_EXMC+EX_R3)(r13) | 164 | /* R7 = vcpu */ |
165 | ld r7, GPR4(r1) | ||
208 | 166 | ||
209 | /* save the exit id in R3 */ | 167 | /* Now save the guest state */ |
210 | mr r3, r12 | ||
211 | 168 | ||
212 | /* R12 = vcpu */ | 169 | stw r0, VCPU_LAST_INST(r7) |
213 | ld r12, GPR4(r1) | ||
214 | 170 | ||
215 | /* Now save the guest state */ | 171 | std r3, VCPU_PC(r7) |
172 | std r4, VCPU_SHADOW_SRR1(r7) | ||
173 | std r5, VCPU_FAULT_DEAR(r7) | ||
174 | std r6, VCPU_FAULT_DSISR(r7) | ||
216 | 175 | ||
217 | std r0, VCPU_GPR(r13)(r12) | 176 | ld r5, VCPU_HFLAGS(r7) |
218 | std r4, VCPU_GPR(r4)(r12) | ||
219 | std r5, VCPU_GPR(r5)(r12) | ||
220 | std r6, VCPU_GPR(r6)(r12) | ||
221 | std r7, VCPU_GPR(r7)(r12) | ||
222 | std r8, VCPU_GPR(r8)(r12) | ||
223 | std r9, VCPU_GPR(r9)(r12) | ||
224 | |||
225 | /* get registers from PACA */ | ||
226 | mfpaca r5, r0, EX_SRR0, r12 | ||
227 | mfpaca r5, r3, EX_R3, r12 | ||
228 | mfpaca r5, r1, EX_R9, r12 | ||
229 | mfpaca r5, r10, EX_R10, r12 | ||
230 | mfpaca r5, r11, EX_R11, r12 | ||
231 | mfpaca r5, r12, EX_R12, r12 | ||
232 | mfpaca r5, r2, EX_R13, r12 | ||
233 | |||
234 | lwz r5, (PACA_EXMC+EX_LR)(r13) | ||
235 | stw r5, VCPU_LAST_INST(r12) | ||
236 | |||
237 | lwz r5, (PACA_EXMC+EX_CCR)(r13) | ||
238 | stw r5, VCPU_CR(r12) | ||
239 | |||
240 | ld r5, VCPU_HFLAGS(r12) | ||
241 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ | 177 | rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ |
242 | beq no_dcbz32_off | 178 | beq no_dcbz32_off |
243 | 179 | ||
180 | li r4, 0 | ||
244 | mfspr r5,SPRN_HID5 | 181 | mfspr r5,SPRN_HID5 |
245 | rldimi r5,r5,6,56 | 182 | rldimi r5,r4,6,56 |
246 | mtspr SPRN_HID5,r5 | 183 | mtspr SPRN_HID5,r5 |
247 | 184 | ||
248 | no_dcbz32_off: | 185 | no_dcbz32_off: |
249 | 186 | ||
250 | /* XXX maybe skip on lightweight? */ | 187 | std r14, VCPU_GPR(r14)(r7) |
251 | std r14, VCPU_GPR(r14)(r12) | 188 | std r15, VCPU_GPR(r15)(r7) |
252 | std r15, VCPU_GPR(r15)(r12) | 189 | std r16, VCPU_GPR(r16)(r7) |
253 | std r16, VCPU_GPR(r16)(r12) | 190 | std r17, VCPU_GPR(r17)(r7) |
254 | std r17, VCPU_GPR(r17)(r12) | 191 | std r18, VCPU_GPR(r18)(r7) |
255 | std r18, VCPU_GPR(r18)(r12) | 192 | std r19, VCPU_GPR(r19)(r7) |
256 | std r19, VCPU_GPR(r19)(r12) | 193 | std r20, VCPU_GPR(r20)(r7) |
257 | std r20, VCPU_GPR(r20)(r12) | 194 | std r21, VCPU_GPR(r21)(r7) |
258 | std r21, VCPU_GPR(r21)(r12) | 195 | std r22, VCPU_GPR(r22)(r7) |
259 | std r22, VCPU_GPR(r22)(r12) | 196 | std r23, VCPU_GPR(r23)(r7) |
260 | std r23, VCPU_GPR(r23)(r12) | 197 | std r24, VCPU_GPR(r24)(r7) |
261 | std r24, VCPU_GPR(r24)(r12) | 198 | std r25, VCPU_GPR(r25)(r7) |
262 | std r25, VCPU_GPR(r25)(r12) | 199 | std r26, VCPU_GPR(r26)(r7) |
263 | std r26, VCPU_GPR(r26)(r12) | 200 | std r27, VCPU_GPR(r27)(r7) |
264 | std r27, VCPU_GPR(r27)(r12) | 201 | std r28, VCPU_GPR(r28)(r7) |
265 | std r28, VCPU_GPR(r28)(r12) | 202 | std r29, VCPU_GPR(r29)(r7) |
266 | std r29, VCPU_GPR(r29)(r12) | 203 | std r30, VCPU_GPR(r30)(r7) |
267 | std r30, VCPU_GPR(r30)(r12) | 204 | std r31, VCPU_GPR(r31)(r7) |
268 | std r31, VCPU_GPR(r31)(r12) | 205 | |
269 | 206 | /* Save guest CTR */ | |
270 | /* Restore non-volatile host registers (r14 - r31) */ | ||
271 | REST_NVGPRS(r1) | ||
272 | |||
273 | /* Save guest PC (R10) */ | ||
274 | std r10, VCPU_PC(r12) | ||
275 | |||
276 | /* Save guest msr (R11) */ | ||
277 | std r11, VCPU_SHADOW_MSR(r12) | ||
278 | |||
279 | /* Save guest CTR (in R12) */ | ||
280 | mfctr r5 | 207 | mfctr r5 |
281 | std r5, VCPU_CTR(r12) | 208 | std r5, VCPU_CTR(r7) |
282 | 209 | ||
283 | /* Save guest LR */ | 210 | /* Save guest LR */ |
284 | mflr r5 | 211 | mflr r5 |
285 | std r5, VCPU_LR(r12) | 212 | std r5, VCPU_LR(r7) |
286 | |||
287 | /* Save guest XER */ | ||
288 | mfxer r5 | ||
289 | std r5, VCPU_XER(r12) | ||
290 | |||
291 | /* Save guest DAR */ | ||
292 | ld r5, (PACA_EXMC+EX_DAR)(r13) | ||
293 | std r5, VCPU_FAULT_DEAR(r12) | ||
294 | |||
295 | /* Save guest DSISR */ | ||
296 | lwz r5, (PACA_EXMC+EX_DSISR)(r13) | ||
297 | std r5, VCPU_FAULT_DSISR(r12) | ||
298 | 213 | ||
299 | /* Restore host msr -> SRR1 */ | 214 | /* Restore host msr -> SRR1 */ |
300 | ld r7, VCPU_HOST_MSR(r12) | 215 | ld r6, VCPU_HOST_MSR(r7) |
301 | mtsrr1 r7 | ||
302 | |||
303 | /* Restore host IP -> SRR0 */ | ||
304 | ld r6, VCPU_HOST_RETIP(r12) | ||
305 | mtsrr0 r6 | ||
306 | 216 | ||
307 | /* | 217 | /* |
308 | * For some interrupts, we need to call the real Linux | 218 | * For some interrupts, we need to call the real Linux |
@@ -314,13 +224,14 @@ no_dcbz32_off: | |||
314 | * r3 = address of interrupt handler (exit reason) | 224 | * r12 = address of interrupt handler (exit reason) |
315 | */ | 225 | */ |
316 | 226 | ||
317 | cmpwi r3, BOOK3S_INTERRUPT_EXTERNAL | 227 | cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL |
318 | beq call_linux_handler | 228 | beq call_linux_handler |
319 | cmpwi r3, BOOK3S_INTERRUPT_DECREMENTER | 229 | cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER |
320 | beq call_linux_handler | 230 | beq call_linux_handler |
321 | 231 | ||
322 | /* Back to Interruptable Mode! (goto kvm_return_point) */ | 232 | /* Back to EE=1 */ |
323 | RFI | 233 | mtmsr r6 |
234 | b kvm_return_point | ||
324 | 235 | ||
325 | call_linux_handler: | 236 | call_linux_handler: |
326 | 237 | ||
@@ -333,16 +244,22 @@ call_linux_handler: | |||
333 | * interrupt handler! | 244 | * interrupt handler! |
334 | * | 245 | * |
335 | * R3 still contains the exit code, | 246 | * R12 still contains the exit code, |
336 | * R6 VCPU_HOST_RETIP and | 247 | * R5 VCPU_HOST_RETIP and |
337 | * R7 VCPU_HOST_MSR | 248 | * R6 VCPU_HOST_MSR |
338 | */ | 249 | */ |
339 | 250 | ||
340 | mtlr r3 | 251 | /* Restore host IP -> SRR0 */ |
252 | ld r5, VCPU_HOST_RETIP(r7) | ||
253 | |||
254 | /* XXX Better move to a safe function? | ||
255 | * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */ | ||
341 | 256 | ||
342 | ld r5, VCPU_TRAMPOLINE_LOWMEM(r12) | 257 | mtlr r12 |
343 | mtsrr0 r5 | 258 | |
344 | LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | 259 | ld r4, VCPU_TRAMPOLINE_LOWMEM(r7) |
345 | mtsrr1 r5 | 260 | mtsrr0 r4 |
261 | LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)) | ||
262 | mtsrr1 r3 | ||
346 | 263 | ||
347 | RFI | 264 | RFI |
348 | 265 | ||
@@ -351,42 +268,51 @@ kvm_return_point: | |||
351 | 268 | ||
352 | /* Jump back to lightweight entry if we're supposed to */ | 269 | /* Jump back to lightweight entry if we're supposed to */ |
353 | /* go back into the guest */ | 270 | /* go back into the guest */ |
354 | mr r5, r3 | 271 | |
272 | /* Pass the exit number as 3rd argument to kvmppc_handle_exit */ | ||
273 | mr r5, r12 | ||
274 | |||
355 | /* Restore r3 (kvm_run) and r4 (vcpu) */ | 275 | /* Restore r3 (kvm_run) and r4 (vcpu) */ |
356 | REST_2GPRS(3, r1) | 276 | REST_2GPRS(3, r1) |
357 | bl KVMPPC_HANDLE_EXIT | 277 | bl KVMPPC_HANDLE_EXIT |
358 | 278 | ||
359 | #if 0 /* XXX get lightweight exits back */ | 279 | /* If RESUME_GUEST, get back in the loop */ |
360 | cmpwi r3, RESUME_GUEST | 280 | cmpwi r3, RESUME_GUEST |
361 | bne kvm_exit_heavyweight | 281 | beq kvm_loop_lightweight |
362 | 282 | ||
363 | /* put VCPU and KVM_RUN back into place and roll again! */ | 283 | cmpwi r3, RESUME_GUEST_NV |
364 | REST_2GPRS(3, r1) | 284 | beq kvm_loop_heavyweight |
365 | b kvm_start_lightweight | ||
366 | 285 | ||
367 | kvm_exit_heavyweight: | 286 | kvm_exit_loop: |
368 | /* Restore non-volatile host registers */ | ||
369 | ld r14, _LINK(r1) | ||
370 | mtlr r14 | ||
371 | REST_NVGPRS(r1) | ||
372 | 287 | ||
373 | addi r1, r1, SWITCH_FRAME_SIZE | ||
374 | #else | ||
375 | ld r4, _LINK(r1) | 288 | ld r4, _LINK(r1) |
376 | mtlr r4 | 289 | mtlr r4 |
377 | 290 | ||
378 | cmpwi r3, RESUME_GUEST | 291 | /* Restore non-volatile host registers (r14 - r31) */ |
379 | bne kvm_exit_heavyweight | 292 | REST_NVGPRS(r1) |
293 | |||
294 | addi r1, r1, SWITCH_FRAME_SIZE | ||
295 | blr | ||
296 | |||
297 | kvm_loop_heavyweight: | ||
298 | |||
299 | ld r4, _LINK(r1) | ||
300 | std r4, (16 + SWITCH_FRAME_SIZE)(r1) | ||
380 | 301 | ||
302 | /* Load vcpu and kvm_run */ | ||
381 | REST_2GPRS(3, r1) | 303 | REST_2GPRS(3, r1) |
382 | 304 | ||
383 | addi r1, r1, SWITCH_FRAME_SIZE | 305 | /* Load non-volatile guest state from the vcpu */ |
306 | VCPU_LOAD_NVGPRS(r4) | ||
384 | 307 | ||
385 | b kvm_start_entry | 308 | /* Jump back into the beginning of this function */ |
309 | b kvm_start_lightweight | ||
386 | 310 | ||
387 | kvm_exit_heavyweight: | 311 | kvm_loop_lightweight: |
388 | 312 | ||
389 | addi r1, r1, SWITCH_FRAME_SIZE | 313 | /* We'll need the vcpu pointer */ |
390 | #endif | 314 | REST_GPR(4, r1) |
315 | |||
316 | /* Jump back into the beginning of this function */ | ||
317 | b kvm_start_lightweight | ||
391 | 318 | ||
392 | blr | ||
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index e4beeb371a73..512dcff77554 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c | |||
@@ -54,7 +54,7 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( | |||
54 | if (!vcpu_book3s->slb[i].valid) | 54 | if (!vcpu_book3s->slb[i].valid) |
55 | continue; | 55 | continue; |
56 | 56 | ||
57 | if (vcpu_book3s->slb[i].large) | 57 | if (vcpu_book3s->slb[i].tb) |
58 | cmp_esid = esid_1t; | 58 | cmp_esid = esid_1t; |
59 | 59 | ||
60 | if (vcpu_book3s->slb[i].esid == cmp_esid) | 60 | if (vcpu_book3s->slb[i].esid == cmp_esid) |
@@ -65,9 +65,10 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe( | |||
65 | eaddr, esid, esid_1t); | 65 | eaddr, esid, esid_1t); |
66 | for (i = 0; i < vcpu_book3s->slb_nr; i++) { | 66 | for (i = 0; i < vcpu_book3s->slb_nr; i++) { |
67 | if (vcpu_book3s->slb[i].vsid) | 67 | if (vcpu_book3s->slb[i].vsid) |
68 | dprintk(" %d: %c%c %llx %llx\n", i, | 68 | dprintk(" %d: %c%c%c %llx %llx\n", i, |
69 | vcpu_book3s->slb[i].valid ? 'v' : ' ', | 69 | vcpu_book3s->slb[i].valid ? 'v' : ' ', |
70 | vcpu_book3s->slb[i].large ? 'l' : ' ', | 70 | vcpu_book3s->slb[i].large ? 'l' : ' ', |
71 | vcpu_book3s->slb[i].tb ? 't' : ' ', | ||
71 | vcpu_book3s->slb[i].esid, | 72 | vcpu_book3s->slb[i].esid, |
72 | vcpu_book3s->slb[i].vsid); | 73 | vcpu_book3s->slb[i].vsid); |
73 | } | 74 | } |
@@ -84,7 +85,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
84 | if (!slb) | 85 | if (!slb) |
85 | return 0; | 86 | return 0; |
86 | 87 | ||
87 | if (slb->large) | 88 | if (slb->tb) |
88 | return (((u64)eaddr >> 12) & 0xfffffff) | | 89 | return (((u64)eaddr >> 12) & 0xfffffff) | |
89 | (((u64)slb->vsid) << 28); | 90 | (((u64)slb->vsid) << 28); |
90 | 91 | ||
@@ -309,7 +310,8 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) | |||
309 | slbe = &vcpu_book3s->slb[slb_nr]; | 310 | slbe = &vcpu_book3s->slb[slb_nr]; |
310 | 311 | ||
311 | slbe->large = (rs & SLB_VSID_L) ? 1 : 0; | 312 | slbe->large = (rs & SLB_VSID_L) ? 1 : 0; |
312 | slbe->esid = slbe->large ? esid_1t : esid; | 313 | slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0; |
314 | slbe->esid = slbe->tb ? esid_1t : esid; | ||
313 | slbe->vsid = rs >> 12; | 315 | slbe->vsid = rs >> 12; |
314 | slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; | 316 | slbe->valid = (rb & SLB_ESID_V) ? 1 : 0; |
315 | slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; | 317 | slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0; |
diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S index fb7dd2e9ac88..c83c60ad96c5 100644 --- a/arch/powerpc/kvm/book3s_64_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S | |||
@@ -45,36 +45,25 @@ kvmppc_trampoline_\intno: | |||
45 | * To distinguish, we check a magic byte in the PACA | 45 | * To distinguish, we check a magic byte in the PACA |
46 | */ | 46 | */ |
47 | mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ | 47 | mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */ |
48 | std r12, (PACA_EXMC + EX_R12)(r13) | 48 | std r12, PACA_KVM_SCRATCH0(r13) |
49 | mfcr r12 | 49 | mfcr r12 |
50 | stw r12, (PACA_EXMC + EX_CCR)(r13) | 50 | stw r12, PACA_KVM_SCRATCH1(r13) |
51 | lbz r12, PACA_KVM_IN_GUEST(r13) | 51 | lbz r12, PACA_KVM_IN_GUEST(r13) |
52 | cmpwi r12, 0 | 52 | cmpwi r12, KVM_GUEST_MODE_NONE |
53 | bne ..kvmppc_handler_hasmagic_\intno | 53 | bne ..kvmppc_handler_hasmagic_\intno |
54 | /* No KVM guest? Then jump back to the Linux handler! */ | 54 | /* No KVM guest? Then jump back to the Linux handler! */ |
55 | lwz r12, (PACA_EXMC + EX_CCR)(r13) | 55 | lwz r12, PACA_KVM_SCRATCH1(r13) |
56 | mtcr r12 | 56 | mtcr r12 |
57 | ld r12, (PACA_EXMC + EX_R12)(r13) | 57 | ld r12, PACA_KVM_SCRATCH0(r13) |
58 | mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ | 58 | mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */ |
59 | b kvmppc_resume_\intno /* Get back original handler */ | 59 | b kvmppc_resume_\intno /* Get back original handler */ |
60 | 60 | ||
61 | /* Now we know we're handling a KVM guest */ | 61 | /* Now we know we're handling a KVM guest */ |
62 | ..kvmppc_handler_hasmagic_\intno: | 62 | ..kvmppc_handler_hasmagic_\intno: |
63 | /* Unset guest state */ | ||
64 | li r12, 0 | ||
65 | stb r12, PACA_KVM_IN_GUEST(r13) | ||
66 | 63 | ||
67 | std r1, (PACA_EXMC+EX_R9)(r13) | 64 | /* Should we just skip the faulting instruction? */ |
68 | std r10, (PACA_EXMC+EX_R10)(r13) | 65 | cmpwi r12, KVM_GUEST_MODE_SKIP |
69 | std r11, (PACA_EXMC+EX_R11)(r13) | 66 | beq kvmppc_handler_skip_ins |
70 | std r2, (PACA_EXMC+EX_R13)(r13) | ||
71 | |||
72 | mfsrr0 r10 | ||
73 | mfsrr1 r11 | ||
74 | |||
75 | /* Restore R1/R2 so we can handle faults */ | ||
76 | ld r1, PACAR1(r13) | ||
77 | ld r2, (PACA_EXMC+EX_SRR0)(r13) | ||
78 | 67 | ||
79 | /* Let's store which interrupt we're handling */ | 68 | /* Let's store which interrupt we're handling */ |
80 | li r12, \intno | 69 | li r12, \intno |
@@ -102,23 +91,107 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC | |||
102 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX | 91 | INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX |
103 | 92 | ||
104 | /* | 93 | /* |
94 | * Bring us back to the faulting code, but skip the | ||
95 | * faulting instruction. | ||
96 | * | ||
97 | * This is a generic exit path from the interrupt | ||
98 | * trampolines above. | ||
99 | * | ||
100 | * Input Registers: | ||
101 | * | ||
102 | * R12 = free | ||
103 | * R13 = PACA | ||
104 | * PACA.KVM.SCRATCH0 = guest R12 | ||
105 | * PACA.KVM.SCRATCH1 = guest CR | ||
106 | * SPRG_SCRATCH0 = guest R13 | ||
107 | * | ||
108 | */ | ||
109 | kvmppc_handler_skip_ins: | ||
110 | |||
111 | /* Patch the IP to the next instruction */ | ||
112 | mfsrr0 r12 | ||
113 | addi r12, r12, 4 | ||
114 | mtsrr0 r12 | ||
115 | |||
116 | /* Clean up all state */ | ||
117 | lwz r12, PACA_KVM_SCRATCH1(r13) | ||
118 | mtcr r12 | ||
119 | ld r12, PACA_KVM_SCRATCH0(r13) | ||
120 | mfspr r13, SPRN_SPRG_SCRATCH0 | ||
121 | |||
122 | /* And get back into the code */ | ||
123 | RFI | ||
124 | |||
125 | /* | ||
105 | * This trampoline brings us back to a real mode handler | 126 | * This trampoline brings us back to a real mode handler |
106 | * | 127 | * |
107 | * Input Registers: | 128 | * Input Registers: |
108 | * | 129 | * |
109 | * R6 = SRR0 | 130 | * R5 = SRR0 |
110 | * R7 = SRR1 | 131 | * R6 = SRR1 |
111 | * LR = real-mode IP | 132 | * LR = real-mode IP |
112 | * | 133 | * |
113 | */ | 134 | */ |
114 | .global kvmppc_handler_lowmem_trampoline | 135 | .global kvmppc_handler_lowmem_trampoline |
115 | kvmppc_handler_lowmem_trampoline: | 136 | kvmppc_handler_lowmem_trampoline: |
116 | 137 | ||
117 | mtsrr0 r6 | 138 | mtsrr0 r5 |
118 | mtsrr1 r7 | 139 | mtsrr1 r6 |
119 | blr | 140 | blr |
120 | kvmppc_handler_lowmem_trampoline_end: | 141 | kvmppc_handler_lowmem_trampoline_end: |
121 | 142 | ||
143 | /* | ||
144 | * Call a function in real mode | ||
145 | * | ||
146 | * Input Registers: | ||
147 | * | ||
148 | * R3 = function | ||
149 | * R4 = MSR | ||
150 | * R5 = CTR | ||
151 | * | ||
152 | */ | ||
153 | _GLOBAL(kvmppc_rmcall) | ||
154 | mtmsr r4 /* Disable relocation, so mtsrr | ||
155 | doesn't get interrupted */ | ||
156 | mtctr r5 | ||
157 | mtsrr0 r3 | ||
158 | mtsrr1 r4 | ||
159 | RFI | ||
160 | |||
161 | /* | ||
162 | * Activate current's external feature (FPU/Altivec/VSX) | ||
163 | */ | ||
164 | #define define_load_up(what) \ | ||
165 | \ | ||
166 | _GLOBAL(kvmppc_load_up_ ## what); \ | ||
167 | subi r1, r1, INT_FRAME_SIZE; \ | ||
168 | mflr r3; \ | ||
169 | std r3, _LINK(r1); \ | ||
170 | mfmsr r4; \ | ||
171 | std r31, GPR3(r1); \ | ||
172 | mr r31, r4; \ | ||
173 | li r5, MSR_DR; \ | ||
174 | oris r5, r5, MSR_EE@h; \ | ||
175 | andc r4, r4, r5; \ | ||
176 | mtmsr r4; \ | ||
177 | \ | ||
178 | bl .load_up_ ## what; \ | ||
179 | \ | ||
180 | mtmsr r31; \ | ||
181 | ld r3, _LINK(r1); \ | ||
182 | ld r31, GPR3(r1); \ | ||
183 | addi r1, r1, INT_FRAME_SIZE; \ | ||
184 | mtlr r3; \ | ||
185 | blr | ||
186 | |||
187 | define_load_up(fpu) | ||
188 | #ifdef CONFIG_ALTIVEC | ||
189 | define_load_up(altivec) | ||
190 | #endif | ||
191 | #ifdef CONFIG_VSX | ||
192 | define_load_up(vsx) | ||
193 | #endif | ||
194 | |||
122 | .global kvmppc_trampoline_lowmem | 195 | .global kvmppc_trampoline_lowmem |
123 | kvmppc_trampoline_lowmem: | 196 | kvmppc_trampoline_lowmem: |
124 | .long kvmppc_handler_lowmem_trampoline - _stext | 197 | .long kvmppc_handler_lowmem_trampoline - _stext |
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index ecd237a03fd0..35b762722187 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S | |||
@@ -31,7 +31,7 @@ | |||
31 | #define REBOLT_SLB_ENTRY(num) \ | 31 | #define REBOLT_SLB_ENTRY(num) \ |
32 | ld r10, SHADOW_SLB_ESID(num)(r11); \ | 32 | ld r10, SHADOW_SLB_ESID(num)(r11); \ |
33 | cmpdi r10, 0; \ | 33 | cmpdi r10, 0; \ |
34 | beq slb_exit_skip_1; \ | 34 | beq slb_exit_skip_ ## num; \ |
35 | oris r10, r10, SLB_ESID_V@h; \ | 35 | oris r10, r10, SLB_ESID_V@h; \ |
36 | ld r9, SHADOW_SLB_VSID(num)(r11); \ | 36 | ld r9, SHADOW_SLB_VSID(num)(r11); \ |
37 | slbmte r9, r10; \ | 37 | slbmte r9, r10; \ |
@@ -51,23 +51,21 @@ kvmppc_handler_trampoline_enter: | |||
51 | * | 51 | * |
52 | * MSR = ~IR|DR | 52 | * MSR = ~IR|DR |
53 | * R13 = PACA | 53 | * R13 = PACA |
54 | * R1 = host R1 | ||
55 | * R2 = host R2 | ||
54 | * R9 = guest IP | 56 | * R9 = guest IP |
55 | * R10 = guest MSR | 57 | * R10 = guest MSR |
56 | * R11 = free | 58 | * all other GPRS = free |
57 | * R12 = free | 59 | * PACA[KVM_CR] = guest CR |
58 | * PACA[PACA_EXMC + EX_R9] = guest R9 | 60 | * PACA[KVM_XER] = guest XER |
59 | * PACA[PACA_EXMC + EX_R10] = guest R10 | ||
60 | * PACA[PACA_EXMC + EX_R11] = guest R11 | ||
61 | * PACA[PACA_EXMC + EX_R12] = guest R12 | ||
62 | * PACA[PACA_EXMC + EX_R13] = guest R13 | ||
63 | * PACA[PACA_EXMC + EX_CCR] = guest CR | ||
64 | * PACA[PACA_EXMC + EX_R3] = guest XER | ||
65 | */ | 61 | */ |
66 | 62 | ||
67 | mtsrr0 r9 | 63 | mtsrr0 r9 |
68 | mtsrr1 r10 | 64 | mtsrr1 r10 |
69 | 65 | ||
70 | mtspr SPRN_SPRG_SCRATCH0, r0 | 66 | /* Activate guest mode, so faults get handled by KVM */ |
67 | li r11, KVM_GUEST_MODE_GUEST | ||
68 | stb r11, PACA_KVM_IN_GUEST(r13) | ||
71 | 69 | ||
72 | /* Remove LPAR shadow entries */ | 70 | /* Remove LPAR shadow entries */ |
73 | 71 | ||
@@ -131,20 +129,27 @@ slb_do_enter: | |||
131 | 129 | ||
132 | /* Enter guest */ | 130 | /* Enter guest */ |
133 | 131 | ||
134 | mfspr r0, SPRN_SPRG_SCRATCH0 | 132 | ld r0, (PACA_KVM_R0)(r13) |
135 | 133 | ld r1, (PACA_KVM_R1)(r13) | |
136 | ld r9, (PACA_EXMC+EX_R9)(r13) | 134 | ld r2, (PACA_KVM_R2)(r13) |
137 | ld r10, (PACA_EXMC+EX_R10)(r13) | 135 | ld r3, (PACA_KVM_R3)(r13) |
138 | ld r12, (PACA_EXMC+EX_R12)(r13) | 136 | ld r4, (PACA_KVM_R4)(r13) |
139 | 137 | ld r5, (PACA_KVM_R5)(r13) | |
140 | lwz r11, (PACA_EXMC+EX_CCR)(r13) | 138 | ld r6, (PACA_KVM_R6)(r13) |
139 | ld r7, (PACA_KVM_R7)(r13) | ||
140 | ld r8, (PACA_KVM_R8)(r13) | ||
141 | ld r9, (PACA_KVM_R9)(r13) | ||
142 | ld r10, (PACA_KVM_R10)(r13) | ||
143 | ld r12, (PACA_KVM_R12)(r13) | ||
144 | |||
145 | lwz r11, (PACA_KVM_CR)(r13) | ||
141 | mtcr r11 | 146 | mtcr r11 |
142 | 147 | ||
143 | ld r11, (PACA_EXMC+EX_R3)(r13) | 148 | ld r11, (PACA_KVM_XER)(r13) |
144 | mtxer r11 | 149 | mtxer r11 |
145 | 150 | ||
146 | ld r11, (PACA_EXMC+EX_R11)(r13) | 151 | ld r11, (PACA_KVM_R11)(r13) |
147 | ld r13, (PACA_EXMC+EX_R13)(r13) | 152 | ld r13, (PACA_KVM_R13)(r13) |
148 | 153 | ||
149 | RFI | 154 | RFI |
150 | kvmppc_handler_trampoline_enter_end: | 155 | kvmppc_handler_trampoline_enter_end: |
@@ -162,28 +167,54 @@ kvmppc_handler_trampoline_exit: | |||
162 | 167 | ||
163 | /* Register usage at this point: | 168 | /* Register usage at this point: |
164 | * | 169 | * |
165 | * SPRG_SCRATCH0 = guest R13 | 170 | * SPRG_SCRATCH0 = guest R13 |
166 | * R01 = host R1 | 171 | * R12 = exit handler id |
167 | * R02 = host R2 | 172 | * R13 = PACA |
168 | * R10 = guest PC | 173 | * PACA.KVM.SCRATCH0 = guest R12 |
169 | * R11 = guest MSR | 174 | * PACA.KVM.SCRATCH1 = guest CR |
170 | * R12 = exit handler id | ||
171 | * R13 = PACA | ||
172 | * PACA.exmc.CCR = guest CR | ||
173 | * PACA.exmc.R9 = guest R1 | ||
174 | * PACA.exmc.R10 = guest R10 | ||
175 | * PACA.exmc.R11 = guest R11 | ||
176 | * PACA.exmc.R12 = guest R12 | ||
177 | * PACA.exmc.R13 = guest R2 | ||
178 | * | 175 | * |
179 | */ | 176 | */ |
180 | 177 | ||
181 | /* Save registers */ | 178 | /* Save registers */ |
182 | 179 | ||
183 | std r0, (PACA_EXMC+EX_SRR0)(r13) | 180 | std r0, PACA_KVM_R0(r13) |
184 | std r9, (PACA_EXMC+EX_R3)(r13) | 181 | std r1, PACA_KVM_R1(r13) |
185 | std r10, (PACA_EXMC+EX_LR)(r13) | 182 | std r2, PACA_KVM_R2(r13) |
186 | std r11, (PACA_EXMC+EX_DAR)(r13) | 183 | std r3, PACA_KVM_R3(r13) |
184 | std r4, PACA_KVM_R4(r13) | ||
185 | std r5, PACA_KVM_R5(r13) | ||
186 | std r6, PACA_KVM_R6(r13) | ||
187 | std r7, PACA_KVM_R7(r13) | ||
188 | std r8, PACA_KVM_R8(r13) | ||
189 | std r9, PACA_KVM_R9(r13) | ||
190 | std r10, PACA_KVM_R10(r13) | ||
191 | std r11, PACA_KVM_R11(r13) | ||
192 | |||
193 | /* Restore R1/R2 so we can handle faults */ | ||
194 | ld r1, PACA_KVM_HOST_R1(r13) | ||
195 | ld r2, PACA_KVM_HOST_R2(r13) | ||
196 | |||
197 | /* Save guest PC and MSR in GPRs */ | ||
198 | mfsrr0 r3 | ||
199 | mfsrr1 r4 | ||
200 | |||
201 | /* Get scratch'ed off registers */ | ||
202 | mfspr r9, SPRN_SPRG_SCRATCH0 | ||
203 | std r9, PACA_KVM_R13(r13) | ||
204 | |||
205 | ld r8, PACA_KVM_SCRATCH0(r13) | ||
206 | std r8, PACA_KVM_R12(r13) | ||
207 | |||
208 | lwz r7, PACA_KVM_SCRATCH1(r13) | ||
209 | stw r7, PACA_KVM_CR(r13) | ||
210 | |||
211 | /* Save more register state */ | ||
212 | |||
213 | mfxer r6 | ||
214 | stw r6, PACA_KVM_XER(r13) | ||
215 | |||
216 | mfdar r5 | ||
217 | mfdsisr r6 | ||
187 | 218 | ||
188 | /* | 219 | /* |
189 | * In order for us to easily get the last instruction, | 220 | * In order for us to easily get the last instruction, |
@@ -202,17 +233,28 @@ kvmppc_handler_trampoline_exit: | |||
202 | 233 | ||
203 | ld_last_inst: | 234 | ld_last_inst: |
204 | /* Save off the guest instruction we're at */ | 235 | /* Save off the guest instruction we're at */ |
236 | |||
237 | /* Set guest mode to 'jump over instruction' so if lwz faults | ||
238 | * we'll just continue at the next IP. */ | ||
239 | li r9, KVM_GUEST_MODE_SKIP | ||
240 | stb r9, PACA_KVM_IN_GUEST(r13) | ||
241 | |||
205 | /* 1) enable paging for data */ | 242 | /* 1) enable paging for data */ |
206 | mfmsr r9 | 243 | mfmsr r9 |
207 | ori r11, r9, MSR_DR /* Enable paging for data */ | 244 | ori r11, r9, MSR_DR /* Enable paging for data */ |
208 | mtmsr r11 | 245 | mtmsr r11 |
209 | /* 2) fetch the instruction */ | 246 | /* 2) fetch the instruction */ |
210 | lwz r0, 0(r10) | 247 | li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */ |
248 | lwz r0, 0(r3) | ||
211 | /* 3) disable paging again */ | 249 | /* 3) disable paging again */ |
212 | mtmsr r9 | 250 | mtmsr r9 |
213 | 251 | ||
214 | no_ld_last_inst: | 252 | no_ld_last_inst: |
215 | 253 | ||
254 | /* Unset guest mode */ | ||
255 | li r9, KVM_GUEST_MODE_NONE | ||
256 | stb r9, PACA_KVM_IN_GUEST(r13) | ||
257 | |||
216 | /* Restore bolted entries from the shadow and fix it along the way */ | 258 | /* Restore bolted entries from the shadow and fix it along the way */ |
217 | 259 | ||
218 | /* We don't store anything in entry 0, so we don't need to take care of it */ | 260 | /* We don't store anything in entry 0, so we don't need to take care of it */ |
@@ -233,29 +275,27 @@ no_ld_last_inst: | |||
233 | 275 | ||
234 | slb_do_exit: | 276 | slb_do_exit: |
235 | 277 | ||
236 | /* Restore registers */ | 278 | /* Register usage at this point: |
237 | 279 | * | |
238 | ld r11, (PACA_EXMC+EX_DAR)(r13) | 280 | * R0 = guest last inst |
239 | ld r10, (PACA_EXMC+EX_LR)(r13) | 281 | * R1 = host R1 |
240 | ld r9, (PACA_EXMC+EX_R3)(r13) | 282 | * R2 = host R2 |
241 | 283 | * R3 = guest PC | |
242 | /* Save last inst */ | 284 | * R4 = guest MSR |
243 | stw r0, (PACA_EXMC+EX_LR)(r13) | 285 | * R5 = guest DAR |
244 | 286 | * R6 = guest DSISR | |
245 | /* Save DAR and DSISR before going to paged mode */ | 287 | * R12 = exit handler id |
246 | mfdar r0 | 288 | * R13 = PACA |
247 | std r0, (PACA_EXMC+EX_DAR)(r13) | 289 | * PACA.KVM.* = guest * |
248 | mfdsisr r0 | 290 | * |
249 | stw r0, (PACA_EXMC+EX_DSISR)(r13) | 291 | */ |
250 | 292 | ||
251 | /* RFI into the highmem handler */ | 293 | /* RFI into the highmem handler */ |
252 | mfmsr r0 | 294 | mfmsr r7 |
253 | ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ | 295 | ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */ |
254 | mtsrr1 r0 | 296 | mtsrr1 r7 |
255 | ld r0, PACASAVEDMSR(r13) /* Highmem handler address */ | 297 | ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */ |
256 | mtsrr0 r0 | 298 | mtsrr0 r8 |
257 | |||
258 | mfspr r0, SPRN_SPRG_SCRATCH0 | ||
259 | 299 | ||
260 | RFI | 300 | RFI |
261 | kvmppc_handler_trampoline_exit_end: | 301 | kvmppc_handler_trampoline_exit_end: |
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 06f5a9ecc42c..4d686cc6b260 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c | |||
@@ -69,10 +69,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) | |||
69 | 69 | ||
70 | for (i = 0; i < 32; i += 4) { | 70 | for (i = 0; i < 32; i += 4) { |
71 | printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, | 71 | printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i, |
72 | vcpu->arch.gpr[i], | 72 | kvmppc_get_gpr(vcpu, i), |
73 | vcpu->arch.gpr[i+1], | 73 | kvmppc_get_gpr(vcpu, i+1), |
74 | vcpu->arch.gpr[i+2], | 74 | kvmppc_get_gpr(vcpu, i+2), |
75 | vcpu->arch.gpr[i+3]); | 75 | kvmppc_get_gpr(vcpu, i+3)); |
76 | } | 76 | } |
77 | } | 77 | } |
78 | 78 | ||
@@ -82,8 +82,32 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, | |||
82 | set_bit(priority, &vcpu->arch.pending_exceptions); | 82 | set_bit(priority, &vcpu->arch.pending_exceptions); |
83 | } | 83 | } |
84 | 84 | ||
85 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu) | 85 | static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, |
86 | ulong dear_flags, ulong esr_flags) | ||
86 | { | 87 | { |
88 | vcpu->arch.queued_dear = dear_flags; | ||
89 | vcpu->arch.queued_esr = esr_flags; | ||
90 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); | ||
91 | } | ||
92 | |||
93 | static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, | ||
94 | ulong dear_flags, ulong esr_flags) | ||
95 | { | ||
96 | vcpu->arch.queued_dear = dear_flags; | ||
97 | vcpu->arch.queued_esr = esr_flags; | ||
98 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); | ||
99 | } | ||
100 | |||
101 | static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, | ||
102 | ulong esr_flags) | ||
103 | { | ||
104 | vcpu->arch.queued_esr = esr_flags; | ||
105 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); | ||
106 | } | ||
107 | |||
108 | void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) | ||
109 | { | ||
110 | vcpu->arch.queued_esr = esr_flags; | ||
87 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); | 111 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); |
88 | } | 112 | } |
89 | 113 | ||
@@ -97,6 +121,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu) | |||
97 | return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); | 121 | return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); |
98 | } | 122 | } |
99 | 123 | ||
124 | void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) | ||
125 | { | ||
126 | clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions); | ||
127 | } | ||
128 | |||
100 | void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, | 129 | void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, |
101 | struct kvm_interrupt *irq) | 130 | struct kvm_interrupt *irq) |
102 | { | 131 | { |
@@ -109,14 +138,19 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
109 | { | 138 | { |
110 | int allowed = 0; | 139 | int allowed = 0; |
111 | ulong msr_mask; | 140 | ulong msr_mask; |
141 | bool update_esr = false, update_dear = false; | ||
112 | 142 | ||
113 | switch (priority) { | 143 | switch (priority) { |
114 | case BOOKE_IRQPRIO_PROGRAM: | ||
115 | case BOOKE_IRQPRIO_DTLB_MISS: | 144 | case BOOKE_IRQPRIO_DTLB_MISS: |
116 | case BOOKE_IRQPRIO_ITLB_MISS: | ||
117 | case BOOKE_IRQPRIO_SYSCALL: | ||
118 | case BOOKE_IRQPRIO_DATA_STORAGE: | 145 | case BOOKE_IRQPRIO_DATA_STORAGE: |
146 | update_dear = true; | ||
147 | /* fall through */ | ||
119 | case BOOKE_IRQPRIO_INST_STORAGE: | 148 | case BOOKE_IRQPRIO_INST_STORAGE: |
149 | case BOOKE_IRQPRIO_PROGRAM: | ||
150 | update_esr = true; | ||
151 | /* fall through */ | ||
152 | case BOOKE_IRQPRIO_ITLB_MISS: | ||
153 | case BOOKE_IRQPRIO_SYSCALL: | ||
120 | case BOOKE_IRQPRIO_FP_UNAVAIL: | 154 | case BOOKE_IRQPRIO_FP_UNAVAIL: |
121 | case BOOKE_IRQPRIO_SPE_UNAVAIL: | 155 | case BOOKE_IRQPRIO_SPE_UNAVAIL: |
122 | case BOOKE_IRQPRIO_SPE_FP_DATA: | 156 | case BOOKE_IRQPRIO_SPE_FP_DATA: |
@@ -151,6 +185,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, | |||
151 | vcpu->arch.srr0 = vcpu->arch.pc; | 185 | vcpu->arch.srr0 = vcpu->arch.pc; |
152 | vcpu->arch.srr1 = vcpu->arch.msr; | 186 | vcpu->arch.srr1 = vcpu->arch.msr; |
153 | vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; | 187 | vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; |
188 | if (update_esr == true) | ||
189 | vcpu->arch.esr = vcpu->arch.queued_esr; | ||
190 | if (update_dear == true) | ||
191 | vcpu->arch.dear = vcpu->arch.queued_dear; | ||
154 | kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); | 192 | kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); |
155 | 193 | ||
156 | clear_bit(priority, &vcpu->arch.pending_exceptions); | 194 | clear_bit(priority, &vcpu->arch.pending_exceptions); |
@@ -223,8 +261,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
223 | if (vcpu->arch.msr & MSR_PR) { | 261 | if (vcpu->arch.msr & MSR_PR) { |
224 | /* Program traps generated by user-level software must be handled | 262 | /* Program traps generated by user-level software must be handled |
225 | * by the guest kernel. */ | 263 | * by the guest kernel. */ |
226 | vcpu->arch.esr = vcpu->arch.fault_esr; | 264 | kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); |
227 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); | ||
228 | r = RESUME_GUEST; | 265 | r = RESUME_GUEST; |
229 | kvmppc_account_exit(vcpu, USR_PR_INST); | 266 | kvmppc_account_exit(vcpu, USR_PR_INST); |
230 | break; | 267 | break; |
@@ -280,16 +317,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
280 | break; | 317 | break; |
281 | 318 | ||
282 | case BOOKE_INTERRUPT_DATA_STORAGE: | 319 | case BOOKE_INTERRUPT_DATA_STORAGE: |
283 | vcpu->arch.dear = vcpu->arch.fault_dear; | 320 | kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear, |
284 | vcpu->arch.esr = vcpu->arch.fault_esr; | 321 | vcpu->arch.fault_esr); |
285 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); | ||
286 | kvmppc_account_exit(vcpu, DSI_EXITS); | 322 | kvmppc_account_exit(vcpu, DSI_EXITS); |
287 | r = RESUME_GUEST; | 323 | r = RESUME_GUEST; |
288 | break; | 324 | break; |
289 | 325 | ||
290 | case BOOKE_INTERRUPT_INST_STORAGE: | 326 | case BOOKE_INTERRUPT_INST_STORAGE: |
291 | vcpu->arch.esr = vcpu->arch.fault_esr; | 327 | kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr); |
292 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); | ||
293 | kvmppc_account_exit(vcpu, ISI_EXITS); | 328 | kvmppc_account_exit(vcpu, ISI_EXITS); |
294 | r = RESUME_GUEST; | 329 | r = RESUME_GUEST; |
295 | break; | 330 | break; |
@@ -310,9 +345,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
310 | gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); | 345 | gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); |
311 | if (gtlb_index < 0) { | 346 | if (gtlb_index < 0) { |
312 | /* The guest didn't have a mapping for it. */ | 347 | /* The guest didn't have a mapping for it. */ |
313 | kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); | 348 | kvmppc_core_queue_dtlb_miss(vcpu, |
314 | vcpu->arch.dear = vcpu->arch.fault_dear; | 349 | vcpu->arch.fault_dear, |
315 | vcpu->arch.esr = vcpu->arch.fault_esr; | 350 | vcpu->arch.fault_esr); |
316 | kvmppc_mmu_dtlb_miss(vcpu); | 351 | kvmppc_mmu_dtlb_miss(vcpu); |
317 | kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); | 352 | kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS); |
318 | r = RESUME_GUEST; | 353 | r = RESUME_GUEST; |
@@ -426,7 +461,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
426 | { | 461 | { |
427 | vcpu->arch.pc = 0; | 462 | vcpu->arch.pc = 0; |
428 | vcpu->arch.msr = 0; | 463 | vcpu->arch.msr = 0; |
429 | vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ | 464 | kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ |
430 | 465 | ||
431 | vcpu->arch.shadow_pid = 1; | 466 | vcpu->arch.shadow_pid = 1; |
432 | 467 | ||
@@ -444,10 +479,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
444 | int i; | 479 | int i; |
445 | 480 | ||
446 | regs->pc = vcpu->arch.pc; | 481 | regs->pc = vcpu->arch.pc; |
447 | regs->cr = vcpu->arch.cr; | 482 | regs->cr = kvmppc_get_cr(vcpu); |
448 | regs->ctr = vcpu->arch.ctr; | 483 | regs->ctr = vcpu->arch.ctr; |
449 | regs->lr = vcpu->arch.lr; | 484 | regs->lr = vcpu->arch.lr; |
450 | regs->xer = vcpu->arch.xer; | 485 | regs->xer = kvmppc_get_xer(vcpu); |
451 | regs->msr = vcpu->arch.msr; | 486 | regs->msr = vcpu->arch.msr; |
452 | regs->srr0 = vcpu->arch.srr0; | 487 | regs->srr0 = vcpu->arch.srr0; |
453 | regs->srr1 = vcpu->arch.srr1; | 488 | regs->srr1 = vcpu->arch.srr1; |
@@ -461,7 +496,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
461 | regs->sprg7 = vcpu->arch.sprg6; | 496 | regs->sprg7 = vcpu->arch.sprg6; |
462 | 497 | ||
463 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) | 498 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
464 | regs->gpr[i] = vcpu->arch.gpr[i]; | 499 | regs->gpr[i] = kvmppc_get_gpr(vcpu, i); |
465 | 500 | ||
466 | return 0; | 501 | return 0; |
467 | } | 502 | } |
@@ -471,10 +506,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
471 | int i; | 506 | int i; |
472 | 507 | ||
473 | vcpu->arch.pc = regs->pc; | 508 | vcpu->arch.pc = regs->pc; |
474 | vcpu->arch.cr = regs->cr; | 509 | kvmppc_set_cr(vcpu, regs->cr); |
475 | vcpu->arch.ctr = regs->ctr; | 510 | vcpu->arch.ctr = regs->ctr; |
476 | vcpu->arch.lr = regs->lr; | 511 | vcpu->arch.lr = regs->lr; |
477 | vcpu->arch.xer = regs->xer; | 512 | kvmppc_set_xer(vcpu, regs->xer); |
478 | kvmppc_set_msr(vcpu, regs->msr); | 513 | kvmppc_set_msr(vcpu, regs->msr); |
479 | vcpu->arch.srr0 = regs->srr0; | 514 | vcpu->arch.srr0 = regs->srr0; |
480 | vcpu->arch.srr1 = regs->srr1; | 515 | vcpu->arch.srr1 = regs->srr1; |
@@ -486,8 +521,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
486 | vcpu->arch.sprg6 = regs->sprg5; | 521 | vcpu->arch.sprg6 = regs->sprg5; |
487 | vcpu->arch.sprg7 = regs->sprg6; | 522 | vcpu->arch.sprg7 = regs->sprg6; |
488 | 523 | ||
489 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) | 524 | for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) |
490 | vcpu->arch.gpr[i] = regs->gpr[i]; | 525 | kvmppc_set_gpr(vcpu, i, regs->gpr[i]); |
491 | 526 | ||
492 | return 0; | 527 | return 0; |
493 | } | 528 | } |
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index aebc65e93f4b..cbc790ee1928 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c | |||
@@ -62,20 +62,20 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
62 | 62 | ||
63 | case OP_31_XOP_MFMSR: | 63 | case OP_31_XOP_MFMSR: |
64 | rt = get_rt(inst); | 64 | rt = get_rt(inst); |
65 | vcpu->arch.gpr[rt] = vcpu->arch.msr; | 65 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr); |
66 | kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); | 66 | kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); |
67 | break; | 67 | break; |
68 | 68 | ||
69 | case OP_31_XOP_MTMSR: | 69 | case OP_31_XOP_MTMSR: |
70 | rs = get_rs(inst); | 70 | rs = get_rs(inst); |
71 | kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); | 71 | kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS); |
72 | kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); | 72 | kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs)); |
73 | break; | 73 | break; |
74 | 74 | ||
75 | case OP_31_XOP_WRTEE: | 75 | case OP_31_XOP_WRTEE: |
76 | rs = get_rs(inst); | 76 | rs = get_rs(inst); |
77 | vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) | 77 | vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) |
78 | | (vcpu->arch.gpr[rs] & MSR_EE); | 78 | | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); |
79 | kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); | 79 | kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); |
80 | break; | 80 | break; |
81 | 81 | ||
@@ -101,22 +101,23 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
101 | int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | 101 | int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) |
102 | { | 102 | { |
103 | int emulated = EMULATE_DONE; | 103 | int emulated = EMULATE_DONE; |
104 | ulong spr_val = kvmppc_get_gpr(vcpu, rs); | ||
104 | 105 | ||
105 | switch (sprn) { | 106 | switch (sprn) { |
106 | case SPRN_DEAR: | 107 | case SPRN_DEAR: |
107 | vcpu->arch.dear = vcpu->arch.gpr[rs]; break; | 108 | vcpu->arch.dear = spr_val; break; |
108 | case SPRN_ESR: | 109 | case SPRN_ESR: |
109 | vcpu->arch.esr = vcpu->arch.gpr[rs]; break; | 110 | vcpu->arch.esr = spr_val; break; |
110 | case SPRN_DBCR0: | 111 | case SPRN_DBCR0: |
111 | vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; | 112 | vcpu->arch.dbcr0 = spr_val; break; |
112 | case SPRN_DBCR1: | 113 | case SPRN_DBCR1: |
113 | vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; | 114 | vcpu->arch.dbcr1 = spr_val; break; |
114 | case SPRN_DBSR: | 115 | case SPRN_DBSR: |
115 | vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break; | 116 | vcpu->arch.dbsr &= ~spr_val; break; |
116 | case SPRN_TSR: | 117 | case SPRN_TSR: |
117 | vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; | 118 | vcpu->arch.tsr &= ~spr_val; break; |
118 | case SPRN_TCR: | 119 | case SPRN_TCR: |
119 | vcpu->arch.tcr = vcpu->arch.gpr[rs]; | 120 | vcpu->arch.tcr = spr_val; |
120 | kvmppc_emulate_dec(vcpu); | 121 | kvmppc_emulate_dec(vcpu); |
121 | break; | 122 | break; |
122 | 123 | ||
@@ -124,64 +125,64 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
124 | * loaded into the real SPRGs when resuming the | 125 | * loaded into the real SPRGs when resuming the |
125 | * guest. */ | 126 | * guest. */ |
126 | case SPRN_SPRG4: | 127 | case SPRN_SPRG4: |
127 | vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; | 128 | vcpu->arch.sprg4 = spr_val; break; |
128 | case SPRN_SPRG5: | 129 | case SPRN_SPRG5: |
129 | vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; | 130 | vcpu->arch.sprg5 = spr_val; break; |
130 | case SPRN_SPRG6: | 131 | case SPRN_SPRG6: |
131 | vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; | 132 | vcpu->arch.sprg6 = spr_val; break; |
132 | case SPRN_SPRG7: | 133 | case SPRN_SPRG7: |
133 | vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; | 134 | vcpu->arch.sprg7 = spr_val; break; |
134 | 135 | ||
135 | case SPRN_IVPR: | 136 | case SPRN_IVPR: |
136 | vcpu->arch.ivpr = vcpu->arch.gpr[rs]; | 137 | vcpu->arch.ivpr = spr_val; |
137 | break; | 138 | break; |
138 | case SPRN_IVOR0: | 139 | case SPRN_IVOR0: |
139 | vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs]; | 140 | vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val; |
140 | break; | 141 | break; |
141 | case SPRN_IVOR1: | 142 | case SPRN_IVOR1: |
142 | vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs]; | 143 | vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val; |
143 | break; | 144 | break; |
144 | case SPRN_IVOR2: | 145 | case SPRN_IVOR2: |
145 | vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs]; | 146 | vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val; |
146 | break; | 147 | break; |
147 | case SPRN_IVOR3: | 148 | case SPRN_IVOR3: |
148 | vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs]; | 149 | vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val; |
149 | break; | 150 | break; |
150 | case SPRN_IVOR4: | 151 | case SPRN_IVOR4: |
151 | vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs]; | 152 | vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val; |
152 | break; | 153 | break; |
153 | case SPRN_IVOR5: | 154 | case SPRN_IVOR5: |
154 | vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs]; | 155 | vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val; |
155 | break; | 156 | break; |
156 | case SPRN_IVOR6: | 157 | case SPRN_IVOR6: |
157 | vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs]; | 158 | vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val; |
158 | break; | 159 | break; |
159 | case SPRN_IVOR7: | 160 | case SPRN_IVOR7: |
160 | vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs]; | 161 | vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val; |
161 | break; | 162 | break; |
162 | case SPRN_IVOR8: | 163 | case SPRN_IVOR8: |
163 | vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs]; | 164 | vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val; |
164 | break; | 165 | break; |
165 | case SPRN_IVOR9: | 166 | case SPRN_IVOR9: |
166 | vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs]; | 167 | vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val; |
167 | break; | 168 | break; |
168 | case SPRN_IVOR10: | 169 | case SPRN_IVOR10: |
169 | vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs]; | 170 | vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val; |
170 | break; | 171 | break; |
171 | case SPRN_IVOR11: | 172 | case SPRN_IVOR11: |
172 | vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs]; | 173 | vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val; |
173 | break; | 174 | break; |
174 | case SPRN_IVOR12: | 175 | case SPRN_IVOR12: |
175 | vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs]; | 176 | vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val; |
176 | break; | 177 | break; |
177 | case SPRN_IVOR13: | 178 | case SPRN_IVOR13: |
178 | vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs]; | 179 | vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val; |
179 | break; | 180 | break; |
180 | case SPRN_IVOR14: | 181 | case SPRN_IVOR14: |
181 | vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs]; | 182 | vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val; |
182 | break; | 183 | break; |
183 | case SPRN_IVOR15: | 184 | case SPRN_IVOR15: |
184 | vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs]; | 185 | vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; |
185 | break; | 186 | break; |
186 | 187 | ||
187 | default: | 188 | default: |
@@ -197,65 +198,65 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
197 | 198 | ||
198 | switch (sprn) { | 199 | switch (sprn) { |
199 | case SPRN_IVPR: | 200 | case SPRN_IVPR: |
200 | vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; | 201 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; |
201 | case SPRN_DEAR: | 202 | case SPRN_DEAR: |
202 | vcpu->arch.gpr[rt] = vcpu->arch.dear; break; | 203 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break; |
203 | case SPRN_ESR: | 204 | case SPRN_ESR: |
204 | vcpu->arch.gpr[rt] = vcpu->arch.esr; break; | 205 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; |
205 | case SPRN_DBCR0: | 206 | case SPRN_DBCR0: |
206 | vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; | 207 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break; |
207 | case SPRN_DBCR1: | 208 | case SPRN_DBCR1: |
208 | vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; | 209 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break; |
209 | case SPRN_DBSR: | 210 | case SPRN_DBSR: |
210 | vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break; | 211 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break; |
211 | 212 | ||
212 | case SPRN_IVOR0: | 213 | case SPRN_IVOR0: |
213 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]; | 214 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]); |
214 | break; | 215 | break; |
215 | case SPRN_IVOR1: | 216 | case SPRN_IVOR1: |
216 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]; | 217 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]); |
217 | break; | 218 | break; |
218 | case SPRN_IVOR2: | 219 | case SPRN_IVOR2: |
219 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]; | 220 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]); |
220 | break; | 221 | break; |
221 | case SPRN_IVOR3: | 222 | case SPRN_IVOR3: |
222 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]; | 223 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]); |
223 | break; | 224 | break; |
224 | case SPRN_IVOR4: | 225 | case SPRN_IVOR4: |
225 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]; | 226 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]); |
226 | break; | 227 | break; |
227 | case SPRN_IVOR5: | 228 | case SPRN_IVOR5: |
228 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]; | 229 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]); |
229 | break; | 230 | break; |
230 | case SPRN_IVOR6: | 231 | case SPRN_IVOR6: |
231 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]; | 232 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]); |
232 | break; | 233 | break; |
233 | case SPRN_IVOR7: | 234 | case SPRN_IVOR7: |
234 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]; | 235 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]); |
235 | break; | 236 | break; |
236 | case SPRN_IVOR8: | 237 | case SPRN_IVOR8: |
237 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]; | 238 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]); |
238 | break; | 239 | break; |
239 | case SPRN_IVOR9: | 240 | case SPRN_IVOR9: |
240 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]; | 241 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]); |
241 | break; | 242 | break; |
242 | case SPRN_IVOR10: | 243 | case SPRN_IVOR10: |
243 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]; | 244 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]); |
244 | break; | 245 | break; |
245 | case SPRN_IVOR11: | 246 | case SPRN_IVOR11: |
246 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]; | 247 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]); |
247 | break; | 248 | break; |
248 | case SPRN_IVOR12: | 249 | case SPRN_IVOR12: |
249 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]; | 250 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]); |
250 | break; | 251 | break; |
251 | case SPRN_IVOR13: | 252 | case SPRN_IVOR13: |
252 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]; | 253 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]); |
253 | break; | 254 | break; |
254 | case SPRN_IVOR14: | 255 | case SPRN_IVOR14: |
255 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]; | 256 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]); |
256 | break; | 257 | break; |
257 | case SPRN_IVOR15: | 258 | case SPRN_IVOR15: |
258 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; | 259 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]); |
259 | break; | 260 | break; |
260 | 261 | ||
261 | default: | 262 | default: |
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index 64949eef43f1..efa1198940ab 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c | |||
@@ -60,6 +60,12 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) | |||
60 | 60 | ||
61 | kvmppc_e500_tlb_setup(vcpu_e500); | 61 | kvmppc_e500_tlb_setup(vcpu_e500); |
62 | 62 | ||
63 | /* Registers init */ | ||
64 | vcpu->arch.pvr = mfspr(SPRN_PVR); | ||
65 | |||
66 | /* Since booke kvm only support one core, update all vcpus' PIR to 0 */ | ||
67 | vcpu->vcpu_id = 0; | ||
68 | |||
63 | return 0; | 69 | return 0; |
64 | } | 70 | } |
65 | 71 | ||
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index be95b8d8e3b7..8e3edfbc9634 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c | |||
@@ -74,54 +74,59 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) | |||
74 | { | 74 | { |
75 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); | 75 | struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); |
76 | int emulated = EMULATE_DONE; | 76 | int emulated = EMULATE_DONE; |
77 | ulong spr_val = kvmppc_get_gpr(vcpu, rs); | ||
77 | 78 | ||
78 | switch (sprn) { | 79 | switch (sprn) { |
79 | case SPRN_PID: | 80 | case SPRN_PID: |
80 | vcpu_e500->pid[0] = vcpu->arch.shadow_pid = | 81 | vcpu_e500->pid[0] = vcpu->arch.shadow_pid = |
81 | vcpu->arch.pid = vcpu->arch.gpr[rs]; | 82 | vcpu->arch.pid = spr_val; |
82 | break; | 83 | break; |
83 | case SPRN_PID1: | 84 | case SPRN_PID1: |
84 | vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break; | 85 | vcpu_e500->pid[1] = spr_val; break; |
85 | case SPRN_PID2: | 86 | case SPRN_PID2: |
86 | vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break; | 87 | vcpu_e500->pid[2] = spr_val; break; |
87 | case SPRN_MAS0: | 88 | case SPRN_MAS0: |
88 | vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break; | 89 | vcpu_e500->mas0 = spr_val; break; |
89 | case SPRN_MAS1: | 90 | case SPRN_MAS1: |
90 | vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break; | 91 | vcpu_e500->mas1 = spr_val; break; |
91 | case SPRN_MAS2: | 92 | case SPRN_MAS2: |
92 | vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break; | 93 | vcpu_e500->mas2 = spr_val; break; |
93 | case SPRN_MAS3: | 94 | case SPRN_MAS3: |
94 | vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break; | 95 | vcpu_e500->mas3 = spr_val; break; |
95 | case SPRN_MAS4: | 96 | case SPRN_MAS4: |
96 | vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break; | 97 | vcpu_e500->mas4 = spr_val; break; |
97 | case SPRN_MAS6: | 98 | case SPRN_MAS6: |
98 | vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break; | 99 | vcpu_e500->mas6 = spr_val; break; |
99 | case SPRN_MAS7: | 100 | case SPRN_MAS7: |
100 | vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break; | 101 | vcpu_e500->mas7 = spr_val; break; |
102 | case SPRN_L1CSR0: | ||
103 | vcpu_e500->l1csr0 = spr_val; | ||
104 | vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC); | ||
105 | break; | ||
101 | case SPRN_L1CSR1: | 106 | case SPRN_L1CSR1: |
102 | vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break; | 107 | vcpu_e500->l1csr1 = spr_val; break; |
103 | case SPRN_HID0: | 108 | case SPRN_HID0: |
104 | vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break; | 109 | vcpu_e500->hid0 = spr_val; break; |
105 | case SPRN_HID1: | 110 | case SPRN_HID1: |
106 | vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break; | 111 | vcpu_e500->hid1 = spr_val; break; |
107 | 112 | ||
108 | case SPRN_MMUCSR0: | 113 | case SPRN_MMUCSR0: |
109 | emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, | 114 | emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500, |
110 | vcpu->arch.gpr[rs]); | 115 | spr_val); |
111 | break; | 116 | break; |
112 | 117 | ||
113 | /* extra exceptions */ | 118 | /* extra exceptions */ |
114 | case SPRN_IVOR32: | 119 | case SPRN_IVOR32: |
115 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs]; | 120 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; |
116 | break; | 121 | break; |
117 | case SPRN_IVOR33: | 122 | case SPRN_IVOR33: |
118 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs]; | 123 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val; |
119 | break; | 124 | break; |
120 | case SPRN_IVOR34: | 125 | case SPRN_IVOR34: |
121 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs]; | 126 | vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val; |
122 | break; | 127 | break; |
123 | case SPRN_IVOR35: | 128 | case SPRN_IVOR35: |
124 | vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs]; | 129 | vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val; |
125 | break; | 130 | break; |
126 | 131 | ||
127 | default: | 132 | default: |
@@ -138,63 +143,57 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) | |||
138 | 143 | ||
139 | switch (sprn) { | 144 | switch (sprn) { |
140 | case SPRN_PID: | 145 | case SPRN_PID: |
141 | vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break; | 146 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break; |
142 | case SPRN_PID1: | 147 | case SPRN_PID1: |
143 | vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break; | 148 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break; |
144 | case SPRN_PID2: | 149 | case SPRN_PID2: |
145 | vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break; | 150 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break; |
146 | case SPRN_MAS0: | 151 | case SPRN_MAS0: |
147 | vcpu->arch.gpr[rt] = vcpu_e500->mas0; break; | 152 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break; |
148 | case SPRN_MAS1: | 153 | case SPRN_MAS1: |
149 | vcpu->arch.gpr[rt] = vcpu_e500->mas1; break; | 154 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break; |
150 | case SPRN_MAS2: | 155 | case SPRN_MAS2: |
151 | vcpu->arch.gpr[rt] = vcpu_e500->mas2; break; | 156 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break; |
152 | case SPRN_MAS3: | 157 | case SPRN_MAS3: |
153 | vcpu->arch.gpr[rt] = vcpu_e500->mas3; break; | 158 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break; |
154 | case SPRN_MAS4: | 159 | case SPRN_MAS4: |
155 | vcpu->arch.gpr[rt] = vcpu_e500->mas4; break; | 160 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break; |
156 | case SPRN_MAS6: | 161 | case SPRN_MAS6: |
157 | vcpu->arch.gpr[rt] = vcpu_e500->mas6; break; | 162 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break; |
158 | case SPRN_MAS7: | 163 | case SPRN_MAS7: |
159 | vcpu->arch.gpr[rt] = vcpu_e500->mas7; break; | 164 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break; |
160 | 165 | ||
161 | case SPRN_TLB0CFG: | 166 | case SPRN_TLB0CFG: |
162 | vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG); | 167 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break; |
163 | vcpu->arch.gpr[rt] &= ~0xfffUL; | ||
164 | vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0]; | ||
165 | break; | ||
166 | |||
167 | case SPRN_TLB1CFG: | 168 | case SPRN_TLB1CFG: |
168 | vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG); | 169 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break; |
169 | vcpu->arch.gpr[rt] &= ~0xfffUL; | 170 | case SPRN_L1CSR0: |
170 | vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1]; | 171 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break; |
171 | break; | ||
172 | |||
173 | case SPRN_L1CSR1: | 172 | case SPRN_L1CSR1: |
174 | vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break; | 173 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break; |
175 | case SPRN_HID0: | 174 | case SPRN_HID0: |
176 | vcpu->arch.gpr[rt] = vcpu_e500->hid0; break; | 175 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break; |
177 | case SPRN_HID1: | 176 | case SPRN_HID1: |
178 | vcpu->arch.gpr[rt] = vcpu_e500->hid1; break; | 177 | kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break; |
179 | 178 | ||
180 | case SPRN_MMUCSR0: | 179 | case SPRN_MMUCSR0: |
181 | vcpu->arch.gpr[rt] = 0; break; | 180 | kvmppc_set_gpr(vcpu, rt, 0); break; |
182 | 181 | ||
183 | case SPRN_MMUCFG: | 182 | case SPRN_MMUCFG: |
184 | vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break; | 183 | kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break; |
185 | 184 | ||
186 | /* extra exceptions */ | 185 | /* extra exceptions */ |
187 | case SPRN_IVOR32: | 186 | case SPRN_IVOR32: |
188 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; | 187 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]); |
189 | break; | 188 | break; |
190 | case SPRN_IVOR33: | 189 | case SPRN_IVOR33: |
191 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]; | 190 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]); |
192 | break; | 191 | break; |
193 | case SPRN_IVOR34: | 192 | case SPRN_IVOR34: |
194 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]; | 193 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]); |
195 | break; | 194 | break; |
196 | case SPRN_IVOR35: | 195 | case SPRN_IVOR35: |
197 | vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]; | 196 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]); |
198 | break; | 197 | break; |
199 | default: | 198 | default: |
200 | emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); | 199 | emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt); |
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index fb1e1dc11ba5..0d772e6b6318 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c | |||
@@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) | |||
417 | int esel, tlbsel; | 417 | int esel, tlbsel; |
418 | gva_t ea; | 418 | gva_t ea; |
419 | 419 | ||
420 | ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb]; | 420 | ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb); |
421 | 421 | ||
422 | ia = (ea >> 2) & 0x1; | 422 | ia = (ea >> 2) & 0x1; |
423 | 423 | ||
@@ -470,7 +470,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) | |||
470 | struct tlbe *gtlbe = NULL; | 470 | struct tlbe *gtlbe = NULL; |
471 | gva_t ea; | 471 | gva_t ea; |
472 | 472 | ||
473 | ea = vcpu->arch.gpr[rb]; | 473 | ea = kvmppc_get_gpr(vcpu, rb); |
474 | 474 | ||
475 | for (tlbsel = 0; tlbsel < 2; tlbsel++) { | 475 | for (tlbsel = 0; tlbsel < 2; tlbsel++) { |
476 | esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); | 476 | esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); |
@@ -728,6 +728,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) | |||
728 | if (vcpu_e500->shadow_pages[1] == NULL) | 728 | if (vcpu_e500->shadow_pages[1] == NULL) |
729 | goto err_out_page0; | 729 | goto err_out_page0; |
730 | 730 | ||
731 | /* Init TLB configuration register */ | ||
732 | vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL; | ||
733 | vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0]; | ||
734 | vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL; | ||
735 | vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1]; | ||
736 | |||
731 | return 0; | 737 | return 0; |
732 | 738 | ||
733 | err_out_page0: | 739 | err_out_page0: |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 4a9ac6640fad..cb72a65f4ecc 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c | |||
@@ -83,6 +83,9 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) | |||
83 | 83 | ||
84 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); | 84 | pr_debug("mtDEC: %x\n", vcpu->arch.dec); |
85 | #ifdef CONFIG_PPC64 | 85 | #ifdef CONFIG_PPC64 |
86 | /* mtdec lowers the interrupt line when positive. */ | ||
87 | kvmppc_core_dequeue_dec(vcpu); | ||
88 | |||
86 | /* POWER4+ triggers a dec interrupt if the value is < 0 */ | 89 | /* POWER4+ triggers a dec interrupt if the value is < 0 */ |
87 | if (vcpu->arch.dec & 0x80000000) { | 90 | if (vcpu->arch.dec & 0x80000000) { |
88 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); | 91 | hrtimer_try_to_cancel(&vcpu->arch.dec_timer); |
@@ -140,14 +143,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
140 | 143 | ||
141 | pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); | 144 | pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); |
142 | 145 | ||
146 | /* Try again next time */ | ||
147 | if (inst == KVM_INST_FETCH_FAILED) | ||
148 | return EMULATE_DONE; | ||
149 | |||
143 | switch (get_op(inst)) { | 150 | switch (get_op(inst)) { |
144 | case OP_TRAP: | 151 | case OP_TRAP: |
145 | #ifdef CONFIG_PPC64 | 152 | #ifdef CONFIG_PPC64 |
146 | case OP_TRAP_64: | 153 | case OP_TRAP_64: |
154 | kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP); | ||
147 | #else | 155 | #else |
148 | vcpu->arch.esr |= ESR_PTR; | 156 | kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR); |
149 | #endif | 157 | #endif |
150 | kvmppc_core_queue_program(vcpu); | ||
151 | advance = 0; | 158 | advance = 0; |
152 | break; | 159 | break; |
153 | 160 | ||
@@ -167,14 +174,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
167 | case OP_31_XOP_STWX: | 174 | case OP_31_XOP_STWX: |
168 | rs = get_rs(inst); | 175 | rs = get_rs(inst); |
169 | emulated = kvmppc_handle_store(run, vcpu, | 176 | emulated = kvmppc_handle_store(run, vcpu, |
170 | vcpu->arch.gpr[rs], | 177 | kvmppc_get_gpr(vcpu, rs), |
171 | 4, 1); | 178 | 4, 1); |
172 | break; | 179 | break; |
173 | 180 | ||
174 | case OP_31_XOP_STBX: | 181 | case OP_31_XOP_STBX: |
175 | rs = get_rs(inst); | 182 | rs = get_rs(inst); |
176 | emulated = kvmppc_handle_store(run, vcpu, | 183 | emulated = kvmppc_handle_store(run, vcpu, |
177 | vcpu->arch.gpr[rs], | 184 | kvmppc_get_gpr(vcpu, rs), |
178 | 1, 1); | 185 | 1, 1); |
179 | break; | 186 | break; |
180 | 187 | ||
@@ -183,14 +190,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
183 | ra = get_ra(inst); | 190 | ra = get_ra(inst); |
184 | rb = get_rb(inst); | 191 | rb = get_rb(inst); |
185 | 192 | ||
186 | ea = vcpu->arch.gpr[rb]; | 193 | ea = kvmppc_get_gpr(vcpu, rb); |
187 | if (ra) | 194 | if (ra) |
188 | ea += vcpu->arch.gpr[ra]; | 195 | ea += kvmppc_get_gpr(vcpu, ra); |
189 | 196 | ||
190 | emulated = kvmppc_handle_store(run, vcpu, | 197 | emulated = kvmppc_handle_store(run, vcpu, |
191 | vcpu->arch.gpr[rs], | 198 | kvmppc_get_gpr(vcpu, rs), |
192 | 1, 1); | 199 | 1, 1); |
193 | vcpu->arch.gpr[rs] = ea; | 200 | kvmppc_set_gpr(vcpu, rs, ea); |
194 | break; | 201 | break; |
195 | 202 | ||
196 | case OP_31_XOP_LHZX: | 203 | case OP_31_XOP_LHZX: |
@@ -203,12 +210,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
203 | ra = get_ra(inst); | 210 | ra = get_ra(inst); |
204 | rb = get_rb(inst); | 211 | rb = get_rb(inst); |
205 | 212 | ||
206 | ea = vcpu->arch.gpr[rb]; | 213 | ea = kvmppc_get_gpr(vcpu, rb); |
207 | if (ra) | 214 | if (ra) |
208 | ea += vcpu->arch.gpr[ra]; | 215 | ea += kvmppc_get_gpr(vcpu, ra); |
209 | 216 | ||
210 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | 217 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); |
211 | vcpu->arch.gpr[ra] = ea; | 218 | kvmppc_set_gpr(vcpu, ra, ea); |
212 | break; | 219 | break; |
213 | 220 | ||
214 | case OP_31_XOP_MFSPR: | 221 | case OP_31_XOP_MFSPR: |
@@ -217,47 +224,49 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
217 | 224 | ||
218 | switch (sprn) { | 225 | switch (sprn) { |
219 | case SPRN_SRR0: | 226 | case SPRN_SRR0: |
220 | vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; | 227 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break; |
221 | case SPRN_SRR1: | 228 | case SPRN_SRR1: |
222 | vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; | 229 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break; |
223 | case SPRN_PVR: | 230 | case SPRN_PVR: |
224 | vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; | 231 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; |
225 | case SPRN_PIR: | 232 | case SPRN_PIR: |
226 | vcpu->arch.gpr[rt] = vcpu->vcpu_id; break; | 233 | kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break; |
227 | case SPRN_MSSSR0: | 234 | case SPRN_MSSSR0: |
228 | vcpu->arch.gpr[rt] = 0; break; | 235 | kvmppc_set_gpr(vcpu, rt, 0); break; |
229 | 236 | ||
230 | /* Note: mftb and TBRL/TBWL are user-accessible, so | 237 | /* Note: mftb and TBRL/TBWL are user-accessible, so |
231 | * the guest can always access the real TB anyways. | 238 | * the guest can always access the real TB anyways. |
232 | * In fact, we probably will never see these traps. */ | 239 | * In fact, we probably will never see these traps. */ |
233 | case SPRN_TBWL: | 240 | case SPRN_TBWL: |
234 | vcpu->arch.gpr[rt] = get_tb() >> 32; break; | 241 | kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break; |
235 | case SPRN_TBWU: | 242 | case SPRN_TBWU: |
236 | vcpu->arch.gpr[rt] = get_tb(); break; | 243 | kvmppc_set_gpr(vcpu, rt, get_tb()); break; |
237 | 244 | ||
238 | case SPRN_SPRG0: | 245 | case SPRN_SPRG0: |
239 | vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; | 246 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break; |
240 | case SPRN_SPRG1: | 247 | case SPRN_SPRG1: |
241 | vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; | 248 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break; |
242 | case SPRN_SPRG2: | 249 | case SPRN_SPRG2: |
243 | vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; | 250 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break; |
244 | case SPRN_SPRG3: | 251 | case SPRN_SPRG3: |
245 | vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; | 252 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break; |
246 | /* Note: SPRG4-7 are user-readable, so we don't get | 253 | /* Note: SPRG4-7 are user-readable, so we don't get |
247 | * a trap. */ | 254 | * a trap. */ |
248 | 255 | ||
249 | case SPRN_DEC: | 256 | case SPRN_DEC: |
250 | { | 257 | { |
251 | u64 jd = get_tb() - vcpu->arch.dec_jiffies; | 258 | u64 jd = get_tb() - vcpu->arch.dec_jiffies; |
252 | vcpu->arch.gpr[rt] = vcpu->arch.dec - jd; | 259 | kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); |
253 | pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]); | 260 | pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", |
261 | vcpu->arch.dec, jd, | ||
262 | kvmppc_get_gpr(vcpu, rt)); | ||
254 | break; | 263 | break; |
255 | } | 264 | } |
256 | default: | 265 | default: |
257 | emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); | 266 | emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt); |
258 | if (emulated == EMULATE_FAIL) { | 267 | if (emulated == EMULATE_FAIL) { |
259 | printk("mfspr: unknown spr %x\n", sprn); | 268 | printk("mfspr: unknown spr %x\n", sprn); |
260 | vcpu->arch.gpr[rt] = 0; | 269 | kvmppc_set_gpr(vcpu, rt, 0); |
261 | } | 270 | } |
262 | break; | 271 | break; |
263 | } | 272 | } |
@@ -269,7 +278,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
269 | rb = get_rb(inst); | 278 | rb = get_rb(inst); |
270 | 279 | ||
271 | emulated = kvmppc_handle_store(run, vcpu, | 280 | emulated = kvmppc_handle_store(run, vcpu, |
272 | vcpu->arch.gpr[rs], | 281 | kvmppc_get_gpr(vcpu, rs), |
273 | 2, 1); | 282 | 2, 1); |
274 | break; | 283 | break; |
275 | 284 | ||
@@ -278,14 +287,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
278 | ra = get_ra(inst); | 287 | ra = get_ra(inst); |
279 | rb = get_rb(inst); | 288 | rb = get_rb(inst); |
280 | 289 | ||
281 | ea = vcpu->arch.gpr[rb]; | 290 | ea = kvmppc_get_gpr(vcpu, rb); |
282 | if (ra) | 291 | if (ra) |
283 | ea += vcpu->arch.gpr[ra]; | 292 | ea += kvmppc_get_gpr(vcpu, ra); |
284 | 293 | ||
285 | emulated = kvmppc_handle_store(run, vcpu, | 294 | emulated = kvmppc_handle_store(run, vcpu, |
286 | vcpu->arch.gpr[rs], | 295 | kvmppc_get_gpr(vcpu, rs), |
287 | 2, 1); | 296 | 2, 1); |
288 | vcpu->arch.gpr[ra] = ea; | 297 | kvmppc_set_gpr(vcpu, ra, ea); |
289 | break; | 298 | break; |
290 | 299 | ||
291 | case OP_31_XOP_MTSPR: | 300 | case OP_31_XOP_MTSPR: |
@@ -293,9 +302,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
293 | rs = get_rs(inst); | 302 | rs = get_rs(inst); |
294 | switch (sprn) { | 303 | switch (sprn) { |
295 | case SPRN_SRR0: | 304 | case SPRN_SRR0: |
296 | vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; | 305 | vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break; |
297 | case SPRN_SRR1: | 306 | case SPRN_SRR1: |
298 | vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; | 307 | vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break; |
299 | 308 | ||
300 | /* XXX We need to context-switch the timebase for | 309 | /* XXX We need to context-switch the timebase for |
301 | * watchdog and FIT. */ | 310 | * watchdog and FIT. */ |
@@ -305,18 +314,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
305 | case SPRN_MSSSR0: break; | 314 | case SPRN_MSSSR0: break; |
306 | 315 | ||
307 | case SPRN_DEC: | 316 | case SPRN_DEC: |
308 | vcpu->arch.dec = vcpu->arch.gpr[rs]; | 317 | vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs); |
309 | kvmppc_emulate_dec(vcpu); | 318 | kvmppc_emulate_dec(vcpu); |
310 | break; | 319 | break; |
311 | 320 | ||
312 | case SPRN_SPRG0: | 321 | case SPRN_SPRG0: |
313 | vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; | 322 | vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break; |
314 | case SPRN_SPRG1: | 323 | case SPRN_SPRG1: |
315 | vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; | 324 | vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break; |
316 | case SPRN_SPRG2: | 325 | case SPRN_SPRG2: |
317 | vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; | 326 | vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break; |
318 | case SPRN_SPRG3: | 327 | case SPRN_SPRG3: |
319 | vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; | 328 | vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break; |
320 | 329 | ||
321 | default: | 330 | default: |
322 | emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); | 331 | emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); |
@@ -348,7 +357,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
348 | rb = get_rb(inst); | 357 | rb = get_rb(inst); |
349 | 358 | ||
350 | emulated = kvmppc_handle_store(run, vcpu, | 359 | emulated = kvmppc_handle_store(run, vcpu, |
351 | vcpu->arch.gpr[rs], | 360 | kvmppc_get_gpr(vcpu, rs), |
352 | 4, 0); | 361 | 4, 0); |
353 | break; | 362 | break; |
354 | 363 | ||
@@ -363,7 +372,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
363 | rb = get_rb(inst); | 372 | rb = get_rb(inst); |
364 | 373 | ||
365 | emulated = kvmppc_handle_store(run, vcpu, | 374 | emulated = kvmppc_handle_store(run, vcpu, |
366 | vcpu->arch.gpr[rs], | 375 | kvmppc_get_gpr(vcpu, rs), |
367 | 2, 0); | 376 | 2, 0); |
368 | break; | 377 | break; |
369 | 378 | ||
@@ -382,7 +391,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
382 | ra = get_ra(inst); | 391 | ra = get_ra(inst); |
383 | rt = get_rt(inst); | 392 | rt = get_rt(inst); |
384 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); | 393 | emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); |
385 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | 394 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
386 | break; | 395 | break; |
387 | 396 | ||
388 | case OP_LBZ: | 397 | case OP_LBZ: |
@@ -394,35 +403,39 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
394 | ra = get_ra(inst); | 403 | ra = get_ra(inst); |
395 | rt = get_rt(inst); | 404 | rt = get_rt(inst); |
396 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); | 405 | emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); |
397 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | 406 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
398 | break; | 407 | break; |
399 | 408 | ||
400 | case OP_STW: | 409 | case OP_STW: |
401 | rs = get_rs(inst); | 410 | rs = get_rs(inst); |
402 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | 411 | emulated = kvmppc_handle_store(run, vcpu, |
412 | kvmppc_get_gpr(vcpu, rs), | ||
403 | 4, 1); | 413 | 4, 1); |
404 | break; | 414 | break; |
405 | 415 | ||
406 | case OP_STWU: | 416 | case OP_STWU: |
407 | ra = get_ra(inst); | 417 | ra = get_ra(inst); |
408 | rs = get_rs(inst); | 418 | rs = get_rs(inst); |
409 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | 419 | emulated = kvmppc_handle_store(run, vcpu, |
420 | kvmppc_get_gpr(vcpu, rs), | ||
410 | 4, 1); | 421 | 4, 1); |
411 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | 422 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
412 | break; | 423 | break; |
413 | 424 | ||
414 | case OP_STB: | 425 | case OP_STB: |
415 | rs = get_rs(inst); | 426 | rs = get_rs(inst); |
416 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | 427 | emulated = kvmppc_handle_store(run, vcpu, |
428 | kvmppc_get_gpr(vcpu, rs), | ||
417 | 1, 1); | 429 | 1, 1); |
418 | break; | 430 | break; |
419 | 431 | ||
420 | case OP_STBU: | 432 | case OP_STBU: |
421 | ra = get_ra(inst); | 433 | ra = get_ra(inst); |
422 | rs = get_rs(inst); | 434 | rs = get_rs(inst); |
423 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | 435 | emulated = kvmppc_handle_store(run, vcpu, |
436 | kvmppc_get_gpr(vcpu, rs), | ||
424 | 1, 1); | 437 | 1, 1); |
425 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | 438 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
426 | break; | 439 | break; |
427 | 440 | ||
428 | case OP_LHZ: | 441 | case OP_LHZ: |
@@ -434,21 +447,23 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
434 | ra = get_ra(inst); | 447 | ra = get_ra(inst); |
435 | rt = get_rt(inst); | 448 | rt = get_rt(inst); |
436 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); | 449 | emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); |
437 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | 450 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
438 | break; | 451 | break; |
439 | 452 | ||
440 | case OP_STH: | 453 | case OP_STH: |
441 | rs = get_rs(inst); | 454 | rs = get_rs(inst); |
442 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | 455 | emulated = kvmppc_handle_store(run, vcpu, |
456 | kvmppc_get_gpr(vcpu, rs), | ||
443 | 2, 1); | 457 | 2, 1); |
444 | break; | 458 | break; |
445 | 459 | ||
446 | case OP_STHU: | 460 | case OP_STHU: |
447 | ra = get_ra(inst); | 461 | ra = get_ra(inst); |
448 | rs = get_rs(inst); | 462 | rs = get_rs(inst); |
449 | emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], | 463 | emulated = kvmppc_handle_store(run, vcpu, |
464 | kvmppc_get_gpr(vcpu, rs), | ||
450 | 2, 1); | 465 | 2, 1); |
451 | vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; | 466 | kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); |
452 | break; | 467 | break; |
453 | 468 | ||
454 | default: | 469 | default: |
@@ -461,6 +476,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
461 | advance = 0; | 476 | advance = 0; |
462 | printk(KERN_ERR "Couldn't emulate instruction 0x%08x " | 477 | printk(KERN_ERR "Couldn't emulate instruction 0x%08x " |
463 | "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); | 478 | "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); |
479 | kvmppc_core_queue_program(vcpu, 0); | ||
464 | } | 480 | } |
465 | } | 481 | } |
466 | 482 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f06cf93b178e..51aedd7f16bc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
@@ -137,6 +137,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
137 | { | 137 | { |
138 | kvmppc_free_vcpus(kvm); | 138 | kvmppc_free_vcpus(kvm); |
139 | kvm_free_physmem(kvm); | 139 | kvm_free_physmem(kvm); |
140 | cleanup_srcu_struct(&kvm->srcu); | ||
140 | kfree(kvm); | 141 | kfree(kvm); |
141 | } | 142 | } |
142 | 143 | ||
@@ -165,14 +166,24 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
165 | return -EINVAL; | 166 | return -EINVAL; |
166 | } | 167 | } |
167 | 168 | ||
168 | int kvm_arch_set_memory_region(struct kvm *kvm, | 169 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
169 | struct kvm_userspace_memory_region *mem, | 170 | struct kvm_memory_slot *memslot, |
170 | struct kvm_memory_slot old, | 171 | struct kvm_memory_slot old, |
171 | int user_alloc) | 172 | struct kvm_userspace_memory_region *mem, |
173 | int user_alloc) | ||
172 | { | 174 | { |
173 | return 0; | 175 | return 0; |
174 | } | 176 | } |
175 | 177 | ||
178 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
179 | struct kvm_userspace_memory_region *mem, | ||
180 | struct kvm_memory_slot old, | ||
181 | int user_alloc) | ||
182 | { | ||
183 | return; | ||
184 | } | ||
185 | |||
186 | |||
176 | void kvm_arch_flush_shadow(struct kvm *kvm) | 187 | void kvm_arch_flush_shadow(struct kvm *kvm) |
177 | { | 188 | { |
178 | } | 189 | } |
@@ -260,34 +271,35 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
260 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | 271 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, |
261 | struct kvm_run *run) | 272 | struct kvm_run *run) |
262 | { | 273 | { |
263 | ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; | 274 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); |
264 | *gpr = run->dcr.data; | ||
265 | } | 275 | } |
266 | 276 | ||
267 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, | 277 | static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, |
268 | struct kvm_run *run) | 278 | struct kvm_run *run) |
269 | { | 279 | { |
270 | ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; | 280 | ulong gpr; |
271 | 281 | ||
272 | if (run->mmio.len > sizeof(*gpr)) { | 282 | if (run->mmio.len > sizeof(gpr)) { |
273 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); | 283 | printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); |
274 | return; | 284 | return; |
275 | } | 285 | } |
276 | 286 | ||
277 | if (vcpu->arch.mmio_is_bigendian) { | 287 | if (vcpu->arch.mmio_is_bigendian) { |
278 | switch (run->mmio.len) { | 288 | switch (run->mmio.len) { |
279 | case 4: *gpr = *(u32 *)run->mmio.data; break; | 289 | case 4: gpr = *(u32 *)run->mmio.data; break; |
280 | case 2: *gpr = *(u16 *)run->mmio.data; break; | 290 | case 2: gpr = *(u16 *)run->mmio.data; break; |
281 | case 1: *gpr = *(u8 *)run->mmio.data; break; | 291 | case 1: gpr = *(u8 *)run->mmio.data; break; |
282 | } | 292 | } |
283 | } else { | 293 | } else { |
284 | /* Convert BE data from userland back to LE. */ | 294 | /* Convert BE data from userland back to LE. */ |
285 | switch (run->mmio.len) { | 295 | switch (run->mmio.len) { |
286 | case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; | 296 | case 4: gpr = ld_le32((u32 *)run->mmio.data); break; |
287 | case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; | 297 | case 2: gpr = ld_le16((u16 *)run->mmio.data); break; |
288 | case 1: *gpr = *(u8 *)run->mmio.data; break; | 298 | case 1: gpr = *(u8 *)run->mmio.data; break; |
289 | } | 299 | } |
290 | } | 300 | } |
301 | |||
302 | kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); | ||
291 | } | 303 | } |
292 | 304 | ||
293 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, | 305 | int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 3fa0a10e4668..49292869a5cd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
@@ -242,6 +242,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
242 | kvm_free_physmem(kvm); | 242 | kvm_free_physmem(kvm); |
243 | free_page((unsigned long)(kvm->arch.sca)); | 243 | free_page((unsigned long)(kvm->arch.sca)); |
244 | debug_unregister(kvm->arch.dbf); | 244 | debug_unregister(kvm->arch.dbf); |
245 | cleanup_srcu_struct(&kvm->srcu); | ||
245 | kfree(kvm); | 246 | kfree(kvm); |
246 | } | 247 | } |
247 | 248 | ||
@@ -690,14 +691,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
690 | } | 691 | } |
691 | 692 | ||
692 | /* Section: memory related */ | 693 | /* Section: memory related */ |
693 | int kvm_arch_set_memory_region(struct kvm *kvm, | 694 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
694 | struct kvm_userspace_memory_region *mem, | 695 | struct kvm_memory_slot *memslot, |
695 | struct kvm_memory_slot old, | 696 | struct kvm_memory_slot old, |
696 | int user_alloc) | 697 | struct kvm_userspace_memory_region *mem, |
698 | int user_alloc) | ||
697 | { | 699 | { |
698 | int i; | ||
699 | struct kvm_vcpu *vcpu; | ||
700 | |||
701 | /* A few sanity checks. We can have exactly one memory slot which has | 700 | /* A few sanity checks. We can have exactly one memory slot which has |
702 | to start at guest virtual zero and which has to be located at a | 701 | to start at guest virtual zero and which has to be located at a |
703 | page boundary in userland and which has to end at a page boundary. | 702 | page boundary in userland and which has to end at a page boundary. |
@@ -720,14 +719,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
720 | if (!user_alloc) | 719 | if (!user_alloc) |
721 | return -EINVAL; | 720 | return -EINVAL; |
722 | 721 | ||
722 | return 0; | ||
723 | } | ||
724 | |||
725 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
726 | struct kvm_userspace_memory_region *mem, | ||
727 | struct kvm_memory_slot old, | ||
728 | int user_alloc) | ||
729 | { | ||
730 | int i; | ||
731 | struct kvm_vcpu *vcpu; | ||
732 | |||
723 | /* request update of sie control block for all available vcpus */ | 733 | /* request update of sie control block for all available vcpus */ |
724 | kvm_for_each_vcpu(i, vcpu, kvm) { | 734 | kvm_for_each_vcpu(i, vcpu, kvm) { |
725 | if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 735 | if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
726 | continue; | 736 | continue; |
727 | kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); | 737 | kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP); |
728 | } | 738 | } |
729 | |||
730 | return 0; | ||
731 | } | 739 | } |
732 | 740 | ||
733 | void kvm_arch_flush_shadow(struct kvm *kvm) | 741 | void kvm_arch_flush_shadow(struct kvm *kvm) |
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 06cce8285ba0..60f09ab3672c 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h | |||
@@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu) | |||
67 | 67 | ||
68 | static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) | 68 | static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) |
69 | { | 69 | { |
70 | int idx; | ||
70 | struct kvm_memory_slot *mem; | 71 | struct kvm_memory_slot *mem; |
72 | struct kvm_memslots *memslots; | ||
71 | 73 | ||
72 | down_read(&vcpu->kvm->slots_lock); | 74 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
73 | mem = &vcpu->kvm->memslots[0]; | 75 | memslots = rcu_dereference(vcpu->kvm->memslots); |
76 | |||
77 | mem = &memslots->memslots[0]; | ||
74 | 78 | ||
75 | vcpu->arch.sie_block->gmsor = mem->userspace_addr; | 79 | vcpu->arch.sie_block->gmsor = mem->userspace_addr; |
76 | vcpu->arch.sie_block->gmslm = | 80 | vcpu->arch.sie_block->gmslm = |
@@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu) | |||
78 | (mem->npages << PAGE_SHIFT) + | 82 | (mem->npages << PAGE_SHIFT) + |
79 | VIRTIODESCSPACE - 1ul; | 83 | VIRTIODESCSPACE - 1ul; |
80 | 84 | ||
81 | up_read(&vcpu->kvm->slots_lock); | 85 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
82 | } | 86 | } |
83 | 87 | ||
84 | /* implemented in priv.c */ | 88 | /* implemented in priv.c */ |
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 9f828f87ca35..493092efaa3b 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -11,6 +11,7 @@ header-y += sigcontext32.h | |||
11 | header-y += ucontext.h | 11 | header-y += ucontext.h |
12 | header-y += processor-flags.h | 12 | header-y += processor-flags.h |
13 | header-y += hw_breakpoint.h | 13 | header-y += hw_breakpoint.h |
14 | header-y += hyperv.h | ||
14 | 15 | ||
15 | unifdef-y += e820.h | 16 | unifdef-y += e820.h |
16 | unifdef-y += ist.h | 17 | unifdef-y += ist.h |
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h new file mode 100644 index 000000000000..e153a2b3889a --- /dev/null +++ b/arch/x86/include/asm/hyperv.h | |||
@@ -0,0 +1,186 @@ | |||
1 | #ifndef _ASM_X86_KVM_HYPERV_H | ||
2 | #define _ASM_X86_KVM_HYPERV_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | /* | ||
7 | * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent | ||
8 | * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). | ||
9 | */ | ||
10 | #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 | ||
11 | #define HYPERV_CPUID_INTERFACE 0x40000001 | ||
12 | #define HYPERV_CPUID_VERSION 0x40000002 | ||
13 | #define HYPERV_CPUID_FEATURES 0x40000003 | ||
14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | ||
15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | ||
16 | |||
17 | /* | ||
18 | * Feature identification. EAX indicates which features are available | ||
19 | * to the partition based upon the current partition privileges. | ||
20 | */ | ||
21 | |||
22 | /* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ | ||
23 | #define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0) | ||
24 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available */ | ||
25 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) | ||
26 | /* | ||
27 | * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM | ||
28 | * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available | ||
29 | */ | ||
30 | #define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2) | ||
31 | /* | ||
32 | * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through | ||
33 | * HV_X64_MSR_STIMER3_COUNT) available | ||
34 | */ | ||
35 | #define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3) | ||
36 | /* | ||
37 | * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) | ||
38 | * are available | ||
39 | */ | ||
40 | #define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4) | ||
41 | /* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available */ | ||
42 | #define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5) | ||
43 | /* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available */ | ||
44 | #define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6) | ||
45 | /* Virtual system reset MSR (HV_X64_MSR_RESET) is available */ | ||
46 | #define HV_X64_MSR_RESET_AVAILABLE (1 << 7) | ||
47 | /* | ||
48 | * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, | ||
49 | * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, | ||
50 | * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available | ||
51 | */ | ||
52 | #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) | ||
53 | |||
54 | /* | ||
55 | * Feature identification: EBX indicates which flags were specified at | ||
56 | * partition creation. The format is the same as the partition creation | ||
57 | * flag structure defined in section Partition Creation Flags. | ||
58 | */ | ||
59 | #define HV_X64_CREATE_PARTITIONS (1 << 0) | ||
60 | #define HV_X64_ACCESS_PARTITION_ID (1 << 1) | ||
61 | #define HV_X64_ACCESS_MEMORY_POOL (1 << 2) | ||
62 | #define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3) | ||
63 | #define HV_X64_POST_MESSAGES (1 << 4) | ||
64 | #define HV_X64_SIGNAL_EVENTS (1 << 5) | ||
65 | #define HV_X64_CREATE_PORT (1 << 6) | ||
66 | #define HV_X64_CONNECT_PORT (1 << 7) | ||
67 | #define HV_X64_ACCESS_STATS (1 << 8) | ||
68 | #define HV_X64_DEBUGGING (1 << 11) | ||
69 | #define HV_X64_CPU_POWER_MANAGEMENT (1 << 12) | ||
70 | #define HV_X64_CONFIGURE_PROFILER (1 << 13) | ||
71 | |||
72 | /* | ||
73 | * Feature identification. EDX indicates which miscellaneous features | ||
74 | * are available to the partition. | ||
75 | */ | ||
76 | /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ | ||
77 | #define HV_X64_MWAIT_AVAILABLE (1 << 0) | ||
78 | /* Guest debugging support is available */ | ||
79 | #define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1) | ||
80 | /* Performance Monitor support is available */ | ||
81 | #define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2) | ||
82 | /* Support for physical CPU dynamic partitioning events is available */ | ||
83 | #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3) | ||
84 | /* | ||
85 | * Support for passing hypercall input parameter block via XMM | ||
86 | * registers is available | ||
87 | */ | ||
88 | #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) | ||
89 | /* Support for a virtual guest idle state is available */ | ||
90 | #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) | ||
91 | |||
92 | /* | ||
93 | * Implementation recommendations. Indicates which behaviors the hypervisor | ||
94 | * recommends the OS implement for optimal performance. | ||
95 | */ | ||
96 | /* | ||
97 | * Recommend using hypercall for address space switches rather | ||
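98 | * than MOV to CR3 instruction (NOTE: the macro name below says MWAIT, which does not match this description; confirm against the Hyper-V spec) | ||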
99 | */ | ||
100 | #define HV_X64_MWAIT_RECOMMENDED (1 << 0) | ||
101 | /* Recommend using hypercall for local TLB flushes rather | ||
102 | * than INVLPG or MOV to CR3 instructions */ | ||
103 | #define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1) | ||
104 | /* | ||
105 | * Recommend using hypercall for remote TLB flushes rather | ||
106 | * than inter-processor interrupts | ||
107 | */ | ||
108 | #define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2) | ||
109 | /* | ||
110 | * Recommend using MSRs for accessing APIC registers | ||
111 | * EOI, ICR and TPR rather than their memory-mapped counterparts | ||
112 | */ | ||
113 | #define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3) | ||
114 | /* Recommend using the hypervisor-provided MSR to initiate a system RESET */ | ||
115 | #define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4) | ||
116 | /* | ||
117 | * Recommend using relaxed timing for this partition. If used, | ||
118 | * the VM should disable any watchdog timeouts that rely on the | ||
119 | * timely delivery of external interrupts | ||
120 | */ | ||
121 | #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) | ||
122 | |||
123 | /* MSR used to identify the guest OS. */ | ||
124 | #define HV_X64_MSR_GUEST_OS_ID 0x40000000 | ||
125 | |||
126 | /* MSR used to setup pages used to communicate with the hypervisor. */ | ||
127 | #define HV_X64_MSR_HYPERCALL 0x40000001 | ||
128 | |||
129 | /* MSR used to provide vcpu index */ | ||
130 | #define HV_X64_MSR_VP_INDEX 0x40000002 | ||
131 | |||
132 | /* Define the virtual APIC registers */ | ||
133 | #define HV_X64_MSR_EOI 0x40000070 | ||
134 | #define HV_X64_MSR_ICR 0x40000071 | ||
135 | #define HV_X64_MSR_TPR 0x40000072 | ||
136 | #define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 | ||
137 | |||
138 | /* Define synthetic interrupt controller model specific registers. */ | ||
139 | #define HV_X64_MSR_SCONTROL 0x40000080 | ||
140 | #define HV_X64_MSR_SVERSION 0x40000081 | ||
141 | #define HV_X64_MSR_SIEFP 0x40000082 | ||
142 | #define HV_X64_MSR_SIMP 0x40000083 | ||
143 | #define HV_X64_MSR_EOM 0x40000084 | ||
144 | #define HV_X64_MSR_SINT0 0x40000090 | ||
145 | #define HV_X64_MSR_SINT1 0x40000091 | ||
146 | #define HV_X64_MSR_SINT2 0x40000092 | ||
147 | #define HV_X64_MSR_SINT3 0x40000093 | ||
148 | #define HV_X64_MSR_SINT4 0x40000094 | ||
149 | #define HV_X64_MSR_SINT5 0x40000095 | ||
150 | #define HV_X64_MSR_SINT6 0x40000096 | ||
151 | #define HV_X64_MSR_SINT7 0x40000097 | ||
152 | #define HV_X64_MSR_SINT8 0x40000098 | ||
153 | #define HV_X64_MSR_SINT9 0x40000099 | ||
154 | #define HV_X64_MSR_SINT10 0x4000009A | ||
155 | #define HV_X64_MSR_SINT11 0x4000009B | ||
156 | #define HV_X64_MSR_SINT12 0x4000009C | ||
157 | #define HV_X64_MSR_SINT13 0x4000009D | ||
158 | #define HV_X64_MSR_SINT14 0x4000009E | ||
159 | #define HV_X64_MSR_SINT15 0x4000009F | ||
160 | |||
161 | |||
162 | #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 | ||
163 | #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 | ||
164 | #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ | ||
165 | (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) | ||
166 | |||
167 | /* Declare the various hypercall operations. */ | ||
168 | #define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008 | ||
169 | |||
170 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 | ||
171 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 | ||
172 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ | ||
173 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) | ||
174 | |||
175 | #define HV_PROCESSOR_POWER_STATE_C0 0 | ||
176 | #define HV_PROCESSOR_POWER_STATE_C1 1 | ||
177 | #define HV_PROCESSOR_POWER_STATE_C2 2 | ||
178 | #define HV_PROCESSOR_POWER_STATE_C3 3 | ||
179 | |||
180 | /* hypercall status code */ | ||
181 | #define HV_STATUS_SUCCESS 0 | ||
182 | #define HV_STATUS_INVALID_HYPERCALL_CODE 2 | ||
183 | #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 | ||
184 | #define HV_STATUS_INVALID_ALIGNMENT 4 | ||
185 | |||
186 | #endif | ||
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7c18e1230f54..7a6f54fa13ba 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -54,13 +54,23 @@ struct x86_emulate_ctxt; | |||
54 | struct x86_emulate_ops { | 54 | struct x86_emulate_ops { |
55 | /* | 55 | /* |
56 | * read_std: Read bytes of standard (non-emulated/special) memory. | 56 | * read_std: Read bytes of standard (non-emulated/special) memory. |
57 | * Used for instruction fetch, stack operations, and others. | 57 | * Used for descriptor reading. |
58 | * @addr: [IN ] Linear address from which to read. | 58 | * @addr: [IN ] Linear address from which to read. |
59 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | 59 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. |
60 | * @bytes: [IN ] Number of bytes to read from memory. | 60 | * @bytes: [IN ] Number of bytes to read from memory. |
61 | */ | 61 | */ |
62 | int (*read_std)(unsigned long addr, void *val, | 62 | int (*read_std)(unsigned long addr, void *val, |
63 | unsigned int bytes, struct kvm_vcpu *vcpu); | 63 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); |
64 | |||
65 | /* | ||
66 | * fetch: Read bytes of standard (non-emulated/special) memory. | ||
67 | * Used for instruction fetch. | ||
68 | * @addr: [IN ] Linear address from which to read. | ||
69 | * @val: [OUT] Value read from memory, zero-extended to 'u_long'. | ||
70 | * @bytes: [IN ] Number of bytes to read from memory. | ||
71 | */ | ||
72 | int (*fetch)(unsigned long addr, void *val, | ||
73 | unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); | ||
64 | 74 | ||
65 | /* | 75 | /* |
66 | * read_emulated: Read bytes from emulated/special memory area. | 76 | * read_emulated: Read bytes from emulated/special memory area. |
@@ -74,7 +84,7 @@ struct x86_emulate_ops { | |||
74 | struct kvm_vcpu *vcpu); | 84 | struct kvm_vcpu *vcpu); |
75 | 85 | ||
76 | /* | 86 | /* |
77 | * write_emulated: Read bytes from emulated/special memory area. | 87 | * write_emulated: Write bytes to emulated/special memory area. |
78 | * @addr: [IN ] Linear address to which to write. | 88 | * @addr: [IN ] Linear address to which to write. |
79 | * @val: [IN ] Value to write to memory (low-order bytes used as | 89 | * @val: [IN ] Value to write to memory (low-order bytes used as |
80 | * required). | 90 | * required). |
@@ -168,6 +178,7 @@ struct x86_emulate_ctxt { | |||
168 | 178 | ||
169 | /* Execution mode, passed to the emulator. */ | 179 | /* Execution mode, passed to the emulator. */ |
170 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | 180 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ |
181 | #define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ | ||
171 | #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ | 182 | #define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ |
172 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ | 183 | #define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ |
173 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | 184 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f865e8b8540..06d9e79ca37d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <asm/mtrr.h> | 25 | #include <asm/mtrr.h> |
26 | #include <asm/msr-index.h> | 26 | #include <asm/msr-index.h> |
27 | 27 | ||
28 | #define KVM_MAX_VCPUS 16 | 28 | #define KVM_MAX_VCPUS 64 |
29 | #define KVM_MEMORY_SLOTS 32 | 29 | #define KVM_MEMORY_SLOTS 32 |
30 | /* memory slots that does not exposed to userspace */ | 30 | /* memory slots that does not exposed to userspace */ |
31 | #define KVM_PRIVATE_MEM_SLOTS 4 | 31 | #define KVM_PRIVATE_MEM_SLOTS 4 |
@@ -38,19 +38,6 @@ | |||
38 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ | 38 | #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \ |
39 | 0xFFFFFF0000000000ULL) | 39 | 0xFFFFFF0000000000ULL) |
40 | 40 | ||
41 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | ||
42 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | ||
43 | #define KVM_GUEST_CR0_MASK \ | ||
44 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
45 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
46 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP) | ||
47 | #define KVM_VM_CR0_ALWAYS_ON \ | ||
48 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
49 | #define KVM_GUEST_CR4_MASK \ | ||
50 | (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) | ||
51 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | ||
52 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | ||
53 | |||
54 | #define INVALID_PAGE (~(hpa_t)0) | 41 | #define INVALID_PAGE (~(hpa_t)0) |
55 | #define UNMAPPED_GVA (~(gpa_t)0) | 42 | #define UNMAPPED_GVA (~(gpa_t)0) |
56 | 43 | ||
@@ -256,7 +243,8 @@ struct kvm_mmu { | |||
256 | void (*new_cr3)(struct kvm_vcpu *vcpu); | 243 | void (*new_cr3)(struct kvm_vcpu *vcpu); |
257 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); | 244 | int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); |
258 | void (*free)(struct kvm_vcpu *vcpu); | 245 | void (*free)(struct kvm_vcpu *vcpu); |
259 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); | 246 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, |
247 | u32 *error); | ||
260 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 248 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
261 | struct kvm_mmu_page *page); | 249 | struct kvm_mmu_page *page); |
262 | int (*sync_page)(struct kvm_vcpu *vcpu, | 250 | int (*sync_page)(struct kvm_vcpu *vcpu, |
@@ -282,13 +270,15 @@ struct kvm_vcpu_arch { | |||
282 | u32 regs_dirty; | 270 | u32 regs_dirty; |
283 | 271 | ||
284 | unsigned long cr0; | 272 | unsigned long cr0; |
273 | unsigned long cr0_guest_owned_bits; | ||
285 | unsigned long cr2; | 274 | unsigned long cr2; |
286 | unsigned long cr3; | 275 | unsigned long cr3; |
287 | unsigned long cr4; | 276 | unsigned long cr4; |
277 | unsigned long cr4_guest_owned_bits; | ||
288 | unsigned long cr8; | 278 | unsigned long cr8; |
289 | u32 hflags; | 279 | u32 hflags; |
290 | u64 pdptrs[4]; /* pae */ | 280 | u64 pdptrs[4]; /* pae */ |
291 | u64 shadow_efer; | 281 | u64 efer; |
292 | u64 apic_base; | 282 | u64 apic_base; |
293 | struct kvm_lapic *apic; /* kernel irqchip context */ | 283 | struct kvm_lapic *apic; /* kernel irqchip context */ |
294 | int32_t apic_arb_prio; | 284 | int32_t apic_arb_prio; |
@@ -374,17 +364,27 @@ struct kvm_vcpu_arch { | |||
374 | /* used for guest single stepping over the given code position */ | 364 | /* used for guest single stepping over the given code position */ |
375 | u16 singlestep_cs; | 365 | u16 singlestep_cs; |
376 | unsigned long singlestep_rip; | 366 | unsigned long singlestep_rip; |
367 | /* fields used by HYPER-V emulation */ | ||
368 | u64 hv_vapic; | ||
377 | }; | 369 | }; |
378 | 370 | ||
379 | struct kvm_mem_alias { | 371 | struct kvm_mem_alias { |
380 | gfn_t base_gfn; | 372 | gfn_t base_gfn; |
381 | unsigned long npages; | 373 | unsigned long npages; |
382 | gfn_t target_gfn; | 374 | gfn_t target_gfn; |
375 | #define KVM_ALIAS_INVALID 1UL | ||
376 | unsigned long flags; | ||
383 | }; | 377 | }; |
384 | 378 | ||
385 | struct kvm_arch{ | 379 | #define KVM_ARCH_HAS_UNALIAS_INSTANTIATION |
386 | int naliases; | 380 | |
381 | struct kvm_mem_aliases { | ||
387 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; | 382 | struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; |
383 | int naliases; | ||
384 | }; | ||
385 | |||
386 | struct kvm_arch { | ||
387 | struct kvm_mem_aliases *aliases; | ||
388 | 388 | ||
389 | unsigned int n_free_mmu_pages; | 389 | unsigned int n_free_mmu_pages; |
390 | unsigned int n_requested_mmu_pages; | 390 | unsigned int n_requested_mmu_pages; |
@@ -416,6 +416,10 @@ struct kvm_arch{ | |||
416 | s64 kvmclock_offset; | 416 | s64 kvmclock_offset; |
417 | 417 | ||
418 | struct kvm_xen_hvm_config xen_hvm_config; | 418 | struct kvm_xen_hvm_config xen_hvm_config; |
419 | |||
420 | /* fields used by HYPER-V emulation */ | ||
421 | u64 hv_guest_os_id; | ||
422 | u64 hv_hypercall; | ||
419 | }; | 423 | }; |
420 | 424 | ||
421 | struct kvm_vm_stat { | 425 | struct kvm_vm_stat { |
@@ -471,6 +475,7 @@ struct kvm_x86_ops { | |||
471 | int (*hardware_setup)(void); /* __init */ | 475 | int (*hardware_setup)(void); /* __init */ |
472 | void (*hardware_unsetup)(void); /* __exit */ | 476 | void (*hardware_unsetup)(void); /* __exit */ |
473 | bool (*cpu_has_accelerated_tpr)(void); | 477 | bool (*cpu_has_accelerated_tpr)(void); |
478 | void (*cpuid_update)(struct kvm_vcpu *vcpu); | ||
474 | 479 | ||
475 | /* Create, but do not attach this VCPU */ | 480 | /* Create, but do not attach this VCPU */ |
476 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); | 481 | struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); |
@@ -492,6 +497,7 @@ struct kvm_x86_ops { | |||
492 | void (*set_segment)(struct kvm_vcpu *vcpu, | 497 | void (*set_segment)(struct kvm_vcpu *vcpu, |
493 | struct kvm_segment *var, int seg); | 498 | struct kvm_segment *var, int seg); |
494 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | 499 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); |
500 | void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); | ||
495 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); | 501 | void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); |
496 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); | 502 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
497 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); | 503 | void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
@@ -501,12 +507,13 @@ struct kvm_x86_ops { | |||
501 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 507 | void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); |
502 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 508 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); |
503 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); | 509 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); |
504 | unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); | 510 | int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); |
505 | void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 511 | int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); |
506 | int *exception); | ||
507 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 512 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
508 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 513 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
509 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 514 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
515 | void (*fpu_activate)(struct kvm_vcpu *vcpu); | ||
516 | void (*fpu_deactivate)(struct kvm_vcpu *vcpu); | ||
510 | 517 | ||
511 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 518 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
512 | 519 | ||
@@ -531,7 +538,8 @@ struct kvm_x86_ops { | |||
531 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 538 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
532 | int (*get_tdp_level)(void); | 539 | int (*get_tdp_level)(void); |
533 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); | 540 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
534 | bool (*gb_page_enable)(void); | 541 | int (*get_lpage_level)(void); |
542 | bool (*rdtscp_supported)(void); | ||
535 | 543 | ||
536 | const struct trace_print_flags *exit_reasons_str; | 544 | const struct trace_print_flags *exit_reasons_str; |
537 | }; | 545 | }; |
@@ -606,8 +614,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, | |||
606 | unsigned long value); | 614 | unsigned long value); |
607 | 615 | ||
608 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); | 616 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
609 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 617 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
610 | int type_bits, int seg); | ||
611 | 618 | ||
612 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); | 619 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); |
613 | 620 | ||
@@ -653,6 +660,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | |||
653 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 660 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
654 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 661 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
655 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 662 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
663 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
664 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
665 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
666 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); | ||
656 | 667 | ||
657 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 668 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
658 | 669 | ||
@@ -666,6 +677,7 @@ void kvm_disable_tdp(void); | |||
666 | 677 | ||
667 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | 678 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
668 | int complete_pio(struct kvm_vcpu *vcpu); | 679 | int complete_pio(struct kvm_vcpu *vcpu); |
680 | bool kvm_check_iopl(struct kvm_vcpu *vcpu); | ||
669 | 681 | ||
670 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | 682 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); |
671 | 683 | ||
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c584076a47f4..ffae1420e7d7 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_KVM_PARA_H | 2 | #define _ASM_X86_KVM_PARA_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <asm/hyperv.h> | ||
5 | 6 | ||
6 | /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It | 7 | /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It |
7 | * should be used to determine that a VM is running under KVM. | 8 | * should be used to determine that a VM is running under KVM. |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 1fecb7e61130..38638cd2fa4c 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -313,7 +313,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
313 | 313 | ||
314 | #define SVM_EXIT_ERR -1 | 314 | #define SVM_EXIT_ERR -1 |
315 | 315 | ||
316 | #define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */ | 316 | #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) |
317 | 317 | ||
318 | #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" | 318 | #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" |
319 | #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" | 319 | #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8" |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2b4945419a84..fb9a080740ec 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -53,6 +53,7 @@ | |||
53 | */ | 53 | */ |
54 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 | 54 | #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 |
55 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 | 55 | #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 |
56 | #define SECONDARY_EXEC_RDTSCP 0x00000008 | ||
56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 57 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 58 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 59 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
@@ -251,6 +252,7 @@ enum vmcs_field { | |||
251 | #define EXIT_REASON_MSR_READ 31 | 252 | #define EXIT_REASON_MSR_READ 31 |
252 | #define EXIT_REASON_MSR_WRITE 32 | 253 | #define EXIT_REASON_MSR_WRITE 32 |
253 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 254 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
255 | #define EXIT_REASON_MONITOR_INSTRUCTION 39 | ||
254 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | 256 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 |
255 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 257 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
256 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 258 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
@@ -362,6 +364,7 @@ enum vmcs_field { | |||
362 | #define VMX_EPTP_UC_BIT (1ull << 8) | 364 | #define VMX_EPTP_UC_BIT (1ull << 8) |
363 | #define VMX_EPTP_WB_BIT (1ull << 14) | 365 | #define VMX_EPTP_WB_BIT (1ull << 14) |
364 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) | 366 | #define VMX_EPT_2MB_PAGE_BIT (1ull << 16) |
367 | #define VMX_EPT_1GB_PAGE_BIT (1ull << 17) | ||
365 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) | 368 | #define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24) |
366 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) | 369 | #define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25) |
367 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) | 370 | #define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26) |
@@ -374,7 +377,7 @@ enum vmcs_field { | |||
374 | #define VMX_EPT_READABLE_MASK 0x1ull | 377 | #define VMX_EPT_READABLE_MASK 0x1ull |
375 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 378 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
376 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 379 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
377 | #define VMX_EPT_IGMT_BIT (1ull << 6) | 380 | #define VMX_EPT_IPAT_BIT (1ull << 6) |
378 | 381 | ||
379 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 382 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
380 | 383 | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 9055e5872ff0..1c0c6ab9c60f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void) | |||
301 | register_sysctl_table(kernel_root_table2); | 301 | register_sysctl_table(kernel_root_table2); |
302 | #endif | 302 | #endif |
303 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 303 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
304 | hotcpu_notifier(cpu_vsyscall_notifier, 0); | 304 | /* notifier priority > KVM */ |
305 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | ||
305 | return 0; | 306 | return 0; |
306 | } | 307 | } |
307 | 308 | ||
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3c4d0109ad20..970bbd479516 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -29,6 +29,7 @@ config KVM | |||
29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
31 | select USER_RETURN_NOTIFIER | 31 | select USER_RETURN_NOTIFIER |
32 | select KVM_MMIO | ||
32 | ---help--- | 33 | ---help--- |
33 | Support hosting fully virtualized guest machines using hardware | 34 | Support hosting fully virtualized guest machines using hardware |
34 | virtualization extensions. You will need a fairly recent | 35 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7e8faea4651e..4dade6ac0827 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -32,7 +32,7 @@ | |||
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <asm/kvm_emulate.h> | 33 | #include <asm/kvm_emulate.h> |
34 | 34 | ||
35 | #include "mmu.h" /* for is_long_mode() */ | 35 | #include "x86.h" |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * Opcode effective-address decode tables. | 38 | * Opcode effective-address decode tables. |
@@ -76,6 +76,8 @@ | |||
76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
78 | /* Misc flags */ | 78 | /* Misc flags */ |
79 | #define Lock (1<<26) /* lock prefix is allowed for the instruction */ | ||
80 | #define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */ | ||
79 | #define No64 (1<<28) | 81 | #define No64 (1<<28) |
80 | /* Source 2 operand type */ | 82 | /* Source 2 operand type */ |
81 | #define Src2None (0<<29) | 83 | #define Src2None (0<<29) |
@@ -88,39 +90,40 @@ | |||
88 | enum { | 90 | enum { |
89 | Group1_80, Group1_81, Group1_82, Group1_83, | 91 | Group1_80, Group1_81, Group1_82, Group1_83, |
90 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, | 92 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, |
93 | Group8, Group9, | ||
91 | }; | 94 | }; |
92 | 95 | ||
93 | static u32 opcode_table[256] = { | 96 | static u32 opcode_table[256] = { |
94 | /* 0x00 - 0x07 */ | 97 | /* 0x00 - 0x07 */ |
95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 98 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 99 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
97 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 100 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
98 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | 101 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
99 | /* 0x08 - 0x0F */ | 102 | /* 0x08 - 0x0F */ |
100 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 103 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
101 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 104 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
102 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 105 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
103 | ImplicitOps | Stack | No64, 0, | 106 | ImplicitOps | Stack | No64, 0, |
104 | /* 0x10 - 0x17 */ | 107 | /* 0x10 - 0x17 */ |
105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 108 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 109 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 110 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
108 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | 111 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
109 | /* 0x18 - 0x1F */ | 112 | /* 0x18 - 0x1F */ |
110 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 113 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
111 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 114 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
112 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, | 115 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
113 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | 116 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
114 | /* 0x20 - 0x27 */ | 117 | /* 0x20 - 0x27 */ |
115 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 118 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
116 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 119 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
117 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, | 120 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
118 | /* 0x28 - 0x2F */ | 121 | /* 0x28 - 0x2F */ |
119 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 122 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
120 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 123 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
121 | 0, 0, 0, 0, | 124 | 0, 0, 0, 0, |
122 | /* 0x30 - 0x37 */ | 125 | /* 0x30 - 0x37 */ |
123 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 126 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
124 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 127 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
125 | 0, 0, 0, 0, | 128 | 0, 0, 0, 0, |
126 | /* 0x38 - 0x3F */ | 129 | /* 0x38 - 0x3F */ |
@@ -156,7 +159,7 @@ static u32 opcode_table[256] = { | |||
156 | Group | Group1_80, Group | Group1_81, | 159 | Group | Group1_80, Group | Group1_81, |
157 | Group | Group1_82, Group | Group1_83, | 160 | Group | Group1_82, Group | Group1_83, |
158 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 161 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
159 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 162 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
160 | /* 0x88 - 0x8F */ | 163 | /* 0x88 - 0x8F */ |
161 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 164 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
162 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 165 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
@@ -210,7 +213,7 @@ static u32 opcode_table[256] = { | |||
210 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 213 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
211 | /* 0xF0 - 0xF7 */ | 214 | /* 0xF0 - 0xF7 */ |
212 | 0, 0, 0, 0, | 215 | 0, 0, 0, 0, |
213 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, | 216 | ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, |
214 | /* 0xF8 - 0xFF */ | 217 | /* 0xF8 - 0xFF */ |
215 | ImplicitOps, 0, ImplicitOps, ImplicitOps, | 218 | ImplicitOps, 0, ImplicitOps, ImplicitOps, |
216 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, | 219 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
@@ -218,16 +221,20 @@ static u32 opcode_table[256] = { | |||
218 | 221 | ||
219 | static u32 twobyte_table[256] = { | 222 | static u32 twobyte_table[256] = { |
220 | /* 0x00 - 0x0F */ | 223 | /* 0x00 - 0x0F */ |
221 | 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0, | 224 | 0, Group | GroupDual | Group7, 0, 0, |
222 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | 225 | 0, ImplicitOps, ImplicitOps | Priv, 0, |
226 | ImplicitOps | Priv, ImplicitOps | Priv, 0, 0, | ||
227 | 0, ImplicitOps | ModRM, 0, 0, | ||
223 | /* 0x10 - 0x1F */ | 228 | /* 0x10 - 0x1F */ |
224 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, | 229 | 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, |
225 | /* 0x20 - 0x2F */ | 230 | /* 0x20 - 0x2F */ |
226 | ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0, | 231 | ModRM | ImplicitOps | Priv, ModRM | Priv, |
232 | ModRM | ImplicitOps | Priv, ModRM | Priv, | ||
233 | 0, 0, 0, 0, | ||
227 | 0, 0, 0, 0, 0, 0, 0, 0, | 234 | 0, 0, 0, 0, 0, 0, 0, 0, |
228 | /* 0x30 - 0x3F */ | 235 | /* 0x30 - 0x3F */ |
229 | ImplicitOps, 0, ImplicitOps, 0, | 236 | ImplicitOps | Priv, 0, ImplicitOps | Priv, 0, |
230 | ImplicitOps, ImplicitOps, 0, 0, | 237 | ImplicitOps, ImplicitOps | Priv, 0, 0, |
231 | 0, 0, 0, 0, 0, 0, 0, 0, | 238 | 0, 0, 0, 0, 0, 0, 0, 0, |
232 | /* 0x40 - 0x47 */ | 239 | /* 0x40 - 0x47 */ |
233 | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 240 | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
@@ -257,21 +264,23 @@ static u32 twobyte_table[256] = { | |||
257 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | 264 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, |
258 | /* 0xA8 - 0xAF */ | 265 | /* 0xA8 - 0xAF */ |
259 | ImplicitOps | Stack, ImplicitOps | Stack, | 266 | ImplicitOps | Stack, ImplicitOps | Stack, |
260 | 0, DstMem | SrcReg | ModRM | BitOp, | 267 | 0, DstMem | SrcReg | ModRM | BitOp | Lock, |
261 | DstMem | SrcReg | Src2ImmByte | ModRM, | 268 | DstMem | SrcReg | Src2ImmByte | ModRM, |
262 | DstMem | SrcReg | Src2CL | ModRM, | 269 | DstMem | SrcReg | Src2CL | ModRM, |
263 | ModRM, 0, | 270 | ModRM, 0, |
264 | /* 0xB0 - 0xB7 */ | 271 | /* 0xB0 - 0xB7 */ |
265 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, | 272 | ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock, |
266 | DstMem | SrcReg | ModRM | BitOp, | 273 | 0, DstMem | SrcReg | ModRM | BitOp | Lock, |
267 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 274 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
268 | DstReg | SrcMem16 | ModRM | Mov, | 275 | DstReg | SrcMem16 | ModRM | Mov, |
269 | /* 0xB8 - 0xBF */ | 276 | /* 0xB8 - 0xBF */ |
270 | 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, | 277 | 0, 0, |
278 | Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock, | ||
271 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, | 279 | 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, |
272 | DstReg | SrcMem16 | ModRM | Mov, | 280 | DstReg | SrcMem16 | ModRM | Mov, |
273 | /* 0xC0 - 0xCF */ | 281 | /* 0xC0 - 0xCF */ |
274 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM, | 282 | 0, 0, 0, DstMem | SrcReg | ModRM | Mov, |
283 | 0, 0, 0, Group | GroupDual | Group9, | ||
275 | 0, 0, 0, 0, 0, 0, 0, 0, | 284 | 0, 0, 0, 0, 0, 0, 0, 0, |
276 | /* 0xD0 - 0xDF */ | 285 | /* 0xD0 - 0xDF */ |
277 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 286 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
@@ -283,25 +292,41 @@ static u32 twobyte_table[256] = { | |||
283 | 292 | ||
284 | static u32 group_table[] = { | 293 | static u32 group_table[] = { |
285 | [Group1_80*8] = | 294 | [Group1_80*8] = |
286 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 295 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
287 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 296 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
288 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 297 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
289 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 298 | ByteOp | DstMem | SrcImm | ModRM | Lock, |
299 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
300 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
301 | ByteOp | DstMem | SrcImm | ModRM | Lock, | ||
302 | ByteOp | DstMem | SrcImm | ModRM, | ||
290 | [Group1_81*8] = | 303 | [Group1_81*8] = |
291 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 304 | DstMem | SrcImm | ModRM | Lock, |
292 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 305 | DstMem | SrcImm | ModRM | Lock, |
293 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 306 | DstMem | SrcImm | ModRM | Lock, |
294 | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, | 307 | DstMem | SrcImm | ModRM | Lock, |
308 | DstMem | SrcImm | ModRM | Lock, | ||
309 | DstMem | SrcImm | ModRM | Lock, | ||
310 | DstMem | SrcImm | ModRM | Lock, | ||
311 | DstMem | SrcImm | ModRM, | ||
295 | [Group1_82*8] = | 312 | [Group1_82*8] = |
296 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 313 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
297 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 314 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
298 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 315 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
299 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 316 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, |
317 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
318 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
319 | ByteOp | DstMem | SrcImm | ModRM | No64 | Lock, | ||
320 | ByteOp | DstMem | SrcImm | ModRM | No64, | ||
300 | [Group1_83*8] = | 321 | [Group1_83*8] = |
301 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 322 | DstMem | SrcImmByte | ModRM | Lock, |
302 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 323 | DstMem | SrcImmByte | ModRM | Lock, |
303 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 324 | DstMem | SrcImmByte | ModRM | Lock, |
304 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM, | 325 | DstMem | SrcImmByte | ModRM | Lock, |
326 | DstMem | SrcImmByte | ModRM | Lock, | ||
327 | DstMem | SrcImmByte | ModRM | Lock, | ||
328 | DstMem | SrcImmByte | ModRM | Lock, | ||
329 | DstMem | SrcImmByte | ModRM, | ||
305 | [Group1A*8] = | 330 | [Group1A*8] = |
306 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, | 331 | DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0, |
307 | [Group3_Byte*8] = | 332 | [Group3_Byte*8] = |
@@ -320,24 +345,39 @@ static u32 group_table[] = { | |||
320 | SrcMem | ModRM | Stack, 0, | 345 | SrcMem | ModRM | Stack, 0, |
321 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | 346 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, |
322 | [Group7*8] = | 347 | [Group7*8] = |
323 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, | 348 | 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, |
324 | SrcNone | ModRM | DstMem | Mov, 0, | 349 | SrcNone | ModRM | DstMem | Mov, 0, |
325 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, | 350 | SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv, |
351 | [Group8*8] = | ||
352 | 0, 0, 0, 0, | ||
353 | DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, | ||
354 | DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, | ||
355 | [Group9*8] = | ||
356 | 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, | ||
326 | }; | 357 | }; |
327 | 358 | ||
328 | static u32 group2_table[] = { | 359 | static u32 group2_table[] = { |
329 | [Group7*8] = | 360 | [Group7*8] = |
330 | SrcNone | ModRM, 0, 0, SrcNone | ModRM, | 361 | SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, |
331 | SrcNone | ModRM | DstMem | Mov, 0, | 362 | SrcNone | ModRM | DstMem | Mov, 0, |
332 | SrcMem16 | ModRM | Mov, 0, | 363 | SrcMem16 | ModRM | Mov, 0, |
364 | [Group9*8] = | ||
365 | 0, 0, 0, 0, 0, 0, 0, 0, | ||
333 | }; | 366 | }; |
334 | 367 | ||
335 | /* EFLAGS bit definitions. */ | 368 | /* EFLAGS bit definitions. */ |
369 | #define EFLG_ID (1<<21) | ||
370 | #define EFLG_VIP (1<<20) | ||
371 | #define EFLG_VIF (1<<19) | ||
372 | #define EFLG_AC (1<<18) | ||
336 | #define EFLG_VM (1<<17) | 373 | #define EFLG_VM (1<<17) |
337 | #define EFLG_RF (1<<16) | 374 | #define EFLG_RF (1<<16) |
375 | #define EFLG_IOPL (3<<12) | ||
376 | #define EFLG_NT (1<<14) | ||
338 | #define EFLG_OF (1<<11) | 377 | #define EFLG_OF (1<<11) |
339 | #define EFLG_DF (1<<10) | 378 | #define EFLG_DF (1<<10) |
340 | #define EFLG_IF (1<<9) | 379 | #define EFLG_IF (1<<9) |
380 | #define EFLG_TF (1<<8) | ||
341 | #define EFLG_SF (1<<7) | 381 | #define EFLG_SF (1<<7) |
342 | #define EFLG_ZF (1<<6) | 382 | #define EFLG_ZF (1<<6) |
343 | #define EFLG_AF (1<<4) | 383 | #define EFLG_AF (1<<4) |
@@ -606,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | |||
606 | 646 | ||
607 | if (linear < fc->start || linear >= fc->end) { | 647 | if (linear < fc->start || linear >= fc->end) { |
608 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); | 648 | size = min(15UL, PAGE_SIZE - offset_in_page(linear)); |
609 | rc = ops->read_std(linear, fc->data, size, ctxt->vcpu); | 649 | rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); |
610 | if (rc) | 650 | if (rc) |
611 | return rc; | 651 | return rc; |
612 | fc->start = linear; | 652 | fc->start = linear; |
@@ -661,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, | |||
661 | op_bytes = 3; | 701 | op_bytes = 3; |
662 | *address = 0; | 702 | *address = 0; |
663 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, | 703 | rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, |
664 | ctxt->vcpu); | 704 | ctxt->vcpu, NULL); |
665 | if (rc) | 705 | if (rc) |
666 | return rc; | 706 | return rc; |
667 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, | 707 | rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, |
668 | ctxt->vcpu); | 708 | ctxt->vcpu, NULL); |
669 | return rc; | 709 | return rc; |
670 | } | 710 | } |
671 | 711 | ||
@@ -889,6 +929,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
889 | 929 | ||
890 | switch (mode) { | 930 | switch (mode) { |
891 | case X86EMUL_MODE_REAL: | 931 | case X86EMUL_MODE_REAL: |
932 | case X86EMUL_MODE_VM86: | ||
892 | case X86EMUL_MODE_PROT16: | 933 | case X86EMUL_MODE_PROT16: |
893 | def_op_bytes = def_ad_bytes = 2; | 934 | def_op_bytes = def_ad_bytes = 2; |
894 | break; | 935 | break; |
@@ -975,7 +1016,7 @@ done_prefixes: | |||
975 | } | 1016 | } |
976 | 1017 | ||
977 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | 1018 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { |
978 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");; | 1019 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); |
979 | return -1; | 1020 | return -1; |
980 | } | 1021 | } |
981 | 1022 | ||
@@ -1196,13 +1237,56 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
1196 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), | 1237 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), |
1197 | c->regs[VCPU_REGS_RSP]), | 1238 | c->regs[VCPU_REGS_RSP]), |
1198 | dest, len, ctxt->vcpu); | 1239 | dest, len, ctxt->vcpu); |
1199 | if (rc != 0) | 1240 | if (rc != X86EMUL_CONTINUE) |
1200 | return rc; | 1241 | return rc; |
1201 | 1242 | ||
1202 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); | 1243 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], len); |
1203 | return rc; | 1244 | return rc; |
1204 | } | 1245 | } |
1205 | 1246 | ||
1247 | static int emulate_popf(struct x86_emulate_ctxt *ctxt, | ||
1248 | struct x86_emulate_ops *ops, | ||
1249 | void *dest, int len) | ||
1250 | { | ||
1251 | int rc; | ||
1252 | unsigned long val, change_mask; | ||
1253 | int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1254 | int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); | ||
1255 | |||
1256 | rc = emulate_pop(ctxt, ops, &val, len); | ||
1257 | if (rc != X86EMUL_CONTINUE) | ||
1258 | return rc; | ||
1259 | |||
1260 | change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF | ||
1261 | | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID; | ||
1262 | |||
1263 | switch(ctxt->mode) { | ||
1264 | case X86EMUL_MODE_PROT64: | ||
1265 | case X86EMUL_MODE_PROT32: | ||
1266 | case X86EMUL_MODE_PROT16: | ||
1267 | if (cpl == 0) | ||
1268 | change_mask |= EFLG_IOPL; | ||
1269 | if (cpl <= iopl) | ||
1270 | change_mask |= EFLG_IF; | ||
1271 | break; | ||
1272 | case X86EMUL_MODE_VM86: | ||
1273 | if (iopl < 3) { | ||
1274 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1275 | return X86EMUL_PROPAGATE_FAULT; | ||
1276 | } | ||
1277 | change_mask |= EFLG_IF; | ||
1278 | break; | ||
1279 | default: /* real mode */ | ||
1280 | change_mask |= (EFLG_IOPL | EFLG_IF); | ||
1281 | break; | ||
1282 | } | ||
1283 | |||
1284 | *(unsigned long *)dest = | ||
1285 | (ctxt->eflags & ~change_mask) | (val & change_mask); | ||
1286 | |||
1287 | return rc; | ||
1288 | } | ||
1289 | |||
1206 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | 1290 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) |
1207 | { | 1291 | { |
1208 | struct decode_cache *c = &ctxt->decode; | 1292 | struct decode_cache *c = &ctxt->decode; |
@@ -1225,7 +1309,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | |||
1225 | if (rc != 0) | 1309 | if (rc != 0) |
1226 | return rc; | 1310 | return rc; |
1227 | 1311 | ||
1228 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); | 1312 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); |
1229 | return rc; | 1313 | return rc; |
1230 | } | 1314 | } |
1231 | 1315 | ||
@@ -1370,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1370 | int rc; | 1454 | int rc; |
1371 | 1455 | ||
1372 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); | 1456 | rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); |
1373 | if (rc != 0) | 1457 | if (rc != X86EMUL_CONTINUE) |
1374 | return rc; | 1458 | return rc; |
1375 | 1459 | ||
1376 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || | 1460 | if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || |
@@ -1385,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, | |||
1385 | (u32) c->regs[VCPU_REGS_RBX]; | 1469 | (u32) c->regs[VCPU_REGS_RBX]; |
1386 | 1470 | ||
1387 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); | 1471 | rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); |
1388 | if (rc != 0) | 1472 | if (rc != X86EMUL_CONTINUE) |
1389 | return rc; | 1473 | return rc; |
1390 | ctxt->eflags |= EFLG_ZF; | 1474 | ctxt->eflags |= EFLG_ZF; |
1391 | } | 1475 | } |
@@ -1407,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, | |||
1407 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); | 1491 | rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); |
1408 | if (rc) | 1492 | if (rc) |
1409 | return rc; | 1493 | return rc; |
1410 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS); | 1494 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); |
1411 | return rc; | 1495 | return rc; |
1412 | } | 1496 | } |
1413 | 1497 | ||
@@ -1451,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1451 | &c->dst.val, | 1535 | &c->dst.val, |
1452 | c->dst.bytes, | 1536 | c->dst.bytes, |
1453 | ctxt->vcpu); | 1537 | ctxt->vcpu); |
1454 | if (rc != 0) | 1538 | if (rc != X86EMUL_CONTINUE) |
1455 | return rc; | 1539 | return rc; |
1456 | break; | 1540 | break; |
1457 | case OP_NONE: | 1541 | case OP_NONE: |
@@ -1514,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1514 | u64 msr_data; | 1598 | u64 msr_data; |
1515 | 1599 | ||
1516 | /* syscall is not available in real mode */ | 1600 | /* syscall is not available in real mode */ |
1517 | if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL | 1601 | if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) |
1518 | || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) | 1602 | return X86EMUL_UNHANDLEABLE; |
1519 | return -1; | ||
1520 | 1603 | ||
1521 | setup_syscalls_segments(ctxt, &cs, &ss); | 1604 | setup_syscalls_segments(ctxt, &cs, &ss); |
1522 | 1605 | ||
@@ -1553,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) | |||
1553 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); | 1636 | ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); |
1554 | } | 1637 | } |
1555 | 1638 | ||
1556 | return 0; | 1639 | return X86EMUL_CONTINUE; |
1557 | } | 1640 | } |
1558 | 1641 | ||
1559 | static int | 1642 | static int |
@@ -1563,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1563 | struct kvm_segment cs, ss; | 1646 | struct kvm_segment cs, ss; |
1564 | u64 msr_data; | 1647 | u64 msr_data; |
1565 | 1648 | ||
1566 | /* inject #UD if LOCK prefix is used */ | 1649 | /* inject #GP if in real mode */ |
1567 | if (c->lock_prefix) | 1650 | if (ctxt->mode == X86EMUL_MODE_REAL) { |
1568 | return -1; | ||
1569 | |||
1570 | /* inject #GP if in real mode or paging is disabled */ | ||
1571 | if (ctxt->mode == X86EMUL_MODE_REAL || | ||
1572 | !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { | ||
1573 | kvm_inject_gp(ctxt->vcpu, 0); | 1651 | kvm_inject_gp(ctxt->vcpu, 0); |
1574 | return -1; | 1652 | return X86EMUL_UNHANDLEABLE; |
1575 | } | 1653 | } |
1576 | 1654 | ||
1577 | /* XXX sysenter/sysexit have not been tested in 64bit mode. | 1655 | /* XXX sysenter/sysexit have not been tested in 64bit mode. |
1578 | * Therefore, we inject an #UD. | 1656 | * Therefore, we inject an #UD. |
1579 | */ | 1657 | */ |
1580 | if (ctxt->mode == X86EMUL_MODE_PROT64) | 1658 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
1581 | return -1; | 1659 | return X86EMUL_UNHANDLEABLE; |
1582 | 1660 | ||
1583 | setup_syscalls_segments(ctxt, &cs, &ss); | 1661 | setup_syscalls_segments(ctxt, &cs, &ss); |
1584 | 1662 | ||
@@ -1587,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1587 | case X86EMUL_MODE_PROT32: | 1665 | case X86EMUL_MODE_PROT32: |
1588 | if ((msr_data & 0xfffc) == 0x0) { | 1666 | if ((msr_data & 0xfffc) == 0x0) { |
1589 | kvm_inject_gp(ctxt->vcpu, 0); | 1667 | kvm_inject_gp(ctxt->vcpu, 0); |
1590 | return -1; | 1668 | return X86EMUL_PROPAGATE_FAULT; |
1591 | } | 1669 | } |
1592 | break; | 1670 | break; |
1593 | case X86EMUL_MODE_PROT64: | 1671 | case X86EMUL_MODE_PROT64: |
1594 | if (msr_data == 0x0) { | 1672 | if (msr_data == 0x0) { |
1595 | kvm_inject_gp(ctxt->vcpu, 0); | 1673 | kvm_inject_gp(ctxt->vcpu, 0); |
1596 | return -1; | 1674 | return X86EMUL_PROPAGATE_FAULT; |
1597 | } | 1675 | } |
1598 | break; | 1676 | break; |
1599 | } | 1677 | } |
@@ -1618,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) | |||
1618 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); | 1696 | kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data); |
1619 | c->regs[VCPU_REGS_RSP] = msr_data; | 1697 | c->regs[VCPU_REGS_RSP] = msr_data; |
1620 | 1698 | ||
1621 | return 0; | 1699 | return X86EMUL_CONTINUE; |
1622 | } | 1700 | } |
1623 | 1701 | ||
1624 | static int | 1702 | static int |
@@ -1629,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1629 | u64 msr_data; | 1707 | u64 msr_data; |
1630 | int usermode; | 1708 | int usermode; |
1631 | 1709 | ||
1632 | /* inject #UD if LOCK prefix is used */ | 1710 | /* inject #GP if in real mode or Virtual 8086 mode */ |
1633 | if (c->lock_prefix) | 1711 | if (ctxt->mode == X86EMUL_MODE_REAL || |
1634 | return -1; | 1712 | ctxt->mode == X86EMUL_MODE_VM86) { |
1635 | |||
1636 | /* inject #GP if in real mode or paging is disabled */ | ||
1637 | if (ctxt->mode == X86EMUL_MODE_REAL | ||
1638 | || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) { | ||
1639 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1640 | return -1; | ||
1641 | } | ||
1642 | |||
1643 | /* sysexit must be called from CPL 0 */ | ||
1644 | if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) { | ||
1645 | kvm_inject_gp(ctxt->vcpu, 0); | 1713 | kvm_inject_gp(ctxt->vcpu, 0); |
1646 | return -1; | 1714 | return X86EMUL_UNHANDLEABLE; |
1647 | } | 1715 | } |
1648 | 1716 | ||
1649 | setup_syscalls_segments(ctxt, &cs, &ss); | 1717 | setup_syscalls_segments(ctxt, &cs, &ss); |
@@ -1661,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1661 | cs.selector = (u16)(msr_data + 16); | 1729 | cs.selector = (u16)(msr_data + 16); |
1662 | if ((msr_data & 0xfffc) == 0x0) { | 1730 | if ((msr_data & 0xfffc) == 0x0) { |
1663 | kvm_inject_gp(ctxt->vcpu, 0); | 1731 | kvm_inject_gp(ctxt->vcpu, 0); |
1664 | return -1; | 1732 | return X86EMUL_PROPAGATE_FAULT; |
1665 | } | 1733 | } |
1666 | ss.selector = (u16)(msr_data + 24); | 1734 | ss.selector = (u16)(msr_data + 24); |
1667 | break; | 1735 | break; |
@@ -1669,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1669 | cs.selector = (u16)(msr_data + 32); | 1737 | cs.selector = (u16)(msr_data + 32); |
1670 | if (msr_data == 0x0) { | 1738 | if (msr_data == 0x0) { |
1671 | kvm_inject_gp(ctxt->vcpu, 0); | 1739 | kvm_inject_gp(ctxt->vcpu, 0); |
1672 | return -1; | 1740 | return X86EMUL_PROPAGATE_FAULT; |
1673 | } | 1741 | } |
1674 | ss.selector = cs.selector + 8; | 1742 | ss.selector = cs.selector + 8; |
1675 | cs.db = 0; | 1743 | cs.db = 0; |
@@ -1685,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) | |||
1685 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; | 1753 | c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX]; |
1686 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; | 1754 | c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX]; |
1687 | 1755 | ||
1688 | return 0; | 1756 | return X86EMUL_CONTINUE; |
1757 | } | ||
1758 | |||
1759 | static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) | ||
1760 | { | ||
1761 | int iopl; | ||
1762 | if (ctxt->mode == X86EMUL_MODE_REAL) | ||
1763 | return false; | ||
1764 | if (ctxt->mode == X86EMUL_MODE_VM86) | ||
1765 | return true; | ||
1766 | iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; | ||
1767 | return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; | ||
1768 | } | ||
1769 | |||
1770 | static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | ||
1771 | struct x86_emulate_ops *ops, | ||
1772 | u16 port, u16 len) | ||
1773 | { | ||
1774 | struct kvm_segment tr_seg; | ||
1775 | int r; | ||
1776 | u16 io_bitmap_ptr; | ||
1777 | u8 perm, bit_idx = port & 0x7; | ||
1778 | unsigned mask = (1 << len) - 1; | ||
1779 | |||
1780 | kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR); | ||
1781 | if (tr_seg.unusable) | ||
1782 | return false; | ||
1783 | if (tr_seg.limit < 103) | ||
1784 | return false; | ||
1785 | r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, | ||
1786 | NULL); | ||
1787 | if (r != X86EMUL_CONTINUE) | ||
1788 | return false; | ||
1789 | if (io_bitmap_ptr + port/8 > tr_seg.limit) | ||
1790 | return false; | ||
1791 | r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1, | ||
1792 | ctxt->vcpu, NULL); | ||
1793 | if (r != X86EMUL_CONTINUE) | ||
1794 | return false; | ||
1795 | if ((perm >> bit_idx) & mask) | ||
1796 | return false; | ||
1797 | return true; | ||
1798 | } | ||
1799 | |||
1800 | static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, | ||
1801 | struct x86_emulate_ops *ops, | ||
1802 | u16 port, u16 len) | ||
1803 | { | ||
1804 | if (emulator_bad_iopl(ctxt)) | ||
1805 | if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) | ||
1806 | return false; | ||
1807 | return true; | ||
1689 | } | 1808 | } |
1690 | 1809 | ||
1691 | int | 1810 | int |
@@ -1709,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1709 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 1828 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
1710 | saved_eip = c->eip; | 1829 | saved_eip = c->eip; |
1711 | 1830 | ||
1831 | /* LOCK prefix is allowed only with some instructions */ | ||
1832 | if (c->lock_prefix && !(c->d & Lock)) { | ||
1833 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); | ||
1834 | goto done; | ||
1835 | } | ||
1836 | |||
1837 | /* Privileged instruction can be executed only in CPL=0 */ | ||
1838 | if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { | ||
1839 | kvm_inject_gp(ctxt->vcpu, 0); | ||
1840 | goto done; | ||
1841 | } | ||
1842 | |||
1712 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) | 1843 | if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) |
1713 | memop = c->modrm_ea; | 1844 | memop = c->modrm_ea; |
1714 | 1845 | ||
@@ -1749,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1749 | &c->src.val, | 1880 | &c->src.val, |
1750 | c->src.bytes, | 1881 | c->src.bytes, |
1751 | ctxt->vcpu); | 1882 | ctxt->vcpu); |
1752 | if (rc != 0) | 1883 | if (rc != X86EMUL_CONTINUE) |
1753 | goto done; | 1884 | goto done; |
1754 | c->src.orig_val = c->src.val; | 1885 | c->src.orig_val = c->src.val; |
1755 | } | 1886 | } |
@@ -1768,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1768 | c->dst.ptr = (void *)c->dst.ptr + | 1899 | c->dst.ptr = (void *)c->dst.ptr + |
1769 | (c->src.val & mask) / 8; | 1900 | (c->src.val & mask) / 8; |
1770 | } | 1901 | } |
1771 | if (!(c->d & Mov) && | 1902 | if (!(c->d & Mov)) { |
1772 | /* optimisation - avoid slow emulated read */ | 1903 | /* optimisation - avoid slow emulated read */ |
1773 | ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 1904 | rc = ops->read_emulated((unsigned long)c->dst.ptr, |
1774 | &c->dst.val, | 1905 | &c->dst.val, |
1775 | c->dst.bytes, ctxt->vcpu)) != 0)) | 1906 | c->dst.bytes, |
1776 | goto done; | 1907 | ctxt->vcpu); |
1908 | if (rc != X86EMUL_CONTINUE) | ||
1909 | goto done; | ||
1910 | } | ||
1777 | } | 1911 | } |
1778 | c->dst.orig_val = c->dst.val; | 1912 | c->dst.orig_val = c->dst.val; |
1779 | 1913 | ||
@@ -1876,7 +2010,12 @@ special_insn: | |||
1876 | break; | 2010 | break; |
1877 | case 0x6c: /* insb */ | 2011 | case 0x6c: /* insb */ |
1878 | case 0x6d: /* insw/insd */ | 2012 | case 0x6d: /* insw/insd */ |
1879 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2013 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], |
2014 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2015 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2016 | goto done; | ||
2017 | } | ||
2018 | if (kvm_emulate_pio_string(ctxt->vcpu, | ||
1880 | 1, | 2019 | 1, |
1881 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2020 | (c->d & ByteOp) ? 1 : c->op_bytes, |
1882 | c->rep_prefix ? | 2021 | c->rep_prefix ? |
@@ -1892,6 +2031,11 @@ special_insn: | |||
1892 | return 0; | 2031 | return 0; |
1893 | case 0x6e: /* outsb */ | 2032 | case 0x6e: /* outsb */ |
1894 | case 0x6f: /* outsw/outsd */ | 2033 | case 0x6f: /* outsw/outsd */ |
2034 | if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], | ||
2035 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2036 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2037 | goto done; | ||
2038 | } | ||
1895 | if (kvm_emulate_pio_string(ctxt->vcpu, | 2039 | if (kvm_emulate_pio_string(ctxt->vcpu, |
1896 | 0, | 2040 | 0, |
1897 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2041 | (c->d & ByteOp) ? 1 : c->op_bytes, |
@@ -1978,25 +2122,19 @@ special_insn: | |||
1978 | break; | 2122 | break; |
1979 | case 0x8e: { /* mov seg, r/m16 */ | 2123 | case 0x8e: { /* mov seg, r/m16 */ |
1980 | uint16_t sel; | 2124 | uint16_t sel; |
1981 | int type_bits; | ||
1982 | int err; | ||
1983 | 2125 | ||
1984 | sel = c->src.val; | 2126 | sel = c->src.val; |
1985 | if (c->modrm_reg == VCPU_SREG_SS) | ||
1986 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | ||
1987 | 2127 | ||
1988 | if (c->modrm_reg <= 5) { | 2128 | if (c->modrm_reg == VCPU_SREG_CS || |
1989 | type_bits = (c->modrm_reg == 1) ? 9 : 1; | 2129 | c->modrm_reg > VCPU_SREG_GS) { |
1990 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, | 2130 | kvm_queue_exception(ctxt->vcpu, UD_VECTOR); |
1991 | type_bits, c->modrm_reg); | 2131 | goto done; |
1992 | } else { | ||
1993 | printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", | ||
1994 | c->modrm); | ||
1995 | goto cannot_emulate; | ||
1996 | } | 2132 | } |
1997 | 2133 | ||
1998 | if (err < 0) | 2134 | if (c->modrm_reg == VCPU_SREG_SS) |
1999 | goto cannot_emulate; | 2135 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); |
2136 | |||
2137 | rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); | ||
2000 | 2138 | ||
2001 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2139 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2002 | break; | 2140 | break; |
@@ -2025,7 +2163,10 @@ special_insn: | |||
2025 | c->dst.type = OP_REG; | 2163 | c->dst.type = OP_REG; |
2026 | c->dst.ptr = (unsigned long *) &ctxt->eflags; | 2164 | c->dst.ptr = (unsigned long *) &ctxt->eflags; |
2027 | c->dst.bytes = c->op_bytes; | 2165 | c->dst.bytes = c->op_bytes; |
2028 | goto pop_instruction; | 2166 | rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes); |
2167 | if (rc != X86EMUL_CONTINUE) | ||
2168 | goto done; | ||
2169 | break; | ||
2029 | case 0xa0 ... 0xa1: /* mov */ | 2170 | case 0xa0 ... 0xa1: /* mov */ |
2030 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 2171 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
2031 | c->dst.val = c->src.val; | 2172 | c->dst.val = c->src.val; |
@@ -2039,11 +2180,12 @@ special_insn: | |||
2039 | c->dst.ptr = (unsigned long *)register_address(c, | 2180 | c->dst.ptr = (unsigned long *)register_address(c, |
2040 | es_base(ctxt), | 2181 | es_base(ctxt), |
2041 | c->regs[VCPU_REGS_RDI]); | 2182 | c->regs[VCPU_REGS_RDI]); |
2042 | if ((rc = ops->read_emulated(register_address(c, | 2183 | rc = ops->read_emulated(register_address(c, |
2043 | seg_override_base(ctxt, c), | 2184 | seg_override_base(ctxt, c), |
2044 | c->regs[VCPU_REGS_RSI]), | 2185 | c->regs[VCPU_REGS_RSI]), |
2045 | &c->dst.val, | 2186 | &c->dst.val, |
2046 | c->dst.bytes, ctxt->vcpu)) != 0) | 2187 | c->dst.bytes, ctxt->vcpu); |
2188 | if (rc != X86EMUL_CONTINUE) | ||
2047 | goto done; | 2189 | goto done; |
2048 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | 2190 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
2049 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 2191 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
@@ -2058,10 +2200,11 @@ special_insn: | |||
2058 | c->src.ptr = (unsigned long *)register_address(c, | 2200 | c->src.ptr = (unsigned long *)register_address(c, |
2059 | seg_override_base(ctxt, c), | 2201 | seg_override_base(ctxt, c), |
2060 | c->regs[VCPU_REGS_RSI]); | 2202 | c->regs[VCPU_REGS_RSI]); |
2061 | if ((rc = ops->read_emulated((unsigned long)c->src.ptr, | 2203 | rc = ops->read_emulated((unsigned long)c->src.ptr, |
2062 | &c->src.val, | 2204 | &c->src.val, |
2063 | c->src.bytes, | 2205 | c->src.bytes, |
2064 | ctxt->vcpu)) != 0) | 2206 | ctxt->vcpu); |
2207 | if (rc != X86EMUL_CONTINUE) | ||
2065 | goto done; | 2208 | goto done; |
2066 | 2209 | ||
2067 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2210 | c->dst.type = OP_NONE; /* Disable writeback. */ |
@@ -2069,10 +2212,11 @@ special_insn: | |||
2069 | c->dst.ptr = (unsigned long *)register_address(c, | 2212 | c->dst.ptr = (unsigned long *)register_address(c, |
2070 | es_base(ctxt), | 2213 | es_base(ctxt), |
2071 | c->regs[VCPU_REGS_RDI]); | 2214 | c->regs[VCPU_REGS_RDI]); |
2072 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 2215 | rc = ops->read_emulated((unsigned long)c->dst.ptr, |
2073 | &c->dst.val, | 2216 | &c->dst.val, |
2074 | c->dst.bytes, | 2217 | c->dst.bytes, |
2075 | ctxt->vcpu)) != 0) | 2218 | ctxt->vcpu); |
2219 | if (rc != X86EMUL_CONTINUE) | ||
2076 | goto done; | 2220 | goto done; |
2077 | 2221 | ||
2078 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); | 2222 | DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); |
@@ -2102,12 +2246,13 @@ special_insn: | |||
2102 | c->dst.type = OP_REG; | 2246 | c->dst.type = OP_REG; |
2103 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 2247 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
2104 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 2248 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
2105 | if ((rc = ops->read_emulated(register_address(c, | 2249 | rc = ops->read_emulated(register_address(c, |
2106 | seg_override_base(ctxt, c), | 2250 | seg_override_base(ctxt, c), |
2107 | c->regs[VCPU_REGS_RSI]), | 2251 | c->regs[VCPU_REGS_RSI]), |
2108 | &c->dst.val, | 2252 | &c->dst.val, |
2109 | c->dst.bytes, | 2253 | c->dst.bytes, |
2110 | ctxt->vcpu)) != 0) | 2254 | ctxt->vcpu); |
2255 | if (rc != X86EMUL_CONTINUE) | ||
2111 | goto done; | 2256 | goto done; |
2112 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], | 2257 | register_address_increment(c, &c->regs[VCPU_REGS_RSI], |
2113 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes | 2258 | (ctxt->eflags & EFLG_DF) ? -c->dst.bytes |
@@ -2163,11 +2308,9 @@ special_insn: | |||
2163 | case 0xe9: /* jmp rel */ | 2308 | case 0xe9: /* jmp rel */ |
2164 | goto jmp; | 2309 | goto jmp; |
2165 | case 0xea: /* jmp far */ | 2310 | case 0xea: /* jmp far */ |
2166 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, | 2311 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, |
2167 | VCPU_SREG_CS) < 0) { | 2312 | VCPU_SREG_CS)) |
2168 | DPRINTF("jmp far: Failed to load CS descriptor\n"); | 2313 | goto done; |
2169 | goto cannot_emulate; | ||
2170 | } | ||
2171 | 2314 | ||
2172 | c->eip = c->src.val; | 2315 | c->eip = c->src.val; |
2173 | break; | 2316 | break; |
@@ -2185,7 +2328,13 @@ special_insn: | |||
2185 | case 0xef: /* out (e/r)ax,dx */ | 2328 | case 0xef: /* out (e/r)ax,dx */ |
2186 | port = c->regs[VCPU_REGS_RDX]; | 2329 | port = c->regs[VCPU_REGS_RDX]; |
2187 | io_dir_in = 0; | 2330 | io_dir_in = 0; |
2188 | do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | 2331 | do_io: |
2332 | if (!emulator_io_permited(ctxt, ops, port, | ||
2333 | (c->d & ByteOp) ? 1 : c->op_bytes)) { | ||
2334 | kvm_inject_gp(ctxt->vcpu, 0); | ||
2335 | goto done; | ||
2336 | } | ||
2337 | if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, | ||
2189 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2338 | (c->d & ByteOp) ? 1 : c->op_bytes, |
2190 | port) != 0) { | 2339 | port) != 0) { |
2191 | c->eip = saved_eip; | 2340 | c->eip = saved_eip; |
@@ -2210,13 +2359,21 @@ special_insn: | |||
2210 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2359 | c->dst.type = OP_NONE; /* Disable writeback. */ |
2211 | break; | 2360 | break; |
2212 | case 0xfa: /* cli */ | 2361 | case 0xfa: /* cli */ |
2213 | ctxt->eflags &= ~X86_EFLAGS_IF; | 2362 | if (emulator_bad_iopl(ctxt)) |
2214 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2363 | kvm_inject_gp(ctxt->vcpu, 0); |
2364 | else { | ||
2365 | ctxt->eflags &= ~X86_EFLAGS_IF; | ||
2366 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
2367 | } | ||
2215 | break; | 2368 | break; |
2216 | case 0xfb: /* sti */ | 2369 | case 0xfb: /* sti */ |
2217 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | 2370 | if (emulator_bad_iopl(ctxt)) |
2218 | ctxt->eflags |= X86_EFLAGS_IF; | 2371 | kvm_inject_gp(ctxt->vcpu, 0); |
2219 | c->dst.type = OP_NONE; /* Disable writeback. */ | 2372 | else { |
2373 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | ||
2374 | ctxt->eflags |= X86_EFLAGS_IF; | ||
2375 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
2376 | } | ||
2220 | break; | 2377 | break; |
2221 | case 0xfc: /* cld */ | 2378 | case 0xfc: /* cld */ |
2222 | ctxt->eflags &= ~EFLG_DF; | 2379 | ctxt->eflags &= ~EFLG_DF; |
@@ -2319,8 +2476,9 @@ twobyte_insn: | |||
2319 | } | 2476 | } |
2320 | break; | 2477 | break; |
2321 | case 0x05: /* syscall */ | 2478 | case 0x05: /* syscall */ |
2322 | if (emulate_syscall(ctxt) == -1) | 2479 | rc = emulate_syscall(ctxt); |
2323 | goto cannot_emulate; | 2480 | if (rc != X86EMUL_CONTINUE) |
2481 | goto done; | ||
2324 | else | 2482 | else |
2325 | goto writeback; | 2483 | goto writeback; |
2326 | break; | 2484 | break; |
@@ -2391,14 +2549,16 @@ twobyte_insn: | |||
2391 | c->dst.type = OP_NONE; | 2549 | c->dst.type = OP_NONE; |
2392 | break; | 2550 | break; |
2393 | case 0x34: /* sysenter */ | 2551 | case 0x34: /* sysenter */ |
2394 | if (emulate_sysenter(ctxt) == -1) | 2552 | rc = emulate_sysenter(ctxt); |
2395 | goto cannot_emulate; | 2553 | if (rc != X86EMUL_CONTINUE) |
2554 | goto done; | ||
2396 | else | 2555 | else |
2397 | goto writeback; | 2556 | goto writeback; |
2398 | break; | 2557 | break; |
2399 | case 0x35: /* sysexit */ | 2558 | case 0x35: /* sysexit */ |
2400 | if (emulate_sysexit(ctxt) == -1) | 2559 | rc = emulate_sysexit(ctxt); |
2401 | goto cannot_emulate; | 2560 | if (rc != X86EMUL_CONTINUE) |
2561 | goto done; | ||
2402 | else | 2562 | else |
2403 | goto writeback; | 2563 | goto writeback; |
2404 | break; | 2564 | break; |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 15578f180e59..294698b6daff 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -242,11 +242,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
242 | { | 242 | { |
243 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | 243 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, |
244 | irq_ack_notifier); | 244 | irq_ack_notifier); |
245 | spin_lock(&ps->inject_lock); | 245 | raw_spin_lock(&ps->inject_lock); |
246 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | 246 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) |
247 | atomic_inc(&ps->pit_timer.pending); | 247 | atomic_inc(&ps->pit_timer.pending); |
248 | ps->irq_ack = 1; | 248 | ps->irq_ack = 1; |
249 | spin_unlock(&ps->inject_lock); | 249 | raw_spin_unlock(&ps->inject_lock); |
250 | } | 250 | } |
251 | 251 | ||
252 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 252 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
@@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = { | |||
605 | .write = speaker_ioport_write, | 605 | .write = speaker_ioport_write, |
606 | }; | 606 | }; |
607 | 607 | ||
608 | /* Caller must have writers lock on slots_lock */ | 608 | /* Caller must hold slots_lock */ |
609 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | 609 | struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) |
610 | { | 610 | { |
611 | struct kvm_pit *pit; | 611 | struct kvm_pit *pit; |
@@ -624,7 +624,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
624 | 624 | ||
625 | mutex_init(&pit->pit_state.lock); | 625 | mutex_init(&pit->pit_state.lock); |
626 | mutex_lock(&pit->pit_state.lock); | 626 | mutex_lock(&pit->pit_state.lock); |
627 | spin_lock_init(&pit->pit_state.inject_lock); | 627 | raw_spin_lock_init(&pit->pit_state.inject_lock); |
628 | 628 | ||
629 | kvm->arch.vpit = pit; | 629 | kvm->arch.vpit = pit; |
630 | pit->kvm = kvm; | 630 | pit->kvm = kvm; |
@@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
645 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); | 645 | kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
646 | 646 | ||
647 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); | 647 | kvm_iodevice_init(&pit->dev, &pit_dev_ops); |
648 | ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); | 648 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); |
649 | if (ret < 0) | 649 | if (ret < 0) |
650 | goto fail; | 650 | goto fail; |
651 | 651 | ||
652 | if (flags & KVM_PIT_SPEAKER_DUMMY) { | 652 | if (flags & KVM_PIT_SPEAKER_DUMMY) { |
653 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); | 653 | kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); |
654 | ret = __kvm_io_bus_register_dev(&kvm->pio_bus, | 654 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, |
655 | &pit->speaker_dev); | 655 | &pit->speaker_dev); |
656 | if (ret < 0) | 656 | if (ret < 0) |
657 | goto fail_unregister; | 657 | goto fail_unregister; |
@@ -660,11 +660,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) | |||
660 | return pit; | 660 | return pit; |
661 | 661 | ||
662 | fail_unregister: | 662 | fail_unregister: |
663 | __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev); | 663 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev); |
664 | 664 | ||
665 | fail: | 665 | fail: |
666 | if (pit->irq_source_id >= 0) | 666 | kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier); |
667 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | 667 | kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); |
668 | kvm_free_irq_source_id(kvm, pit->irq_source_id); | ||
668 | 669 | ||
669 | kfree(pit); | 670 | kfree(pit); |
670 | return NULL; | 671 | return NULL; |
@@ -723,12 +724,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | |||
723 | /* Try to inject pending interrupts when | 724 | /* Try to inject pending interrupts when |
724 | * last one has been acked. | 725 | * last one has been acked. |
725 | */ | 726 | */ |
726 | spin_lock(&ps->inject_lock); | 727 | raw_spin_lock(&ps->inject_lock); |
727 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { | 728 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { |
728 | ps->irq_ack = 0; | 729 | ps->irq_ack = 0; |
729 | inject = 1; | 730 | inject = 1; |
730 | } | 731 | } |
731 | spin_unlock(&ps->inject_lock); | 732 | raw_spin_unlock(&ps->inject_lock); |
732 | if (inject) | 733 | if (inject) |
733 | __inject_pit_timer_intr(kvm); | 734 | __inject_pit_timer_intr(kvm); |
734 | } | 735 | } |
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index d4c1c7ffdc09..900d6b0ba7c2 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -27,7 +27,7 @@ struct kvm_kpit_state { | |||
27 | u32 speaker_data_on; | 27 | u32 speaker_data_on; |
28 | struct mutex lock; | 28 | struct mutex lock; |
29 | struct kvm_pit *pit; | 29 | struct kvm_pit *pit; |
30 | spinlock_t inject_lock; | 30 | raw_spinlock_t inject_lock; |
31 | unsigned long irq_ack; | 31 | unsigned long irq_ack; |
32 | struct kvm_irq_ack_notifier irq_ack_notifier; | 32 | struct kvm_irq_ack_notifier irq_ack_notifier; |
33 | }; | 33 | }; |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index d057c0cbd245..07771da85de5 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -44,18 +44,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
44 | * Other interrupt may be delivered to PIC while lock is dropped but | 44 | * Other interrupt may be delivered to PIC while lock is dropped but |
45 | * it should be safe since PIC state is already updated at this stage. | 45 | * it should be safe since PIC state is already updated at this stage. |
46 | */ | 46 | */ |
47 | spin_unlock(&s->pics_state->lock); | 47 | raw_spin_unlock(&s->pics_state->lock); |
48 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 48 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
49 | spin_lock(&s->pics_state->lock); | 49 | raw_spin_lock(&s->pics_state->lock); |
50 | } | 50 | } |
51 | 51 | ||
52 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 52 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
53 | { | 53 | { |
54 | struct kvm_pic *s = pic_irqchip(kvm); | 54 | struct kvm_pic *s = pic_irqchip(kvm); |
55 | spin_lock(&s->lock); | 55 | |
56 | raw_spin_lock(&s->lock); | ||
56 | s->pics[0].isr_ack = 0xff; | 57 | s->pics[0].isr_ack = 0xff; |
57 | s->pics[1].isr_ack = 0xff; | 58 | s->pics[1].isr_ack = 0xff; |
58 | spin_unlock(&s->lock); | 59 | raw_spin_unlock(&s->lock); |
59 | } | 60 | } |
60 | 61 | ||
61 | /* | 62 | /* |
@@ -156,9 +157,9 @@ static void pic_update_irq(struct kvm_pic *s) | |||
156 | 157 | ||
157 | void kvm_pic_update_irq(struct kvm_pic *s) | 158 | void kvm_pic_update_irq(struct kvm_pic *s) |
158 | { | 159 | { |
159 | spin_lock(&s->lock); | 160 | raw_spin_lock(&s->lock); |
160 | pic_update_irq(s); | 161 | pic_update_irq(s); |
161 | spin_unlock(&s->lock); | 162 | raw_spin_unlock(&s->lock); |
162 | } | 163 | } |
163 | 164 | ||
164 | int kvm_pic_set_irq(void *opaque, int irq, int level) | 165 | int kvm_pic_set_irq(void *opaque, int irq, int level) |
@@ -166,14 +167,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
166 | struct kvm_pic *s = opaque; | 167 | struct kvm_pic *s = opaque; |
167 | int ret = -1; | 168 | int ret = -1; |
168 | 169 | ||
169 | spin_lock(&s->lock); | 170 | raw_spin_lock(&s->lock); |
170 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 171 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
171 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 172 | ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
172 | pic_update_irq(s); | 173 | pic_update_irq(s); |
173 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, | 174 | trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, |
174 | s->pics[irq >> 3].imr, ret == 0); | 175 | s->pics[irq >> 3].imr, ret == 0); |
175 | } | 176 | } |
176 | spin_unlock(&s->lock); | 177 | raw_spin_unlock(&s->lock); |
177 | 178 | ||
178 | return ret; | 179 | return ret; |
179 | } | 180 | } |
@@ -203,7 +204,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
203 | int irq, irq2, intno; | 204 | int irq, irq2, intno; |
204 | struct kvm_pic *s = pic_irqchip(kvm); | 205 | struct kvm_pic *s = pic_irqchip(kvm); |
205 | 206 | ||
206 | spin_lock(&s->lock); | 207 | raw_spin_lock(&s->lock); |
207 | irq = pic_get_irq(&s->pics[0]); | 208 | irq = pic_get_irq(&s->pics[0]); |
208 | if (irq >= 0) { | 209 | if (irq >= 0) { |
209 | pic_intack(&s->pics[0], irq); | 210 | pic_intack(&s->pics[0], irq); |
@@ -228,7 +229,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
228 | intno = s->pics[0].irq_base + irq; | 229 | intno = s->pics[0].irq_base + irq; |
229 | } | 230 | } |
230 | pic_update_irq(s); | 231 | pic_update_irq(s); |
231 | spin_unlock(&s->lock); | 232 | raw_spin_unlock(&s->lock); |
232 | 233 | ||
233 | return intno; | 234 | return intno; |
234 | } | 235 | } |
@@ -442,7 +443,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
442 | printk(KERN_ERR "PIC: non byte write\n"); | 443 | printk(KERN_ERR "PIC: non byte write\n"); |
443 | return 0; | 444 | return 0; |
444 | } | 445 | } |
445 | spin_lock(&s->lock); | 446 | raw_spin_lock(&s->lock); |
446 | switch (addr) { | 447 | switch (addr) { |
447 | case 0x20: | 448 | case 0x20: |
448 | case 0x21: | 449 | case 0x21: |
@@ -455,7 +456,7 @@ static int picdev_write(struct kvm_io_device *this, | |||
455 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 456 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
456 | break; | 457 | break; |
457 | } | 458 | } |
458 | spin_unlock(&s->lock); | 459 | raw_spin_unlock(&s->lock); |
459 | return 0; | 460 | return 0; |
460 | } | 461 | } |
461 | 462 | ||
@@ -472,7 +473,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
472 | printk(KERN_ERR "PIC: non byte read\n"); | 473 | printk(KERN_ERR "PIC: non byte read\n"); |
473 | return 0; | 474 | return 0; |
474 | } | 475 | } |
475 | spin_lock(&s->lock); | 476 | raw_spin_lock(&s->lock); |
476 | switch (addr) { | 477 | switch (addr) { |
477 | case 0x20: | 478 | case 0x20: |
478 | case 0x21: | 479 | case 0x21: |
@@ -486,7 +487,7 @@ static int picdev_read(struct kvm_io_device *this, | |||
486 | break; | 487 | break; |
487 | } | 488 | } |
488 | *(unsigned char *)val = data; | 489 | *(unsigned char *)val = data; |
489 | spin_unlock(&s->lock); | 490 | raw_spin_unlock(&s->lock); |
490 | return 0; | 491 | return 0; |
491 | } | 492 | } |
492 | 493 | ||
@@ -520,7 +521,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
520 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 521 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); |
521 | if (!s) | 522 | if (!s) |
522 | return NULL; | 523 | return NULL; |
523 | spin_lock_init(&s->lock); | 524 | raw_spin_lock_init(&s->lock); |
524 | s->kvm = kvm; | 525 | s->kvm = kvm; |
525 | s->pics[0].elcr_mask = 0xf8; | 526 | s->pics[0].elcr_mask = 0xf8; |
526 | s->pics[1].elcr_mask = 0xde; | 527 | s->pics[1].elcr_mask = 0xde; |
@@ -533,7 +534,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
533 | * Initialize PIO device | 534 | * Initialize PIO device |
534 | */ | 535 | */ |
535 | kvm_iodevice_init(&s->dev, &picdev_ops); | 536 | kvm_iodevice_init(&s->dev, &picdev_ops); |
536 | ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); | 537 | mutex_lock(&kvm->slots_lock); |
538 | ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); | ||
539 | mutex_unlock(&kvm->slots_lock); | ||
537 | if (ret < 0) { | 540 | if (ret < 0) { |
538 | kfree(s); | 541 | kfree(s); |
539 | return NULL; | 542 | return NULL; |
@@ -541,3 +544,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
541 | 544 | ||
542 | return s; | 545 | return s; |
543 | } | 546 | } |
547 | |||
548 | void kvm_destroy_pic(struct kvm *kvm) | ||
549 | { | ||
550 | struct kvm_pic *vpic = kvm->arch.vpic; | ||
551 | |||
552 | if (vpic) { | ||
553 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev); | ||
554 | kvm->arch.vpic = NULL; | ||
555 | kfree(vpic); | ||
556 | } | ||
557 | } | ||
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index be399e207d57..34b15915754d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -62,7 +62,7 @@ struct kvm_kpic_state { | |||
62 | }; | 62 | }; |
63 | 63 | ||
64 | struct kvm_pic { | 64 | struct kvm_pic { |
65 | spinlock_t lock; | 65 | raw_spinlock_t lock; |
66 | unsigned pending_acks; | 66 | unsigned pending_acks; |
67 | struct kvm *kvm; | 67 | struct kvm *kvm; |
68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 68 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
@@ -75,6 +75,7 @@ struct kvm_pic { | |||
75 | }; | 75 | }; |
76 | 76 | ||
77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
78 | void kvm_destroy_pic(struct kvm *kvm); | ||
78 | int kvm_pic_read_irq(struct kvm *kvm); | 79 | int kvm_pic_read_irq(struct kvm *kvm); |
79 | void kvm_pic_update_irq(struct kvm_pic *s); | 80 | void kvm_pic_update_irq(struct kvm_pic *s); |
80 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | 81 | void kvm_pic_clear_isr_ack(struct kvm *kvm); |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 7bcc5b6a4403..cff851cf5322 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
@@ -1,6 +1,11 @@ | |||
1 | #ifndef ASM_KVM_CACHE_REGS_H | 1 | #ifndef ASM_KVM_CACHE_REGS_H |
2 | #define ASM_KVM_CACHE_REGS_H | 2 | #define ASM_KVM_CACHE_REGS_H |
3 | 3 | ||
4 | #define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS | ||
5 | #define KVM_POSSIBLE_CR4_GUEST_BITS \ | ||
6 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
7 | | X86_CR4_OSXMMEXCPT | X86_CR4_PGE) | ||
8 | |||
4 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | 9 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, |
5 | enum kvm_reg reg) | 10 | enum kvm_reg reg) |
6 | { | 11 | { |
@@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) | |||
38 | return vcpu->arch.pdptrs[index]; | 43 | return vcpu->arch.pdptrs[index]; |
39 | } | 44 | } |
40 | 45 | ||
46 | static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) | ||
47 | { | ||
48 | ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; | ||
49 | if (tmask & vcpu->arch.cr0_guest_owned_bits) | ||
50 | kvm_x86_ops->decache_cr0_guest_bits(vcpu); | ||
51 | return vcpu->arch.cr0 & mask; | ||
52 | } | ||
53 | |||
54 | static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) | ||
55 | { | ||
56 | return kvm_read_cr0_bits(vcpu, ~0UL); | ||
57 | } | ||
58 | |||
59 | static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) | ||
60 | { | ||
61 | ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; | ||
62 | if (tmask & vcpu->arch.cr4_guest_owned_bits) | ||
63 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
64 | return vcpu->arch.cr4 & mask; | ||
65 | } | ||
66 | |||
67 | static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) | ||
68 | { | ||
69 | return kvm_read_cr4_bits(vcpu, ~0UL); | ||
70 | } | ||
71 | |||
41 | #endif | 72 | #endif |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ba8c045da782..4b224f90087b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) | |||
1246 | 1246 | ||
1247 | return 0; | 1247 | return 0; |
1248 | } | 1248 | } |
1249 | |||
1250 | int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) | ||
1251 | { | ||
1252 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1253 | |||
1254 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1255 | return 1; | ||
1256 | |||
1257 | /* if this is ICR write vector before command */ | ||
1258 | if (reg == APIC_ICR) | ||
1259 | apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); | ||
1260 | return apic_reg_write(apic, reg, (u32)data); | ||
1261 | } | ||
1262 | |||
1263 | int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) | ||
1264 | { | ||
1265 | struct kvm_lapic *apic = vcpu->arch.apic; | ||
1266 | u32 low, high = 0; | ||
1267 | |||
1268 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
1269 | return 1; | ||
1270 | |||
1271 | if (apic_reg_read(apic, reg, 4, &low)) | ||
1272 | return 1; | ||
1273 | if (reg == APIC_ICR) | ||
1274 | apic_reg_read(apic, APIC_ICR2, 4, &high); | ||
1275 | |||
1276 | *data = (((u64)high) << 32) | low; | ||
1277 | |||
1278 | return 0; | ||
1279 | } | ||
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 40010b09c4aa..f5fe32c5edad 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); | |||
48 | 48 | ||
49 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 49 | int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
50 | int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | 50 | int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); |
51 | |||
52 | int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data); | ||
53 | int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); | ||
54 | |||
55 | static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) | ||
56 | { | ||
57 | return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; | ||
58 | } | ||
51 | #endif | 59 | #endif |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 89a49fb46a27..741373e8ca77 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -18,6 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | #include "x86.h" | ||
21 | #include "kvm_cache_regs.h" | 22 | #include "kvm_cache_regs.h" |
22 | 23 | ||
23 | #include <linux/kvm_host.h> | 24 | #include <linux/kvm_host.h> |
@@ -29,6 +30,7 @@ | |||
29 | #include <linux/swap.h> | 30 | #include <linux/swap.h> |
30 | #include <linux/hugetlb.h> | 31 | #include <linux/hugetlb.h> |
31 | #include <linux/compiler.h> | 32 | #include <linux/compiler.h> |
33 | #include <linux/srcu.h> | ||
32 | 34 | ||
33 | #include <asm/page.h> | 35 | #include <asm/page.h> |
34 | #include <asm/cmpxchg.h> | 36 | #include <asm/cmpxchg.h> |
@@ -136,16 +138,6 @@ module_param(oos_shadow, bool, 0644); | |||
136 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | 138 | #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ |
137 | | PT64_NX_MASK) | 139 | | PT64_NX_MASK) |
138 | 140 | ||
139 | #define PFERR_PRESENT_MASK (1U << 0) | ||
140 | #define PFERR_WRITE_MASK (1U << 1) | ||
141 | #define PFERR_USER_MASK (1U << 2) | ||
142 | #define PFERR_RSVD_MASK (1U << 3) | ||
143 | #define PFERR_FETCH_MASK (1U << 4) | ||
144 | |||
145 | #define PT_PDPE_LEVEL 3 | ||
146 | #define PT_DIRECTORY_LEVEL 2 | ||
147 | #define PT_PAGE_TABLE_LEVEL 1 | ||
148 | |||
149 | #define RMAP_EXT 4 | 141 | #define RMAP_EXT 4 |
150 | 142 | ||
151 | #define ACC_EXEC_MASK 1 | 143 | #define ACC_EXEC_MASK 1 |
@@ -153,6 +145,9 @@ module_param(oos_shadow, bool, 0644); | |||
153 | #define ACC_USER_MASK PT_USER_MASK | 145 | #define ACC_USER_MASK PT_USER_MASK |
154 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 146 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
155 | 147 | ||
148 | #include <trace/events/kvm.h> | ||
149 | |||
150 | #undef TRACE_INCLUDE_FILE | ||
156 | #define CREATE_TRACE_POINTS | 151 | #define CREATE_TRACE_POINTS |
157 | #include "mmutrace.h" | 152 | #include "mmutrace.h" |
158 | 153 | ||
@@ -229,7 +224,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | |||
229 | 224 | ||
230 | static int is_write_protection(struct kvm_vcpu *vcpu) | 225 | static int is_write_protection(struct kvm_vcpu *vcpu) |
231 | { | 226 | { |
232 | return vcpu->arch.cr0 & X86_CR0_WP; | 227 | return kvm_read_cr0_bits(vcpu, X86_CR0_WP); |
233 | } | 228 | } |
234 | 229 | ||
235 | static int is_cpuid_PSE36(void) | 230 | static int is_cpuid_PSE36(void) |
@@ -239,7 +234,7 @@ static int is_cpuid_PSE36(void) | |||
239 | 234 | ||
240 | static int is_nx(struct kvm_vcpu *vcpu) | 235 | static int is_nx(struct kvm_vcpu *vcpu) |
241 | { | 236 | { |
242 | return vcpu->arch.shadow_efer & EFER_NX; | 237 | return vcpu->arch.efer & EFER_NX; |
243 | } | 238 | } |
244 | 239 | ||
245 | static int is_shadow_present_pte(u64 pte) | 240 | static int is_shadow_present_pte(u64 pte) |
@@ -253,7 +248,7 @@ static int is_large_pte(u64 pte) | |||
253 | return pte & PT_PAGE_SIZE_MASK; | 248 | return pte & PT_PAGE_SIZE_MASK; |
254 | } | 249 | } |
255 | 250 | ||
256 | static int is_writeble_pte(unsigned long pte) | 251 | static int is_writable_pte(unsigned long pte) |
257 | { | 252 | { |
258 | return pte & PT_WRITABLE_MASK; | 253 | return pte & PT_WRITABLE_MASK; |
259 | } | 254 | } |
@@ -470,24 +465,10 @@ static int has_wrprotected_page(struct kvm *kvm, | |||
470 | 465 | ||
471 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | 466 | static int host_mapping_level(struct kvm *kvm, gfn_t gfn) |
472 | { | 467 | { |
473 | unsigned long page_size = PAGE_SIZE; | 468 | unsigned long page_size; |
474 | struct vm_area_struct *vma; | ||
475 | unsigned long addr; | ||
476 | int i, ret = 0; | 469 | int i, ret = 0; |
477 | 470 | ||
478 | addr = gfn_to_hva(kvm, gfn); | 471 | page_size = kvm_host_page_size(kvm, gfn); |
479 | if (kvm_is_error_hva(addr)) | ||
480 | return PT_PAGE_TABLE_LEVEL; | ||
481 | |||
482 | down_read(&current->mm->mmap_sem); | | |
483 | vma = find_vma(current->mm, addr); | ||
484 | if (!vma) | ||
485 | goto out; | ||
486 | |||
487 | page_size = vma_kernel_pagesize(vma); | ||
488 | |||
489 | out: | ||
490 | up_read(&current->mm->mmap_sem); | | |
491 | 472 | ||
492 | for (i = PT_PAGE_TABLE_LEVEL; | 473 | for (i = PT_PAGE_TABLE_LEVEL; |
493 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { | 474 | i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) { |
@@ -503,8 +484,7 @@ out: | |||
503 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 484 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
504 | { | 485 | { |
505 | struct kvm_memory_slot *slot; | 486 | struct kvm_memory_slot *slot; |
506 | int host_level; | 487 | int host_level, level, max_level; |
507 | int level = PT_PAGE_TABLE_LEVEL; | ||
508 | 488 | ||
509 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 489 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); |
510 | if (slot && slot->dirty_bitmap) | 490 | if (slot && slot->dirty_bitmap) |
@@ -515,7 +495,10 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | |||
515 | if (host_level == PT_PAGE_TABLE_LEVEL) | 495 | if (host_level == PT_PAGE_TABLE_LEVEL) |
516 | return host_level; | 496 | return host_level; |
517 | 497 | ||
518 | for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) | 498 | max_level = kvm_x86_ops->get_lpage_level() < host_level ? |
499 | kvm_x86_ops->get_lpage_level() : host_level; | ||
500 | |||
501 | for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) | ||
519 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) | 502 | if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) |
520 | break; | 503 | break; |
521 | 504 | ||
@@ -633,7 +616,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
633 | pfn = spte_to_pfn(*spte); | 616 | pfn = spte_to_pfn(*spte); |
634 | if (*spte & shadow_accessed_mask) | 617 | if (*spte & shadow_accessed_mask) |
635 | kvm_set_pfn_accessed(pfn); | 618 | kvm_set_pfn_accessed(pfn); |
636 | if (is_writeble_pte(*spte)) | 619 | if (is_writable_pte(*spte)) |
637 | kvm_set_pfn_dirty(pfn); | 620 | kvm_set_pfn_dirty(pfn); |
638 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); | 621 | rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level); |
639 | if (!*rmapp) { | 622 | if (!*rmapp) { |
@@ -662,6 +645,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) | |||
662 | prev_desc = desc; | 645 | prev_desc = desc; |
663 | desc = desc->more; | 646 | desc = desc->more; |
664 | } | 647 | } |
648 | pr_err("rmap_remove: %p %llx many->many\n", spte, *spte); | ||
665 | BUG(); | 649 | BUG(); |
666 | } | 650 | } |
667 | } | 651 | } |
@@ -708,7 +692,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
708 | BUG_ON(!spte); | 692 | BUG_ON(!spte); |
709 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 693 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
710 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); | 694 | rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte); |
711 | if (is_writeble_pte(*spte)) { | 695 | if (is_writable_pte(*spte)) { |
712 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); | 696 | __set_spte(spte, *spte & ~PT_WRITABLE_MASK); |
713 | write_protected = 1; | 697 | write_protected = 1; |
714 | } | 698 | } |
@@ -732,7 +716,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
732 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | 716 | BUG_ON(!(*spte & PT_PRESENT_MASK)); |
733 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); | 717 | BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)); |
734 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); | 718 | pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn); |
735 | if (is_writeble_pte(*spte)) { | 719 | if (is_writable_pte(*spte)) { |
736 | rmap_remove(kvm, spte); | 720 | rmap_remove(kvm, spte); |
737 | --kvm->stat.lpages; | 721 | --kvm->stat.lpages; |
738 | __set_spte(spte, shadow_trap_nonpresent_pte); | 722 | __set_spte(spte, shadow_trap_nonpresent_pte); |
@@ -787,7 +771,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
787 | 771 | ||
788 | new_spte &= ~PT_WRITABLE_MASK; | 772 | new_spte &= ~PT_WRITABLE_MASK; |
789 | new_spte &= ~SPTE_HOST_WRITEABLE; | 773 | new_spte &= ~SPTE_HOST_WRITEABLE; |
790 | if (is_writeble_pte(*spte)) | 774 | if (is_writable_pte(*spte)) |
791 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); | 775 | kvm_set_pfn_dirty(spte_to_pfn(*spte)); |
792 | __set_spte(spte, new_spte); | 776 | __set_spte(spte, new_spte); |
793 | spte = rmap_next(kvm, rmapp, spte); | 777 | spte = rmap_next(kvm, rmapp, spte); |
@@ -805,35 +789,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | |||
805 | unsigned long data)) | 789 | unsigned long data)) |
806 | { | 790 | { |
807 | int i, j; | 791 | int i, j; |
792 | int ret; | ||
808 | int retval = 0; | 793 | int retval = 0; |
794 | struct kvm_memslots *slots; | ||
809 | 795 | ||
810 | /* | 796 | slots = rcu_dereference(kvm->memslots); |
811 | * If mmap_sem isn't taken, we can look the memslots with only | 797 | |
812 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | 798 | for (i = 0; i < slots->nmemslots; i++) { |
813 | */ | 799 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
814 | for (i = 0; i < kvm->nmemslots; i++) { | ||
815 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
816 | unsigned long start = memslot->userspace_addr; | 800 | unsigned long start = memslot->userspace_addr; |
817 | unsigned long end; | 801 | unsigned long end; |
818 | 802 | ||
819 | /* mmu_lock protects userspace_addr */ | ||
820 | if (!start) | ||
821 | continue; | ||
822 | |||
823 | end = start + (memslot->npages << PAGE_SHIFT); | 803 | end = start + (memslot->npages << PAGE_SHIFT); |
824 | if (hva >= start && hva < end) { | 804 | if (hva >= start && hva < end) { |
825 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | 805 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; |
826 | 806 | ||
827 | retval |= handler(kvm, &memslot->rmap[gfn_offset], | 807 | ret = handler(kvm, &memslot->rmap[gfn_offset], data); |
828 | data); | ||
829 | 808 | ||
830 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { | 809 | for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { |
831 | int idx = gfn_offset; | 810 | int idx = gfn_offset; |
832 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); | 811 | idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j); |
833 | retval |= handler(kvm, | 812 | ret |= handler(kvm, |
834 | &memslot->lpage_info[j][idx].rmap_pde, | 813 | &memslot->lpage_info[j][idx].rmap_pde, |
835 | data); | 814 | data); |
836 | } | 815 | } |
816 | trace_kvm_age_page(hva, memslot, ret); | ||
817 | retval |= ret; | ||
837 | } | 818 | } |
838 | } | 819 | } |
839 | 820 | ||
@@ -856,9 +837,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, | |||
856 | u64 *spte; | 837 | u64 *spte; |
857 | int young = 0; | 838 | int young = 0; |
858 | 839 | ||
859 | /* always return old for EPT */ | 840 | /* |
841 | * Emulate the accessed bit for EPT, by checking if this page has | ||
842 | * an EPT mapping, and clearing it if it does. On the next access, | ||
843 | * a new EPT mapping will be established. | ||
844 | * This has some overhead, but not as much as the cost of swapping | ||
845 | * out actively used pages or breaking up actively used hugepages. | ||
846 | */ | ||
860 | if (!shadow_accessed_mask) | 847 | if (!shadow_accessed_mask) |
861 | return 0; | 848 | return kvm_unmap_rmapp(kvm, rmapp, data); |
862 | 849 | ||
863 | spte = rmap_next(kvm, rmapp, NULL); | 850 | spte = rmap_next(kvm, rmapp, NULL); |
864 | while (spte) { | 851 | while (spte) { |
@@ -1615,7 +1602,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) | |||
1615 | 1602 | ||
1616 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | 1603 | static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) |
1617 | { | 1604 | { |
1618 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); | 1605 | int slot = memslot_id(kvm, gfn); |
1619 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | 1606 | struct kvm_mmu_page *sp = page_header(__pa(pte)); |
1620 | 1607 | ||
1621 | __set_bit(slot, sp->slot_bitmap); | 1608 | __set_bit(slot, sp->slot_bitmap); |
@@ -1639,7 +1626,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
1639 | { | 1626 | { |
1640 | struct page *page; | 1627 | struct page *page; |
1641 | 1628 | ||
1642 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 1629 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
1643 | 1630 | ||
1644 | if (gpa == UNMAPPED_GVA) | 1631 | if (gpa == UNMAPPED_GVA) |
1645 | return NULL; | 1632 | return NULL; |
@@ -1852,7 +1839,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1852 | * is responsibility of mmu_get_page / kvm_sync_page. | 1839 | * is responsibility of mmu_get_page / kvm_sync_page. |
1853 | * Same reasoning can be applied to dirty page accounting. | 1840 | * Same reasoning can be applied to dirty page accounting. |
1854 | */ | 1841 | */ |
1855 | if (!can_unsync && is_writeble_pte(*sptep)) | 1842 | if (!can_unsync && is_writable_pte(*sptep)) |
1856 | goto set_pte; | 1843 | goto set_pte; |
1857 | 1844 | ||
1858 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 1845 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
@@ -1860,7 +1847,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1860 | __func__, gfn); | 1847 | __func__, gfn); |
1861 | ret = 1; | 1848 | ret = 1; |
1862 | pte_access &= ~ACC_WRITE_MASK; | 1849 | pte_access &= ~ACC_WRITE_MASK; |
1863 | if (is_writeble_pte(spte)) | 1850 | if (is_writable_pte(spte)) |
1864 | spte &= ~PT_WRITABLE_MASK; | 1851 | spte &= ~PT_WRITABLE_MASK; |
1865 | } | 1852 | } |
1866 | } | 1853 | } |
@@ -1881,7 +1868,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1881 | bool reset_host_protection) | 1868 | bool reset_host_protection) |
1882 | { | 1869 | { |
1883 | int was_rmapped = 0; | 1870 | int was_rmapped = 0; |
1884 | int was_writeble = is_writeble_pte(*sptep); | 1871 | int was_writable = is_writable_pte(*sptep); |
1885 | int rmap_count; | 1872 | int rmap_count; |
1886 | 1873 | ||
1887 | pgprintk("%s: spte %llx access %x write_fault %d" | 1874 | pgprintk("%s: spte %llx access %x write_fault %d" |
@@ -1932,7 +1919,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, | |||
1932 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) | 1919 | if (rmap_count > RMAP_RECYCLE_THRESHOLD) |
1933 | rmap_recycle(vcpu, sptep, gfn); | 1920 | rmap_recycle(vcpu, sptep, gfn); |
1934 | } else { | 1921 | } else { |
1935 | if (was_writeble) | 1922 | if (was_writable) |
1936 | kvm_release_pfn_dirty(pfn); | 1923 | kvm_release_pfn_dirty(pfn); |
1937 | else | 1924 | else |
1938 | kvm_release_pfn_clean(pfn); | 1925 | kvm_release_pfn_clean(pfn); |
@@ -2162,8 +2149,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2162 | spin_unlock(&vcpu->kvm->mmu_lock); | 2149 | spin_unlock(&vcpu->kvm->mmu_lock); |
2163 | } | 2150 | } |
2164 | 2151 | ||
2165 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2152 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr, |
2153 | u32 access, u32 *error) | ||
2166 | { | 2154 | { |
2155 | if (error) | ||
2156 | *error = 0; | ||
2167 | return vaddr; | 2157 | return vaddr; |
2168 | } | 2158 | } |
2169 | 2159 | ||
@@ -2747,7 +2737,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
2747 | if (tdp_enabled) | 2737 | if (tdp_enabled) |
2748 | return 0; | 2738 | return 0; |
2749 | 2739 | ||
2750 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); | 2740 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); |
2751 | 2741 | ||
2752 | spin_lock(&vcpu->kvm->mmu_lock); | 2742 | spin_lock(&vcpu->kvm->mmu_lock); |
2753 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 2743 | r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
@@ -2847,16 +2837,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) | |||
2847 | */ | 2837 | */ |
2848 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); | 2838 | page = alloc_page(GFP_KERNEL | __GFP_DMA32); |
2849 | if (!page) | 2839 | if (!page) |
2850 | goto error_1; | 2840 | return -ENOMEM; |
2841 | |||
2851 | vcpu->arch.mmu.pae_root = page_address(page); | 2842 | vcpu->arch.mmu.pae_root = page_address(page); |
2852 | for (i = 0; i < 4; ++i) | 2843 | for (i = 0; i < 4; ++i) |
2853 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; | 2844 | vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; |
2854 | 2845 | ||
2855 | return 0; | 2846 | return 0; |
2856 | |||
2857 | error_1: | ||
2858 | free_mmu_pages(vcpu); | ||
2859 | return -ENOMEM; | ||
2860 | } | 2847 | } |
2861 | 2848 | ||
2862 | int kvm_mmu_create(struct kvm_vcpu *vcpu) | 2849 | int kvm_mmu_create(struct kvm_vcpu *vcpu) |
@@ -2936,10 +2923,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2936 | spin_lock(&kvm_lock); | 2923 | spin_lock(&kvm_lock); |
2937 | 2924 | ||
2938 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2925 | list_for_each_entry(kvm, &vm_list, vm_list) { |
2939 | int npages; | 2926 | int npages, idx; |
2940 | 2927 | ||
2941 | if (!down_read_trylock(&kvm->slots_lock)) | 2928 | idx = srcu_read_lock(&kvm->srcu); |
2942 | continue; | ||
2943 | spin_lock(&kvm->mmu_lock); | 2929 | spin_lock(&kvm->mmu_lock); |
2944 | npages = kvm->arch.n_alloc_mmu_pages - | 2930 | npages = kvm->arch.n_alloc_mmu_pages - |
2945 | kvm->arch.n_free_mmu_pages; | 2931 | kvm->arch.n_free_mmu_pages; |
@@ -2952,7 +2938,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
2952 | nr_to_scan--; | 2938 | nr_to_scan--; |
2953 | 2939 | ||
2954 | spin_unlock(&kvm->mmu_lock); | 2940 | spin_unlock(&kvm->mmu_lock); |
2955 | up_read(&kvm->slots_lock); | 2941 | srcu_read_unlock(&kvm->srcu, idx); |
2956 | } | 2942 | } |
2957 | if (kvm_freed) | 2943 | if (kvm_freed) |
2958 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 2944 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
@@ -3019,9 +3005,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) | |||
3019 | int i; | 3005 | int i; |
3020 | unsigned int nr_mmu_pages; | 3006 | unsigned int nr_mmu_pages; |
3021 | unsigned int nr_pages = 0; | 3007 | unsigned int nr_pages = 0; |
3008 | struct kvm_memslots *slots; | ||
3022 | 3009 | ||
3023 | for (i = 0; i < kvm->nmemslots; i++) | 3010 | slots = rcu_dereference(kvm->memslots); |
3024 | nr_pages += kvm->memslots[i].npages; | 3011 | for (i = 0; i < slots->nmemslots; i++) |
3012 | nr_pages += slots->memslots[i].npages; | ||
3025 | 3013 | ||
3026 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; | 3014 | nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000; |
3027 | nr_mmu_pages = max(nr_mmu_pages, | 3015 | nr_mmu_pages = max(nr_mmu_pages, |
@@ -3246,7 +3234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
3246 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) | 3234 | if (is_shadow_present_pte(ent) && !is_last_spte(ent, level)) |
3247 | audit_mappings_page(vcpu, ent, va, level - 1); | 3235 | audit_mappings_page(vcpu, ent, va, level - 1); |
3248 | else { | 3236 | else { |
3249 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); | 3237 | gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL); |
3250 | gfn_t gfn = gpa >> PAGE_SHIFT; | 3238 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3251 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | 3239 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); |
3252 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | 3240 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; |
@@ -3291,10 +3279,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu) | |||
3291 | static int count_rmaps(struct kvm_vcpu *vcpu) | 3279 | static int count_rmaps(struct kvm_vcpu *vcpu) |
3292 | { | 3280 | { |
3293 | int nmaps = 0; | 3281 | int nmaps = 0; |
3294 | int i, j, k; | 3282 | int i, j, k, idx; |
3295 | 3283 | ||
3284 | idx = srcu_read_lock(&kvm->srcu); | ||
3285 | slots = rcu_dereference(kvm->memslots); | ||
3296 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 3286 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
3297 | struct kvm_memory_slot *m = &vcpu->kvm->memslots[i]; | 3287 | struct kvm_memory_slot *m = &slots->memslots[i]; |
3298 | struct kvm_rmap_desc *d; | 3288 | struct kvm_rmap_desc *d; |
3299 | 3289 | ||
3300 | for (j = 0; j < m->npages; ++j) { | 3290 | for (j = 0; j < m->npages; ++j) { |
@@ -3317,6 +3307,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) | |||
3317 | } | 3307 | } |
3318 | } | 3308 | } |
3319 | } | 3309 | } |
3310 | srcu_read_unlock(&kvm->srcu, idx); | ||
3320 | return nmaps; | 3311 | return nmaps; |
3321 | } | 3312 | } |
3322 | 3313 | ||
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 61a1b3884b49..be66759321a5 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define __KVM_X86_MMU_H | 2 | #define __KVM_X86_MMU_H |
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | ||
5 | 6 | ||
6 | #define PT64_PT_BITS 9 | 7 | #define PT64_PT_BITS 9 |
7 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) | 8 | #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS) |
@@ -37,6 +38,16 @@ | |||
37 | #define PT32_ROOT_LEVEL 2 | 38 | #define PT32_ROOT_LEVEL 2 |
38 | #define PT32E_ROOT_LEVEL 3 | 39 | #define PT32E_ROOT_LEVEL 3 |
39 | 40 | ||
41 | #define PT_PDPE_LEVEL 3 | ||
42 | #define PT_DIRECTORY_LEVEL 2 | ||
43 | #define PT_PAGE_TABLE_LEVEL 1 | ||
44 | |||
45 | #define PFERR_PRESENT_MASK (1U << 0) | ||
46 | #define PFERR_WRITE_MASK (1U << 1) | ||
47 | #define PFERR_USER_MASK (1U << 2) | ||
48 | #define PFERR_RSVD_MASK (1U << 3) | ||
49 | #define PFERR_FETCH_MASK (1U << 4) | ||
50 | |||
40 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); | 51 | int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]); |
41 | 52 | ||
42 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 53 | static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
@@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) | |||
53 | return kvm_mmu_load(vcpu); | 64 | return kvm_mmu_load(vcpu); |
54 | } | 65 | } |
55 | 66 | ||
56 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||
57 | { | ||
58 | #ifdef CONFIG_X86_64 | ||
59 | return vcpu->arch.shadow_efer & EFER_LMA; | ||
60 | #else | ||
61 | return 0; | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | static inline int is_pae(struct kvm_vcpu *vcpu) | ||
66 | { | ||
67 | return vcpu->arch.cr4 & X86_CR4_PAE; | ||
68 | } | ||
69 | |||
70 | static inline int is_pse(struct kvm_vcpu *vcpu) | ||
71 | { | ||
72 | return vcpu->arch.cr4 & X86_CR4_PSE; | ||
73 | } | ||
74 | |||
75 | static inline int is_paging(struct kvm_vcpu *vcpu) | ||
76 | { | ||
77 | return vcpu->arch.cr0 & X86_CR0_PG; | ||
78 | } | ||
79 | |||
80 | static inline int is_present_gpte(unsigned long pte) | 67 | static inline int is_present_gpte(unsigned long pte) |
81 | { | 68 | { |
82 | return pte & PT_PRESENT_MASK; | 69 | return pte & PT_PRESENT_MASK; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ede2131a9225..81eab9a50e6a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -162,7 +162,7 @@ walk: | |||
162 | if (rsvd_fault) | 162 | if (rsvd_fault) |
163 | goto access_error; | 163 | goto access_error; |
164 | 164 | ||
165 | if (write_fault && !is_writeble_pte(pte)) | 165 | if (write_fault && !is_writable_pte(pte)) |
166 | if (user_fault || is_write_protection(vcpu)) | 166 | if (user_fault || is_write_protection(vcpu)) |
167 | goto access_error; | 167 | goto access_error; |
168 | 168 | ||
@@ -490,18 +490,23 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
490 | spin_unlock(&vcpu->kvm->mmu_lock); | 490 | spin_unlock(&vcpu->kvm->mmu_lock); |
491 | } | 491 | } |
492 | 492 | ||
493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 493 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, |
494 | u32 *error) | ||
494 | { | 495 | { |
495 | struct guest_walker walker; | 496 | struct guest_walker walker; |
496 | gpa_t gpa = UNMAPPED_GVA; | 497 | gpa_t gpa = UNMAPPED_GVA; |
497 | int r; | 498 | int r; |
498 | 499 | ||
499 | r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); | 500 | r = FNAME(walk_addr)(&walker, vcpu, vaddr, |
501 | !!(access & PFERR_WRITE_MASK), | ||
502 | !!(access & PFERR_USER_MASK), | ||
503 | !!(access & PFERR_FETCH_MASK)); | ||
500 | 504 | ||
501 | if (r) { | 505 | if (r) { |
502 | gpa = gfn_to_gpa(walker.gfn); | 506 | gpa = gfn_to_gpa(walker.gfn); |
503 | gpa |= vaddr & ~PAGE_MASK; | 507 | gpa |= vaddr & ~PAGE_MASK; |
504 | } | 508 | } else if (error) |
509 | *error = walker.error_code; | ||
505 | 510 | ||
506 | return gpa; | 511 | return gpa; |
507 | } | 512 | } |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1d9b33843c80..52f78dd03010 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -231,7 +231,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
231 | efer &= ~EFER_LME; | 231 | efer &= ~EFER_LME; |
232 | 232 | ||
233 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; | 233 | to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; |
234 | vcpu->arch.shadow_efer = efer; | 234 | vcpu->arch.efer = efer; |
235 | } | 235 | } |
236 | 236 | ||
237 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 237 | static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
@@ -540,6 +540,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
540 | struct vmcb_control_area *control = &svm->vmcb->control; | 540 | struct vmcb_control_area *control = &svm->vmcb->control; |
541 | struct vmcb_save_area *save = &svm->vmcb->save; | 541 | struct vmcb_save_area *save = &svm->vmcb->save; |
542 | 542 | ||
543 | svm->vcpu.fpu_active = 1; | ||
544 | |||
543 | control->intercept_cr_read = INTERCEPT_CR0_MASK | | 545 | control->intercept_cr_read = INTERCEPT_CR0_MASK | |
544 | INTERCEPT_CR3_MASK | | 546 | INTERCEPT_CR3_MASK | |
545 | INTERCEPT_CR4_MASK; | 547 | INTERCEPT_CR4_MASK; |
@@ -552,13 +554,19 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
552 | control->intercept_dr_read = INTERCEPT_DR0_MASK | | 554 | control->intercept_dr_read = INTERCEPT_DR0_MASK | |
553 | INTERCEPT_DR1_MASK | | 555 | INTERCEPT_DR1_MASK | |
554 | INTERCEPT_DR2_MASK | | 556 | INTERCEPT_DR2_MASK | |
555 | INTERCEPT_DR3_MASK; | 557 | INTERCEPT_DR3_MASK | |
558 | INTERCEPT_DR4_MASK | | ||
559 | INTERCEPT_DR5_MASK | | ||
560 | INTERCEPT_DR6_MASK | | ||
561 | INTERCEPT_DR7_MASK; | ||
556 | 562 | ||
557 | control->intercept_dr_write = INTERCEPT_DR0_MASK | | 563 | control->intercept_dr_write = INTERCEPT_DR0_MASK | |
558 | INTERCEPT_DR1_MASK | | 564 | INTERCEPT_DR1_MASK | |
559 | INTERCEPT_DR2_MASK | | 565 | INTERCEPT_DR2_MASK | |
560 | INTERCEPT_DR3_MASK | | 566 | INTERCEPT_DR3_MASK | |
567 | INTERCEPT_DR4_MASK | | ||
561 | INTERCEPT_DR5_MASK | | 568 | INTERCEPT_DR5_MASK | |
569 | INTERCEPT_DR6_MASK | | ||
562 | INTERCEPT_DR7_MASK; | 570 | INTERCEPT_DR7_MASK; |
563 | 571 | ||
564 | control->intercept_exceptions = (1 << PF_VECTOR) | | 572 | control->intercept_exceptions = (1 << PF_VECTOR) | |
@@ -569,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
569 | control->intercept = (1ULL << INTERCEPT_INTR) | | 577 | control->intercept = (1ULL << INTERCEPT_INTR) | |
570 | (1ULL << INTERCEPT_NMI) | | 578 | (1ULL << INTERCEPT_NMI) | |
571 | (1ULL << INTERCEPT_SMI) | | 579 | (1ULL << INTERCEPT_SMI) | |
580 | (1ULL << INTERCEPT_SELECTIVE_CR0) | | ||
572 | (1ULL << INTERCEPT_CPUID) | | 581 | (1ULL << INTERCEPT_CPUID) | |
573 | (1ULL << INTERCEPT_INVD) | | 582 | (1ULL << INTERCEPT_INVD) | |
574 | (1ULL << INTERCEPT_HLT) | | 583 | (1ULL << INTERCEPT_HLT) | |
@@ -641,10 +650,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
641 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | | 650 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | |
642 | (1ULL << INTERCEPT_INVLPG)); | 651 | (1ULL << INTERCEPT_INVLPG)); |
643 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 652 | control->intercept_exceptions &= ~(1 << PF_VECTOR); |
644 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| | 653 | control->intercept_cr_read &= ~INTERCEPT_CR3_MASK; |
645 | INTERCEPT_CR3_MASK); | 654 | control->intercept_cr_write &= ~INTERCEPT_CR3_MASK; |
646 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | ||
647 | INTERCEPT_CR3_MASK); | ||
648 | save->g_pat = 0x0007040600070406ULL; | 655 | save->g_pat = 0x0007040600070406ULL; |
649 | save->cr3 = 0; | 656 | save->cr3 = 0; |
650 | save->cr4 = 0; | 657 | save->cr4 = 0; |
@@ -730,7 +737,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) | |||
730 | init_vmcb(svm); | 737 | init_vmcb(svm); |
731 | 738 | ||
732 | fx_init(&svm->vcpu); | 739 | fx_init(&svm->vcpu); |
733 | svm->vcpu.fpu_active = 1; | ||
734 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 740 | svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
735 | if (kvm_vcpu_is_bsp(&svm->vcpu)) | 741 | if (kvm_vcpu_is_bsp(&svm->vcpu)) |
736 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; | 742 | svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; |
@@ -765,14 +771,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
765 | if (unlikely(cpu != vcpu->cpu)) { | 771 | if (unlikely(cpu != vcpu->cpu)) { |
766 | u64 delta; | 772 | u64 delta; |
767 | 773 | ||
768 | /* | 774 | if (check_tsc_unstable()) { |
769 | * Make sure that the guest sees a monotonically | 775 | /* |
770 | * increasing TSC. | 776 | * Make sure that the guest sees a monotonically |
771 | */ | 777 | * increasing TSC. |
772 | delta = vcpu->arch.host_tsc - native_read_tsc(); | 778 | */ |
773 | svm->vmcb->control.tsc_offset += delta; | 779 | delta = vcpu->arch.host_tsc - native_read_tsc(); |
774 | if (is_nested(svm)) | 780 | svm->vmcb->control.tsc_offset += delta; |
775 | svm->nested.hsave->control.tsc_offset += delta; | 781 | if (is_nested(svm)) |
782 | svm->nested.hsave->control.tsc_offset += delta; | ||
783 | } | ||
776 | vcpu->cpu = cpu; | 784 | vcpu->cpu = cpu; |
777 | kvm_migrate_timers(vcpu); | 785 | kvm_migrate_timers(vcpu); |
778 | svm->asid_generation = 0; | 786 | svm->asid_generation = 0; |
@@ -954,42 +962,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | |||
954 | svm->vmcb->save.gdtr.base = dt->base ; | 962 | svm->vmcb->save.gdtr.base = dt->base ; |
955 | } | 963 | } |
956 | 964 | ||
965 | static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
966 | { | ||
967 | } | ||
968 | |||
957 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 969 | static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
958 | { | 970 | { |
959 | } | 971 | } |
960 | 972 | ||
973 | static void update_cr0_intercept(struct vcpu_svm *svm) | ||
974 | { | ||
975 | ulong gcr0 = svm->vcpu.arch.cr0; | ||
976 | u64 *hcr0 = &svm->vmcb->save.cr0; | ||
977 | |||
978 | if (!svm->vcpu.fpu_active) | ||
979 | *hcr0 |= SVM_CR0_SELECTIVE_MASK; | ||
980 | else | ||
981 | *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) | ||
982 | | (gcr0 & SVM_CR0_SELECTIVE_MASK); | ||
983 | |||
984 | |||
985 | if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { | ||
986 | svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; | ||
987 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; | ||
988 | } else { | ||
989 | svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; | ||
990 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; | ||
991 | } | ||
992 | } | ||
993 | |||
961 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 994 | static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
962 | { | 995 | { |
963 | struct vcpu_svm *svm = to_svm(vcpu); | 996 | struct vcpu_svm *svm = to_svm(vcpu); |
964 | 997 | ||
965 | #ifdef CONFIG_X86_64 | 998 | #ifdef CONFIG_X86_64 |
966 | if (vcpu->arch.shadow_efer & EFER_LME) { | 999 | if (vcpu->arch.efer & EFER_LME) { |
967 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 1000 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
968 | vcpu->arch.shadow_efer |= EFER_LMA; | 1001 | vcpu->arch.efer |= EFER_LMA; |
969 | svm->vmcb->save.efer |= EFER_LMA | EFER_LME; | 1002 | svm->vmcb->save.efer |= EFER_LMA | EFER_LME; |
970 | } | 1003 | } |
971 | 1004 | ||
972 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { | 1005 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { |
973 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1006 | vcpu->arch.efer &= ~EFER_LMA; |
974 | svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); | 1007 | svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); |
975 | } | 1008 | } |
976 | } | 1009 | } |
977 | #endif | 1010 | #endif |
978 | if (npt_enabled) | 1011 | vcpu->arch.cr0 = cr0; |
979 | goto set; | ||
980 | 1012 | ||
981 | if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { | 1013 | if (!npt_enabled) |
982 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1014 | cr0 |= X86_CR0_PG | X86_CR0_WP; |
983 | vcpu->fpu_active = 1; | ||
984 | } | ||
985 | 1015 | ||
986 | vcpu->arch.cr0 = cr0; | 1016 | if (!vcpu->fpu_active) |
987 | cr0 |= X86_CR0_PG | X86_CR0_WP; | ||
988 | if (!vcpu->fpu_active) { | ||
989 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
990 | cr0 |= X86_CR0_TS; | 1017 | cr0 |= X86_CR0_TS; |
991 | } | ||
992 | set: | ||
993 | /* | 1018 | /* |
994 | * re-enable caching here because the QEMU bios | 1019 | * re-enable caching here because the QEMU bios |
995 | * does not do it - this results in some delay at | 1020 | * does not do it - this results in some delay at |
@@ -997,6 +1022,7 @@ set: | |||
997 | */ | 1022 | */ |
998 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); | 1023 | cr0 &= ~(X86_CR0_CD | X86_CR0_NW); |
999 | svm->vmcb->save.cr0 = cr0; | 1024 | svm->vmcb->save.cr0 = cr0; |
1025 | update_cr0_intercept(svm); | ||
1000 | } | 1026 | } |
1001 | 1027 | ||
1002 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1028 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
@@ -1102,76 +1128,70 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1102 | svm->vmcb->control.asid = sd->next_asid++; | 1128 | svm->vmcb->control.asid = sd->next_asid++; |
1103 | } | 1129 | } |
1104 | 1130 | ||
1105 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) | 1131 | static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) |
1106 | { | 1132 | { |
1107 | struct vcpu_svm *svm = to_svm(vcpu); | 1133 | struct vcpu_svm *svm = to_svm(vcpu); |
1108 | unsigned long val; | ||
1109 | 1134 | ||
1110 | switch (dr) { | 1135 | switch (dr) { |
1111 | case 0 ... 3: | 1136 | case 0 ... 3: |
1112 | val = vcpu->arch.db[dr]; | 1137 | *dest = vcpu->arch.db[dr]; |
1113 | break; | 1138 | break; |
1139 | case 4: | ||
1140 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1141 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1142 | /* fall through */ | ||
1114 | case 6: | 1143 | case 6: |
1115 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1144 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1116 | val = vcpu->arch.dr6; | 1145 | *dest = vcpu->arch.dr6; |
1117 | else | 1146 | else |
1118 | val = svm->vmcb->save.dr6; | 1147 | *dest = svm->vmcb->save.dr6; |
1119 | break; | 1148 | break; |
1149 | case 5: | ||
1150 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1151 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1152 | /* fall through */ | ||
1120 | case 7: | 1153 | case 7: |
1121 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1154 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
1122 | val = vcpu->arch.dr7; | 1155 | *dest = vcpu->arch.dr7; |
1123 | else | 1156 | else |
1124 | val = svm->vmcb->save.dr7; | 1157 | *dest = svm->vmcb->save.dr7; |
1125 | break; | 1158 | break; |
1126 | default: | ||
1127 | val = 0; | ||
1128 | } | 1159 | } |
1129 | 1160 | ||
1130 | return val; | 1161 | return EMULATE_DONE; |
1131 | } | 1162 | } |
1132 | 1163 | ||
1133 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 1164 | static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) |
1134 | int *exception) | ||
1135 | { | 1165 | { |
1136 | struct vcpu_svm *svm = to_svm(vcpu); | 1166 | struct vcpu_svm *svm = to_svm(vcpu); |
1137 | 1167 | ||
1138 | *exception = 0; | ||
1139 | |||
1140 | switch (dr) { | 1168 | switch (dr) { |
1141 | case 0 ... 3: | 1169 | case 0 ... 3: |
1142 | vcpu->arch.db[dr] = value; | 1170 | vcpu->arch.db[dr] = value; |
1143 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 1171 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
1144 | vcpu->arch.eff_db[dr] = value; | 1172 | vcpu->arch.eff_db[dr] = value; |
1145 | return; | 1173 | break; |
1146 | case 4 ... 5: | 1174 | case 4: |
1147 | if (vcpu->arch.cr4 & X86_CR4_DE) | 1175 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
1148 | *exception = UD_VECTOR; | 1176 | return EMULATE_FAIL; /* will re-inject UD */ |
1149 | return; | 1177 | /* fall through */ |
1150 | case 6: | 1178 | case 6: |
1151 | if (value & 0xffffffff00000000ULL) { | ||
1152 | *exception = GP_VECTOR; | ||
1153 | return; | ||
1154 | } | ||
1155 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; | 1179 | vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; |
1156 | return; | 1180 | break; |
1181 | case 5: | ||
1182 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | ||
1183 | return EMULATE_FAIL; /* will re-inject UD */ | ||
1184 | /* fall through */ | ||
1157 | case 7: | 1185 | case 7: |
1158 | if (value & 0xffffffff00000000ULL) { | ||
1159 | *exception = GP_VECTOR; | ||
1160 | return; | ||
1161 | } | ||
1162 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; | 1186 | vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; |
1163 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 1187 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
1164 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1188 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
1165 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); | 1189 | vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); |
1166 | } | 1190 | } |
1167 | return; | 1191 | break; |
1168 | default: | ||
1169 | /* FIXME: Possible case? */ | ||
1170 | printk(KERN_DEBUG "%s: unexpected dr %u\n", | ||
1171 | __func__, dr); | ||
1172 | *exception = UD_VECTOR; | ||
1173 | return; | ||
1174 | } | 1192 | } |
1193 | |||
1194 | return EMULATE_DONE; | ||
1175 | } | 1195 | } |
1176 | 1196 | ||
1177 | static int pf_interception(struct vcpu_svm *svm) | 1197 | static int pf_interception(struct vcpu_svm *svm) |
@@ -1239,13 +1259,17 @@ static int ud_interception(struct vcpu_svm *svm) | |||
1239 | return 1; | 1259 | return 1; |
1240 | } | 1260 | } |
1241 | 1261 | ||
1242 | static int nm_interception(struct vcpu_svm *svm) | 1262 | static void svm_fpu_activate(struct kvm_vcpu *vcpu) |
1243 | { | 1263 | { |
1264 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1244 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1265 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
1245 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) | ||
1246 | svm->vmcb->save.cr0 &= ~X86_CR0_TS; | ||
1247 | svm->vcpu.fpu_active = 1; | 1266 | svm->vcpu.fpu_active = 1; |
1267 | update_cr0_intercept(svm); | ||
1268 | } | ||
1248 | 1269 | ||
1270 | static int nm_interception(struct vcpu_svm *svm) | ||
1271 | { | ||
1272 | svm_fpu_activate(&svm->vcpu); | ||
1249 | return 1; | 1273 | return 1; |
1250 | } | 1274 | } |
1251 | 1275 | ||
@@ -1337,7 +1361,7 @@ static int vmmcall_interception(struct vcpu_svm *svm) | |||
1337 | 1361 | ||
1338 | static int nested_svm_check_permissions(struct vcpu_svm *svm) | 1362 | static int nested_svm_check_permissions(struct vcpu_svm *svm) |
1339 | { | 1363 | { |
1340 | if (!(svm->vcpu.arch.shadow_efer & EFER_SVME) | 1364 | if (!(svm->vcpu.arch.efer & EFER_SVME) |
1341 | || !is_paging(&svm->vcpu)) { | 1365 | || !is_paging(&svm->vcpu)) { |
1342 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1366 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
1343 | return 1; | 1367 | return 1; |
@@ -1740,8 +1764,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
1740 | hsave->save.ds = vmcb->save.ds; | 1764 | hsave->save.ds = vmcb->save.ds; |
1741 | hsave->save.gdtr = vmcb->save.gdtr; | 1765 | hsave->save.gdtr = vmcb->save.gdtr; |
1742 | hsave->save.idtr = vmcb->save.idtr; | 1766 | hsave->save.idtr = vmcb->save.idtr; |
1743 | hsave->save.efer = svm->vcpu.arch.shadow_efer; | 1767 | hsave->save.efer = svm->vcpu.arch.efer; |
1744 | hsave->save.cr0 = svm->vcpu.arch.cr0; | 1768 | hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); |
1745 | hsave->save.cr4 = svm->vcpu.arch.cr4; | 1769 | hsave->save.cr4 = svm->vcpu.arch.cr4; |
1746 | hsave->save.rflags = vmcb->save.rflags; | 1770 | hsave->save.rflags = vmcb->save.rflags; |
1747 | hsave->save.rip = svm->next_rip; | 1771 | hsave->save.rip = svm->next_rip; |
@@ -2153,9 +2177,10 @@ static int rdmsr_interception(struct vcpu_svm *svm) | |||
2153 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2177 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
2154 | u64 data; | 2178 | u64 data; |
2155 | 2179 | ||
2156 | if (svm_get_msr(&svm->vcpu, ecx, &data)) | 2180 | if (svm_get_msr(&svm->vcpu, ecx, &data)) { |
2181 | trace_kvm_msr_read_ex(ecx); | ||
2157 | kvm_inject_gp(&svm->vcpu, 0); | 2182 | kvm_inject_gp(&svm->vcpu, 0); |
2158 | else { | 2183 | } else { |
2159 | trace_kvm_msr_read(ecx, data); | 2184 | trace_kvm_msr_read(ecx, data); |
2160 | 2185 | ||
2161 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; | 2186 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; |
@@ -2247,13 +2272,15 @@ static int wrmsr_interception(struct vcpu_svm *svm) | |||
2247 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 2272 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
2248 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 2273 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
2249 | 2274 | ||
2250 | trace_kvm_msr_write(ecx, data); | ||
2251 | 2275 | ||
2252 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2276 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
2253 | if (svm_set_msr(&svm->vcpu, ecx, data)) | 2277 | if (svm_set_msr(&svm->vcpu, ecx, data)) { |
2278 | trace_kvm_msr_write_ex(ecx, data); | ||
2254 | kvm_inject_gp(&svm->vcpu, 0); | 2279 | kvm_inject_gp(&svm->vcpu, 0); |
2255 | else | 2280 | } else { |
2281 | trace_kvm_msr_write(ecx, data); | ||
2256 | skip_emulated_instruction(&svm->vcpu); | 2282 | skip_emulated_instruction(&svm->vcpu); |
2283 | } | ||
2257 | return 1; | 2284 | return 1; |
2258 | } | 2285 | } |
2259 | 2286 | ||
@@ -2297,7 +2324,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2297 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2324 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
2298 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2325 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
2299 | [SVM_EXIT_READ_CR8] = emulate_on_interception, | 2326 | [SVM_EXIT_READ_CR8] = emulate_on_interception, |
2300 | /* for now: */ | 2327 | [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, |
2301 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, | 2328 | [SVM_EXIT_WRITE_CR0] = emulate_on_interception, |
2302 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, | 2329 | [SVM_EXIT_WRITE_CR3] = emulate_on_interception, |
2303 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, | 2330 | [SVM_EXIT_WRITE_CR4] = emulate_on_interception, |
@@ -2306,11 +2333,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | |||
2306 | [SVM_EXIT_READ_DR1] = emulate_on_interception, | 2333 | [SVM_EXIT_READ_DR1] = emulate_on_interception, |
2307 | [SVM_EXIT_READ_DR2] = emulate_on_interception, | 2334 | [SVM_EXIT_READ_DR2] = emulate_on_interception, |
2308 | [SVM_EXIT_READ_DR3] = emulate_on_interception, | 2335 | [SVM_EXIT_READ_DR3] = emulate_on_interception, |
2336 | [SVM_EXIT_READ_DR4] = emulate_on_interception, | ||
2337 | [SVM_EXIT_READ_DR5] = emulate_on_interception, | ||
2338 | [SVM_EXIT_READ_DR6] = emulate_on_interception, | ||
2339 | [SVM_EXIT_READ_DR7] = emulate_on_interception, | ||
2309 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, | 2340 | [SVM_EXIT_WRITE_DR0] = emulate_on_interception, |
2310 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, | 2341 | [SVM_EXIT_WRITE_DR1] = emulate_on_interception, |
2311 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, | 2342 | [SVM_EXIT_WRITE_DR2] = emulate_on_interception, |
2312 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, | 2343 | [SVM_EXIT_WRITE_DR3] = emulate_on_interception, |
2344 | [SVM_EXIT_WRITE_DR4] = emulate_on_interception, | ||
2313 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, | 2345 | [SVM_EXIT_WRITE_DR5] = emulate_on_interception, |
2346 | [SVM_EXIT_WRITE_DR6] = emulate_on_interception, | ||
2314 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, | 2347 | [SVM_EXIT_WRITE_DR7] = emulate_on_interception, |
2315 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, | 2348 | [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, |
2316 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, | 2349 | [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, |
@@ -2383,20 +2416,10 @@ static int handle_exit(struct kvm_vcpu *vcpu) | |||
2383 | 2416 | ||
2384 | svm_complete_interrupts(svm); | 2417 | svm_complete_interrupts(svm); |
2385 | 2418 | ||
2386 | if (npt_enabled) { | 2419 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK)) |
2387 | int mmu_reload = 0; | ||
2388 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { | ||
2389 | svm_set_cr0(vcpu, svm->vmcb->save.cr0); | ||
2390 | mmu_reload = 1; | ||
2391 | } | ||
2392 | vcpu->arch.cr0 = svm->vmcb->save.cr0; | 2420 | vcpu->arch.cr0 = svm->vmcb->save.cr0; |
2421 | if (npt_enabled) | ||
2393 | vcpu->arch.cr3 = svm->vmcb->save.cr3; | 2422 | vcpu->arch.cr3 = svm->vmcb->save.cr3; |
2394 | if (mmu_reload) { | ||
2395 | kvm_mmu_reset_context(vcpu); | ||
2396 | kvm_mmu_load(vcpu); | ||
2397 | } | ||
2398 | } | ||
2399 | |||
2400 | 2423 | ||
2401 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2424 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2402 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2425 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
@@ -2798,12 +2821,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | |||
2798 | 2821 | ||
2799 | svm->vmcb->save.cr3 = root; | 2822 | svm->vmcb->save.cr3 = root; |
2800 | force_new_asid(vcpu); | 2823 | force_new_asid(vcpu); |
2801 | |||
2802 | if (vcpu->fpu_active) { | ||
2803 | svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); | ||
2804 | svm->vmcb->save.cr0 |= X86_CR0_TS; | ||
2805 | vcpu->fpu_active = 0; | ||
2806 | } | ||
2807 | } | 2824 | } |
2808 | 2825 | ||
2809 | static int is_disabled(void) | 2826 | static int is_disabled(void) |
@@ -2852,6 +2869,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
2852 | return 0; | 2869 | return 0; |
2853 | } | 2870 | } |
2854 | 2871 | ||
2872 | static void svm_cpuid_update(struct kvm_vcpu *vcpu) | ||
2873 | { | ||
2874 | } | ||
2875 | |||
2855 | static const struct trace_print_flags svm_exit_reasons_str[] = { | 2876 | static const struct trace_print_flags svm_exit_reasons_str[] = { |
2856 | { SVM_EXIT_READ_CR0, "read_cr0" }, | 2877 | { SVM_EXIT_READ_CR0, "read_cr0" }, |
2857 | { SVM_EXIT_READ_CR3, "read_cr3" }, | 2878 | { SVM_EXIT_READ_CR3, "read_cr3" }, |
@@ -2905,9 +2926,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = { | |||
2905 | { -1, NULL } | 2926 | { -1, NULL } |
2906 | }; | 2927 | }; |
2907 | 2928 | ||
2908 | static bool svm_gb_page_enable(void) | 2929 | static int svm_get_lpage_level(void) |
2909 | { | 2930 | { |
2910 | return true; | 2931 | return PT_PDPE_LEVEL; |
2932 | } | ||
2933 | |||
2934 | static bool svm_rdtscp_supported(void) | ||
2935 | { | ||
2936 | return false; | ||
2937 | } | ||
2938 | |||
2939 | static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) | ||
2940 | { | ||
2941 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2942 | |||
2943 | update_cr0_intercept(svm); | ||
2944 | svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; | ||
2911 | } | 2945 | } |
2912 | 2946 | ||
2913 | static struct kvm_x86_ops svm_x86_ops = { | 2947 | static struct kvm_x86_ops svm_x86_ops = { |
@@ -2936,6 +2970,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2936 | .set_segment = svm_set_segment, | 2970 | .set_segment = svm_set_segment, |
2937 | .get_cpl = svm_get_cpl, | 2971 | .get_cpl = svm_get_cpl, |
2938 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, | 2972 | .get_cs_db_l_bits = kvm_get_cs_db_l_bits, |
2973 | .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, | ||
2939 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, | 2974 | .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, |
2940 | .set_cr0 = svm_set_cr0, | 2975 | .set_cr0 = svm_set_cr0, |
2941 | .set_cr3 = svm_set_cr3, | 2976 | .set_cr3 = svm_set_cr3, |
@@ -2950,6 +2985,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2950 | .cache_reg = svm_cache_reg, | 2985 | .cache_reg = svm_cache_reg, |
2951 | .get_rflags = svm_get_rflags, | 2986 | .get_rflags = svm_get_rflags, |
2952 | .set_rflags = svm_set_rflags, | 2987 | .set_rflags = svm_set_rflags, |
2988 | .fpu_activate = svm_fpu_activate, | ||
2989 | .fpu_deactivate = svm_fpu_deactivate, | ||
2953 | 2990 | ||
2954 | .tlb_flush = svm_flush_tlb, | 2991 | .tlb_flush = svm_flush_tlb, |
2955 | 2992 | ||
@@ -2975,7 +3012,11 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2975 | .get_mt_mask = svm_get_mt_mask, | 3012 | .get_mt_mask = svm_get_mt_mask, |
2976 | 3013 | ||
2977 | .exit_reasons_str = svm_exit_reasons_str, | 3014 | .exit_reasons_str = svm_exit_reasons_str, |
2978 | .gb_page_enable = svm_gb_page_enable, | 3015 | .get_lpage_level = svm_get_lpage_level, |
3016 | |||
3017 | .cpuid_update = svm_cpuid_update, | ||
3018 | |||
3019 | .rdtscp_supported = svm_rdtscp_supported, | ||
2979 | }; | 3020 | }; |
2980 | 3021 | ||
2981 | static int __init svm_init(void) | 3022 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 816e0449db0b..6ad30a29f044 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -56,6 +56,38 @@ TRACE_EVENT(kvm_hypercall, | |||
56 | ); | 56 | ); |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Tracepoint for hypercall. | ||
60 | */ | ||
61 | TRACE_EVENT(kvm_hv_hypercall, | ||
62 | TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx, | ||
63 | __u64 ingpa, __u64 outgpa), | ||
64 | TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), | ||
65 | |||
66 | TP_STRUCT__entry( | ||
67 | __field( __u16, code ) | ||
68 | __field( bool, fast ) | ||
69 | __field( __u16, rep_cnt ) | ||
70 | __field( __u16, rep_idx ) | ||
71 | __field( __u64, ingpa ) | ||
72 | __field( __u64, outgpa ) | ||
73 | ), | ||
74 | |||
75 | TP_fast_assign( | ||
76 | __entry->code = code; | ||
77 | __entry->fast = fast; | ||
78 | __entry->rep_cnt = rep_cnt; | ||
79 | __entry->rep_idx = rep_idx; | ||
80 | __entry->ingpa = ingpa; | ||
81 | __entry->outgpa = outgpa; | ||
82 | ), | ||
83 | |||
84 | TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", | ||
85 | __entry->code, __entry->fast ? "fast" : "slow", | ||
86 | __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, | ||
87 | __entry->outgpa) | ||
88 | ); | ||
89 | |||
90 | /* | ||
59 | * Tracepoint for PIO. | 91 | * Tracepoint for PIO. |
60 | */ | 92 | */ |
61 | TRACE_EVENT(kvm_pio, | 93 | TRACE_EVENT(kvm_pio, |
@@ -214,28 +246,33 @@ TRACE_EVENT(kvm_page_fault, | |||
214 | * Tracepoint for guest MSR access. | 246 | * Tracepoint for guest MSR access. |
215 | */ | 247 | */ |
216 | TRACE_EVENT(kvm_msr, | 248 | TRACE_EVENT(kvm_msr, |
217 | TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data), | 249 | TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), |
218 | TP_ARGS(rw, ecx, data), | 250 | TP_ARGS(write, ecx, data, exception), |
219 | 251 | ||
220 | TP_STRUCT__entry( | 252 | TP_STRUCT__entry( |
221 | __field( unsigned int, rw ) | 253 | __field( unsigned, write ) |
222 | __field( unsigned int, ecx ) | 254 | __field( u32, ecx ) |
223 | __field( unsigned long, data ) | 255 | __field( u64, data ) |
256 | __field( u8, exception ) | ||
224 | ), | 257 | ), |
225 | 258 | ||
226 | TP_fast_assign( | 259 | TP_fast_assign( |
227 | __entry->rw = rw; | 260 | __entry->write = write; |
228 | __entry->ecx = ecx; | 261 | __entry->ecx = ecx; |
229 | __entry->data = data; | 262 | __entry->data = data; |
263 | __entry->exception = exception; | ||
230 | ), | 264 | ), |
231 | 265 | ||
232 | TP_printk("msr_%s %x = 0x%lx", | 266 | TP_printk("msr_%s %x = 0x%llx%s", |
233 | __entry->rw ? "write" : "read", | 267 | __entry->write ? "write" : "read", |
234 | __entry->ecx, __entry->data) | 268 | __entry->ecx, __entry->data, |
269 | __entry->exception ? " (#GP)" : "") | ||
235 | ); | 270 | ); |
236 | 271 | ||
237 | #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data) | 272 | #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) |
238 | #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data) | 273 | #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) |
274 | #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) | ||
275 | #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) | ||
239 | 276 | ||
240 | /* | 277 | /* |
241 | * Tracepoint for guest CR access. | 278 | * Tracepoint for guest CR access. |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d4918d6fc924..14873b9f8430 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -61,6 +61,21 @@ module_param_named(unrestricted_guest, | |||
61 | static int __read_mostly emulate_invalid_guest_state = 0; | 61 | static int __read_mostly emulate_invalid_guest_state = 0; |
62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
63 | 63 | ||
64 | #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ | ||
65 | (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) | ||
66 | #define KVM_GUEST_CR0_MASK \ | ||
67 | (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
68 | #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ | ||
69 | (X86_CR0_WP | X86_CR0_NE) | ||
70 | #define KVM_VM_CR0_ALWAYS_ON \ | ||
71 | (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) | ||
72 | #define KVM_CR4_GUEST_OWNED_BITS \ | ||
73 | (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | ||
74 | | X86_CR4_OSXMMEXCPT) | ||
75 | |||
76 | #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) | ||
77 | #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) | ||
78 | |||
64 | /* | 79 | /* |
65 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | 80 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: |
66 | * ple_gap: upper bound on the amount of time between two successive | 81 | * ple_gap: upper bound on the amount of time between two successive |
@@ -136,6 +151,8 @@ struct vcpu_vmx { | |||
136 | ktime_t entry_time; | 151 | ktime_t entry_time; |
137 | s64 vnmi_blocked_time; | 152 | s64 vnmi_blocked_time; |
138 | u32 exit_reason; | 153 | u32 exit_reason; |
154 | |||
155 | bool rdtscp_enabled; | ||
139 | }; | 156 | }; |
140 | 157 | ||
141 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 158 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -210,7 +227,7 @@ static const u32 vmx_msr_index[] = { | |||
210 | #ifdef CONFIG_X86_64 | 227 | #ifdef CONFIG_X86_64 |
211 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, | 228 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
212 | #endif | 229 | #endif |
213 | MSR_EFER, MSR_K6_STAR, | 230 | MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR, |
214 | }; | 231 | }; |
215 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 232 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
216 | 233 | ||
@@ -301,6 +318,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void) | |||
301 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); | 318 | return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); |
302 | } | 319 | } |
303 | 320 | ||
321 | static inline bool cpu_has_vmx_ept_1g_page(void) | ||
322 | { | ||
323 | return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); | ||
324 | } | ||
325 | |||
304 | static inline int cpu_has_vmx_invept_individual_addr(void) | 326 | static inline int cpu_has_vmx_invept_individual_addr(void) |
305 | { | 327 | { |
306 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); | 328 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
@@ -336,9 +358,7 @@ static inline int cpu_has_vmx_ple(void) | |||
336 | 358 | ||
337 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 359 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
338 | { | 360 | { |
339 | return flexpriority_enabled && | 361 | return flexpriority_enabled && irqchip_in_kernel(kvm); |
340 | (cpu_has_vmx_virtualize_apic_accesses()) && | ||
341 | (irqchip_in_kernel(kvm)); | ||
342 | } | 362 | } |
343 | 363 | ||
344 | static inline int cpu_has_vmx_vpid(void) | 364 | static inline int cpu_has_vmx_vpid(void) |
@@ -347,6 +367,12 @@ static inline int cpu_has_vmx_vpid(void) | |||
347 | SECONDARY_EXEC_ENABLE_VPID; | 367 | SECONDARY_EXEC_ENABLE_VPID; |
348 | } | 368 | } |
349 | 369 | ||
370 | static inline int cpu_has_vmx_rdtscp(void) | ||
371 | { | ||
372 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
373 | SECONDARY_EXEC_RDTSCP; | ||
374 | } | ||
375 | |||
350 | static inline int cpu_has_virtual_nmis(void) | 376 | static inline int cpu_has_virtual_nmis(void) |
351 | { | 377 | { |
352 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 378 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
@@ -551,22 +577,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
551 | { | 577 | { |
552 | u32 eb; | 578 | u32 eb; |
553 | 579 | ||
554 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); | 580 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | |
555 | if (!vcpu->fpu_active) | 581 | (1u << NM_VECTOR) | (1u << DB_VECTOR); |
556 | eb |= 1u << NM_VECTOR; | 582 | if ((vcpu->guest_debug & |
557 | /* | 583 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == |
558 | * Unconditionally intercept #DB so we can maintain dr6 without | 584 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) |
559 | * reading it every exit. | 585 | eb |= 1u << BP_VECTOR; |
560 | */ | ||
561 | eb |= 1u << DB_VECTOR; | ||
562 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | ||
563 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | ||
564 | eb |= 1u << BP_VECTOR; | ||
565 | } | ||
566 | if (to_vmx(vcpu)->rmode.vm86_active) | 586 | if (to_vmx(vcpu)->rmode.vm86_active) |
567 | eb = ~0; | 587 | eb = ~0; |
568 | if (enable_ept) | 588 | if (enable_ept) |
569 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 589 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
590 | if (vcpu->fpu_active) | ||
591 | eb &= ~(1u << NM_VECTOR); | ||
570 | vmcs_write32(EXCEPTION_BITMAP, eb); | 592 | vmcs_write32(EXCEPTION_BITMAP, eb); |
571 | } | 593 | } |
572 | 594 | ||
@@ -589,7 +611,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) | |||
589 | u64 guest_efer; | 611 | u64 guest_efer; |
590 | u64 ignore_bits; | 612 | u64 ignore_bits; |
591 | 613 | ||
592 | guest_efer = vmx->vcpu.arch.shadow_efer; | 614 | guest_efer = vmx->vcpu.arch.efer; |
593 | 615 | ||
594 | /* | 616 | /* |
595 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless | 617 | * NX is emulated; LMA and LME handled by hardware; SCE meaninless |
@@ -767,22 +789,30 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) | |||
767 | 789 | ||
768 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) | 790 | static void vmx_fpu_activate(struct kvm_vcpu *vcpu) |
769 | { | 791 | { |
792 | ulong cr0; | ||
793 | |||
770 | if (vcpu->fpu_active) | 794 | if (vcpu->fpu_active) |
771 | return; | 795 | return; |
772 | vcpu->fpu_active = 1; | 796 | vcpu->fpu_active = 1; |
773 | vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); | 797 | cr0 = vmcs_readl(GUEST_CR0); |
774 | if (vcpu->arch.cr0 & X86_CR0_TS) | 798 | cr0 &= ~(X86_CR0_TS | X86_CR0_MP); |
775 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | 799 | cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); |
800 | vmcs_writel(GUEST_CR0, cr0); | ||
776 | update_exception_bitmap(vcpu); | 801 | update_exception_bitmap(vcpu); |
802 | vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; | ||
803 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
777 | } | 804 | } |
778 | 805 | ||
806 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); | ||
807 | |||
779 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | 808 | static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) |
780 | { | 809 | { |
781 | if (!vcpu->fpu_active) | 810 | vmx_decache_cr0_guest_bits(vcpu); |
782 | return; | 811 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); |
783 | vcpu->fpu_active = 0; | ||
784 | vmcs_set_bits(GUEST_CR0, X86_CR0_TS); | ||
785 | update_exception_bitmap(vcpu); | 812 | update_exception_bitmap(vcpu); |
813 | vcpu->arch.cr0_guest_owned_bits = 0; | ||
814 | vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); | ||
815 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
786 | } | 816 | } |
787 | 817 | ||
788 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 818 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
@@ -878,6 +908,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
878 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 908 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
879 | } | 909 | } |
880 | 910 | ||
911 | static bool vmx_rdtscp_supported(void) | ||
912 | { | ||
913 | return cpu_has_vmx_rdtscp(); | ||
914 | } | ||
915 | |||
881 | /* | 916 | /* |
882 | * Swap MSR entry in host/guest MSR entry array. | 917 | * Swap MSR entry in host/guest MSR entry array. |
883 | */ | 918 | */ |
@@ -913,12 +948,15 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
913 | index = __find_msr_index(vmx, MSR_CSTAR); | 948 | index = __find_msr_index(vmx, MSR_CSTAR); |
914 | if (index >= 0) | 949 | if (index >= 0) |
915 | move_msr_up(vmx, index, save_nmsrs++); | 950 | move_msr_up(vmx, index, save_nmsrs++); |
951 | index = __find_msr_index(vmx, MSR_TSC_AUX); | ||
952 | if (index >= 0 && vmx->rdtscp_enabled) | ||
953 | move_msr_up(vmx, index, save_nmsrs++); | ||
916 | /* | 954 | /* |
917 | * MSR_K6_STAR is only needed on long mode guests, and only | 955 | * MSR_K6_STAR is only needed on long mode guests, and only |
918 | * if efer.sce is enabled. | 956 | * if efer.sce is enabled. |
919 | */ | 957 | */ |
920 | index = __find_msr_index(vmx, MSR_K6_STAR); | 958 | index = __find_msr_index(vmx, MSR_K6_STAR); |
921 | if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE)) | 959 | if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE)) |
922 | move_msr_up(vmx, index, save_nmsrs++); | 960 | move_msr_up(vmx, index, save_nmsrs++); |
923 | } | 961 | } |
924 | #endif | 962 | #endif |
@@ -1002,6 +1040,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
1002 | case MSR_IA32_SYSENTER_ESP: | 1040 | case MSR_IA32_SYSENTER_ESP: |
1003 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1041 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
1004 | break; | 1042 | break; |
1043 | case MSR_TSC_AUX: | ||
1044 | if (!to_vmx(vcpu)->rdtscp_enabled) | ||
1045 | return 1; | ||
1046 | /* Otherwise falls through */ | ||
1005 | default: | 1047 | default: |
1006 | vmx_load_host_state(to_vmx(vcpu)); | 1048 | vmx_load_host_state(to_vmx(vcpu)); |
1007 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1049 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
@@ -1065,7 +1107,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
1065 | vcpu->arch.pat = data; | 1107 | vcpu->arch.pat = data; |
1066 | break; | 1108 | break; |
1067 | } | 1109 | } |
1068 | /* Otherwise falls through to kvm_set_msr_common */ | 1110 | ret = kvm_set_msr_common(vcpu, msr_index, data); |
1111 | break; | ||
1112 | case MSR_TSC_AUX: | ||
1113 | if (!vmx->rdtscp_enabled) | ||
1114 | return 1; | ||
1115 | /* Check reserved bit, higher 32 bits should be zero */ | ||
1116 | if ((data >> 32) != 0) | ||
1117 | return 1; | ||
1118 | /* Otherwise falls through */ | ||
1069 | default: | 1119 | default: |
1070 | msr = find_msr_entry(vmx, msr_index); | 1120 | msr = find_msr_entry(vmx, msr_index); |
1071 | if (msr) { | 1121 | if (msr) { |
@@ -1224,6 +1274,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1224 | CPU_BASED_USE_IO_BITMAPS | | 1274 | CPU_BASED_USE_IO_BITMAPS | |
1225 | CPU_BASED_MOV_DR_EXITING | | 1275 | CPU_BASED_MOV_DR_EXITING | |
1226 | CPU_BASED_USE_TSC_OFFSETING | | 1276 | CPU_BASED_USE_TSC_OFFSETING | |
1277 | CPU_BASED_MWAIT_EXITING | | ||
1278 | CPU_BASED_MONITOR_EXITING | | ||
1227 | CPU_BASED_INVLPG_EXITING; | 1279 | CPU_BASED_INVLPG_EXITING; |
1228 | opt = CPU_BASED_TPR_SHADOW | | 1280 | opt = CPU_BASED_TPR_SHADOW | |
1229 | CPU_BASED_USE_MSR_BITMAPS | | 1281 | CPU_BASED_USE_MSR_BITMAPS | |
@@ -1243,7 +1295,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1243 | SECONDARY_EXEC_ENABLE_VPID | | 1295 | SECONDARY_EXEC_ENABLE_VPID | |
1244 | SECONDARY_EXEC_ENABLE_EPT | | 1296 | SECONDARY_EXEC_ENABLE_EPT | |
1245 | SECONDARY_EXEC_UNRESTRICTED_GUEST | | 1297 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
1246 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | 1298 | SECONDARY_EXEC_PAUSE_LOOP_EXITING | |
1299 | SECONDARY_EXEC_RDTSCP; | ||
1247 | if (adjust_vmx_controls(min2, opt2, | 1300 | if (adjust_vmx_controls(min2, opt2, |
1248 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1301 | MSR_IA32_VMX_PROCBASED_CTLS2, |
1249 | &_cpu_based_2nd_exec_control) < 0) | 1302 | &_cpu_based_2nd_exec_control) < 0) |
@@ -1457,8 +1510,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1457 | static gva_t rmode_tss_base(struct kvm *kvm) | 1510 | static gva_t rmode_tss_base(struct kvm *kvm) |
1458 | { | 1511 | { |
1459 | if (!kvm->arch.tss_addr) { | 1512 | if (!kvm->arch.tss_addr) { |
1460 | gfn_t base_gfn = kvm->memslots[0].base_gfn + | 1513 | struct kvm_memslots *slots; |
1461 | kvm->memslots[0].npages - 3; | 1514 | gfn_t base_gfn; |
1515 | |||
1516 | slots = rcu_dereference(kvm->memslots); | ||
1517 | base_gfn = kvm->memslots->memslots[0].base_gfn + | ||
1518 | kvm->memslots->memslots[0].npages - 3; | ||
1462 | return base_gfn << PAGE_SHIFT; | 1519 | return base_gfn << PAGE_SHIFT; |
1463 | } | 1520 | } |
1464 | return kvm->arch.tss_addr; | 1521 | return kvm->arch.tss_addr; |
@@ -1544,9 +1601,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
1544 | * of this msr depends on is_long_mode(). | 1601 | * of this msr depends on is_long_mode(). |
1545 | */ | 1602 | */ |
1546 | vmx_load_host_state(to_vmx(vcpu)); | 1603 | vmx_load_host_state(to_vmx(vcpu)); |
1547 | vcpu->arch.shadow_efer = efer; | 1604 | vcpu->arch.efer = efer; |
1548 | if (!msr) | ||
1549 | return; | ||
1550 | if (efer & EFER_LMA) { | 1605 | if (efer & EFER_LMA) { |
1551 | vmcs_write32(VM_ENTRY_CONTROLS, | 1606 | vmcs_write32(VM_ENTRY_CONTROLS, |
1552 | vmcs_read32(VM_ENTRY_CONTROLS) | | 1607 | vmcs_read32(VM_ENTRY_CONTROLS) | |
@@ -1576,13 +1631,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
1576 | (guest_tr_ar & ~AR_TYPE_MASK) | 1631 | (guest_tr_ar & ~AR_TYPE_MASK) |
1577 | | AR_TYPE_BUSY_64_TSS); | 1632 | | AR_TYPE_BUSY_64_TSS); |
1578 | } | 1633 | } |
1579 | vcpu->arch.shadow_efer |= EFER_LMA; | 1634 | vcpu->arch.efer |= EFER_LMA; |
1580 | vmx_set_efer(vcpu, vcpu->arch.shadow_efer); | 1635 | vmx_set_efer(vcpu, vcpu->arch.efer); |
1581 | } | 1636 | } |
1582 | 1637 | ||
1583 | static void exit_lmode(struct kvm_vcpu *vcpu) | 1638 | static void exit_lmode(struct kvm_vcpu *vcpu) |
1584 | { | 1639 | { |
1585 | vcpu->arch.shadow_efer &= ~EFER_LMA; | 1640 | vcpu->arch.efer &= ~EFER_LMA; |
1586 | 1641 | ||
1587 | vmcs_write32(VM_ENTRY_CONTROLS, | 1642 | vmcs_write32(VM_ENTRY_CONTROLS, |
1588 | vmcs_read32(VM_ENTRY_CONTROLS) | 1643 | vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1598,10 +1653,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | |||
1598 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1653 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1599 | } | 1654 | } |
1600 | 1655 | ||
1656 | static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) | ||
1657 | { | ||
1658 | ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; | ||
1659 | |||
1660 | vcpu->arch.cr0 &= ~cr0_guest_owned_bits; | ||
1661 | vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; | ||
1662 | } | ||
1663 | |||
1601 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1664 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
1602 | { | 1665 | { |
1603 | vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK; | 1666 | ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; |
1604 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK; | 1667 | |
1668 | vcpu->arch.cr4 &= ~cr4_guest_owned_bits; | ||
1669 | vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; | ||
1605 | } | 1670 | } |
1606 | 1671 | ||
1607 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) | 1672 | static void ept_load_pdptrs(struct kvm_vcpu *vcpu) |
@@ -1646,7 +1711,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1646 | (CPU_BASED_CR3_LOAD_EXITING | | 1711 | (CPU_BASED_CR3_LOAD_EXITING | |
1647 | CPU_BASED_CR3_STORE_EXITING)); | 1712 | CPU_BASED_CR3_STORE_EXITING)); |
1648 | vcpu->arch.cr0 = cr0; | 1713 | vcpu->arch.cr0 = cr0; |
1649 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1714 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1650 | } else if (!is_paging(vcpu)) { | 1715 | } else if (!is_paging(vcpu)) { |
1651 | /* From nonpaging to paging */ | 1716 | /* From nonpaging to paging */ |
1652 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1717 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
@@ -1654,23 +1719,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1654 | ~(CPU_BASED_CR3_LOAD_EXITING | | 1719 | ~(CPU_BASED_CR3_LOAD_EXITING | |
1655 | CPU_BASED_CR3_STORE_EXITING)); | 1720 | CPU_BASED_CR3_STORE_EXITING)); |
1656 | vcpu->arch.cr0 = cr0; | 1721 | vcpu->arch.cr0 = cr0; |
1657 | vmx_set_cr4(vcpu, vcpu->arch.cr4); | 1722 | vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); |
1658 | } | 1723 | } |
1659 | 1724 | ||
1660 | if (!(cr0 & X86_CR0_WP)) | 1725 | if (!(cr0 & X86_CR0_WP)) |
1661 | *hw_cr0 &= ~X86_CR0_WP; | 1726 | *hw_cr0 &= ~X86_CR0_WP; |
1662 | } | 1727 | } |
1663 | 1728 | ||
1664 | static void ept_update_paging_mode_cr4(unsigned long *hw_cr4, | ||
1665 | struct kvm_vcpu *vcpu) | ||
1666 | { | ||
1667 | if (!is_paging(vcpu)) { | ||
1668 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1669 | *hw_cr4 |= X86_CR4_PSE; | ||
1670 | } else if (!(vcpu->arch.cr4 & X86_CR4_PAE)) | ||
1671 | *hw_cr4 &= ~X86_CR4_PAE; | ||
1672 | } | ||
1673 | |||
1674 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 1729 | static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
1675 | { | 1730 | { |
1676 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1731 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -1682,8 +1737,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1682 | else | 1737 | else |
1683 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; | 1738 | hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; |
1684 | 1739 | ||
1685 | vmx_fpu_deactivate(vcpu); | ||
1686 | |||
1687 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) | 1740 | if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) |
1688 | enter_pmode(vcpu); | 1741 | enter_pmode(vcpu); |
1689 | 1742 | ||
@@ -1691,7 +1744,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1691 | enter_rmode(vcpu); | 1744 | enter_rmode(vcpu); |
1692 | 1745 | ||
1693 | #ifdef CONFIG_X86_64 | 1746 | #ifdef CONFIG_X86_64 |
1694 | if (vcpu->arch.shadow_efer & EFER_LME) { | 1747 | if (vcpu->arch.efer & EFER_LME) { |
1695 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) | 1748 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) |
1696 | enter_lmode(vcpu); | 1749 | enter_lmode(vcpu); |
1697 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) | 1750 | if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) |
@@ -1702,12 +1755,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1702 | if (enable_ept) | 1755 | if (enable_ept) |
1703 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 1756 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
1704 | 1757 | ||
1758 | if (!vcpu->fpu_active) | ||
1759 | hw_cr0 |= X86_CR0_TS | X86_CR0_MP; | ||
1760 | |||
1705 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1761 | vmcs_writel(CR0_READ_SHADOW, cr0); |
1706 | vmcs_writel(GUEST_CR0, hw_cr0); | 1762 | vmcs_writel(GUEST_CR0, hw_cr0); |
1707 | vcpu->arch.cr0 = cr0; | 1763 | vcpu->arch.cr0 = cr0; |
1708 | |||
1709 | if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) | ||
1710 | vmx_fpu_activate(vcpu); | ||
1711 | } | 1764 | } |
1712 | 1765 | ||
1713 | static u64 construct_eptp(unsigned long root_hpa) | 1766 | static u64 construct_eptp(unsigned long root_hpa) |
@@ -1738,8 +1791,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1738 | 1791 | ||
1739 | vmx_flush_tlb(vcpu); | 1792 | vmx_flush_tlb(vcpu); |
1740 | vmcs_writel(GUEST_CR3, guest_cr3); | 1793 | vmcs_writel(GUEST_CR3, guest_cr3); |
1741 | if (vcpu->arch.cr0 & X86_CR0_PE) | ||
1742 | vmx_fpu_deactivate(vcpu); | ||
1743 | } | 1794 | } |
1744 | 1795 | ||
1745 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1796 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
@@ -1748,8 +1799,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
1748 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1799 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1749 | 1800 | ||
1750 | vcpu->arch.cr4 = cr4; | 1801 | vcpu->arch.cr4 = cr4; |
1751 | if (enable_ept) | 1802 | if (enable_ept) { |
1752 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | 1803 | if (!is_paging(vcpu)) { |
1804 | hw_cr4 &= ~X86_CR4_PAE; | ||
1805 | hw_cr4 |= X86_CR4_PSE; | ||
1806 | } else if (!(cr4 & X86_CR4_PAE)) { | ||
1807 | hw_cr4 &= ~X86_CR4_PAE; | ||
1808 | } | ||
1809 | } | ||
1753 | 1810 | ||
1754 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1811 | vmcs_writel(CR4_READ_SHADOW, cr4); |
1755 | vmcs_writel(GUEST_CR4, hw_cr4); | 1812 | vmcs_writel(GUEST_CR4, hw_cr4); |
@@ -1787,7 +1844,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
1787 | 1844 | ||
1788 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 1845 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
1789 | { | 1846 | { |
1790 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */ | 1847 | if (!is_protmode(vcpu)) |
1791 | return 0; | 1848 | return 0; |
1792 | 1849 | ||
1793 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ | 1850 | if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */ |
@@ -2042,7 +2099,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | |||
2042 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | 2099 | static bool guest_state_valid(struct kvm_vcpu *vcpu) |
2043 | { | 2100 | { |
2044 | /* real mode guest state checks */ | 2101 | /* real mode guest state checks */ |
2045 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) { | 2102 | if (!is_protmode(vcpu)) { |
2046 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | 2103 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) |
2047 | return false; | 2104 | return false; |
2048 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | 2105 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) |
@@ -2175,7 +2232,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
2175 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2232 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2176 | int r = 0; | 2233 | int r = 0; |
2177 | 2234 | ||
2178 | down_write(&kvm->slots_lock); | 2235 | mutex_lock(&kvm->slots_lock); |
2179 | if (kvm->arch.apic_access_page) | 2236 | if (kvm->arch.apic_access_page) |
2180 | goto out; | 2237 | goto out; |
2181 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; | 2238 | kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; |
@@ -2188,7 +2245,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
2188 | 2245 | ||
2189 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 2246 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
2190 | out: | 2247 | out: |
2191 | up_write(&kvm->slots_lock); | 2248 | mutex_unlock(&kvm->slots_lock); |
2192 | return r; | 2249 | return r; |
2193 | } | 2250 | } |
2194 | 2251 | ||
@@ -2197,7 +2254,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2197 | struct kvm_userspace_memory_region kvm_userspace_mem; | 2254 | struct kvm_userspace_memory_region kvm_userspace_mem; |
2198 | int r = 0; | 2255 | int r = 0; |
2199 | 2256 | ||
2200 | down_write(&kvm->slots_lock); | 2257 | mutex_lock(&kvm->slots_lock); |
2201 | if (kvm->arch.ept_identity_pagetable) | 2258 | if (kvm->arch.ept_identity_pagetable) |
2202 | goto out; | 2259 | goto out; |
2203 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; | 2260 | kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; |
@@ -2212,7 +2269,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
2212 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2269 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
2213 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); | 2270 | kvm->arch.ept_identity_map_addr >> PAGE_SHIFT); |
2214 | out: | 2271 | out: |
2215 | up_write(&kvm->slots_lock); | 2272 | mutex_unlock(&kvm->slots_lock); |
2216 | return r; | 2273 | return r; |
2217 | } | 2274 | } |
2218 | 2275 | ||
@@ -2384,14 +2441,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2384 | for (i = 0; i < NR_VMX_MSR; ++i) { | 2441 | for (i = 0; i < NR_VMX_MSR; ++i) { |
2385 | u32 index = vmx_msr_index[i]; | 2442 | u32 index = vmx_msr_index[i]; |
2386 | u32 data_low, data_high; | 2443 | u32 data_low, data_high; |
2387 | u64 data; | ||
2388 | int j = vmx->nmsrs; | 2444 | int j = vmx->nmsrs; |
2389 | 2445 | ||
2390 | if (rdmsr_safe(index, &data_low, &data_high) < 0) | 2446 | if (rdmsr_safe(index, &data_low, &data_high) < 0) |
2391 | continue; | 2447 | continue; |
2392 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2448 | if (wrmsr_safe(index, data_low, data_high) < 0) |
2393 | continue; | 2449 | continue; |
2394 | data = data_low | ((u64)data_high << 32); | ||
2395 | vmx->guest_msrs[j].index = i; | 2450 | vmx->guest_msrs[j].index = i; |
2396 | vmx->guest_msrs[j].data = 0; | 2451 | vmx->guest_msrs[j].data = 0; |
2397 | vmx->guest_msrs[j].mask = -1ull; | 2452 | vmx->guest_msrs[j].mask = -1ull; |
@@ -2404,7 +2459,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2404 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 2459 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); |
2405 | 2460 | ||
2406 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 2461 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
2407 | vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); | 2462 | vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; |
2463 | if (enable_ept) | ||
2464 | vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; | ||
2465 | vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); | ||
2408 | 2466 | ||
2409 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; | 2467 | tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; |
2410 | rdtscll(tsc_this); | 2468 | rdtscll(tsc_this); |
@@ -2429,10 +2487,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2429 | { | 2487 | { |
2430 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2488 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2431 | u64 msr; | 2489 | u64 msr; |
2432 | int ret; | 2490 | int ret, idx; |
2433 | 2491 | ||
2434 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2492 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2435 | down_read(&vcpu->kvm->slots_lock); | 2493 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
2436 | if (!init_rmode(vmx->vcpu.kvm)) { | 2494 | if (!init_rmode(vmx->vcpu.kvm)) { |
2437 | ret = -ENOMEM; | 2495 | ret = -ENOMEM; |
2438 | goto out; | 2496 | goto out; |
@@ -2526,7 +2584,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2526 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2584 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
2527 | 2585 | ||
2528 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; | 2586 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
2529 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2587 | vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ |
2530 | vmx_set_cr4(&vmx->vcpu, 0); | 2588 | vmx_set_cr4(&vmx->vcpu, 0); |
2531 | vmx_set_efer(&vmx->vcpu, 0); | 2589 | vmx_set_efer(&vmx->vcpu, 0); |
2532 | vmx_fpu_activate(&vmx->vcpu); | 2590 | vmx_fpu_activate(&vmx->vcpu); |
@@ -2540,7 +2598,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2540 | vmx->emulation_required = 0; | 2598 | vmx->emulation_required = 0; |
2541 | 2599 | ||
2542 | out: | 2600 | out: |
2543 | up_read(&vcpu->kvm->slots_lock); | 2601 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
2544 | return ret; | 2602 | return ret; |
2545 | } | 2603 | } |
2546 | 2604 | ||
@@ -2717,6 +2775,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2717 | kvm_queue_exception(vcpu, vec); | 2775 | kvm_queue_exception(vcpu, vec); |
2718 | return 1; | 2776 | return 1; |
2719 | case BP_VECTOR: | 2777 | case BP_VECTOR: |
2778 | /* | ||
2779 | * Update instruction length as we may reinject the exception | ||
2780 | * from user space while in guest debugging mode. | ||
2781 | */ | ||
2782 | to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = | ||
2783 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2720 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 2784 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
2721 | return 0; | 2785 | return 0; |
2722 | /* fall through */ | 2786 | /* fall through */ |
@@ -2839,6 +2903,13 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
2839 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); | 2903 | kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7); |
2840 | /* fall through */ | 2904 | /* fall through */ |
2841 | case BP_VECTOR: | 2905 | case BP_VECTOR: |
2906 | /* | ||
2907 | * Update instruction length as we may reinject #BP from | ||
2908 | * user space while in guest debugging mode. Reading it for | ||
2909 | * #DB as well causes no harm, it is not used in that case. | ||
2910 | */ | ||
2911 | vmx->vcpu.arch.event_exit_inst_len = | ||
2912 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | ||
2842 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 2913 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
2843 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; | 2914 | kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip; |
2844 | kvm_run->debug.arch.exception = ex_no; | 2915 | kvm_run->debug.arch.exception = ex_no; |
@@ -2940,11 +3011,10 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
2940 | }; | 3011 | }; |
2941 | break; | 3012 | break; |
2942 | case 2: /* clts */ | 3013 | case 2: /* clts */ |
2943 | vmx_fpu_deactivate(vcpu); | 3014 | vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
2944 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 3015 | trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); |
2945 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | ||
2946 | vmx_fpu_activate(vcpu); | ||
2947 | skip_emulated_instruction(vcpu); | 3016 | skip_emulated_instruction(vcpu); |
3017 | vmx_fpu_activate(vcpu); | ||
2948 | return 1; | 3018 | return 1; |
2949 | case 1: /*mov from cr*/ | 3019 | case 1: /*mov from cr*/ |
2950 | switch (cr) { | 3020 | switch (cr) { |
@@ -2962,7 +3032,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
2962 | } | 3032 | } |
2963 | break; | 3033 | break; |
2964 | case 3: /* lmsw */ | 3034 | case 3: /* lmsw */ |
2965 | kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f); | 3035 | val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f; |
3036 | trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val); | ||
3037 | kvm_lmsw(vcpu, val); | ||
2966 | 3038 | ||
2967 | skip_emulated_instruction(vcpu); | 3039 | skip_emulated_instruction(vcpu); |
2968 | return 1; | 3040 | return 1; |
@@ -2975,12 +3047,22 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
2975 | return 0; | 3047 | return 0; |
2976 | } | 3048 | } |
2977 | 3049 | ||
3050 | static int check_dr_alias(struct kvm_vcpu *vcpu) | ||
3051 | { | ||
3052 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { | ||
3053 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3054 | return -1; | ||
3055 | } | ||
3056 | return 0; | ||
3057 | } | ||
3058 | |||
2978 | static int handle_dr(struct kvm_vcpu *vcpu) | 3059 | static int handle_dr(struct kvm_vcpu *vcpu) |
2979 | { | 3060 | { |
2980 | unsigned long exit_qualification; | 3061 | unsigned long exit_qualification; |
2981 | unsigned long val; | 3062 | unsigned long val; |
2982 | int dr, reg; | 3063 | int dr, reg; |
2983 | 3064 | ||
3065 | /* Do not handle if the CPL > 0, will trigger GP on re-entry */ | ||
2984 | if (!kvm_require_cpl(vcpu, 0)) | 3066 | if (!kvm_require_cpl(vcpu, 0)) |
2985 | return 1; | 3067 | return 1; |
2986 | dr = vmcs_readl(GUEST_DR7); | 3068 | dr = vmcs_readl(GUEST_DR7); |
@@ -3016,14 +3098,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3016 | case 0 ... 3: | 3098 | case 0 ... 3: |
3017 | val = vcpu->arch.db[dr]; | 3099 | val = vcpu->arch.db[dr]; |
3018 | break; | 3100 | break; |
3101 | case 4: | ||
3102 | if (check_dr_alias(vcpu) < 0) | ||
3103 | return 1; | ||
3104 | /* fall through */ | ||
3019 | case 6: | 3105 | case 6: |
3020 | val = vcpu->arch.dr6; | 3106 | val = vcpu->arch.dr6; |
3021 | break; | 3107 | break; |
3022 | case 7: | 3108 | case 5: |
3109 | if (check_dr_alias(vcpu) < 0) | ||
3110 | return 1; | ||
3111 | /* fall through */ | ||
3112 | default: /* 7 */ | ||
3023 | val = vcpu->arch.dr7; | 3113 | val = vcpu->arch.dr7; |
3024 | break; | 3114 | break; |
3025 | default: | ||
3026 | val = 0; | ||
3027 | } | 3115 | } |
3028 | kvm_register_write(vcpu, reg, val); | 3116 | kvm_register_write(vcpu, reg, val); |
3029 | } else { | 3117 | } else { |
@@ -3034,21 +3122,25 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
3034 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | 3122 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) |
3035 | vcpu->arch.eff_db[dr] = val; | 3123 | vcpu->arch.eff_db[dr] = val; |
3036 | break; | 3124 | break; |
3037 | case 4 ... 5: | 3125 | case 4: |
3038 | if (vcpu->arch.cr4 & X86_CR4_DE) | 3126 | if (check_dr_alias(vcpu) < 0) |
3039 | kvm_queue_exception(vcpu, UD_VECTOR); | 3127 | return 1; |
3040 | break; | 3128 | /* fall through */ |
3041 | case 6: | 3129 | case 6: |
3042 | if (val & 0xffffffff00000000ULL) { | 3130 | if (val & 0xffffffff00000000ULL) { |
3043 | kvm_queue_exception(vcpu, GP_VECTOR); | 3131 | kvm_inject_gp(vcpu, 0); |
3044 | break; | 3132 | return 1; |
3045 | } | 3133 | } |
3046 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 3134 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
3047 | break; | 3135 | break; |
3048 | case 7: | 3136 | case 5: |
3137 | if (check_dr_alias(vcpu) < 0) | ||
3138 | return 1; | ||
3139 | /* fall through */ | ||
3140 | default: /* 7 */ | ||
3049 | if (val & 0xffffffff00000000ULL) { | 3141 | if (val & 0xffffffff00000000ULL) { |
3050 | kvm_queue_exception(vcpu, GP_VECTOR); | 3142 | kvm_inject_gp(vcpu, 0); |
3051 | break; | 3143 | return 1; |
3052 | } | 3144 | } |
3053 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; | 3145 | vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; |
3054 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { | 3146 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { |
@@ -3075,6 +3167,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu) | |||
3075 | u64 data; | 3167 | u64 data; |
3076 | 3168 | ||
3077 | if (vmx_get_msr(vcpu, ecx, &data)) { | 3169 | if (vmx_get_msr(vcpu, ecx, &data)) { |
3170 | trace_kvm_msr_read_ex(ecx); | ||
3078 | kvm_inject_gp(vcpu, 0); | 3171 | kvm_inject_gp(vcpu, 0); |
3079 | return 1; | 3172 | return 1; |
3080 | } | 3173 | } |
@@ -3094,13 +3187,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) | |||
3094 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3187 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
3095 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 3188 | | ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
3096 | 3189 | ||
3097 | trace_kvm_msr_write(ecx, data); | ||
3098 | |||
3099 | if (vmx_set_msr(vcpu, ecx, data) != 0) { | 3190 | if (vmx_set_msr(vcpu, ecx, data) != 0) { |
3191 | trace_kvm_msr_write_ex(ecx, data); | ||
3100 | kvm_inject_gp(vcpu, 0); | 3192 | kvm_inject_gp(vcpu, 0); |
3101 | return 1; | 3193 | return 1; |
3102 | } | 3194 | } |
3103 | 3195 | ||
3196 | trace_kvm_msr_write(ecx, data); | ||
3104 | skip_emulated_instruction(vcpu); | 3197 | skip_emulated_instruction(vcpu); |
3105 | return 1; | 3198 | return 1; |
3106 | } | 3199 | } |
@@ -3385,7 +3478,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
3385 | } | 3478 | } |
3386 | 3479 | ||
3387 | if (err != EMULATE_DONE) { | 3480 | if (err != EMULATE_DONE) { |
3388 | kvm_report_emulation_failure(vcpu, "emulation failure"); | ||
3389 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 3481 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
3390 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 3482 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
3391 | vcpu->run->internal.ndata = 0; | 3483 | vcpu->run->internal.ndata = 0; |
@@ -3416,6 +3508,12 @@ static int handle_pause(struct kvm_vcpu *vcpu) | |||
3416 | return 1; | 3508 | return 1; |
3417 | } | 3509 | } |
3418 | 3510 | ||
3511 | static int handle_invalid_op(struct kvm_vcpu *vcpu) | ||
3512 | { | ||
3513 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3514 | return 1; | ||
3515 | } | ||
3516 | |||
3419 | /* | 3517 | /* |
3420 | * The exit handlers return 1 if the exit was handled fully and guest execution | 3518 | * The exit handlers return 1 if the exit was handled fully and guest execution |
3421 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3519 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -3453,6 +3551,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { | |||
3453 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3551 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3454 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3552 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
3455 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | 3553 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, |
3554 | [EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op, | ||
3555 | [EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op, | ||
3456 | }; | 3556 | }; |
3457 | 3557 | ||
3458 | static const int kvm_vmx_max_exit_handlers = | 3558 | static const int kvm_vmx_max_exit_handlers = |
@@ -3686,9 +3786,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3686 | */ | 3786 | */ |
3687 | vmcs_writel(HOST_CR0, read_cr0()); | 3787 | vmcs_writel(HOST_CR0, read_cr0()); |
3688 | 3788 | ||
3689 | if (vcpu->arch.switch_db_regs) | ||
3690 | set_debugreg(vcpu->arch.dr6, 6); | ||
3691 | |||
3692 | asm( | 3789 | asm( |
3693 | /* Store host registers */ | 3790 | /* Store host registers */ |
3694 | "push %%"R"dx; push %%"R"bp;" | 3791 | "push %%"R"dx; push %%"R"bp;" |
@@ -3789,9 +3886,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3789 | | (1 << VCPU_EXREG_PDPTR)); | 3886 | | (1 << VCPU_EXREG_PDPTR)); |
3790 | vcpu->arch.regs_dirty = 0; | 3887 | vcpu->arch.regs_dirty = 0; |
3791 | 3888 | ||
3792 | if (vcpu->arch.switch_db_regs) | ||
3793 | get_debugreg(vcpu->arch.dr6, 6); | ||
3794 | |||
3795 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3889 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
3796 | if (vmx->rmode.irq.pending) | 3890 | if (vmx->rmode.irq.pending) |
3797 | fixup_rmode_irq(vmx); | 3891 | fixup_rmode_irq(vmx); |
@@ -3920,7 +4014,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3920 | * b. VT-d with snooping control feature: snooping control feature of | 4014 | * b. VT-d with snooping control feature: snooping control feature of |
3921 | * VT-d engine can guarantee the cache correctness. Just set it | 4015 | * VT-d engine can guarantee the cache correctness. Just set it |
3922 | * to WB to keep consistent with host. So the same as item 3. | 4016 | * to WB to keep consistent with host. So the same as item 3. |
3923 | * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep | 4017 | * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep |
3924 | * consistent with host MTRR | 4018 | * consistent with host MTRR |
3925 | */ | 4019 | */ |
3926 | if (is_mmio) | 4020 | if (is_mmio) |
@@ -3931,37 +4025,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) | |||
3931 | VMX_EPT_MT_EPTE_SHIFT; | 4025 | VMX_EPT_MT_EPTE_SHIFT; |
3932 | else | 4026 | else |
3933 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | 4027 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) |
3934 | | VMX_EPT_IGMT_BIT; | 4028 | | VMX_EPT_IPAT_BIT; |
3935 | 4029 | ||
3936 | return ret; | 4030 | return ret; |
3937 | } | 4031 | } |
3938 | 4032 | ||
4033 | #define _ER(x) { EXIT_REASON_##x, #x } | ||
4034 | |||
3939 | static const struct trace_print_flags vmx_exit_reasons_str[] = { | 4035 | static const struct trace_print_flags vmx_exit_reasons_str[] = { |
3940 | { EXIT_REASON_EXCEPTION_NMI, "exception" }, | 4036 | _ER(EXCEPTION_NMI), |
3941 | { EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" }, | 4037 | _ER(EXTERNAL_INTERRUPT), |
3942 | { EXIT_REASON_TRIPLE_FAULT, "triple_fault" }, | 4038 | _ER(TRIPLE_FAULT), |
3943 | { EXIT_REASON_NMI_WINDOW, "nmi_window" }, | 4039 | _ER(PENDING_INTERRUPT), |
3944 | { EXIT_REASON_IO_INSTRUCTION, "io_instruction" }, | 4040 | _ER(NMI_WINDOW), |
3945 | { EXIT_REASON_CR_ACCESS, "cr_access" }, | 4041 | _ER(TASK_SWITCH), |
3946 | { EXIT_REASON_DR_ACCESS, "dr_access" }, | 4042 | _ER(CPUID), |
3947 | { EXIT_REASON_CPUID, "cpuid" }, | 4043 | _ER(HLT), |
3948 | { EXIT_REASON_MSR_READ, "rdmsr" }, | 4044 | _ER(INVLPG), |
3949 | { EXIT_REASON_MSR_WRITE, "wrmsr" }, | 4045 | _ER(RDPMC), |
3950 | { EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" }, | 4046 | _ER(RDTSC), |
3951 | { EXIT_REASON_HLT, "halt" }, | 4047 | _ER(VMCALL), |
3952 | { EXIT_REASON_INVLPG, "invlpg" }, | 4048 | _ER(VMCLEAR), |
3953 | { EXIT_REASON_VMCALL, "hypercall" }, | 4049 | _ER(VMLAUNCH), |
3954 | { EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" }, | 4050 | _ER(VMPTRLD), |
3955 | { EXIT_REASON_APIC_ACCESS, "apic_access" }, | 4051 | _ER(VMPTRST), |
3956 | { EXIT_REASON_WBINVD, "wbinvd" }, | 4052 | _ER(VMREAD), |
3957 | { EXIT_REASON_TASK_SWITCH, "task_switch" }, | 4053 | _ER(VMRESUME), |
3958 | { EXIT_REASON_EPT_VIOLATION, "ept_violation" }, | 4054 | _ER(VMWRITE), |
4055 | _ER(VMOFF), | ||
4056 | _ER(VMON), | ||
4057 | _ER(CR_ACCESS), | ||
4058 | _ER(DR_ACCESS), | ||
4059 | _ER(IO_INSTRUCTION), | ||
4060 | _ER(MSR_READ), | ||
4061 | _ER(MSR_WRITE), | ||
4062 | _ER(MWAIT_INSTRUCTION), | ||
4063 | _ER(MONITOR_INSTRUCTION), | ||
4064 | _ER(PAUSE_INSTRUCTION), | ||
4065 | _ER(MCE_DURING_VMENTRY), | ||
4066 | _ER(TPR_BELOW_THRESHOLD), | ||
4067 | _ER(APIC_ACCESS), | ||
4068 | _ER(EPT_VIOLATION), | ||
4069 | _ER(EPT_MISCONFIG), | ||
4070 | _ER(WBINVD), | ||
3959 | { -1, NULL } | 4071 | { -1, NULL } |
3960 | }; | 4072 | }; |
3961 | 4073 | ||
3962 | static bool vmx_gb_page_enable(void) | 4074 | #undef _ER |
4075 | |||
4076 | static int vmx_get_lpage_level(void) | ||
4077 | { | ||
4078 | if (enable_ept && !cpu_has_vmx_ept_1g_page()) | ||
4079 | return PT_DIRECTORY_LEVEL; | ||
4080 | else | ||
4081 | /* For shadow and EPT supported 1GB page */ | ||
4082 | return PT_PDPE_LEVEL; | ||
4083 | } | ||
4084 | |||
4085 | static inline u32 bit(int bitno) | ||
4086 | { | ||
4087 | return 1 << (bitno & 31); | ||
4088 | } | ||
4089 | |||
4090 | static void vmx_cpuid_update(struct kvm_vcpu *vcpu) | ||
3963 | { | 4091 | { |
3964 | return false; | 4092 | struct kvm_cpuid_entry2 *best; |
4093 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
4094 | u32 exec_control; | ||
4095 | |||
4096 | vmx->rdtscp_enabled = false; | ||
4097 | if (vmx_rdtscp_supported()) { | ||
4098 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | ||
4099 | if (exec_control & SECONDARY_EXEC_RDTSCP) { | ||
4100 | best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); | ||
4101 | if (best && (best->edx & bit(X86_FEATURE_RDTSCP))) | ||
4102 | vmx->rdtscp_enabled = true; | ||
4103 | else { | ||
4104 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | ||
4105 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, | ||
4106 | exec_control); | ||
4107 | } | ||
4108 | } | ||
4109 | } | ||
3965 | } | 4110 | } |
3966 | 4111 | ||
3967 | static struct kvm_x86_ops vmx_x86_ops = { | 4112 | static struct kvm_x86_ops vmx_x86_ops = { |
@@ -3990,6 +4135,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3990 | .set_segment = vmx_set_segment, | 4135 | .set_segment = vmx_set_segment, |
3991 | .get_cpl = vmx_get_cpl, | 4136 | .get_cpl = vmx_get_cpl, |
3992 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, | 4137 | .get_cs_db_l_bits = vmx_get_cs_db_l_bits, |
4138 | .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, | ||
3993 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, | 4139 | .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, |
3994 | .set_cr0 = vmx_set_cr0, | 4140 | .set_cr0 = vmx_set_cr0, |
3995 | .set_cr3 = vmx_set_cr3, | 4141 | .set_cr3 = vmx_set_cr3, |
@@ -4002,6 +4148,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4002 | .cache_reg = vmx_cache_reg, | 4148 | .cache_reg = vmx_cache_reg, |
4003 | .get_rflags = vmx_get_rflags, | 4149 | .get_rflags = vmx_get_rflags, |
4004 | .set_rflags = vmx_set_rflags, | 4150 | .set_rflags = vmx_set_rflags, |
4151 | .fpu_activate = vmx_fpu_activate, | ||
4152 | .fpu_deactivate = vmx_fpu_deactivate, | ||
4005 | 4153 | ||
4006 | .tlb_flush = vmx_flush_tlb, | 4154 | .tlb_flush = vmx_flush_tlb, |
4007 | 4155 | ||
@@ -4027,7 +4175,11 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
4027 | .get_mt_mask = vmx_get_mt_mask, | 4175 | .get_mt_mask = vmx_get_mt_mask, |
4028 | 4176 | ||
4029 | .exit_reasons_str = vmx_exit_reasons_str, | 4177 | .exit_reasons_str = vmx_exit_reasons_str, |
4030 | .gb_page_enable = vmx_gb_page_enable, | 4178 | .get_lpage_level = vmx_get_lpage_level, |
4179 | |||
4180 | .cpuid_update = vmx_cpuid_update, | ||
4181 | |||
4182 | .rdtscp_supported = vmx_rdtscp_supported, | ||
4031 | }; | 4183 | }; |
4032 | 4184 | ||
4033 | static int __init vmx_init(void) | 4185 | static int __init vmx_init(void) |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a1e1bc9d412d..e46282a56565 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
41 | #include <linux/srcu.h> | ||
41 | #include <trace/events/kvm.h> | 42 | #include <trace/events/kvm.h> |
42 | #undef TRACE_INCLUDE_FILE | 43 | #undef TRACE_INCLUDE_FILE |
43 | #define CREATE_TRACE_POINTS | 44 | #define CREATE_TRACE_POINTS |
@@ -93,16 +94,16 @@ module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | |||
93 | 94 | ||
94 | struct kvm_shared_msrs_global { | 95 | struct kvm_shared_msrs_global { |
95 | int nr; | 96 | int nr; |
96 | struct kvm_shared_msr { | 97 | u32 msrs[KVM_NR_SHARED_MSRS]; |
97 | u32 msr; | ||
98 | u64 value; | ||
99 | } msrs[KVM_NR_SHARED_MSRS]; | ||
100 | }; | 98 | }; |
101 | 99 | ||
102 | struct kvm_shared_msrs { | 100 | struct kvm_shared_msrs { |
103 | struct user_return_notifier urn; | 101 | struct user_return_notifier urn; |
104 | bool registered; | 102 | bool registered; |
105 | u64 current_value[KVM_NR_SHARED_MSRS]; | 103 | struct kvm_shared_msr_values { |
104 | u64 host; | ||
105 | u64 curr; | ||
106 | } values[KVM_NR_SHARED_MSRS]; | ||
106 | }; | 107 | }; |
107 | 108 | ||
108 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | 109 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; |
@@ -147,53 +148,64 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
147 | static void kvm_on_user_return(struct user_return_notifier *urn) | 148 | static void kvm_on_user_return(struct user_return_notifier *urn) |
148 | { | 149 | { |
149 | unsigned slot; | 150 | unsigned slot; |
150 | struct kvm_shared_msr *global; | ||
151 | struct kvm_shared_msrs *locals | 151 | struct kvm_shared_msrs *locals |
152 | = container_of(urn, struct kvm_shared_msrs, urn); | 152 | = container_of(urn, struct kvm_shared_msrs, urn); |
153 | struct kvm_shared_msr_values *values; | ||
153 | 154 | ||
154 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | 155 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { |
155 | global = &shared_msrs_global.msrs[slot]; | 156 | values = &locals->values[slot]; |
156 | if (global->value != locals->current_value[slot]) { | 157 | if (values->host != values->curr) { |
157 | wrmsrl(global->msr, global->value); | 158 | wrmsrl(shared_msrs_global.msrs[slot], values->host); |
158 | locals->current_value[slot] = global->value; | 159 | values->curr = values->host; |
159 | } | 160 | } |
160 | } | 161 | } |
161 | locals->registered = false; | 162 | locals->registered = false; |
162 | user_return_notifier_unregister(urn); | 163 | user_return_notifier_unregister(urn); |
163 | } | 164 | } |
164 | 165 | ||
165 | void kvm_define_shared_msr(unsigned slot, u32 msr) | 166 | static void shared_msr_update(unsigned slot, u32 msr) |
166 | { | 167 | { |
167 | int cpu; | 168 | struct kvm_shared_msrs *smsr; |
168 | u64 value; | 169 | u64 value; |
169 | 170 | ||
171 | smsr = &__get_cpu_var(shared_msrs); | ||
172 | /* only read, and nobody should modify it at this time, | ||
173 | * so don't need lock */ | ||
174 | if (slot >= shared_msrs_global.nr) { | ||
175 | printk(KERN_ERR "kvm: invalid MSR slot!"); | ||
176 | return; | ||
177 | } | ||
178 | rdmsrl_safe(msr, &value); | ||
179 | smsr->values[slot].host = value; | ||
180 | smsr->values[slot].curr = value; | ||
181 | } | ||
182 | |||
183 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
184 | { | ||
170 | if (slot >= shared_msrs_global.nr) | 185 | if (slot >= shared_msrs_global.nr) |
171 | shared_msrs_global.nr = slot + 1; | 186 | shared_msrs_global.nr = slot + 1; |
172 | shared_msrs_global.msrs[slot].msr = msr; | 187 | shared_msrs_global.msrs[slot] = msr; |
173 | rdmsrl_safe(msr, &value); | 188 | /* we need ensured the shared_msr_global have been updated */ |
174 | shared_msrs_global.msrs[slot].value = value; | 189 | smp_wmb(); |
175 | for_each_online_cpu(cpu) | ||
176 | per_cpu(shared_msrs, cpu).current_value[slot] = value; | ||
177 | } | 190 | } |
178 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | 191 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); |
179 | 192 | ||
180 | static void kvm_shared_msr_cpu_online(void) | 193 | static void kvm_shared_msr_cpu_online(void) |
181 | { | 194 | { |
182 | unsigned i; | 195 | unsigned i; |
183 | struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); | ||
184 | 196 | ||
185 | for (i = 0; i < shared_msrs_global.nr; ++i) | 197 | for (i = 0; i < shared_msrs_global.nr; ++i) |
186 | locals->current_value[i] = shared_msrs_global.msrs[i].value; | 198 | shared_msr_update(i, shared_msrs_global.msrs[i]); |
187 | } | 199 | } |
188 | 200 | ||
189 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | 201 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) |
190 | { | 202 | { |
191 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | 203 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); |
192 | 204 | ||
193 | if (((value ^ smsr->current_value[slot]) & mask) == 0) | 205 | if (((value ^ smsr->values[slot].curr) & mask) == 0) |
194 | return; | 206 | return; |
195 | smsr->current_value[slot] = value; | 207 | smsr->values[slot].curr = value; |
196 | wrmsrl(shared_msrs_global.msrs[slot].msr, value); | 208 | wrmsrl(shared_msrs_global.msrs[slot], value); |
197 | if (!smsr->registered) { | 209 | if (!smsr->registered) { |
198 | smsr->urn.on_user_return = kvm_on_user_return; | 210 | smsr->urn.on_user_return = kvm_on_user_return; |
199 | user_return_notifier_register(&smsr->urn); | 211 | user_return_notifier_register(&smsr->urn); |
@@ -257,12 +269,68 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | |||
257 | } | 269 | } |
258 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 270 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
259 | 271 | ||
272 | #define EXCPT_BENIGN 0 | ||
273 | #define EXCPT_CONTRIBUTORY 1 | ||
274 | #define EXCPT_PF 2 | ||
275 | |||
276 | static int exception_class(int vector) | ||
277 | { | ||
278 | switch (vector) { | ||
279 | case PF_VECTOR: | ||
280 | return EXCPT_PF; | ||
281 | case DE_VECTOR: | ||
282 | case TS_VECTOR: | ||
283 | case NP_VECTOR: | ||
284 | case SS_VECTOR: | ||
285 | case GP_VECTOR: | ||
286 | return EXCPT_CONTRIBUTORY; | ||
287 | default: | ||
288 | break; | ||
289 | } | ||
290 | return EXCPT_BENIGN; | ||
291 | } | ||
292 | |||
293 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | ||
294 | unsigned nr, bool has_error, u32 error_code) | ||
295 | { | ||
296 | u32 prev_nr; | ||
297 | int class1, class2; | ||
298 | |||
299 | if (!vcpu->arch.exception.pending) { | ||
300 | queue: | ||
301 | vcpu->arch.exception.pending = true; | ||
302 | vcpu->arch.exception.has_error_code = has_error; | ||
303 | vcpu->arch.exception.nr = nr; | ||
304 | vcpu->arch.exception.error_code = error_code; | ||
305 | return; | ||
306 | } | ||
307 | |||
308 | /* to check exception */ | ||
309 | prev_nr = vcpu->arch.exception.nr; | ||
310 | if (prev_nr == DF_VECTOR) { | ||
311 | /* triple fault -> shutdown */ | ||
312 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
313 | return; | ||
314 | } | ||
315 | class1 = exception_class(prev_nr); | ||
316 | class2 = exception_class(nr); | ||
317 | if ((class1 == EXCPT_CONTRIBUTORY && class2 == EXCPT_CONTRIBUTORY) | ||
318 | || (class1 == EXCPT_PF && class2 != EXCPT_BENIGN)) { | ||
319 | /* generate double fault per SDM Table 5-5 */ | ||
320 | vcpu->arch.exception.pending = true; | ||
321 | vcpu->arch.exception.has_error_code = true; | ||
322 | vcpu->arch.exception.nr = DF_VECTOR; | ||
323 | vcpu->arch.exception.error_code = 0; | ||
324 | } else | ||
325 | /* replace previous exception with a new one in a hope | ||
326 | that instruction re-execution will regenerate lost | ||
327 | exception */ | ||
328 | goto queue; | ||
329 | } | ||
330 | |||
260 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) | 331 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) |
261 | { | 332 | { |
262 | WARN_ON(vcpu->arch.exception.pending); | 333 | kvm_multiple_exception(vcpu, nr, false, 0); |
263 | vcpu->arch.exception.pending = true; | ||
264 | vcpu->arch.exception.has_error_code = false; | ||
265 | vcpu->arch.exception.nr = nr; | ||
266 | } | 334 | } |
267 | EXPORT_SYMBOL_GPL(kvm_queue_exception); | 335 | EXPORT_SYMBOL_GPL(kvm_queue_exception); |
268 | 336 | ||
@@ -270,25 +338,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
270 | u32 error_code) | 338 | u32 error_code) |
271 | { | 339 | { |
272 | ++vcpu->stat.pf_guest; | 340 | ++vcpu->stat.pf_guest; |
273 | |||
274 | if (vcpu->arch.exception.pending) { | ||
275 | switch(vcpu->arch.exception.nr) { | ||
276 | case DF_VECTOR: | ||
277 | /* triple fault -> shutdown */ | ||
278 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
279 | return; | ||
280 | case PF_VECTOR: | ||
281 | vcpu->arch.exception.nr = DF_VECTOR; | ||
282 | vcpu->arch.exception.error_code = 0; | ||
283 | return; | ||
284 | default: | ||
285 | /* replace previous exception with a new one in a hope | ||
286 | that instruction re-execution will regenerate lost | ||
287 | exception */ | ||
288 | vcpu->arch.exception.pending = false; | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | vcpu->arch.cr2 = addr; | 341 | vcpu->arch.cr2 = addr; |
293 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 342 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); |
294 | } | 343 | } |
@@ -301,11 +350,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); | |||
301 | 350 | ||
302 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 351 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
303 | { | 352 | { |
304 | WARN_ON(vcpu->arch.exception.pending); | 353 | kvm_multiple_exception(vcpu, nr, true, error_code); |
305 | vcpu->arch.exception.pending = true; | ||
306 | vcpu->arch.exception.has_error_code = true; | ||
307 | vcpu->arch.exception.nr = nr; | ||
308 | vcpu->arch.exception.error_code = error_code; | ||
309 | } | 354 | } |
310 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); | 355 | EXPORT_SYMBOL_GPL(kvm_queue_exception_e); |
311 | 356 | ||
@@ -383,12 +428,18 @@ out: | |||
383 | 428 | ||
384 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | 429 | void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) |
385 | { | 430 | { |
386 | if (cr0 & CR0_RESERVED_BITS) { | 431 | cr0 |= X86_CR0_ET; |
432 | |||
433 | #ifdef CONFIG_X86_64 | ||
434 | if (cr0 & 0xffffffff00000000UL) { | ||
387 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", | 435 | printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", |
388 | cr0, vcpu->arch.cr0); | 436 | cr0, kvm_read_cr0(vcpu)); |
389 | kvm_inject_gp(vcpu, 0); | 437 | kvm_inject_gp(vcpu, 0); |
390 | return; | 438 | return; |
391 | } | 439 | } |
440 | #endif | ||
441 | |||
442 | cr0 &= ~CR0_RESERVED_BITS; | ||
392 | 443 | ||
393 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { | 444 | if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { |
394 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); | 445 | printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); |
@@ -405,7 +456,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
405 | 456 | ||
406 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { | 457 | if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { |
407 | #ifdef CONFIG_X86_64 | 458 | #ifdef CONFIG_X86_64 |
408 | if ((vcpu->arch.shadow_efer & EFER_LME)) { | 459 | if ((vcpu->arch.efer & EFER_LME)) { |
409 | int cs_db, cs_l; | 460 | int cs_db, cs_l; |
410 | 461 | ||
411 | if (!is_pae(vcpu)) { | 462 | if (!is_pae(vcpu)) { |
@@ -443,13 +494,13 @@ EXPORT_SYMBOL_GPL(kvm_set_cr0); | |||
443 | 494 | ||
444 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) | 495 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) |
445 | { | 496 | { |
446 | kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)); | 497 | kvm_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~0x0ful) | (msw & 0x0f)); |
447 | } | 498 | } |
448 | EXPORT_SYMBOL_GPL(kvm_lmsw); | 499 | EXPORT_SYMBOL_GPL(kvm_lmsw); |
449 | 500 | ||
450 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 501 | void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
451 | { | 502 | { |
452 | unsigned long old_cr4 = vcpu->arch.cr4; | 503 | unsigned long old_cr4 = kvm_read_cr4(vcpu); |
453 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; | 504 | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; |
454 | 505 | ||
455 | if (cr4 & CR4_RESERVED_BITS) { | 506 | if (cr4 & CR4_RESERVED_BITS) { |
@@ -575,9 +626,11 @@ static inline u32 bit(int bitno) | |||
575 | * kvm-specific. Those are put in the beginning of the list. | 626 | * kvm-specific. Those are put in the beginning of the list. |
576 | */ | 627 | */ |
577 | 628 | ||
578 | #define KVM_SAVE_MSRS_BEGIN 2 | 629 | #define KVM_SAVE_MSRS_BEGIN 5 |
579 | static u32 msrs_to_save[] = { | 630 | static u32 msrs_to_save[] = { |
580 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 631 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
632 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | ||
633 | HV_X64_MSR_APIC_ASSIST_PAGE, | ||
581 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 634 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
582 | MSR_K6_STAR, | 635 | MSR_K6_STAR, |
583 | #ifdef CONFIG_X86_64 | 636 | #ifdef CONFIG_X86_64 |
@@ -602,7 +655,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
602 | } | 655 | } |
603 | 656 | ||
604 | if (is_paging(vcpu) | 657 | if (is_paging(vcpu) |
605 | && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) { | 658 | && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { |
606 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); | 659 | printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); |
607 | kvm_inject_gp(vcpu, 0); | 660 | kvm_inject_gp(vcpu, 0); |
608 | return; | 661 | return; |
@@ -633,9 +686,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
633 | kvm_x86_ops->set_efer(vcpu, efer); | 686 | kvm_x86_ops->set_efer(vcpu, efer); |
634 | 687 | ||
635 | efer &= ~EFER_LMA; | 688 | efer &= ~EFER_LMA; |
636 | efer |= vcpu->arch.shadow_efer & EFER_LMA; | 689 | efer |= vcpu->arch.efer & EFER_LMA; |
637 | 690 | ||
638 | vcpu->arch.shadow_efer = efer; | 691 | vcpu->arch.efer = efer; |
639 | 692 | ||
640 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | 693 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; |
641 | kvm_mmu_reset_context(vcpu); | 694 | kvm_mmu_reset_context(vcpu); |
@@ -957,6 +1010,100 @@ out: | |||
957 | return r; | 1010 | return r; |
958 | } | 1011 | } |
959 | 1012 | ||
1013 | static bool kvm_hv_hypercall_enabled(struct kvm *kvm) | ||
1014 | { | ||
1015 | return kvm->arch.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; | ||
1016 | } | ||
1017 | |||
1018 | static bool kvm_hv_msr_partition_wide(u32 msr) | ||
1019 | { | ||
1020 | bool r = false; | ||
1021 | switch (msr) { | ||
1022 | case HV_X64_MSR_GUEST_OS_ID: | ||
1023 | case HV_X64_MSR_HYPERCALL: | ||
1024 | r = true; | ||
1025 | break; | ||
1026 | } | ||
1027 | |||
1028 | return r; | ||
1029 | } | ||
1030 | |||
1031 | static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1032 | { | ||
1033 | struct kvm *kvm = vcpu->kvm; | ||
1034 | |||
1035 | switch (msr) { | ||
1036 | case HV_X64_MSR_GUEST_OS_ID: | ||
1037 | kvm->arch.hv_guest_os_id = data; | ||
1038 | /* setting guest os id to zero disables hypercall page */ | ||
1039 | if (!kvm->arch.hv_guest_os_id) | ||
1040 | kvm->arch.hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; | ||
1041 | break; | ||
1042 | case HV_X64_MSR_HYPERCALL: { | ||
1043 | u64 gfn; | ||
1044 | unsigned long addr; | ||
1045 | u8 instructions[4]; | ||
1046 | |||
1047 | /* if guest os id is not set hypercall should remain disabled */ | ||
1048 | if (!kvm->arch.hv_guest_os_id) | ||
1049 | break; | ||
1050 | if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { | ||
1051 | kvm->arch.hv_hypercall = data; | ||
1052 | break; | ||
1053 | } | ||
1054 | gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; | ||
1055 | addr = gfn_to_hva(kvm, gfn); | ||
1056 | if (kvm_is_error_hva(addr)) | ||
1057 | return 1; | ||
1058 | kvm_x86_ops->patch_hypercall(vcpu, instructions); | ||
1059 | ((unsigned char *)instructions)[3] = 0xc3; /* ret */ | ||
1060 | if (copy_to_user((void __user *)addr, instructions, 4)) | ||
1061 | return 1; | ||
1062 | kvm->arch.hv_hypercall = data; | ||
1063 | break; | ||
1064 | } | ||
1065 | default: | ||
1066 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | ||
1067 | "data 0x%llx\n", msr, data); | ||
1068 | return 1; | ||
1069 | } | ||
1070 | return 0; | ||
1071 | } | ||
1072 | |||
1073 | static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
1074 | { | ||
1075 | switch (msr) { | ||
1076 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | ||
1077 | unsigned long addr; | ||
1078 | |||
1079 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | ||
1080 | vcpu->arch.hv_vapic = data; | ||
1081 | break; | ||
1082 | } | ||
1083 | addr = gfn_to_hva(vcpu->kvm, data >> | ||
1084 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | ||
1085 | if (kvm_is_error_hva(addr)) | ||
1086 | return 1; | ||
1087 | if (clear_user((void __user *)addr, PAGE_SIZE)) | ||
1088 | return 1; | ||
1089 | vcpu->arch.hv_vapic = data; | ||
1090 | break; | ||
1091 | } | ||
1092 | case HV_X64_MSR_EOI: | ||
1093 | return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); | ||
1094 | case HV_X64_MSR_ICR: | ||
1095 | return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); | ||
1096 | case HV_X64_MSR_TPR: | ||
1097 | return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); | ||
1098 | default: | ||
1099 | pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " | ||
1100 | "data 0x%llx\n", msr, data); | ||
1101 | return 1; | ||
1102 | } | ||
1103 | |||
1104 | return 0; | ||
1105 | } | ||
1106 | |||
960 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1107 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
961 | { | 1108 | { |
962 | switch (msr) { | 1109 | switch (msr) { |
@@ -1071,6 +1218,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1071 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " | 1218 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: " |
1072 | "0x%x data 0x%llx\n", msr, data); | 1219 | "0x%x data 0x%llx\n", msr, data); |
1073 | break; | 1220 | break; |
1221 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1222 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1223 | int r; | ||
1224 | mutex_lock(&vcpu->kvm->lock); | ||
1225 | r = set_msr_hyperv_pw(vcpu, msr, data); | ||
1226 | mutex_unlock(&vcpu->kvm->lock); | ||
1227 | return r; | ||
1228 | } else | ||
1229 | return set_msr_hyperv(vcpu, msr, data); | ||
1230 | break; | ||
1074 | default: | 1231 | default: |
1075 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1232 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1076 | return xen_hvm_config(vcpu, data); | 1233 | return xen_hvm_config(vcpu, data); |
@@ -1170,6 +1327,54 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1170 | return 0; | 1327 | return 0; |
1171 | } | 1328 | } |
1172 | 1329 | ||
1330 | static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1331 | { | ||
1332 | u64 data = 0; | ||
1333 | struct kvm *kvm = vcpu->kvm; | ||
1334 | |||
1335 | switch (msr) { | ||
1336 | case HV_X64_MSR_GUEST_OS_ID: | ||
1337 | data = kvm->arch.hv_guest_os_id; | ||
1338 | break; | ||
1339 | case HV_X64_MSR_HYPERCALL: | ||
1340 | data = kvm->arch.hv_hypercall; | ||
1341 | break; | ||
1342 | default: | ||
1343 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1344 | return 1; | ||
1345 | } | ||
1346 | |||
1347 | *pdata = data; | ||
1348 | return 0; | ||
1349 | } | ||
1350 | |||
1351 | static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
1352 | { | ||
1353 | u64 data = 0; | ||
1354 | |||
1355 | switch (msr) { | ||
1356 | case HV_X64_MSR_VP_INDEX: { | ||
1357 | int r; | ||
1358 | struct kvm_vcpu *v; | ||
1359 | kvm_for_each_vcpu(r, v, vcpu->kvm) | ||
1360 | if (v == vcpu) | ||
1361 | data = r; | ||
1362 | break; | ||
1363 | } | ||
1364 | case HV_X64_MSR_EOI: | ||
1365 | return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); | ||
1366 | case HV_X64_MSR_ICR: | ||
1367 | return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); | ||
1368 | case HV_X64_MSR_TPR: | ||
1369 | return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); | ||
1370 | default: | ||
1371 | pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | ||
1372 | return 1; | ||
1373 | } | ||
1374 | *pdata = data; | ||
1375 | return 0; | ||
1376 | } | ||
1377 | |||
1173 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 1378 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
1174 | { | 1379 | { |
1175 | u64 data; | 1380 | u64 data; |
@@ -1221,7 +1426,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1221 | data |= (((uint64_t)4ULL) << 40); | 1426 | data |= (((uint64_t)4ULL) << 40); |
1222 | break; | 1427 | break; |
1223 | case MSR_EFER: | 1428 | case MSR_EFER: |
1224 | data = vcpu->arch.shadow_efer; | 1429 | data = vcpu->arch.efer; |
1225 | break; | 1430 | break; |
1226 | case MSR_KVM_WALL_CLOCK: | 1431 | case MSR_KVM_WALL_CLOCK: |
1227 | data = vcpu->kvm->arch.wall_clock; | 1432 | data = vcpu->kvm->arch.wall_clock; |
@@ -1236,6 +1441,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1236 | case MSR_IA32_MCG_STATUS: | 1441 | case MSR_IA32_MCG_STATUS: |
1237 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: | 1442 | case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * KVM_MAX_MCE_BANKS - 1: |
1238 | return get_msr_mce(vcpu, msr, pdata); | 1443 | return get_msr_mce(vcpu, msr, pdata); |
1444 | case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: | ||
1445 | if (kvm_hv_msr_partition_wide(msr)) { | ||
1446 | int r; | ||
1447 | mutex_lock(&vcpu->kvm->lock); | ||
1448 | r = get_msr_hyperv_pw(vcpu, msr, pdata); | ||
1449 | mutex_unlock(&vcpu->kvm->lock); | ||
1450 | return r; | ||
1451 | } else | ||
1452 | return get_msr_hyperv(vcpu, msr, pdata); | ||
1453 | break; | ||
1239 | default: | 1454 | default: |
1240 | if (!ignore_msrs) { | 1455 | if (!ignore_msrs) { |
1241 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1456 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -1261,15 +1476,15 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, | |||
1261 | int (*do_msr)(struct kvm_vcpu *vcpu, | 1476 | int (*do_msr)(struct kvm_vcpu *vcpu, |
1262 | unsigned index, u64 *data)) | 1477 | unsigned index, u64 *data)) |
1263 | { | 1478 | { |
1264 | int i; | 1479 | int i, idx; |
1265 | 1480 | ||
1266 | vcpu_load(vcpu); | 1481 | vcpu_load(vcpu); |
1267 | 1482 | ||
1268 | down_read(&vcpu->kvm->slots_lock); | 1483 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
1269 | for (i = 0; i < msrs->nmsrs; ++i) | 1484 | for (i = 0; i < msrs->nmsrs; ++i) |
1270 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) | 1485 | if (do_msr(vcpu, entries[i].index, &entries[i].data)) |
1271 | break; | 1486 | break; |
1272 | up_read(&vcpu->kvm->slots_lock); | 1487 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
1273 | 1488 | ||
1274 | vcpu_put(vcpu); | 1489 | vcpu_put(vcpu); |
1275 | 1490 | ||
@@ -1351,6 +1566,11 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1351 | case KVM_CAP_XEN_HVM: | 1566 | case KVM_CAP_XEN_HVM: |
1352 | case KVM_CAP_ADJUST_CLOCK: | 1567 | case KVM_CAP_ADJUST_CLOCK: |
1353 | case KVM_CAP_VCPU_EVENTS: | 1568 | case KVM_CAP_VCPU_EVENTS: |
1569 | case KVM_CAP_HYPERV: | ||
1570 | case KVM_CAP_HYPERV_VAPIC: | ||
1571 | case KVM_CAP_HYPERV_SPIN: | ||
1572 | case KVM_CAP_PCI_SEGMENT: | ||
1573 | case KVM_CAP_X86_ROBUST_SINGLESTEP: | ||
1354 | r = 1; | 1574 | r = 1; |
1355 | break; | 1575 | break; |
1356 | case KVM_CAP_COALESCED_MMIO: | 1576 | case KVM_CAP_COALESCED_MMIO: |
@@ -1464,8 +1684,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
1464 | 1684 | ||
1465 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 1685 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
1466 | { | 1686 | { |
1467 | kvm_x86_ops->vcpu_put(vcpu); | ||
1468 | kvm_put_guest_fpu(vcpu); | 1687 | kvm_put_guest_fpu(vcpu); |
1688 | kvm_x86_ops->vcpu_put(vcpu); | ||
1469 | } | 1689 | } |
1470 | 1690 | ||
1471 | static int is_efer_nx(void) | 1691 | static int is_efer_nx(void) |
@@ -1530,6 +1750,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1530 | cpuid_fix_nx_cap(vcpu); | 1750 | cpuid_fix_nx_cap(vcpu); |
1531 | r = 0; | 1751 | r = 0; |
1532 | kvm_apic_set_version(vcpu); | 1752 | kvm_apic_set_version(vcpu); |
1753 | kvm_x86_ops->cpuid_update(vcpu); | ||
1533 | 1754 | ||
1534 | out_free: | 1755 | out_free: |
1535 | vfree(cpuid_entries); | 1756 | vfree(cpuid_entries); |
@@ -1552,6 +1773,7 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1552 | goto out; | 1773 | goto out; |
1553 | vcpu->arch.cpuid_nent = cpuid->nent; | 1774 | vcpu->arch.cpuid_nent = cpuid->nent; |
1554 | kvm_apic_set_version(vcpu); | 1775 | kvm_apic_set_version(vcpu); |
1776 | kvm_x86_ops->cpuid_update(vcpu); | ||
1555 | return 0; | 1777 | return 0; |
1556 | 1778 | ||
1557 | out: | 1779 | out: |
@@ -1594,12 +1816,15 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1594 | u32 index, int *nent, int maxnent) | 1816 | u32 index, int *nent, int maxnent) |
1595 | { | 1817 | { |
1596 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; | 1818 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
1597 | unsigned f_gbpages = kvm_x86_ops->gb_page_enable() ? F(GBPAGES) : 0; | ||
1598 | #ifdef CONFIG_X86_64 | 1819 | #ifdef CONFIG_X86_64 |
1820 | unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) | ||
1821 | ? F(GBPAGES) : 0; | ||
1599 | unsigned f_lm = F(LM); | 1822 | unsigned f_lm = F(LM); |
1600 | #else | 1823 | #else |
1824 | unsigned f_gbpages = 0; | ||
1601 | unsigned f_lm = 0; | 1825 | unsigned f_lm = 0; |
1602 | #endif | 1826 | #endif |
1827 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | ||
1603 | 1828 | ||
1604 | /* cpuid 1.edx */ | 1829 | /* cpuid 1.edx */ |
1605 | const u32 kvm_supported_word0_x86_features = | 1830 | const u32 kvm_supported_word0_x86_features = |
@@ -1619,7 +1844,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1619 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | 1844 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | |
1620 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | 1845 | F(PAT) | F(PSE36) | 0 /* Reserved */ | |
1621 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | 1846 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | |
1622 | F(FXSR) | F(FXSR_OPT) | f_gbpages | 0 /* RDTSCP */ | | 1847 | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | |
1623 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | 1848 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); |
1624 | /* cpuid 1.ecx */ | 1849 | /* cpuid 1.ecx */ |
1625 | const u32 kvm_supported_word4_x86_features = | 1850 | const u32 kvm_supported_word4_x86_features = |
@@ -1866,7 +2091,7 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
1866 | return 0; | 2091 | return 0; |
1867 | if (mce->status & MCI_STATUS_UC) { | 2092 | if (mce->status & MCI_STATUS_UC) { |
1868 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2093 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
1869 | !(vcpu->arch.cr4 & X86_CR4_MCE)) { | 2094 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
1870 | printk(KERN_DEBUG "kvm: set_mce: " | 2095 | printk(KERN_DEBUG "kvm: set_mce: " |
1871 | "injects mce exception while " | 2096 | "injects mce exception while " |
1872 | "previous one is in progress!\n"); | 2097 | "previous one is in progress!\n"); |
@@ -2160,14 +2385,14 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
2160 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) | 2385 | if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) |
2161 | return -EINVAL; | 2386 | return -EINVAL; |
2162 | 2387 | ||
2163 | down_write(&kvm->slots_lock); | 2388 | mutex_lock(&kvm->slots_lock); |
2164 | spin_lock(&kvm->mmu_lock); | 2389 | spin_lock(&kvm->mmu_lock); |
2165 | 2390 | ||
2166 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 2391 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
2167 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 2392 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
2168 | 2393 | ||
2169 | spin_unlock(&kvm->mmu_lock); | 2394 | spin_unlock(&kvm->mmu_lock); |
2170 | up_write(&kvm->slots_lock); | 2395 | mutex_unlock(&kvm->slots_lock); |
2171 | return 0; | 2396 | return 0; |
2172 | } | 2397 | } |
2173 | 2398 | ||
@@ -2176,13 +2401,35 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) | |||
2176 | return kvm->arch.n_alloc_mmu_pages; | 2401 | return kvm->arch.n_alloc_mmu_pages; |
2177 | } | 2402 | } |
2178 | 2403 | ||
2404 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn) | ||
2405 | { | ||
2406 | int i; | ||
2407 | struct kvm_mem_alias *alias; | ||
2408 | struct kvm_mem_aliases *aliases; | ||
2409 | |||
2410 | aliases = rcu_dereference(kvm->arch.aliases); | ||
2411 | |||
2412 | for (i = 0; i < aliases->naliases; ++i) { | ||
2413 | alias = &aliases->aliases[i]; | ||
2414 | if (alias->flags & KVM_ALIAS_INVALID) | ||
2415 | continue; | ||
2416 | if (gfn >= alias->base_gfn | ||
2417 | && gfn < alias->base_gfn + alias->npages) | ||
2418 | return alias->target_gfn + gfn - alias->base_gfn; | ||
2419 | } | ||
2420 | return gfn; | ||
2421 | } | ||
2422 | |||
2179 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 2423 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
2180 | { | 2424 | { |
2181 | int i; | 2425 | int i; |
2182 | struct kvm_mem_alias *alias; | 2426 | struct kvm_mem_alias *alias; |
2427 | struct kvm_mem_aliases *aliases; | ||
2183 | 2428 | ||
2184 | for (i = 0; i < kvm->arch.naliases; ++i) { | 2429 | aliases = rcu_dereference(kvm->arch.aliases); |
2185 | alias = &kvm->arch.aliases[i]; | 2430 | |
2431 | for (i = 0; i < aliases->naliases; ++i) { | ||
2432 | alias = &aliases->aliases[i]; | ||
2186 | if (gfn >= alias->base_gfn | 2433 | if (gfn >= alias->base_gfn |
2187 | && gfn < alias->base_gfn + alias->npages) | 2434 | && gfn < alias->base_gfn + alias->npages) |
2188 | return alias->target_gfn + gfn - alias->base_gfn; | 2435 | return alias->target_gfn + gfn - alias->base_gfn; |
@@ -2200,6 +2447,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
2200 | { | 2447 | { |
2201 | int r, n; | 2448 | int r, n; |
2202 | struct kvm_mem_alias *p; | 2449 | struct kvm_mem_alias *p; |
2450 | struct kvm_mem_aliases *aliases, *old_aliases; | ||
2203 | 2451 | ||
2204 | r = -EINVAL; | 2452 | r = -EINVAL; |
2205 | /* General sanity checks */ | 2453 | /* General sanity checks */ |
@@ -2216,26 +2464,48 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
2216 | < alias->target_phys_addr) | 2464 | < alias->target_phys_addr) |
2217 | goto out; | 2465 | goto out; |
2218 | 2466 | ||
2219 | down_write(&kvm->slots_lock); | 2467 | r = -ENOMEM; |
2220 | spin_lock(&kvm->mmu_lock); | 2468 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); |
2469 | if (!aliases) | ||
2470 | goto out; | ||
2471 | |||
2472 | mutex_lock(&kvm->slots_lock); | ||
2221 | 2473 | ||
2222 | p = &kvm->arch.aliases[alias->slot]; | 2474 | /* invalidate any gfn reference in case of deletion/shrinking */ |
2475 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2476 | aliases->aliases[alias->slot].flags |= KVM_ALIAS_INVALID; | ||
2477 | old_aliases = kvm->arch.aliases; | ||
2478 | rcu_assign_pointer(kvm->arch.aliases, aliases); | ||
2479 | synchronize_srcu_expedited(&kvm->srcu); | ||
2480 | kvm_mmu_zap_all(kvm); | ||
2481 | kfree(old_aliases); | ||
2482 | |||
2483 | r = -ENOMEM; | ||
2484 | aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
2485 | if (!aliases) | ||
2486 | goto out_unlock; | ||
2487 | |||
2488 | memcpy(aliases, kvm->arch.aliases, sizeof(struct kvm_mem_aliases)); | ||
2489 | |||
2490 | p = &aliases->aliases[alias->slot]; | ||
2223 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 2491 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
2224 | p->npages = alias->memory_size >> PAGE_SHIFT; | 2492 | p->npages = alias->memory_size >> PAGE_SHIFT; |
2225 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; | 2493 | p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT; |
2494 | p->flags &= ~(KVM_ALIAS_INVALID); | ||
2226 | 2495 | ||
2227 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) | 2496 | for (n = KVM_ALIAS_SLOTS; n > 0; --n) |
2228 | if (kvm->arch.aliases[n - 1].npages) | 2497 | if (aliases->aliases[n - 1].npages) |
2229 | break; | 2498 | break; |
2230 | kvm->arch.naliases = n; | 2499 | aliases->naliases = n; |
2231 | 2500 | ||
2232 | spin_unlock(&kvm->mmu_lock); | 2501 | old_aliases = kvm->arch.aliases; |
2233 | kvm_mmu_zap_all(kvm); | 2502 | rcu_assign_pointer(kvm->arch.aliases, aliases); |
2234 | 2503 | synchronize_srcu_expedited(&kvm->srcu); | |
2235 | up_write(&kvm->slots_lock); | 2504 | kfree(old_aliases); |
2236 | 2505 | r = 0; | |
2237 | return 0; | ||
2238 | 2506 | ||
2507 | out_unlock: | ||
2508 | mutex_unlock(&kvm->slots_lock); | ||
2239 | out: | 2509 | out: |
2240 | return r; | 2510 | return r; |
2241 | } | 2511 | } |
@@ -2273,18 +2543,18 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
2273 | r = 0; | 2543 | r = 0; |
2274 | switch (chip->chip_id) { | 2544 | switch (chip->chip_id) { |
2275 | case KVM_IRQCHIP_PIC_MASTER: | 2545 | case KVM_IRQCHIP_PIC_MASTER: |
2276 | spin_lock(&pic_irqchip(kvm)->lock); | 2546 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2277 | memcpy(&pic_irqchip(kvm)->pics[0], | 2547 | memcpy(&pic_irqchip(kvm)->pics[0], |
2278 | &chip->chip.pic, | 2548 | &chip->chip.pic, |
2279 | sizeof(struct kvm_pic_state)); | 2549 | sizeof(struct kvm_pic_state)); |
2280 | spin_unlock(&pic_irqchip(kvm)->lock); | 2550 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2281 | break; | 2551 | break; |
2282 | case KVM_IRQCHIP_PIC_SLAVE: | 2552 | case KVM_IRQCHIP_PIC_SLAVE: |
2283 | spin_lock(&pic_irqchip(kvm)->lock); | 2553 | raw_spin_lock(&pic_irqchip(kvm)->lock); |
2284 | memcpy(&pic_irqchip(kvm)->pics[1], | 2554 | memcpy(&pic_irqchip(kvm)->pics[1], |
2285 | &chip->chip.pic, | 2555 | &chip->chip.pic, |
2286 | sizeof(struct kvm_pic_state)); | 2556 | sizeof(struct kvm_pic_state)); |
2287 | spin_unlock(&pic_irqchip(kvm)->lock); | 2557 | raw_spin_unlock(&pic_irqchip(kvm)->lock); |
2288 | break; | 2558 | break; |
2289 | case KVM_IRQCHIP_IOAPIC: | 2559 | case KVM_IRQCHIP_IOAPIC: |
2290 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); | 2560 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
@@ -2364,29 +2634,62 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, | |||
2364 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | 2634 | int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, |
2365 | struct kvm_dirty_log *log) | 2635 | struct kvm_dirty_log *log) |
2366 | { | 2636 | { |
2367 | int r; | 2637 | int r, n, i; |
2368 | int n; | ||
2369 | struct kvm_memory_slot *memslot; | 2638 | struct kvm_memory_slot *memslot; |
2370 | int is_dirty = 0; | 2639 | unsigned long is_dirty = 0; |
2640 | unsigned long *dirty_bitmap = NULL; | ||
2371 | 2641 | ||
2372 | down_write(&kvm->slots_lock); | 2642 | mutex_lock(&kvm->slots_lock); |
2373 | 2643 | ||
2374 | r = kvm_get_dirty_log(kvm, log, &is_dirty); | 2644 | r = -EINVAL; |
2375 | if (r) | 2645 | if (log->slot >= KVM_MEMORY_SLOTS) |
2646 | goto out; | ||
2647 | |||
2648 | memslot = &kvm->memslots->memslots[log->slot]; | ||
2649 | r = -ENOENT; | ||
2650 | if (!memslot->dirty_bitmap) | ||
2651 | goto out; | ||
2652 | |||
2653 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | ||
2654 | |||
2655 | r = -ENOMEM; | ||
2656 | dirty_bitmap = vmalloc(n); | ||
2657 | if (!dirty_bitmap) | ||
2376 | goto out; | 2658 | goto out; |
2659 | memset(dirty_bitmap, 0, n); | ||
2660 | |||
2661 | for (i = 0; !is_dirty && i < n/sizeof(long); i++) | ||
2662 | is_dirty = memslot->dirty_bitmap[i]; | ||
2377 | 2663 | ||
2378 | /* If nothing is dirty, don't bother messing with page tables. */ | 2664 | /* If nothing is dirty, don't bother messing with page tables. */ |
2379 | if (is_dirty) { | 2665 | if (is_dirty) { |
2666 | struct kvm_memslots *slots, *old_slots; | ||
2667 | |||
2380 | spin_lock(&kvm->mmu_lock); | 2668 | spin_lock(&kvm->mmu_lock); |
2381 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 2669 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
2382 | spin_unlock(&kvm->mmu_lock); | 2670 | spin_unlock(&kvm->mmu_lock); |
2383 | memslot = &kvm->memslots[log->slot]; | 2671 | |
2384 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 2672 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
2385 | memset(memslot->dirty_bitmap, 0, n); | 2673 | if (!slots) |
2674 | goto out_free; | ||
2675 | |||
2676 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
2677 | slots->memslots[log->slot].dirty_bitmap = dirty_bitmap; | ||
2678 | |||
2679 | old_slots = kvm->memslots; | ||
2680 | rcu_assign_pointer(kvm->memslots, slots); | ||
2681 | synchronize_srcu_expedited(&kvm->srcu); | ||
2682 | dirty_bitmap = old_slots->memslots[log->slot].dirty_bitmap; | ||
2683 | kfree(old_slots); | ||
2386 | } | 2684 | } |
2685 | |||
2387 | r = 0; | 2686 | r = 0; |
2687 | if (copy_to_user(log->dirty_bitmap, dirty_bitmap, n)) | ||
2688 | r = -EFAULT; | ||
2689 | out_free: | ||
2690 | vfree(dirty_bitmap); | ||
2388 | out: | 2691 | out: |
2389 | up_write(&kvm->slots_lock); | 2692 | mutex_unlock(&kvm->slots_lock); |
2390 | return r; | 2693 | return r; |
2391 | } | 2694 | } |
2392 | 2695 | ||
@@ -2469,6 +2772,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2469 | if (vpic) { | 2772 | if (vpic) { |
2470 | r = kvm_ioapic_init(kvm); | 2773 | r = kvm_ioapic_init(kvm); |
2471 | if (r) { | 2774 | if (r) { |
2775 | kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, | ||
2776 | &vpic->dev); | ||
2472 | kfree(vpic); | 2777 | kfree(vpic); |
2473 | goto create_irqchip_unlock; | 2778 | goto create_irqchip_unlock; |
2474 | } | 2779 | } |
@@ -2480,10 +2785,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2480 | r = kvm_setup_default_irq_routing(kvm); | 2785 | r = kvm_setup_default_irq_routing(kvm); |
2481 | if (r) { | 2786 | if (r) { |
2482 | mutex_lock(&kvm->irq_lock); | 2787 | mutex_lock(&kvm->irq_lock); |
2483 | kfree(kvm->arch.vpic); | 2788 | kvm_ioapic_destroy(kvm); |
2484 | kfree(kvm->arch.vioapic); | 2789 | kvm_destroy_pic(kvm); |
2485 | kvm->arch.vpic = NULL; | ||
2486 | kvm->arch.vioapic = NULL; | ||
2487 | mutex_unlock(&kvm->irq_lock); | 2790 | mutex_unlock(&kvm->irq_lock); |
2488 | } | 2791 | } |
2489 | create_irqchip_unlock: | 2792 | create_irqchip_unlock: |
@@ -2499,7 +2802,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2499 | sizeof(struct kvm_pit_config))) | 2802 | sizeof(struct kvm_pit_config))) |
2500 | goto out; | 2803 | goto out; |
2501 | create_pit: | 2804 | create_pit: |
2502 | down_write(&kvm->slots_lock); | 2805 | mutex_lock(&kvm->slots_lock); |
2503 | r = -EEXIST; | 2806 | r = -EEXIST; |
2504 | if (kvm->arch.vpit) | 2807 | if (kvm->arch.vpit) |
2505 | goto create_pit_unlock; | 2808 | goto create_pit_unlock; |
@@ -2508,7 +2811,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
2508 | if (kvm->arch.vpit) | 2811 | if (kvm->arch.vpit) |
2509 | r = 0; | 2812 | r = 0; |
2510 | create_pit_unlock: | 2813 | create_pit_unlock: |
2511 | up_write(&kvm->slots_lock); | 2814 | mutex_unlock(&kvm->slots_lock); |
2512 | break; | 2815 | break; |
2513 | case KVM_IRQ_LINE_STATUS: | 2816 | case KVM_IRQ_LINE_STATUS: |
2514 | case KVM_IRQ_LINE: { | 2817 | case KVM_IRQ_LINE: { |
@@ -2725,7 +3028,7 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, | |||
2725 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) | 3028 | !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) |
2726 | return 0; | 3029 | return 0; |
2727 | 3030 | ||
2728 | return kvm_io_bus_write(&vcpu->kvm->mmio_bus, addr, len, v); | 3031 | return kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2729 | } | 3032 | } |
2730 | 3033 | ||
2731 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | 3034 | static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) |
@@ -2734,17 +3037,44 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) | |||
2734 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) | 3037 | !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) |
2735 | return 0; | 3038 | return 0; |
2736 | 3039 | ||
2737 | return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v); | 3040 | return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); |
2738 | } | 3041 | } |
2739 | 3042 | ||
2740 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3043 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) |
2741 | struct kvm_vcpu *vcpu) | 3044 | { |
3045 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3046 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3047 | } | ||
3048 | |||
3049 | gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3050 | { | ||
3051 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3052 | access |= PFERR_FETCH_MASK; | ||
3053 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3054 | } | ||
3055 | |||
3056 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3057 | { | ||
3058 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3059 | access |= PFERR_WRITE_MASK; | ||
3060 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error); | ||
3061 | } | ||
3062 | |||
3063 | /* uses this to access any guest's mapped memory without checking CPL */ | ||
3064 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) | ||
3065 | { | ||
3066 | return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error); | ||
3067 | } | ||
3068 | |||
3069 | static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | ||
3070 | struct kvm_vcpu *vcpu, u32 access, | ||
3071 | u32 *error) | ||
2742 | { | 3072 | { |
2743 | void *data = val; | 3073 | void *data = val; |
2744 | int r = X86EMUL_CONTINUE; | 3074 | int r = X86EMUL_CONTINUE; |
2745 | 3075 | ||
2746 | while (bytes) { | 3076 | while (bytes) { |
2747 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3077 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error); |
2748 | unsigned offset = addr & (PAGE_SIZE-1); | 3078 | unsigned offset = addr & (PAGE_SIZE-1); |
2749 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); | 3079 | unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); |
2750 | int ret; | 3080 | int ret; |
@@ -2767,14 +3097,37 @@ out: | |||
2767 | return r; | 3097 | return r; |
2768 | } | 3098 | } |
2769 | 3099 | ||
3100 | /* used for instruction fetching */ | ||
3101 | static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3102 | struct kvm_vcpu *vcpu, u32 *error) | ||
3103 | { | ||
3104 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3105 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | ||
3106 | access | PFERR_FETCH_MASK, error); | ||
3107 | } | ||
3108 | |||
3109 | static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, | ||
3110 | struct kvm_vcpu *vcpu, u32 *error) | ||
3111 | { | ||
3112 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | ||
3113 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, | ||
3114 | error); | ||
3115 | } | ||
3116 | |||
3117 | static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, | ||
3118 | struct kvm_vcpu *vcpu, u32 *error) | ||
3119 | { | ||
3120 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); | ||
3121 | } | ||
3122 | |||
2770 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, | 3123 | static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, |
2771 | struct kvm_vcpu *vcpu) | 3124 | struct kvm_vcpu *vcpu, u32 *error) |
2772 | { | 3125 | { |
2773 | void *data = val; | 3126 | void *data = val; |
2774 | int r = X86EMUL_CONTINUE; | 3127 | int r = X86EMUL_CONTINUE; |
2775 | 3128 | ||
2776 | while (bytes) { | 3129 | while (bytes) { |
2777 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3130 | gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); |
2778 | unsigned offset = addr & (PAGE_SIZE-1); | 3131 | unsigned offset = addr & (PAGE_SIZE-1); |
2779 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); | 3132 | unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); |
2780 | int ret; | 3133 | int ret; |
@@ -2804,6 +3157,7 @@ static int emulator_read_emulated(unsigned long addr, | |||
2804 | struct kvm_vcpu *vcpu) | 3157 | struct kvm_vcpu *vcpu) |
2805 | { | 3158 | { |
2806 | gpa_t gpa; | 3159 | gpa_t gpa; |
3160 | u32 error_code; | ||
2807 | 3161 | ||
2808 | if (vcpu->mmio_read_completed) { | 3162 | if (vcpu->mmio_read_completed) { |
2809 | memcpy(val, vcpu->mmio_data, bytes); | 3163 | memcpy(val, vcpu->mmio_data, bytes); |
@@ -2813,17 +3167,20 @@ static int emulator_read_emulated(unsigned long addr, | |||
2813 | return X86EMUL_CONTINUE; | 3167 | return X86EMUL_CONTINUE; |
2814 | } | 3168 | } |
2815 | 3169 | ||
2816 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3170 | gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code); |
3171 | |||
3172 | if (gpa == UNMAPPED_GVA) { | ||
3173 | kvm_inject_page_fault(vcpu, addr, error_code); | ||
3174 | return X86EMUL_PROPAGATE_FAULT; | ||
3175 | } | ||
2817 | 3176 | ||
2818 | /* For APIC access vmexit */ | 3177 | /* For APIC access vmexit */ |
2819 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3178 | if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
2820 | goto mmio; | 3179 | goto mmio; |
2821 | 3180 | ||
2822 | if (kvm_read_guest_virt(addr, val, bytes, vcpu) | 3181 | if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL) |
2823 | == X86EMUL_CONTINUE) | 3182 | == X86EMUL_CONTINUE) |
2824 | return X86EMUL_CONTINUE; | 3183 | return X86EMUL_CONTINUE; |
2825 | if (gpa == UNMAPPED_GVA) | ||
2826 | return X86EMUL_PROPAGATE_FAULT; | ||
2827 | 3184 | ||
2828 | mmio: | 3185 | mmio: |
2829 | /* | 3186 | /* |
@@ -2862,11 +3219,12 @@ static int emulator_write_emulated_onepage(unsigned long addr, | |||
2862 | struct kvm_vcpu *vcpu) | 3219 | struct kvm_vcpu *vcpu) |
2863 | { | 3220 | { |
2864 | gpa_t gpa; | 3221 | gpa_t gpa; |
3222 | u32 error_code; | ||
2865 | 3223 | ||
2866 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3224 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code); |
2867 | 3225 | ||
2868 | if (gpa == UNMAPPED_GVA) { | 3226 | if (gpa == UNMAPPED_GVA) { |
2869 | kvm_inject_page_fault(vcpu, addr, 2); | 3227 | kvm_inject_page_fault(vcpu, addr, error_code); |
2870 | return X86EMUL_PROPAGATE_FAULT; | 3228 | return X86EMUL_PROPAGATE_FAULT; |
2871 | } | 3229 | } |
2872 | 3230 | ||
@@ -2930,7 +3288,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
2930 | char *kaddr; | 3288 | char *kaddr; |
2931 | u64 val; | 3289 | u64 val; |
2932 | 3290 | ||
2933 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); | 3291 | gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); |
2934 | 3292 | ||
2935 | if (gpa == UNMAPPED_GVA || | 3293 | if (gpa == UNMAPPED_GVA || |
2936 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) | 3294 | (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) |
@@ -2967,35 +3325,21 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
2967 | 3325 | ||
2968 | int emulate_clts(struct kvm_vcpu *vcpu) | 3326 | int emulate_clts(struct kvm_vcpu *vcpu) |
2969 | { | 3327 | { |
2970 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); | 3328 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); |
3329 | kvm_x86_ops->fpu_activate(vcpu); | ||
2971 | return X86EMUL_CONTINUE; | 3330 | return X86EMUL_CONTINUE; |
2972 | } | 3331 | } |
2973 | 3332 | ||
2974 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) | 3333 | int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) |
2975 | { | 3334 | { |
2976 | struct kvm_vcpu *vcpu = ctxt->vcpu; | 3335 | return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); |
2977 | |||
2978 | switch (dr) { | ||
2979 | case 0 ... 3: | ||
2980 | *dest = kvm_x86_ops->get_dr(vcpu, dr); | ||
2981 | return X86EMUL_CONTINUE; | ||
2982 | default: | ||
2983 | pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr); | ||
2984 | return X86EMUL_UNHANDLEABLE; | ||
2985 | } | ||
2986 | } | 3336 | } |
2987 | 3337 | ||
2988 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | 3338 | int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) |
2989 | { | 3339 | { |
2990 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; | 3340 | unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; |
2991 | int exception; | ||
2992 | 3341 | ||
2993 | kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); | 3342 | return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); |
2994 | if (exception) { | ||
2995 | /* FIXME: better handling */ | ||
2996 | return X86EMUL_UNHANDLEABLE; | ||
2997 | } | ||
2998 | return X86EMUL_CONTINUE; | ||
2999 | } | 3343 | } |
3000 | 3344 | ||
3001 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 3345 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
@@ -3009,7 +3353,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3009 | 3353 | ||
3010 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 3354 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); |
3011 | 3355 | ||
3012 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu); | 3356 | kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL); |
3013 | 3357 | ||
3014 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | 3358 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
3015 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | 3359 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
@@ -3017,7 +3361,8 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | |||
3017 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 3361 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
3018 | 3362 | ||
3019 | static struct x86_emulate_ops emulate_ops = { | 3363 | static struct x86_emulate_ops emulate_ops = { |
3020 | .read_std = kvm_read_guest_virt, | 3364 | .read_std = kvm_read_guest_virt_system, |
3365 | .fetch = kvm_fetch_guest_virt, | ||
3021 | .read_emulated = emulator_read_emulated, | 3366 | .read_emulated = emulator_read_emulated, |
3022 | .write_emulated = emulator_write_emulated, | 3367 | .write_emulated = emulator_write_emulated, |
3023 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 3368 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
@@ -3060,8 +3405,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
3060 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3405 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
3061 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); | 3406 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
3062 | vcpu->arch.emulate_ctxt.mode = | 3407 | vcpu->arch.emulate_ctxt.mode = |
3408 | (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : | ||
3063 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3409 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
3064 | ? X86EMUL_MODE_REAL : cs_l | 3410 | ? X86EMUL_MODE_VM86 : cs_l |
3065 | ? X86EMUL_MODE_PROT64 : cs_db | 3411 | ? X86EMUL_MODE_PROT64 : cs_db |
3066 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 3412 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
3067 | 3413 | ||
@@ -3153,12 +3499,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) | |||
3153 | gva_t q = vcpu->arch.pio.guest_gva; | 3499 | gva_t q = vcpu->arch.pio.guest_gva; |
3154 | unsigned bytes; | 3500 | unsigned bytes; |
3155 | int ret; | 3501 | int ret; |
3502 | u32 error_code; | ||
3156 | 3503 | ||
3157 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; | 3504 | bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; |
3158 | if (vcpu->arch.pio.in) | 3505 | if (vcpu->arch.pio.in) |
3159 | ret = kvm_write_guest_virt(q, p, bytes, vcpu); | 3506 | ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); |
3160 | else | 3507 | else |
3161 | ret = kvm_read_guest_virt(q, p, bytes, vcpu); | 3508 | ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); |
3509 | |||
3510 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
3511 | kvm_inject_page_fault(vcpu, q, error_code); | ||
3512 | |||
3162 | return ret; | 3513 | return ret; |
3163 | } | 3514 | } |
3164 | 3515 | ||
@@ -3179,7 +3530,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
3179 | if (io->in) { | 3530 | if (io->in) { |
3180 | r = pio_copy_data(vcpu); | 3531 | r = pio_copy_data(vcpu); |
3181 | if (r) | 3532 | if (r) |
3182 | return r; | 3533 | goto out; |
3183 | } | 3534 | } |
3184 | 3535 | ||
3185 | delta = 1; | 3536 | delta = 1; |
@@ -3206,7 +3557,7 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
3206 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | 3557 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); |
3207 | } | 3558 | } |
3208 | } | 3559 | } |
3209 | 3560 | out: | |
3210 | io->count -= io->cur_count; | 3561 | io->count -= io->cur_count; |
3211 | io->cur_count = 0; | 3562 | io->cur_count = 0; |
3212 | 3563 | ||
@@ -3219,11 +3570,12 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) | |||
3219 | int r; | 3570 | int r; |
3220 | 3571 | ||
3221 | if (vcpu->arch.pio.in) | 3572 | if (vcpu->arch.pio.in) |
3222 | r = kvm_io_bus_read(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3573 | r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, |
3223 | vcpu->arch.pio.size, pd); | 3574 | vcpu->arch.pio.size, pd); |
3224 | else | 3575 | else |
3225 | r = kvm_io_bus_write(&vcpu->kvm->pio_bus, vcpu->arch.pio.port, | 3576 | r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
3226 | vcpu->arch.pio.size, pd); | 3577 | vcpu->arch.pio.port, vcpu->arch.pio.size, |
3578 | pd); | ||
3227 | return r; | 3579 | return r; |
3228 | } | 3580 | } |
3229 | 3581 | ||
@@ -3234,7 +3586,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
3234 | int i, r = 0; | 3586 | int i, r = 0; |
3235 | 3587 | ||
3236 | for (i = 0; i < io->cur_count; i++) { | 3588 | for (i = 0; i < io->cur_count; i++) { |
3237 | if (kvm_io_bus_write(&vcpu->kvm->pio_bus, | 3589 | if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, |
3238 | io->port, io->size, pd)) { | 3590 | io->port, io->size, pd)) { |
3239 | r = -EOPNOTSUPP; | 3591 | r = -EOPNOTSUPP; |
3240 | break; | 3592 | break; |
@@ -3248,6 +3600,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | |||
3248 | { | 3600 | { |
3249 | unsigned long val; | 3601 | unsigned long val; |
3250 | 3602 | ||
3603 | trace_kvm_pio(!in, port, size, 1); | ||
3604 | |||
3251 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3605 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3252 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3606 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3253 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3607 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3259,11 +3613,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) | |||
3259 | vcpu->arch.pio.down = 0; | 3613 | vcpu->arch.pio.down = 0; |
3260 | vcpu->arch.pio.rep = 0; | 3614 | vcpu->arch.pio.rep = 0; |
3261 | 3615 | ||
3262 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | 3616 | if (!vcpu->arch.pio.in) { |
3263 | size, 1); | 3617 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3264 | 3618 | memcpy(vcpu->arch.pio_data, &val, 4); | |
3265 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3619 | } |
3266 | memcpy(vcpu->arch.pio_data, &val, 4); | ||
3267 | 3620 | ||
3268 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { | 3621 | if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { |
3269 | complete_pio(vcpu); | 3622 | complete_pio(vcpu); |
@@ -3280,6 +3633,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3280 | unsigned now, in_page; | 3633 | unsigned now, in_page; |
3281 | int ret = 0; | 3634 | int ret = 0; |
3282 | 3635 | ||
3636 | trace_kvm_pio(!in, port, size, count); | ||
3637 | |||
3283 | vcpu->run->exit_reason = KVM_EXIT_IO; | 3638 | vcpu->run->exit_reason = KVM_EXIT_IO; |
3284 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 3639 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
3285 | vcpu->run->io.size = vcpu->arch.pio.size = size; | 3640 | vcpu->run->io.size = vcpu->arch.pio.size = size; |
@@ -3291,9 +3646,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3291 | vcpu->arch.pio.down = down; | 3646 | vcpu->arch.pio.down = down; |
3292 | vcpu->arch.pio.rep = rep; | 3647 | vcpu->arch.pio.rep = rep; |
3293 | 3648 | ||
3294 | trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port, | ||
3295 | size, count); | ||
3296 | |||
3297 | if (!count) { | 3649 | if (!count) { |
3298 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 3650 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
3299 | return 1; | 3651 | return 1; |
@@ -3325,10 +3677,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, | |||
3325 | if (!vcpu->arch.pio.in) { | 3677 | if (!vcpu->arch.pio.in) { |
3326 | /* string PIO write */ | 3678 | /* string PIO write */ |
3327 | ret = pio_copy_data(vcpu); | 3679 | ret = pio_copy_data(vcpu); |
3328 | if (ret == X86EMUL_PROPAGATE_FAULT) { | 3680 | if (ret == X86EMUL_PROPAGATE_FAULT) |
3329 | kvm_inject_gp(vcpu, 0); | ||
3330 | return 1; | 3681 | return 1; |
3331 | } | ||
3332 | if (ret == 0 && !pio_string_write(vcpu)) { | 3682 | if (ret == 0 && !pio_string_write(vcpu)) { |
3333 | complete_pio(vcpu); | 3683 | complete_pio(vcpu); |
3334 | if (vcpu->arch.pio.count == 0) | 3684 | if (vcpu->arch.pio.count == 0) |
@@ -3487,11 +3837,76 @@ static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0, | |||
3487 | return a0 | ((gpa_t)a1 << 32); | 3837 | return a0 | ((gpa_t)a1 << 32); |
3488 | } | 3838 | } |
3489 | 3839 | ||
3840 | int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | ||
3841 | { | ||
3842 | u64 param, ingpa, outgpa, ret; | ||
3843 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; | ||
3844 | bool fast, longmode; | ||
3845 | int cs_db, cs_l; | ||
3846 | |||
3847 | /* | ||
3848 | * hypercall generates UD from non zero cpl and real mode | ||
3849 | * per HYPER-V spec | ||
3850 | */ | ||
3851 | if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { | ||
3852 | kvm_queue_exception(vcpu, UD_VECTOR); | ||
3853 | return 0; | ||
3854 | } | ||
3855 | |||
3856 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
3857 | longmode = is_long_mode(vcpu) && cs_l == 1; | ||
3858 | |||
3859 | if (!longmode) { | ||
3860 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | | ||
3861 | (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); | ||
3862 | ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | | ||
3863 | (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); | ||
3864 | outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | | ||
3865 | (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); | ||
3866 | } | ||
3867 | #ifdef CONFIG_X86_64 | ||
3868 | else { | ||
3869 | param = kvm_register_read(vcpu, VCPU_REGS_RCX); | ||
3870 | ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); | ||
3871 | outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); | ||
3872 | } | ||
3873 | #endif | ||
3874 | |||
3875 | code = param & 0xffff; | ||
3876 | fast = (param >> 16) & 0x1; | ||
3877 | rep_cnt = (param >> 32) & 0xfff; | ||
3878 | rep_idx = (param >> 48) & 0xfff; | ||
3879 | |||
3880 | trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); | ||
3881 | |||
3882 | switch (code) { | ||
3883 | case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: | ||
3884 | kvm_vcpu_on_spin(vcpu); | ||
3885 | break; | ||
3886 | default: | ||
3887 | res = HV_STATUS_INVALID_HYPERCALL_CODE; | ||
3888 | break; | ||
3889 | } | ||
3890 | |||
3891 | ret = res | (((u64)rep_done & 0xfff) << 32); | ||
3892 | if (longmode) { | ||
3893 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); | ||
3894 | } else { | ||
3895 | kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); | ||
3896 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); | ||
3897 | } | ||
3898 | |||
3899 | return 1; | ||
3900 | } | ||
3901 | |||
3490 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 3902 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
3491 | { | 3903 | { |
3492 | unsigned long nr, a0, a1, a2, a3, ret; | 3904 | unsigned long nr, a0, a1, a2, a3, ret; |
3493 | int r = 1; | 3905 | int r = 1; |
3494 | 3906 | ||
3907 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) | ||
3908 | return kvm_hv_hypercall(vcpu); | ||
3909 | |||
3495 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); | 3910 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); |
3496 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); | 3911 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); |
3497 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); | 3912 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); |
@@ -3534,10 +3949,8 @@ EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); | |||
3534 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | 3949 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu) |
3535 | { | 3950 | { |
3536 | char instruction[3]; | 3951 | char instruction[3]; |
3537 | int ret = 0; | ||
3538 | unsigned long rip = kvm_rip_read(vcpu); | 3952 | unsigned long rip = kvm_rip_read(vcpu); |
3539 | 3953 | ||
3540 | |||
3541 | /* | 3954 | /* |
3542 | * Blow out the MMU to ensure that no other VCPU has an active mapping | 3955 | * Blow out the MMU to ensure that no other VCPU has an active mapping |
3543 | * to ensure that the updated hypercall appears atomically across all | 3956 | * to ensure that the updated hypercall appears atomically across all |
@@ -3546,11 +3959,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
3546 | kvm_mmu_zap_all(vcpu->kvm); | 3959 | kvm_mmu_zap_all(vcpu->kvm); |
3547 | 3960 | ||
3548 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 3961 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
3549 | if (emulator_write_emulated(rip, instruction, 3, vcpu) | ||
3550 | != X86EMUL_CONTINUE) | ||
3551 | ret = -EFAULT; | ||
3552 | 3962 | ||
3553 | return ret; | 3963 | return emulator_write_emulated(rip, instruction, 3, vcpu); |
3554 | } | 3964 | } |
3555 | 3965 | ||
3556 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) | 3966 | static u64 mk_cr_64(u64 curr_cr, u32 new_val) |
@@ -3583,10 +3993,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3583 | { | 3993 | { |
3584 | unsigned long value; | 3994 | unsigned long value; |
3585 | 3995 | ||
3586 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | ||
3587 | switch (cr) { | 3996 | switch (cr) { |
3588 | case 0: | 3997 | case 0: |
3589 | value = vcpu->arch.cr0; | 3998 | value = kvm_read_cr0(vcpu); |
3590 | break; | 3999 | break; |
3591 | case 2: | 4000 | case 2: |
3592 | value = vcpu->arch.cr2; | 4001 | value = vcpu->arch.cr2; |
@@ -3595,7 +4004,7 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | |||
3595 | value = vcpu->arch.cr3; | 4004 | value = vcpu->arch.cr3; |
3596 | break; | 4005 | break; |
3597 | case 4: | 4006 | case 4: |
3598 | value = vcpu->arch.cr4; | 4007 | value = kvm_read_cr4(vcpu); |
3599 | break; | 4008 | break; |
3600 | case 8: | 4009 | case 8: |
3601 | value = kvm_get_cr8(vcpu); | 4010 | value = kvm_get_cr8(vcpu); |
@@ -3613,7 +4022,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3613 | { | 4022 | { |
3614 | switch (cr) { | 4023 | switch (cr) { |
3615 | case 0: | 4024 | case 0: |
3616 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 4025 | kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); |
3617 | *rflags = kvm_get_rflags(vcpu); | 4026 | *rflags = kvm_get_rflags(vcpu); |
3618 | break; | 4027 | break; |
3619 | case 2: | 4028 | case 2: |
@@ -3623,7 +4032,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
3623 | kvm_set_cr3(vcpu, val); | 4032 | kvm_set_cr3(vcpu, val); |
3624 | break; | 4033 | break; |
3625 | case 4: | 4034 | case 4: |
3626 | kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val)); | 4035 | kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); |
3627 | break; | 4036 | break; |
3628 | case 8: | 4037 | case 8: |
3629 | kvm_set_cr8(vcpu, val & 0xfUL); | 4038 | kvm_set_cr8(vcpu, val & 0xfUL); |
@@ -3690,6 +4099,7 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
3690 | } | 4099 | } |
3691 | return best; | 4100 | return best; |
3692 | } | 4101 | } |
4102 | EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); | ||
3693 | 4103 | ||
3694 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | 4104 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) |
3695 | { | 4105 | { |
@@ -3773,14 +4183,15 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
3773 | static void vapic_exit(struct kvm_vcpu *vcpu) | 4183 | static void vapic_exit(struct kvm_vcpu *vcpu) |
3774 | { | 4184 | { |
3775 | struct kvm_lapic *apic = vcpu->arch.apic; | 4185 | struct kvm_lapic *apic = vcpu->arch.apic; |
4186 | int idx; | ||
3776 | 4187 | ||
3777 | if (!apic || !apic->vapic_addr) | 4188 | if (!apic || !apic->vapic_addr) |
3778 | return; | 4189 | return; |
3779 | 4190 | ||
3780 | down_read(&vcpu->kvm->slots_lock); | 4191 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
3781 | kvm_release_page_dirty(apic->vapic_page); | 4192 | kvm_release_page_dirty(apic->vapic_page); |
3782 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 4193 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
3783 | up_read(&vcpu->kvm->slots_lock); | 4194 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
3784 | } | 4195 | } |
3785 | 4196 | ||
3786 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 4197 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) |
@@ -3876,12 +4287,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3876 | r = 0; | 4287 | r = 0; |
3877 | goto out; | 4288 | goto out; |
3878 | } | 4289 | } |
4290 | if (test_and_clear_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests)) { | ||
4291 | vcpu->fpu_active = 0; | ||
4292 | kvm_x86_ops->fpu_deactivate(vcpu); | ||
4293 | } | ||
3879 | } | 4294 | } |
3880 | 4295 | ||
3881 | preempt_disable(); | 4296 | preempt_disable(); |
3882 | 4297 | ||
3883 | kvm_x86_ops->prepare_guest_switch(vcpu); | 4298 | kvm_x86_ops->prepare_guest_switch(vcpu); |
3884 | kvm_load_guest_fpu(vcpu); | 4299 | if (vcpu->fpu_active) |
4300 | kvm_load_guest_fpu(vcpu); | ||
3885 | 4301 | ||
3886 | local_irq_disable(); | 4302 | local_irq_disable(); |
3887 | 4303 | ||
@@ -3909,7 +4325,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3909 | kvm_lapic_sync_to_vapic(vcpu); | 4325 | kvm_lapic_sync_to_vapic(vcpu); |
3910 | } | 4326 | } |
3911 | 4327 | ||
3912 | up_read(&vcpu->kvm->slots_lock); | 4328 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
3913 | 4329 | ||
3914 | kvm_guest_enter(); | 4330 | kvm_guest_enter(); |
3915 | 4331 | ||
@@ -3951,7 +4367,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
3951 | 4367 | ||
3952 | preempt_enable(); | 4368 | preempt_enable(); |
3953 | 4369 | ||
3954 | down_read(&vcpu->kvm->slots_lock); | 4370 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
3955 | 4371 | ||
3956 | /* | 4372 | /* |
3957 | * Profile KVM exit RIPs: | 4373 | * Profile KVM exit RIPs: |
@@ -3973,6 +4389,7 @@ out: | |||
3973 | static int __vcpu_run(struct kvm_vcpu *vcpu) | 4389 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
3974 | { | 4390 | { |
3975 | int r; | 4391 | int r; |
4392 | struct kvm *kvm = vcpu->kvm; | ||
3976 | 4393 | ||
3977 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | 4394 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
3978 | pr_debug("vcpu %d received sipi with vector # %x\n", | 4395 | pr_debug("vcpu %d received sipi with vector # %x\n", |
@@ -3984,7 +4401,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
3984 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 4401 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
3985 | } | 4402 | } |
3986 | 4403 | ||
3987 | down_read(&vcpu->kvm->slots_lock); | 4404 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3988 | vapic_enter(vcpu); | 4405 | vapic_enter(vcpu); |
3989 | 4406 | ||
3990 | r = 1; | 4407 | r = 1; |
@@ -3992,9 +4409,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
3992 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 4409 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
3993 | r = vcpu_enter_guest(vcpu); | 4410 | r = vcpu_enter_guest(vcpu); |
3994 | else { | 4411 | else { |
3995 | up_read(&vcpu->kvm->slots_lock); | 4412 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
3996 | kvm_vcpu_block(vcpu); | 4413 | kvm_vcpu_block(vcpu); |
3997 | down_read(&vcpu->kvm->slots_lock); | 4414 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
3998 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 4415 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) |
3999 | { | 4416 | { |
4000 | switch(vcpu->arch.mp_state) { | 4417 | switch(vcpu->arch.mp_state) { |
@@ -4029,13 +4446,13 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
4029 | ++vcpu->stat.signal_exits; | 4446 | ++vcpu->stat.signal_exits; |
4030 | } | 4447 | } |
4031 | if (need_resched()) { | 4448 | if (need_resched()) { |
4032 | up_read(&vcpu->kvm->slots_lock); | 4449 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4033 | kvm_resched(vcpu); | 4450 | kvm_resched(vcpu); |
4034 | down_read(&vcpu->kvm->slots_lock); | 4451 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
4035 | } | 4452 | } |
4036 | } | 4453 | } |
4037 | 4454 | ||
4038 | up_read(&vcpu->kvm->slots_lock); | 4455 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
4039 | post_kvm_run_save(vcpu); | 4456 | post_kvm_run_save(vcpu); |
4040 | 4457 | ||
4041 | vapic_exit(vcpu); | 4458 | vapic_exit(vcpu); |
@@ -4074,10 +4491,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
4074 | vcpu->mmio_read_completed = 1; | 4491 | vcpu->mmio_read_completed = 1; |
4075 | vcpu->mmio_needed = 0; | 4492 | vcpu->mmio_needed = 0; |
4076 | 4493 | ||
4077 | down_read(&vcpu->kvm->slots_lock); | 4494 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); |
4078 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, | 4495 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
4079 | EMULTYPE_NO_DECODE); | 4496 | EMULTYPE_NO_DECODE); |
4080 | up_read(&vcpu->kvm->slots_lock); | 4497 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); |
4081 | if (r == EMULATE_DO_MMIO) { | 4498 | if (r == EMULATE_DO_MMIO) { |
4082 | /* | 4499 | /* |
4083 | * Read-modify-write. Back to userspace. | 4500 | * Read-modify-write. Back to userspace. |
@@ -4204,13 +4621,12 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
4204 | sregs->gdt.limit = dt.limit; | 4621 | sregs->gdt.limit = dt.limit; |
4205 | sregs->gdt.base = dt.base; | 4622 | sregs->gdt.base = dt.base; |
4206 | 4623 | ||
4207 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 4624 | sregs->cr0 = kvm_read_cr0(vcpu); |
4208 | sregs->cr0 = vcpu->arch.cr0; | ||
4209 | sregs->cr2 = vcpu->arch.cr2; | 4625 | sregs->cr2 = vcpu->arch.cr2; |
4210 | sregs->cr3 = vcpu->arch.cr3; | 4626 | sregs->cr3 = vcpu->arch.cr3; |
4211 | sregs->cr4 = vcpu->arch.cr4; | 4627 | sregs->cr4 = kvm_read_cr4(vcpu); |
4212 | sregs->cr8 = kvm_get_cr8(vcpu); | 4628 | sregs->cr8 = kvm_get_cr8(vcpu); |
4213 | sregs->efer = vcpu->arch.shadow_efer; | 4629 | sregs->efer = vcpu->arch.efer; |
4214 | sregs->apic_base = kvm_get_apic_base(vcpu); | 4630 | sregs->apic_base = kvm_get_apic_base(vcpu); |
4215 | 4631 | ||
4216 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); | 4632 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
@@ -4298,14 +4714,23 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4298 | { | 4714 | { |
4299 | struct descriptor_table dtable; | 4715 | struct descriptor_table dtable; |
4300 | u16 index = selector >> 3; | 4716 | u16 index = selector >> 3; |
4717 | int ret; | ||
4718 | u32 err; | ||
4719 | gva_t addr; | ||
4301 | 4720 | ||
4302 | get_segment_descriptor_dtable(vcpu, selector, &dtable); | 4721 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
4303 | 4722 | ||
4304 | if (dtable.limit < index * 8 + 7) { | 4723 | if (dtable.limit < index * 8 + 7) { |
4305 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 4724 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
4306 | return 1; | 4725 | return X86EMUL_PROPAGATE_FAULT; |
4307 | } | 4726 | } |
4308 | return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4727 | addr = dtable.base + index * 8; |
4728 | ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), | ||
4729 | vcpu, &err); | ||
4730 | if (ret == X86EMUL_PROPAGATE_FAULT) | ||
4731 | kvm_inject_page_fault(vcpu, addr, err); | ||
4732 | |||
4733 | return ret; | ||
4309 | } | 4734 | } |
4310 | 4735 | ||
4311 | /* allowed just for 8 bytes segments */ | 4736 | /* allowed just for 8 bytes segments */ |
@@ -4319,15 +4744,23 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
4319 | 4744 | ||
4320 | if (dtable.limit < index * 8 + 7) | 4745 | if (dtable.limit < index * 8 + 7) |
4321 | return 1; | 4746 | return 1; |
4322 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu); | 4747 | return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); |
4748 | } | ||
4749 | |||
4750 | static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, | ||
4751 | struct desc_struct *seg_desc) | ||
4752 | { | ||
4753 | u32 base_addr = get_desc_base(seg_desc); | ||
4754 | |||
4755 | return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); | ||
4323 | } | 4756 | } |
4324 | 4757 | ||
4325 | static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu, | 4758 | static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, |
4326 | struct desc_struct *seg_desc) | 4759 | struct desc_struct *seg_desc) |
4327 | { | 4760 | { |
4328 | u32 base_addr = get_desc_base(seg_desc); | 4761 | u32 base_addr = get_desc_base(seg_desc); |
4329 | 4762 | ||
4330 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); | 4763 | return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); |
4331 | } | 4764 | } |
4332 | 4765 | ||
4333 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 4766 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
@@ -4338,18 +4771,6 @@ static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | |||
4338 | return kvm_seg.selector; | 4771 | return kvm_seg.selector; |
4339 | } | 4772 | } |
4340 | 4773 | ||
4341 | static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | ||
4342 | u16 selector, | ||
4343 | struct kvm_segment *kvm_seg) | ||
4344 | { | ||
4345 | struct desc_struct seg_desc; | ||
4346 | |||
4347 | if (load_guest_segment_descriptor(vcpu, selector, &seg_desc)) | ||
4348 | return 1; | ||
4349 | seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg); | ||
4350 | return 0; | ||
4351 | } | ||
4352 | |||
4353 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | 4774 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4354 | { | 4775 | { |
4355 | struct kvm_segment segvar = { | 4776 | struct kvm_segment segvar = { |
@@ -4367,7 +4788,7 @@ static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int se | |||
4367 | .unusable = 0, | 4788 | .unusable = 0, |
4368 | }; | 4789 | }; |
4369 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | 4790 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); |
4370 | return 0; | 4791 | return X86EMUL_CONTINUE; |
4371 | } | 4792 | } |
4372 | 4793 | ||
4373 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | 4794 | static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) |
@@ -4377,24 +4798,112 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | |||
4377 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); | 4798 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
4378 | } | 4799 | } |
4379 | 4800 | ||
4380 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4801 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) |
4381 | int type_bits, int seg) | ||
4382 | { | 4802 | { |
4383 | struct kvm_segment kvm_seg; | 4803 | struct kvm_segment kvm_seg; |
4804 | struct desc_struct seg_desc; | ||
4805 | u8 dpl, rpl, cpl; | ||
4806 | unsigned err_vec = GP_VECTOR; | ||
4807 | u32 err_code = 0; | ||
4808 | bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ | ||
4809 | int ret; | ||
4384 | 4810 | ||
4385 | if (is_vm86_segment(vcpu, seg) || !(vcpu->arch.cr0 & X86_CR0_PE)) | 4811 | if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) |
4386 | return kvm_load_realmode_segment(vcpu, selector, seg); | 4812 | return kvm_load_realmode_segment(vcpu, selector, seg); |
4387 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | ||
4388 | return 1; | ||
4389 | kvm_seg.type |= type_bits; | ||
4390 | 4813 | ||
4391 | if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS && | 4814 | /* NULL selector is not valid for TR, CS and SS */ |
4392 | seg != VCPU_SREG_LDTR) | 4815 | if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) |
4393 | if (!kvm_seg.s) | 4816 | && null_selector) |
4394 | kvm_seg.unusable = 1; | 4817 | goto exception; |
4818 | |||
4819 | /* TR should be in GDT only */ | ||
4820 | if (seg == VCPU_SREG_TR && (selector & (1 << 2))) | ||
4821 | goto exception; | ||
4822 | |||
4823 | ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4824 | if (ret) | ||
4825 | return ret; | ||
4826 | |||
4827 | seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); | ||
4828 | |||
4829 | if (null_selector) { /* for NULL selector skip all following checks */ | ||
4830 | kvm_seg.unusable = 1; | ||
4831 | goto load; | ||
4832 | } | ||
4833 | |||
4834 | err_code = selector & 0xfffc; | ||
4835 | err_vec = GP_VECTOR; | ||
4395 | 4836 | ||
4837 | /* can't load system descriptor into segment selecor */ | ||
4838 | if (seg <= VCPU_SREG_GS && !kvm_seg.s) | ||
4839 | goto exception; | ||
4840 | |||
4841 | if (!kvm_seg.present) { | ||
4842 | err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; | ||
4843 | goto exception; | ||
4844 | } | ||
4845 | |||
4846 | rpl = selector & 3; | ||
4847 | dpl = kvm_seg.dpl; | ||
4848 | cpl = kvm_x86_ops->get_cpl(vcpu); | ||
4849 | |||
4850 | switch (seg) { | ||
4851 | case VCPU_SREG_SS: | ||
4852 | /* | ||
4853 | * segment is not a writable data segment or segment | ||
4854 | * selector's RPL != CPL or segment selector's RPL != CPL | ||
4855 | */ | ||
4856 | if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) | ||
4857 | goto exception; | ||
4858 | break; | ||
4859 | case VCPU_SREG_CS: | ||
4860 | if (!(kvm_seg.type & 8)) | ||
4861 | goto exception; | ||
4862 | |||
4863 | if (kvm_seg.type & 4) { | ||
4864 | /* conforming */ | ||
4865 | if (dpl > cpl) | ||
4866 | goto exception; | ||
4867 | } else { | ||
4868 | /* nonconforming */ | ||
4869 | if (rpl > cpl || dpl != cpl) | ||
4870 | goto exception; | ||
4871 | } | ||
4872 | /* CS(RPL) <- CPL */ | ||
4873 | selector = (selector & 0xfffc) | cpl; | ||
4874 | break; | ||
4875 | case VCPU_SREG_TR: | ||
4876 | if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) | ||
4877 | goto exception; | ||
4878 | break; | ||
4879 | case VCPU_SREG_LDTR: | ||
4880 | if (kvm_seg.s || kvm_seg.type != 2) | ||
4881 | goto exception; | ||
4882 | break; | ||
4883 | default: /* DS, ES, FS, or GS */ | ||
4884 | /* | ||
4885 | * segment is not a data or readable code segment or | ||
4886 | * ((segment is a data or nonconforming code segment) | ||
4887 | * and (both RPL and CPL > DPL)) | ||
4888 | */ | ||
4889 | if ((kvm_seg.type & 0xa) == 0x8 || | ||
4890 | (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) | ||
4891 | goto exception; | ||
4892 | break; | ||
4893 | } | ||
4894 | |||
4895 | if (!kvm_seg.unusable && kvm_seg.s) { | ||
4896 | /* mark segment as accessed */ | ||
4897 | kvm_seg.type |= 1; | ||
4898 | seg_desc.type |= 1; | ||
4899 | save_guest_segment_descriptor(vcpu, selector, &seg_desc); | ||
4900 | } | ||
4901 | load: | ||
4396 | kvm_set_segment(vcpu, &kvm_seg, seg); | 4902 | kvm_set_segment(vcpu, &kvm_seg, seg); |
4397 | return 0; | 4903 | return X86EMUL_CONTINUE; |
4904 | exception: | ||
4905 | kvm_queue_exception_e(vcpu, err_vec, err_code); | ||
4906 | return X86EMUL_PROPAGATE_FAULT; | ||
4398 | } | 4907 | } |
4399 | 4908 | ||
4400 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, | 4909 | static void save_state_to_tss32(struct kvm_vcpu *vcpu, |
@@ -4420,6 +4929,14 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
4420 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4929 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
4421 | } | 4930 | } |
4422 | 4931 | ||
4932 | static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) | ||
4933 | { | ||
4934 | struct kvm_segment kvm_seg; | ||
4935 | kvm_get_segment(vcpu, &kvm_seg, seg); | ||
4936 | kvm_seg.selector = sel; | ||
4937 | kvm_set_segment(vcpu, &kvm_seg, seg); | ||
4938 | } | ||
4939 | |||
4423 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | 4940 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, |
4424 | struct tss_segment_32 *tss) | 4941 | struct tss_segment_32 *tss) |
4425 | { | 4942 | { |
@@ -4437,25 +4954,41 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
4437 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); | 4954 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); |
4438 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); | 4955 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); |
4439 | 4956 | ||
4440 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 4957 | /* |
4958 | * SDM says that segment selectors are loaded before segment | ||
4959 | * descriptors | ||
4960 | */ | ||
4961 | kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); | ||
4962 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
4963 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
4964 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
4965 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
4966 | kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); | ||
4967 | kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); | ||
4968 | |||
4969 | /* | ||
4970 | * Now load segment descriptors. If fault happenes at this stage | ||
4971 | * it is handled in a context of new task | ||
4972 | */ | ||
4973 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) | ||
4441 | return 1; | 4974 | return 1; |
4442 | 4975 | ||
4443 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 4976 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4444 | return 1; | 4977 | return 1; |
4445 | 4978 | ||
4446 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 4979 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4447 | return 1; | 4980 | return 1; |
4448 | 4981 | ||
4449 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 4982 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4450 | return 1; | 4983 | return 1; |
4451 | 4984 | ||
4452 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 4985 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4453 | return 1; | 4986 | return 1; |
4454 | 4987 | ||
4455 | if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) | 4988 | if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) |
4456 | return 1; | 4989 | return 1; |
4457 | 4990 | ||
4458 | if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) | 4991 | if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) |
4459 | return 1; | 4992 | return 1; |
4460 | return 0; | 4993 | return 0; |
4461 | } | 4994 | } |
@@ -4495,19 +5028,33 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
4495 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); | 5028 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); |
4496 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); | 5029 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); |
4497 | 5030 | ||
4498 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 5031 | /* |
5032 | * SDM says that segment selectors are loaded before segment | ||
5033 | * descriptors | ||
5034 | */ | ||
5035 | kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); | ||
5036 | kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); | ||
5037 | kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); | ||
5038 | kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); | ||
5039 | kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); | ||
5040 | |||
5041 | /* | ||
5042 | * Now load segment descriptors. If fault happenes at this stage | ||
5043 | * it is handled in a context of new task | ||
5044 | */ | ||
5045 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) | ||
4499 | return 1; | 5046 | return 1; |
4500 | 5047 | ||
4501 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 5048 | if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) |
4502 | return 1; | 5049 | return 1; |
4503 | 5050 | ||
4504 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 5051 | if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) |
4505 | return 1; | 5052 | return 1; |
4506 | 5053 | ||
4507 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 5054 | if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) |
4508 | return 1; | 5055 | return 1; |
4509 | 5056 | ||
4510 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 5057 | if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) |
4511 | return 1; | 5058 | return 1; |
4512 | return 0; | 5059 | return 0; |
4513 | } | 5060 | } |
@@ -4529,7 +5076,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4529 | sizeof tss_segment_16)) | 5076 | sizeof tss_segment_16)) |
4530 | goto out; | 5077 | goto out; |
4531 | 5078 | ||
4532 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5079 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4533 | &tss_segment_16, sizeof tss_segment_16)) | 5080 | &tss_segment_16, sizeof tss_segment_16)) |
4534 | goto out; | 5081 | goto out; |
4535 | 5082 | ||
@@ -4537,7 +5084,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4537 | tss_segment_16.prev_task_link = old_tss_sel; | 5084 | tss_segment_16.prev_task_link = old_tss_sel; |
4538 | 5085 | ||
4539 | if (kvm_write_guest(vcpu->kvm, | 5086 | if (kvm_write_guest(vcpu->kvm, |
4540 | get_tss_base_addr(vcpu, nseg_desc), | 5087 | get_tss_base_addr_write(vcpu, nseg_desc), |
4541 | &tss_segment_16.prev_task_link, | 5088 | &tss_segment_16.prev_task_link, |
4542 | sizeof tss_segment_16.prev_task_link)) | 5089 | sizeof tss_segment_16.prev_task_link)) |
4543 | goto out; | 5090 | goto out; |
@@ -4568,7 +5115,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4568 | sizeof tss_segment_32)) | 5115 | sizeof tss_segment_32)) |
4569 | goto out; | 5116 | goto out; |
4570 | 5117 | ||
4571 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | 5118 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), |
4572 | &tss_segment_32, sizeof tss_segment_32)) | 5119 | &tss_segment_32, sizeof tss_segment_32)) |
4573 | goto out; | 5120 | goto out; |
4574 | 5121 | ||
@@ -4576,7 +5123,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
4576 | tss_segment_32.prev_task_link = old_tss_sel; | 5123 | tss_segment_32.prev_task_link = old_tss_sel; |
4577 | 5124 | ||
4578 | if (kvm_write_guest(vcpu->kvm, | 5125 | if (kvm_write_guest(vcpu->kvm, |
4579 | get_tss_base_addr(vcpu, nseg_desc), | 5126 | get_tss_base_addr_write(vcpu, nseg_desc), |
4580 | &tss_segment_32.prev_task_link, | 5127 | &tss_segment_32.prev_task_link, |
4581 | sizeof tss_segment_32.prev_task_link)) | 5128 | sizeof tss_segment_32.prev_task_link)) |
4582 | goto out; | 5129 | goto out; |
@@ -4599,7 +5146,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4599 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | 5146 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); |
4600 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | 5147 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); |
4601 | 5148 | ||
4602 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); | 5149 | old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); |
4603 | 5150 | ||
4604 | /* FIXME: Handle errors. Failure to read either TSS or their | 5151 | /* FIXME: Handle errors. Failure to read either TSS or their |
4605 | * descriptors should generate a pagefault. | 5152 | * descriptors should generate a pagefault. |
@@ -4658,7 +5205,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
4658 | &nseg_desc); | 5205 | &nseg_desc); |
4659 | } | 5206 | } |
4660 | 5207 | ||
4661 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); | 5208 | kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); |
4662 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | 5209 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); |
4663 | tr_seg.type = 11; | 5210 | tr_seg.type = 11; |
4664 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 5211 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
@@ -4689,17 +5236,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4689 | 5236 | ||
4690 | kvm_set_cr8(vcpu, sregs->cr8); | 5237 | kvm_set_cr8(vcpu, sregs->cr8); |
4691 | 5238 | ||
4692 | mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer; | 5239 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
4693 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 5240 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
4694 | kvm_set_apic_base(vcpu, sregs->apic_base); | 5241 | kvm_set_apic_base(vcpu, sregs->apic_base); |
4695 | 5242 | ||
4696 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 5243 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
4697 | |||
4698 | mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0; | ||
4699 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 5244 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
4700 | vcpu->arch.cr0 = sregs->cr0; | 5245 | vcpu->arch.cr0 = sregs->cr0; |
4701 | 5246 | ||
4702 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 5247 | mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; |
4703 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5248 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
4704 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5249 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
4705 | load_pdptrs(vcpu, vcpu->arch.cr3); | 5250 | load_pdptrs(vcpu, vcpu->arch.cr3); |
@@ -4734,7 +5279,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
4734 | /* Older userspace won't unhalt the vcpu on reset. */ | 5279 | /* Older userspace won't unhalt the vcpu on reset. */ |
4735 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && | 5280 | if (kvm_vcpu_is_bsp(vcpu) && kvm_rip_read(vcpu) == 0xfff0 && |
4736 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && | 5281 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && |
4737 | !(vcpu->arch.cr0 & X86_CR0_PE)) | 5282 | !is_protmode(vcpu)) |
4738 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 5283 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
4739 | 5284 | ||
4740 | vcpu_put(vcpu); | 5285 | vcpu_put(vcpu); |
@@ -4832,11 +5377,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, | |||
4832 | { | 5377 | { |
4833 | unsigned long vaddr = tr->linear_address; | 5378 | unsigned long vaddr = tr->linear_address; |
4834 | gpa_t gpa; | 5379 | gpa_t gpa; |
5380 | int idx; | ||
4835 | 5381 | ||
4836 | vcpu_load(vcpu); | 5382 | vcpu_load(vcpu); |
4837 | down_read(&vcpu->kvm->slots_lock); | 5383 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
4838 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); | 5384 | gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL); |
4839 | up_read(&vcpu->kvm->slots_lock); | 5385 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
4840 | tr->physical_address = gpa; | 5386 | tr->physical_address = gpa; |
4841 | tr->valid = gpa != UNMAPPED_GVA; | 5387 | tr->valid = gpa != UNMAPPED_GVA; |
4842 | tr->writeable = 1; | 5388 | tr->writeable = 1; |
@@ -4917,14 +5463,14 @@ EXPORT_SYMBOL_GPL(fx_init); | |||
4917 | 5463 | ||
4918 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | 5464 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) |
4919 | { | 5465 | { |
4920 | if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) | 5466 | if (vcpu->guest_fpu_loaded) |
4921 | return; | 5467 | return; |
4922 | 5468 | ||
4923 | vcpu->guest_fpu_loaded = 1; | 5469 | vcpu->guest_fpu_loaded = 1; |
4924 | kvm_fx_save(&vcpu->arch.host_fx_image); | 5470 | kvm_fx_save(&vcpu->arch.host_fx_image); |
4925 | kvm_fx_restore(&vcpu->arch.guest_fx_image); | 5471 | kvm_fx_restore(&vcpu->arch.guest_fx_image); |
5472 | trace_kvm_fpu(1); | ||
4926 | } | 5473 | } |
4927 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | ||
4928 | 5474 | ||
4929 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | 5475 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) |
4930 | { | 5476 | { |
@@ -4935,8 +5481,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
4935 | kvm_fx_save(&vcpu->arch.guest_fx_image); | 5481 | kvm_fx_save(&vcpu->arch.guest_fx_image); |
4936 | kvm_fx_restore(&vcpu->arch.host_fx_image); | 5482 | kvm_fx_restore(&vcpu->arch.host_fx_image); |
4937 | ++vcpu->stat.fpu_reload; | 5483 | ++vcpu->stat.fpu_reload; |
5484 | set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); | ||
5485 | trace_kvm_fpu(0); | ||
4938 | } | 5486 | } |
4939 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | ||
4940 | 5487 | ||
4941 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5488 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
4942 | { | 5489 | { |
@@ -5088,11 +5635,13 @@ fail: | |||
5088 | 5635 | ||
5089 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | 5636 | void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) |
5090 | { | 5637 | { |
5638 | int idx; | ||
5639 | |||
5091 | kfree(vcpu->arch.mce_banks); | 5640 | kfree(vcpu->arch.mce_banks); |
5092 | kvm_free_lapic(vcpu); | 5641 | kvm_free_lapic(vcpu); |
5093 | down_read(&vcpu->kvm->slots_lock); | 5642 | idx = srcu_read_lock(&vcpu->kvm->srcu); |
5094 | kvm_mmu_destroy(vcpu); | 5643 | kvm_mmu_destroy(vcpu); |
5095 | up_read(&vcpu->kvm->slots_lock); | 5644 | srcu_read_unlock(&vcpu->kvm->srcu, idx); |
5096 | free_page((unsigned long)vcpu->arch.pio_data); | 5645 | free_page((unsigned long)vcpu->arch.pio_data); |
5097 | } | 5646 | } |
5098 | 5647 | ||
@@ -5103,6 +5652,12 @@ struct kvm *kvm_arch_create_vm(void) | |||
5103 | if (!kvm) | 5652 | if (!kvm) |
5104 | return ERR_PTR(-ENOMEM); | 5653 | return ERR_PTR(-ENOMEM); |
5105 | 5654 | ||
5655 | kvm->arch.aliases = kzalloc(sizeof(struct kvm_mem_aliases), GFP_KERNEL); | ||
5656 | if (!kvm->arch.aliases) { | ||
5657 | kfree(kvm); | ||
5658 | return ERR_PTR(-ENOMEM); | ||
5659 | } | ||
5660 | |||
5106 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 5661 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
5107 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 5662 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
5108 | 5663 | ||
@@ -5159,16 +5714,18 @@ void kvm_arch_destroy_vm(struct kvm *kvm) | |||
5159 | put_page(kvm->arch.apic_access_page); | 5714 | put_page(kvm->arch.apic_access_page); |
5160 | if (kvm->arch.ept_identity_pagetable) | 5715 | if (kvm->arch.ept_identity_pagetable) |
5161 | put_page(kvm->arch.ept_identity_pagetable); | 5716 | put_page(kvm->arch.ept_identity_pagetable); |
5717 | cleanup_srcu_struct(&kvm->srcu); | ||
5718 | kfree(kvm->arch.aliases); | ||
5162 | kfree(kvm); | 5719 | kfree(kvm); |
5163 | } | 5720 | } |
5164 | 5721 | ||
5165 | int kvm_arch_set_memory_region(struct kvm *kvm, | 5722 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
5166 | struct kvm_userspace_memory_region *mem, | 5723 | struct kvm_memory_slot *memslot, |
5167 | struct kvm_memory_slot old, | 5724 | struct kvm_memory_slot old, |
5725 | struct kvm_userspace_memory_region *mem, | ||
5168 | int user_alloc) | 5726 | int user_alloc) |
5169 | { | 5727 | { |
5170 | int npages = mem->memory_size >> PAGE_SHIFT; | 5728 | int npages = memslot->npages; |
5171 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | ||
5172 | 5729 | ||
5173 | /*To keep backward compatibility with older userspace, | 5730 | /*To keep backward compatibility with older userspace, |
5174 | *x86 needs to hanlde !user_alloc case. | 5731 | *x86 needs to hanlde !user_alloc case. |
@@ -5188,26 +5745,35 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
5188 | if (IS_ERR((void *)userspace_addr)) | 5745 | if (IS_ERR((void *)userspace_addr)) |
5189 | return PTR_ERR((void *)userspace_addr); | 5746 | return PTR_ERR((void *)userspace_addr); |
5190 | 5747 | ||
5191 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
5192 | spin_lock(&kvm->mmu_lock); | ||
5193 | memslot->userspace_addr = userspace_addr; | 5748 | memslot->userspace_addr = userspace_addr; |
5194 | spin_unlock(&kvm->mmu_lock); | ||
5195 | } else { | ||
5196 | if (!old.user_alloc && old.rmap) { | ||
5197 | int ret; | ||
5198 | |||
5199 | down_write(&current->mm->mmap_sem); | ||
5200 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5201 | old.npages * PAGE_SIZE); | ||
5202 | up_write(&current->mm->mmap_sem); | ||
5203 | if (ret < 0) | ||
5204 | printk(KERN_WARNING | ||
5205 | "kvm_vm_ioctl_set_memory_region: " | ||
5206 | "failed to munmap memory\n"); | ||
5207 | } | ||
5208 | } | 5749 | } |
5209 | } | 5750 | } |
5210 | 5751 | ||
5752 | |||
5753 | return 0; | ||
5754 | } | ||
5755 | |||
5756 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
5757 | struct kvm_userspace_memory_region *mem, | ||
5758 | struct kvm_memory_slot old, | ||
5759 | int user_alloc) | ||
5760 | { | ||
5761 | |||
5762 | int npages = mem->memory_size >> PAGE_SHIFT; | ||
5763 | |||
5764 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | ||
5765 | int ret; | ||
5766 | |||
5767 | down_write(&current->mm->mmap_sem); | ||
5768 | ret = do_munmap(current->mm, old.userspace_addr, | ||
5769 | old.npages * PAGE_SIZE); | ||
5770 | up_write(&current->mm->mmap_sem); | ||
5771 | if (ret < 0) | ||
5772 | printk(KERN_WARNING | ||
5773 | "kvm_vm_ioctl_set_memory_region: " | ||
5774 | "failed to munmap memory\n"); | ||
5775 | } | ||
5776 | |||
5211 | spin_lock(&kvm->mmu_lock); | 5777 | spin_lock(&kvm->mmu_lock); |
5212 | if (!kvm->arch.n_requested_mmu_pages) { | 5778 | if (!kvm->arch.n_requested_mmu_pages) { |
5213 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 5779 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
@@ -5216,8 +5782,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
5216 | 5782 | ||
5217 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 5783 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
5218 | spin_unlock(&kvm->mmu_lock); | 5784 | spin_unlock(&kvm->mmu_lock); |
5219 | |||
5220 | return 0; | ||
5221 | } | 5785 | } |
5222 | 5786 | ||
5223 | void kvm_arch_flush_shadow(struct kvm *kvm) | 5787 | void kvm_arch_flush_shadow(struct kvm *kvm) |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 5eadea585d2a..2d101639bd8d 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define ARCH_X86_KVM_X86_H | 2 | #define ARCH_X86_KVM_X86_H |
3 | 3 | ||
4 | #include <linux/kvm_host.h> | 4 | #include <linux/kvm_host.h> |
5 | #include "kvm_cache_regs.h" | ||
5 | 6 | ||
6 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | 7 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) |
7 | { | 8 | { |
@@ -35,4 +36,33 @@ static inline bool kvm_exception_is_soft(unsigned int nr) | |||
35 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | 36 | struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, |
36 | u32 function, u32 index); | 37 | u32 function, u32 index); |
37 | 38 | ||
39 | static inline bool is_protmode(struct kvm_vcpu *vcpu) | ||
40 | { | ||
41 | return kvm_read_cr0_bits(vcpu, X86_CR0_PE); | ||
42 | } | ||
43 | |||
44 | static inline int is_long_mode(struct kvm_vcpu *vcpu) | ||
45 | { | ||
46 | #ifdef CONFIG_X86_64 | ||
47 | return vcpu->arch.efer & EFER_LMA; | ||
48 | #else | ||
49 | return 0; | ||
50 | #endif | ||
51 | } | ||
52 | |||
53 | static inline int is_pae(struct kvm_vcpu *vcpu) | ||
54 | { | ||
55 | return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); | ||
56 | } | ||
57 | |||
58 | static inline int is_pse(struct kvm_vcpu *vcpu) | ||
59 | { | ||
60 | return kvm_read_cr4_bits(vcpu, X86_CR4_PSE); | ||
61 | } | ||
62 | |||
63 | static inline int is_paging(struct kvm_vcpu *vcpu) | ||
64 | { | ||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | ||
66 | } | ||
67 | |||
38 | #endif | 68 | #endif |
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index a24de0b1858e..60df9c84ecae 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
@@ -103,7 +103,7 @@ struct kvm_userspace_memory_region { | |||
103 | 103 | ||
104 | /* for kvm_memory_region::flags */ | 104 | /* for kvm_memory_region::flags */ |
105 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL | 105 | #define KVM_MEM_LOG_DIRTY_PAGES 1UL |
106 | 106 | #define KVM_MEMSLOT_INVALID (1UL << 1) | |
107 | 107 | ||
108 | /* for KVM_IRQ_LINE */ | 108 | /* for KVM_IRQ_LINE */ |
109 | struct kvm_irq_level { | 109 | struct kvm_irq_level { |
@@ -497,6 +497,11 @@ struct kvm_ioeventfd { | |||
497 | #endif | 497 | #endif |
498 | #define KVM_CAP_S390_PSW 42 | 498 | #define KVM_CAP_S390_PSW 42 |
499 | #define KVM_CAP_PPC_SEGSTATE 43 | 499 | #define KVM_CAP_PPC_SEGSTATE 43 |
500 | #define KVM_CAP_HYPERV 44 | ||
501 | #define KVM_CAP_HYPERV_VAPIC 45 | ||
502 | #define KVM_CAP_HYPERV_SPIN 46 | ||
503 | #define KVM_CAP_PCI_SEGMENT 47 | ||
504 | #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 | ||
500 | 505 | ||
501 | #ifdef KVM_CAP_IRQ_ROUTING | 506 | #ifdef KVM_CAP_IRQ_ROUTING |
502 | 507 | ||
@@ -691,8 +696,9 @@ struct kvm_assigned_pci_dev { | |||
691 | __u32 busnr; | 696 | __u32 busnr; |
692 | __u32 devfn; | 697 | __u32 devfn; |
693 | __u32 flags; | 698 | __u32 flags; |
699 | __u32 segnr; | ||
694 | union { | 700 | union { |
695 | __u32 reserved[12]; | 701 | __u32 reserved[11]; |
696 | }; | 702 | }; |
697 | }; | 703 | }; |
698 | 704 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bd5a616d9373..a3fd0f91d943 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -38,6 +38,7 @@ | |||
38 | #define KVM_REQ_MMU_SYNC 7 | 38 | #define KVM_REQ_MMU_SYNC 7 |
39 | #define KVM_REQ_KVMCLOCK_UPDATE 8 | 39 | #define KVM_REQ_KVMCLOCK_UPDATE 8 |
40 | #define KVM_REQ_KICK 9 | 40 | #define KVM_REQ_KICK 9 |
41 | #define KVM_REQ_DEACTIVATE_FPU 10 | ||
41 | 42 | ||
42 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 | 43 | #define KVM_USERSPACE_IRQ_SOURCE_ID 0 |
43 | 44 | ||
@@ -57,20 +58,20 @@ struct kvm_io_bus { | |||
57 | struct kvm_io_device *devs[NR_IOBUS_DEVS]; | 58 | struct kvm_io_device *devs[NR_IOBUS_DEVS]; |
58 | }; | 59 | }; |
59 | 60 | ||
60 | void kvm_io_bus_init(struct kvm_io_bus *bus); | 61 | enum kvm_bus { |
61 | void kvm_io_bus_destroy(struct kvm_io_bus *bus); | 62 | KVM_MMIO_BUS, |
62 | int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len, | 63 | KVM_PIO_BUS, |
63 | const void *val); | 64 | KVM_NR_BUSES |
64 | int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, | 65 | }; |
66 | |||
67 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | ||
68 | int len, const void *val); | ||
69 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, | ||
65 | void *val); | 70 | void *val); |
66 | int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, | 71 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
67 | struct kvm_io_device *dev); | ||
68 | int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, | ||
69 | struct kvm_io_device *dev); | 72 | struct kvm_io_device *dev); |
70 | void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, | 73 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
71 | struct kvm_io_device *dev); | 74 | struct kvm_io_device *dev); |
72 | void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus, | ||
73 | struct kvm_io_device *dev); | ||
74 | 75 | ||
75 | struct kvm_vcpu { | 76 | struct kvm_vcpu { |
76 | struct kvm *kvm; | 77 | struct kvm *kvm; |
@@ -83,6 +84,8 @@ struct kvm_vcpu { | |||
83 | struct kvm_run *run; | 84 | struct kvm_run *run; |
84 | unsigned long requests; | 85 | unsigned long requests; |
85 | unsigned long guest_debug; | 86 | unsigned long guest_debug; |
87 | int srcu_idx; | ||
88 | |||
86 | int fpu_active; | 89 | int fpu_active; |
87 | int guest_fpu_loaded; | 90 | int guest_fpu_loaded; |
88 | wait_queue_head_t wq; | 91 | wait_queue_head_t wq; |
@@ -150,14 +153,19 @@ struct kvm_irq_routing_table {}; | |||
150 | 153 | ||
151 | #endif | 154 | #endif |
152 | 155 | ||
153 | struct kvm { | 156 | struct kvm_memslots { |
154 | spinlock_t mmu_lock; | ||
155 | spinlock_t requests_lock; | ||
156 | struct rw_semaphore slots_lock; | ||
157 | struct mm_struct *mm; /* userspace tied to this vm */ | ||
158 | int nmemslots; | 157 | int nmemslots; |
159 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + | 158 | struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + |
160 | KVM_PRIVATE_MEM_SLOTS]; | 159 | KVM_PRIVATE_MEM_SLOTS]; |
160 | }; | ||
161 | |||
162 | struct kvm { | ||
163 | spinlock_t mmu_lock; | ||
164 | raw_spinlock_t requests_lock; | ||
165 | struct mutex slots_lock; | ||
166 | struct mm_struct *mm; /* userspace tied to this vm */ | ||
167 | struct kvm_memslots *memslots; | ||
168 | struct srcu_struct srcu; | ||
161 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 169 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
162 | u32 bsp_vcpu_id; | 170 | u32 bsp_vcpu_id; |
163 | struct kvm_vcpu *bsp_vcpu; | 171 | struct kvm_vcpu *bsp_vcpu; |
@@ -166,8 +174,7 @@ struct kvm { | |||
166 | atomic_t online_vcpus; | 174 | atomic_t online_vcpus; |
167 | struct list_head vm_list; | 175 | struct list_head vm_list; |
168 | struct mutex lock; | 176 | struct mutex lock; |
169 | struct kvm_io_bus mmio_bus; | 177 | struct kvm_io_bus *buses[KVM_NR_BUSES]; |
170 | struct kvm_io_bus pio_bus; | ||
171 | #ifdef CONFIG_HAVE_KVM_EVENTFD | 178 | #ifdef CONFIG_HAVE_KVM_EVENTFD |
172 | struct { | 179 | struct { |
173 | spinlock_t lock; | 180 | spinlock_t lock; |
@@ -249,13 +256,20 @@ int kvm_set_memory_region(struct kvm *kvm, | |||
249 | int __kvm_set_memory_region(struct kvm *kvm, | 256 | int __kvm_set_memory_region(struct kvm *kvm, |
250 | struct kvm_userspace_memory_region *mem, | 257 | struct kvm_userspace_memory_region *mem, |
251 | int user_alloc); | 258 | int user_alloc); |
252 | int kvm_arch_set_memory_region(struct kvm *kvm, | 259 | int kvm_arch_prepare_memory_region(struct kvm *kvm, |
260 | struct kvm_memory_slot *memslot, | ||
261 | struct kvm_memory_slot old, | ||
262 | struct kvm_userspace_memory_region *mem, | ||
263 | int user_alloc); | ||
264 | void kvm_arch_commit_memory_region(struct kvm *kvm, | ||
253 | struct kvm_userspace_memory_region *mem, | 265 | struct kvm_userspace_memory_region *mem, |
254 | struct kvm_memory_slot old, | 266 | struct kvm_memory_slot old, |
255 | int user_alloc); | 267 | int user_alloc); |
256 | void kvm_disable_largepages(void); | 268 | void kvm_disable_largepages(void); |
257 | void kvm_arch_flush_shadow(struct kvm *kvm); | 269 | void kvm_arch_flush_shadow(struct kvm *kvm); |
258 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); | 270 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn); |
271 | gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn); | ||
272 | |||
259 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); | 273 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); |
260 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); | 274 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); |
261 | void kvm_release_page_clean(struct page *page); | 275 | void kvm_release_page_clean(struct page *page); |
@@ -264,6 +278,9 @@ void kvm_set_page_dirty(struct page *page); | |||
264 | void kvm_set_page_accessed(struct page *page); | 278 | void kvm_set_page_accessed(struct page *page); |
265 | 279 | ||
266 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); | 280 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); |
281 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | ||
282 | struct kvm_memory_slot *slot, gfn_t gfn); | ||
283 | int memslot_id(struct kvm *kvm, gfn_t gfn); | ||
267 | void kvm_release_pfn_dirty(pfn_t); | 284 | void kvm_release_pfn_dirty(pfn_t); |
268 | void kvm_release_pfn_clean(pfn_t pfn); | 285 | void kvm_release_pfn_clean(pfn_t pfn); |
269 | void kvm_set_pfn_dirty(pfn_t pfn); | 286 | void kvm_set_pfn_dirty(pfn_t pfn); |
@@ -283,6 +300,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); | |||
283 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); | 300 | int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); |
284 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); | 301 | struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); |
285 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); | 302 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); |
303 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); | ||
286 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); | 304 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); |
287 | 305 | ||
288 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); | 306 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); |
@@ -383,6 +401,7 @@ struct kvm_assigned_dev_kernel { | |||
383 | struct work_struct interrupt_work; | 401 | struct work_struct interrupt_work; |
384 | struct list_head list; | 402 | struct list_head list; |
385 | int assigned_dev_id; | 403 | int assigned_dev_id; |
404 | int host_segnr; | ||
386 | int host_busnr; | 405 | int host_busnr; |
387 | int host_devfn; | 406 | int host_devfn; |
388 | unsigned int entries_nr; | 407 | unsigned int entries_nr; |
@@ -429,8 +448,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); | |||
429 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 | 448 | #define KVM_IOMMU_CACHE_COHERENCY 0x1 |
430 | 449 | ||
431 | #ifdef CONFIG_IOMMU_API | 450 | #ifdef CONFIG_IOMMU_API |
432 | int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, | 451 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); |
433 | unsigned long npages); | ||
434 | int kvm_iommu_map_guest(struct kvm *kvm); | 452 | int kvm_iommu_map_guest(struct kvm *kvm); |
435 | int kvm_iommu_unmap_guest(struct kvm *kvm); | 453 | int kvm_iommu_unmap_guest(struct kvm *kvm); |
436 | int kvm_assign_device(struct kvm *kvm, | 454 | int kvm_assign_device(struct kvm *kvm, |
@@ -480,11 +498,6 @@ static inline void kvm_guest_exit(void) | |||
480 | current->flags &= ~PF_VCPU; | 498 | current->flags &= ~PF_VCPU; |
481 | } | 499 | } |
482 | 500 | ||
483 | static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) | ||
484 | { | ||
485 | return slot - kvm->memslots; | ||
486 | } | ||
487 | |||
488 | static inline gpa_t gfn_to_gpa(gfn_t gfn) | 501 | static inline gpa_t gfn_to_gpa(gfn_t gfn) |
489 | { | 502 | { |
490 | return (gpa_t)gfn << PAGE_SHIFT; | 503 | return (gpa_t)gfn << PAGE_SHIFT; |
@@ -532,6 +545,10 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se | |||
532 | } | 545 | } |
533 | #endif | 546 | #endif |
534 | 547 | ||
548 | #ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION | ||
549 | #define unalias_gfn_instantiation unalias_gfn | ||
550 | #endif | ||
551 | |||
535 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 552 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
536 | 553 | ||
537 | #define KVM_MAX_IRQ_ROUTES 1024 | 554 | #define KVM_MAX_IRQ_ROUTES 1024 |
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index dbe108455275..b17d49dfc3ef 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h | |||
@@ -145,6 +145,47 @@ TRACE_EVENT(kvm_mmio, | |||
145 | __entry->len, __entry->gpa, __entry->val) | 145 | __entry->len, __entry->gpa, __entry->val) |
146 | ); | 146 | ); |
147 | 147 | ||
148 | #define kvm_fpu_load_symbol \ | ||
149 | {0, "unload"}, \ | ||
150 | {1, "load"} | ||
151 | |||
152 | TRACE_EVENT(kvm_fpu, | ||
153 | TP_PROTO(int load), | ||
154 | TP_ARGS(load), | ||
155 | |||
156 | TP_STRUCT__entry( | ||
157 | __field( u32, load ) | ||
158 | ), | ||
159 | |||
160 | TP_fast_assign( | ||
161 | __entry->load = load; | ||
162 | ), | ||
163 | |||
164 | TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol)) | ||
165 | ); | ||
166 | |||
167 | TRACE_EVENT(kvm_age_page, | ||
168 | TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref), | ||
169 | TP_ARGS(hva, slot, ref), | ||
170 | |||
171 | TP_STRUCT__entry( | ||
172 | __field( u64, hva ) | ||
173 | __field( u64, gfn ) | ||
174 | __field( u8, referenced ) | ||
175 | ), | ||
176 | |||
177 | TP_fast_assign( | ||
178 | __entry->hva = hva; | ||
179 | __entry->gfn = | ||
180 | slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT); | ||
181 | __entry->referenced = ref; | ||
182 | ), | ||
183 | |||
184 | TP_printk("hva %llx gfn %llx %s", | ||
185 | __entry->hva, __entry->gfn, | ||
186 | __entry->referenced ? "YOUNG" : "OLD") | ||
187 | ); | ||
188 | |||
148 | #endif /* _TRACE_KVM_MAIN_H */ | 189 | #endif /* _TRACE_KVM_MAIN_H */ |
149 | 190 | ||
150 | /* This part must be outside protection */ | 191 | /* This part must be outside protection */ |
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index daece36c0a57..7f1178f6b839 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig | |||
@@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD | |||
12 | 12 | ||
13 | config KVM_APIC_ARCHITECTURE | 13 | config KVM_APIC_ARCHITECTURE |
14 | bool | 14 | bool |
15 | |||
16 | config KVM_MMIO | ||
17 | bool | ||
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index f73de631e3ee..057e2cca6af5 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c | |||
@@ -504,12 +504,12 @@ out: | |||
504 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | 504 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, |
505 | struct kvm_assigned_pci_dev *assigned_dev) | 505 | struct kvm_assigned_pci_dev *assigned_dev) |
506 | { | 506 | { |
507 | int r = 0; | 507 | int r = 0, idx; |
508 | struct kvm_assigned_dev_kernel *match; | 508 | struct kvm_assigned_dev_kernel *match; |
509 | struct pci_dev *dev; | 509 | struct pci_dev *dev; |
510 | 510 | ||
511 | mutex_lock(&kvm->lock); | 511 | mutex_lock(&kvm->lock); |
512 | down_read(&kvm->slots_lock); | 512 | idx = srcu_read_lock(&kvm->srcu); |
513 | 513 | ||
514 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | 514 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, |
515 | assigned_dev->assigned_dev_id); | 515 | assigned_dev->assigned_dev_id); |
@@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
526 | r = -ENOMEM; | 526 | r = -ENOMEM; |
527 | goto out; | 527 | goto out; |
528 | } | 528 | } |
529 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | 529 | dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, |
530 | assigned_dev->busnr, | ||
530 | assigned_dev->devfn); | 531 | assigned_dev->devfn); |
531 | if (!dev) { | 532 | if (!dev) { |
532 | printk(KERN_INFO "%s: host device not found\n", __func__); | 533 | printk(KERN_INFO "%s: host device not found\n", __func__); |
@@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
548 | pci_reset_function(dev); | 549 | pci_reset_function(dev); |
549 | 550 | ||
550 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | 551 | match->assigned_dev_id = assigned_dev->assigned_dev_id; |
552 | match->host_segnr = assigned_dev->segnr; | ||
551 | match->host_busnr = assigned_dev->busnr; | 553 | match->host_busnr = assigned_dev->busnr; |
552 | match->host_devfn = assigned_dev->devfn; | 554 | match->host_devfn = assigned_dev->devfn; |
553 | match->flags = assigned_dev->flags; | 555 | match->flags = assigned_dev->flags; |
@@ -573,7 +575,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | |||
573 | } | 575 | } |
574 | 576 | ||
575 | out: | 577 | out: |
576 | up_read(&kvm->slots_lock); | 578 | srcu_read_unlock(&kvm->srcu, idx); |
577 | mutex_unlock(&kvm->lock); | 579 | mutex_unlock(&kvm->lock); |
578 | return r; | 580 | return r; |
579 | out_list_del: | 581 | out_list_del: |
@@ -585,7 +587,7 @@ out_put: | |||
585 | pci_dev_put(dev); | 587 | pci_dev_put(dev); |
586 | out_free: | 588 | out_free: |
587 | kfree(match); | 589 | kfree(match); |
588 | up_read(&kvm->slots_lock); | 590 | srcu_read_unlock(&kvm->srcu, idx); |
589 | mutex_unlock(&kvm->lock); | 591 | mutex_unlock(&kvm->lock); |
590 | return r; | 592 | return r; |
591 | } | 593 | } |
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 04d69cd7049b..5169736377a3 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c | |||
@@ -92,41 +92,64 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { | |||
92 | int kvm_coalesced_mmio_init(struct kvm *kvm) | 92 | int kvm_coalesced_mmio_init(struct kvm *kvm) |
93 | { | 93 | { |
94 | struct kvm_coalesced_mmio_dev *dev; | 94 | struct kvm_coalesced_mmio_dev *dev; |
95 | struct page *page; | ||
95 | int ret; | 96 | int ret; |
96 | 97 | ||
98 | ret = -ENOMEM; | ||
99 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | ||
100 | if (!page) | ||
101 | goto out_err; | ||
102 | kvm->coalesced_mmio_ring = page_address(page); | ||
103 | |||
104 | ret = -ENOMEM; | ||
97 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); | 105 | dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); |
98 | if (!dev) | 106 | if (!dev) |
99 | return -ENOMEM; | 107 | goto out_free_page; |
100 | spin_lock_init(&dev->lock); | 108 | spin_lock_init(&dev->lock); |
101 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); | 109 | kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); |
102 | dev->kvm = kvm; | 110 | dev->kvm = kvm; |
103 | kvm->coalesced_mmio_dev = dev; | 111 | kvm->coalesced_mmio_dev = dev; |
104 | 112 | ||
105 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); | 113 | mutex_lock(&kvm->slots_lock); |
114 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); | ||
115 | mutex_unlock(&kvm->slots_lock); | ||
106 | if (ret < 0) | 116 | if (ret < 0) |
107 | kfree(dev); | 117 | goto out_free_dev; |
118 | |||
119 | return ret; | ||
108 | 120 | ||
121 | out_free_dev: | ||
122 | kfree(dev); | ||
123 | out_free_page: | ||
124 | __free_page(page); | ||
125 | out_err: | ||
109 | return ret; | 126 | return ret; |
110 | } | 127 | } |
111 | 128 | ||
129 | void kvm_coalesced_mmio_free(struct kvm *kvm) | ||
130 | { | ||
131 | if (kvm->coalesced_mmio_ring) | ||
132 | free_page((unsigned long)kvm->coalesced_mmio_ring); | ||
133 | } | ||
134 | |||
112 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 135 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
113 | struct kvm_coalesced_mmio_zone *zone) | 136 | struct kvm_coalesced_mmio_zone *zone) |
114 | { | 137 | { |
115 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; | 138 | struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; |
116 | 139 | ||
117 | if (dev == NULL) | 140 | if (dev == NULL) |
118 | return -EINVAL; | 141 | return -EINVAL; |
119 | 142 | ||
120 | down_write(&kvm->slots_lock); | 143 | mutex_lock(&kvm->slots_lock); |
121 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { | 144 | if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { |
122 | up_write(&kvm->slots_lock); | 145 | mutex_unlock(&kvm->slots_lock); |
123 | return -ENOBUFS; | 146 | return -ENOBUFS; |
124 | } | 147 | } |
125 | 148 | ||
126 | dev->zone[dev->nb_zones] = *zone; | 149 | dev->zone[dev->nb_zones] = *zone; |
127 | dev->nb_zones++; | 150 | dev->nb_zones++; |
128 | 151 | ||
129 | up_write(&kvm->slots_lock); | 152 | mutex_unlock(&kvm->slots_lock); |
130 | return 0; | 153 | return 0; |
131 | } | 154 | } |
132 | 155 | ||
@@ -140,10 +163,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
140 | if (dev == NULL) | 163 | if (dev == NULL) |
141 | return -EINVAL; | 164 | return -EINVAL; |
142 | 165 | ||
143 | down_write(&kvm->slots_lock); | 166 | mutex_lock(&kvm->slots_lock); |
144 | 167 | ||
145 | i = dev->nb_zones; | 168 | i = dev->nb_zones; |
146 | while(i) { | 169 | while (i) { |
147 | z = &dev->zone[i - 1]; | 170 | z = &dev->zone[i - 1]; |
148 | 171 | ||
149 | /* unregister all zones | 172 | /* unregister all zones |
@@ -158,7 +181,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | |||
158 | i--; | 181 | i--; |
159 | } | 182 | } |
160 | 183 | ||
161 | up_write(&kvm->slots_lock); | 184 | mutex_unlock(&kvm->slots_lock); |
162 | 185 | ||
163 | return 0; | 186 | return 0; |
164 | } | 187 | } |
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 4b49f27fa31e..8a5959e3535f 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h | |||
@@ -1,3 +1,6 @@ | |||
1 | #ifndef __KVM_COALESCED_MMIO_H__ | ||
2 | #define __KVM_COALESCED_MMIO_H__ | ||
3 | |||
1 | /* | 4 | /* |
2 | * KVM coalesced MMIO | 5 | * KVM coalesced MMIO |
3 | * | 6 | * |
@@ -7,6 +10,8 @@ | |||
7 | * | 10 | * |
8 | */ | 11 | */ |
9 | 12 | ||
13 | #ifdef CONFIG_KVM_MMIO | ||
14 | |||
10 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 | 15 | #define KVM_COALESCED_MMIO_ZONE_MAX 100 |
11 | 16 | ||
12 | struct kvm_coalesced_mmio_dev { | 17 | struct kvm_coalesced_mmio_dev { |
@@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev { | |||
18 | }; | 23 | }; |
19 | 24 | ||
20 | int kvm_coalesced_mmio_init(struct kvm *kvm); | 25 | int kvm_coalesced_mmio_init(struct kvm *kvm); |
26 | void kvm_coalesced_mmio_free(struct kvm *kvm); | ||
21 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, | 27 | int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, |
22 | struct kvm_coalesced_mmio_zone *zone); | 28 | struct kvm_coalesced_mmio_zone *zone); |
23 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, | 29 | int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, |
24 | struct kvm_coalesced_mmio_zone *zone); | 30 | struct kvm_coalesced_mmio_zone *zone); |
31 | |||
32 | #else | ||
33 | |||
34 | static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; } | ||
35 | static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { } | ||
36 | |||
37 | #endif | ||
38 | |||
39 | #endif | ||
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a9d3fc6c681c..7016319b1ec0 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
@@ -47,7 +47,6 @@ struct _irqfd { | |||
47 | int gsi; | 47 | int gsi; |
48 | struct list_head list; | 48 | struct list_head list; |
49 | poll_table pt; | 49 | poll_table pt; |
50 | wait_queue_head_t *wqh; | ||
51 | wait_queue_t wait; | 50 | wait_queue_t wait; |
52 | struct work_struct inject; | 51 | struct work_struct inject; |
53 | struct work_struct shutdown; | 52 | struct work_struct shutdown; |
@@ -159,8 +158,6 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, | |||
159 | poll_table *pt) | 158 | poll_table *pt) |
160 | { | 159 | { |
161 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); | 160 | struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); |
162 | |||
163 | irqfd->wqh = wqh; | ||
164 | add_wait_queue(wqh, &irqfd->wait); | 161 | add_wait_queue(wqh, &irqfd->wait); |
165 | } | 162 | } |
166 | 163 | ||
@@ -463,7 +460,7 @@ static int | |||
463 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 460 | kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
464 | { | 461 | { |
465 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 462 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; |
466 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | 463 | enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; |
467 | struct _ioeventfd *p; | 464 | struct _ioeventfd *p; |
468 | struct eventfd_ctx *eventfd; | 465 | struct eventfd_ctx *eventfd; |
469 | int ret; | 466 | int ret; |
@@ -508,7 +505,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
508 | else | 505 | else |
509 | p->wildcard = true; | 506 | p->wildcard = true; |
510 | 507 | ||
511 | down_write(&kvm->slots_lock); | 508 | mutex_lock(&kvm->slots_lock); |
512 | 509 | ||
513 | /* Verify that there isnt a match already */ | 510 | /* Verify that there isnt a match already */ |
514 | if (ioeventfd_check_collision(kvm, p)) { | 511 | if (ioeventfd_check_collision(kvm, p)) { |
@@ -518,18 +515,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
518 | 515 | ||
519 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); | 516 | kvm_iodevice_init(&p->dev, &ioeventfd_ops); |
520 | 517 | ||
521 | ret = __kvm_io_bus_register_dev(bus, &p->dev); | 518 | ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); |
522 | if (ret < 0) | 519 | if (ret < 0) |
523 | goto unlock_fail; | 520 | goto unlock_fail; |
524 | 521 | ||
525 | list_add_tail(&p->list, &kvm->ioeventfds); | 522 | list_add_tail(&p->list, &kvm->ioeventfds); |
526 | 523 | ||
527 | up_write(&kvm->slots_lock); | 524 | mutex_unlock(&kvm->slots_lock); |
528 | 525 | ||
529 | return 0; | 526 | return 0; |
530 | 527 | ||
531 | unlock_fail: | 528 | unlock_fail: |
532 | up_write(&kvm->slots_lock); | 529 | mutex_unlock(&kvm->slots_lock); |
533 | 530 | ||
534 | fail: | 531 | fail: |
535 | kfree(p); | 532 | kfree(p); |
@@ -542,7 +539,7 @@ static int | |||
542 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | 539 | kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) |
543 | { | 540 | { |
544 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; | 541 | int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO; |
545 | struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus; | 542 | enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS; |
546 | struct _ioeventfd *p, *tmp; | 543 | struct _ioeventfd *p, *tmp; |
547 | struct eventfd_ctx *eventfd; | 544 | struct eventfd_ctx *eventfd; |
548 | int ret = -ENOENT; | 545 | int ret = -ENOENT; |
@@ -551,7 +548,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
551 | if (IS_ERR(eventfd)) | 548 | if (IS_ERR(eventfd)) |
552 | return PTR_ERR(eventfd); | 549 | return PTR_ERR(eventfd); |
553 | 550 | ||
554 | down_write(&kvm->slots_lock); | 551 | mutex_lock(&kvm->slots_lock); |
555 | 552 | ||
556 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { | 553 | list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { |
557 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); | 554 | bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH); |
@@ -565,13 +562,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) | |||
565 | if (!p->wildcard && p->datamatch != args->datamatch) | 562 | if (!p->wildcard && p->datamatch != args->datamatch) |
566 | continue; | 563 | continue; |
567 | 564 | ||
568 | __kvm_io_bus_unregister_dev(bus, &p->dev); | 565 | kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); |
569 | ioeventfd_release(p); | 566 | ioeventfd_release(p); |
570 | ret = 0; | 567 | ret = 0; |
571 | break; | 568 | break; |
572 | } | 569 | } |
573 | 570 | ||
574 | up_write(&kvm->slots_lock); | 571 | mutex_unlock(&kvm->slots_lock); |
575 | 572 | ||
576 | eventfd_ctx_put(eventfd); | 573 | eventfd_ctx_put(eventfd); |
577 | 574 | ||
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 38a2d20b89de..3db15a807f80 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
@@ -100,6 +100,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) | |||
100 | return injected; | 100 | return injected; |
101 | } | 101 | } |
102 | 102 | ||
103 | static void update_handled_vectors(struct kvm_ioapic *ioapic) | ||
104 | { | ||
105 | DECLARE_BITMAP(handled_vectors, 256); | ||
106 | int i; | ||
107 | |||
108 | memset(handled_vectors, 0, sizeof(handled_vectors)); | ||
109 | for (i = 0; i < IOAPIC_NUM_PINS; ++i) | ||
110 | __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); | ||
111 | memcpy(ioapic->handled_vectors, handled_vectors, | ||
112 | sizeof(handled_vectors)); | ||
113 | smp_wmb(); | ||
114 | } | ||
115 | |||
103 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | 116 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) |
104 | { | 117 | { |
105 | unsigned index; | 118 | unsigned index; |
@@ -134,6 +147,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) | |||
134 | e->bits |= (u32) val; | 147 | e->bits |= (u32) val; |
135 | e->fields.remote_irr = 0; | 148 | e->fields.remote_irr = 0; |
136 | } | 149 | } |
150 | update_handled_vectors(ioapic); | ||
137 | mask_after = e->fields.mask; | 151 | mask_after = e->fields.mask; |
138 | if (mask_before != mask_after) | 152 | if (mask_before != mask_after) |
139 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); | 153 | kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); |
@@ -241,6 +255,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | |||
241 | { | 255 | { |
242 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 256 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
243 | 257 | ||
258 | smp_rmb(); | ||
259 | if (!test_bit(vector, ioapic->handled_vectors)) | ||
260 | return; | ||
244 | mutex_lock(&ioapic->lock); | 261 | mutex_lock(&ioapic->lock); |
245 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); | 262 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); |
246 | mutex_unlock(&ioapic->lock); | 263 | mutex_unlock(&ioapic->lock); |
@@ -352,6 +369,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) | |||
352 | ioapic->ioregsel = 0; | 369 | ioapic->ioregsel = 0; |
353 | ioapic->irr = 0; | 370 | ioapic->irr = 0; |
354 | ioapic->id = 0; | 371 | ioapic->id = 0; |
372 | update_handled_vectors(ioapic); | ||
355 | } | 373 | } |
356 | 374 | ||
357 | static const struct kvm_io_device_ops ioapic_mmio_ops = { | 375 | static const struct kvm_io_device_ops ioapic_mmio_ops = { |
@@ -372,13 +390,28 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
372 | kvm_ioapic_reset(ioapic); | 390 | kvm_ioapic_reset(ioapic); |
373 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 391 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
374 | ioapic->kvm = kvm; | 392 | ioapic->kvm = kvm; |
375 | ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); | 393 | mutex_lock(&kvm->slots_lock); |
376 | if (ret < 0) | 394 | ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); |
395 | mutex_unlock(&kvm->slots_lock); | ||
396 | if (ret < 0) { | ||
397 | kvm->arch.vioapic = NULL; | ||
377 | kfree(ioapic); | 398 | kfree(ioapic); |
399 | } | ||
378 | 400 | ||
379 | return ret; | 401 | return ret; |
380 | } | 402 | } |
381 | 403 | ||
404 | void kvm_ioapic_destroy(struct kvm *kvm) | ||
405 | { | ||
406 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | ||
407 | |||
408 | if (ioapic) { | ||
409 | kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); | ||
410 | kvm->arch.vioapic = NULL; | ||
411 | kfree(ioapic); | ||
412 | } | ||
413 | } | ||
414 | |||
382 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | 415 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) |
383 | { | 416 | { |
384 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | 417 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); |
@@ -399,6 +432,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | |||
399 | 432 | ||
400 | mutex_lock(&ioapic->lock); | 433 | mutex_lock(&ioapic->lock); |
401 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | 434 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
435 | update_handled_vectors(ioapic); | ||
402 | mutex_unlock(&ioapic->lock); | 436 | mutex_unlock(&ioapic->lock); |
403 | return 0; | 437 | return 0; |
404 | } | 438 | } |
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 419c43b667ab..8a751b78a430 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
@@ -46,6 +46,7 @@ struct kvm_ioapic { | |||
46 | struct kvm *kvm; | 46 | struct kvm *kvm; |
47 | void (*ack_notifier)(void *opaque, int irq); | 47 | void (*ack_notifier)(void *opaque, int irq); |
48 | struct mutex lock; | 48 | struct mutex lock; |
49 | DECLARE_BITMAP(handled_vectors, 256); | ||
49 | }; | 50 | }; |
50 | 51 | ||
51 | #ifdef DEBUG | 52 | #ifdef DEBUG |
@@ -71,6 +72,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
71 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); | 72 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); |
72 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); | 73 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); |
73 | int kvm_ioapic_init(struct kvm *kvm); | 74 | int kvm_ioapic_init(struct kvm *kvm); |
75 | void kvm_ioapic_destroy(struct kvm *kvm); | ||
74 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | 76 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); |
75 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 77 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
76 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 78 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 15147583abd1..80fd3ad3b2de 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c | |||
@@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm); | |||
32 | static void kvm_iommu_put_pages(struct kvm *kvm, | 32 | static void kvm_iommu_put_pages(struct kvm *kvm, |
33 | gfn_t base_gfn, unsigned long npages); | 33 | gfn_t base_gfn, unsigned long npages); |
34 | 34 | ||
35 | int kvm_iommu_map_pages(struct kvm *kvm, | 35 | int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) |
36 | gfn_t base_gfn, unsigned long npages) | ||
37 | { | 36 | { |
38 | gfn_t gfn = base_gfn; | 37 | gfn_t gfn = slot->base_gfn; |
38 | unsigned long npages = slot->npages; | ||
39 | pfn_t pfn; | 39 | pfn_t pfn; |
40 | int i, r = 0; | 40 | int i, r = 0; |
41 | struct iommu_domain *domain = kvm->arch.iommu_domain; | 41 | struct iommu_domain *domain = kvm->arch.iommu_domain; |
@@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, | |||
54 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) | 54 | if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) |
55 | continue; | 55 | continue; |
56 | 56 | ||
57 | pfn = gfn_to_pfn(kvm, gfn); | 57 | pfn = gfn_to_pfn_memslot(kvm, slot, gfn); |
58 | r = iommu_map_range(domain, | 58 | r = iommu_map_range(domain, |
59 | gfn_to_gpa(gfn), | 59 | gfn_to_gpa(gfn), |
60 | pfn_to_hpa(pfn), | 60 | pfn_to_hpa(pfn), |
@@ -69,17 +69,19 @@ int kvm_iommu_map_pages(struct kvm *kvm, | |||
69 | return 0; | 69 | return 0; |
70 | 70 | ||
71 | unmap_pages: | 71 | unmap_pages: |
72 | kvm_iommu_put_pages(kvm, base_gfn, i); | 72 | kvm_iommu_put_pages(kvm, slot->base_gfn, i); |
73 | return r; | 73 | return r; |
74 | } | 74 | } |
75 | 75 | ||
76 | static int kvm_iommu_map_memslots(struct kvm *kvm) | 76 | static int kvm_iommu_map_memslots(struct kvm *kvm) |
77 | { | 77 | { |
78 | int i, r = 0; | 78 | int i, r = 0; |
79 | struct kvm_memslots *slots; | ||
80 | |||
81 | slots = rcu_dereference(kvm->memslots); | ||
79 | 82 | ||
80 | for (i = 0; i < kvm->nmemslots; i++) { | 83 | for (i = 0; i < slots->nmemslots; i++) { |
81 | r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, | 84 | r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); |
82 | kvm->memslots[i].npages); | ||
83 | if (r) | 85 | if (r) |
84 | break; | 86 | break; |
85 | } | 87 | } |
@@ -104,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm, | |||
104 | 106 | ||
105 | r = iommu_attach_device(domain, &pdev->dev); | 107 | r = iommu_attach_device(domain, &pdev->dev); |
106 | if (r) { | 108 | if (r) { |
107 | printk(KERN_ERR "assign device %x:%x.%x failed", | 109 | printk(KERN_ERR "assign device %x:%x:%x.%x failed", |
110 | pci_domain_nr(pdev->bus), | ||
108 | pdev->bus->number, | 111 | pdev->bus->number, |
109 | PCI_SLOT(pdev->devfn), | 112 | PCI_SLOT(pdev->devfn), |
110 | PCI_FUNC(pdev->devfn)); | 113 | PCI_FUNC(pdev->devfn)); |
@@ -125,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm, | |||
125 | goto out_unmap; | 128 | goto out_unmap; |
126 | } | 129 | } |
127 | 130 | ||
128 | printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", | 131 | printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", |
132 | assigned_dev->host_segnr, | ||
129 | assigned_dev->host_busnr, | 133 | assigned_dev->host_busnr, |
130 | PCI_SLOT(assigned_dev->host_devfn), | 134 | PCI_SLOT(assigned_dev->host_devfn), |
131 | PCI_FUNC(assigned_dev->host_devfn)); | 135 | PCI_FUNC(assigned_dev->host_devfn)); |
@@ -152,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm, | |||
152 | 156 | ||
153 | iommu_detach_device(domain, &pdev->dev); | 157 | iommu_detach_device(domain, &pdev->dev); |
154 | 158 | ||
155 | printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", | 159 | printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", |
160 | assigned_dev->host_segnr, | ||
156 | assigned_dev->host_busnr, | 161 | assigned_dev->host_busnr, |
157 | PCI_SLOT(assigned_dev->host_devfn), | 162 | PCI_SLOT(assigned_dev->host_devfn), |
158 | PCI_FUNC(assigned_dev->host_devfn)); | 163 | PCI_FUNC(assigned_dev->host_devfn)); |
@@ -210,10 +215,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm, | |||
210 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | 215 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) |
211 | { | 216 | { |
212 | int i; | 217 | int i; |
218 | struct kvm_memslots *slots; | ||
219 | |||
220 | slots = rcu_dereference(kvm->memslots); | ||
213 | 221 | ||
214 | for (i = 0; i < kvm->nmemslots; i++) { | 222 | for (i = 0; i < slots->nmemslots; i++) { |
215 | kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, | 223 | kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, |
216 | kvm->memslots[i].npages); | 224 | slots->memslots[i].npages); |
217 | } | 225 | } |
218 | 226 | ||
219 | return 0; | 227 | return 0; |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a944be392d6e..548f9253c195 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -44,6 +44,8 @@ | |||
44 | #include <linux/bitops.h> | 44 | #include <linux/bitops.h> |
45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
46 | #include <linux/compat.h> | 46 | #include <linux/compat.h> |
47 | #include <linux/srcu.h> | ||
48 | #include <linux/hugetlb.h> | ||
47 | 49 | ||
48 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
49 | #include <asm/io.h> | 51 | #include <asm/io.h> |
@@ -51,9 +53,7 @@ | |||
51 | #include <asm/pgtable.h> | 53 | #include <asm/pgtable.h> |
52 | #include <asm-generic/bitops/le.h> | 54 | #include <asm-generic/bitops/le.h> |
53 | 55 | ||
54 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
55 | #include "coalesced_mmio.h" | 56 | #include "coalesced_mmio.h" |
56 | #endif | ||
57 | 57 | ||
58 | #define CREATE_TRACE_POINTS | 58 | #define CREATE_TRACE_POINTS |
59 | #include <trace/events/kvm.h> | 59 | #include <trace/events/kvm.h> |
@@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | |||
86 | static int hardware_enable_all(void); | 86 | static int hardware_enable_all(void); |
87 | static void hardware_disable_all(void); | 87 | static void hardware_disable_all(void); |
88 | 88 | ||
89 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus); | ||
90 | |||
89 | static bool kvm_rebooting; | 91 | static bool kvm_rebooting; |
90 | 92 | ||
91 | static bool largepages_enabled = true; | 93 | static bool largepages_enabled = true; |
@@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
136 | 138 | ||
137 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); | 139 | zalloc_cpumask_var(&cpus, GFP_ATOMIC); |
138 | 140 | ||
139 | spin_lock(&kvm->requests_lock); | 141 | raw_spin_lock(&kvm->requests_lock); |
140 | me = smp_processor_id(); | 142 | me = smp_processor_id(); |
141 | kvm_for_each_vcpu(i, vcpu, kvm) { | 143 | kvm_for_each_vcpu(i, vcpu, kvm) { |
142 | if (test_and_set_bit(req, &vcpu->requests)) | 144 | if (test_and_set_bit(req, &vcpu->requests)) |
@@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) | |||
151 | smp_call_function_many(cpus, ack_flush, NULL, 1); | 153 | smp_call_function_many(cpus, ack_flush, NULL, 1); |
152 | else | 154 | else |
153 | called = false; | 155 | called = false; |
154 | spin_unlock(&kvm->requests_lock); | 156 | raw_spin_unlock(&kvm->requests_lock); |
155 | free_cpumask_var(cpus); | 157 | free_cpumask_var(cpus); |
156 | return called; | 158 | return called; |
157 | } | 159 | } |
@@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
215 | unsigned long address) | 217 | unsigned long address) |
216 | { | 218 | { |
217 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 219 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
218 | int need_tlb_flush; | 220 | int need_tlb_flush, idx; |
219 | 221 | ||
220 | /* | 222 | /* |
221 | * When ->invalidate_page runs, the linux pte has been zapped | 223 | * When ->invalidate_page runs, the linux pte has been zapped |
@@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, | |||
235 | * pte after kvm_unmap_hva returned, without noticing the page | 237 | * pte after kvm_unmap_hva returned, without noticing the page |
236 | * is going to be freed. | 238 | * is going to be freed. |
237 | */ | 239 | */ |
240 | idx = srcu_read_lock(&kvm->srcu); | ||
238 | spin_lock(&kvm->mmu_lock); | 241 | spin_lock(&kvm->mmu_lock); |
239 | kvm->mmu_notifier_seq++; | 242 | kvm->mmu_notifier_seq++; |
240 | need_tlb_flush = kvm_unmap_hva(kvm, address); | 243 | need_tlb_flush = kvm_unmap_hva(kvm, address); |
241 | spin_unlock(&kvm->mmu_lock); | 244 | spin_unlock(&kvm->mmu_lock); |
245 | srcu_read_unlock(&kvm->srcu, idx); | ||
242 | 246 | ||
243 | /* we've to flush the tlb before the pages can be freed */ | 247 | /* we've to flush the tlb before the pages can be freed */ |
244 | if (need_tlb_flush) | 248 | if (need_tlb_flush) |
@@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, | |||
252 | pte_t pte) | 256 | pte_t pte) |
253 | { | 257 | { |
254 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 258 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
259 | int idx; | ||
255 | 260 | ||
261 | idx = srcu_read_lock(&kvm->srcu); | ||
256 | spin_lock(&kvm->mmu_lock); | 262 | spin_lock(&kvm->mmu_lock); |
257 | kvm->mmu_notifier_seq++; | 263 | kvm->mmu_notifier_seq++; |
258 | kvm_set_spte_hva(kvm, address, pte); | 264 | kvm_set_spte_hva(kvm, address, pte); |
259 | spin_unlock(&kvm->mmu_lock); | 265 | spin_unlock(&kvm->mmu_lock); |
266 | srcu_read_unlock(&kvm->srcu, idx); | ||
260 | } | 267 | } |
261 | 268 | ||
262 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | 269 | static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, |
@@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
265 | unsigned long end) | 272 | unsigned long end) |
266 | { | 273 | { |
267 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 274 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
268 | int need_tlb_flush = 0; | 275 | int need_tlb_flush = 0, idx; |
269 | 276 | ||
277 | idx = srcu_read_lock(&kvm->srcu); | ||
270 | spin_lock(&kvm->mmu_lock); | 278 | spin_lock(&kvm->mmu_lock); |
271 | /* | 279 | /* |
272 | * The count increase must become visible at unlock time as no | 280 | * The count increase must become visible at unlock time as no |
@@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, | |||
277 | for (; start < end; start += PAGE_SIZE) | 285 | for (; start < end; start += PAGE_SIZE) |
278 | need_tlb_flush |= kvm_unmap_hva(kvm, start); | 286 | need_tlb_flush |= kvm_unmap_hva(kvm, start); |
279 | spin_unlock(&kvm->mmu_lock); | 287 | spin_unlock(&kvm->mmu_lock); |
288 | srcu_read_unlock(&kvm->srcu, idx); | ||
280 | 289 | ||
281 | /* we've to flush the tlb before the pages can be freed */ | 290 | /* we've to flush the tlb before the pages can be freed */ |
282 | if (need_tlb_flush) | 291 | if (need_tlb_flush) |
@@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, | |||
314 | unsigned long address) | 323 | unsigned long address) |
315 | { | 324 | { |
316 | struct kvm *kvm = mmu_notifier_to_kvm(mn); | 325 | struct kvm *kvm = mmu_notifier_to_kvm(mn); |
317 | int young; | 326 | int young, idx; |
318 | 327 | ||
328 | idx = srcu_read_lock(&kvm->srcu); | ||
319 | spin_lock(&kvm->mmu_lock); | 329 | spin_lock(&kvm->mmu_lock); |
320 | young = kvm_age_hva(kvm, address); | 330 | young = kvm_age_hva(kvm, address); |
321 | spin_unlock(&kvm->mmu_lock); | 331 | spin_unlock(&kvm->mmu_lock); |
332 | srcu_read_unlock(&kvm->srcu, idx); | ||
322 | 333 | ||
323 | if (young) | 334 | if (young) |
324 | kvm_flush_remote_tlbs(kvm); | 335 | kvm_flush_remote_tlbs(kvm); |
@@ -341,15 +352,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
341 | .change_pte = kvm_mmu_notifier_change_pte, | 352 | .change_pte = kvm_mmu_notifier_change_pte, |
342 | .release = kvm_mmu_notifier_release, | 353 | .release = kvm_mmu_notifier_release, |
343 | }; | 354 | }; |
355 | |||
356 | static int kvm_init_mmu_notifier(struct kvm *kvm) | ||
357 | { | ||
358 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | ||
359 | return mmu_notifier_register(&kvm->mmu_notifier, current->mm); | ||
360 | } | ||
361 | |||
362 | #else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */ | ||
363 | |||
364 | static int kvm_init_mmu_notifier(struct kvm *kvm) | ||
365 | { | ||
366 | return 0; | ||
367 | } | ||
368 | |||
344 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ | 369 | #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ |
345 | 370 | ||
346 | static struct kvm *kvm_create_vm(void) | 371 | static struct kvm *kvm_create_vm(void) |
347 | { | 372 | { |
348 | int r = 0; | 373 | int r = 0, i; |
349 | struct kvm *kvm = kvm_arch_create_vm(); | 374 | struct kvm *kvm = kvm_arch_create_vm(); |
350 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
351 | struct page *page; | ||
352 | #endif | ||
353 | 375 | ||
354 | if (IS_ERR(kvm)) | 376 | if (IS_ERR(kvm)) |
355 | goto out; | 377 | goto out; |
@@ -363,39 +385,35 @@ static struct kvm *kvm_create_vm(void) | |||
363 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | 385 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); |
364 | #endif | 386 | #endif |
365 | 387 | ||
366 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 388 | r = -ENOMEM; |
367 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 389 | kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); |
368 | if (!page) { | 390 | if (!kvm->memslots) |
369 | r = -ENOMEM; | ||
370 | goto out_err; | 391 | goto out_err; |
371 | } | 392 | if (init_srcu_struct(&kvm->srcu)) |
372 | kvm->coalesced_mmio_ring = | 393 | goto out_err; |
373 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 394 | for (i = 0; i < KVM_NR_BUSES; i++) { |
374 | #endif | 395 | kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), |
375 | 396 | GFP_KERNEL); | |
376 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 397 | if (!kvm->buses[i]) { |
377 | { | 398 | cleanup_srcu_struct(&kvm->srcu); |
378 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | ||
379 | r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | ||
380 | if (r) { | ||
381 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | ||
382 | put_page(page); | ||
383 | #endif | ||
384 | goto out_err; | 399 | goto out_err; |
385 | } | 400 | } |
386 | } | 401 | } |
387 | #endif | 402 | |
403 | r = kvm_init_mmu_notifier(kvm); | ||
404 | if (r) { | ||
405 | cleanup_srcu_struct(&kvm->srcu); | ||
406 | goto out_err; | ||
407 | } | ||
388 | 408 | ||
389 | kvm->mm = current->mm; | 409 | kvm->mm = current->mm; |
390 | atomic_inc(&kvm->mm->mm_count); | 410 | atomic_inc(&kvm->mm->mm_count); |
391 | spin_lock_init(&kvm->mmu_lock); | 411 | spin_lock_init(&kvm->mmu_lock); |
392 | spin_lock_init(&kvm->requests_lock); | 412 | raw_spin_lock_init(&kvm->requests_lock); |
393 | kvm_io_bus_init(&kvm->pio_bus); | ||
394 | kvm_eventfd_init(kvm); | 413 | kvm_eventfd_init(kvm); |
395 | mutex_init(&kvm->lock); | 414 | mutex_init(&kvm->lock); |
396 | mutex_init(&kvm->irq_lock); | 415 | mutex_init(&kvm->irq_lock); |
397 | kvm_io_bus_init(&kvm->mmio_bus); | 416 | mutex_init(&kvm->slots_lock); |
398 | init_rwsem(&kvm->slots_lock); | ||
399 | atomic_set(&kvm->users_count, 1); | 417 | atomic_set(&kvm->users_count, 1); |
400 | spin_lock(&kvm_lock); | 418 | spin_lock(&kvm_lock); |
401 | list_add(&kvm->vm_list, &vm_list); | 419 | list_add(&kvm->vm_list, &vm_list); |
@@ -406,12 +424,12 @@ static struct kvm *kvm_create_vm(void) | |||
406 | out: | 424 | out: |
407 | return kvm; | 425 | return kvm; |
408 | 426 | ||
409 | #if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \ | ||
410 | (defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)) | ||
411 | out_err: | 427 | out_err: |
412 | hardware_disable_all(); | 428 | hardware_disable_all(); |
413 | #endif | ||
414 | out_err_nodisable: | 429 | out_err_nodisable: |
430 | for (i = 0; i < KVM_NR_BUSES; i++) | ||
431 | kfree(kvm->buses[i]); | ||
432 | kfree(kvm->memslots); | ||
415 | kfree(kvm); | 433 | kfree(kvm); |
416 | return ERR_PTR(r); | 434 | return ERR_PTR(r); |
417 | } | 435 | } |
@@ -446,13 +464,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, | |||
446 | void kvm_free_physmem(struct kvm *kvm) | 464 | void kvm_free_physmem(struct kvm *kvm) |
447 | { | 465 | { |
448 | int i; | 466 | int i; |
467 | struct kvm_memslots *slots = kvm->memslots; | ||
468 | |||
469 | for (i = 0; i < slots->nmemslots; ++i) | ||
470 | kvm_free_physmem_slot(&slots->memslots[i], NULL); | ||
449 | 471 | ||
450 | for (i = 0; i < kvm->nmemslots; ++i) | 472 | kfree(kvm->memslots); |
451 | kvm_free_physmem_slot(&kvm->memslots[i], NULL); | ||
452 | } | 473 | } |
453 | 474 | ||
454 | static void kvm_destroy_vm(struct kvm *kvm) | 475 | static void kvm_destroy_vm(struct kvm *kvm) |
455 | { | 476 | { |
477 | int i; | ||
456 | struct mm_struct *mm = kvm->mm; | 478 | struct mm_struct *mm = kvm->mm; |
457 | 479 | ||
458 | kvm_arch_sync_events(kvm); | 480 | kvm_arch_sync_events(kvm); |
@@ -460,12 +482,9 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
460 | list_del(&kvm->vm_list); | 482 | list_del(&kvm->vm_list); |
461 | spin_unlock(&kvm_lock); | 483 | spin_unlock(&kvm_lock); |
462 | kvm_free_irq_routing(kvm); | 484 | kvm_free_irq_routing(kvm); |
463 | kvm_io_bus_destroy(&kvm->pio_bus); | 485 | for (i = 0; i < KVM_NR_BUSES; i++) |
464 | kvm_io_bus_destroy(&kvm->mmio_bus); | 486 | kvm_io_bus_destroy(kvm->buses[i]); |
465 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 487 | kvm_coalesced_mmio_free(kvm); |
466 | if (kvm->coalesced_mmio_ring != NULL) | ||
467 | free_page((unsigned long)kvm->coalesced_mmio_ring); | ||
468 | #endif | ||
469 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 488 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
470 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); | 489 | mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); |
471 | #else | 490 | #else |
@@ -512,12 +531,13 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
512 | struct kvm_userspace_memory_region *mem, | 531 | struct kvm_userspace_memory_region *mem, |
513 | int user_alloc) | 532 | int user_alloc) |
514 | { | 533 | { |
515 | int r; | 534 | int r, flush_shadow = 0; |
516 | gfn_t base_gfn; | 535 | gfn_t base_gfn; |
517 | unsigned long npages; | 536 | unsigned long npages; |
518 | unsigned long i; | 537 | unsigned long i; |
519 | struct kvm_memory_slot *memslot; | 538 | struct kvm_memory_slot *memslot; |
520 | struct kvm_memory_slot old, new; | 539 | struct kvm_memory_slot old, new; |
540 | struct kvm_memslots *slots, *old_memslots; | ||
521 | 541 | ||
522 | r = -EINVAL; | 542 | r = -EINVAL; |
523 | /* General sanity checks */ | 543 | /* General sanity checks */ |
@@ -532,7 +552,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
532 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) | 552 | if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) |
533 | goto out; | 553 | goto out; |
534 | 554 | ||
535 | memslot = &kvm->memslots[mem->slot]; | 555 | memslot = &kvm->memslots->memslots[mem->slot]; |
536 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; | 556 | base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; |
537 | npages = mem->memory_size >> PAGE_SHIFT; | 557 | npages = mem->memory_size >> PAGE_SHIFT; |
538 | 558 | ||
@@ -553,7 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
553 | /* Check for overlaps */ | 573 | /* Check for overlaps */ |
554 | r = -EEXIST; | 574 | r = -EEXIST; |
555 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 575 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
556 | struct kvm_memory_slot *s = &kvm->memslots[i]; | 576 | struct kvm_memory_slot *s = &kvm->memslots->memslots[i]; |
557 | 577 | ||
558 | if (s == memslot || !s->npages) | 578 | if (s == memslot || !s->npages) |
559 | continue; | 579 | continue; |
@@ -579,15 +599,7 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
579 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); | 599 | memset(new.rmap, 0, npages * sizeof(*new.rmap)); |
580 | 600 | ||
581 | new.user_alloc = user_alloc; | 601 | new.user_alloc = user_alloc; |
582 | /* | 602 | new.userspace_addr = mem->userspace_addr; |
583 | * hva_to_rmmap() serialzies with the mmu_lock and to be | ||
584 | * safe it has to ignore memslots with !user_alloc && | ||
585 | * !userspace_addr. | ||
586 | */ | ||
587 | if (user_alloc) | ||
588 | new.userspace_addr = mem->userspace_addr; | ||
589 | else | ||
590 | new.userspace_addr = 0; | ||
591 | } | 603 | } |
592 | if (!npages) | 604 | if (!npages) |
593 | goto skip_lpage; | 605 | goto skip_lpage; |
@@ -642,8 +654,9 @@ skip_lpage: | |||
642 | if (!new.dirty_bitmap) | 654 | if (!new.dirty_bitmap) |
643 | goto out_free; | 655 | goto out_free; |
644 | memset(new.dirty_bitmap, 0, dirty_bytes); | 656 | memset(new.dirty_bitmap, 0, dirty_bytes); |
657 | /* destroy any largepage mappings for dirty tracking */ | ||
645 | if (old.npages) | 658 | if (old.npages) |
646 | kvm_arch_flush_shadow(kvm); | 659 | flush_shadow = 1; |
647 | } | 660 | } |
648 | #else /* not defined CONFIG_S390 */ | 661 | #else /* not defined CONFIG_S390 */ |
649 | new.user_alloc = user_alloc; | 662 | new.user_alloc = user_alloc; |
@@ -651,36 +664,72 @@ skip_lpage: | |||
651 | new.userspace_addr = mem->userspace_addr; | 664 | new.userspace_addr = mem->userspace_addr; |
652 | #endif /* not defined CONFIG_S390 */ | 665 | #endif /* not defined CONFIG_S390 */ |
653 | 666 | ||
654 | if (!npages) | 667 | if (!npages) { |
668 | r = -ENOMEM; | ||
669 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
670 | if (!slots) | ||
671 | goto out_free; | ||
672 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
673 | if (mem->slot >= slots->nmemslots) | ||
674 | slots->nmemslots = mem->slot + 1; | ||
675 | slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; | ||
676 | |||
677 | old_memslots = kvm->memslots; | ||
678 | rcu_assign_pointer(kvm->memslots, slots); | ||
679 | synchronize_srcu_expedited(&kvm->srcu); | ||
680 | /* From this point no new shadow pages pointing to a deleted | ||
681 | * memslot will be created. | ||
682 | * | ||
683 | * validation of sp->gfn happens in: | ||
684 | * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) | ||
685 | * - kvm_is_visible_gfn (mmu_check_roots) | ||
686 | */ | ||
655 | kvm_arch_flush_shadow(kvm); | 687 | kvm_arch_flush_shadow(kvm); |
688 | kfree(old_memslots); | ||
689 | } | ||
656 | 690 | ||
657 | spin_lock(&kvm->mmu_lock); | 691 | r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc); |
658 | if (mem->slot >= kvm->nmemslots) | 692 | if (r) |
659 | kvm->nmemslots = mem->slot + 1; | ||
660 | |||
661 | *memslot = new; | ||
662 | spin_unlock(&kvm->mmu_lock); | ||
663 | |||
664 | r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); | ||
665 | if (r) { | ||
666 | spin_lock(&kvm->mmu_lock); | ||
667 | *memslot = old; | ||
668 | spin_unlock(&kvm->mmu_lock); | ||
669 | goto out_free; | 693 | goto out_free; |
670 | } | ||
671 | 694 | ||
672 | kvm_free_physmem_slot(&old, npages ? &new : NULL); | ||
673 | /* Slot deletion case: we have to update the current slot */ | ||
674 | spin_lock(&kvm->mmu_lock); | ||
675 | if (!npages) | ||
676 | *memslot = old; | ||
677 | spin_unlock(&kvm->mmu_lock); | ||
678 | #ifdef CONFIG_DMAR | 695 | #ifdef CONFIG_DMAR |
679 | /* map the pages in iommu page table */ | 696 | /* map the pages in iommu page table */ |
680 | r = kvm_iommu_map_pages(kvm, base_gfn, npages); | 697 | if (npages) { |
681 | if (r) | 698 | r = kvm_iommu_map_pages(kvm, &new); |
682 | goto out; | 699 | if (r) |
700 | goto out_free; | ||
701 | } | ||
683 | #endif | 702 | #endif |
703 | |||
704 | r = -ENOMEM; | ||
705 | slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); | ||
706 | if (!slots) | ||
707 | goto out_free; | ||
708 | memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); | ||
709 | if (mem->slot >= slots->nmemslots) | ||
710 | slots->nmemslots = mem->slot + 1; | ||
711 | |||
712 | /* actual memory is freed via old in kvm_free_physmem_slot below */ | ||
713 | if (!npages) { | ||
714 | new.rmap = NULL; | ||
715 | new.dirty_bitmap = NULL; | ||
716 | for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) | ||
717 | new.lpage_info[i] = NULL; | ||
718 | } | ||
719 | |||
720 | slots->memslots[mem->slot] = new; | ||
721 | old_memslots = kvm->memslots; | ||
722 | rcu_assign_pointer(kvm->memslots, slots); | ||
723 | synchronize_srcu_expedited(&kvm->srcu); | ||
724 | |||
725 | kvm_arch_commit_memory_region(kvm, mem, old, user_alloc); | ||
726 | |||
727 | kvm_free_physmem_slot(&old, &new); | ||
728 | kfree(old_memslots); | ||
729 | |||
730 | if (flush_shadow) | ||
731 | kvm_arch_flush_shadow(kvm); | ||
732 | |||
684 | return 0; | 733 | return 0; |
685 | 734 | ||
686 | out_free: | 735 | out_free: |
@@ -697,9 +746,9 @@ int kvm_set_memory_region(struct kvm *kvm, | |||
697 | { | 746 | { |
698 | int r; | 747 | int r; |
699 | 748 | ||
700 | down_write(&kvm->slots_lock); | 749 | mutex_lock(&kvm->slots_lock); |
701 | r = __kvm_set_memory_region(kvm, mem, user_alloc); | 750 | r = __kvm_set_memory_region(kvm, mem, user_alloc); |
702 | up_write(&kvm->slots_lock); | 751 | mutex_unlock(&kvm->slots_lock); |
703 | return r; | 752 | return r; |
704 | } | 753 | } |
705 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); | 754 | EXPORT_SYMBOL_GPL(kvm_set_memory_region); |
@@ -726,7 +775,7 @@ int kvm_get_dirty_log(struct kvm *kvm, | |||
726 | if (log->slot >= KVM_MEMORY_SLOTS) | 775 | if (log->slot >= KVM_MEMORY_SLOTS) |
727 | goto out; | 776 | goto out; |
728 | 777 | ||
729 | memslot = &kvm->memslots[log->slot]; | 778 | memslot = &kvm->memslots->memslots[log->slot]; |
730 | r = -ENOENT; | 779 | r = -ENOENT; |
731 | if (!memslot->dirty_bitmap) | 780 | if (!memslot->dirty_bitmap) |
732 | goto out; | 781 | goto out; |
@@ -780,9 +829,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); | |||
780 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) | 829 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn) |
781 | { | 830 | { |
782 | int i; | 831 | int i; |
832 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | ||
783 | 833 | ||
784 | for (i = 0; i < kvm->nmemslots; ++i) { | 834 | for (i = 0; i < slots->nmemslots; ++i) { |
785 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | 835 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
786 | 836 | ||
787 | if (gfn >= memslot->base_gfn | 837 | if (gfn >= memslot->base_gfn |
788 | && gfn < memslot->base_gfn + memslot->npages) | 838 | && gfn < memslot->base_gfn + memslot->npages) |
@@ -801,10 +851,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) | |||
801 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | 851 | int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) |
802 | { | 852 | { |
803 | int i; | 853 | int i; |
854 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | ||
804 | 855 | ||
805 | gfn = unalias_gfn(kvm, gfn); | 856 | gfn = unalias_gfn_instantiation(kvm, gfn); |
806 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { | 857 | for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { |
807 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | 858 | struct kvm_memory_slot *memslot = &slots->memslots[i]; |
859 | |||
860 | if (memslot->flags & KVM_MEMSLOT_INVALID) | ||
861 | continue; | ||
808 | 862 | ||
809 | if (gfn >= memslot->base_gfn | 863 | if (gfn >= memslot->base_gfn |
810 | && gfn < memslot->base_gfn + memslot->npages) | 864 | && gfn < memslot->base_gfn + memslot->npages) |
@@ -814,33 +868,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) | |||
814 | } | 868 | } |
815 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); | 869 | EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); |
816 | 870 | ||
871 | unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) | ||
872 | { | ||
873 | struct vm_area_struct *vma; | ||
874 | unsigned long addr, size; | ||
875 | |||
876 | size = PAGE_SIZE; | ||
877 | |||
878 | addr = gfn_to_hva(kvm, gfn); | ||
879 | if (kvm_is_error_hva(addr)) | ||
880 | return PAGE_SIZE; | ||
881 | |||
882 | down_read(&current->mm->mmap_sem); | ||
883 | vma = find_vma(current->mm, addr); | ||
884 | if (!vma) | ||
885 | goto out; | ||
886 | |||
887 | size = vma_kernel_pagesize(vma); | ||
888 | |||
889 | out: | ||
890 | up_read(&current->mm->mmap_sem); | ||
891 | |||
892 | return size; | ||
893 | } | ||
894 | |||
895 | int memslot_id(struct kvm *kvm, gfn_t gfn) | ||
896 | { | ||
897 | int i; | ||
898 | struct kvm_memslots *slots = rcu_dereference(kvm->memslots); | ||
899 | struct kvm_memory_slot *memslot = NULL; | ||
900 | |||
901 | gfn = unalias_gfn(kvm, gfn); | ||
902 | for (i = 0; i < slots->nmemslots; ++i) { | ||
903 | memslot = &slots->memslots[i]; | ||
904 | |||
905 | if (gfn >= memslot->base_gfn | ||
906 | && gfn < memslot->base_gfn + memslot->npages) | ||
907 | break; | ||
908 | } | ||
909 | |||
910 | return memslot - slots->memslots; | ||
911 | } | ||
912 | |||
817 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | 913 | unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) |
818 | { | 914 | { |
819 | struct kvm_memory_slot *slot; | 915 | struct kvm_memory_slot *slot; |
820 | 916 | ||
821 | gfn = unalias_gfn(kvm, gfn); | 917 | gfn = unalias_gfn_instantiation(kvm, gfn); |
822 | slot = gfn_to_memslot_unaliased(kvm, gfn); | 918 | slot = gfn_to_memslot_unaliased(kvm, gfn); |
823 | if (!slot) | 919 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID) |
824 | return bad_hva(); | 920 | return bad_hva(); |
825 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | 921 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); |
826 | } | 922 | } |
827 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 923 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
828 | 924 | ||
829 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | 925 | static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr) |
830 | { | 926 | { |
831 | struct page *page[1]; | 927 | struct page *page[1]; |
832 | unsigned long addr; | ||
833 | int npages; | 928 | int npages; |
834 | pfn_t pfn; | 929 | pfn_t pfn; |
835 | 930 | ||
836 | might_sleep(); | 931 | might_sleep(); |
837 | 932 | ||
838 | addr = gfn_to_hva(kvm, gfn); | ||
839 | if (kvm_is_error_hva(addr)) { | ||
840 | get_page(bad_page); | ||
841 | return page_to_pfn(bad_page); | ||
842 | } | ||
843 | |||
844 | npages = get_user_pages_fast(addr, 1, 1, page); | 933 | npages = get_user_pages_fast(addr, 1, 1, page); |
845 | 934 | ||
846 | if (unlikely(npages != 1)) { | 935 | if (unlikely(npages != 1)) { |
@@ -865,8 +954,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | |||
865 | return pfn; | 954 | return pfn; |
866 | } | 955 | } |
867 | 956 | ||
957 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | ||
958 | { | ||
959 | unsigned long addr; | ||
960 | |||
961 | addr = gfn_to_hva(kvm, gfn); | ||
962 | if (kvm_is_error_hva(addr)) { | ||
963 | get_page(bad_page); | ||
964 | return page_to_pfn(bad_page); | ||
965 | } | ||
966 | |||
967 | return hva_to_pfn(kvm, addr); | ||
968 | } | ||
868 | EXPORT_SYMBOL_GPL(gfn_to_pfn); | 969 | EXPORT_SYMBOL_GPL(gfn_to_pfn); |
869 | 970 | ||
971 | static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) | ||
972 | { | ||
973 | return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); | ||
974 | } | ||
975 | |||
976 | pfn_t gfn_to_pfn_memslot(struct kvm *kvm, | ||
977 | struct kvm_memory_slot *slot, gfn_t gfn) | ||
978 | { | ||
979 | unsigned long addr = gfn_to_hva_memslot(slot, gfn); | ||
980 | return hva_to_pfn(kvm, addr); | ||
981 | } | ||
982 | |||
870 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | 983 | struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) |
871 | { | 984 | { |
872 | pfn_t pfn; | 985 | pfn_t pfn; |
@@ -1854,12 +1967,7 @@ static struct notifier_block kvm_reboot_notifier = { | |||
1854 | .priority = 0, | 1967 | .priority = 0, |
1855 | }; | 1968 | }; |
1856 | 1969 | ||
1857 | void kvm_io_bus_init(struct kvm_io_bus *bus) | 1970 | static void kvm_io_bus_destroy(struct kvm_io_bus *bus) |
1858 | { | ||
1859 | memset(bus, 0, sizeof(*bus)); | ||
1860 | } | ||
1861 | |||
1862 | void kvm_io_bus_destroy(struct kvm_io_bus *bus) | ||
1863 | { | 1971 | { |
1864 | int i; | 1972 | int i; |
1865 | 1973 | ||
@@ -1868,13 +1976,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) | |||
1868 | 1976 | ||
1869 | kvm_iodevice_destructor(pos); | 1977 | kvm_iodevice_destructor(pos); |
1870 | } | 1978 | } |
1979 | kfree(bus); | ||
1871 | } | 1980 | } |
1872 | 1981 | ||
1873 | /* kvm_io_bus_write - called under kvm->slots_lock */ | 1982 | /* kvm_io_bus_write - called under kvm->slots_lock */ |
1874 | int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, | 1983 | int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
1875 | int len, const void *val) | 1984 | int len, const void *val) |
1876 | { | 1985 | { |
1877 | int i; | 1986 | int i; |
1987 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | ||
1878 | for (i = 0; i < bus->dev_count; i++) | 1988 | for (i = 0; i < bus->dev_count; i++) |
1879 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) | 1989 | if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) |
1880 | return 0; | 1990 | return 0; |
@@ -1882,59 +1992,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, | |||
1882 | } | 1992 | } |
1883 | 1993 | ||
1884 | /* kvm_io_bus_read - called under kvm->slots_lock */ | 1994 | /* kvm_io_bus_read - called under kvm->slots_lock */ |
1885 | int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) | 1995 | int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, |
1996 | int len, void *val) | ||
1886 | { | 1997 | { |
1887 | int i; | 1998 | int i; |
1999 | struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]); | ||
2000 | |||
1888 | for (i = 0; i < bus->dev_count; i++) | 2001 | for (i = 0; i < bus->dev_count; i++) |
1889 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) | 2002 | if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) |
1890 | return 0; | 2003 | return 0; |
1891 | return -EOPNOTSUPP; | 2004 | return -EOPNOTSUPP; |
1892 | } | 2005 | } |
1893 | 2006 | ||
1894 | int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, | 2007 | /* Caller must hold slots_lock. */ |
1895 | struct kvm_io_device *dev) | 2008 | int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
2009 | struct kvm_io_device *dev) | ||
1896 | { | 2010 | { |
1897 | int ret; | 2011 | struct kvm_io_bus *new_bus, *bus; |
1898 | |||
1899 | down_write(&kvm->slots_lock); | ||
1900 | ret = __kvm_io_bus_register_dev(bus, dev); | ||
1901 | up_write(&kvm->slots_lock); | ||
1902 | 2012 | ||
1903 | return ret; | 2013 | bus = kvm->buses[bus_idx]; |
1904 | } | ||
1905 | |||
1906 | /* An unlocked version. Caller must have write lock on slots_lock. */ | ||
1907 | int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, | ||
1908 | struct kvm_io_device *dev) | ||
1909 | { | ||
1910 | if (bus->dev_count > NR_IOBUS_DEVS-1) | 2014 | if (bus->dev_count > NR_IOBUS_DEVS-1) |
1911 | return -ENOSPC; | 2015 | return -ENOSPC; |
1912 | 2016 | ||
1913 | bus->devs[bus->dev_count++] = dev; | 2017 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
2018 | if (!new_bus) | ||
2019 | return -ENOMEM; | ||
2020 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); | ||
2021 | new_bus->devs[new_bus->dev_count++] = dev; | ||
2022 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | ||
2023 | synchronize_srcu_expedited(&kvm->srcu); | ||
2024 | kfree(bus); | ||
1914 | 2025 | ||
1915 | return 0; | 2026 | return 0; |
1916 | } | 2027 | } |
1917 | 2028 | ||
1918 | void kvm_io_bus_unregister_dev(struct kvm *kvm, | 2029 | /* Caller must hold slots_lock. */ |
1919 | struct kvm_io_bus *bus, | 2030 | int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, |
1920 | struct kvm_io_device *dev) | 2031 | struct kvm_io_device *dev) |
1921 | { | 2032 | { |
1922 | down_write(&kvm->slots_lock); | 2033 | int i, r; |
1923 | __kvm_io_bus_unregister_dev(bus, dev); | 2034 | struct kvm_io_bus *new_bus, *bus; |
1924 | up_write(&kvm->slots_lock); | ||
1925 | } | ||
1926 | 2035 | ||
1927 | /* An unlocked version. Caller must have write lock on slots_lock. */ | 2036 | new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); |
1928 | void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, | 2037 | if (!new_bus) |
1929 | struct kvm_io_device *dev) | 2038 | return -ENOMEM; |
1930 | { | ||
1931 | int i; | ||
1932 | 2039 | ||
1933 | for (i = 0; i < bus->dev_count; i++) | 2040 | bus = kvm->buses[bus_idx]; |
1934 | if (bus->devs[i] == dev) { | 2041 | memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); |
1935 | bus->devs[i] = bus->devs[--bus->dev_count]; | 2042 | |
2043 | r = -ENOENT; | ||
2044 | for (i = 0; i < new_bus->dev_count; i++) | ||
2045 | if (new_bus->devs[i] == dev) { | ||
2046 | r = 0; | ||
2047 | new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; | ||
1936 | break; | 2048 | break; |
1937 | } | 2049 | } |
2050 | |||
2051 | if (r) { | ||
2052 | kfree(new_bus); | ||
2053 | return r; | ||
2054 | } | ||
2055 | |||
2056 | rcu_assign_pointer(kvm->buses[bus_idx], new_bus); | ||
2057 | synchronize_srcu_expedited(&kvm->srcu); | ||
2058 | kfree(bus); | ||
2059 | return r; | ||
1938 | } | 2060 | } |
1939 | 2061 | ||
1940 | static struct notifier_block kvm_cpu_notifier = { | 2062 | static struct notifier_block kvm_cpu_notifier = { |