63 files changed, 3432 insertions(+), 1232 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 57975bda9201..52702b057c02 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
| @@ -2448,7 +2448,14 @@ S: Supported | |||
| 2448 | 2448 | ||
| 2449 | KERNEL VIRTUAL MACHINE (KVM) | 2449 | KERNEL VIRTUAL MACHINE (KVM) |
| 2450 | P: Avi Kivity | 2450 | P: Avi Kivity |
| 2451 | M: avi@qumranet.com | 2451 | M: avi@redhat.com |
| 2452 | L: kvm@vger.kernel.org | ||
| 2453 | W: http://kvm.qumranet.com | ||
| 2454 | S: Supported | ||
| 2455 | |||
| 2456 | KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V | ||
| 2457 | P: Joerg Roedel | ||
| 2458 | M: joerg.roedel@amd.com | ||
| 2452 | L: kvm@vger.kernel.org | 2459 | L: kvm@vger.kernel.org |
| 2453 | W: http://kvm.qumranet.com | 2460 | W: http://kvm.qumranet.com |
| 2454 | S: Supported | 2461 | S: Supported |
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 1efe513a9941..85db124d37f6 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
| @@ -132,7 +132,7 @@ | |||
| 132 | #define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ | 132 | #define GPFN_IOSAPIC (4UL << 60) /* IOSAPIC base */ |
| 133 | #define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ | 133 | #define GPFN_LEGACY_IO (5UL << 60) /* Legacy I/O base */ |
| 134 | #define GPFN_GFW (6UL << 60) /* Guest Firmware */ | 134 | #define GPFN_GFW (6UL << 60) /* Guest Firmware */ |
| 135 | #define GPFN_HIGH_MMIO (7UL << 60) /* High MMIO range */ | 135 | #define GPFN_PHYS_MMIO (7UL << 60) /* Directed MMIO Range */ |
| 136 | 136 | ||
| 137 | #define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ | 137 | #define GPFN_IO_MASK (7UL << 60) /* Guest pfn is I/O type */ |
| 138 | #define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ | 138 | #define GPFN_INV_MASK (1UL << 63) /* Guest pfn is invalid */ |
| @@ -413,6 +413,10 @@ struct kvm_arch { | |||
| 413 | struct kvm_ioapic *vioapic; | 413 | struct kvm_ioapic *vioapic; |
| 414 | struct kvm_vm_stat stat; | 414 | struct kvm_vm_stat stat; |
| 415 | struct kvm_sal_data rdv_sal_data; | 415 | struct kvm_sal_data rdv_sal_data; |
| 416 | |||
| 417 | struct list_head assigned_dev_head; | ||
| 418 | struct dmar_domain *intel_iommu_domain; | ||
| 419 | struct hlist_head irq_ack_notifier_list; | ||
| 416 | }; | 420 | }; |
| 417 | 421 | ||
| 418 | union cpuid3_t { | 422 | union cpuid3_t { |
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 7914e4828504..8e99fed6b3fd 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
| @@ -46,4 +46,6 @@ config KVM_INTEL | |||
| 46 | config KVM_TRACE | 46 | config KVM_TRACE |
| 47 | bool | 47 | bool |
| 48 | 48 | ||
| 49 | source drivers/virtio/Kconfig | ||
| 50 | |||
| 49 | endif # VIRTUALIZATION | 51 | endif # VIRTUALIZATION |
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index bf22fb9e6dcf..cf37f8f490c0 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
| @@ -44,7 +44,11 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ | |||
| 44 | EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ | 44 | EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ |
| 45 | 45 | ||
| 46 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 46 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 47 | coalesced_mmio.o) | 47 | coalesced_mmio.o irq_comm.o) |
| 48 | |||
| 49 | ifeq ($(CONFIG_DMAR),y) | ||
| 50 | common-objs += $(addprefix ../../../virt/kvm/, vtd.o) | ||
| 51 | endif | ||
| 48 | 52 | ||
| 49 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o | 53 | kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o |
| 50 | obj-$(CONFIG_KVM) += kvm.o | 54 | obj-$(CONFIG_KVM) += kvm.o |
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
new file mode 100644
index 000000000000..c6786e8b1bf4
--- /dev/null
+++ b/arch/ia64/kvm/irq.h
| @@ -0,0 +1,31 @@ | |||
| 1 | /* | ||
| 2 | * irq.h: In-kernel interrupt controller related definitions | ||
| 3 | * Copyright (c) 2008, Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License along with | ||
| 15 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 16 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 17 | * | ||
| 18 | * Authors: | ||
| 19 | * Xiantao Zhang <xiantao.zhang@intel.com> | ||
| 20 | * | ||
| 21 | */ | ||
| 22 | |||
| 23 | #ifndef __IRQ_H | ||
| 24 | #define __IRQ_H | ||
| 25 | |||
| 26 | static inline int irqchip_in_kernel(struct kvm *kvm) | ||
| 27 | { | ||
| 28 | return 1; | ||
| 29 | } | ||
| 30 | |||
| 31 | #endif | ||
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index cd0d1a7284b7..c0699f0e35a9 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/bitops.h> | 31 | #include <linux/bitops.h> |
| 32 | #include <linux/hrtimer.h> | 32 | #include <linux/hrtimer.h> |
| 33 | #include <linux/uaccess.h> | 33 | #include <linux/uaccess.h> |
| 34 | #include <linux/intel-iommu.h> | ||
| 34 | 35 | ||
| 35 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
| 36 | #include <asm/gcc_intrin.h> | 37 | #include <asm/gcc_intrin.h> |
| @@ -45,6 +46,7 @@ | |||
| 45 | #include "iodev.h" | 46 | #include "iodev.h" |
| 46 | #include "ioapic.h" | 47 | #include "ioapic.h" |
| 47 | #include "lapic.h" | 48 | #include "lapic.h" |
| 49 | #include "irq.h" | ||
| 48 | 50 | ||
| 49 | static unsigned long kvm_vmm_base; | 51 | static unsigned long kvm_vmm_base; |
| 50 | static unsigned long kvm_vsa_base; | 52 | static unsigned long kvm_vsa_base; |
| @@ -179,12 +181,16 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 179 | switch (ext) { | 181 | switch (ext) { |
| 180 | case KVM_CAP_IRQCHIP: | 182 | case KVM_CAP_IRQCHIP: |
| 181 | case KVM_CAP_USER_MEMORY: | 183 | case KVM_CAP_USER_MEMORY: |
| 184 | case KVM_CAP_MP_STATE: | ||
| 182 | 185 | ||
| 183 | r = 1; | 186 | r = 1; |
| 184 | break; | 187 | break; |
| 185 | case KVM_CAP_COALESCED_MMIO: | 188 | case KVM_CAP_COALESCED_MMIO: |
| 186 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | 189 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; |
| 187 | break; | 190 | break; |
| 191 | case KVM_CAP_IOMMU: | ||
| 192 | r = intel_iommu_found(); | ||
| 193 | break; | ||
| 188 | default: | 194 | default: |
| 189 | r = 0; | 195 | r = 0; |
| 190 | } | 196 | } |
| @@ -771,6 +777,7 @@ static void kvm_init_vm(struct kvm *kvm) | |||
| 771 | */ | 777 | */ |
| 772 | kvm_build_io_pmt(kvm); | 778 | kvm_build_io_pmt(kvm); |
| 773 | 779 | ||
| 780 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | ||
| 774 | } | 781 | } |
| 775 | 782 | ||
| 776 | struct kvm *kvm_arch_create_vm(void) | 783 | struct kvm *kvm_arch_create_vm(void) |
| @@ -1334,6 +1341,10 @@ static void kvm_release_vm_pages(struct kvm *kvm) | |||
| 1334 | 1341 | ||
| 1335 | void kvm_arch_destroy_vm(struct kvm *kvm) | 1342 | void kvm_arch_destroy_vm(struct kvm *kvm) |
| 1336 | { | 1343 | { |
| 1344 | kvm_iommu_unmap_guest(kvm); | ||
| 1345 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 1346 | kvm_free_all_assigned_devices(kvm); | ||
| 1347 | #endif | ||
| 1337 | kfree(kvm->arch.vioapic); | 1348 | kfree(kvm->arch.vioapic); |
| 1338 | kvm_release_vm_pages(kvm); | 1349 | kvm_release_vm_pages(kvm); |
| 1339 | kvm_free_physmem(kvm); | 1350 | kvm_free_physmem(kvm); |
| @@ -1435,17 +1446,24 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 1435 | int user_alloc) | 1446 | int user_alloc) |
| 1436 | { | 1447 | { |
| 1437 | unsigned long i; | 1448 | unsigned long i; |
| 1438 | struct page *page; | 1449 | unsigned long pfn; |
| 1439 | int npages = mem->memory_size >> PAGE_SHIFT; | 1450 | int npages = mem->memory_size >> PAGE_SHIFT; |
| 1440 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; | 1451 | struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; |
| 1441 | unsigned long base_gfn = memslot->base_gfn; | 1452 | unsigned long base_gfn = memslot->base_gfn; |
| 1442 | 1453 | ||
| 1443 | for (i = 0; i < npages; i++) { | 1454 | for (i = 0; i < npages; i++) { |
| 1444 | page = gfn_to_page(kvm, base_gfn + i); | 1455 | pfn = gfn_to_pfn(kvm, base_gfn + i); |
| 1445 | kvm_set_pmt_entry(kvm, base_gfn + i, | 1456 | if (!kvm_is_mmio_pfn(pfn)) { |
| 1446 | page_to_pfn(page) << PAGE_SHIFT, | 1457 | kvm_set_pmt_entry(kvm, base_gfn + i, |
| 1447 | _PAGE_AR_RWX|_PAGE_MA_WB); | 1458 | pfn << PAGE_SHIFT, |
| 1448 | memslot->rmap[i] = (unsigned long)page; | 1459 | _PAGE_AR_RWX | _PAGE_MA_WB); |
| 1460 | memslot->rmap[i] = (unsigned long)pfn_to_page(pfn); | ||
| 1461 | } else { | ||
| 1462 | kvm_set_pmt_entry(kvm, base_gfn + i, | ||
| 1463 | GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT), | ||
| 1464 | _PAGE_MA_UC); | ||
| 1465 | memslot->rmap[i] = 0; | ||
| 1466 | } | ||
| 1449 | } | 1467 | } |
| 1450 | 1468 | ||
| 1451 | return 0; | 1469 | return 0; |
| @@ -1789,11 +1807,43 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
| 1789 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, | 1807 | int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, |
| 1790 | struct kvm_mp_state *mp_state) | 1808 | struct kvm_mp_state *mp_state) |
| 1791 | { | 1809 | { |
| 1792 | return -EINVAL; | 1810 | vcpu_load(vcpu); |
| 1811 | mp_state->mp_state = vcpu->arch.mp_state; | ||
| 1812 | vcpu_put(vcpu); | ||
| 1813 | return 0; | ||
| 1814 | } | ||
| 1815 | |||
| 1816 | static int vcpu_reset(struct kvm_vcpu *vcpu) | ||
| 1817 | { | ||
| 1818 | int r; | ||
| 1819 | long psr; | ||
| 1820 | local_irq_save(psr); | ||
| 1821 | r = kvm_insert_vmm_mapping(vcpu); | ||
| 1822 | if (r) | ||
| 1823 | goto fail; | ||
| 1824 | |||
| 1825 | vcpu->arch.launched = 0; | ||
| 1826 | kvm_arch_vcpu_uninit(vcpu); | ||
| 1827 | r = kvm_arch_vcpu_init(vcpu); | ||
| 1828 | if (r) | ||
| 1829 | goto fail; | ||
| 1830 | |||
| 1831 | kvm_purge_vmm_mapping(vcpu); | ||
| 1832 | r = 0; | ||
| 1833 | fail: | ||
| 1834 | local_irq_restore(psr); | ||
| 1835 | return r; | ||
| 1793 | } | 1836 | } |
| 1794 | 1837 | ||
| 1795 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | 1838 | int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, |
| 1796 | struct kvm_mp_state *mp_state) | 1839 | struct kvm_mp_state *mp_state) |
| 1797 | { | 1840 | { |
| 1798 | return -EINVAL; | 1841 | int r = 0; |
| 1842 | |||
| 1843 | vcpu_load(vcpu); | ||
| 1844 | vcpu->arch.mp_state = mp_state->mp_state; | ||
| 1845 | if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) | ||
| 1846 | r = vcpu_reset(vcpu); | ||
| 1847 | vcpu_put(vcpu); | ||
| 1848 | return r; | ||
| 1799 | } | 1849 | } |
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index 13980d9b8bcf..2cc41d17cf99 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
| @@ -50,27 +50,18 @@ | |||
| 50 | 50 | ||
| 51 | #define PAL_VSA_SYNC_READ \ | 51 | #define PAL_VSA_SYNC_READ \ |
| 52 | /* begin to call pal vps sync_read */ \ | 52 | /* begin to call pal vps sync_read */ \ |
| 53 | {.mii; \ | ||
| 53 | add r25 = VMM_VPD_BASE_OFFSET, r21; \ | 54 | add r25 = VMM_VPD_BASE_OFFSET, r21; \ |
| 54 | adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \ | 55 | nop 0x0; \ |
| 56 | mov r24=ip; \ | ||
| 55 | ;; \ | 57 | ;; \ |
| 58 | } \ | ||
| 59 | {.mmb \ | ||
| 60 | add r24=0x20, r24; \ | ||
| 56 | ld8 r25 = [r25]; /* read vpd base */ \ | 61 | ld8 r25 = [r25]; /* read vpd base */ \ |
| 57 | ld8 r20 = [r20]; \ | 62 | br.cond.sptk kvm_vps_sync_read; /*call the service*/ \ |
| 58 | ;; \ | ||
| 59 | add r20 = PAL_VPS_SYNC_READ,r20; \ | ||
| 60 | ;; \ | ||
| 61 | { .mii; \ | ||
| 62 | nop 0x0; \ | ||
| 63 | mov r24 = ip; \ | ||
| 64 | mov b0 = r20; \ | ||
| 65 | ;; \ | 63 | ;; \ |
| 66 | }; \ | 64 | }; \ |
| 67 | { .mmb; \ | ||
| 68 | add r24 = 0x20, r24; \ | ||
| 69 | nop 0x0; \ | ||
| 70 | br.cond.sptk b0; /* call the service */ \ | ||
| 71 | ;; \ | ||
| 72 | }; | ||
| 73 | |||
| 74 | 65 | ||
| 75 | 66 | ||
| 76 | #define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 | 67 | #define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 |
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
index e4f15d641b22..634abad979b5 100644
--- a/arch/ia64/kvm/optvfault.S
+++ b/arch/ia64/kvm/optvfault.S
| @@ -1,9 +1,12 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * arch/ia64/vmx/optvfault.S | 2 | * arch/ia64/kvm/optvfault.S |
| 3 | * optimize virtualization fault handler | 3 | * optimize virtualization fault handler |
| 4 | * | 4 | * |
| 5 | * Copyright (C) 2006 Intel Co | 5 | * Copyright (C) 2006 Intel Co |
| 6 | * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> | 6 | * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> |
| 7 | * Copyright (C) 2008 Intel Co | ||
| 8 | * Add the support for Tukwila processors. | ||
| 9 | * Xiantao Zhang <xiantao.zhang@intel.com> | ||
| 7 | */ | 10 | */ |
| 8 | 11 | ||
| 9 | #include <asm/asmmacro.h> | 12 | #include <asm/asmmacro.h> |
| @@ -20,6 +23,98 @@ | |||
| 20 | #define ACCE_MOV_TO_PSR | 23 | #define ACCE_MOV_TO_PSR |
| 21 | #define ACCE_THASH | 24 | #define ACCE_THASH |
| 22 | 25 | ||
| 26 | #define VMX_VPS_SYNC_READ \ | ||
| 27 | add r16=VMM_VPD_BASE_OFFSET,r21; \ | ||
| 28 | mov r17 = b0; \ | ||
| 29 | mov r18 = r24; \ | ||
| 30 | mov r19 = r25; \ | ||
| 31 | mov r20 = r31; \ | ||
| 32 | ;; \ | ||
| 33 | {.mii; \ | ||
| 34 | ld8 r16 = [r16]; \ | ||
| 35 | nop 0x0; \ | ||
| 36 | mov r24 = ip; \ | ||
| 37 | ;; \ | ||
| 38 | }; \ | ||
| 39 | {.mmb; \ | ||
| 40 | add r24=0x20, r24; \ | ||
| 41 | mov r25 =r16; \ | ||
| 42 | br.sptk.many kvm_vps_sync_read; \ | ||
| 43 | }; \ | ||
| 44 | mov b0 = r17; \ | ||
| 45 | mov r24 = r18; \ | ||
| 46 | mov r25 = r19; \ | ||
| 47 | mov r31 = r20 | ||
| 48 | |||
| 49 | ENTRY(kvm_vps_entry) | ||
| 50 | adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21 | ||
| 51 | ;; | ||
| 52 | ld8 r29 = [r29] | ||
| 53 | ;; | ||
| 54 | add r29 = r29, r30 | ||
| 55 | ;; | ||
| 56 | mov b0 = r29 | ||
| 57 | br.sptk.many b0 | ||
| 58 | END(kvm_vps_entry) | ||
| 59 | |||
| 60 | /* | ||
| 61 | * Inputs: | ||
| 62 | * r24 : return address | ||
| 63 | * r25 : vpd | ||
| 64 | * r29 : scratch | ||
| 65 | * | ||
| 66 | */ | ||
| 67 | GLOBAL_ENTRY(kvm_vps_sync_read) | ||
| 68 | movl r30 = PAL_VPS_SYNC_READ | ||
| 69 | ;; | ||
| 70 | br.sptk.many kvm_vps_entry | ||
| 71 | END(kvm_vps_sync_read) | ||
| 72 | |||
| 73 | /* | ||
| 74 | * Inputs: | ||
| 75 | * r24 : return address | ||
| 76 | * r25 : vpd | ||
| 77 | * r29 : scratch | ||
| 78 | * | ||
| 79 | */ | ||
| 80 | GLOBAL_ENTRY(kvm_vps_sync_write) | ||
| 81 | movl r30 = PAL_VPS_SYNC_WRITE | ||
| 82 | ;; | ||
| 83 | br.sptk.many kvm_vps_entry | ||
| 84 | END(kvm_vps_sync_write) | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Inputs: | ||
| 88 | * r23 : pr | ||
| 89 | * r24 : guest b0 | ||
| 90 | * r25 : vpd | ||
| 91 | * | ||
| 92 | */ | ||
| 93 | GLOBAL_ENTRY(kvm_vps_resume_normal) | ||
| 94 | movl r30 = PAL_VPS_RESUME_NORMAL | ||
| 95 | ;; | ||
| 96 | mov pr=r23,-2 | ||
| 97 | br.sptk.many kvm_vps_entry | ||
| 98 | END(kvm_vps_resume_normal) | ||
| 99 | |||
| 100 | /* | ||
| 101 | * Inputs: | ||
| 102 | * r23 : pr | ||
| 103 | * r24 : guest b0 | ||
| 104 | * r25 : vpd | ||
| 105 | * r17 : isr | ||
| 106 | */ | ||
| 107 | GLOBAL_ENTRY(kvm_vps_resume_handler) | ||
| 108 | movl r30 = PAL_VPS_RESUME_HANDLER | ||
| 109 | ;; | ||
| 110 | ld8 r27=[r25] | ||
| 111 | shr r17=r17,IA64_ISR_IR_BIT | ||
| 112 | ;; | ||
| 113 | dep r27=r17,r27,63,1 // bit 63 of r27 indicate whether enable CFLE | ||
| 114 | mov pr=r23,-2 | ||
| 115 | br.sptk.many kvm_vps_entry | ||
| 116 | END(kvm_vps_resume_handler) | ||
| 117 | |||
| 23 | //mov r1=ar3 | 118 | //mov r1=ar3 |
| 24 | GLOBAL_ENTRY(kvm_asm_mov_from_ar) | 119 | GLOBAL_ENTRY(kvm_asm_mov_from_ar) |
| 25 | #ifndef ACCE_MOV_FROM_AR | 120 | #ifndef ACCE_MOV_FROM_AR |
| @@ -157,11 +252,11 @@ GLOBAL_ENTRY(kvm_asm_rsm) | |||
| 157 | #ifndef ACCE_RSM | 252 | #ifndef ACCE_RSM |
| 158 | br.many kvm_virtualization_fault_back | 253 | br.many kvm_virtualization_fault_back |
| 159 | #endif | 254 | #endif |
| 160 | add r16=VMM_VPD_BASE_OFFSET,r21 | 255 | VMX_VPS_SYNC_READ |
| 256 | ;; | ||
| 161 | extr.u r26=r25,6,21 | 257 | extr.u r26=r25,6,21 |
| 162 | extr.u r27=r25,31,2 | 258 | extr.u r27=r25,31,2 |
| 163 | ;; | 259 | ;; |
| 164 | ld8 r16=[r16] | ||
| 165 | extr.u r28=r25,36,1 | 260 | extr.u r28=r25,36,1 |
| 166 | dep r26=r27,r26,21,2 | 261 | dep r26=r27,r26,21,2 |
| 167 | ;; | 262 | ;; |
| @@ -196,7 +291,7 @@ GLOBAL_ENTRY(kvm_asm_rsm) | |||
| 196 | tbit.nz p6,p0=r23,0 | 291 | tbit.nz p6,p0=r23,0 |
| 197 | ;; | 292 | ;; |
| 198 | tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT | 293 | tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT |
| 199 | (p6) br.dptk kvm_resume_to_guest | 294 | (p6) br.dptk kvm_resume_to_guest_with_sync |
| 200 | ;; | 295 | ;; |
| 201 | add r26=VMM_VCPU_META_RR0_OFFSET,r21 | 296 | add r26=VMM_VCPU_META_RR0_OFFSET,r21 |
| 202 | add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 | 297 | add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 |
| @@ -212,7 +307,7 @@ GLOBAL_ENTRY(kvm_asm_rsm) | |||
| 212 | mov rr[r28]=r27 | 307 | mov rr[r28]=r27 |
| 213 | ;; | 308 | ;; |
| 214 | srlz.d | 309 | srlz.d |
| 215 | br.many kvm_resume_to_guest | 310 | br.many kvm_resume_to_guest_with_sync |
| 216 | END(kvm_asm_rsm) | 311 | END(kvm_asm_rsm) |
| 217 | 312 | ||
| 218 | 313 | ||
| @@ -221,11 +316,11 @@ GLOBAL_ENTRY(kvm_asm_ssm) | |||
| 221 | #ifndef ACCE_SSM | 316 | #ifndef ACCE_SSM |
| 222 | br.many kvm_virtualization_fault_back | 317 | br.many kvm_virtualization_fault_back |
| 223 | #endif | 318 | #endif |
| 224 | add r16=VMM_VPD_BASE_OFFSET,r21 | 319 | VMX_VPS_SYNC_READ |
| 320 | ;; | ||
| 225 | extr.u r26=r25,6,21 | 321 | extr.u r26=r25,6,21 |
| 226 | extr.u r27=r25,31,2 | 322 | extr.u r27=r25,31,2 |
| 227 | ;; | 323 | ;; |
| 228 | ld8 r16=[r16] | ||
| 229 | extr.u r28=r25,36,1 | 324 | extr.u r28=r25,36,1 |
| 230 | dep r26=r27,r26,21,2 | 325 | dep r26=r27,r26,21,2 |
| 231 | ;; //r26 is imm24 | 326 | ;; //r26 is imm24 |
| @@ -271,7 +366,7 @@ kvm_asm_ssm_1: | |||
| 271 | tbit.nz p6,p0=r29,IA64_PSR_I_BIT | 366 | tbit.nz p6,p0=r29,IA64_PSR_I_BIT |
| 272 | ;; | 367 | ;; |
| 273 | tbit.z.or p6,p0=r19,IA64_PSR_I_BIT | 368 | tbit.z.or p6,p0=r19,IA64_PSR_I_BIT |
| 274 | (p6) br.dptk kvm_resume_to_guest | 369 | (p6) br.dptk kvm_resume_to_guest_with_sync |
| 275 | ;; | 370 | ;; |
| 276 | add r29=VPD_VTPR_START_OFFSET,r16 | 371 | add r29=VPD_VTPR_START_OFFSET,r16 |
| 277 | add r30=VPD_VHPI_START_OFFSET,r16 | 372 | add r30=VPD_VHPI_START_OFFSET,r16 |
| @@ -286,7 +381,7 @@ kvm_asm_ssm_1: | |||
| 286 | ;; | 381 | ;; |
| 287 | cmp.gt p6,p0=r30,r17 | 382 | cmp.gt p6,p0=r30,r17 |
| 288 | (p6) br.dpnt.few kvm_asm_dispatch_vexirq | 383 | (p6) br.dpnt.few kvm_asm_dispatch_vexirq |
| 289 | br.many kvm_resume_to_guest | 384 | br.many kvm_resume_to_guest_with_sync |
| 290 | END(kvm_asm_ssm) | 385 | END(kvm_asm_ssm) |
| 291 | 386 | ||
| 292 | 387 | ||
| @@ -295,10 +390,9 @@ GLOBAL_ENTRY(kvm_asm_mov_to_psr) | |||
| 295 | #ifndef ACCE_MOV_TO_PSR | 390 | #ifndef ACCE_MOV_TO_PSR |
| 296 | br.many kvm_virtualization_fault_back | 391 | br.many kvm_virtualization_fault_back |
| 297 | #endif | 392 | #endif |
| 298 | add r16=VMM_VPD_BASE_OFFSET,r21 | 393 | VMX_VPS_SYNC_READ |
| 299 | extr.u r26=r25,13,7 //r2 | ||
| 300 | ;; | 394 | ;; |
| 301 | ld8 r16=[r16] | 395 | extr.u r26=r25,13,7 //r2 |
| 302 | addl r20=@gprel(asm_mov_from_reg),gp | 396 | addl r20=@gprel(asm_mov_from_reg),gp |
| 303 | ;; | 397 | ;; |
| 304 | adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 | 398 | adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 |
| @@ -374,7 +468,7 @@ kvm_asm_mov_to_psr_1: | |||
| 374 | ;; | 468 | ;; |
| 375 | tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT | 469 | tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT |
| 376 | tbit.z.or p6,p0=r30,IA64_PSR_I_BIT | 470 | tbit.z.or p6,p0=r30,IA64_PSR_I_BIT |
| 377 | (p6) br.dpnt.few kvm_resume_to_guest | 471 | (p6) br.dpnt.few kvm_resume_to_guest_with_sync |
| 378 | ;; | 472 | ;; |
| 379 | add r29=VPD_VTPR_START_OFFSET,r16 | 473 | add r29=VPD_VTPR_START_OFFSET,r16 |
| 380 | add r30=VPD_VHPI_START_OFFSET,r16 | 474 | add r30=VPD_VHPI_START_OFFSET,r16 |
| @@ -389,13 +483,29 @@ kvm_asm_mov_to_psr_1: | |||
| 389 | ;; | 483 | ;; |
| 390 | cmp.gt p6,p0=r30,r17 | 484 | cmp.gt p6,p0=r30,r17 |
| 391 | (p6) br.dpnt.few kvm_asm_dispatch_vexirq | 485 | (p6) br.dpnt.few kvm_asm_dispatch_vexirq |
| 392 | br.many kvm_resume_to_guest | 486 | br.many kvm_resume_to_guest_with_sync |
| 393 | END(kvm_asm_mov_to_psr) | 487 | END(kvm_asm_mov_to_psr) |
| 394 | 488 | ||
| 395 | 489 | ||
| 396 | ENTRY(kvm_asm_dispatch_vexirq) | 490 | ENTRY(kvm_asm_dispatch_vexirq) |
| 397 | //increment iip | 491 | //increment iip |
| 492 | mov r17 = b0 | ||
| 493 | mov r18 = r31 | ||
| 494 | {.mii | ||
| 495 | add r25=VMM_VPD_BASE_OFFSET,r21 | ||
| 496 | nop 0x0 | ||
| 497 | mov r24 = ip | ||
| 498 | ;; | ||
| 499 | } | ||
| 500 | {.mmb | ||
| 501 | add r24 = 0x20, r24 | ||
| 502 | ld8 r25 = [r25] | ||
| 503 | br.sptk.many kvm_vps_sync_write | ||
| 504 | } | ||
| 505 | mov b0 =r17 | ||
| 398 | mov r16=cr.ipsr | 506 | mov r16=cr.ipsr |
| 507 | mov r31 = r18 | ||
| 508 | mov r19 = 37 | ||
| 399 | ;; | 509 | ;; |
| 400 | extr.u r17=r16,IA64_PSR_RI_BIT,2 | 510 | extr.u r17=r16,IA64_PSR_RI_BIT,2 |
| 401 | tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 | 511 | tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 |
| @@ -435,25 +545,31 @@ GLOBAL_ENTRY(kvm_asm_thash) | |||
| 435 | ;; | 545 | ;; |
| 436 | kvm_asm_thash_back1: | 546 | kvm_asm_thash_back1: |
| 437 | shr.u r23=r19,61 // get RR number | 547 | shr.u r23=r19,61 // get RR number |
| 438 | adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr | 548 | adds r28=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr |
| 439 | adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta | 549 | adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta |
| 440 | ;; | 550 | ;; |
| 441 | shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr | 551 | shladd r27=r23,3,r28 // get vcpu->arch.vrr[r23]'s addr |
| 442 | ld8 r17=[r16] // get PTA | 552 | ld8 r17=[r16] // get PTA |
| 443 | mov r26=1 | 553 | mov r26=1 |
| 444 | ;; | 554 | ;; |
| 445 | extr.u r29=r17,2,6 // get pta.size | 555 | extr.u r29=r17,2,6 // get pta.size |
| 446 | ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value | 556 | ld8 r28=[r27] // get vcpu->arch.vrr[r23]'s value |
| 447 | ;; | 557 | ;; |
| 448 | extr.u r25=r25,2,6 // get rr.ps | 558 | mov b0=r24 |
| 559 | //Fallback to C if pta.vf is set | ||
| 560 | tbit.nz p6,p0=r17, 8 | ||
| 561 | ;; | ||
| 562 | (p6) mov r24=EVENT_THASH | ||
| 563 | (p6) br.cond.dpnt.many kvm_virtualization_fault_back | ||
| 564 | extr.u r28=r28,2,6 // get rr.ps | ||
| 449 | shl r22=r26,r29 // 1UL << pta.size | 565 | shl r22=r26,r29 // 1UL << pta.size |
| 450 | ;; | 566 | ;; |
| 451 | shr.u r23=r19,r25 // vaddr >> rr.ps | 567 | shr.u r23=r19,r28 // vaddr >> rr.ps |
| 452 | adds r26=3,r29 // pta.size + 3 | 568 | adds r26=3,r29 // pta.size + 3 |
| 453 | shl r27=r17,3 // pta << 3 | 569 | shl r27=r17,3 // pta << 3 |
| 454 | ;; | 570 | ;; |
| 455 | shl r23=r23,3 // (vaddr >> rr.ps) << 3 | 571 | shl r23=r23,3 // (vaddr >> rr.ps) << 3 |
| 456 | shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) | 572 | shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) |
| 457 | movl r16=7<<61 | 573 | movl r16=7<<61 |
| 458 | ;; | 574 | ;; |
| 459 | adds r22=-1,r22 // (1UL << pta.size) - 1 | 575 | adds r22=-1,r22 // (1UL << pta.size) - 1 |
| @@ -724,6 +840,29 @@ END(asm_mov_from_reg) | |||
| 724 | * r31: pr | 840 | * r31: pr |
| 725 | * r24: b0 | 841 | * r24: b0 |
| 726 | */ | 842 | */ |
| 843 | ENTRY(kvm_resume_to_guest_with_sync) | ||
| 844 | adds r19=VMM_VPD_BASE_OFFSET,r21 | ||
| 845 | mov r16 = r31 | ||
| 846 | mov r17 = r24 | ||
| 847 | ;; | ||
| 848 | {.mii | ||
| 849 | ld8 r25 =[r19] | ||
| 850 | nop 0x0 | ||
| 851 | mov r24 = ip | ||
| 852 | ;; | ||
| 853 | } | ||
| 854 | {.mmb | ||
| 855 | add r24 =0x20, r24 | ||
| 856 | nop 0x0 | ||
| 857 | br.sptk.many kvm_vps_sync_write | ||
| 858 | } | ||
| 859 | |||
| 860 | mov r31 = r16 | ||
| 861 | mov r24 =r17 | ||
| 862 | ;; | ||
| 863 | br.sptk.many kvm_resume_to_guest | ||
| 864 | END(kvm_resume_to_guest_with_sync) | ||
| 865 | |||
| 727 | ENTRY(kvm_resume_to_guest) | 866 | ENTRY(kvm_resume_to_guest) |
| 728 | adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 | 867 | adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 |
| 729 | ;; | 868 | ;; |
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 5a33f7ed29a0..3417783ae164 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
| @@ -962,9 +962,9 @@ static void kvm_do_resume_op(struct kvm_vcpu *vcpu) | |||
| 962 | void vmm_transition(struct kvm_vcpu *vcpu) | 962 | void vmm_transition(struct kvm_vcpu *vcpu) |
| 963 | { | 963 | { |
| 964 | ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, | 964 | ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, |
| 965 | 0, 0, 0, 0, 0, 0); | 965 | 1, 0, 0, 0, 0, 0); |
| 966 | vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); | 966 | vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); |
| 967 | ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, | 967 | ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, |
| 968 | 0, 0, 0, 0, 0, 0); | 968 | 1, 0, 0, 0, 0, 0); |
| 969 | kvm_do_resume_op(vcpu); | 969 | kvm_do_resume_op(vcpu); |
| 970 | } | 970 | } |
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b0fcfb62c49e..341e3fee280c 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
| @@ -313,21 +313,21 @@ static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, | |||
| 313 | trp->rid = rid; | 313 | trp->rid = rid; |
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | extern u64 kvm_lookup_mpa(u64 gpfn); | 316 | extern u64 kvm_get_mpt_entry(u64 gpfn); |
| 317 | extern u64 kvm_gpa_to_mpa(u64 gpa); | ||
| 318 | |||
| 319 | /* Return I/O type if trye */ | ||
| 320 | #define __gpfn_is_io(gpfn) \ | ||
| 321 | ({ \ | ||
| 322 | u64 pte, ret = 0; \ | ||
| 323 | pte = kvm_lookup_mpa(gpfn); \ | ||
| 324 | if (!(pte & GPFN_INV_MASK)) \ | ||
| 325 | ret = pte & GPFN_IO_MASK; \ | ||
| 326 | ret; \ | ||
| 327 | }) | ||
| 328 | 317 | ||
| 318 | /* Return I/ */ | ||
| 319 | static inline u64 __gpfn_is_io(u64 gpfn) | ||
| 320 | { | ||
| 321 | u64 pte; | ||
| 322 | pte = kvm_get_mpt_entry(gpfn); | ||
| 323 | if (!(pte & GPFN_INV_MASK)) { | ||
| 324 | pte = pte & GPFN_IO_MASK; | ||
| 325 | if (pte != GPFN_PHYS_MMIO) | ||
| 326 | return pte; | ||
| 327 | } | ||
| 328 | return 0; | ||
| 329 | } | ||
| 329 | #endif | 330 | #endif |
| 330 | |||
| 331 | #define IA64_NO_FAULT 0 | 331 | #define IA64_NO_FAULT 0 |
| 332 | #define IA64_FAULT 1 | 332 | #define IA64_FAULT 1 |
| 333 | 333 | ||
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 3ee5f481c06d..c1d7251a1480 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
| @@ -1261,11 +1261,6 @@ kvm_rse_clear_invalid: | |||
| 1261 | adds r19=VMM_VPD_VPSR_OFFSET,r18 | 1261 | adds r19=VMM_VPD_VPSR_OFFSET,r18 |
| 1262 | ;; | 1262 | ;; |
| 1263 | ld8 r19=[r19] //vpsr | 1263 | ld8 r19=[r19] //vpsr |
| 1264 | adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21 | ||
| 1265 | ;; | ||
| 1266 | ld8 r20=[r20] | ||
| 1267 | ;; | ||
| 1268 | //vsa_sync_write_start | ||
| 1269 | mov r25=r18 | 1264 | mov r25=r18 |
| 1270 | adds r16= VMM_VCPU_GP_OFFSET,r21 | 1265 | adds r16= VMM_VCPU_GP_OFFSET,r21 |
| 1271 | ;; | 1266 | ;; |
| @@ -1274,10 +1269,7 @@ kvm_rse_clear_invalid: | |||
| 1274 | ;; | 1269 | ;; |
| 1275 | add r24=r24,r16 | 1270 | add r24=r24,r16 |
| 1276 | ;; | 1271 | ;; |
| 1277 | add r16=PAL_VPS_SYNC_WRITE,r20 | 1272 | br.sptk.many kvm_vps_sync_write // call the service |
| 1278 | ;; | ||
| 1279 | mov b0=r16 | ||
| 1280 | br.cond.sptk b0 // call the service | ||
| 1281 | ;; | 1273 | ;; |
| 1282 | END(ia64_leave_hypervisor) | 1274 | END(ia64_leave_hypervisor) |
| 1283 | // fall through | 1275 | // fall through |
| @@ -1288,28 +1280,15 @@ GLOBAL_ENTRY(ia64_vmm_entry) | |||
| 1288 | * r17:cr.isr | 1280 | * r17:cr.isr |
| 1289 | * r18:vpd | 1281 | * r18:vpd |
| 1290 | * r19:vpsr | 1282 | * r19:vpsr |
| 1291 | * r20:__vsa_base | ||
| 1292 | * r22:b0 | 1283 | * r22:b0 |
| 1293 | * r23:predicate | 1284 | * r23:predicate |
| 1294 | */ | 1285 | */ |
| 1295 | mov r24=r22 | 1286 | mov r24=r22 |
| 1296 | mov r25=r18 | 1287 | mov r25=r18 |
| 1297 | tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic | 1288 | tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic |
| 1289 | (p1) br.cond.sptk.few kvm_vps_resume_normal | ||
| 1290 | (p2) br.cond.sptk.many kvm_vps_resume_handler | ||
| 1298 | ;; | 1291 | ;; |
| 1299 | (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 | ||
| 1300 | (p1) br.sptk.many ia64_vmm_entry_out | ||
| 1301 | ;; | ||
| 1302 | tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir | ||
| 1303 | ;; | ||
| 1304 | (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 | ||
| 1305 | (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 | ||
| 1306 | (p2) ld8 r26=[r25] | ||
| 1307 | ;; | ||
| 1308 | ia64_vmm_entry_out: | ||
| 1309 | mov pr=r23,-2 | ||
| 1310 | mov b0=r29 | ||
| 1311 | ;; | ||
| 1312 | br.cond.sptk b0 // call pal service | ||
| 1313 | END(ia64_vmm_entry) | 1292 | END(ia64_vmm_entry) |
| 1314 | 1293 | ||
| 1315 | 1294 | ||
| @@ -1376,6 +1355,9 @@ GLOBAL_ENTRY(vmm_reset_entry) | |||
| 1376 | //set up ipsr, iip, vpd.vpsr, dcr | 1355 | //set up ipsr, iip, vpd.vpsr, dcr |
| 1377 | // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 | 1356 | // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 |
| 1378 | // For DCR: all bits 0 | 1357 | // For DCR: all bits 0 |
| 1358 | bsw.0 | ||
| 1359 | ;; | ||
| 1360 | mov r21 =r13 | ||
| 1379 | adds r14=-VMM_PT_REGS_SIZE, r12 | 1361 | adds r14=-VMM_PT_REGS_SIZE, r12 |
| 1380 | ;; | 1362 | ;; |
| 1381 | movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 | 1363 | movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 |
| @@ -1387,12 +1369,6 @@ GLOBAL_ENTRY(vmm_reset_entry) | |||
| 1387 | ;; | 1369 | ;; |
| 1388 | srlz.i | 1370 | srlz.i |
| 1389 | ;; | 1371 | ;; |
| 1390 | bsw.0 | ||
| 1391 | ;; | ||
| 1392 | mov r21 =r13 | ||
| 1393 | ;; | ||
| 1394 | bsw.1 | ||
| 1395 | ;; | ||
| 1396 | mov ar.rsc = 0 | 1372 | mov ar.rsc = 0 |
| 1397 | ;; | 1373 | ;; |
| 1398 | flushrs | 1374 | flushrs |
| @@ -1406,12 +1382,9 @@ GLOBAL_ENTRY(vmm_reset_entry) | |||
| 1406 | ld8 r1 = [r20] | 1382 | ld8 r1 = [r20] |
| 1407 | ;; | 1383 | ;; |
| 1408 | mov cr.iip=r4 | 1384 | mov cr.iip=r4 |
| 1409 | ;; | ||
| 1410 | adds r16=VMM_VPD_BASE_OFFSET,r13 | 1385 | adds r16=VMM_VPD_BASE_OFFSET,r13 |
| 1411 | adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13 | ||
| 1412 | ;; | 1386 | ;; |
| 1413 | ld8 r18=[r16] | 1387 | ld8 r18=[r16] |
| 1414 | ld8 r20=[r20] | ||
| 1415 | ;; | 1388 | ;; |
| 1416 | adds r19=VMM_VPD_VPSR_OFFSET,r18 | 1389 | adds r19=VMM_VPD_VPSR_OFFSET,r18 |
| 1417 | ;; | 1390 | ;; |
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index def4576d22b1..e22b93361e08 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
| @@ -390,7 +390,7 @@ void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) | |||
| 390 | 390 | ||
| 391 | u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) | 391 | u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) |
| 392 | { | 392 | { |
| 393 | u64 ps, ps_mask, paddr, maddr; | 393 | u64 ps, ps_mask, paddr, maddr, io_mask; |
| 394 | union pte_flags phy_pte; | 394 | union pte_flags phy_pte; |
| 395 | 395 | ||
| 396 | ps = itir_ps(itir); | 396 | ps = itir_ps(itir); |
| @@ -398,8 +398,9 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) | |||
| 398 | phy_pte.val = *pte; | 398 | phy_pte.val = *pte; |
| 399 | paddr = *pte; | 399 | paddr = *pte; |
| 400 | paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); | 400 | paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); |
| 401 | maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT); | 401 | maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT); |
| 402 | if (maddr & GPFN_IO_MASK) { | 402 | io_mask = maddr & GPFN_IO_MASK; |
| 403 | if (io_mask && (io_mask != GPFN_PHYS_MMIO)) { | ||
| 403 | *pte |= VTLB_PTE_IO; | 404 | *pte |= VTLB_PTE_IO; |
| 404 | return -1; | 405 | return -1; |
| 405 | } | 406 | } |
| @@ -418,7 +419,7 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, | |||
| 418 | u64 ifa, int type) | 419 | u64 ifa, int type) |
| 419 | { | 420 | { |
| 420 | u64 ps; | 421 | u64 ps; |
| 421 | u64 phy_pte; | 422 | u64 phy_pte, io_mask, index; |
| 422 | union ia64_rr vrr, mrr; | 423 | union ia64_rr vrr, mrr; |
| 423 | int ret = 0; | 424 | int ret = 0; |
| 424 | 425 | ||
| @@ -426,13 +427,16 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, | |||
| 426 | vrr.val = vcpu_get_rr(v, ifa); | 427 | vrr.val = vcpu_get_rr(v, ifa); |
| 427 | mrr.val = ia64_get_rr(ifa); | 428 | mrr.val = ia64_get_rr(ifa); |
| 428 | 429 | ||
| 430 | index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; | ||
| 431 | io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK; | ||
| 429 | phy_pte = translate_phy_pte(&pte, itir, ifa); | 432 | phy_pte = translate_phy_pte(&pte, itir, ifa); |
| 430 | 433 | ||
| 431 | /* Ensure WB attribute if pte is related to a normal mem page, | 434 | /* Ensure WB attribute if pte is related to a normal mem page, |
| 432 | * which is required by vga acceleration since qemu maps shared | 435 | * which is required by vga acceleration since qemu maps shared |
| 433 | * vram buffer with WB. | 436 | * vram buffer with WB. |
| 434 | */ | 437 | */ |
| 435 | if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) { | 438 | if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) && |
| 439 | io_mask != GPFN_PHYS_MMIO) { | ||
| 436 | pte &= ~_PAGE_MA_MASK; | 440 | pte &= ~_PAGE_MA_MASK; |
| 437 | phy_pte &= ~_PAGE_MA_MASK; | 441 | phy_pte &= ~_PAGE_MA_MASK; |
| 438 | } | 442 | } |
| @@ -566,12 +570,19 @@ void thash_init(struct thash_cb *hcb, u64 sz) | |||
| 566 | } | 570 | } |
| 567 | } | 571 | } |
| 568 | 572 | ||
| 569 | u64 kvm_lookup_mpa(u64 gpfn) | 573 | u64 kvm_get_mpt_entry(u64 gpfn) |
| 570 | { | 574 | { |
| 571 | u64 *base = (u64 *) KVM_P2M_BASE; | 575 | u64 *base = (u64 *) KVM_P2M_BASE; |
| 572 | return *(base + gpfn); | 576 | return *(base + gpfn); |
| 573 | } | 577 | } |
| 574 | 578 | ||
| 579 | u64 kvm_lookup_mpa(u64 gpfn) | ||
| 580 | { | ||
| 581 | u64 maddr; | ||
| 582 | maddr = kvm_get_mpt_entry(gpfn); | ||
| 583 | return maddr&_PAGE_PPN_MASK; | ||
| 584 | } | ||
| 585 | |||
| 575 | u64 kvm_gpa_to_mpa(u64 gpa) | 586 | u64 kvm_gpa_to_mpa(u64 gpa) |
| 576 | { | 587 | { |
| 577 | u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); | 588 | u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); |
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2655e2a4831e..34b52b7180cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
| @@ -81,11 +81,17 @@ struct kvm_vcpu_arch { | |||
| 81 | struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; | 81 | struct tlbe shadow_tlb[PPC44x_TLB_SIZE]; |
| 82 | /* Pages which are referenced in the shadow TLB. */ | 82 | /* Pages which are referenced in the shadow TLB. */ |
| 83 | struct page *shadow_pages[PPC44x_TLB_SIZE]; | 83 | struct page *shadow_pages[PPC44x_TLB_SIZE]; |
| 84 | /* Copy of the host's TLB. */ | 84 | |
| 85 | struct tlbe host_tlb[PPC44x_TLB_SIZE]; | 85 | /* Track which TLB entries we've modified in the current exit. */ |
| 86 | u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; | ||
| 86 | 87 | ||
| 87 | u32 host_stack; | 88 | u32 host_stack; |
| 88 | u32 host_pid; | 89 | u32 host_pid; |
| 90 | u32 host_dbcr0; | ||
| 91 | u32 host_dbcr1; | ||
| 92 | u32 host_dbcr2; | ||
| 93 | u32 host_iac[4]; | ||
| 94 | u32 host_msr; | ||
| 89 | 95 | ||
| 90 | u64 fpr[32]; | 96 | u64 fpr[32]; |
| 91 | u32 gpr[32]; | 97 | u32 gpr[32]; |
| @@ -123,7 +129,11 @@ struct kvm_vcpu_arch { | |||
| 123 | u32 ivor[16]; | 129 | u32 ivor[16]; |
| 124 | u32 ivpr; | 130 | u32 ivpr; |
| 125 | u32 pir; | 131 | u32 pir; |
| 132 | |||
| 133 | u32 shadow_pid; | ||
| 126 | u32 pid; | 134 | u32 pid; |
| 135 | u32 swap_pid; | ||
| 136 | |||
| 127 | u32 pvr; | 137 | u32 pvr; |
| 128 | u32 ccr0; | 138 | u32 ccr0; |
| 129 | u32 ccr1; | 139 | u32 ccr1; |
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a8b068792260..8931ba729d2b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
| @@ -64,6 +64,10 @@ extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, | |||
| 64 | extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, | 64 | extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, |
| 65 | gva_t eend, u32 asid); | 65 | gva_t eend, u32 asid); |
| 66 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); | 66 | extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); |
| 67 | extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); | ||
| 68 | |||
| 69 | /* XXX Book E specific */ | ||
| 70 | extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i); | ||
| 67 | 71 | ||
| 68 | extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); | 72 | extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu); |
| 69 | 73 | ||
| @@ -92,4 +96,12 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) | |||
| 92 | kvm_vcpu_block(vcpu); | 96 | kvm_vcpu_block(vcpu); |
| 93 | } | 97 | } |
| 94 | 98 | ||
| 99 | static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) | ||
| 100 | { | ||
| 101 | if (vcpu->arch.pid != new_pid) { | ||
| 102 | vcpu->arch.pid = new_pid; | ||
| 103 | vcpu->arch.swap_pid = 1; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 95 | #endif /* __POWERPC_KVM_PPC_H__ */ | 107 | #endif /* __POWERPC_KVM_PPC_H__ */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 09febc582584..75c5dd0138fd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
| @@ -359,8 +359,8 @@ int main(void) | |||
| 359 | 359 | ||
| 360 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); | 360 | DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); |
| 361 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); | 361 | DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); |
| 362 | DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb)); | ||
| 363 | DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); | 362 | DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); |
| 363 | DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod)); | ||
| 364 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); | 364 | DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); |
| 365 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); | 365 | DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); |
| 366 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); | 366 | DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); |
| @@ -372,7 +372,7 @@ int main(void) | |||
| 372 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); | 372 | DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); |
| 373 | DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); | 373 | DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); |
| 374 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); | 374 | DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); |
| 375 | DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid)); | 375 | DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); |
| 376 | 376 | ||
| 377 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); | 377 | DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); |
| 378 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); | 378 | DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); |
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 5a5602da5091..2e227a412bc2 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
| @@ -19,6 +19,7 @@ | |||
| 19 | 19 | ||
| 20 | #include <linux/types.h> | 20 | #include <linux/types.h> |
| 21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
| 22 | #include <linux/kvm.h> | ||
| 22 | #include <linux/kvm_host.h> | 23 | #include <linux/kvm_host.h> |
| 23 | #include <linux/highmem.h> | 24 | #include <linux/highmem.h> |
| 24 | #include <asm/mmu-44x.h> | 25 | #include <asm/mmu-44x.h> |
| @@ -109,7 +110,6 @@ static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) | |||
| 109 | return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); | 110 | return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); |
| 110 | } | 111 | } |
| 111 | 112 | ||
| 112 | /* Must be called with mmap_sem locked for writing. */ | ||
| 113 | static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, | 113 | static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, |
| 114 | unsigned int index) | 114 | unsigned int index) |
| 115 | { | 115 | { |
| @@ -124,6 +124,11 @@ static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, | |||
| 124 | } | 124 | } |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i) | ||
| 128 | { | ||
| 129 | vcpu->arch.shadow_tlb_mod[i] = 1; | ||
| 130 | } | ||
| 131 | |||
| 127 | /* Caller must ensure that the specified guest TLB entry is safe to insert into | 132 | /* Caller must ensure that the specified guest TLB entry is safe to insert into |
| 128 | * the shadow TLB. */ | 133 | * the shadow TLB. */ |
| 129 | void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, | 134 | void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, |
| @@ -142,19 +147,16 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, | |||
| 142 | stlbe = &vcpu->arch.shadow_tlb[victim]; | 147 | stlbe = &vcpu->arch.shadow_tlb[victim]; |
| 143 | 148 | ||
| 144 | /* Get reference to new page. */ | 149 | /* Get reference to new page. */ |
| 145 | down_read(¤t->mm->mmap_sem); | ||
| 146 | new_page = gfn_to_page(vcpu->kvm, gfn); | 150 | new_page = gfn_to_page(vcpu->kvm, gfn); |
| 147 | if (is_error_page(new_page)) { | 151 | if (is_error_page(new_page)) { |
| 148 | printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); | 152 | printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); |
| 149 | kvm_release_page_clean(new_page); | 153 | kvm_release_page_clean(new_page); |
| 150 | up_read(¤t->mm->mmap_sem); | ||
| 151 | return; | 154 | return; |
| 152 | } | 155 | } |
| 153 | hpaddr = page_to_phys(new_page); | 156 | hpaddr = page_to_phys(new_page); |
| 154 | 157 | ||
| 155 | /* Drop reference to old page. */ | 158 | /* Drop reference to old page. */ |
| 156 | kvmppc_44x_shadow_release(vcpu, victim); | 159 | kvmppc_44x_shadow_release(vcpu, victim); |
| 157 | up_read(¤t->mm->mmap_sem); | ||
| 158 | 160 | ||
| 159 | vcpu->arch.shadow_pages[victim] = new_page; | 161 | vcpu->arch.shadow_pages[victim] = new_page; |
| 160 | 162 | ||
| @@ -164,27 +166,30 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, | |||
| 164 | 166 | ||
| 165 | /* XXX what about AS? */ | 167 | /* XXX what about AS? */ |
| 166 | 168 | ||
| 167 | stlbe->tid = asid & 0xff; | 169 | stlbe->tid = !(asid & 0xff); |
| 168 | 170 | ||
| 169 | /* Force TS=1 for all guest mappings. */ | 171 | /* Force TS=1 for all guest mappings. */ |
| 170 | /* For now we hardcode 4KB mappings, but it will be important to | 172 | /* For now we hardcode 4KB mappings, but it will be important to |
| 171 | * use host large pages in the future. */ | 173 | * use host large pages in the future. */ |
| 172 | stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS | 174 | stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS |
| 173 | | PPC44x_TLB_4K; | 175 | | PPC44x_TLB_4K; |
| 174 | |||
| 175 | stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); | 176 | stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); |
| 176 | stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, | 177 | stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, |
| 177 | vcpu->arch.msr & MSR_PR); | 178 | vcpu->arch.msr & MSR_PR); |
| 179 | kvmppc_tlbe_set_modified(vcpu, victim); | ||
| 180 | |||
| 181 | KVMTRACE_5D(STLB_WRITE, vcpu, victim, | ||
| 182 | stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2, | ||
| 183 | handler); | ||
| 178 | } | 184 | } |
| 179 | 185 | ||
| 180 | void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, | 186 | void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, |
| 181 | gva_t eend, u32 asid) | 187 | gva_t eend, u32 asid) |
| 182 | { | 188 | { |
| 183 | unsigned int pid = asid & 0xff; | 189 | unsigned int pid = !(asid & 0xff); |
| 184 | int i; | 190 | int i; |
| 185 | 191 | ||
| 186 | /* XXX Replace loop with fancy data structures. */ | 192 | /* XXX Replace loop with fancy data structures. */ |
| 187 | down_write(¤t->mm->mmap_sem); | ||
| 188 | for (i = 0; i <= tlb_44x_hwater; i++) { | 193 | for (i = 0; i <= tlb_44x_hwater; i++) { |
| 189 | struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; | 194 | struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; |
| 190 | unsigned int tid; | 195 | unsigned int tid; |
| @@ -204,21 +209,35 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, | |||
| 204 | 209 | ||
| 205 | kvmppc_44x_shadow_release(vcpu, i); | 210 | kvmppc_44x_shadow_release(vcpu, i); |
| 206 | stlbe->word0 = 0; | 211 | stlbe->word0 = 0; |
| 212 | kvmppc_tlbe_set_modified(vcpu, i); | ||
| 213 | KVMTRACE_5D(STLB_INVAL, vcpu, i, | ||
| 214 | stlbe->tid, stlbe->word0, stlbe->word1, | ||
| 215 | stlbe->word2, handler); | ||
| 207 | } | 216 | } |
| 208 | up_write(¤t->mm->mmap_sem); | ||
| 209 | } | 217 | } |
| 210 | 218 | ||
| 211 | /* Invalidate all mappings, so that when they fault back in they will get the | 219 | /* Invalidate all mappings on the privilege switch after PID has been changed. |
| 212 | * proper permission bits. */ | 220 | * The guest always runs with PID=1, so we must clear the entire TLB when |
| 221 | * switching address spaces. */ | ||
| 213 | void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) | 222 | void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) |
| 214 | { | 223 | { |
| 215 | int i; | 224 | int i; |
| 216 | 225 | ||
| 217 | /* XXX Replace loop with fancy data structures. */ | 226 | if (vcpu->arch.swap_pid) { |
| 218 | down_write(¤t->mm->mmap_sem); | 227 | /* XXX Replace loop with fancy data structures. */ |
| 219 | for (i = 0; i <= tlb_44x_hwater; i++) { | 228 | for (i = 0; i <= tlb_44x_hwater; i++) { |
| 220 | kvmppc_44x_shadow_release(vcpu, i); | 229 | struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; |
| 221 | vcpu->arch.shadow_tlb[i].word0 = 0; | 230 | |
| 231 | /* Future optimization: clear only userspace mappings. */ | ||
| 232 | kvmppc_44x_shadow_release(vcpu, i); | ||
| 233 | stlbe->word0 = 0; | ||
| 234 | kvmppc_tlbe_set_modified(vcpu, i); | ||
| 235 | KVMTRACE_5D(STLB_INVAL, vcpu, i, | ||
| 236 | stlbe->tid, stlbe->word0, stlbe->word1, | ||
| 237 | stlbe->word2, handler); | ||
| 238 | } | ||
| 239 | vcpu->arch.swap_pid = 0; | ||
| 222 | } | 240 | } |
| 223 | up_write(¤t->mm->mmap_sem); | 241 | |
| 242 | vcpu->arch.shadow_pid = !usermode; | ||
| 224 | } | 243 | } |
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6b076010213b..53aaa66b25e5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
| @@ -37,6 +37,17 @@ config KVM_BOOKE_HOST | |||
| 37 | Provides host support for KVM on Book E PowerPC processors. Currently | 37 | Provides host support for KVM on Book E PowerPC processors. Currently |
| 38 | this works on 440 processors only. | 38 | this works on 440 processors only. |
| 39 | 39 | ||
| 40 | config KVM_TRACE | ||
| 41 | bool "KVM trace support" | ||
| 42 | depends on KVM && MARKERS && SYSFS | ||
| 43 | select RELAY | ||
| 44 | select DEBUG_FS | ||
| 45 | default n | ||
| 46 | ---help--- | ||
| 47 | This option allows reading a trace of kvm-related events through | ||
| 48 | relayfs. Note the ABI is not considered stable and will be | ||
| 49 | modified in future updates. | ||
| 50 | |||
| 40 | source drivers/virtio/Kconfig | 51 | source drivers/virtio/Kconfig |
| 41 | 52 | ||
| 42 | endif # VIRTUALIZATION | 53 | endif # VIRTUALIZATION |
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 04e3449e1f42..2a5d4397ac4b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
| @@ -4,9 +4,11 @@ | |||
| 4 | 4 | ||
| 5 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm | 5 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm |
| 6 | 6 | ||
| 7 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) | 7 | common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) |
| 8 | 8 | ||
| 9 | kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o | 9 | common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o) |
| 10 | |||
| 11 | kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o | ||
| 10 | obj-$(CONFIG_KVM) += kvm.o | 12 | obj-$(CONFIG_KVM) += kvm.o |
| 11 | 13 | ||
| 12 | AFLAGS_booke_interrupts.o := -I$(obj) | 14 | AFLAGS_booke_interrupts.o := -I$(obj) |
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
index 9c8ad850c6e3..7b2591e26bae 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
| @@ -410,6 +410,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, | |||
| 410 | break; | 410 | break; |
| 411 | } | 411 | } |
| 412 | 412 | ||
| 413 | case BOOKE_INTERRUPT_DEBUG: { | ||
| 414 | u32 dbsr; | ||
| 415 | |||
| 416 | vcpu->arch.pc = mfspr(SPRN_CSRR0); | ||
| 417 | |||
| 418 | /* clear IAC events in DBSR register */ | ||
| 419 | dbsr = mfspr(SPRN_DBSR); | ||
| 420 | dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4; | ||
| 421 | mtspr(SPRN_DBSR, dbsr); | ||
| 422 | |||
| 423 | run->exit_reason = KVM_EXIT_DEBUG; | ||
| 424 | r = RESUME_HOST; | ||
| 425 | break; | ||
| 426 | } | ||
| 427 | |||
| 413 | default: | 428 | default: |
| 414 | printk(KERN_EMERG "exit_nr %d\n", exit_nr); | 429 | printk(KERN_EMERG "exit_nr %d\n", exit_nr); |
| 415 | BUG(); | 430 | BUG(); |
| @@ -471,6 +486,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
| 471 | vcpu->arch.msr = 0; | 486 | vcpu->arch.msr = 0; |
| 472 | vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ | 487 | vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ |
| 473 | 488 | ||
| 489 | vcpu->arch.shadow_pid = 1; | ||
| 490 | |||
| 474 | /* Eye-catching number so we know if the guest takes an interrupt | 491 | /* Eye-catching number so we know if the guest takes an interrupt |
| 475 | * before it's programmed its own IVPR. */ | 492 | * before it's programmed its own IVPR. */ |
| 476 | vcpu->arch.ivpr = 0x55550000; | 493 | vcpu->arch.ivpr = 0x55550000; |
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 3b653b5309b8..95e165baf85f 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
| @@ -42,7 +42,8 @@ | |||
| 42 | #define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ | 42 | #define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ |
| 43 | 43 | ||
| 44 | #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \ | 44 | #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \ |
| 45 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | 45 | (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ |
| 46 | (1<<BOOKE_INTERRUPT_DEBUG)) | ||
| 46 | 47 | ||
| 47 | #define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ | 48 | #define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ |
| 48 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) | 49 | (1<<BOOKE_INTERRUPT_DTLB_MISS)) |
| @@ -331,51 +332,57 @@ lightweight_exit: | |||
| 331 | 332 | ||
| 332 | mfspr r3, SPRN_PID | 333 | mfspr r3, SPRN_PID |
| 333 | stw r3, VCPU_HOST_PID(r4) | 334 | stw r3, VCPU_HOST_PID(r4) |
| 334 | lwz r3, VCPU_PID(r4) | 335 | lwz r3, VCPU_SHADOW_PID(r4) |
| 335 | mtspr SPRN_PID, r3 | 336 | mtspr SPRN_PID, r3 |
| 336 | 337 | ||
| 337 | /* Prevent all TLB updates. */ | 338 | /* Prevent all asynchronous TLB updates. */ |
| 338 | mfmsr r5 | 339 | mfmsr r5 |
| 339 | lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h | 340 | lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h |
| 340 | ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l | 341 | ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l |
| 341 | andc r6, r5, r6 | 342 | andc r6, r5, r6 |
| 342 | mtmsr r6 | 343 | mtmsr r6 |
| 343 | 344 | ||
| 344 | /* Save the host's non-pinned TLB mappings, and load the guest mappings | 345 | /* Load the guest mappings, leaving the host's "pinned" kernel mappings |
| 345 | * over them. Leave the host's "pinned" kernel mappings in place. */ | 346 | * in place. */ |
| 346 | /* XXX optimization: use generation count to avoid swapping unmodified | ||
| 347 | * entries. */ | ||
| 348 | mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ | 347 | mfspr r10, SPRN_MMUCR /* Save host MMUCR. */ |
| 349 | lis r8, tlb_44x_hwater@ha | 348 | li r5, PPC44x_TLB_SIZE |
| 350 | lwz r8, tlb_44x_hwater@l(r8) | 349 | lis r5, tlb_44x_hwater@ha |
| 351 | addi r3, r4, VCPU_HOST_TLB - 4 | 350 | lwz r5, tlb_44x_hwater@l(r5) |
| 352 | addi r9, r4, VCPU_SHADOW_TLB - 4 | 351 | mtctr r5 |
| 353 | li r6, 0 | 352 | addi r9, r4, VCPU_SHADOW_TLB |
| 353 | addi r5, r4, VCPU_SHADOW_MOD | ||
| 354 | li r3, 0 | ||
| 354 | 1: | 355 | 1: |
| 355 | /* Save host entry. */ | 356 | lbzx r7, r3, r5 |
| 356 | tlbre r7, r6, PPC44x_TLB_PAGEID | 357 | cmpwi r7, 0 |
| 357 | mfspr r5, SPRN_MMUCR | 358 | beq 3f |
| 358 | stwu r5, 4(r3) | 359 | |
| 359 | stwu r7, 4(r3) | ||
| 360 | tlbre r7, r6, PPC44x_TLB_XLAT | ||
| 361 | stwu r7, 4(r3) | ||
| 362 | tlbre r7, r6, PPC44x_TLB_ATTRIB | ||
| 363 | stwu r7, 4(r3) | ||
| 364 | /* Load guest entry. */ | 360 | /* Load guest entry. */ |
| 365 | lwzu r7, 4(r9) | 361 | mulli r11, r3, TLBE_BYTES |
| 362 | add r11, r11, r9 | ||
| 363 | lwz r7, 0(r11) | ||
| 366 | mtspr SPRN_MMUCR, r7 | 364 | mtspr SPRN_MMUCR, r7 |
| 367 | lwzu r7, 4(r9) | 365 | lwz r7, 4(r11) |
| 368 | tlbwe r7, r6, PPC44x_TLB_PAGEID | 366 | tlbwe r7, r3, PPC44x_TLB_PAGEID |
| 369 | lwzu r7, 4(r9) | 367 | lwz r7, 8(r11) |
| 370 | tlbwe r7, r6, PPC44x_TLB_XLAT | 368 | tlbwe r7, r3, PPC44x_TLB_XLAT |
| 371 | lwzu r7, 4(r9) | 369 | lwz r7, 12(r11) |
| 372 | tlbwe r7, r6, PPC44x_TLB_ATTRIB | 370 | tlbwe r7, r3, PPC44x_TLB_ATTRIB |
| 373 | /* Increment index. */ | 371 | 3: |
| 374 | addi r6, r6, 1 | 372 | addi r3, r3, 1 /* Increment index. */ |
| 375 | cmpw r6, r8 | 373 | bdnz 1b |
| 376 | blt 1b | 374 | |
| 377 | mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ | 375 | mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */ |
| 378 | 376 | ||
| 377 | /* Clear bitmap of modified TLB entries */ | ||
| 378 | li r5, PPC44x_TLB_SIZE>>2 | ||
| 379 | mtctr r5 | ||
| 380 | addi r5, r4, VCPU_SHADOW_MOD - 4 | ||
| 381 | li r6, 0 | ||
| 382 | 1: | ||
| 383 | stwu r6, 4(r5) | ||
| 384 | bdnz 1b | ||
| 385 | |||
| 379 | iccci 0, 0 /* XXX hack */ | 386 | iccci 0, 0 /* XXX hack */ |
| 380 | 387 | ||
| 381 | /* Load some guest volatiles. */ | 388 | /* Load some guest volatiles. */ |
| @@ -431,6 +438,14 @@ lightweight_exit: | |||
| 431 | oris r3, r3, KVMPPC_MSR_MASK@h | 438 | oris r3, r3, KVMPPC_MSR_MASK@h |
| 432 | ori r3, r3, KVMPPC_MSR_MASK@l | 439 | ori r3, r3, KVMPPC_MSR_MASK@l |
| 433 | mtsrr1 r3 | 440 | mtsrr1 r3 |
| 441 | |||
| 442 | /* Clear any debug events which occurred since we disabled MSR[DE]. | ||
| 443 | * XXX This gives us a 3-instruction window in which a breakpoint | ||
| 444 | * intended for guest context could fire in the host instead. */ | ||
| 445 | lis r3, 0xffff | ||
| 446 | ori r3, r3, 0xffff | ||
| 447 | mtspr SPRN_DBSR, r3 | ||
| 448 | |||
| 434 | lwz r3, VCPU_GPR(r3)(r4) | 449 | lwz r3, VCPU_GPR(r3)(r4) |
| 435 | lwz r4, VCPU_GPR(r4)(r4) | 450 | lwz r4, VCPU_GPR(r4)(r4) |
| 436 | rfi | 451 | rfi |
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 8c605d0a5488..0fce4fbdc20d 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
| @@ -170,6 +170,10 @@ static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) | |||
| 170 | kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); | 170 | kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); |
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | KVMTRACE_5D(GTLB_WRITE, vcpu, index, | ||
| 174 | tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2, | ||
| 175 | handler); | ||
| 176 | |||
| 173 | return EMULATE_DONE; | 177 | return EMULATE_DONE; |
| 174 | } | 178 | } |
| 175 | 179 | ||
| @@ -504,7 +508,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 504 | case SPRN_MMUCR: | 508 | case SPRN_MMUCR: |
| 505 | vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; | 509 | vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; |
| 506 | case SPRN_PID: | 510 | case SPRN_PID: |
| 507 | vcpu->arch.pid = vcpu->arch.gpr[rs]; break; | 511 | kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break; |
| 508 | case SPRN_CCR0: | 512 | case SPRN_CCR0: |
| 509 | vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; | 513 | vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; |
| 510 | case SPRN_CCR1: | 514 | case SPRN_CCR1: |
| @@ -765,6 +769,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 765 | break; | 769 | break; |
| 766 | } | 770 | } |
| 767 | 771 | ||
| 772 | KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit); | ||
| 773 | |||
| 768 | if (advance) | 774 | if (advance) |
| 769 | vcpu->arch.pc += 4; /* Advance past emulated instruction. */ | 775 | vcpu->arch.pc += 4; /* Advance past emulated instruction. */ |
| 770 | 776 | ||
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 53826a5f6c06..90a6fc422b23 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <asm/cputable.h> | 27 | #include <asm/cputable.h> |
| 28 | #include <asm/uaccess.h> | 28 | #include <asm/uaccess.h> |
| 29 | #include <asm/kvm_ppc.h> | 29 | #include <asm/kvm_ppc.h> |
| 30 | #include <asm/tlbflush.h> | ||
| 30 | 31 | ||
| 31 | 32 | ||
| 32 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) | 33 | gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) |
| @@ -239,18 +240,114 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) | |||
| 239 | { | 240 | { |
| 240 | } | 241 | } |
| 241 | 242 | ||
| 243 | /* Note: clearing MSR[DE] just means that the debug interrupt will not be | ||
| 244 | * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits. | ||
| 245 | * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt | ||
| 246 | * will be delivered as an "imprecise debug event" (which is indicated by | ||
| 247 | * DBSR[IDE]). | ||
| 248 | */ | ||
| 249 | static void kvmppc_disable_debug_interrupts(void) | ||
| 250 | { | ||
| 251 | mtmsr(mfmsr() & ~MSR_DE); | ||
| 252 | } | ||
| 253 | |||
| 254 | static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu) | ||
| 255 | { | ||
| 256 | kvmppc_disable_debug_interrupts(); | ||
| 257 | |||
| 258 | mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]); | ||
| 259 | mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]); | ||
| 260 | mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]); | ||
| 261 | mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]); | ||
| 262 | mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1); | ||
| 263 | mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2); | ||
| 264 | mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0); | ||
| 265 | mtmsr(vcpu->arch.host_msr); | ||
| 266 | } | ||
| 267 | |||
| 268 | static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu) | ||
| 269 | { | ||
| 270 | struct kvm_guest_debug *dbg = &vcpu->guest_debug; | ||
| 271 | u32 dbcr0 = 0; | ||
| 272 | |||
| 273 | vcpu->arch.host_msr = mfmsr(); | ||
| 274 | kvmppc_disable_debug_interrupts(); | ||
| 275 | |||
| 276 | /* Save host debug register state. */ | ||
| 277 | vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1); | ||
| 278 | vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2); | ||
| 279 | vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3); | ||
| 280 | vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4); | ||
| 281 | vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0); | ||
| 282 | vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1); | ||
| 283 | vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2); | ||
| 284 | |||
| 285 | /* set registers up for guest */ | ||
| 286 | |||
| 287 | if (dbg->bp[0]) { | ||
| 288 | mtspr(SPRN_IAC1, dbg->bp[0]); | ||
| 289 | dbcr0 |= DBCR0_IAC1 | DBCR0_IDM; | ||
| 290 | } | ||
| 291 | if (dbg->bp[1]) { | ||
| 292 | mtspr(SPRN_IAC2, dbg->bp[1]); | ||
| 293 | dbcr0 |= DBCR0_IAC2 | DBCR0_IDM; | ||
| 294 | } | ||
| 295 | if (dbg->bp[2]) { | ||
| 296 | mtspr(SPRN_IAC3, dbg->bp[2]); | ||
| 297 | dbcr0 |= DBCR0_IAC3 | DBCR0_IDM; | ||
| 298 | } | ||
| 299 | if (dbg->bp[3]) { | ||
| 300 | mtspr(SPRN_IAC4, dbg->bp[3]); | ||
| 301 | dbcr0 |= DBCR0_IAC4 | DBCR0_IDM; | ||
| 302 | } | ||
| 303 | |||
| 304 | mtspr(SPRN_DBCR0, dbcr0); | ||
| 305 | mtspr(SPRN_DBCR1, 0); | ||
| 306 | mtspr(SPRN_DBCR2, 0); | ||
| 307 | } | ||
| 308 | |||
| 242 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 309 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 243 | { | 310 | { |
| 311 | int i; | ||
| 312 | |||
| 313 | if (vcpu->guest_debug.enabled) | ||
| 314 | kvmppc_load_guest_debug_registers(vcpu); | ||
| 315 | |||
| 316 | /* Mark every guest entry in the shadow TLB as modified, so that they | ||
| 317 | * will all be reloaded on the next vcpu run (instead of being | ||
| 318 | * demand-faulted). */ | ||
| 319 | for (i = 0; i <= tlb_44x_hwater; i++) | ||
| 320 | kvmppc_tlbe_set_modified(vcpu, i); | ||
| 244 | } | 321 | } |
| 245 | 322 | ||
| 246 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) | 323 | void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) |
| 247 | { | 324 | { |
| 325 | if (vcpu->guest_debug.enabled) | ||
| 326 | kvmppc_restore_host_debug_state(vcpu); | ||
| 327 | |||
| 328 | /* Don't leave guest TLB entries resident when being de-scheduled. */ | ||
| 329 | /* XXX It would be nice to differentiate between heavyweight exit and | ||
| 330 | * sched_out here, since we could avoid the TLB flush for heavyweight | ||
| 331 | * exits. */ | ||
| 332 | _tlbia(); | ||
| 248 | } | 333 | } |
| 249 | 334 | ||
| 250 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, | 335 | int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, |
| 251 | struct kvm_debug_guest *dbg) | 336 | struct kvm_debug_guest *dbg) |
| 252 | { | 337 | { |
| 253 | return -ENOTSUPP; | 338 | int i; |
| 339 | |||
| 340 | vcpu->guest_debug.enabled = dbg->enabled; | ||
| 341 | if (vcpu->guest_debug.enabled) { | ||
| 342 | for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) { | ||
| 343 | if (dbg->breakpoints[i].enabled) | ||
| 344 | vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address; | ||
| 345 | else | ||
| 346 | vcpu->guest_debug.bp[i] = 0; | ||
| 347 | } | ||
| 348 | } | ||
| 349 | |||
| 350 | return 0; | ||
| 254 | } | 351 | } |
| 255 | 352 | ||
| 256 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, | 353 | static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, |
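
kvm_arch_vcpu_ioctl_debug_guest() now accepts up to four breakpoint addresses and stores them in vcpu->guest_debug, and the vcpu load/put hooks swap the IACx/DBCRx debug registers around guest execution. A hedged userspace sketch of driving this path is below; the ioctl name and struct layout are the legacy kvm_debug_guest ABI assumed for this era, so verify the fields against the installed <linux/kvm.h>.

/* Sketch: arm one hardware instruction breakpoint in the guest.
 * Assumes the legacy KVM_DEBUG_GUEST ioctl and struct layout. */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int set_guest_breakpoint(int vcpu_fd, unsigned long addr)
{
        struct kvm_debug_guest dbg;

        memset(&dbg, 0, sizeof(dbg));
        dbg.enabled = 1;
        dbg.breakpoints[0].enabled = 1;
        dbg.breakpoints[0].address = addr;      /* guest effective address */

        if (ioctl(vcpu_fd, KVM_DEBUG_GUEST, &dbg) < 0) {
                perror("KVM_DEBUG_GUEST");
                return -1;
        }
        return 0;
}
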
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 4c03049e7db9..bc581d8a7cd9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig | |||
| @@ -565,13 +565,16 @@ config ZFCPDUMP | |||
| 565 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. | 565 | Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. |
| 566 | 566 | ||
| 567 | config S390_GUEST | 567 | config S390_GUEST |
| 568 | bool "s390 guest support (EXPERIMENTAL)" | 568 | bool "s390 guest support for KVM (EXPERIMENTAL)" |
| 569 | depends on 64BIT && EXPERIMENTAL | 569 | depends on 64BIT && EXPERIMENTAL |
| 570 | select VIRTIO | 570 | select VIRTIO |
| 571 | select VIRTIO_RING | 571 | select VIRTIO_RING |
| 572 | select VIRTIO_CONSOLE | 572 | select VIRTIO_CONSOLE |
| 573 | help | 573 | help |
| 574 | Select this option if you want to run the kernel under s390 linux | 574 | Select this option if you want to run the kernel as a guest under |
| 575 | the KVM hypervisor. This will add detection for KVM as well as a | ||
| 576 | virtio transport. If KVM is detected, the virtio console will be | ||
| 577 | the default console. | ||
| 575 | endmenu | 578 | endmenu |
| 576 | 579 | ||
| 577 | source "net/Kconfig" | 580 | source "net/Kconfig" |
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index d1faf5c54405..cce40ff2913b 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c | |||
| @@ -157,8 +157,8 @@ static int handle_stfl(struct kvm_vcpu *vcpu) | |||
| 157 | int rc; | 157 | int rc; |
| 158 | 158 | ||
| 159 | vcpu->stat.instruction_stfl++; | 159 | vcpu->stat.instruction_stfl++; |
| 160 | facility_list &= ~(1UL<<24); /* no stfle */ | 160 | /* only pass the facility bits, which we can handle */ |
| 161 | facility_list &= ~(1UL<<23); /* no large pages */ | 161 | facility_list &= 0xfe00fff3; |
| 162 | 162 | ||
| 163 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), | 163 | rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), |
| 164 | &facility_list, sizeof(facility_list)); | 164 | &facility_list, sizeof(facility_list)); |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d02def06ca91..774ac4991568 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
| @@ -78,6 +78,34 @@ static cycle_t kvm_clock_read(void) | |||
| 78 | return ret; | 78 | return ret; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | /* | ||
| 82 | * If we do not preset lpj, the guest may calibrate its delay loop | ||
| 83 | * under heavy load - thus getting a lower lpj - and then execute | ||
| 84 | * the delays without load. This is wrong, because a delay loop | ||
| 85 | * must never finish before the requested time. | ||
| 86 | * Any heuristic is subject to failure, because ultimately a large | ||
| 87 | * pool of guests can be running and disturb each other. So we | ||
| 88 | * preset lpj here. | ||
| 89 | */ | ||
| 90 | static unsigned long kvm_get_tsc_khz(void) | ||
| 91 | { | ||
| 92 | return preset_lpj; | ||
| 93 | } | ||
| 94 | |||
| 95 | static void kvm_get_preset_lpj(void) | ||
| 96 | { | ||
| 97 | struct pvclock_vcpu_time_info *src; | ||
| 98 | unsigned long khz; | ||
| 99 | u64 lpj; | ||
| 100 | |||
| 101 | src = &per_cpu(hv_clock, 0); | ||
| 102 | khz = pvclock_tsc_khz(src); | ||
| 103 | |||
| 104 | lpj = ((u64)khz * 1000); | ||
| 105 | do_div(lpj, HZ); | ||
| 106 | preset_lpj = lpj; | ||
| 107 | } | ||
| 108 | |||
| 81 | static struct clocksource kvm_clock = { | 109 | static struct clocksource kvm_clock = { |
| 82 | .name = "kvm-clock", | 110 | .name = "kvm-clock", |
| 83 | .read = kvm_clock_read, | 111 | .read = kvm_clock_read, |
| @@ -153,6 +181,7 @@ void __init kvmclock_init(void) | |||
| 153 | pv_time_ops.get_wallclock = kvm_get_wallclock; | 181 | pv_time_ops.get_wallclock = kvm_get_wallclock; |
| 154 | pv_time_ops.set_wallclock = kvm_set_wallclock; | 182 | pv_time_ops.set_wallclock = kvm_set_wallclock; |
| 155 | pv_time_ops.sched_clock = kvm_clock_read; | 183 | pv_time_ops.sched_clock = kvm_clock_read; |
| 184 | pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; | ||
| 156 | #ifdef CONFIG_X86_LOCAL_APIC | 185 | #ifdef CONFIG_X86_LOCAL_APIC |
| 157 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 186 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; |
| 158 | #endif | 187 | #endif |
| @@ -163,6 +192,7 @@ void __init kvmclock_init(void) | |||
| 163 | #ifdef CONFIG_KEXEC | 192 | #ifdef CONFIG_KEXEC |
| 164 | machine_ops.crash_shutdown = kvm_crash_shutdown; | 193 | machine_ops.crash_shutdown = kvm_crash_shutdown; |
| 165 | #endif | 194 | #endif |
| 195 | kvm_get_preset_lpj(); | ||
| 166 | clocksource_register(&kvm_clock); | 196 | clocksource_register(&kvm_clock); |
| 167 | } | 197 | } |
| 168 | } | 198 | } |
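
kvm_get_preset_lpj() above converts the pvclock-reported TSC frequency into loops-per-jiffy before the guest gets a chance to calibrate it under load: lpj = tsc_khz * 1000 / HZ. A small standalone sketch of that arithmetic, with illustrative values:

/* Sketch of the preset_lpj arithmetic from kvm_get_preset_lpj(). */
#include <stdio.h>

int main(void)
{
        unsigned long khz = 2400000;            /* e.g. a 2.4 GHz TSC   */
        unsigned long hz  = 250;                /* an example CONFIG_HZ */
        unsigned long long lpj = (unsigned long long)khz * 1000 / hz;

        /* 2400000 * 1000 / 250 = 9600000 loops per jiffy */
        printf("preset lpj = %llu\n", lpj);
        return 0;
}
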
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 05fbe9a0325a..4f9c55f3a7c0 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c | |||
| @@ -97,6 +97,18 @@ static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, | |||
| 97 | return dst->version; | 97 | return dst->version; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) | ||
| 101 | { | ||
| 102 | u64 pv_tsc_khz = 1000000ULL << 32; | ||
| 103 | |||
| 104 | do_div(pv_tsc_khz, src->tsc_to_system_mul); | ||
| 105 | if (src->tsc_shift < 0) | ||
| 106 | pv_tsc_khz <<= -src->tsc_shift; | ||
| 107 | else | ||
| 108 | pv_tsc_khz >>= src->tsc_shift; | ||
| 109 | return pv_tsc_khz; | ||
| 110 | } | ||
| 111 | |||
| 100 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) | 112 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) |
| 101 | { | 113 | { |
| 102 | struct pvclock_shadow_time shadow; | 114 | struct pvclock_shadow_time shadow; |
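
pvclock_tsc_khz() inverts the fixed-point scale published by the hypervisor: tsc_to_system_mul is a 32.32 multiplier (applied after shifting the TSC delta by tsc_shift) that converts cycles to nanoseconds, so kHz = (10^6 << 32) / mul, corrected by the shift. A self-contained sketch with an illustrative 2 GHz example:

/* Sketch of the pvclock_tsc_khz() inversion; the values are illustrative. */
#include <stdint.h>
#include <stdio.h>

static unsigned long tsc_khz_from_scale(uint32_t mul, int shift)
{
        uint64_t khz = 1000000ULL << 32;

        khz /= mul;                             /* invert the 32.32 multiplier */
        if (shift < 0)
                khz <<= -shift;
        else
                khz >>= shift;
        return (unsigned long)khz;
}

int main(void)
{
        /* A 2 GHz TSC is typically published as mul = 0x80000000, shift = 0:
         * ns = (cycles * 0x80000000) >> 32 = cycles / 2, and inverting gives
         * (10^6 << 32) / 0x80000000 = 2000000 kHz. */
        printf("%lu kHz\n", tsc_khz_from_scale(0x80000000u, 0));
        return 0;
}
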
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d0e940bb6f40..c02343594b4d 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
| @@ -3,10 +3,13 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 6 | coalesced_mmio.o) | 6 | coalesced_mmio.o irq_comm.o) |
| 7 | ifeq ($(CONFIG_KVM_TRACE),y) | 7 | ifeq ($(CONFIG_KVM_TRACE),y) |
| 8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) | 8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) |
| 9 | endif | 9 | endif |
| 10 | ifeq ($(CONFIG_DMAR),y) | ||
| 11 | common-objs += $(addprefix ../../../virt/kvm/, vtd.o) | ||
| 12 | endif | ||
| 10 | 13 | ||
| 11 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
| 12 | 15 | ||
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c0f7872a9124..634132a9a512 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -200,13 +200,14 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps) | |||
| 200 | 200 | ||
| 201 | if (!atomic_inc_and_test(&pt->pending)) | 201 | if (!atomic_inc_and_test(&pt->pending)) |
| 202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); | 202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); |
| 203 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) { | 203 | |
| 204 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 204 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) |
| 205 | wake_up_interruptible(&vcpu0->wq); | 205 | wake_up_interruptible(&vcpu0->wq); |
| 206 | } | ||
| 207 | 206 | ||
| 208 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); | 207 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); |
| 209 | pt->scheduled = ktime_to_ns(pt->timer.expires); | 208 | pt->scheduled = ktime_to_ns(pt->timer.expires); |
| 209 | if (pt->period) | ||
| 210 | ps->channels[0].count_load_time = pt->timer.expires; | ||
| 210 | 211 | ||
| 211 | return (pt->period == 0 ? 0 : 1); | 212 | return (pt->period == 0 ? 0 : 1); |
| 212 | } | 213 | } |
| @@ -215,12 +216,22 @@ int pit_has_pending_timer(struct kvm_vcpu *vcpu) | |||
| 215 | { | 216 | { |
| 216 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 217 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
| 217 | 218 | ||
| 218 | if (pit && vcpu->vcpu_id == 0 && pit->pit_state.inject_pending) | 219 | if (pit && vcpu->vcpu_id == 0 && pit->pit_state.irq_ack) |
| 219 | return atomic_read(&pit->pit_state.pit_timer.pending); | 220 | return atomic_read(&pit->pit_state.pit_timer.pending); |
| 220 | |||
| 221 | return 0; | 221 | return 0; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
| 225 | { | ||
| 226 | struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state, | ||
| 227 | irq_ack_notifier); | ||
| 228 | spin_lock(&ps->inject_lock); | ||
| 229 | if (atomic_dec_return(&ps->pit_timer.pending) < 0) | ||
| 230 | atomic_inc(&ps->pit_timer.pending); | ||
| 231 | ps->irq_ack = 1; | ||
| 232 | spin_unlock(&ps->inject_lock); | ||
| 233 | } | ||
| 234 | |||
| 224 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | 235 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) |
| 225 | { | 236 | { |
| 226 | struct kvm_kpit_state *ps; | 237 | struct kvm_kpit_state *ps; |
| @@ -255,8 +266,9 @@ static void destroy_pit_timer(struct kvm_kpit_timer *pt) | |||
| 255 | hrtimer_cancel(&pt->timer); | 266 | hrtimer_cancel(&pt->timer); |
| 256 | } | 267 | } |
| 257 | 268 | ||
| 258 | static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) | 269 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
| 259 | { | 270 | { |
| 271 | struct kvm_kpit_timer *pt = &ps->pit_timer; | ||
| 260 | s64 interval; | 272 | s64 interval; |
| 261 | 273 | ||
| 262 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 274 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
| @@ -268,6 +280,7 @@ static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period) | |||
| 268 | pt->period = (is_period == 0) ? 0 : interval; | 280 | pt->period = (is_period == 0) ? 0 : interval; |
| 269 | pt->timer.function = pit_timer_fn; | 281 | pt->timer.function = pit_timer_fn; |
| 270 | atomic_set(&pt->pending, 0); | 282 | atomic_set(&pt->pending, 0); |
| 283 | ps->irq_ack = 1; | ||
| 271 | 284 | ||
| 272 | hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), | 285 | hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), |
| 273 | HRTIMER_MODE_ABS); | 286 | HRTIMER_MODE_ABS); |
| @@ -302,11 +315,11 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
| 302 | case 1: | 315 | case 1: |
| 303 | /* FIXME: enhance mode 4 precision */ | 316 | /* FIXME: enhance mode 4 precision */ |
| 304 | case 4: | 317 | case 4: |
| 305 | create_pit_timer(&ps->pit_timer, val, 0); | 318 | create_pit_timer(ps, val, 0); |
| 306 | break; | 319 | break; |
| 307 | case 2: | 320 | case 2: |
| 308 | case 3: | 321 | case 3: |
| 309 | create_pit_timer(&ps->pit_timer, val, 1); | 322 | create_pit_timer(ps, val, 1); |
| 310 | break; | 323 | break; |
| 311 | default: | 324 | default: |
| 312 | destroy_pit_timer(&ps->pit_timer); | 325 | destroy_pit_timer(&ps->pit_timer); |
| @@ -520,7 +533,7 @@ void kvm_pit_reset(struct kvm_pit *pit) | |||
| 520 | mutex_unlock(&pit->pit_state.lock); | 533 | mutex_unlock(&pit->pit_state.lock); |
| 521 | 534 | ||
| 522 | atomic_set(&pit->pit_state.pit_timer.pending, 0); | 535 | atomic_set(&pit->pit_state.pit_timer.pending, 0); |
| 523 | pit->pit_state.inject_pending = 1; | 536 | pit->pit_state.irq_ack = 1; |
| 524 | } | 537 | } |
| 525 | 538 | ||
| 526 | struct kvm_pit *kvm_create_pit(struct kvm *kvm) | 539 | struct kvm_pit *kvm_create_pit(struct kvm *kvm) |
| @@ -534,6 +547,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) | |||
| 534 | 547 | ||
| 535 | mutex_init(&pit->pit_state.lock); | 548 | mutex_init(&pit->pit_state.lock); |
| 536 | mutex_lock(&pit->pit_state.lock); | 549 | mutex_lock(&pit->pit_state.lock); |
| 550 | spin_lock_init(&pit->pit_state.inject_lock); | ||
| 537 | 551 | ||
| 538 | /* Initialize PIO device */ | 552 | /* Initialize PIO device */ |
| 539 | pit->dev.read = pit_ioport_read; | 553 | pit->dev.read = pit_ioport_read; |
| @@ -555,6 +569,9 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) | |||
| 555 | pit_state->pit = pit; | 569 | pit_state->pit = pit; |
| 556 | hrtimer_init(&pit_state->pit_timer.timer, | 570 | hrtimer_init(&pit_state->pit_timer.timer, |
| 557 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 571 | CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
| 572 | pit_state->irq_ack_notifier.gsi = 0; | ||
| 573 | pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; | ||
| 574 | kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); | ||
| 558 | mutex_unlock(&pit->pit_state.lock); | 575 | mutex_unlock(&pit->pit_state.lock); |
| 559 | 576 | ||
| 560 | kvm_pit_reset(pit); | 577 | kvm_pit_reset(pit); |
| @@ -578,10 +595,8 @@ void kvm_free_pit(struct kvm *kvm) | |||
| 578 | static void __inject_pit_timer_intr(struct kvm *kvm) | 595 | static void __inject_pit_timer_intr(struct kvm *kvm) |
| 579 | { | 596 | { |
| 580 | mutex_lock(&kvm->lock); | 597 | mutex_lock(&kvm->lock); |
| 581 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); | 598 | kvm_set_irq(kvm, 0, 1); |
| 582 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0); | 599 | kvm_set_irq(kvm, 0, 0); |
| 583 | kvm_pic_set_irq(pic_irqchip(kvm), 0, 1); | ||
| 584 | kvm_pic_set_irq(pic_irqchip(kvm), 0, 0); | ||
| 585 | mutex_unlock(&kvm->lock); | 600 | mutex_unlock(&kvm->lock); |
| 586 | } | 601 | } |
| 587 | 602 | ||
| @@ -592,37 +607,19 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | |||
| 592 | struct kvm_kpit_state *ps; | 607 | struct kvm_kpit_state *ps; |
| 593 | 608 | ||
| 594 | if (vcpu && pit) { | 609 | if (vcpu && pit) { |
| 610 | int inject = 0; | ||
| 595 | ps = &pit->pit_state; | 611 | ps = &pit->pit_state; |
| 596 | 612 | ||
| 597 | /* Try to inject pending interrupts when: | 613 | /* Try to inject pending interrupts when |
| 598 | * 1. Pending exists | 614 | * last one has been acked. |
| 599 | * 2. Last interrupt was accepted or waited for too long time*/ | 615 | */ |
| 600 | if (atomic_read(&ps->pit_timer.pending) && | 616 | spin_lock(&ps->inject_lock); |
| 601 | (ps->inject_pending || | 617 | if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) { |
| 602 | (jiffies - ps->last_injected_time | 618 | ps->irq_ack = 0; |
| 603 | >= KVM_MAX_PIT_INTR_INTERVAL))) { | 619 | inject = 1; |
| 604 | ps->inject_pending = 0; | ||
| 605 | __inject_pit_timer_intr(kvm); | ||
| 606 | ps->last_injected_time = jiffies; | ||
| 607 | } | ||
| 608 | } | ||
| 609 | } | ||
| 610 | |||
| 611 | void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | ||
| 612 | { | ||
| 613 | struct kvm_arch *arch = &vcpu->kvm->arch; | ||
| 614 | struct kvm_kpit_state *ps; | ||
| 615 | |||
| 616 | if (vcpu && arch->vpit) { | ||
| 617 | ps = &arch->vpit->pit_state; | ||
| 618 | if (atomic_read(&ps->pit_timer.pending) && | ||
| 619 | (((arch->vpic->pics[0].imr & 1) == 0 && | ||
| 620 | arch->vpic->pics[0].irq_base == vec) || | ||
| 621 | (arch->vioapic->redirtbl[0].fields.vector == vec && | ||
| 622 | arch->vioapic->redirtbl[0].fields.mask != 1))) { | ||
| 623 | ps->inject_pending = 1; | ||
| 624 | atomic_dec(&ps->pit_timer.pending); | ||
| 625 | ps->channels[0].count_load_time = ktime_get(); | ||
| 626 | } | 620 | } |
| 621 | spin_unlock(&ps->inject_lock); | ||
| 622 | if (inject) | ||
| 623 | __inject_pit_timer_intr(kvm); | ||
| 627 | } | 624 | } |
| 628 | } | 625 | } |
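
The PIT no longer guesses whether its last interrupt was consumed by peeking at PIC and IOAPIC state; it registers a kvm_irq_ack_notifier on GSI 0, and the next tick is injected only once irq_ack has been set by the notifier callback. The notifier walk itself lives in the new irq_comm.o object added to the Makefiles earlier in this patch and is not shown here; the sketch below illustrates the pattern in plain userspace C, with all names chosen for illustration.

/* Userspace sketch of the ack-notifier pattern used by the PIT above. */
#include <stddef.h>
#include <stdio.h>

struct irq_ack_notifier {
        struct irq_ack_notifier *next;
        unsigned gsi;
        void (*irq_acked)(struct irq_ack_notifier *kian);
};

static struct irq_ack_notifier *ack_list;       /* per-VM list in the kernel */

static void notify_acked_irq(unsigned gsi)
{
        struct irq_ack_notifier *kian;

        for (kian = ack_list; kian; kian = kian->next)
                if (kian->gsi == gsi)
                        kian->irq_acked(kian);  /* e.g. kvm_pit_ack_irq() */
}

static void pit_ack(struct irq_ack_notifier *kian)
{
        (void)kian;
        puts("PIT tick acked; the next injection is allowed");
}

int main(void)
{
        struct irq_ack_notifier pit = { NULL, 0, pit_ack };

        ack_list = &pit;
        notify_acked_irq(0);                    /* guest acks IRQ 0 */
        return 0;
}
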
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index db25c2a6c8c4..e436d4983aa1 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
| @@ -8,7 +8,6 @@ struct kvm_kpit_timer { | |||
| 8 | int irq; | 8 | int irq; |
| 9 | s64 period; /* unit: ns */ | 9 | s64 period; /* unit: ns */ |
| 10 | s64 scheduled; | 10 | s64 scheduled; |
| 11 | ktime_t last_update; | ||
| 12 | atomic_t pending; | 11 | atomic_t pending; |
| 13 | }; | 12 | }; |
| 14 | 13 | ||
| @@ -34,8 +33,9 @@ struct kvm_kpit_state { | |||
| 34 | u32 speaker_data_on; | 33 | u32 speaker_data_on; |
| 35 | struct mutex lock; | 34 | struct mutex lock; |
| 36 | struct kvm_pit *pit; | 35 | struct kvm_pit *pit; |
| 37 | bool inject_pending; /* if inject pending interrupts */ | 36 | spinlock_t inject_lock; |
| 38 | unsigned long last_injected_time; | 37 | unsigned long irq_ack; |
| 38 | struct kvm_irq_ack_notifier irq_ack_notifier; | ||
| 39 | }; | 39 | }; |
| 40 | 40 | ||
| 41 | struct kvm_pit { | 41 | struct kvm_pit { |
| @@ -54,7 +54,6 @@ struct kvm_pit { | |||
| 54 | #define KVM_PIT_CHANNEL_MASK 0x3 | 54 | #define KVM_PIT_CHANNEL_MASK 0x3 |
| 55 | 55 | ||
| 56 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); | 56 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu); |
| 57 | void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | ||
| 58 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); | 57 | void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val); |
| 59 | struct kvm_pit *kvm_create_pit(struct kvm *kvm); | 58 | struct kvm_pit *kvm_create_pit(struct kvm *kvm); |
| 60 | void kvm_free_pit(struct kvm *kvm); | 59 | void kvm_free_pit(struct kvm *kvm); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index c31164e8aa46..17e41e165f1a 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -30,6 +30,19 @@ | |||
| 30 | 30 | ||
| 31 | #include <linux/kvm_host.h> | 31 | #include <linux/kvm_host.h> |
| 32 | 32 | ||
| 33 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | ||
| 34 | { | ||
| 35 | s->isr &= ~(1 << irq); | ||
| 36 | s->isr_ack |= (1 << irq); | ||
| 37 | } | ||
| 38 | |||
| 39 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | ||
| 40 | { | ||
| 41 | struct kvm_pic *s = pic_irqchip(kvm); | ||
| 42 | s->pics[0].isr_ack = 0xff; | ||
| 43 | s->pics[1].isr_ack = 0xff; | ||
| 44 | } | ||
| 45 | |||
| 33 | /* | 46 | /* |
| 34 | * set irq level. If an edge is detected, then the IRR is set to 1 | 47 | * set irq level. If an edge is detected, then the IRR is set to 1 |
| 35 | */ | 48 | */ |
| @@ -141,11 +154,12 @@ void kvm_pic_set_irq(void *opaque, int irq, int level) | |||
| 141 | */ | 154 | */ |
| 142 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | 155 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) |
| 143 | { | 156 | { |
| 157 | s->isr |= 1 << irq; | ||
| 144 | if (s->auto_eoi) { | 158 | if (s->auto_eoi) { |
| 145 | if (s->rotate_on_auto_eoi) | 159 | if (s->rotate_on_auto_eoi) |
| 146 | s->priority_add = (irq + 1) & 7; | 160 | s->priority_add = (irq + 1) & 7; |
| 147 | } else | 161 | pic_clear_isr(s, irq); |
| 148 | s->isr |= (1 << irq); | 162 | } |
| 149 | /* | 163 | /* |
| 150 | * We don't clear a level sensitive interrupt here | 164 | * We don't clear a level sensitive interrupt here |
| 151 | */ | 165 | */ |
| @@ -153,9 +167,10 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq) | |||
| 153 | s->irr &= ~(1 << irq); | 167 | s->irr &= ~(1 << irq); |
| 154 | } | 168 | } |
| 155 | 169 | ||
| 156 | int kvm_pic_read_irq(struct kvm_pic *s) | 170 | int kvm_pic_read_irq(struct kvm *kvm) |
| 157 | { | 171 | { |
| 158 | int irq, irq2, intno; | 172 | int irq, irq2, intno; |
| 173 | struct kvm_pic *s = pic_irqchip(kvm); | ||
| 159 | 174 | ||
| 160 | irq = pic_get_irq(&s->pics[0]); | 175 | irq = pic_get_irq(&s->pics[0]); |
| 161 | if (irq >= 0) { | 176 | if (irq >= 0) { |
| @@ -181,16 +196,32 @@ int kvm_pic_read_irq(struct kvm_pic *s) | |||
| 181 | intno = s->pics[0].irq_base + irq; | 196 | intno = s->pics[0].irq_base + irq; |
| 182 | } | 197 | } |
| 183 | pic_update_irq(s); | 198 | pic_update_irq(s); |
| 199 | kvm_notify_acked_irq(kvm, irq); | ||
| 184 | 200 | ||
| 185 | return intno; | 201 | return intno; |
| 186 | } | 202 | } |
| 187 | 203 | ||
| 188 | void kvm_pic_reset(struct kvm_kpic_state *s) | 204 | void kvm_pic_reset(struct kvm_kpic_state *s) |
| 189 | { | 205 | { |
| 206 | int irq, irqbase; | ||
| 207 | struct kvm *kvm = s->pics_state->irq_request_opaque; | ||
| 208 | struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; | ||
| 209 | |||
| 210 | if (s == &s->pics_state->pics[0]) | ||
| 211 | irqbase = 0; | ||
| 212 | else | ||
| 213 | irqbase = 8; | ||
| 214 | |||
| 215 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
| 216 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
| 217 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) | ||
| 218 | kvm_notify_acked_irq(kvm, irq+irqbase); | ||
| 219 | } | ||
| 190 | s->last_irr = 0; | 220 | s->last_irr = 0; |
| 191 | s->irr = 0; | 221 | s->irr = 0; |
| 192 | s->imr = 0; | 222 | s->imr = 0; |
| 193 | s->isr = 0; | 223 | s->isr = 0; |
| 224 | s->isr_ack = 0xff; | ||
| 194 | s->priority_add = 0; | 225 | s->priority_add = 0; |
| 195 | s->irq_base = 0; | 226 | s->irq_base = 0; |
| 196 | s->read_reg_select = 0; | 227 | s->read_reg_select = 0; |
| @@ -243,7 +274,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 243 | priority = get_priority(s, s->isr); | 274 | priority = get_priority(s, s->isr); |
| 244 | if (priority != 8) { | 275 | if (priority != 8) { |
| 245 | irq = (priority + s->priority_add) & 7; | 276 | irq = (priority + s->priority_add) & 7; |
| 246 | s->isr &= ~(1 << irq); | 277 | pic_clear_isr(s, irq); |
| 247 | if (cmd == 5) | 278 | if (cmd == 5) |
| 248 | s->priority_add = (irq + 1) & 7; | 279 | s->priority_add = (irq + 1) & 7; |
| 249 | pic_update_irq(s->pics_state); | 280 | pic_update_irq(s->pics_state); |
| @@ -251,7 +282,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 251 | break; | 282 | break; |
| 252 | case 3: | 283 | case 3: |
| 253 | irq = val & 7; | 284 | irq = val & 7; |
| 254 | s->isr &= ~(1 << irq); | 285 | pic_clear_isr(s, irq); |
| 255 | pic_update_irq(s->pics_state); | 286 | pic_update_irq(s->pics_state); |
| 256 | break; | 287 | break; |
| 257 | case 6: | 288 | case 6: |
| @@ -260,8 +291,8 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 260 | break; | 291 | break; |
| 261 | case 7: | 292 | case 7: |
| 262 | irq = val & 7; | 293 | irq = val & 7; |
| 263 | s->isr &= ~(1 << irq); | ||
| 264 | s->priority_add = (irq + 1) & 7; | 294 | s->priority_add = (irq + 1) & 7; |
| 295 | pic_clear_isr(s, irq); | ||
| 265 | pic_update_irq(s->pics_state); | 296 | pic_update_irq(s->pics_state); |
| 266 | break; | 297 | break; |
| 267 | default: | 298 | default: |
| @@ -303,7 +334,7 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) | |||
| 303 | s->pics_state->pics[0].irr &= ~(1 << 2); | 334 | s->pics_state->pics[0].irr &= ~(1 << 2); |
| 304 | } | 335 | } |
| 305 | s->irr &= ~(1 << ret); | 336 | s->irr &= ~(1 << ret); |
| 306 | s->isr &= ~(1 << ret); | 337 | pic_clear_isr(s, ret); |
| 307 | if (addr1 >> 7 || ret != 2) | 338 | if (addr1 >> 7 || ret != 2) |
| 308 | pic_update_irq(s->pics_state); | 339 | pic_update_irq(s->pics_state); |
| 309 | } else { | 340 | } else { |
| @@ -422,10 +453,14 @@ static void pic_irq_request(void *opaque, int level) | |||
| 422 | { | 453 | { |
| 423 | struct kvm *kvm = opaque; | 454 | struct kvm *kvm = opaque; |
| 424 | struct kvm_vcpu *vcpu = kvm->vcpus[0]; | 455 | struct kvm_vcpu *vcpu = kvm->vcpus[0]; |
| 456 | struct kvm_pic *s = pic_irqchip(kvm); | ||
| 457 | int irq = pic_get_irq(&s->pics[0]); | ||
| 425 | 458 | ||
| 426 | pic_irqchip(kvm)->output = level; | 459 | s->output = level; |
| 427 | if (vcpu) | 460 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
| 461 | s->pics[0].isr_ack &= ~(1 << irq); | ||
| 428 | kvm_vcpu_kick(vcpu); | 462 | kvm_vcpu_kick(vcpu); |
| 463 | } | ||
| 429 | } | 464 | } |
| 430 | 465 | ||
| 431 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) | 466 | struct kvm_pic *kvm_create_pic(struct kvm *kvm) |
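
The new isr_ack byte records, per pin, whether a previously delivered interrupt has been serviced: pic_irq_request() only kicks VCPU 0 while the pin's isr_ack bit is still set, and pic_clear_isr() re-arms the bit on EOI, which avoids redundant wakeups while an interrupt is still in service. A small standalone sketch of that gating, with illustrative names rather than the kernel code:

/* Sketch of the isr_ack gating added to the i8259 model above. */
#include <stdio.h>

struct pic_sketch {
        unsigned char isr;      /* in-service register    */
        unsigned char isr_ack;  /* "ack seen" bit per pin */
};

static void vcpu_kick(void) { puts("kick vcpu0"); }

/* called when the PIC raises its output line for irq */
static void irq_request(struct pic_sketch *s, int irq)
{
        if (s->isr_ack & (1 << irq)) {
                s->isr_ack &= (unsigned char)~(1 << irq); /* suppress further kicks */
                vcpu_kick();
        }
}

/* called from the EOI / ack paths */
static void clear_isr(struct pic_sketch *s, int irq)
{
        s->isr     &= (unsigned char)~(1 << irq);
        s->isr_ack |= (unsigned char)(1 << irq); /* re-arm kicking for irq */
}

int main(void)
{
        struct pic_sketch s = { 0, 0xff };

        irq_request(&s, 0);     /* first assertion: kicks           */
        irq_request(&s, 0);     /* still unacked: no second kick    */
        clear_isr(&s, 0);       /* guest acknowledges the interrupt */
        irq_request(&s, 0);     /* next assertion kicks again       */
        return 0;
}
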
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 76d736b5f664..c019b8edcdb7 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
| @@ -72,7 +72,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | |||
| 72 | if (kvm_apic_accept_pic_intr(v)) { | 72 | if (kvm_apic_accept_pic_intr(v)) { |
| 73 | s = pic_irqchip(v->kvm); | 73 | s = pic_irqchip(v->kvm); |
| 74 | s->output = 0; /* PIC */ | 74 | s->output = 0; /* PIC */ |
| 75 | vector = kvm_pic_read_irq(s); | 75 | vector = kvm_pic_read_irq(v->kvm); |
| 76 | } | 76 | } |
| 77 | } | 77 | } |
| 78 | return vector; | 78 | return vector; |
| @@ -90,7 +90,6 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); | |||
| 90 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) | 90 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) |
| 91 | { | 91 | { |
| 92 | kvm_apic_timer_intr_post(vcpu, vec); | 92 | kvm_apic_timer_intr_post(vcpu, vec); |
| 93 | kvm_pit_timer_intr_post(vcpu, vec); | ||
| 94 | /* TODO: PIT, RTC etc. */ | 93 | /* TODO: PIT, RTC etc. */ |
| 95 | } | 94 | } |
| 96 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); | 95 | EXPORT_SYMBOL_GPL(kvm_timer_intr_post); |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7ca47cbb48bb..f17c8f5bbf31 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -42,6 +42,7 @@ struct kvm_kpic_state { | |||
| 42 | u8 irr; /* interrupt request register */ | 42 | u8 irr; /* interrupt request register */ |
| 43 | u8 imr; /* interrupt mask register */ | 43 | u8 imr; /* interrupt mask register */ |
| 44 | u8 isr; /* interrupt service register */ | 44 | u8 isr; /* interrupt service register */ |
| 45 | u8 isr_ack; /* interrupt ack detection */ | ||
| 45 | u8 priority_add; /* highest irq priority */ | 46 | u8 priority_add; /* highest irq priority */ |
| 46 | u8 irq_base; | 47 | u8 irq_base; |
| 47 | u8 read_reg_select; | 48 | u8 read_reg_select; |
| @@ -63,12 +64,13 @@ struct kvm_pic { | |||
| 63 | void *irq_request_opaque; | 64 | void *irq_request_opaque; |
| 64 | int output; /* intr from master PIC */ | 65 | int output; /* intr from master PIC */ |
| 65 | struct kvm_io_device dev; | 66 | struct kvm_io_device dev; |
| 67 | void (*ack_notifier)(void *opaque, int irq); | ||
| 66 | }; | 68 | }; |
| 67 | 69 | ||
| 68 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 70 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
| 69 | void kvm_pic_set_irq(void *opaque, int irq, int level); | 71 | int kvm_pic_read_irq(struct kvm *kvm); |
| 70 | int kvm_pic_read_irq(struct kvm_pic *s); | ||
| 71 | void kvm_pic_update_irq(struct kvm_pic *s); | 72 | void kvm_pic_update_irq(struct kvm_pic *s); |
| 73 | void kvm_pic_clear_isr_ack(struct kvm *kvm); | ||
| 72 | 74 | ||
| 73 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | 75 | static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) |
| 74 | { | 76 | { |
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h new file mode 100644 index 000000000000..1ff819dce7d3 --- /dev/null +++ b/arch/x86/kvm/kvm_cache_regs.h | |||
| @@ -0,0 +1,32 @@ | |||
| 1 | #ifndef ASM_KVM_CACHE_REGS_H | ||
| 2 | #define ASM_KVM_CACHE_REGS_H | ||
| 3 | |||
| 4 | static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, | ||
| 5 | enum kvm_reg reg) | ||
| 6 | { | ||
| 7 | if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail)) | ||
| 8 | kvm_x86_ops->cache_reg(vcpu, reg); | ||
| 9 | |||
| 10 | return vcpu->arch.regs[reg]; | ||
| 11 | } | ||
| 12 | |||
| 13 | static inline void kvm_register_write(struct kvm_vcpu *vcpu, | ||
| 14 | enum kvm_reg reg, | ||
| 15 | unsigned long val) | ||
| 16 | { | ||
| 17 | vcpu->arch.regs[reg] = val; | ||
| 18 | __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty); | ||
| 19 | __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); | ||
| 20 | } | ||
| 21 | |||
| 22 | static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) | ||
| 23 | { | ||
| 24 | return kvm_register_read(vcpu, VCPU_REGS_RIP); | ||
| 25 | } | ||
| 26 | |||
| 27 | static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) | ||
| 28 | { | ||
| 29 | kvm_register_write(vcpu, VCPU_REGS_RIP, val); | ||
| 30 | } | ||
| 31 | |||
| 32 | #endif | ||
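
kvm_cache_regs.h introduces lazy access to the guest registers: kvm_register_read() pulls a value out of the VMCS/VMCB through ->cache_reg() only when its bit in regs_avail is clear, and kvm_register_write() marks the register both available and dirty so it is flushed back before the next guest entry. Callers later in this patch use it as kvm_rip_write(vcpu, kvm_rip_read(vcpu) + 4); the standalone sketch below shows the same avail/dirty idea with illustrative names.

/* Userspace sketch of the regs_avail/regs_dirty caching pattern. */
#include <stdio.h>

enum reg { REG_RIP, NR_REGS };

struct vcpu_sketch {
        unsigned long regs[NR_REGS];
        unsigned long regs_avail;       /* bit set: regs[] holds the value   */
        unsigned long regs_dirty;       /* bit set: write back before entry  */
};

static unsigned long hw_read(enum reg r) { (void)r; return 0x1000; }

static unsigned long reg_read(struct vcpu_sketch *v, enum reg r)
{
        if (!(v->regs_avail & (1UL << r))) {    /* first use since last exit */
                v->regs[r] = hw_read(r);        /* e.g. a VMCS field read    */
                v->regs_avail |= 1UL << r;
        }
        return v->regs[r];
}

static void reg_write(struct vcpu_sketch *v, enum reg r, unsigned long val)
{
        v->regs[r] = val;
        v->regs_avail |= 1UL << r;
        v->regs_dirty |= 1UL << r;
}

int main(void)
{
        struct vcpu_sketch v = { { 0 }, 0, 0 };

        /* advance the cached RIP past an emulated instruction */
        reg_write(&v, REG_RIP, reg_read(&v, REG_RIP) + 4);
        printf("rip = %#lx, dirty = %#lx\n", v.regs[REG_RIP], v.regs_dirty);
        return 0;
}
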
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 73f43de69f67..6571926bfd33 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <asm/current.h> | 32 | #include <asm/current.h> |
| 33 | #include <asm/apicdef.h> | 33 | #include <asm/apicdef.h> |
| 34 | #include <asm/atomic.h> | 34 | #include <asm/atomic.h> |
| 35 | #include "kvm_cache_regs.h" | ||
| 35 | #include "irq.h" | 36 | #include "irq.h" |
| 36 | 37 | ||
| 37 | #define PRId64 "d" | 38 | #define PRId64 "d" |
| @@ -338,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 338 | } else | 339 | } else |
| 339 | apic_clear_vector(vector, apic->regs + APIC_TMR); | 340 | apic_clear_vector(vector, apic->regs + APIC_TMR); |
| 340 | 341 | ||
| 341 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 342 | kvm_vcpu_kick(vcpu); |
| 342 | kvm_vcpu_kick(vcpu); | ||
| 343 | else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { | ||
| 344 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 345 | if (waitqueue_active(&vcpu->wq)) | ||
| 346 | wake_up_interruptible(&vcpu->wq); | ||
| 347 | } | ||
| 348 | 343 | ||
| 349 | result = (orig_irr == 0); | 344 | result = (orig_irr == 0); |
| 350 | break; | 345 | break; |
| @@ -370,21 +365,18 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
| 370 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 365 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
| 371 | kvm_vcpu_kick(vcpu); | 366 | kvm_vcpu_kick(vcpu); |
| 372 | } else { | 367 | } else { |
| 373 | printk(KERN_DEBUG | 368 | apic_debug("Ignoring de-assert INIT to vcpu %d\n", |
| 374 | "Ignoring de-assert INIT to vcpu %d\n", | 369 | vcpu->vcpu_id); |
| 375 | vcpu->vcpu_id); | ||
| 376 | } | 370 | } |
| 377 | |||
| 378 | break; | 371 | break; |
| 379 | 372 | ||
| 380 | case APIC_DM_STARTUP: | 373 | case APIC_DM_STARTUP: |
| 381 | printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", | 374 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", |
| 382 | vcpu->vcpu_id, vector); | 375 | vcpu->vcpu_id, vector); |
| 383 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 376 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
| 384 | vcpu->arch.sipi_vector = vector; | 377 | vcpu->arch.sipi_vector = vector; |
| 385 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; | 378 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; |
| 386 | if (waitqueue_active(&vcpu->wq)) | 379 | kvm_vcpu_kick(vcpu); |
| 387 | wake_up_interruptible(&vcpu->wq); | ||
| 388 | } | 380 | } |
| 389 | break; | 381 | break; |
| 390 | 382 | ||
| @@ -438,7 +430,7 @@ struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | |||
| 438 | static void apic_set_eoi(struct kvm_lapic *apic) | 430 | static void apic_set_eoi(struct kvm_lapic *apic) |
| 439 | { | 431 | { |
| 440 | int vector = apic_find_highest_isr(apic); | 432 | int vector = apic_find_highest_isr(apic); |
| 441 | 433 | int trigger_mode; | |
| 442 | /* | 434 | /* |
| 443 | * Not every write EOI will has corresponding ISR, | 435 | * Not every write EOI will has corresponding ISR, |
| 444 | * one example is when Kernel check timer on setup_IO_APIC | 436 | * one example is when Kernel check timer on setup_IO_APIC |
| @@ -450,7 +442,10 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
| 450 | apic_update_ppr(apic); | 442 | apic_update_ppr(apic); |
| 451 | 443 | ||
| 452 | if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) | 444 | if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) |
| 453 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector); | 445 | trigger_mode = IOAPIC_LEVEL_TRIG; |
| 446 | else | ||
| 447 | trigger_mode = IOAPIC_EDGE_TRIG; | ||
| 448 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | ||
| 454 | } | 449 | } |
| 455 | 450 | ||
| 456 | static void apic_send_ipi(struct kvm_lapic *apic) | 451 | static void apic_send_ipi(struct kvm_lapic *apic) |
| @@ -558,8 +553,7 @@ static void __report_tpr_access(struct kvm_lapic *apic, bool write) | |||
| 558 | struct kvm_run *run = vcpu->run; | 553 | struct kvm_run *run = vcpu->run; |
| 559 | 554 | ||
| 560 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); | 555 | set_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests); |
| 561 | kvm_x86_ops->cache_regs(vcpu); | 556 | run->tpr_access.rip = kvm_rip_read(vcpu); |
| 562 | run->tpr_access.rip = vcpu->arch.rip; | ||
| 563 | run->tpr_access.is_write = write; | 557 | run->tpr_access.is_write = write; |
| 564 | } | 558 | } |
| 565 | 559 | ||
| @@ -683,9 +677,9 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
| 683 | * Refer SDM 8.4.1 | 677 | * Refer SDM 8.4.1 |
| 684 | */ | 678 | */ |
| 685 | if (len != 4 || alignment) { | 679 | if (len != 4 || alignment) { |
| 686 | if (printk_ratelimit()) | 680 | /* Don't shout loud, $infamous_os would cause only noise. */ |
| 687 | printk(KERN_ERR "apic write: bad size=%d %lx\n", | 681 | apic_debug("apic write: bad size=%d %lx\n", |
| 688 | len, (long)address); | 682 | len, (long)address); |
| 689 | return; | 683 | return; |
| 690 | } | 684 | } |
| 691 | 685 | ||
| @@ -947,10 +941,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
| 947 | 941 | ||
| 948 | if(!atomic_inc_and_test(&apic->timer.pending)) | 942 | if(!atomic_inc_and_test(&apic->timer.pending)) |
| 949 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | 943 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); |
| 950 | if (waitqueue_active(q)) { | 944 | if (waitqueue_active(q)) |
| 951 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 952 | wake_up_interruptible(q); | 945 | wake_up_interruptible(q); |
| 953 | } | 946 | |
| 954 | if (apic_lvtt_period(apic)) { | 947 | if (apic_lvtt_period(apic)) { |
| 955 | result = 1; | 948 | result = 1; |
| 956 | apic->timer.dev.expires = ktime_add_ns( | 949 | apic->timer.dev.expires = ktime_add_ns( |
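
apic_set_eoi() now reports whether the vector being completed was level- or edge-triggered (taken from the APIC TMR) so that kvm_ioapic_update_eoi() can limit its work to level-triggered redirection entries. That IOAPIC side is not part of this hunk; the fragment below is only an assumed sketch of how a consumer might use the new trigger_mode argument.

/* Assumed consumer-side sketch; not the kernel's kvm_ioapic_update_eoi(). */
#define IOAPIC_EDGE_TRIG  0
#define IOAPIC_LEVEL_TRIG 1

struct redir_entry { int vector; int trig_mode; int remote_irr; };

void ioapic_update_eoi_sketch(struct redir_entry *ent, int n,
                              int vector, int trigger_mode)
{
        int i;

        for (i = 0; i < n; i++) {
                if (ent[i].vector != vector)
                        continue;
                /* ack notifiers for pin i would run here */
                if (trigger_mode == IOAPIC_LEVEL_TRIG)
                        ent[i].remote_irr = 0;  /* the line may retrigger */
        }
}
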
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3da2508eb22a..99c239c5c0ac 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -70,6 +70,9 @@ static int dbg = 0; | |||
| 70 | module_param(dbg, bool, 0644); | 70 | module_param(dbg, bool, 0644); |
| 71 | #endif | 71 | #endif |
| 72 | 72 | ||
| 73 | static int oos_shadow = 1; | ||
| 74 | module_param(oos_shadow, bool, 0644); | ||
| 75 | |||
| 73 | #ifndef MMU_DEBUG | 76 | #ifndef MMU_DEBUG |
| 74 | #define ASSERT(x) do { } while (0) | 77 | #define ASSERT(x) do { } while (0) |
| 75 | #else | 78 | #else |
| @@ -135,18 +138,24 @@ module_param(dbg, bool, 0644); | |||
| 135 | #define ACC_USER_MASK PT_USER_MASK | 138 | #define ACC_USER_MASK PT_USER_MASK |
| 136 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) | 139 | #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) |
| 137 | 140 | ||
| 138 | struct kvm_pv_mmu_op_buffer { | 141 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) |
| 139 | void *ptr; | ||
| 140 | unsigned len; | ||
| 141 | unsigned processed; | ||
| 142 | char buf[512] __aligned(sizeof(long)); | ||
| 143 | }; | ||
| 144 | 142 | ||
| 145 | struct kvm_rmap_desc { | 143 | struct kvm_rmap_desc { |
| 146 | u64 *shadow_ptes[RMAP_EXT]; | 144 | u64 *shadow_ptes[RMAP_EXT]; |
| 147 | struct kvm_rmap_desc *more; | 145 | struct kvm_rmap_desc *more; |
| 148 | }; | 146 | }; |
| 149 | 147 | ||
| 148 | struct kvm_shadow_walk { | ||
| 149 | int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu, | ||
| 150 | u64 addr, u64 *spte, int level); | ||
| 151 | }; | ||
| 152 | |||
| 153 | struct kvm_unsync_walk { | ||
| 154 | int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); | ||
| 155 | }; | ||
| 156 | |||
| 157 | typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); | ||
| 158 | |||
| 150 | static struct kmem_cache *pte_chain_cache; | 159 | static struct kmem_cache *pte_chain_cache; |
| 151 | static struct kmem_cache *rmap_desc_cache; | 160 | static struct kmem_cache *rmap_desc_cache; |
| 152 | static struct kmem_cache *mmu_page_header_cache; | 161 | static struct kmem_cache *mmu_page_header_cache; |
| @@ -405,16 +414,19 @@ static int host_largepage_backed(struct kvm *kvm, gfn_t gfn) | |||
| 405 | { | 414 | { |
| 406 | struct vm_area_struct *vma; | 415 | struct vm_area_struct *vma; |
| 407 | unsigned long addr; | 416 | unsigned long addr; |
| 417 | int ret = 0; | ||
| 408 | 418 | ||
| 409 | addr = gfn_to_hva(kvm, gfn); | 419 | addr = gfn_to_hva(kvm, gfn); |
| 410 | if (kvm_is_error_hva(addr)) | 420 | if (kvm_is_error_hva(addr)) |
| 411 | return 0; | 421 | return ret; |
| 412 | 422 | ||
| 423 | down_read(¤t->mm->mmap_sem); | ||
| 413 | vma = find_vma(current->mm, addr); | 424 | vma = find_vma(current->mm, addr); |
| 414 | if (vma && is_vm_hugetlb_page(vma)) | 425 | if (vma && is_vm_hugetlb_page(vma)) |
| 415 | return 1; | 426 | ret = 1; |
| 427 | up_read(¤t->mm->mmap_sem); | ||
| 416 | 428 | ||
| 417 | return 0; | 429 | return ret; |
| 418 | } | 430 | } |
| 419 | 431 | ||
| 420 | static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 432 | static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
| @@ -649,8 +661,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 649 | 661 | ||
| 650 | if (write_protected) | 662 | if (write_protected) |
| 651 | kvm_flush_remote_tlbs(kvm); | 663 | kvm_flush_remote_tlbs(kvm); |
| 652 | |||
| 653 | account_shadowed(kvm, gfn); | ||
| 654 | } | 664 | } |
| 655 | 665 | ||
| 656 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 666 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) |
| @@ -859,6 +869,77 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
| 859 | BUG(); | 869 | BUG(); |
| 860 | } | 870 | } |
| 861 | 871 | ||
| 872 | |||
| 873 | static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
| 874 | mmu_parent_walk_fn fn) | ||
| 875 | { | ||
| 876 | struct kvm_pte_chain *pte_chain; | ||
| 877 | struct hlist_node *node; | ||
| 878 | struct kvm_mmu_page *parent_sp; | ||
| 879 | int i; | ||
| 880 | |||
| 881 | if (!sp->multimapped && sp->parent_pte) { | ||
| 882 | parent_sp = page_header(__pa(sp->parent_pte)); | ||
| 883 | fn(vcpu, parent_sp); | ||
| 884 | mmu_parent_walk(vcpu, parent_sp, fn); | ||
| 885 | return; | ||
| 886 | } | ||
| 887 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
| 888 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
| 889 | if (!pte_chain->parent_ptes[i]) | ||
| 890 | break; | ||
| 891 | parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); | ||
| 892 | fn(vcpu, parent_sp); | ||
| 893 | mmu_parent_walk(vcpu, parent_sp, fn); | ||
| 894 | } | ||
| 895 | } | ||
| 896 | |||
| 897 | static void kvm_mmu_update_unsync_bitmap(u64 *spte) | ||
| 898 | { | ||
| 899 | unsigned int index; | ||
| 900 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | ||
| 901 | |||
| 902 | index = spte - sp->spt; | ||
| 903 | __set_bit(index, sp->unsync_child_bitmap); | ||
| 904 | sp->unsync_children = 1; | ||
| 905 | } | ||
| 906 | |||
| 907 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | ||
| 908 | { | ||
| 909 | struct kvm_pte_chain *pte_chain; | ||
| 910 | struct hlist_node *node; | ||
| 911 | int i; | ||
| 912 | |||
| 913 | if (!sp->parent_pte) | ||
| 914 | return; | ||
| 915 | |||
| 916 | if (!sp->multimapped) { | ||
| 917 | kvm_mmu_update_unsync_bitmap(sp->parent_pte); | ||
| 918 | return; | ||
| 919 | } | ||
| 920 | |||
| 921 | hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) | ||
| 922 | for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) { | ||
| 923 | if (!pte_chain->parent_ptes[i]) | ||
| 924 | break; | ||
| 925 | kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]); | ||
| 926 | } | ||
| 927 | } | ||
| 928 | |||
| 929 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 930 | { | ||
| 931 | sp->unsync_children = 1; | ||
| 932 | kvm_mmu_update_parents_unsync(sp); | ||
| 933 | return 1; | ||
| 934 | } | ||
| 935 | |||
| 936 | static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, | ||
| 937 | struct kvm_mmu_page *sp) | ||
| 938 | { | ||
| 939 | mmu_parent_walk(vcpu, sp, unsync_walk_fn); | ||
| 940 | kvm_mmu_update_parents_unsync(sp); | ||
| 941 | } | ||
| 942 | |||
| 862 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 943 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, |
| 863 | struct kvm_mmu_page *sp) | 944 | struct kvm_mmu_page *sp) |
| 864 | { | 945 | { |
| @@ -868,6 +949,58 @@ static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | |||
| 868 | sp->spt[i] = shadow_trap_nonpresent_pte; | 949 | sp->spt[i] = shadow_trap_nonpresent_pte; |
| 869 | } | 950 | } |
| 870 | 951 | ||
| 952 | static int nonpaging_sync_page(struct kvm_vcpu *vcpu, | ||
| 953 | struct kvm_mmu_page *sp) | ||
| 954 | { | ||
| 955 | return 1; | ||
| 956 | } | ||
| 957 | |||
| 958 | static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 959 | { | ||
| 960 | } | ||
| 961 | |||
| 962 | #define for_each_unsync_children(bitmap, idx) \ | ||
| 963 | for (idx = find_first_bit(bitmap, 512); \ | ||
| 964 | idx < 512; \ | ||
| 965 | idx = find_next_bit(bitmap, 512, idx+1)) | ||
| 966 | |||
| 967 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | ||
| 968 | struct kvm_unsync_walk *walker) | ||
| 969 | { | ||
| 970 | int i, ret; | ||
| 971 | |||
| 972 | if (!sp->unsync_children) | ||
| 973 | return 0; | ||
| 974 | |||
| 975 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | ||
| 976 | u64 ent = sp->spt[i]; | ||
| 977 | |||
| 978 | if (is_shadow_present_pte(ent)) { | ||
| 979 | struct kvm_mmu_page *child; | ||
| 980 | child = page_header(ent & PT64_BASE_ADDR_MASK); | ||
| 981 | |||
| 982 | if (child->unsync_children) { | ||
| 983 | ret = mmu_unsync_walk(child, walker); | ||
| 984 | if (ret) | ||
| 985 | return ret; | ||
| 986 | __clear_bit(i, sp->unsync_child_bitmap); | ||
| 987 | } | ||
| 988 | |||
| 989 | if (child->unsync) { | ||
| 990 | ret = walker->entry(child, walker); | ||
| 991 | __clear_bit(i, sp->unsync_child_bitmap); | ||
| 992 | if (ret) | ||
| 993 | return ret; | ||
| 994 | } | ||
| 995 | } | ||
| 996 | } | ||
| 997 | |||
| 998 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | ||
| 999 | sp->unsync_children = 0; | ||
| 1000 | |||
| 1001 | return 0; | ||
| 1002 | } | ||
| 1003 | |||
| 871 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | 1004 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) |
| 872 | { | 1005 | { |
| 873 | unsigned index; | 1006 | unsigned index; |
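
This hunk introduces out-of-sync ("unsync") shadow pages: a shadow of a guest page table may be left writable and resynchronized later, and mmu_unsync_walk() locates such descendants by following the 512-bit unsync_child_bitmap kept in each intermediate page rather than scanning every slot. A compact standalone sketch of that pruned traversal, with purely illustrative structures:

/* Userspace sketch of the unsync_child_bitmap pruning idea. */
#include <stdio.h>

#define SLOTS 512

struct sp_sketch {
        struct sp_sketch *child[SLOTS];
        unsigned char child_unsync[SLOTS];   /* stands in for the bitmap */
        int unsync;                          /* this page is out of sync */
        int unsync_children;                 /* some descendant is       */
};

static void unsync_walk(struct sp_sketch *sp)
{
        int i;

        if (!sp->unsync_children)
                return;                      /* prune: this subtree is clean */

        for (i = 0; i < SLOTS; i++) {
                struct sp_sketch *c = sp->child[i];

                if (!sp->child_unsync[i] || !c)
                        continue;
                if (c->unsync_children)
                        unsync_walk(c);      /* recurse into a dirty subtree */
                if (c->unsync)
                        puts("resync this shadow page");
                sp->child_unsync[i] = 0;
        }
        sp->unsync_children = 0;
}

int main(void)
{
        static struct sp_sketch root, leaf = { .unsync = 1 };

        root.child[3] = &leaf;
        root.child_unsync[3] = 1;
        root.unsync_children = 1;
        unsync_walk(&root);
        return 0;
}
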
| @@ -888,6 +1021,59 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
| 888 | return NULL; | 1021 | return NULL; |
| 889 | } | 1022 | } |
| 890 | 1023 | ||
| 1024 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1025 | { | ||
| 1026 | WARN_ON(!sp->unsync); | ||
| 1027 | sp->unsync = 0; | ||
| 1028 | --kvm->stat.mmu_unsync; | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); | ||
| 1032 | |||
| 1033 | static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 1034 | { | ||
| 1035 | if (sp->role.glevels != vcpu->arch.mmu.root_level) { | ||
| 1036 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
| 1037 | return 1; | ||
| 1038 | } | ||
| 1039 | |||
| 1040 | rmap_write_protect(vcpu->kvm, sp->gfn); | ||
| 1041 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | ||
| 1042 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
| 1043 | return 1; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | kvm_mmu_flush_tlb(vcpu); | ||
| 1047 | kvm_unlink_unsync_page(vcpu->kvm, sp); | ||
| 1048 | return 0; | ||
| 1049 | } | ||
| 1050 | |||
| 1051 | struct sync_walker { | ||
| 1052 | struct kvm_vcpu *vcpu; | ||
| 1053 | struct kvm_unsync_walk walker; | ||
| 1054 | }; | ||
| 1055 | |||
| 1056 | static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | ||
| 1057 | { | ||
| 1058 | struct sync_walker *sync_walk = container_of(walk, struct sync_walker, | ||
| 1059 | walker); | ||
| 1060 | struct kvm_vcpu *vcpu = sync_walk->vcpu; | ||
| 1061 | |||
| 1062 | kvm_sync_page(vcpu, sp); | ||
| 1063 | return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 1067 | { | ||
| 1068 | struct sync_walker walker = { | ||
| 1069 | .walker = { .entry = mmu_sync_fn, }, | ||
| 1070 | .vcpu = vcpu, | ||
| 1071 | }; | ||
| 1072 | |||
| 1073 | while (mmu_unsync_walk(sp, &walker.walker)) | ||
| 1074 | cond_resched_lock(&vcpu->kvm->mmu_lock); | ||
| 1075 | } | ||
| 1076 | |||
| 891 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1077 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
| 892 | gfn_t gfn, | 1078 | gfn_t gfn, |
| 893 | gva_t gaddr, | 1079 | gva_t gaddr, |
| @@ -901,7 +1087,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 901 | unsigned quadrant; | 1087 | unsigned quadrant; |
| 902 | struct hlist_head *bucket; | 1088 | struct hlist_head *bucket; |
| 903 | struct kvm_mmu_page *sp; | 1089 | struct kvm_mmu_page *sp; |
| 904 | struct hlist_node *node; | 1090 | struct hlist_node *node, *tmp; |
| 905 | 1091 | ||
| 906 | role.word = 0; | 1092 | role.word = 0; |
| 907 | role.glevels = vcpu->arch.mmu.root_level; | 1093 | role.glevels = vcpu->arch.mmu.root_level; |
| @@ -917,9 +1103,20 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 917 | gfn, role.word); | 1103 | gfn, role.word); |
| 918 | index = kvm_page_table_hashfn(gfn); | 1104 | index = kvm_page_table_hashfn(gfn); |
| 919 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 1105 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 920 | hlist_for_each_entry(sp, node, bucket, hash_link) | 1106 | hlist_for_each_entry_safe(sp, node, tmp, bucket, hash_link) |
| 921 | if (sp->gfn == gfn && sp->role.word == role.word) { | 1107 | if (sp->gfn == gfn) { |
| 1108 | if (sp->unsync) | ||
| 1109 | if (kvm_sync_page(vcpu, sp)) | ||
| 1110 | continue; | ||
| 1111 | |||
| 1112 | if (sp->role.word != role.word) | ||
| 1113 | continue; | ||
| 1114 | |||
| 922 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); | 1115 | mmu_page_add_parent_pte(vcpu, sp, parent_pte); |
| 1116 | if (sp->unsync_children) { | ||
| 1117 | set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); | ||
| 1118 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
| 1119 | } | ||
| 923 | pgprintk("%s: found\n", __func__); | 1120 | pgprintk("%s: found\n", __func__); |
| 924 | return sp; | 1121 | return sp; |
| 925 | } | 1122 | } |
| @@ -931,8 +1128,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 931 | sp->gfn = gfn; | 1128 | sp->gfn = gfn; |
| 932 | sp->role = role; | 1129 | sp->role = role; |
| 933 | hlist_add_head(&sp->hash_link, bucket); | 1130 | hlist_add_head(&sp->hash_link, bucket); |
| 934 | if (!metaphysical) | 1131 | if (!metaphysical) { |
| 935 | rmap_write_protect(vcpu->kvm, gfn); | 1132 | rmap_write_protect(vcpu->kvm, gfn); |
| 1133 | account_shadowed(vcpu->kvm, gfn); | ||
| 1134 | } | ||
| 936 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1135 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
| 937 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | 1136 | vcpu->arch.mmu.prefetch_page(vcpu, sp); |
| 938 | else | 1137 | else |
| @@ -940,6 +1139,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
| 940 | return sp; | 1139 | return sp; |
| 941 | } | 1140 | } |
| 942 | 1141 | ||
| 1142 | static int walk_shadow(struct kvm_shadow_walk *walker, | ||
| 1143 | struct kvm_vcpu *vcpu, u64 addr) | ||
| 1144 | { | ||
| 1145 | hpa_t shadow_addr; | ||
| 1146 | int level; | ||
| 1147 | int r; | ||
| 1148 | u64 *sptep; | ||
| 1149 | unsigned index; | ||
| 1150 | |||
| 1151 | shadow_addr = vcpu->arch.mmu.root_hpa; | ||
| 1152 | level = vcpu->arch.mmu.shadow_root_level; | ||
| 1153 | if (level == PT32E_ROOT_LEVEL) { | ||
| 1154 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | ||
| 1155 | shadow_addr &= PT64_BASE_ADDR_MASK; | ||
| 1156 | --level; | ||
| 1157 | } | ||
| 1158 | |||
| 1159 | while (level >= PT_PAGE_TABLE_LEVEL) { | ||
| 1160 | index = SHADOW_PT_INDEX(addr, level); | ||
| 1161 | sptep = ((u64 *)__va(shadow_addr)) + index; | ||
| 1162 | r = walker->entry(walker, vcpu, addr, sptep, level); | ||
| 1163 | if (r) | ||
| 1164 | return r; | ||
| 1165 | shadow_addr = *sptep & PT64_BASE_ADDR_MASK; | ||
| 1166 | --level; | ||
| 1167 | } | ||
| 1168 | return 0; | ||
| 1169 | } | ||
| 1170 | |||
| 943 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, | 1171 | static void kvm_mmu_page_unlink_children(struct kvm *kvm, |
| 944 | struct kvm_mmu_page *sp) | 1172 | struct kvm_mmu_page *sp) |
| 945 | { | 1173 | { |
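
walk_shadow() factors the level-by-level descent of the shadow page tables into a callback interface: it starts from root_hpa (or the matching pae_root entry for a 32-bit PAE root), calls walker->entry() with the spte pointer and level at each step, and follows PT64_BASE_ADDR_MASK downward until the callback returns non-zero. A hypothetical walker that merely counts present levels for an address might look like the sketch below; everything outside the callback signature shown above is an assumption.

/* Hypothetical walker on the walk_shadow() interface above (sketch only). */
struct count_walker {
        struct kvm_shadow_walk walker;
        int levels_present;
};

static int count_entry(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
                       u64 addr, u64 *sptep, int level)
{
        struct count_walker *cw = container_of(walk, struct count_walker, walker);

        if (is_shadow_present_pte(*sptep))
                cw->levels_present++;
        return 0;                       /* 0 means keep walking down */
}

static int count_present_levels(struct kvm_vcpu *vcpu, u64 addr)
{
        struct count_walker cw = {
                .walker = { .entry = count_entry, },
                .levels_present = 0,
        };

        walk_shadow(&cw.walker, vcpu, addr);
        return cw.levels_present;
}
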
| @@ -955,7 +1183,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
| 955 | rmap_remove(kvm, &pt[i]); | 1183 | rmap_remove(kvm, &pt[i]); |
| 956 | pt[i] = shadow_trap_nonpresent_pte; | 1184 | pt[i] = shadow_trap_nonpresent_pte; |
| 957 | } | 1185 | } |
| 958 | kvm_flush_remote_tlbs(kvm); | ||
| 959 | return; | 1186 | return; |
| 960 | } | 1187 | } |
| 961 | 1188 | ||
| @@ -974,7 +1201,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, | |||
| 974 | } | 1201 | } |
| 975 | pt[i] = shadow_trap_nonpresent_pte; | 1202 | pt[i] = shadow_trap_nonpresent_pte; |
| 976 | } | 1203 | } |
| 977 | kvm_flush_remote_tlbs(kvm); | ||
| 978 | } | 1204 | } |
| 979 | 1205 | ||
| 980 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) | 1206 | static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) |
| @@ -991,11 +1217,10 @@ static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm) | |||
| 991 | kvm->vcpus[i]->arch.last_pte_updated = NULL; | 1217 | kvm->vcpus[i]->arch.last_pte_updated = NULL; |
| 992 | } | 1218 | } |
| 993 | 1219 | ||
| 994 | static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1220 | static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) |
| 995 | { | 1221 | { |
| 996 | u64 *parent_pte; | 1222 | u64 *parent_pte; |
| 997 | 1223 | ||
| 998 | ++kvm->stat.mmu_shadow_zapped; | ||
| 999 | while (sp->multimapped || sp->parent_pte) { | 1224 | while (sp->multimapped || sp->parent_pte) { |
| 1000 | if (!sp->multimapped) | 1225 | if (!sp->multimapped) |
| 1001 | parent_pte = sp->parent_pte; | 1226 | parent_pte = sp->parent_pte; |
| @@ -1010,21 +1235,59 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
| 1010 | kvm_mmu_put_page(sp, parent_pte); | 1235 | kvm_mmu_put_page(sp, parent_pte); |
| 1011 | set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte); | 1236 | set_shadow_pte(parent_pte, shadow_trap_nonpresent_pte); |
| 1012 | } | 1237 | } |
| 1238 | } | ||
| 1239 | |||
| 1240 | struct zap_walker { | ||
| 1241 | struct kvm_unsync_walk walker; | ||
| 1242 | struct kvm *kvm; | ||
| 1243 | int zapped; | ||
| 1244 | }; | ||
| 1245 | |||
| 1246 | static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | ||
| 1247 | { | ||
| 1248 | struct zap_walker *zap_walk = container_of(walk, struct zap_walker, | ||
| 1249 | walker); | ||
| 1250 | kvm_mmu_zap_page(zap_walk->kvm, sp); | ||
| 1251 | zap_walk->zapped = 1; | ||
| 1252 | return 0; | ||
| 1253 | } | ||
| 1254 | |||
| 1255 | static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1256 | { | ||
| 1257 | struct zap_walker walker = { | ||
| 1258 | .walker = { .entry = mmu_zap_fn, }, | ||
| 1259 | .kvm = kvm, | ||
| 1260 | .zapped = 0, | ||
| 1261 | }; | ||
| 1262 | |||
| 1263 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) | ||
| 1264 | return 0; | ||
| 1265 | mmu_unsync_walk(sp, &walker.walker); | ||
| 1266 | return walker.zapped; | ||
| 1267 | } | ||
| 1268 | |||
| 1269 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
| 1270 | { | ||
| 1271 | int ret; | ||
| 1272 | ++kvm->stat.mmu_shadow_zapped; | ||
| 1273 | ret = mmu_zap_unsync_children(kvm, sp); | ||
| 1013 | kvm_mmu_page_unlink_children(kvm, sp); | 1274 | kvm_mmu_page_unlink_children(kvm, sp); |
| 1275 | kvm_mmu_unlink_parents(kvm, sp); | ||
| 1276 | kvm_flush_remote_tlbs(kvm); | ||
| 1277 | if (!sp->role.invalid && !sp->role.metaphysical) | ||
| 1278 | unaccount_shadowed(kvm, sp->gfn); | ||
| 1279 | if (sp->unsync) | ||
| 1280 | kvm_unlink_unsync_page(kvm, sp); | ||
| 1014 | if (!sp->root_count) { | 1281 | if (!sp->root_count) { |
| 1015 | if (!sp->role.metaphysical && !sp->role.invalid) | ||
| 1016 | unaccount_shadowed(kvm, sp->gfn); | ||
| 1017 | hlist_del(&sp->hash_link); | 1282 | hlist_del(&sp->hash_link); |
| 1018 | kvm_mmu_free_page(kvm, sp); | 1283 | kvm_mmu_free_page(kvm, sp); |
| 1019 | } else { | 1284 | } else { |
| 1020 | int invalid = sp->role.invalid; | ||
| 1021 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | ||
| 1022 | sp->role.invalid = 1; | 1285 | sp->role.invalid = 1; |
| 1286 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | ||
| 1023 | kvm_reload_remote_mmus(kvm); | 1287 | kvm_reload_remote_mmus(kvm); |
| 1024 | if (!sp->role.metaphysical && !invalid) | ||
| 1025 | unaccount_shadowed(kvm, sp->gfn); | ||
| 1026 | } | 1288 | } |
| 1027 | kvm_mmu_reset_last_pte_updated(kvm); | 1289 | kvm_mmu_reset_last_pte_updated(kvm); |
| 1290 | return ret; | ||
| 1028 | } | 1291 | } |
| 1029 | 1292 | ||
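kvm_mmu_zap_page() now reports, through its return value, whether zapping the page also took down other pages (its unsync children). Callers iterating a hash bucket with hlist_for_each_entry_safe() have only the next pointer of the current node saved, so when extra pages may have been freed they restart the walk from bucket->first, as kvm_mmu_unprotect_page() and kvm_mmu_pte_write() do further down. The fragment below is a small userspace model of that restart rule, with made-up list types rather than the kernel's hlist.

/*
 * Why the bucket walk restarts: a "safe" iterator saves only the next
 * pointer, which may itself be freed when zapping removes extra nodes.
 */
#include <stdio.h>
#include <stdlib.h>

struct sp {
        int gfn;
        struct sp *next;
};

static struct sp *bucket_first;

/* Frees every node with the victim's gfn; returns 1 if it freed extras. */
static int zap_page(struct sp *victim)
{
        struct sp **pp = &bucket_first;
        int extra = 0;

        while (*pp) {
                struct sp *cur = *pp;
                if (cur->gfn == victim->gfn) {
                        *pp = cur->next;
                        if (cur != victim)
                                extra = 1;
                        free(cur);
                } else {
                        pp = &cur->next;
                }
        }
        return extra;
}

static void unprotect(int gfn)
{
        struct sp *sp = bucket_first, *next;

        while (sp) {
                next = sp->next;                 /* what a "safe" iterator saves */
                if (sp->gfn == gfn && zap_page(sp))
                        next = bucket_first;     /* saved next may be gone: restart */
                sp = next;
        }
}

int main(void)
{
        int vals[] = { 1, 2, 2, 3 };

        for (int i = 3; i >= 0; --i) {           /* build bucket 1 -> 2 -> 2 -> 3 */
                struct sp *n = malloc(sizeof(*n));
                n->gfn = vals[i];
                n->next = bucket_first;
                bucket_first = n;
        }
        unprotect(2);
        for (struct sp *s = bucket_first; s; s = s->next)
                printf("gfn %d\n", s->gfn);      /* prints 1 and 3 */
        return 0;
}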
| 1030 | /* | 1293 | /* |
| @@ -1077,8 +1340,9 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) | |||
| 1077 | if (sp->gfn == gfn && !sp->role.metaphysical) { | 1340 | if (sp->gfn == gfn && !sp->role.metaphysical) { |
| 1078 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, | 1341 | pgprintk("%s: gfn %lx role %x\n", __func__, gfn, |
| 1079 | sp->role.word); | 1342 | sp->role.word); |
| 1080 | kvm_mmu_zap_page(kvm, sp); | ||
| 1081 | r = 1; | 1343 | r = 1; |
| 1344 | if (kvm_mmu_zap_page(kvm, sp)) | ||
| 1345 | n = bucket->first; | ||
| 1082 | } | 1346 | } |
| 1083 | return r; | 1347 | return r; |
| 1084 | } | 1348 | } |
| @@ -1101,6 +1365,20 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | |||
| 1101 | __set_bit(slot, &sp->slot_bitmap); | 1365 | __set_bit(slot, &sp->slot_bitmap); |
| 1102 | } | 1366 | } |
| 1103 | 1367 | ||
| 1368 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) | ||
| 1369 | { | ||
| 1370 | int i; | ||
| 1371 | u64 *pt = sp->spt; | ||
| 1372 | |||
| 1373 | if (shadow_trap_nonpresent_pte == shadow_notrap_nonpresent_pte) | ||
| 1374 | return; | ||
| 1375 | |||
| 1376 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | ||
| 1377 | if (pt[i] == shadow_notrap_nonpresent_pte) | ||
| 1378 | set_shadow_pte(&pt[i], shadow_trap_nonpresent_pte); | ||
| 1379 | } | ||
| 1380 | } | ||
| 1381 | |||
| 1104 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | 1382 | struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) |
| 1105 | { | 1383 | { |
| 1106 | struct page *page; | 1384 | struct page *page; |
| @@ -1110,51 +1388,60 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 1110 | if (gpa == UNMAPPED_GVA) | 1388 | if (gpa == UNMAPPED_GVA) |
| 1111 | return NULL; | 1389 | return NULL; |
| 1112 | 1390 | ||
| 1113 | down_read(&current->mm->mmap_sem); | ||
| 1114 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 1391 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 1115 | up_read(&current->mm->mmap_sem); | ||
| 1116 | 1392 | ||
| 1117 | return page; | 1393 | return page; |
| 1118 | } | 1394 | } |
| 1119 | 1395 | ||
| 1120 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1396 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
| 1121 | unsigned pt_access, unsigned pte_access, | ||
| 1122 | int user_fault, int write_fault, int dirty, | ||
| 1123 | int *ptwrite, int largepage, gfn_t gfn, | ||
| 1124 | pfn_t pfn, bool speculative) | ||
| 1125 | { | 1397 | { |
| 1126 | u64 spte; | 1398 | unsigned index; |
| 1127 | int was_rmapped = 0; | 1399 | struct hlist_head *bucket; |
| 1128 | int was_writeble = is_writeble_pte(*shadow_pte); | 1400 | struct kvm_mmu_page *s; |
| 1401 | struct hlist_node *node, *n; | ||
| 1129 | 1402 | ||
| 1130 | pgprintk("%s: spte %llx access %x write_fault %d" | 1403 | index = kvm_page_table_hashfn(sp->gfn); |
| 1131 | " user_fault %d gfn %lx\n", | 1404 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 1132 | __func__, *shadow_pte, pt_access, | 1405 | /* don't unsync if pagetable is shadowed with multiple roles */ |
| 1133 | write_fault, user_fault, gfn); | 1406 | hlist_for_each_entry_safe(s, node, n, bucket, hash_link) { |
| 1407 | if (s->gfn != sp->gfn || s->role.metaphysical) | ||
| 1408 | continue; | ||
| 1409 | if (s->role.word != sp->role.word) | ||
| 1410 | return 1; | ||
| 1411 | } | ||
| 1412 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
| 1413 | ++vcpu->kvm->stat.mmu_unsync; | ||
| 1414 | sp->unsync = 1; | ||
| 1415 | mmu_convert_notrap(sp); | ||
| 1416 | return 0; | ||
| 1417 | } | ||
| 1134 | 1418 | ||
| 1135 | if (is_rmap_pte(*shadow_pte)) { | 1419 | static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, |
| 1136 | /* | 1420 | bool can_unsync) |
| 1137 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink | 1421 | { |
| 1138 | * the parent of the now unreachable PTE. | 1422 | struct kvm_mmu_page *shadow; |
| 1139 | */ | ||
| 1140 | if (largepage && !is_large_pte(*shadow_pte)) { | ||
| 1141 | struct kvm_mmu_page *child; | ||
| 1142 | u64 pte = *shadow_pte; | ||
| 1143 | 1423 | ||
| 1144 | child = page_header(pte & PT64_BASE_ADDR_MASK); | 1424 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); |
| 1145 | mmu_page_remove_parent_pte(child, shadow_pte); | 1425 | if (shadow) { |
| 1146 | } else if (pfn != spte_to_pfn(*shadow_pte)) { | 1426 | if (shadow->role.level != PT_PAGE_TABLE_LEVEL) |
| 1147 | pgprintk("hfn old %lx new %lx\n", | 1427 | return 1; |
| 1148 | spte_to_pfn(*shadow_pte), pfn); | 1428 | if (shadow->unsync) |
| 1149 | rmap_remove(vcpu->kvm, shadow_pte); | 1429 | return 0; |
| 1150 | } else { | 1430 | if (can_unsync && oos_shadow) |
| 1151 | if (largepage) | 1431 | return kvm_unsync_page(vcpu, shadow); |
| 1152 | was_rmapped = is_large_pte(*shadow_pte); | 1432 | return 1; |
| 1153 | else | ||
| 1154 | was_rmapped = 1; | ||
| 1155 | } | ||
| 1156 | } | 1433 | } |
| 1434 | return 0; | ||
| 1435 | } | ||
| 1157 | 1436 | ||
| 1437 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | ||
| 1438 | unsigned pte_access, int user_fault, | ||
| 1439 | int write_fault, int dirty, int largepage, | ||
| 1440 | gfn_t gfn, pfn_t pfn, bool speculative, | ||
| 1441 | bool can_unsync) | ||
| 1442 | { | ||
| 1443 | u64 spte; | ||
| 1444 | int ret = 0; | ||
| 1158 | /* | 1445 | /* |
| 1159 | * We don't set the accessed bit, since we sometimes want to see | 1446 | * We don't set the accessed bit, since we sometimes want to see |
| 1160 | * whether the guest actually used the pte (in order to detect | 1447 | * whether the guest actually used the pte (in order to detect |
| @@ -1162,7 +1449,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
| 1162 | */ | 1449 | */ |
| 1163 | spte = shadow_base_present_pte | shadow_dirty_mask; | 1450 | spte = shadow_base_present_pte | shadow_dirty_mask; |
| 1164 | if (!speculative) | 1451 | if (!speculative) |
| 1165 | pte_access |= PT_ACCESSED_MASK; | 1452 | spte |= shadow_accessed_mask; |
| 1166 | if (!dirty) | 1453 | if (!dirty) |
| 1167 | pte_access &= ~ACC_WRITE_MASK; | 1454 | pte_access &= ~ACC_WRITE_MASK; |
| 1168 | if (pte_access & ACC_EXEC_MASK) | 1455 | if (pte_access & ACC_EXEC_MASK) |
| @@ -1178,35 +1465,82 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
| 1178 | 1465 | ||
| 1179 | if ((pte_access & ACC_WRITE_MASK) | 1466 | if ((pte_access & ACC_WRITE_MASK) |
| 1180 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { | 1467 | || (write_fault && !is_write_protection(vcpu) && !user_fault)) { |
| 1181 | struct kvm_mmu_page *shadow; | 1468 | |
| 1469 | if (largepage && has_wrprotected_page(vcpu->kvm, gfn)) { | ||
| 1470 | ret = 1; | ||
| 1471 | spte = shadow_trap_nonpresent_pte; | ||
| 1472 | goto set_pte; | ||
| 1473 | } | ||
| 1182 | 1474 | ||
| 1183 | spte |= PT_WRITABLE_MASK; | 1475 | spte |= PT_WRITABLE_MASK; |
| 1184 | 1476 | ||
| 1185 | shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); | 1477 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
| 1186 | if (shadow || | ||
| 1187 | (largepage && has_wrprotected_page(vcpu->kvm, gfn))) { | ||
| 1188 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 1478 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
| 1189 | __func__, gfn); | 1479 | __func__, gfn); |
| 1480 | ret = 1; | ||
| 1190 | pte_access &= ~ACC_WRITE_MASK; | 1481 | pte_access &= ~ACC_WRITE_MASK; |
| 1191 | if (is_writeble_pte(spte)) { | 1482 | if (is_writeble_pte(spte)) |
| 1192 | spte &= ~PT_WRITABLE_MASK; | 1483 | spte &= ~PT_WRITABLE_MASK; |
| 1193 | kvm_x86_ops->tlb_flush(vcpu); | ||
| 1194 | } | ||
| 1195 | if (write_fault) | ||
| 1196 | *ptwrite = 1; | ||
| 1197 | } | 1484 | } |
| 1198 | } | 1485 | } |
| 1199 | 1486 | ||
| 1200 | if (pte_access & ACC_WRITE_MASK) | 1487 | if (pte_access & ACC_WRITE_MASK) |
| 1201 | mark_page_dirty(vcpu->kvm, gfn); | 1488 | mark_page_dirty(vcpu->kvm, gfn); |
| 1202 | 1489 | ||
| 1203 | pgprintk("%s: setting spte %llx\n", __func__, spte); | 1490 | set_pte: |
| 1204 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", | ||
| 1205 | (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", | ||
| 1206 | (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); | ||
| 1207 | set_shadow_pte(shadow_pte, spte); | 1491 | set_shadow_pte(shadow_pte, spte); |
| 1208 | if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK) | 1492 | return ret; |
| 1209 | && (spte & PT_PRESENT_MASK)) | 1493 | } |
| 1494 | |||
| 1495 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | ||
| 1496 | unsigned pt_access, unsigned pte_access, | ||
| 1497 | int user_fault, int write_fault, int dirty, | ||
| 1498 | int *ptwrite, int largepage, gfn_t gfn, | ||
| 1499 | pfn_t pfn, bool speculative) | ||
| 1500 | { | ||
| 1501 | int was_rmapped = 0; | ||
| 1502 | int was_writeble = is_writeble_pte(*shadow_pte); | ||
| 1503 | |||
| 1504 | pgprintk("%s: spte %llx access %x write_fault %d" | ||
| 1505 | " user_fault %d gfn %lx\n", | ||
| 1506 | __func__, *shadow_pte, pt_access, | ||
| 1507 | write_fault, user_fault, gfn); | ||
| 1508 | |||
| 1509 | if (is_rmap_pte(*shadow_pte)) { | ||
| 1510 | /* | ||
| 1511 | * If we overwrite a PTE page pointer with a 2MB PMD, unlink | ||
| 1512 | * the parent of the now unreachable PTE. | ||
| 1513 | */ | ||
| 1514 | if (largepage && !is_large_pte(*shadow_pte)) { | ||
| 1515 | struct kvm_mmu_page *child; | ||
| 1516 | u64 pte = *shadow_pte; | ||
| 1517 | |||
| 1518 | child = page_header(pte & PT64_BASE_ADDR_MASK); | ||
| 1519 | mmu_page_remove_parent_pte(child, shadow_pte); | ||
| 1520 | } else if (pfn != spte_to_pfn(*shadow_pte)) { | ||
| 1521 | pgprintk("hfn old %lx new %lx\n", | ||
| 1522 | spte_to_pfn(*shadow_pte), pfn); | ||
| 1523 | rmap_remove(vcpu->kvm, shadow_pte); | ||
| 1524 | } else { | ||
| 1525 | if (largepage) | ||
| 1526 | was_rmapped = is_large_pte(*shadow_pte); | ||
| 1527 | else | ||
| 1528 | was_rmapped = 1; | ||
| 1529 | } | ||
| 1530 | } | ||
| 1531 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, | ||
| 1532 | dirty, largepage, gfn, pfn, speculative, true)) { | ||
| 1533 | if (write_fault) | ||
| 1534 | *ptwrite = 1; | ||
| 1535 | kvm_x86_ops->tlb_flush(vcpu); | ||
| 1536 | } | ||
| 1537 | |||
| 1538 | pgprintk("%s: setting spte %llx\n", __func__, *shadow_pte); | ||
| 1539 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", | ||
| 1540 | is_large_pte(*shadow_pte)? "2MB" : "4kB", | ||
| 1541 | is_present_pte(*shadow_pte)?"RW":"R", gfn, | ||
| 1542 | *shadow_pte, shadow_pte); | ||
| 1543 | if (!was_rmapped && is_large_pte(*shadow_pte)) | ||
| 1210 | ++vcpu->kvm->stat.lpages; | 1544 | ++vcpu->kvm->stat.lpages; |
| 1211 | 1545 | ||
| 1212 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); | 1546 | page_header_update_slot(vcpu->kvm, shadow_pte, gfn); |
| @@ -1230,54 +1564,67 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
| 1230 | { | 1564 | { |
| 1231 | } | 1565 | } |
| 1232 | 1566 | ||
| 1233 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | 1567 | struct direct_shadow_walk { |
| 1234 | int largepage, gfn_t gfn, pfn_t pfn, | 1568 | struct kvm_shadow_walk walker; |
| 1235 | int level) | 1569 | pfn_t pfn; |
| 1236 | { | 1570 | int write; |
| 1237 | hpa_t table_addr = vcpu->arch.mmu.root_hpa; | 1571 | int largepage; |
| 1238 | int pt_write = 0; | 1572 | int pt_write; |
| 1239 | 1573 | }; | |
| 1240 | for (; ; level--) { | ||
| 1241 | u32 index = PT64_INDEX(v, level); | ||
| 1242 | u64 *table; | ||
| 1243 | |||
| 1244 | ASSERT(VALID_PAGE(table_addr)); | ||
| 1245 | table = __va(table_addr); | ||
| 1246 | 1574 | ||
| 1247 | if (level == 1) { | 1575 | static int direct_map_entry(struct kvm_shadow_walk *_walk, |
| 1248 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, | 1576 | struct kvm_vcpu *vcpu, |
| 1249 | 0, write, 1, &pt_write, 0, gfn, pfn, false); | 1577 | u64 addr, u64 *sptep, int level) |
| 1250 | return pt_write; | 1578 | { |
| 1251 | } | 1579 | struct direct_shadow_walk *walk = |
| 1580 | container_of(_walk, struct direct_shadow_walk, walker); | ||
| 1581 | struct kvm_mmu_page *sp; | ||
| 1582 | gfn_t pseudo_gfn; | ||
| 1583 | gfn_t gfn = addr >> PAGE_SHIFT; | ||
| 1584 | |||
| 1585 | if (level == PT_PAGE_TABLE_LEVEL | ||
| 1586 | || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { | ||
| 1587 | mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, | ||
| 1588 | 0, walk->write, 1, &walk->pt_write, | ||
| 1589 | walk->largepage, gfn, walk->pfn, false); | ||
| 1590 | ++vcpu->stat.pf_fixed; | ||
| 1591 | return 1; | ||
| 1592 | } | ||
| 1252 | 1593 | ||
| 1253 | if (largepage && level == 2) { | 1594 | if (*sptep == shadow_trap_nonpresent_pte) { |
| 1254 | mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL, | 1595 | pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| 1255 | 0, write, 1, &pt_write, 1, gfn, pfn, false); | 1596 | sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1, |
| 1256 | return pt_write; | 1597 | 1, ACC_ALL, sptep); |
| 1598 | if (!sp) { | ||
| 1599 | pgprintk("nonpaging_map: ENOMEM\n"); | ||
| 1600 | kvm_release_pfn_clean(walk->pfn); | ||
| 1601 | return -ENOMEM; | ||
| 1257 | } | 1602 | } |
| 1258 | 1603 | ||
| 1259 | if (table[index] == shadow_trap_nonpresent_pte) { | 1604 | set_shadow_pte(sptep, |
| 1260 | struct kvm_mmu_page *new_table; | 1605 | __pa(sp->spt) |
| 1261 | gfn_t pseudo_gfn; | 1606 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
| 1262 | 1607 | | shadow_user_mask | shadow_x_mask); | |
| 1263 | pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK) | ||
| 1264 | >> PAGE_SHIFT; | ||
| 1265 | new_table = kvm_mmu_get_page(vcpu, pseudo_gfn, | ||
| 1266 | v, level - 1, | ||
| 1267 | 1, ACC_ALL, &table[index]); | ||
| 1268 | if (!new_table) { | ||
| 1269 | pgprintk("nonpaging_map: ENOMEM\n"); | ||
| 1270 | kvm_release_pfn_clean(pfn); | ||
| 1271 | return -ENOMEM; | ||
| 1272 | } | ||
| 1273 | |||
| 1274 | set_shadow_pte(&table[index], | ||
| 1275 | __pa(new_table->spt) | ||
| 1276 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | ||
| 1277 | | shadow_user_mask | shadow_x_mask); | ||
| 1278 | } | ||
| 1279 | table_addr = table[index] & PT64_BASE_ADDR_MASK; | ||
| 1280 | } | 1608 | } |
| 1609 | return 0; | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | ||
| 1613 | int largepage, gfn_t gfn, pfn_t pfn) | ||
| 1614 | { | ||
| 1615 | int r; | ||
| 1616 | struct direct_shadow_walk walker = { | ||
| 1617 | .walker = { .entry = direct_map_entry, }, | ||
| 1618 | .pfn = pfn, | ||
| 1619 | .largepage = largepage, | ||
| 1620 | .write = write, | ||
| 1621 | .pt_write = 0, | ||
| 1622 | }; | ||
| 1623 | |||
| 1624 | r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT); | ||
| 1625 | if (r < 0) | ||
| 1626 | return r; | ||
| 1627 | return walker.pt_write; | ||
| 1281 | } | 1628 | } |
| 1282 | 1629 | ||
| 1283 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | 1630 | static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) |
| @@ -1287,16 +1634,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1287 | pfn_t pfn; | 1634 | pfn_t pfn; |
| 1288 | unsigned long mmu_seq; | 1635 | unsigned long mmu_seq; |
| 1289 | 1636 | ||
| 1290 | down_read(&current->mm->mmap_sem); | ||
| 1291 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1637 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
| 1292 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1638 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1293 | largepage = 1; | 1639 | largepage = 1; |
| 1294 | } | 1640 | } |
| 1295 | 1641 | ||
| 1296 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 1642 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 1297 | /* implicit mb(), we'll read before PT lock is unlocked */ | 1643 | smp_rmb(); |
| 1298 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1644 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1299 | up_read(&current->mm->mmap_sem); | ||
| 1300 | 1645 | ||
| 1301 | /* mmio */ | 1646 | /* mmio */ |
| 1302 | if (is_error_pfn(pfn)) { | 1647 | if (is_error_pfn(pfn)) { |
| @@ -1308,8 +1653,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1308 | if (mmu_notifier_retry(vcpu, mmu_seq)) | 1653 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
| 1309 | goto out_unlock; | 1654 | goto out_unlock; |
| 1310 | kvm_mmu_free_some_pages(vcpu); | 1655 | kvm_mmu_free_some_pages(vcpu); |
| 1311 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, | 1656 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn); |
| 1312 | PT32E_ROOT_LEVEL); | ||
| 1313 | spin_unlock(&vcpu->kvm->mmu_lock); | 1657 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1314 | 1658 | ||
| 1315 | 1659 | ||
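The down_read/up_read pair around gfn_to_pfn() is dropped here and in the other fault paths; instead the code snapshots mmu_notifier_seq, issues smp_rmb(), resolves the pfn outside the lock, and later discards the result via mmu_notifier_retry() if an invalidation ran in between. The sketch below is a simplified userspace model of that sample-then-revalidate idea using a plain atomic counter; the names and the bare retry loop are illustrative only and omit the in-progress-invalidation check the kernel also performs under mmu_lock.

/* Sample a sequence counter, do slow work unlocked, retry if it changed. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong invalidate_seq;      /* bumped by the "invalidation" side */

static unsigned long slow_lookup(unsigned long gfn)
{
        return gfn * 4096;               /* stand-in for gfn_to_pfn() */
}

static unsigned long map_gfn(unsigned long gfn)
{
        unsigned long seq, pfn;

        do {
                seq = atomic_load(&invalidate_seq);  /* like mmu_notifier_seq + smp_rmb() */
                pfn = slow_lookup(gfn);              /* may sleep, done outside the lock */
                /* in the kernel this recheck is mmu_notifier_retry() under mmu_lock */
        } while (atomic_load(&invalidate_seq) != seq);

        return pfn;
}

int main(void)
{
        printf("pfn %#lx\n", map_gfn(0x42));
        return 0;
}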
| @@ -1405,6 +1749,37 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
| 1405 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 1749 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
| 1406 | } | 1750 | } |
| 1407 | 1751 | ||
| 1752 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) | ||
| 1753 | { | ||
| 1754 | int i; | ||
| 1755 | struct kvm_mmu_page *sp; | ||
| 1756 | |||
| 1757 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
| 1758 | return; | ||
| 1759 | if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { | ||
| 1760 | hpa_t root = vcpu->arch.mmu.root_hpa; | ||
| 1761 | sp = page_header(root); | ||
| 1762 | mmu_sync_children(vcpu, sp); | ||
| 1763 | return; | ||
| 1764 | } | ||
| 1765 | for (i = 0; i < 4; ++i) { | ||
| 1766 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | ||
| 1767 | |||
| 1768 | if (root) { | ||
| 1769 | root &= PT64_BASE_ADDR_MASK; | ||
| 1770 | sp = page_header(root); | ||
| 1771 | mmu_sync_children(vcpu, sp); | ||
| 1772 | } | ||
| 1773 | } | ||
| 1774 | } | ||
| 1775 | |||
| 1776 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | ||
| 1777 | { | ||
| 1778 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 1779 | mmu_sync_roots(vcpu); | ||
| 1780 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 1781 | } | ||
| 1782 | |||
| 1408 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 1783 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
| 1409 | { | 1784 | { |
| 1410 | return vaddr; | 1785 | return vaddr; |
| @@ -1446,15 +1821,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1446 | if (r) | 1821 | if (r) |
| 1447 | return r; | 1822 | return r; |
| 1448 | 1823 | ||
| 1449 | down_read(&current->mm->mmap_sem); | ||
| 1450 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1824 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
| 1451 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1825 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1452 | largepage = 1; | 1826 | largepage = 1; |
| 1453 | } | 1827 | } |
| 1454 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 1828 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 1455 | /* implicit mb(), we'll read before PT lock is unlocked */ | 1829 | smp_rmb(); |
| 1456 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1830 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1457 | up_read(&current->mm->mmap_sem); | ||
| 1458 | if (is_error_pfn(pfn)) { | 1831 | if (is_error_pfn(pfn)) { |
| 1459 | kvm_release_pfn_clean(pfn); | 1832 | kvm_release_pfn_clean(pfn); |
| 1460 | return 1; | 1833 | return 1; |
| @@ -1464,7 +1837,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1464 | goto out_unlock; | 1837 | goto out_unlock; |
| 1465 | kvm_mmu_free_some_pages(vcpu); | 1838 | kvm_mmu_free_some_pages(vcpu); |
| 1466 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1839 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
| 1467 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); | 1840 | largepage, gfn, pfn); |
| 1468 | spin_unlock(&vcpu->kvm->mmu_lock); | 1841 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1469 | 1842 | ||
| 1470 | return r; | 1843 | return r; |
| @@ -1489,6 +1862,8 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) | |||
| 1489 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 1862 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
| 1490 | context->free = nonpaging_free; | 1863 | context->free = nonpaging_free; |
| 1491 | context->prefetch_page = nonpaging_prefetch_page; | 1864 | context->prefetch_page = nonpaging_prefetch_page; |
| 1865 | context->sync_page = nonpaging_sync_page; | ||
| 1866 | context->invlpg = nonpaging_invlpg; | ||
| 1492 | context->root_level = 0; | 1867 | context->root_level = 0; |
| 1493 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 1868 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
| 1494 | context->root_hpa = INVALID_PAGE; | 1869 | context->root_hpa = INVALID_PAGE; |
| @@ -1536,6 +1911,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | |||
| 1536 | context->page_fault = paging64_page_fault; | 1911 | context->page_fault = paging64_page_fault; |
| 1537 | context->gva_to_gpa = paging64_gva_to_gpa; | 1912 | context->gva_to_gpa = paging64_gva_to_gpa; |
| 1538 | context->prefetch_page = paging64_prefetch_page; | 1913 | context->prefetch_page = paging64_prefetch_page; |
| 1914 | context->sync_page = paging64_sync_page; | ||
| 1915 | context->invlpg = paging64_invlpg; | ||
| 1539 | context->free = paging_free; | 1916 | context->free = paging_free; |
| 1540 | context->root_level = level; | 1917 | context->root_level = level; |
| 1541 | context->shadow_root_level = level; | 1918 | context->shadow_root_level = level; |
| @@ -1557,6 +1934,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
| 1557 | context->gva_to_gpa = paging32_gva_to_gpa; | 1934 | context->gva_to_gpa = paging32_gva_to_gpa; |
| 1558 | context->free = paging_free; | 1935 | context->free = paging_free; |
| 1559 | context->prefetch_page = paging32_prefetch_page; | 1936 | context->prefetch_page = paging32_prefetch_page; |
| 1937 | context->sync_page = paging32_sync_page; | ||
| 1938 | context->invlpg = paging32_invlpg; | ||
| 1560 | context->root_level = PT32_ROOT_LEVEL; | 1939 | context->root_level = PT32_ROOT_LEVEL; |
| 1561 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 1940 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
| 1562 | context->root_hpa = INVALID_PAGE; | 1941 | context->root_hpa = INVALID_PAGE; |
| @@ -1576,6 +1955,8 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
| 1576 | context->page_fault = tdp_page_fault; | 1955 | context->page_fault = tdp_page_fault; |
| 1577 | context->free = nonpaging_free; | 1956 | context->free = nonpaging_free; |
| 1578 | context->prefetch_page = nonpaging_prefetch_page; | 1957 | context->prefetch_page = nonpaging_prefetch_page; |
| 1958 | context->sync_page = nonpaging_sync_page; | ||
| 1959 | context->invlpg = nonpaging_invlpg; | ||
| 1579 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 1960 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
| 1580 | context->root_hpa = INVALID_PAGE; | 1961 | context->root_hpa = INVALID_PAGE; |
| 1581 | 1962 | ||
| @@ -1647,6 +2028,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
| 1647 | spin_lock(&vcpu->kvm->mmu_lock); | 2028 | spin_lock(&vcpu->kvm->mmu_lock); |
| 1648 | kvm_mmu_free_some_pages(vcpu); | 2029 | kvm_mmu_free_some_pages(vcpu); |
| 1649 | mmu_alloc_roots(vcpu); | 2030 | mmu_alloc_roots(vcpu); |
| 2031 | mmu_sync_roots(vcpu); | ||
| 1650 | spin_unlock(&vcpu->kvm->mmu_lock); | 2032 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1651 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 2033 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
| 1652 | kvm_mmu_flush_tlb(vcpu); | 2034 | kvm_mmu_flush_tlb(vcpu); |
| @@ -1767,15 +2149,13 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1767 | return; | 2149 | return; |
| 1768 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | 2150 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| 1769 | 2151 | ||
| 1770 | down_read(&current->mm->mmap_sem); | ||
| 1771 | if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { | 2152 | if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) { |
| 1772 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 2153 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1773 | vcpu->arch.update_pte.largepage = 1; | 2154 | vcpu->arch.update_pte.largepage = 1; |
| 1774 | } | 2155 | } |
| 1775 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | 2156 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 1776 | /* implicit mb(), we'll read before PT lock is unlocked */ | 2157 | smp_rmb(); |
| 1777 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 2158 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1778 | up_read(&current->mm->mmap_sem); | ||
| 1779 | 2159 | ||
| 1780 | if (is_error_pfn(pfn)) { | 2160 | if (is_error_pfn(pfn)) { |
| 1781 | kvm_release_pfn_clean(pfn); | 2161 | kvm_release_pfn_clean(pfn); |
| @@ -1837,7 +2217,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1837 | index = kvm_page_table_hashfn(gfn); | 2217 | index = kvm_page_table_hashfn(gfn); |
| 1838 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2218 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
| 1839 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { | 2219 | hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { |
| 1840 | if (sp->gfn != gfn || sp->role.metaphysical) | 2220 | if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid) |
| 1841 | continue; | 2221 | continue; |
| 1842 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; | 2222 | pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; |
| 1843 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); | 2223 | misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); |
| @@ -1855,7 +2235,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1855 | */ | 2235 | */ |
| 1856 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", | 2236 | pgprintk("misaligned: gpa %llx bytes %d role %x\n", |
| 1857 | gpa, bytes, sp->role.word); | 2237 | gpa, bytes, sp->role.word); |
| 1858 | kvm_mmu_zap_page(vcpu->kvm, sp); | 2238 | if (kvm_mmu_zap_page(vcpu->kvm, sp)) |
| 2239 | n = bucket->first; | ||
| 1859 | ++vcpu->kvm->stat.mmu_flooded; | 2240 | ++vcpu->kvm->stat.mmu_flooded; |
| 1860 | continue; | 2241 | continue; |
| 1861 | } | 2242 | } |
| @@ -1969,6 +2350,16 @@ out: | |||
| 1969 | } | 2350 | } |
| 1970 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | 2351 | EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); |
| 1971 | 2352 | ||
| 2353 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 2354 | { | ||
| 2355 | spin_lock(&vcpu->kvm->mmu_lock); | ||
| 2356 | vcpu->arch.mmu.invlpg(vcpu, gva); | ||
| 2357 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 2358 | kvm_mmu_flush_tlb(vcpu); | ||
| 2359 | ++vcpu->stat.invlpg; | ||
| 2360 | } | ||
| 2361 | EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); | ||
| 2362 | |||
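kvm_mmu_invlpg() is the generic entry point: it takes mmu_lock, dispatches to the per-mode invlpg callback installed in the MMU context (nonpaging_invlpg, paging32_invlpg or paging64_invlpg, wired up in the init_context hunks above), then flushes the TLB and bumps the invlpg stat. Together with sync_page and prefetch_page these callbacks form a small ops table chosen when the context is initialised. A compact sketch of that dispatch pattern follows; the mode names and handlers are invented, only the shape matches the kernel.

/* Per-mode callback dispatch, selected once at context-init time. */
#include <stdio.h>

struct mmu_ctx {
        void (*invlpg)(unsigned long gva);
        int   root_level;
};

static void nonpaging_invlpg(unsigned long gva) { (void)gva; /* nothing to do */ }
static void paging64_invlpg(unsigned long gva)  { printf("drop spte for %#lx\n", gva); }

static void init_ctx(struct mmu_ctx *ctx, int guest_paging)
{
        if (guest_paging) {
                ctx->invlpg = paging64_invlpg;
                ctx->root_level = 4;
        } else {
                ctx->invlpg = nonpaging_invlpg;
                ctx->root_level = 0;
        }
}

static void mmu_invlpg(struct mmu_ctx *ctx, unsigned long gva)
{
        /* locking and TLB flush elided in this sketch */
        ctx->invlpg(gva);
}

int main(void)
{
        struct mmu_ctx ctx;

        init_ctx(&ctx, 1);
        mmu_invlpg(&ctx, 0x12345000UL);
        return 0;
}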
| 1972 | void kvm_enable_tdp(void) | 2363 | void kvm_enable_tdp(void) |
| 1973 | { | 2364 | { |
| 1974 | tdp_enabled = true; | 2365 | tdp_enabled = true; |
| @@ -2055,6 +2446,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2055 | { | 2446 | { |
| 2056 | struct kvm_mmu_page *sp; | 2447 | struct kvm_mmu_page *sp; |
| 2057 | 2448 | ||
| 2449 | spin_lock(&kvm->mmu_lock); | ||
| 2058 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 2450 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
| 2059 | int i; | 2451 | int i; |
| 2060 | u64 *pt; | 2452 | u64 *pt; |
| @@ -2068,6 +2460,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
| 2068 | if (pt[i] & PT_WRITABLE_MASK) | 2460 | if (pt[i] & PT_WRITABLE_MASK) |
| 2069 | pt[i] &= ~PT_WRITABLE_MASK; | 2461 | pt[i] &= ~PT_WRITABLE_MASK; |
| 2070 | } | 2462 | } |
| 2463 | kvm_flush_remote_tlbs(kvm); | ||
| 2464 | spin_unlock(&kvm->mmu_lock); | ||
| 2071 | } | 2465 | } |
| 2072 | 2466 | ||
| 2073 | void kvm_mmu_zap_all(struct kvm *kvm) | 2467 | void kvm_mmu_zap_all(struct kvm *kvm) |
| @@ -2076,7 +2470,9 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
| 2076 | 2470 | ||
| 2077 | spin_lock(&kvm->mmu_lock); | 2471 | spin_lock(&kvm->mmu_lock); |
| 2078 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) | 2472 | list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) |
| 2079 | kvm_mmu_zap_page(kvm, sp); | 2473 | if (kvm_mmu_zap_page(kvm, sp)) |
| 2474 | node = container_of(kvm->arch.active_mmu_pages.next, | ||
| 2475 | struct kvm_mmu_page, link); | ||
| 2080 | spin_unlock(&kvm->mmu_lock); | 2476 | spin_unlock(&kvm->mmu_lock); |
| 2081 | 2477 | ||
| 2082 | kvm_flush_remote_tlbs(kvm); | 2478 | kvm_flush_remote_tlbs(kvm); |
| @@ -2291,18 +2687,18 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | |||
| 2291 | gpa_t addr, unsigned long *ret) | 2687 | gpa_t addr, unsigned long *ret) |
| 2292 | { | 2688 | { |
| 2293 | int r; | 2689 | int r; |
| 2294 | struct kvm_pv_mmu_op_buffer buffer; | 2690 | struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer; |
| 2295 | 2691 | ||
| 2296 | buffer.ptr = buffer.buf; | 2692 | buffer->ptr = buffer->buf; |
| 2297 | buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf); | 2693 | buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf); |
| 2298 | buffer.processed = 0; | 2694 | buffer->processed = 0; |
| 2299 | 2695 | ||
| 2300 | r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len); | 2696 | r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len); |
| 2301 | if (r) | 2697 | if (r) |
| 2302 | goto out; | 2698 | goto out; |
| 2303 | 2699 | ||
| 2304 | while (buffer.len) { | 2700 | while (buffer->len) { |
| 2305 | r = kvm_pv_mmu_op_one(vcpu, &buffer); | 2701 | r = kvm_pv_mmu_op_one(vcpu, buffer); |
| 2306 | if (r < 0) | 2702 | if (r < 0) |
| 2307 | goto out; | 2703 | goto out; |
| 2308 | if (r == 0) | 2704 | if (r == 0) |
| @@ -2311,7 +2707,7 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | |||
| 2311 | 2707 | ||
| 2312 | r = 1; | 2708 | r = 1; |
| 2313 | out: | 2709 | out: |
| 2314 | *ret = buffer.processed; | 2710 | *ret = buffer->processed; |
| 2315 | return r; | 2711 | return r; |
| 2316 | } | 2712 | } |
| 2317 | 2713 | ||
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4a814bff21f2..613ec9aa674a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -25,11 +25,11 @@ | |||
| 25 | #if PTTYPE == 64 | 25 | #if PTTYPE == 64 |
| 26 | #define pt_element_t u64 | 26 | #define pt_element_t u64 |
| 27 | #define guest_walker guest_walker64 | 27 | #define guest_walker guest_walker64 |
| 28 | #define shadow_walker shadow_walker64 | ||
| 28 | #define FNAME(name) paging##64_##name | 29 | #define FNAME(name) paging##64_##name |
| 29 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK | 30 | #define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK |
| 30 | #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK | 31 | #define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK |
| 31 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 32 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
| 32 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
| 33 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | 33 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) |
| 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
| 35 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
| @@ -42,11 +42,11 @@ | |||
| 42 | #elif PTTYPE == 32 | 42 | #elif PTTYPE == 32 |
| 43 | #define pt_element_t u32 | 43 | #define pt_element_t u32 |
| 44 | #define guest_walker guest_walker32 | 44 | #define guest_walker guest_walker32 |
| 45 | #define shadow_walker shadow_walker32 | ||
| 45 | #define FNAME(name) paging##32_##name | 46 | #define FNAME(name) paging##32_##name |
| 46 | #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK | 47 | #define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK |
| 47 | #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK | 48 | #define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK |
| 48 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
| 49 | #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) | ||
| 50 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | 50 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) |
| 51 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 51 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
| 52 | #define PT_MAX_FULL_LEVELS 2 | 52 | #define PT_MAX_FULL_LEVELS 2 |
| @@ -73,6 +73,17 @@ struct guest_walker { | |||
| 73 | u32 error_code; | 73 | u32 error_code; |
| 74 | }; | 74 | }; |
| 75 | 75 | ||
| 76 | struct shadow_walker { | ||
| 77 | struct kvm_shadow_walk walker; | ||
| 78 | struct guest_walker *guest_walker; | ||
| 79 | int user_fault; | ||
| 80 | int write_fault; | ||
| 81 | int largepage; | ||
| 82 | int *ptwrite; | ||
| 83 | pfn_t pfn; | ||
| 84 | u64 *sptep; | ||
| 85 | }; | ||
| 86 | |||
| 76 | static gfn_t gpte_to_gfn(pt_element_t gpte) | 87 | static gfn_t gpte_to_gfn(pt_element_t gpte) |
| 77 | { | 88 | { |
| 78 | return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; | 89 | return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; |
| @@ -91,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, | |||
| 91 | pt_element_t *table; | 102 | pt_element_t *table; |
| 92 | struct page *page; | 103 | struct page *page; |
| 93 | 104 | ||
| 94 | down_read(&current->mm->mmap_sem); | ||
| 95 | page = gfn_to_page(kvm, table_gfn); | 105 | page = gfn_to_page(kvm, table_gfn); |
| 96 | up_read(&current->mm->mmap_sem); | ||
| 97 | 106 | ||
| 98 | table = kmap_atomic(page, KM_USER0); | 107 | table = kmap_atomic(page, KM_USER0); |
| 99 | |||
| 100 | ret = CMPXCHG(&table[index], orig_pte, new_pte); | 108 | ret = CMPXCHG(&table[index], orig_pte, new_pte); |
| 101 | |||
| 102 | kunmap_atomic(table, KM_USER0); | 109 | kunmap_atomic(table, KM_USER0); |
| 103 | 110 | ||
| 104 | kvm_release_page_dirty(page); | 111 | kvm_release_page_dirty(page); |
| @@ -274,86 +281,89 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 274 | /* | 281 | /* |
| 275 | * Fetch a shadow pte for a specific level in the paging hierarchy. | 282 | * Fetch a shadow pte for a specific level in the paging hierarchy. |
| 276 | */ | 283 | */ |
| 277 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | 284 | static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, |
| 278 | struct guest_walker *walker, | 285 | struct kvm_vcpu *vcpu, u64 addr, |
| 279 | int user_fault, int write_fault, int largepage, | 286 | u64 *sptep, int level) |
| 280 | int *ptwrite, pfn_t pfn) | ||
| 281 | { | 287 | { |
| 282 | hpa_t shadow_addr; | 288 | struct shadow_walker *sw = |
| 283 | int level; | 289 | container_of(_sw, struct shadow_walker, walker); |
| 284 | u64 *shadow_ent; | 290 | struct guest_walker *gw = sw->guest_walker; |
| 285 | unsigned access = walker->pt_access; | 291 | unsigned access = gw->pt_access; |
| 286 | 292 | struct kvm_mmu_page *shadow_page; | |
| 287 | if (!is_present_pte(walker->ptes[walker->level - 1])) | 293 | u64 spte; |
| 288 | return NULL; | 294 | int metaphysical; |
| 289 | 295 | gfn_t table_gfn; | |
| 290 | shadow_addr = vcpu->arch.mmu.root_hpa; | 296 | int r; |
| 291 | level = vcpu->arch.mmu.shadow_root_level; | 297 | pt_element_t curr_pte; |
| 292 | if (level == PT32E_ROOT_LEVEL) { | 298 | |
| 293 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | 299 | if (level == PT_PAGE_TABLE_LEVEL |
| 294 | shadow_addr &= PT64_BASE_ADDR_MASK; | 300 | || (sw->largepage && level == PT_DIRECTORY_LEVEL)) { |
| 295 | --level; | 301 | mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, |
| 302 | sw->user_fault, sw->write_fault, | ||
| 303 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | ||
| 304 | sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, | ||
| 305 | false); | ||
| 306 | sw->sptep = sptep; | ||
| 307 | return 1; | ||
| 296 | } | 308 | } |
| 297 | 309 | ||
| 298 | for (; ; level--) { | 310 | if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep)) |
| 299 | u32 index = SHADOW_PT_INDEX(addr, level); | 311 | return 0; |
| 300 | struct kvm_mmu_page *shadow_page; | ||
| 301 | u64 shadow_pte; | ||
| 302 | int metaphysical; | ||
| 303 | gfn_t table_gfn; | ||
| 304 | |||
| 305 | shadow_ent = ((u64 *)__va(shadow_addr)) + index; | ||
| 306 | if (level == PT_PAGE_TABLE_LEVEL) | ||
| 307 | break; | ||
| 308 | |||
| 309 | if (largepage && level == PT_DIRECTORY_LEVEL) | ||
| 310 | break; | ||
| 311 | 312 | ||
| 312 | if (is_shadow_present_pte(*shadow_ent) | 313 | if (is_large_pte(*sptep)) { |
| 313 | && !is_large_pte(*shadow_ent)) { | 314 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); |
| 314 | shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK; | 315 | kvm_flush_remote_tlbs(vcpu->kvm); |
| 315 | continue; | 316 | rmap_remove(vcpu->kvm, sptep); |
| 316 | } | 317 | } |
| 317 | 318 | ||
| 318 | if (is_large_pte(*shadow_ent)) | 319 | if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) { |
| 319 | rmap_remove(vcpu->kvm, shadow_ent); | 320 | metaphysical = 1; |
| 320 | 321 | if (!is_dirty_pte(gw->ptes[level - 1])) | |
| 321 | if (level - 1 == PT_PAGE_TABLE_LEVEL | 322 | access &= ~ACC_WRITE_MASK; |
| 322 | && walker->level == PT_DIRECTORY_LEVEL) { | 323 | table_gfn = gpte_to_gfn(gw->ptes[level - 1]); |
| 323 | metaphysical = 1; | 324 | } else { |
| 324 | if (!is_dirty_pte(walker->ptes[level - 1])) | 325 | metaphysical = 0; |
| 325 | access &= ~ACC_WRITE_MASK; | 326 | table_gfn = gw->table_gfn[level - 2]; |
| 326 | table_gfn = gpte_to_gfn(walker->ptes[level - 1]); | 327 | } |
| 327 | } else { | 328 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1, |
| 328 | metaphysical = 0; | 329 | metaphysical, access, sptep); |
| 329 | table_gfn = walker->table_gfn[level - 2]; | 330 | if (!metaphysical) { |
| 330 | } | 331 | r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], |
| 331 | shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1, | 332 | &curr_pte, sizeof(curr_pte)); |
| 332 | metaphysical, access, | 333 | if (r || curr_pte != gw->ptes[level - 2]) { |
| 333 | shadow_ent); | 334 | kvm_release_pfn_clean(sw->pfn); |
| 334 | if (!metaphysical) { | 335 | sw->sptep = NULL; |
| 335 | int r; | 336 | return 1; |
| 336 | pt_element_t curr_pte; | ||
| 337 | r = kvm_read_guest_atomic(vcpu->kvm, | ||
| 338 | walker->pte_gpa[level - 2], | ||
| 339 | &curr_pte, sizeof(curr_pte)); | ||
| 340 | if (r || curr_pte != walker->ptes[level - 2]) { | ||
| 341 | kvm_release_pfn_clean(pfn); | ||
| 342 | return NULL; | ||
| 343 | } | ||
| 344 | } | 337 | } |
| 345 | shadow_addr = __pa(shadow_page->spt); | ||
| 346 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | ||
| 347 | | PT_WRITABLE_MASK | PT_USER_MASK; | ||
| 348 | set_shadow_pte(shadow_ent, shadow_pte); | ||
| 349 | } | 338 | } |
| 350 | 339 | ||
| 351 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, | 340 | spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK |
| 352 | user_fault, write_fault, | 341 | | PT_WRITABLE_MASK | PT_USER_MASK; |
| 353 | walker->ptes[walker->level-1] & PT_DIRTY_MASK, | 342 | *sptep = spte; |
| 354 | ptwrite, largepage, walker->gfn, pfn, false); | 343 | return 0; |
| 344 | } | ||
| 345 | |||
| 346 | static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | ||
| 347 | struct guest_walker *guest_walker, | ||
| 348 | int user_fault, int write_fault, int largepage, | ||
| 349 | int *ptwrite, pfn_t pfn) | ||
| 350 | { | ||
| 351 | struct shadow_walker walker = { | ||
| 352 | .walker = { .entry = FNAME(shadow_walk_entry), }, | ||
| 353 | .guest_walker = guest_walker, | ||
| 354 | .user_fault = user_fault, | ||
| 355 | .write_fault = write_fault, | ||
| 356 | .largepage = largepage, | ||
| 357 | .ptwrite = ptwrite, | ||
| 358 | .pfn = pfn, | ||
| 359 | }; | ||
| 360 | |||
| 361 | if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1])) | ||
| 362 | return NULL; | ||
| 363 | |||
| 364 | walk_shadow(&walker.walker, vcpu, addr); | ||
| 355 | 365 | ||
| 356 | return shadow_ent; | 366 | return walker.sptep; |
| 357 | } | 367 | } |
| 358 | 368 | ||
| 359 | /* | 369 | /* |
| @@ -407,7 +417,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 407 | return 0; | 417 | return 0; |
| 408 | } | 418 | } |
| 409 | 419 | ||
| 410 | down_read(&current->mm->mmap_sem); | ||
| 411 | if (walker.level == PT_DIRECTORY_LEVEL) { | 420 | if (walker.level == PT_DIRECTORY_LEVEL) { |
| 412 | gfn_t large_gfn; | 421 | gfn_t large_gfn; |
| 413 | large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); | 422 | large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1); |
| @@ -417,9 +426,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 417 | } | 426 | } |
| 418 | } | 427 | } |
| 419 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | 428 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
| 420 | /* implicit mb(), we'll read before PT lock is unlocked */ | 429 | smp_rmb(); |
| 421 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 430 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
| 422 | up_read(&current->mm->mmap_sem); | ||
| 423 | 431 | ||
| 424 | /* mmio */ | 432 | /* mmio */ |
| 425 | if (is_error_pfn(pfn)) { | 433 | if (is_error_pfn(pfn)) { |
| @@ -453,6 +461,31 @@ out_unlock: | |||
| 453 | return 0; | 461 | return 0; |
| 454 | } | 462 | } |
| 455 | 463 | ||
| 464 | static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, | ||
| 465 | struct kvm_vcpu *vcpu, u64 addr, | ||
| 466 | u64 *sptep, int level) | ||
| 467 | { | ||
| 468 | |||
| 469 | if (level == PT_PAGE_TABLE_LEVEL) { | ||
| 470 | if (is_shadow_present_pte(*sptep)) | ||
| 471 | rmap_remove(vcpu->kvm, sptep); | ||
| 472 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); | ||
| 473 | return 1; | ||
| 474 | } | ||
| 475 | if (!is_shadow_present_pte(*sptep)) | ||
| 476 | return 1; | ||
| 477 | return 0; | ||
| 478 | } | ||
| 479 | |||
| 480 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | ||
| 481 | { | ||
| 482 | struct shadow_walker walker = { | ||
| 483 | .walker = { .entry = FNAME(shadow_invlpg_entry), }, | ||
| 484 | }; | ||
| 485 | |||
| 486 | walk_shadow(&walker.walker, vcpu, gva); | ||
| 487 | } | ||
| 488 | |||
| 456 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 489 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
| 457 | { | 490 | { |
| 458 | struct guest_walker walker; | 491 | struct guest_walker walker; |
| @@ -499,12 +532,66 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
| 499 | } | 532 | } |
| 500 | } | 533 | } |
| 501 | 534 | ||
| 535 | /* | ||
| 536 | * Using the cached information from sp->gfns is safe because: | ||
| 537 | * - The spte has a reference to the struct page, so the pfn for a given gfn | ||
| 538 | * can't change unless all sptes pointing to it are nuked first. | ||
| 539 | * - Alias changes zap the entire shadow cache. | ||
| 540 | */ | ||
| 541 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | ||
| 542 | { | ||
| 543 | int i, offset, nr_present; | ||
| 544 | |||
| 545 | offset = nr_present = 0; | ||
| 546 | |||
| 547 | if (PTTYPE == 32) | ||
| 548 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | ||
| 549 | |||
| 550 | for (i = 0; i < PT64_ENT_PER_PAGE; i++) { | ||
| 551 | unsigned pte_access; | ||
| 552 | pt_element_t gpte; | ||
| 553 | gpa_t pte_gpa; | ||
| 554 | gfn_t gfn = sp->gfns[i]; | ||
| 555 | |||
| 556 | if (!is_shadow_present_pte(sp->spt[i])) | ||
| 557 | continue; | ||
| 558 | |||
| 559 | pte_gpa = gfn_to_gpa(sp->gfn); | ||
| 560 | pte_gpa += (i+offset) * sizeof(pt_element_t); | ||
| 561 | |||
| 562 | if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte, | ||
| 563 | sizeof(pt_element_t))) | ||
| 564 | return -EINVAL; | ||
| 565 | |||
| 566 | if (gpte_to_gfn(gpte) != gfn || !is_present_pte(gpte) || | ||
| 567 | !(gpte & PT_ACCESSED_MASK)) { | ||
| 568 | u64 nonpresent; | ||
| 569 | |||
| 570 | rmap_remove(vcpu->kvm, &sp->spt[i]); | ||
| 571 | if (is_present_pte(gpte)) | ||
| 572 | nonpresent = shadow_trap_nonpresent_pte; | ||
| 573 | else | ||
| 574 | nonpresent = shadow_notrap_nonpresent_pte; | ||
| 575 | set_shadow_pte(&sp->spt[i], nonpresent); | ||
| 576 | continue; | ||
| 577 | } | ||
| 578 | |||
| 579 | nr_present++; | ||
| 580 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | ||
| 581 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | ||
| 582 | is_dirty_pte(gpte), 0, gfn, | ||
| 583 | spte_to_pfn(sp->spt[i]), true, false); | ||
| 584 | } | ||
| 585 | |||
| 586 | return !nr_present; | ||
| 587 | } | ||
| 588 | |||
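In sync_page() above, the guest pte backing shadow slot i is located by offsetting into the guest page table: for 32-bit guests a 4 KiB guest table holds 1024 4-byte entries while a shadow page covers only 512, so role.quadrant (shifted by PT64_LEVEL_BITS) selects which slice of the guest table this shadow page mirrors, giving pte_gpa = gfn_to_gpa(sp->gfn) + (i + offset) * sizeof(pt_element_t). A small worked example with invented numbers:

/* Worked example of the pte_gpa computation for PTTYPE == 32. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT      12
#define PT64_LEVEL_BITS 9

int main(void)
{
        uint64_t gfn      = 0x1234;      /* hypothetical sp->gfn */
        unsigned quadrant = 1;           /* second slice of the guest table */
        unsigned i        = 7;           /* shadow slot being resynced */
        unsigned pte_size = 4;           /* sizeof(pt_element_t) for a 32-bit guest */

        unsigned offset  = quadrant << PT64_LEVEL_BITS;               /* 512 */
        uint64_t pte_gpa = (gfn << PAGE_SHIFT) + (i + offset) * pte_size;

        printf("guest pte lives at gpa %#llx\n", (unsigned long long)pte_gpa);
        return 0;                        /* prints 0x123481c for these values */
}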
| 502 | #undef pt_element_t | 589 | #undef pt_element_t |
| 503 | #undef guest_walker | 590 | #undef guest_walker |
| 591 | #undef shadow_walker | ||
| 504 | #undef FNAME | 592 | #undef FNAME |
| 505 | #undef PT_BASE_ADDR_MASK | 593 | #undef PT_BASE_ADDR_MASK |
| 506 | #undef PT_INDEX | 594 | #undef PT_INDEX |
| 507 | #undef SHADOW_PT_INDEX | ||
| 508 | #undef PT_LEVEL_MASK | 595 | #undef PT_LEVEL_MASK |
| 509 | #undef PT_DIR_BASE_ADDR_MASK | 596 | #undef PT_DIR_BASE_ADDR_MASK |
| 510 | #undef PT_LEVEL_BITS | 597 | #undef PT_LEVEL_BITS |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8233b86c778c..9c4ce657d963 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "kvm_svm.h" | 18 | #include "kvm_svm.h" |
| 19 | #include "irq.h" | 19 | #include "irq.h" |
| 20 | #include "mmu.h" | 20 | #include "mmu.h" |
| 21 | #include "kvm_cache_regs.h" | ||
| 21 | 22 | ||
| 22 | #include <linux/module.h> | 23 | #include <linux/module.h> |
| 23 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
| @@ -35,10 +36,6 @@ MODULE_LICENSE("GPL"); | |||
| 35 | #define IOPM_ALLOC_ORDER 2 | 36 | #define IOPM_ALLOC_ORDER 2 |
| 36 | #define MSRPM_ALLOC_ORDER 1 | 37 | #define MSRPM_ALLOC_ORDER 1 |
| 37 | 38 | ||
| 38 | #define DB_VECTOR 1 | ||
| 39 | #define UD_VECTOR 6 | ||
| 40 | #define GP_VECTOR 13 | ||
| 41 | |||
| 42 | #define DR7_GD_MASK (1 << 13) | 39 | #define DR7_GD_MASK (1 << 13) |
| 43 | #define DR6_BD_MASK (1 << 13) | 40 | #define DR6_BD_MASK (1 << 13) |
| 44 | 41 | ||
| @@ -47,7 +44,7 @@ MODULE_LICENSE("GPL"); | |||
| 47 | 44 | ||
| 48 | #define SVM_FEATURE_NPT (1 << 0) | 45 | #define SVM_FEATURE_NPT (1 << 0) |
| 49 | #define SVM_FEATURE_LBRV (1 << 1) | 46 | #define SVM_FEATURE_LBRV (1 << 1) |
| 50 | #define SVM_DEATURE_SVML (1 << 2) | 47 | #define SVM_FEATURE_SVML (1 << 2) |
| 51 | 48 | ||
| 52 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 49 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
| 53 | 50 | ||
| @@ -236,13 +233,11 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 236 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 233 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
| 237 | return; | 234 | return; |
| 238 | } | 235 | } |
| 239 | if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) | 236 | if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) |
| 240 | printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", | 237 | printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", |
| 241 | __func__, | 238 | __func__, kvm_rip_read(vcpu), svm->next_rip); |
| 242 | svm->vmcb->save.rip, | ||
| 243 | svm->next_rip); | ||
| 244 | 239 | ||
| 245 | vcpu->arch.rip = svm->vmcb->save.rip = svm->next_rip; | 240 | kvm_rip_write(vcpu, svm->next_rip); |
| 246 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | 241 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; |
| 247 | 242 | ||
| 248 | vcpu->arch.interrupt_window_open = 1; | 243 | vcpu->arch.interrupt_window_open = 1; |
| @@ -530,6 +525,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 530 | (1ULL << INTERCEPT_CPUID) | | 525 | (1ULL << INTERCEPT_CPUID) | |
| 531 | (1ULL << INTERCEPT_INVD) | | 526 | (1ULL << INTERCEPT_INVD) | |
| 532 | (1ULL << INTERCEPT_HLT) | | 527 | (1ULL << INTERCEPT_HLT) | |
| 528 | (1ULL << INTERCEPT_INVLPG) | | ||
| 533 | (1ULL << INTERCEPT_INVLPGA) | | 529 | (1ULL << INTERCEPT_INVLPGA) | |
| 534 | (1ULL << INTERCEPT_IOIO_PROT) | | 530 | (1ULL << INTERCEPT_IOIO_PROT) | |
| 535 | (1ULL << INTERCEPT_MSR_PROT) | | 531 | (1ULL << INTERCEPT_MSR_PROT) | |
| @@ -581,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 581 | save->dr7 = 0x400; | 577 | save->dr7 = 0x400; |
| 582 | save->rflags = 2; | 578 | save->rflags = 2; |
| 583 | save->rip = 0x0000fff0; | 579 | save->rip = 0x0000fff0; |
| 580 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | ||
| 584 | 581 | ||
| 585 | /* | 582 | /* |
| 586 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 583 | * cr0 val on cpu init should be 0x60000010, we enable cpu |
| @@ -593,7 +590,8 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 593 | if (npt_enabled) { | 590 | if (npt_enabled) { |
| 594 | /* Setup VMCB for Nested Paging */ | 591 | /* Setup VMCB for Nested Paging */ |
| 595 | control->nested_ctl = 1; | 592 | control->nested_ctl = 1; |
| 596 | control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH); | 593 | control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) | |
| 594 | (1ULL << INTERCEPT_INVLPG)); | ||
| 597 | control->intercept_exceptions &= ~(1 << PF_VECTOR); | 595 | control->intercept_exceptions &= ~(1 << PF_VECTOR); |
| 598 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| | 596 | control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| |
| 599 | INTERCEPT_CR3_MASK); | 597 | INTERCEPT_CR3_MASK); |
| @@ -615,10 +613,12 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 615 | init_vmcb(svm); | 613 | init_vmcb(svm); |
| 616 | 614 | ||
| 617 | if (vcpu->vcpu_id != 0) { | 615 | if (vcpu->vcpu_id != 0) { |
| 618 | svm->vmcb->save.rip = 0; | 616 | kvm_rip_write(vcpu, 0); |
| 619 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; | 617 | svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12; |
| 620 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; | 618 | svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8; |
| 621 | } | 619 | } |
| 620 | vcpu->arch.regs_avail = ~0; | ||
| 621 | vcpu->arch.regs_dirty = ~0; | ||
| 622 | 622 | ||
| 623 | return 0; | 623 | return 0; |
| 624 | } | 624 | } |
| @@ -721,23 +721,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 721 | rdtscll(vcpu->arch.host_tsc); | 721 | rdtscll(vcpu->arch.host_tsc); |
| 722 | } | 722 | } |
| 723 | 723 | ||
| 724 | static void svm_cache_regs(struct kvm_vcpu *vcpu) | ||
| 725 | { | ||
| 726 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 727 | |||
| 728 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
| 729 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
| 730 | vcpu->arch.rip = svm->vmcb->save.rip; | ||
| 731 | } | ||
| 732 | |||
| 733 | static void svm_decache_regs(struct kvm_vcpu *vcpu) | ||
| 734 | { | ||
| 735 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 736 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 737 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 738 | svm->vmcb->save.rip = vcpu->arch.rip; | ||
| 739 | } | ||
| 740 | |||
| 741 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 724 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
| 742 | { | 725 | { |
| 743 | return to_svm(vcpu)->vmcb->save.rflags; | 726 | return to_svm(vcpu)->vmcb->save.rflags; |
| @@ -1040,7 +1023,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1040 | if (npt_enabled) | 1023 | if (npt_enabled) |
| 1041 | svm_flush_tlb(&svm->vcpu); | 1024 | svm_flush_tlb(&svm->vcpu); |
| 1042 | 1025 | ||
| 1043 | if (event_injection) | 1026 | if (!npt_enabled && event_injection) |
| 1044 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | 1027 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); |
| 1045 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1028 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
| 1046 | } | 1029 | } |
| @@ -1139,14 +1122,14 @@ static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1139 | 1122 | ||
| 1140 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1123 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1141 | { | 1124 | { |
| 1142 | svm->next_rip = svm->vmcb->save.rip + 1; | 1125 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
| 1143 | skip_emulated_instruction(&svm->vcpu); | 1126 | skip_emulated_instruction(&svm->vcpu); |
| 1144 | return kvm_emulate_halt(&svm->vcpu); | 1127 | return kvm_emulate_halt(&svm->vcpu); |
| 1145 | } | 1128 | } |
| 1146 | 1129 | ||
| 1147 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1130 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1148 | { | 1131 | { |
| 1149 | svm->next_rip = svm->vmcb->save.rip + 3; | 1132 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1150 | skip_emulated_instruction(&svm->vcpu); | 1133 | skip_emulated_instruction(&svm->vcpu); |
| 1151 | kvm_emulate_hypercall(&svm->vcpu); | 1134 | kvm_emulate_hypercall(&svm->vcpu); |
| 1152 | return 1; | 1135 | return 1; |
| @@ -1178,11 +1161,18 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
| 1178 | 1161 | ||
| 1179 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1162 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1180 | { | 1163 | { |
| 1181 | svm->next_rip = svm->vmcb->save.rip + 2; | 1164 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 1182 | kvm_emulate_cpuid(&svm->vcpu); | 1165 | kvm_emulate_cpuid(&svm->vcpu); |
| 1183 | return 1; | 1166 | return 1; |
| 1184 | } | 1167 | } |
| 1185 | 1168 | ||
| 1169 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
| 1170 | { | ||
| 1171 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | ||
| 1172 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | ||
| 1173 | return 1; | ||
| 1174 | } | ||
| 1175 | |||
| 1186 | static int emulate_on_interception(struct vcpu_svm *svm, | 1176 | static int emulate_on_interception(struct vcpu_svm *svm, |
| 1187 | struct kvm_run *kvm_run) | 1177 | struct kvm_run *kvm_run) |
| 1188 | { | 1178 | { |
| @@ -1273,9 +1263,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1273 | KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, | 1263 | KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, |
| 1274 | (u32)(data >> 32), handler); | 1264 | (u32)(data >> 32), handler); |
| 1275 | 1265 | ||
| 1276 | svm->vmcb->save.rax = data & 0xffffffff; | 1266 | svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; |
| 1277 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; | 1267 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; |
| 1278 | svm->next_rip = svm->vmcb->save.rip + 2; | 1268 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 1279 | skip_emulated_instruction(&svm->vcpu); | 1269 | skip_emulated_instruction(&svm->vcpu); |
| 1280 | } | 1270 | } |
| 1281 | return 1; | 1271 | return 1; |
| @@ -1359,13 +1349,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
| 1359 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1349 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
| 1360 | { | 1350 | { |
| 1361 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1351 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
| 1362 | u64 data = (svm->vmcb->save.rax & -1u) | 1352 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
| 1363 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 1353 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
| 1364 | 1354 | ||
| 1365 | KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), | 1355 | KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), |
| 1366 | handler); | 1356 | handler); |
| 1367 | 1357 | ||
| 1368 | svm->next_rip = svm->vmcb->save.rip + 2; | 1358 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 1369 | if (svm_set_msr(&svm->vcpu, ecx, data)) | 1359 | if (svm_set_msr(&svm->vcpu, ecx, data)) |
| 1370 | kvm_inject_gp(&svm->vcpu, 0); | 1360 | kvm_inject_gp(&svm->vcpu, 0); |
| 1371 | else | 1361 | else |
| @@ -1436,7 +1426,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
| 1436 | [SVM_EXIT_CPUID] = cpuid_interception, | 1426 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 1437 | [SVM_EXIT_INVD] = emulate_on_interception, | 1427 | [SVM_EXIT_INVD] = emulate_on_interception, |
| 1438 | [SVM_EXIT_HLT] = halt_interception, | 1428 | [SVM_EXIT_HLT] = halt_interception, |
| 1439 | [SVM_EXIT_INVLPG] = emulate_on_interception, | 1429 | [SVM_EXIT_INVLPG] = invlpg_interception, |
| 1440 | [SVM_EXIT_INVLPGA] = invalid_op_interception, | 1430 | [SVM_EXIT_INVLPGA] = invalid_op_interception, |
| 1441 | [SVM_EXIT_IOIO] = io_interception, | 1431 | [SVM_EXIT_IOIO] = io_interception, |
| 1442 | [SVM_EXIT_MSR] = msr_interception, | 1432 | [SVM_EXIT_MSR] = msr_interception, |
| @@ -1538,6 +1528,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
| 1538 | 1528 | ||
| 1539 | KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); | 1529 | KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); |
| 1540 | 1530 | ||
| 1531 | ++svm->vcpu.stat.irq_injections; | ||
| 1541 | control = &svm->vmcb->control; | 1532 | control = &svm->vmcb->control; |
| 1542 | control->int_vector = irq; | 1533 | control->int_vector = irq; |
| 1543 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 1534 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
| @@ -1716,6 +1707,12 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
| 1716 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 1707 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
| 1717 | } | 1708 | } |
| 1718 | 1709 | ||
| 1710 | #ifdef CONFIG_X86_64 | ||
| 1711 | #define R "r" | ||
| 1712 | #else | ||
| 1713 | #define R "e" | ||
| 1714 | #endif | ||
| 1715 | |||
| 1719 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 1716 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 1720 | { | 1717 | { |
| 1721 | struct vcpu_svm *svm = to_svm(vcpu); | 1718 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -1723,6 +1720,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1723 | u16 gs_selector; | 1720 | u16 gs_selector; |
| 1724 | u16 ldt_selector; | 1721 | u16 ldt_selector; |
| 1725 | 1722 | ||
| 1723 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | ||
| 1724 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 1725 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | ||
| 1726 | |||
| 1726 | pre_svm_run(svm); | 1727 | pre_svm_run(svm); |
| 1727 | 1728 | ||
| 1728 | sync_lapic_to_cr8(vcpu); | 1729 | sync_lapic_to_cr8(vcpu); |
| @@ -1750,19 +1751,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1750 | local_irq_enable(); | 1751 | local_irq_enable(); |
| 1751 | 1752 | ||
| 1752 | asm volatile ( | 1753 | asm volatile ( |
| 1754 | "push %%"R"bp; \n\t" | ||
| 1755 | "mov %c[rbx](%[svm]), %%"R"bx \n\t" | ||
| 1756 | "mov %c[rcx](%[svm]), %%"R"cx \n\t" | ||
| 1757 | "mov %c[rdx](%[svm]), %%"R"dx \n\t" | ||
| 1758 | "mov %c[rsi](%[svm]), %%"R"si \n\t" | ||
| 1759 | "mov %c[rdi](%[svm]), %%"R"di \n\t" | ||
| 1760 | "mov %c[rbp](%[svm]), %%"R"bp \n\t" | ||
| 1753 | #ifdef CONFIG_X86_64 | 1761 | #ifdef CONFIG_X86_64 |
| 1754 | "push %%rbp; \n\t" | ||
| 1755 | #else | ||
| 1756 | "push %%ebp; \n\t" | ||
| 1757 | #endif | ||
| 1758 | |||
| 1759 | #ifdef CONFIG_X86_64 | ||
| 1760 | "mov %c[rbx](%[svm]), %%rbx \n\t" | ||
| 1761 | "mov %c[rcx](%[svm]), %%rcx \n\t" | ||
| 1762 | "mov %c[rdx](%[svm]), %%rdx \n\t" | ||
| 1763 | "mov %c[rsi](%[svm]), %%rsi \n\t" | ||
| 1764 | "mov %c[rdi](%[svm]), %%rdi \n\t" | ||
| 1765 | "mov %c[rbp](%[svm]), %%rbp \n\t" | ||
| 1766 | "mov %c[r8](%[svm]), %%r8 \n\t" | 1762 | "mov %c[r8](%[svm]), %%r8 \n\t" |
| 1767 | "mov %c[r9](%[svm]), %%r9 \n\t" | 1763 | "mov %c[r9](%[svm]), %%r9 \n\t" |
| 1768 | "mov %c[r10](%[svm]), %%r10 \n\t" | 1764 | "mov %c[r10](%[svm]), %%r10 \n\t" |
| @@ -1771,41 +1767,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1771 | "mov %c[r13](%[svm]), %%r13 \n\t" | 1767 | "mov %c[r13](%[svm]), %%r13 \n\t" |
| 1772 | "mov %c[r14](%[svm]), %%r14 \n\t" | 1768 | "mov %c[r14](%[svm]), %%r14 \n\t" |
| 1773 | "mov %c[r15](%[svm]), %%r15 \n\t" | 1769 | "mov %c[r15](%[svm]), %%r15 \n\t" |
| 1774 | #else | ||
| 1775 | "mov %c[rbx](%[svm]), %%ebx \n\t" | ||
| 1776 | "mov %c[rcx](%[svm]), %%ecx \n\t" | ||
| 1777 | "mov %c[rdx](%[svm]), %%edx \n\t" | ||
| 1778 | "mov %c[rsi](%[svm]), %%esi \n\t" | ||
| 1779 | "mov %c[rdi](%[svm]), %%edi \n\t" | ||
| 1780 | "mov %c[rbp](%[svm]), %%ebp \n\t" | ||
| 1781 | #endif | 1770 | #endif |
| 1782 | 1771 | ||
| 1783 | #ifdef CONFIG_X86_64 | ||
| 1784 | /* Enter guest mode */ | ||
| 1785 | "push %%rax \n\t" | ||
| 1786 | "mov %c[vmcb](%[svm]), %%rax \n\t" | ||
| 1787 | __ex(SVM_VMLOAD) "\n\t" | ||
| 1788 | __ex(SVM_VMRUN) "\n\t" | ||
| 1789 | __ex(SVM_VMSAVE) "\n\t" | ||
| 1790 | "pop %%rax \n\t" | ||
| 1791 | #else | ||
| 1792 | /* Enter guest mode */ | 1772 | /* Enter guest mode */ |
| 1793 | "push %%eax \n\t" | 1773 | "push %%"R"ax \n\t" |
| 1794 | "mov %c[vmcb](%[svm]), %%eax \n\t" | 1774 | "mov %c[vmcb](%[svm]), %%"R"ax \n\t" |
| 1795 | __ex(SVM_VMLOAD) "\n\t" | 1775 | __ex(SVM_VMLOAD) "\n\t" |
| 1796 | __ex(SVM_VMRUN) "\n\t" | 1776 | __ex(SVM_VMRUN) "\n\t" |
| 1797 | __ex(SVM_VMSAVE) "\n\t" | 1777 | __ex(SVM_VMSAVE) "\n\t" |
| 1798 | "pop %%eax \n\t" | 1778 | "pop %%"R"ax \n\t" |
| 1799 | #endif | ||
| 1800 | 1779 | ||
| 1801 | /* Save guest registers, load host registers */ | 1780 | /* Save guest registers, load host registers */ |
| 1781 | "mov %%"R"bx, %c[rbx](%[svm]) \n\t" | ||
| 1782 | "mov %%"R"cx, %c[rcx](%[svm]) \n\t" | ||
| 1783 | "mov %%"R"dx, %c[rdx](%[svm]) \n\t" | ||
| 1784 | "mov %%"R"si, %c[rsi](%[svm]) \n\t" | ||
| 1785 | "mov %%"R"di, %c[rdi](%[svm]) \n\t" | ||
| 1786 | "mov %%"R"bp, %c[rbp](%[svm]) \n\t" | ||
| 1802 | #ifdef CONFIG_X86_64 | 1787 | #ifdef CONFIG_X86_64 |
| 1803 | "mov %%rbx, %c[rbx](%[svm]) \n\t" | ||
| 1804 | "mov %%rcx, %c[rcx](%[svm]) \n\t" | ||
| 1805 | "mov %%rdx, %c[rdx](%[svm]) \n\t" | ||
| 1806 | "mov %%rsi, %c[rsi](%[svm]) \n\t" | ||
| 1807 | "mov %%rdi, %c[rdi](%[svm]) \n\t" | ||
| 1808 | "mov %%rbp, %c[rbp](%[svm]) \n\t" | ||
| 1809 | "mov %%r8, %c[r8](%[svm]) \n\t" | 1788 | "mov %%r8, %c[r8](%[svm]) \n\t" |
| 1810 | "mov %%r9, %c[r9](%[svm]) \n\t" | 1789 | "mov %%r9, %c[r9](%[svm]) \n\t" |
| 1811 | "mov %%r10, %c[r10](%[svm]) \n\t" | 1790 | "mov %%r10, %c[r10](%[svm]) \n\t" |
| @@ -1814,18 +1793,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1814 | "mov %%r13, %c[r13](%[svm]) \n\t" | 1793 | "mov %%r13, %c[r13](%[svm]) \n\t" |
| 1815 | "mov %%r14, %c[r14](%[svm]) \n\t" | 1794 | "mov %%r14, %c[r14](%[svm]) \n\t" |
| 1816 | "mov %%r15, %c[r15](%[svm]) \n\t" | 1795 | "mov %%r15, %c[r15](%[svm]) \n\t" |
| 1817 | |||
| 1818 | "pop %%rbp; \n\t" | ||
| 1819 | #else | ||
| 1820 | "mov %%ebx, %c[rbx](%[svm]) \n\t" | ||
| 1821 | "mov %%ecx, %c[rcx](%[svm]) \n\t" | ||
| 1822 | "mov %%edx, %c[rdx](%[svm]) \n\t" | ||
| 1823 | "mov %%esi, %c[rsi](%[svm]) \n\t" | ||
| 1824 | "mov %%edi, %c[rdi](%[svm]) \n\t" | ||
| 1825 | "mov %%ebp, %c[rbp](%[svm]) \n\t" | ||
| 1826 | |||
| 1827 | "pop %%ebp; \n\t" | ||
| 1828 | #endif | 1796 | #endif |
| 1797 | "pop %%"R"bp" | ||
| 1829 | : | 1798 | : |
| 1830 | : [svm]"a"(svm), | 1799 | : [svm]"a"(svm), |
| 1831 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), | 1800 | [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), |
| @@ -1846,11 +1815,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1846 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) | 1815 | [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) |
| 1847 | #endif | 1816 | #endif |
| 1848 | : "cc", "memory" | 1817 | : "cc", "memory" |
| 1818 | , R"bx", R"cx", R"dx", R"si", R"di" | ||
| 1849 | #ifdef CONFIG_X86_64 | 1819 | #ifdef CONFIG_X86_64 |
| 1850 | , "rbx", "rcx", "rdx", "rsi", "rdi" | ||
| 1851 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" | 1820 | , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" |
| 1852 | #else | ||
| 1853 | , "ebx", "ecx", "edx" , "esi", "edi" | ||
| 1854 | #endif | 1821 | #endif |
| 1855 | ); | 1822 | ); |
| 1856 | 1823 | ||
| @@ -1858,6 +1825,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1858 | load_db_regs(svm->host_db_regs); | 1825 | load_db_regs(svm->host_db_regs); |
| 1859 | 1826 | ||
| 1860 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | 1827 | vcpu->arch.cr2 = svm->vmcb->save.cr2; |
| 1828 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | ||
| 1829 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | ||
| 1830 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | ||
| 1861 | 1831 | ||
| 1862 | write_dr6(svm->host_dr6); | 1832 | write_dr6(svm->host_dr6); |
| 1863 | write_dr7(svm->host_dr7); | 1833 | write_dr7(svm->host_dr7); |
| @@ -1879,6 +1849,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 1879 | svm->next_rip = 0; | 1849 | svm->next_rip = 0; |
| 1880 | } | 1850 | } |
| 1881 | 1851 | ||
| 1852 | #undef R | ||
| 1853 | |||
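Note on the asm rework above: the "R" macro relies on C string-literal pasting, so %%"R"bx expands to %%rbx on x86_64 and %%ebx on i386, which is what lets the duplicated #ifdef CONFIG_X86_64 register save/restore blocks collapse into a single copy. A minimal stand-alone sketch of the same trick (illustrative only, not part of the patch):

	#ifdef CONFIG_X86_64
	#define R "r"
	#else
	#define R "e"
	#endif

	/* "mov %%"R"ax, %0" pastes into "mov %%rax, %0" or "mov %%eax, %0". */
	static inline unsigned long read_ax(void)
	{
		unsigned long val;

		asm volatile("mov %%"R"ax, %0" : "=r" (val));
		return val;
	}
	#undef R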
| 1882 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) | 1854 | static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) |
| 1883 | { | 1855 | { |
| 1884 | struct vcpu_svm *svm = to_svm(vcpu); | 1856 | struct vcpu_svm *svm = to_svm(vcpu); |
| @@ -1977,8 +1949,6 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 1977 | .set_gdt = svm_set_gdt, | 1949 | .set_gdt = svm_set_gdt, |
| 1978 | .get_dr = svm_get_dr, | 1950 | .get_dr = svm_get_dr, |
| 1979 | .set_dr = svm_set_dr, | 1951 | .set_dr = svm_set_dr, |
| 1980 | .cache_regs = svm_cache_regs, | ||
| 1981 | .decache_regs = svm_decache_regs, | ||
| 1982 | .get_rflags = svm_get_rflags, | 1952 | .get_rflags = svm_get_rflags, |
| 1983 | .set_rflags = svm_set_rflags, | 1953 | .set_rflags = svm_set_rflags, |
| 1984 | 1954 | ||
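The svm.c hunks above drop the cache_regs/decache_regs callbacks in favour of the generic register cache from kvm_cache_regs.h: RAX/RSP/RIP are copied between vcpu->arch.regs and the VMCB only around VMRUN, and everything else reads and writes them through kvm_register_read()/kvm_rip_read()/kvm_rip_write(). Roughly, the helpers behave like the following simplified sketch (the real header is introduced elsewhere in this series, so treat the exact bodies as an approximation):

	static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
						      enum kvm_reg reg)
	{
		/* Fetch lazily: e.g. VMX syncs RSP/RIP from the VMCS on demand. */
		if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
			kvm_x86_ops->cache_reg(vcpu, reg);
		return vcpu->arch.regs[reg];
	}

	static inline void kvm_register_write(struct kvm_vcpu *vcpu,
					      enum kvm_reg reg, unsigned long val)
	{
		vcpu->arch.regs[reg] = val;
		__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
		__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
	}

	static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
	{
		return kvm_register_read(vcpu, VCPU_REGS_RIP);
	}

	static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
	{
		kvm_register_write(vcpu, VCPU_REGS_RIP, val);
	}

On SVM the VMCB copy-out after VMRUN makes every register immediately available, which is why svm_vcpu_reset() above marks regs_avail and regs_dirty as ~0; the VMX side instead fills RSP/RIP on demand through the new vmx_cache_reg() in the hunks that follow.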
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7041cc52b562..2643b430d83a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -26,6 +26,8 @@ | |||
| 26 | #include <linux/highmem.h> | 26 | #include <linux/highmem.h> |
| 27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
| 28 | #include <linux/moduleparam.h> | 28 | #include <linux/moduleparam.h> |
| 29 | #include "kvm_cache_regs.h" | ||
| 30 | #include "x86.h" | ||
| 29 | 31 | ||
| 30 | #include <asm/io.h> | 32 | #include <asm/io.h> |
| 31 | #include <asm/desc.h> | 33 | #include <asm/desc.h> |
| @@ -47,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0); | |||
| 47 | static int enable_ept = 1; | 49 | static int enable_ept = 1; |
| 48 | module_param(enable_ept, bool, 0); | 50 | module_param(enable_ept, bool, 0); |
| 49 | 51 | ||
| 52 | static int emulate_invalid_guest_state = 0; | ||
| 53 | module_param(emulate_invalid_guest_state, bool, 0); | ||
| 54 | |||
| 50 | struct vmcs { | 55 | struct vmcs { |
| 51 | u32 revision_id; | 56 | u32 revision_id; |
| 52 | u32 abort; | 57 | u32 abort; |
| @@ -56,6 +61,7 @@ struct vmcs { | |||
| 56 | struct vcpu_vmx { | 61 | struct vcpu_vmx { |
| 57 | struct kvm_vcpu vcpu; | 62 | struct kvm_vcpu vcpu; |
| 58 | struct list_head local_vcpus_link; | 63 | struct list_head local_vcpus_link; |
| 64 | unsigned long host_rsp; | ||
| 59 | int launched; | 65 | int launched; |
| 60 | u8 fail; | 66 | u8 fail; |
| 61 | u32 idt_vectoring_info; | 67 | u32 idt_vectoring_info; |
| @@ -83,6 +89,7 @@ struct vcpu_vmx { | |||
| 83 | } irq; | 89 | } irq; |
| 84 | } rmode; | 90 | } rmode; |
| 85 | int vpid; | 91 | int vpid; |
| 92 | bool emulation_required; | ||
| 86 | }; | 93 | }; |
| 87 | 94 | ||
| 88 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 95 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
| @@ -468,7 +475,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
| 468 | if (!vcpu->fpu_active) | 475 | if (!vcpu->fpu_active) |
| 469 | eb |= 1u << NM_VECTOR; | 476 | eb |= 1u << NM_VECTOR; |
| 470 | if (vcpu->guest_debug.enabled) | 477 | if (vcpu->guest_debug.enabled) |
| 471 | eb |= 1u << 1; | 478 | eb |= 1u << DB_VECTOR; |
| 472 | if (vcpu->arch.rmode.active) | 479 | if (vcpu->arch.rmode.active) |
| 473 | eb = ~0; | 480 | eb = ~0; |
| 474 | if (vm_need_ept()) | 481 | if (vm_need_ept()) |
| @@ -715,9 +722,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 715 | unsigned long rip; | 722 | unsigned long rip; |
| 716 | u32 interruptibility; | 723 | u32 interruptibility; |
| 717 | 724 | ||
| 718 | rip = vmcs_readl(GUEST_RIP); | 725 | rip = kvm_rip_read(vcpu); |
| 719 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 726 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
| 720 | vmcs_writel(GUEST_RIP, rip); | 727 | kvm_rip_write(vcpu, rip); |
| 721 | 728 | ||
| 722 | /* | 729 | /* |
| 723 | * We emulated an instruction, so temporary interrupt blocking | 730 | * We emulated an instruction, so temporary interrupt blocking |
| @@ -733,19 +740,35 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 733 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 740 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
| 734 | bool has_error_code, u32 error_code) | 741 | bool has_error_code, u32 error_code) |
| 735 | { | 742 | { |
| 743 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 744 | |||
| 745 | if (has_error_code) | ||
| 746 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | ||
| 747 | |||
| 748 | if (vcpu->arch.rmode.active) { | ||
| 749 | vmx->rmode.irq.pending = true; | ||
| 750 | vmx->rmode.irq.vector = nr; | ||
| 751 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | ||
| 752 | if (nr == BP_VECTOR) | ||
| 753 | vmx->rmode.irq.rip++; | ||
| 754 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
| 755 | nr | INTR_TYPE_SOFT_INTR | ||
| 756 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | ||
| 757 | | INTR_INFO_VALID_MASK); | ||
| 758 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | ||
| 759 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); | ||
| 760 | return; | ||
| 761 | } | ||
| 762 | |||
| 736 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 763 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 737 | nr | INTR_TYPE_EXCEPTION | 764 | nr | INTR_TYPE_EXCEPTION |
| 738 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) | 765 | | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0) |
| 739 | | INTR_INFO_VALID_MASK); | 766 | | INTR_INFO_VALID_MASK); |
| 740 | if (has_error_code) | ||
| 741 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); | ||
| 742 | } | 767 | } |
| 743 | 768 | ||
| 744 | static bool vmx_exception_injected(struct kvm_vcpu *vcpu) | 769 | static bool vmx_exception_injected(struct kvm_vcpu *vcpu) |
| 745 | { | 770 | { |
| 746 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 771 | return false; |
| 747 | |||
| 748 | return !(vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); | ||
| 749 | } | 772 | } |
| 750 | 773 | ||
| 751 | /* | 774 | /* |
| @@ -947,24 +970,19 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 947 | return ret; | 970 | return ret; |
| 948 | } | 971 | } |
| 949 | 972 | ||
| 950 | /* | 973 | static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) |
| 951 | * Sync the rsp and rip registers into the vcpu structure. This allows | ||
| 952 | * registers to be accessed by indexing vcpu->arch.regs. | ||
| 953 | */ | ||
| 954 | static void vcpu_load_rsp_rip(struct kvm_vcpu *vcpu) | ||
| 955 | { | ||
| 956 | vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); | ||
| 957 | vcpu->arch.rip = vmcs_readl(GUEST_RIP); | ||
| 958 | } | ||
| 959 | |||
| 960 | /* | ||
| 961 | * Syncs rsp and rip back into the vmcs. Should be called after possible | ||
| 962 | * modification. | ||
| 963 | */ | ||
| 964 | static void vcpu_put_rsp_rip(struct kvm_vcpu *vcpu) | ||
| 965 | { | 974 | { |
| 966 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 975 | __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail); |
| 967 | vmcs_writel(GUEST_RIP, vcpu->arch.rip); | 976 | switch (reg) { |
| 977 | case VCPU_REGS_RSP: | ||
| 978 | vcpu->arch.regs[VCPU_REGS_RSP] = vmcs_readl(GUEST_RSP); | ||
| 979 | break; | ||
| 980 | case VCPU_REGS_RIP: | ||
| 981 | vcpu->arch.regs[VCPU_REGS_RIP] = vmcs_readl(GUEST_RIP); | ||
| 982 | break; | ||
| 983 | default: | ||
| 984 | break; | ||
| 985 | } | ||
| 968 | } | 986 | } |
| 969 | 987 | ||
| 970 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | 988 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) |
| @@ -1007,17 +1025,9 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) | |||
| 1007 | 1025 | ||
| 1008 | static int vmx_get_irq(struct kvm_vcpu *vcpu) | 1026 | static int vmx_get_irq(struct kvm_vcpu *vcpu) |
| 1009 | { | 1027 | { |
| 1010 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1028 | if (!vcpu->arch.interrupt.pending) |
| 1011 | u32 idtv_info_field; | 1029 | return -1; |
| 1012 | 1030 | return vcpu->arch.interrupt.nr; | |
| 1013 | idtv_info_field = vmx->idt_vectoring_info; | ||
| 1014 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | ||
| 1015 | if (is_external_interrupt(idtv_info_field)) | ||
| 1016 | return idtv_info_field & VECTORING_INFO_VECTOR_MASK; | ||
| 1017 | else | ||
| 1018 | printk(KERN_DEBUG "pending exception: not handled yet\n"); | ||
| 1019 | } | ||
| 1020 | return -1; | ||
| 1021 | } | 1031 | } |
| 1022 | 1032 | ||
| 1023 | static __init int cpu_has_kvm_support(void) | 1033 | static __init int cpu_has_kvm_support(void) |
| @@ -1031,9 +1041,9 @@ static __init int vmx_disabled_by_bios(void) | |||
| 1031 | u64 msr; | 1041 | u64 msr; |
| 1032 | 1042 | ||
| 1033 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1043 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
| 1034 | return (msr & (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1044 | return (msr & (FEATURE_CONTROL_LOCKED | |
| 1035 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1045 | FEATURE_CONTROL_VMXON_ENABLED)) |
| 1036 | == MSR_IA32_FEATURE_CONTROL_LOCKED; | 1046 | == FEATURE_CONTROL_LOCKED; |
| 1037 | /* locked but not enabled */ | 1047 | /* locked but not enabled */ |
| 1038 | } | 1048 | } |
| 1039 | 1049 | ||
| @@ -1045,14 +1055,14 @@ static void hardware_enable(void *garbage) | |||
| 1045 | 1055 | ||
| 1046 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1056 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
| 1047 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1057 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
| 1048 | if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1058 | if ((old & (FEATURE_CONTROL_LOCKED | |
| 1049 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1059 | FEATURE_CONTROL_VMXON_ENABLED)) |
| 1050 | != (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1060 | != (FEATURE_CONTROL_LOCKED | |
| 1051 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1061 | FEATURE_CONTROL_VMXON_ENABLED)) |
| 1052 | /* enable and lock */ | 1062 | /* enable and lock */ |
| 1053 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | | 1063 | wrmsrl(MSR_IA32_FEATURE_CONTROL, old | |
| 1054 | MSR_IA32_FEATURE_CONTROL_LOCKED | | 1064 | FEATURE_CONTROL_LOCKED | |
| 1055 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); | 1065 | FEATURE_CONTROL_VMXON_ENABLED); |
| 1056 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1066 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
| 1057 | asm volatile (ASM_VMX_VMXON_RAX | 1067 | asm volatile (ASM_VMX_VMXON_RAX |
| 1058 | : : "a"(&phys_addr), "m"(phys_addr) | 1068 | : : "a"(&phys_addr), "m"(phys_addr) |
| @@ -1120,7 +1130,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 1120 | CPU_BASED_CR3_STORE_EXITING | | 1130 | CPU_BASED_CR3_STORE_EXITING | |
| 1121 | CPU_BASED_USE_IO_BITMAPS | | 1131 | CPU_BASED_USE_IO_BITMAPS | |
| 1122 | CPU_BASED_MOV_DR_EXITING | | 1132 | CPU_BASED_MOV_DR_EXITING | |
| 1123 | CPU_BASED_USE_TSC_OFFSETING; | 1133 | CPU_BASED_USE_TSC_OFFSETING | |
| 1134 | CPU_BASED_INVLPG_EXITING; | ||
| 1124 | opt = CPU_BASED_TPR_SHADOW | | 1135 | opt = CPU_BASED_TPR_SHADOW | |
| 1125 | CPU_BASED_USE_MSR_BITMAPS | | 1136 | CPU_BASED_USE_MSR_BITMAPS | |
| 1126 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; | 1137 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
| @@ -1149,9 +1160,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 1149 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; | 1160 | _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; |
| 1150 | #endif | 1161 | #endif |
| 1151 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { | 1162 | if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { |
| 1152 | /* CR3 accesses don't need to cause VM Exits when EPT enabled */ | 1163 | /* CR3 accesses and invlpg don't need to cause VM Exits when EPT |
| 1164 | enabled */ | ||
| 1153 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | | 1165 | min &= ~(CPU_BASED_CR3_LOAD_EXITING | |
| 1154 | CPU_BASED_CR3_STORE_EXITING); | 1166 | CPU_BASED_CR3_STORE_EXITING | |
| 1167 | CPU_BASED_INVLPG_EXITING); | ||
| 1155 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, | 1168 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, |
| 1156 | &_cpu_based_exec_control) < 0) | 1169 | &_cpu_based_exec_control) < 0) |
| 1157 | return -EIO; | 1170 | return -EIO; |
| @@ -1288,7 +1301,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) | |||
| 1288 | static void enter_pmode(struct kvm_vcpu *vcpu) | 1301 | static void enter_pmode(struct kvm_vcpu *vcpu) |
| 1289 | { | 1302 | { |
| 1290 | unsigned long flags; | 1303 | unsigned long flags; |
| 1304 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 1291 | 1305 | ||
| 1306 | vmx->emulation_required = 1; | ||
| 1292 | vcpu->arch.rmode.active = 0; | 1307 | vcpu->arch.rmode.active = 0; |
| 1293 | 1308 | ||
| 1294 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); | 1309 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); |
| @@ -1305,6 +1320,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
| 1305 | 1320 | ||
| 1306 | update_exception_bitmap(vcpu); | 1321 | update_exception_bitmap(vcpu); |
| 1307 | 1322 | ||
| 1323 | if (emulate_invalid_guest_state) | ||
| 1324 | return; | ||
| 1325 | |||
| 1308 | fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); | 1326 | fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es); |
| 1309 | fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); | 1327 | fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds); |
| 1310 | fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); | 1328 | fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); |
| @@ -1345,7 +1363,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save) | |||
| 1345 | static void enter_rmode(struct kvm_vcpu *vcpu) | 1363 | static void enter_rmode(struct kvm_vcpu *vcpu) |
| 1346 | { | 1364 | { |
| 1347 | unsigned long flags; | 1365 | unsigned long flags; |
| 1366 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 1348 | 1367 | ||
| 1368 | vmx->emulation_required = 1; | ||
| 1349 | vcpu->arch.rmode.active = 1; | 1369 | vcpu->arch.rmode.active = 1; |
| 1350 | 1370 | ||
| 1351 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1371 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
| @@ -1367,6 +1387,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 1367 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); | 1387 | vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); |
| 1368 | update_exception_bitmap(vcpu); | 1388 | update_exception_bitmap(vcpu); |
| 1369 | 1389 | ||
| 1390 | if (emulate_invalid_guest_state) | ||
| 1391 | goto continue_rmode; | ||
| 1392 | |||
| 1370 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); | 1393 | vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); |
| 1371 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); | 1394 | vmcs_write32(GUEST_SS_LIMIT, 0xffff); |
| 1372 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); | 1395 | vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); |
| @@ -1382,6 +1405,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
| 1382 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); | 1405 | fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs); |
| 1383 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); | 1406 | fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs); |
| 1384 | 1407 | ||
| 1408 | continue_rmode: | ||
| 1385 | kvm_mmu_reset_context(vcpu); | 1409 | kvm_mmu_reset_context(vcpu); |
| 1386 | init_rmode(vcpu->kvm); | 1410 | init_rmode(vcpu->kvm); |
| 1387 | } | 1411 | } |
| @@ -1715,6 +1739,186 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) | |||
| 1715 | vmcs_writel(GUEST_GDTR_BASE, dt->base); | 1739 | vmcs_writel(GUEST_GDTR_BASE, dt->base); |
| 1716 | } | 1740 | } |
| 1717 | 1741 | ||
| 1742 | static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) | ||
| 1743 | { | ||
| 1744 | struct kvm_segment var; | ||
| 1745 | u32 ar; | ||
| 1746 | |||
| 1747 | vmx_get_segment(vcpu, &var, seg); | ||
| 1748 | ar = vmx_segment_access_rights(&var); | ||
| 1749 | |||
| 1750 | if (var.base != (var.selector << 4)) | ||
| 1751 | return false; | ||
| 1752 | if (var.limit != 0xffff) | ||
| 1753 | return false; | ||
| 1754 | if (ar != 0xf3) | ||
| 1755 | return false; | ||
| 1756 | |||
| 1757 | return true; | ||
| 1758 | } | ||
| 1759 | |||
| 1760 | static bool code_segment_valid(struct kvm_vcpu *vcpu) | ||
| 1761 | { | ||
| 1762 | struct kvm_segment cs; | ||
| 1763 | unsigned int cs_rpl; | ||
| 1764 | |||
| 1765 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
| 1766 | cs_rpl = cs.selector & SELECTOR_RPL_MASK; | ||
| 1767 | |||
| 1768 | if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK)) | ||
| 1769 | return false; | ||
| 1770 | if (!cs.s) | ||
| 1771 | return false; | ||
| 1772 | if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) { | ||
| 1773 | if (cs.dpl > cs_rpl) | ||
| 1774 | return false; | ||
| 1775 | } else if (cs.type & AR_TYPE_CODE_MASK) { | ||
| 1776 | if (cs.dpl != cs_rpl) | ||
| 1777 | return false; | ||
| 1778 | } | ||
| 1779 | if (!cs.present) | ||
| 1780 | return false; | ||
| 1781 | |||
| 1782 | /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */ | ||
| 1783 | return true; | ||
| 1784 | } | ||
| 1785 | |||
| 1786 | static bool stack_segment_valid(struct kvm_vcpu *vcpu) | ||
| 1787 | { | ||
| 1788 | struct kvm_segment ss; | ||
| 1789 | unsigned int ss_rpl; | ||
| 1790 | |||
| 1791 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); | ||
| 1792 | ss_rpl = ss.selector & SELECTOR_RPL_MASK; | ||
| 1793 | |||
| 1794 | if ((ss.type != 3) && (ss.type != 7)) | ||
| 1795 | return false; | ||
| 1796 | if (!ss.s) | ||
| 1797 | return false; | ||
| 1798 | if (ss.dpl != ss_rpl) /* DPL != RPL */ | ||
| 1799 | return false; | ||
| 1800 | if (!ss.present) | ||
| 1801 | return false; | ||
| 1802 | |||
| 1803 | return true; | ||
| 1804 | } | ||
| 1805 | |||
| 1806 | static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) | ||
| 1807 | { | ||
| 1808 | struct kvm_segment var; | ||
| 1809 | unsigned int rpl; | ||
| 1810 | |||
| 1811 | vmx_get_segment(vcpu, &var, seg); | ||
| 1812 | rpl = var.selector & SELECTOR_RPL_MASK; | ||
| 1813 | |||
| 1814 | if (!var.s) | ||
| 1815 | return false; | ||
| 1816 | if (!var.present) | ||
| 1817 | return false; | ||
| 1818 | if (~var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) { | ||
| 1819 | if (var.dpl < rpl) /* DPL < RPL */ | ||
| 1820 | return false; | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | /* TODO: Add other members to kvm_segment_field to allow checking for other access | ||
| 1824 | * rights flags | ||
| 1825 | */ | ||
| 1826 | return true; | ||
| 1827 | } | ||
| 1828 | |||
| 1829 | static bool tr_valid(struct kvm_vcpu *vcpu) | ||
| 1830 | { | ||
| 1831 | struct kvm_segment tr; | ||
| 1832 | |||
| 1833 | vmx_get_segment(vcpu, &tr, VCPU_SREG_TR); | ||
| 1834 | |||
| 1835 | if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ | ||
| 1836 | return false; | ||
| 1837 | if ((tr.type != 3) && (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */ | ||
| 1838 | return false; | ||
| 1839 | if (!tr.present) | ||
| 1840 | return false; | ||
| 1841 | |||
| 1842 | return true; | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | static bool ldtr_valid(struct kvm_vcpu *vcpu) | ||
| 1846 | { | ||
| 1847 | struct kvm_segment ldtr; | ||
| 1848 | |||
| 1849 | vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR); | ||
| 1850 | |||
| 1851 | if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ | ||
| 1852 | return false; | ||
| 1853 | if (ldtr.type != 2) | ||
| 1854 | return false; | ||
| 1855 | if (!ldtr.present) | ||
| 1856 | return false; | ||
| 1857 | |||
| 1858 | return true; | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) | ||
| 1862 | { | ||
| 1863 | struct kvm_segment cs, ss; | ||
| 1864 | |||
| 1865 | vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); | ||
| 1866 | vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); | ||
| 1867 | |||
| 1868 | return ((cs.selector & SELECTOR_RPL_MASK) == | ||
| 1869 | (ss.selector & SELECTOR_RPL_MASK)); | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | /* | ||
| 1873 | * Check if guest state is valid. Returns true if valid, false if | ||
| 1874 | * not. | ||
| 1875 | * We assume that registers are always usable | ||
| 1876 | */ | ||
| 1877 | static bool guest_state_valid(struct kvm_vcpu *vcpu) | ||
| 1878 | { | ||
| 1879 | /* real mode guest state checks */ | ||
| 1880 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) { | ||
| 1881 | if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) | ||
| 1882 | return false; | ||
| 1883 | if (!rmode_segment_valid(vcpu, VCPU_SREG_SS)) | ||
| 1884 | return false; | ||
| 1885 | if (!rmode_segment_valid(vcpu, VCPU_SREG_DS)) | ||
| 1886 | return false; | ||
| 1887 | if (!rmode_segment_valid(vcpu, VCPU_SREG_ES)) | ||
| 1888 | return false; | ||
| 1889 | if (!rmode_segment_valid(vcpu, VCPU_SREG_FS)) | ||
| 1890 | return false; | ||
| 1891 | if (!rmode_segment_valid(vcpu, VCPU_SREG_GS)) | ||
| 1892 | return false; | ||
| 1893 | } else { | ||
| 1894 | /* protected mode guest state checks */ | ||
| 1895 | if (!cs_ss_rpl_check(vcpu)) | ||
| 1896 | return false; | ||
| 1897 | if (!code_segment_valid(vcpu)) | ||
| 1898 | return false; | ||
| 1899 | if (!stack_segment_valid(vcpu)) | ||
| 1900 | return false; | ||
| 1901 | if (!data_segment_valid(vcpu, VCPU_SREG_DS)) | ||
| 1902 | return false; | ||
| 1903 | if (!data_segment_valid(vcpu, VCPU_SREG_ES)) | ||
| 1904 | return false; | ||
| 1905 | if (!data_segment_valid(vcpu, VCPU_SREG_FS)) | ||
| 1906 | return false; | ||
| 1907 | if (!data_segment_valid(vcpu, VCPU_SREG_GS)) | ||
| 1908 | return false; | ||
| 1909 | if (!tr_valid(vcpu)) | ||
| 1910 | return false; | ||
| 1911 | if (!ldtr_valid(vcpu)) | ||
| 1912 | return false; | ||
| 1913 | } | ||
| 1914 | /* TODO: | ||
| 1915 | * - Add checks on RIP | ||
| 1916 | * - Add checks on RFLAGS | ||
| 1917 | */ | ||
| 1918 | |||
| 1919 | return true; | ||
| 1920 | } | ||
| 1921 | |||
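The real-mode half of guest_state_valid() encodes the invariant that VMX's vm86-based real-mode support depends on: each segment must look exactly like a real-mode segment, i.e. base = selector * 16, a 64 KiB limit, and access rights 0xf3. A small illustrative sketch of the underlying arithmetic (example values, not taken from the patch):

	/* Real-mode address translation: linear = selector * 16 + offset.
	 * rmode_segment_valid() above insists that base == selector << 4 so
	 * this relation keeps holding while the guest runs in vm86 mode. */
	static unsigned long rmode_linear(unsigned short selector, unsigned short offset)
	{
		return ((unsigned long)selector << 4) + offset;	/* 0xf000:0xfff0 -> 0xffff0 */
	}

This is the same relation the reset code preserves when it programs cs.base = sipi_vector << 12 against cs.selector = sipi_vector << 8.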
| 1718 | static int init_rmode_tss(struct kvm *kvm) | 1922 | static int init_rmode_tss(struct kvm *kvm) |
| 1719 | { | 1923 | { |
| 1720 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 1924 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; |
| @@ -1726,7 +1930,8 @@ static int init_rmode_tss(struct kvm *kvm) | |||
| 1726 | if (r < 0) | 1930 | if (r < 0) |
| 1727 | goto out; | 1931 | goto out; |
| 1728 | data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; | 1932 | data = TSS_BASE_SIZE + TSS_REDIRECTION_SIZE; |
| 1729 | r = kvm_write_guest_page(kvm, fn++, &data, 0x66, sizeof(u16)); | 1933 | r = kvm_write_guest_page(kvm, fn++, &data, |
| 1934 | TSS_IOPB_BASE_OFFSET, sizeof(u16)); | ||
| 1730 | if (r < 0) | 1935 | if (r < 0) |
| 1731 | goto out; | 1936 | goto out; |
| 1732 | r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); | 1937 | r = kvm_clear_guest_page(kvm, fn++, 0, PAGE_SIZE); |
| @@ -1789,7 +1994,7 @@ static void seg_setup(int seg) | |||
| 1789 | vmcs_write16(sf->selector, 0); | 1994 | vmcs_write16(sf->selector, 0); |
| 1790 | vmcs_writel(sf->base, 0); | 1995 | vmcs_writel(sf->base, 0); |
| 1791 | vmcs_write32(sf->limit, 0xffff); | 1996 | vmcs_write32(sf->limit, 0xffff); |
| 1792 | vmcs_write32(sf->ar_bytes, 0x93); | 1997 | vmcs_write32(sf->ar_bytes, 0xf3); |
| 1793 | } | 1998 | } |
| 1794 | 1999 | ||
| 1795 | static int alloc_apic_access_page(struct kvm *kvm) | 2000 | static int alloc_apic_access_page(struct kvm *kvm) |
| @@ -1808,9 +2013,7 @@ static int alloc_apic_access_page(struct kvm *kvm) | |||
| 1808 | if (r) | 2013 | if (r) |
| 1809 | goto out; | 2014 | goto out; |
| 1810 | 2015 | ||
| 1811 | down_read(¤t->mm->mmap_sem); | ||
| 1812 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); | 2016 | kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); |
| 1813 | up_read(¤t->mm->mmap_sem); | ||
| 1814 | out: | 2017 | out: |
| 1815 | up_write(&kvm->slots_lock); | 2018 | up_write(&kvm->slots_lock); |
| 1816 | return r; | 2019 | return r; |
| @@ -1832,10 +2035,8 @@ static int alloc_identity_pagetable(struct kvm *kvm) | |||
| 1832 | if (r) | 2035 | if (r) |
| 1833 | goto out; | 2036 | goto out; |
| 1834 | 2037 | ||
| 1835 | down_read(¤t->mm->mmap_sem); | ||
| 1836 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, | 2038 | kvm->arch.ept_identity_pagetable = gfn_to_page(kvm, |
| 1837 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); | 2039 | VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT); |
| 1838 | up_read(¤t->mm->mmap_sem); | ||
| 1839 | out: | 2040 | out: |
| 1840 | up_write(&kvm->slots_lock); | 2041 | up_write(&kvm->slots_lock); |
| 1841 | return r; | 2042 | return r; |
| @@ -1917,7 +2118,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 1917 | } | 2118 | } |
| 1918 | if (!vm_need_ept()) | 2119 | if (!vm_need_ept()) |
| 1919 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | 2120 | exec_control |= CPU_BASED_CR3_STORE_EXITING | |
| 1920 | CPU_BASED_CR3_LOAD_EXITING; | 2121 | CPU_BASED_CR3_LOAD_EXITING | |
| 2122 | CPU_BASED_INVLPG_EXITING; | ||
| 1921 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); | 2123 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); |
| 1922 | 2124 | ||
| 1923 | if (cpu_has_secondary_exec_ctrls()) { | 2125 | if (cpu_has_secondary_exec_ctrls()) { |
| @@ -2019,6 +2221,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2019 | u64 msr; | 2221 | u64 msr; |
| 2020 | int ret; | 2222 | int ret; |
| 2021 | 2223 | ||
| 2224 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | ||
| 2022 | down_read(&vcpu->kvm->slots_lock); | 2225 | down_read(&vcpu->kvm->slots_lock); |
| 2023 | if (!init_rmode(vmx->vcpu.kvm)) { | 2226 | if (!init_rmode(vmx->vcpu.kvm)) { |
| 2024 | ret = -ENOMEM; | 2227 | ret = -ENOMEM; |
| @@ -2036,6 +2239,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2036 | 2239 | ||
| 2037 | fx_init(&vmx->vcpu); | 2240 | fx_init(&vmx->vcpu); |
| 2038 | 2241 | ||
| 2242 | seg_setup(VCPU_SREG_CS); | ||
| 2039 | /* | 2243 | /* |
| 2040 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode | 2244 | * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode |
| 2041 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. | 2245 | * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. |
| @@ -2047,8 +2251,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2047 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); | 2251 | vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); |
| 2048 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); | 2252 | vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); |
| 2049 | } | 2253 | } |
| 2050 | vmcs_write32(GUEST_CS_LIMIT, 0xffff); | ||
| 2051 | vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); | ||
| 2052 | 2254 | ||
| 2053 | seg_setup(VCPU_SREG_DS); | 2255 | seg_setup(VCPU_SREG_DS); |
| 2054 | seg_setup(VCPU_SREG_ES); | 2256 | seg_setup(VCPU_SREG_ES); |
| @@ -2072,10 +2274,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2072 | 2274 | ||
| 2073 | vmcs_writel(GUEST_RFLAGS, 0x02); | 2275 | vmcs_writel(GUEST_RFLAGS, 0x02); |
| 2074 | if (vmx->vcpu.vcpu_id == 0) | 2276 | if (vmx->vcpu.vcpu_id == 0) |
| 2075 | vmcs_writel(GUEST_RIP, 0xfff0); | 2277 | kvm_rip_write(vcpu, 0xfff0); |
| 2076 | else | 2278 | else |
| 2077 | vmcs_writel(GUEST_RIP, 0); | 2279 | kvm_rip_write(vcpu, 0); |
| 2078 | vmcs_writel(GUEST_RSP, 0); | 2280 | kvm_register_write(vcpu, VCPU_REGS_RSP, 0); |
| 2079 | 2281 | ||
| 2080 | /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ | 2282 | /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */ |
| 2081 | vmcs_writel(GUEST_DR7, 0x400); | 2283 | vmcs_writel(GUEST_DR7, 0x400); |
| @@ -2125,6 +2327,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2125 | 2327 | ||
| 2126 | ret = 0; | 2328 | ret = 0; |
| 2127 | 2329 | ||
| 2330 | /* HACK: Don't enable emulation on guest boot/reset */ | ||
| 2331 | vmx->emulation_required = 0; | ||
| 2332 | |||
| 2128 | out: | 2333 | out: |
| 2129 | up_read(&vcpu->kvm->slots_lock); | 2334 | up_read(&vcpu->kvm->slots_lock); |
| 2130 | return ret; | 2335 | return ret; |
| @@ -2136,14 +2341,15 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
| 2136 | 2341 | ||
| 2137 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); | 2342 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); |
| 2138 | 2343 | ||
| 2344 | ++vcpu->stat.irq_injections; | ||
| 2139 | if (vcpu->arch.rmode.active) { | 2345 | if (vcpu->arch.rmode.active) { |
| 2140 | vmx->rmode.irq.pending = true; | 2346 | vmx->rmode.irq.pending = true; |
| 2141 | vmx->rmode.irq.vector = irq; | 2347 | vmx->rmode.irq.vector = irq; |
| 2142 | vmx->rmode.irq.rip = vmcs_readl(GUEST_RIP); | 2348 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
| 2143 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2349 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 2144 | irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); | 2350 | irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK); |
| 2145 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | 2351 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); |
| 2146 | vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip - 1); | 2352 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); |
| 2147 | return; | 2353 | return; |
| 2148 | } | 2354 | } |
| 2149 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2355 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| @@ -2154,7 +2360,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
| 2154 | { | 2360 | { |
| 2155 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2361 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
| 2156 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2362 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
| 2157 | vcpu->arch.nmi_pending = 0; | ||
| 2158 | } | 2363 | } |
| 2159 | 2364 | ||
| 2160 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 2365 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) |
| @@ -2166,7 +2371,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | |||
| 2166 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | 2371 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); |
| 2167 | if (!vcpu->arch.irq_pending[word_index]) | 2372 | if (!vcpu->arch.irq_pending[word_index]) |
| 2168 | clear_bit(word_index, &vcpu->arch.irq_summary); | 2373 | clear_bit(word_index, &vcpu->arch.irq_summary); |
| 2169 | vmx_inject_irq(vcpu, irq); | 2374 | kvm_queue_interrupt(vcpu, irq); |
| 2170 | } | 2375 | } |
| 2171 | 2376 | ||
| 2172 | 2377 | ||
| @@ -2180,13 +2385,12 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, | |||
| 2180 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); | 2385 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); |
| 2181 | 2386 | ||
| 2182 | if (vcpu->arch.interrupt_window_open && | 2387 | if (vcpu->arch.interrupt_window_open && |
| 2183 | vcpu->arch.irq_summary && | 2388 | vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) |
| 2184 | !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK)) | ||
| 2185 | /* | ||
| 2186 | * If interrupts enabled, and not blocked by sti or mov ss. Good. | ||
| 2187 | */ | ||
| 2188 | kvm_do_inject_irq(vcpu); | 2389 | kvm_do_inject_irq(vcpu); |
| 2189 | 2390 | ||
| 2391 | if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) | ||
| 2392 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
| 2393 | |||
| 2190 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2394 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
| 2191 | if (!vcpu->arch.interrupt_window_open && | 2395 | if (!vcpu->arch.interrupt_window_open && |
| 2192 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) | 2396 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) |
| @@ -2237,9 +2441,6 @@ static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) | |||
| 2237 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, | 2441 | static int handle_rmode_exception(struct kvm_vcpu *vcpu, |
| 2238 | int vec, u32 err_code) | 2442 | int vec, u32 err_code) |
| 2239 | { | 2443 | { |
| 2240 | if (!vcpu->arch.rmode.active) | ||
| 2241 | return 0; | ||
| 2242 | |||
| 2243 | /* | 2444 | /* |
| 2244 | * Instruction with address size override prefix opcode 0x67 | 2445 | * Instruction with address size override prefix opcode 0x67 |
| 2245 | * Cause the #SS fault with 0 error code in VM86 mode. | 2446 | * Cause the #SS fault with 0 error code in VM86 mode. |
| @@ -2247,6 +2448,25 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
| 2247 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2448 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
| 2248 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2449 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) |
| 2249 | return 1; | 2450 | return 1; |
| 2451 | /* | ||
| 2452 | * Forward all other exceptions that are valid in real mode. | ||
| 2453 | * FIXME: Breaks guest debugging in real mode, needs to be fixed with | ||
| 2454 | * the required debugging infrastructure rework. | ||
| 2455 | */ | ||
| 2456 | switch (vec) { | ||
| 2457 | case DE_VECTOR: | ||
| 2458 | case DB_VECTOR: | ||
| 2459 | case BP_VECTOR: | ||
| 2460 | case OF_VECTOR: | ||
| 2461 | case BR_VECTOR: | ||
| 2462 | case UD_VECTOR: | ||
| 2463 | case DF_VECTOR: | ||
| 2464 | case SS_VECTOR: | ||
| 2465 | case GP_VECTOR: | ||
| 2466 | case MF_VECTOR: | ||
| 2467 | kvm_queue_exception(vcpu, vec); | ||
| 2468 | return 1; | ||
| 2469 | } | ||
| 2250 | return 0; | 2470 | return 0; |
| 2251 | } | 2471 | } |
| 2252 | 2472 | ||
| @@ -2288,7 +2508,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2288 | } | 2508 | } |
| 2289 | 2509 | ||
| 2290 | error_code = 0; | 2510 | error_code = 0; |
| 2291 | rip = vmcs_readl(GUEST_RIP); | 2511 | rip = kvm_rip_read(vcpu); |
| 2292 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) | 2512 | if (intr_info & INTR_INFO_DELIVER_CODE_MASK) |
| 2293 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 2513 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
| 2294 | if (is_page_fault(intr_info)) { | 2514 | if (is_page_fault(intr_info)) { |
| @@ -2298,7 +2518,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2518 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
| 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2519 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
| 2300 | (u32)((u64)cr2 >> 32), handler); | 2520 | (u32)((u64)cr2 >> 32), handler); |
| 2301 | if (vect_info & VECTORING_INFO_VALID_MASK) | 2521 | if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending) |
| 2302 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 2522 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
| 2303 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2523 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
| 2304 | } | 2524 | } |
| @@ -2386,27 +2606,25 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2386 | reg = (exit_qualification >> 8) & 15; | 2606 | reg = (exit_qualification >> 8) & 15; |
| 2387 | switch ((exit_qualification >> 4) & 3) { | 2607 | switch ((exit_qualification >> 4) & 3) { |
| 2388 | case 0: /* mov to cr */ | 2608 | case 0: /* mov to cr */ |
| 2389 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg], | 2609 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, |
| 2390 | (u32)((u64)vcpu->arch.regs[reg] >> 32), handler); | 2610 | (u32)kvm_register_read(vcpu, reg), |
| 2611 | (u32)((u64)kvm_register_read(vcpu, reg) >> 32), | ||
| 2612 | handler); | ||
| 2391 | switch (cr) { | 2613 | switch (cr) { |
| 2392 | case 0: | 2614 | case 0: |
| 2393 | vcpu_load_rsp_rip(vcpu); | 2615 | kvm_set_cr0(vcpu, kvm_register_read(vcpu, reg)); |
| 2394 | kvm_set_cr0(vcpu, vcpu->arch.regs[reg]); | ||
| 2395 | skip_emulated_instruction(vcpu); | 2616 | skip_emulated_instruction(vcpu); |
| 2396 | return 1; | 2617 | return 1; |
| 2397 | case 3: | 2618 | case 3: |
| 2398 | vcpu_load_rsp_rip(vcpu); | 2619 | kvm_set_cr3(vcpu, kvm_register_read(vcpu, reg)); |
| 2399 | kvm_set_cr3(vcpu, vcpu->arch.regs[reg]); | ||
| 2400 | skip_emulated_instruction(vcpu); | 2620 | skip_emulated_instruction(vcpu); |
| 2401 | return 1; | 2621 | return 1; |
| 2402 | case 4: | 2622 | case 4: |
| 2403 | vcpu_load_rsp_rip(vcpu); | 2623 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); |
| 2404 | kvm_set_cr4(vcpu, vcpu->arch.regs[reg]); | ||
| 2405 | skip_emulated_instruction(vcpu); | 2624 | skip_emulated_instruction(vcpu); |
| 2406 | return 1; | 2625 | return 1; |
| 2407 | case 8: | 2626 | case 8: |
| 2408 | vcpu_load_rsp_rip(vcpu); | 2627 | kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); |
| 2409 | kvm_set_cr8(vcpu, vcpu->arch.regs[reg]); | ||
| 2410 | skip_emulated_instruction(vcpu); | 2628 | skip_emulated_instruction(vcpu); |
| 2411 | if (irqchip_in_kernel(vcpu->kvm)) | 2629 | if (irqchip_in_kernel(vcpu->kvm)) |
| 2412 | return 1; | 2630 | return 1; |
| @@ -2415,7 +2633,6 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2415 | }; | 2633 | }; |
| 2416 | break; | 2634 | break; |
| 2417 | case 2: /* clts */ | 2635 | case 2: /* clts */ |
| 2418 | vcpu_load_rsp_rip(vcpu); | ||
| 2419 | vmx_fpu_deactivate(vcpu); | 2636 | vmx_fpu_deactivate(vcpu); |
| 2420 | vcpu->arch.cr0 &= ~X86_CR0_TS; | 2637 | vcpu->arch.cr0 &= ~X86_CR0_TS; |
| 2421 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); | 2638 | vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); |
| @@ -2426,21 +2643,17 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2426 | case 1: /*mov from cr*/ | 2643 | case 1: /*mov from cr*/ |
| 2427 | switch (cr) { | 2644 | switch (cr) { |
| 2428 | case 3: | 2645 | case 3: |
| 2429 | vcpu_load_rsp_rip(vcpu); | 2646 | kvm_register_write(vcpu, reg, vcpu->arch.cr3); |
| 2430 | vcpu->arch.regs[reg] = vcpu->arch.cr3; | ||
| 2431 | vcpu_put_rsp_rip(vcpu); | ||
| 2432 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, | 2647 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, |
| 2433 | (u32)vcpu->arch.regs[reg], | 2648 | (u32)kvm_register_read(vcpu, reg), |
| 2434 | (u32)((u64)vcpu->arch.regs[reg] >> 32), | 2649 | (u32)((u64)kvm_register_read(vcpu, reg) >> 32), |
| 2435 | handler); | 2650 | handler); |
| 2436 | skip_emulated_instruction(vcpu); | 2651 | skip_emulated_instruction(vcpu); |
| 2437 | return 1; | 2652 | return 1; |
| 2438 | case 8: | 2653 | case 8: |
| 2439 | vcpu_load_rsp_rip(vcpu); | 2654 | kvm_register_write(vcpu, reg, kvm_get_cr8(vcpu)); |
| 2440 | vcpu->arch.regs[reg] = kvm_get_cr8(vcpu); | ||
| 2441 | vcpu_put_rsp_rip(vcpu); | ||
| 2442 | KVMTRACE_2D(CR_READ, vcpu, (u32)cr, | 2655 | KVMTRACE_2D(CR_READ, vcpu, (u32)cr, |
| 2443 | (u32)vcpu->arch.regs[reg], handler); | 2656 | (u32)kvm_register_read(vcpu, reg), handler); |
| 2444 | skip_emulated_instruction(vcpu); | 2657 | skip_emulated_instruction(vcpu); |
| 2445 | return 1; | 2658 | return 1; |
| 2446 | } | 2659 | } |
| @@ -2472,7 +2685,6 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2472 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 2685 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 2473 | dr = exit_qualification & 7; | 2686 | dr = exit_qualification & 7; |
| 2474 | reg = (exit_qualification >> 8) & 15; | 2687 | reg = (exit_qualification >> 8) & 15; |
| 2475 | vcpu_load_rsp_rip(vcpu); | ||
| 2476 | if (exit_qualification & 16) { | 2688 | if (exit_qualification & 16) { |
| 2477 | /* mov from dr */ | 2689 | /* mov from dr */ |
| 2478 | switch (dr) { | 2690 | switch (dr) { |
| @@ -2485,12 +2697,11 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2485 | default: | 2697 | default: |
| 2486 | val = 0; | 2698 | val = 0; |
| 2487 | } | 2699 | } |
| 2488 | vcpu->arch.regs[reg] = val; | 2700 | kvm_register_write(vcpu, reg, val); |
| 2489 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); | 2701 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); |
| 2490 | } else { | 2702 | } else { |
| 2491 | /* mov to dr */ | 2703 | /* mov to dr */ |
| 2492 | } | 2704 | } |
| 2493 | vcpu_put_rsp_rip(vcpu); | ||
| 2494 | skip_emulated_instruction(vcpu); | 2705 | skip_emulated_instruction(vcpu); |
| 2495 | return 1; | 2706 | return 1; |
| 2496 | } | 2707 | } |
| @@ -2583,6 +2794,15 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2583 | return 1; | 2794 | return 1; |
| 2584 | } | 2795 | } |
| 2585 | 2796 | ||
| 2797 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
| 2798 | { | ||
| 2799 | u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | ||
| 2800 | |||
| 2801 | kvm_mmu_invlpg(vcpu, exit_qualification); | ||
| 2802 | skip_emulated_instruction(vcpu); | ||
| 2803 | return 1; | ||
| 2804 | } | ||
| 2805 | |||
| 2586 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2806 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2587 | { | 2807 | { |
| 2588 | skip_emulated_instruction(vcpu); | 2808 | skip_emulated_instruction(vcpu); |
| @@ -2695,6 +2915,43 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2695 | return 1; | 2915 | return 1; |
| 2696 | } | 2916 | } |
| 2697 | 2917 | ||
| 2918 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | ||
| 2919 | struct kvm_run *kvm_run) | ||
| 2920 | { | ||
| 2921 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 2922 | int err; | ||
| 2923 | |||
| 2924 | preempt_enable(); | ||
| 2925 | local_irq_enable(); | ||
| 2926 | |||
| 2927 | while (!guest_state_valid(vcpu)) { | ||
| 2928 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | ||
| 2929 | |||
| 2930 | switch (err) { | ||
| 2931 | case EMULATE_DONE: | ||
| 2932 | break; | ||
| 2933 | case EMULATE_DO_MMIO: | ||
| 2934 | kvm_report_emulation_failure(vcpu, "mmio"); | ||
| 2935 | /* TODO: Handle MMIO */ | ||
| 2936 | return; | ||
| 2937 | default: | ||
| 2938 | kvm_report_emulation_failure(vcpu, "emulation failure"); | ||
| 2939 | return; | ||
| 2940 | } | ||
| 2941 | |||
| 2942 | if (signal_pending(current)) | ||
| 2943 | break; | ||
| 2944 | if (need_resched()) | ||
| 2945 | schedule(); | ||
| 2946 | } | ||
| 2947 | |||
| 2948 | local_irq_disable(); | ||
| 2949 | preempt_disable(); | ||
| 2950 | |||
| 2951 | /* Guest state should be valid now, no more emulation should be needed */ | ||
| 2952 | vmx->emulation_required = 0; | ||
| 2953 | } | ||
| 2954 | |||
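handle_invalid_guest_state() is not installed in the exit-handler table below; the intent is that the VMX entry path consults emulation_required (set by enter_pmode()/enter_rmode() above and cleared once guest_state_valid() passes) before attempting a hardware VM entry. A sketch of that call site (the corresponding hunk falls outside this excerpt, so placement and exact wording here are an assumption):

	/* In vmx_vcpu_run(), before entering the guest: if the tracked segment
	 * state is something the CPU would refuse to VMENTER with, emulate
	 * instructions until it becomes acceptable instead of entering. */
	if (vmx->emulation_required && emulate_invalid_guest_state) {
		handle_invalid_guest_state(vcpu, kvm_run);
		return;
	}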
| 2698 | /* | 2955 | /* |
| 2699 | * The exit handlers return 1 if the exit was handled fully and guest execution | 2956 | * The exit handlers return 1 if the exit was handled fully and guest execution |
| 2700 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 2957 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
| @@ -2714,6 +2971,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
| 2714 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, | 2971 | [EXIT_REASON_MSR_WRITE] = handle_wrmsr, |
| 2715 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, | 2972 | [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, |
| 2716 | [EXIT_REASON_HLT] = handle_halt, | 2973 | [EXIT_REASON_HLT] = handle_halt, |
| 2974 | [EXIT_REASON_INVLPG] = handle_invlpg, | ||
| 2717 | [EXIT_REASON_VMCALL] = handle_vmcall, | 2975 | [EXIT_REASON_VMCALL] = handle_vmcall, |
| 2718 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, | 2976 | [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, |
| 2719 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, | 2977 | [EXIT_REASON_APIC_ACCESS] = handle_apic_access, |
| @@ -2735,8 +2993,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 2735 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2993 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2736 | u32 vectoring_info = vmx->idt_vectoring_info; | 2994 | u32 vectoring_info = vmx->idt_vectoring_info; |
| 2737 | 2995 | ||
| 2738 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP), | 2996 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), |
| 2739 | (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit); | 2997 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); |
| 2740 | 2998 | ||
| 2741 | /* Accessing CR3 doesn't cause a VM exit in paging mode, so we | 2999 | /* Accessing CR3 doesn't cause a VM exit in paging mode, so we |
| 2742 | * need to sync with the guest's real CR3. */ | 3000 | * need to sync with the guest's real CR3. */ |
| @@ -2829,88 +3087,92 @@ static void enable_intr_window(struct kvm_vcpu *vcpu) | |||
| 2829 | enable_irq_window(vcpu); | 3087 | enable_irq_window(vcpu); |
| 2830 | } | 3088 | } |
| 2831 | 3089 | ||
| 2832 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | 3090 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
| 2833 | { | 3091 | { |
| 2834 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3092 | u32 exit_intr_info; |
| 2835 | u32 idtv_info_field, intr_info_field, exit_intr_info_field; | 3093 | u32 idt_vectoring_info; |
| 2836 | int vector; | 3094 | bool unblock_nmi; |
| 3095 | u8 vector; | ||
| 3096 | int type; | ||
| 3097 | bool idtv_info_valid; | ||
| 3098 | u32 error; | ||
| 2837 | 3099 | ||
| 2838 | update_tpr_threshold(vcpu); | 3100 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 2839 | 3101 | if (cpu_has_virtual_nmis()) { | |
| 2840 | intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); | 3102 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
| 2841 | exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO); | 3103 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
| 2842 | idtv_info_field = vmx->idt_vectoring_info; | 3104 | /* |
| 2843 | if (intr_info_field & INTR_INFO_VALID_MASK) { | 3105 | * SDM 3: 25.7.1.2 |
| 2844 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | 3106 | * Re-set bit "block by NMI" before VM entry if vmexit caused by |
| 2845 | /* TODO: fault when IDT_Vectoring */ | 3107 | * a guest IRET fault. |
| 2846 | if (printk_ratelimit()) | 3108 | */ |
| 2847 | printk(KERN_ERR "Fault when IDT_Vectoring\n"); | 3109 | if (unblock_nmi && vector != DF_VECTOR) |
| 2848 | } | 3110 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 2849 | enable_intr_window(vcpu); | 3111 | GUEST_INTR_STATE_NMI); |
| 2850 | return; | ||
| 2851 | } | 3112 | } |
| 2852 | if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { | ||
| 2853 | if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) | ||
| 2854 | == INTR_TYPE_EXT_INTR | ||
| 2855 | && vcpu->arch.rmode.active) { | ||
| 2856 | u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; | ||
| 2857 | |||
| 2858 | vmx_inject_irq(vcpu, vect); | ||
| 2859 | enable_intr_window(vcpu); | ||
| 2860 | return; | ||
| 2861 | } | ||
| 2862 | |||
| 2863 | KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); | ||
| 2864 | 3113 | ||
| 3114 | idt_vectoring_info = vmx->idt_vectoring_info; | ||
| 3115 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | ||
| 3116 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | ||
| 3117 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | ||
| 3118 | if (vmx->vcpu.arch.nmi_injected) { | ||
| 2865 | /* | 3119 | /* |
| 2866 | * SDM 3: 25.7.1.2 | 3120 | * SDM 3: 25.7.1.2 |
| 2867 | * Clear bit "block by NMI" before VM entry if a NMI delivery | 3121 | * Clear bit "block by NMI" before VM entry if a NMI delivery |
| 2868 | * faulted. | 3122 | * faulted. |
| 2869 | */ | 3123 | */ |
| 2870 | if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) | 3124 | if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) |
| 2871 | == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis()) | 3125 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, |
| 2872 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 3126 | GUEST_INTR_STATE_NMI); |
| 2873 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 3127 | else |
| 2874 | ~GUEST_INTR_STATE_NMI); | 3128 | vmx->vcpu.arch.nmi_injected = false; |
| 2875 | 3129 | } | |
| 2876 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field | 3130 | kvm_clear_exception_queue(&vmx->vcpu); |
| 2877 | & ~INTR_INFO_RESVD_BITS_MASK); | 3131 | if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) { |
| 2878 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 3132 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
| 2879 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); | 3133 | error = vmcs_read32(IDT_VECTORING_ERROR_CODE); |
| 2880 | 3134 | kvm_queue_exception_e(&vmx->vcpu, vector, error); | |
| 2881 | if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) | 3135 | } else |
| 2882 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 3136 | kvm_queue_exception(&vmx->vcpu, vector); |
| 2883 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | 3137 | vmx->idt_vectoring_info = 0; |
| 2884 | enable_intr_window(vcpu); | ||
| 2885 | return; | ||
| 2886 | } | 3138 | } |
| 3139 | kvm_clear_interrupt_queue(&vmx->vcpu); | ||
| 3140 | if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { | ||
| 3141 | kvm_queue_interrupt(&vmx->vcpu, vector); | ||
| 3142 | vmx->idt_vectoring_info = 0; | ||
| 3143 | } | ||
| 3144 | } | ||
| 3145 | |||
| 3146 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | ||
| 3147 | { | ||
| 3148 | update_tpr_threshold(vcpu); | ||
| 3149 | |||
| 2887 | if (cpu_has_virtual_nmis()) { | 3150 | if (cpu_has_virtual_nmis()) { |
| 2888 | /* | 3151 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { |
| 2889 | * SDM 3: 25.7.1.2 | 3152 | if (vmx_nmi_enabled(vcpu)) { |
| 2890 | * Re-set bit "block by NMI" before VM entry if vmexit caused by | 3153 | vcpu->arch.nmi_pending = false; |
| 2891 | * a guest IRET fault. | 3154 | vcpu->arch.nmi_injected = true; |
| 2892 | */ | 3155 | } else { |
| 2893 | if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) && | 3156 | enable_intr_window(vcpu); |
| 2894 | (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8) | 3157 | return; |
| 2895 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 3158 | } |
| 2896 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | | 3159 | } |
| 2897 | GUEST_INTR_STATE_NMI); | 3160 | if (vcpu->arch.nmi_injected) { |
| 2898 | else if (vcpu->arch.nmi_pending) { | 3161 | vmx_inject_nmi(vcpu); |
| 2899 | if (vmx_nmi_enabled(vcpu)) | ||
| 2900 | vmx_inject_nmi(vcpu); | ||
| 2901 | enable_intr_window(vcpu); | 3162 | enable_intr_window(vcpu); |
| 2902 | return; | 3163 | return; |
| 2903 | } | 3164 | } |
| 2904 | |||
| 2905 | } | 3165 | } |
| 2906 | if (!kvm_cpu_has_interrupt(vcpu)) | 3166 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { |
| 2907 | return; | 3167 | if (vmx_irq_enabled(vcpu)) |
| 2908 | if (vmx_irq_enabled(vcpu)) { | 3168 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); |
| 2909 | vector = kvm_cpu_get_interrupt(vcpu); | 3169 | else |
| 2910 | vmx_inject_irq(vcpu, vector); | 3170 | enable_irq_window(vcpu); |
| 2911 | kvm_timer_intr_post(vcpu, vector); | 3171 | } |
| 2912 | } else | 3172 | if (vcpu->arch.interrupt.pending) { |
| 2913 | enable_irq_window(vcpu); | 3173 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); |
| 3174 | kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); | ||
| 3175 | } | ||
| 2914 | } | 3176 | } |
| 2915 | 3177 | ||
| 2916 | /* | 3178 | /* |
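vmx_complete_interrupts() runs after every exit and re-queues whatever event delivery the exit interrupted: it splits the raw IDT-vectoring information into a valid bit, an event type, the vector, and an error-code flag, then pushes NMIs, exceptions, and external interrupts back through the generic kvm queues so vmx_intr_assist() can reinject them on the next entry. The small decode sketch below spells out that field layout; the literal masks follow the SDM event-injection format and are assumptions standing in for the kernel's VECTORING_INFO_* constants.

/*
 * Sketch of how the IDT-vectoring information field decomposes:
 * vector in bits 7:0, type in bits 10:8, error-code-valid in bit 11,
 * valid in bit 31 (Intel SDM event format).  Treat the literal masks
 * as assumptions, not the kernel's VECTORING_INFO_* macros.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vectoring_event {
    bool    valid;
    uint8_t vector;
    uint8_t type;            /* 0=ext irq, 2=NMI, 3=hw exception, ... */
    bool    has_error_code;
};

static struct vectoring_event decode_idt_vectoring(uint32_t info)
{
    struct vectoring_event ev = {
        .valid          = info & 0x80000000u,
        .vector         = info & 0xffu,
        .type           = (info >> 8) & 0x7u,
        .has_error_code = info & 0x800u,
    };
    return ev;
}

int main(void)
{
    /* A page fault (#PF, vector 14) that pushes an error code. */
    struct vectoring_event ev = decode_idt_vectoring(0x80000b0eu);

    printf("valid=%d vector=%u type=%u error_code=%d\n",
           ev.valid, ev.vector, ev.type, ev.has_error_code);
    return 0;
}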
| @@ -2922,9 +3184,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
| 2922 | static void fixup_rmode_irq(struct vcpu_vmx *vmx) | 3184 | static void fixup_rmode_irq(struct vcpu_vmx *vmx) |
| 2923 | { | 3185 | { |
| 2924 | vmx->rmode.irq.pending = 0; | 3186 | vmx->rmode.irq.pending = 0; |
| 2925 | if (vmcs_readl(GUEST_RIP) + 1 != vmx->rmode.irq.rip) | 3187 | if (kvm_rip_read(&vmx->vcpu) + 1 != vmx->rmode.irq.rip) |
| 2926 | return; | 3188 | return; |
| 2927 | vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip); | 3189 | kvm_rip_write(&vmx->vcpu, vmx->rmode.irq.rip); |
| 2928 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { | 3190 | if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) { |
| 2929 | vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; | 3191 | vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK; |
| 2930 | vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; | 3192 | vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR; |
| @@ -2936,11 +3198,30 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
| 2936 | | vmx->rmode.irq.vector; | 3198 | | vmx->rmode.irq.vector; |
| 2937 | } | 3199 | } |
| 2938 | 3200 | ||
| 3201 | #ifdef CONFIG_X86_64 | ||
| 3202 | #define R "r" | ||
| 3203 | #define Q "q" | ||
| 3204 | #else | ||
| 3205 | #define R "e" | ||
| 3206 | #define Q "l" | ||
| 3207 | #endif | ||
| 3208 | |||
| 2939 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3209 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2940 | { | 3210 | { |
| 2941 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3211 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2942 | u32 intr_info; | 3212 | u32 intr_info; |
| 2943 | 3213 | ||
| 3214 | /* Handle invalid guest state instead of entering VMX */ | ||
| 3215 | if (vmx->emulation_required && emulate_invalid_guest_state) { | ||
| 3216 | handle_invalid_guest_state(vcpu, kvm_run); | ||
| 3217 | return; | ||
| 3218 | } | ||
| 3219 | |||
| 3220 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | ||
| 3221 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | ||
| 3222 | if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) | ||
| 3223 | vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); | ||
| 3224 | |||
| 2944 | /* | 3225 | /* |
| 2945 | * Loading guest fpu may have cleared host cr0.ts | 3226 | * Loading guest fpu may have cleared host cr0.ts |
| 2946 | */ | 3227 | */ |
| @@ -2948,26 +3229,25 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2948 | 3229 | ||
| 2949 | asm( | 3230 | asm( |
| 2950 | /* Store host registers */ | 3231 | /* Store host registers */ |
| 2951 | #ifdef CONFIG_X86_64 | 3232 | "push %%"R"dx; push %%"R"bp;" |
| 2952 | "push %%rdx; push %%rbp;" | 3233 | "push %%"R"cx \n\t" |
| 2953 | "push %%rcx \n\t" | 3234 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" |
| 2954 | #else | 3235 | "je 1f \n\t" |
| 2955 | "push %%edx; push %%ebp;" | 3236 | "mov %%"R"sp, %c[host_rsp](%0) \n\t" |
| 2956 | "push %%ecx \n\t" | ||
| 2957 | #endif | ||
| 2958 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" | 3237 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
| 3238 | "1: \n\t" | ||
| 2959 | /* Check if vmlaunch or vmresume is needed */ | 3239 | /* Check if vmlaunch or vmresume is needed */ |
| 2960 | "cmpl $0, %c[launched](%0) \n\t" | 3240 | "cmpl $0, %c[launched](%0) \n\t" |
| 2961 | /* Load guest registers. Don't clobber flags. */ | 3241 | /* Load guest registers. Don't clobber flags. */ |
| 3242 | "mov %c[cr2](%0), %%"R"ax \n\t" | ||
| 3243 | "mov %%"R"ax, %%cr2 \n\t" | ||
| 3244 | "mov %c[rax](%0), %%"R"ax \n\t" | ||
| 3245 | "mov %c[rbx](%0), %%"R"bx \n\t" | ||
| 3246 | "mov %c[rdx](%0), %%"R"dx \n\t" | ||
| 3247 | "mov %c[rsi](%0), %%"R"si \n\t" | ||
| 3248 | "mov %c[rdi](%0), %%"R"di \n\t" | ||
| 3249 | "mov %c[rbp](%0), %%"R"bp \n\t" | ||
| 2962 | #ifdef CONFIG_X86_64 | 3250 | #ifdef CONFIG_X86_64 |
| 2963 | "mov %c[cr2](%0), %%rax \n\t" | ||
| 2964 | "mov %%rax, %%cr2 \n\t" | ||
| 2965 | "mov %c[rax](%0), %%rax \n\t" | ||
| 2966 | "mov %c[rbx](%0), %%rbx \n\t" | ||
| 2967 | "mov %c[rdx](%0), %%rdx \n\t" | ||
| 2968 | "mov %c[rsi](%0), %%rsi \n\t" | ||
| 2969 | "mov %c[rdi](%0), %%rdi \n\t" | ||
| 2970 | "mov %c[rbp](%0), %%rbp \n\t" | ||
| 2971 | "mov %c[r8](%0), %%r8 \n\t" | 3251 | "mov %c[r8](%0), %%r8 \n\t" |
| 2972 | "mov %c[r9](%0), %%r9 \n\t" | 3252 | "mov %c[r9](%0), %%r9 \n\t" |
| 2973 | "mov %c[r10](%0), %%r10 \n\t" | 3253 | "mov %c[r10](%0), %%r10 \n\t" |
| @@ -2976,18 +3256,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2976 | "mov %c[r13](%0), %%r13 \n\t" | 3256 | "mov %c[r13](%0), %%r13 \n\t" |
| 2977 | "mov %c[r14](%0), %%r14 \n\t" | 3257 | "mov %c[r14](%0), %%r14 \n\t" |
| 2978 | "mov %c[r15](%0), %%r15 \n\t" | 3258 | "mov %c[r15](%0), %%r15 \n\t" |
| 2979 | "mov %c[rcx](%0), %%rcx \n\t" /* kills %0 (rcx) */ | ||
| 2980 | #else | ||
| 2981 | "mov %c[cr2](%0), %%eax \n\t" | ||
| 2982 | "mov %%eax, %%cr2 \n\t" | ||
| 2983 | "mov %c[rax](%0), %%eax \n\t" | ||
| 2984 | "mov %c[rbx](%0), %%ebx \n\t" | ||
| 2985 | "mov %c[rdx](%0), %%edx \n\t" | ||
| 2986 | "mov %c[rsi](%0), %%esi \n\t" | ||
| 2987 | "mov %c[rdi](%0), %%edi \n\t" | ||
| 2988 | "mov %c[rbp](%0), %%ebp \n\t" | ||
| 2989 | "mov %c[rcx](%0), %%ecx \n\t" /* kills %0 (ecx) */ | ||
| 2990 | #endif | 3259 | #endif |
| 3260 | "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ | ||
| 3261 | |||
| 2991 | /* Enter guest mode */ | 3262 | /* Enter guest mode */ |
| 2992 | "jne .Llaunched \n\t" | 3263 | "jne .Llaunched \n\t" |
| 2993 | __ex(ASM_VMX_VMLAUNCH) "\n\t" | 3264 | __ex(ASM_VMX_VMLAUNCH) "\n\t" |
| @@ -2995,15 +3266,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2995 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 3266 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
| 2996 | ".Lkvm_vmx_return: " | 3267 | ".Lkvm_vmx_return: " |
| 2997 | /* Save guest registers, load host registers, keep flags */ | 3268 | /* Save guest registers, load host registers, keep flags */ |
| 3269 | "xchg %0, (%%"R"sp) \n\t" | ||
| 3270 | "mov %%"R"ax, %c[rax](%0) \n\t" | ||
| 3271 | "mov %%"R"bx, %c[rbx](%0) \n\t" | ||
| 3272 | "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" | ||
| 3273 | "mov %%"R"dx, %c[rdx](%0) \n\t" | ||
| 3274 | "mov %%"R"si, %c[rsi](%0) \n\t" | ||
| 3275 | "mov %%"R"di, %c[rdi](%0) \n\t" | ||
| 3276 | "mov %%"R"bp, %c[rbp](%0) \n\t" | ||
| 2998 | #ifdef CONFIG_X86_64 | 3277 | #ifdef CONFIG_X86_64 |
| 2999 | "xchg %0, (%%rsp) \n\t" | ||
| 3000 | "mov %%rax, %c[rax](%0) \n\t" | ||
| 3001 | "mov %%rbx, %c[rbx](%0) \n\t" | ||
| 3002 | "pushq (%%rsp); popq %c[rcx](%0) \n\t" | ||
| 3003 | "mov %%rdx, %c[rdx](%0) \n\t" | ||
| 3004 | "mov %%rsi, %c[rsi](%0) \n\t" | ||
| 3005 | "mov %%rdi, %c[rdi](%0) \n\t" | ||
| 3006 | "mov %%rbp, %c[rbp](%0) \n\t" | ||
| 3007 | "mov %%r8, %c[r8](%0) \n\t" | 3278 | "mov %%r8, %c[r8](%0) \n\t" |
| 3008 | "mov %%r9, %c[r9](%0) \n\t" | 3279 | "mov %%r9, %c[r9](%0) \n\t" |
| 3009 | "mov %%r10, %c[r10](%0) \n\t" | 3280 | "mov %%r10, %c[r10](%0) \n\t" |
| @@ -3012,28 +3283,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3012 | "mov %%r13, %c[r13](%0) \n\t" | 3283 | "mov %%r13, %c[r13](%0) \n\t" |
| 3013 | "mov %%r14, %c[r14](%0) \n\t" | 3284 | "mov %%r14, %c[r14](%0) \n\t" |
| 3014 | "mov %%r15, %c[r15](%0) \n\t" | 3285 | "mov %%r15, %c[r15](%0) \n\t" |
| 3015 | "mov %%cr2, %%rax \n\t" | ||
| 3016 | "mov %%rax, %c[cr2](%0) \n\t" | ||
| 3017 | |||
| 3018 | "pop %%rbp; pop %%rbp; pop %%rdx \n\t" | ||
| 3019 | #else | ||
| 3020 | "xchg %0, (%%esp) \n\t" | ||
| 3021 | "mov %%eax, %c[rax](%0) \n\t" | ||
| 3022 | "mov %%ebx, %c[rbx](%0) \n\t" | ||
| 3023 | "pushl (%%esp); popl %c[rcx](%0) \n\t" | ||
| 3024 | "mov %%edx, %c[rdx](%0) \n\t" | ||
| 3025 | "mov %%esi, %c[rsi](%0) \n\t" | ||
| 3026 | "mov %%edi, %c[rdi](%0) \n\t" | ||
| 3027 | "mov %%ebp, %c[rbp](%0) \n\t" | ||
| 3028 | "mov %%cr2, %%eax \n\t" | ||
| 3029 | "mov %%eax, %c[cr2](%0) \n\t" | ||
| 3030 | |||
| 3031 | "pop %%ebp; pop %%ebp; pop %%edx \n\t" | ||
| 3032 | #endif | 3286 | #endif |
| 3287 | "mov %%cr2, %%"R"ax \n\t" | ||
| 3288 | "mov %%"R"ax, %c[cr2](%0) \n\t" | ||
| 3289 | |||
| 3290 | "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" | ||
| 3033 | "setbe %c[fail](%0) \n\t" | 3291 | "setbe %c[fail](%0) \n\t" |
| 3034 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 3292 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
| 3035 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 3293 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), |
| 3036 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), | 3294 | [fail]"i"(offsetof(struct vcpu_vmx, fail)), |
| 3295 | [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), | ||
| 3037 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), | 3296 | [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), |
| 3038 | [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), | 3297 | [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), |
| 3039 | [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), | 3298 | [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), |
| @@ -3053,14 +3312,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3053 | #endif | 3312 | #endif |
| 3054 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) | 3313 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) |
| 3055 | : "cc", "memory" | 3314 | : "cc", "memory" |
| 3315 | , R"bx", R"di", R"si" | ||
| 3056 | #ifdef CONFIG_X86_64 | 3316 | #ifdef CONFIG_X86_64 |
| 3057 | , "rbx", "rdi", "rsi" | ||
| 3058 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" | 3317 | , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" |
| 3059 | #else | ||
| 3060 | , "ebx", "edi", "rsi" | ||
| 3061 | #endif | 3318 | #endif |
| 3062 | ); | 3319 | ); |
| 3063 | 3320 | ||
| 3321 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | ||
| 3322 | vcpu->arch.regs_dirty = 0; | ||
| 3323 | |||
| 3064 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); | 3324 | vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); |
| 3065 | if (vmx->rmode.irq.pending) | 3325 | if (vmx->rmode.irq.pending) |
| 3066 | fixup_rmode_irq(vmx); | 3326 | fixup_rmode_irq(vmx); |
| @@ -3080,8 +3340,13 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3080 | KVMTRACE_0D(NMI, vcpu, handler); | 3340 | KVMTRACE_0D(NMI, vcpu, handler); |
| 3081 | asm("int $2"); | 3341 | asm("int $2"); |
| 3082 | } | 3342 | } |
| 3343 | |||
| 3344 | vmx_complete_interrupts(vmx); | ||
| 3083 | } | 3345 | } |
| 3084 | 3346 | ||
| 3347 | #undef R | ||
| 3348 | #undef Q | ||
| 3349 | |||
| 3085 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | 3350 | static void vmx_free_vmcs(struct kvm_vcpu *vcpu) |
| 3086 | { | 3351 | { |
| 3087 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3352 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
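The R and Q macros used throughout the new vmx_vcpu_run() asm body are nothing more than C string-literal concatenation: "push %%"R"dx" collapses to "push %%rdx" on 64-bit builds and "push %%edx" on 32-bit builds, which is what lets a single asm template replace the old per-CONFIG_X86_64 copies. The toy program below only prints the resulting template strings rather than assembling them; DEMO_64BIT stands in for CONFIG_X86_64.

/*
 * Demonstrates the string-pasting trick behind the R/Q macros.
 * Compile with -DDEMO_64BIT to see the 64-bit expansion.  The doubled
 * %% is the extended-asm escape for a literal % in the real template.
 */
#include <stdio.h>

#ifdef DEMO_64BIT
#define R "r"
#define Q "q"
#else
#define R "e"
#define Q "l"
#endif

int main(void)
{
    puts("push %%"R"dx; push %%"R"bp;");
    puts("push"Q" (%%"R"sp); pop"Q" %c[rcx](%0)");
    return 0;
}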
| @@ -3224,8 +3489,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 3224 | .set_idt = vmx_set_idt, | 3489 | .set_idt = vmx_set_idt, |
| 3225 | .get_gdt = vmx_get_gdt, | 3490 | .get_gdt = vmx_get_gdt, |
| 3226 | .set_gdt = vmx_set_gdt, | 3491 | .set_gdt = vmx_set_gdt, |
| 3227 | .cache_regs = vcpu_load_rsp_rip, | 3492 | .cache_reg = vmx_cache_reg, |
| 3228 | .decache_regs = vcpu_put_rsp_rip, | ||
| 3229 | .get_rflags = vmx_get_rflags, | 3493 | .get_rflags = vmx_get_rflags, |
| 3230 | .set_rflags = vmx_set_rflags, | 3494 | .set_rflags = vmx_set_rflags, |
| 3231 | 3495 | ||
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 17e25995b65b..3e010d21fdd7 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h | |||
| @@ -331,9 +331,6 @@ enum vmcs_field { | |||
| 331 | 331 | ||
| 332 | #define AR_RESERVD_MASK 0xfffe0f00 | 332 | #define AR_RESERVD_MASK 0xfffe0f00 |
| 333 | 333 | ||
| 334 | #define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 | ||
| 335 | #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 | ||
| 336 | |||
| 337 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 334 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 |
| 338 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | 335 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 |
| 339 | 336 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19afbb644c7f..4f0677d1eae8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -4,10 +4,14 @@ | |||
| 4 | * derived from drivers/kvm/kvm_main.c | 4 | * derived from drivers/kvm/kvm_main.c |
| 5 | * | 5 | * |
| 6 | * Copyright (C) 2006 Qumranet, Inc. | 6 | * Copyright (C) 2006 Qumranet, Inc. |
| 7 | * Copyright (C) 2008 Qumranet, Inc. | ||
| 8 | * Copyright IBM Corporation, 2008 | ||
| 7 | * | 9 | * |
| 8 | * Authors: | 10 | * Authors: |
| 9 | * Avi Kivity <avi@qumranet.com> | 11 | * Avi Kivity <avi@qumranet.com> |
| 10 | * Yaniv Kamay <yaniv@qumranet.com> | 12 | * Yaniv Kamay <yaniv@qumranet.com> |
| 13 | * Amit Shah <amit.shah@qumranet.com> | ||
| 14 | * Ben-Ami Yassour <benami@il.ibm.com> | ||
| 11 | * | 15 | * |
| 12 | * This work is licensed under the terms of the GNU GPL, version 2. See | 16 | * This work is licensed under the terms of the GNU GPL, version 2. See |
| 13 | * the COPYING file in the top-level directory. | 17 | * the COPYING file in the top-level directory. |
| @@ -19,14 +23,18 @@ | |||
| 19 | #include "mmu.h" | 23 | #include "mmu.h" |
| 20 | #include "i8254.h" | 24 | #include "i8254.h" |
| 21 | #include "tss.h" | 25 | #include "tss.h" |
| 26 | #include "kvm_cache_regs.h" | ||
| 27 | #include "x86.h" | ||
| 22 | 28 | ||
| 23 | #include <linux/clocksource.h> | 29 | #include <linux/clocksource.h> |
| 30 | #include <linux/interrupt.h> | ||
| 24 | #include <linux/kvm.h> | 31 | #include <linux/kvm.h> |
| 25 | #include <linux/fs.h> | 32 | #include <linux/fs.h> |
| 26 | #include <linux/vmalloc.h> | 33 | #include <linux/vmalloc.h> |
| 27 | #include <linux/module.h> | 34 | #include <linux/module.h> |
| 28 | #include <linux/mman.h> | 35 | #include <linux/mman.h> |
| 29 | #include <linux/highmem.h> | 36 | #include <linux/highmem.h> |
| 37 | #include <linux/intel-iommu.h> | ||
| 30 | 38 | ||
| 31 | #include <asm/uaccess.h> | 39 | #include <asm/uaccess.h> |
| 32 | #include <asm/msr.h> | 40 | #include <asm/msr.h> |
| @@ -61,6 +69,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | |||
| 61 | struct kvm_cpuid_entry2 __user *entries); | 69 | struct kvm_cpuid_entry2 __user *entries); |
| 62 | 70 | ||
| 63 | struct kvm_x86_ops *kvm_x86_ops; | 71 | struct kvm_x86_ops *kvm_x86_ops; |
| 72 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | ||
| 64 | 73 | ||
| 65 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 74 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
| 66 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 75 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
| @@ -83,6 +92,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 83 | { "fpu_reload", VCPU_STAT(fpu_reload) }, | 92 | { "fpu_reload", VCPU_STAT(fpu_reload) }, |
| 84 | { "insn_emulation", VCPU_STAT(insn_emulation) }, | 93 | { "insn_emulation", VCPU_STAT(insn_emulation) }, |
| 85 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, | 94 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, |
| 95 | { "irq_injections", VCPU_STAT(irq_injections) }, | ||
| 86 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, | 96 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, |
| 87 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, | 97 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, |
| 88 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, | 98 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, |
| @@ -90,12 +100,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 90 | { "mmu_flooded", VM_STAT(mmu_flooded) }, | 100 | { "mmu_flooded", VM_STAT(mmu_flooded) }, |
| 91 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 101 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
| 92 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 102 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
| 103 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | ||
| 93 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 104 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
| 94 | { "largepages", VM_STAT(lpages) }, | 105 | { "largepages", VM_STAT(lpages) }, |
| 95 | { NULL } | 106 | { NULL } |
| 96 | }; | 107 | }; |
| 97 | 108 | ||
| 98 | |||
| 99 | unsigned long segment_base(u16 selector) | 109 | unsigned long segment_base(u16 selector) |
| 100 | { | 110 | { |
| 101 | struct descriptor_table gdt; | 111 | struct descriptor_table gdt; |
| @@ -352,6 +362,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); | |||
| 352 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | 362 | void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) |
| 353 | { | 363 | { |
| 354 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { | 364 | if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) { |
| 365 | kvm_mmu_sync_roots(vcpu); | ||
| 355 | kvm_mmu_flush_tlb(vcpu); | 366 | kvm_mmu_flush_tlb(vcpu); |
| 356 | return; | 367 | return; |
| 357 | } | 368 | } |
| @@ -662,6 +673,18 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 662 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", | 673 | pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n", |
| 663 | __func__, data); | 674 | __func__, data); |
| 664 | break; | 675 | break; |
| 676 | case MSR_IA32_DEBUGCTLMSR: | ||
| 677 | if (!data) { | ||
| 678 | /* We support the non-activated case already */ | ||
| 679 | break; | ||
| 680 | } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) { | ||
| 681 | /* Values other than LBR and BTF are vendor-specific, | ||
| 682 | thus reserved and should throw a #GP */ | ||
| 683 | return 1; | ||
| 684 | } | ||
| 685 | pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", | ||
| 686 | __func__, data); | ||
| 687 | break; | ||
| 665 | case MSR_IA32_UCODE_REV: | 688 | case MSR_IA32_UCODE_REV: |
| 666 | case MSR_IA32_UCODE_WRITE: | 689 | case MSR_IA32_UCODE_WRITE: |
| 667 | break; | 690 | break; |
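The new MSR_IA32_DEBUGCTLMSR case accepts a write of zero, faults any write that sets bits outside LBR and BTF, and logs but otherwise ignores the two supported bits. A standalone sketch of that policy follows; the bit positions (LBR in bit 0, BTF in bit 1) are taken from the SDM, and the DEMO_* names are placeholders rather than the kernel's msr-index.h macros.

/*
 * Sketch of the DEBUGCTL write policy: zero is accepted, bits other
 * than LBR/BTF fault, and LBR/BTF are accepted but not emulated.
 * Bit positions are assumptions based on the SDM.
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_DEBUGCTL_LBR (1ull << 0)
#define DEMO_DEBUGCTL_BTF (1ull << 1)

/* Returns 0 if the write is accepted, 1 if it should raise #GP. */
static int demo_set_debugctl(uint64_t data)
{
    if (!data)
        return 0;                             /* disabling is always fine */
    if (data & ~(DEMO_DEBUGCTL_LBR | DEMO_DEBUGCTL_BTF))
        return 1;                             /* reserved/vendor bits: #GP */
    printf("DEBUGCTL 0x%llx accepted but not emulated\n",
           (unsigned long long)data);
    return 0;
}

int main(void)
{
    printf("write 0x0  -> %d\n", demo_set_debugctl(0));
    printf("write 0x1  -> %d\n", demo_set_debugctl(DEMO_DEBUGCTL_LBR));
    printf("write 0x40 -> %d\n", demo_set_debugctl(0x40));
    return 0;
}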
| @@ -692,10 +715,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 692 | /* ...but clean it before doing the actual write */ | 715 | /* ...but clean it before doing the actual write */ |
| 693 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); | 716 | vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); |
| 694 | 717 | ||
| 695 | down_read(¤t->mm->mmap_sem); | ||
| 696 | vcpu->arch.time_page = | 718 | vcpu->arch.time_page = |
| 697 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); | 719 | gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); |
| 698 | up_read(¤t->mm->mmap_sem); | ||
| 699 | 720 | ||
| 700 | if (is_error_page(vcpu->arch.time_page)) { | 721 | if (is_error_page(vcpu->arch.time_page)) { |
| 701 | kvm_release_page_clean(vcpu->arch.time_page); | 722 | kvm_release_page_clean(vcpu->arch.time_page); |
| @@ -752,8 +773,14 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
| 752 | case MSR_IA32_MC0_MISC+8: | 773 | case MSR_IA32_MC0_MISC+8: |
| 753 | case MSR_IA32_MC0_MISC+12: | 774 | case MSR_IA32_MC0_MISC+12: |
| 754 | case MSR_IA32_MC0_MISC+16: | 775 | case MSR_IA32_MC0_MISC+16: |
| 776 | case MSR_IA32_MC0_MISC+20: | ||
| 755 | case MSR_IA32_UCODE_REV: | 777 | case MSR_IA32_UCODE_REV: |
| 756 | case MSR_IA32_EBL_CR_POWERON: | 778 | case MSR_IA32_EBL_CR_POWERON: |
| 779 | case MSR_IA32_DEBUGCTLMSR: | ||
| 780 | case MSR_IA32_LASTBRANCHFROMIP: | ||
| 781 | case MSR_IA32_LASTBRANCHTOIP: | ||
| 782 | case MSR_IA32_LASTINTFROMIP: | ||
| 783 | case MSR_IA32_LASTINTTOIP: | ||
| 757 | data = 0; | 784 | data = 0; |
| 758 | break; | 785 | break; |
| 759 | case MSR_MTRRcap: | 786 | case MSR_MTRRcap: |
| @@ -901,6 +928,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 901 | case KVM_CAP_PV_MMU: | 928 | case KVM_CAP_PV_MMU: |
| 902 | r = !tdp_enabled; | 929 | r = !tdp_enabled; |
| 903 | break; | 930 | break; |
| 931 | case KVM_CAP_IOMMU: | ||
| 932 | r = intel_iommu_found(); | ||
| 933 | break; | ||
| 904 | default: | 934 | default: |
| 905 | r = 0; | 935 | r = 0; |
| 906 | break; | 936 | break; |
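From userspace the new capability is probed like any other, with KVM_CHECK_EXTENSION on the /dev/kvm file descriptor; a non-zero return means assigned devices can be placed behind the IOMMU. A minimal probe, assuming the usual <linux/kvm.h> ABI and that the installed headers define KVM_CAP_IOMMU:

/*
 * Userspace view of the capability: KVM_CHECK_EXTENSION on /dev/kvm
 * returns non-zero when IOMMU-backed device assignment is available.
 * Assumes KVM_CAP_IOMMU is defined by the installed kernel headers.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

int main(void)
{
    int kvm = open("/dev/kvm", O_RDWR);
    if (kvm < 0) {
        perror("open /dev/kvm");
        return 1;
    }

    int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IOMMU);
    printf("KVM_CAP_IOMMU: %s\n", r > 0 ? "available" : "not available");

    close(kvm);
    return 0;
}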
| @@ -1303,28 +1333,33 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1303 | struct kvm_vcpu *vcpu = filp->private_data; | 1333 | struct kvm_vcpu *vcpu = filp->private_data; |
| 1304 | void __user *argp = (void __user *)arg; | 1334 | void __user *argp = (void __user *)arg; |
| 1305 | int r; | 1335 | int r; |
| 1336 | struct kvm_lapic_state *lapic = NULL; | ||
| 1306 | 1337 | ||
| 1307 | switch (ioctl) { | 1338 | switch (ioctl) { |
| 1308 | case KVM_GET_LAPIC: { | 1339 | case KVM_GET_LAPIC: { |
| 1309 | struct kvm_lapic_state lapic; | 1340 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 1310 | 1341 | ||
| 1311 | memset(&lapic, 0, sizeof lapic); | 1342 | r = -ENOMEM; |
| 1312 | r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); | 1343 | if (!lapic) |
| 1344 | goto out; | ||
| 1345 | r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic); | ||
| 1313 | if (r) | 1346 | if (r) |
| 1314 | goto out; | 1347 | goto out; |
| 1315 | r = -EFAULT; | 1348 | r = -EFAULT; |
| 1316 | if (copy_to_user(argp, &lapic, sizeof lapic)) | 1349 | if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state))) |
| 1317 | goto out; | 1350 | goto out; |
| 1318 | r = 0; | 1351 | r = 0; |
| 1319 | break; | 1352 | break; |
| 1320 | } | 1353 | } |
| 1321 | case KVM_SET_LAPIC: { | 1354 | case KVM_SET_LAPIC: { |
| 1322 | struct kvm_lapic_state lapic; | 1355 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 1323 | 1356 | r = -ENOMEM; | |
| 1357 | if (!lapic) | ||
| 1358 | goto out; | ||
| 1324 | r = -EFAULT; | 1359 | r = -EFAULT; |
| 1325 | if (copy_from_user(&lapic, argp, sizeof lapic)) | 1360 | if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state))) |
| 1326 | goto out; | 1361 | goto out; |
| 1327 | r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; | 1362 | r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic); |
| 1328 | if (r) | 1363 | if (r) |
| 1329 | goto out; | 1364 | goto out; |
| 1330 | r = 0; | 1365 | r = 0; |
| @@ -1422,6 +1457,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1422 | r = -EINVAL; | 1457 | r = -EINVAL; |
| 1423 | } | 1458 | } |
| 1424 | out: | 1459 | out: |
| 1460 | if (lapic) | ||
| 1461 | kfree(lapic); | ||
| 1425 | return r; | 1462 | return r; |
| 1426 | } | 1463 | } |
| 1427 | 1464 | ||
| @@ -1630,6 +1667,15 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1630 | struct kvm *kvm = filp->private_data; | 1667 | struct kvm *kvm = filp->private_data; |
| 1631 | void __user *argp = (void __user *)arg; | 1668 | void __user *argp = (void __user *)arg; |
| 1632 | int r = -EINVAL; | 1669 | int r = -EINVAL; |
| 1670 | /* | ||
| 1671 | * This union makes it completely explicit to gcc-3.x | ||
| 1672 | * that these two variables' stack usage should be | ||
| 1673 | * combined, not added together. | ||
| 1674 | */ | ||
| 1675 | union { | ||
| 1676 | struct kvm_pit_state ps; | ||
| 1677 | struct kvm_memory_alias alias; | ||
| 1678 | } u; | ||
| 1633 | 1679 | ||
| 1634 | switch (ioctl) { | 1680 | switch (ioctl) { |
| 1635 | case KVM_SET_TSS_ADDR: | 1681 | case KVM_SET_TSS_ADDR: |
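The union above is purely a stack-frame optimisation: only one of the two payloads is ever live on a given ioctl path, so letting them share storage keeps kvm_arch_vm_ioctl()'s frame small even with compilers that will not merge the slots themselves. The toy program below shows the effect with invented structs sized roughly like the real ones; it is illustrative only.

/*
 * Overlapping locals via a union versus two separate locals.
 * demo_pit_state/demo_memory_alias are invented stand-ins, not the
 * real kvm structures.
 */
#include <stdio.h>

struct demo_pit_state    { char channels[3][24]; };
struct demo_memory_alias { unsigned long guest_phys, size, target_phys; };

int main(void)
{
    struct separate {
        struct demo_pit_state    ps;
        struct demo_memory_alias alias;
    };
    union shared {
        struct demo_pit_state    ps;
        struct demo_memory_alias alias;
    };

    printf("two locals: %zu bytes, one union: %zu bytes\n",
           sizeof(struct separate), sizeof(union shared));
    return 0;
}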
| @@ -1661,17 +1707,14 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1661 | case KVM_GET_NR_MMU_PAGES: | 1707 | case KVM_GET_NR_MMU_PAGES: |
| 1662 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); | 1708 | r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); |
| 1663 | break; | 1709 | break; |
| 1664 | case KVM_SET_MEMORY_ALIAS: { | 1710 | case KVM_SET_MEMORY_ALIAS: |
| 1665 | struct kvm_memory_alias alias; | ||
| 1666 | |||
| 1667 | r = -EFAULT; | 1711 | r = -EFAULT; |
| 1668 | if (copy_from_user(&alias, argp, sizeof alias)) | 1712 | if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias))) |
| 1669 | goto out; | 1713 | goto out; |
| 1670 | r = kvm_vm_ioctl_set_memory_alias(kvm, &alias); | 1714 | r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias); |
| 1671 | if (r) | 1715 | if (r) |
| 1672 | goto out; | 1716 | goto out; |
| 1673 | break; | 1717 | break; |
| 1674 | } | ||
| 1675 | case KVM_CREATE_IRQCHIP: | 1718 | case KVM_CREATE_IRQCHIP: |
| 1676 | r = -ENOMEM; | 1719 | r = -ENOMEM; |
| 1677 | kvm->arch.vpic = kvm_create_pic(kvm); | 1720 | kvm->arch.vpic = kvm_create_pic(kvm); |
| @@ -1699,13 +1742,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1699 | goto out; | 1742 | goto out; |
| 1700 | if (irqchip_in_kernel(kvm)) { | 1743 | if (irqchip_in_kernel(kvm)) { |
| 1701 | mutex_lock(&kvm->lock); | 1744 | mutex_lock(&kvm->lock); |
| 1702 | if (irq_event.irq < 16) | 1745 | kvm_set_irq(kvm, irq_event.irq, irq_event.level); |
| 1703 | kvm_pic_set_irq(pic_irqchip(kvm), | ||
| 1704 | irq_event.irq, | ||
| 1705 | irq_event.level); | ||
| 1706 | kvm_ioapic_set_irq(kvm->arch.vioapic, | ||
| 1707 | irq_event.irq, | ||
| 1708 | irq_event.level); | ||
| 1709 | mutex_unlock(&kvm->lock); | 1746 | mutex_unlock(&kvm->lock); |
| 1710 | r = 0; | 1747 | r = 0; |
| 1711 | } | 1748 | } |
| @@ -1713,65 +1750,77 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 1713 | } | 1750 | } |
| 1714 | case KVM_GET_IRQCHIP: { | 1751 | case KVM_GET_IRQCHIP: { |
| 1715 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 1752 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
| 1716 | struct kvm_irqchip chip; | 1753 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); |
| 1717 | 1754 | ||
| 1718 | r = -EFAULT; | 1755 | r = -ENOMEM; |
| 1719 | if (copy_from_user(&chip, argp, sizeof chip)) | 1756 | if (!chip) |
| 1720 | goto out; | 1757 | goto out; |
| 1758 | r = -EFAULT; | ||
| 1759 | if (copy_from_user(chip, argp, sizeof *chip)) | ||
| 1760 | goto get_irqchip_out; | ||
| 1721 | r = -ENXIO; | 1761 | r = -ENXIO; |
| 1722 | if (!irqchip_in_kernel(kvm)) | 1762 | if (!irqchip_in_kernel(kvm)) |
| 1723 | goto out; | 1763 | goto get_irqchip_out; |
| 1724 | r = kvm_vm_ioctl_get_irqchip(kvm, &chip); | 1764 | r = kvm_vm_ioctl_get_irqchip(kvm, chip); |
| 1725 | if (r) | 1765 | if (r) |
| 1726 | goto out; | 1766 | goto get_irqchip_out; |
| 1727 | r = -EFAULT; | 1767 | r = -EFAULT; |
| 1728 | if (copy_to_user(argp, &chip, sizeof chip)) | 1768 | if (copy_to_user(argp, chip, sizeof *chip)) |
| 1729 | goto out; | 1769 | goto get_irqchip_out; |
| 1730 | r = 0; | 1770 | r = 0; |
| 1771 | get_irqchip_out: | ||
| 1772 | kfree(chip); | ||
| 1773 | if (r) | ||
| 1774 | goto out; | ||
| 1731 | break; | 1775 | break; |
| 1732 | } | 1776 | } |
| 1733 | case KVM_SET_IRQCHIP: { | 1777 | case KVM_SET_IRQCHIP: { |
| 1734 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ | 1778 | /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ |
| 1735 | struct kvm_irqchip chip; | 1779 | struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL); |
| 1736 | 1780 | ||
| 1737 | r = -EFAULT; | 1781 | r = -ENOMEM; |
| 1738 | if (copy_from_user(&chip, argp, sizeof chip)) | 1782 | if (!chip) |
| 1739 | goto out; | 1783 | goto out; |
| 1784 | r = -EFAULT; | ||
| 1785 | if (copy_from_user(chip, argp, sizeof *chip)) | ||
| 1786 | goto set_irqchip_out; | ||
| 1740 | r = -ENXIO; | 1787 | r = -ENXIO; |
| 1741 | if (!irqchip_in_kernel(kvm)) | 1788 | if (!irqchip_in_kernel(kvm)) |
| 1742 | goto out; | 1789 | goto set_irqchip_out; |
| 1743 | r = kvm_vm_ioctl_set_irqchip(kvm, &chip); | 1790 | r = kvm_vm_ioctl_set_irqchip(kvm, chip); |
| 1744 | if (r) | 1791 | if (r) |
| 1745 | goto out; | 1792 | goto set_irqchip_out; |
| 1746 | r = 0; | 1793 | r = 0; |
| 1794 | set_irqchip_out: | ||
| 1795 | kfree(chip); | ||
| 1796 | if (r) | ||
| 1797 | goto out; | ||
| 1747 | break; | 1798 | break; |
| 1748 | } | 1799 | } |
| 1749 | case KVM_GET_PIT: { | 1800 | case KVM_GET_PIT: { |
| 1750 | struct kvm_pit_state ps; | ||
| 1751 | r = -EFAULT; | 1801 | r = -EFAULT; |
| 1752 | if (copy_from_user(&ps, argp, sizeof ps)) | 1802 | if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state))) |
| 1753 | goto out; | 1803 | goto out; |
| 1754 | r = -ENXIO; | 1804 | r = -ENXIO; |
| 1755 | if (!kvm->arch.vpit) | 1805 | if (!kvm->arch.vpit) |
| 1756 | goto out; | 1806 | goto out; |
| 1757 | r = kvm_vm_ioctl_get_pit(kvm, &ps); | 1807 | r = kvm_vm_ioctl_get_pit(kvm, &u.ps); |
| 1758 | if (r) | 1808 | if (r) |
| 1759 | goto out; | 1809 | goto out; |
| 1760 | r = -EFAULT; | 1810 | r = -EFAULT; |
| 1761 | if (copy_to_user(argp, &ps, sizeof ps)) | 1811 | if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state))) |
| 1762 | goto out; | 1812 | goto out; |
| 1763 | r = 0; | 1813 | r = 0; |
| 1764 | break; | 1814 | break; |
| 1765 | } | 1815 | } |
| 1766 | case KVM_SET_PIT: { | 1816 | case KVM_SET_PIT: { |
| 1767 | struct kvm_pit_state ps; | ||
| 1768 | r = -EFAULT; | 1817 | r = -EFAULT; |
| 1769 | if (copy_from_user(&ps, argp, sizeof ps)) | 1818 | if (copy_from_user(&u.ps, argp, sizeof u.ps)) |
| 1770 | goto out; | 1819 | goto out; |
| 1771 | r = -ENXIO; | 1820 | r = -ENXIO; |
| 1772 | if (!kvm->arch.vpit) | 1821 | if (!kvm->arch.vpit) |
| 1773 | goto out; | 1822 | goto out; |
| 1774 | r = kvm_vm_ioctl_set_pit(kvm, &ps); | 1823 | r = kvm_vm_ioctl_set_pit(kvm, &u.ps); |
| 1775 | if (r) | 1824 | if (r) |
| 1776 | goto out; | 1825 | goto out; |
| 1777 | r = 0; | 1826 | r = 0; |
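The KVM_GET/SET_IRQCHIP paths apply a second stack-saving idea: the roughly half-kilobyte kvm_irqchip payload is now kmalloc'd instead of living on the kernel stack, and a single cleanup label frees it on every exit after the usual copy_from_user / operate / copy_to_user sequence. Below is a userspace-buildable sketch of that shape, with copy_{from,to}_user stubbed out by memcpy and an invented payload struct.

/*
 * Shape of the reworked KVM_GET_IRQCHIP path: heap-allocate the large
 * payload, funnel every error through one label that frees it.
 * All names here are illustrative stand-ins.
 */
#include <errno.h>
#include <stdlib.h>
#include <string.h>

struct demo_irqchip { int chip_id; char state[512]; };

static int copy_from_user_stub(void *dst, const void *src, size_t n)
{ memcpy(dst, src, n); return 0; }
static int copy_to_user_stub(void *dst, const void *src, size_t n)
{ memcpy(dst, src, n); return 0; }

static long demo_get_irqchip(struct demo_irqchip *chip) { return 0; }

static long demo_ioctl_get_irqchip(void *argp)
{
    struct demo_irqchip *chip;
    long r;

    chip = malloc(sizeof(*chip));
    if (!chip)
        return -ENOMEM;

    r = -EFAULT;
    if (copy_from_user_stub(chip, argp, sizeof(*chip)))
        goto out;
    r = demo_get_irqchip(chip);
    if (r)
        goto out;
    r = -EFAULT;
    if (copy_to_user_stub(argp, chip, sizeof(*chip)))
        goto out;
    r = 0;
out:
    free(chip);                        /* freed on every path */
    return r;
}

int main(void)
{
    struct demo_irqchip user_buf = { .chip_id = 2 };
    return (int)demo_ioctl_get_irqchip(&user_buf);
}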
| @@ -2018,9 +2067,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, | |||
| 2018 | 2067 | ||
| 2019 | val = *(u64 *)new; | 2068 | val = *(u64 *)new; |
| 2020 | 2069 | ||
| 2021 | down_read(¤t->mm->mmap_sem); | ||
| 2022 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); | 2070 | page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); |
| 2023 | up_read(¤t->mm->mmap_sem); | ||
| 2024 | 2071 | ||
| 2025 | kaddr = kmap_atomic(page, KM_USER0); | 2072 | kaddr = kmap_atomic(page, KM_USER0); |
| 2026 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); | 2073 | set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); |
| @@ -2040,6 +2087,7 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) | |||
| 2040 | 2087 | ||
| 2041 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | 2088 | int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) |
| 2042 | { | 2089 | { |
| 2090 | kvm_mmu_invlpg(vcpu, address); | ||
| 2043 | return X86EMUL_CONTINUE; | 2091 | return X86EMUL_CONTINUE; |
| 2044 | } | 2092 | } |
| 2045 | 2093 | ||
| @@ -2080,7 +2128,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
| 2080 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 2128 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
| 2081 | { | 2129 | { |
| 2082 | u8 opcodes[4]; | 2130 | u8 opcodes[4]; |
| 2083 | unsigned long rip = vcpu->arch.rip; | 2131 | unsigned long rip = kvm_rip_read(vcpu); |
| 2084 | unsigned long rip_linear; | 2132 | unsigned long rip_linear; |
| 2085 | 2133 | ||
| 2086 | if (!printk_ratelimit()) | 2134 | if (!printk_ratelimit()) |
| @@ -2102,6 +2150,14 @@ static struct x86_emulate_ops emulate_ops = { | |||
| 2102 | .cmpxchg_emulated = emulator_cmpxchg_emulated, | 2150 | .cmpxchg_emulated = emulator_cmpxchg_emulated, |
| 2103 | }; | 2151 | }; |
| 2104 | 2152 | ||
| 2153 | static void cache_all_regs(struct kvm_vcpu *vcpu) | ||
| 2154 | { | ||
| 2155 | kvm_register_read(vcpu, VCPU_REGS_RAX); | ||
| 2156 | kvm_register_read(vcpu, VCPU_REGS_RSP); | ||
| 2157 | kvm_register_read(vcpu, VCPU_REGS_RIP); | ||
| 2158 | vcpu->arch.regs_dirty = ~0; | ||
| 2159 | } | ||
| 2160 | |||
| 2105 | int emulate_instruction(struct kvm_vcpu *vcpu, | 2161 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| 2106 | struct kvm_run *run, | 2162 | struct kvm_run *run, |
| 2107 | unsigned long cr2, | 2163 | unsigned long cr2, |
| @@ -2111,8 +2167,15 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2111 | int r; | 2167 | int r; |
| 2112 | struct decode_cache *c; | 2168 | struct decode_cache *c; |
| 2113 | 2169 | ||
| 2170 | kvm_clear_exception_queue(vcpu); | ||
| 2114 | vcpu->arch.mmio_fault_cr2 = cr2; | 2171 | vcpu->arch.mmio_fault_cr2 = cr2; |
| 2115 | kvm_x86_ops->cache_regs(vcpu); | 2172 | /* |
| 2173 | * TODO: fix x86_emulate.c to use guest_read/write_register | ||
| 2174 | * instead of direct ->regs accesses, which can save hundreds | ||
| 2175 | * of cycles on Intel for instructions that don't read/change | ||
| 2176 | * RSP, for example. | ||
| 2177 | */ | ||
| 2178 | cache_all_regs(vcpu); | ||
| 2116 | 2179 | ||
| 2117 | vcpu->mmio_is_write = 0; | 2180 | vcpu->mmio_is_write = 0; |
| 2118 | vcpu->arch.pio.string = 0; | 2181 | vcpu->arch.pio.string = 0; |
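This is the heart of the patch's register handling change: the old cache_regs()/decache_regs() bulk copies give way to per-register accessors driven by the regs_avail and regs_dirty bitmaps, so a register is read from the VMCS only on first use and written back only if it actually changed; cache_all_regs() exists because the emulator still pokes vcpu->arch.regs[] directly and therefore needs everything fetched and marked dirty up front. A toy model of the bitmap scheme, with invented demo_* names and a fake VMCS array:

/*
 * Toy model of the regs_avail/regs_dirty scheme: read from "hardware"
 * only on first use, write back only what was dirtied.  The demo_*
 * names and the fake VMCS array are inventions for illustration.
 */
#include <stdio.h>

enum { DEMO_RAX, DEMO_RSP, DEMO_RIP, DEMO_NR_REGS };

static unsigned long demo_vmcs[DEMO_NR_REGS] = { 0x1111, 0x2222, 0x3333 };

struct demo_vcpu {
    unsigned long regs[DEMO_NR_REGS];
    unsigned long regs_avail;    /* bit set: regs[i] is up to date  */
    unsigned long regs_dirty;    /* bit set: regs[i] must be flushed */
};

static unsigned long demo_register_read(struct demo_vcpu *v, int reg)
{
    if (!(v->regs_avail & (1ul << reg))) {
        v->regs[reg] = demo_vmcs[reg];    /* the expensive vmcs read */
        v->regs_avail |= 1ul << reg;
    }
    return v->regs[reg];
}

static void demo_register_write(struct demo_vcpu *v, int reg,
                                unsigned long val)
{
    v->regs[reg] = val;
    v->regs_avail |= 1ul << reg;
    v->regs_dirty |= 1ul << reg;
}

static void demo_flush_before_entry(struct demo_vcpu *v)
{
    int reg;

    for (reg = 0; reg < DEMO_NR_REGS; reg++)
        if (v->regs_dirty & (1ul << reg))
            demo_vmcs[reg] = v->regs[reg];    /* the vmcs write back */
    v->regs_dirty = 0;
}

int main(void)
{
    struct demo_vcpu v = { .regs_avail = 0, .regs_dirty = 0 };

    printf("rip = %#lx\n", demo_register_read(&v, DEMO_RIP));
    demo_register_write(&v, DEMO_RSP, 0xbeef);
    demo_flush_before_entry(&v);
    printf("vmcs rsp = %#lx\n", demo_vmcs[DEMO_RSP]);
    return 0;
}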
| @@ -2172,7 +2235,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2172 | return EMULATE_DO_MMIO; | 2235 | return EMULATE_DO_MMIO; |
| 2173 | } | 2236 | } |
| 2174 | 2237 | ||
| 2175 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2176 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 2238 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 2177 | 2239 | ||
| 2178 | if (vcpu->mmio_is_write) { | 2240 | if (vcpu->mmio_is_write) { |
| @@ -2225,20 +2287,19 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
| 2225 | struct kvm_pio_request *io = &vcpu->arch.pio; | 2287 | struct kvm_pio_request *io = &vcpu->arch.pio; |
| 2226 | long delta; | 2288 | long delta; |
| 2227 | int r; | 2289 | int r; |
| 2228 | 2290 | unsigned long val; | |
| 2229 | kvm_x86_ops->cache_regs(vcpu); | ||
| 2230 | 2291 | ||
| 2231 | if (!io->string) { | 2292 | if (!io->string) { |
| 2232 | if (io->in) | 2293 | if (io->in) { |
| 2233 | memcpy(&vcpu->arch.regs[VCPU_REGS_RAX], vcpu->arch.pio_data, | 2294 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2234 | io->size); | 2295 | memcpy(&val, vcpu->arch.pio_data, io->size); |
| 2296 | kvm_register_write(vcpu, VCPU_REGS_RAX, val); | ||
| 2297 | } | ||
| 2235 | } else { | 2298 | } else { |
| 2236 | if (io->in) { | 2299 | if (io->in) { |
| 2237 | r = pio_copy_data(vcpu); | 2300 | r = pio_copy_data(vcpu); |
| 2238 | if (r) { | 2301 | if (r) |
| 2239 | kvm_x86_ops->cache_regs(vcpu); | ||
| 2240 | return r; | 2302 | return r; |
| 2241 | } | ||
| 2242 | } | 2303 | } |
| 2243 | 2304 | ||
| 2244 | delta = 1; | 2305 | delta = 1; |
| @@ -2248,19 +2309,24 @@ int complete_pio(struct kvm_vcpu *vcpu) | |||
| 2248 | * The size of the register should really depend on | 2309 | * The size of the register should really depend on |
| 2249 | * current address size. | 2310 | * current address size. |
| 2250 | */ | 2311 | */ |
| 2251 | vcpu->arch.regs[VCPU_REGS_RCX] -= delta; | 2312 | val = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 2313 | val -= delta; | ||
| 2314 | kvm_register_write(vcpu, VCPU_REGS_RCX, val); | ||
| 2252 | } | 2315 | } |
| 2253 | if (io->down) | 2316 | if (io->down) |
| 2254 | delta = -delta; | 2317 | delta = -delta; |
| 2255 | delta *= io->size; | 2318 | delta *= io->size; |
| 2256 | if (io->in) | 2319 | if (io->in) { |
| 2257 | vcpu->arch.regs[VCPU_REGS_RDI] += delta; | 2320 | val = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 2258 | else | 2321 | val += delta; |
| 2259 | vcpu->arch.regs[VCPU_REGS_RSI] += delta; | 2322 | kvm_register_write(vcpu, VCPU_REGS_RDI, val); |
| 2323 | } else { | ||
| 2324 | val = kvm_register_read(vcpu, VCPU_REGS_RSI); | ||
| 2325 | val += delta; | ||
| 2326 | kvm_register_write(vcpu, VCPU_REGS_RSI, val); | ||
| 2327 | } | ||
| 2260 | } | 2328 | } |
| 2261 | 2329 | ||
| 2262 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2263 | |||
| 2264 | io->count -= io->cur_count; | 2330 | io->count -= io->cur_count; |
| 2265 | io->cur_count = 0; | 2331 | io->cur_count = 0; |
| 2266 | 2332 | ||
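The complete_pio() change keeps the arithmetic it always performed, it just routes it through kvm_register_read()/kvm_register_write(): after a string I/O, RCX is decremented by the number of elements just transferred when a REP prefix was present, and the data pointer (RDI for INS, RSI for OUTS) moves by count times element size, downward when the direction flag was set. The same fix-up written as plain C over purely illustrative values:

/*
 * The string-PIO pointer fix-up as plain arithmetic.  Values and
 * names are illustrative only.
 */
#include <stdio.h>

struct demo_pio {
    int in;                  /* 1 = INS (advances RDI)          */
    int down;                /* 1 = direction flag set          */
    int size;                /* bytes per element               */
    unsigned long cur_count; /* elements transferred this round */
    int rep;                 /* REP prefix present              */
};

static void demo_complete_string_pio(struct demo_pio *io,
                                     unsigned long *rcx,
                                     unsigned long *rsi,
                                     unsigned long *rdi)
{
    long delta = 1;

    if (io->rep) {
        delta *= io->cur_count;
        *rcx -= delta;       /* consume the REP count */
    }
    if (io->down)
        delta = -delta;
    delta *= io->size;
    if (io->in)
        *rdi += delta;
    else
        *rsi += delta;
}

int main(void)
{
    struct demo_pio io = { .in = 1, .down = 0, .size = 2,
                           .cur_count = 8, .rep = 1 };
    unsigned long rcx = 8, rsi = 0x1000, rdi = 0x2000;

    demo_complete_string_pio(&io, &rcx, &rsi, &rdi);
    printf("rcx=%lu rsi=%#lx rdi=%#lx\n", rcx, rsi, rdi);
    return 0;
}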
| @@ -2313,6 +2379,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
| 2313 | int size, unsigned port) | 2379 | int size, unsigned port) |
| 2314 | { | 2380 | { |
| 2315 | struct kvm_io_device *pio_dev; | 2381 | struct kvm_io_device *pio_dev; |
| 2382 | unsigned long val; | ||
| 2316 | 2383 | ||
| 2317 | vcpu->run->exit_reason = KVM_EXIT_IO; | 2384 | vcpu->run->exit_reason = KVM_EXIT_IO; |
| 2318 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; | 2385 | vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; |
| @@ -2333,8 +2400,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
| 2333 | KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, | 2400 | KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size, |
| 2334 | handler); | 2401 | handler); |
| 2335 | 2402 | ||
| 2336 | kvm_x86_ops->cache_regs(vcpu); | 2403 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2337 | memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); | 2404 | memcpy(vcpu->arch.pio_data, &val, 4); |
| 2338 | 2405 | ||
| 2339 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2406 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
| 2340 | 2407 | ||
| @@ -2492,11 +2559,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) | |||
| 2492 | KVMTRACE_0D(HLT, vcpu, handler); | 2559 | KVMTRACE_0D(HLT, vcpu, handler); |
| 2493 | if (irqchip_in_kernel(vcpu->kvm)) { | 2560 | if (irqchip_in_kernel(vcpu->kvm)) { |
| 2494 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; | 2561 | vcpu->arch.mp_state = KVM_MP_STATE_HALTED; |
| 2495 | up_read(&vcpu->kvm->slots_lock); | ||
| 2496 | kvm_vcpu_block(vcpu); | ||
| 2497 | down_read(&vcpu->kvm->slots_lock); | ||
| 2498 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | ||
| 2499 | return -EINTR; | ||
| 2500 | return 1; | 2562 | return 1; |
| 2501 | } else { | 2563 | } else { |
| 2502 | vcpu->run->exit_reason = KVM_EXIT_HLT; | 2564 | vcpu->run->exit_reason = KVM_EXIT_HLT; |
| @@ -2519,13 +2581,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
| 2519 | unsigned long nr, a0, a1, a2, a3, ret; | 2581 | unsigned long nr, a0, a1, a2, a3, ret; |
| 2520 | int r = 1; | 2582 | int r = 1; |
| 2521 | 2583 | ||
| 2522 | kvm_x86_ops->cache_regs(vcpu); | 2584 | nr = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2523 | 2585 | a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); | |
| 2524 | nr = vcpu->arch.regs[VCPU_REGS_RAX]; | 2586 | a1 = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 2525 | a0 = vcpu->arch.regs[VCPU_REGS_RBX]; | 2587 | a2 = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 2526 | a1 = vcpu->arch.regs[VCPU_REGS_RCX]; | 2588 | a3 = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 2527 | a2 = vcpu->arch.regs[VCPU_REGS_RDX]; | ||
| 2528 | a3 = vcpu->arch.regs[VCPU_REGS_RSI]; | ||
| 2529 | 2589 | ||
| 2530 | KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); | 2590 | KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler); |
| 2531 | 2591 | ||
| @@ -2548,8 +2608,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
| 2548 | ret = -KVM_ENOSYS; | 2608 | ret = -KVM_ENOSYS; |
| 2549 | break; | 2609 | break; |
| 2550 | } | 2610 | } |
| 2551 | vcpu->arch.regs[VCPU_REGS_RAX] = ret; | 2611 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); |
| 2552 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2553 | ++vcpu->stat.hypercalls; | 2612 | ++vcpu->stat.hypercalls; |
| 2554 | return r; | 2613 | return r; |
| 2555 | } | 2614 | } |
| @@ -2559,6 +2618,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 2559 | { | 2618 | { |
| 2560 | char instruction[3]; | 2619 | char instruction[3]; |
| 2561 | int ret = 0; | 2620 | int ret = 0; |
| 2621 | unsigned long rip = kvm_rip_read(vcpu); | ||
| 2562 | 2622 | ||
| 2563 | 2623 | ||
| 2564 | /* | 2624 | /* |
| @@ -2568,9 +2628,8 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) | |||
| 2568 | */ | 2628 | */ |
| 2569 | kvm_mmu_zap_all(vcpu->kvm); | 2629 | kvm_mmu_zap_all(vcpu->kvm); |
| 2570 | 2630 | ||
| 2571 | kvm_x86_ops->cache_regs(vcpu); | ||
| 2572 | kvm_x86_ops->patch_hypercall(vcpu, instruction); | 2631 | kvm_x86_ops->patch_hypercall(vcpu, instruction); |
| 2573 | if (emulator_write_emulated(vcpu->arch.rip, instruction, 3, vcpu) | 2632 | if (emulator_write_emulated(rip, instruction, 3, vcpu) |
| 2574 | != X86EMUL_CONTINUE) | 2633 | != X86EMUL_CONTINUE) |
| 2575 | ret = -EFAULT; | 2634 | ret = -EFAULT; |
| 2576 | 2635 | ||
| @@ -2700,13 +2759,12 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
| 2700 | u32 function, index; | 2759 | u32 function, index; |
| 2701 | struct kvm_cpuid_entry2 *e, *best; | 2760 | struct kvm_cpuid_entry2 *e, *best; |
| 2702 | 2761 | ||
| 2703 | kvm_x86_ops->cache_regs(vcpu); | 2762 | function = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 2704 | function = vcpu->arch.regs[VCPU_REGS_RAX]; | 2763 | index = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 2705 | index = vcpu->arch.regs[VCPU_REGS_RCX]; | 2764 | kvm_register_write(vcpu, VCPU_REGS_RAX, 0); |
| 2706 | vcpu->arch.regs[VCPU_REGS_RAX] = 0; | 2765 | kvm_register_write(vcpu, VCPU_REGS_RBX, 0); |
| 2707 | vcpu->arch.regs[VCPU_REGS_RBX] = 0; | 2766 | kvm_register_write(vcpu, VCPU_REGS_RCX, 0); |
| 2708 | vcpu->arch.regs[VCPU_REGS_RCX] = 0; | 2767 | kvm_register_write(vcpu, VCPU_REGS_RDX, 0); |
| 2709 | vcpu->arch.regs[VCPU_REGS_RDX] = 0; | ||
| 2710 | best = NULL; | 2768 | best = NULL; |
| 2711 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { | 2769 | for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { |
| 2712 | e = &vcpu->arch.cpuid_entries[i]; | 2770 | e = &vcpu->arch.cpuid_entries[i]; |
| @@ -2724,18 +2782,17 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | |||
| 2724 | best = e; | 2782 | best = e; |
| 2725 | } | 2783 | } |
| 2726 | if (best) { | 2784 | if (best) { |
| 2727 | vcpu->arch.regs[VCPU_REGS_RAX] = best->eax; | 2785 | kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); |
| 2728 | vcpu->arch.regs[VCPU_REGS_RBX] = best->ebx; | 2786 | kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx); |
| 2729 | vcpu->arch.regs[VCPU_REGS_RCX] = best->ecx; | 2787 | kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); |
| 2730 | vcpu->arch.regs[VCPU_REGS_RDX] = best->edx; | 2788 | kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx); |
| 2731 | } | 2789 | } |
| 2732 | kvm_x86_ops->decache_regs(vcpu); | ||
| 2733 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2790 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
| 2734 | KVMTRACE_5D(CPUID, vcpu, function, | 2791 | KVMTRACE_5D(CPUID, vcpu, function, |
| 2735 | (u32)vcpu->arch.regs[VCPU_REGS_RAX], | 2792 | (u32)kvm_register_read(vcpu, VCPU_REGS_RAX), |
| 2736 | (u32)vcpu->arch.regs[VCPU_REGS_RBX], | 2793 | (u32)kvm_register_read(vcpu, VCPU_REGS_RBX), |
| 2737 | (u32)vcpu->arch.regs[VCPU_REGS_RCX], | 2794 | (u32)kvm_register_read(vcpu, VCPU_REGS_RCX), |
| 2738 | (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler); | 2795 | (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler); |
| 2739 | } | 2796 | } |
| 2740 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | 2797 | EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); |
| 2741 | 2798 | ||
| @@ -2776,9 +2833,7 @@ static void vapic_enter(struct kvm_vcpu *vcpu) | |||
| 2776 | if (!apic || !apic->vapic_addr) | 2833 | if (!apic || !apic->vapic_addr) |
| 2777 | return; | 2834 | return; |
| 2778 | 2835 | ||
| 2779 | down_read(¤t->mm->mmap_sem); | ||
| 2780 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 2836 | page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
| 2781 | up_read(¤t->mm->mmap_sem); | ||
| 2782 | 2837 | ||
| 2783 | vcpu->arch.apic->vapic_page = page; | 2838 | vcpu->arch.apic->vapic_page = page; |
| 2784 | } | 2839 | } |
| @@ -2796,28 +2851,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu) | |||
| 2796 | up_read(&vcpu->kvm->slots_lock); | 2851 | up_read(&vcpu->kvm->slots_lock); |
| 2797 | } | 2852 | } |
| 2798 | 2853 | ||
| 2799 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2854 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2800 | { | 2855 | { |
| 2801 | int r; | 2856 | int r; |
| 2802 | 2857 | ||
| 2803 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { | ||
| 2804 | pr_debug("vcpu %d received sipi with vector # %x\n", | ||
| 2805 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | ||
| 2806 | kvm_lapic_reset(vcpu); | ||
| 2807 | r = kvm_x86_ops->vcpu_reset(vcpu); | ||
| 2808 | if (r) | ||
| 2809 | return r; | ||
| 2810 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 2811 | } | ||
| 2812 | |||
| 2813 | down_read(&vcpu->kvm->slots_lock); | ||
| 2814 | vapic_enter(vcpu); | ||
| 2815 | |||
| 2816 | preempted: | ||
| 2817 | if (vcpu->guest_debug.enabled) | ||
| 2818 | kvm_x86_ops->guest_debug_pre(vcpu); | ||
| 2819 | |||
| 2820 | again: | ||
| 2821 | if (vcpu->requests) | 2858 | if (vcpu->requests) |
| 2822 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 2859 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
| 2823 | kvm_mmu_unload(vcpu); | 2860 | kvm_mmu_unload(vcpu); |
| @@ -2829,6 +2866,8 @@ again: | |||
| 2829 | if (vcpu->requests) { | 2866 | if (vcpu->requests) { |
| 2830 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) | 2867 | if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) |
| 2831 | __kvm_migrate_timers(vcpu); | 2868 | __kvm_migrate_timers(vcpu); |
| 2869 | if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests)) | ||
| 2870 | kvm_mmu_sync_roots(vcpu); | ||
| 2832 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) | 2871 | if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) |
| 2833 | kvm_x86_ops->tlb_flush(vcpu); | 2872 | kvm_x86_ops->tlb_flush(vcpu); |
| 2834 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 2873 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
| @@ -2854,21 +2893,15 @@ again: | |||
| 2854 | 2893 | ||
| 2855 | local_irq_disable(); | 2894 | local_irq_disable(); |
| 2856 | 2895 | ||
| 2857 | if (vcpu->requests || need_resched()) { | 2896 | if (vcpu->requests || need_resched() || signal_pending(current)) { |
| 2858 | local_irq_enable(); | 2897 | local_irq_enable(); |
| 2859 | preempt_enable(); | 2898 | preempt_enable(); |
| 2860 | r = 1; | 2899 | r = 1; |
| 2861 | goto out; | 2900 | goto out; |
| 2862 | } | 2901 | } |
| 2863 | 2902 | ||
| 2864 | if (signal_pending(current)) { | 2903 | if (vcpu->guest_debug.enabled) |
| 2865 | local_irq_enable(); | 2904 | kvm_x86_ops->guest_debug_pre(vcpu); |
| 2866 | preempt_enable(); | ||
| 2867 | r = -EINTR; | ||
| 2868 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 2869 | ++vcpu->stat.signal_exits; | ||
| 2870 | goto out; | ||
| 2871 | } | ||
| 2872 | 2905 | ||
| 2873 | vcpu->guest_mode = 1; | 2906 | vcpu->guest_mode = 1; |
| 2874 | /* | 2907 | /* |
| @@ -2917,8 +2950,8 @@ again: | |||
| 2917 | * Profile KVM exit RIPs: | 2950 | * Profile KVM exit RIPs: |
| 2918 | */ | 2951 | */ |
| 2919 | if (unlikely(prof_on == KVM_PROFILING)) { | 2952 | if (unlikely(prof_on == KVM_PROFILING)) { |
| 2920 | kvm_x86_ops->cache_regs(vcpu); | 2953 | unsigned long rip = kvm_rip_read(vcpu); |
| 2921 | profile_hit(KVM_PROFILING, (void *)vcpu->arch.rip); | 2954 | profile_hit(KVM_PROFILING, (void *)rip); |
| 2922 | } | 2955 | } |
| 2923 | 2956 | ||
| 2924 | if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) | 2957 | if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) |
| @@ -2927,26 +2960,63 @@ again: | |||
| 2927 | kvm_lapic_sync_from_vapic(vcpu); | 2960 | kvm_lapic_sync_from_vapic(vcpu); |
| 2928 | 2961 | ||
| 2929 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | 2962 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); |
| 2963 | out: | ||
| 2964 | return r; | ||
| 2965 | } | ||
| 2930 | 2966 | ||
| 2931 | if (r > 0) { | 2967 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
| 2932 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 2968 | { |
| 2933 | r = -EINTR; | 2969 | int r; |
| 2934 | kvm_run->exit_reason = KVM_EXIT_INTR; | 2970 | |
| 2935 | ++vcpu->stat.request_irq_exits; | 2971 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) { |
| 2936 | goto out; | 2972 | pr_debug("vcpu %d received sipi with vector # %x\n", |
| 2937 | } | 2973 | vcpu->vcpu_id, vcpu->arch.sipi_vector); |
| 2938 | if (!need_resched()) | 2974 | kvm_lapic_reset(vcpu); |
| 2939 | goto again; | 2975 | r = kvm_x86_ops->vcpu_reset(vcpu); |
| 2976 | if (r) | ||
| 2977 | return r; | ||
| 2978 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 2940 | } | 2979 | } |
| 2941 | 2980 | ||
| 2942 | out: | 2981 | down_read(&vcpu->kvm->slots_lock); |
| 2943 | up_read(&vcpu->kvm->slots_lock); | 2982 | vapic_enter(vcpu); |
| 2944 | if (r > 0) { | 2983 | |
| 2945 | kvm_resched(vcpu); | 2984 | r = 1; |
| 2946 | down_read(&vcpu->kvm->slots_lock); | 2985 | while (r > 0) { |
| 2947 | goto preempted; | 2986 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
| 2987 | r = vcpu_enter_guest(vcpu, kvm_run); | ||
| 2988 | else { | ||
| 2989 | up_read(&vcpu->kvm->slots_lock); | ||
| 2990 | kvm_vcpu_block(vcpu); | ||
| 2991 | down_read(&vcpu->kvm->slots_lock); | ||
| 2992 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | ||
| 2993 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | ||
| 2994 | vcpu->arch.mp_state = | ||
| 2995 | KVM_MP_STATE_RUNNABLE; | ||
| 2996 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | ||
| 2997 | r = -EINTR; | ||
| 2998 | } | ||
| 2999 | |||
| 3000 | if (r > 0) { | ||
| 3001 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | ||
| 3002 | r = -EINTR; | ||
| 3003 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 3004 | ++vcpu->stat.request_irq_exits; | ||
| 3005 | } | ||
| 3006 | if (signal_pending(current)) { | ||
| 3007 | r = -EINTR; | ||
| 3008 | kvm_run->exit_reason = KVM_EXIT_INTR; | ||
| 3009 | ++vcpu->stat.signal_exits; | ||
| 3010 | } | ||
| 3011 | if (need_resched()) { | ||
| 3012 | up_read(&vcpu->kvm->slots_lock); | ||
| 3013 | kvm_resched(vcpu); | ||
| 3014 | down_read(&vcpu->kvm->slots_lock); | ||
| 3015 | } | ||
| 3016 | } | ||
| 2948 | } | 3017 | } |
| 2949 | 3018 | ||
| 3019 | up_read(&vcpu->kvm->slots_lock); | ||
| 2950 | post_kvm_run_save(vcpu, kvm_run); | 3020 | post_kvm_run_save(vcpu, kvm_run); |
| 2951 | 3021 | ||
| 2952 | vapic_exit(vcpu); | 3022 | vapic_exit(vcpu); |
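__vcpu_run() is now a small loop around vcpu_enter_guest(): the guest is entered only while the vcpu is RUNNABLE, a halted vcpu blocks in kvm_vcpu_block() and is woken back to RUNNABLE via KVM_REQ_UNHALT, and signals, userspace interrupt-injection requests and resched points are checked once per iteration instead of through the old preempted/again labels. Below is a skeleton of that control flow with the KVM internals replaced by a tiny simulation; only the loop structure mirrors the patch.

/*
 * Skeleton of the reworked run loop: enter the guest while runnable,
 * otherwise "block", and leave the loop on a simulated -EINTR.
 * Everything here is illustrative.
 */
#include <errno.h>
#include <stdio.h>

enum demo_mp_state { DEMO_RUNNABLE, DEMO_HALTED };

struct demo_vcpu {
    enum demo_mp_state mp_state;
    int exits_left;              /* stop the simulation eventually */
};

static int demo_enter_guest(struct demo_vcpu *v)
{
    printf("guest entry (%d left)\n", v->exits_left);
    if (--v->exits_left == 2)
        v->mp_state = DEMO_HALTED;            /* guest executed HLT */
    return v->exits_left > 0 ? 1 : -EINTR;    /* give up eventually */
}

static void demo_block(struct demo_vcpu *v)
{
    printf("vcpu halted, blocking until an event arrives\n");
    v->mp_state = DEMO_RUNNABLE;              /* pretend we were unhalted */
}

static int demo_vcpu_run(struct demo_vcpu *v)
{
    int r = 1;

    while (r > 0) {
        if (v->mp_state == DEMO_RUNNABLE)
            r = demo_enter_guest(v);
        else
            demo_block(v);
        /* the real loop also checks signals, userspace irq-injection
         * requests, and need_resched() at this point */
    }
    return r;
}

int main(void)
{
    struct demo_vcpu v = { .mp_state = DEMO_RUNNABLE, .exits_left = 5 };
    return demo_vcpu_run(&v) == -EINTR ? 0 : 1;
}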
| @@ -2966,6 +3036,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2966 | 3036 | ||
| 2967 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | 3037 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
| 2968 | kvm_vcpu_block(vcpu); | 3038 | kvm_vcpu_block(vcpu); |
| 3039 | clear_bit(KVM_REQ_UNHALT, &vcpu->requests); | ||
| 2969 | r = -EAGAIN; | 3040 | r = -EAGAIN; |
| 2970 | goto out; | 3041 | goto out; |
| 2971 | } | 3042 | } |
| @@ -2999,11 +3070,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2999 | } | 3070 | } |
| 3000 | } | 3071 | } |
| 3001 | #endif | 3072 | #endif |
| 3002 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { | 3073 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
| 3003 | kvm_x86_ops->cache_regs(vcpu); | 3074 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
| 3004 | vcpu->arch.regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; | 3075 | kvm_run->hypercall.ret); |
| 3005 | kvm_x86_ops->decache_regs(vcpu); | ||
| 3006 | } | ||
| 3007 | 3076 | ||
| 3008 | r = __vcpu_run(vcpu, kvm_run); | 3077 | r = __vcpu_run(vcpu, kvm_run); |
| 3009 | 3078 | ||
| @@ -3019,28 +3088,26 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 3019 | { | 3088 | { |
| 3020 | vcpu_load(vcpu); | 3089 | vcpu_load(vcpu); |
| 3021 | 3090 | ||
| 3022 | kvm_x86_ops->cache_regs(vcpu); | 3091 | regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3023 | 3092 | regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX); | |
| 3024 | regs->rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3093 | regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3025 | regs->rbx = vcpu->arch.regs[VCPU_REGS_RBX]; | 3094 | regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 3026 | regs->rcx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3095 | regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 3027 | regs->rdx = vcpu->arch.regs[VCPU_REGS_RDX]; | 3096 | regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 3028 | regs->rsi = vcpu->arch.regs[VCPU_REGS_RSI]; | 3097 | regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
| 3029 | regs->rdi = vcpu->arch.regs[VCPU_REGS_RDI]; | 3098 | regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP); |
| 3030 | regs->rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | ||
| 3031 | regs->rbp = vcpu->arch.regs[VCPU_REGS_RBP]; | ||
| 3032 | #ifdef CONFIG_X86_64 | 3099 | #ifdef CONFIG_X86_64 |
| 3033 | regs->r8 = vcpu->arch.regs[VCPU_REGS_R8]; | 3100 | regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8); |
| 3034 | regs->r9 = vcpu->arch.regs[VCPU_REGS_R9]; | 3101 | regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9); |
| 3035 | regs->r10 = vcpu->arch.regs[VCPU_REGS_R10]; | 3102 | regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10); |
| 3036 | regs->r11 = vcpu->arch.regs[VCPU_REGS_R11]; | 3103 | regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11); |
| 3037 | regs->r12 = vcpu->arch.regs[VCPU_REGS_R12]; | 3104 | regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12); |
| 3038 | regs->r13 = vcpu->arch.regs[VCPU_REGS_R13]; | 3105 | regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13); |
| 3039 | regs->r14 = vcpu->arch.regs[VCPU_REGS_R14]; | 3106 | regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14); |
| 3040 | regs->r15 = vcpu->arch.regs[VCPU_REGS_R15]; | 3107 | regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15); |
| 3041 | #endif | 3108 | #endif |
| 3042 | 3109 | ||
| 3043 | regs->rip = vcpu->arch.rip; | 3110 | regs->rip = kvm_rip_read(vcpu); |
| 3044 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); | 3111 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); |
| 3045 | 3112 | ||
| 3046 | /* | 3113 | /* |
| @@ -3058,29 +3125,29 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 3058 | { | 3125 | { |
| 3059 | vcpu_load(vcpu); | 3126 | vcpu_load(vcpu); |
| 3060 | 3127 | ||
| 3061 | vcpu->arch.regs[VCPU_REGS_RAX] = regs->rax; | 3128 | kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax); |
| 3062 | vcpu->arch.regs[VCPU_REGS_RBX] = regs->rbx; | 3129 | kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx); |
| 3063 | vcpu->arch.regs[VCPU_REGS_RCX] = regs->rcx; | 3130 | kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx); |
| 3064 | vcpu->arch.regs[VCPU_REGS_RDX] = regs->rdx; | 3131 | kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx); |
| 3065 | vcpu->arch.regs[VCPU_REGS_RSI] = regs->rsi; | 3132 | kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi); |
| 3066 | vcpu->arch.regs[VCPU_REGS_RDI] = regs->rdi; | 3133 | kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi); |
| 3067 | vcpu->arch.regs[VCPU_REGS_RSP] = regs->rsp; | 3134 | kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp); |
| 3068 | vcpu->arch.regs[VCPU_REGS_RBP] = regs->rbp; | 3135 | kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp); |
| 3069 | #ifdef CONFIG_X86_64 | 3136 | #ifdef CONFIG_X86_64 |
| 3070 | vcpu->arch.regs[VCPU_REGS_R8] = regs->r8; | 3137 | kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8); |
| 3071 | vcpu->arch.regs[VCPU_REGS_R9] = regs->r9; | 3138 | kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9); |
| 3072 | vcpu->arch.regs[VCPU_REGS_R10] = regs->r10; | 3139 | kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10); |
| 3073 | vcpu->arch.regs[VCPU_REGS_R11] = regs->r11; | 3140 | kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11); |
| 3074 | vcpu->arch.regs[VCPU_REGS_R12] = regs->r12; | 3141 | kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12); |
| 3075 | vcpu->arch.regs[VCPU_REGS_R13] = regs->r13; | 3142 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); |
| 3076 | vcpu->arch.regs[VCPU_REGS_R14] = regs->r14; | 3143 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); |
| 3077 | vcpu->arch.regs[VCPU_REGS_R15] = regs->r15; | 3144 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); |
| 3145 | |||
| 3078 | #endif | 3146 | #endif |
| 3079 | 3147 | ||
| 3080 | vcpu->arch.rip = regs->rip; | 3148 | kvm_rip_write(vcpu, regs->rip); |
| 3081 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); | 3149 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); |
| 3082 | 3150 | ||
| 3083 | kvm_x86_ops->decache_regs(vcpu); | ||
| 3084 | 3151 | ||
| 3085 | vcpu->arch.exception.pending = false; | 3152 | vcpu->arch.exception.pending = false; |
| 3086 | 3153 | ||
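Throughout these hunks the direct reads and writes of vcpu->arch.regs[] and vcpu->arch.rip, previously bracketed by the cache_regs()/decache_regs() callbacks, are replaced by per-register accessors. The accessors live in arch/x86/kvm/kvm_cache_regs.h, which is not shown in this diff; a minimal sketch of the idea is below, assuming the helpers are built on the regs_avail/regs_dirty bitmaps and the ->cache_reg() hook added to the headers later in this patch. A register is pulled from the vendor module (VMCS/VMCB) only when first read, and writes are simply marked dirty for lazy write-back.

/* Sketch only; the real helpers are in arch/x86/kvm/kvm_cache_regs.h. */
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
					      enum kvm_reg reg)
{
	if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
		kvm_x86_ops->cache_reg(vcpu, reg);	/* fetch from hardware state */
	return vcpu->arch.regs[reg];
}

static inline void kvm_register_write(struct kvm_vcpu *vcpu,
				      enum kvm_reg reg, unsigned long val)
{
	vcpu->arch.regs[reg] = val;
	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);	/* write back lazily */
}

static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
{
	return kvm_register_read(vcpu, VCPU_REGS_RIP);
}

static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}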
| @@ -3294,11 +3361,33 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | |||
| 3294 | return 0; | 3361 | return 0; |
| 3295 | } | 3362 | } |
| 3296 | 3363 | ||
| 3364 | static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) | ||
| 3365 | { | ||
| 3366 | struct kvm_segment segvar = { | ||
| 3367 | .base = selector << 4, | ||
| 3368 | .limit = 0xffff, | ||
| 3369 | .selector = selector, | ||
| 3370 | .type = 3, | ||
| 3371 | .present = 1, | ||
| 3372 | .dpl = 3, | ||
| 3373 | .db = 0, | ||
| 3374 | .s = 1, | ||
| 3375 | .l = 0, | ||
| 3376 | .g = 0, | ||
| 3377 | .avl = 0, | ||
| 3378 | .unusable = 0, | ||
| 3379 | }; | ||
| 3380 | kvm_x86_ops->set_segment(vcpu, &segvar, seg); | ||
| 3381 | return 0; | ||
| 3382 | } | ||
| 3383 | |||
| 3297 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3384 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
| 3298 | int type_bits, int seg) | 3385 | int type_bits, int seg) |
| 3299 | { | 3386 | { |
| 3300 | struct kvm_segment kvm_seg; | 3387 | struct kvm_segment kvm_seg; |
| 3301 | 3388 | ||
| 3389 | if (!(vcpu->arch.cr0 & X86_CR0_PE)) | ||
| 3390 | return kvm_load_realmode_segment(vcpu, selector, seg); | ||
| 3302 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) | 3391 | if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg)) |
| 3303 | return 1; | 3392 | return 1; |
| 3304 | kvm_seg.type |= type_bits; | 3393 | kvm_seg.type |= type_bits; |
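kvm_load_realmode_segment() short-circuits descriptor loading when CR0.PE is clear: in real mode a selector is not an index into a descriptor table, it simply supplies the upper bits of a 20-bit base, so the segment is synthesized as base = selector << 4 with a 64 KiB limit. A small worked example of the address arithmetic, purely illustrative:

#include <stdio.h>

int main(void)
{
	unsigned short selector = 0xf000;	/* e.g. the legacy BIOS segment */
	unsigned short offset   = 0xfff0;

	unsigned int base   = (unsigned int)selector << 4;	/* 0xf0000 */
	unsigned int linear = base + offset;			/* 0xffff0 */

	printf("base=%#x linear=%#x limit=%#x\n", base, linear, 0xffff);
	return 0;
}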
| @@ -3316,17 +3405,16 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
| 3316 | struct tss_segment_32 *tss) | 3405 | struct tss_segment_32 *tss) |
| 3317 | { | 3406 | { |
| 3318 | tss->cr3 = vcpu->arch.cr3; | 3407 | tss->cr3 = vcpu->arch.cr3; |
| 3319 | tss->eip = vcpu->arch.rip; | 3408 | tss->eip = kvm_rip_read(vcpu); |
| 3320 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | 3409 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); |
| 3321 | tss->eax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3410 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3322 | tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3411 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3323 | tss->edx = vcpu->arch.regs[VCPU_REGS_RDX]; | 3412 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 3324 | tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX]; | 3413 | tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
| 3325 | tss->esp = vcpu->arch.regs[VCPU_REGS_RSP]; | 3414 | tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
| 3326 | tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP]; | 3415 | tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); |
| 3327 | tss->esi = vcpu->arch.regs[VCPU_REGS_RSI]; | 3416 | tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 3328 | tss->edi = vcpu->arch.regs[VCPU_REGS_RDI]; | 3417 | tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 3329 | |||
| 3330 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | 3418 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); |
| 3331 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | 3419 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); |
| 3332 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | 3420 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); |
| @@ -3342,17 +3430,17 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
| 3342 | { | 3430 | { |
| 3343 | kvm_set_cr3(vcpu, tss->cr3); | 3431 | kvm_set_cr3(vcpu, tss->cr3); |
| 3344 | 3432 | ||
| 3345 | vcpu->arch.rip = tss->eip; | 3433 | kvm_rip_write(vcpu, tss->eip); |
| 3346 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | 3434 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); |
| 3347 | 3435 | ||
| 3348 | vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax; | 3436 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); |
| 3349 | vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx; | 3437 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); |
| 3350 | vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx; | 3438 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); |
| 3351 | vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx; | 3439 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); |
| 3352 | vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp; | 3440 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); |
| 3353 | vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp; | 3441 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); |
| 3354 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; | 3442 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); |
| 3355 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; | 3443 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); |
| 3356 | 3444 | ||
| 3357 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 3445 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) |
| 3358 | return 1; | 3446 | return 1; |
| @@ -3380,16 +3468,16 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
| 3380 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, | 3468 | static void save_state_to_tss16(struct kvm_vcpu *vcpu, |
| 3381 | struct tss_segment_16 *tss) | 3469 | struct tss_segment_16 *tss) |
| 3382 | { | 3470 | { |
| 3383 | tss->ip = vcpu->arch.rip; | 3471 | tss->ip = kvm_rip_read(vcpu); |
| 3384 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | 3472 | tss->flag = kvm_x86_ops->get_rflags(vcpu); |
| 3385 | tss->ax = vcpu->arch.regs[VCPU_REGS_RAX]; | 3473 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 3386 | tss->cx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3474 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 3387 | tss->dx = vcpu->arch.regs[VCPU_REGS_RDX]; | 3475 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| 3388 | tss->bx = vcpu->arch.regs[VCPU_REGS_RBX]; | 3476 | tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); |
| 3389 | tss->sp = vcpu->arch.regs[VCPU_REGS_RSP]; | 3477 | tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
| 3390 | tss->bp = vcpu->arch.regs[VCPU_REGS_RBP]; | 3478 | tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); |
| 3391 | tss->si = vcpu->arch.regs[VCPU_REGS_RSI]; | 3479 | tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); |
| 3392 | tss->di = vcpu->arch.regs[VCPU_REGS_RDI]; | 3480 | tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); |
| 3393 | 3481 | ||
| 3394 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); | 3482 | tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); |
| 3395 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); | 3483 | tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); |
| @@ -3402,16 +3490,16 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
| 3402 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | 3490 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, |
| 3403 | struct tss_segment_16 *tss) | 3491 | struct tss_segment_16 *tss) |
| 3404 | { | 3492 | { |
| 3405 | vcpu->arch.rip = tss->ip; | 3493 | kvm_rip_write(vcpu, tss->ip); |
| 3406 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | 3494 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); |
| 3407 | vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax; | 3495 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); |
| 3408 | vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx; | 3496 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); |
| 3409 | vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx; | 3497 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); |
| 3410 | vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx; | 3498 | kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); |
| 3411 | vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp; | 3499 | kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); |
| 3412 | vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp; | 3500 | kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); |
| 3413 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; | 3501 | kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); |
| 3414 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; | 3502 | kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); |
| 3415 | 3503 | ||
| 3416 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 3504 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) |
| 3417 | return 1; | 3505 | return 1; |
| @@ -3534,7 +3622,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3534 | } | 3622 | } |
| 3535 | 3623 | ||
| 3536 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 3624 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
| 3537 | kvm_x86_ops->cache_regs(vcpu); | ||
| 3538 | 3625 | ||
| 3539 | if (nseg_desc.type & 8) | 3626 | if (nseg_desc.type & 8) |
| 3540 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, | 3627 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, |
| @@ -3559,7 +3646,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3559 | tr_seg.type = 11; | 3646 | tr_seg.type = 11; |
| 3560 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3647 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
| 3561 | out: | 3648 | out: |
| 3562 | kvm_x86_ops->decache_regs(vcpu); | ||
| 3563 | return ret; | 3649 | return ret; |
| 3564 | } | 3650 | } |
| 3565 | EXPORT_SYMBOL_GPL(kvm_task_switch); | 3651 | EXPORT_SYMBOL_GPL(kvm_task_switch); |
| @@ -3622,6 +3708,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3622 | pr_debug("Set back pending irq %d\n", | 3708 | pr_debug("Set back pending irq %d\n", |
| 3623 | pending_vec); | 3709 | pending_vec); |
| 3624 | } | 3710 | } |
| 3711 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
| 3625 | } | 3712 | } |
| 3626 | 3713 | ||
| 3627 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 3714 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
| @@ -3634,6 +3721,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 3634 | kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | 3721 | kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); |
| 3635 | kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 3722 | kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
| 3636 | 3723 | ||
| 3724 | /* Older userspace won't unhalt the vcpu on reset. */ | ||
| 3725 | if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 && | ||
| 3726 | sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 && | ||
| 3727 | !(vcpu->arch.cr0 & X86_CR0_PE)) | ||
| 3728 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
| 3729 | |||
| 3637 | vcpu_put(vcpu); | 3730 | vcpu_put(vcpu); |
| 3638 | 3731 | ||
| 3639 | return 0; | 3732 | return 0; |
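The new check recognizes the architectural power-on state: CS selector 0xf000 with base 0xffff0000, RIP 0xfff0 and CR0.PE clear, which together address the conventional reset vector at 0xfffffff0 (CS.base + RIP, the last 16 bytes below 4 GiB). When an older userspace restores that state into vcpu 0, the vcpu is marked runnable as if INIT/SIPI had been delivered. The same test, written as a standalone predicate with a hypothetical helper name (the kernel open-codes it in the hunk above):

/* Hypothetical helper; the condition matches the open-coded check above. */
static bool looks_like_reset_state(struct kvm_vcpu *vcpu,
				   struct kvm_sregs *sregs)
{
	return vcpu->vcpu_id == 0 &&
	       kvm_rip_read(vcpu) == 0xfff0 &&
	       sregs->cs.selector == 0xf000 &&
	       sregs->cs.base == 0xffff0000 &&
	       !(vcpu->arch.cr0 & X86_CR0_PE);
}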
| @@ -3918,6 +4011,7 @@ struct kvm *kvm_arch_create_vm(void) | |||
| 3918 | return ERR_PTR(-ENOMEM); | 4011 | return ERR_PTR(-ENOMEM); |
| 3919 | 4012 | ||
| 3920 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 4013 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
| 4014 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | ||
| 3921 | 4015 | ||
| 3922 | return kvm; | 4016 | return kvm; |
| 3923 | } | 4017 | } |
| @@ -3950,6 +4044,8 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
| 3950 | 4044 | ||
| 3951 | void kvm_arch_destroy_vm(struct kvm *kvm) | 4045 | void kvm_arch_destroy_vm(struct kvm *kvm) |
| 3952 | { | 4046 | { |
| 4047 | kvm_iommu_unmap_guest(kvm); | ||
| 4048 | kvm_free_all_assigned_devices(kvm); | ||
| 3953 | kvm_free_pit(kvm); | 4049 | kvm_free_pit(kvm); |
| 3954 | kfree(kvm->arch.vpic); | 4050 | kfree(kvm->arch.vpic); |
| 3955 | kfree(kvm->arch.vioapic); | 4051 | kfree(kvm->arch.vioapic); |
| @@ -3981,7 +4077,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 3981 | userspace_addr = do_mmap(NULL, 0, | 4077 | userspace_addr = do_mmap(NULL, 0, |
| 3982 | npages * PAGE_SIZE, | 4078 | npages * PAGE_SIZE, |
| 3983 | PROT_READ | PROT_WRITE, | 4079 | PROT_READ | PROT_WRITE, |
| 3984 | MAP_SHARED | MAP_ANONYMOUS, | 4080 | MAP_PRIVATE | MAP_ANONYMOUS, |
| 3985 | 0); | 4081 | 0); |
| 3986 | up_write(¤t->mm->mmap_sem); | 4082 | up_write(¤t->mm->mmap_sem); |
| 3987 | 4083 | ||
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h new file mode 100644 index 000000000000..6a4be78a7384 --- /dev/null +++ b/arch/x86/kvm/x86.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | #ifndef ARCH_X86_KVM_X86_H | ||
| 2 | #define ARCH_X86_KVM_X86_H | ||
| 3 | |||
| 4 | #include <linux/kvm_host.h> | ||
| 5 | |||
| 6 | static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | ||
| 7 | { | ||
| 8 | vcpu->arch.exception.pending = false; | ||
| 9 | } | ||
| 10 | |||
| 11 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) | ||
| 12 | { | ||
| 13 | vcpu->arch.interrupt.pending = true; | ||
| 14 | vcpu->arch.interrupt.nr = vector; | ||
| 15 | } | ||
| 16 | |||
| 17 | static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | ||
| 18 | { | ||
| 19 | vcpu->arch.interrupt.pending = false; | ||
| 20 | } | ||
| 21 | |||
| 22 | #endif | ||
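The new arch/x86/kvm/x86.h gives the injection path a single queued interrupt alongside the existing exception queue; the pending/nr pair it manipulates is the struct kvm_queued_interrupt added to struct kvm_vcpu_arch further down in this patch. The intent is to record an interrupt whose delivery is in flight so it can be replayed on a later entry; how the vendor modules use it is not shown here, so the snippet below is only a hypothetical caller illustrating the API surface.

/* Hypothetical usage sketch, not taken from this diff. */
static void remember_undelivered_irq(struct kvm_vcpu *vcpu, u8 vector)
{
	kvm_queue_interrupt(vcpu, vector);	/* keep the vector for replay */
}

static void irq_delivery_completed(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.interrupt.pending)
		kvm_clear_interrupt_queue(vcpu);	/* nothing left to replay */
}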
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index f2f90468f8b1..ea051173b0da 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #define DPRINTF(_f, _a ...) printf(_f , ## _a) | 26 | #define DPRINTF(_f, _a ...) printf(_f , ## _a) |
| 27 | #else | 27 | #else |
| 28 | #include <linux/kvm_host.h> | 28 | #include <linux/kvm_host.h> |
| 29 | #include "kvm_cache_regs.h" | ||
| 29 | #define DPRINTF(x...) do {} while (0) | 30 | #define DPRINTF(x...) do {} while (0) |
| 30 | #endif | 31 | #endif |
| 31 | #include <linux/module.h> | 32 | #include <linux/module.h> |
| @@ -46,25 +47,26 @@ | |||
| 46 | #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ | 47 | #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */ |
| 47 | #define DstReg (2<<1) /* Register operand. */ | 48 | #define DstReg (2<<1) /* Register operand. */ |
| 48 | #define DstMem (3<<1) /* Memory operand. */ | 49 | #define DstMem (3<<1) /* Memory operand. */ |
| 49 | #define DstMask (3<<1) | 50 | #define DstAcc (4<<1) /* Destination Accumulator */ |
| 51 | #define DstMask (7<<1) | ||
| 50 | /* Source operand type. */ | 52 | /* Source operand type. */ |
| 51 | #define SrcNone (0<<3) /* No source operand. */ | 53 | #define SrcNone (0<<4) /* No source operand. */ |
| 52 | #define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */ | 54 | #define SrcImplicit (0<<4) /* Source operand is implicit in the opcode. */ |
| 53 | #define SrcReg (1<<3) /* Register operand. */ | 55 | #define SrcReg (1<<4) /* Register operand. */ |
| 54 | #define SrcMem (2<<3) /* Memory operand. */ | 56 | #define SrcMem (2<<4) /* Memory operand. */ |
| 55 | #define SrcMem16 (3<<3) /* Memory operand (16-bit). */ | 57 | #define SrcMem16 (3<<4) /* Memory operand (16-bit). */ |
| 56 | #define SrcMem32 (4<<3) /* Memory operand (32-bit). */ | 58 | #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ |
| 57 | #define SrcImm (5<<3) /* Immediate operand. */ | 59 | #define SrcImm (5<<4) /* Immediate operand. */ |
| 58 | #define SrcImmByte (6<<3) /* 8-bit sign-extended immediate operand. */ | 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ |
| 59 | #define SrcMask (7<<3) | 61 | #define SrcMask (7<<4) |
| 60 | /* Generic ModRM decode. */ | 62 | /* Generic ModRM decode. */ |
| 61 | #define ModRM (1<<6) | 63 | #define ModRM (1<<7) |
| 62 | /* Destination is only written; never read. */ | 64 | /* Destination is only written; never read. */ |
| 63 | #define Mov (1<<7) | 65 | #define Mov (1<<8) |
| 64 | #define BitOp (1<<8) | 66 | #define BitOp (1<<9) |
| 65 | #define MemAbs (1<<9) /* Memory operand is absolute displacement */ | 67 | #define MemAbs (1<<10) /* Memory operand is absolute displacement */ |
| 66 | #define String (1<<10) /* String instruction (rep capable) */ | 68 | #define String (1<<12) /* String instruction (rep capable) */ |
| 67 | #define Stack (1<<11) /* Stack instruction (push/pop) */ | 69 | #define Stack (1<<13) /* Stack instruction (push/pop) */ |
| 68 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 70 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
| 69 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 71 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
| 70 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 72 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
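Adding DstAcc forces the destination field from two bits to three, so every field above it shifts up: the source type moves from bits 3-5 to 4-6, ModRM from bit 6 to bit 7, and String/Stack land on bits 12 and 13, leaving bits 14-15 for Group/GroupDual as before. The decode loop keeps extracting fields with the same mask idiom; for example, the new 0x24 ("and al,imm8") entry is DstAcc | SrcImmByte, and an illustrative extraction with the shifted layout looks like this:

/* Illustrative only: field extraction with the layout defined above. */
static void describe_entry(u16 d)
{
	switch (d & DstMask) {		/* DstMask is now three bits wide: (7<<1) */
	case DstAcc:
		/* destination is the accumulator (AL/AX/EAX) */
		break;
	}
	switch (d & SrcMask) {		/* source field moved up to (7<<4) */
	case SrcImmByte:
		/* an 8-bit sign-extended immediate follows the opcode */
		break;
	}
}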
| @@ -94,7 +96,7 @@ static u16 opcode_table[256] = { | |||
| 94 | /* 0x20 - 0x27 */ | 96 | /* 0x20 - 0x27 */ |
| 95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 97 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 98 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 97 | SrcImmByte, SrcImm, 0, 0, | 99 | DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0, |
| 98 | /* 0x28 - 0x2F */ | 100 | /* 0x28 - 0x2F */ |
| 99 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 101 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 100 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 102 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| @@ -106,7 +108,8 @@ static u16 opcode_table[256] = { | |||
| 106 | /* 0x38 - 0x3F */ | 108 | /* 0x38 - 0x3F */ |
| 107 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 109 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 108 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 110 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 109 | 0, 0, 0, 0, | 111 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
| 112 | 0, 0, | ||
| 110 | /* 0x40 - 0x47 */ | 113 | /* 0x40 - 0x47 */ |
| 111 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 114 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
| 112 | /* 0x48 - 0x4F */ | 115 | /* 0x48 - 0x4F */ |
| @@ -153,9 +156,16 @@ static u16 opcode_table[256] = { | |||
| 153 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 156 | 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, |
| 154 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 157 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, |
| 155 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 158 | ByteOp | ImplicitOps | String, ImplicitOps | String, |
| 156 | /* 0xB0 - 0xBF */ | 159 | /* 0xB0 - 0xB7 */ |
| 157 | 0, 0, 0, 0, 0, 0, 0, 0, | 160 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| 158 | DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0, | 161 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, |
| 162 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | ||
| 163 | ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, | ||
| 164 | /* 0xB8 - 0xBF */ | ||
| 165 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 166 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 167 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 168 | DstReg | SrcImm | Mov, DstReg | SrcImm | Mov, | ||
| 159 | /* 0xC0 - 0xC7 */ | 169 | /* 0xC0 - 0xC7 */ |
| 160 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, | 170 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, |
| 161 | 0, ImplicitOps | Stack, 0, 0, | 171 | 0, ImplicitOps | Stack, 0, 0, |
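Opcodes 0xB0-0xB7 are the byte forms "mov r8, imm8" and 0xB8-0xBF the word/dword forms "mov r16/32, imm"; the destination register is encoded in the low three bits of the opcode itself rather than in a ModRM byte, which is why all sixteen slots can share the same DstReg | SrcImm | Mov decode (with ByteOp added for the first eight). An illustration of the encoding, separate from the emulator's own decode path:

/* For the 0xB0-0xBF family the register number lives in the opcode:
 *   0xB8 -> EAX, 0xB9 -> ECX, ..., 0xBF -> EDI (REX.B extends to r8-r15). */
static int mov_imm_dest_reg(unsigned char opcode)
{
	return opcode & 7;	/* low 3 bits select the register */
}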
| @@ -169,17 +179,20 @@ static u16 opcode_table[256] = { | |||
| 169 | /* 0xD8 - 0xDF */ | 179 | /* 0xD8 - 0xDF */ |
| 170 | 0, 0, 0, 0, 0, 0, 0, 0, | 180 | 0, 0, 0, 0, 0, 0, 0, 0, |
| 171 | /* 0xE0 - 0xE7 */ | 181 | /* 0xE0 - 0xE7 */ |
| 172 | 0, 0, 0, 0, 0, 0, 0, 0, | 182 | 0, 0, 0, 0, |
| 183 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | ||
| 184 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | ||
| 173 | /* 0xE8 - 0xEF */ | 185 | /* 0xE8 - 0xEF */ |
| 174 | ImplicitOps | Stack, SrcImm | ImplicitOps, | 186 | ImplicitOps | Stack, SrcImm | ImplicitOps, |
| 175 | ImplicitOps, SrcImmByte | ImplicitOps, | 187 | ImplicitOps, SrcImmByte | ImplicitOps, |
| 176 | 0, 0, 0, 0, | 188 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
| 189 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | ||
| 177 | /* 0xF0 - 0xF7 */ | 190 | /* 0xF0 - 0xF7 */ |
| 178 | 0, 0, 0, 0, | 191 | 0, 0, 0, 0, |
| 179 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, | 192 | ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3, |
| 180 | /* 0xF8 - 0xFF */ | 193 | /* 0xF8 - 0xFF */ |
| 181 | ImplicitOps, 0, ImplicitOps, ImplicitOps, | 194 | ImplicitOps, 0, ImplicitOps, ImplicitOps, |
| 182 | 0, 0, Group | Group4, Group | Group5, | 195 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
| 183 | }; | 196 | }; |
| 184 | 197 | ||
| 185 | static u16 twobyte_table[256] = { | 198 | static u16 twobyte_table[256] = { |
| @@ -268,15 +281,16 @@ static u16 group_table[] = { | |||
| 268 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 281 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
| 269 | 0, 0, 0, 0, | 282 | 0, 0, 0, 0, |
| 270 | [Group3*8] = | 283 | [Group3*8] = |
| 271 | DstMem | SrcImm | ModRM | SrcImm, 0, | 284 | DstMem | SrcImm | ModRM, 0, |
| 272 | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 285 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 273 | 0, 0, 0, 0, | 286 | 0, 0, 0, 0, |
| 274 | [Group4*8] = | 287 | [Group4*8] = |
| 275 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, | 288 | ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM, |
| 276 | 0, 0, 0, 0, 0, 0, | 289 | 0, 0, 0, 0, 0, 0, |
| 277 | [Group5*8] = | 290 | [Group5*8] = |
| 278 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0, | 291 | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, |
| 279 | SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0, | 292 | SrcMem | ModRM | Stack, 0, |
| 293 | SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, | ||
| 280 | [Group7*8] = | 294 | [Group7*8] = |
| 281 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, | 295 | 0, 0, ModRM | SrcMem, ModRM | SrcMem, |
| 282 | SrcNone | ModRM | DstMem | Mov, 0, | 296 | SrcNone | ModRM | DstMem | Mov, 0, |
| @@ -839,7 +853,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 839 | /* Shadow copy of register state. Committed on successful emulation. */ | 853 | /* Shadow copy of register state. Committed on successful emulation. */ |
| 840 | 854 | ||
| 841 | memset(c, 0, sizeof(struct decode_cache)); | 855 | memset(c, 0, sizeof(struct decode_cache)); |
| 842 | c->eip = ctxt->vcpu->arch.rip; | 856 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 843 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 857 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
| 844 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 858 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 845 | 859 | ||
| @@ -1048,6 +1062,23 @@ done_prefixes: | |||
| 1048 | } | 1062 | } |
| 1049 | c->dst.type = OP_MEM; | 1063 | c->dst.type = OP_MEM; |
| 1050 | break; | 1064 | break; |
| 1065 | case DstAcc: | ||
| 1066 | c->dst.type = OP_REG; | ||
| 1067 | c->dst.bytes = c->op_bytes; | ||
| 1068 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1069 | switch (c->op_bytes) { | ||
| 1070 | case 1: | ||
| 1071 | c->dst.val = *(u8 *)c->dst.ptr; | ||
| 1072 | break; | ||
| 1073 | case 2: | ||
| 1074 | c->dst.val = *(u16 *)c->dst.ptr; | ||
| 1075 | break; | ||
| 1076 | case 4: | ||
| 1077 | c->dst.val = *(u32 *)c->dst.ptr; | ||
| 1078 | break; | ||
| 1079 | } | ||
| 1080 | c->dst.orig_val = c->dst.val; | ||
| 1081 | break; | ||
| 1051 | } | 1082 | } |
| 1052 | 1083 | ||
| 1053 | if (c->rip_relative) | 1084 | if (c->rip_relative) |
| @@ -1151,6 +1182,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, | |||
| 1151 | case 1: /* dec */ | 1182 | case 1: /* dec */ |
| 1152 | emulate_1op("dec", c->dst, ctxt->eflags); | 1183 | emulate_1op("dec", c->dst, ctxt->eflags); |
| 1153 | break; | 1184 | break; |
| 1185 | case 2: /* call near abs */ { | ||
| 1186 | long int old_eip; | ||
| 1187 | old_eip = c->eip; | ||
| 1188 | c->eip = c->src.val; | ||
| 1189 | c->src.val = old_eip; | ||
| 1190 | emulate_push(ctxt); | ||
| 1191 | break; | ||
| 1192 | } | ||
| 1154 | case 4: /* jmp abs */ | 1193 | case 4: /* jmp abs */ |
| 1155 | c->eip = c->src.val; | 1194 | c->eip = c->src.val; |
| 1156 | break; | 1195 | break; |
| @@ -1251,6 +1290,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1251 | u64 msr_data; | 1290 | u64 msr_data; |
| 1252 | unsigned long saved_eip = 0; | 1291 | unsigned long saved_eip = 0; |
| 1253 | struct decode_cache *c = &ctxt->decode; | 1292 | struct decode_cache *c = &ctxt->decode; |
| 1293 | unsigned int port; | ||
| 1294 | int io_dir_in; | ||
| 1254 | int rc = 0; | 1295 | int rc = 0; |
| 1255 | 1296 | ||
| 1256 | /* Shadow copy of register state. Committed on successful emulation. | 1297 | /* Shadow copy of register state. Committed on successful emulation. |
| @@ -1267,7 +1308,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1267 | if (c->rep_prefix && (c->d & String)) { | 1308 | if (c->rep_prefix && (c->d & String)) { |
| 1268 | /* All REP prefixes have the same first termination condition */ | 1309 | /* All REP prefixes have the same first termination condition */ |
| 1269 | if (c->regs[VCPU_REGS_RCX] == 0) { | 1310 | if (c->regs[VCPU_REGS_RCX] == 0) { |
| 1270 | ctxt->vcpu->arch.rip = c->eip; | 1311 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1271 | goto done; | 1312 | goto done; |
| 1272 | } | 1313 | } |
| 1273 | /* The second termination condition only applies for REPE | 1314 | /* The second termination condition only applies for REPE |
| @@ -1281,17 +1322,17 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 1281 | (c->b == 0xae) || (c->b == 0xaf)) { | 1322 | (c->b == 0xae) || (c->b == 0xaf)) { |
| 1282 | if ((c->rep_prefix == REPE_PREFIX) && | 1323 | if ((c->rep_prefix == REPE_PREFIX) && |
| 1283 | ((ctxt->eflags & EFLG_ZF) == 0)) { | 1324 | ((ctxt->eflags & EFLG_ZF) == 0)) { |
| 1284 | ctxt->vcpu->arch.rip = c->eip; | 1325 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1285 | goto done; | 1326 | goto done; |
| 1286 | } | 1327 | } |
| 1287 | if ((c->rep_prefix == REPNE_PREFIX) && | 1328 | if ((c->rep_prefix == REPNE_PREFIX) && |
| 1288 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { | 1329 | ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { |
| 1289 | ctxt->vcpu->arch.rip = c->eip; | 1330 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1290 | goto done; | 1331 | goto done; |
| 1291 | } | 1332 | } |
| 1292 | } | 1333 | } |
| 1293 | c->regs[VCPU_REGS_RCX]--; | 1334 | c->regs[VCPU_REGS_RCX]--; |
| 1294 | c->eip = ctxt->vcpu->arch.rip; | 1335 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1295 | } | 1336 | } |
| 1296 | 1337 | ||
| 1297 | if (c->src.type == OP_MEM) { | 1338 | if (c->src.type == OP_MEM) { |
| @@ -1351,27 +1392,10 @@ special_insn: | |||
| 1351 | sbb: /* sbb */ | 1392 | sbb: /* sbb */ |
| 1352 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 1393 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
| 1353 | break; | 1394 | break; |
| 1354 | case 0x20 ... 0x23: | 1395 | case 0x20 ... 0x25: |
| 1355 | and: /* and */ | 1396 | and: /* and */ |
| 1356 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | 1397 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); |
| 1357 | break; | 1398 | break; |
| 1358 | case 0x24: /* and al imm8 */ | ||
| 1359 | c->dst.type = OP_REG; | ||
| 1360 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1361 | c->dst.val = *(u8 *)c->dst.ptr; | ||
| 1362 | c->dst.bytes = 1; | ||
| 1363 | c->dst.orig_val = c->dst.val; | ||
| 1364 | goto and; | ||
| 1365 | case 0x25: /* and ax imm16, or eax imm32 */ | ||
| 1366 | c->dst.type = OP_REG; | ||
| 1367 | c->dst.bytes = c->op_bytes; | ||
| 1368 | c->dst.ptr = &c->regs[VCPU_REGS_RAX]; | ||
| 1369 | if (c->op_bytes == 2) | ||
| 1370 | c->dst.val = *(u16 *)c->dst.ptr; | ||
| 1371 | else | ||
| 1372 | c->dst.val = *(u32 *)c->dst.ptr; | ||
| 1373 | c->dst.orig_val = c->dst.val; | ||
| 1374 | goto and; | ||
| 1375 | case 0x28 ... 0x2d: | 1399 | case 0x28 ... 0x2d: |
| 1376 | sub: /* sub */ | 1400 | sub: /* sub */ |
| 1377 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); | 1401 | emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags); |
| @@ -1659,7 +1683,7 @@ special_insn: | |||
| 1659 | case 0xae ... 0xaf: /* scas */ | 1683 | case 0xae ... 0xaf: /* scas */ |
| 1660 | DPRINTF("Urk! I don't handle SCAS.\n"); | 1684 | DPRINTF("Urk! I don't handle SCAS.\n"); |
| 1661 | goto cannot_emulate; | 1685 | goto cannot_emulate; |
| 1662 | case 0xb8: /* mov r, imm */ | 1686 | case 0xb0 ... 0xbf: /* mov r, imm */ |
| 1663 | goto mov; | 1687 | goto mov; |
| 1664 | case 0xc0 ... 0xc1: | 1688 | case 0xc0 ... 0xc1: |
| 1665 | emulate_grp2(ctxt); | 1689 | emulate_grp2(ctxt); |
| @@ -1679,6 +1703,16 @@ special_insn: | |||
| 1679 | c->src.val = c->regs[VCPU_REGS_RCX]; | 1703 | c->src.val = c->regs[VCPU_REGS_RCX]; |
| 1680 | emulate_grp2(ctxt); | 1704 | emulate_grp2(ctxt); |
| 1681 | break; | 1705 | break; |
| 1706 | case 0xe4: /* inb */ | ||
| 1707 | case 0xe5: /* in */ | ||
| 1708 | port = insn_fetch(u8, 1, c->eip); | ||
| 1709 | io_dir_in = 1; | ||
| 1710 | goto do_io; | ||
| 1711 | case 0xe6: /* outb */ | ||
| 1712 | case 0xe7: /* out */ | ||
| 1713 | port = insn_fetch(u8, 1, c->eip); | ||
| 1714 | io_dir_in = 0; | ||
| 1715 | goto do_io; | ||
| 1682 | case 0xe8: /* call (near) */ { | 1716 | case 0xe8: /* call (near) */ { |
| 1683 | long int rel; | 1717 | long int rel; |
| 1684 | switch (c->op_bytes) { | 1718 | switch (c->op_bytes) { |
| @@ -1729,6 +1763,22 @@ special_insn: | |||
| 1729 | jmp_rel(c, c->src.val); | 1763 | jmp_rel(c, c->src.val); |
| 1730 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1764 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 1731 | break; | 1765 | break; |
| 1766 | case 0xec: /* in al,dx */ | ||
| 1767 | case 0xed: /* in (e/r)ax,dx */ | ||
| 1768 | port = c->regs[VCPU_REGS_RDX]; | ||
| 1769 | io_dir_in = 1; | ||
| 1770 | goto do_io; | ||
| 1771 | case 0xee: /* out al,dx */ | ||
| 1772 | case 0xef: /* out (e/r)ax,dx */ | ||
| 1773 | port = c->regs[VCPU_REGS_RDX]; | ||
| 1774 | io_dir_in = 0; | ||
| 1775 | do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, | ||
| 1776 | (c->d & ByteOp) ? 1 : c->op_bytes, | ||
| 1777 | port) != 0) { | ||
| 1778 | c->eip = saved_eip; | ||
| 1779 | goto cannot_emulate; | ||
| 1780 | } | ||
| 1781 | return 0; | ||
| 1732 | case 0xf4: /* hlt */ | 1782 | case 0xf4: /* hlt */ |
| 1733 | ctxt->vcpu->arch.halt_request = 1; | 1783 | ctxt->vcpu->arch.halt_request = 1; |
| 1734 | break; | 1784 | break; |
| @@ -1754,6 +1804,14 @@ special_insn: | |||
| 1754 | ctxt->eflags |= X86_EFLAGS_IF; | 1804 | ctxt->eflags |= X86_EFLAGS_IF; |
| 1755 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1805 | c->dst.type = OP_NONE; /* Disable writeback. */ |
| 1756 | break; | 1806 | break; |
| 1807 | case 0xfc: /* cld */ | ||
| 1808 | ctxt->eflags &= ~EFLG_DF; | ||
| 1809 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 1810 | break; | ||
| 1811 | case 0xfd: /* std */ | ||
| 1812 | ctxt->eflags |= EFLG_DF; | ||
| 1813 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
| 1814 | break; | ||
| 1757 | case 0xfe ... 0xff: /* Grp4/Grp5 */ | 1815 | case 0xfe ... 0xff: /* Grp4/Grp5 */ |
| 1758 | rc = emulate_grp45(ctxt, ops); | 1816 | rc = emulate_grp45(ctxt, ops); |
| 1759 | if (rc != 0) | 1817 | if (rc != 0) |
| @@ -1768,7 +1826,7 @@ writeback: | |||
| 1768 | 1826 | ||
| 1769 | /* Commit shadow register state. */ | 1827 | /* Commit shadow register state. */ |
| 1770 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); | 1828 | memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); |
| 1771 | ctxt->vcpu->arch.rip = c->eip; | 1829 | kvm_rip_write(ctxt->vcpu, c->eip); |
| 1772 | 1830 | ||
| 1773 | done: | 1831 | done: |
| 1774 | if (rc == X86EMUL_UNHANDLEABLE) { | 1832 | if (rc == X86EMUL_UNHANDLEABLE) { |
| @@ -1793,7 +1851,7 @@ twobyte_insn: | |||
| 1793 | goto done; | 1851 | goto done; |
| 1794 | 1852 | ||
| 1795 | /* Let the processor re-execute the fixed hypercall */ | 1853 | /* Let the processor re-execute the fixed hypercall */ |
| 1796 | c->eip = ctxt->vcpu->arch.rip; | 1854 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1797 | /* Disable writeback. */ | 1855 | /* Disable writeback. */ |
| 1798 | c->dst.type = OP_NONE; | 1856 | c->dst.type = OP_NONE; |
| 1799 | break; | 1857 | break; |
| @@ -1889,7 +1947,7 @@ twobyte_insn: | |||
| 1889 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); | 1947 | rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); |
| 1890 | if (rc) { | 1948 | if (rc) { |
| 1891 | kvm_inject_gp(ctxt->vcpu, 0); | 1949 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1892 | c->eip = ctxt->vcpu->arch.rip; | 1950 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1893 | } | 1951 | } |
| 1894 | rc = X86EMUL_CONTINUE; | 1952 | rc = X86EMUL_CONTINUE; |
| 1895 | c->dst.type = OP_NONE; | 1953 | c->dst.type = OP_NONE; |
| @@ -1899,7 +1957,7 @@ twobyte_insn: | |||
| 1899 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); | 1957 | rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); |
| 1900 | if (rc) { | 1958 | if (rc) { |
| 1901 | kvm_inject_gp(ctxt->vcpu, 0); | 1959 | kvm_inject_gp(ctxt->vcpu, 0); |
| 1902 | c->eip = ctxt->vcpu->arch.rip; | 1960 | c->eip = kvm_rip_read(ctxt->vcpu); |
| 1903 | } else { | 1961 | } else { |
| 1904 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; | 1962 | c->regs[VCPU_REGS_RAX] = (u32)msr_data; |
| 1905 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; | 1963 | c->regs[VCPU_REGS_RDX] = msr_data >> 32; |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 004ba86326ae..c9f7cda48ed7 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
| @@ -198,17 +198,10 @@ unsigned long long xen_sched_clock(void) | |||
| 198 | /* Get the TSC speed from Xen */ | 198 | /* Get the TSC speed from Xen */ |
| 199 | unsigned long xen_tsc_khz(void) | 199 | unsigned long xen_tsc_khz(void) |
| 200 | { | 200 | { |
| 201 | u64 xen_khz = 1000000ULL << 32; | 201 | struct pvclock_vcpu_time_info *info = |
| 202 | const struct pvclock_vcpu_time_info *info = | ||
| 203 | &HYPERVISOR_shared_info->vcpu_info[0].time; | 202 | &HYPERVISOR_shared_info->vcpu_info[0].time; |
| 204 | 203 | ||
| 205 | do_div(xen_khz, info->tsc_to_system_mul); | 204 | return pvclock_tsc_khz(info); |
| 206 | if (info->tsc_shift < 0) | ||
| 207 | xen_khz <<= -info->tsc_shift; | ||
| 208 | else | ||
| 209 | xen_khz >>= info->tsc_shift; | ||
| 210 | |||
| 211 | return xen_khz; | ||
| 212 | } | 205 | } |
| 213 | 206 | ||
| 214 | cycle_t xen_clocksource_read(void) | 207 | cycle_t xen_clocksource_read(void) |
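The open-coded conversion is replaced by pvclock_tsc_khz(), a helper shared with the other pvclock users. The math is unchanged: tsc_to_system_mul converts (shifted) TSC ticks to nanoseconds in 32.32 fixed point, so dividing 10^6 << 32 by the multiplier and undoing tsc_shift recovers the TSC frequency in kHz, exactly what the removed lines computed. A sketch of the shared helper, assuming it simply centralizes the deleted code:

/* Sketch of pvclock_tsc_khz(), assuming it mirrors the code removed above. */
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
	u64 pv_tsc_khz = 1000000ULL << 32;

	do_div(pv_tsc_khz, src->tsc_to_system_mul);
	if (src->tsc_shift < 0)
		pv_tsc_khz <<= -src->tsc_shift;
	else
		pv_tsc_khz >>= src->tsc_shift;
	return pv_tsc_khz;
}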
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index bd2c01674f5e..e842e756308a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c | |||
| @@ -28,9 +28,9 @@ | |||
| 28 | 28 | ||
| 29 | #include <linux/pci.h> | 29 | #include <linux/pci.h> |
| 30 | #include <linux/dmar.h> | 30 | #include <linux/dmar.h> |
| 31 | #include <linux/iova.h> | ||
| 32 | #include <linux/intel-iommu.h> | ||
| 31 | #include <linux/timer.h> | 33 | #include <linux/timer.h> |
| 32 | #include "iova.h" | ||
| 33 | #include "intel-iommu.h" | ||
| 34 | 34 | ||
| 35 | #undef PREFIX | 35 | #undef PREFIX |
| 36 | #define PREFIX "DMAR:" | 36 | #define PREFIX "DMAR:" |
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 389fdd6f4a9f..fc5f2dbf5323 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c | |||
| @@ -33,8 +33,8 @@ | |||
| 33 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
| 34 | #include <linux/mempool.h> | 34 | #include <linux/mempool.h> |
| 35 | #include <linux/timer.h> | 35 | #include <linux/timer.h> |
| 36 | #include "iova.h" | 36 | #include <linux/iova.h> |
| 37 | #include "intel-iommu.h" | 37 | #include <linux/intel-iommu.h> |
| 38 | #include <asm/proto.h> /* force_iommu in this header in x86-64*/ | 38 | #include <asm/proto.h> /* force_iommu in this header in x86-64*/ |
| 39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
| 40 | #include <asm/iommu.h> | 40 | #include <asm/iommu.h> |
| @@ -156,7 +156,7 @@ static inline void *alloc_domain_mem(void) | |||
| 156 | return iommu_kmem_cache_alloc(iommu_domain_cache); | 156 | return iommu_kmem_cache_alloc(iommu_domain_cache); |
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | static inline void free_domain_mem(void *vaddr) | 159 | static void free_domain_mem(void *vaddr) |
| 160 | { | 160 | { |
| 161 | kmem_cache_free(iommu_domain_cache, vaddr); | 161 | kmem_cache_free(iommu_domain_cache, vaddr); |
| 162 | } | 162 | } |
| @@ -1341,7 +1341,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) | |||
| 1341 | * find_domain | 1341 | * find_domain |
| 1342 | * Note: we use struct pci_dev->dev.archdata.iommu stores the info | 1342 | * Note: we use struct pci_dev->dev.archdata.iommu stores the info |
| 1343 | */ | 1343 | */ |
| 1344 | struct dmar_domain * | 1344 | static struct dmar_domain * |
| 1345 | find_domain(struct pci_dev *pdev) | 1345 | find_domain(struct pci_dev *pdev) |
| 1346 | { | 1346 | { |
| 1347 | struct device_domain_info *info; | 1347 | struct device_domain_info *info; |
| @@ -2318,3 +2318,111 @@ int __init intel_iommu_init(void) | |||
| 2318 | return 0; | 2318 | return 0; |
| 2319 | } | 2319 | } |
| 2320 | 2320 | ||
| 2321 | void intel_iommu_domain_exit(struct dmar_domain *domain) | ||
| 2322 | { | ||
| 2323 | u64 end; | ||
| 2324 | |||
| 2325 | /* Domain 0 is reserved, so dont process it */ | ||
| 2326 | if (!domain) | ||
| 2327 | return; | ||
| 2328 | |||
| 2329 | end = DOMAIN_MAX_ADDR(domain->gaw); | ||
| 2330 | end = end & (~PAGE_MASK_4K); | ||
| 2331 | |||
| 2332 | /* clear ptes */ | ||
| 2333 | dma_pte_clear_range(domain, 0, end); | ||
| 2334 | |||
| 2335 | /* free page tables */ | ||
| 2336 | dma_pte_free_pagetable(domain, 0, end); | ||
| 2337 | |||
| 2338 | iommu_free_domain(domain); | ||
| 2339 | free_domain_mem(domain); | ||
| 2340 | } | ||
| 2341 | EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); | ||
| 2342 | |||
| 2343 | struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) | ||
| 2344 | { | ||
| 2345 | struct dmar_drhd_unit *drhd; | ||
| 2346 | struct dmar_domain *domain; | ||
| 2347 | struct intel_iommu *iommu; | ||
| 2348 | |||
| 2349 | drhd = dmar_find_matched_drhd_unit(pdev); | ||
| 2350 | if (!drhd) { | ||
| 2351 | printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n"); | ||
| 2352 | return NULL; | ||
| 2353 | } | ||
| 2354 | |||
| 2355 | iommu = drhd->iommu; | ||
| 2356 | if (!iommu) { | ||
| 2357 | printk(KERN_ERR | ||
| 2358 | "intel_iommu_domain_alloc: iommu == NULL\n"); | ||
| 2359 | return NULL; | ||
| 2360 | } | ||
| 2361 | domain = iommu_alloc_domain(iommu); | ||
| 2362 | if (!domain) { | ||
| 2363 | printk(KERN_ERR | ||
| 2364 | "intel_iommu_domain_alloc: domain == NULL\n"); | ||
| 2365 | return NULL; | ||
| 2366 | } | ||
| 2367 | if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { | ||
| 2368 | printk(KERN_ERR | ||
| 2369 | "intel_iommu_domain_alloc: domain_init() failed\n"); | ||
| 2370 | intel_iommu_domain_exit(domain); | ||
| 2371 | return NULL; | ||
| 2372 | } | ||
| 2373 | return domain; | ||
| 2374 | } | ||
| 2375 | EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); | ||
| 2376 | |||
| 2377 | int intel_iommu_context_mapping( | ||
| 2378 | struct dmar_domain *domain, struct pci_dev *pdev) | ||
| 2379 | { | ||
| 2380 | int rc; | ||
| 2381 | rc = domain_context_mapping(domain, pdev); | ||
| 2382 | return rc; | ||
| 2383 | } | ||
| 2384 | EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); | ||
| 2385 | |||
| 2386 | int intel_iommu_page_mapping( | ||
| 2387 | struct dmar_domain *domain, dma_addr_t iova, | ||
| 2388 | u64 hpa, size_t size, int prot) | ||
| 2389 | { | ||
| 2390 | int rc; | ||
| 2391 | rc = domain_page_mapping(domain, iova, hpa, size, prot); | ||
| 2392 | return rc; | ||
| 2393 | } | ||
| 2394 | EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); | ||
| 2395 | |||
| 2396 | void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) | ||
| 2397 | { | ||
| 2398 | detach_domain_for_dev(domain, bus, devfn); | ||
| 2399 | } | ||
| 2400 | EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); | ||
| 2401 | |||
| 2402 | struct dmar_domain * | ||
| 2403 | intel_iommu_find_domain(struct pci_dev *pdev) | ||
| 2404 | { | ||
| 2405 | return find_domain(pdev); | ||
| 2406 | } | ||
| 2407 | EXPORT_SYMBOL_GPL(intel_iommu_find_domain); | ||
| 2408 | |||
| 2409 | int intel_iommu_found(void) | ||
| 2410 | { | ||
| 2411 | return g_num_of_iommus; | ||
| 2412 | } | ||
| 2413 | EXPORT_SYMBOL_GPL(intel_iommu_found); | ||
| 2414 | |||
| 2415 | u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) | ||
| 2416 | { | ||
| 2417 | struct dma_pte *pte; | ||
| 2418 | u64 pfn; | ||
| 2419 | |||
| 2420 | pfn = 0; | ||
| 2421 | pte = addr_to_dma_pte(domain, iova); | ||
| 2422 | |||
| 2423 | if (pte) | ||
| 2424 | pfn = dma_pte_addr(*pte); | ||
| 2425 | |||
| 2426 | return pfn >> PAGE_SHIFT_4K; | ||
| 2427 | } | ||
| 2428 | EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); | ||
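These exports form the minimal API that KVM's device-assignment code needs: check that an IOMMU is present, allocate a DMAR domain for a PCI device, program its context entry, establish guest-physical to host-physical mappings, and translate or tear them down later. The caller below is a hypothetical sketch using only the functions exported above; the prot flags are assumed to be the DMA_PTE_READ/DMA_PTE_WRITE bits from intel-iommu.h, and error handling is trimmed to the essentials.

/* Hypothetical caller sketch: map one page for an assigned device. */
static int assign_and_map_one_page(struct pci_dev *pdev,
				   dma_addr_t guest_addr, u64 host_phys)
{
	struct dmar_domain *domain;
	int r;

	if (!intel_iommu_found())
		return -ENODEV;

	domain = intel_iommu_domain_alloc(pdev);
	if (!domain)
		return -ENOMEM;

	r = intel_iommu_context_mapping(domain, pdev);
	if (r)
		goto out_free;

	r = intel_iommu_page_mapping(domain, guest_addr, host_phys, PAGE_SIZE,
				     DMA_PTE_READ | DMA_PTE_WRITE);
	if (r)
		goto out_free;

	return 0;

out_free:
	intel_iommu_domain_exit(domain);
	return r;
}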
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index bb642cc5e18c..738d4c89581c 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
| 5 | #include <linux/irq.h> | 5 | #include <linux/irq.h> |
| 6 | #include <asm/io_apic.h> | 6 | #include <asm/io_apic.h> |
| 7 | #include "intel-iommu.h" | 7 | #include <linux/intel-iommu.h> |
| 8 | #include "intr_remapping.h" | 8 | #include "intr_remapping.h" |
| 9 | 9 | ||
| 10 | static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; | 10 | static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; |
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h index 05f2635bbe4e..ca48f0df8ac9 100644 --- a/drivers/pci/intr_remapping.h +++ b/drivers/pci/intr_remapping.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #include "intel-iommu.h" | 1 | #include <linux/intel-iommu.h> |
| 2 | 2 | ||
| 3 | struct ioapic_scope { | 3 | struct ioapic_scope { |
| 4 | struct intel_iommu *iommu; | 4 | struct intel_iommu *iommu; |
diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 3ef4ac064315..2287116e9822 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> | 7 | * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #include "iova.h" | 10 | #include <linux/iova.h> |
| 11 | 11 | ||
| 12 | void | 12 | void |
| 13 | init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) | 13 | init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) |
diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 78e954db1e7f..ba0dd791fadf 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h | |||
| @@ -208,26 +208,4 @@ struct kvm_pit_channel_state { | |||
| 208 | struct kvm_pit_state { | 208 | struct kvm_pit_state { |
| 209 | struct kvm_pit_channel_state channels[3]; | 209 | struct kvm_pit_channel_state channels[3]; |
| 210 | }; | 210 | }; |
| 211 | |||
| 212 | #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) | ||
| 213 | #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) | ||
| 214 | #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) | ||
| 215 | #define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) | ||
| 216 | #define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) | ||
| 217 | #define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) | ||
| 218 | #define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) | ||
| 219 | #define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) | ||
| 220 | #define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) | ||
| 221 | #define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) | ||
| 222 | #define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) | ||
| 223 | #define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) | ||
| 224 | #define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) | ||
| 225 | #define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) | ||
| 226 | #define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) | ||
| 227 | #define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) | ||
| 228 | #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) | ||
| 229 | #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) | ||
| 230 | #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) | ||
| 231 | #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) | ||
| 232 | |||
| 233 | #endif /* ASM_X86__KVM_H */ | 211 | #endif /* ASM_X86__KVM_H */ |
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 69794547f514..411fb8cfb24e 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h | |||
| @@ -57,6 +57,10 @@ | |||
| 57 | #define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE) | 57 | #define KVM_PAGES_PER_HPAGE (KVM_HPAGE_SIZE / PAGE_SIZE) |
| 58 | 58 | ||
| 59 | #define DE_VECTOR 0 | 59 | #define DE_VECTOR 0 |
| 60 | #define DB_VECTOR 1 | ||
| 61 | #define BP_VECTOR 3 | ||
| 62 | #define OF_VECTOR 4 | ||
| 63 | #define BR_VECTOR 5 | ||
| 60 | #define UD_VECTOR 6 | 64 | #define UD_VECTOR 6 |
| 61 | #define NM_VECTOR 7 | 65 | #define NM_VECTOR 7 |
| 62 | #define DF_VECTOR 8 | 66 | #define DF_VECTOR 8 |
| @@ -65,6 +69,7 @@ | |||
| 65 | #define SS_VECTOR 12 | 69 | #define SS_VECTOR 12 |
| 66 | #define GP_VECTOR 13 | 70 | #define GP_VECTOR 13 |
| 67 | #define PF_VECTOR 14 | 71 | #define PF_VECTOR 14 |
| 72 | #define MF_VECTOR 16 | ||
| 68 | #define MC_VECTOR 18 | 73 | #define MC_VECTOR 18 |
| 69 | 74 | ||
| 70 | #define SELECTOR_TI_MASK (1 << 2) | 75 | #define SELECTOR_TI_MASK (1 << 2) |
| @@ -89,7 +94,7 @@ extern struct list_head vm_list; | |||
| 89 | struct kvm_vcpu; | 94 | struct kvm_vcpu; |
| 90 | struct kvm; | 95 | struct kvm; |
| 91 | 96 | ||
| 92 | enum { | 97 | enum kvm_reg { |
| 93 | VCPU_REGS_RAX = 0, | 98 | VCPU_REGS_RAX = 0, |
| 94 | VCPU_REGS_RCX = 1, | 99 | VCPU_REGS_RCX = 1, |
| 95 | VCPU_REGS_RDX = 2, | 100 | VCPU_REGS_RDX = 2, |
| @@ -108,6 +113,7 @@ enum { | |||
| 108 | VCPU_REGS_R14 = 14, | 113 | VCPU_REGS_R14 = 14, |
| 109 | VCPU_REGS_R15 = 15, | 114 | VCPU_REGS_R15 = 15, |
| 110 | #endif | 115 | #endif |
| 116 | VCPU_REGS_RIP, | ||
| 111 | NR_VCPU_REGS | 117 | NR_VCPU_REGS |
| 112 | }; | 118 | }; |
| 113 | 119 | ||
| @@ -189,10 +195,20 @@ struct kvm_mmu_page { | |||
| 189 | */ | 195 | */ |
| 190 | int multimapped; /* More than one parent_pte? */ | 196 | int multimapped; /* More than one parent_pte? */ |
| 191 | int root_count; /* Currently serving as active root */ | 197 | int root_count; /* Currently serving as active root */ |
| 198 | bool unsync; | ||
| 199 | bool unsync_children; | ||
| 192 | union { | 200 | union { |
| 193 | u64 *parent_pte; /* !multimapped */ | 201 | u64 *parent_pte; /* !multimapped */ |
| 194 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ | 202 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ |
| 195 | }; | 203 | }; |
| 204 | DECLARE_BITMAP(unsync_child_bitmap, 512); | ||
| 205 | }; | ||
| 206 | |||
| 207 | struct kvm_pv_mmu_op_buffer { | ||
| 208 | void *ptr; | ||
| 209 | unsigned len; | ||
| 210 | unsigned processed; | ||
| 211 | char buf[512] __aligned(sizeof(long)); | ||
| 196 | }; | 212 | }; |
| 197 | 213 | ||
| 198 | /* | 214 | /* |
| @@ -207,6 +223,9 @@ struct kvm_mmu { | |||
| 207 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); | 223 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); |
| 208 | void (*prefetch_page)(struct kvm_vcpu *vcpu, | 224 | void (*prefetch_page)(struct kvm_vcpu *vcpu, |
| 209 | struct kvm_mmu_page *page); | 225 | struct kvm_mmu_page *page); |
| 226 | int (*sync_page)(struct kvm_vcpu *vcpu, | ||
| 227 | struct kvm_mmu_page *sp); | ||
| 228 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | ||
| 210 | hpa_t root_hpa; | 229 | hpa_t root_hpa; |
| 211 | int root_level; | 230 | int root_level; |
| 212 | int shadow_root_level; | 231 | int shadow_root_level; |
| @@ -219,8 +238,13 @@ struct kvm_vcpu_arch { | |||
| 219 | int interrupt_window_open; | 238 | int interrupt_window_open; |
| 220 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ | 239 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ |
| 221 | DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); | 240 | DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); |
| 222 | unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ | 241 | /* |
| 223 | unsigned long rip; /* needs vcpu_load_rsp_rip() */ | 242 | * rip and regs accesses must go through |
| 243 | * kvm_{register,rip}_{read,write} functions. | ||
| 244 | */ | ||
| 245 | unsigned long regs[NR_VCPU_REGS]; | ||
| 246 | u32 regs_avail; | ||
| 247 | u32 regs_dirty; | ||
| 224 | 248 | ||
| 225 | unsigned long cr0; | 249 | unsigned long cr0; |
| 226 | unsigned long cr2; | 250 | unsigned long cr2; |
| @@ -237,6 +261,9 @@ struct kvm_vcpu_arch { | |||
| 237 | bool tpr_access_reporting; | 261 | bool tpr_access_reporting; |
| 238 | 262 | ||
| 239 | struct kvm_mmu mmu; | 263 | struct kvm_mmu mmu; |
| 264 | /* only needed in kvm_pv_mmu_op() path, but it's hot so | ||
| 265 | * put it here to avoid allocation */ | ||
| 266 | struct kvm_pv_mmu_op_buffer mmu_op_buffer; | ||
| 240 | 267 | ||
| 241 | struct kvm_mmu_memory_cache mmu_pte_chain_cache; | 268 | struct kvm_mmu_memory_cache mmu_pte_chain_cache; |
| 242 | struct kvm_mmu_memory_cache mmu_rmap_desc_cache; | 269 | struct kvm_mmu_memory_cache mmu_rmap_desc_cache; |
| @@ -269,6 +296,11 @@ struct kvm_vcpu_arch { | |||
| 269 | u32 error_code; | 296 | u32 error_code; |
| 270 | } exception; | 297 | } exception; |
| 271 | 298 | ||
| 299 | struct kvm_queued_interrupt { | ||
| 300 | bool pending; | ||
| 301 | u8 nr; | ||
| 302 | } interrupt; | ||
| 303 | |||
| 272 | struct { | 304 | struct { |
| 273 | int active; | 305 | int active; |
| 274 | u8 save_iopl; | 306 | u8 save_iopl; |
| @@ -294,6 +326,7 @@ struct kvm_vcpu_arch { | |||
| 294 | struct page *time_page; | 326 | struct page *time_page; |
| 295 | 327 | ||
| 296 | bool nmi_pending; | 328 | bool nmi_pending; |
| 329 | bool nmi_injected; | ||
| 297 | 330 | ||
| 298 | u64 mtrr[0x100]; | 331 | u64 mtrr[0x100]; |
| 299 | }; | 332 | }; |
| @@ -316,9 +349,12 @@ struct kvm_arch{ | |||
| 316 | * Hash table of struct kvm_mmu_page. | 349 | * Hash table of struct kvm_mmu_page. |
| 317 | */ | 350 | */ |
| 318 | struct list_head active_mmu_pages; | 351 | struct list_head active_mmu_pages; |
| 352 | struct list_head assigned_dev_head; | ||
| 353 | struct dmar_domain *intel_iommu_domain; | ||
| 319 | struct kvm_pic *vpic; | 354 | struct kvm_pic *vpic; |
| 320 | struct kvm_ioapic *vioapic; | 355 | struct kvm_ioapic *vioapic; |
| 321 | struct kvm_pit *vpit; | 356 | struct kvm_pit *vpit; |
| 357 | struct hlist_head irq_ack_notifier_list; | ||
| 322 | 358 | ||
| 323 | int round_robin_prev_vcpu; | 359 | int round_robin_prev_vcpu; |
| 324 | unsigned int tss_addr; | 360 | unsigned int tss_addr; |
| @@ -338,6 +374,7 @@ struct kvm_vm_stat { | |||
| 338 | u32 mmu_flooded; | 374 | u32 mmu_flooded; |
| 339 | u32 mmu_recycled; | 375 | u32 mmu_recycled; |
| 340 | u32 mmu_cache_miss; | 376 | u32 mmu_cache_miss; |
| 377 | u32 mmu_unsync; | ||
| 341 | u32 remote_tlb_flush; | 378 | u32 remote_tlb_flush; |
| 342 | u32 lpages; | 379 | u32 lpages; |
| 343 | }; | 380 | }; |
| @@ -364,6 +401,7 @@ struct kvm_vcpu_stat { | |||
| 364 | u32 insn_emulation; | 401 | u32 insn_emulation; |
| 365 | u32 insn_emulation_fail; | 402 | u32 insn_emulation_fail; |
| 366 | u32 hypercalls; | 403 | u32 hypercalls; |
| 404 | u32 irq_injections; | ||
| 367 | }; | 405 | }; |
| 368 | 406 | ||
| 369 | struct descriptor_table { | 407 | struct descriptor_table { |
| @@ -414,8 +452,7 @@ struct kvm_x86_ops { | |||
| 414 | unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); | 452 | unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr); |
| 415 | void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 453 | void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value, |
| 416 | int *exception); | 454 | int *exception); |
| 417 | void (*cache_regs)(struct kvm_vcpu *vcpu); | 455 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
| 418 | void (*decache_regs)(struct kvm_vcpu *vcpu); | ||
| 419 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 456 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
| 420 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | 457 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); |
| 421 | 458 | ||
| @@ -528,6 +565,8 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | |||
| 528 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 565 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
| 529 | u32 error_code); | 566 | u32 error_code); |
| 530 | 567 | ||
| 568 | void kvm_pic_set_irq(void *opaque, int irq, int level); | ||
| 569 | |||
| 531 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); | 570 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
| 532 | 571 | ||
| 533 | void fx_init(struct kvm_vcpu *vcpu); | 572 | void fx_init(struct kvm_vcpu *vcpu); |
| @@ -550,12 +589,14 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | |||
| 550 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 589 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
| 551 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 590 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
| 552 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 591 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
| 592 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | ||
| 553 | 593 | ||
| 554 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 594 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
| 555 | 595 | ||
| 556 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu); | 596 | int kvm_fix_hypercall(struct kvm_vcpu *vcpu); |
| 557 | 597 | ||
| 558 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); | 598 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); |
| 599 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); | ||
| 559 | 600 | ||
| 560 | void kvm_enable_tdp(void); | 601 | void kvm_enable_tdp(void); |
| 561 | void kvm_disable_tdp(void); | 602 | void kvm_disable_tdp(void); |
| @@ -686,33 +727,6 @@ enum { | |||
| 686 | TASK_SWITCH_GATE = 3, | 727 | TASK_SWITCH_GATE = 3, |
| 687 | }; | 728 | }; |
| 688 | 729 | ||
| 689 | #define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ | ||
| 690 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 691 | vcpu, 5, d1, d2, d3, d4, d5) | ||
| 692 | #define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ | ||
| 693 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 694 | vcpu, 4, d1, d2, d3, d4, 0) | ||
| 695 | #define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ | ||
| 696 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 697 | vcpu, 3, d1, d2, d3, 0, 0) | ||
| 698 | #define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ | ||
| 699 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 700 | vcpu, 2, d1, d2, 0, 0, 0) | ||
| 701 | #define KVMTRACE_1D(evt, vcpu, d1, name) \ | ||
| 702 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 703 | vcpu, 1, d1, 0, 0, 0, 0) | ||
| 704 | #define KVMTRACE_0D(evt, vcpu, name) \ | ||
| 705 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 706 | vcpu, 0, 0, 0, 0, 0, 0) | ||
| 707 | |||
| 708 | #ifdef CONFIG_64BIT | ||
| 709 | # define KVM_EX_ENTRY ".quad" | ||
| 710 | # define KVM_EX_PUSH "pushq" | ||
| 711 | #else | ||
| 712 | # define KVM_EX_ENTRY ".long" | ||
| 713 | # define KVM_EX_PUSH "pushl" | ||
| 714 | #endif | ||
| 715 | |||
| 716 | /* | 730 | /* |
| 717 | * Hardware virtualization extension instructions may fault if a | 731 | * Hardware virtualization extension instructions may fault if a |
| 718 | * reboot turns off virtualization while processes are running. | 732 | * reboot turns off virtualization while processes are running. |
| @@ -724,11 +738,11 @@ asmlinkage void kvm_handle_fault_on_reboot(void); | |||
| 724 | "666: " insn "\n\t" \ | 738 | "666: " insn "\n\t" \ |
| 725 | ".pushsection .fixup, \"ax\" \n" \ | 739 | ".pushsection .fixup, \"ax\" \n" \ |
| 726 | "667: \n\t" \ | 740 | "667: \n\t" \ |
| 727 | KVM_EX_PUSH " $666b \n\t" \ | 741 | __ASM_SIZE(push) " $666b \n\t" \ |
| 728 | "jmp kvm_handle_fault_on_reboot \n\t" \ | 742 | "jmp kvm_handle_fault_on_reboot \n\t" \ |
| 729 | ".popsection \n\t" \ | 743 | ".popsection \n\t" \ |
| 730 | ".pushsection __ex_table, \"a\" \n\t" \ | 744 | ".pushsection __ex_table, \"a\" \n\t" \ |
| 731 | KVM_EX_ENTRY " 666b, 667b \n\t" \ | 745 | _ASM_PTR " 666b, 667b \n\t" \ |
| 732 | ".popsection" | 746 | ".popsection" |
| 733 | 747 | ||
| 734 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 748 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
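The regs_avail/regs_dirty bitmaps added above, together with the new cache_reg() hook in kvm_x86_ops, imply lazy register caching: a guest register is pulled out of the VMCS/VMCB only on first use after an exit and written back only if dirtied. As a rough sketch of the accessors the comment refers to (not the literal patch; it assumes the register enum exposes a VCPU_REGS_RIP entry):

static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
					      enum kvm_reg reg)
{
	/* Fetch from hardware state only if not cached since the last exit. */
	if (!test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail))
		kvm_x86_ops->cache_reg(vcpu, reg);
	return vcpu->arch.regs[reg];
}

static inline void kvm_register_write(struct kvm_vcpu *vcpu,
				      enum kvm_reg reg, unsigned long val)
{
	vcpu->arch.regs[reg] = val;
	/* Mark the register cached and dirty so it is flushed on next entry. */
	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
}

static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu)
{
	return kvm_register_read(vcpu, VCPU_REGS_RIP);
}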
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 0bb43301a202..dabd10f0bbee 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h | |||
| @@ -178,6 +178,9 @@ | |||
| 178 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a | 178 | #define MSR_IA32_EBL_CR_POWERON 0x0000002a |
| 179 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | 179 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a |
| 180 | 180 | ||
| 181 | #define FEATURE_CONTROL_LOCKED (1<<0) | ||
| 182 | #define FEATURE_CONTROL_VMXON_ENABLED (1<<2) | ||
| 183 | |||
| 181 | #define MSR_IA32_APICBASE 0x0000001b | 184 | #define MSR_IA32_APICBASE 0x0000001b |
| 182 | #define MSR_IA32_APICBASE_BSP (1<<8) | 185 | #define MSR_IA32_APICBASE_BSP (1<<8) |
| 183 | #define MSR_IA32_APICBASE_ENABLE (1<<11) | 186 | #define MSR_IA32_APICBASE_ENABLE (1<<11) |
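The two FEATURE_CONTROL bits added above are what a VMX backend checks before executing VMXON: if the BIOS locked the MSR without enabling VMXON, virtualization cannot be turned on from the OS. A hedged sketch of that check (the function name is illustrative):

static int check_and_enable_vmxon(void)
{
	u64 msr;

	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
	if ((msr & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED))
	    == FEATURE_CONTROL_LOCKED)
		return -EBUSY;	/* locked by firmware with VMXON disabled */
	if (!(msr & FEATURE_CONTROL_LOCKED))
		/* Not locked yet: enable VMXON and lock the MSR ourselves. */
		wrmsrl(MSR_IA32_FEATURE_CONTROL,
		       msr | FEATURE_CONTROL_LOCKED |
		       FEATURE_CONTROL_VMXON_ENABLED);
	return 0;
}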
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h index 1a38f6834800..ad29e277fd6d 100644 --- a/include/asm-x86/pvclock.h +++ b/include/asm-x86/pvclock.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | /* some helper functions for xen and kvm pv clock sources */ | 7 | /* some helper functions for xen and kvm pv clock sources */ |
| 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); | 8 | cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); |
| 9 | unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); | ||
| 9 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, | 10 | void pvclock_read_wallclock(struct pvclock_wall_clock *wall, |
| 10 | struct pvclock_vcpu_time_info *vcpu, | 11 | struct pvclock_vcpu_time_info *vcpu, |
| 11 | struct timespec *ts); | 12 | struct timespec *ts); |
diff --git a/drivers/pci/dma_remapping.h b/include/linux/dma_remapping.h index bff5c65f81dc..bff5c65f81dc 100644 --- a/drivers/pci/dma_remapping.h +++ b/include/linux/dma_remapping.h | |||
diff --git a/drivers/pci/intel-iommu.h b/include/linux/intel-iommu.h index 2142c01e0143..2e117f30a76c 100644 --- a/drivers/pci/intel-iommu.h +++ b/include/linux/intel-iommu.h | |||
| @@ -25,10 +25,10 @@ | |||
| 25 | #include <linux/types.h> | 25 | #include <linux/types.h> |
| 26 | #include <linux/msi.h> | 26 | #include <linux/msi.h> |
| 27 | #include <linux/sysdev.h> | 27 | #include <linux/sysdev.h> |
| 28 | #include "iova.h" | 28 | #include <linux/iova.h> |
| 29 | #include <linux/io.h> | 29 | #include <linux/io.h> |
| 30 | #include <linux/dma_remapping.h> | ||
| 30 | #include <asm/cacheflush.h> | 31 | #include <asm/cacheflush.h> |
| 31 | #include "dma_remapping.h" | ||
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * Intel IOMMU register specification per version 1.0 public spec. | 34 | * Intel IOMMU register specification per version 1.0 public spec. |
| @@ -304,4 +304,24 @@ extern int dmar_enable_qi(struct intel_iommu *iommu); | |||
| 304 | extern void qi_global_iec(struct intel_iommu *iommu); | 304 | extern void qi_global_iec(struct intel_iommu *iommu); |
| 305 | 305 | ||
| 306 | extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); | 306 | extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); |
| 307 | |||
| 308 | void intel_iommu_domain_exit(struct dmar_domain *domain); | ||
| 309 | struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); | ||
| 310 | int intel_iommu_context_mapping(struct dmar_domain *domain, | ||
| 311 | struct pci_dev *pdev); | ||
| 312 | int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, | ||
| 313 | u64 hpa, size_t size, int prot); | ||
| 314 | void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); | ||
| 315 | struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); | ||
| 316 | u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); | ||
| 317 | |||
| 318 | #ifdef CONFIG_DMAR | ||
| 319 | int intel_iommu_found(void); | ||
| 320 | #else /* CONFIG_DMAR */ | ||
| 321 | static inline int intel_iommu_found(void) | ||
| 322 | { | ||
| 323 | return 0; | ||
| 324 | } | ||
| 325 | #endif /* CONFIG_DMAR */ | ||
| 326 | |||
| 307 | #endif | 327 | #endif |
diff --git a/drivers/pci/iova.h b/include/linux/iova.h index 228f6c94b69c..228f6c94b69c 100644 --- a/drivers/pci/iova.h +++ b/include/linux/iova.h | |||
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 70a30651cd12..797fcd781242 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -311,22 +311,33 @@ struct kvm_s390_interrupt { | |||
| 311 | 311 | ||
| 312 | /* This structure represents a single trace buffer record. */ | 312 | /* This structure represents a single trace buffer record. */ |
| 313 | struct kvm_trace_rec { | 313 | struct kvm_trace_rec { |
| 314 | __u32 event:28; | 314 | /* variable rec_val |
| 315 | __u32 extra_u32:3; | 315 | * is split into: |
| 316 | __u32 cycle_in:1; | 316 | * bits 0 - 27 -> event id |
| 317 | * bits 28 -30 -> number of extra data args of size u32 | ||
| 318 | * bits 31 -> binary indicator for if tsc is in record | ||
| 319 | */ | ||
| 320 | __u32 rec_val; | ||
| 317 | __u32 pid; | 321 | __u32 pid; |
| 318 | __u32 vcpu_id; | 322 | __u32 vcpu_id; |
| 319 | union { | 323 | union { |
| 320 | struct { | 324 | struct { |
| 321 | __u64 cycle_u64; | 325 | __u64 timestamp; |
| 322 | __u32 extra_u32[KVM_TRC_EXTRA_MAX]; | 326 | __u32 extra_u32[KVM_TRC_EXTRA_MAX]; |
| 323 | } __attribute__((packed)) cycle; | 327 | } __attribute__((packed)) timestamp; |
| 324 | struct { | 328 | struct { |
| 325 | __u32 extra_u32[KVM_TRC_EXTRA_MAX]; | 329 | __u32 extra_u32[KVM_TRC_EXTRA_MAX]; |
| 326 | } nocycle; | 330 | } notimestamp; |
| 327 | } u; | 331 | } u; |
| 328 | }; | 332 | }; |
| 329 | 333 | ||
| 334 | #define TRACE_REC_EVENT_ID(val) \ | ||
| 335 | (0x0fffffff & (val)) | ||
| 336 | #define TRACE_REC_NUM_DATA_ARGS(val) \ | ||
| 337 | (0x70000000 & ((val) << 28)) | ||
| 338 | #define TRACE_REC_TCS(val) \ | ||
| 339 | (0x80000000 & ((val) << 31)) | ||
| 340 | |||
| 330 | #define KVMIO 0xAE | 341 | #define KVMIO 0xAE |
| 331 | 342 | ||
| 332 | /* | 343 | /* |
| @@ -372,6 +383,10 @@ struct kvm_trace_rec { | |||
| 372 | #define KVM_CAP_MP_STATE 14 | 383 | #define KVM_CAP_MP_STATE 14 |
| 373 | #define KVM_CAP_COALESCED_MMIO 15 | 384 | #define KVM_CAP_COALESCED_MMIO 15 |
| 374 | #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ | 385 | #define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ |
| 386 | #if defined(CONFIG_X86)||defined(CONFIG_IA64) | ||
| 387 | #define KVM_CAP_DEVICE_ASSIGNMENT 17 | ||
| 388 | #endif | ||
| 389 | #define KVM_CAP_IOMMU 18 | ||
| 375 | 390 | ||
| 376 | /* | 391 | /* |
| 377 | * ioctls for VM fds | 392 | * ioctls for VM fds |
| @@ -401,6 +416,10 @@ struct kvm_trace_rec { | |||
| 401 | _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) | 416 | _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) |
| 402 | #define KVM_UNREGISTER_COALESCED_MMIO \ | 417 | #define KVM_UNREGISTER_COALESCED_MMIO \ |
| 403 | _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) | 418 | _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) |
| 419 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ | ||
| 420 | struct kvm_assigned_pci_dev) | ||
| 421 | #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ | ||
| 422 | struct kvm_assigned_irq) | ||
| 404 | 423 | ||
| 405 | /* | 424 | /* |
| 406 | * ioctls for vcpu fds | 425 | * ioctls for vcpu fds |
| @@ -440,4 +459,45 @@ struct kvm_trace_rec { | |||
| 440 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) | 459 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) |
| 441 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) | 460 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) |
| 442 | 461 | ||
| 462 | #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) | ||
| 463 | #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) | ||
| 464 | #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) | ||
| 465 | #define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) | ||
| 466 | #define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) | ||
| 467 | #define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) | ||
| 468 | #define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) | ||
| 469 | #define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) | ||
| 470 | #define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) | ||
| 471 | #define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) | ||
| 472 | #define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) | ||
| 473 | #define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) | ||
| 474 | #define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) | ||
| 475 | #define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) | ||
| 476 | #define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) | ||
| 477 | #define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) | ||
| 478 | #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) | ||
| 479 | #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) | ||
| 480 | #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) | ||
| 481 | #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) | ||
| 482 | #define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) | ||
| 483 | #define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) | ||
| 484 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) | ||
| 485 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) | ||
| 486 | |||
| 487 | struct kvm_assigned_pci_dev { | ||
| 488 | __u32 assigned_dev_id; | ||
| 489 | __u32 busnr; | ||
| 490 | __u32 devfn; | ||
| 491 | __u32 flags; | ||
| 492 | }; | ||
| 493 | |||
| 494 | struct kvm_assigned_irq { | ||
| 495 | __u32 assigned_dev_id; | ||
| 496 | __u32 host_irq; | ||
| 497 | __u32 guest_irq; | ||
| 498 | __u32 flags; | ||
| 499 | }; | ||
| 500 | |||
| 501 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | ||
| 502 | |||
| 443 | #endif | 503 | #endif |
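Taken together, the two new VM ioctls and their structures give userspace a two-step assignment flow: register the host PCI device, then wire its interrupt to a guest GSI. A hypothetical userspace snippet (the bus/slot numbers, handle and GSI below are made up for illustration):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int assign_host_nic(int vm_fd)
{
	struct kvm_assigned_pci_dev dev = {
		.assigned_dev_id = 1,			/* caller-chosen handle */
		.busnr		 = 0x00,
		.devfn		 = (0x19 << 3) | 0,	/* slot 0x19, function 0 */
		.flags		 = KVM_DEV_ASSIGN_ENABLE_IOMMU,
	};
	struct kvm_assigned_irq irq = {
		.assigned_dev_id = 1,
		.host_irq	 = 0,	/* 0: let the kernel use the device's own IRQ */
		.guest_irq	 = 10,	/* GSI the guest will see */
	};

	if (ioctl(vm_fd, KVM_ASSIGN_PCI_DEVICE, &dev) < 0)
		return -1;
	return ioctl(vm_fd, KVM_ASSIGN_IRQ, &irq);
}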
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8525afc53107..3833c48fae3a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | #define KVM_REQ_MMU_RELOAD 3 | 34 | #define KVM_REQ_MMU_RELOAD 3 |
| 35 | #define KVM_REQ_TRIPLE_FAULT 4 | 35 | #define KVM_REQ_TRIPLE_FAULT 4 |
| 36 | #define KVM_REQ_PENDING_TIMER 5 | 36 | #define KVM_REQ_PENDING_TIMER 5 |
| 37 | #define KVM_REQ_UNHALT 6 | ||
| 38 | #define KVM_REQ_MMU_SYNC 7 | ||
| 37 | 39 | ||
| 38 | struct kvm_vcpu; | 40 | struct kvm_vcpu; |
| 39 | extern struct kmem_cache *kvm_vcpu_cache; | 41 | extern struct kmem_cache *kvm_vcpu_cache; |
| @@ -279,12 +281,68 @@ void kvm_free_physmem(struct kvm *kvm); | |||
| 279 | 281 | ||
| 280 | struct kvm *kvm_arch_create_vm(void); | 282 | struct kvm *kvm_arch_create_vm(void); |
| 281 | void kvm_arch_destroy_vm(struct kvm *kvm); | 283 | void kvm_arch_destroy_vm(struct kvm *kvm); |
| 284 | void kvm_free_all_assigned_devices(struct kvm *kvm); | ||
| 282 | 285 | ||
| 283 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 286 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
| 284 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v); | 287 | int kvm_cpu_has_interrupt(struct kvm_vcpu *v); |
| 285 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); | 288 | int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); |
| 286 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); | 289 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu); |
| 287 | 290 | ||
| 291 | int kvm_is_mmio_pfn(pfn_t pfn); | ||
| 292 | |||
| 293 | struct kvm_irq_ack_notifier { | ||
| 294 | struct hlist_node link; | ||
| 295 | unsigned gsi; | ||
| 296 | void (*irq_acked)(struct kvm_irq_ack_notifier *kian); | ||
| 297 | }; | ||
| 298 | |||
| 299 | struct kvm_assigned_dev_kernel { | ||
| 300 | struct kvm_irq_ack_notifier ack_notifier; | ||
| 301 | struct work_struct interrupt_work; | ||
| 302 | struct list_head list; | ||
| 303 | int assigned_dev_id; | ||
| 304 | int host_busnr; | ||
| 305 | int host_devfn; | ||
| 306 | int host_irq; | ||
| 307 | int guest_irq; | ||
| 308 | int irq_requested; | ||
| 309 | struct pci_dev *dev; | ||
| 310 | struct kvm *kvm; | ||
| 311 | }; | ||
| 312 | void kvm_set_irq(struct kvm *kvm, int irq, int level); | ||
| 313 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi); | ||
| 314 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | ||
| 315 | struct kvm_irq_ack_notifier *kian); | ||
| 316 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | ||
| 317 | struct kvm_irq_ack_notifier *kian); | ||
| 318 | |||
| 319 | #ifdef CONFIG_DMAR | ||
| 320 | int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, | ||
| 321 | unsigned long npages); | ||
| 322 | int kvm_iommu_map_guest(struct kvm *kvm, | ||
| 323 | struct kvm_assigned_dev_kernel *assigned_dev); | ||
| 324 | int kvm_iommu_unmap_guest(struct kvm *kvm); | ||
| 325 | #else /* CONFIG_DMAR */ | ||
| 326 | static inline int kvm_iommu_map_pages(struct kvm *kvm, | ||
| 327 | gfn_t base_gfn, | ||
| 328 | unsigned long npages) | ||
| 329 | { | ||
| 330 | return 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | static inline int kvm_iommu_map_guest(struct kvm *kvm, | ||
| 334 | struct kvm_assigned_dev_kernel | ||
| 335 | *assigned_dev) | ||
| 336 | { | ||
| 337 | return -ENODEV; | ||
| 338 | } | ||
| 339 | |||
| 340 | static inline int kvm_iommu_unmap_guest(struct kvm *kvm) | ||
| 341 | { | ||
| 342 | return 0; | ||
| 343 | } | ||
| 344 | #endif /* CONFIG_DMAR */ | ||
| 345 | |||
| 288 | static inline void kvm_guest_enter(void) | 346 | static inline void kvm_guest_enter(void) |
| 289 | { | 347 | { |
| 290 | account_system_vtime(current); | 348 | account_system_vtime(current); |
| @@ -307,6 +365,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) | |||
| 307 | return (gpa_t)gfn << PAGE_SHIFT; | 365 | return (gpa_t)gfn << PAGE_SHIFT; |
| 308 | } | 366 | } |
| 309 | 367 | ||
| 368 | static inline hpa_t pfn_to_hpa(pfn_t pfn) | ||
| 369 | { | ||
| 370 | return (hpa_t)pfn << PAGE_SHIFT; | ||
| 371 | } | ||
| 372 | |||
| 310 | static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) | 373 | static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) |
| 311 | { | 374 | { |
| 312 | set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); | 375 | set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); |
| @@ -326,6 +389,25 @@ struct kvm_stats_debugfs_item { | |||
| 326 | extern struct kvm_stats_debugfs_item debugfs_entries[]; | 389 | extern struct kvm_stats_debugfs_item debugfs_entries[]; |
| 327 | extern struct dentry *kvm_debugfs_dir; | 390 | extern struct dentry *kvm_debugfs_dir; |
| 328 | 391 | ||
| 392 | #define KVMTRACE_5D(evt, vcpu, d1, d2, d3, d4, d5, name) \ | ||
| 393 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 394 | vcpu, 5, d1, d2, d3, d4, d5) | ||
| 395 | #define KVMTRACE_4D(evt, vcpu, d1, d2, d3, d4, name) \ | ||
| 396 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 397 | vcpu, 4, d1, d2, d3, d4, 0) | ||
| 398 | #define KVMTRACE_3D(evt, vcpu, d1, d2, d3, name) \ | ||
| 399 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 400 | vcpu, 3, d1, d2, d3, 0, 0) | ||
| 401 | #define KVMTRACE_2D(evt, vcpu, d1, d2, name) \ | ||
| 402 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 403 | vcpu, 2, d1, d2, 0, 0, 0) | ||
| 404 | #define KVMTRACE_1D(evt, vcpu, d1, name) \ | ||
| 405 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 406 | vcpu, 1, d1, 0, 0, 0, 0) | ||
| 407 | #define KVMTRACE_0D(evt, vcpu, name) \ | ||
| 408 | trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ | ||
| 409 | vcpu, 0, 0, 0, 0, 0, 0) | ||
| 410 | |||
| 329 | #ifdef CONFIG_KVM_TRACE | 411 | #ifdef CONFIG_KVM_TRACE |
| 330 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); | 412 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); |
| 331 | void kvm_trace_cleanup(void); | 413 | void kvm_trace_cleanup(void); |
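The kvm_irq_ack_notifier hook declared above lets any in-kernel device model learn when the guest acknowledges a given GSI. A minimal sketch of a consumer (everything except the kvm_*_irq_ack_notifier API itself is illustrative):

struct sample_dev {
	struct kvm_irq_ack_notifier ack;
	/* ... device state ... */
};

static void sample_dev_irq_acked(struct kvm_irq_ack_notifier *kian)
{
	struct sample_dev *d = container_of(kian, struct sample_dev, ack);

	/* The guest EOI'd our GSI; safe to re-raise or unmask here. */
	(void)d;
}

static void sample_dev_watch_gsi(struct kvm *kvm, struct sample_dev *d,
				 unsigned gsi)
{
	d->ack.gsi = gsi;
	d->ack.irq_acked = sample_dev_irq_acked;
	kvm_register_irq_ack_notifier(kvm, &d->ack);
}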
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index c0d22870ee9c..53772bb46320 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | 39 | ||
| 40 | #include "ioapic.h" | 40 | #include "ioapic.h" |
| 41 | #include "lapic.h" | 41 | #include "lapic.h" |
| 42 | #include "irq.h" | ||
| 42 | 43 | ||
| 43 | #if 0 | 44 | #if 0 |
| 44 | #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) | 45 | #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) |
| @@ -285,26 +286,31 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
| 285 | } | 286 | } |
| 286 | } | 287 | } |
| 287 | 288 | ||
| 288 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi) | 289 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi, |
| 290 | int trigger_mode) | ||
| 289 | { | 291 | { |
| 290 | union ioapic_redir_entry *ent; | 292 | union ioapic_redir_entry *ent; |
| 291 | 293 | ||
| 292 | ent = &ioapic->redirtbl[gsi]; | 294 | ent = &ioapic->redirtbl[gsi]; |
| 293 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | ||
| 294 | 295 | ||
| 295 | ent->fields.remote_irr = 0; | 296 | kvm_notify_acked_irq(ioapic->kvm, gsi); |
| 296 | if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) | 297 | |
| 297 | ioapic_service(ioapic, gsi); | 298 | if (trigger_mode == IOAPIC_LEVEL_TRIG) { |
| 299 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | ||
| 300 | ent->fields.remote_irr = 0; | ||
| 301 | if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) | ||
| 302 | ioapic_service(ioapic, gsi); | ||
| 303 | } | ||
| 298 | } | 304 | } |
| 299 | 305 | ||
| 300 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) | 306 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) |
| 301 | { | 307 | { |
| 302 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 308 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
| 303 | int i; | 309 | int i; |
| 304 | 310 | ||
| 305 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 311 | for (i = 0; i < IOAPIC_NUM_PINS; i++) |
| 306 | if (ioapic->redirtbl[i].fields.vector == vector) | 312 | if (ioapic->redirtbl[i].fields.vector == vector) |
| 307 | __kvm_ioapic_update_eoi(ioapic, i); | 313 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); |
| 308 | } | 314 | } |
| 309 | 315 | ||
| 310 | static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, | 316 | static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, |
| @@ -380,7 +386,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 380 | break; | 386 | break; |
| 381 | #ifdef CONFIG_IA64 | 387 | #ifdef CONFIG_IA64 |
| 382 | case IOAPIC_REG_EOI: | 388 | case IOAPIC_REG_EOI: |
| 383 | kvm_ioapic_update_eoi(ioapic->kvm, data); | 389 | kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); |
| 384 | break; | 390 | break; |
| 385 | #endif | 391 | #endif |
| 386 | 392 | ||
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7f16675fe783..cd7ae7691c9d 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
| @@ -58,6 +58,7 @@ struct kvm_ioapic { | |||
| 58 | } redirtbl[IOAPIC_NUM_PINS]; | 58 | } redirtbl[IOAPIC_NUM_PINS]; |
| 59 | struct kvm_io_device dev; | 59 | struct kvm_io_device dev; |
| 60 | struct kvm *kvm; | 60 | struct kvm *kvm; |
| 61 | void (*ack_notifier)(void *opaque, int irq); | ||
| 61 | }; | 62 | }; |
| 62 | 63 | ||
| 63 | #ifdef DEBUG | 64 | #ifdef DEBUG |
| @@ -78,16 +79,9 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) | |||
| 78 | return kvm->arch.vioapic; | 79 | return kvm->arch.vioapic; |
| 79 | } | 80 | } |
| 80 | 81 | ||
| 81 | #ifdef CONFIG_IA64 | ||
| 82 | static inline int irqchip_in_kernel(struct kvm *kvm) | ||
| 83 | { | ||
| 84 | return 1; | ||
| 85 | } | ||
| 86 | #endif | ||
| 87 | |||
| 88 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | 82 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, |
| 89 | unsigned long bitmap); | 83 | unsigned long bitmap); |
| 90 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); | 84 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); |
| 91 | int kvm_ioapic_init(struct kvm *kvm); | 85 | int kvm_ioapic_init(struct kvm *kvm); |
| 92 | void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | 86 | void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); |
| 93 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 87 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
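The extra trigger_mode argument lets the local APIC's EOI path tell the IOAPIC whether the vector was level-triggered, so remote_irr is cleared and the line re-serviced only for level interrupts. Roughly, a caller would look like the sketch below; vector_is_level() stands in for the LAPIC's TMR test and is not a real helper:

static void sample_apic_eoi(struct kvm_lapic *apic, int vector)
{
	int trigger_mode = vector_is_level(apic, vector) ?
			   IOAPIC_LEVEL_TRIG : IOAPIC_EDGE_TRIG;

	/* Notifies ack listeners and, for level IRQs, clears remote_irr. */
	kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
}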
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c new file mode 100644 index 000000000000..d0169f5e6047 --- /dev/null +++ b/virt/kvm/irq_comm.c | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | /* | ||
| 2 | * irq_comm.c: Common API for in kernel interrupt controller | ||
| 3 | * Copyright (c) 2007, Intel Corporation. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License along with | ||
| 15 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 16 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 17 | * Authors: | ||
| 18 | * Yaozu (Eddie) Dong <Eddie.dong@intel.com> | ||
| 19 | * | ||
| 20 | */ | ||
| 21 | |||
| 22 | #include <linux/kvm_host.h> | ||
| 23 | #include "irq.h" | ||
| 24 | |||
| 25 | #include "ioapic.h" | ||
| 26 | |||
| 27 | /* This should be called with the kvm->lock mutex held */ | ||
| 28 | void kvm_set_irq(struct kvm *kvm, int irq, int level) | ||
| 29 | { | ||
| 30 | /* Not possible to detect if the guest uses the PIC or the | ||
| 31 | * IOAPIC. So set the bit in both. The guest will ignore | ||
| 32 | * writes to the unused one. | ||
| 33 | */ | ||
| 34 | kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level); | ||
| 35 | #ifdef CONFIG_X86 | ||
| 36 | kvm_pic_set_irq(pic_irqchip(kvm), irq, level); | ||
| 37 | #endif | ||
| 38 | } | ||
| 39 | |||
| 40 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi) | ||
| 41 | { | ||
| 42 | struct kvm_irq_ack_notifier *kian; | ||
| 43 | struct hlist_node *n; | ||
| 44 | |||
| 45 | hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) | ||
| 46 | if (kian->gsi == gsi) | ||
| 47 | kian->irq_acked(kian); | ||
| 48 | } | ||
| 49 | |||
| 50 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | ||
| 51 | struct kvm_irq_ack_notifier *kian) | ||
| 52 | { | ||
| 53 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); | ||
| 54 | } | ||
| 55 | |||
| 56 | void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | ||
| 57 | struct kvm_irq_ack_notifier *kian) | ||
| 58 | { | ||
| 59 | hlist_del(&kian->link); | ||
| 60 | } | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7dd9b0b85e4e..cf0ab8ed3845 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -51,6 +51,12 @@ | |||
| 51 | #include "coalesced_mmio.h" | 51 | #include "coalesced_mmio.h" |
| 52 | #endif | 52 | #endif |
| 53 | 53 | ||
| 54 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 55 | #include <linux/pci.h> | ||
| 56 | #include <linux/interrupt.h> | ||
| 57 | #include "irq.h" | ||
| 58 | #endif | ||
| 59 | |||
| 54 | MODULE_AUTHOR("Qumranet"); | 60 | MODULE_AUTHOR("Qumranet"); |
| 55 | MODULE_LICENSE("GPL"); | 61 | MODULE_LICENSE("GPL"); |
| 56 | 62 | ||
| @@ -71,11 +77,253 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | |||
| 71 | 77 | ||
| 72 | bool kvm_rebooting; | 78 | bool kvm_rebooting; |
| 73 | 79 | ||
| 80 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 81 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
| 82 | int assigned_dev_id) | ||
| 83 | { | ||
| 84 | struct list_head *ptr; | ||
| 85 | struct kvm_assigned_dev_kernel *match; | ||
| 86 | |||
| 87 | list_for_each(ptr, head) { | ||
| 88 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
| 89 | if (match->assigned_dev_id == assigned_dev_id) | ||
| 90 | return match; | ||
| 91 | } | ||
| 92 | return NULL; | ||
| 93 | } | ||
| 94 | |||
| 95 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
| 96 | { | ||
| 97 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
| 98 | |||
| 99 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
| 100 | interrupt_work); | ||
| 101 | |||
| 102 | /* This is taken to safely inject irq inside the guest. When | ||
| 103 | * the interrupt injection (or the ioapic code) uses a | ||
| 104 | * finer-grained lock, update this | ||
| 105 | */ | ||
| 106 | mutex_lock(&assigned_dev->kvm->lock); | ||
| 107 | kvm_set_irq(assigned_dev->kvm, | ||
| 108 | assigned_dev->guest_irq, 1); | ||
| 109 | mutex_unlock(&assigned_dev->kvm->lock); | ||
| 110 | kvm_put_kvm(assigned_dev->kvm); | ||
| 111 | } | ||
| 112 | |||
| 113 | /* FIXME: Implement the OR logic needed to make shared interrupts on | ||
| 114 | * this line behave properly | ||
| 115 | */ | ||
| 116 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
| 117 | { | ||
| 118 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
| 119 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
| 120 | |||
| 121 | kvm_get_kvm(assigned_dev->kvm); | ||
| 122 | schedule_work(&assigned_dev->interrupt_work); | ||
| 123 | disable_irq_nosync(irq); | ||
| 124 | return IRQ_HANDLED; | ||
| 125 | } | ||
| 126 | |||
| 127 | /* Ack the irq line for an assigned device */ | ||
| 128 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
| 129 | { | ||
| 130 | struct kvm_assigned_dev_kernel *dev; | ||
| 131 | |||
| 132 | if (kian->gsi == -1) | ||
| 133 | return; | ||
| 134 | |||
| 135 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
| 136 | ack_notifier); | ||
| 137 | kvm_set_irq(dev->kvm, dev->guest_irq, 0); | ||
| 138 | enable_irq(dev->host_irq); | ||
| 139 | } | ||
| 140 | |||
| 141 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
| 142 | struct kvm_assigned_dev_kernel | ||
| 143 | *assigned_dev) | ||
| 144 | { | ||
| 145 | if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) | ||
| 146 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
| 147 | |||
| 148 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
| 149 | |||
| 150 | if (cancel_work_sync(&assigned_dev->interrupt_work)) | ||
| 151 | /* We had pending work. That means we will have to take | ||
| 152 | * care of kvm_put_kvm. | ||
| 153 | */ | ||
| 154 | kvm_put_kvm(kvm); | ||
| 155 | |||
| 156 | pci_release_regions(assigned_dev->dev); | ||
| 157 | pci_disable_device(assigned_dev->dev); | ||
| 158 | pci_dev_put(assigned_dev->dev); | ||
| 159 | |||
| 160 | list_del(&assigned_dev->list); | ||
| 161 | kfree(assigned_dev); | ||
| 162 | } | ||
| 163 | |||
| 164 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
| 165 | { | ||
| 166 | struct list_head *ptr, *ptr2; | ||
| 167 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
| 168 | |||
| 169 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
| 170 | assigned_dev = list_entry(ptr, | ||
| 171 | struct kvm_assigned_dev_kernel, | ||
| 172 | list); | ||
| 173 | |||
| 174 | kvm_free_assigned_device(kvm, assigned_dev); | ||
| 175 | } | ||
| 176 | } | ||
| 177 | |||
| 178 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
| 179 | struct kvm_assigned_irq | ||
| 180 | *assigned_irq) | ||
| 181 | { | ||
| 182 | int r = 0; | ||
| 183 | struct kvm_assigned_dev_kernel *match; | ||
| 184 | |||
| 185 | mutex_lock(&kvm->lock); | ||
| 186 | |||
| 187 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 188 | assigned_irq->assigned_dev_id); | ||
| 189 | if (!match) { | ||
| 190 | mutex_unlock(&kvm->lock); | ||
| 191 | return -EINVAL; | ||
| 192 | } | ||
| 193 | |||
| 194 | if (match->irq_requested) { | ||
| 195 | match->guest_irq = assigned_irq->guest_irq; | ||
| 196 | match->ack_notifier.gsi = assigned_irq->guest_irq; | ||
| 197 | mutex_unlock(&kvm->lock); | ||
| 198 | return 0; | ||
| 199 | } | ||
| 200 | |||
| 201 | INIT_WORK(&match->interrupt_work, | ||
| 202 | kvm_assigned_dev_interrupt_work_handler); | ||
| 203 | |||
| 204 | if (irqchip_in_kernel(kvm)) { | ||
| 205 | if (!capable(CAP_SYS_RAWIO)) { | ||
| 206 | r = -EPERM; | ||
| 207 | goto out_release; | ||
| 208 | } | ||
| 209 | |||
| 210 | if (assigned_irq->host_irq) | ||
| 211 | match->host_irq = assigned_irq->host_irq; | ||
| 212 | else | ||
| 213 | match->host_irq = match->dev->irq; | ||
| 214 | match->guest_irq = assigned_irq->guest_irq; | ||
| 215 | match->ack_notifier.gsi = assigned_irq->guest_irq; | ||
| 216 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
| 217 | kvm_register_irq_ack_notifier(kvm, &match->ack_notifier); | ||
| 218 | |||
| 219 | /* Even though this is PCI, we don't want to use shared | ||
| 220 | * interrupts. Sharing host devices with guest-assigned devices | ||
| 221 | * on the same interrupt line is not a happy situation: there | ||
| 222 | * are going to be long delays in accepting, acking, etc. | ||
| 223 | */ | ||
| 224 | if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0, | ||
| 225 | "kvm_assigned_device", (void *)match)) { | ||
| 226 | r = -EIO; | ||
| 227 | goto out_release; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | match->irq_requested = true; | ||
| 232 | mutex_unlock(&kvm->lock); | ||
| 233 | return r; | ||
| 234 | out_release: | ||
| 235 | mutex_unlock(&kvm->lock); | ||
| 236 | kvm_free_assigned_device(kvm, match); | ||
| 237 | return r; | ||
| 238 | } | ||
| 239 | |||
| 240 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
| 241 | struct kvm_assigned_pci_dev *assigned_dev) | ||
| 242 | { | ||
| 243 | int r = 0; | ||
| 244 | struct kvm_assigned_dev_kernel *match; | ||
| 245 | struct pci_dev *dev; | ||
| 246 | |||
| 247 | mutex_lock(&kvm->lock); | ||
| 248 | |||
| 249 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 250 | assigned_dev->assigned_dev_id); | ||
| 251 | if (match) { | ||
| 252 | /* device already assigned */ | ||
| 253 | r = -EINVAL; | ||
| 254 | goto out; | ||
| 255 | } | ||
| 256 | |||
| 257 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
| 258 | if (match == NULL) { | ||
| 259 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
| 260 | __func__); | ||
| 261 | r = -ENOMEM; | ||
| 262 | goto out; | ||
| 263 | } | ||
| 264 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
| 265 | assigned_dev->devfn); | ||
| 266 | if (!dev) { | ||
| 267 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
| 268 | r = -EINVAL; | ||
| 269 | goto out_free; | ||
| 270 | } | ||
| 271 | if (pci_enable_device(dev)) { | ||
| 272 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
| 273 | r = -EBUSY; | ||
| 274 | goto out_put; | ||
| 275 | } | ||
| 276 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
| 277 | if (r) { | ||
| 278 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
| 279 | __func__); | ||
| 280 | goto out_disable; | ||
| 281 | } | ||
| 282 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
| 283 | match->host_busnr = assigned_dev->busnr; | ||
| 284 | match->host_devfn = assigned_dev->devfn; | ||
| 285 | match->dev = dev; | ||
| 286 | |||
| 287 | match->kvm = kvm; | ||
| 288 | |||
| 289 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
| 290 | |||
| 291 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
| 292 | r = kvm_iommu_map_guest(kvm, match); | ||
| 293 | if (r) | ||
| 294 | goto out_list_del; | ||
| 295 | } | ||
| 296 | |||
| 297 | out: | ||
| 298 | mutex_unlock(&kvm->lock); | ||
| 299 | return r; | ||
| 300 | out_list_del: | ||
| 301 | list_del(&match->list); | ||
| 302 | pci_release_regions(dev); | ||
| 303 | out_disable: | ||
| 304 | pci_disable_device(dev); | ||
| 305 | out_put: | ||
| 306 | pci_dev_put(dev); | ||
| 307 | out_free: | ||
| 308 | kfree(match); | ||
| 309 | mutex_unlock(&kvm->lock); | ||
| 310 | return r; | ||
| 311 | } | ||
| 312 | #endif | ||
| 313 | |||
| 74 | static inline int valid_vcpu(int n) | 314 | static inline int valid_vcpu(int n) |
| 75 | { | 315 | { |
| 76 | return likely(n >= 0 && n < KVM_MAX_VCPUS); | 316 | return likely(n >= 0 && n < KVM_MAX_VCPUS); |
| 77 | } | 317 | } |
| 78 | 318 | ||
| 319 | inline int kvm_is_mmio_pfn(pfn_t pfn) | ||
| 320 | { | ||
| 321 | if (pfn_valid(pfn)) | ||
| 322 | return PageReserved(pfn_to_page(pfn)); | ||
| 323 | |||
| 324 | return true; | ||
| 325 | } | ||
| 326 | |||
| 79 | /* | 327 | /* |
| 80 | * Switches to specified vcpu, until a matching vcpu_put() | 328 | * Switches to specified vcpu, until a matching vcpu_put() |
| 81 | */ | 329 | */ |
| @@ -570,6 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm, | |||
| 570 | } | 818 | } |
| 571 | 819 | ||
| 572 | kvm_free_physmem_slot(&old, &new); | 820 | kvm_free_physmem_slot(&old, &new); |
| 821 | #ifdef CONFIG_DMAR | ||
| 822 | /* map the pages in iommu page table */ | ||
| 823 | r = kvm_iommu_map_pages(kvm, base_gfn, npages); | ||
| 824 | if (r) | ||
| 825 | goto out; | ||
| 826 | #endif | ||
| 573 | return 0; | 827 | return 0; |
| 574 | 828 | ||
| 575 | out_free: | 829 | out_free: |
| @@ -708,9 +962,6 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) | |||
| 708 | } | 962 | } |
| 709 | EXPORT_SYMBOL_GPL(gfn_to_hva); | 963 | EXPORT_SYMBOL_GPL(gfn_to_hva); |
| 710 | 964 | ||
| 711 | /* | ||
| 712 | * Requires current->mm->mmap_sem to be held | ||
| 713 | */ | ||
| 714 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | 965 | pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) |
| 715 | { | 966 | { |
| 716 | struct page *page[1]; | 967 | struct page *page[1]; |
| @@ -726,21 +977,24 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) | |||
| 726 | return page_to_pfn(bad_page); | 977 | return page_to_pfn(bad_page); |
| 727 | } | 978 | } |
| 728 | 979 | ||
| 729 | npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, | 980 | npages = get_user_pages_fast(addr, 1, 1, page); |
| 730 | NULL); | ||
| 731 | 981 | ||
| 732 | if (unlikely(npages != 1)) { | 982 | if (unlikely(npages != 1)) { |
| 733 | struct vm_area_struct *vma; | 983 | struct vm_area_struct *vma; |
| 734 | 984 | ||
| 985 | down_read(¤t->mm->mmap_sem); | ||
| 735 | vma = find_vma(current->mm, addr); | 986 | vma = find_vma(current->mm, addr); |
| 987 | |||
| 736 | if (vma == NULL || addr < vma->vm_start || | 988 | if (vma == NULL || addr < vma->vm_start || |
| 737 | !(vma->vm_flags & VM_PFNMAP)) { | 989 | !(vma->vm_flags & VM_PFNMAP)) { |
| 990 | up_read(¤t->mm->mmap_sem); | ||
| 738 | get_page(bad_page); | 991 | get_page(bad_page); |
| 739 | return page_to_pfn(bad_page); | 992 | return page_to_pfn(bad_page); |
| 740 | } | 993 | } |
| 741 | 994 | ||
| 742 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; | 995 | pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; |
| 743 | BUG_ON(pfn_valid(pfn)); | 996 | up_read(¤t->mm->mmap_sem); |
| 997 | BUG_ON(!kvm_is_mmio_pfn(pfn)); | ||
| 744 | } else | 998 | } else |
| 745 | pfn = page_to_pfn(page[0]); | 999 | pfn = page_to_pfn(page[0]); |
| 746 | 1000 | ||
| @@ -754,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) | |||
| 754 | pfn_t pfn; | 1008 | pfn_t pfn; |
| 755 | 1009 | ||
| 756 | pfn = gfn_to_pfn(kvm, gfn); | 1010 | pfn = gfn_to_pfn(kvm, gfn); |
| 757 | if (pfn_valid(pfn)) | 1011 | if (!kvm_is_mmio_pfn(pfn)) |
| 758 | return pfn_to_page(pfn); | 1012 | return pfn_to_page(pfn); |
| 759 | 1013 | ||
| 760 | WARN_ON(!pfn_valid(pfn)); | 1014 | WARN_ON(kvm_is_mmio_pfn(pfn)); |
| 761 | 1015 | ||
| 762 | get_page(bad_page); | 1016 | get_page(bad_page); |
| 763 | return bad_page; | 1017 | return bad_page; |
| @@ -773,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); | |||
| 773 | 1027 | ||
| 774 | void kvm_release_pfn_clean(pfn_t pfn) | 1028 | void kvm_release_pfn_clean(pfn_t pfn) |
| 775 | { | 1029 | { |
| 776 | if (pfn_valid(pfn)) | 1030 | if (!kvm_is_mmio_pfn(pfn)) |
| 777 | put_page(pfn_to_page(pfn)); | 1031 | put_page(pfn_to_page(pfn)); |
| 778 | } | 1032 | } |
| 779 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); | 1033 | EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); |
| @@ -799,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty); | |||
| 799 | 1053 | ||
| 800 | void kvm_set_pfn_dirty(pfn_t pfn) | 1054 | void kvm_set_pfn_dirty(pfn_t pfn) |
| 801 | { | 1055 | { |
| 802 | if (pfn_valid(pfn)) { | 1056 | if (!kvm_is_mmio_pfn(pfn)) { |
| 803 | struct page *page = pfn_to_page(pfn); | 1057 | struct page *page = pfn_to_page(pfn); |
| 804 | if (!PageReserved(page)) | 1058 | if (!PageReserved(page)) |
| 805 | SetPageDirty(page); | 1059 | SetPageDirty(page); |
| @@ -809,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); | |||
| 809 | 1063 | ||
| 810 | void kvm_set_pfn_accessed(pfn_t pfn) | 1064 | void kvm_set_pfn_accessed(pfn_t pfn) |
| 811 | { | 1065 | { |
| 812 | if (pfn_valid(pfn)) | 1066 | if (!kvm_is_mmio_pfn(pfn)) |
| 813 | mark_page_accessed(pfn_to_page(pfn)); | 1067 | mark_page_accessed(pfn_to_page(pfn)); |
| 814 | } | 1068 | } |
| 815 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); | 1069 | EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); |
| 816 | 1070 | ||
| 817 | void kvm_get_pfn(pfn_t pfn) | 1071 | void kvm_get_pfn(pfn_t pfn) |
| 818 | { | 1072 | { |
| 819 | if (pfn_valid(pfn)) | 1073 | if (!kvm_is_mmio_pfn(pfn)) |
| 820 | get_page(pfn_to_page(pfn)); | 1074 | get_page(pfn_to_page(pfn)); |
| 821 | } | 1075 | } |
| 822 | EXPORT_SYMBOL_GPL(kvm_get_pfn); | 1076 | EXPORT_SYMBOL_GPL(kvm_get_pfn); |
| @@ -972,12 +1226,12 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
| 972 | for (;;) { | 1226 | for (;;) { |
| 973 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | 1227 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); |
| 974 | 1228 | ||
| 975 | if (kvm_cpu_has_interrupt(vcpu)) | 1229 | if (kvm_cpu_has_interrupt(vcpu) || |
| 976 | break; | 1230 | kvm_cpu_has_pending_timer(vcpu) || |
| 977 | if (kvm_cpu_has_pending_timer(vcpu)) | 1231 | kvm_arch_vcpu_runnable(vcpu)) { |
| 978 | break; | 1232 | set_bit(KVM_REQ_UNHALT, &vcpu->requests); |
| 979 | if (kvm_arch_vcpu_runnable(vcpu)) | ||
| 980 | break; | 1233 | break; |
| 1234 | } | ||
| 981 | if (signal_pending(current)) | 1235 | if (signal_pending(current)) |
| 982 | break; | 1236 | break; |
| 983 | 1237 | ||
| @@ -1074,12 +1328,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
| 1074 | 1328 | ||
| 1075 | r = kvm_arch_vcpu_setup(vcpu); | 1329 | r = kvm_arch_vcpu_setup(vcpu); |
| 1076 | if (r) | 1330 | if (r) |
| 1077 | goto vcpu_destroy; | 1331 | return r; |
| 1078 | 1332 | ||
| 1079 | mutex_lock(&kvm->lock); | 1333 | mutex_lock(&kvm->lock); |
| 1080 | if (kvm->vcpus[n]) { | 1334 | if (kvm->vcpus[n]) { |
| 1081 | r = -EEXIST; | 1335 | r = -EEXIST; |
| 1082 | mutex_unlock(&kvm->lock); | ||
| 1083 | goto vcpu_destroy; | 1336 | goto vcpu_destroy; |
| 1084 | } | 1337 | } |
| 1085 | kvm->vcpus[n] = vcpu; | 1338 | kvm->vcpus[n] = vcpu; |
| @@ -1095,8 +1348,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) | |||
| 1095 | unlink: | 1348 | unlink: |
| 1096 | mutex_lock(&kvm->lock); | 1349 | mutex_lock(&kvm->lock); |
| 1097 | kvm->vcpus[n] = NULL; | 1350 | kvm->vcpus[n] = NULL; |
| 1098 | mutex_unlock(&kvm->lock); | ||
| 1099 | vcpu_destroy: | 1351 | vcpu_destroy: |
| 1352 | mutex_unlock(&kvm->lock); | ||
| 1100 | kvm_arch_vcpu_destroy(vcpu); | 1353 | kvm_arch_vcpu_destroy(vcpu); |
| 1101 | return r; | 1354 | return r; |
| 1102 | } | 1355 | } |
| @@ -1118,6 +1371,8 @@ static long kvm_vcpu_ioctl(struct file *filp, | |||
| 1118 | struct kvm_vcpu *vcpu = filp->private_data; | 1371 | struct kvm_vcpu *vcpu = filp->private_data; |
| 1119 | void __user *argp = (void __user *)arg; | 1372 | void __user *argp = (void __user *)arg; |
| 1120 | int r; | 1373 | int r; |
| 1374 | struct kvm_fpu *fpu = NULL; | ||
| 1375 | struct kvm_sregs *kvm_sregs = NULL; | ||
| 1121 | 1376 | ||
| 1122 | if (vcpu->kvm->mm != current->mm) | 1377 | if (vcpu->kvm->mm != current->mm) |
| 1123 | return -EIO; | 1378 | return -EIO; |
| @@ -1165,25 +1420,28 @@ out_free2: | |||
| 1165 | break; | 1420 | break; |
| 1166 | } | 1421 | } |
| 1167 | case KVM_GET_SREGS: { | 1422 | case KVM_GET_SREGS: { |
| 1168 | struct kvm_sregs kvm_sregs; | 1423 | kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL); |
| 1169 | | 1424 | r = -ENOMEM; |
| 1170 | memset(&kvm_sregs, 0, sizeof kvm_sregs); | 1425 | if (!kvm_sregs) |
| 1171 | r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs); | 1426 | goto out; |
| 1427 | r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs); | ||
| 1172 | if (r) | 1428 | if (r) |
| 1173 | goto out; | 1429 | goto out; |
| 1174 | r = -EFAULT; | 1430 | r = -EFAULT; |
| 1175 | if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs)) | 1431 | if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs))) |
| 1176 | goto out; | 1432 | goto out; |
| 1177 | r = 0; | 1433 | r = 0; |
| 1178 | break; | 1434 | break; |
| 1179 | } | 1435 | } |
| 1180 | case KVM_SET_SREGS: { | 1436 | case KVM_SET_SREGS: { |
| 1181 | struct kvm_sregs kvm_sregs; | 1437 | kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); |
| 1182 | | 1438 | r = -ENOMEM; |
| 1439 | if (!kvm_sregs) | ||
| 1440 | goto out; | ||
| 1183 | r = -EFAULT; | 1441 | r = -EFAULT; |
| 1184 | if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs)) | 1442 | if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) |
| 1185 | goto out; | 1443 | goto out; |
| 1186 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs); | 1444 | r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); |
| 1187 | if (r) | 1445 | if (r) |
| 1188 | goto out; | 1446 | goto out; |
| 1189 | r = 0; | 1447 | r = 0; |
| @@ -1264,25 +1522,28 @@ out_free2: | |||
| 1264 | break; | 1522 | break; |
| 1265 | } | 1523 | } |
| 1266 | case KVM_GET_FPU: { | 1524 | case KVM_GET_FPU: { |
| 1267 | struct kvm_fpu fpu; | 1525 | fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL); |
| 1268 | | 1526 | r = -ENOMEM; |
| 1269 | memset(&fpu, 0, sizeof fpu); | 1527 | if (!fpu) |
| 1270 | r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu); | 1528 | goto out; |
| 1529 | r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu); | ||
| 1271 | if (r) | 1530 | if (r) |
| 1272 | goto out; | 1531 | goto out; |
| 1273 | r = -EFAULT; | 1532 | r = -EFAULT; |
| 1274 | if (copy_to_user(argp, &fpu, sizeof fpu)) | 1533 | if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu))) |
| 1275 | goto out; | 1534 | goto out; |
| 1276 | r = 0; | 1535 | r = 0; |
| 1277 | break; | 1536 | break; |
| 1278 | } | 1537 | } |
| 1279 | case KVM_SET_FPU: { | 1538 | case KVM_SET_FPU: { |
| 1280 | struct kvm_fpu fpu; | 1539 | fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); |
| 1281 | | 1540 | r = -ENOMEM; |
| 1541 | if (!fpu) | ||
| 1542 | goto out; | ||
| 1282 | r = -EFAULT; | 1543 | r = -EFAULT; |
| 1283 | if (copy_from_user(&fpu, argp, sizeof fpu)) | 1544 | if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) |
| 1284 | goto out; | 1545 | goto out; |
| 1285 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu); | 1546 | r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); |
| 1286 | if (r) | 1547 | if (r) |
| 1287 | goto out; | 1548 | goto out; |
| 1288 | r = 0; | 1549 | r = 0; |
| @@ -1292,6 +1553,8 @@ out_free2: | |||
| 1292 | r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); | 1553 | r = kvm_arch_vcpu_ioctl(filp, ioctl, arg); |
| 1293 | } | 1554 | } |
| 1294 | out: | 1555 | out: |
| 1556 | kfree(fpu); | ||
| 1557 | kfree(kvm_sregs); | ||
| 1295 | return r; | 1558 | return r; |
| 1296 | } | 1559 | } |
| 1297 | 1560 | ||
| @@ -1360,6 +1623,30 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 1360 | break; | 1623 | break; |
| 1361 | } | 1624 | } |
| 1362 | #endif | 1625 | #endif |
| 1626 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 1627 | case KVM_ASSIGN_PCI_DEVICE: { | ||
| 1628 | struct kvm_assigned_pci_dev assigned_dev; | ||
| 1629 | |||
| 1630 | r = -EFAULT; | ||
| 1631 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
| 1632 | goto out; | ||
| 1633 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
| 1634 | if (r) | ||
| 1635 | goto out; | ||
| 1636 | break; | ||
| 1637 | } | ||
| 1638 | case KVM_ASSIGN_IRQ: { | ||
| 1639 | struct kvm_assigned_irq assigned_irq; | ||
| 1640 | |||
| 1641 | r = -EFAULT; | ||
| 1642 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
| 1643 | goto out; | ||
| 1644 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
| 1645 | if (r) | ||
| 1646 | goto out; | ||
| 1647 | break; | ||
| 1648 | } | ||
| 1649 | #endif | ||
| 1363 | default: | 1650 | default: |
| 1364 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1651 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
| 1365 | } | 1652 | } |
| @@ -1369,17 +1656,22 @@ out: | |||
| 1369 | 1656 | ||
| 1370 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1657 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
| 1371 | { | 1658 | { |
| 1659 | struct page *page[1]; | ||
| 1660 | unsigned long addr; | ||
| 1661 | int npages; | ||
| 1662 | gfn_t gfn = vmf->pgoff; | ||
| 1372 | struct kvm *kvm = vma->vm_file->private_data; | 1663 | struct kvm *kvm = vma->vm_file->private_data; |
| 1373 | struct page *page; | ||
| 1374 | 1664 | ||
| 1375 | if (!kvm_is_visible_gfn(kvm, vmf->pgoff)) | 1665 | addr = gfn_to_hva(kvm, gfn); |
| 1666 | if (kvm_is_error_hva(addr)) | ||
| 1376 | return VM_FAULT_SIGBUS; | 1667 | return VM_FAULT_SIGBUS; |
| 1377 | page = gfn_to_page(kvm, vmf->pgoff); | 1668 | |
| 1378 | if (is_error_page(page)) { | 1669 | npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page, |
| 1379 | kvm_release_page_clean(page); | 1670 | NULL); |
| 1671 | if (unlikely(npages != 1)) | ||
| 1380 | return VM_FAULT_SIGBUS; | 1672 | return VM_FAULT_SIGBUS; |
| 1381 | } | 1673 | |
| 1382 | vmf->page = page; | 1674 | vmf->page = page[0]; |
| 1383 | return 0; | 1675 | return 0; |
| 1384 | } | 1676 | } |
| 1385 | 1677 | ||
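With KVM_REQ_UNHALT, kvm_vcpu_block() now records why it returned: the bit is set when the vcpu became runnable (interrupt, pending timer, arch event), and left clear when a signal interrupted the wait. Arch code can consume it along these lines (a sketch; the x86 mp_state handling shown is illustrative):

static void sample_emulate_halt(struct kvm_vcpu *vcpu)
{
	kvm_vcpu_block(vcpu);
	if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
		/* Something made the vcpu runnable again: leave halted state. */
		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
	/* Otherwise the wait was broken by a signal; stay halted. */
}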
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 58141f31ea8f..41dcc845f78c 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
| 18 | #include <linux/relay.h> | 18 | #include <linux/relay.h> |
| 19 | #include <linux/debugfs.h> | 19 | #include <linux/debugfs.h> |
| 20 | #include <linux/ktime.h> | ||
| 20 | 21 | ||
| 21 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
| 22 | 23 | ||
| @@ -35,16 +36,16 @@ static struct kvm_trace *kvm_trace; | |||
| 35 | struct kvm_trace_probe { | 36 | struct kvm_trace_probe { |
| 36 | const char *name; | 37 | const char *name; |
| 37 | const char *format; | 38 | const char *format; |
| 38 | u32 cycle_in; | 39 | u32 timestamp_in; |
| 39 | marker_probe_func *probe_func; | 40 | marker_probe_func *probe_func; |
| 40 | }; | 41 | }; |
| 41 | 42 | ||
| 42 | static inline int calc_rec_size(int cycle, int extra) | 43 | static inline int calc_rec_size(int timestamp, int extra) |
| 43 | { | 44 | { |
| 44 | int rec_size = KVM_TRC_HEAD_SIZE; | 45 | int rec_size = KVM_TRC_HEAD_SIZE; |
| 45 | 46 | ||
| 46 | rec_size += extra; | 47 | rec_size += extra; |
| 47 | return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | 48 | return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; |
| 48 | } | 49 | } |
| 49 | 50 | ||
| 50 | static void kvm_add_trace(void *probe_private, void *call_data, | 51 | static void kvm_add_trace(void *probe_private, void *call_data, |
| @@ -54,12 +55,13 @@ static void kvm_add_trace(void *probe_private, void *call_data, | |||
| 54 | struct kvm_trace *kt = kvm_trace; | 55 | struct kvm_trace *kt = kvm_trace; |
| 55 | struct kvm_trace_rec rec; | 56 | struct kvm_trace_rec rec; |
| 56 | struct kvm_vcpu *vcpu; | 57 | struct kvm_vcpu *vcpu; |
| 57 | int i, extra, size; | 58 | int i, size; |
| 59 | u32 extra; | ||
| 58 | 60 | ||
| 59 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | 61 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) |
| 60 | return; | 62 | return; |
| 61 | 63 | ||
| 62 | rec.event = va_arg(*args, u32); | 64 | rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); |
| 63 | vcpu = va_arg(*args, struct kvm_vcpu *); | 65 | vcpu = va_arg(*args, struct kvm_vcpu *); |
| 64 | rec.pid = current->tgid; | 66 | rec.pid = current->tgid; |
| 65 | rec.vcpu_id = vcpu->vcpu_id; | 67 | rec.vcpu_id = vcpu->vcpu_id; |
| @@ -67,21 +69,21 @@ static void kvm_add_trace(void *probe_private, void *call_data, | |||
| 67 | extra = va_arg(*args, u32); | 69 | extra = va_arg(*args, u32); |
| 68 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | 70 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); |
| 69 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | 71 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); |
| 70 | rec.extra_u32 = extra; | ||
| 71 | 72 | ||
| 72 | rec.cycle_in = p->cycle_in; | 73 | rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) |
| 74 | | TRACE_REC_NUM_DATA_ARGS(extra); | ||
| 73 | 75 | ||
| 74 | if (rec.cycle_in) { | 76 | if (p->timestamp_in) { |
| 75 | rec.u.cycle.cycle_u64 = get_cycles(); | 77 | rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); |
| 76 | 78 | ||
| 77 | for (i = 0; i < rec.extra_u32; i++) | 79 | for (i = 0; i < extra; i++) |
| 78 | rec.u.cycle.extra_u32[i] = va_arg(*args, u32); | 80 | rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); |
| 79 | } else { | 81 | } else { |
| 80 | for (i = 0; i < rec.extra_u32; i++) | 82 | for (i = 0; i < extra; i++) |
| 81 | rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); | 83 | rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); |
| 82 | } | 84 | } |
| 83 | 85 | ||
| 84 | size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); | 86 | size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); |
| 85 | relay_write(kt->rchan, &rec, size); | 87 | relay_write(kt->rchan, &rec, size); |
| 86 | } | 88 | } |
| 87 | 89 | ||
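On the userspace side, a consumer of the relay channel now has to unpack rec_val instead of reading bitfields. A small decoding sketch (not part of the patch) based on the bit layout documented in kvm.h:

#include <stdio.h>
#include <linux/types.h>

static void decode_rec_val(__u32 rec_val)
{
	__u32 event_id	= rec_val & 0x0fffffff;		/* bits 0-27  */
	__u32 extra	= (rec_val >> 28) & 0x7;	/* bits 28-30 */
	int has_ts	= (rec_val >> 31) & 0x1;	/* bit 31     */

	printf("event %u: %u extra u32 args, %s timestamp\n",
	       event_id, extra, has_ts ? "with" : "without");
}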
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c new file mode 100644 index 000000000000..a770874f3a3a --- /dev/null +++ b/virt/kvm/vtd.c | |||
| @@ -0,0 +1,191 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2006, Intel Corporation. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License along with | ||
| 14 | * this program; if not, write to the Free Software Foundation, Inc., 59 Temple | ||
| 15 | * Place - Suite 330, Boston, MA 02111-1307 USA. | ||
| 16 | * | ||
| 17 | * Copyright (C) 2006-2008 Intel Corporation | ||
| 18 | * Copyright IBM Corporation, 2008 | ||
| 19 | * Author: Allen M. Kay <allen.m.kay@intel.com> | ||
| 20 | * Author: Weidong Han <weidong.han@intel.com> | ||
| 21 | * Author: Ben-Ami Yassour <benami@il.ibm.com> | ||
| 22 | */ | ||
| 23 | |||
| 24 | #include <linux/list.h> | ||
| 25 | #include <linux/kvm_host.h> | ||
| 26 | #include <linux/pci.h> | ||
| 27 | #include <linux/dmar.h> | ||
| 28 | #include <linux/intel-iommu.h> | ||
| 29 | |||
| 30 | static int kvm_iommu_unmap_memslots(struct kvm *kvm); | ||
| 31 | static void kvm_iommu_put_pages(struct kvm *kvm, | ||
| 32 | gfn_t base_gfn, unsigned long npages); | ||
| 33 | |||
| 34 | int kvm_iommu_map_pages(struct kvm *kvm, | ||
| 35 | gfn_t base_gfn, unsigned long npages) | ||
| 36 | { | ||
| 37 | gfn_t gfn = base_gfn; | ||
| 38 | pfn_t pfn; | ||
| 39 | int i, r = 0; | ||
| 40 | struct dmar_domain *domain = kvm->arch.intel_iommu_domain; | ||
| 41 | |||
| 42 | /* check if an iommu domain exists and is in use */ | ||
| 43 | if (!domain) | ||
| 44 | return 0; | ||
| 45 | |||
| 46 | for (i = 0; i < npages; i++) { | ||
| 47 | /* check if already mapped */ | ||
| 48 | pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, | ||
| 49 | gfn_to_gpa(gfn)); | ||
| 50 | if (pfn) | ||
| 51 | continue; | ||
| 52 | |||
| 53 | pfn = gfn_to_pfn(kvm, gfn); | ||
| 54 | r = intel_iommu_page_mapping(domain, | ||
| 55 | gfn_to_gpa(gfn), | ||
| 56 | pfn_to_hpa(pfn), | ||
| 57 | PAGE_SIZE, | ||
| 58 | DMA_PTE_READ | | ||
| 59 | DMA_PTE_WRITE); | ||
| 60 | if (r) { | ||
| 61 | printk(KERN_ERR "kvm_iommu_map_pages: " | ||
| 62 | "iommu failed to map pfn=%lx\n", pfn); | ||
| 63 | goto unmap_pages; | ||
| 64 | } | ||
| 65 | gfn++; | ||
| 66 | } | ||
| 67 | return 0; | ||
| 68 | |||
| 69 | unmap_pages: | ||
| 70 | kvm_iommu_put_pages(kvm, base_gfn, i); | ||
| 71 | return r; | ||
| 72 | } | ||
| 73 | |||
| 74 | static int kvm_iommu_map_memslots(struct kvm *kvm) | ||
| 75 | { | ||
| 76 | int i, r; | ||
| 77 | |||
| 78 | down_read(&kvm->slots_lock); | ||
| 79 | for (i = 0; i < kvm->nmemslots; i++) { | ||
| 80 | r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, | ||
| 81 | kvm->memslots[i].npages); | ||
| 82 | if (r) | ||
| 83 | break; | ||
| 84 | } | ||
| 85 | up_read(&kvm->slots_lock); | ||
| 86 | return r; | ||
| 87 | } | ||
| 88 | |||
| 89 | int kvm_iommu_map_guest(struct kvm *kvm, | ||
| 90 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 91 | { | ||
| 92 | struct pci_dev *pdev = NULL; | ||
| 93 | int r; | ||
| 94 | |||
| 95 | if (!intel_iommu_found()) { | ||
| 96 | printk(KERN_ERR "%s: intel iommu not found\n", __func__); | ||
| 97 | return -ENODEV; | ||
| 98 | } | ||
| 99 | |||
| 100 | printk(KERN_DEBUG "VT-d direct map: host bdf = %x:%x:%x\n", | ||
| 101 | assigned_dev->host_busnr, | ||
| 102 | PCI_SLOT(assigned_dev->host_devfn), | ||
| 103 | PCI_FUNC(assigned_dev->host_devfn)); | ||
| 104 | |||
| 105 | pdev = assigned_dev->dev; | ||
| 106 | |||
| 107 | if (pdev == NULL) { | ||
| 108 | if (kvm->arch.intel_iommu_domain) { | ||
| 109 | intel_iommu_domain_exit(kvm->arch.intel_iommu_domain); | ||
| 110 | kvm->arch.intel_iommu_domain = NULL; | ||
| 111 | } | ||
| 112 | return -ENODEV; | ||
| 113 | } | ||
| 114 | |||
| 115 | kvm->arch.intel_iommu_domain = intel_iommu_domain_alloc(pdev); | ||
| 116 | if (!kvm->arch.intel_iommu_domain) | ||
| 117 | return -ENODEV; | ||
| 118 | |||
| 119 | r = kvm_iommu_map_memslots(kvm); | ||
| 120 | if (r) | ||
| 121 | goto out_unmap; | ||
| 122 | |||
| 123 | intel_iommu_detach_dev(kvm->arch.intel_iommu_domain, | ||
| 124 | pdev->bus->number, pdev->devfn); | ||
| 125 | |||
| 126 | r = intel_iommu_context_mapping(kvm->arch.intel_iommu_domain, | ||
| 127 | pdev); | ||
| 128 | if (r) { | ||
| 129 | printk(KERN_ERR "Domain context map for %s failed\n", | ||
| 130 | pci_name(pdev)); | ||
| 131 | goto out_unmap; | ||
| 132 | } | ||
| 133 | return 0; | ||
| 134 | |||
| 135 | out_unmap: | ||
| 136 | kvm_iommu_unmap_memslots(kvm); | ||
| 137 | return r; | ||
| 138 | } | ||
| 139 | |||
| 140 | static void kvm_iommu_put_pages(struct kvm *kvm, | ||
| 141 | gfn_t base_gfn, unsigned long npages) | ||
| 142 | { | ||
| 143 | gfn_t gfn = base_gfn; | ||
| 144 | pfn_t pfn; | ||
| 145 | struct dmar_domain *domain = kvm->arch.intel_iommu_domain; | ||
| 146 | int i; | ||
| 147 | |||
| 148 | for (i = 0; i < npages; i++) { | ||
| 149 | pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, | ||
| 150 | gfn_to_gpa(gfn)); | ||
| 151 | kvm_release_pfn_clean(pfn); | ||
| 152 | gfn++; | ||
| 153 | } | ||
| 154 | } | ||
| 155 | |||
| 156 | static int kvm_iommu_unmap_memslots(struct kvm *kvm) | ||
| 157 | { | ||
| 158 | int i; | ||
| 159 | down_read(&kvm->slots_lock); | ||
| 160 | for (i = 0; i < kvm->nmemslots; i++) { | ||
| 161 | kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, | ||
| 162 | kvm->memslots[i].npages); | ||
| 163 | } | ||
| 164 | up_read(&kvm->slots_lock); | ||
| 165 | |||
| 166 | return 0; | ||
| 167 | } | ||
| 168 | |||
| 169 | int kvm_iommu_unmap_guest(struct kvm *kvm) | ||
| 170 | { | ||
| 171 | struct kvm_assigned_dev_kernel *entry; | ||
| 172 | struct dmar_domain *domain = kvm->arch.intel_iommu_domain; | ||
| 173 | |||
| 174 | /* check if an iommu domain exists and is in use */ | ||
| 175 | if (!domain) | ||
| 176 | return 0; | ||
| 177 | |||
| 178 | list_for_each_entry(entry, &kvm->arch.assigned_dev_head, list) { | ||
| 179 | printk(KERN_DEBUG "VT-d unmap: host bdf = %x:%x:%x\n", | ||
| 180 | entry->host_busnr, | ||
| 181 | PCI_SLOT(entry->host_devfn), | ||
| 182 | PCI_FUNC(entry->host_devfn)); | ||
| 183 | |||
| 184 | /* detach kvm dmar domain */ | ||
| 185 | intel_iommu_detach_dev(domain, entry->host_busnr, | ||
| 186 | entry->host_devfn); | ||
| 187 | } | ||
| 188 | kvm_iommu_unmap_memslots(kvm); | ||
| 189 | intel_iommu_domain_exit(domain); | ||
| 190 | return 0; | ||
| 191 | } | ||
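The new file gives the device-assignment code two entry points: kvm_iommu_map_guest() allocates a VT-d (DMAR) domain for the VM, maps every memslot page with the guest-physical address as the I/O virtual address and the pinned host page as the target, and attaches the assigned PCI device to that domain; kvm_iommu_unmap_guest() detaches all assigned devices, releases the pinned pages and destroys the domain. A minimal sketch of how a caller would bracket a device's lifetime with them (the example_* wrappers are hypothetical and not part of this patch):

    #include <linux/kvm_host.h>

    /* Hypothetical caller: assumes assigned_dev->dev, ->host_busnr and
     * ->host_devfn were filled in by the assignment ioctl and the entry
     * was already linked on kvm->arch.assigned_dev_head. */
    static int example_attach_assigned_device(struct kvm *kvm,
                    struct kvm_assigned_dev_kernel *assigned_dev)
    {
            /* Builds the per-VM dmar_domain, maps each memslot page
             * gpa -> hpa read/write, then installs the device's context
             * entry so its DMA is translated through that domain. */
            return kvm_iommu_map_guest(kvm, assigned_dev);
    }

    static void example_detach_assigned_devices(struct kvm *kvm)
    {
            /* Detaches every assigned device, drops the page references
             * taken at map time and tears the domain down; a no-op if no
             * domain was ever created. */
            kvm_iommu_unmap_guest(kvm);
    }

Using guest-physical addresses directly as I/O virtual addresses means the guest can program assigned devices with its own physical addresses unmodified; the VT-d hardware translates each DMA to the host page that backs the corresponding memslot entry.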
