45 files changed, 2936 insertions, 1579 deletions
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 5a4bc8cf6d04..e1a114161027 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt | |||
| @@ -593,6 +593,115 @@ struct kvm_irqchip { | |||
| 593 | } chip; | 593 | } chip; |
| 594 | }; | 594 | }; |
| 595 | 595 | ||
| 596 | 4.27 KVM_XEN_HVM_CONFIG | ||
| 597 | |||
| 598 | Capability: KVM_CAP_XEN_HVM | ||
| 599 | Architectures: x86 | ||
| 600 | Type: vm ioctl | ||
| 601 | Parameters: struct kvm_xen_hvm_config (in) | ||
| 602 | Returns: 0 on success, -1 on error | ||
| 603 | |||
| 604 | Sets the MSR that the Xen HVM guest uses to initialize its hypercall | ||
| 605 | page, and provides the starting address and size of the hypercall | ||
| 606 | blobs in userspace. When the guest writes the MSR, kvm copies one | ||
| 607 | page of a blob (32- or 64-bit, depending on the vcpu mode) to guest | ||
| 608 | memory. | ||
| 609 | |||
| 610 | struct kvm_xen_hvm_config { | ||
| 611 | __u32 flags; | ||
| 612 | __u32 msr; | ||
| 613 | __u64 blob_addr_32; | ||
| 614 | __u64 blob_addr_64; | ||
| 615 | __u8 blob_size_32; | ||
| 616 | __u8 blob_size_64; | ||
| 617 | __u8 pad2[30]; | ||
| 618 | }; | ||
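For illustration only (not part of this patch): a userspace VMM might enable the hypercall page roughly as follows, assuming the usual <linux/kvm.h> and <sys/ioctl.h> includes, an open VM descriptor vm_fd, and blob buffers blob32/blob64 prepared by the VMM. The MSR number 0x40000000 and the one-page blob sizes are assumptions for the example, not mandated by the interface.

	struct kvm_xen_hvm_config cfg = {
		.msr          = 0x40000000,                    /* assumed hypercall MSR */
		.blob_addr_32 = (__u64)(unsigned long)blob32,  /* 32-bit hypercall blob */
		.blob_addr_64 = (__u64)(unsigned long)blob64,  /* 64-bit hypercall blob */
		.blob_size_32 = 1,                             /* one page each, for the example */
		.blob_size_64 = 1,
	};

	if (ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg) < 0)
		perror("KVM_XEN_HVM_CONFIG");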
| 619 | |||
| 620 | 4.28 KVM_GET_CLOCK | ||
| 621 | |||
| 622 | Capability: KVM_CAP_ADJUST_CLOCK | ||
| 623 | Architectures: x86 | ||
| 624 | Type: vm ioctl | ||
| 625 | Parameters: struct kvm_clock_data (out) | ||
| 626 | Returns: 0 on success, -1 on error | ||
| 627 | |||
| 628 | Gets the current timestamp of kvmclock as seen by the current guest. In | ||
| 629 | conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity in scenarios | ||
| 630 | such as migration. | ||
| 631 | |||
| 632 | struct kvm_clock_data { | ||
| 633 | __u64 clock; /* kvmclock current value */ | ||
| 634 | __u32 flags; | ||
| 635 | __u32 pad[9]; | ||
| 636 | }; | ||
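A hedged sketch of the save side (vm_fd is an open VM descriptor; error handling trimmed): the value read here is what a migration source would transfer along with the rest of the guest state.

	struct kvm_clock_data clock_data;

	memset(&clock_data, 0, sizeof(clock_data));
	if (ioctl(vm_fd, KVM_GET_CLOCK, &clock_data) < 0)
		perror("KVM_GET_CLOCK");
	/* clock_data.clock now holds the guest-visible kvmclock value */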
| 637 | |||
| 638 | 4.29 KVM_SET_CLOCK | ||
| 639 | |||
| 640 | Capability: KVM_CAP_ADJUST_CLOCK | ||
| 641 | Architectures: x86 | ||
| 642 | Type: vm ioctl | ||
| 643 | Parameters: struct kvm_clock_data (in) | ||
| 644 | Returns: 0 on success, -1 on error | ||
| 645 | |||
| 646 | Sets the current timestamp of kvmclock to the value specified in its parameter. | ||
| 647 | In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity in scenarios | ||
| 648 | such as migration. | ||
| 649 | |||
| 650 | struct kvm_clock_data { | ||
| 651 | __u64 clock; /* kvmclock current value */ | ||
| 652 | __u32 flags; | ||
| 653 | __u32 pad[9]; | ||
| 654 | }; | ||
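And the restore side of the same scenario, again only as an illustrative sketch reusing clock_data from the KVM_GET_CLOCK example above: feeding the transferred value back in on the destination keeps kvmclock from jumping backwards from the guest's point of view.

	clock_data.flags = 0;	/* leave flags zeroed */
	if (ioctl(vm_fd, KVM_SET_CLOCK, &clock_data) < 0)
		perror("KVM_SET_CLOCK");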
| 655 | |||
| 656 | 4.30 KVM_GET_VCPU_EVENTS | ||
| 657 | |||
| 658 | Capability: KVM_CAP_VCPU_EVENTS | ||
| 659 | Architectures: x86 | ||
| 660 | Type: vcpu ioctl | ||
| 661 | Parameters: struct kvm_vcpu_events (out) | ||
| 662 | Returns: 0 on success, -1 on error | ||
| 663 | |||
| 664 | Gets currently pending exceptions, interrupts, and NMIs as well as related | ||
| 665 | states of the vcpu. | ||
| 666 | |||
| 667 | struct kvm_vcpu_events { | ||
| 668 | struct { | ||
| 669 | __u8 injected; | ||
| 670 | __u8 nr; | ||
| 671 | __u8 has_error_code; | ||
| 672 | __u8 pad; | ||
| 673 | __u32 error_code; | ||
| 674 | } exception; | ||
| 675 | struct { | ||
| 676 | __u8 injected; | ||
| 677 | __u8 nr; | ||
| 678 | __u8 soft; | ||
| 679 | __u8 pad; | ||
| 680 | } interrupt; | ||
| 681 | struct { | ||
| 682 | __u8 injected; | ||
| 683 | __u8 pending; | ||
| 684 | __u8 masked; | ||
| 685 | __u8 pad; | ||
| 686 | } nmi; | ||
| 687 | __u32 sipi_vector; | ||
| 688 | __u32 flags; /* must be zero */ | ||
| 689 | }; | ||
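For illustration (vcpu_fd is an open vcpu descriptor; not part of this patch), reading the event state before saving a vcpu might look like:

	struct kvm_vcpu_events events;

	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		perror("KVM_GET_VCPU_EVENTS");
	/* events.exception, events.interrupt and events.nmi now describe
	 * anything that is queued but not yet delivered to the guest */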
| 690 | |||
| 691 | 4.31 KVM_SET_VCPU_EVENTS | ||
| 692 | |||
| 693 | Capability: KVM_CAP_VCPU_EVENTS | ||
| 694 | Architectures: x86 | ||
| 695 | Type: vcpu ioctl | ||
| 696 | Parameters: struct kvm_vcpu_events (in) | ||
| 697 | Returns: 0 on success, -1 on error | ||
| 698 | |||
| 699 | Sets pending exceptions, interrupts, and NMIs as well as related states of the | ||
| 700 | vcpu. | ||
| 701 | |||
| 702 | See KVM_GET_VCPU_EVENTS for the data structure. | ||
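The mirror operation on the target vcpu can reuse the structure read under KVM_GET_VCPU_EVENTS above; a minimal sketch (note that flags must be zero, as stated in the structure definition):

	events.flags = 0;	/* must be zero */
	if (ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events) < 0)
		perror("KVM_SET_VCPU_EVENTS");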
| 703 | |||
| 704 | |||
| 596 | 5. The kvm_run structure | 705 | 5. The kvm_run structure |
| 597 | 706 | ||
| 598 | Application code obtains a pointer to the kvm_run structure by | 707 | Application code obtains a pointer to the kvm_run structure by |
diff --git a/arch/Kconfig b/arch/Kconfig index eef3bbb97075..d82875820a15 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -83,6 +83,13 @@ config KRETPROBES | |||
| 83 | def_bool y | 83 | def_bool y |
| 84 | depends on KPROBES && HAVE_KRETPROBES | 84 | depends on KPROBES && HAVE_KRETPROBES |
| 85 | 85 | ||
| 86 | config USER_RETURN_NOTIFIER | ||
| 87 | bool | ||
| 88 | depends on HAVE_USER_RETURN_NOTIFIER | ||
| 89 | help | ||
| 90 | Provide a kernel-internal notification when a cpu is about to | ||
| 91 | switch to user mode. | ||
| 92 | |||
| 86 | config HAVE_IOREMAP_PROT | 93 | config HAVE_IOREMAP_PROT |
| 87 | bool | 94 | bool |
| 88 | 95 | ||
| @@ -132,5 +139,7 @@ config HAVE_HW_BREAKPOINT | |||
| 132 | select ANON_INODES | 139 | select ANON_INODES |
| 133 | select PERF_EVENTS | 140 | select PERF_EVENTS |
| 134 | 141 | ||
| 142 | config HAVE_USER_RETURN_NOTIFIER | ||
| 143 | bool | ||
| 135 | 144 | ||
| 136 | source "kernel/gcov/Kconfig" | 145 | source "kernel/gcov/Kconfig" |
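To make the USER_RETURN_NOTIFIER option above concrete, here is a hedged sketch of a client of the interface added later in this series (include/linux/user-return-notifier.h). The callback name and the one-shot unregister pattern are illustrative; they mirror how KVM uses the hook to defer restoring host-only MSR state until a cpu actually returns to user mode.

	#include <linux/user-return-notifier.h>

	static void my_on_user_return(struct user_return_notifier *urn)
	{
		/* runs on this cpu just before it returns to user mode */
		/* ... restore host-only register state here ... */
		user_return_notifier_unregister(urn);	/* one-shot use, as KVM does */
	}

	static struct user_return_notifier my_urn = {
		.on_user_return	= my_on_user_return,
	};

	/* arm the notifier on the cpu whose return to user mode we care about */
	user_return_notifier_register(&my_urn);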
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h index 18a7e49abbc5..bc90c75adf67 100644 --- a/arch/ia64/include/asm/kvm.h +++ b/arch/ia64/include/asm/kvm.h | |||
| @@ -60,6 +60,7 @@ struct kvm_ioapic_state { | |||
| 60 | #define KVM_IRQCHIP_PIC_MASTER 0 | 60 | #define KVM_IRQCHIP_PIC_MASTER 0 |
| 61 | #define KVM_IRQCHIP_PIC_SLAVE 1 | 61 | #define KVM_IRQCHIP_PIC_SLAVE 1 |
| 62 | #define KVM_IRQCHIP_IOAPIC 2 | 62 | #define KVM_IRQCHIP_IOAPIC 2 |
| 63 | #define KVM_NR_IRQCHIPS 3 | ||
| 63 | 64 | ||
| 64 | #define KVM_CONTEXT_SIZE 8*1024 | 65 | #define KVM_CONTEXT_SIZE 8*1024 |
| 65 | 66 | ||
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index d9b6325a9328..a362e67e0ca6 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h | |||
| @@ -475,7 +475,6 @@ struct kvm_arch { | |||
| 475 | struct list_head assigned_dev_head; | 475 | struct list_head assigned_dev_head; |
| 476 | struct iommu_domain *iommu_domain; | 476 | struct iommu_domain *iommu_domain; |
| 477 | int iommu_flags; | 477 | int iommu_flags; |
| 478 | struct hlist_head irq_ack_notifier_list; | ||
| 479 | 478 | ||
| 480 | unsigned long irq_sources_bitmap; | 479 | unsigned long irq_sources_bitmap; |
| 481 | unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; | 480 | unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; |
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 0bb99b732908..1089b3e918ac 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile | |||
| @@ -49,7 +49,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ | |||
| 49 | EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ | 49 | EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ |
| 50 | 50 | ||
| 51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 51 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 52 | coalesced_mmio.o irq_comm.o) | 52 | coalesced_mmio.o irq_comm.o assigned-dev.o) |
| 53 | 53 | ||
| 54 | ifeq ($(CONFIG_IOMMU_API),y) | 54 | ifeq ($(CONFIG_IOMMU_API),y) |
| 55 | common-objs += $(addprefix ../../../virt/kvm/, iommu.o) | 55 | common-objs += $(addprefix ../../../virt/kvm/, iommu.o) |
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0ad09f05efa9..5fdeec5fddcf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c | |||
| @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) | |||
| 124 | 124 | ||
| 125 | static DEFINE_SPINLOCK(vp_lock); | 125 | static DEFINE_SPINLOCK(vp_lock); |
| 126 | 126 | ||
| 127 | void kvm_arch_hardware_enable(void *garbage) | 127 | int kvm_arch_hardware_enable(void *garbage) |
| 128 | { | 128 | { |
| 129 | long status; | 129 | long status; |
| 130 | long tmp_base; | 130 | long tmp_base; |
| @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage) | |||
| 137 | slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); | 137 | slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); |
| 138 | local_irq_restore(saved_psr); | 138 | local_irq_restore(saved_psr); |
| 139 | if (slot < 0) | 139 | if (slot < 0) |
| 140 | return; | 140 | return -EINVAL; |
| 141 | 141 | ||
| 142 | spin_lock(&vp_lock); | 142 | spin_lock(&vp_lock); |
| 143 | status = ia64_pal_vp_init_env(kvm_vsa_base ? | 143 | status = ia64_pal_vp_init_env(kvm_vsa_base ? |
| @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage) | |||
| 145 | __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); | 145 | __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); |
| 146 | if (status != 0) { | 146 | if (status != 0) { |
| 147 | printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); | 147 | printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); |
| 148 | return ; | 148 | return -EINVAL; |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | if (!kvm_vsa_base) { | 151 | if (!kvm_vsa_base) { |
| @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage) | |||
| 154 | } | 154 | } |
| 155 | spin_unlock(&vp_lock); | 155 | spin_unlock(&vp_lock); |
| 156 | ia64_ptr_entry(0x3, slot); | 156 | ia64_ptr_entry(0x3, slot); |
| 157 | |||
| 158 | return 0; | ||
| 157 | } | 159 | } |
| 158 | 160 | ||
| 159 | void kvm_arch_hardware_disable(void *garbage) | 161 | void kvm_arch_hardware_disable(void *garbage) |
| @@ -851,8 +853,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, | |||
| 851 | r = 0; | 853 | r = 0; |
| 852 | switch (chip->chip_id) { | 854 | switch (chip->chip_id) { |
| 853 | case KVM_IRQCHIP_IOAPIC: | 855 | case KVM_IRQCHIP_IOAPIC: |
| 854 | memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), | 856 | r = kvm_get_ioapic(kvm, &chip->chip.ioapic); |
| 855 | sizeof(struct kvm_ioapic_state)); | ||
| 856 | break; | 857 | break; |
| 857 | default: | 858 | default: |
| 858 | r = -EINVAL; | 859 | r = -EINVAL; |
| @@ -868,9 +869,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
| 868 | r = 0; | 869 | r = 0; |
| 869 | switch (chip->chip_id) { | 870 | switch (chip->chip_id) { |
| 870 | case KVM_IRQCHIP_IOAPIC: | 871 | case KVM_IRQCHIP_IOAPIC: |
| 871 | memcpy(ioapic_irqchip(kvm), | 872 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
| 872 | &chip->chip.ioapic, | ||
| 873 | sizeof(struct kvm_ioapic_state)); | ||
| 874 | break; | 873 | break; |
| 875 | default: | 874 | default: |
| 876 | r = -EINVAL; | 875 | r = -EINVAL; |
| @@ -944,7 +943,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 944 | { | 943 | { |
| 945 | struct kvm *kvm = filp->private_data; | 944 | struct kvm *kvm = filp->private_data; |
| 946 | void __user *argp = (void __user *)arg; | 945 | void __user *argp = (void __user *)arg; |
| 947 | int r = -EINVAL; | 946 | int r = -ENOTTY; |
| 948 | 947 | ||
| 949 | switch (ioctl) { | 948 | switch (ioctl) { |
| 950 | case KVM_SET_MEMORY_REGION: { | 949 | case KVM_SET_MEMORY_REGION: { |
| @@ -985,10 +984,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 985 | goto out; | 984 | goto out; |
| 986 | if (irqchip_in_kernel(kvm)) { | 985 | if (irqchip_in_kernel(kvm)) { |
| 987 | __s32 status; | 986 | __s32 status; |
| 988 | mutex_lock(&kvm->irq_lock); | ||
| 989 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 987 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 990 | irq_event.irq, irq_event.level); | 988 | irq_event.irq, irq_event.level); |
| 991 | mutex_unlock(&kvm->irq_lock); | ||
| 992 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 989 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
| 993 | irq_event.status = status; | 990 | irq_event.status = status; |
| 994 | if (copy_to_user(argp, &irq_event, | 991 | if (copy_to_user(argp, &irq_event, |
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2a4551f78f60..5902bbc2411e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c | |||
| @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) | |||
| 78 | return r; | 78 | return r; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | void kvm_arch_hardware_enable(void *garbage) | 81 | int kvm_arch_hardware_enable(void *garbage) |
| 82 | { | 82 | { |
| 83 | return 0; | ||
| 83 | } | 84 | } |
| 84 | 85 | ||
| 85 | void kvm_arch_hardware_disable(void *garbage) | 86 | void kvm_arch_hardware_disable(void *garbage) |
| @@ -421,7 +422,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 421 | 422 | ||
| 422 | switch (ioctl) { | 423 | switch (ioctl) { |
| 423 | default: | 424 | default: |
| 424 | r = -EINVAL; | 425 | r = -ENOTTY; |
| 425 | } | 426 | } |
| 426 | 427 | ||
| 427 | return r; | 428 | return r; |
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 806ef67868bd..8167d42a776f 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h | |||
| @@ -51,7 +51,7 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) | |||
| 51 | 51 | ||
| 52 | /* The BUILD_BUG_ON below breaks in funny ways, commented out | 52 | /* The BUILD_BUG_ON below breaks in funny ways, commented out |
| 53 | * for now ... -BenH | 53 | * for now ... -BenH |
| 54 | BUILD_BUG_ON(__builtin_constant_p(type)); | 54 | BUILD_BUG_ON(!__builtin_constant_p(type)); |
| 55 | */ | 55 | */ |
| 56 | switch (type) { | 56 | switch (type) { |
| 57 | case EXT_INTR_EXITS: | 57 | case EXT_INTR_EXITS: |
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 3dfcaeb5d7f4..82b32a100c7d 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h | |||
| @@ -1,6 +1,5 @@ | |||
| 1 | #ifndef __LINUX_KVM_S390_H | 1 | #ifndef __LINUX_KVM_S390_H |
| 2 | #define __LINUX_KVM_S390_H | 2 | #define __LINUX_KVM_S390_H |
| 3 | |||
| 4 | /* | 3 | /* |
| 5 | * asm-s390/kvm.h - KVM s390 specific structures and definitions | 4 | * asm-s390/kvm.h - KVM s390 specific structures and definitions |
| 6 | * | 5 | * |
| @@ -15,6 +14,8 @@ | |||
| 15 | */ | 14 | */ |
| 16 | #include <linux/types.h> | 15 | #include <linux/types.h> |
| 17 | 16 | ||
| 17 | #define __KVM_S390 | ||
| 18 | |||
| 18 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 19 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
| 19 | struct kvm_regs { | 20 | struct kvm_regs { |
| 20 | /* general purpose regs for s390 */ | 21 | /* general purpose regs for s390 */ |
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 07ced89740d7..f8bcaefd7d34 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c | |||
| @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 74 | static unsigned long long *facilities; | 74 | static unsigned long long *facilities; |
| 75 | 75 | ||
| 76 | /* Section: not file related */ | 76 | /* Section: not file related */ |
| 77 | void kvm_arch_hardware_enable(void *garbage) | 77 | int kvm_arch_hardware_enable(void *garbage) |
| 78 | { | 78 | { |
| 79 | /* every s390 is virtualization enabled ;-) */ | 79 | /* every s390 is virtualization enabled ;-) */ |
| 80 | return 0; | ||
| 80 | } | 81 | } |
| 81 | 82 | ||
| 82 | void kvm_arch_hardware_disable(void *garbage) | 83 | void kvm_arch_hardware_disable(void *garbage) |
| @@ -116,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp, | |||
| 116 | 117 | ||
| 117 | int kvm_dev_ioctl_check_extension(long ext) | 118 | int kvm_dev_ioctl_check_extension(long ext) |
| 118 | { | 119 | { |
| 120 | int r; | ||
| 121 | |||
| 119 | switch (ext) { | 122 | switch (ext) { |
| 123 | case KVM_CAP_S390_PSW: | ||
| 124 | r = 1; | ||
| 125 | break; | ||
| 120 | default: | 126 | default: |
| 121 | return 0; | 127 | r = 0; |
| 122 | } | 128 | } |
| 129 | return r; | ||
| 123 | } | 130 | } |
| 124 | 131 | ||
| 125 | /* Section: vm related */ | 132 | /* Section: vm related */ |
| @@ -150,7 +157,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 150 | break; | 157 | break; |
| 151 | } | 158 | } |
| 152 | default: | 159 | default: |
| 153 | r = -EINVAL; | 160 | r = -ENOTTY; |
| 154 | } | 161 | } |
| 155 | 162 | ||
| 156 | return r; | 163 | return r; |
| @@ -419,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) | |||
| 419 | vcpu_load(vcpu); | 426 | vcpu_load(vcpu); |
| 420 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) | 427 | if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) |
| 421 | rc = -EBUSY; | 428 | rc = -EBUSY; |
| 422 | else | 429 | else { |
| 423 | vcpu->arch.sie_block->gpsw = psw; | 430 | vcpu->run->psw_mask = psw.mask; |
| 431 | vcpu->run->psw_addr = psw.addr; | ||
| 432 | } | ||
| 424 | vcpu_put(vcpu); | 433 | vcpu_put(vcpu); |
| 425 | return rc; | 434 | return rc; |
| 426 | } | 435 | } |
| @@ -508,9 +517,6 @@ rerun_vcpu: | |||
| 508 | 517 | ||
| 509 | switch (kvm_run->exit_reason) { | 518 | switch (kvm_run->exit_reason) { |
| 510 | case KVM_EXIT_S390_SIEIC: | 519 | case KVM_EXIT_S390_SIEIC: |
| 511 | vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; | ||
| 512 | vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; | ||
| 513 | break; | ||
| 514 | case KVM_EXIT_UNKNOWN: | 520 | case KVM_EXIT_UNKNOWN: |
| 515 | case KVM_EXIT_INTR: | 521 | case KVM_EXIT_INTR: |
| 516 | case KVM_EXIT_S390_RESET: | 522 | case KVM_EXIT_S390_RESET: |
| @@ -519,6 +525,9 @@ rerun_vcpu: | |||
| 519 | BUG(); | 525 | BUG(); |
| 520 | } | 526 | } |
| 521 | 527 | ||
| 528 | vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; | ||
| 529 | vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; | ||
| 530 | |||
| 522 | might_fault(); | 531 | might_fault(); |
| 523 | 532 | ||
| 524 | do { | 533 | do { |
| @@ -538,8 +547,6 @@ rerun_vcpu: | |||
| 538 | /* intercept cannot be handled in-kernel, prepare kvm-run */ | 547 | /* intercept cannot be handled in-kernel, prepare kvm-run */ |
| 539 | kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; | 548 | kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; |
| 540 | kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; | 549 | kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; |
| 541 | kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; | ||
| 542 | kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; | ||
| 543 | kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; | 550 | kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; |
| 544 | kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; | 551 | kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; |
| 545 | rc = 0; | 552 | rc = 0; |
| @@ -551,6 +558,9 @@ rerun_vcpu: | |||
| 551 | rc = 0; | 558 | rc = 0; |
| 552 | } | 559 | } |
| 553 | 560 | ||
| 561 | kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; | ||
| 562 | kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; | ||
| 563 | |||
| 554 | if (vcpu->sigset_active) | 564 | if (vcpu->sigset_active) |
| 555 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); | 565 | sigprocmask(SIG_SETMASK, &sigsaved, NULL); |
| 556 | 566 | ||
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 40c8c6748cfe..15ee1111de58 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c | |||
| @@ -188,9 +188,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, | |||
| 188 | 188 | ||
| 189 | /* make sure that the new value is valid memory */ | 189 | /* make sure that the new value is valid memory */ |
| 190 | address = address & 0x7fffe000u; | 190 | address = address & 0x7fffe000u; |
| 191 | if ((copy_from_guest(vcpu, &tmp, | 191 | if ((copy_from_user(&tmp, (void __user *) |
| 192 | (u64) (address + vcpu->arch.sie_block->gmsor) , 1)) || | 192 | (address + vcpu->arch.sie_block->gmsor) , 1)) || |
| 193 | (copy_from_guest(vcpu, &tmp, (u64) (address + | 193 | (copy_from_user(&tmp, (void __user *)(address + |
| 194 | vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) { | 194 | vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) { |
| 195 | *reg |= SIGP_STAT_INVALID_PARAMETER; | 195 | *reg |= SIGP_STAT_INVALID_PARAMETER; |
| 196 | return 1; /* invalid parameter */ | 196 | return 1; /* invalid parameter */ |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 178084b4377c..1b2182b4d5c8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -51,6 +51,7 @@ config X86 | |||
| 51 | select HAVE_KERNEL_LZMA | 51 | select HAVE_KERNEL_LZMA |
| 52 | select HAVE_HW_BREAKPOINT | 52 | select HAVE_HW_BREAKPOINT |
| 53 | select HAVE_ARCH_KMEMCHECK | 53 | select HAVE_ARCH_KMEMCHECK |
| 54 | select HAVE_USER_RETURN_NOTIFIER | ||
| 54 | 55 | ||
| 55 | config OUTPUT_FORMAT | 56 | config OUTPUT_FORMAT |
| 56 | string | 57 | string |
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe914dc59..950df434763f 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
| @@ -19,6 +19,8 @@ | |||
| 19 | #define __KVM_HAVE_MSIX | 19 | #define __KVM_HAVE_MSIX |
| 20 | #define __KVM_HAVE_MCE | 20 | #define __KVM_HAVE_MCE |
| 21 | #define __KVM_HAVE_PIT_STATE2 | 21 | #define __KVM_HAVE_PIT_STATE2 |
| 22 | #define __KVM_HAVE_XEN_HVM | ||
| 23 | #define __KVM_HAVE_VCPU_EVENTS | ||
| 22 | 24 | ||
| 23 | /* Architectural interrupt line count. */ | 25 | /* Architectural interrupt line count. */ |
| 24 | #define KVM_NR_INTERRUPTS 256 | 26 | #define KVM_NR_INTERRUPTS 256 |
| @@ -79,6 +81,7 @@ struct kvm_ioapic_state { | |||
| 79 | #define KVM_IRQCHIP_PIC_MASTER 0 | 81 | #define KVM_IRQCHIP_PIC_MASTER 0 |
| 80 | #define KVM_IRQCHIP_PIC_SLAVE 1 | 82 | #define KVM_IRQCHIP_PIC_SLAVE 1 |
| 81 | #define KVM_IRQCHIP_IOAPIC 2 | 83 | #define KVM_IRQCHIP_IOAPIC 2 |
| 84 | #define KVM_NR_IRQCHIPS 3 | ||
| 82 | 85 | ||
| 83 | /* for KVM_GET_REGS and KVM_SET_REGS */ | 86 | /* for KVM_GET_REGS and KVM_SET_REGS */ |
| 84 | struct kvm_regs { | 87 | struct kvm_regs { |
| @@ -250,4 +253,31 @@ struct kvm_reinject_control { | |||
| 250 | __u8 pit_reinject; | 253 | __u8 pit_reinject; |
| 251 | __u8 reserved[31]; | 254 | __u8 reserved[31]; |
| 252 | }; | 255 | }; |
| 256 | |||
| 257 | /* for KVM_GET/SET_VCPU_EVENTS */ | ||
| 258 | struct kvm_vcpu_events { | ||
| 259 | struct { | ||
| 260 | __u8 injected; | ||
| 261 | __u8 nr; | ||
| 262 | __u8 has_error_code; | ||
| 263 | __u8 pad; | ||
| 264 | __u32 error_code; | ||
| 265 | } exception; | ||
| 266 | struct { | ||
| 267 | __u8 injected; | ||
| 268 | __u8 nr; | ||
| 269 | __u8 soft; | ||
| 270 | __u8 pad; | ||
| 271 | } interrupt; | ||
| 272 | struct { | ||
| 273 | __u8 injected; | ||
| 274 | __u8 pending; | ||
| 275 | __u8 masked; | ||
| 276 | __u8 pad; | ||
| 277 | } nmi; | ||
| 278 | __u32 sipi_vector; | ||
| 279 | __u32 flags; | ||
| 280 | __u32 reserved[10]; | ||
| 281 | }; | ||
| 282 | |||
| 253 | #endif /* _ASM_X86_KVM_H */ | 283 | #endif /* _ASM_X86_KVM_H */ |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7ed2c423116..7c18e1230f54 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
| @@ -129,7 +129,7 @@ struct decode_cache { | |||
| 129 | u8 seg_override; | 129 | u8 seg_override; |
| 130 | unsigned int d; | 130 | unsigned int d; |
| 131 | unsigned long regs[NR_VCPU_REGS]; | 131 | unsigned long regs[NR_VCPU_REGS]; |
| 132 | unsigned long eip; | 132 | unsigned long eip, eip_orig; |
| 133 | /* modrm */ | 133 | /* modrm */ |
| 134 | u8 modrm; | 134 | u8 modrm; |
| 135 | u8 modrm_mod; | 135 | u8 modrm_mod; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d83892226f73..4f865e8b8540 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
| @@ -354,7 +354,6 @@ struct kvm_vcpu_arch { | |||
| 354 | unsigned int time_offset; | 354 | unsigned int time_offset; |
| 355 | struct page *time_page; | 355 | struct page *time_page; |
| 356 | 356 | ||
| 357 | bool singlestep; /* guest is single stepped by KVM */ | ||
| 358 | bool nmi_pending; | 357 | bool nmi_pending; |
| 359 | bool nmi_injected; | 358 | bool nmi_injected; |
| 360 | 359 | ||
| @@ -371,6 +370,10 @@ struct kvm_vcpu_arch { | |||
| 371 | u64 mcg_status; | 370 | u64 mcg_status; |
| 372 | u64 mcg_ctl; | 371 | u64 mcg_ctl; |
| 373 | u64 *mce_banks; | 372 | u64 *mce_banks; |
| 373 | |||
| 374 | /* used for guest single stepping over the given code position */ | ||
| 375 | u16 singlestep_cs; | ||
| 376 | unsigned long singlestep_rip; | ||
| 374 | }; | 377 | }; |
| 375 | 378 | ||
| 376 | struct kvm_mem_alias { | 379 | struct kvm_mem_alias { |
| @@ -397,7 +400,6 @@ struct kvm_arch{ | |||
| 397 | struct kvm_pic *vpic; | 400 | struct kvm_pic *vpic; |
| 398 | struct kvm_ioapic *vioapic; | 401 | struct kvm_ioapic *vioapic; |
| 399 | struct kvm_pit *vpit; | 402 | struct kvm_pit *vpit; |
| 400 | struct hlist_head irq_ack_notifier_list; | ||
| 401 | int vapics_in_nmi_mode; | 403 | int vapics_in_nmi_mode; |
| 402 | 404 | ||
| 403 | unsigned int tss_addr; | 405 | unsigned int tss_addr; |
| @@ -410,8 +412,10 @@ struct kvm_arch{ | |||
| 410 | gpa_t ept_identity_map_addr; | 412 | gpa_t ept_identity_map_addr; |
| 411 | 413 | ||
| 412 | unsigned long irq_sources_bitmap; | 414 | unsigned long irq_sources_bitmap; |
| 413 | unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; | ||
| 414 | u64 vm_init_tsc; | 415 | u64 vm_init_tsc; |
| 416 | s64 kvmclock_offset; | ||
| 417 | |||
| 418 | struct kvm_xen_hvm_config xen_hvm_config; | ||
| 415 | }; | 419 | }; |
| 416 | 420 | ||
| 417 | struct kvm_vm_stat { | 421 | struct kvm_vm_stat { |
| @@ -461,7 +465,7 @@ struct descriptor_table { | |||
| 461 | struct kvm_x86_ops { | 465 | struct kvm_x86_ops { |
| 462 | int (*cpu_has_kvm_support)(void); /* __init */ | 466 | int (*cpu_has_kvm_support)(void); /* __init */ |
| 463 | int (*disabled_by_bios)(void); /* __init */ | 467 | int (*disabled_by_bios)(void); /* __init */ |
| 464 | void (*hardware_enable)(void *dummy); /* __init */ | 468 | int (*hardware_enable)(void *dummy); |
| 465 | void (*hardware_disable)(void *dummy); | 469 | void (*hardware_disable)(void *dummy); |
| 466 | void (*check_processor_compatibility)(void *rtn); | 470 | void (*check_processor_compatibility)(void *rtn); |
| 467 | int (*hardware_setup)(void); /* __init */ | 471 | int (*hardware_setup)(void); /* __init */ |
| @@ -477,8 +481,8 @@ struct kvm_x86_ops { | |||
| 477 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); | 481 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
| 478 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | 482 | void (*vcpu_put)(struct kvm_vcpu *vcpu); |
| 479 | 483 | ||
| 480 | int (*set_guest_debug)(struct kvm_vcpu *vcpu, | 484 | void (*set_guest_debug)(struct kvm_vcpu *vcpu, |
| 481 | struct kvm_guest_debug *dbg); | 485 | struct kvm_guest_debug *dbg); |
| 482 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); | 486 | int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); |
| 483 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | 487 | int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); |
| 484 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); | 488 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
| @@ -506,8 +510,8 @@ struct kvm_x86_ops { | |||
| 506 | 510 | ||
| 507 | void (*tlb_flush)(struct kvm_vcpu *vcpu); | 511 | void (*tlb_flush)(struct kvm_vcpu *vcpu); |
| 508 | 512 | ||
| 509 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); | 513 | void (*run)(struct kvm_vcpu *vcpu); |
| 510 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); | 514 | int (*handle_exit)(struct kvm_vcpu *vcpu); |
| 511 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); | 515 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
| 512 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | 516 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
| 513 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | 517 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
| @@ -519,6 +523,8 @@ struct kvm_x86_ops { | |||
| 519 | bool has_error_code, u32 error_code); | 523 | bool has_error_code, u32 error_code); |
| 520 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); | 524 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
| 521 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 525 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
| 526 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | ||
| 527 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | ||
| 522 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 528 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
| 523 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | 529 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
| 524 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 530 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
| @@ -568,7 +574,7 @@ enum emulation_result { | |||
| 568 | #define EMULTYPE_NO_DECODE (1 << 0) | 574 | #define EMULTYPE_NO_DECODE (1 << 0) |
| 569 | #define EMULTYPE_TRAP_UD (1 << 1) | 575 | #define EMULTYPE_TRAP_UD (1 << 1) |
| 570 | #define EMULTYPE_SKIP (1 << 2) | 576 | #define EMULTYPE_SKIP (1 << 2) |
| 571 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, | 577 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| 572 | unsigned long cr2, u16 error_code, int emulation_type); | 578 | unsigned long cr2, u16 error_code, int emulation_type); |
| 573 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 579 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
| 574 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); | 580 | void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); |
| @@ -585,9 +591,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); | |||
| 585 | 591 | ||
| 586 | struct x86_emulate_ctxt; | 592 | struct x86_emulate_ctxt; |
| 587 | 593 | ||
| 588 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 594 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, |
| 589 | int size, unsigned port); | 595 | int size, unsigned port); |
| 590 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 596 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
| 591 | int size, unsigned long count, int down, | 597 | int size, unsigned long count, int down, |
| 592 | gva_t address, int rep, unsigned port); | 598 | gva_t address, int rep, unsigned port); |
| 593 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); | 599 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
| @@ -616,6 +622,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); | |||
| 616 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); | 622 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); |
| 617 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); | 623 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); |
| 618 | 624 | ||
| 625 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); | ||
| 626 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
| 627 | |||
| 619 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); | 628 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
| 620 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | 629 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); |
| 621 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, | 630 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, |
| @@ -802,4 +811,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); | |||
| 802 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); | 811 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
| 803 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); | 812 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
| 804 | 813 | ||
| 814 | void kvm_define_shared_msr(unsigned index, u32 msr); | ||
| 815 | void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); | ||
| 816 | |||
| 805 | #endif /* _ASM_X86_KVM_HOST_H */ | 817 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7c1bc1..1fecb7e61130 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
| @@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { | |||
| 57 | u16 intercept_dr_write; | 57 | u16 intercept_dr_write; |
| 58 | u32 intercept_exceptions; | 58 | u32 intercept_exceptions; |
| 59 | u64 intercept; | 59 | u64 intercept; |
| 60 | u8 reserved_1[44]; | 60 | u8 reserved_1[42]; |
| 61 | u16 pause_filter_count; | ||
| 61 | u64 iopm_base_pa; | 62 | u64 iopm_base_pa; |
| 62 | u64 msrpm_base_pa; | 63 | u64 msrpm_base_pa; |
| 63 | u64 tsc_offset; | 64 | u64 tsc_offset; |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2fec4c..375c917c37d2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
| @@ -83,6 +83,7 @@ struct thread_info { | |||
| 83 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | 83 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
| 84 | #define TIF_SECCOMP 8 /* secure computing */ | 84 | #define TIF_SECCOMP 8 /* secure computing */ |
| 85 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | 85 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ |
| 86 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | ||
| 86 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | 87 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ |
| 87 | #define TIF_IA32 17 /* 32bit process */ | 88 | #define TIF_IA32 17 /* 32bit process */ |
| 88 | #define TIF_FORK 18 /* ret_from_fork */ | 89 | #define TIF_FORK 18 /* ret_from_fork */ |
| @@ -107,6 +108,7 @@ struct thread_info { | |||
| 107 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | 108 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) |
| 108 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | 109 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) |
| 109 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) | 110 | #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) |
| 111 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | ||
| 110 | #define _TIF_NOTSC (1 << TIF_NOTSC) | 112 | #define _TIF_NOTSC (1 << TIF_NOTSC) |
| 111 | #define _TIF_IA32 (1 << TIF_IA32) | 113 | #define _TIF_IA32 (1 << TIF_IA32) |
| 112 | #define _TIF_FORK (1 << TIF_FORK) | 114 | #define _TIF_FORK (1 << TIF_FORK) |
| @@ -142,13 +144,14 @@ struct thread_info { | |||
| 142 | 144 | ||
| 143 | /* Only used for 64 bit */ | 145 | /* Only used for 64 bit */ |
| 144 | #define _TIF_DO_NOTIFY_MASK \ | 146 | #define _TIF_DO_NOTIFY_MASK \ |
| 145 | (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) | 147 | (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ |
| 148 | _TIF_USER_RETURN_NOTIFY) | ||
| 146 | 149 | ||
| 147 | /* flags to check in __switch_to() */ | 150 | /* flags to check in __switch_to() */ |
| 148 | #define _TIF_WORK_CTXSW \ | 151 | #define _TIF_WORK_CTXSW \ |
| 149 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 152 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) |
| 150 | 153 | ||
| 151 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | 154 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
| 152 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 155 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
| 153 | 156 | ||
| 154 | #define PREEMPT_ACTIVE 0x10000000 | 157 | #define PREEMPT_ACTIVE 0x10000000 |
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c2d456..2b4945419a84 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
| @@ -56,6 +56,7 @@ | |||
| 56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 | 56 | #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 |
| 57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 | 57 | #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 |
| 58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 | 58 | #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 |
| 59 | #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 | ||
| 59 | 60 | ||
| 60 | 61 | ||
| 61 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 | 62 | #define PIN_BASED_EXT_INTR_MASK 0x00000001 |
| @@ -144,6 +145,8 @@ enum vmcs_field { | |||
| 144 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, | 145 | VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, |
| 145 | TPR_THRESHOLD = 0x0000401c, | 146 | TPR_THRESHOLD = 0x0000401c, |
| 146 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, | 147 | SECONDARY_VM_EXEC_CONTROL = 0x0000401e, |
| 148 | PLE_GAP = 0x00004020, | ||
| 149 | PLE_WINDOW = 0x00004022, | ||
| 147 | VM_INSTRUCTION_ERROR = 0x00004400, | 150 | VM_INSTRUCTION_ERROR = 0x00004400, |
| 148 | VM_EXIT_REASON = 0x00004402, | 151 | VM_EXIT_REASON = 0x00004402, |
| 149 | VM_EXIT_INTR_INFO = 0x00004404, | 152 | VM_EXIT_INTR_INFO = 0x00004404, |
| @@ -248,6 +251,7 @@ enum vmcs_field { | |||
| 248 | #define EXIT_REASON_MSR_READ 31 | 251 | #define EXIT_REASON_MSR_READ 31 |
| 249 | #define EXIT_REASON_MSR_WRITE 32 | 252 | #define EXIT_REASON_MSR_WRITE 32 |
| 250 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 253 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
| 254 | #define EXIT_REASON_PAUSE_INSTRUCTION 40 | ||
| 251 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | 255 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 |
| 252 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 256 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
| 253 | #define EXIT_REASON_APIC_ACCESS 44 | 257 | #define EXIT_REASON_APIC_ACCESS 44 |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 744508e7cfdd..5e2ba634ea15 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
| 10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
| 11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
| 12 | #include <linux/user-return-notifier.h> | ||
| 12 | #include <trace/events/power.h> | 13 | #include <trace/events/power.h> |
| 13 | #include <linux/hw_breakpoint.h> | 14 | #include <linux/hw_breakpoint.h> |
| 14 | #include <asm/system.h> | 15 | #include <asm/system.h> |
| @@ -209,6 +210,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 209 | */ | 210 | */ |
| 210 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 211 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
| 211 | } | 212 | } |
| 213 | propagate_user_return_notify(prev_p, next_p); | ||
| 212 | } | 214 | } |
| 213 | 215 | ||
| 214 | int sys_fork(struct pt_regs *regs) | 216 | int sys_fork(struct pt_regs *regs) |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index fbf3b07c8567..74fe6d86dc5d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <linux/stddef.h> | 19 | #include <linux/stddef.h> |
| 20 | #include <linux/personality.h> | 20 | #include <linux/personality.h> |
| 21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
| 22 | #include <linux/user-return-notifier.h> | ||
| 22 | 23 | ||
| 23 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
| 24 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
| @@ -863,6 +864,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
| 863 | if (current->replacement_session_keyring) | 864 | if (current->replacement_session_keyring) |
| 864 | key_replace_session_keyring(); | 865 | key_replace_session_keyring(); |
| 865 | } | 866 | } |
| 867 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | ||
| 868 | fire_user_return_notifiers(); | ||
| 866 | 869 | ||
| 867 | #ifdef CONFIG_X86_32 | 870 | #ifdef CONFIG_X86_32 |
| 868 | clear_thread_flag(TIF_IRET); | 871 | clear_thread_flag(TIF_IRET); |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b84e571f4175..4cd498332466 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
| @@ -28,6 +28,7 @@ config KVM | |||
| 28 | select HAVE_KVM_IRQCHIP | 28 | select HAVE_KVM_IRQCHIP |
| 29 | select HAVE_KVM_EVENTFD | 29 | select HAVE_KVM_EVENTFD |
| 30 | select KVM_APIC_ARCHITECTURE | 30 | select KVM_APIC_ARCHITECTURE |
| 31 | select USER_RETURN_NOTIFIER | ||
| 31 | ---help--- | 32 | ---help--- |
| 32 | Support hosting fully virtualized guest machines using hardware | 33 | Support hosting fully virtualized guest machines using hardware |
| 33 | virtualization extensions. You will need a fairly recent | 34 | virtualization extensions. You will need a fairly recent |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 0e7fe78d0f74..31a7035c4bd9 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
| @@ -6,7 +6,8 @@ CFLAGS_svm.o := -I. | |||
| 6 | CFLAGS_vmx.o := -I. | 6 | CFLAGS_vmx.o := -I. |
| 7 | 7 | ||
| 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | 8 | kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
| 9 | coalesced_mmio.o irq_comm.o eventfd.o) | 9 | coalesced_mmio.o irq_comm.o eventfd.o \ |
| 10 | assigned-dev.o) | ||
| 10 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) | 11 | kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) |
| 11 | 12 | ||
| 12 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ | 13 | kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1be5cd640e93..7e8faea4651e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
| @@ -75,6 +75,8 @@ | |||
| 75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 75 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
| 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 76 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
| 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 77 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
| 78 | /* Misc flags */ | ||
| 79 | #define No64 (1<<28) | ||
| 78 | /* Source 2 operand type */ | 80 | /* Source 2 operand type */ |
| 79 | #define Src2None (0<<29) | 81 | #define Src2None (0<<29) |
| 80 | #define Src2CL (1<<29) | 82 | #define Src2CL (1<<29) |
| @@ -92,19 +94,23 @@ static u32 opcode_table[256] = { | |||
| 92 | /* 0x00 - 0x07 */ | 94 | /* 0x00 - 0x07 */ |
| 93 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 95 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 94 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 96 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 95 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 97 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
| 98 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
| 96 | /* 0x08 - 0x0F */ | 99 | /* 0x08 - 0x0F */ |
| 97 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 100 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 98 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 101 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 99 | 0, 0, 0, 0, | 102 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
| 103 | ImplicitOps | Stack | No64, 0, | ||
| 100 | /* 0x10 - 0x17 */ | 104 | /* 0x10 - 0x17 */ |
| 101 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 102 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 103 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
| 108 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
| 104 | /* 0x18 - 0x1F */ | 109 | /* 0x18 - 0x1F */ |
| 105 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 110 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 106 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 111 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| 107 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, | 112 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, |
| 113 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, | ||
| 108 | /* 0x20 - 0x27 */ | 114 | /* 0x20 - 0x27 */ |
| 109 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 115 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
| 110 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 116 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
| @@ -133,7 +139,8 @@ static u32 opcode_table[256] = { | |||
| 133 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 139 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
| 134 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, | 140 | DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack, |
| 135 | /* 0x60 - 0x67 */ | 141 | /* 0x60 - 0x67 */ |
| 136 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | 142 | ImplicitOps | Stack | No64, ImplicitOps | Stack | No64, |
| 143 | 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | ||
| 137 | 0, 0, 0, 0, | 144 | 0, 0, 0, 0, |
| 138 | /* 0x68 - 0x6F */ | 145 | /* 0x68 - 0x6F */ |
| 139 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, | 146 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
| @@ -158,7 +165,7 @@ static u32 opcode_table[256] = { | |||
| 158 | /* 0x90 - 0x97 */ | 165 | /* 0x90 - 0x97 */ |
| 159 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 166 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
| 160 | /* 0x98 - 0x9F */ | 167 | /* 0x98 - 0x9F */ |
| 161 | 0, 0, SrcImm | Src2Imm16, 0, | 168 | 0, 0, SrcImm | Src2Imm16 | No64, 0, |
| 162 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 169 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
| 163 | /* 0xA0 - 0xA7 */ | 170 | /* 0xA0 - 0xA7 */ |
| 164 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 171 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
| @@ -185,7 +192,7 @@ static u32 opcode_table[256] = { | |||
| 185 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | 192 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, |
| 186 | /* 0xC8 - 0xCF */ | 193 | /* 0xC8 - 0xCF */ |
| 187 | 0, 0, 0, ImplicitOps | Stack, | 194 | 0, 0, 0, ImplicitOps | Stack, |
| 188 | ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, | 195 | ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps, |
| 189 | /* 0xD0 - 0xD7 */ | 196 | /* 0xD0 - 0xD7 */ |
| 190 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 197 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
| 191 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 198 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
| @@ -198,7 +205,7 @@ static u32 opcode_table[256] = { | |||
| 198 | ByteOp | SrcImmUByte, SrcImmUByte, | 205 | ByteOp | SrcImmUByte, SrcImmUByte, |
| 199 | /* 0xE8 - 0xEF */ | 206 | /* 0xE8 - 0xEF */ |
| 200 | SrcImm | Stack, SrcImm | ImplicitOps, | 207 | SrcImm | Stack, SrcImm | ImplicitOps, |
| 201 | SrcImmU | Src2Imm16, SrcImmByte | ImplicitOps, | 208 | SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, |
| 202 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 209 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
| 203 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 210 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
| 204 | /* 0xF0 - 0xF7 */ | 211 | /* 0xF0 - 0xF7 */ |
| @@ -244,11 +251,13 @@ static u32 twobyte_table[256] = { | |||
| 244 | /* 0x90 - 0x9F */ | 251 | /* 0x90 - 0x9F */ |
| 245 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 252 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 246 | /* 0xA0 - 0xA7 */ | 253 | /* 0xA0 - 0xA7 */ |
| 247 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 254 | ImplicitOps | Stack, ImplicitOps | Stack, |
| 255 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
| 248 | DstMem | SrcReg | Src2ImmByte | ModRM, | 256 | DstMem | SrcReg | Src2ImmByte | ModRM, |
| 249 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | 257 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, |
| 250 | /* 0xA8 - 0xAF */ | 258 | /* 0xA8 - 0xAF */ |
| 251 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, | 259 | ImplicitOps | Stack, ImplicitOps | Stack, |
| 260 | 0, DstMem | SrcReg | ModRM | BitOp, | ||
| 252 | DstMem | SrcReg | Src2ImmByte | ModRM, | 261 | DstMem | SrcReg | Src2ImmByte | ModRM, |
| 253 | DstMem | SrcReg | Src2CL | ModRM, | 262 | DstMem | SrcReg | Src2CL | ModRM, |
| 254 | ModRM, 0, | 263 | ModRM, 0, |
| @@ -613,6 +622,9 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, | |||
| 613 | { | 622 | { |
| 614 | int rc = 0; | 623 | int rc = 0; |
| 615 | 624 | ||
| 625 | /* x86 instructions are limited to 15 bytes. */ | ||
| 626 | if (eip + size - ctxt->decode.eip_orig > 15) | ||
| 627 | return X86EMUL_UNHANDLEABLE; | ||
| 616 | eip += ctxt->cs_base; | 628 | eip += ctxt->cs_base; |
| 617 | while (size--) { | 629 | while (size--) { |
| 618 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); | 630 | rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); |
| @@ -871,7 +883,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
| 871 | /* Shadow copy of register state. Committed on successful emulation. */ | 883 | /* Shadow copy of register state. Committed on successful emulation. */ |
| 872 | 884 | ||
| 873 | memset(c, 0, sizeof(struct decode_cache)); | 885 | memset(c, 0, sizeof(struct decode_cache)); |
| 874 | c->eip = kvm_rip_read(ctxt->vcpu); | 886 | c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); |
| 875 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | 887 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); |
| 876 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 888 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
| 877 | 889 | ||
| @@ -962,6 +974,11 @@ done_prefixes: | |||
| 962 | } | 974 | } |
| 963 | } | 975 | } |
| 964 | 976 | ||
| 977 | if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { | ||
| 978 | kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); | ||
| 979 | return -1; | ||
| 980 | } | ||
| 981 | |||
| 965 | if (c->d & Group) { | 982 | if (c->d & Group) { |
| 966 | group = c->d & GroupMask; | 983 | group = c->d & GroupMask; |
| 967 | c->modrm = insn_fetch(u8, 1, c->eip); | 984 | c->modrm = insn_fetch(u8, 1, c->eip); |
| @@ -1186,6 +1203,69 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, | |||
| 1186 | return rc; | 1203 | return rc; |
| 1187 | } | 1204 | } |
| 1188 | 1205 | ||
| 1206 | static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) | ||
| 1207 | { | ||
| 1208 | struct decode_cache *c = &ctxt->decode; | ||
| 1209 | struct kvm_segment segment; | ||
| 1210 | |||
| 1211 | kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg); | ||
| 1212 | |||
| 1213 | c->src.val = segment.selector; | ||
| 1214 | emulate_push(ctxt); | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, | ||
| 1218 | struct x86_emulate_ops *ops, int seg) | ||
| 1219 | { | ||
| 1220 | struct decode_cache *c = &ctxt->decode; | ||
| 1221 | unsigned long selector; | ||
| 1222 | int rc; | ||
| 1223 | |||
| 1224 | rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); | ||
| 1225 | if (rc != 0) | ||
| 1226 | return rc; | ||
| 1227 | |||
| 1228 | rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg); | ||
| 1229 | return rc; | ||
| 1230 | } | ||
| 1231 | |||
| 1232 | static void emulate_pusha(struct x86_emulate_ctxt *ctxt) | ||
| 1233 | { | ||
| 1234 | struct decode_cache *c = &ctxt->decode; | ||
| 1235 | unsigned long old_esp = c->regs[VCPU_REGS_RSP]; | ||
| 1236 | int reg = VCPU_REGS_RAX; | ||
| 1237 | |||
| 1238 | while (reg <= VCPU_REGS_RDI) { | ||
| 1239 | (reg == VCPU_REGS_RSP) ? | ||
| 1240 | (c->src.val = old_esp) : (c->src.val = c->regs[reg]); | ||
| 1241 | |||
| 1242 | emulate_push(ctxt); | ||
| 1243 | ++reg; | ||
| 1244 | } | ||
| 1245 | } | ||
| 1246 | |||
| 1247 | static int emulate_popa(struct x86_emulate_ctxt *ctxt, | ||
| 1248 | struct x86_emulate_ops *ops) | ||
| 1249 | { | ||
| 1250 | struct decode_cache *c = &ctxt->decode; | ||
| 1251 | int rc = 0; | ||
| 1252 | int reg = VCPU_REGS_RDI; | ||
| 1253 | |||
| 1254 | while (reg >= VCPU_REGS_RAX) { | ||
| 1255 | if (reg == VCPU_REGS_RSP) { | ||
| 1256 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
| 1257 | c->op_bytes); | ||
| 1258 | --reg; | ||
| 1259 | } | ||
| 1260 | |||
| 1261 | rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); | ||
| 1262 | if (rc != 0) | ||
| 1263 | break; | ||
| 1264 | --reg; | ||
| 1265 | } | ||
| 1266 | return rc; | ||
| 1267 | } | ||
| 1268 | |||
| 1189 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1269 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, |
| 1190 | struct x86_emulate_ops *ops) | 1270 | struct x86_emulate_ops *ops) |
| 1191 | { | 1271 | { |
| @@ -1707,18 +1787,45 @@ special_insn: | |||
| 1707 | add: /* add */ | 1787 | add: /* add */ |
| 1708 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); | 1788 | emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags); |
| 1709 | break; | 1789 | break; |
| 1790 | case 0x06: /* push es */ | ||
| 1791 | emulate_push_sreg(ctxt, VCPU_SREG_ES); | ||
| 1792 | break; | ||
| 1793 | case 0x07: /* pop es */ | ||
| 1794 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); | ||
| 1795 | if (rc != 0) | ||
| 1796 | goto done; | ||
| 1797 | break; | ||
| 1710 | case 0x08 ... 0x0d: | 1798 | case 0x08 ... 0x0d: |
| 1711 | or: /* or */ | 1799 | or: /* or */ |
| 1712 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); | 1800 | emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags); |
| 1713 | break; | 1801 | break; |
| 1802 | case 0x0e: /* push cs */ | ||
| 1803 | emulate_push_sreg(ctxt, VCPU_SREG_CS); | ||
| 1804 | break; | ||
| 1714 | case 0x10 ... 0x15: | 1805 | case 0x10 ... 0x15: |
| 1715 | adc: /* adc */ | 1806 | adc: /* adc */ |
| 1716 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); | 1807 | emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags); |
| 1717 | break; | 1808 | break; |
| 1809 | case 0x16: /* push ss */ | ||
| 1810 | emulate_push_sreg(ctxt, VCPU_SREG_SS); | ||
| 1811 | break; | ||
| 1812 | case 0x17: /* pop ss */ | ||
| 1813 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); | ||
| 1814 | if (rc != 0) | ||
| 1815 | goto done; | ||
| 1816 | break; | ||
| 1718 | case 0x18 ... 0x1d: | 1817 | case 0x18 ... 0x1d: |
| 1719 | sbb: /* sbb */ | 1818 | sbb: /* sbb */ |
| 1720 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); | 1819 | emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags); |
| 1721 | break; | 1820 | break; |
| 1821 | case 0x1e: /* push ds */ | ||
| 1822 | emulate_push_sreg(ctxt, VCPU_SREG_DS); | ||
| 1823 | break; | ||
| 1824 | case 0x1f: /* pop ds */ | ||
| 1825 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); | ||
| 1826 | if (rc != 0) | ||
| 1827 | goto done; | ||
| 1828 | break; | ||
| 1722 | case 0x20 ... 0x25: | 1829 | case 0x20 ... 0x25: |
| 1723 | and: /* and */ | 1830 | and: /* and */ |
| 1724 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); | 1831 | emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags); |
| @@ -1750,6 +1857,14 @@ special_insn: | |||
| 1750 | if (rc != 0) | 1857 | if (rc != 0) |
| 1751 | goto done; | 1858 | goto done; |
| 1752 | break; | 1859 | break; |
| 1860 | case 0x60: /* pusha */ | ||
| 1861 | emulate_pusha(ctxt); | ||
| 1862 | break; | ||
| 1863 | case 0x61: /* popa */ | ||
| 1864 | rc = emulate_popa(ctxt, ops); | ||
| 1865 | if (rc != 0) | ||
| 1866 | goto done; | ||
| 1867 | break; | ||
| 1753 | case 0x63: /* movsxd */ | 1868 | case 0x63: /* movsxd */ |
| 1754 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 1869 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
| 1755 | goto cannot_emulate; | 1870 | goto cannot_emulate; |
| @@ -1761,7 +1876,7 @@ special_insn: | |||
| 1761 | break; | 1876 | break; |
| 1762 | case 0x6c: /* insb */ | 1877 | case 0x6c: /* insb */ |
| 1763 | case 0x6d: /* insw/insd */ | 1878 | case 0x6d: /* insw/insd */ |
| 1764 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 1879 | if (kvm_emulate_pio_string(ctxt->vcpu, |
| 1765 | 1, | 1880 | 1, |
| 1766 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1881 | (c->d & ByteOp) ? 1 : c->op_bytes, |
| 1767 | c->rep_prefix ? | 1882 | c->rep_prefix ? |
| @@ -1777,7 +1892,7 @@ special_insn: | |||
| 1777 | return 0; | 1892 | return 0; |
| 1778 | case 0x6e: /* outsb */ | 1893 | case 0x6e: /* outsb */ |
| 1779 | case 0x6f: /* outsw/outsd */ | 1894 | case 0x6f: /* outsw/outsd */ |
| 1780 | if (kvm_emulate_pio_string(ctxt->vcpu, NULL, | 1895 | if (kvm_emulate_pio_string(ctxt->vcpu, |
| 1781 | 0, | 1896 | 0, |
| 1782 | (c->d & ByteOp) ? 1 : c->op_bytes, | 1897 | (c->d & ByteOp) ? 1 : c->op_bytes, |
| 1783 | c->rep_prefix ? | 1898 | c->rep_prefix ? |
| @@ -2070,7 +2185,7 @@ special_insn: | |||
| 2070 | case 0xef: /* out (e/r)ax,dx */ | 2185 | case 0xef: /* out (e/r)ax,dx */ |
| 2071 | port = c->regs[VCPU_REGS_RDX]; | 2186 | port = c->regs[VCPU_REGS_RDX]; |
| 2072 | io_dir_in = 0; | 2187 | io_dir_in = 0; |
| 2073 | do_io: if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in, | 2188 | do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, |
| 2074 | (c->d & ByteOp) ? 1 : c->op_bytes, | 2189 | (c->d & ByteOp) ? 1 : c->op_bytes, |
| 2075 | port) != 0) { | 2190 | port) != 0) { |
| 2076 | c->eip = saved_eip; | 2191 | c->eip = saved_eip; |
| @@ -2297,6 +2412,14 @@ twobyte_insn: | |||
| 2297 | jmp_rel(c, c->src.val); | 2412 | jmp_rel(c, c->src.val); |
| 2298 | c->dst.type = OP_NONE; | 2413 | c->dst.type = OP_NONE; |
| 2299 | break; | 2414 | break; |
| 2415 | case 0xa0: /* push fs */ | ||
| 2416 | emulate_push_sreg(ctxt, VCPU_SREG_FS); | ||
| 2417 | break; | ||
| 2418 | case 0xa1: /* pop fs */ | ||
| 2419 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); | ||
| 2420 | if (rc != 0) | ||
| 2421 | goto done; | ||
| 2422 | break; | ||
| 2300 | case 0xa3: | 2423 | case 0xa3: |
| 2301 | bt: /* bt */ | 2424 | bt: /* bt */ |
| 2302 | c->dst.type = OP_NONE; | 2425 | c->dst.type = OP_NONE; |
| @@ -2308,6 +2431,14 @@ twobyte_insn: | |||
| 2308 | case 0xa5: /* shld cl, r, r/m */ | 2431 | case 0xa5: /* shld cl, r, r/m */ |
| 2309 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | 2432 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); |
| 2310 | break; | 2433 | break; |
| 2434 | case 0xa8: /* push gs */ | ||
| 2435 | emulate_push_sreg(ctxt, VCPU_SREG_GS); | ||
| 2436 | break; | ||
| 2437 | case 0xa9: /* pop gs */ | ||
| 2438 | rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); | ||
| 2439 | if (rc != 0) | ||
| 2440 | goto done; | ||
| 2441 | break; | ||
| 2311 | case 0xab: | 2442 | case 0xab: |
| 2312 | bts: /* bts */ | 2443 | bts: /* bts */ |
| 2313 | /* only subword offset */ | 2444 | /* only subword offset */ |
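The emulator hunks above add the one-byte push/pop segment opcodes (0x06/0x07 ES, 0x0e CS, 0x16/0x17 SS, 0x1e/0x1f DS), pusha/popa, and the two-byte 0x0f a0/a1 (FS) and 0x0f a8/a9 (GS) forms. A minimal, illustrative sketch of that dispatch shape follows; the helper roles mirror emulate_push_sreg()/emulate_pop_sreg() from the hunks, while the types, stub bodies and main() are hypothetical stand-ins, not KVM code.

/* Illustrative sketch (not the real KVM x86 emulator) of how the new
 * one-byte opcode cases route to segment push/pop helpers. */
#include <stdio.h>

enum sreg { SREG_ES, SREG_CS, SREG_SS, SREG_DS };

static void push_sreg(enum sreg seg)        /* role of emulate_push_sreg() */
{
        printf("push sreg %d\n", seg);
}

static int pop_sreg(enum sreg seg)          /* role of emulate_pop_sreg() */
{
        printf("pop sreg %d\n", seg);
        return 0;                           /* 0 means success, as in the hunk */
}

static int handle_opcode(unsigned char op)
{
        int rc = 0;

        switch (op) {
        case 0x06: push_sreg(SREG_ES); break;           /* push es */
        case 0x07: rc = pop_sreg(SREG_ES); break;       /* pop es  */
        case 0x0e: push_sreg(SREG_CS); break;           /* push cs */
        case 0x16: push_sreg(SREG_SS); break;           /* push ss */
        case 0x17: rc = pop_sreg(SREG_SS); break;       /* pop ss  */
        case 0x1e: push_sreg(SREG_DS); break;           /* push ds */
        case 0x1f: rc = pop_sreg(SREG_DS); break;       /* pop ds  */
        default:   rc = -1;                             /* not modelled here */
        }
        return rc;
}

int main(void)
{
        return handle_opcode(0x06) | handle_opcode(0x07);
}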
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 144e7f60b5e2..fab7440c9bb2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
| @@ -688,10 +688,8 @@ static void __inject_pit_timer_intr(struct kvm *kvm) | |||
| 688 | struct kvm_vcpu *vcpu; | 688 | struct kvm_vcpu *vcpu; |
| 689 | int i; | 689 | int i; |
| 690 | 690 | ||
| 691 | mutex_lock(&kvm->irq_lock); | ||
| 692 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 691 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); |
| 693 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 692 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); |
| 694 | mutex_unlock(&kvm->irq_lock); | ||
| 695 | 693 | ||
| 696 | /* | 694 | /* |
| 697 | * Provides NMI watchdog support via Virtual Wire mode. | 695 | * Provides NMI watchdog support via Virtual Wire mode. |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 01f151682802..d057c0cbd245 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
| @@ -38,7 +38,15 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
| 38 | s->isr_ack |= (1 << irq); | 38 | s->isr_ack |= (1 << irq); |
| 39 | if (s != &s->pics_state->pics[0]) | 39 | if (s != &s->pics_state->pics[0]) |
| 40 | irq += 8; | 40 | irq += 8; |
| 41 | /* | ||
| 42 | * We are dropping the lock while calling ack notifiers since the ack | ||
| 43 | * notifier callbacks for assigned devices call into the PIC recursively. | ||
| 44 | * Another interrupt may be delivered to the PIC while the lock is dropped, | ||
| 45 | * but it should be safe since the PIC state is already updated at this stage. | ||
| 46 | */ | ||
| 47 | spin_unlock(&s->pics_state->lock); | ||
| 41 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); | 48 | kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); |
| 49 | spin_lock(&s->pics_state->lock); | ||
| 42 | } | 50 | } |
| 43 | 51 | ||
| 44 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | 52 | void kvm_pic_clear_isr_ack(struct kvm *kvm) |
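The new comment in pic_clear_isr() explains why the PIC spinlock is released around kvm_notify_acked_irq(): ack-notifier callbacks for assigned devices re-enter the PIC, and the PIC state is already consistent when the lock is dropped. A small userspace-flavoured sketch of that unlock/call/relock pattern, assuming a pthread mutex stands in for the kvm_pic spinlock; the names are illustrative, not KVM APIs.

/* Sketch of the "drop the lock around a callback that may re-enter" pattern
 * used in pic_clear_isr(); a pthread mutex stands in for the PIC spinlock. */
#include <pthread.h>

struct pic_state {
        pthread_mutex_t lock;
        unsigned int isr;               /* in-service register */
        void (*ack_notifier)(int irq);  /* may call back into the PIC */
};

static void clear_isr(struct pic_state *s, int irq)
{
        /* caller already holds s->lock */
        s->isr &= ~(1u << irq);         /* state is consistent before unlocking */

        pthread_mutex_unlock(&s->lock); /* drop lock: notifier may re-acquire it */
        if (s->ack_notifier)
                s->ack_notifier(irq);
        pthread_mutex_lock(&s->lock);   /* re-acquire before returning to caller */
}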
| @@ -176,16 +184,18 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) | |||
| 176 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) | 184 | static inline void pic_intack(struct kvm_kpic_state *s, int irq) |
| 177 | { | 185 | { |
| 178 | s->isr |= 1 << irq; | 186 | s->isr |= 1 << irq; |
| 179 | if (s->auto_eoi) { | ||
| 180 | if (s->rotate_on_auto_eoi) | ||
| 181 | s->priority_add = (irq + 1) & 7; | ||
| 182 | pic_clear_isr(s, irq); | ||
| 183 | } | ||
| 184 | /* | 187 | /* |
| 185 | * We don't clear a level sensitive interrupt here | 188 | * We don't clear a level sensitive interrupt here |
| 186 | */ | 189 | */ |
| 187 | if (!(s->elcr & (1 << irq))) | 190 | if (!(s->elcr & (1 << irq))) |
| 188 | s->irr &= ~(1 << irq); | 191 | s->irr &= ~(1 << irq); |
| 192 | |||
| 193 | if (s->auto_eoi) { | ||
| 194 | if (s->rotate_on_auto_eoi) | ||
| 195 | s->priority_add = (irq + 1) & 7; | ||
| 196 | pic_clear_isr(s, irq); | ||
| 197 | } | ||
| 198 | |||
| 189 | } | 199 | } |
| 190 | 200 | ||
| 191 | int kvm_pic_read_irq(struct kvm *kvm) | 201 | int kvm_pic_read_irq(struct kvm *kvm) |
| @@ -225,22 +235,11 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
| 225 | 235 | ||
| 226 | void kvm_pic_reset(struct kvm_kpic_state *s) | 236 | void kvm_pic_reset(struct kvm_kpic_state *s) |
| 227 | { | 237 | { |
| 228 | int irq, irqbase, n; | 238 | int irq; |
| 229 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 239 | struct kvm *kvm = s->pics_state->irq_request_opaque; |
| 230 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; | 240 | struct kvm_vcpu *vcpu0 = kvm->bsp_vcpu; |
| 241 | u8 irr = s->irr, isr = s->imr; | ||
| 231 | 242 | ||
| 232 | if (s == &s->pics_state->pics[0]) | ||
| 233 | irqbase = 0; | ||
| 234 | else | ||
| 235 | irqbase = 8; | ||
| 236 | |||
| 237 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
| 238 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
| 239 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) { | ||
| 240 | n = irq + irqbase; | ||
| 241 | kvm_notify_acked_irq(kvm, SELECT_PIC(n), n); | ||
| 242 | } | ||
| 243 | } | ||
| 244 | s->last_irr = 0; | 243 | s->last_irr = 0; |
| 245 | s->irr = 0; | 244 | s->irr = 0; |
| 246 | s->imr = 0; | 245 | s->imr = 0; |
| @@ -256,6 +255,13 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
| 256 | s->rotate_on_auto_eoi = 0; | 255 | s->rotate_on_auto_eoi = 0; |
| 257 | s->special_fully_nested_mode = 0; | 256 | s->special_fully_nested_mode = 0; |
| 258 | s->init4 = 0; | 257 | s->init4 = 0; |
| 258 | |||
| 259 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | ||
| 260 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | ||
| 261 | if (irr & (1 << irq) || isr & (1 << irq)) { | ||
| 262 | pic_clear_isr(s, irq); | ||
| 263 | } | ||
| 264 | } | ||
| 259 | } | 265 | } |
| 260 | 266 | ||
| 261 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) | 267 | static void pic_ioport_write(void *opaque, u32 addr, u32 val) |
| @@ -298,9 +304,9 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) | |||
| 298 | priority = get_priority(s, s->isr); | 304 | priority = get_priority(s, s->isr); |
| 299 | if (priority != 8) { | 305 | if (priority != 8) { |
| 300 | irq = (priority + s->priority_add) & 7; | 306 | irq = (priority + s->priority_add) & 7; |
| 301 | pic_clear_isr(s, irq); | ||
| 302 | if (cmd == 5) | 307 | if (cmd == 5) |
| 303 | s->priority_add = (irq + 1) & 7; | 308 | s->priority_add = (irq + 1) & 7; |
| 309 | pic_clear_isr(s, irq); | ||
| 304 | pic_update_irq(s->pics_state); | 310 | pic_update_irq(s->pics_state); |
| 305 | } | 311 | } |
| 306 | break; | 312 | break; |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 7d6058a2fd38..be399e207d57 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
| @@ -71,6 +71,7 @@ struct kvm_pic { | |||
| 71 | int output; /* intr from master PIC */ | 71 | int output; /* intr from master PIC */ |
| 72 | struct kvm_io_device dev; | 72 | struct kvm_io_device dev; |
| 73 | void (*ack_notifier)(void *opaque, int irq); | 73 | void (*ack_notifier)(void *opaque, int irq); |
| 74 | unsigned long irq_states[16]; | ||
| 74 | }; | 75 | }; |
| 75 | 76 | ||
| 76 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); | 77 | struct kvm_pic *kvm_create_pic(struct kvm *kvm); |
| @@ -85,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) | |||
| 85 | 86 | ||
| 86 | static inline int irqchip_in_kernel(struct kvm *kvm) | 87 | static inline int irqchip_in_kernel(struct kvm *kvm) |
| 87 | { | 88 | { |
| 88 | return pic_irqchip(kvm) != NULL; | 89 | int ret; |
| 90 | |||
| 91 | ret = (pic_irqchip(kvm) != NULL); | ||
| 92 | smp_rmb(); | ||
| 93 | return ret; | ||
| 89 | } | 94 | } |
| 90 | 95 | ||
| 91 | void kvm_pic_reset(struct kvm_kpic_state *s); | 96 | void kvm_pic_reset(struct kvm_kpic_state *s); |
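irqchip_in_kernel() now reads the PIC pointer and issues smp_rmb() before returning, so a CPU that sees the pointer as non-NULL also sees the irqchip state that was published before the pointer was set; the write side is expected to pair this with a matching write barrier. A hedged C11-atomics sketch of that publish/consume pairing with generic names; it illustrates the barrier idea rather than the kernel primitives.

/* Sketch of the publish/consume pairing that the smp_rmb() in
 * irqchip_in_kernel() relies on, written with C11 atomics. */
#include <stdatomic.h>
#include <stddef.h>

struct irqchip { int initialized; };

static struct irqchip chip_storage;
static _Atomic(struct irqchip *) chip_ptr;      /* plays the role of the PIC pointer */

void publish_irqchip(void)
{
        chip_storage.initialized = 1;           /* set up state first ... */
        /* release: everything above becomes visible before the pointer is seen */
        atomic_store_explicit(&chip_ptr, &chip_storage, memory_order_release);
}

int irqchip_present(void)
{
        /* acquire pairs with the release above, like smp_rmb() after the load */
        struct irqchip *p = atomic_load_explicit(&chip_ptr, memory_order_acquire);

        return p != NULL;                       /* non-NULL implies state is visible */
}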
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 23c217692ea9..cd60c0bd1b32 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
| @@ -32,7 +32,6 @@ | |||
| 32 | #include <asm/current.h> | 32 | #include <asm/current.h> |
| 33 | #include <asm/apicdef.h> | 33 | #include <asm/apicdef.h> |
| 34 | #include <asm/atomic.h> | 34 | #include <asm/atomic.h> |
| 35 | #include <asm/apicdef.h> | ||
| 36 | #include "kvm_cache_regs.h" | 35 | #include "kvm_cache_regs.h" |
| 37 | #include "irq.h" | 36 | #include "irq.h" |
| 38 | #include "trace.h" | 37 | #include "trace.h" |
| @@ -471,11 +470,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) | |||
| 471 | trigger_mode = IOAPIC_LEVEL_TRIG; | 470 | trigger_mode = IOAPIC_LEVEL_TRIG; |
| 472 | else | 471 | else |
| 473 | trigger_mode = IOAPIC_EDGE_TRIG; | 472 | trigger_mode = IOAPIC_EDGE_TRIG; |
| 474 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) { | 473 | if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) |
| 475 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
| 476 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); | 474 | kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); |
| 477 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
| 478 | } | ||
| 479 | } | 475 | } |
| 480 | 476 | ||
| 481 | static void apic_send_ipi(struct kvm_lapic *apic) | 477 | static void apic_send_ipi(struct kvm_lapic *apic) |
| @@ -504,9 +500,7 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
| 504 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, | 500 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
| 505 | irq.vector); | 501 | irq.vector); |
| 506 | 502 | ||
| 507 | mutex_lock(&apic->vcpu->kvm->irq_lock); | ||
| 508 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); | 503 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); |
| 509 | mutex_unlock(&apic->vcpu->kvm->irq_lock); | ||
| 510 | } | 504 | } |
| 511 | 505 | ||
| 512 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 506 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 818b92ad82cf..4c3e5b2314cb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -2789,7 +2789,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
| 2789 | if (r) | 2789 | if (r) |
| 2790 | goto out; | 2790 | goto out; |
| 2791 | 2791 | ||
| 2792 | er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0); | 2792 | er = emulate_instruction(vcpu, cr2, error_code, 0); |
| 2793 | 2793 | ||
| 2794 | switch (er) { | 2794 | switch (er) { |
| 2795 | case EMULATE_DONE: | 2795 | case EMULATE_DONE: |
| @@ -2800,6 +2800,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code) | |||
| 2800 | case EMULATE_FAIL: | 2800 | case EMULATE_FAIL: |
| 2801 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | 2801 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
| 2802 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | 2802 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; |
| 2803 | vcpu->run->internal.ndata = 0; | ||
| 2803 | return 0; | 2804 | return 0; |
| 2804 | default: | 2805 | default: |
| 2805 | BUG(); | 2806 | BUG(); |
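With the hunk above, a failed emulation reports KVM_EXIT_INTERNAL_ERROR with suberror KVM_INTERNAL_ERROR_EMULATION and an explicitly zeroed ndata, so userspace never reads stale data[] entries. A hedged sketch of how a VMM might consume that exit; the exit-reason constant and the internal payload fields come from the KVM UAPI header, the handler itself is illustrative.

/* Userspace-side sketch: reacting to the internal-error exit filled in above.
 * Assumes <linux/kvm.h> and a mmap'ed struct kvm_run for the vcpu. */
#include <linux/kvm.h>
#include <stdio.h>

static int handle_exit(struct kvm_run *run)
{
        if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) {
                fprintf(stderr, "internal error: suberror=%u ndata=%u\n",
                        run->internal.suberror, run->internal.ndata);
                /* ndata == 0 for emulation failures after this change,
                 * so there are no extra data[] words to dump */
                return -1;
        }
        return 0;
}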
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 72558f8ff3f5..a6017132fba8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -467,7 +467,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 467 | level = iterator.level; | 467 | level = iterator.level; |
| 468 | sptep = iterator.sptep; | 468 | sptep = iterator.sptep; |
| 469 | 469 | ||
| 470 | /* FIXME: properly handle invlpg on large guest pages */ | ||
| 471 | if (level == PT_PAGE_TABLE_LEVEL || | 470 | if (level == PT_PAGE_TABLE_LEVEL || |
| 472 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || | 471 | ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || |
| 473 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { | 472 | ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c17404add91f..3de0b37ec038 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -46,6 +46,7 @@ MODULE_LICENSE("GPL"); | |||
| 46 | #define SVM_FEATURE_NPT (1 << 0) | 46 | #define SVM_FEATURE_NPT (1 << 0) |
| 47 | #define SVM_FEATURE_LBRV (1 << 1) | 47 | #define SVM_FEATURE_LBRV (1 << 1) |
| 48 | #define SVM_FEATURE_SVML (1 << 2) | 48 | #define SVM_FEATURE_SVML (1 << 2) |
| 49 | #define SVM_FEATURE_PAUSE_FILTER (1 << 10) | ||
| 49 | 50 | ||
| 50 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ | 51 | #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ |
| 51 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ | 52 | #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ |
| @@ -53,15 +54,6 @@ MODULE_LICENSE("GPL"); | |||
| 53 | 54 | ||
| 54 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) | 55 | #define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) |
| 55 | 56 | ||
| 56 | /* Turn on to get debugging output*/ | ||
| 57 | /* #define NESTED_DEBUG */ | ||
| 58 | |||
| 59 | #ifdef NESTED_DEBUG | ||
| 60 | #define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args) | ||
| 61 | #else | ||
| 62 | #define nsvm_printk(fmt, args...) do {} while(0) | ||
| 63 | #endif | ||
| 64 | |||
| 65 | static const u32 host_save_user_msrs[] = { | 57 | static const u32 host_save_user_msrs[] = { |
| 66 | #ifdef CONFIG_X86_64 | 58 | #ifdef CONFIG_X86_64 |
| 67 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, | 59 | MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE, |
| @@ -85,6 +77,9 @@ struct nested_state { | |||
| 85 | /* gpa pointers to the real vectors */ | 77 | /* gpa pointers to the real vectors */ |
| 86 | u64 vmcb_msrpm; | 78 | u64 vmcb_msrpm; |
| 87 | 79 | ||
| 80 | /* A VMEXIT is required but not yet emulated */ | ||
| 81 | bool exit_required; | ||
| 82 | |||
| 88 | /* cache for intercepts of the guest */ | 83 | /* cache for intercepts of the guest */ |
| 89 | u16 intercept_cr_read; | 84 | u16 intercept_cr_read; |
| 90 | u16 intercept_cr_write; | 85 | u16 intercept_cr_write; |
| @@ -112,6 +107,8 @@ struct vcpu_svm { | |||
| 112 | u32 *msrpm; | 107 | u32 *msrpm; |
| 113 | 108 | ||
| 114 | struct nested_state nested; | 109 | struct nested_state nested; |
| 110 | |||
| 111 | bool nmi_singlestep; | ||
| 115 | }; | 112 | }; |
| 116 | 113 | ||
| 117 | /* enable NPT for AMD64 and X86 with PAE */ | 114 | /* enable NPT for AMD64 and X86 with PAE */ |
| @@ -286,7 +283,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
| 286 | struct vcpu_svm *svm = to_svm(vcpu); | 283 | struct vcpu_svm *svm = to_svm(vcpu); |
| 287 | 284 | ||
| 288 | if (!svm->next_rip) { | 285 | if (!svm->next_rip) { |
| 289 | if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != | 286 | if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != |
| 290 | EMULATE_DONE) | 287 | EMULATE_DONE) |
| 291 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 288 | printk(KERN_DEBUG "%s: NOP\n", __func__); |
| 292 | return; | 289 | return; |
| @@ -316,7 +313,7 @@ static void svm_hardware_disable(void *garbage) | |||
| 316 | cpu_svm_disable(); | 313 | cpu_svm_disable(); |
| 317 | } | 314 | } |
| 318 | 315 | ||
| 319 | static void svm_hardware_enable(void *garbage) | 316 | static int svm_hardware_enable(void *garbage) |
| 320 | { | 317 | { |
| 321 | 318 | ||
| 322 | struct svm_cpu_data *svm_data; | 319 | struct svm_cpu_data *svm_data; |
| @@ -325,16 +322,21 @@ static void svm_hardware_enable(void *garbage) | |||
| 325 | struct desc_struct *gdt; | 322 | struct desc_struct *gdt; |
| 326 | int me = raw_smp_processor_id(); | 323 | int me = raw_smp_processor_id(); |
| 327 | 324 | ||
| 325 | rdmsrl(MSR_EFER, efer); | ||
| 326 | if (efer & EFER_SVME) | ||
| 327 | return -EBUSY; | ||
| 328 | |||
| 328 | if (!has_svm()) { | 329 | if (!has_svm()) { |
| 329 | printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); | 330 | printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n", |
| 330 | return; | 331 | me); |
| 332 | return -EINVAL; | ||
| 331 | } | 333 | } |
| 332 | svm_data = per_cpu(svm_data, me); | 334 | svm_data = per_cpu(svm_data, me); |
| 333 | 335 | ||
| 334 | if (!svm_data) { | 336 | if (!svm_data) { |
| 335 | printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", | 337 | printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", |
| 336 | me); | 338 | me); |
| 337 | return; | 339 | return -EINVAL; |
| 338 | } | 340 | } |
| 339 | 341 | ||
| 340 | svm_data->asid_generation = 1; | 342 | svm_data->asid_generation = 1; |
| @@ -345,11 +347,12 @@ static void svm_hardware_enable(void *garbage) | |||
| 345 | gdt = (struct desc_struct *)gdt_descr.base; | 347 | gdt = (struct desc_struct *)gdt_descr.base; |
| 346 | svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); | 348 | svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); |
| 347 | 349 | ||
| 348 | rdmsrl(MSR_EFER, efer); | ||
| 349 | wrmsrl(MSR_EFER, efer | EFER_SVME); | 350 | wrmsrl(MSR_EFER, efer | EFER_SVME); |
| 350 | 351 | ||
| 351 | wrmsrl(MSR_VM_HSAVE_PA, | 352 | wrmsrl(MSR_VM_HSAVE_PA, |
| 352 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); | 353 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); |
| 354 | |||
| 355 | return 0; | ||
| 353 | } | 356 | } |
| 354 | 357 | ||
| 355 | static void svm_cpu_uninit(int cpu) | 358 | static void svm_cpu_uninit(int cpu) |
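svm_hardware_enable() now reads MSR_EFER first and returns -EBUSY when EFER.SVME is already set (some other hypervisor owns SVM on this CPU), and reports its other failures as -EINVAL instead of returning void. A small sketch of that check-then-enable ordering; read_efer()/write_efer() are hypothetical stand-ins for the rdmsrl/wrmsrl accesses, and only the SVME bit position is taken from the architecture manuals.

/* Sketch of the enable-path ordering introduced above: refuse to enable SVM
 * if EFER.SVME is already set, otherwise set it. */
#include <errno.h>
#include <stdint.h>

#define EFER_SVME_BIT   (1ULL << 12)    /* EFER.SVME, per the AMD manuals */

extern uint64_t read_efer(void);        /* assumed platform hook */
extern void write_efer(uint64_t val);   /* assumed platform hook */

int hardware_enable_svm(void)
{
        uint64_t efer = read_efer();

        if (efer & EFER_SVME_BIT)       /* someone else already enabled SVM */
                return -EBUSY;

        write_efer(efer | EFER_SVME_BIT);
        return 0;
}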
| @@ -476,7 +479,7 @@ static __init int svm_hardware_setup(void) | |||
| 476 | kvm_enable_efer_bits(EFER_SVME); | 479 | kvm_enable_efer_bits(EFER_SVME); |
| 477 | } | 480 | } |
| 478 | 481 | ||
| 479 | for_each_online_cpu(cpu) { | 482 | for_each_possible_cpu(cpu) { |
| 480 | r = svm_cpu_init(cpu); | 483 | r = svm_cpu_init(cpu); |
| 481 | if (r) | 484 | if (r) |
| 482 | goto err; | 485 | goto err; |
| @@ -510,7 +513,7 @@ static __exit void svm_hardware_unsetup(void) | |||
| 510 | { | 513 | { |
| 511 | int cpu; | 514 | int cpu; |
| 512 | 515 | ||
| 513 | for_each_online_cpu(cpu) | 516 | for_each_possible_cpu(cpu) |
| 514 | svm_cpu_uninit(cpu); | 517 | svm_cpu_uninit(cpu); |
| 515 | 518 | ||
| 516 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); | 519 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); |
| @@ -625,11 +628,12 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 625 | save->rip = 0x0000fff0; | 628 | save->rip = 0x0000fff0; |
| 626 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; | 629 | svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; |
| 627 | 630 | ||
| 628 | /* | 631 | /* This is the guest-visible cr0 value. |
| 629 | * cr0 val on cpu init should be 0x60000010, we enable cpu | 632 | * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. |
| 630 | * cache by default. the orderly way is to enable cache in bios. | ||
| 631 | */ | 633 | */ |
| 632 | save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; | 634 | svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
| 635 | kvm_set_cr0(&svm->vcpu, svm->vcpu.arch.cr0); | ||
| 636 | |||
| 633 | save->cr4 = X86_CR4_PAE; | 637 | save->cr4 = X86_CR4_PAE; |
| 634 | /* rdx = ?? */ | 638 | /* rdx = ?? */ |
| 635 | 639 | ||
| @@ -644,8 +648,6 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 644 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| | 648 | control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| |
| 645 | INTERCEPT_CR3_MASK); | 649 | INTERCEPT_CR3_MASK); |
| 646 | save->g_pat = 0x0007040600070406ULL; | 650 | save->g_pat = 0x0007040600070406ULL; |
| 647 | /* enable caching because the QEMU Bios doesn't enable it */ | ||
| 648 | save->cr0 = X86_CR0_ET; | ||
| 649 | save->cr3 = 0; | 651 | save->cr3 = 0; |
| 650 | save->cr4 = 0; | 652 | save->cr4 = 0; |
| 651 | } | 653 | } |
| @@ -654,6 +656,11 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
| 654 | svm->nested.vmcb = 0; | 656 | svm->nested.vmcb = 0; |
| 655 | svm->vcpu.arch.hflags = 0; | 657 | svm->vcpu.arch.hflags = 0; |
| 656 | 658 | ||
| 659 | if (svm_has(SVM_FEATURE_PAUSE_FILTER)) { | ||
| 660 | control->pause_filter_count = 3000; | ||
| 661 | control->intercept |= (1ULL << INTERCEPT_PAUSE); | ||
| 662 | } | ||
| 663 | |||
| 657 | enable_gif(svm); | 664 | enable_gif(svm); |
| 658 | } | 665 | } |
| 659 | 666 | ||
| @@ -758,14 +765,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
| 758 | int i; | 765 | int i; |
| 759 | 766 | ||
| 760 | if (unlikely(cpu != vcpu->cpu)) { | 767 | if (unlikely(cpu != vcpu->cpu)) { |
| 761 | u64 tsc_this, delta; | 768 | u64 delta; |
| 762 | 769 | ||
| 763 | /* | 770 | /* |
| 764 | * Make sure that the guest sees a monotonically | 771 | * Make sure that the guest sees a monotonically |
| 765 | * increasing TSC. | 772 | * increasing TSC. |
| 766 | */ | 773 | */ |
| 767 | rdtscll(tsc_this); | 774 | delta = vcpu->arch.host_tsc - native_read_tsc(); |
| 768 | delta = vcpu->arch.host_tsc - tsc_this; | ||
| 769 | svm->vmcb->control.tsc_offset += delta; | 775 | svm->vmcb->control.tsc_offset += delta; |
| 770 | if (is_nested(svm)) | 776 | if (is_nested(svm)) |
| 771 | svm->nested.hsave->control.tsc_offset += delta; | 777 | svm->nested.hsave->control.tsc_offset += delta; |
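When a vcpu is loaded on a different host CPU, the TSC offset is bumped by the difference between the TSC captured at svm_vcpu_put() time and the new CPU's TSC (now read with native_read_tsc() instead of rdtscll()), so the guest never observes the TSC going backwards. A small arithmetic sketch of that adjustment; read_tsc() is an assumed stand-in for the per-CPU TSC read.

/* Sketch of the monotonic-TSC adjustment done when a vcpu migrates between
 * host CPUs. */
#include <stdint.h>

extern uint64_t read_tsc(void);         /* assumed per-cpu TSC reader */

struct vcpu_clock {
        uint64_t host_tsc;              /* TSC captured when the vcpu was put */
        uint64_t tsc_offset;            /* guest_tsc = host_tsc_now + tsc_offset */
};

static void adjust_tsc_on_migration(struct vcpu_clock *c)
{
        /* if the new cpu's TSC is behind the old one, delta is positive and
         * bumps the offset so the guest still sees a monotonic TSC */
        uint64_t delta = c->host_tsc - read_tsc();

        c->tsc_offset += delta;
}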
| @@ -787,7 +793,7 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
| 787 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 793 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
| 788 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 794 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
| 789 | 795 | ||
| 790 | rdtscll(vcpu->arch.host_tsc); | 796 | vcpu->arch.host_tsc = native_read_tsc(); |
| 791 | } | 797 | } |
| 792 | 798 | ||
| 793 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) | 799 | static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) |
| @@ -1045,7 +1051,7 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
| 1045 | svm->vmcb->control.intercept_exceptions &= | 1051 | svm->vmcb->control.intercept_exceptions &= |
| 1046 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 1052 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); |
| 1047 | 1053 | ||
| 1048 | if (vcpu->arch.singlestep) | 1054 | if (svm->nmi_singlestep) |
| 1049 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | 1055 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); |
| 1050 | 1056 | ||
| 1051 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 1057 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
| @@ -1060,26 +1066,16 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) | |||
| 1060 | vcpu->guest_debug = 0; | 1066 | vcpu->guest_debug = 0; |
| 1061 | } | 1067 | } |
| 1062 | 1068 | ||
| 1063 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1069 | static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
| 1064 | { | 1070 | { |
| 1065 | int old_debug = vcpu->guest_debug; | ||
| 1066 | struct vcpu_svm *svm = to_svm(vcpu); | 1071 | struct vcpu_svm *svm = to_svm(vcpu); |
| 1067 | 1072 | ||
| 1068 | vcpu->guest_debug = dbg->control; | ||
| 1069 | |||
| 1070 | update_db_intercept(vcpu); | ||
| 1071 | |||
| 1072 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1073 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
| 1073 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | 1074 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; |
| 1074 | else | 1075 | else |
| 1075 | svm->vmcb->save.dr7 = vcpu->arch.dr7; | 1076 | svm->vmcb->save.dr7 = vcpu->arch.dr7; |
| 1076 | 1077 | ||
| 1077 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 1078 | update_db_intercept(vcpu); |
| 1078 | svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
| 1079 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 1080 | svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
| 1081 | |||
| 1082 | return 0; | ||
| 1083 | } | 1079 | } |
| 1084 | 1080 | ||
| 1085 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 1081 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
| @@ -1180,7 +1176,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
| 1180 | } | 1176 | } |
| 1181 | } | 1177 | } |
| 1182 | 1178 | ||
| 1183 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1179 | static int pf_interception(struct vcpu_svm *svm) |
| 1184 | { | 1180 | { |
| 1185 | u64 fault_address; | 1181 | u64 fault_address; |
| 1186 | u32 error_code; | 1182 | u32 error_code; |
| @@ -1194,17 +1190,19 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1194 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1190 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
| 1195 | } | 1191 | } |
| 1196 | 1192 | ||
| 1197 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1193 | static int db_interception(struct vcpu_svm *svm) |
| 1198 | { | 1194 | { |
| 1195 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
| 1196 | |||
| 1199 | if (!(svm->vcpu.guest_debug & | 1197 | if (!(svm->vcpu.guest_debug & |
| 1200 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && | 1198 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && |
| 1201 | !svm->vcpu.arch.singlestep) { | 1199 | !svm->nmi_singlestep) { |
| 1202 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); | 1200 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); |
| 1203 | return 1; | 1201 | return 1; |
| 1204 | } | 1202 | } |
| 1205 | 1203 | ||
| 1206 | if (svm->vcpu.arch.singlestep) { | 1204 | if (svm->nmi_singlestep) { |
| 1207 | svm->vcpu.arch.singlestep = false; | 1205 | svm->nmi_singlestep = false; |
| 1208 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) | 1206 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
| 1209 | svm->vmcb->save.rflags &= | 1207 | svm->vmcb->save.rflags &= |
| 1210 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | 1208 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); |
| @@ -1223,25 +1221,27 @@ static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1223 | return 1; | 1221 | return 1; |
| 1224 | } | 1222 | } |
| 1225 | 1223 | ||
| 1226 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1224 | static int bp_interception(struct vcpu_svm *svm) |
| 1227 | { | 1225 | { |
| 1226 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
| 1227 | |||
| 1228 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1228 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
| 1229 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1229 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; |
| 1230 | kvm_run->debug.arch.exception = BP_VECTOR; | 1230 | kvm_run->debug.arch.exception = BP_VECTOR; |
| 1231 | return 0; | 1231 | return 0; |
| 1232 | } | 1232 | } |
| 1233 | 1233 | ||
| 1234 | static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1234 | static int ud_interception(struct vcpu_svm *svm) |
| 1235 | { | 1235 | { |
| 1236 | int er; | 1236 | int er; |
| 1237 | 1237 | ||
| 1238 | er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 1238 | er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD); |
| 1239 | if (er != EMULATE_DONE) | 1239 | if (er != EMULATE_DONE) |
| 1240 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1240 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
| 1241 | return 1; | 1241 | return 1; |
| 1242 | } | 1242 | } |
| 1243 | 1243 | ||
| 1244 | static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1244 | static int nm_interception(struct vcpu_svm *svm) |
| 1245 | { | 1245 | { |
| 1246 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); | 1246 | svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); |
| 1247 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) | 1247 | if (!(svm->vcpu.arch.cr0 & X86_CR0_TS)) |
| @@ -1251,7 +1251,7 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1251 | return 1; | 1251 | return 1; |
| 1252 | } | 1252 | } |
| 1253 | 1253 | ||
| 1254 | static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1254 | static int mc_interception(struct vcpu_svm *svm) |
| 1255 | { | 1255 | { |
| 1256 | /* | 1256 | /* |
| 1257 | * On an #MC intercept the MCE handler is not called automatically in | 1257 | * On an #MC intercept the MCE handler is not called automatically in |
| @@ -1264,8 +1264,10 @@ static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1264 | return 1; | 1264 | return 1; |
| 1265 | } | 1265 | } |
| 1266 | 1266 | ||
| 1267 | static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1267 | static int shutdown_interception(struct vcpu_svm *svm) |
| 1268 | { | 1268 | { |
| 1269 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
| 1270 | |||
| 1269 | /* | 1271 | /* |
| 1270 | * VMCB is undefined after a SHUTDOWN intercept | 1272 | * VMCB is undefined after a SHUTDOWN intercept |
| 1271 | * so reinitialize it. | 1273 | * so reinitialize it. |
| @@ -1277,7 +1279,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1277 | return 0; | 1279 | return 0; |
| 1278 | } | 1280 | } |
| 1279 | 1281 | ||
| 1280 | static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1282 | static int io_interception(struct vcpu_svm *svm) |
| 1281 | { | 1283 | { |
| 1282 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ | 1284 | u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ |
| 1283 | int size, in, string; | 1285 | int size, in, string; |
| @@ -1291,7 +1293,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1291 | 1293 | ||
| 1292 | if (string) { | 1294 | if (string) { |
| 1293 | if (emulate_instruction(&svm->vcpu, | 1295 | if (emulate_instruction(&svm->vcpu, |
| 1294 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | 1296 | 0, 0, 0) == EMULATE_DO_MMIO) |
| 1295 | return 0; | 1297 | return 0; |
| 1296 | return 1; | 1298 | return 1; |
| 1297 | } | 1299 | } |
| @@ -1301,33 +1303,33 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1301 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; | 1303 | size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; |
| 1302 | 1304 | ||
| 1303 | skip_emulated_instruction(&svm->vcpu); | 1305 | skip_emulated_instruction(&svm->vcpu); |
| 1304 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1306 | return kvm_emulate_pio(&svm->vcpu, in, size, port); |
| 1305 | } | 1307 | } |
| 1306 | 1308 | ||
| 1307 | static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1309 | static int nmi_interception(struct vcpu_svm *svm) |
| 1308 | { | 1310 | { |
| 1309 | return 1; | 1311 | return 1; |
| 1310 | } | 1312 | } |
| 1311 | 1313 | ||
| 1312 | static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1314 | static int intr_interception(struct vcpu_svm *svm) |
| 1313 | { | 1315 | { |
| 1314 | ++svm->vcpu.stat.irq_exits; | 1316 | ++svm->vcpu.stat.irq_exits; |
| 1315 | return 1; | 1317 | return 1; |
| 1316 | } | 1318 | } |
| 1317 | 1319 | ||
| 1318 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1320 | static int nop_on_interception(struct vcpu_svm *svm) |
| 1319 | { | 1321 | { |
| 1320 | return 1; | 1322 | return 1; |
| 1321 | } | 1323 | } |
| 1322 | 1324 | ||
| 1323 | static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1325 | static int halt_interception(struct vcpu_svm *svm) |
| 1324 | { | 1326 | { |
| 1325 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; | 1327 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; |
| 1326 | skip_emulated_instruction(&svm->vcpu); | 1328 | skip_emulated_instruction(&svm->vcpu); |
| 1327 | return kvm_emulate_halt(&svm->vcpu); | 1329 | return kvm_emulate_halt(&svm->vcpu); |
| 1328 | } | 1330 | } |
| 1329 | 1331 | ||
| 1330 | static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1332 | static int vmmcall_interception(struct vcpu_svm *svm) |
| 1331 | { | 1333 | { |
| 1332 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; | 1334 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; |
| 1333 | skip_emulated_instruction(&svm->vcpu); | 1335 | skip_emulated_instruction(&svm->vcpu); |
| @@ -1378,8 +1380,15 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) | |||
| 1378 | 1380 | ||
| 1379 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; | 1381 | svm->vmcb->control.exit_code = SVM_EXIT_INTR; |
| 1380 | 1382 | ||
| 1381 | if (nested_svm_exit_handled(svm)) { | 1383 | if (svm->nested.intercept & 1ULL) { |
| 1382 | nsvm_printk("VMexit -> INTR\n"); | 1384 | /* |
| 1385 | * The #vmexit can't be emulated here directly because this | ||
| 1386 | * code path runs with irqs and preemption disabled. A | ||
| 1387 | * #vmexit emulation might sleep. Only signal request for | ||
| 1388 | * the #vmexit here. | ||
| 1389 | */ | ||
| 1390 | svm->nested.exit_required = true; | ||
| 1391 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); | ||
| 1383 | return 1; | 1392 | return 1; |
| 1384 | } | 1393 | } |
| 1385 | 1394 | ||
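Per the new comment, nested_svm_intr() cannot emulate the nested #VMEXIT in place because it runs with irqs and preemption disabled and the emulation may sleep; it therefore only sets svm->nested.exit_required, and handle_exit() performs the real nested_svm_vmexit() later. A tiny sketch of that flag-now, act-later split with stand-in names.

/* Sketch of deferring sleepable work out of an atomic path, mirroring the
 * nested.exit_required handling above. */
#include <stdbool.h>

struct nested_state {
        bool exit_required;     /* set in atomic context, consumed later */
};

/* runs with interrupts/preemption disabled: must not sleep */
static void request_nested_exit(struct nested_state *n)
{
        n->exit_required = true;        /* just record the request */
}

/* runs later in a context that is allowed to sleep */
static bool maybe_do_nested_exit(struct nested_state *n)
{
        if (!n->exit_required)
                return false;

        n->exit_required = false;
        /* here KVM calls nested_svm_vmexit(), which may map guest pages
         * and therefore sleep */
        return true;
}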
| @@ -1390,10 +1399,7 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) | |||
| 1390 | { | 1399 | { |
| 1391 | struct page *page; | 1400 | struct page *page; |
| 1392 | 1401 | ||
| 1393 | down_read(¤t->mm->mmap_sem); | ||
| 1394 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); | 1402 | page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); |
| 1395 | up_read(¤t->mm->mmap_sem); | ||
| 1396 | |||
| 1397 | if (is_error_page(page)) | 1403 | if (is_error_page(page)) |
| 1398 | goto error; | 1404 | goto error; |
| 1399 | 1405 | ||
| @@ -1532,14 +1538,12 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) | |||
| 1532 | } | 1538 | } |
| 1533 | default: { | 1539 | default: { |
| 1534 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); | 1540 | u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); |
| 1535 | nsvm_printk("exit code: 0x%x\n", exit_code); | ||
| 1536 | if (svm->nested.intercept & exit_bits) | 1541 | if (svm->nested.intercept & exit_bits) |
| 1537 | vmexit = NESTED_EXIT_DONE; | 1542 | vmexit = NESTED_EXIT_DONE; |
| 1538 | } | 1543 | } |
| 1539 | } | 1544 | } |
| 1540 | 1545 | ||
| 1541 | if (vmexit == NESTED_EXIT_DONE) { | 1546 | if (vmexit == NESTED_EXIT_DONE) { |
| 1542 | nsvm_printk("#VMEXIT reason=%04x\n", exit_code); | ||
| 1543 | nested_svm_vmexit(svm); | 1547 | nested_svm_vmexit(svm); |
| 1544 | } | 1548 | } |
| 1545 | 1549 | ||
| @@ -1584,6 +1588,12 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1584 | struct vmcb *hsave = svm->nested.hsave; | 1588 | struct vmcb *hsave = svm->nested.hsave; |
| 1585 | struct vmcb *vmcb = svm->vmcb; | 1589 | struct vmcb *vmcb = svm->vmcb; |
| 1586 | 1590 | ||
| 1591 | trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, | ||
| 1592 | vmcb->control.exit_info_1, | ||
| 1593 | vmcb->control.exit_info_2, | ||
| 1594 | vmcb->control.exit_int_info, | ||
| 1595 | vmcb->control.exit_int_info_err); | ||
| 1596 | |||
| 1587 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); | 1597 | nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); |
| 1588 | if (!nested_vmcb) | 1598 | if (!nested_vmcb) |
| 1589 | return 1; | 1599 | return 1; |
| @@ -1617,6 +1627,22 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1617 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; | 1627 | nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; |
| 1618 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; | 1628 | nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; |
| 1619 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; | 1629 | nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; |
| 1630 | |||
| 1631 | /* | ||
| 1632 | * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have | ||
| 1633 | * to make sure that we do not lose injected events. So check event_inj | ||
| 1634 | * here and copy it to exit_int_info if it is valid. | ||
| 1635 | * Exit_int_info and event_inj can't both be valid because the case | ||
| 1636 | * below only happens on a VMRUN instruction intercept which has | ||
| 1637 | * no valid exit_int_info set. | ||
| 1638 | */ | ||
| 1639 | if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { | ||
| 1640 | struct vmcb_control_area *nc = &nested_vmcb->control; | ||
| 1641 | |||
| 1642 | nc->exit_int_info = vmcb->control.event_inj; | ||
| 1643 | nc->exit_int_info_err = vmcb->control.event_inj_err; | ||
| 1644 | } | ||
| 1645 | |||
| 1620 | nested_vmcb->control.tlb_ctl = 0; | 1646 | nested_vmcb->control.tlb_ctl = 0; |
| 1621 | nested_vmcb->control.event_inj = 0; | 1647 | nested_vmcb->control.event_inj = 0; |
| 1622 | nested_vmcb->control.event_inj_err = 0; | 1648 | nested_vmcb->control.event_inj_err = 0; |
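The new block in nested_svm_vmexit() keeps a still-pending injected event from being lost: if event_inj carries a valid event, it is copied into the nested VMCB's exit_int_info so the L1 hypervisor can re-inject it, which per the comment can only happen on a VMRUN intercept where exit_int_info is not otherwise valid. A small copy-if-valid sketch using the field names from the hunk; the valid-bit value here is a placeholder, the real one lives in the SVM headers.

/* Sketch of the "don't lose a pending injected event" copy performed in
 * nested_svm_vmexit(). */
#include <stdint.h>

#define EVTINJ_VALID    (1u << 31)      /* placeholder for SVM_EVTINJ_VALID */

struct vmcb_ctl {
        uint32_t event_inj;
        uint32_t event_inj_err;
        uint32_t exit_int_info;
        uint32_t exit_int_info_err;
};

static void propagate_pending_event(const struct vmcb_ctl *vmcb, struct vmcb_ctl *nested)
{
        if (!(vmcb->event_inj & EVTINJ_VALID))
                return;                         /* nothing was queued */

        /* hand the undelivered event back to L1 as exit interrupt info */
        nested->exit_int_info = vmcb->event_inj;
        nested->exit_int_info_err = vmcb->event_inj_err;
}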
| @@ -1628,10 +1654,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) | |||
| 1628 | /* Restore the original control entries */ | 1654 | /* Restore the original control entries */ |
| 1629 | copy_vmcb_control_area(vmcb, hsave); | 1655 | copy_vmcb_control_area(vmcb, hsave); |
| 1630 | 1656 | ||
| 1631 | /* Kill any pending exceptions */ | ||
| 1632 | if (svm->vcpu.arch.exception.pending == true) | ||
| 1633 | nsvm_printk("WARNING: Pending Exception\n"); | ||
| 1634 | |||
| 1635 | kvm_clear_exception_queue(&svm->vcpu); | 1657 | kvm_clear_exception_queue(&svm->vcpu); |
| 1636 | kvm_clear_interrupt_queue(&svm->vcpu); | 1658 | kvm_clear_interrupt_queue(&svm->vcpu); |
| 1637 | 1659 | ||
| @@ -1702,6 +1724,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1702 | /* nested_vmcb is our indicator if nested SVM is activated */ | 1724 | /* nested_vmcb is our indicator if nested SVM is activated */ |
| 1703 | svm->nested.vmcb = svm->vmcb->save.rax; | 1725 | svm->nested.vmcb = svm->vmcb->save.rax; |
| 1704 | 1726 | ||
| 1727 | trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, | ||
| 1728 | nested_vmcb->save.rip, | ||
| 1729 | nested_vmcb->control.int_ctl, | ||
| 1730 | nested_vmcb->control.event_inj, | ||
| 1731 | nested_vmcb->control.nested_ctl); | ||
| 1732 | |||
| 1705 | /* Clear internal status */ | 1733 | /* Clear internal status */ |
| 1706 | kvm_clear_exception_queue(&svm->vcpu); | 1734 | kvm_clear_exception_queue(&svm->vcpu); |
| 1707 | kvm_clear_interrupt_queue(&svm->vcpu); | 1735 | kvm_clear_interrupt_queue(&svm->vcpu); |
| @@ -1789,28 +1817,15 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) | |||
| 1789 | svm->nested.intercept = nested_vmcb->control.intercept; | 1817 | svm->nested.intercept = nested_vmcb->control.intercept; |
| 1790 | 1818 | ||
| 1791 | force_new_asid(&svm->vcpu); | 1819 | force_new_asid(&svm->vcpu); |
| 1792 | svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info; | ||
| 1793 | svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err; | ||
| 1794 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; | 1820 | svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; |
| 1795 | if (nested_vmcb->control.int_ctl & V_IRQ_MASK) { | ||
| 1796 | nsvm_printk("nSVM Injecting Interrupt: 0x%x\n", | ||
| 1797 | nested_vmcb->control.int_ctl); | ||
| 1798 | } | ||
| 1799 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) | 1821 | if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) |
| 1800 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; | 1822 | svm->vcpu.arch.hflags |= HF_VINTR_MASK; |
| 1801 | else | 1823 | else |
| 1802 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; | 1824 | svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; |
| 1803 | 1825 | ||
| 1804 | nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n", | ||
| 1805 | nested_vmcb->control.exit_int_info, | ||
| 1806 | nested_vmcb->control.int_state); | ||
| 1807 | |||
| 1808 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; | 1826 | svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; |
| 1809 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; | 1827 | svm->vmcb->control.int_state = nested_vmcb->control.int_state; |
| 1810 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; | 1828 | svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; |
| 1811 | if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID) | ||
| 1812 | nsvm_printk("Injecting Event: 0x%x\n", | ||
| 1813 | nested_vmcb->control.event_inj); | ||
| 1814 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; | 1829 | svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; |
| 1815 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; | 1830 | svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; |
| 1816 | 1831 | ||
| @@ -1837,7 +1852,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) | |||
| 1837 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; | 1852 | to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; |
| 1838 | } | 1853 | } |
| 1839 | 1854 | ||
| 1840 | static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1855 | static int vmload_interception(struct vcpu_svm *svm) |
| 1841 | { | 1856 | { |
| 1842 | struct vmcb *nested_vmcb; | 1857 | struct vmcb *nested_vmcb; |
| 1843 | 1858 | ||
| @@ -1857,7 +1872,7 @@ static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1857 | return 1; | 1872 | return 1; |
| 1858 | } | 1873 | } |
| 1859 | 1874 | ||
| 1860 | static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1875 | static int vmsave_interception(struct vcpu_svm *svm) |
| 1861 | { | 1876 | { |
| 1862 | struct vmcb *nested_vmcb; | 1877 | struct vmcb *nested_vmcb; |
| 1863 | 1878 | ||
| @@ -1877,10 +1892,8 @@ static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1877 | return 1; | 1892 | return 1; |
| 1878 | } | 1893 | } |
| 1879 | 1894 | ||
| 1880 | static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1895 | static int vmrun_interception(struct vcpu_svm *svm) |
| 1881 | { | 1896 | { |
| 1882 | nsvm_printk("VMrun\n"); | ||
| 1883 | |||
| 1884 | if (nested_svm_check_permissions(svm)) | 1897 | if (nested_svm_check_permissions(svm)) |
| 1885 | return 1; | 1898 | return 1; |
| 1886 | 1899 | ||
| @@ -1907,7 +1920,7 @@ failed: | |||
| 1907 | return 1; | 1920 | return 1; |
| 1908 | } | 1921 | } |
| 1909 | 1922 | ||
| 1910 | static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1923 | static int stgi_interception(struct vcpu_svm *svm) |
| 1911 | { | 1924 | { |
| 1912 | if (nested_svm_check_permissions(svm)) | 1925 | if (nested_svm_check_permissions(svm)) |
| 1913 | return 1; | 1926 | return 1; |
| @@ -1920,7 +1933,7 @@ static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1920 | return 1; | 1933 | return 1; |
| 1921 | } | 1934 | } |
| 1922 | 1935 | ||
| 1923 | static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1936 | static int clgi_interception(struct vcpu_svm *svm) |
| 1924 | { | 1937 | { |
| 1925 | if (nested_svm_check_permissions(svm)) | 1938 | if (nested_svm_check_permissions(svm)) |
| 1926 | return 1; | 1939 | return 1; |
| @@ -1937,10 +1950,12 @@ static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1937 | return 1; | 1950 | return 1; |
| 1938 | } | 1951 | } |
| 1939 | 1952 | ||
| 1940 | static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1953 | static int invlpga_interception(struct vcpu_svm *svm) |
| 1941 | { | 1954 | { |
| 1942 | struct kvm_vcpu *vcpu = &svm->vcpu; | 1955 | struct kvm_vcpu *vcpu = &svm->vcpu; |
| 1943 | nsvm_printk("INVLPGA\n"); | 1956 | |
| 1957 | trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], | ||
| 1958 | vcpu->arch.regs[VCPU_REGS_RAX]); | ||
| 1944 | 1959 | ||
| 1945 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ | 1960 | /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ |
| 1946 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); | 1961 | kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); |
| @@ -1950,15 +1965,21 @@ static int invlpga_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1950 | return 1; | 1965 | return 1; |
| 1951 | } | 1966 | } |
| 1952 | 1967 | ||
| 1953 | static int invalid_op_interception(struct vcpu_svm *svm, | 1968 | static int skinit_interception(struct vcpu_svm *svm) |
| 1954 | struct kvm_run *kvm_run) | ||
| 1955 | { | 1969 | { |
| 1970 | trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); | ||
| 1971 | |||
| 1956 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | 1972 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); |
| 1957 | return 1; | 1973 | return 1; |
| 1958 | } | 1974 | } |
| 1959 | 1975 | ||
| 1960 | static int task_switch_interception(struct vcpu_svm *svm, | 1976 | static int invalid_op_interception(struct vcpu_svm *svm) |
| 1961 | struct kvm_run *kvm_run) | 1977 | { |
| 1978 | kvm_queue_exception(&svm->vcpu, UD_VECTOR); | ||
| 1979 | return 1; | ||
| 1980 | } | ||
| 1981 | |||
| 1982 | static int task_switch_interception(struct vcpu_svm *svm) | ||
| 1962 | { | 1983 | { |
| 1963 | u16 tss_selector; | 1984 | u16 tss_selector; |
| 1964 | int reason; | 1985 | int reason; |
| @@ -2008,14 +2029,14 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
| 2008 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | 2029 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); |
| 2009 | } | 2030 | } |
| 2010 | 2031 | ||
| 2011 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2032 | static int cpuid_interception(struct vcpu_svm *svm) |
| 2012 | { | 2033 | { |
| 2013 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; | 2034 | svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; |
| 2014 | kvm_emulate_cpuid(&svm->vcpu); | 2035 | kvm_emulate_cpuid(&svm->vcpu); |
| 2015 | return 1; | 2036 | return 1; |
| 2016 | } | 2037 | } |
| 2017 | 2038 | ||
| 2018 | static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2039 | static int iret_interception(struct vcpu_svm *svm) |
| 2019 | { | 2040 | { |
| 2020 | ++svm->vcpu.stat.nmi_window_exits; | 2041 | ++svm->vcpu.stat.nmi_window_exits; |
| 2021 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2042 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); |
| @@ -2023,26 +2044,27 @@ static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 2023 | return 1; | 2044 | return 1; |
| 2024 | } | 2045 | } |
| 2025 | 2046 | ||
| 2026 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2047 | static int invlpg_interception(struct vcpu_svm *svm) |
| 2027 | { | 2048 | { |
| 2028 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | 2049 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
| 2029 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2050 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
| 2030 | return 1; | 2051 | return 1; |
| 2031 | } | 2052 | } |
| 2032 | 2053 | ||
| 2033 | static int emulate_on_interception(struct vcpu_svm *svm, | 2054 | static int emulate_on_interception(struct vcpu_svm *svm) |
| 2034 | struct kvm_run *kvm_run) | ||
| 2035 | { | 2055 | { |
| 2036 | if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE) | 2056 | if (emulate_instruction(&svm->vcpu, 0, 0, 0) != EMULATE_DONE) |
| 2037 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); | 2057 | pr_unimpl(&svm->vcpu, "%s: failed\n", __func__); |
| 2038 | return 1; | 2058 | return 1; |
| 2039 | } | 2059 | } |
| 2040 | 2060 | ||
| 2041 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2061 | static int cr8_write_interception(struct vcpu_svm *svm) |
| 2042 | { | 2062 | { |
| 2063 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
| 2064 | |||
| 2043 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 2065 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
| 2044 | /* instruction emulation calls kvm_set_cr8() */ | 2066 | /* instruction emulation calls kvm_set_cr8() */ |
| 2045 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); | 2067 | emulate_instruction(&svm->vcpu, 0, 0, 0); |
| 2046 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 2068 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
| 2047 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2069 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; |
| 2048 | return 1; | 2070 | return 1; |
| @@ -2128,7 +2150,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) | |||
| 2128 | return 0; | 2150 | return 0; |
| 2129 | } | 2151 | } |
| 2130 | 2152 | ||
| 2131 | static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2153 | static int rdmsr_interception(struct vcpu_svm *svm) |
| 2132 | { | 2154 | { |
| 2133 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2155 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
| 2134 | u64 data; | 2156 | u64 data; |
| @@ -2221,7 +2243,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
| 2221 | return 0; | 2243 | return 0; |
| 2222 | } | 2244 | } |
| 2223 | 2245 | ||
| 2224 | static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2246 | static int wrmsr_interception(struct vcpu_svm *svm) |
| 2225 | { | 2247 | { |
| 2226 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 2248 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
| 2227 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) | 2249 | u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) |
| @@ -2237,17 +2259,18 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 2237 | return 1; | 2259 | return 1; |
| 2238 | } | 2260 | } |
| 2239 | 2261 | ||
| 2240 | static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 2262 | static int msr_interception(struct vcpu_svm *svm) |
| 2241 | { | 2263 | { |
| 2242 | if (svm->vmcb->control.exit_info_1) | 2264 | if (svm->vmcb->control.exit_info_1) |
| 2243 | return wrmsr_interception(svm, kvm_run); | 2265 | return wrmsr_interception(svm); |
| 2244 | else | 2266 | else |
| 2245 | return rdmsr_interception(svm, kvm_run); | 2267 | return rdmsr_interception(svm); |
| 2246 | } | 2268 | } |
| 2247 | 2269 | ||
| 2248 | static int interrupt_window_interception(struct vcpu_svm *svm, | 2270 | static int interrupt_window_interception(struct vcpu_svm *svm) |
| 2249 | struct kvm_run *kvm_run) | ||
| 2250 | { | 2271 | { |
| 2272 | struct kvm_run *kvm_run = svm->vcpu.run; | ||
| 2273 | |||
| 2251 | svm_clear_vintr(svm); | 2274 | svm_clear_vintr(svm); |
| 2252 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 2275 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
| 2253 | /* | 2276 | /* |
| @@ -2265,8 +2288,13 @@ static int interrupt_window_interception(struct vcpu_svm *svm, | |||
| 2265 | return 1; | 2288 | return 1; |
| 2266 | } | 2289 | } |
| 2267 | 2290 | ||
| 2268 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | 2291 | static int pause_interception(struct vcpu_svm *svm) |
| 2269 | struct kvm_run *kvm_run) = { | 2292 | { |
| 2293 | kvm_vcpu_on_spin(&(svm->vcpu)); | ||
| 2294 | return 1; | ||
| 2295 | } | ||
| 2296 | |||
| 2297 | static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { | ||
| 2270 | [SVM_EXIT_READ_CR0] = emulate_on_interception, | 2298 | [SVM_EXIT_READ_CR0] = emulate_on_interception, |
| 2271 | [SVM_EXIT_READ_CR3] = emulate_on_interception, | 2299 | [SVM_EXIT_READ_CR3] = emulate_on_interception, |
| 2272 | [SVM_EXIT_READ_CR4] = emulate_on_interception, | 2300 | [SVM_EXIT_READ_CR4] = emulate_on_interception, |
| @@ -2301,6 +2329,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
| 2301 | [SVM_EXIT_CPUID] = cpuid_interception, | 2329 | [SVM_EXIT_CPUID] = cpuid_interception, |
| 2302 | [SVM_EXIT_IRET] = iret_interception, | 2330 | [SVM_EXIT_IRET] = iret_interception, |
| 2303 | [SVM_EXIT_INVD] = emulate_on_interception, | 2331 | [SVM_EXIT_INVD] = emulate_on_interception, |
| 2332 | [SVM_EXIT_PAUSE] = pause_interception, | ||
| 2304 | [SVM_EXIT_HLT] = halt_interception, | 2333 | [SVM_EXIT_HLT] = halt_interception, |
| 2305 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2334 | [SVM_EXIT_INVLPG] = invlpg_interception, |
| 2306 | [SVM_EXIT_INVLPGA] = invlpga_interception, | 2335 | [SVM_EXIT_INVLPGA] = invlpga_interception, |
| @@ -2314,26 +2343,36 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
| 2314 | [SVM_EXIT_VMSAVE] = vmsave_interception, | 2343 | [SVM_EXIT_VMSAVE] = vmsave_interception, |
| 2315 | [SVM_EXIT_STGI] = stgi_interception, | 2344 | [SVM_EXIT_STGI] = stgi_interception, |
| 2316 | [SVM_EXIT_CLGI] = clgi_interception, | 2345 | [SVM_EXIT_CLGI] = clgi_interception, |
| 2317 | [SVM_EXIT_SKINIT] = invalid_op_interception, | 2346 | [SVM_EXIT_SKINIT] = skinit_interception, |
| 2318 | [SVM_EXIT_WBINVD] = emulate_on_interception, | 2347 | [SVM_EXIT_WBINVD] = emulate_on_interception, |
| 2319 | [SVM_EXIT_MONITOR] = invalid_op_interception, | 2348 | [SVM_EXIT_MONITOR] = invalid_op_interception, |
| 2320 | [SVM_EXIT_MWAIT] = invalid_op_interception, | 2349 | [SVM_EXIT_MWAIT] = invalid_op_interception, |
| 2321 | [SVM_EXIT_NPF] = pf_interception, | 2350 | [SVM_EXIT_NPF] = pf_interception, |
| 2322 | }; | 2351 | }; |
| 2323 | 2352 | ||
| 2324 | static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 2353 | static int handle_exit(struct kvm_vcpu *vcpu) |
| 2325 | { | 2354 | { |
| 2326 | struct vcpu_svm *svm = to_svm(vcpu); | 2355 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2356 | struct kvm_run *kvm_run = vcpu->run; | ||
| 2327 | u32 exit_code = svm->vmcb->control.exit_code; | 2357 | u32 exit_code = svm->vmcb->control.exit_code; |
| 2328 | 2358 | ||
| 2329 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); | 2359 | trace_kvm_exit(exit_code, svm->vmcb->save.rip); |
| 2330 | 2360 | ||
| 2361 | if (unlikely(svm->nested.exit_required)) { | ||
| 2362 | nested_svm_vmexit(svm); | ||
| 2363 | svm->nested.exit_required = false; | ||
| 2364 | |||
| 2365 | return 1; | ||
| 2366 | } | ||
| 2367 | |||
| 2331 | if (is_nested(svm)) { | 2368 | if (is_nested(svm)) { |
| 2332 | int vmexit; | 2369 | int vmexit; |
| 2333 | 2370 | ||
| 2334 | nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n", | 2371 | trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, |
| 2335 | exit_code, svm->vmcb->control.exit_info_1, | 2372 | svm->vmcb->control.exit_info_1, |
| 2336 | svm->vmcb->control.exit_info_2, svm->vmcb->save.rip); | 2373 | svm->vmcb->control.exit_info_2, |
| 2374 | svm->vmcb->control.exit_int_info, | ||
| 2375 | svm->vmcb->control.exit_int_info_err); | ||
| 2337 | 2376 | ||
| 2338 | vmexit = nested_svm_exit_special(svm); | 2377 | vmexit = nested_svm_exit_special(svm); |
| 2339 | 2378 | ||
| @@ -2383,7 +2422,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 2383 | return 0; | 2422 | return 0; |
| 2384 | } | 2423 | } |
| 2385 | 2424 | ||
| 2386 | return svm_exit_handlers[exit_code](svm, kvm_run); | 2425 | return svm_exit_handlers[exit_code](svm); |
| 2387 | } | 2426 | } |
| 2388 | 2427 | ||
| 2389 | static void reload_tss(struct kvm_vcpu *vcpu) | 2428 | static void reload_tss(struct kvm_vcpu *vcpu) |
| @@ -2460,20 +2499,47 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu) | |||
| 2460 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | 2499 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); |
| 2461 | } | 2500 | } |
| 2462 | 2501 | ||
| 2502 | static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
| 2503 | { | ||
| 2504 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2505 | |||
| 2506 | return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); | ||
| 2507 | } | ||
| 2508 | |||
| 2509 | static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
| 2510 | { | ||
| 2511 | struct vcpu_svm *svm = to_svm(vcpu); | ||
| 2512 | |||
| 2513 | if (masked) { | ||
| 2514 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | ||
| 2515 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | ||
| 2516 | } else { | ||
| 2517 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | ||
| 2518 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | ||
| 2519 | } | ||
| 2520 | } | ||
| 2521 | |||
| 2463 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | 2522 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) |
| 2464 | { | 2523 | { |
| 2465 | struct vcpu_svm *svm = to_svm(vcpu); | 2524 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2466 | struct vmcb *vmcb = svm->vmcb; | 2525 | struct vmcb *vmcb = svm->vmcb; |
| 2467 | return (vmcb->save.rflags & X86_EFLAGS_IF) && | 2526 | int ret; |
| 2468 | !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2527 | |
| 2469 | gif_set(svm) && | 2528 | if (!gif_set(svm) || |
| 2470 | !(is_nested(svm) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)); | 2529 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) |
| 2530 | return 0; | ||
| 2531 | |||
| 2532 | ret = !!(vmcb->save.rflags & X86_EFLAGS_IF); | ||
| 2533 | |||
| 2534 | if (is_nested(svm)) | ||
| 2535 | return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); | ||
| 2536 | |||
| 2537 | return ret; | ||
| 2471 | } | 2538 | } |
| 2472 | 2539 | ||
| 2473 | static void enable_irq_window(struct kvm_vcpu *vcpu) | 2540 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
| 2474 | { | 2541 | { |
| 2475 | struct vcpu_svm *svm = to_svm(vcpu); | 2542 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2476 | nsvm_printk("Trying to open IRQ window\n"); | ||
| 2477 | 2543 | ||
| 2478 | nested_svm_intr(svm); | 2544 | nested_svm_intr(svm); |
| 2479 | 2545 | ||
| @@ -2498,7 +2564,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
| 2498 | /* Something prevents NMI from being injected. Single step over | 2564 | /* Something prevents NMI from being injected. Single step over |
| 2499 | possible problem (IRET or exception injection or interrupt | 2565 | possible problem (IRET or exception injection or interrupt |
| 2500 | shadow) */ | 2566 | shadow) */ |
| 2501 | vcpu->arch.singlestep = true; | 2567 | svm->nmi_singlestep = true; |
| 2502 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 2568 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
| 2503 | update_db_intercept(vcpu); | 2569 | update_db_intercept(vcpu); |
| 2504 | } | 2570 | } |
| @@ -2588,13 +2654,20 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
| 2588 | #define R "e" | 2654 | #define R "e" |
| 2589 | #endif | 2655 | #endif |
| 2590 | 2656 | ||
| 2591 | static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2657 | static void svm_vcpu_run(struct kvm_vcpu *vcpu) |
| 2592 | { | 2658 | { |
| 2593 | struct vcpu_svm *svm = to_svm(vcpu); | 2659 | struct vcpu_svm *svm = to_svm(vcpu); |
| 2594 | u16 fs_selector; | 2660 | u16 fs_selector; |
| 2595 | u16 gs_selector; | 2661 | u16 gs_selector; |
| 2596 | u16 ldt_selector; | 2662 | u16 ldt_selector; |
| 2597 | 2663 | ||
| 2664 | /* | ||
| 2665 | * A vmexit emulation is required before the vcpu can be executed | ||
| 2666 | * again. | ||
| 2667 | */ | ||
| 2668 | if (unlikely(svm->nested.exit_required)) | ||
| 2669 | return; | ||
| 2670 | |||
| 2598 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; | 2671 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; |
| 2599 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; | 2672 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; |
| 2600 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; | 2673 | svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; |
| @@ -2893,6 +2966,8 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
| 2893 | .queue_exception = svm_queue_exception, | 2966 | .queue_exception = svm_queue_exception, |
| 2894 | .interrupt_allowed = svm_interrupt_allowed, | 2967 | .interrupt_allowed = svm_interrupt_allowed, |
| 2895 | .nmi_allowed = svm_nmi_allowed, | 2968 | .nmi_allowed = svm_nmi_allowed, |
| 2969 | .get_nmi_mask = svm_get_nmi_mask, | ||
| 2970 | .set_nmi_mask = svm_set_nmi_mask, | ||
| 2896 | .enable_nmi_window = enable_nmi_window, | 2971 | .enable_nmi_window = enable_nmi_window, |
| 2897 | .enable_irq_window = enable_irq_window, | 2972 | .enable_irq_window = enable_irq_window, |
| 2898 | .update_cr8_intercept = update_cr8_intercept, | 2973 | .update_cr8_intercept = update_cr8_intercept, |
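The svm_get_nmi_mask()/svm_set_nmi_mask() callbacks wired up above expose the vcpu's NMI-blocking state through kvm_x86_ops, so generic x86 code no longer has to reach into HF_NMI_MASK directly. A minimal sketch of a caller is shown below, assuming <linux/kvm_host.h>; the wrapper names are illustrative assumptions, only the ->get_nmi_mask and ->set_nmi_mask members come from this patch:

	/* Hypothetical helpers built on the new kvm_x86_ops members. */
	static bool vcpu_nmi_masked(struct kvm_vcpu *vcpu)
	{
		return kvm_x86_ops->get_nmi_mask(vcpu);	/* SVM: tests HF_NMI_MASK */
	}

	static void vcpu_set_nmi_masked(struct kvm_vcpu *vcpu, bool masked)
	{
		/* SVM: updates HF_NMI_MASK and toggles the IRET intercept */
		kvm_x86_ops->set_nmi_mask(vcpu, masked);
	}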
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0d480e77eacf..816e0449db0b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
| @@ -349,6 +349,171 @@ TRACE_EVENT(kvm_apic_accept_irq, | |||
| 349 | __entry->coalesced ? " (coalesced)" : "") | 349 | __entry->coalesced ? " (coalesced)" : "") |
| 350 | ); | 350 | ); |
| 351 | 351 | ||
| 352 | /* | ||
| 353 | * Tracepoint for nested VMRUN | ||
| 354 | */ | ||
| 355 | TRACE_EVENT(kvm_nested_vmrun, | ||
| 356 | TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, | ||
| 357 | __u32 event_inj, bool npt), | ||
| 358 | TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, npt), | ||
| 359 | |||
| 360 | TP_STRUCT__entry( | ||
| 361 | __field( __u64, rip ) | ||
| 362 | __field( __u64, vmcb ) | ||
| 363 | __field( __u64, nested_rip ) | ||
| 364 | __field( __u32, int_ctl ) | ||
| 365 | __field( __u32, event_inj ) | ||
| 366 | __field( bool, npt ) | ||
| 367 | ), | ||
| 368 | |||
| 369 | TP_fast_assign( | ||
| 370 | __entry->rip = rip; | ||
| 371 | __entry->vmcb = vmcb; | ||
| 372 | __entry->nested_rip = nested_rip; | ||
| 373 | __entry->int_ctl = int_ctl; | ||
| 374 | __entry->event_inj = event_inj; | ||
| 375 | __entry->npt = npt; | ||
| 376 | ), | ||
| 377 | |||
| 378 | TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " | ||
| 379 | "event_inj: 0x%08x npt: %s\n", | ||
| 380 | __entry->rip, __entry->vmcb, __entry->nested_rip, | ||
| 381 | __entry->int_ctl, __entry->event_inj, | ||
| 382 | __entry->npt ? "on" : "off") | ||
| 383 | ); | ||
| 384 | |||
| 385 | /* | ||
| 386 | * Tracepoint for #VMEXIT while nested | ||
| 387 | */ | ||
| 388 | TRACE_EVENT(kvm_nested_vmexit, | ||
| 389 | TP_PROTO(__u64 rip, __u32 exit_code, | ||
| 390 | __u64 exit_info1, __u64 exit_info2, | ||
| 391 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
| 392 | TP_ARGS(rip, exit_code, exit_info1, exit_info2, | ||
| 393 | exit_int_info, exit_int_info_err), | ||
| 394 | |||
| 395 | TP_STRUCT__entry( | ||
| 396 | __field( __u64, rip ) | ||
| 397 | __field( __u32, exit_code ) | ||
| 398 | __field( __u64, exit_info1 ) | ||
| 399 | __field( __u64, exit_info2 ) | ||
| 400 | __field( __u32, exit_int_info ) | ||
| 401 | __field( __u32, exit_int_info_err ) | ||
| 402 | ), | ||
| 403 | |||
| 404 | TP_fast_assign( | ||
| 405 | __entry->rip = rip; | ||
| 406 | __entry->exit_code = exit_code; | ||
| 407 | __entry->exit_info1 = exit_info1; | ||
| 408 | __entry->exit_info2 = exit_info2; | ||
| 409 | __entry->exit_int_info = exit_int_info; | ||
| 410 | __entry->exit_int_info_err = exit_int_info_err; | ||
| 411 | ), | ||
| 412 | TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " | ||
| 413 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
| 414 | __entry->rip, | ||
| 415 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
| 416 | kvm_x86_ops->exit_reasons_str), | ||
| 417 | __entry->exit_info1, __entry->exit_info2, | ||
| 418 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
| 419 | ); | ||
| 420 | |||
| 421 | /* | ||
| 422 | * Tracepoint for #VMEXIT reinjected to the guest | ||
| 423 | */ | ||
| 424 | TRACE_EVENT(kvm_nested_vmexit_inject, | ||
| 425 | TP_PROTO(__u32 exit_code, | ||
| 426 | __u64 exit_info1, __u64 exit_info2, | ||
| 427 | __u32 exit_int_info, __u32 exit_int_info_err), | ||
| 428 | TP_ARGS(exit_code, exit_info1, exit_info2, | ||
| 429 | exit_int_info, exit_int_info_err), | ||
| 430 | |||
| 431 | TP_STRUCT__entry( | ||
| 432 | __field( __u32, exit_code ) | ||
| 433 | __field( __u64, exit_info1 ) | ||
| 434 | __field( __u64, exit_info2 ) | ||
| 435 | __field( __u32, exit_int_info ) | ||
| 436 | __field( __u32, exit_int_info_err ) | ||
| 437 | ), | ||
| 438 | |||
| 439 | TP_fast_assign( | ||
| 440 | __entry->exit_code = exit_code; | ||
| 441 | __entry->exit_info1 = exit_info1; | ||
| 442 | __entry->exit_info2 = exit_info2; | ||
| 443 | __entry->exit_int_info = exit_int_info; | ||
| 444 | __entry->exit_int_info_err = exit_int_info_err; | ||
| 445 | ), | ||
| 446 | |||
| 447 | TP_printk("reason: %s ext_inf1: 0x%016llx " | ||
| 448 | "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", | ||
| 449 | ftrace_print_symbols_seq(p, __entry->exit_code, | ||
| 450 | kvm_x86_ops->exit_reasons_str), | ||
| 451 | __entry->exit_info1, __entry->exit_info2, | ||
| 452 | __entry->exit_int_info, __entry->exit_int_info_err) | ||
| 453 | ); | ||
| 454 | |||
| 455 | /* | ||
| 456 | * Tracepoint for nested #vmexit because of interrupt pending | ||
| 457 | */ | ||
| 458 | TRACE_EVENT(kvm_nested_intr_vmexit, | ||
| 459 | TP_PROTO(__u64 rip), | ||
| 460 | TP_ARGS(rip), | ||
| 461 | |||
| 462 | TP_STRUCT__entry( | ||
| 463 | __field( __u64, rip ) | ||
| 464 | ), | ||
| 465 | |||
| 466 | TP_fast_assign( | ||
| 467 | __entry->rip = rip | ||
| 468 | ), | ||
| 469 | |||
| 470 | TP_printk("rip: 0x%016llx\n", __entry->rip) | ||
| 471 | ); | ||
| 472 | |||
| 473 | /* | ||
| 474 | * Tracepoint for the INVLPGA instruction | ||
| 475 | */ | ||
| 476 | TRACE_EVENT(kvm_invlpga, | ||
| 477 | TP_PROTO(__u64 rip, int asid, u64 address), | ||
| 478 | TP_ARGS(rip, asid, address), | ||
| 479 | |||
| 480 | TP_STRUCT__entry( | ||
| 481 | __field( __u64, rip ) | ||
| 482 | __field( int, asid ) | ||
| 483 | __field( __u64, address ) | ||
| 484 | ), | ||
| 485 | |||
| 486 | TP_fast_assign( | ||
| 487 | __entry->rip = rip; | ||
| 488 | __entry->asid = asid; | ||
| 489 | __entry->address = address; | ||
| 490 | ), | ||
| 491 | |||
| 492 | TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", | ||
| 493 | __entry->rip, __entry->asid, __entry->address) | ||
| 494 | ); | ||
| 495 | |||
| 496 | /* | ||
| 497 | * Tracepoint for the SKINIT instruction | ||
| 498 | */ | ||
| 499 | TRACE_EVENT(kvm_skinit, | ||
| 500 | TP_PROTO(__u64 rip, __u32 slb), | ||
| 501 | TP_ARGS(rip, slb), | ||
| 502 | |||
| 503 | TP_STRUCT__entry( | ||
| 504 | __field( __u64, rip ) | ||
| 505 | __field( __u32, slb ) | ||
| 506 | ), | ||
| 507 | |||
| 508 | TP_fast_assign( | ||
| 509 | __entry->rip = rip; | ||
| 510 | __entry->slb = slb; | ||
| 511 | ), | ||
| 512 | |||
| 513 | TP_printk("rip: 0x%016llx slb: 0x%08x\n", | ||
| 514 | __entry->rip, __entry->slb) | ||
| 515 | ); | ||
| 516 | |||
| 352 | #endif /* _TRACE_KVM_H */ | 517 | #endif /* _TRACE_KVM_H */ |
| 353 | 518 | ||
| 354 | /* This part must be outside protection */ | 519 | /* This part must be outside protection */ |
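Each TRACE_EVENT() above expands into a trace_<name>() function that the SVM code calls at the corresponding emulation point. As a sketch, an INVLPGA intercept handler would feed its tracepoint roughly as follows; the exact register sourcing in svm.c is an assumption, while the argument list matches TRACE_EVENT(kvm_invlpga) above:

	/* Illustrative call site for the kvm_invlpga tracepoint. */
	static void trace_invlpga_example(struct vcpu_svm *svm)
	{
		trace_kvm_invlpga(svm->vmcb->save.rip,			/* guest rip */
				  svm->vcpu.arch.regs[VCPU_REGS_RCX],	/* asid      */
				  svm->vcpu.arch.regs[VCPU_REGS_RAX]);	/* address   */
	}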
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ed53b42caba1..d4918d6fc924 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -61,12 +61,37 @@ module_param_named(unrestricted_guest, | |||
| 61 | static int __read_mostly emulate_invalid_guest_state = 0; | 61 | static int __read_mostly emulate_invalid_guest_state = 0; |
| 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); | 62 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
| 63 | 63 | ||
| 64 | /* | ||
| 65 | * These two parameters are used to configure the controls for Pause-Loop Exiting: | ||
| 66 | * ple_gap: upper bound on the amount of time between two successive | ||
| 67 | * executions of PAUSE in a loop. Also indicates whether PLE is enabled. | ||
| 68 | * According to tests, this time is usually smaller than 41 cycles. | ||
| 69 | * ple_window: upper bound on the amount of time a guest is allowed to execute | ||
| 70 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | ||
| 71 | * less than 2^12 cycles. | ||
| 72 | * Time is measured based on a counter that runs at the same rate as the TSC; | ||
| 73 | * refer to SDM volume 3b, sections 21.6.13 & 22.1.3. | ||
| 74 | */ | ||
| 75 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | ||
| 76 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | ||
| 77 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | ||
| 78 | module_param(ple_gap, int, S_IRUGO); | ||
| 79 | |||
| 80 | static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; | ||
| 81 | module_param(ple_window, int, S_IRUGO); | ||
| 82 | |||
| 64 | struct vmcs { | 83 | struct vmcs { |
| 65 | u32 revision_id; | 84 | u32 revision_id; |
| 66 | u32 abort; | 85 | u32 abort; |
| 67 | char data[0]; | 86 | char data[0]; |
| 68 | }; | 87 | }; |
| 69 | 88 | ||
| 89 | struct shared_msr_entry { | ||
| 90 | unsigned index; | ||
| 91 | u64 data; | ||
| 92 | u64 mask; | ||
| 93 | }; | ||
| 94 | |||
| 70 | struct vcpu_vmx { | 95 | struct vcpu_vmx { |
| 71 | struct kvm_vcpu vcpu; | 96 | struct kvm_vcpu vcpu; |
| 72 | struct list_head local_vcpus_link; | 97 | struct list_head local_vcpus_link; |
| @@ -74,13 +99,12 @@ struct vcpu_vmx { | |||
| 74 | int launched; | 99 | int launched; |
| 75 | u8 fail; | 100 | u8 fail; |
| 76 | u32 idt_vectoring_info; | 101 | u32 idt_vectoring_info; |
| 77 | struct kvm_msr_entry *guest_msrs; | 102 | struct shared_msr_entry *guest_msrs; |
| 78 | struct kvm_msr_entry *host_msrs; | ||
| 79 | int nmsrs; | 103 | int nmsrs; |
| 80 | int save_nmsrs; | 104 | int save_nmsrs; |
| 81 | int msr_offset_efer; | ||
| 82 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
| 83 | int msr_offset_kernel_gs_base; | 106 | u64 msr_host_kernel_gs_base; |
| 107 | u64 msr_guest_kernel_gs_base; | ||
| 84 | #endif | 108 | #endif |
| 85 | struct vmcs *vmcs; | 109 | struct vmcs *vmcs; |
| 86 | struct { | 110 | struct { |
| @@ -88,7 +112,6 @@ struct vcpu_vmx { | |||
| 88 | u16 fs_sel, gs_sel, ldt_sel; | 112 | u16 fs_sel, gs_sel, ldt_sel; |
| 89 | int gs_ldt_reload_needed; | 113 | int gs_ldt_reload_needed; |
| 90 | int fs_reload_needed; | 114 | int fs_reload_needed; |
| 91 | int guest_efer_loaded; | ||
| 92 | } host_state; | 115 | } host_state; |
| 93 | struct { | 116 | struct { |
| 94 | int vm86_active; | 117 | int vm86_active; |
| @@ -107,7 +130,6 @@ struct vcpu_vmx { | |||
| 107 | } rmode; | 130 | } rmode; |
| 108 | int vpid; | 131 | int vpid; |
| 109 | bool emulation_required; | 132 | bool emulation_required; |
| 110 | enum emulation_result invalid_state_emulation_result; | ||
| 111 | 133 | ||
| 112 | /* Support for vnmi-less CPUs */ | 134 | /* Support for vnmi-less CPUs */ |
| 113 | int soft_vnmi_blocked; | 135 | int soft_vnmi_blocked; |
| @@ -176,6 +198,8 @@ static struct kvm_vmx_segment_field { | |||
| 176 | VMX_SEGMENT_FIELD(LDTR), | 198 | VMX_SEGMENT_FIELD(LDTR), |
| 177 | }; | 199 | }; |
| 178 | 200 | ||
| 201 | static u64 host_efer; | ||
| 202 | |||
| 179 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | 203 | static void ept_save_pdptrs(struct kvm_vcpu *vcpu); |
| 180 | 204 | ||
| 181 | /* | 205 | /* |
| @@ -184,28 +208,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu); | |||
| 184 | */ | 208 | */ |
| 185 | static const u32 vmx_msr_index[] = { | 209 | static const u32 vmx_msr_index[] = { |
| 186 | #ifdef CONFIG_X86_64 | 210 | #ifdef CONFIG_X86_64 |
| 187 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE, | 211 | MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, |
| 188 | #endif | 212 | #endif |
| 189 | MSR_EFER, MSR_K6_STAR, | 213 | MSR_EFER, MSR_K6_STAR, |
| 190 | }; | 214 | }; |
| 191 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | 215 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) |
| 192 | 216 | ||
| 193 | static void load_msrs(struct kvm_msr_entry *e, int n) | ||
| 194 | { | ||
| 195 | int i; | ||
| 196 | |||
| 197 | for (i = 0; i < n; ++i) | ||
| 198 | wrmsrl(e[i].index, e[i].data); | ||
| 199 | } | ||
| 200 | |||
| 201 | static void save_msrs(struct kvm_msr_entry *e, int n) | ||
| 202 | { | ||
| 203 | int i; | ||
| 204 | |||
| 205 | for (i = 0; i < n; ++i) | ||
| 206 | rdmsrl(e[i].index, e[i].data); | ||
| 207 | } | ||
| 208 | |||
| 209 | static inline int is_page_fault(u32 intr_info) | 217 | static inline int is_page_fault(u32 intr_info) |
| 210 | { | 218 | { |
| 211 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | 219 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | |
| @@ -320,6 +328,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void) | |||
| 320 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 328 | SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 321 | } | 329 | } |
| 322 | 330 | ||
| 331 | static inline int cpu_has_vmx_ple(void) | ||
| 332 | { | ||
| 333 | return vmcs_config.cpu_based_2nd_exec_ctrl & | ||
| 334 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
| 335 | } | ||
| 336 | |||
| 323 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 337 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
| 324 | { | 338 | { |
| 325 | return flexpriority_enabled && | 339 | return flexpriority_enabled && |
| @@ -348,7 +362,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | |||
| 348 | int i; | 362 | int i; |
| 349 | 363 | ||
| 350 | for (i = 0; i < vmx->nmsrs; ++i) | 364 | for (i = 0; i < vmx->nmsrs; ++i) |
| 351 | if (vmx->guest_msrs[i].index == msr) | 365 | if (vmx_msr_index[vmx->guest_msrs[i].index] == msr) |
| 352 | return i; | 366 | return i; |
| 353 | return -1; | 367 | return -1; |
| 354 | } | 368 | } |
| @@ -379,7 +393,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) | |||
| 379 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 393 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
| 380 | } | 394 | } |
| 381 | 395 | ||
| 382 | static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) | 396 | static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) |
| 383 | { | 397 | { |
| 384 | int i; | 398 | int i; |
| 385 | 399 | ||
| @@ -570,17 +584,12 @@ static void reload_tss(void) | |||
| 570 | load_TR_desc(); | 584 | load_TR_desc(); |
| 571 | } | 585 | } |
| 572 | 586 | ||
| 573 | static void load_transition_efer(struct vcpu_vmx *vmx) | 587 | static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) |
| 574 | { | 588 | { |
| 575 | int efer_offset = vmx->msr_offset_efer; | ||
| 576 | u64 host_efer; | ||
| 577 | u64 guest_efer; | 589 | u64 guest_efer; |
| 578 | u64 ignore_bits; | 590 | u64 ignore_bits; |
| 579 | 591 | ||
| 580 | if (efer_offset < 0) | 592 | guest_efer = vmx->vcpu.arch.shadow_efer; |
| 581 | return; | ||
| 582 | host_efer = vmx->host_msrs[efer_offset].data; | ||
| 583 | guest_efer = vmx->guest_msrs[efer_offset].data; | ||
| 584 | 593 | ||
| 585 | /* | 594 | /* |
| 586 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless | 595 | * NX is emulated; LMA and LME handled by hardware; SCE meaningless |
| @@ -593,27 +602,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx) | |||
| 593 | if (guest_efer & EFER_LMA) | 602 | if (guest_efer & EFER_LMA) |
| 594 | ignore_bits &= ~(u64)EFER_SCE; | 603 | ignore_bits &= ~(u64)EFER_SCE; |
| 595 | #endif | 604 | #endif |
| 596 | if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits)) | ||
| 597 | return; | ||
| 598 | |||
| 599 | vmx->host_state.guest_efer_loaded = 1; | ||
| 600 | guest_efer &= ~ignore_bits; | 605 | guest_efer &= ~ignore_bits; |
| 601 | guest_efer |= host_efer & ignore_bits; | 606 | guest_efer |= host_efer & ignore_bits; |
| 602 | wrmsrl(MSR_EFER, guest_efer); | 607 | vmx->guest_msrs[efer_offset].data = guest_efer; |
| 603 | vmx->vcpu.stat.efer_reload++; | 608 | vmx->guest_msrs[efer_offset].mask = ~ignore_bits; |
| 604 | } | 609 | return true; |
| 605 | |||
| 606 | static void reload_host_efer(struct vcpu_vmx *vmx) | ||
| 607 | { | ||
| 608 | if (vmx->host_state.guest_efer_loaded) { | ||
| 609 | vmx->host_state.guest_efer_loaded = 0; | ||
| 610 | load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); | ||
| 611 | } | ||
| 612 | } | 610 | } |
| 613 | 611 | ||
| 614 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) | 612 | static void vmx_save_host_state(struct kvm_vcpu *vcpu) |
| 615 | { | 613 | { |
| 616 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 614 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 615 | int i; | ||
| 617 | 616 | ||
| 618 | if (vmx->host_state.loaded) | 617 | if (vmx->host_state.loaded) |
| 619 | return; | 618 | return; |
| @@ -650,13 +649,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
| 650 | #endif | 649 | #endif |
| 651 | 650 | ||
| 652 | #ifdef CONFIG_X86_64 | 651 | #ifdef CONFIG_X86_64 |
| 653 | if (is_long_mode(&vmx->vcpu)) | 652 | if (is_long_mode(&vmx->vcpu)) { |
| 654 | save_msrs(vmx->host_msrs + | 653 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
| 655 | vmx->msr_offset_kernel_gs_base, 1); | 654 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
| 656 | 655 | } | |
| 657 | #endif | 656 | #endif |
| 658 | load_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 657 | for (i = 0; i < vmx->save_nmsrs; ++i) |
| 659 | load_transition_efer(vmx); | 658 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
| 659 | vmx->guest_msrs[i].data, | ||
| 660 | vmx->guest_msrs[i].mask); | ||
| 660 | } | 661 | } |
| 661 | 662 | ||
| 662 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) | 663 | static void __vmx_load_host_state(struct vcpu_vmx *vmx) |
| @@ -684,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
| 684 | local_irq_restore(flags); | 685 | local_irq_restore(flags); |
| 685 | } | 686 | } |
| 686 | reload_tss(); | 687 | reload_tss(); |
| 687 | save_msrs(vmx->guest_msrs, vmx->save_nmsrs); | 688 | #ifdef CONFIG_X86_64 |
| 688 | load_msrs(vmx->host_msrs, vmx->save_nmsrs); | 689 | if (is_long_mode(&vmx->vcpu)) { |
| 689 | reload_host_efer(vmx); | 690 | rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
| 691 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | ||
| 692 | } | ||
| 693 | #endif | ||
| 690 | } | 694 | } |
| 691 | 695 | ||
| 692 | static void vmx_load_host_state(struct vcpu_vmx *vmx) | 696 | static void vmx_load_host_state(struct vcpu_vmx *vmx) |
| @@ -877,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
| 877 | /* | 881 | /* |
| 878 | * Swap MSR entry in host/guest MSR entry array. | 882 | * Swap MSR entry in host/guest MSR entry array. |
| 879 | */ | 883 | */ |
| 880 | #ifdef CONFIG_X86_64 | ||
| 881 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | 884 | static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) |
| 882 | { | 885 | { |
| 883 | struct kvm_msr_entry tmp; | 886 | struct shared_msr_entry tmp; |
| 884 | 887 | ||
| 885 | tmp = vmx->guest_msrs[to]; | 888 | tmp = vmx->guest_msrs[to]; |
| 886 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; | 889 | vmx->guest_msrs[to] = vmx->guest_msrs[from]; |
| 887 | vmx->guest_msrs[from] = tmp; | 890 | vmx->guest_msrs[from] = tmp; |
| 888 | tmp = vmx->host_msrs[to]; | ||
| 889 | vmx->host_msrs[to] = vmx->host_msrs[from]; | ||
| 890 | vmx->host_msrs[from] = tmp; | ||
| 891 | } | 891 | } |
| 892 | #endif | ||
| 893 | 892 | ||
| 894 | /* | 893 | /* |
| 895 | * Set up the vmcs to automatically save and restore system | 894 | * Set up the vmcs to automatically save and restore system |
| @@ -898,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
| 898 | */ | 897 | */ |
| 899 | static void setup_msrs(struct vcpu_vmx *vmx) | 898 | static void setup_msrs(struct vcpu_vmx *vmx) |
| 900 | { | 899 | { |
| 901 | int save_nmsrs; | 900 | int save_nmsrs, index; |
| 902 | unsigned long *msr_bitmap; | 901 | unsigned long *msr_bitmap; |
| 903 | 902 | ||
| 904 | vmx_load_host_state(vmx); | 903 | vmx_load_host_state(vmx); |
| 905 | save_nmsrs = 0; | 904 | save_nmsrs = 0; |
| 906 | #ifdef CONFIG_X86_64 | 905 | #ifdef CONFIG_X86_64 |
| 907 | if (is_long_mode(&vmx->vcpu)) { | 906 | if (is_long_mode(&vmx->vcpu)) { |
| 908 | int index; | ||
| 909 | |||
| 910 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); | 907 | index = __find_msr_index(vmx, MSR_SYSCALL_MASK); |
| 911 | if (index >= 0) | 908 | if (index >= 0) |
| 912 | move_msr_up(vmx, index, save_nmsrs++); | 909 | move_msr_up(vmx, index, save_nmsrs++); |
| @@ -916,9 +913,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
| 916 | index = __find_msr_index(vmx, MSR_CSTAR); | 913 | index = __find_msr_index(vmx, MSR_CSTAR); |
| 917 | if (index >= 0) | 914 | if (index >= 0) |
| 918 | move_msr_up(vmx, index, save_nmsrs++); | 915 | move_msr_up(vmx, index, save_nmsrs++); |
| 919 | index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
| 920 | if (index >= 0) | ||
| 921 | move_msr_up(vmx, index, save_nmsrs++); | ||
| 922 | /* | 916 | /* |
| 923 | * MSR_K6_STAR is only needed on long mode guests, and only | 917 | * MSR_K6_STAR is only needed on long mode guests, and only |
| 924 | * if efer.sce is enabled. | 918 | * if efer.sce is enabled. |
| @@ -928,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
| 928 | move_msr_up(vmx, index, save_nmsrs++); | 922 | move_msr_up(vmx, index, save_nmsrs++); |
| 929 | } | 923 | } |
| 930 | #endif | 924 | #endif |
| 931 | vmx->save_nmsrs = save_nmsrs; | 925 | index = __find_msr_index(vmx, MSR_EFER); |
| 926 | if (index >= 0 && update_transition_efer(vmx, index)) | ||
| 927 | move_msr_up(vmx, index, save_nmsrs++); | ||
| 932 | 928 | ||
| 933 | #ifdef CONFIG_X86_64 | 929 | vmx->save_nmsrs = save_nmsrs; |
| 934 | vmx->msr_offset_kernel_gs_base = | ||
| 935 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | ||
| 936 | #endif | ||
| 937 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | ||
| 938 | 930 | ||
| 939 | if (cpu_has_vmx_msr_bitmap()) { | 931 | if (cpu_has_vmx_msr_bitmap()) { |
| 940 | if (is_long_mode(&vmx->vcpu)) | 932 | if (is_long_mode(&vmx->vcpu)) |
| @@ -976,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc) | |||
| 976 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 968 | static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
| 977 | { | 969 | { |
| 978 | u64 data; | 970 | u64 data; |
| 979 | struct kvm_msr_entry *msr; | 971 | struct shared_msr_entry *msr; |
| 980 | 972 | ||
| 981 | if (!pdata) { | 973 | if (!pdata) { |
| 982 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); | 974 | printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); |
| @@ -991,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
| 991 | case MSR_GS_BASE: | 983 | case MSR_GS_BASE: |
| 992 | data = vmcs_readl(GUEST_GS_BASE); | 984 | data = vmcs_readl(GUEST_GS_BASE); |
| 993 | break; | 985 | break; |
| 986 | case MSR_KERNEL_GS_BASE: | ||
| 987 | vmx_load_host_state(to_vmx(vcpu)); | ||
| 988 | data = to_vmx(vcpu)->msr_guest_kernel_gs_base; | ||
| 989 | break; | ||
| 990 | #endif | ||
| 994 | case MSR_EFER: | 991 | case MSR_EFER: |
| 995 | return kvm_get_msr_common(vcpu, msr_index, pdata); | 992 | return kvm_get_msr_common(vcpu, msr_index, pdata); |
| 996 | #endif | ||
| 997 | case MSR_IA32_TSC: | 993 | case MSR_IA32_TSC: |
| 998 | data = guest_read_tsc(); | 994 | data = guest_read_tsc(); |
| 999 | break; | 995 | break; |
| @@ -1007,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
| 1007 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 1003 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
| 1008 | break; | 1004 | break; |
| 1009 | default: | 1005 | default: |
| 1006 | vmx_load_host_state(to_vmx(vcpu)); | ||
| 1010 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 1007 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
| 1011 | if (msr) { | 1008 | if (msr) { |
| 1012 | vmx_load_host_state(to_vmx(vcpu)); | 1009 | vmx_load_host_state(to_vmx(vcpu)); |
| @@ -1028,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
| 1028 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | 1025 | static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) |
| 1029 | { | 1026 | { |
| 1030 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1027 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 1031 | struct kvm_msr_entry *msr; | 1028 | struct shared_msr_entry *msr; |
| 1032 | u64 host_tsc; | 1029 | u64 host_tsc; |
| 1033 | int ret = 0; | 1030 | int ret = 0; |
| 1034 | 1031 | ||
| @@ -1044,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
| 1044 | case MSR_GS_BASE: | 1041 | case MSR_GS_BASE: |
| 1045 | vmcs_writel(GUEST_GS_BASE, data); | 1042 | vmcs_writel(GUEST_GS_BASE, data); |
| 1046 | break; | 1043 | break; |
| 1044 | case MSR_KERNEL_GS_BASE: | ||
| 1045 | vmx_load_host_state(vmx); | ||
| 1046 | vmx->msr_guest_kernel_gs_base = data; | ||
| 1047 | break; | ||
| 1047 | #endif | 1048 | #endif |
| 1048 | case MSR_IA32_SYSENTER_CS: | 1049 | case MSR_IA32_SYSENTER_CS: |
| 1049 | vmcs_write32(GUEST_SYSENTER_CS, data); | 1050 | vmcs_write32(GUEST_SYSENTER_CS, data); |
| @@ -1097,30 +1098,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) | |||
| 1097 | } | 1098 | } |
| 1098 | } | 1099 | } |
| 1099 | 1100 | ||
| 1100 | static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 1101 | static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) |
| 1101 | { | 1102 | { |
| 1102 | int old_debug = vcpu->guest_debug; | ||
| 1103 | unsigned long flags; | ||
| 1104 | |||
| 1105 | vcpu->guest_debug = dbg->control; | ||
| 1106 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
| 1107 | vcpu->guest_debug = 0; | ||
| 1108 | |||
| 1109 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 1103 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
| 1110 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); | 1104 | vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); |
| 1111 | else | 1105 | else |
| 1112 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | 1106 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); |
| 1113 | 1107 | ||
| 1114 | flags = vmcs_readl(GUEST_RFLAGS); | ||
| 1115 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 1116 | flags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
| 1117 | else if (old_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 1118 | flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
| 1119 | vmcs_writel(GUEST_RFLAGS, flags); | ||
| 1120 | |||
| 1121 | update_exception_bitmap(vcpu); | 1108 | update_exception_bitmap(vcpu); |
| 1122 | |||
| 1123 | return 0; | ||
| 1124 | } | 1109 | } |
| 1125 | 1110 | ||
| 1126 | static __init int cpu_has_kvm_support(void) | 1111 | static __init int cpu_has_kvm_support(void) |
| @@ -1139,12 +1124,15 @@ static __init int vmx_disabled_by_bios(void) | |||
| 1139 | /* locked but not enabled */ | 1124 | /* locked but not enabled */ |
| 1140 | } | 1125 | } |
| 1141 | 1126 | ||
| 1142 | static void hardware_enable(void *garbage) | 1127 | static int hardware_enable(void *garbage) |
| 1143 | { | 1128 | { |
| 1144 | int cpu = raw_smp_processor_id(); | 1129 | int cpu = raw_smp_processor_id(); |
| 1145 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1130 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
| 1146 | u64 old; | 1131 | u64 old; |
| 1147 | 1132 | ||
| 1133 | if (read_cr4() & X86_CR4_VMXE) | ||
| 1134 | return -EBUSY; | ||
| 1135 | |||
| 1148 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | 1136 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); |
| 1149 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1137 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
| 1150 | if ((old & (FEATURE_CONTROL_LOCKED | | 1138 | if ((old & (FEATURE_CONTROL_LOCKED | |
| @@ -1159,6 +1147,10 @@ static void hardware_enable(void *garbage) | |||
| 1159 | asm volatile (ASM_VMX_VMXON_RAX | 1147 | asm volatile (ASM_VMX_VMXON_RAX |
| 1160 | : : "a"(&phys_addr), "m"(phys_addr) | 1148 | : : "a"(&phys_addr), "m"(phys_addr) |
| 1161 | : "memory", "cc"); | 1149 | : "memory", "cc"); |
| 1150 | |||
| 1151 | ept_sync_global(); | ||
| 1152 | |||
| 1153 | return 0; | ||
| 1162 | } | 1154 | } |
| 1163 | 1155 | ||
| 1164 | static void vmclear_local_vcpus(void) | 1156 | static void vmclear_local_vcpus(void) |
| @@ -1250,7 +1242,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
| 1250 | SECONDARY_EXEC_WBINVD_EXITING | | 1242 | SECONDARY_EXEC_WBINVD_EXITING | |
| 1251 | SECONDARY_EXEC_ENABLE_VPID | | 1243 | SECONDARY_EXEC_ENABLE_VPID | |
| 1252 | SECONDARY_EXEC_ENABLE_EPT | | 1244 | SECONDARY_EXEC_ENABLE_EPT | |
| 1253 | SECONDARY_EXEC_UNRESTRICTED_GUEST; | 1245 | SECONDARY_EXEC_UNRESTRICTED_GUEST | |
| 1246 | SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
| 1254 | if (adjust_vmx_controls(min2, opt2, | 1247 | if (adjust_vmx_controls(min2, opt2, |
| 1255 | MSR_IA32_VMX_PROCBASED_CTLS2, | 1248 | MSR_IA32_VMX_PROCBASED_CTLS2, |
| 1256 | &_cpu_based_2nd_exec_control) < 0) | 1249 | &_cpu_based_2nd_exec_control) < 0) |
| @@ -1344,15 +1337,17 @@ static void free_kvm_area(void) | |||
| 1344 | { | 1337 | { |
| 1345 | int cpu; | 1338 | int cpu; |
| 1346 | 1339 | ||
| 1347 | for_each_online_cpu(cpu) | 1340 | for_each_possible_cpu(cpu) { |
| 1348 | free_vmcs(per_cpu(vmxarea, cpu)); | 1341 | free_vmcs(per_cpu(vmxarea, cpu)); |
| 1342 | per_cpu(vmxarea, cpu) = NULL; | ||
| 1343 | } | ||
| 1349 | } | 1344 | } |
| 1350 | 1345 | ||
| 1351 | static __init int alloc_kvm_area(void) | 1346 | static __init int alloc_kvm_area(void) |
| 1352 | { | 1347 | { |
| 1353 | int cpu; | 1348 | int cpu; |
| 1354 | 1349 | ||
| 1355 | for_each_online_cpu(cpu) { | 1350 | for_each_possible_cpu(cpu) { |
| 1356 | struct vmcs *vmcs; | 1351 | struct vmcs *vmcs; |
| 1357 | 1352 | ||
| 1358 | vmcs = alloc_vmcs_cpu(cpu); | 1353 | vmcs = alloc_vmcs_cpu(cpu); |
| @@ -1394,6 +1389,9 @@ static __init int hardware_setup(void) | |||
| 1394 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) | 1389 | if (enable_ept && !cpu_has_vmx_ept_2m_page()) |
| 1395 | kvm_disable_largepages(); | 1390 | kvm_disable_largepages(); |
| 1396 | 1391 | ||
| 1392 | if (!cpu_has_vmx_ple()) | ||
| 1393 | ple_gap = 0; | ||
| 1394 | |||
| 1397 | return alloc_kvm_area(); | 1395 | return alloc_kvm_area(); |
| 1398 | } | 1396 | } |
| 1399 | 1397 | ||
| @@ -1536,8 +1534,16 @@ continue_rmode: | |||
| 1536 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1534 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
| 1537 | { | 1535 | { |
| 1538 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1536 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 1539 | struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); | 1537 | struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); |
| 1538 | |||
| 1539 | if (!msr) | ||
| 1540 | return; | ||
| 1540 | 1541 | ||
| 1542 | /* | ||
| 1543 | * Force kernel_gs_base reloading before EFER changes, as control | ||
| 1544 | * of this msr depends on is_long_mode(). | ||
| 1545 | */ | ||
| 1546 | vmx_load_host_state(to_vmx(vcpu)); | ||
| 1541 | vcpu->arch.shadow_efer = efer; | 1547 | vcpu->arch.shadow_efer = efer; |
| 1542 | if (!msr) | 1548 | if (!msr) |
| 1543 | return; | 1549 | return; |
| @@ -1727,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
| 1727 | vmcs_write64(EPT_POINTER, eptp); | 1733 | vmcs_write64(EPT_POINTER, eptp); |
| 1728 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : | 1734 | guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 : |
| 1729 | vcpu->kvm->arch.ept_identity_map_addr; | 1735 | vcpu->kvm->arch.ept_identity_map_addr; |
| 1736 | ept_load_pdptrs(vcpu); | ||
| 1730 | } | 1737 | } |
| 1731 | 1738 | ||
| 1732 | vmx_flush_tlb(vcpu); | 1739 | vmx_flush_tlb(vcpu); |
| @@ -2302,13 +2309,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2302 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2309 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
| 2303 | if (vmx->vpid == 0) | 2310 | if (vmx->vpid == 0) |
| 2304 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2311 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
| 2305 | if (!enable_ept) | 2312 | if (!enable_ept) { |
| 2306 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2313 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
| 2314 | enable_unrestricted_guest = 0; | ||
| 2315 | } | ||
| 2307 | if (!enable_unrestricted_guest) | 2316 | if (!enable_unrestricted_guest) |
| 2308 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; | 2317 | exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; |
| 2318 | if (!ple_gap) | ||
| 2319 | exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; | ||
| 2309 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2320 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
| 2310 | } | 2321 | } |
| 2311 | 2322 | ||
| 2323 | if (ple_gap) { | ||
| 2324 | vmcs_write32(PLE_GAP, ple_gap); | ||
| 2325 | vmcs_write32(PLE_WINDOW, ple_window); | ||
| 2326 | } | ||
| 2327 | |||
| 2312 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); | 2328 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf); |
| 2313 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); | 2329 | vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf); |
| 2314 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ | 2330 | vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */ |
| @@ -2376,10 +2392,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
| 2376 | if (wrmsr_safe(index, data_low, data_high) < 0) | 2392 | if (wrmsr_safe(index, data_low, data_high) < 0) |
| 2377 | continue; | 2393 | continue; |
| 2378 | data = data_low | ((u64)data_high << 32); | 2394 | data = data_low | ((u64)data_high << 32); |
| 2379 | vmx->host_msrs[j].index = index; | 2395 | vmx->guest_msrs[j].index = i; |
| 2380 | vmx->host_msrs[j].reserved = 0; | 2396 | vmx->guest_msrs[j].data = 0; |
| 2381 | vmx->host_msrs[j].data = data; | 2397 | vmx->guest_msrs[j].mask = -1ull; |
| 2382 | vmx->guest_msrs[j] = vmx->host_msrs[j]; | ||
| 2383 | ++vmx->nmsrs; | 2398 | ++vmx->nmsrs; |
| 2384 | } | 2399 | } |
| 2385 | 2400 | ||
| @@ -2510,7 +2525,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 2510 | if (vmx->vpid != 0) | 2525 | if (vmx->vpid != 0) |
| 2511 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); | 2526 | vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); |
| 2512 | 2527 | ||
| 2513 | vmx->vcpu.arch.cr0 = 0x60000010; | 2528 | vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; |
| 2514 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ | 2529 | vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */ |
| 2515 | vmx_set_cr4(&vmx->vcpu, 0); | 2530 | vmx_set_cr4(&vmx->vcpu, 0); |
| 2516 | vmx_set_efer(&vmx->vcpu, 0); | 2531 | vmx_set_efer(&vmx->vcpu, 0); |
| @@ -2627,6 +2642,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
| 2627 | GUEST_INTR_STATE_NMI)); | 2642 | GUEST_INTR_STATE_NMI)); |
| 2628 | } | 2643 | } |
| 2629 | 2644 | ||
| 2645 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | ||
| 2646 | { | ||
| 2647 | if (!cpu_has_virtual_nmis()) | ||
| 2648 | return to_vmx(vcpu)->soft_vnmi_blocked; | ||
| 2649 | else | ||
| 2650 | return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
| 2651 | GUEST_INTR_STATE_NMI); | ||
| 2652 | } | ||
| 2653 | |||
| 2654 | static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | ||
| 2655 | { | ||
| 2656 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
| 2657 | |||
| 2658 | if (!cpu_has_virtual_nmis()) { | ||
| 2659 | if (vmx->soft_vnmi_blocked != masked) { | ||
| 2660 | vmx->soft_vnmi_blocked = masked; | ||
| 2661 | vmx->vnmi_blocked_time = 0; | ||
| 2662 | } | ||
| 2663 | } else { | ||
| 2664 | if (masked) | ||
| 2665 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
| 2666 | GUEST_INTR_STATE_NMI); | ||
| 2667 | else | ||
| 2668 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
| 2669 | GUEST_INTR_STATE_NMI); | ||
| 2670 | } | ||
| 2671 | } | ||
| 2672 | |||
| 2630 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 2673 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
| 2631 | { | 2674 | { |
| 2632 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | 2675 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
| @@ -2659,7 +2702,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
| 2659 | * Cause the #SS fault with 0 error code in VM86 mode. | 2702 | * Cause the #SS fault with 0 error code in VM86 mode. |
| 2660 | */ | 2703 | */ |
| 2661 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) | 2704 | if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) |
| 2662 | if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE) | 2705 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE) |
| 2663 | return 1; | 2706 | return 1; |
| 2664 | /* | 2707 | /* |
| 2665 | * Forward all other exceptions that are valid in real mode. | 2708 | * Forward all other exceptions that are valid in real mode. |
| @@ -2710,15 +2753,16 @@ static void kvm_machine_check(void) | |||
| 2710 | #endif | 2753 | #endif |
| 2711 | } | 2754 | } |
| 2712 | 2755 | ||
| 2713 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2756 | static int handle_machine_check(struct kvm_vcpu *vcpu) |
| 2714 | { | 2757 | { |
| 2715 | /* already handled by vcpu_run */ | 2758 | /* already handled by vcpu_run */ |
| 2716 | return 1; | 2759 | return 1; |
| 2717 | } | 2760 | } |
| 2718 | 2761 | ||
| 2719 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2762 | static int handle_exception(struct kvm_vcpu *vcpu) |
| 2720 | { | 2763 | { |
| 2721 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2764 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 2765 | struct kvm_run *kvm_run = vcpu->run; | ||
| 2722 | u32 intr_info, ex_no, error_code; | 2766 | u32 intr_info, ex_no, error_code; |
| 2723 | unsigned long cr2, rip, dr6; | 2767 | unsigned long cr2, rip, dr6; |
| 2724 | u32 vect_info; | 2768 | u32 vect_info; |
| @@ -2728,12 +2772,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2728 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2772 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
| 2729 | 2773 | ||
| 2730 | if (is_machine_check(intr_info)) | 2774 | if (is_machine_check(intr_info)) |
| 2731 | return handle_machine_check(vcpu, kvm_run); | 2775 | return handle_machine_check(vcpu); |
| 2732 | 2776 | ||
| 2733 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2777 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
| 2734 | !is_page_fault(intr_info)) | 2778 | !is_page_fault(intr_info)) { |
| 2735 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2779 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
| 2736 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2780 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; |
| 2781 | vcpu->run->internal.ndata = 2; | ||
| 2782 | vcpu->run->internal.data[0] = vect_info; | ||
| 2783 | vcpu->run->internal.data[1] = intr_info; | ||
| 2784 | return 0; | ||
| 2785 | } | ||
| 2737 | 2786 | ||
| 2738 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2787 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
| 2739 | return 1; /* already handled by vmx_vcpu_run() */ | 2788 | return 1; /* already handled by vmx_vcpu_run() */ |
| @@ -2744,7 +2793,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2744 | } | 2793 | } |
| 2745 | 2794 | ||
| 2746 | if (is_invalid_opcode(intr_info)) { | 2795 | if (is_invalid_opcode(intr_info)) { |
| 2747 | er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD); | 2796 | er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD); |
| 2748 | if (er != EMULATE_DONE) | 2797 | if (er != EMULATE_DONE) |
| 2749 | kvm_queue_exception(vcpu, UD_VECTOR); | 2798 | kvm_queue_exception(vcpu, UD_VECTOR); |
| 2750 | return 1; | 2799 | return 1; |
| @@ -2803,20 +2852,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2803 | return 0; | 2852 | return 0; |
| 2804 | } | 2853 | } |
| 2805 | 2854 | ||
| 2806 | static int handle_external_interrupt(struct kvm_vcpu *vcpu, | 2855 | static int handle_external_interrupt(struct kvm_vcpu *vcpu) |
| 2807 | struct kvm_run *kvm_run) | ||
| 2808 | { | 2856 | { |
| 2809 | ++vcpu->stat.irq_exits; | 2857 | ++vcpu->stat.irq_exits; |
| 2810 | return 1; | 2858 | return 1; |
| 2811 | } | 2859 | } |
| 2812 | 2860 | ||
| 2813 | static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2861 | static int handle_triple_fault(struct kvm_vcpu *vcpu) |
| 2814 | { | 2862 | { |
| 2815 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 2863 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
| 2816 | return 0; | 2864 | return 0; |
| 2817 | } | 2865 | } |
| 2818 | 2866 | ||
| 2819 | static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2867 | static int handle_io(struct kvm_vcpu *vcpu) |
| 2820 | { | 2868 | { |
| 2821 | unsigned long exit_qualification; | 2869 | unsigned long exit_qualification; |
| 2822 | int size, in, string; | 2870 | int size, in, string; |
| @@ -2827,8 +2875,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2827 | string = (exit_qualification & 16) != 0; | 2875 | string = (exit_qualification & 16) != 0; |
| 2828 | 2876 | ||
| 2829 | if (string) { | 2877 | if (string) { |
| 2830 | if (emulate_instruction(vcpu, | 2878 | if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) |
| 2831 | kvm_run, 0, 0, 0) == EMULATE_DO_MMIO) | ||
| 2832 | return 0; | 2879 | return 0; |
| 2833 | return 1; | 2880 | return 1; |
| 2834 | } | 2881 | } |
| @@ -2838,7 +2885,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2838 | port = exit_qualification >> 16; | 2885 | port = exit_qualification >> 16; |
| 2839 | 2886 | ||
| 2840 | skip_emulated_instruction(vcpu); | 2887 | skip_emulated_instruction(vcpu); |
| 2841 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2888 | return kvm_emulate_pio(vcpu, in, size, port); |
| 2842 | } | 2889 | } |
| 2843 | 2890 | ||
| 2844 | static void | 2891 | static void |
| @@ -2852,7 +2899,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) | |||
| 2852 | hypercall[2] = 0xc1; | 2899 | hypercall[2] = 0xc1; |
| 2853 | } | 2900 | } |
| 2854 | 2901 | ||
| 2855 | static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2902 | static int handle_cr(struct kvm_vcpu *vcpu) |
| 2856 | { | 2903 | { |
| 2857 | unsigned long exit_qualification, val; | 2904 | unsigned long exit_qualification, val; |
| 2858 | int cr; | 2905 | int cr; |
| @@ -2887,7 +2934,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2887 | return 1; | 2934 | return 1; |
| 2888 | if (cr8_prev <= cr8) | 2935 | if (cr8_prev <= cr8) |
| 2889 | return 1; | 2936 | return 1; |
| 2890 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2937 | vcpu->run->exit_reason = KVM_EXIT_SET_TPR; |
| 2891 | return 0; | 2938 | return 0; |
| 2892 | } | 2939 | } |
| 2893 | }; | 2940 | }; |
| @@ -2922,13 +2969,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2922 | default: | 2969 | default: |
| 2923 | break; | 2970 | break; |
| 2924 | } | 2971 | } |
| 2925 | kvm_run->exit_reason = 0; | 2972 | vcpu->run->exit_reason = 0; |
| 2926 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", | 2973 | pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", |
| 2927 | (int)(exit_qualification >> 4) & 3, cr); | 2974 | (int)(exit_qualification >> 4) & 3, cr); |
| 2928 | return 0; | 2975 | return 0; |
| 2929 | } | 2976 | } |
| 2930 | 2977 | ||
| 2931 | static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2978 | static int handle_dr(struct kvm_vcpu *vcpu) |
| 2932 | { | 2979 | { |
| 2933 | unsigned long exit_qualification; | 2980 | unsigned long exit_qualification; |
| 2934 | unsigned long val; | 2981 | unsigned long val; |
| @@ -2944,13 +2991,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2944 | * guest debugging itself. | 2991 | * guest debugging itself. |
| 2945 | */ | 2992 | */ |
| 2946 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | 2993 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { |
| 2947 | kvm_run->debug.arch.dr6 = vcpu->arch.dr6; | 2994 | vcpu->run->debug.arch.dr6 = vcpu->arch.dr6; |
| 2948 | kvm_run->debug.arch.dr7 = dr; | 2995 | vcpu->run->debug.arch.dr7 = dr; |
| 2949 | kvm_run->debug.arch.pc = | 2996 | vcpu->run->debug.arch.pc = |
| 2950 | vmcs_readl(GUEST_CS_BASE) + | 2997 | vmcs_readl(GUEST_CS_BASE) + |
| 2951 | vmcs_readl(GUEST_RIP); | 2998 | vmcs_readl(GUEST_RIP); |
| 2952 | kvm_run->debug.arch.exception = DB_VECTOR; | 2999 | vcpu->run->debug.arch.exception = DB_VECTOR; |
| 2953 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 3000 | vcpu->run->exit_reason = KVM_EXIT_DEBUG; |
| 2954 | return 0; | 3001 | return 0; |
| 2955 | } else { | 3002 | } else { |
| 2956 | vcpu->arch.dr7 &= ~DR7_GD; | 3003 | vcpu->arch.dr7 &= ~DR7_GD; |
| @@ -3016,13 +3063,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3016 | return 1; | 3063 | return 1; |
| 3017 | } | 3064 | } |
| 3018 | 3065 | ||
| 3019 | static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3066 | static int handle_cpuid(struct kvm_vcpu *vcpu) |
| 3020 | { | 3067 | { |
| 3021 | kvm_emulate_cpuid(vcpu); | 3068 | kvm_emulate_cpuid(vcpu); |
| 3022 | return 1; | 3069 | return 1; |
| 3023 | } | 3070 | } |
| 3024 | 3071 | ||
| 3025 | static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3072 | static int handle_rdmsr(struct kvm_vcpu *vcpu) |
| 3026 | { | 3073 | { |
| 3027 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3074 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
| 3028 | u64 data; | 3075 | u64 data; |
| @@ -3041,7 +3088,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3041 | return 1; | 3088 | return 1; |
| 3042 | } | 3089 | } |
| 3043 | 3090 | ||
| 3044 | static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3091 | static int handle_wrmsr(struct kvm_vcpu *vcpu) |
| 3045 | { | 3092 | { |
| 3046 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; | 3093 | u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX]; |
| 3047 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) | 3094 | u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u) |
| @@ -3058,14 +3105,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3058 | return 1; | 3105 | return 1; |
| 3059 | } | 3106 | } |
| 3060 | 3107 | ||
| 3061 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, | 3108 | static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) |
| 3062 | struct kvm_run *kvm_run) | ||
| 3063 | { | 3109 | { |
| 3064 | return 1; | 3110 | return 1; |
| 3065 | } | 3111 | } |
| 3066 | 3112 | ||
| 3067 | static int handle_interrupt_window(struct kvm_vcpu *vcpu, | 3113 | static int handle_interrupt_window(struct kvm_vcpu *vcpu) |
| 3068 | struct kvm_run *kvm_run) | ||
| 3069 | { | 3114 | { |
| 3070 | u32 cpu_based_vm_exec_control; | 3115 | u32 cpu_based_vm_exec_control; |
| 3071 | 3116 | ||
| @@ -3081,34 +3126,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
| 3081 | * possible | 3126 | * possible |
| 3082 | */ | 3127 | */ |
| 3083 | if (!irqchip_in_kernel(vcpu->kvm) && | 3128 | if (!irqchip_in_kernel(vcpu->kvm) && |
| 3084 | kvm_run->request_interrupt_window && | 3129 | vcpu->run->request_interrupt_window && |
| 3085 | !kvm_cpu_has_interrupt(vcpu)) { | 3130 | !kvm_cpu_has_interrupt(vcpu)) { |
| 3086 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 3131 | vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
| 3087 | return 0; | 3132 | return 0; |
| 3088 | } | 3133 | } |
| 3089 | return 1; | 3134 | return 1; |
| 3090 | } | 3135 | } |
| 3091 | 3136 | ||
| 3092 | static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3137 | static int handle_halt(struct kvm_vcpu *vcpu) |
| 3093 | { | 3138 | { |
| 3094 | skip_emulated_instruction(vcpu); | 3139 | skip_emulated_instruction(vcpu); |
| 3095 | return kvm_emulate_halt(vcpu); | 3140 | return kvm_emulate_halt(vcpu); |
| 3096 | } | 3141 | } |
| 3097 | 3142 | ||
| 3098 | static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3143 | static int handle_vmcall(struct kvm_vcpu *vcpu) |
| 3099 | { | 3144 | { |
| 3100 | skip_emulated_instruction(vcpu); | 3145 | skip_emulated_instruction(vcpu); |
| 3101 | kvm_emulate_hypercall(vcpu); | 3146 | kvm_emulate_hypercall(vcpu); |
| 3102 | return 1; | 3147 | return 1; |
| 3103 | } | 3148 | } |
| 3104 | 3149 | ||
| 3105 | static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3150 | static int handle_vmx_insn(struct kvm_vcpu *vcpu) |
| 3106 | { | 3151 | { |
| 3107 | kvm_queue_exception(vcpu, UD_VECTOR); | 3152 | kvm_queue_exception(vcpu, UD_VECTOR); |
| 3108 | return 1; | 3153 | return 1; |
| 3109 | } | 3154 | } |
| 3110 | 3155 | ||
| 3111 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3156 | static int handle_invlpg(struct kvm_vcpu *vcpu) |
| 3112 | { | 3157 | { |
| 3113 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3158 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 3114 | 3159 | ||
| @@ -3117,14 +3162,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3117 | return 1; | 3162 | return 1; |
| 3118 | } | 3163 | } |
| 3119 | 3164 | ||
| 3120 | static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3165 | static int handle_wbinvd(struct kvm_vcpu *vcpu) |
| 3121 | { | 3166 | { |
| 3122 | skip_emulated_instruction(vcpu); | 3167 | skip_emulated_instruction(vcpu); |
| 3123 | /* TODO: Add support for VT-d/pass-through device */ | 3168 | /* TODO: Add support for VT-d/pass-through device */ |
| 3124 | return 1; | 3169 | return 1; |
| 3125 | } | 3170 | } |
| 3126 | 3171 | ||
| 3127 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3172 | static int handle_apic_access(struct kvm_vcpu *vcpu) |
| 3128 | { | 3173 | { |
| 3129 | unsigned long exit_qualification; | 3174 | unsigned long exit_qualification; |
| 3130 | enum emulation_result er; | 3175 | enum emulation_result er; |
| @@ -3133,7 +3178,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3133 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3178 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
| 3134 | offset = exit_qualification & 0xffful; | 3179 | offset = exit_qualification & 0xffful; |
| 3135 | 3180 | ||
| 3136 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3181 | er = emulate_instruction(vcpu, 0, 0, 0); |
| 3137 | 3182 | ||
| 3138 | if (er != EMULATE_DONE) { | 3183 | if (er != EMULATE_DONE) { |
| 3139 | printk(KERN_ERR | 3184 | printk(KERN_ERR |
| @@ -3144,7 +3189,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3144 | return 1; | 3189 | return 1; |
| 3145 | } | 3190 | } |
| 3146 | 3191 | ||
| 3147 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3192 | static int handle_task_switch(struct kvm_vcpu *vcpu) |
| 3148 | { | 3193 | { |
| 3149 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3194 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3150 | unsigned long exit_qualification; | 3195 | unsigned long exit_qualification; |
| @@ -3198,7 +3243,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3198 | return 1; | 3243 | return 1; |
| 3199 | } | 3244 | } |
| 3200 | 3245 | ||
| 3201 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3246 | static int handle_ept_violation(struct kvm_vcpu *vcpu) |
| 3202 | { | 3247 | { |
| 3203 | unsigned long exit_qualification; | 3248 | unsigned long exit_qualification; |
| 3204 | gpa_t gpa; | 3249 | gpa_t gpa; |
| @@ -3219,8 +3264,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3219 | vmcs_readl(GUEST_LINEAR_ADDRESS)); | 3264 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
| 3220 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3265 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
| 3221 | (long unsigned int)exit_qualification); | 3266 | (long unsigned int)exit_qualification); |
| 3222 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3267 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
| 3223 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; | 3268 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; |
| 3224 | return 0; | 3269 | return 0; |
| 3225 | } | 3270 | } |
| 3226 | 3271 | ||
| @@ -3290,7 +3335,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte, | |||
| 3290 | } | 3335 | } |
| 3291 | } | 3336 | } |
| 3292 | 3337 | ||
| 3293 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3338 | static int handle_ept_misconfig(struct kvm_vcpu *vcpu) |
| 3294 | { | 3339 | { |
| 3295 | u64 sptes[4]; | 3340 | u64 sptes[4]; |
| 3296 | int nr_sptes, i; | 3341 | int nr_sptes, i; |
| @@ -3306,13 +3351,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3306 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) | 3351 | for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i) |
| 3307 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); | 3352 | ept_misconfig_inspect_spte(vcpu, sptes[i-1], i); |
| 3308 | 3353 | ||
| 3309 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3354 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
| 3310 | kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 3355 | vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; |
| 3311 | 3356 | ||
| 3312 | return 0; | 3357 | return 0; |
| 3313 | } | 3358 | } |
| 3314 | 3359 | ||
| 3315 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3360 | static int handle_nmi_window(struct kvm_vcpu *vcpu) |
| 3316 | { | 3361 | { |
| 3317 | u32 cpu_based_vm_exec_control; | 3362 | u32 cpu_based_vm_exec_control; |
| 3318 | 3363 | ||
| @@ -3325,36 +3370,50 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3325 | return 1; | 3370 | return 1; |
| 3326 | } | 3371 | } |
| 3327 | 3372 | ||
| 3328 | static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | 3373 | static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) |
| 3329 | struct kvm_run *kvm_run) | ||
| 3330 | { | 3374 | { |
| 3331 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3375 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3332 | enum emulation_result err = EMULATE_DONE; | 3376 | enum emulation_result err = EMULATE_DONE; |
| 3333 | 3377 | int ret = 1; | |
| 3334 | local_irq_enable(); | ||
| 3335 | preempt_enable(); | ||
| 3336 | 3378 | ||
| 3337 | while (!guest_state_valid(vcpu)) { | 3379 | while (!guest_state_valid(vcpu)) { |
| 3338 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3380 | err = emulate_instruction(vcpu, 0, 0, 0); |
| 3339 | 3381 | ||
| 3340 | if (err == EMULATE_DO_MMIO) | 3382 | if (err == EMULATE_DO_MMIO) { |
| 3341 | break; | 3383 | ret = 0; |
| 3384 | goto out; | ||
| 3385 | } | ||
| 3342 | 3386 | ||
| 3343 | if (err != EMULATE_DONE) { | 3387 | if (err != EMULATE_DONE) { |
| 3344 | kvm_report_emulation_failure(vcpu, "emulation failure"); | 3388 | kvm_report_emulation_failure(vcpu, "emulation failure"); |
| 3345 | break; | 3389 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |
| 3390 | vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; | ||
| 3391 | vcpu->run->internal.ndata = 0; | ||
| 3392 | ret = 0; | ||
| 3393 | goto out; | ||
| 3346 | } | 3394 | } |
| 3347 | 3395 | ||
| 3348 | if (signal_pending(current)) | 3396 | if (signal_pending(current)) |
| 3349 | break; | 3397 | goto out; |
| 3350 | if (need_resched()) | 3398 | if (need_resched()) |
| 3351 | schedule(); | 3399 | schedule(); |
| 3352 | } | 3400 | } |
| 3353 | 3401 | ||
| 3354 | preempt_disable(); | 3402 | vmx->emulation_required = 0; |
| 3355 | local_irq_disable(); | 3403 | out: |
| 3404 | return ret; | ||
| 3405 | } | ||
| 3356 | 3406 | ||
| 3357 | vmx->invalid_state_emulation_result = err; | 3407 | /* |
| 3408 | * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE | ||
| 3409 | * exiting, so only get here on cpu with PAUSE-Loop-Exiting. | ||
| 3410 | */ | ||
| 3411 | static int handle_pause(struct kvm_vcpu *vcpu) | ||
| 3412 | { | ||
| 3413 | skip_emulated_instruction(vcpu); | ||
| 3414 | kvm_vcpu_on_spin(vcpu); | ||
| 3415 | |||
| 3416 | return 1; | ||
| 3358 | } | 3417 | } |
| 3359 | 3418 | ||
| 3360 | /* | 3419 | /* |
| @@ -3362,8 +3421,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
| 3362 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 3421 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
| 3363 | * to be done to userspace and return 0. | 3422 | * to be done to userspace and return 0. |
| 3364 | */ | 3423 | */ |
| 3365 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | 3424 | static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { |
| 3366 | struct kvm_run *kvm_run) = { | ||
| 3367 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 3425 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
| 3368 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 3426 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
| 3369 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 3427 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
| @@ -3394,6 +3452,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
| 3394 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | 3452 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, |
| 3395 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3453 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
| 3396 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, | 3454 | [EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig, |
| 3455 | [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause, | ||
| 3397 | }; | 3456 | }; |
| 3398 | 3457 | ||
| 3399 | static const int kvm_vmx_max_exit_handlers = | 3458 | static const int kvm_vmx_max_exit_handlers = |
| @@ -3403,7 +3462,7 @@ static const int kvm_vmx_max_exit_handlers = | |||
| 3403 | * The guest has exited. See if we can fix it or if we need userspace | 3462 | * The guest has exited. See if we can fix it or if we need userspace |
| 3404 | * assistance. | 3463 | * assistance. |
| 3405 | */ | 3464 | */ |
| 3406 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3465 | static int vmx_handle_exit(struct kvm_vcpu *vcpu) |
| 3407 | { | 3466 | { |
| 3408 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3467 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3409 | u32 exit_reason = vmx->exit_reason; | 3468 | u32 exit_reason = vmx->exit_reason; |
| @@ -3411,13 +3470,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3411 | 3470 | ||
| 3412 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); | 3471 | trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); |
| 3413 | 3472 | ||
| 3414 | /* If we need to emulate an MMIO from handle_invalid_guest_state | 3473 | /* If guest state is invalid, start emulating */ |
| 3415 | * we just return 0 */ | 3474 | if (vmx->emulation_required && emulate_invalid_guest_state) |
| 3416 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3475 | return handle_invalid_guest_state(vcpu); |
| 3417 | if (guest_state_valid(vcpu)) | ||
| 3418 | vmx->emulation_required = 0; | ||
| 3419 | return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO; | ||
| 3420 | } | ||
| 3421 | 3476 | ||
| 3422 | /* Access CR3 don't cause VMExit in paging mode, so we need | 3477 | /* Access CR3 don't cause VMExit in paging mode, so we need |
| 3423 | * to sync with guest real CR3. */ | 3478 | * to sync with guest real CR3. */ |
| @@ -3425,8 +3480,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3425 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3480 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
| 3426 | 3481 | ||
| 3427 | if (unlikely(vmx->fail)) { | 3482 | if (unlikely(vmx->fail)) { |
| 3428 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 3483 | vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
| 3429 | kvm_run->fail_entry.hardware_entry_failure_reason | 3484 | vcpu->run->fail_entry.hardware_entry_failure_reason |
| 3430 | = vmcs_read32(VM_INSTRUCTION_ERROR); | 3485 | = vmcs_read32(VM_INSTRUCTION_ERROR); |
| 3431 | return 0; | 3486 | return 0; |
| 3432 | } | 3487 | } |
| @@ -3459,10 +3514,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
| 3459 | 3514 | ||
| 3460 | if (exit_reason < kvm_vmx_max_exit_handlers | 3515 | if (exit_reason < kvm_vmx_max_exit_handlers |
| 3461 | && kvm_vmx_exit_handlers[exit_reason]) | 3516 | && kvm_vmx_exit_handlers[exit_reason]) |
| 3462 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3517 | return kvm_vmx_exit_handlers[exit_reason](vcpu); |
| 3463 | else { | 3518 | else { |
| 3464 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3519 | vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; |
| 3465 | kvm_run->hw.hardware_exit_reason = exit_reason; | 3520 | vcpu->run->hw.hardware_exit_reason = exit_reason; |
| 3466 | } | 3521 | } |
| 3467 | return 0; | 3522 | return 0; |
| 3468 | } | 3523 | } |
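With kvm_run dropped from every signature, the handlers and vmx_handle_exit() itself reach the userspace-mapped run area through vcpu->run. A bare-bones sketch of the resulting handler shape, where the in-kernel check is a hypothetical placeholder rather than a real helper:

    static int handle_example(struct kvm_vcpu *vcpu)
    {
            if (example_handled_in_kernel(vcpu))        /* hypothetical */
                    return 1;                           /* resume the guest */

            /* hand the exit to userspace: describe it in vcpu->run */
            vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
            vcpu->run->hw.hardware_exit_reason = vmcs_read32(VM_EXIT_REASON);
            return 0;
    }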
| @@ -3600,23 +3655,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
| 3600 | #define Q "l" | 3655 | #define Q "l" |
| 3601 | #endif | 3656 | #endif |
| 3602 | 3657 | ||
| 3603 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3658 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) |
| 3604 | { | 3659 | { |
| 3605 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3660 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
| 3606 | 3661 | ||
| 3607 | if (enable_ept && is_paging(vcpu)) { | ||
| 3608 | vmcs_writel(GUEST_CR3, vcpu->arch.cr3); | ||
| 3609 | ept_load_pdptrs(vcpu); | ||
| 3610 | } | ||
| 3611 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3662 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
| 3612 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3663 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
| 3613 | vmx->entry_time = ktime_get(); | 3664 | vmx->entry_time = ktime_get(); |
| 3614 | 3665 | ||
| 3615 | /* Handle invalid guest state instead of entering VMX */ | 3666 | /* Don't enter VMX if guest state is invalid, let the exit handler |
| 3616 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3667 | start emulation until we arrive back to a valid state */ |
| 3617 | handle_invalid_guest_state(vcpu, kvm_run); | 3668 | if (vmx->emulation_required && emulate_invalid_guest_state) |
| 3618 | return; | 3669 | return; |
| 3619 | } | ||
| 3620 | 3670 | ||
| 3621 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) | 3671 | if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) |
| 3622 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); | 3672 | vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); |
| @@ -3775,7 +3825,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
| 3775 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); | 3825 | __clear_bit(vmx->vpid, vmx_vpid_bitmap); |
| 3776 | spin_unlock(&vmx_vpid_lock); | 3826 | spin_unlock(&vmx_vpid_lock); |
| 3777 | vmx_free_vmcs(vcpu); | 3827 | vmx_free_vmcs(vcpu); |
| 3778 | kfree(vmx->host_msrs); | ||
| 3779 | kfree(vmx->guest_msrs); | 3828 | kfree(vmx->guest_msrs); |
| 3780 | kvm_vcpu_uninit(vcpu); | 3829 | kvm_vcpu_uninit(vcpu); |
| 3781 | kmem_cache_free(kvm_vcpu_cache, vmx); | 3830 | kmem_cache_free(kvm_vcpu_cache, vmx); |
| @@ -3802,10 +3851,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 3802 | goto uninit_vcpu; | 3851 | goto uninit_vcpu; |
| 3803 | } | 3852 | } |
| 3804 | 3853 | ||
| 3805 | vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
| 3806 | if (!vmx->host_msrs) | ||
| 3807 | goto free_guest_msrs; | ||
| 3808 | |||
| 3809 | vmx->vmcs = alloc_vmcs(); | 3854 | vmx->vmcs = alloc_vmcs(); |
| 3810 | if (!vmx->vmcs) | 3855 | if (!vmx->vmcs) |
| 3811 | goto free_msrs; | 3856 | goto free_msrs; |
| @@ -3836,8 +3881,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 3836 | free_vmcs: | 3881 | free_vmcs: |
| 3837 | free_vmcs(vmx->vmcs); | 3882 | free_vmcs(vmx->vmcs); |
| 3838 | free_msrs: | 3883 | free_msrs: |
| 3839 | kfree(vmx->host_msrs); | ||
| 3840 | free_guest_msrs: | ||
| 3841 | kfree(vmx->guest_msrs); | 3884 | kfree(vmx->guest_msrs); |
| 3842 | uninit_vcpu: | 3885 | uninit_vcpu: |
| 3843 | kvm_vcpu_uninit(&vmx->vcpu); | 3886 | kvm_vcpu_uninit(&vmx->vcpu); |
| @@ -3973,6 +4016,8 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 3973 | .queue_exception = vmx_queue_exception, | 4016 | .queue_exception = vmx_queue_exception, |
| 3974 | .interrupt_allowed = vmx_interrupt_allowed, | 4017 | .interrupt_allowed = vmx_interrupt_allowed, |
| 3975 | .nmi_allowed = vmx_nmi_allowed, | 4018 | .nmi_allowed = vmx_nmi_allowed, |
| 4019 | .get_nmi_mask = vmx_get_nmi_mask, | ||
| 4020 | .set_nmi_mask = vmx_set_nmi_mask, | ||
| 3976 | .enable_nmi_window = enable_nmi_window, | 4021 | .enable_nmi_window = enable_nmi_window, |
| 3977 | .enable_irq_window = enable_irq_window, | 4022 | .enable_irq_window = enable_irq_window, |
| 3978 | .update_cr8_intercept = update_cr8_intercept, | 4023 | .update_cr8_intercept = update_cr8_intercept, |
| @@ -3987,7 +4032,12 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
| 3987 | 4032 | ||
| 3988 | static int __init vmx_init(void) | 4033 | static int __init vmx_init(void) |
| 3989 | { | 4034 | { |
| 3990 | int r; | 4035 | int r, i; |
| 4036 | |||
| 4037 | rdmsrl_safe(MSR_EFER, &host_efer); | ||
| 4038 | |||
| 4039 | for (i = 0; i < NR_VMX_MSR; ++i) | ||
| 4040 | kvm_define_shared_msr(i, vmx_msr_index[i]); | ||
| 3991 | 4041 | ||
| 3992 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); | 4042 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
| 3993 | if (!vmx_io_bitmap_a) | 4043 | if (!vmx_io_bitmap_a) |
| @@ -4049,8 +4099,6 @@ static int __init vmx_init(void) | |||
| 4049 | if (bypass_guest_pf) | 4099 | if (bypass_guest_pf) |
| 4050 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 4100 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
| 4051 | 4101 | ||
| 4052 | ept_sync_global(); | ||
| 4053 | |||
| 4054 | return 0; | 4102 | return 0; |
| 4055 | 4103 | ||
| 4056 | out3: | 4104 | out3: |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4fc80174191c..9d068966fb2a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <linux/iommu.h> | 37 | #include <linux/iommu.h> |
| 38 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
| 39 | #include <linux/cpufreq.h> | 39 | #include <linux/cpufreq.h> |
| 40 | #include <linux/user-return-notifier.h> | ||
| 40 | #include <trace/events/kvm.h> | 41 | #include <trace/events/kvm.h> |
| 41 | #undef TRACE_INCLUDE_FILE | 42 | #undef TRACE_INCLUDE_FILE |
| 42 | #define CREATE_TRACE_POINTS | 43 | #define CREATE_TRACE_POINTS |
| @@ -88,6 +89,25 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
| 88 | int ignore_msrs = 0; | 89 | int ignore_msrs = 0; |
| 89 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); | 90 | module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR); |
| 90 | 91 | ||
| 92 | #define KVM_NR_SHARED_MSRS 16 | ||
| 93 | |||
| 94 | struct kvm_shared_msrs_global { | ||
| 95 | int nr; | ||
| 96 | struct kvm_shared_msr { | ||
| 97 | u32 msr; | ||
| 98 | u64 value; | ||
| 99 | } msrs[KVM_NR_SHARED_MSRS]; | ||
| 100 | }; | ||
| 101 | |||
| 102 | struct kvm_shared_msrs { | ||
| 103 | struct user_return_notifier urn; | ||
| 104 | bool registered; | ||
| 105 | u64 current_value[KVM_NR_SHARED_MSRS]; | ||
| 106 | }; | ||
| 107 | |||
| 108 | static struct kvm_shared_msrs_global __read_mostly shared_msrs_global; | ||
| 109 | static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs); | ||
| 110 | |||
| 91 | struct kvm_stats_debugfs_item debugfs_entries[] = { | 111 | struct kvm_stats_debugfs_item debugfs_entries[] = { |
| 92 | { "pf_fixed", VCPU_STAT(pf_fixed) }, | 112 | { "pf_fixed", VCPU_STAT(pf_fixed) }, |
| 93 | { "pf_guest", VCPU_STAT(pf_guest) }, | 113 | { "pf_guest", VCPU_STAT(pf_guest) }, |
| @@ -124,6 +144,72 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
| 124 | { NULL } | 144 | { NULL } |
| 125 | }; | 145 | }; |
| 126 | 146 | ||
| 147 | static void kvm_on_user_return(struct user_return_notifier *urn) | ||
| 148 | { | ||
| 149 | unsigned slot; | ||
| 150 | struct kvm_shared_msr *global; | ||
| 151 | struct kvm_shared_msrs *locals | ||
| 152 | = container_of(urn, struct kvm_shared_msrs, urn); | ||
| 153 | |||
| 154 | for (slot = 0; slot < shared_msrs_global.nr; ++slot) { | ||
| 155 | global = &shared_msrs_global.msrs[slot]; | ||
| 156 | if (global->value != locals->current_value[slot]) { | ||
| 157 | wrmsrl(global->msr, global->value); | ||
| 158 | locals->current_value[slot] = global->value; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | locals->registered = false; | ||
| 162 | user_return_notifier_unregister(urn); | ||
| 163 | } | ||
| 164 | |||
| 165 | void kvm_define_shared_msr(unsigned slot, u32 msr) | ||
| 166 | { | ||
| 167 | int cpu; | ||
| 168 | u64 value; | ||
| 169 | |||
| 170 | if (slot >= shared_msrs_global.nr) | ||
| 171 | shared_msrs_global.nr = slot + 1; | ||
| 172 | shared_msrs_global.msrs[slot].msr = msr; | ||
| 173 | rdmsrl_safe(msr, &value); | ||
| 174 | shared_msrs_global.msrs[slot].value = value; | ||
| 175 | for_each_online_cpu(cpu) | ||
| 176 | per_cpu(shared_msrs, cpu).current_value[slot] = value; | ||
| 177 | } | ||
| 178 | EXPORT_SYMBOL_GPL(kvm_define_shared_msr); | ||
| 179 | |||
| 180 | static void kvm_shared_msr_cpu_online(void) | ||
| 181 | { | ||
| 182 | unsigned i; | ||
| 183 | struct kvm_shared_msrs *locals = &__get_cpu_var(shared_msrs); | ||
| 184 | |||
| 185 | for (i = 0; i < shared_msrs_global.nr; ++i) | ||
| 186 | locals->current_value[i] = shared_msrs_global.msrs[i].value; | ||
| 187 | } | ||
| 188 | |||
| 189 | void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) | ||
| 190 | { | ||
| 191 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
| 192 | |||
| 193 | if (((value ^ smsr->current_value[slot]) & mask) == 0) | ||
| 194 | return; | ||
| 195 | smsr->current_value[slot] = value; | ||
| 196 | wrmsrl(shared_msrs_global.msrs[slot].msr, value); | ||
| 197 | if (!smsr->registered) { | ||
| 198 | smsr->urn.on_user_return = kvm_on_user_return; | ||
| 199 | user_return_notifier_register(&smsr->urn); | ||
| 200 | smsr->registered = true; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | EXPORT_SYMBOL_GPL(kvm_set_shared_msr); | ||
| 204 | |||
| 205 | static void drop_user_return_notifiers(void *ignore) | ||
| 206 | { | ||
| 207 | struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs); | ||
| 208 | |||
| 209 | if (smsr->registered) | ||
| 210 | kvm_on_user_return(&smsr->urn); | ||
| 211 | } | ||
| 212 | |||
| 127 | unsigned long segment_base(u16 selector) | 213 | unsigned long segment_base(u16 selector) |
| 128 | { | 214 | { |
| 129 | struct descriptor_table gdt; | 215 | struct descriptor_table gdt; |
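Taken together, the shared-MSR helpers give vendor code a lazy MSR-switching pattern: declare the slots once at module init (vmx_init() now does this for vmx_msr_index[]), write the guest value on the entry path only when it differs from what is currently live on the CPU, and let the user-return notifier put the host value back the first time the thread really returns to user mode. A condensed sketch under those assumptions, with the slot number and guest value purely illustrative:

    /* module init: slot 0 will track MSR_K6_STAR */
    kvm_define_shared_msr(0, MSR_K6_STAR);

    /* guest entry: write the guest's value only if it changed; the
     * all-ones mask compares every bit */
    kvm_set_shared_msr(0, guest_star, -1ull);   /* guest_star: hypothetical */

    /* no explicit restore on vmexit: kvm_on_user_return() rewrites the
     * host value when the task next returns to user mode */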
| @@ -485,16 +571,19 @@ static inline u32 bit(int bitno) | |||
| 485 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. | 571 | * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. |
| 486 | * | 572 | * |
| 487 | * This list is modified at module load time to reflect the | 573 | * This list is modified at module load time to reflect the |
| 488 | * capabilities of the host cpu. | 574 | * capabilities of the host cpu. This capabilities test skips MSRs that are |
| 575 | * kvm-specific. Those are put in the beginning of the list. | ||
| 489 | */ | 576 | */ |
| 577 | |||
| 578 | #define KVM_SAVE_MSRS_BEGIN 2 | ||
| 490 | static u32 msrs_to_save[] = { | 579 | static u32 msrs_to_save[] = { |
| 580 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | ||
| 491 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 581 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
| 492 | MSR_K6_STAR, | 582 | MSR_K6_STAR, |
| 493 | #ifdef CONFIG_X86_64 | 583 | #ifdef CONFIG_X86_64 |
| 494 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 584 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
| 495 | #endif | 585 | #endif |
| 496 | MSR_IA32_TSC, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 586 | MSR_IA32_TSC, MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA |
| 497 | MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA | ||
| 498 | }; | 587 | }; |
| 499 | 588 | ||
| 500 | static unsigned num_msrs_to_save; | 589 | static unsigned num_msrs_to_save; |
| @@ -678,7 +767,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
| 678 | /* With all the info we got, fill in the values */ | 767 | /* With all the info we got, fill in the values */ |
| 679 | 768 | ||
| 680 | vcpu->hv_clock.system_time = ts.tv_nsec + | 769 | vcpu->hv_clock.system_time = ts.tv_nsec + |
| 681 | (NSEC_PER_SEC * (u64)ts.tv_sec); | 770 | (NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset; |
| 771 | |||
| 682 | /* | 772 | /* |
| 683 | * The interface expects us to write an even number signaling that the | 773 | * The interface expects us to write an even number signaling that the |
| 684 | * update is finished. Since the guest won't see the intermediate | 774 | * update is finished. Since the guest won't see the intermediate |
| @@ -836,6 +926,38 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 836 | return 0; | 926 | return 0; |
| 837 | } | 927 | } |
| 838 | 928 | ||
| 929 | static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) | ||
| 930 | { | ||
| 931 | struct kvm *kvm = vcpu->kvm; | ||
| 932 | int lm = is_long_mode(vcpu); | ||
| 933 | u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64 | ||
| 934 | : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32; | ||
| 935 | u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64 | ||
| 936 | : kvm->arch.xen_hvm_config.blob_size_32; | ||
| 937 | u32 page_num = data & ~PAGE_MASK; | ||
| 938 | u64 page_addr = data & PAGE_MASK; | ||
| 939 | u8 *page; | ||
| 940 | int r; | ||
| 941 | |||
| 942 | r = -E2BIG; | ||
| 943 | if (page_num >= blob_size) | ||
| 944 | goto out; | ||
| 945 | r = -ENOMEM; | ||
| 946 | page = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
| 947 | if (!page) | ||
| 948 | goto out; | ||
| 949 | r = -EFAULT; | ||
| 950 | if (copy_from_user(page, blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE)) | ||
| 951 | goto out_free; | ||
| 952 | if (kvm_write_guest(kvm, page_addr, page, PAGE_SIZE)) | ||
| 953 | goto out_free; | ||
| 954 | r = 0; | ||
| 955 | out_free: | ||
| 956 | kfree(page); | ||
| 957 | out: | ||
| 958 | return r; | ||
| 959 | } | ||
| 960 | |||
| 839 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 961 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
| 840 | { | 962 | { |
| 841 | switch (msr) { | 963 | switch (msr) { |
| @@ -951,6 +1073,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
| 951 | "0x%x data 0x%llx\n", msr, data); | 1073 | "0x%x data 0x%llx\n", msr, data); |
| 952 | break; | 1074 | break; |
| 953 | default: | 1075 | default: |
| 1076 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | ||
| 1077 | return xen_hvm_config(vcpu, data); | ||
| 954 | if (!ignore_msrs) { | 1078 | if (!ignore_msrs) { |
| 955 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", | 1079 | pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", |
| 956 | msr, data); | 1080 | msr, data); |
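To see the dispatch above end to end: once userspace has filled kvm->arch.xen_hvm_config through the KVM_XEN_HVM_CONFIG vm ioctl, a guest write to the configured MSR carries both the destination page and the blob page index in a single value, which is exactly what xen_hvm_config() picks apart. A hedged guest-side sketch with made-up numbers (the MSR number and addresses are hypothetical, agreed on between userspace and the guest):

    /* guest kernel, illustrative: ask KVM to copy page 0 of the
     * hypercall blob into the guest page at 0x1000 */
    #define XEN_HYPERCALL_MSR   0x40000000
    wrmsrl(XEN_HYPERCALL_MSR, 0x1000 /* page_addr */ | 0 /* page_num */);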
| @@ -1225,6 +1349,9 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1225 | case KVM_CAP_PIT2: | 1349 | case KVM_CAP_PIT2: |
| 1226 | case KVM_CAP_PIT_STATE2: | 1350 | case KVM_CAP_PIT_STATE2: |
| 1227 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: | 1351 | case KVM_CAP_SET_IDENTITY_MAP_ADDR: |
| 1352 | case KVM_CAP_XEN_HVM: | ||
| 1353 | case KVM_CAP_ADJUST_CLOCK: | ||
| 1354 | case KVM_CAP_VCPU_EVENTS: | ||
| 1228 | r = 1; | 1355 | r = 1; |
| 1229 | break; | 1356 | break; |
| 1230 | case KVM_CAP_COALESCED_MMIO: | 1357 | case KVM_CAP_COALESCED_MMIO: |
| @@ -1239,8 +1366,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 1239 | case KVM_CAP_NR_MEMSLOTS: | 1366 | case KVM_CAP_NR_MEMSLOTS: |
| 1240 | r = KVM_MEMORY_SLOTS; | 1367 | r = KVM_MEMORY_SLOTS; |
| 1241 | break; | 1368 | break; |
| 1242 | case KVM_CAP_PV_MMU: | 1369 | case KVM_CAP_PV_MMU: /* obsolete */ |
| 1243 | r = !tdp_enabled; | 1370 | r = 0; |
| 1244 | break; | 1371 | break; |
| 1245 | case KVM_CAP_IOMMU: | 1372 | case KVM_CAP_IOMMU: |
| 1246 | r = iommu_found(); | 1373 | r = iommu_found(); |
| @@ -1327,6 +1454,12 @@ out: | |||
| 1327 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | 1454 | void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) |
| 1328 | { | 1455 | { |
| 1329 | kvm_x86_ops->vcpu_load(vcpu, cpu); | 1456 | kvm_x86_ops->vcpu_load(vcpu, cpu); |
| 1457 | if (unlikely(per_cpu(cpu_tsc_khz, cpu) == 0)) { | ||
| 1458 | unsigned long khz = cpufreq_quick_get(cpu); | ||
| 1459 | if (!khz) | ||
| 1460 | khz = tsc_khz; | ||
| 1461 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
| 1462 | } | ||
| 1330 | kvm_request_guest_time_update(vcpu); | 1463 | kvm_request_guest_time_update(vcpu); |
| 1331 | } | 1464 | } |
| 1332 | 1465 | ||
| @@ -1760,6 +1893,61 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
| 1760 | return 0; | 1893 | return 0; |
| 1761 | } | 1894 | } |
| 1762 | 1895 | ||
| 1896 | static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | ||
| 1897 | struct kvm_vcpu_events *events) | ||
| 1898 | { | ||
| 1899 | vcpu_load(vcpu); | ||
| 1900 | |||
| 1901 | events->exception.injected = vcpu->arch.exception.pending; | ||
| 1902 | events->exception.nr = vcpu->arch.exception.nr; | ||
| 1903 | events->exception.has_error_code = vcpu->arch.exception.has_error_code; | ||
| 1904 | events->exception.error_code = vcpu->arch.exception.error_code; | ||
| 1905 | |||
| 1906 | events->interrupt.injected = vcpu->arch.interrupt.pending; | ||
| 1907 | events->interrupt.nr = vcpu->arch.interrupt.nr; | ||
| 1908 | events->interrupt.soft = vcpu->arch.interrupt.soft; | ||
| 1909 | |||
| 1910 | events->nmi.injected = vcpu->arch.nmi_injected; | ||
| 1911 | events->nmi.pending = vcpu->arch.nmi_pending; | ||
| 1912 | events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu); | ||
| 1913 | |||
| 1914 | events->sipi_vector = vcpu->arch.sipi_vector; | ||
| 1915 | |||
| 1916 | events->flags = 0; | ||
| 1917 | |||
| 1918 | vcpu_put(vcpu); | ||
| 1919 | } | ||
| 1920 | |||
| 1921 | static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | ||
| 1922 | struct kvm_vcpu_events *events) | ||
| 1923 | { | ||
| 1924 | if (events->flags) | ||
| 1925 | return -EINVAL; | ||
| 1926 | |||
| 1927 | vcpu_load(vcpu); | ||
| 1928 | |||
| 1929 | vcpu->arch.exception.pending = events->exception.injected; | ||
| 1930 | vcpu->arch.exception.nr = events->exception.nr; | ||
| 1931 | vcpu->arch.exception.has_error_code = events->exception.has_error_code; | ||
| 1932 | vcpu->arch.exception.error_code = events->exception.error_code; | ||
| 1933 | |||
| 1934 | vcpu->arch.interrupt.pending = events->interrupt.injected; | ||
| 1935 | vcpu->arch.interrupt.nr = events->interrupt.nr; | ||
| 1936 | vcpu->arch.interrupt.soft = events->interrupt.soft; | ||
| 1937 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
| 1938 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
| 1939 | |||
| 1940 | vcpu->arch.nmi_injected = events->nmi.injected; | ||
| 1941 | vcpu->arch.nmi_pending = events->nmi.pending; | ||
| 1942 | kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked); | ||
| 1943 | |||
| 1944 | vcpu->arch.sipi_vector = events->sipi_vector; | ||
| 1945 | |||
| 1946 | vcpu_put(vcpu); | ||
| 1947 | |||
| 1948 | return 0; | ||
| 1949 | } | ||
| 1950 | |||
| 1763 | long kvm_arch_vcpu_ioctl(struct file *filp, | 1951 | long kvm_arch_vcpu_ioctl(struct file *filp, |
| 1764 | unsigned int ioctl, unsigned long arg) | 1952 | unsigned int ioctl, unsigned long arg) |
| 1765 | { | 1953 | { |
| @@ -1770,6 +1958,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1770 | 1958 | ||
| 1771 | switch (ioctl) { | 1959 | switch (ioctl) { |
| 1772 | case KVM_GET_LAPIC: { | 1960 | case KVM_GET_LAPIC: { |
| 1961 | r = -EINVAL; | ||
| 1962 | if (!vcpu->arch.apic) | ||
| 1963 | goto out; | ||
| 1773 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 1964 | lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 1774 | 1965 | ||
| 1775 | r = -ENOMEM; | 1966 | r = -ENOMEM; |
| @@ -1785,6 +1976,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1785 | break; | 1976 | break; |
| 1786 | } | 1977 | } |
| 1787 | case KVM_SET_LAPIC: { | 1978 | case KVM_SET_LAPIC: { |
| 1979 | r = -EINVAL; | ||
| 1980 | if (!vcpu->arch.apic) | ||
| 1981 | goto out; | ||
| 1788 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); | 1982 | lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); |
| 1789 | r = -ENOMEM; | 1983 | r = -ENOMEM; |
| 1790 | if (!lapic) | 1984 | if (!lapic) |
| @@ -1911,6 +2105,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
| 1911 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); | 2105 | r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce); |
| 1912 | break; | 2106 | break; |
| 1913 | } | 2107 | } |
| 2108 | case KVM_GET_VCPU_EVENTS: { | ||
| 2109 | struct kvm_vcpu_events events; | ||
| 2110 | |||
| 2111 | kvm_vcpu_ioctl_x86_get_vcpu_events(vcpu, &events); | ||
| 2112 | |||
| 2113 | r = -EFAULT; | ||
| 2114 | if (copy_to_user(argp, &events, sizeof(struct kvm_vcpu_events))) | ||
| 2115 | break; | ||
| 2116 | r = 0; | ||
| 2117 | break; | ||
| 2118 | } | ||
| 2119 | case KVM_SET_VCPU_EVENTS: { | ||
| 2120 | struct kvm_vcpu_events events; | ||
| 2121 | |||
| 2122 | r = -EFAULT; | ||
| 2123 | if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) | ||
| 2124 | break; | ||
| 2125 | |||
| 2126 | r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); | ||
| 2127 | break; | ||
| 2128 | } | ||
| 1914 | default: | 2129 | default: |
| 1915 | r = -EINVAL; | 2130 | r = -EINVAL; |
| 1916 | } | 2131 | } |
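A short userspace sketch of how the two new vcpu ioctls are expected to be paired across a migration (the vcpu descriptors are hypothetical, <sys/ioctl.h> and <linux/kvm.h> are assumed, and error handling is trimmed):

    void migrate_vcpu_events(int src_vcpu_fd, int dst_vcpu_fd)
    {
            struct kvm_vcpu_events events;

            /* source: snapshot pending exception/interrupt/NMI state */
            ioctl(src_vcpu_fd, KVM_GET_VCPU_EVENTS, &events);

            /* destination: flags must remain zero, then re-inject the
             * state before the vcpu is resumed */
            events.flags = 0;
            ioctl(dst_vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }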
| @@ -2039,9 +2254,7 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
| 2039 | sizeof(struct kvm_pic_state)); | 2254 | sizeof(struct kvm_pic_state)); |
| 2040 | break; | 2255 | break; |
| 2041 | case KVM_IRQCHIP_IOAPIC: | 2256 | case KVM_IRQCHIP_IOAPIC: |
| 2042 | memcpy(&chip->chip.ioapic, | 2257 | r = kvm_get_ioapic(kvm, &chip->chip.ioapic); |
| 2043 | ioapic_irqchip(kvm), | ||
| 2044 | sizeof(struct kvm_ioapic_state)); | ||
| 2045 | break; | 2258 | break; |
| 2046 | default: | 2259 | default: |
| 2047 | r = -EINVAL; | 2260 | r = -EINVAL; |
| @@ -2071,11 +2284,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) | |||
| 2071 | spin_unlock(&pic_irqchip(kvm)->lock); | 2284 | spin_unlock(&pic_irqchip(kvm)->lock); |
| 2072 | break; | 2285 | break; |
| 2073 | case KVM_IRQCHIP_IOAPIC: | 2286 | case KVM_IRQCHIP_IOAPIC: |
| 2074 | mutex_lock(&kvm->irq_lock); | 2287 | r = kvm_set_ioapic(kvm, &chip->chip.ioapic); |
| 2075 | memcpy(ioapic_irqchip(kvm), | ||
| 2076 | &chip->chip.ioapic, | ||
| 2077 | sizeof(struct kvm_ioapic_state)); | ||
| 2078 | mutex_unlock(&kvm->irq_lock); | ||
| 2079 | break; | 2288 | break; |
| 2080 | default: | 2289 | default: |
| 2081 | r = -EINVAL; | 2290 | r = -EINVAL; |
| @@ -2183,7 +2392,7 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2183 | { | 2392 | { |
| 2184 | struct kvm *kvm = filp->private_data; | 2393 | struct kvm *kvm = filp->private_data; |
| 2185 | void __user *argp = (void __user *)arg; | 2394 | void __user *argp = (void __user *)arg; |
| 2186 | int r = -EINVAL; | 2395 | int r = -ENOTTY; |
| 2187 | /* | 2396 | /* |
| 2188 | * This union makes it completely explicit to gcc-3.x | 2397 | * This union makes it completely explicit to gcc-3.x |
| 2189 | * that these two variables' stack usage should be | 2398 | * that these two variables' stack usage should be |
| @@ -2245,25 +2454,39 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2245 | if (r) | 2454 | if (r) |
| 2246 | goto out; | 2455 | goto out; |
| 2247 | break; | 2456 | break; |
| 2248 | case KVM_CREATE_IRQCHIP: | 2457 | case KVM_CREATE_IRQCHIP: { |
| 2458 | struct kvm_pic *vpic; | ||
| 2459 | |||
| 2460 | mutex_lock(&kvm->lock); | ||
| 2461 | r = -EEXIST; | ||
| 2462 | if (kvm->arch.vpic) | ||
| 2463 | goto create_irqchip_unlock; | ||
| 2249 | r = -ENOMEM; | 2464 | r = -ENOMEM; |
| 2250 | kvm->arch.vpic = kvm_create_pic(kvm); | 2465 | vpic = kvm_create_pic(kvm); |
| 2251 | if (kvm->arch.vpic) { | 2466 | if (vpic) { |
| 2252 | r = kvm_ioapic_init(kvm); | 2467 | r = kvm_ioapic_init(kvm); |
| 2253 | if (r) { | 2468 | if (r) { |
| 2254 | kfree(kvm->arch.vpic); | 2469 | kfree(vpic); |
| 2255 | kvm->arch.vpic = NULL; | 2470 | goto create_irqchip_unlock; |
| 2256 | goto out; | ||
| 2257 | } | 2471 | } |
| 2258 | } else | 2472 | } else |
| 2259 | goto out; | 2473 | goto create_irqchip_unlock; |
| 2474 | smp_wmb(); | ||
| 2475 | kvm->arch.vpic = vpic; | ||
| 2476 | smp_wmb(); | ||
| 2260 | r = kvm_setup_default_irq_routing(kvm); | 2477 | r = kvm_setup_default_irq_routing(kvm); |
| 2261 | if (r) { | 2478 | if (r) { |
| 2479 | mutex_lock(&kvm->irq_lock); | ||
| 2262 | kfree(kvm->arch.vpic); | 2480 | kfree(kvm->arch.vpic); |
| 2263 | kfree(kvm->arch.vioapic); | 2481 | kfree(kvm->arch.vioapic); |
| 2264 | goto out; | 2482 | kvm->arch.vpic = NULL; |
| 2483 | kvm->arch.vioapic = NULL; | ||
| 2484 | mutex_unlock(&kvm->irq_lock); | ||
| 2265 | } | 2485 | } |
| 2486 | create_irqchip_unlock: | ||
| 2487 | mutex_unlock(&kvm->lock); | ||
| 2266 | break; | 2488 | break; |
| 2489 | } | ||
| 2267 | case KVM_CREATE_PIT: | 2490 | case KVM_CREATE_PIT: |
| 2268 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; | 2491 | u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY; |
| 2269 | goto create_pit; | 2492 | goto create_pit; |
| @@ -2293,10 +2516,8 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2293 | goto out; | 2516 | goto out; |
| 2294 | if (irqchip_in_kernel(kvm)) { | 2517 | if (irqchip_in_kernel(kvm)) { |
| 2295 | __s32 status; | 2518 | __s32 status; |
| 2296 | mutex_lock(&kvm->irq_lock); | ||
| 2297 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, | 2519 | status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, |
| 2298 | irq_event.irq, irq_event.level); | 2520 | irq_event.irq, irq_event.level); |
| 2299 | mutex_unlock(&kvm->irq_lock); | ||
| 2300 | if (ioctl == KVM_IRQ_LINE_STATUS) { | 2521 | if (ioctl == KVM_IRQ_LINE_STATUS) { |
| 2301 | irq_event.status = status; | 2522 | irq_event.status = status; |
| 2302 | if (copy_to_user(argp, &irq_event, | 2523 | if (copy_to_user(argp, &irq_event, |
| @@ -2422,6 +2643,55 @@ long kvm_arch_vm_ioctl(struct file *filp, | |||
| 2422 | r = 0; | 2643 | r = 0; |
| 2423 | break; | 2644 | break; |
| 2424 | } | 2645 | } |
| 2646 | case KVM_XEN_HVM_CONFIG: { | ||
| 2647 | r = -EFAULT; | ||
| 2648 | if (copy_from_user(&kvm->arch.xen_hvm_config, argp, | ||
| 2649 | sizeof(struct kvm_xen_hvm_config))) | ||
| 2650 | goto out; | ||
| 2651 | r = -EINVAL; | ||
| 2652 | if (kvm->arch.xen_hvm_config.flags) | ||
| 2653 | goto out; | ||
| 2654 | r = 0; | ||
| 2655 | break; | ||
| 2656 | } | ||
| 2657 | case KVM_SET_CLOCK: { | ||
| 2658 | struct timespec now; | ||
| 2659 | struct kvm_clock_data user_ns; | ||
| 2660 | u64 now_ns; | ||
| 2661 | s64 delta; | ||
| 2662 | |||
| 2663 | r = -EFAULT; | ||
| 2664 | if (copy_from_user(&user_ns, argp, sizeof(user_ns))) | ||
| 2665 | goto out; | ||
| 2666 | |||
| 2667 | r = -EINVAL; | ||
| 2668 | if (user_ns.flags) | ||
| 2669 | goto out; | ||
| 2670 | |||
| 2671 | r = 0; | ||
| 2672 | ktime_get_ts(&now); | ||
| 2673 | now_ns = timespec_to_ns(&now); | ||
| 2674 | delta = user_ns.clock - now_ns; | ||
| 2675 | kvm->arch.kvmclock_offset = delta; | ||
| 2676 | break; | ||
| 2677 | } | ||
| 2678 | case KVM_GET_CLOCK: { | ||
| 2679 | struct timespec now; | ||
| 2680 | struct kvm_clock_data user_ns; | ||
| 2681 | u64 now_ns; | ||
| 2682 | |||
| 2683 | ktime_get_ts(&now); | ||
| 2684 | now_ns = timespec_to_ns(&now); | ||
| 2685 | user_ns.clock = kvm->arch.kvmclock_offset + now_ns; | ||
| 2686 | user_ns.flags = 0; | ||
| 2687 | |||
| 2688 | r = -EFAULT; | ||
| 2689 | if (copy_to_user(argp, &user_ns, sizeof(user_ns))) | ||
| 2690 | goto out; | ||
| 2691 | r = 0; | ||
| 2692 | break; | ||
| 2693 | } | ||
| 2694 | |||
| 2425 | default: | 2695 | default: |
| 2426 | ; | 2696 | ; |
| 2427 | } | 2697 | } |
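In the same spirit, a migration tool would pair the two clock ioctls so kvmclock never appears to run backwards: read the value on the source, write it on the destination, and let the kernel fold the difference into kvmclock_offset. A hedged userspace sketch, with hypothetical vm file descriptors and error handling omitted:

    void migrate_kvmclock(int src_vm_fd, int dst_vm_fd)
    {
            struct kvm_clock_data clock;

            ioctl(src_vm_fd, KVM_GET_CLOCK, &clock);    /* source side */

            clock.flags = 0;                            /* must be zero */
            ioctl(dst_vm_fd, KVM_SET_CLOCK, &clock);    /* destination side */
    }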
| @@ -2434,7 +2704,8 @@ static void kvm_init_msr_list(void) | |||
| 2434 | u32 dummy[2]; | 2704 | u32 dummy[2]; |
| 2435 | unsigned i, j; | 2705 | unsigned i, j; |
| 2436 | 2706 | ||
| 2437 | for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) { | 2707 | /* skip the first msrs in the list. KVM-specific */ |
| 2708 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { | ||
| 2438 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 2709 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) |
| 2439 | continue; | 2710 | continue; |
| 2440 | if (j < i) | 2711 | if (j < i) |
| @@ -2758,13 +3029,13 @@ static void cache_all_regs(struct kvm_vcpu *vcpu) | |||
| 2758 | } | 3029 | } |
| 2759 | 3030 | ||
| 2760 | int emulate_instruction(struct kvm_vcpu *vcpu, | 3031 | int emulate_instruction(struct kvm_vcpu *vcpu, |
| 2761 | struct kvm_run *run, | ||
| 2762 | unsigned long cr2, | 3032 | unsigned long cr2, |
| 2763 | u16 error_code, | 3033 | u16 error_code, |
| 2764 | int emulation_type) | 3034 | int emulation_type) |
| 2765 | { | 3035 | { |
| 2766 | int r, shadow_mask; | 3036 | int r, shadow_mask; |
| 2767 | struct decode_cache *c; | 3037 | struct decode_cache *c; |
| 3038 | struct kvm_run *run = vcpu->run; | ||
| 2768 | 3039 | ||
| 2769 | kvm_clear_exception_queue(vcpu); | 3040 | kvm_clear_exception_queue(vcpu); |
| 2770 | vcpu->arch.mmio_fault_cr2 = cr2; | 3041 | vcpu->arch.mmio_fault_cr2 = cr2; |
| @@ -2784,7 +3055,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2784 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 3055 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); |
| 2785 | 3056 | ||
| 2786 | vcpu->arch.emulate_ctxt.vcpu = vcpu; | 3057 | vcpu->arch.emulate_ctxt.vcpu = vcpu; |
| 2787 | vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); | 3058 | vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); |
| 2788 | vcpu->arch.emulate_ctxt.mode = | 3059 | vcpu->arch.emulate_ctxt.mode = |
| 2789 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) | 3060 | (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) |
| 2790 | ? X86EMUL_MODE_REAL : cs_l | 3061 | ? X86EMUL_MODE_REAL : cs_l |
| @@ -2862,7 +3133,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
| 2862 | return EMULATE_DO_MMIO; | 3133 | return EMULATE_DO_MMIO; |
| 2863 | } | 3134 | } |
| 2864 | 3135 | ||
| 2865 | kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); | 3136 | kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); |
| 2866 | 3137 | ||
| 2867 | if (vcpu->mmio_is_write) { | 3138 | if (vcpu->mmio_is_write) { |
| 2868 | vcpu->mmio_needed = 0; | 3139 | vcpu->mmio_needed = 0; |
| @@ -2970,8 +3241,7 @@ static int pio_string_write(struct kvm_vcpu *vcpu) | |||
| 2970 | return r; | 3241 | return r; |
| 2971 | } | 3242 | } |
| 2972 | 3243 | ||
| 2973 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3244 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) |
| 2974 | int size, unsigned port) | ||
| 2975 | { | 3245 | { |
| 2976 | unsigned long val; | 3246 | unsigned long val; |
| 2977 | 3247 | ||
| @@ -3000,7 +3270,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
| 3000 | } | 3270 | } |
| 3001 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); | 3271 | EXPORT_SYMBOL_GPL(kvm_emulate_pio); |
| 3002 | 3272 | ||
| 3003 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 3273 | int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, |
| 3004 | int size, unsigned long count, int down, | 3274 | int size, unsigned long count, int down, |
| 3005 | gva_t address, int rep, unsigned port) | 3275 | gva_t address, int rep, unsigned port) |
| 3006 | { | 3276 | { |
| @@ -3073,9 +3343,6 @@ static void bounce_off(void *info) | |||
| 3073 | /* nothing */ | 3343 | /* nothing */ |
| 3074 | } | 3344 | } |
| 3075 | 3345 | ||
| 3076 | static unsigned int ref_freq; | ||
| 3077 | static unsigned long tsc_khz_ref; | ||
| 3078 | |||
| 3079 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 3346 | static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
| 3080 | void *data) | 3347 | void *data) |
| 3081 | { | 3348 | { |
| @@ -3084,14 +3351,11 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
| 3084 | struct kvm_vcpu *vcpu; | 3351 | struct kvm_vcpu *vcpu; |
| 3085 | int i, send_ipi = 0; | 3352 | int i, send_ipi = 0; |
| 3086 | 3353 | ||
| 3087 | if (!ref_freq) | ||
| 3088 | ref_freq = freq->old; | ||
| 3089 | |||
| 3090 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) | 3354 | if (val == CPUFREQ_PRECHANGE && freq->old > freq->new) |
| 3091 | return 0; | 3355 | return 0; |
| 3092 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) | 3356 | if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new) |
| 3093 | return 0; | 3357 | return 0; |
| 3094 | per_cpu(cpu_tsc_khz, freq->cpu) = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 3358 | per_cpu(cpu_tsc_khz, freq->cpu) = freq->new; |
| 3095 | 3359 | ||
| 3096 | spin_lock(&kvm_lock); | 3360 | spin_lock(&kvm_lock); |
| 3097 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3361 | list_for_each_entry(kvm, &vm_list, vm_list) { |
| @@ -3128,9 +3392,28 @@ static struct notifier_block kvmclock_cpufreq_notifier_block = { | |||
| 3128 | .notifier_call = kvmclock_cpufreq_notifier | 3392 | .notifier_call = kvmclock_cpufreq_notifier |
| 3129 | }; | 3393 | }; |
| 3130 | 3394 | ||
| 3395 | static void kvm_timer_init(void) | ||
| 3396 | { | ||
| 3397 | int cpu; | ||
| 3398 | |||
| 3399 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
| 3400 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
| 3401 | CPUFREQ_TRANSITION_NOTIFIER); | ||
| 3402 | for_each_online_cpu(cpu) { | ||
| 3403 | unsigned long khz = cpufreq_get(cpu); | ||
| 3404 | if (!khz) | ||
| 3405 | khz = tsc_khz; | ||
| 3406 | per_cpu(cpu_tsc_khz, cpu) = khz; | ||
| 3407 | } | ||
| 3408 | } else { | ||
| 3409 | for_each_possible_cpu(cpu) | ||
| 3410 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
| 3411 | } | ||
| 3412 | } | ||
| 3413 | |||
| 3131 | int kvm_arch_init(void *opaque) | 3414 | int kvm_arch_init(void *opaque) |
| 3132 | { | 3415 | { |
| 3133 | int r, cpu; | 3416 | int r; |
| 3134 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; | 3417 | struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque; |
| 3135 | 3418 | ||
| 3136 | if (kvm_x86_ops) { | 3419 | if (kvm_x86_ops) { |
| @@ -3162,13 +3445,7 @@ int kvm_arch_init(void *opaque) | |||
| 3162 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 3445 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
| 3163 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 3446 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
| 3164 | 3447 | ||
| 3165 | for_each_possible_cpu(cpu) | 3448 | kvm_timer_init(); |
| 3166 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | ||
| 3167 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
| 3168 | tsc_khz_ref = tsc_khz; | ||
| 3169 | cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, | ||
| 3170 | CPUFREQ_TRANSITION_NOTIFIER); | ||
| 3171 | } | ||
| 3172 | 3449 | ||
| 3173 | return 0; | 3450 | return 0; |
| 3174 | 3451 | ||
| @@ -3296,7 +3573,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
| 3296 | unsigned long *rflags) | 3573 | unsigned long *rflags) |
| 3297 | { | 3574 | { |
| 3298 | kvm_lmsw(vcpu, msw); | 3575 | kvm_lmsw(vcpu, msw); |
| 3299 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3576 | *rflags = kvm_get_rflags(vcpu); |
| 3300 | } | 3577 | } |
| 3301 | 3578 | ||
| 3302 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 3579 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
| @@ -3334,7 +3611,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | |||
| 3334 | switch (cr) { | 3611 | switch (cr) { |
| 3335 | case 0: | 3612 | case 0: |
| 3336 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 3613 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); |
| 3337 | *rflags = kvm_x86_ops->get_rflags(vcpu); | 3614 | *rflags = kvm_get_rflags(vcpu); |
| 3338 | break; | 3615 | break; |
| 3339 | case 2: | 3616 | case 2: |
| 3340 | vcpu->arch.cr2 = val; | 3617 | vcpu->arch.cr2 = val; |
| @@ -3454,18 +3731,18 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | |||
| 3454 | * | 3731 | * |
| 3455 | * No need to exit to userspace if we already have an interrupt queued. | 3732 | * No need to exit to userspace if we already have an interrupt queued. |
| 3456 | */ | 3733 | */ |
| 3457 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | 3734 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) |
| 3458 | struct kvm_run *kvm_run) | ||
| 3459 | { | 3735 | { |
| 3460 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && | 3736 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && |
| 3461 | kvm_run->request_interrupt_window && | 3737 | vcpu->run->request_interrupt_window && |
| 3462 | kvm_arch_interrupt_allowed(vcpu)); | 3738 | kvm_arch_interrupt_allowed(vcpu)); |
| 3463 | } | 3739 | } |
| 3464 | 3740 | ||
| 3465 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 3741 | static void post_kvm_run_save(struct kvm_vcpu *vcpu) |
| 3466 | struct kvm_run *kvm_run) | ||
| 3467 | { | 3742 | { |
| 3468 | kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | 3743 | struct kvm_run *kvm_run = vcpu->run; |
| 3744 | |||
| 3745 | kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0; | ||
| 3469 | kvm_run->cr8 = kvm_get_cr8(vcpu); | 3746 | kvm_run->cr8 = kvm_get_cr8(vcpu); |
| 3470 | kvm_run->apic_base = kvm_get_apic_base(vcpu); | 3747 | kvm_run->apic_base = kvm_get_apic_base(vcpu); |
| 3471 | if (irqchip_in_kernel(vcpu->kvm)) | 3748 | if (irqchip_in_kernel(vcpu->kvm)) |
| @@ -3526,7 +3803,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) | |||
| 3526 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | 3803 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); |
| 3527 | } | 3804 | } |
| 3528 | 3805 | ||
| 3529 | static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3806 | static void inject_pending_event(struct kvm_vcpu *vcpu) |
| 3530 | { | 3807 | { |
| 3531 | /* try to reinject previous events if any */ | 3808 | /* try to reinject previous events if any */ |
| 3532 | if (vcpu->arch.exception.pending) { | 3809 | if (vcpu->arch.exception.pending) { |
| @@ -3562,11 +3839,11 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3562 | } | 3839 | } |
| 3563 | } | 3840 | } |
| 3564 | 3841 | ||
| 3565 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3842 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
| 3566 | { | 3843 | { |
| 3567 | int r; | 3844 | int r; |
| 3568 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | 3845 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && |
| 3569 | kvm_run->request_interrupt_window; | 3846 | vcpu->run->request_interrupt_window; |
| 3570 | 3847 | ||
| 3571 | if (vcpu->requests) | 3848 | if (vcpu->requests) |
| 3572 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 3849 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
| @@ -3587,12 +3864,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3587 | kvm_x86_ops->tlb_flush(vcpu); | 3864 | kvm_x86_ops->tlb_flush(vcpu); |
| 3588 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, | 3865 | if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, |
| 3589 | &vcpu->requests)) { | 3866 | &vcpu->requests)) { |
| 3590 | kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; | 3867 | vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; |
| 3591 | r = 0; | 3868 | r = 0; |
| 3592 | goto out; | 3869 | goto out; |
| 3593 | } | 3870 | } |
| 3594 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { | 3871 | if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) { |
| 3595 | kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; | 3872 | vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; |
| 3596 | r = 0; | 3873 | r = 0; |
| 3597 | goto out; | 3874 | goto out; |
| 3598 | } | 3875 | } |
| @@ -3616,7 +3893,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3616 | goto out; | 3893 | goto out; |
| 3617 | } | 3894 | } |
| 3618 | 3895 | ||
| 3619 | inject_pending_event(vcpu, kvm_run); | 3896 | inject_pending_event(vcpu); |
| 3620 | 3897 | ||
| 3621 | /* enable NMI/IRQ window open exits if needed */ | 3898 | /* enable NMI/IRQ window open exits if needed */ |
| 3622 | if (vcpu->arch.nmi_pending) | 3899 | if (vcpu->arch.nmi_pending) |
| @@ -3642,7 +3919,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3642 | } | 3919 | } |
| 3643 | 3920 | ||
| 3644 | trace_kvm_entry(vcpu->vcpu_id); | 3921 | trace_kvm_entry(vcpu->vcpu_id); |
| 3645 | kvm_x86_ops->run(vcpu, kvm_run); | 3922 | kvm_x86_ops->run(vcpu); |
| 3646 | 3923 | ||
| 3647 | /* | 3924 | /* |
| 3648 | * If the guest has used debug registers, at least dr7 | 3925 | * If the guest has used debug registers, at least dr7 |
| @@ -3684,13 +3961,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3684 | 3961 | ||
| 3685 | kvm_lapic_sync_from_vapic(vcpu); | 3962 | kvm_lapic_sync_from_vapic(vcpu); |
| 3686 | 3963 | ||
| 3687 | r = kvm_x86_ops->handle_exit(kvm_run, vcpu); | 3964 | r = kvm_x86_ops->handle_exit(vcpu); |
| 3688 | out: | 3965 | out: |
| 3689 | return r; | 3966 | return r; |
| 3690 | } | 3967 | } |
| 3691 | 3968 | ||
| 3692 | 3969 | ||
| 3693 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3970 | static int __vcpu_run(struct kvm_vcpu *vcpu) |
| 3694 | { | 3971 | { |
| 3695 | int r; | 3972 | int r; |
| 3696 | 3973 | ||
| @@ -3710,7 +3987,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3710 | r = 1; | 3987 | r = 1; |
| 3711 | while (r > 0) { | 3988 | while (r > 0) { |
| 3712 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 3989 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
| 3713 | r = vcpu_enter_guest(vcpu, kvm_run); | 3990 | r = vcpu_enter_guest(vcpu); |
| 3714 | else { | 3991 | else { |
| 3715 | up_read(&vcpu->kvm->slots_lock); | 3992 | up_read(&vcpu->kvm->slots_lock); |
| 3716 | kvm_vcpu_block(vcpu); | 3993 | kvm_vcpu_block(vcpu); |
| @@ -3738,14 +4015,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3738 | if (kvm_cpu_has_pending_timer(vcpu)) | 4015 | if (kvm_cpu_has_pending_timer(vcpu)) |
| 3739 | kvm_inject_pending_timer_irqs(vcpu); | 4016 | kvm_inject_pending_timer_irqs(vcpu); |
| 3740 | 4017 | ||
| 3741 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 4018 | if (dm_request_for_irq_injection(vcpu)) { |
| 3742 | r = -EINTR; | 4019 | r = -EINTR; |
| 3743 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4020 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
| 3744 | ++vcpu->stat.request_irq_exits; | 4021 | ++vcpu->stat.request_irq_exits; |
| 3745 | } | 4022 | } |
| 3746 | if (signal_pending(current)) { | 4023 | if (signal_pending(current)) { |
| 3747 | r = -EINTR; | 4024 | r = -EINTR; |
| 3748 | kvm_run->exit_reason = KVM_EXIT_INTR; | 4025 | vcpu->run->exit_reason = KVM_EXIT_INTR; |
| 3749 | ++vcpu->stat.signal_exits; | 4026 | ++vcpu->stat.signal_exits; |
| 3750 | } | 4027 | } |
| 3751 | if (need_resched()) { | 4028 | if (need_resched()) { |
| @@ -3756,7 +4033,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3756 | } | 4033 | } |
| 3757 | 4034 | ||
| 3758 | up_read(&vcpu->kvm->slots_lock); | 4035 | up_read(&vcpu->kvm->slots_lock); |
| 3759 | post_kvm_run_save(vcpu, kvm_run); | 4036 | post_kvm_run_save(vcpu); |
| 3760 | 4037 | ||
| 3761 | vapic_exit(vcpu); | 4038 | vapic_exit(vcpu); |
| 3762 | 4039 | ||
| @@ -3789,15 +4066,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3789 | if (r) | 4066 | if (r) |
| 3790 | goto out; | 4067 | goto out; |
| 3791 | } | 4068 | } |
| 3792 | #if CONFIG_HAS_IOMEM | ||
| 3793 | if (vcpu->mmio_needed) { | 4069 | if (vcpu->mmio_needed) { |
| 3794 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); | 4070 | memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); |
| 3795 | vcpu->mmio_read_completed = 1; | 4071 | vcpu->mmio_read_completed = 1; |
| 3796 | vcpu->mmio_needed = 0; | 4072 | vcpu->mmio_needed = 0; |
| 3797 | 4073 | ||
| 3798 | down_read(&vcpu->kvm->slots_lock); | 4074 | down_read(&vcpu->kvm->slots_lock); |
| 3799 | r = emulate_instruction(vcpu, kvm_run, | 4075 | r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, |
| 3800 | vcpu->arch.mmio_fault_cr2, 0, | ||
| 3801 | EMULTYPE_NO_DECODE); | 4076 | EMULTYPE_NO_DECODE); |
| 3802 | up_read(&vcpu->kvm->slots_lock); | 4077 | up_read(&vcpu->kvm->slots_lock); |
| 3803 | if (r == EMULATE_DO_MMIO) { | 4078 | if (r == EMULATE_DO_MMIO) { |
| @@ -3808,12 +4083,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 3808 | goto out; | 4083 | goto out; |
| 3809 | } | 4084 | } |
| 3810 | } | 4085 | } |
| 3811 | #endif | ||
| 3812 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) | 4086 | if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) |
| 3813 | kvm_register_write(vcpu, VCPU_REGS_RAX, | 4087 | kvm_register_write(vcpu, VCPU_REGS_RAX, |
| 3814 | kvm_run->hypercall.ret); | 4088 | kvm_run->hypercall.ret); |
| 3815 | 4089 | ||
| 3816 | r = __vcpu_run(vcpu, kvm_run); | 4090 | r = __vcpu_run(vcpu); |
| 3817 | 4091 | ||
| 3818 | out: | 4092 | out: |
| 3819 | if (vcpu->sigset_active) | 4093 | if (vcpu->sigset_active) |
| @@ -3847,13 +4121,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 3847 | #endif | 4121 | #endif |
| 3848 | 4122 | ||
| 3849 | regs->rip = kvm_rip_read(vcpu); | 4123 | regs->rip = kvm_rip_read(vcpu); |
| 3850 | regs->rflags = kvm_x86_ops->get_rflags(vcpu); | 4124 | regs->rflags = kvm_get_rflags(vcpu); |
| 3851 | |||
| 3852 | /* | ||
| 3853 | * Don't leak debug flags in case they were set for guest debugging | ||
| 3854 | */ | ||
| 3855 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 3856 | regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
| 3857 | 4125 | ||
| 3858 | vcpu_put(vcpu); | 4126 | vcpu_put(vcpu); |
| 3859 | 4127 | ||
| @@ -3881,12 +4149,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
| 3881 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); | 4149 | kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13); |
| 3882 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); | 4150 | kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14); |
| 3883 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); | 4151 | kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15); |
| 3884 | |||
| 3885 | #endif | 4152 | #endif |
| 3886 | 4153 | ||
| 3887 | kvm_rip_write(vcpu, regs->rip); | 4154 | kvm_rip_write(vcpu, regs->rip); |
| 3888 | kvm_x86_ops->set_rflags(vcpu, regs->rflags); | 4155 | kvm_set_rflags(vcpu, regs->rflags); |
| 3889 | |||
| 3890 | 4156 | ||
| 3891 | vcpu->arch.exception.pending = false; | 4157 | vcpu->arch.exception.pending = false; |
| 3892 | 4158 | ||
| @@ -4105,7 +4371,7 @@ static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) | |||
| 4105 | { | 4371 | { |
| 4106 | return (seg != VCPU_SREG_LDTR) && | 4372 | return (seg != VCPU_SREG_LDTR) && |
| 4107 | (seg != VCPU_SREG_TR) && | 4373 | (seg != VCPU_SREG_TR) && |
| 4108 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM); | 4374 | (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); |
| 4109 | } | 4375 | } |
| 4110 | 4376 | ||
| 4111 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 4377 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
| @@ -4133,7 +4399,7 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
| 4133 | { | 4399 | { |
| 4134 | tss->cr3 = vcpu->arch.cr3; | 4400 | tss->cr3 = vcpu->arch.cr3; |
| 4135 | tss->eip = kvm_rip_read(vcpu); | 4401 | tss->eip = kvm_rip_read(vcpu); |
| 4136 | tss->eflags = kvm_x86_ops->get_rflags(vcpu); | 4402 | tss->eflags = kvm_get_rflags(vcpu); |
| 4137 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4403 | tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 4138 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4404 | tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 4139 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4405 | tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| @@ -4157,7 +4423,7 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
| 4157 | kvm_set_cr3(vcpu, tss->cr3); | 4423 | kvm_set_cr3(vcpu, tss->cr3); |
| 4158 | 4424 | ||
| 4159 | kvm_rip_write(vcpu, tss->eip); | 4425 | kvm_rip_write(vcpu, tss->eip); |
| 4160 | kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2); | 4426 | kvm_set_rflags(vcpu, tss->eflags | 2); |
| 4161 | 4427 | ||
| 4162 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); | 4428 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); |
| 4163 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); | 4429 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); |
| @@ -4195,7 +4461,7 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
| 4195 | struct tss_segment_16 *tss) | 4461 | struct tss_segment_16 *tss) |
| 4196 | { | 4462 | { |
| 4197 | tss->ip = kvm_rip_read(vcpu); | 4463 | tss->ip = kvm_rip_read(vcpu); |
| 4198 | tss->flag = kvm_x86_ops->get_rflags(vcpu); | 4464 | tss->flag = kvm_get_rflags(vcpu); |
| 4199 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); | 4465 | tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); |
| 4200 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); | 4466 | tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); |
| 4201 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); | 4467 | tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); |
| @@ -4210,14 +4476,13 @@ static void save_state_to_tss16(struct kvm_vcpu *vcpu, | |||
| 4210 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); | 4476 | tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); |
| 4211 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); | 4477 | tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); |
| 4212 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 4478 | tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
| 4213 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
| 4214 | } | 4479 | } |
| 4215 | 4480 | ||
| 4216 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, | 4481 | static int load_state_from_tss16(struct kvm_vcpu *vcpu, |
| 4217 | struct tss_segment_16 *tss) | 4482 | struct tss_segment_16 *tss) |
| 4218 | { | 4483 | { |
| 4219 | kvm_rip_write(vcpu, tss->ip); | 4484 | kvm_rip_write(vcpu, tss->ip); |
| 4220 | kvm_x86_ops->set_rflags(vcpu, tss->flag | 2); | 4485 | kvm_set_rflags(vcpu, tss->flag | 2); |
| 4221 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); | 4486 | kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); |
| 4222 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); | 4487 | kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); |
| 4223 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); | 4488 | kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); |
| @@ -4363,8 +4628,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 4363 | } | 4628 | } |
| 4364 | 4629 | ||
| 4365 | if (reason == TASK_SWITCH_IRET) { | 4630 | if (reason == TASK_SWITCH_IRET) { |
| 4366 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4631 | u32 eflags = kvm_get_rflags(vcpu); |
| 4367 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | 4632 | kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); |
| 4368 | } | 4633 | } |
| 4369 | 4634 | ||
| 4370 | /* set back link to prev task only if NT bit is set in eflags | 4635 | /* set back link to prev task only if NT bit is set in eflags |
| @@ -4372,11 +4637,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 4372 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | 4637 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) |
| 4373 | old_tss_sel = 0xffff; | 4638 | old_tss_sel = 0xffff; |
| 4374 | 4639 | ||
| 4375 | /* set back link to prev task only if NT bit is set in eflags | ||
| 4376 | note that old_tss_sel is not used afetr this point */ | ||
| 4377 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
| 4378 | old_tss_sel = 0xffff; | ||
| 4379 | |||
| 4380 | if (nseg_desc.type & 8) | 4640 | if (nseg_desc.type & 8) |
| 4381 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, | 4641 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, |
| 4382 | old_tss_base, &nseg_desc); | 4642 | old_tss_base, &nseg_desc); |
| @@ -4385,8 +4645,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 4385 | old_tss_base, &nseg_desc); | 4645 | old_tss_base, &nseg_desc); |
| 4386 | 4646 | ||
| 4387 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 4647 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
| 4388 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4648 | u32 eflags = kvm_get_rflags(vcpu); |
| 4389 | kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT); | 4649 | kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); |
| 4390 | } | 4650 | } |
| 4391 | 4651 | ||
| 4392 | if (reason != TASK_SWITCH_IRET) { | 4652 | if (reason != TASK_SWITCH_IRET) { |
| @@ -4438,8 +4698,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 4438 | 4698 | ||
| 4439 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; | 4699 | mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4; |
| 4440 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 4700 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
| 4441 | if (!is_long_mode(vcpu) && is_pae(vcpu)) | 4701 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
| 4442 | load_pdptrs(vcpu, vcpu->arch.cr3); | 4702 | load_pdptrs(vcpu, vcpu->arch.cr3); |
| 4703 | mmu_reset_needed = 1; | ||
| 4704 | } | ||
| 4443 | 4705 | ||
| 4444 | if (mmu_reset_needed) | 4706 | if (mmu_reset_needed) |
| 4445 | kvm_mmu_reset_context(vcpu); | 4707 | kvm_mmu_reset_context(vcpu); |
| @@ -4480,12 +4742,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
| 4480 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | 4742 | int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, |
| 4481 | struct kvm_guest_debug *dbg) | 4743 | struct kvm_guest_debug *dbg) |
| 4482 | { | 4744 | { |
| 4745 | unsigned long rflags; | ||
| 4483 | int i, r; | 4746 | int i, r; |
| 4484 | 4747 | ||
| 4485 | vcpu_load(vcpu); | 4748 | vcpu_load(vcpu); |
| 4486 | 4749 | ||
| 4487 | if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) == | 4750 | if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) { |
| 4488 | (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) { | 4751 | r = -EBUSY; |
| 4752 | if (vcpu->arch.exception.pending) | ||
| 4753 | goto unlock_out; | ||
| 4754 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | ||
| 4755 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
| 4756 | else | ||
| 4757 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
| 4758 | } | ||
| 4759 | |||
| 4760 | /* | ||
| 4761 | * Read rflags as long as potentially injected trace flags are still | ||
| 4762 | * filtered out. | ||
| 4763 | */ | ||
| 4764 | rflags = kvm_get_rflags(vcpu); | ||
| 4765 | |||
| 4766 | vcpu->guest_debug = dbg->control; | ||
| 4767 | if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) | ||
| 4768 | vcpu->guest_debug = 0; | ||
| 4769 | |||
| 4770 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { | ||
| 4489 | for (i = 0; i < KVM_NR_DB_REGS; ++i) | 4771 | for (i = 0; i < KVM_NR_DB_REGS; ++i) |
| 4490 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; | 4772 | vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; |
| 4491 | vcpu->arch.switch_db_regs = | 4773 | vcpu->arch.switch_db_regs = |
| @@ -4496,13 +4778,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, | |||
| 4496 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); | 4778 | vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); |
| 4497 | } | 4779 | } |
| 4498 | 4780 | ||
| 4499 | r = kvm_x86_ops->set_guest_debug(vcpu, dbg); | 4781 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { |
| 4782 | vcpu->arch.singlestep_cs = | ||
| 4783 | get_segment_selector(vcpu, VCPU_SREG_CS); | ||
| 4784 | vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); | ||
| 4785 | } | ||
| 4786 | |||
| 4787 | /* | ||
| 4788 | * Trigger an rflags update that will inject or remove the trace | ||
| 4789 | * flags. | ||
| 4790 | */ | ||
| 4791 | kvm_set_rflags(vcpu, rflags); | ||
| 4792 | |||
| 4793 | kvm_x86_ops->set_guest_debug(vcpu, dbg); | ||
| 4500 | 4794 | ||
| 4501 | if (dbg->control & KVM_GUESTDBG_INJECT_DB) | 4795 | r = 0; |
| 4502 | kvm_queue_exception(vcpu, DB_VECTOR); | ||
| 4503 | else if (dbg->control & KVM_GUESTDBG_INJECT_BP) | ||
| 4504 | kvm_queue_exception(vcpu, BP_VECTOR); | ||
| 4505 | 4796 | ||
| 4797 | unlock_out: | ||
| 4506 | vcpu_put(vcpu); | 4798 | vcpu_put(vcpu); |
| 4507 | 4799 | ||
| 4508 | return r; | 4800 | return r; |
| @@ -4703,14 +4995,26 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
| 4703 | return kvm_x86_ops->vcpu_reset(vcpu); | 4995 | return kvm_x86_ops->vcpu_reset(vcpu); |
| 4704 | } | 4996 | } |
| 4705 | 4997 | ||
| 4706 | void kvm_arch_hardware_enable(void *garbage) | 4998 | int kvm_arch_hardware_enable(void *garbage) |
| 4707 | { | 4999 | { |
| 4708 | kvm_x86_ops->hardware_enable(garbage); | 5000 | /* |
| 5001 | * Since this may be called from a hotplug notification, | ||
| 5002 | * we can't get the CPU frequency directly. | ||
| 5003 | */ | ||
| 5004 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | ||
| 5005 | int cpu = raw_smp_processor_id(); | ||
| 5006 | per_cpu(cpu_tsc_khz, cpu) = 0; | ||
| 5007 | } | ||
| 5008 | |||
| 5009 | kvm_shared_msr_cpu_online(); | ||
| 5010 | |||
| 5011 | return kvm_x86_ops->hardware_enable(garbage); | ||
| 4709 | } | 5012 | } |
| 4710 | 5013 | ||
| 4711 | void kvm_arch_hardware_disable(void *garbage) | 5014 | void kvm_arch_hardware_disable(void *garbage) |
| 4712 | { | 5015 | { |
| 4713 | kvm_x86_ops->hardware_disable(garbage); | 5016 | kvm_x86_ops->hardware_disable(garbage); |
| 5017 | drop_user_return_notifiers(garbage); | ||
| 4714 | } | 5018 | } |
| 4715 | 5019 | ||
| 4716 | int kvm_arch_hardware_setup(void) | 5020 | int kvm_arch_hardware_setup(void) |
| @@ -4948,8 +5252,36 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
| 4948 | return kvm_x86_ops->interrupt_allowed(vcpu); | 5252 | return kvm_x86_ops->interrupt_allowed(vcpu); |
| 4949 | } | 5253 | } |
| 4950 | 5254 | ||
| 5255 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | ||
| 5256 | { | ||
| 5257 | unsigned long rflags; | ||
| 5258 | |||
| 5259 | rflags = kvm_x86_ops->get_rflags(vcpu); | ||
| 5260 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
| 5261 | rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
| 5262 | return rflags; | ||
| 5263 | } | ||
| 5264 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | ||
| 5265 | |||
| 5266 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
| 5267 | { | ||
| 5268 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | ||
| 5269 | vcpu->arch.singlestep_cs == | ||
| 5270 | get_segment_selector(vcpu, VCPU_SREG_CS) && | ||
| 5271 | vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) | ||
| 5272 | rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; | ||
| 5273 | kvm_x86_ops->set_rflags(vcpu, rflags); | ||
| 5274 | } | ||
| 5275 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | ||
| 5276 | |||
| 4951 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); | 5277 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); |
| 4952 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); | 5278 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); |
| 4953 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); | 5279 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); |
| 4954 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); | 5280 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr); |
| 4955 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); | 5281 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_cr); |
| 5282 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmrun); | ||
| 5283 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit); | ||
| 5284 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); | ||
| 5285 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); | ||
| 5286 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); | ||
| 5287 | EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); | ||
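
The guest-debug rework above records singlestep_cs/singlestep_rip and routes all RFLAGS accesses through the new kvm_get_rflags()/kvm_set_rflags() wrappers, so the TF/RF bits injected for single-stepping are filtered out of reads and re-applied on writes only while execution is still at the recorded CS:RIP. A minimal userspace sketch of requesting single-stepping through KVM_SET_GUEST_DEBUG (enable_single_step is a hypothetical helper; vcpu_fd is assumed to be an existing vCPU fd, error handling omitted):

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    static int enable_single_step(int vcpu_fd)
    {
            struct kvm_guest_debug dbg;

            memset(&dbg, 0, sizeof(dbg));
            dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

            /* TF/RF are injected by the kernel; KVM_GET_REGS will not report them. */
            return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }
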
diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f8f8900fc5ec..2d241da07236 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h | |||
| @@ -14,12 +14,76 @@ | |||
| 14 | 14 | ||
| 15 | #define KVM_API_VERSION 12 | 15 | #define KVM_API_VERSION 12 |
| 16 | 16 | ||
| 17 | /* for KVM_TRACE_ENABLE, deprecated */ | 17 | /* *** Deprecated interfaces *** */ |
| 18 | |||
| 19 | #define KVM_TRC_SHIFT 16 | ||
| 20 | |||
| 21 | #define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) | ||
| 22 | #define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) | ||
| 23 | |||
| 24 | #define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) | ||
| 25 | #define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) | ||
| 26 | #define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) | ||
| 27 | |||
| 28 | #define KVM_TRC_HEAD_SIZE 12 | ||
| 29 | #define KVM_TRC_CYCLE_SIZE 8 | ||
| 30 | #define KVM_TRC_EXTRA_MAX 7 | ||
| 31 | |||
| 32 | #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) | ||
| 33 | #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) | ||
| 34 | #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) | ||
| 35 | #define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) | ||
| 36 | #define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) | ||
| 37 | #define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) | ||
| 38 | #define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) | ||
| 39 | #define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) | ||
| 40 | #define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) | ||
| 41 | #define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) | ||
| 42 | #define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) | ||
| 43 | #define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) | ||
| 44 | #define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) | ||
| 45 | #define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) | ||
| 46 | #define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) | ||
| 47 | #define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) | ||
| 48 | #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) | ||
| 49 | #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) | ||
| 50 | #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) | ||
| 51 | #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) | ||
| 52 | #define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) | ||
| 53 | #define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) | ||
| 54 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) | ||
| 55 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) | ||
| 56 | |||
| 18 | struct kvm_user_trace_setup { | 57 | struct kvm_user_trace_setup { |
| 19 | __u32 buf_size; /* sub_buffer size of each per-cpu */ | 58 | __u32 buf_size; |
| 20 | __u32 buf_nr; /* the number of sub_buffers of each per-cpu */ | 59 | __u32 buf_nr; |
| 60 | }; | ||
| 61 | |||
| 62 | #define __KVM_DEPRECATED_MAIN_W_0x06 \ | ||
| 63 | _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) | ||
| 64 | #define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) | ||
| 65 | #define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) | ||
| 66 | |||
| 67 | #define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) | ||
| 68 | |||
| 69 | struct kvm_breakpoint { | ||
| 70 | __u32 enabled; | ||
| 71 | __u32 padding; | ||
| 72 | __u64 address; | ||
| 73 | }; | ||
| 74 | |||
| 75 | struct kvm_debug_guest { | ||
| 76 | __u32 enabled; | ||
| 77 | __u32 pad; | ||
| 78 | struct kvm_breakpoint breakpoints[4]; | ||
| 79 | __u32 singlestep; | ||
| 21 | }; | 80 | }; |
| 22 | 81 | ||
| 82 | #define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) | ||
| 83 | |||
| 84 | /* *** End of deprecated interfaces *** */ | ||
| 85 | |||
| 86 | |||
| 23 | /* for KVM_CREATE_MEMORY_REGION */ | 87 | /* for KVM_CREATE_MEMORY_REGION */ |
| 24 | struct kvm_memory_region { | 88 | struct kvm_memory_region { |
| 25 | __u32 slot; | 89 | __u32 slot; |
| @@ -99,6 +163,7 @@ struct kvm_pit_config { | |||
| 99 | 163 | ||
| 100 | /* For KVM_EXIT_INTERNAL_ERROR */ | 164 | /* For KVM_EXIT_INTERNAL_ERROR */ |
| 101 | #define KVM_INTERNAL_ERROR_EMULATION 1 | 165 | #define KVM_INTERNAL_ERROR_EMULATION 1 |
| 166 | #define KVM_INTERNAL_ERROR_SIMUL_EX 2 | ||
| 102 | 167 | ||
| 103 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ | 168 | /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ |
| 104 | struct kvm_run { | 169 | struct kvm_run { |
| @@ -116,6 +181,11 @@ struct kvm_run { | |||
| 116 | __u64 cr8; | 181 | __u64 cr8; |
| 117 | __u64 apic_base; | 182 | __u64 apic_base; |
| 118 | 183 | ||
| 184 | #ifdef __KVM_S390 | ||
| 185 | /* the processor status word for s390 */ | ||
| 186 | __u64 psw_mask; /* psw upper half */ | ||
| 187 | __u64 psw_addr; /* psw lower half */ | ||
| 188 | #endif | ||
| 119 | union { | 189 | union { |
| 120 | /* KVM_EXIT_UNKNOWN */ | 190 | /* KVM_EXIT_UNKNOWN */ |
| 121 | struct { | 191 | struct { |
| @@ -167,8 +237,6 @@ struct kvm_run { | |||
| 167 | /* KVM_EXIT_S390_SIEIC */ | 237 | /* KVM_EXIT_S390_SIEIC */ |
| 168 | struct { | 238 | struct { |
| 169 | __u8 icptcode; | 239 | __u8 icptcode; |
| 170 | __u64 mask; /* psw upper half */ | ||
| 171 | __u64 addr; /* psw lower half */ | ||
| 172 | __u16 ipa; | 240 | __u16 ipa; |
| 173 | __u32 ipb; | 241 | __u32 ipb; |
| 174 | } s390_sieic; | 242 | } s390_sieic; |
| @@ -187,6 +255,9 @@ struct kvm_run { | |||
| 187 | } dcr; | 255 | } dcr; |
| 188 | struct { | 256 | struct { |
| 189 | __u32 suberror; | 257 | __u32 suberror; |
| 258 | /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ | ||
| 259 | __u32 ndata; | ||
| 260 | __u64 data[16]; | ||
| 190 | } internal; | 261 | } internal; |
| 191 | /* Fix the size of the union. */ | 262 | /* Fix the size of the union. */ |
| 192 | char padding[256]; | 263 | char padding[256]; |
| @@ -329,24 +400,6 @@ struct kvm_ioeventfd { | |||
| 329 | __u8 pad[36]; | 400 | __u8 pad[36]; |
| 330 | }; | 401 | }; |
| 331 | 402 | ||
| 332 | #define KVM_TRC_SHIFT 16 | ||
| 333 | /* | ||
| 334 | * kvm trace categories | ||
| 335 | */ | ||
| 336 | #define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) | ||
| 337 | #define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) /* only 12 bits */ | ||
| 338 | |||
| 339 | /* | ||
| 340 | * kvm trace action | ||
| 341 | */ | ||
| 342 | #define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) | ||
| 343 | #define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) | ||
| 344 | #define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) | ||
| 345 | |||
| 346 | #define KVM_TRC_HEAD_SIZE 12 | ||
| 347 | #define KVM_TRC_CYCLE_SIZE 8 | ||
| 348 | #define KVM_TRC_EXTRA_MAX 7 | ||
| 349 | |||
| 350 | #define KVMIO 0xAE | 403 | #define KVMIO 0xAE |
| 351 | 404 | ||
| 352 | /* | 405 | /* |
| @@ -367,12 +420,10 @@ struct kvm_ioeventfd { | |||
| 367 | */ | 420 | */ |
| 368 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ | 421 | #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ |
| 369 | #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) | 422 | #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) |
| 370 | /* | 423 | #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 |
| 371 | * ioctls for kvm trace | 424 | #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 |
| 372 | */ | 425 | #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 |
| 373 | #define KVM_TRACE_ENABLE _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) | 426 | |
| 374 | #define KVM_TRACE_PAUSE _IO(KVMIO, 0x07) | ||
| 375 | #define KVM_TRACE_DISABLE _IO(KVMIO, 0x08) | ||
| 376 | /* | 427 | /* |
| 377 | * Extension capability list. | 428 | * Extension capability list. |
| 378 | */ | 429 | */ |
| @@ -436,6 +487,15 @@ struct kvm_ioeventfd { | |||
| 436 | #endif | 487 | #endif |
| 437 | #define KVM_CAP_IOEVENTFD 36 | 488 | #define KVM_CAP_IOEVENTFD 36 |
| 438 | #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 | 489 | #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 |
| 490 | #ifdef __KVM_HAVE_XEN_HVM | ||
| 491 | #define KVM_CAP_XEN_HVM 38 | ||
| 492 | #endif | ||
| 493 | #define KVM_CAP_ADJUST_CLOCK 39 | ||
| 494 | #define KVM_CAP_INTERNAL_ERROR_DATA 40 | ||
| 495 | #ifdef __KVM_HAVE_VCPU_EVENTS | ||
| 496 | #define KVM_CAP_VCPU_EVENTS 41 | ||
| 497 | #endif | ||
| 498 | #define KVM_CAP_S390_PSW 42 | ||
| 439 | 499 | ||
| 440 | #ifdef KVM_CAP_IRQ_ROUTING | 500 | #ifdef KVM_CAP_IRQ_ROUTING |
| 441 | 501 | ||
| @@ -488,6 +548,18 @@ struct kvm_x86_mce { | |||
| 488 | }; | 548 | }; |
| 489 | #endif | 549 | #endif |
| 490 | 550 | ||
| 551 | #ifdef KVM_CAP_XEN_HVM | ||
| 552 | struct kvm_xen_hvm_config { | ||
| 553 | __u32 flags; | ||
| 554 | __u32 msr; | ||
| 555 | __u64 blob_addr_32; | ||
| 556 | __u64 blob_addr_64; | ||
| 557 | __u8 blob_size_32; | ||
| 558 | __u8 blob_size_64; | ||
| 559 | __u8 pad2[30]; | ||
| 560 | }; | ||
| 561 | #endif | ||
| 562 | |||
| 491 | #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) | 563 | #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) |
| 492 | 564 | ||
| 493 | struct kvm_irqfd { | 565 | struct kvm_irqfd { |
| @@ -497,55 +569,66 @@ struct kvm_irqfd { | |||
| 497 | __u8 pad[20]; | 569 | __u8 pad[20]; |
| 498 | }; | 570 | }; |
| 499 | 571 | ||
| 572 | struct kvm_clock_data { | ||
| 573 | __u64 clock; | ||
| 574 | __u32 flags; | ||
| 575 | __u32 pad[9]; | ||
| 576 | }; | ||
| 577 | |||
| 500 | /* | 578 | /* |
| 501 | * ioctls for VM fds | 579 | * ioctls for VM fds |
| 502 | */ | 580 | */ |
| 503 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) | 581 | #define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) |
| 504 | /* | 582 | /* |
| 505 | * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns | 583 | * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns |
| 506 | * a vcpu fd. | 584 | * a vcpu fd. |
| 507 | */ | 585 | */ |
| 508 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) | 586 | #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) |
| 509 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) | 587 | #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) |
| 510 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) | 588 | #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) |
| 511 | #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) | 589 | #define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) |
| 512 | #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) | 590 | #define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) |
| 513 | #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ | 591 | #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ |
| 514 | struct kvm_userspace_memory_region) | 592 | struct kvm_userspace_memory_region) |
| 515 | #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) | 593 | #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) |
| 516 | #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) | 594 | #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) |
| 517 | /* Device model IOC */ | 595 | /* Device model IOC */ |
| 518 | #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) | 596 | #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) |
| 519 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) | 597 | #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) |
| 520 | #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) | 598 | #define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) |
| 521 | #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) | 599 | #define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) |
| 522 | #define KVM_CREATE_PIT _IO(KVMIO, 0x64) | 600 | #define KVM_CREATE_PIT _IO(KVMIO, 0x64) |
| 523 | #define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) | 601 | #define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) |
| 524 | #define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) | 602 | #define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) |
| 525 | #define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) | 603 | #define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) |
| 526 | #define KVM_REGISTER_COALESCED_MMIO \ | 604 | #define KVM_REGISTER_COALESCED_MMIO \ |
| 527 | _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) | 605 | _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) |
| 528 | #define KVM_UNREGISTER_COALESCED_MMIO \ | 606 | #define KVM_UNREGISTER_COALESCED_MMIO \ |
| 529 | _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) | 607 | _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) |
| 530 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ | 608 | #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ |
| 531 | struct kvm_assigned_pci_dev) | 609 | struct kvm_assigned_pci_dev) |
| 532 | #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) | 610 | #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) |
| 533 | /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ | 611 | /* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ |
| 534 | #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ | 612 | #define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 |
| 535 | struct kvm_assigned_irq) | 613 | #define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) |
| 536 | #define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) | 614 | #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) |
| 537 | #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) | 615 | #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ |
| 538 | #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ | 616 | struct kvm_assigned_pci_dev) |
| 539 | struct kvm_assigned_pci_dev) | 617 | #define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ |
| 540 | #define KVM_ASSIGN_SET_MSIX_NR \ | 618 | struct kvm_assigned_msix_nr) |
| 541 | _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) | 619 | #define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ |
| 542 | #define KVM_ASSIGN_SET_MSIX_ENTRY \ | 620 | struct kvm_assigned_msix_entry) |
| 543 | _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) | 621 | #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) |
| 544 | #define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) | 622 | #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) |
| 545 | #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) | 623 | #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) |
| 546 | #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) | 624 | #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) |
| 547 | #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) | 625 | #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) |
| 548 | #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) | 626 | #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) |
| 627 | #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) | ||
| 628 | #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) | ||
| 629 | /* Available with KVM_CAP_PIT_STATE2 */ | ||
| 630 | #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) | ||
| 631 | #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) | ||
| 549 | 632 | ||
| 550 | /* | 633 | /* |
| 551 | * ioctls for vcpu fds | 634 | * ioctls for vcpu fds |
| @@ -558,7 +641,7 @@ struct kvm_irqfd { | |||
| 558 | #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) | 641 | #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) |
| 559 | #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) | 642 | #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) |
| 560 | /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ | 643 | /* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ |
| 561 | #define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST | 644 | #define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 |
| 562 | #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) | 645 | #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) |
| 563 | #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) | 646 | #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) |
| 564 | #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) | 647 | #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) |
| @@ -570,7 +653,7 @@ struct kvm_irqfd { | |||
| 570 | #define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) | 653 | #define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) |
| 571 | #define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) | 654 | #define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) |
| 572 | /* Available with KVM_CAP_VAPIC */ | 655 | /* Available with KVM_CAP_VAPIC */ |
| 573 | #define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) | 656 | #define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) |
| 574 | /* Available with KVM_CAP_VAPIC */ | 657 | /* Available with KVM_CAP_VAPIC */ |
| 575 | #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) | 658 | #define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) |
| 576 | /* valid for virtual machine (for floating interrupt)_and_ vcpu */ | 659 | /* valid for virtual machine (for floating interrupt)_and_ vcpu */ |
| @@ -582,66 +665,23 @@ struct kvm_irqfd { | |||
| 582 | /* initial ipl psw for s390 */ | 665 | /* initial ipl psw for s390 */ |
| 583 | #define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) | 666 | #define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) |
| 584 | /* initial reset for s390 */ | 667 | /* initial reset for s390 */ |
| 585 | #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) | 668 | #define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) |
| 586 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) | 669 | #define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) |
| 587 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) | 670 | #define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) |
| 588 | /* Available with KVM_CAP_NMI */ | 671 | /* Available with KVM_CAP_NMI */ |
| 589 | #define KVM_NMI _IO(KVMIO, 0x9a) | 672 | #define KVM_NMI _IO(KVMIO, 0x9a) |
| 590 | /* Available with KVM_CAP_SET_GUEST_DEBUG */ | 673 | /* Available with KVM_CAP_SET_GUEST_DEBUG */ |
| 591 | #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) | 674 | #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) |
| 592 | /* MCE for x86 */ | 675 | /* MCE for x86 */ |
| 593 | #define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) | 676 | #define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) |
| 594 | #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) | 677 | #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) |
| 595 | #define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) | 678 | #define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) |
| 596 | 679 | /* IA64 stack access */ | |
| 597 | /* | ||
| 598 | * Deprecated interfaces | ||
| 599 | */ | ||
| 600 | struct kvm_breakpoint { | ||
| 601 | __u32 enabled; | ||
| 602 | __u32 padding; | ||
| 603 | __u64 address; | ||
| 604 | }; | ||
| 605 | |||
| 606 | struct kvm_debug_guest { | ||
| 607 | __u32 enabled; | ||
| 608 | __u32 pad; | ||
| 609 | struct kvm_breakpoint breakpoints[4]; | ||
| 610 | __u32 singlestep; | ||
| 611 | }; | ||
| 612 | |||
| 613 | #define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest) | ||
| 614 | |||
| 615 | #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) | 680 | #define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *) |
| 616 | #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) | 681 | #define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *) |
| 617 | 682 | /* Available with KVM_CAP_VCPU_EVENTS */ | |
| 618 | #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) | 683 | #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) |
| 619 | #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) | 684 | #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) |
| 620 | |||
| 621 | #define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) | ||
| 622 | #define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) | ||
| 623 | #define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) | ||
| 624 | #define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) | ||
| 625 | #define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) | ||
| 626 | #define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) | ||
| 627 | #define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) | ||
| 628 | #define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) | ||
| 629 | #define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) | ||
| 630 | #define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) | ||
| 631 | #define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) | ||
| 632 | #define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) | ||
| 633 | #define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) | ||
| 634 | #define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) | ||
| 635 | #define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) | ||
| 636 | #define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) | ||
| 637 | #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) | ||
| 638 | #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) | ||
| 639 | #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) | ||
| 640 | #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) | ||
| 641 | #define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) | ||
| 642 | #define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) | ||
| 643 | #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) | ||
| 644 | #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) | ||
| 645 | 685 | ||
| 646 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) | 686 | #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) |
| 647 | 687 | ||
| @@ -696,4 +736,4 @@ struct kvm_assigned_msix_entry { | |||
| 696 | __u16 padding[3]; | 736 | __u16 padding[3]; |
| 697 | }; | 737 | }; |
| 698 | 738 | ||
| 699 | #endif | 739 | #endif /* __LINUX_KVM_H */ |
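
The new KVM_GET_CLOCK/KVM_SET_CLOCK vm ioctls (available with KVM_CAP_ADJUST_CLOCK) carry the kvm_clock_data structure defined above and exist to keep kvmclock monotonic across save/restore and migration: read the clock on the source once the guest is paused, write it back on the destination before the guest resumes. A rough userspace sketch (migrate_kvmclock is a hypothetical helper; the vm fds are assumed to exist, capability and error checks omitted):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static void migrate_kvmclock(int src_vm_fd, int dst_vm_fd)
    {
            struct kvm_clock_data data;

            /* Source side, after the guest has been paused. */
            ioctl(src_vm_fd, KVM_GET_CLOCK, &data);

            /* Destination side, before the guest is resumed. */
            ioctl(dst_vm_fd, KVM_SET_CLOCK, &data);
    }
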
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b7bbb5ddd7ae..bd5a616d9373 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
| @@ -120,7 +120,7 @@ struct kvm_kernel_irq_routing_entry { | |||
| 120 | u32 gsi; | 120 | u32 gsi; |
| 121 | u32 type; | 121 | u32 type; |
| 122 | int (*set)(struct kvm_kernel_irq_routing_entry *e, | 122 | int (*set)(struct kvm_kernel_irq_routing_entry *e, |
| 123 | struct kvm *kvm, int level); | 123 | struct kvm *kvm, int irq_source_id, int level); |
| 124 | union { | 124 | union { |
| 125 | struct { | 125 | struct { |
| 126 | unsigned irqchip; | 126 | unsigned irqchip; |
| @@ -128,9 +128,28 @@ struct kvm_kernel_irq_routing_entry { | |||
| 128 | } irqchip; | 128 | } irqchip; |
| 129 | struct msi_msg msi; | 129 | struct msi_msg msi; |
| 130 | }; | 130 | }; |
| 131 | struct list_head link; | 131 | struct hlist_node link; |
| 132 | }; | ||
| 133 | |||
| 134 | #ifdef __KVM_HAVE_IOAPIC | ||
| 135 | |||
| 136 | struct kvm_irq_routing_table { | ||
| 137 | int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; | ||
| 138 | struct kvm_kernel_irq_routing_entry *rt_entries; | ||
| 139 | u32 nr_rt_entries; | ||
| 140 | /* | ||
| 141 | * Array indexed by gsi. Each entry contains list of irq chips | ||
| 142 | * the gsi is connected to. | ||
| 143 | */ | ||
| 144 | struct hlist_head map[0]; | ||
| 132 | }; | 145 | }; |
| 133 | 146 | ||
| 147 | #else | ||
| 148 | |||
| 149 | struct kvm_irq_routing_table {}; | ||
| 150 | |||
| 151 | #endif | ||
| 152 | |||
| 134 | struct kvm { | 153 | struct kvm { |
| 135 | spinlock_t mmu_lock; | 154 | spinlock_t mmu_lock; |
| 136 | spinlock_t requests_lock; | 155 | spinlock_t requests_lock; |
| @@ -166,8 +185,9 @@ struct kvm { | |||
| 166 | 185 | ||
| 167 | struct mutex irq_lock; | 186 | struct mutex irq_lock; |
| 168 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 187 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
| 169 | struct list_head irq_routing; /* of kvm_kernel_irq_routing_entry */ | 188 | struct kvm_irq_routing_table *irq_routing; |
| 170 | struct hlist_head mask_notifier_list; | 189 | struct hlist_head mask_notifier_list; |
| 190 | struct hlist_head irq_ack_notifier_list; | ||
| 171 | #endif | 191 | #endif |
| 172 | 192 | ||
| 173 | #ifdef KVM_ARCH_WANT_MMU_NOTIFIER | 193 | #ifdef KVM_ARCH_WANT_MMU_NOTIFIER |
| @@ -266,6 +286,7 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); | |||
| 266 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); | 286 | void mark_page_dirty(struct kvm *kvm, gfn_t gfn); |
| 267 | 287 | ||
| 268 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); | 288 | void kvm_vcpu_block(struct kvm_vcpu *vcpu); |
| 289 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); | ||
| 269 | void kvm_resched(struct kvm_vcpu *vcpu); | 290 | void kvm_resched(struct kvm_vcpu *vcpu); |
| 270 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); | 291 | void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); |
| 271 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); | 292 | void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); |
| @@ -325,7 +346,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); | |||
| 325 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); | 346 | void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); |
| 326 | 347 | ||
| 327 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); | 348 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); |
| 328 | void kvm_arch_hardware_enable(void *garbage); | 349 | int kvm_arch_hardware_enable(void *garbage); |
| 329 | void kvm_arch_hardware_disable(void *garbage); | 350 | void kvm_arch_hardware_disable(void *garbage); |
| 330 | int kvm_arch_hardware_setup(void); | 351 | int kvm_arch_hardware_setup(void); |
| 331 | void kvm_arch_hardware_unsetup(void); | 352 | void kvm_arch_hardware_unsetup(void); |
| @@ -390,7 +411,12 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |||
| 390 | struct kvm_irq_mask_notifier *kimn); | 411 | struct kvm_irq_mask_notifier *kimn); |
| 391 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); | 412 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask); |
| 392 | 413 | ||
| 393 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level); | 414 | #ifdef __KVM_HAVE_IOAPIC |
| 415 | void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, | ||
| 416 | union kvm_ioapic_redirect_entry *entry, | ||
| 417 | unsigned long *deliver_bitmask); | ||
| 418 | #endif | ||
| 419 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level); | ||
| 394 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); | 420 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); |
| 395 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 421 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
| 396 | struct kvm_irq_ack_notifier *kian); | 422 | struct kvm_irq_ack_notifier *kian); |
| @@ -552,4 +578,21 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) | |||
| 552 | return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; | 578 | return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; |
| 553 | } | 579 | } |
| 554 | #endif | 580 | #endif |
| 581 | |||
| 582 | #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT | ||
| 583 | |||
| 584 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
| 585 | unsigned long arg); | ||
| 586 | |||
| 587 | #else | ||
| 588 | |||
| 589 | static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
| 590 | unsigned long arg) | ||
| 591 | { | ||
| 592 | return -ENOTTY; | ||
| 593 | } | ||
| 594 | |||
| 555 | #endif | 595 | #endif |
| 596 | |||
| 597 | #endif | ||
| 598 | |||
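
In kvm_host.h the flat irq_routing list is replaced by a kvm_irq_routing_table whose map[] array of hlist heads is indexed by GSI, so injecting an interrupt only walks the entries actually wired to that GSI and invokes their set() callbacks, which now also receive the irq_source_id. A simplified sketch of how such a table is consumed (deliver_gsi is a hypothetical name; the real kvm_set_irq() additionally handles locking and how per-entry return values are combined):

    #include <linux/kvm_host.h>

    static int deliver_gsi(struct kvm *kvm, struct kvm_irq_routing_table *table,
                           int irq_source_id, u32 gsi, int level)
    {
            struct kvm_kernel_irq_routing_entry *e;
            struct hlist_node *n;
            int ret = -1;

            if (gsi >= table->nr_rt_entries)
                    return ret;

            /* Walk only the entries connected to this GSI. */
            hlist_for_each_entry(e, n, &table->map[gsi], link)
                    ret = e->set(e, kvm, irq_source_id, level);

            return ret;
    }
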
diff --git a/include/linux/user-return-notifier.h b/include/linux/user-return-notifier.h new file mode 100644 index 000000000000..9c4a445bb43c --- /dev/null +++ b/include/linux/user-return-notifier.h | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | #ifndef _LINUX_USER_RETURN_NOTIFIER_H | ||
| 2 | #define _LINUX_USER_RETURN_NOTIFIER_H | ||
| 3 | |||
| 4 | #ifdef CONFIG_USER_RETURN_NOTIFIER | ||
| 5 | |||
| 6 | #include <linux/list.h> | ||
| 7 | #include <linux/sched.h> | ||
| 8 | |||
| 9 | struct user_return_notifier { | ||
| 10 | void (*on_user_return)(struct user_return_notifier *urn); | ||
| 11 | struct hlist_node link; | ||
| 12 | }; | ||
| 13 | |||
| 14 | |||
| 15 | void user_return_notifier_register(struct user_return_notifier *urn); | ||
| 16 | void user_return_notifier_unregister(struct user_return_notifier *urn); | ||
| 17 | |||
| 18 | static inline void propagate_user_return_notify(struct task_struct *prev, | ||
| 19 | struct task_struct *next) | ||
| 20 | { | ||
| 21 | if (test_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY)) { | ||
| 22 | clear_tsk_thread_flag(prev, TIF_USER_RETURN_NOTIFY); | ||
| 23 | set_tsk_thread_flag(next, TIF_USER_RETURN_NOTIFY); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | |||
| 27 | void fire_user_return_notifiers(void); | ||
| 28 | |||
| 29 | static inline void clear_user_return_notifier(struct task_struct *p) | ||
| 30 | { | ||
| 31 | clear_tsk_thread_flag(p, TIF_USER_RETURN_NOTIFY); | ||
| 32 | } | ||
| 33 | |||
| 34 | #else | ||
| 35 | |||
| 36 | struct user_return_notifier {}; | ||
| 37 | |||
| 38 | static inline void propagate_user_return_notify(struct task_struct *prev, | ||
| 39 | struct task_struct *next) | ||
| 40 | { | ||
| 41 | } | ||
| 42 | |||
| 43 | static inline void fire_user_return_notifiers(void) {} | ||
| 44 | |||
| 45 | static inline void clear_user_return_notifier(struct task_struct *p) {} | ||
| 46 | |||
| 47 | #endif | ||
| 48 | |||
| 49 | #endif | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 9943202b4355..864ff75d65f2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -99,6 +99,7 @@ obj-$(CONFIG_SLOW_WORK) += slow-work.o | |||
| 99 | obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o | 99 | obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o |
| 100 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o | 100 | obj-$(CONFIG_PERF_EVENTS) += perf_event.o |
| 101 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o | 101 | obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o |
| 102 | obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o | ||
| 102 | 103 | ||
| 103 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 104 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
| 104 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 105 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/fork.c b/kernel/fork.c index 3d6f121bbe8a..edeff9ceaab9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -64,6 +64,7 @@ | |||
| 64 | #include <linux/magic.h> | 64 | #include <linux/magic.h> |
| 65 | #include <linux/perf_event.h> | 65 | #include <linux/perf_event.h> |
| 66 | #include <linux/posix-timers.h> | 66 | #include <linux/posix-timers.h> |
| 67 | #include <linux/user-return-notifier.h> | ||
| 67 | 68 | ||
| 68 | #include <asm/pgtable.h> | 69 | #include <asm/pgtable.h> |
| 69 | #include <asm/pgalloc.h> | 70 | #include <asm/pgalloc.h> |
| @@ -249,6 +250,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
| 249 | goto out; | 250 | goto out; |
| 250 | 251 | ||
| 251 | setup_thread_stack(tsk, orig); | 252 | setup_thread_stack(tsk, orig); |
| 253 | clear_user_return_notifier(tsk); | ||
| 252 | stackend = end_of_stack(tsk); | 254 | stackend = end_of_stack(tsk); |
| 253 | *stackend = STACK_END_MAGIC; /* for overflow detection */ | 255 | *stackend = STACK_END_MAGIC; /* for overflow detection */ |
| 254 | 256 | ||
diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c new file mode 100644 index 000000000000..03e2d6fd9b18 --- /dev/null +++ b/kernel/user-return-notifier.c | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | |||
| 2 | #include <linux/user-return-notifier.h> | ||
| 3 | #include <linux/percpu.h> | ||
| 4 | #include <linux/sched.h> | ||
| 5 | #include <linux/module.h> | ||
| 6 | |||
| 7 | static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); | ||
| 8 | |||
| 9 | #define URN_LIST_HEAD per_cpu(return_notifier_list, raw_smp_processor_id()) | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Request a notification when the current cpu returns to userspace. Must be | ||
| 13 | * called in atomic context. The notifier will also be called in atomic | ||
| 14 | * context. | ||
| 15 | */ | ||
| 16 | void user_return_notifier_register(struct user_return_notifier *urn) | ||
| 17 | { | ||
| 18 | set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); | ||
| 19 | hlist_add_head(&urn->link, &URN_LIST_HEAD); | ||
| 20 | } | ||
| 21 | EXPORT_SYMBOL_GPL(user_return_notifier_register); | ||
| 22 | |||
| 23 | /* | ||
| 24 | * Removes a registered user return notifier. Must be called from atomic | ||
| 25 | * context, and from the same cpu registration occurred in. | ||
| 26 | */ | ||
| 27 | void user_return_notifier_unregister(struct user_return_notifier *urn) | ||
| 28 | { | ||
| 29 | hlist_del(&urn->link); | ||
| 30 | if (hlist_empty(&URN_LIST_HEAD)) | ||
| 31 | clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); | ||
| 32 | } | ||
| 33 | EXPORT_SYMBOL_GPL(user_return_notifier_unregister); | ||
| 34 | |||
| 35 | /* Calls registered user return notifiers */ | ||
| 36 | void fire_user_return_notifiers(void) | ||
| 37 | { | ||
| 38 | struct user_return_notifier *urn; | ||
| 39 | struct hlist_node *tmp1, *tmp2; | ||
| 40 | struct hlist_head *head; | ||
| 41 | |||
| 42 | head = &get_cpu_var(return_notifier_list); | ||
| 43 | hlist_for_each_entry_safe(urn, tmp1, tmp2, head, link) | ||
| 44 | urn->on_user_return(urn); | ||
| 45 | put_cpu_var(return_notifier_list); | ||
| 46 | } | ||
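
The user return notifier is what the new x86 shared-MSR handling builds on (see kvm_shared_msr_cpu_online() and drop_user_return_notifiers() in the kvm_arch_hardware_enable()/disable() hunks above): a notifier registered in atomic context on a CPU fires on that CPU's next return to user mode. A sketch of the intended usage pattern (my_cpu_state, my_on_user_return and my_defer_restore are illustrative names, not part of the patch):

    #include <linux/user-return-notifier.h>
    #include <linux/percpu.h>

    /* Illustrative per-cpu state, not part of the patch. */
    struct my_cpu_state {
            struct user_return_notifier urn;
            bool registered;
    };
    static DEFINE_PER_CPU(struct my_cpu_state, my_cpu_state);

    static void my_on_user_return(struct user_return_notifier *urn)
    {
            struct my_cpu_state *s = container_of(urn, struct my_cpu_state, urn);

            /* Called with preemption off, just before returning to userspace. */
            user_return_notifier_unregister(urn);
            s->registered = false;
            /* ... restore host-side per-cpu state here ... */
    }

    static void my_defer_restore(void)
    {
            struct my_cpu_state *s = &get_cpu_var(my_cpu_state);

            if (!s->registered) {
                    s->urn.on_user_return = my_on_user_return;
                    user_return_notifier_register(&s->urn);
                    s->registered = true;
            }
            put_cpu_var(my_cpu_state);
    }
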
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c new file mode 100644 index 000000000000..fd9c097b760a --- /dev/null +++ b/virt/kvm/assigned-dev.c | |||
| @@ -0,0 +1,818 @@ | |||
| 1 | /* | ||
| 2 | * Kernel-based Virtual Machine - device assignment support | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-9 Red Hat, Inc | ||
| 5 | * | ||
| 6 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 7 | * the COPYING file in the top-level directory. | ||
| 8 | * | ||
| 9 | */ | ||
| 10 | |||
| 11 | #include <linux/kvm_host.h> | ||
| 12 | #include <linux/kvm.h> | ||
| 13 | #include <linux/uaccess.h> | ||
| 14 | #include <linux/vmalloc.h> | ||
| 15 | #include <linux/errno.h> | ||
| 16 | #include <linux/spinlock.h> | ||
| 17 | #include <linux/pci.h> | ||
| 18 | #include <linux/interrupt.h> | ||
| 19 | #include "irq.h" | ||
| 20 | |||
| 21 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
| 22 | int assigned_dev_id) | ||
| 23 | { | ||
| 24 | struct list_head *ptr; | ||
| 25 | struct kvm_assigned_dev_kernel *match; | ||
| 26 | |||
| 27 | list_for_each(ptr, head) { | ||
| 28 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
| 29 | if (match->assigned_dev_id == assigned_dev_id) | ||
| 30 | return match; | ||
| 31 | } | ||
| 32 | return NULL; | ||
| 33 | } | ||
| 34 | |||
| 35 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
| 36 | *assigned_dev, int irq) | ||
| 37 | { | ||
| 38 | int i, index; | ||
| 39 | struct msix_entry *host_msix_entries; | ||
| 40 | |||
| 41 | host_msix_entries = assigned_dev->host_msix_entries; | ||
| 42 | |||
| 43 | index = -1; | ||
| 44 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 45 | if (irq == host_msix_entries[i].vector) { | ||
| 46 | index = i; | ||
| 47 | break; | ||
| 48 | } | ||
| 49 | if (index < 0) { | ||
| 50 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | |||
| 54 | return index; | ||
| 55 | } | ||
| 56 | |||
| 57 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
| 58 | { | ||
| 59 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
| 60 | struct kvm *kvm; | ||
| 61 | int i; | ||
| 62 | |||
| 63 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
| 64 | interrupt_work); | ||
| 65 | kvm = assigned_dev->kvm; | ||
| 66 | |||
| 67 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
| 68 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 69 | struct kvm_guest_msix_entry *guest_entries = | ||
| 70 | assigned_dev->guest_msix_entries; | ||
| 71 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
| 72 | if (!(guest_entries[i].flags & | ||
| 73 | KVM_ASSIGNED_MSIX_PENDING)) | ||
| 74 | continue; | ||
| 75 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
| 76 | kvm_set_irq(assigned_dev->kvm, | ||
| 77 | assigned_dev->irq_source_id, | ||
| 78 | guest_entries[i].vector, 1); | ||
| 79 | } | ||
| 80 | } else | ||
| 81 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
| 82 | assigned_dev->guest_irq, 1); | ||
| 83 | |||
| 84 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
| 85 | } | ||
| 86 | |||
| 87 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
| 88 | { | ||
| 89 | unsigned long flags; | ||
| 90 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
| 91 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
| 92 | |||
| 93 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
| 94 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 95 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
| 96 | if (index < 0) | ||
| 97 | goto out; | ||
| 98 | assigned_dev->guest_msix_entries[index].flags |= | ||
| 99 | KVM_ASSIGNED_MSIX_PENDING; | ||
| 100 | } | ||
| 101 | |||
| 102 | schedule_work(&assigned_dev->interrupt_work); | ||
| 103 | |||
| 104 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
| 105 | disable_irq_nosync(irq); | ||
| 106 | assigned_dev->host_irq_disabled = true; | ||
| 107 | } | ||
| 108 | |||
| 109 | out: | ||
| 110 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
| 111 | return IRQ_HANDLED; | ||
| 112 | } | ||
| 113 | |||
| 114 | /* Ack the irq line for an assigned device */ | ||
| 115 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
| 116 | { | ||
| 117 | struct kvm_assigned_dev_kernel *dev; | ||
| 118 | unsigned long flags; | ||
| 119 | |||
| 120 | if (kian->gsi == -1) | ||
| 121 | return; | ||
| 122 | |||
| 123 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
| 124 | ack_notifier); | ||
| 125 | |||
| 126 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
| 127 | |||
| 128 | /* The guest irq may be shared so this ack may be | ||
| 129 | * from another device. | ||
| 130 | */ | ||
| 131 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
| 132 | if (dev->host_irq_disabled) { | ||
| 133 | enable_irq(dev->host_irq); | ||
| 134 | dev->host_irq_disabled = false; | ||
| 135 | } | ||
| 136 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
| 137 | } | ||
| 138 | |||
| 139 | static void deassign_guest_irq(struct kvm *kvm, | ||
| 140 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 141 | { | ||
| 142 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
| 143 | assigned_dev->ack_notifier.gsi = -1; | ||
| 144 | |||
| 145 | if (assigned_dev->irq_source_id != -1) | ||
| 146 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
| 147 | assigned_dev->irq_source_id = -1; | ||
| 148 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
| 149 | } | ||
| 150 | |||
| 151 | /* The function implicitly holds the kvm->lock mutex due to cancel_work_sync() */ | ||
| 152 | static void deassign_host_irq(struct kvm *kvm, | ||
| 153 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 154 | { | ||
| 155 | /* | ||
| 156 | * In kvm_free_device_irq, cancel_work_sync returns true if: | ||
| 157 | * 1. work is scheduled, and then cancelled. | ||
| 158 | * 2. work callback is executed. | ||
| 159 | * | ||
| 160 | * The first case ensures that the irq is disabled and no more events | ||
| 161 | * would happen. But for the second one, the irq may be enabled (e.g. | ||
| 162 | * for MSI). So we disable irq here to prevent further events. | ||
| 163 | * | ||
| 164 | * Note this may result in a nested disable if the interrupt type is | ||
| 165 | * INTx, but that's OK since we are going to free it. | ||
| 166 | * | ||
| 167 | * If this function is part of VM destruction, make sure the kvm state | ||
| 168 | * is still valid at this point, since we probably also have to wait for | ||
| 169 | * interrupt_work to finish. | ||
| 170 | */ | ||
| 171 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 172 | int i; | ||
| 173 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 174 | disable_irq_nosync(assigned_dev-> | ||
| 175 | host_msix_entries[i].vector); | ||
| 176 | |||
| 177 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
| 178 | |||
| 179 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 180 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
| 181 | (void *)assigned_dev); | ||
| 182 | |||
| 183 | assigned_dev->entries_nr = 0; | ||
| 184 | kfree(assigned_dev->host_msix_entries); | ||
| 185 | kfree(assigned_dev->guest_msix_entries); | ||
| 186 | pci_disable_msix(assigned_dev->dev); | ||
| 187 | } else { | ||
| 188 | /* Deal with MSI and INTx */ | ||
| 189 | disable_irq_nosync(assigned_dev->host_irq); | ||
| 190 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
| 191 | |||
| 192 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
| 193 | |||
| 194 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
| 195 | pci_disable_msi(assigned_dev->dev); | ||
| 196 | } | ||
| 197 | |||
| 198 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
| 199 | } | ||
| 200 | |||
| 201 | static int kvm_deassign_irq(struct kvm *kvm, | ||
| 202 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
| 203 | unsigned long irq_requested_type) | ||
| 204 | { | ||
| 205 | unsigned long guest_irq_type, host_irq_type; | ||
| 206 | |||
| 207 | if (!irqchip_in_kernel(kvm)) | ||
| 208 | return -EINVAL; | ||
| 209 | /* no irq assignment to deassign */ | ||
| 210 | if (!assigned_dev->irq_requested_type) | ||
| 211 | return -ENXIO; | ||
| 212 | |||
| 213 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
| 214 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
| 215 | |||
| 216 | if (host_irq_type) | ||
| 217 | deassign_host_irq(kvm, assigned_dev); | ||
| 218 | if (guest_irq_type) | ||
| 219 | deassign_guest_irq(kvm, assigned_dev); | ||
| 220 | |||
| 221 | return 0; | ||
| 222 | } | ||
| 223 | |||
| 224 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
| 225 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 226 | { | ||
| 227 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
| 228 | } | ||
| 229 | |||
| 230 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
| 231 | struct kvm_assigned_dev_kernel | ||
| 232 | *assigned_dev) | ||
| 233 | { | ||
| 234 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
| 235 | |||
| 236 | pci_reset_function(assigned_dev->dev); | ||
| 237 | |||
| 238 | pci_release_regions(assigned_dev->dev); | ||
| 239 | pci_disable_device(assigned_dev->dev); | ||
| 240 | pci_dev_put(assigned_dev->dev); | ||
| 241 | |||
| 242 | list_del(&assigned_dev->list); | ||
| 243 | kfree(assigned_dev); | ||
| 244 | } | ||
| 245 | |||
| 246 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
| 247 | { | ||
| 248 | struct list_head *ptr, *ptr2; | ||
| 249 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
| 250 | |||
| 251 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
| 252 | assigned_dev = list_entry(ptr, | ||
| 253 | struct kvm_assigned_dev_kernel, | ||
| 254 | list); | ||
| 255 | |||
| 256 | kvm_free_assigned_device(kvm, assigned_dev); | ||
| 257 | } | ||
| 258 | } | ||
| 259 | |||
| 260 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
| 261 | struct kvm_assigned_dev_kernel *dev) | ||
| 262 | { | ||
| 263 | dev->host_irq = dev->dev->irq; | ||
| 264 | /* Even though this is PCI, we don't want to use shared | ||
| 265 | * interrupts. Sharing host devices with guest-assigned devices | ||
| 266 | * on the same interrupt line is not a happy situation: there | ||
| 267 | * are going to be long delays in accepting, acking, etc. | ||
| 268 | */ | ||
| 269 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
| 270 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
| 271 | return -EIO; | ||
| 272 | return 0; | ||
| 273 | } | ||
| 274 | |||
| 275 | #ifdef __KVM_HAVE_MSI | ||
| 276 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
| 277 | struct kvm_assigned_dev_kernel *dev) | ||
| 278 | { | ||
| 279 | int r; | ||
| 280 | |||
| 281 | if (!dev->dev->msi_enabled) { | ||
| 282 | r = pci_enable_msi(dev->dev); | ||
| 283 | if (r) | ||
| 284 | return r; | ||
| 285 | } | ||
| 286 | |||
| 287 | dev->host_irq = dev->dev->irq; | ||
| 288 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
| 289 | "kvm_assigned_msi_device", (void *)dev)) { | ||
| 290 | pci_disable_msi(dev->dev); | ||
| 291 | return -EIO; | ||
| 292 | } | ||
| 293 | |||
| 294 | return 0; | ||
| 295 | } | ||
| 296 | #endif | ||
| 297 | |||
| 298 | #ifdef __KVM_HAVE_MSIX | ||
| 299 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
| 300 | struct kvm_assigned_dev_kernel *dev) | ||
| 301 | { | ||
| 302 | int i, r = -EINVAL; | ||
| 303 | |||
| 304 | /* host_msix_entries and guest_msix_entries should have been | ||
| 305 | * initialized */ | ||
| 306 | if (dev->entries_nr == 0) | ||
| 307 | return r; | ||
| 308 | |||
| 309 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
| 310 | if (r) | ||
| 311 | return r; | ||
| 312 | |||
| 313 | for (i = 0; i < dev->entries_nr; i++) { | ||
| 314 | r = request_irq(dev->host_msix_entries[i].vector, | ||
| 315 | kvm_assigned_dev_intr, 0, | ||
| 316 | "kvm_assigned_msix_device", | ||
| 317 | (void *)dev); | ||
| 318 | /* FIXME: free requested_irq's on failure */ | ||
| 319 | if (r) | ||
| 320 | return r; | ||
| 321 | } | ||
| 322 | |||
| 323 | return 0; | ||
| 324 | } | ||
| 325 | |||
| 326 | #endif | ||
| 327 | |||
| 328 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
| 329 | struct kvm_assigned_dev_kernel *dev, | ||
| 330 | struct kvm_assigned_irq *irq) | ||
| 331 | { | ||
| 332 | dev->guest_irq = irq->guest_irq; | ||
| 333 | dev->ack_notifier.gsi = irq->guest_irq; | ||
| 334 | return 0; | ||
| 335 | } | ||
| 336 | |||
| 337 | #ifdef __KVM_HAVE_MSI | ||
| 338 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
| 339 | struct kvm_assigned_dev_kernel *dev, | ||
| 340 | struct kvm_assigned_irq *irq) | ||
| 341 | { | ||
| 342 | dev->guest_irq = irq->guest_irq; | ||
| 343 | dev->ack_notifier.gsi = -1; | ||
| 344 | dev->host_irq_disabled = false; | ||
| 345 | return 0; | ||
| 346 | } | ||
| 347 | #endif | ||
| 348 | |||
| 349 | #ifdef __KVM_HAVE_MSIX | ||
| 350 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
| 351 | struct kvm_assigned_dev_kernel *dev, | ||
| 352 | struct kvm_assigned_irq *irq) | ||
| 353 | { | ||
| 354 | dev->guest_irq = irq->guest_irq; | ||
| 355 | dev->ack_notifier.gsi = -1; | ||
| 356 | dev->host_irq_disabled = false; | ||
| 357 | return 0; | ||
| 358 | } | ||
| 359 | #endif | ||
| 360 | |||
| 361 | static int assign_host_irq(struct kvm *kvm, | ||
| 362 | struct kvm_assigned_dev_kernel *dev, | ||
| 363 | __u32 host_irq_type) | ||
| 364 | { | ||
| 365 | int r = -EEXIST; | ||
| 366 | |||
| 367 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
| 368 | return r; | ||
| 369 | |||
| 370 | switch (host_irq_type) { | ||
| 371 | case KVM_DEV_IRQ_HOST_INTX: | ||
| 372 | r = assigned_device_enable_host_intx(kvm, dev); | ||
| 373 | break; | ||
| 374 | #ifdef __KVM_HAVE_MSI | ||
| 375 | case KVM_DEV_IRQ_HOST_MSI: | ||
| 376 | r = assigned_device_enable_host_msi(kvm, dev); | ||
| 377 | break; | ||
| 378 | #endif | ||
| 379 | #ifdef __KVM_HAVE_MSIX | ||
| 380 | case KVM_DEV_IRQ_HOST_MSIX: | ||
| 381 | r = assigned_device_enable_host_msix(kvm, dev); | ||
| 382 | break; | ||
| 383 | #endif | ||
| 384 | default: | ||
| 385 | r = -EINVAL; | ||
| 386 | } | ||
| 387 | |||
| 388 | if (!r) | ||
| 389 | dev->irq_requested_type |= host_irq_type; | ||
| 390 | |||
| 391 | return r; | ||
| 392 | } | ||
| 393 | |||
| 394 | static int assign_guest_irq(struct kvm *kvm, | ||
| 395 | struct kvm_assigned_dev_kernel *dev, | ||
| 396 | struct kvm_assigned_irq *irq, | ||
| 397 | unsigned long guest_irq_type) | ||
| 398 | { | ||
| 399 | int id; | ||
| 400 | int r = -EEXIST; | ||
| 401 | |||
| 402 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
| 403 | return r; | ||
| 404 | |||
| 405 | id = kvm_request_irq_source_id(kvm); | ||
| 406 | if (id < 0) | ||
| 407 | return id; | ||
| 408 | |||
| 409 | dev->irq_source_id = id; | ||
| 410 | |||
| 411 | switch (guest_irq_type) { | ||
| 412 | case KVM_DEV_IRQ_GUEST_INTX: | ||
| 413 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
| 414 | break; | ||
| 415 | #ifdef __KVM_HAVE_MSI | ||
| 416 | case KVM_DEV_IRQ_GUEST_MSI: | ||
| 417 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
| 418 | break; | ||
| 419 | #endif | ||
| 420 | #ifdef __KVM_HAVE_MSIX | ||
| 421 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
| 422 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
| 423 | break; | ||
| 424 | #endif | ||
| 425 | default: | ||
| 426 | r = -EINVAL; | ||
| 427 | } | ||
| 428 | |||
| 429 | if (!r) { | ||
| 430 | dev->irq_requested_type |= guest_irq_type; | ||
| 431 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
| 432 | } else | ||
| 433 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
| 434 | |||
| 435 | return r; | ||
| 436 | } | ||
| 437 | |||
| 438 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
| 439 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
| 440 | struct kvm_assigned_irq *assigned_irq) | ||
| 441 | { | ||
| 442 | int r = -EINVAL; | ||
| 443 | struct kvm_assigned_dev_kernel *match; | ||
| 444 | unsigned long host_irq_type, guest_irq_type; | ||
| 445 | |||
| 446 | if (!capable(CAP_SYS_RAWIO)) | ||
| 447 | return -EPERM; | ||
| 448 | |||
| 449 | if (!irqchip_in_kernel(kvm)) | ||
| 450 | return r; | ||
| 451 | |||
| 452 | mutex_lock(&kvm->lock); | ||
| 453 | r = -ENODEV; | ||
| 454 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 455 | assigned_irq->assigned_dev_id); | ||
| 456 | if (!match) | ||
| 457 | goto out; | ||
| 458 | |||
| 459 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
| 460 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
| 461 | |||
| 462 | r = -EINVAL; | ||
| 463 | /* can only assign one type at a time */ | ||
| 464 | if (hweight_long(host_irq_type) > 1) | ||
| 465 | goto out; | ||
| 466 | if (hweight_long(guest_irq_type) > 1) | ||
| 467 | goto out; | ||
| 468 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
| 469 | goto out; | ||
| 470 | |||
| 471 | r = 0; | ||
| 472 | if (host_irq_type) | ||
| 473 | r = assign_host_irq(kvm, match, host_irq_type); | ||
| 474 | if (r) | ||
| 475 | goto out; | ||
| 476 | |||
| 477 | if (guest_irq_type) | ||
| 478 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
| 479 | out: | ||
| 480 | mutex_unlock(&kvm->lock); | ||
| 481 | return r; | ||
| 482 | } | ||
| 483 | |||
| 484 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
| 485 | struct kvm_assigned_irq | ||
| 486 | *assigned_irq) | ||
| 487 | { | ||
| 488 | int r = -ENODEV; | ||
| 489 | struct kvm_assigned_dev_kernel *match; | ||
| 490 | |||
| 491 | mutex_lock(&kvm->lock); | ||
| 492 | |||
| 493 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 494 | assigned_irq->assigned_dev_id); | ||
| 495 | if (!match) | ||
| 496 | goto out; | ||
| 497 | |||
| 498 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
| 499 | out: | ||
| 500 | mutex_unlock(&kvm->lock); | ||
| 501 | return r; | ||
| 502 | } | ||
| 503 | |||
| 504 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
| 505 | struct kvm_assigned_pci_dev *assigned_dev) | ||
| 506 | { | ||
| 507 | int r = 0; | ||
| 508 | struct kvm_assigned_dev_kernel *match; | ||
| 509 | struct pci_dev *dev; | ||
| 510 | |||
| 511 | down_read(&kvm->slots_lock); | ||
| 512 | mutex_lock(&kvm->lock); | ||
| 513 | |||
| 514 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 515 | assigned_dev->assigned_dev_id); | ||
| 516 | if (match) { | ||
| 517 | /* device already assigned */ | ||
| 518 | r = -EEXIST; | ||
| 519 | goto out; | ||
| 520 | } | ||
| 521 | |||
| 522 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
| 523 | if (match == NULL) { | ||
| 524 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
| 525 | __func__); | ||
| 526 | r = -ENOMEM; | ||
| 527 | goto out; | ||
| 528 | } | ||
| 529 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
| 530 | assigned_dev->devfn); | ||
| 531 | if (!dev) { | ||
| 532 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
| 533 | r = -EINVAL; | ||
| 534 | goto out_free; | ||
| 535 | } | ||
| 536 | if (pci_enable_device(dev)) { | ||
| 537 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
| 538 | r = -EBUSY; | ||
| 539 | goto out_put; | ||
| 540 | } | ||
| 541 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
| 542 | if (r) { | ||
| 543 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
| 544 | __func__); | ||
| 545 | goto out_disable; | ||
| 546 | } | ||
| 547 | |||
| 548 | pci_reset_function(dev); | ||
| 549 | |||
| 550 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
| 551 | match->host_busnr = assigned_dev->busnr; | ||
| 552 | match->host_devfn = assigned_dev->devfn; | ||
| 553 | match->flags = assigned_dev->flags; | ||
| 554 | match->dev = dev; | ||
| 555 | spin_lock_init(&match->assigned_dev_lock); | ||
| 556 | match->irq_source_id = -1; | ||
| 557 | match->kvm = kvm; | ||
| 558 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
| 559 | INIT_WORK(&match->interrupt_work, | ||
| 560 | kvm_assigned_dev_interrupt_work_handler); | ||
| 561 | |||
| 562 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
| 563 | |||
| 564 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
| 565 | if (!kvm->arch.iommu_domain) { | ||
| 566 | r = kvm_iommu_map_guest(kvm); | ||
| 567 | if (r) | ||
| 568 | goto out_list_del; | ||
| 569 | } | ||
| 570 | r = kvm_assign_device(kvm, match); | ||
| 571 | if (r) | ||
| 572 | goto out_list_del; | ||
| 573 | } | ||
| 574 | |||
| 575 | out: | ||
| 576 | mutex_unlock(&kvm->lock); | ||
| 577 | up_read(&kvm->slots_lock); | ||
| 578 | return r; | ||
| 579 | out_list_del: | ||
| 580 | list_del(&match->list); | ||
| 581 | pci_release_regions(dev); | ||
| 582 | out_disable: | ||
| 583 | pci_disable_device(dev); | ||
| 584 | out_put: | ||
| 585 | pci_dev_put(dev); | ||
| 586 | out_free: | ||
| 587 | kfree(match); | ||
| 588 | mutex_unlock(&kvm->lock); | ||
| 589 | up_read(&kvm->slots_lock); | ||
| 590 | return r; | ||
| 591 | } | ||
| 592 | |||
| 593 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
| 594 | struct kvm_assigned_pci_dev *assigned_dev) | ||
| 595 | { | ||
| 596 | int r = 0; | ||
| 597 | struct kvm_assigned_dev_kernel *match; | ||
| 598 | |||
| 599 | mutex_lock(&kvm->lock); | ||
| 600 | |||
| 601 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 602 | assigned_dev->assigned_dev_id); | ||
| 603 | if (!match) { | ||
| 604 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
| 605 | "so cannot be deassigned\n", __func__); | ||
| 606 | r = -EINVAL; | ||
| 607 | goto out; | ||
| 608 | } | ||
| 609 | |||
| 610 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
| 611 | kvm_deassign_device(kvm, match); | ||
| 612 | |||
| 613 | kvm_free_assigned_device(kvm, match); | ||
| 614 | |||
| 615 | out: | ||
| 616 | mutex_unlock(&kvm->lock); | ||
| 617 | return r; | ||
| 618 | } | ||
| 619 | |||
| 620 | |||
| 621 | #ifdef __KVM_HAVE_MSIX | ||
| 622 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
| 623 | struct kvm_assigned_msix_nr *entry_nr) | ||
| 624 | { | ||
| 625 | int r = 0; | ||
| 626 | struct kvm_assigned_dev_kernel *adev; | ||
| 627 | |||
| 628 | mutex_lock(&kvm->lock); | ||
| 629 | |||
| 630 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 631 | entry_nr->assigned_dev_id); | ||
| 632 | if (!adev) { | ||
| 633 | r = -EINVAL; | ||
| 634 | goto msix_nr_out; | ||
| 635 | } | ||
| 636 | |||
| 637 | if (adev->entries_nr == 0) { | ||
| 638 | adev->entries_nr = entry_nr->entry_nr; | ||
| 639 | if (adev->entries_nr == 0 || | ||
| 640 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
| 641 | r = -EINVAL; | ||
| 642 | goto msix_nr_out; | ||
| 643 | } | ||
| 644 | |||
| 645 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
| 646 | entry_nr->entry_nr, | ||
| 647 | GFP_KERNEL); | ||
| 648 | if (!adev->host_msix_entries) { | ||
| 649 | r = -ENOMEM; | ||
| 650 | goto msix_nr_out; | ||
| 651 | } | ||
| 652 | adev->guest_msix_entries = kzalloc( | ||
| 653 | sizeof(struct kvm_guest_msix_entry) * | ||
| 654 | entry_nr->entry_nr, GFP_KERNEL); | ||
| 655 | if (!adev->guest_msix_entries) { | ||
| 656 | kfree(adev->host_msix_entries); | ||
| 657 | r = -ENOMEM; | ||
| 658 | goto msix_nr_out; | ||
| 659 | } | ||
| 660 | } else /* Not allowed to set the MSI-X entry count twice */ | ||
| 661 | r = -EINVAL; | ||
| 662 | msix_nr_out: | ||
| 663 | mutex_unlock(&kvm->lock); | ||
| 664 | return r; | ||
| 665 | } | ||
| 666 | |||
| 667 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
| 668 | struct kvm_assigned_msix_entry *entry) | ||
| 669 | { | ||
| 670 | int r = 0, i; | ||
| 671 | struct kvm_assigned_dev_kernel *adev; | ||
| 672 | |||
| 673 | mutex_lock(&kvm->lock); | ||
| 674 | |||
| 675 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 676 | entry->assigned_dev_id); | ||
| 677 | |||
| 678 | if (!adev) { | ||
| 679 | r = -EINVAL; | ||
| 680 | goto msix_entry_out; | ||
| 681 | } | ||
| 682 | |||
| 683 | for (i = 0; i < adev->entries_nr; i++) | ||
| 684 | if (adev->guest_msix_entries[i].vector == 0 || | ||
| 685 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
| 686 | adev->guest_msix_entries[i].entry = entry->entry; | ||
| 687 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
| 688 | adev->host_msix_entries[i].entry = entry->entry; | ||
| 689 | break; | ||
| 690 | } | ||
| 691 | if (i == adev->entries_nr) { | ||
| 692 | r = -ENOSPC; | ||
| 693 | goto msix_entry_out; | ||
| 694 | } | ||
| 695 | |||
| 696 | msix_entry_out: | ||
| 697 | mutex_unlock(&kvm->lock); | ||
| 698 | |||
| 699 | return r; | ||
| 700 | } | ||
| 701 | #endif | ||
| 702 | |||
| 703 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | ||
| 704 | unsigned long arg) | ||
| 705 | { | ||
| 706 | void __user *argp = (void __user *)arg; | ||
| 707 | int r = -ENOTTY; | ||
| 708 | |||
| 709 | switch (ioctl) { | ||
| 710 | case KVM_ASSIGN_PCI_DEVICE: { | ||
| 711 | struct kvm_assigned_pci_dev assigned_dev; | ||
| 712 | |||
| 713 | r = -EFAULT; | ||
| 714 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
| 715 | goto out; | ||
| 716 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
| 717 | if (r) | ||
| 718 | goto out; | ||
| 719 | break; | ||
| 720 | } | ||
| 721 | case KVM_ASSIGN_IRQ: { | ||
| 722 | r = -EOPNOTSUPP; | ||
| 723 | break; | ||
| 724 | } | ||
| 725 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
| 726 | case KVM_ASSIGN_DEV_IRQ: { | ||
| 727 | struct kvm_assigned_irq assigned_irq; | ||
| 728 | |||
| 729 | r = -EFAULT; | ||
| 730 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
| 731 | goto out; | ||
| 732 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
| 733 | if (r) | ||
| 734 | goto out; | ||
| 735 | break; | ||
| 736 | } | ||
| 737 | case KVM_DEASSIGN_DEV_IRQ: { | ||
| 738 | struct kvm_assigned_irq assigned_irq; | ||
| 739 | |||
| 740 | r = -EFAULT; | ||
| 741 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
| 742 | goto out; | ||
| 743 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
| 744 | if (r) | ||
| 745 | goto out; | ||
| 746 | break; | ||
| 747 | } | ||
| 748 | #endif | ||
| 749 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
| 750 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
| 751 | struct kvm_assigned_pci_dev assigned_dev; | ||
| 752 | |||
| 753 | r = -EFAULT; | ||
| 754 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
| 755 | goto out; | ||
| 756 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
| 757 | if (r) | ||
| 758 | goto out; | ||
| 759 | break; | ||
| 760 | } | ||
| 761 | #endif | ||
| 762 | #ifdef KVM_CAP_IRQ_ROUTING | ||
| 763 | case KVM_SET_GSI_ROUTING: { | ||
| 764 | struct kvm_irq_routing routing; | ||
| 765 | struct kvm_irq_routing __user *urouting; | ||
| 766 | struct kvm_irq_routing_entry *entries; | ||
| 767 | |||
| 768 | r = -EFAULT; | ||
| 769 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
| 770 | goto out; | ||
| 771 | r = -EINVAL; | ||
| 772 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
| 773 | goto out; | ||
| 774 | if (routing.flags) | ||
| 775 | goto out; | ||
| 776 | r = -ENOMEM; | ||
| 777 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
| 778 | if (!entries) | ||
| 779 | goto out; | ||
| 780 | r = -EFAULT; | ||
| 781 | urouting = argp; | ||
| 782 | if (copy_from_user(entries, urouting->entries, | ||
| 783 | routing.nr * sizeof(*entries))) | ||
| 784 | goto out_free_irq_routing; | ||
| 785 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
| 786 | routing.flags); | ||
| 787 | out_free_irq_routing: | ||
| 788 | vfree(entries); | ||
| 789 | break; | ||
| 790 | } | ||
| 791 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
| 792 | #ifdef __KVM_HAVE_MSIX | ||
| 793 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
| 794 | struct kvm_assigned_msix_nr entry_nr; | ||
| 795 | r = -EFAULT; | ||
| 796 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
| 797 | goto out; | ||
| 798 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
| 799 | if (r) | ||
| 800 | goto out; | ||
| 801 | break; | ||
| 802 | } | ||
| 803 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
| 804 | struct kvm_assigned_msix_entry entry; | ||
| 805 | r = -EFAULT; | ||
| 806 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
| 807 | goto out; | ||
| 808 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
| 809 | if (r) | ||
| 810 | goto out; | ||
| 811 | break; | ||
| 812 | } | ||
| 813 | #endif | ||
| 814 | } | ||
| 815 | out: | ||
| 816 | return r; | ||
| 817 | } | ||
| 818 | |||
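
The ioctl dispatcher above is driven entirely from userspace. As a rough illustration of how a VMM would exercise it, here is a minimal userspace sketch (not part of this patch): it assumes a VM file descriptor already obtained via KVM_CREATE_VM, and the device handle, bus/devfn location and guest GSI are placeholder values. The structure fields and flag names are the ones consumed by kvm_vm_ioctl_assign_device() and kvm_vm_ioctl_assign_irq() above.

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hypothetical helper: assign the PCI device at 01:00.0 to the VM and give
 * it a host MSI routed to guest GSI 40.  All identifiers are illustrative. */
static int assign_example_device(int vmfd)
{
	struct kvm_assigned_pci_dev dev;
	struct kvm_assigned_irq irq;

	memset(&dev, 0, sizeof(dev));
	dev.assigned_dev_id = 1;                  /* handle chosen by userspace */
	dev.busnr = 0x01;
	dev.devfn = 0x00;                         /* device 0, function 0 */
	dev.flags = KVM_DEV_ASSIGN_ENABLE_IOMMU;  /* map guest memory through the IOMMU */
	if (ioctl(vmfd, KVM_ASSIGN_PCI_DEVICE, &dev) < 0)
		return -1;

	memset(&irq, 0, sizeof(irq));
	irq.assigned_dev_id = 1;
	irq.guest_irq = 40;
	/* at most one host type and one guest type per call, per kvm_vm_ioctl_assign_irq() */
	irq.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
	return ioctl(vmfd, KVM_ASSIGN_DEV_IRQ, &irq);
}
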
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bb4ebd89b9ff..30f70fd511c4 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c | |||
| @@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work) | |||
| 61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); | 61 | struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); |
| 62 | struct kvm *kvm = irqfd->kvm; | 62 | struct kvm *kvm = irqfd->kvm; |
| 63 | 63 | ||
| 64 | mutex_lock(&kvm->irq_lock); | ||
| 65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); | 64 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); |
| 66 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); | 65 | kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); |
| 67 | mutex_unlock(&kvm->irq_lock); | ||
| 68 | } | 66 | } |
| 69 | 67 | ||
| 70 | /* | 68 | /* |
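
The hunk above only removes the irq_lock round trip: kvm_set_irq() becomes safe to call without it once the routing table is RCU-protected (see the irq_comm.c changes below). For context, a hedged userspace sketch of how a GSI is bound to an eventfd in the first place; field names follow struct kvm_irqfd, and the GSI value is arbitrary.

#include <linux/kvm.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>

/* Illustrative only: bind an eventfd to guest GSI 5 so that a write to the
 * eventfd ends up in irqfd_inject() shown above. */
static int setup_irqfd(int vmfd)
{
	struct kvm_irqfd data;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;

	memset(&data, 0, sizeof(data));
	data.fd = efd;
	data.gsi = 5;
	return ioctl(vmfd, KVM_IRQFD, &data);
}
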
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 9fe140bb38ec..38a2d20b89de 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c | |||
| @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
| 182 | union kvm_ioapic_redirect_entry entry; | 182 | union kvm_ioapic_redirect_entry entry; |
| 183 | int ret = 1; | 183 | int ret = 1; |
| 184 | 184 | ||
| 185 | mutex_lock(&ioapic->lock); | ||
| 185 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { | 186 | if (irq >= 0 && irq < IOAPIC_NUM_PINS) { |
| 186 | entry = ioapic->redirtbl[irq]; | 187 | entry = ioapic->redirtbl[irq]; |
| 187 | level ^= entry.fields.polarity; | 188 | level ^= entry.fields.polarity; |
| @@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) | |||
| 198 | } | 199 | } |
| 199 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); | 200 | trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); |
| 200 | } | 201 | } |
| 202 | mutex_unlock(&ioapic->lock); | ||
| 203 | |||
| 201 | return ret; | 204 | return ret; |
| 202 | } | 205 | } |
| 203 | 206 | ||
| 204 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, | 207 | static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, |
| 205 | int trigger_mode) | 208 | int trigger_mode) |
| 206 | { | 209 | { |
| 207 | union kvm_ioapic_redirect_entry *ent; | 210 | int i; |
| 211 | |||
| 212 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { | ||
| 213 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; | ||
| 208 | 214 | ||
| 209 | ent = &ioapic->redirtbl[pin]; | 215 | if (ent->fields.vector != vector) |
| 216 | continue; | ||
| 210 | 217 | ||
| 211 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); | 218 | /* |
| 219 | * We are dropping lock while calling ack notifiers because ack | ||
| 220 | * notifier callbacks for assigned devices call into IOAPIC | ||
| 221 | * recursively. Since remote_irr is cleared only after the | ||
| 222 | * notifiers have been called, a vector delivered while the | ||
| 223 | * lock is dropped is latched in irr and will be delivered | ||
| 224 | * once the ack notifier returns. | ||
| 225 | */ | ||
| 226 | mutex_unlock(&ioapic->lock); | ||
| 227 | kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); | ||
| 228 | mutex_lock(&ioapic->lock); | ||
| 229 | |||
| 230 | if (trigger_mode != IOAPIC_LEVEL_TRIG) | ||
| 231 | continue; | ||
| 212 | 232 | ||
| 213 | if (trigger_mode == IOAPIC_LEVEL_TRIG) { | ||
| 214 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); | 233 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
| 215 | ent->fields.remote_irr = 0; | 234 | ent->fields.remote_irr = 0; |
| 216 | if (!ent->fields.mask && (ioapic->irr & (1 << pin))) | 235 | if (!ent->fields.mask && (ioapic->irr & (1 << i))) |
| 217 | ioapic_service(ioapic, pin); | 236 | ioapic_service(ioapic, i); |
| 218 | } | 237 | } |
| 219 | } | 238 | } |
| 220 | 239 | ||
| 221 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) | 240 | void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) |
| 222 | { | 241 | { |
| 223 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; | 242 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
| 224 | int i; | ||
| 225 | 243 | ||
| 226 | for (i = 0; i < IOAPIC_NUM_PINS; i++) | 244 | mutex_lock(&ioapic->lock); |
| 227 | if (ioapic->redirtbl[i].fields.vector == vector) | 245 | __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); |
| 228 | __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); | 246 | mutex_unlock(&ioapic->lock); |
| 229 | } | 247 | } |
| 230 | 248 | ||
| 231 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) | 249 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) |
| @@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 250 | ioapic_debug("addr %lx\n", (unsigned long)addr); | 268 | ioapic_debug("addr %lx\n", (unsigned long)addr); |
| 251 | ASSERT(!(addr & 0xf)); /* check alignment */ | 269 | ASSERT(!(addr & 0xf)); /* check alignment */ |
| 252 | 270 | ||
| 253 | mutex_lock(&ioapic->kvm->irq_lock); | ||
| 254 | addr &= 0xff; | 271 | addr &= 0xff; |
| 272 | mutex_lock(&ioapic->lock); | ||
| 255 | switch (addr) { | 273 | switch (addr) { |
| 256 | case IOAPIC_REG_SELECT: | 274 | case IOAPIC_REG_SELECT: |
| 257 | result = ioapic->ioregsel; | 275 | result = ioapic->ioregsel; |
| @@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 265 | result = 0; | 283 | result = 0; |
| 266 | break; | 284 | break; |
| 267 | } | 285 | } |
| 286 | mutex_unlock(&ioapic->lock); | ||
| 287 | |||
| 268 | switch (len) { | 288 | switch (len) { |
| 269 | case 8: | 289 | case 8: |
| 270 | *(u64 *) val = result; | 290 | *(u64 *) val = result; |
| @@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 277 | default: | 297 | default: |
| 278 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); | 298 | printk(KERN_WARNING "ioapic: wrong length %d\n", len); |
| 279 | } | 299 | } |
| 280 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
| 281 | return 0; | 300 | return 0; |
| 282 | } | 301 | } |
| 283 | 302 | ||
| @@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 293 | (void*)addr, len, val); | 312 | (void*)addr, len, val); |
| 294 | ASSERT(!(addr & 0xf)); /* check alignment */ | 313 | ASSERT(!(addr & 0xf)); /* check alignment */ |
| 295 | 314 | ||
| 296 | mutex_lock(&ioapic->kvm->irq_lock); | ||
| 297 | if (len == 4 || len == 8) | 315 | if (len == 4 || len == 8) |
| 298 | data = *(u32 *) val; | 316 | data = *(u32 *) val; |
| 299 | else { | 317 | else { |
| 300 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); | 318 | printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); |
| 301 | goto unlock; | 319 | return 0; |
| 302 | } | 320 | } |
| 303 | 321 | ||
| 304 | addr &= 0xff; | 322 | addr &= 0xff; |
| 323 | mutex_lock(&ioapic->lock); | ||
| 305 | switch (addr) { | 324 | switch (addr) { |
| 306 | case IOAPIC_REG_SELECT: | 325 | case IOAPIC_REG_SELECT: |
| 307 | ioapic->ioregsel = data; | 326 | ioapic->ioregsel = data; |
| @@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, | |||
| 312 | break; | 331 | break; |
| 313 | #ifdef CONFIG_IA64 | 332 | #ifdef CONFIG_IA64 |
| 314 | case IOAPIC_REG_EOI: | 333 | case IOAPIC_REG_EOI: |
| 315 | kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); | 334 | __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); |
| 316 | break; | 335 | break; |
| 317 | #endif | 336 | #endif |
| 318 | 337 | ||
| 319 | default: | 338 | default: |
| 320 | break; | 339 | break; |
| 321 | } | 340 | } |
| 322 | unlock: | 341 | mutex_unlock(&ioapic->lock); |
| 323 | mutex_unlock(&ioapic->kvm->irq_lock); | ||
| 324 | return 0; | 342 | return 0; |
| 325 | } | 343 | } |
| 326 | 344 | ||
| @@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
| 349 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); | 367 | ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); |
| 350 | if (!ioapic) | 368 | if (!ioapic) |
| 351 | return -ENOMEM; | 369 | return -ENOMEM; |
| 370 | mutex_init(&ioapic->lock); | ||
| 352 | kvm->arch.vioapic = ioapic; | 371 | kvm->arch.vioapic = ioapic; |
| 353 | kvm_ioapic_reset(ioapic); | 372 | kvm_ioapic_reset(ioapic); |
| 354 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); | 373 | kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); |
| @@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm) | |||
| 360 | return ret; | 379 | return ret; |
| 361 | } | 380 | } |
| 362 | 381 | ||
| 382 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
| 383 | { | ||
| 384 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
| 385 | if (!ioapic) | ||
| 386 | return -EINVAL; | ||
| 387 | |||
| 388 | mutex_lock(&ioapic->lock); | ||
| 389 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); | ||
| 390 | mutex_unlock(&ioapic->lock); | ||
| 391 | return 0; | ||
| 392 | } | ||
| 393 | |||
| 394 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) | ||
| 395 | { | ||
| 396 | struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); | ||
| 397 | if (!ioapic) | ||
| 398 | return -EINVAL; | ||
| 399 | |||
| 400 | mutex_lock(&ioapic->lock); | ||
| 401 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); | ||
| 402 | mutex_unlock(&ioapic->lock); | ||
| 403 | return 0; | ||
| 404 | } | ||
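
The comment inside __kvm_ioapic_update_eoi() describes the key constraint of this hunk: the new per-ioapic mutex must not be held across kvm_notify_acked_irq(), because assigned-device ack notifiers call back into the ioapic. A stripped-down sketch of that general pattern, with invented names (my_dev, my_dev_notify, my_dev_service), using kernel-style locking primitives:

#include <linux/mutex.h>

struct my_dev {
	struct mutex lock;
	bool remote_irr[8];
	bool irr[8];
};

/* May re-enter my_dev and take d->lock, so it must be called unlocked. */
void my_dev_notify(struct my_dev *d, int pin);
/* Internal servicing, called with d->lock held. */
void my_dev_service(struct my_dev *d, int pin);

static void my_dev_eoi(struct my_dev *d, int pin)
{
	mutex_lock(&d->lock);
	if (d->remote_irr[pin]) {
		/* Drop the lock around the callback to avoid self-deadlock. */
		mutex_unlock(&d->lock);
		my_dev_notify(d, pin);
		mutex_lock(&d->lock);
		/* remote_irr is cleared only now; an interrupt raised while the
		 * lock was dropped is latched in irr and serviced below. */
		d->remote_irr[pin] = false;
		if (d->irr[pin])
			my_dev_service(d, pin);
	}
	mutex_unlock(&d->lock);
}
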
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b713c160..419c43b667ab 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h | |||
| @@ -41,9 +41,11 @@ struct kvm_ioapic { | |||
| 41 | u32 irr; | 41 | u32 irr; |
| 42 | u32 pad; | 42 | u32 pad; |
| 43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; | 43 | union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; |
| 44 | unsigned long irq_states[IOAPIC_NUM_PINS]; | ||
| 44 | struct kvm_io_device dev; | 45 | struct kvm_io_device dev; |
| 45 | struct kvm *kvm; | 46 | struct kvm *kvm; |
| 46 | void (*ack_notifier)(void *opaque, int irq); | 47 | void (*ack_notifier)(void *opaque, int irq); |
| 48 | struct mutex lock; | ||
| 47 | }; | 49 | }; |
| 48 | 50 | ||
| 49 | #ifdef DEBUG | 51 | #ifdef DEBUG |
| @@ -73,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); | |||
| 73 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); | 75 | void kvm_ioapic_reset(struct kvm_ioapic *ioapic); |
| 74 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | 76 | int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, |
| 75 | struct kvm_lapic_irq *irq); | 77 | struct kvm_lapic_irq *irq); |
| 78 | int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
| 79 | int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); | ||
| 80 | |||
| 76 | #endif | 81 | #endif |
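
The new irq_states[] array added to struct kvm_ioapic holds one bit per interrupt source for each pin; the effective line level is the OR of those bits (see kvm_irq_line_state() in the irq_comm.c hunk below). A self-contained sketch of that idea, with invented names:

#include <linux/bitops.h>

/* Sketch: several sources sharing one level-triggered line.  Each source
 * owns one bit; the wire stays asserted until every source has deasserted. */
static int my_line_state(unsigned long *line_state, int source_id, int level)
{
	if (level)
		set_bit(source_id, line_state);
	else
		clear_bit(source_id, line_state);
	return !!(*line_state);		/* level seen by the irqchip */
}

static void my_line_demo(void)
{
	unsigned long state = 0;

	my_line_state(&state, 0, 1);	/* source 0 raises -> line level 1 */
	my_line_state(&state, 1, 1);	/* source 1 raises -> still 1 */
	my_line_state(&state, 0, 0);	/* source 0 lowers -> still 1, source 1 holds it */
	my_line_state(&state, 1, 0);	/* source 1 lowers -> line level 0 */
}
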
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663ff401a..9b077342ab54 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c | |||
| @@ -31,20 +31,39 @@ | |||
| 31 | 31 | ||
| 32 | #include "ioapic.h" | 32 | #include "ioapic.h" |
| 33 | 33 | ||
| 34 | static inline int kvm_irq_line_state(unsigned long *irq_state, | ||
| 35 | int irq_source_id, int level) | ||
| 36 | { | ||
| 37 | /* Logical OR for level trig interrupt */ | ||
| 38 | if (level) | ||
| 39 | set_bit(irq_source_id, irq_state); | ||
| 40 | else | ||
| 41 | clear_bit(irq_source_id, irq_state); | ||
| 42 | |||
| 43 | return !!(*irq_state); | ||
| 44 | } | ||
| 45 | |||
| 34 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, | 46 | static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, |
| 35 | struct kvm *kvm, int level) | 47 | struct kvm *kvm, int irq_source_id, int level) |
| 36 | { | 48 | { |
| 37 | #ifdef CONFIG_X86 | 49 | #ifdef CONFIG_X86 |
| 38 | return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); | 50 | struct kvm_pic *pic = pic_irqchip(kvm); |
| 51 | level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], | ||
| 52 | irq_source_id, level); | ||
| 53 | return kvm_pic_set_irq(pic, e->irqchip.pin, level); | ||
| 39 | #else | 54 | #else |
| 40 | return -1; | 55 | return -1; |
| 41 | #endif | 56 | #endif |
| 42 | } | 57 | } |
| 43 | 58 | ||
| 44 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, | 59 | static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, |
| 45 | struct kvm *kvm, int level) | 60 | struct kvm *kvm, int irq_source_id, int level) |
| 46 | { | 61 | { |
| 47 | return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); | 62 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
| 63 | level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], | ||
| 64 | irq_source_id, level); | ||
| 65 | |||
| 66 | return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); | ||
| 48 | } | 67 | } |
| 49 | 68 | ||
| 50 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) | 69 | inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) |
| @@ -63,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 63 | int i, r = -1; | 82 | int i, r = -1; |
| 64 | struct kvm_vcpu *vcpu, *lowest = NULL; | 83 | struct kvm_vcpu *vcpu, *lowest = NULL; |
| 65 | 84 | ||
| 66 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
| 67 | |||
| 68 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && | 85 | if (irq->dest_mode == 0 && irq->dest_id == 0xff && |
| 69 | kvm_is_dm_lowest_prio(irq)) | 86 | kvm_is_dm_lowest_prio(irq)) |
| 70 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); | 87 | printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); |
| @@ -96,10 +113,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, | |||
| 96 | } | 113 | } |
| 97 | 114 | ||
| 98 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | 115 | static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, |
| 99 | struct kvm *kvm, int level) | 116 | struct kvm *kvm, int irq_source_id, int level) |
| 100 | { | 117 | { |
| 101 | struct kvm_lapic_irq irq; | 118 | struct kvm_lapic_irq irq; |
| 102 | 119 | ||
| 120 | if (!level) | ||
| 121 | return -1; | ||
| 122 | |||
| 103 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); | 123 | trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); |
| 104 | 124 | ||
| 105 | irq.dest_id = (e->msi.address_lo & | 125 | irq.dest_id = (e->msi.address_lo & |
| @@ -116,78 +136,67 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, | |||
| 116 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); | 136 | return kvm_irq_delivery_to_apic(kvm, NULL, &irq); |
| 117 | } | 137 | } |
| 118 | 138 | ||
| 119 | /* This should be called with the kvm->irq_lock mutex held | 139 | /* |
| 120 | * Return value: | 140 | * Return value: |
| 121 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) | 141 | * < 0 Interrupt was ignored (masked or not delivered for other reasons) |
| 122 | * = 0 Interrupt was coalesced (previous irq is still pending) | 142 | * = 0 Interrupt was coalesced (previous irq is still pending) |
| 123 | * > 0 Number of CPUs interrupt was delivered to | 143 | * > 0 Number of CPUs interrupt was delivered to |
| 124 | */ | 144 | */ |
| 125 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) | 145 | int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) |
| 126 | { | 146 | { |
| 127 | struct kvm_kernel_irq_routing_entry *e; | 147 | struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; |
| 128 | unsigned long *irq_state, sig_level; | 148 | int ret = -1, i = 0; |
| 129 | int ret = -1; | 149 | struct kvm_irq_routing_table *irq_rt; |
| 150 | struct hlist_node *n; | ||
| 130 | 151 | ||
| 131 | trace_kvm_set_irq(irq, level, irq_source_id); | 152 | trace_kvm_set_irq(irq, level, irq_source_id); |
| 132 | 153 | ||
| 133 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | ||
| 134 | |||
| 135 | if (irq < KVM_IOAPIC_NUM_PINS) { | ||
| 136 | irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; | ||
| 137 | |||
| 138 | /* Logical OR for level trig interrupt */ | ||
| 139 | if (level) | ||
| 140 | set_bit(irq_source_id, irq_state); | ||
| 141 | else | ||
| 142 | clear_bit(irq_source_id, irq_state); | ||
| 143 | sig_level = !!(*irq_state); | ||
| 144 | } else if (!level) | ||
| 145 | return ret; | ||
| 146 | else /* Deal with MSI/MSI-X */ | ||
| 147 | sig_level = 1; | ||
| 148 | |||
| 149 | /* Not possible to detect if the guest uses the PIC or the | 154 | /* Not possible to detect if the guest uses the PIC or the |
| 150 | * IOAPIC. So set the bit in both. The guest will ignore | 155 | * IOAPIC. So set the bit in both. The guest will ignore |
| 151 | * writes to the unused one. | 156 | * writes to the unused one. |
| 152 | */ | 157 | */ |
| 153 | list_for_each_entry(e, &kvm->irq_routing, link) | 158 | rcu_read_lock(); |
| 154 | if (e->gsi == irq) { | 159 | irq_rt = rcu_dereference(kvm->irq_routing); |
| 155 | int r = e->set(e, kvm, sig_level); | 160 | if (irq < irq_rt->nr_rt_entries) |
| 156 | if (r < 0) | 161 | hlist_for_each_entry(e, n, &irq_rt->map[irq], link) |
| 157 | continue; | 162 | irq_set[i++] = *e; |
| 163 | rcu_read_unlock(); | ||
| 164 | |||
| 165 | while(i--) { | ||
| 166 | int r; | ||
| 167 | r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); | ||
| 168 | if (r < 0) | ||
| 169 | continue; | ||
| 170 | |||
| 171 | ret = r + ((ret < 0) ? 0 : ret); | ||
| 172 | } | ||
| 158 | 173 | ||
| 159 | ret = r + ((ret < 0) ? 0 : ret); | ||
| 160 | } | ||
| 161 | return ret; | 174 | return ret; |
| 162 | } | 175 | } |
| 163 | 176 | ||
| 164 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) | 177 | void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) |
| 165 | { | 178 | { |
| 166 | struct kvm_kernel_irq_routing_entry *e; | ||
| 167 | struct kvm_irq_ack_notifier *kian; | 179 | struct kvm_irq_ack_notifier *kian; |
| 168 | struct hlist_node *n; | 180 | struct hlist_node *n; |
| 169 | unsigned gsi = pin; | 181 | int gsi; |
| 170 | 182 | ||
| 171 | trace_kvm_ack_irq(irqchip, pin); | 183 | trace_kvm_ack_irq(irqchip, pin); |
| 172 | 184 | ||
| 173 | list_for_each_entry(e, &kvm->irq_routing, link) | 185 | rcu_read_lock(); |
| 174 | if (e->type == KVM_IRQ_ROUTING_IRQCHIP && | 186 | gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; |
| 175 | e->irqchip.irqchip == irqchip && | 187 | if (gsi != -1) |
| 176 | e->irqchip.pin == pin) { | 188 | hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, |
| 177 | gsi = e->gsi; | 189 | link) |
| 178 | break; | 190 | if (kian->gsi == gsi) |
| 179 | } | 191 | kian->irq_acked(kian); |
| 180 | 192 | rcu_read_unlock(); | |
| 181 | hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) | ||
| 182 | if (kian->gsi == gsi) | ||
| 183 | kian->irq_acked(kian); | ||
| 184 | } | 193 | } |
| 185 | 194 | ||
| 186 | void kvm_register_irq_ack_notifier(struct kvm *kvm, | 195 | void kvm_register_irq_ack_notifier(struct kvm *kvm, |
| 187 | struct kvm_irq_ack_notifier *kian) | 196 | struct kvm_irq_ack_notifier *kian) |
| 188 | { | 197 | { |
| 189 | mutex_lock(&kvm->irq_lock); | 198 | mutex_lock(&kvm->irq_lock); |
| 190 | hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); | 199 | hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); |
| 191 | mutex_unlock(&kvm->irq_lock); | 200 | mutex_unlock(&kvm->irq_lock); |
| 192 | } | 201 | } |
| 193 | 202 | ||
| @@ -195,8 +204,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, | |||
| 195 | struct kvm_irq_ack_notifier *kian) | 204 | struct kvm_irq_ack_notifier *kian) |
| 196 | { | 205 | { |
| 197 | mutex_lock(&kvm->irq_lock); | 206 | mutex_lock(&kvm->irq_lock); |
| 198 | hlist_del_init(&kian->link); | 207 | hlist_del_init_rcu(&kian->link); |
| 199 | mutex_unlock(&kvm->irq_lock); | 208 | mutex_unlock(&kvm->irq_lock); |
| 209 | synchronize_rcu(); | ||
| 200 | } | 210 | } |
| 201 | 211 | ||
| 202 | int kvm_request_irq_source_id(struct kvm *kvm) | 212 | int kvm_request_irq_source_id(struct kvm *kvm) |
| @@ -205,16 +215,17 @@ int kvm_request_irq_source_id(struct kvm *kvm) | |||
| 205 | int irq_source_id; | 215 | int irq_source_id; |
| 206 | 216 | ||
| 207 | mutex_lock(&kvm->irq_lock); | 217 | mutex_lock(&kvm->irq_lock); |
| 208 | irq_source_id = find_first_zero_bit(bitmap, | 218 | irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); |
| 209 | sizeof(kvm->arch.irq_sources_bitmap)); | ||
| 210 | 219 | ||
| 211 | if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 220 | if (irq_source_id >= BITS_PER_LONG) { |
| 212 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); | 221 | printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); |
| 213 | return -EFAULT; | 222 | irq_source_id = -EFAULT; |
| 223 | goto unlock; | ||
| 214 | } | 224 | } |
| 215 | 225 | ||
| 216 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); | 226 | ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); |
| 217 | set_bit(irq_source_id, bitmap); | 227 | set_bit(irq_source_id, bitmap); |
| 228 | unlock: | ||
| 218 | mutex_unlock(&kvm->irq_lock); | 229 | mutex_unlock(&kvm->irq_lock); |
| 219 | 230 | ||
| 220 | return irq_source_id; | 231 | return irq_source_id; |
| @@ -228,13 +239,23 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) | |||
| 228 | 239 | ||
| 229 | mutex_lock(&kvm->irq_lock); | 240 | mutex_lock(&kvm->irq_lock); |
| 230 | if (irq_source_id < 0 || | 241 | if (irq_source_id < 0 || |
| 231 | irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) { | 242 | irq_source_id >= BITS_PER_LONG) { |
| 232 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); | 243 | printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); |
| 233 | return; | 244 | goto unlock; |
| 234 | } | 245 | } |
| 235 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) | ||
| 236 | clear_bit(irq_source_id, &kvm->arch.irq_states[i]); | ||
| 237 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); | 246 | clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); |
| 247 | if (!irqchip_in_kernel(kvm)) | ||
| 248 | goto unlock; | ||
| 249 | |||
| 250 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { | ||
| 251 | clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); | ||
| 252 | if (i >= 16) | ||
| 253 | continue; | ||
| 254 | #ifdef CONFIG_X86 | ||
| 255 | clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); | ||
| 256 | #endif | ||
| 257 | } | ||
| 258 | unlock: | ||
| 238 | mutex_unlock(&kvm->irq_lock); | 259 | mutex_unlock(&kvm->irq_lock); |
| 239 | } | 260 | } |
| 240 | 261 | ||
| @@ -243,7 +264,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |||
| 243 | { | 264 | { |
| 244 | mutex_lock(&kvm->irq_lock); | 265 | mutex_lock(&kvm->irq_lock); |
| 245 | kimn->irq = irq; | 266 | kimn->irq = irq; |
| 246 | hlist_add_head(&kimn->link, &kvm->mask_notifier_list); | 267 | hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); |
| 247 | mutex_unlock(&kvm->irq_lock); | 268 | mutex_unlock(&kvm->irq_lock); |
| 248 | } | 269 | } |
| 249 | 270 | ||
| @@ -251,8 +272,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |||
| 251 | struct kvm_irq_mask_notifier *kimn) | 272 | struct kvm_irq_mask_notifier *kimn) |
| 252 | { | 273 | { |
| 253 | mutex_lock(&kvm->irq_lock); | 274 | mutex_lock(&kvm->irq_lock); |
| 254 | hlist_del(&kimn->link); | 275 | hlist_del_rcu(&kimn->link); |
| 255 | mutex_unlock(&kvm->irq_lock); | 276 | mutex_unlock(&kvm->irq_lock); |
| 277 | synchronize_rcu(); | ||
| 256 | } | 278 | } |
| 257 | 279 | ||
| 258 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | 280 | void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) |
| @@ -260,33 +282,37 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) | |||
| 260 | struct kvm_irq_mask_notifier *kimn; | 282 | struct kvm_irq_mask_notifier *kimn; |
| 261 | struct hlist_node *n; | 283 | struct hlist_node *n; |
| 262 | 284 | ||
| 263 | WARN_ON(!mutex_is_locked(&kvm->irq_lock)); | 285 | rcu_read_lock(); |
| 264 | 286 | hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) | |
| 265 | hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) | ||
| 266 | if (kimn->irq == irq) | 287 | if (kimn->irq == irq) |
| 267 | kimn->func(kimn, mask); | 288 | kimn->func(kimn, mask); |
| 268 | } | 289 | rcu_read_unlock(); |
| 269 | |||
| 270 | static void __kvm_free_irq_routing(struct list_head *irq_routing) | ||
| 271 | { | ||
| 272 | struct kvm_kernel_irq_routing_entry *e, *n; | ||
| 273 | |||
| 274 | list_for_each_entry_safe(e, n, irq_routing, link) | ||
| 275 | kfree(e); | ||
| 276 | } | 290 | } |
| 277 | 291 | ||
| 278 | void kvm_free_irq_routing(struct kvm *kvm) | 292 | void kvm_free_irq_routing(struct kvm *kvm) |
| 279 | { | 293 | { |
| 280 | mutex_lock(&kvm->irq_lock); | 294 | /* Called only during vm destruction. Nobody can use the pointer |
| 281 | __kvm_free_irq_routing(&kvm->irq_routing); | 295 | at this stage */ |
| 282 | mutex_unlock(&kvm->irq_lock); | 296 | kfree(kvm->irq_routing); |
| 283 | } | 297 | } |
| 284 | 298 | ||
| 285 | static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | 299 | static int setup_routing_entry(struct kvm_irq_routing_table *rt, |
| 300 | struct kvm_kernel_irq_routing_entry *e, | ||
| 286 | const struct kvm_irq_routing_entry *ue) | 301 | const struct kvm_irq_routing_entry *ue) |
| 287 | { | 302 | { |
| 288 | int r = -EINVAL; | 303 | int r = -EINVAL; |
| 289 | int delta; | 304 | int delta; |
| 305 | struct kvm_kernel_irq_routing_entry *ei; | ||
| 306 | struct hlist_node *n; | ||
| 307 | |||
| 308 | /* | ||
| 309 | * Do not allow GSI to be mapped to the same irqchip more than once. | ||
| 310 | * Allow only one to one mapping between GSI and MSI. | ||
| 311 | */ | ||
| 312 | hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) | ||
| 313 | if (ei->type == KVM_IRQ_ROUTING_MSI || | ||
| 314 | ue->u.irqchip.irqchip == ei->irqchip.irqchip) | ||
| 315 | return r; | ||
| 290 | 316 | ||
| 291 | e->gsi = ue->gsi; | 317 | e->gsi = ue->gsi; |
| 292 | e->type = ue->type; | 318 | e->type = ue->type; |
| @@ -309,6 +335,9 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
| 309 | } | 335 | } |
| 310 | e->irqchip.irqchip = ue->u.irqchip.irqchip; | 336 | e->irqchip.irqchip = ue->u.irqchip.irqchip; |
| 311 | e->irqchip.pin = ue->u.irqchip.pin + delta; | 337 | e->irqchip.pin = ue->u.irqchip.pin + delta; |
| 338 | if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS) | ||
| 339 | goto out; | ||
| 340 | rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; | ||
| 312 | break; | 341 | break; |
| 313 | case KVM_IRQ_ROUTING_MSI: | 342 | case KVM_IRQ_ROUTING_MSI: |
| 314 | e->set = kvm_set_msi; | 343 | e->set = kvm_set_msi; |
| @@ -319,6 +348,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, | |||
| 319 | default: | 348 | default: |
| 320 | goto out; | 349 | goto out; |
| 321 | } | 350 | } |
| 351 | |||
| 352 | hlist_add_head(&e->link, &rt->map[e->gsi]); | ||
| 322 | r = 0; | 353 | r = 0; |
| 323 | out: | 354 | out: |
| 324 | return r; | 355 | return r; |
| @@ -330,43 +361,53 @@ int kvm_set_irq_routing(struct kvm *kvm, | |||
| 330 | unsigned nr, | 361 | unsigned nr, |
| 331 | unsigned flags) | 362 | unsigned flags) |
| 332 | { | 363 | { |
| 333 | struct list_head irq_list = LIST_HEAD_INIT(irq_list); | 364 | struct kvm_irq_routing_table *new, *old; |
| 334 | struct list_head tmp = LIST_HEAD_INIT(tmp); | 365 | u32 i, j, nr_rt_entries = 0; |
| 335 | struct kvm_kernel_irq_routing_entry *e = NULL; | ||
| 336 | unsigned i; | ||
| 337 | int r; | 366 | int r; |
| 338 | 367 | ||
| 339 | for (i = 0; i < nr; ++i) { | 368 | for (i = 0; i < nr; ++i) { |
| 369 | if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) | ||
| 370 | return -EINVAL; | ||
| 371 | nr_rt_entries = max(nr_rt_entries, ue[i].gsi); | ||
| 372 | } | ||
| 373 | |||
| 374 | nr_rt_entries += 1; | ||
| 375 | |||
| 376 | new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) | ||
| 377 | + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), | ||
| 378 | GFP_KERNEL); | ||
| 379 | |||
| 380 | if (!new) | ||
| 381 | return -ENOMEM; | ||
| 382 | |||
| 383 | new->rt_entries = (void *)&new->map[nr_rt_entries]; | ||
| 384 | |||
| 385 | new->nr_rt_entries = nr_rt_entries; | ||
| 386 | for (i = 0; i < 3; i++) | ||
| 387 | for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) | ||
| 388 | new->chip[i][j] = -1; | ||
| 389 | |||
| 390 | for (i = 0; i < nr; ++i) { | ||
| 340 | r = -EINVAL; | 391 | r = -EINVAL; |
| 341 | if (ue->gsi >= KVM_MAX_IRQ_ROUTES) | ||
| 342 | goto out; | ||
| 343 | if (ue->flags) | 392 | if (ue->flags) |
| 344 | goto out; | 393 | goto out; |
| 345 | r = -ENOMEM; | 394 | r = setup_routing_entry(new, &new->rt_entries[i], ue); |
| 346 | e = kzalloc(sizeof(*e), GFP_KERNEL); | ||
| 347 | if (!e) | ||
| 348 | goto out; | ||
| 349 | r = setup_routing_entry(e, ue); | ||
| 350 | if (r) | 395 | if (r) |
| 351 | goto out; | 396 | goto out; |
| 352 | ++ue; | 397 | ++ue; |
| 353 | list_add(&e->link, &irq_list); | ||
| 354 | e = NULL; | ||
| 355 | } | 398 | } |
| 356 | 399 | ||
| 357 | mutex_lock(&kvm->irq_lock); | 400 | mutex_lock(&kvm->irq_lock); |
| 358 | list_splice(&kvm->irq_routing, &tmp); | 401 | old = kvm->irq_routing; |
| 359 | INIT_LIST_HEAD(&kvm->irq_routing); | 402 | rcu_assign_pointer(kvm->irq_routing, new); |
| 360 | list_splice(&irq_list, &kvm->irq_routing); | ||
| 361 | INIT_LIST_HEAD(&irq_list); | ||
| 362 | list_splice(&tmp, &irq_list); | ||
| 363 | mutex_unlock(&kvm->irq_lock); | 403 | mutex_unlock(&kvm->irq_lock); |
| 404 | synchronize_rcu(); | ||
| 364 | 405 | ||
| 406 | new = old; | ||
| 365 | r = 0; | 407 | r = 0; |
| 366 | 408 | ||
| 367 | out: | 409 | out: |
| 368 | kfree(e); | 410 | kfree(new); |
| 369 | __kvm_free_irq_routing(&irq_list); | ||
| 370 | return r; | 411 | return r; |
| 371 | } | 412 | } |
| 372 | 413 | ||
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7495ce347344..f92ba138007a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <linux/swap.h> | 43 | #include <linux/swap.h> |
| 44 | #include <linux/bitops.h> | 44 | #include <linux/bitops.h> |
| 45 | #include <linux/spinlock.h> | 45 | #include <linux/spinlock.h> |
| 46 | #include <linux/compat.h> | ||
| 46 | 47 | ||
| 47 | #include <asm/processor.h> | 48 | #include <asm/processor.h> |
| 48 | #include <asm/io.h> | 49 | #include <asm/io.h> |
| @@ -53,12 +54,6 @@ | |||
| 53 | #include "coalesced_mmio.h" | 54 | #include "coalesced_mmio.h" |
| 54 | #endif | 55 | #endif |
| 55 | 56 | ||
| 56 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 57 | #include <linux/pci.h> | ||
| 58 | #include <linux/interrupt.h> | ||
| 59 | #include "irq.h" | ||
| 60 | #endif | ||
| 61 | |||
| 62 | #define CREATE_TRACE_POINTS | 57 | #define CREATE_TRACE_POINTS |
| 63 | #include <trace/events/kvm.h> | 58 | #include <trace/events/kvm.h> |
| 64 | 59 | ||
| @@ -75,6 +70,8 @@ DEFINE_SPINLOCK(kvm_lock); | |||
| 75 | LIST_HEAD(vm_list); | 70 | LIST_HEAD(vm_list); |
| 76 | 71 | ||
| 77 | static cpumask_var_t cpus_hardware_enabled; | 72 | static cpumask_var_t cpus_hardware_enabled; |
| 73 | static int kvm_usage_count = 0; | ||
| 74 | static atomic_t hardware_enable_failed; | ||
| 78 | 75 | ||
| 79 | struct kmem_cache *kvm_vcpu_cache; | 76 | struct kmem_cache *kvm_vcpu_cache; |
| 80 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | 77 | EXPORT_SYMBOL_GPL(kvm_vcpu_cache); |
| @@ -85,615 +82,13 @@ struct dentry *kvm_debugfs_dir; | |||
| 85 | 82 | ||
| 86 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 83 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
| 87 | unsigned long arg); | 84 | unsigned long arg); |
| 85 | static int hardware_enable_all(void); | ||
| 86 | static void hardware_disable_all(void); | ||
| 88 | 87 | ||
| 89 | static bool kvm_rebooting; | 88 | static bool kvm_rebooting; |
| 90 | 89 | ||
| 91 | static bool largepages_enabled = true; | 90 | static bool largepages_enabled = true; |
| 92 | 91 | ||
| 93 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 94 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | ||
| 95 | int assigned_dev_id) | ||
| 96 | { | ||
| 97 | struct list_head *ptr; | ||
| 98 | struct kvm_assigned_dev_kernel *match; | ||
| 99 | |||
| 100 | list_for_each(ptr, head) { | ||
| 101 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | ||
| 102 | if (match->assigned_dev_id == assigned_dev_id) | ||
| 103 | return match; | ||
| 104 | } | ||
| 105 | return NULL; | ||
| 106 | } | ||
| 107 | |||
| 108 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | ||
| 109 | *assigned_dev, int irq) | ||
| 110 | { | ||
| 111 | int i, index; | ||
| 112 | struct msix_entry *host_msix_entries; | ||
| 113 | |||
| 114 | host_msix_entries = assigned_dev->host_msix_entries; | ||
| 115 | |||
| 116 | index = -1; | ||
| 117 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 118 | if (irq == host_msix_entries[i].vector) { | ||
| 119 | index = i; | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | if (index < 0) { | ||
| 123 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); | ||
| 124 | return 0; | ||
| 125 | } | ||
| 126 | |||
| 127 | return index; | ||
| 128 | } | ||
| 129 | |||
| 130 | static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) | ||
| 131 | { | ||
| 132 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
| 133 | struct kvm *kvm; | ||
| 134 | int i; | ||
| 135 | |||
| 136 | assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, | ||
| 137 | interrupt_work); | ||
| 138 | kvm = assigned_dev->kvm; | ||
| 139 | |||
| 140 | mutex_lock(&kvm->irq_lock); | ||
| 141 | spin_lock_irq(&assigned_dev->assigned_dev_lock); | ||
| 142 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 143 | struct kvm_guest_msix_entry *guest_entries = | ||
| 144 | assigned_dev->guest_msix_entries; | ||
| 145 | for (i = 0; i < assigned_dev->entries_nr; i++) { | ||
| 146 | if (!(guest_entries[i].flags & | ||
| 147 | KVM_ASSIGNED_MSIX_PENDING)) | ||
| 148 | continue; | ||
| 149 | guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; | ||
| 150 | kvm_set_irq(assigned_dev->kvm, | ||
| 151 | assigned_dev->irq_source_id, | ||
| 152 | guest_entries[i].vector, 1); | ||
| 153 | } | ||
| 154 | } else | ||
| 155 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, | ||
| 156 | assigned_dev->guest_irq, 1); | ||
| 157 | |||
| 158 | spin_unlock_irq(&assigned_dev->assigned_dev_lock); | ||
| 159 | mutex_unlock(&assigned_dev->kvm->irq_lock); | ||
| 160 | } | ||
| 161 | |||
| 162 | static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) | ||
| 163 | { | ||
| 164 | unsigned long flags; | ||
| 165 | struct kvm_assigned_dev_kernel *assigned_dev = | ||
| 166 | (struct kvm_assigned_dev_kernel *) dev_id; | ||
| 167 | |||
| 168 | spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); | ||
| 169 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 170 | int index = find_index_from_host_irq(assigned_dev, irq); | ||
| 171 | if (index < 0) | ||
| 172 | goto out; | ||
| 173 | assigned_dev->guest_msix_entries[index].flags |= | ||
| 174 | KVM_ASSIGNED_MSIX_PENDING; | ||
| 175 | } | ||
| 176 | |||
| 177 | schedule_work(&assigned_dev->interrupt_work); | ||
| 178 | |||
| 179 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { | ||
| 180 | disable_irq_nosync(irq); | ||
| 181 | assigned_dev->host_irq_disabled = true; | ||
| 182 | } | ||
| 183 | |||
| 184 | out: | ||
| 185 | spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); | ||
| 186 | return IRQ_HANDLED; | ||
| 187 | } | ||
| 188 | |||
| 189 | /* Ack the irq line for an assigned device */ | ||
| 190 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | ||
| 191 | { | ||
| 192 | struct kvm_assigned_dev_kernel *dev; | ||
| 193 | unsigned long flags; | ||
| 194 | |||
| 195 | if (kian->gsi == -1) | ||
| 196 | return; | ||
| 197 | |||
| 198 | dev = container_of(kian, struct kvm_assigned_dev_kernel, | ||
| 199 | ack_notifier); | ||
| 200 | |||
| 201 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | ||
| 202 | |||
| 203 | /* The guest irq may be shared so this ack may be | ||
| 204 | * from another device. | ||
| 205 | */ | ||
| 206 | spin_lock_irqsave(&dev->assigned_dev_lock, flags); | ||
| 207 | if (dev->host_irq_disabled) { | ||
| 208 | enable_irq(dev->host_irq); | ||
| 209 | dev->host_irq_disabled = false; | ||
| 210 | } | ||
| 211 | spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); | ||
| 212 | } | ||
| 213 | |||
| 214 | static void deassign_guest_irq(struct kvm *kvm, | ||
| 215 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 216 | { | ||
| 217 | kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); | ||
| 218 | assigned_dev->ack_notifier.gsi = -1; | ||
| 219 | |||
| 220 | if (assigned_dev->irq_source_id != -1) | ||
| 221 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | ||
| 222 | assigned_dev->irq_source_id = -1; | ||
| 223 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | ||
| 224 | } | ||
| 225 | |||
| 226 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | ||
| 227 | static void deassign_host_irq(struct kvm *kvm, | ||
| 228 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 229 | { | ||
| 230 | /* | ||
| 231 | * When freeing a device irq, cancel_work_sync() returns true if: | ||
| 232 | * 1. the work was scheduled and has been cancelled, or | ||
| 233 | * 2. the work callback has already run. | ||
| 234 | * | ||
| 235 | * The first case guarantees that the irq is disabled and no further | ||
| 236 | * events can arrive. In the second case the irq may still be enabled | ||
| 237 | * (e.g. for MSI), so we disable it here to prevent further events. | ||
| 238 | * | ||
| 239 | * Note that this may result in a nested disable if the interrupt type | ||
| 240 | * is INTx, but that is fine because we are about to free it. | ||
| 241 | * | ||
| 242 | * If this function is called as part of VM destruction, make sure the | ||
| 243 | * kvm state is still valid at this point, since we may also have to | ||
| 244 | * wait for interrupt_work to complete. | ||
| 245 | */ | ||
| 246 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | ||
| 247 | int i; | ||
| 248 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 249 | disable_irq_nosync(assigned_dev-> | ||
| 250 | host_msix_entries[i].vector); | ||
| 251 | |||
| 252 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
| 253 | |||
| 254 | for (i = 0; i < assigned_dev->entries_nr; i++) | ||
| 255 | free_irq(assigned_dev->host_msix_entries[i].vector, | ||
| 256 | (void *)assigned_dev); | ||
| 257 | |||
| 258 | assigned_dev->entries_nr = 0; | ||
| 259 | kfree(assigned_dev->host_msix_entries); | ||
| 260 | kfree(assigned_dev->guest_msix_entries); | ||
| 261 | pci_disable_msix(assigned_dev->dev); | ||
| 262 | } else { | ||
| 263 | /* Deal with MSI and INTx */ | ||
| 264 | disable_irq_nosync(assigned_dev->host_irq); | ||
| 265 | cancel_work_sync(&assigned_dev->interrupt_work); | ||
| 266 | |||
| 267 | free_irq(assigned_dev->host_irq, (void *)assigned_dev); | ||
| 268 | |||
| 269 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | ||
| 270 | pci_disable_msi(assigned_dev->dev); | ||
| 271 | } | ||
| 272 | |||
| 273 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | ||
| 274 | } | ||
| 275 | |||
| 276 | static int kvm_deassign_irq(struct kvm *kvm, | ||
| 277 | struct kvm_assigned_dev_kernel *assigned_dev, | ||
| 278 | unsigned long irq_requested_type) | ||
| 279 | { | ||
| 280 | unsigned long guest_irq_type, host_irq_type; | ||
| 281 | |||
| 282 | if (!irqchip_in_kernel(kvm)) | ||
| 283 | return -EINVAL; | ||
| 284 | /* no irq assignment to deassign */ | ||
| 285 | if (!assigned_dev->irq_requested_type) | ||
| 286 | return -ENXIO; | ||
| 287 | |||
| 288 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | ||
| 289 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | ||
| 290 | |||
| 291 | if (host_irq_type) | ||
| 292 | deassign_host_irq(kvm, assigned_dev); | ||
| 293 | if (guest_irq_type) | ||
| 294 | deassign_guest_irq(kvm, assigned_dev); | ||
| 295 | |||
| 296 | return 0; | ||
| 297 | } | ||
| 298 | |||
| 299 | static void kvm_free_assigned_irq(struct kvm *kvm, | ||
| 300 | struct kvm_assigned_dev_kernel *assigned_dev) | ||
| 301 | { | ||
| 302 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | ||
| 303 | } | ||
| 304 | |||
| 305 | static void kvm_free_assigned_device(struct kvm *kvm, | ||
| 306 | struct kvm_assigned_dev_kernel | ||
| 307 | *assigned_dev) | ||
| 308 | { | ||
| 309 | kvm_free_assigned_irq(kvm, assigned_dev); | ||
| 310 | |||
| 311 | pci_reset_function(assigned_dev->dev); | ||
| 312 | |||
| 313 | pci_release_regions(assigned_dev->dev); | ||
| 314 | pci_disable_device(assigned_dev->dev); | ||
| 315 | pci_dev_put(assigned_dev->dev); | ||
| 316 | |||
| 317 | list_del(&assigned_dev->list); | ||
| 318 | kfree(assigned_dev); | ||
| 319 | } | ||
| 320 | |||
| 321 | void kvm_free_all_assigned_devices(struct kvm *kvm) | ||
| 322 | { | ||
| 323 | struct list_head *ptr, *ptr2; | ||
| 324 | struct kvm_assigned_dev_kernel *assigned_dev; | ||
| 325 | |||
| 326 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | ||
| 327 | assigned_dev = list_entry(ptr, | ||
| 328 | struct kvm_assigned_dev_kernel, | ||
| 329 | list); | ||
| 330 | |||
| 331 | kvm_free_assigned_device(kvm, assigned_dev); | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 335 | static int assigned_device_enable_host_intx(struct kvm *kvm, | ||
| 336 | struct kvm_assigned_dev_kernel *dev) | ||
| 337 | { | ||
| 338 | dev->host_irq = dev->dev->irq; | ||
| 339 | /* Even though this is PCI, we don't want to use shared | ||
| 340 | * interrupts. Sharing host devices with guest-assigned devices | ||
| 341 | * on the same interrupt line is not a happy situation: there | ||
| 342 | * are going to be long delays in accepting, acking, etc. | ||
| 343 | */ | ||
| 344 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, | ||
| 345 | 0, "kvm_assigned_intx_device", (void *)dev)) | ||
| 346 | return -EIO; | ||
| 347 | return 0; | ||
| 348 | } | ||
| 349 | |||
| 350 | #ifdef __KVM_HAVE_MSI | ||
| 351 | static int assigned_device_enable_host_msi(struct kvm *kvm, | ||
| 352 | struct kvm_assigned_dev_kernel *dev) | ||
| 353 | { | ||
| 354 | int r; | ||
| 355 | |||
| 356 | if (!dev->dev->msi_enabled) { | ||
| 357 | r = pci_enable_msi(dev->dev); | ||
| 358 | if (r) | ||
| 359 | return r; | ||
| 360 | } | ||
| 361 | |||
| 362 | dev->host_irq = dev->dev->irq; | ||
| 363 | if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, | ||
| 364 | "kvm_assigned_msi_device", (void *)dev)) { | ||
| 365 | pci_disable_msi(dev->dev); | ||
| 366 | return -EIO; | ||
| 367 | } | ||
| 368 | |||
| 369 | return 0; | ||
| 370 | } | ||
| 371 | #endif | ||
| 372 | |||
| 373 | #ifdef __KVM_HAVE_MSIX | ||
| 374 | static int assigned_device_enable_host_msix(struct kvm *kvm, | ||
| 375 | struct kvm_assigned_dev_kernel *dev) | ||
| 376 | { | ||
| 377 | int i, r = -EINVAL; | ||
| 378 | |||
| 379 | /* host_msix_entries and guest_msix_entries should have been | ||
| 380 | * initialized */ | ||
| 381 | if (dev->entries_nr == 0) | ||
| 382 | return r; | ||
| 383 | |||
| 384 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | ||
| 385 | if (r) | ||
| 386 | return r; | ||
| 387 | |||
| 388 | for (i = 0; i < dev->entries_nr; i++) { | ||
| 389 | r = request_irq(dev->host_msix_entries[i].vector, | ||
| 390 | kvm_assigned_dev_intr, 0, | ||
| 391 | "kvm_assigned_msix_device", | ||
| 392 | (void *)dev); | ||
| 393 | /* FIXME: free already requested irqs on failure */ | ||
| 394 | if (r) | ||
| 395 | return r; | ||
| 396 | } | ||
| 397 | |||
| 398 | return 0; | ||
| 399 | } | ||
| 400 | |||
| 401 | #endif | ||
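The FIXME above points out that vectors already requested are leaked if a later request_irq() fails. One possible unwind, sketched here for illustration only (it is not part of this patch), would replace the bare return by freeing the vectors requested so far and backing out of MSI-X:

	if (r) {
		/* Free the vectors requested so far, then back out of MSI-X. */
		while (--i >= 0)
			free_irq(dev->host_msix_entries[i].vector, (void *)dev);
		pci_disable_msix(dev->dev);
		return r;
	}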
| 402 | |||
| 403 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | ||
| 404 | struct kvm_assigned_dev_kernel *dev, | ||
| 405 | struct kvm_assigned_irq *irq) | ||
| 406 | { | ||
| 407 | dev->guest_irq = irq->guest_irq; | ||
| 408 | dev->ack_notifier.gsi = irq->guest_irq; | ||
| 409 | return 0; | ||
| 410 | } | ||
| 411 | |||
| 412 | #ifdef __KVM_HAVE_MSI | ||
| 413 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | ||
| 414 | struct kvm_assigned_dev_kernel *dev, | ||
| 415 | struct kvm_assigned_irq *irq) | ||
| 416 | { | ||
| 417 | dev->guest_irq = irq->guest_irq; | ||
| 418 | dev->ack_notifier.gsi = -1; | ||
| 419 | dev->host_irq_disabled = false; | ||
| 420 | return 0; | ||
| 421 | } | ||
| 422 | #endif | ||
| 423 | #ifdef __KVM_HAVE_MSIX | ||
| 424 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | ||
| 425 | struct kvm_assigned_dev_kernel *dev, | ||
| 426 | struct kvm_assigned_irq *irq) | ||
| 427 | { | ||
| 428 | dev->guest_irq = irq->guest_irq; | ||
| 429 | dev->ack_notifier.gsi = -1; | ||
| 430 | dev->host_irq_disabled = false; | ||
| 431 | return 0; | ||
| 432 | } | ||
| 433 | #endif | ||
| 434 | |||
| 435 | static int assign_host_irq(struct kvm *kvm, | ||
| 436 | struct kvm_assigned_dev_kernel *dev, | ||
| 437 | __u32 host_irq_type) | ||
| 438 | { | ||
| 439 | int r = -EEXIST; | ||
| 440 | |||
| 441 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | ||
| 442 | return r; | ||
| 443 | |||
| 444 | switch (host_irq_type) { | ||
| 445 | case KVM_DEV_IRQ_HOST_INTX: | ||
| 446 | r = assigned_device_enable_host_intx(kvm, dev); | ||
| 447 | break; | ||
| 448 | #ifdef __KVM_HAVE_MSI | ||
| 449 | case KVM_DEV_IRQ_HOST_MSI: | ||
| 450 | r = assigned_device_enable_host_msi(kvm, dev); | ||
| 451 | break; | ||
| 452 | #endif | ||
| 453 | #ifdef __KVM_HAVE_MSIX | ||
| 454 | case KVM_DEV_IRQ_HOST_MSIX: | ||
| 455 | r = assigned_device_enable_host_msix(kvm, dev); | ||
| 456 | break; | ||
| 457 | #endif | ||
| 458 | default: | ||
| 459 | r = -EINVAL; | ||
| 460 | } | ||
| 461 | |||
| 462 | if (!r) | ||
| 463 | dev->irq_requested_type |= host_irq_type; | ||
| 464 | |||
| 465 | return r; | ||
| 466 | } | ||
| 467 | |||
| 468 | static int assign_guest_irq(struct kvm *kvm, | ||
| 469 | struct kvm_assigned_dev_kernel *dev, | ||
| 470 | struct kvm_assigned_irq *irq, | ||
| 471 | unsigned long guest_irq_type) | ||
| 472 | { | ||
| 473 | int id; | ||
| 474 | int r = -EEXIST; | ||
| 475 | |||
| 476 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | ||
| 477 | return r; | ||
| 478 | |||
| 479 | id = kvm_request_irq_source_id(kvm); | ||
| 480 | if (id < 0) | ||
| 481 | return id; | ||
| 482 | |||
| 483 | dev->irq_source_id = id; | ||
| 484 | |||
| 485 | switch (guest_irq_type) { | ||
| 486 | case KVM_DEV_IRQ_GUEST_INTX: | ||
| 487 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | ||
| 488 | break; | ||
| 489 | #ifdef __KVM_HAVE_MSI | ||
| 490 | case KVM_DEV_IRQ_GUEST_MSI: | ||
| 491 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | ||
| 492 | break; | ||
| 493 | #endif | ||
| 494 | #ifdef __KVM_HAVE_MSIX | ||
| 495 | case KVM_DEV_IRQ_GUEST_MSIX: | ||
| 496 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | ||
| 497 | break; | ||
| 498 | #endif | ||
| 499 | default: | ||
| 500 | r = -EINVAL; | ||
| 501 | } | ||
| 502 | |||
| 503 | if (!r) { | ||
| 504 | dev->irq_requested_type |= guest_irq_type; | ||
| 505 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | ||
| 506 | } else | ||
| 507 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | ||
| 508 | |||
| 509 | return r; | ||
| 510 | } | ||
| 511 | |||
| 512 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | ||
| 513 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | ||
| 514 | struct kvm_assigned_irq *assigned_irq) | ||
| 515 | { | ||
| 516 | int r = -EINVAL; | ||
| 517 | struct kvm_assigned_dev_kernel *match; | ||
| 518 | unsigned long host_irq_type, guest_irq_type; | ||
| 519 | |||
| 520 | if (!capable(CAP_SYS_RAWIO)) | ||
| 521 | return -EPERM; | ||
| 522 | |||
| 523 | if (!irqchip_in_kernel(kvm)) | ||
| 524 | return r; | ||
| 525 | |||
| 526 | mutex_lock(&kvm->lock); | ||
| 527 | r = -ENODEV; | ||
| 528 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 529 | assigned_irq->assigned_dev_id); | ||
| 530 | if (!match) | ||
| 531 | goto out; | ||
| 532 | |||
| 533 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | ||
| 534 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | ||
| 535 | |||
| 536 | r = -EINVAL; | ||
| 537 | /* can only assign one type at a time */ | ||
| 538 | if (hweight_long(host_irq_type) > 1) | ||
| 539 | goto out; | ||
| 540 | if (hweight_long(guest_irq_type) > 1) | ||
| 541 | goto out; | ||
| 542 | if (host_irq_type == 0 && guest_irq_type == 0) | ||
| 543 | goto out; | ||
| 544 | |||
| 545 | r = 0; | ||
| 546 | if (host_irq_type) | ||
| 547 | r = assign_host_irq(kvm, match, host_irq_type); | ||
| 548 | if (r) | ||
| 549 | goto out; | ||
| 550 | |||
| 551 | if (guest_irq_type) | ||
| 552 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | ||
| 553 | out: | ||
| 554 | mutex_unlock(&kvm->lock); | ||
| 555 | return r; | ||
| 556 | } | ||
| 557 | |||
| 558 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | ||
| 559 | struct kvm_assigned_irq | ||
| 560 | *assigned_irq) | ||
| 561 | { | ||
| 562 | int r = -ENODEV; | ||
| 563 | struct kvm_assigned_dev_kernel *match; | ||
| 564 | |||
| 565 | mutex_lock(&kvm->lock); | ||
| 566 | |||
| 567 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 568 | assigned_irq->assigned_dev_id); | ||
| 569 | if (!match) | ||
| 570 | goto out; | ||
| 571 | |||
| 572 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | ||
| 573 | out: | ||
| 574 | mutex_unlock(&kvm->lock); | ||
| 575 | return r; | ||
| 576 | } | ||
| 577 | |||
| 578 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, | ||
| 579 | struct kvm_assigned_pci_dev *assigned_dev) | ||
| 580 | { | ||
| 581 | int r = 0; | ||
| 582 | struct kvm_assigned_dev_kernel *match; | ||
| 583 | struct pci_dev *dev; | ||
| 584 | |||
| 585 | down_read(&kvm->slots_lock); | ||
| 586 | mutex_lock(&kvm->lock); | ||
| 587 | |||
| 588 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 589 | assigned_dev->assigned_dev_id); | ||
| 590 | if (match) { | ||
| 591 | /* device already assigned */ | ||
| 592 | r = -EEXIST; | ||
| 593 | goto out; | ||
| 594 | } | ||
| 595 | |||
| 596 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | ||
| 597 | if (match == NULL) { | ||
| 598 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | ||
| 599 | __func__); | ||
| 600 | r = -ENOMEM; | ||
| 601 | goto out; | ||
| 602 | } | ||
| 603 | dev = pci_get_bus_and_slot(assigned_dev->busnr, | ||
| 604 | assigned_dev->devfn); | ||
| 605 | if (!dev) { | ||
| 606 | printk(KERN_INFO "%s: host device not found\n", __func__); | ||
| 607 | r = -EINVAL; | ||
| 608 | goto out_free; | ||
| 609 | } | ||
| 610 | if (pci_enable_device(dev)) { | ||
| 611 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | ||
| 612 | r = -EBUSY; | ||
| 613 | goto out_put; | ||
| 614 | } | ||
| 615 | r = pci_request_regions(dev, "kvm_assigned_device"); | ||
| 616 | if (r) { | ||
| 617 | printk(KERN_INFO "%s: Could not get access to device regions\n", | ||
| 618 | __func__); | ||
| 619 | goto out_disable; | ||
| 620 | } | ||
| 621 | |||
| 622 | pci_reset_function(dev); | ||
| 623 | |||
| 624 | match->assigned_dev_id = assigned_dev->assigned_dev_id; | ||
| 625 | match->host_busnr = assigned_dev->busnr; | ||
| 626 | match->host_devfn = assigned_dev->devfn; | ||
| 627 | match->flags = assigned_dev->flags; | ||
| 628 | match->dev = dev; | ||
| 629 | spin_lock_init(&match->assigned_dev_lock); | ||
| 630 | match->irq_source_id = -1; | ||
| 631 | match->kvm = kvm; | ||
| 632 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | ||
| 633 | INIT_WORK(&match->interrupt_work, | ||
| 634 | kvm_assigned_dev_interrupt_work_handler); | ||
| 635 | |||
| 636 | list_add(&match->list, &kvm->arch.assigned_dev_head); | ||
| 637 | |||
| 638 | if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { | ||
| 639 | if (!kvm->arch.iommu_domain) { | ||
| 640 | r = kvm_iommu_map_guest(kvm); | ||
| 641 | if (r) | ||
| 642 | goto out_list_del; | ||
| 643 | } | ||
| 644 | r = kvm_assign_device(kvm, match); | ||
| 645 | if (r) | ||
| 646 | goto out_list_del; | ||
| 647 | } | ||
| 648 | |||
| 649 | out: | ||
| 650 | mutex_unlock(&kvm->lock); | ||
| 651 | up_read(&kvm->slots_lock); | ||
| 652 | return r; | ||
| 653 | out_list_del: | ||
| 654 | list_del(&match->list); | ||
| 655 | pci_release_regions(dev); | ||
| 656 | out_disable: | ||
| 657 | pci_disable_device(dev); | ||
| 658 | out_put: | ||
| 659 | pci_dev_put(dev); | ||
| 660 | out_free: | ||
| 661 | kfree(match); | ||
| 662 | mutex_unlock(&kvm->lock); | ||
| 663 | up_read(&kvm->slots_lock); | ||
| 664 | return r; | ||
| 665 | } | ||
| 666 | #endif | ||
| 667 | |||
| 668 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
| 669 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | ||
| 670 | struct kvm_assigned_pci_dev *assigned_dev) | ||
| 671 | { | ||
| 672 | int r = 0; | ||
| 673 | struct kvm_assigned_dev_kernel *match; | ||
| 674 | |||
| 675 | mutex_lock(&kvm->lock); | ||
| 676 | |||
| 677 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 678 | assigned_dev->assigned_dev_id); | ||
| 679 | if (!match) { | ||
| 680 | printk(KERN_INFO "%s: device hasn't been assigned before, " | ||
| 681 | "so cannot be deassigned\n", __func__); | ||
| 682 | r = -EINVAL; | ||
| 683 | goto out; | ||
| 684 | } | ||
| 685 | |||
| 686 | if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) | ||
| 687 | kvm_deassign_device(kvm, match); | ||
| 688 | |||
| 689 | kvm_free_assigned_device(kvm, match); | ||
| 690 | |||
| 691 | out: | ||
| 692 | mutex_unlock(&kvm->lock); | ||
| 693 | return r; | ||
| 694 | } | ||
| 695 | #endif | ||
| 696 | |||
| 697 | inline int kvm_is_mmio_pfn(pfn_t pfn) | 92 | inline int kvm_is_mmio_pfn(pfn_t pfn) |
| 698 | { | 93 | { |
| 699 | if (pfn_valid(pfn)) { | 94 | if (pfn_valid(pfn)) { |
| @@ -949,6 +344,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { | |||
| 949 | 344 | ||
| 950 | static struct kvm *kvm_create_vm(void) | 345 | static struct kvm *kvm_create_vm(void) |
| 951 | { | 346 | { |
| 347 | int r = 0; | ||
| 952 | struct kvm *kvm = kvm_arch_create_vm(); | 348 | struct kvm *kvm = kvm_arch_create_vm(); |
| 953 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 349 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
| 954 | struct page *page; | 350 | struct page *page; |
| @@ -956,16 +352,21 @@ static struct kvm *kvm_create_vm(void) | |||
| 956 | 352 | ||
| 957 | if (IS_ERR(kvm)) | 353 | if (IS_ERR(kvm)) |
| 958 | goto out; | 354 | goto out; |
| 355 | |||
| 356 | r = hardware_enable_all(); | ||
| 357 | if (r) | ||
| 358 | goto out_err_nodisable; | ||
| 359 | |||
| 959 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 360 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
| 960 | INIT_LIST_HEAD(&kvm->irq_routing); | ||
| 961 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); | 361 | INIT_HLIST_HEAD(&kvm->mask_notifier_list); |
| 362 | INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); | ||
| 962 | #endif | 363 | #endif |
| 963 | 364 | ||
| 964 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 365 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
| 965 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); | 366 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
| 966 | if (!page) { | 367 | if (!page) { |
| 967 | kfree(kvm); | 368 | r = -ENOMEM; |
| 968 | return ERR_PTR(-ENOMEM); | 369 | goto out_err; |
| 969 | } | 370 | } |
| 970 | kvm->coalesced_mmio_ring = | 371 | kvm->coalesced_mmio_ring = |
| 971 | (struct kvm_coalesced_mmio_ring *)page_address(page); | 372 | (struct kvm_coalesced_mmio_ring *)page_address(page); |
| @@ -973,15 +374,13 @@ static struct kvm *kvm_create_vm(void) | |||
| 973 | 374 | ||
| 974 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) | 375 | #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) |
| 975 | { | 376 | { |
| 976 | int err; | ||
| 977 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; | 377 | kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; |
| 978 | err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); | 378 | r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); |
| 979 | if (err) { | 379 | if (r) { |
| 980 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET | 380 | #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET |
| 981 | put_page(page); | 381 | put_page(page); |
| 982 | #endif | 382 | #endif |
| 983 | kfree(kvm); | 383 | goto out_err; |
| 984 | return ERR_PTR(err); | ||
| 985 | } | 384 | } |
| 986 | } | 385 | } |
| 987 | #endif | 386 | #endif |
| @@ -1005,6 +404,12 @@ static struct kvm *kvm_create_vm(void) | |||
| 1005 | #endif | 404 | #endif |
| 1006 | out: | 405 | out: |
| 1007 | return kvm; | 406 | return kvm; |
| 407 | |||
| 408 | out_err: | ||
| 409 | hardware_disable_all(); | ||
| 410 | out_err_nodisable: | ||
| 411 | kfree(kvm); | ||
| 412 | return ERR_PTR(r); | ||
| 1008 | } | 413 | } |
| 1009 | 414 | ||
| 1010 | /* | 415 | /* |
| @@ -1063,6 +468,7 @@ static void kvm_destroy_vm(struct kvm *kvm) | |||
| 1063 | kvm_arch_flush_shadow(kvm); | 468 | kvm_arch_flush_shadow(kvm); |
| 1064 | #endif | 469 | #endif |
| 1065 | kvm_arch_destroy_vm(kvm); | 470 | kvm_arch_destroy_vm(kvm); |
| 471 | hardware_disable_all(); | ||
| 1066 | mmdrop(mm); | 472 | mmdrop(mm); |
| 1067 | } | 473 | } |
| 1068 | 474 | ||
| @@ -1689,9 +1095,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) | |||
| 1689 | if (signal_pending(current)) | 1095 | if (signal_pending(current)) |
| 1690 | break; | 1096 | break; |
| 1691 | 1097 | ||
| 1692 | vcpu_put(vcpu); | ||
| 1693 | schedule(); | 1098 | schedule(); |
| 1694 | vcpu_load(vcpu); | ||
| 1695 | } | 1099 | } |
| 1696 | 1100 | ||
| 1697 | finish_wait(&vcpu->wq, &wait); | 1101 | finish_wait(&vcpu->wq, &wait); |
| @@ -1705,6 +1109,21 @@ void kvm_resched(struct kvm_vcpu *vcpu) | |||
| 1705 | } | 1109 | } |
| 1706 | EXPORT_SYMBOL_GPL(kvm_resched); | 1110 | EXPORT_SYMBOL_GPL(kvm_resched); |
| 1707 | 1111 | ||
| 1112 | void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu) | ||
| 1113 | { | ||
| 1114 | ktime_t expires; | ||
| 1115 | DEFINE_WAIT(wait); | ||
| 1116 | |||
| 1117 | prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); | ||
| 1118 | |||
| 1119 | /* Sleep for 100 us and hope that the lock holder gets scheduled */ | ||
| 1120 | expires = ktime_add_ns(ktime_get(), 100000UL); | ||
| 1121 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
| 1122 | |||
| 1123 | finish_wait(&vcpu->wq, &wait); | ||
| 1124 | } | ||
| 1125 | EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin); | ||
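kvm_vcpu_on_spin() simply gives up the CPU for a short while so that the vcpu actually holding the contended guest lock has a chance to run. A minimal sketch of how an architecture backend might call it from a spin-loop intercept; the handler name and return convention are illustrative only and not part of this patch:

	/* Hypothetical exit handler for a guest PAUSE-in-spin-loop intercept. */
	static int handle_pause(struct kvm_vcpu *vcpu)
	{
		/* Briefly yield; the lock holder may get scheduled in the meantime. */
		kvm_vcpu_on_spin(vcpu);
		return 1;	/* re-enter the guest */
	}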
| 1126 | |||
| 1708 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1127 | static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
| 1709 | { | 1128 | { |
| 1710 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; | 1129 | struct kvm_vcpu *vcpu = vma->vm_file->private_data; |
| @@ -1828,88 +1247,6 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) | |||
| 1828 | return 0; | 1247 | return 0; |
| 1829 | } | 1248 | } |
| 1830 | 1249 | ||
| 1831 | #ifdef __KVM_HAVE_MSIX | ||
| 1832 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | ||
| 1833 | struct kvm_assigned_msix_nr *entry_nr) | ||
| 1834 | { | ||
| 1835 | int r = 0; | ||
| 1836 | struct kvm_assigned_dev_kernel *adev; | ||
| 1837 | |||
| 1838 | mutex_lock(&kvm->lock); | ||
| 1839 | |||
| 1840 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 1841 | entry_nr->assigned_dev_id); | ||
| 1842 | if (!adev) { | ||
| 1843 | r = -EINVAL; | ||
| 1844 | goto msix_nr_out; | ||
| 1845 | } | ||
| 1846 | |||
| 1847 | if (adev->entries_nr == 0) { | ||
| 1848 | adev->entries_nr = entry_nr->entry_nr; | ||
| 1849 | if (adev->entries_nr == 0 || | ||
| 1850 | adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { | ||
| 1851 | r = -EINVAL; | ||
| 1852 | goto msix_nr_out; | ||
| 1853 | } | ||
| 1854 | |||
| 1855 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | ||
| 1856 | entry_nr->entry_nr, | ||
| 1857 | GFP_KERNEL); | ||
| 1858 | if (!adev->host_msix_entries) { | ||
| 1859 | r = -ENOMEM; | ||
| 1860 | goto msix_nr_out; | ||
| 1861 | } | ||
| 1862 | adev->guest_msix_entries = kzalloc( | ||
| 1863 | sizeof(struct kvm_guest_msix_entry) * | ||
| 1864 | entry_nr->entry_nr, GFP_KERNEL); | ||
| 1865 | if (!adev->guest_msix_entries) { | ||
| 1866 | kfree(adev->host_msix_entries); | ||
| 1867 | r = -ENOMEM; | ||
| 1868 | goto msix_nr_out; | ||
| 1869 | } | ||
| 1870 | } else /* Setting the MSI-X entry count twice is not allowed */ | ||
| 1871 | r = -EINVAL; | ||
| 1872 | msix_nr_out: | ||
| 1873 | mutex_unlock(&kvm->lock); | ||
| 1874 | return r; | ||
| 1875 | } | ||
| 1876 | |||
| 1877 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | ||
| 1878 | struct kvm_assigned_msix_entry *entry) | ||
| 1879 | { | ||
| 1880 | int r = 0, i; | ||
| 1881 | struct kvm_assigned_dev_kernel *adev; | ||
| 1882 | |||
| 1883 | mutex_lock(&kvm->lock); | ||
| 1884 | |||
| 1885 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | ||
| 1886 | entry->assigned_dev_id); | ||
| 1887 | |||
| 1888 | if (!adev) { | ||
| 1889 | r = -EINVAL; | ||
| 1890 | goto msix_entry_out; | ||
| 1891 | } | ||
| 1892 | |||
| 1893 | for (i = 0; i < adev->entries_nr; i++) | ||
| 1894 | if (adev->guest_msix_entries[i].vector == 0 || | ||
| 1895 | adev->guest_msix_entries[i].entry == entry->entry) { | ||
| 1896 | adev->guest_msix_entries[i].entry = entry->entry; | ||
| 1897 | adev->guest_msix_entries[i].vector = entry->gsi; | ||
| 1898 | adev->host_msix_entries[i].entry = entry->entry; | ||
| 1899 | break; | ||
| 1900 | } | ||
| 1901 | if (i == adev->entries_nr) { | ||
| 1902 | r = -ENOSPC; | ||
| 1903 | goto msix_entry_out; | ||
| 1904 | } | ||
| 1905 | |||
| 1906 | msix_entry_out: | ||
| 1907 | mutex_unlock(&kvm->lock); | ||
| 1908 | |||
| 1909 | return r; | ||
| 1910 | } | ||
| 1911 | #endif | ||
| 1912 | |||
| 1913 | static long kvm_vcpu_ioctl(struct file *filp, | 1250 | static long kvm_vcpu_ioctl(struct file *filp, |
| 1914 | unsigned int ioctl, unsigned long arg) | 1251 | unsigned int ioctl, unsigned long arg) |
| 1915 | { | 1252 | { |
| @@ -2168,112 +1505,6 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2168 | break; | 1505 | break; |
| 2169 | } | 1506 | } |
| 2170 | #endif | 1507 | #endif |
| 2171 | #ifdef KVM_CAP_DEVICE_ASSIGNMENT | ||
| 2172 | case KVM_ASSIGN_PCI_DEVICE: { | ||
| 2173 | struct kvm_assigned_pci_dev assigned_dev; | ||
| 2174 | |||
| 2175 | r = -EFAULT; | ||
| 2176 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
| 2177 | goto out; | ||
| 2178 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | ||
| 2179 | if (r) | ||
| 2180 | goto out; | ||
| 2181 | break; | ||
| 2182 | } | ||
| 2183 | case KVM_ASSIGN_IRQ: { | ||
| 2184 | r = -EOPNOTSUPP; | ||
| 2185 | break; | ||
| 2186 | } | ||
| 2187 | #ifdef KVM_CAP_ASSIGN_DEV_IRQ | ||
| 2188 | case KVM_ASSIGN_DEV_IRQ: { | ||
| 2189 | struct kvm_assigned_irq assigned_irq; | ||
| 2190 | |||
| 2191 | r = -EFAULT; | ||
| 2192 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
| 2193 | goto out; | ||
| 2194 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | ||
| 2195 | if (r) | ||
| 2196 | goto out; | ||
| 2197 | break; | ||
| 2198 | } | ||
| 2199 | case KVM_DEASSIGN_DEV_IRQ: { | ||
| 2200 | struct kvm_assigned_irq assigned_irq; | ||
| 2201 | |||
| 2202 | r = -EFAULT; | ||
| 2203 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | ||
| 2204 | goto out; | ||
| 2205 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | ||
| 2206 | if (r) | ||
| 2207 | goto out; | ||
| 2208 | break; | ||
| 2209 | } | ||
| 2210 | #endif | ||
| 2211 | #endif | ||
| 2212 | #ifdef KVM_CAP_DEVICE_DEASSIGNMENT | ||
| 2213 | case KVM_DEASSIGN_PCI_DEVICE: { | ||
| 2214 | struct kvm_assigned_pci_dev assigned_dev; | ||
| 2215 | |||
| 2216 | r = -EFAULT; | ||
| 2217 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | ||
| 2218 | goto out; | ||
| 2219 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | ||
| 2220 | if (r) | ||
| 2221 | goto out; | ||
| 2222 | break; | ||
| 2223 | } | ||
| 2224 | #endif | ||
| 2225 | #ifdef KVM_CAP_IRQ_ROUTING | ||
| 2226 | case KVM_SET_GSI_ROUTING: { | ||
| 2227 | struct kvm_irq_routing routing; | ||
| 2228 | struct kvm_irq_routing __user *urouting; | ||
| 2229 | struct kvm_irq_routing_entry *entries; | ||
| 2230 | |||
| 2231 | r = -EFAULT; | ||
| 2232 | if (copy_from_user(&routing, argp, sizeof(routing))) | ||
| 2233 | goto out; | ||
| 2234 | r = -EINVAL; | ||
| 2235 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | ||
| 2236 | goto out; | ||
| 2237 | if (routing.flags) | ||
| 2238 | goto out; | ||
| 2239 | r = -ENOMEM; | ||
| 2240 | entries = vmalloc(routing.nr * sizeof(*entries)); | ||
| 2241 | if (!entries) | ||
| 2242 | goto out; | ||
| 2243 | r = -EFAULT; | ||
| 2244 | urouting = argp; | ||
| 2245 | if (copy_from_user(entries, urouting->entries, | ||
| 2246 | routing.nr * sizeof(*entries))) | ||
| 2247 | goto out_free_irq_routing; | ||
| 2248 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | ||
| 2249 | routing.flags); | ||
| 2250 | out_free_irq_routing: | ||
| 2251 | vfree(entries); | ||
| 2252 | break; | ||
| 2253 | } | ||
| 2254 | #endif /* KVM_CAP_IRQ_ROUTING */ | ||
| 2255 | #ifdef __KVM_HAVE_MSIX | ||
| 2256 | case KVM_ASSIGN_SET_MSIX_NR: { | ||
| 2257 | struct kvm_assigned_msix_nr entry_nr; | ||
| 2258 | r = -EFAULT; | ||
| 2259 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | ||
| 2260 | goto out; | ||
| 2261 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | ||
| 2262 | if (r) | ||
| 2263 | goto out; | ||
| 2264 | break; | ||
| 2265 | } | ||
| 2266 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | ||
| 2267 | struct kvm_assigned_msix_entry entry; | ||
| 2268 | r = -EFAULT; | ||
| 2269 | if (copy_from_user(&entry, argp, sizeof entry)) | ||
| 2270 | goto out; | ||
| 2271 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | ||
| 2272 | if (r) | ||
| 2273 | goto out; | ||
| 2274 | break; | ||
| 2275 | } | ||
| 2276 | #endif | ||
| 2277 | case KVM_IRQFD: { | 1508 | case KVM_IRQFD: { |
| 2278 | struct kvm_irqfd data; | 1509 | struct kvm_irqfd data; |
| 2279 | 1510 | ||
| @@ -2305,11 +1536,59 @@ static long kvm_vm_ioctl(struct file *filp, | |||
| 2305 | #endif | 1536 | #endif |
| 2306 | default: | 1537 | default: |
| 2307 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); | 1538 | r = kvm_arch_vm_ioctl(filp, ioctl, arg); |
| 1539 | if (r == -ENOTTY) | ||
| 1540 | r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); | ||
| 2308 | } | 1541 | } |
| 2309 | out: | 1542 | out: |
| 2310 | return r; | 1543 | return r; |
| 2311 | } | 1544 | } |
| 2312 | 1545 | ||
| 1546 | #ifdef CONFIG_COMPAT | ||
| 1547 | struct compat_kvm_dirty_log { | ||
| 1548 | __u32 slot; | ||
| 1549 | __u32 padding1; | ||
| 1550 | union { | ||
| 1551 | compat_uptr_t dirty_bitmap; /* one bit per page */ | ||
| 1552 | __u64 padding2; | ||
| 1553 | }; | ||
| 1554 | }; | ||
| 1555 | |||
| 1556 | static long kvm_vm_compat_ioctl(struct file *filp, | ||
| 1557 | unsigned int ioctl, unsigned long arg) | ||
| 1558 | { | ||
| 1559 | struct kvm *kvm = filp->private_data; | ||
| 1560 | int r; | ||
| 1561 | |||
| 1562 | if (kvm->mm != current->mm) | ||
| 1563 | return -EIO; | ||
| 1564 | switch (ioctl) { | ||
| 1565 | case KVM_GET_DIRTY_LOG: { | ||
| 1566 | struct compat_kvm_dirty_log compat_log; | ||
| 1567 | struct kvm_dirty_log log; | ||
| 1568 | |||
| 1569 | r = -EFAULT; | ||
| 1570 | if (copy_from_user(&compat_log, (void __user *)arg, | ||
| 1571 | sizeof(compat_log))) | ||
| 1572 | goto out; | ||
| 1573 | log.slot = compat_log.slot; | ||
| 1574 | log.padding1 = compat_log.padding1; | ||
| 1575 | log.padding2 = compat_log.padding2; | ||
| 1576 | log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); | ||
| 1577 | |||
| 1578 | r = kvm_vm_ioctl_get_dirty_log(kvm, &log); | ||
| 1579 | if (r) | ||
| 1580 | goto out; | ||
| 1581 | break; | ||
| 1582 | } | ||
| 1583 | default: | ||
| 1584 | r = kvm_vm_ioctl(filp, ioctl, arg); | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | out: | ||
| 1588 | return r; | ||
| 1589 | } | ||
| 1590 | #endif | ||
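The compat handler above only translates KVM_GET_DIRTY_LOG, since struct kvm_dirty_log embeds a user pointer whose width differs between 32-bit and 64-bit callers; everything else falls through to kvm_vm_ioctl(). A rough userspace sketch of the call that ends up here when issued by a 32-bit process (the VM file descriptor, slot number and bitmap buffer are illustrative):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Fetch the dirty bitmap for memory slot 0 of an existing VM fd. */
	static int get_dirty_log(int vm_fd, void *bitmap)
	{
		struct kvm_dirty_log log = {
			.slot = 0,
			.dirty_bitmap = bitmap,
		};

		return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
	}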
| 1591 | |||
| 2313 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | 1592 | static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
| 2314 | { | 1593 | { |
| 2315 | struct page *page[1]; | 1594 | struct page *page[1]; |
| @@ -2344,7 +1623,9 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 2344 | static struct file_operations kvm_vm_fops = { | 1623 | static struct file_operations kvm_vm_fops = { |
| 2345 | .release = kvm_vm_release, | 1624 | .release = kvm_vm_release, |
| 2346 | .unlocked_ioctl = kvm_vm_ioctl, | 1625 | .unlocked_ioctl = kvm_vm_ioctl, |
| 2347 | .compat_ioctl = kvm_vm_ioctl, | 1626 | #ifdef CONFIG_COMPAT |
| 1627 | .compat_ioctl = kvm_vm_compat_ioctl, | ||
| 1628 | #endif | ||
| 2348 | .mmap = kvm_vm_mmap, | 1629 | .mmap = kvm_vm_mmap, |
| 2349 | }; | 1630 | }; |
| 2350 | 1631 | ||
| @@ -2372,6 +1653,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) | |||
| 2372 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE | 1653 | #ifdef CONFIG_KVM_APIC_ARCHITECTURE |
| 2373 | case KVM_CAP_SET_BOOT_CPU_ID: | 1654 | case KVM_CAP_SET_BOOT_CPU_ID: |
| 2374 | #endif | 1655 | #endif |
| 1656 | case KVM_CAP_INTERNAL_ERROR_DATA: | ||
| 2375 | return 1; | 1657 | return 1; |
| 2376 | #ifdef CONFIG_HAVE_KVM_IRQCHIP | 1658 | #ifdef CONFIG_HAVE_KVM_IRQCHIP |
| 2377 | case KVM_CAP_IRQ_ROUTING: | 1659 | case KVM_CAP_IRQ_ROUTING: |
| @@ -2442,11 +1724,21 @@ static struct miscdevice kvm_dev = { | |||
| 2442 | static void hardware_enable(void *junk) | 1724 | static void hardware_enable(void *junk) |
| 2443 | { | 1725 | { |
| 2444 | int cpu = raw_smp_processor_id(); | 1726 | int cpu = raw_smp_processor_id(); |
| 1727 | int r; | ||
| 2445 | 1728 | ||
| 2446 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) | 1729 | if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) |
| 2447 | return; | 1730 | return; |
| 1731 | |||
| 2448 | cpumask_set_cpu(cpu, cpus_hardware_enabled); | 1732 | cpumask_set_cpu(cpu, cpus_hardware_enabled); |
| 2449 | kvm_arch_hardware_enable(NULL); | 1733 | |
| 1734 | r = kvm_arch_hardware_enable(NULL); | ||
| 1735 | |||
| 1736 | if (r) { | ||
| 1737 | cpumask_clear_cpu(cpu, cpus_hardware_enabled); | ||
| 1738 | atomic_inc(&hardware_enable_failed); | ||
| 1739 | printk(KERN_INFO "kvm: enabling virtualization on " | ||
| 1740 | "CPU%d failed\n", cpu); | ||
| 1741 | } | ||
| 2450 | } | 1742 | } |
| 2451 | 1743 | ||
| 2452 | static void hardware_disable(void *junk) | 1744 | static void hardware_disable(void *junk) |
| @@ -2459,11 +1751,52 @@ static void hardware_disable(void *junk) | |||
| 2459 | kvm_arch_hardware_disable(NULL); | 1751 | kvm_arch_hardware_disable(NULL); |
| 2460 | } | 1752 | } |
| 2461 | 1753 | ||
| 1754 | static void hardware_disable_all_nolock(void) | ||
| 1755 | { | ||
| 1756 | BUG_ON(!kvm_usage_count); | ||
| 1757 | |||
| 1758 | kvm_usage_count--; | ||
| 1759 | if (!kvm_usage_count) | ||
| 1760 | on_each_cpu(hardware_disable, NULL, 1); | ||
| 1761 | } | ||
| 1762 | |||
| 1763 | static void hardware_disable_all(void) | ||
| 1764 | { | ||
| 1765 | spin_lock(&kvm_lock); | ||
| 1766 | hardware_disable_all_nolock(); | ||
| 1767 | spin_unlock(&kvm_lock); | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | static int hardware_enable_all(void) | ||
| 1771 | { | ||
| 1772 | int r = 0; | ||
| 1773 | |||
| 1774 | spin_lock(&kvm_lock); | ||
| 1775 | |||
| 1776 | kvm_usage_count++; | ||
| 1777 | if (kvm_usage_count == 1) { | ||
| 1778 | atomic_set(&hardware_enable_failed, 0); | ||
| 1779 | on_each_cpu(hardware_enable, NULL, 1); | ||
| 1780 | |||
| 1781 | if (atomic_read(&hardware_enable_failed)) { | ||
| 1782 | hardware_disable_all_nolock(); | ||
| 1783 | r = -EBUSY; | ||
| 1784 | } | ||
| 1785 | } | ||
| 1786 | |||
| 1787 | spin_unlock(&kvm_lock); | ||
| 1788 | |||
| 1789 | return r; | ||
| 1790 | } | ||
| 1791 | |||
| 2462 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, | 1792 | static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, |
| 2463 | void *v) | 1793 | void *v) |
| 2464 | { | 1794 | { |
| 2465 | int cpu = (long)v; | 1795 | int cpu = (long)v; |
| 2466 | 1796 | ||
| 1797 | if (!kvm_usage_count) | ||
| 1798 | return NOTIFY_OK; | ||
| 1799 | |||
| 2467 | val &= ~CPU_TASKS_FROZEN; | 1800 | val &= ~CPU_TASKS_FROZEN; |
| 2468 | switch (val) { | 1801 | switch (val) { |
| 2469 | case CPU_DYING: | 1802 | case CPU_DYING: |
| @@ -2666,13 +1999,15 @@ static void kvm_exit_debug(void) | |||
| 2666 | 1999 | ||
| 2667 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) | 2000 | static int kvm_suspend(struct sys_device *dev, pm_message_t state) |
| 2668 | { | 2001 | { |
| 2669 | hardware_disable(NULL); | 2002 | if (kvm_usage_count) |
| 2003 | hardware_disable(NULL); | ||
| 2670 | return 0; | 2004 | return 0; |
| 2671 | } | 2005 | } |
| 2672 | 2006 | ||
| 2673 | static int kvm_resume(struct sys_device *dev) | 2007 | static int kvm_resume(struct sys_device *dev) |
| 2674 | { | 2008 | { |
| 2675 | hardware_enable(NULL); | 2009 | if (kvm_usage_count) |
| 2010 | hardware_enable(NULL); | ||
| 2676 | return 0; | 2011 | return 0; |
| 2677 | } | 2012 | } |
| 2678 | 2013 | ||
| @@ -2747,7 +2082,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, | |||
| 2747 | goto out_free_1; | 2082 | goto out_free_1; |
| 2748 | } | 2083 | } |
| 2749 | 2084 | ||
| 2750 | on_each_cpu(hardware_enable, NULL, 1); | ||
| 2751 | r = register_cpu_notifier(&kvm_cpu_notifier); | 2085 | r = register_cpu_notifier(&kvm_cpu_notifier); |
| 2752 | if (r) | 2086 | if (r) |
| 2753 | goto out_free_2; | 2087 | goto out_free_2; |
| @@ -2797,7 +2131,6 @@ out_free_3: | |||
| 2797 | unregister_reboot_notifier(&kvm_reboot_notifier); | 2131 | unregister_reboot_notifier(&kvm_reboot_notifier); |
| 2798 | unregister_cpu_notifier(&kvm_cpu_notifier); | 2132 | unregister_cpu_notifier(&kvm_cpu_notifier); |
| 2799 | out_free_2: | 2133 | out_free_2: |
| 2800 | on_each_cpu(hardware_disable, NULL, 1); | ||
| 2801 | out_free_1: | 2134 | out_free_1: |
| 2802 | kvm_arch_hardware_unsetup(); | 2135 | kvm_arch_hardware_unsetup(); |
| 2803 | out_free_0a: | 2136 | out_free_0a: |
