author     Linus Torvalds <torvalds@linux-foundation.org>  2017-09-15 18:43:55 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-09-15 18:43:55 -0400
commit     9db59599ae502b38b27cff6462273f84acd59927 (patch)
tree       96d90a2f7bcddc837987579ad2d3e58b891db716
parent     b38923a068c10fc36ca8f596d650d095ce390b85 (diff)
parent     4f350c6dbcb9000e18907515ec8a7b205ac33c69 (diff)

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Paolo Bonzini:
- PPC bugfixes
- RCU splat fix
- swait races fix
- pointless userspace-triggerable BUG() fix
- misc fixes for KVM_RUN corner cases
- nested virt correctness fixes + one host DoS
- some cleanups
- clang build fix
- fix AMD AVIC with default QEMU command line options
- x86 bugfixes
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly
kvm: vmx: Handle VMLAUNCH/VMRESUME failure properly
kvm: nVMX: Remove nested_vmx_succeed after successful VM-entry
kvm,mips: Fix potential swait_active() races
kvm,powerpc: Serialize wq active checks in ops->vcpu_kick
kvm: Serialize wq active checks in kvm_vcpu_wake_up()
kvm,x86: Fix apf_task_wake_one() wq serialization
kvm,lapic: Justify use of swait_active()
kvm,async_pf: Use swq_has_sleeper()
sched/wait: Add swq_has_sleeper()
KVM: VMX: Do not BUG() on out-of-bounds guest IRQ
KVM: Don't accept obviously wrong gsi values via KVM_IRQFD
kvm: nVMX: Don't allow L2 to access the hardware CR8
KVM: trace events: update list of exit reasons
KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously
KVM: X86: Don't block vCPU if there is pending exception
KVM: SVM: Add irqchip_split() checks before enabling AVIC
KVM: Add struct kvm_vcpu pointer parameter to get_enable_apicv()
KVM: SVM: Refactor AVIC vcpu initialization into avic_init_vcpu()
KVM: x86: fix clang build
...
-rw-r--r--  arch/mips/kvm/mips.c                      |   4
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c              |   4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xive.c      |   1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  17
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c            |   1
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c   |   7
-rw-r--r--  arch/x86/include/asm/kvm_host.h           |   3
-rw-r--r--  arch/x86/kernel/kvm.c                     |   2
-rw-r--r--  arch/x86/kvm/cpuid.h                      |   1
-rw-r--r--  arch/x86/kvm/lapic.c                      |   4
-rw-r--r--  arch/x86/kvm/svm.c                        |  38
-rw-r--r--  arch/x86/kvm/vmx.c                        | 162
-rw-r--r--  arch/x86/kvm/x86.c                        |  51
-rw-r--r--  include/linux/swait.h                     |  58
-rw-r--r--  include/trace/events/kvm.h                |   4
-rw-r--r--  virt/kvm/async_pf.c                       |   6
-rw-r--r--  virt/kvm/eventfd.c                        |   2
-rw-r--r--  virt/kvm/kvm_main.c                       |   3

18 files changed, 257 insertions, 111 deletions
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index bce2a6431430..d535edc01434 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -514,7 +514,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 
         dvcpu->arch.wait = 0;
 
-        if (swait_active(&dvcpu->wq))
+        if (swq_has_sleeper(&dvcpu->wq))
                 swake_up(&dvcpu->wq);
 
         return 0;
@@ -1179,7 +1179,7 @@ static void kvm_mips_comparecount_func(unsigned long data)
         kvm_mips_callbacks->queue_timer_int(vcpu);
 
         vcpu->arch.wait = 0;
-        if (swait_active(&vcpu->wq))
+        if (swq_has_sleeper(&vcpu->wq))
                 swake_up(&vcpu->wq);
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 18e974a34fce..73bf1ebfa78f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -181,7 +181,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
         struct swait_queue_head *wqp;
 
         wqp = kvm_arch_vcpu_wq(vcpu);
-        if (swait_active(wqp)) {
+        if (swq_has_sleeper(wqp)) {
                 swake_up(wqp);
                 ++vcpu->stat.halt_wakeup;
         }
@@ -4212,11 +4212,13 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
         if ((cfg->process_table & PRTS_MASK) > 24)
                 return -EINVAL;
 
+        mutex_lock(&kvm->lock);
         kvm->arch.process_table = cfg->process_table;
         kvmppc_setup_partition_table(kvm);
 
         lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
         kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
+        mutex_unlock(&kvm->lock);
 
         return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xive.c b/arch/powerpc/kvm/book3s_hv_rm_xive.c
index abf5f01b6eb1..5b81a807d742 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xive.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xive.c
@@ -38,7 +38,6 @@ static inline void __iomem *get_tima_phys(void)
 #define __x_tima                get_tima_phys()
 #define __x_eoi_page(xd)        ((void __iomem *)((xd)->eoi_page))
 #define __x_trig_page(xd)       ((void __iomem *)((xd)->trig_page))
-#define __x_readb       __raw_rm_readb
 #define __x_writeb      __raw_rm_writeb
 #define __x_readw       __raw_rm_readw
 #define __x_readq       __raw_rm_readq
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 663a4a861e7f..17936f82d3c7 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -771,6 +771,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
+        /*
+         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+         */
         bl      kvmppc_restore_tm
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
@@ -1630,6 +1633,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
+        /*
+         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+         */
         bl      kvmppc_save_tm
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
@@ -1749,7 +1755,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
         /*
          * Are we running hash or radix ?
          */
-        beq     cr2,3f
+        ld      r5, VCPU_KVM(r9)
+        lbz     r0, KVM_RADIX(r5)
+        cmpwi   cr2, r0, 0
+        beq     cr2, 3f
 
         /* Radix: Handle the case where the guest used an illegal PID */
         LOAD_REG_ADDR(r4, mmu_base_pid)
@@ -2466,6 +2475,9 @@ _GLOBAL(kvmppc_h_cede)          /* r3 = vcpu pointer, r11 = msr, r13 = paca */
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
+        /*
+         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+         */
         ld      r9, HSTATE_KVM_VCPU(r13)
         bl      kvmppc_save_tm
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
@@ -2578,6 +2590,9 @@ kvm_end_cede:
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
+        /*
+         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
+         */
         bl      kvmppc_restore_tm
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 08b200a0bbce..13304622ab1c 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -48,7 +48,6 @@
 #define __x_tima                xive_tima
 #define __x_eoi_page(xd)        ((void __iomem *)((xd)->eoi_mmio))
 #define __x_trig_page(xd)       ((void __iomem *)((xd)->trig_mmio))
-#define __x_readb       __raw_readb
 #define __x_writeb      __raw_writeb
 #define __x_readw       __raw_readw
 #define __x_readq       __raw_readq
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index d1ed2c41b5d2..c7a5deadd1cc 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -28,7 +28,8 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
          * bit.
          */
         if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
-                u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+                __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
+                u8 pipr = be64_to_cpu(qw1) & 0xff;
                 if (pipr >= xc->hw_cppr)
                         return;
         }
@@ -336,7 +337,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
         struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
         u8 pending = xc->pending;
         u32 hirq;
-        u8 pipr;
 
         pr_devel("H_IPOLL(server=%ld)\n", server);
 
@@ -353,7 +353,8 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
                 pending = 0xff;
         } else {
                 /* Grab pending interrupt if any */
-                pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+                __be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
+                u8 pipr = be64_to_cpu(qw1) & 0xff;
                 if (pipr < 8)
                         pending |= 1 << pipr;
         }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8844eee290b2..c73e493adf07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -951,7 +951,6 @@ struct kvm_x86_ops {
         void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
         unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
         void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
-        u32 (*get_pkru)(struct kvm_vcpu *vcpu);
 
         void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
@@ -973,7 +972,7 @@ struct kvm_x86_ops {
         void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
         void (*enable_irq_window)(struct kvm_vcpu *vcpu);
         void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
-        bool (*get_enable_apicv)(void);
+        bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
         void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
         void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
         void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 874827b0d7ca..aa60a08b65b1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -180,7 +180,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
         hlist_del_init(&n->link);
         if (n->halted)
                 smp_send_reschedule(n->cpu);
-        else if (swait_active(&n->wq))
+        else if (swq_has_sleeper(&n->wq))
                 swake_up(&n->wq);
 }
 
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 1ea3c0e1e3a9..0bc5c1315708 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -59,7 +59,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
 {
         unsigned x86_leaf = x86_feature / 32;
 
-        BUILD_BUG_ON(!__builtin_constant_p(x86_leaf));
         BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
         BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index aaf10b6f5380..69c5612be786 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1324,6 +1324,10 @@ static void apic_timer_expired(struct kvm_lapic *apic)
         atomic_inc(&apic->lapic_timer.pending);
         kvm_set_pending_timer(vcpu);
 
+        /*
+         * For x86, the atomic_inc() is serialized, thus
+         * using swait_active() is safe.
+         */
         if (swait_active(q))
                 swake_up(q);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2c1cfe68a9af..0e68f0b3cbf7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1200,7 +1200,6 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
         vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
         vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
         vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
-        svm->vcpu.arch.apicv_active = true;
 }
 
 static void init_vmcb(struct vcpu_svm *svm)
@@ -1316,7 +1315,7 @@ static void init_vmcb(struct vcpu_svm *svm)
                 set_intercept(svm, INTERCEPT_PAUSE);
         }
 
-        if (avic)
+        if (kvm_vcpu_apicv_active(&svm->vcpu))
                 avic_init_vmcb(svm);
 
         /*
@@ -1600,6 +1599,23 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
 }
 
+static int avic_init_vcpu(struct vcpu_svm *svm)
+{
+        int ret;
+
+        if (!kvm_vcpu_apicv_active(&svm->vcpu))
+                return 0;
+
+        ret = avic_init_backing_page(&svm->vcpu);
+        if (ret)
+                return ret;
+
+        INIT_LIST_HEAD(&svm->ir_list);
+        spin_lock_init(&svm->ir_list_lock);
+
+        return ret;
+}
+
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 {
         struct vcpu_svm *svm;
@@ -1636,14 +1652,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
         if (!hsave_page)
                 goto free_page3;
 
-        if (avic) {
-                err = avic_init_backing_page(&svm->vcpu);
-                if (err)
-                        goto free_page4;
-
-                INIT_LIST_HEAD(&svm->ir_list);
-                spin_lock_init(&svm->ir_list_lock);
-        }
+        err = avic_init_vcpu(svm);
+        if (err)
+                goto free_page4;
 
         /* We initialize this flag to true to make sure that the is_running
          * bit would be set the first time the vcpu is loaded.
@@ -4395,9 +4406,9 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
         return;
 }
 
-static bool svm_get_enable_apicv(void)
+static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
 {
-        return avic;
+        return avic && irqchip_split(vcpu->kvm);
 }
 
 static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
@@ -4414,7 +4425,7 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
         struct vcpu_svm *svm = to_svm(vcpu);
         struct vmcb *vmcb = svm->vmcb;
 
-        if (!avic)
+        if (!kvm_vcpu_apicv_active(&svm->vcpu))
                 return;
 
         vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
@@ -5302,6 +5313,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                  */
                 if (info->rep_prefix != REPE_PREFIX)
                         goto out;
+                break;
         case SVM_EXIT_IOIO: {
                 u64 exit_info;
                 u32 bytes;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 699704d4bc9e..06c0c6d0541e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5012,7 +5012,7 @@ static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_activ
         }
 }
 
-static bool vmx_get_enable_apicv(void)
+static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
 {
         return enable_apicv;
 }
@@ -8344,12 +8344,14 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-        trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
-                                vmcs_readl(EXIT_QUALIFICATION),
-                                vmx->idt_vectoring_info,
-                                intr_info,
-                                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
-                                KVM_ISA_VMX);
+        if (vmx->nested.nested_run_pending)
+                return false;
+
+        if (unlikely(vmx->fail)) {
+                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
+                                    vmcs_read32(VM_INSTRUCTION_ERROR));
+                return true;
+        }
 
         /*
          * The host physical addresses of some pages of guest memory
@@ -8363,14 +8365,12 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
          */
         nested_mark_vmcs12_pages_dirty(vcpu);
 
-        if (vmx->nested.nested_run_pending)
-                return false;
-
-        if (unlikely(vmx->fail)) {
-                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
-                                    vmcs_read32(VM_INSTRUCTION_ERROR));
-                return true;
-        }
+        trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
+                                vmcs_readl(EXIT_QUALIFICATION),
+                                vmx->idt_vectoring_info,
+                                intr_info,
+                                vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+                                KVM_ISA_VMX);
 
         switch (exit_reason) {
         case EXIT_REASON_EXCEPTION_NMI:
@@ -9424,12 +9424,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                                   | (1 << VCPU_EXREG_CR3));
         vcpu->arch.regs_dirty = 0;
 
-        vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
-        vmx->loaded_vmcs->launched = 1;
-
-        vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
-
         /*
          * eager fpu is enabled if PKEY is supported and CR4 is switched
          * back on host, so it is safe to read guest PKRU from current
@@ -9451,6 +9445,14 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
         kvm_make_request(KVM_REQ_EVENT, vcpu);
 
         vmx->nested.nested_run_pending = 0;
+        vmx->idt_vectoring_info = 0;
+
+        vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
+        if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+                return;
+
+        vmx->loaded_vmcs->launched = 1;
+        vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
         vmx_complete_atomic_exit(vmx);
         vmx_recover_nmi_blocking(vmx);
@@ -10525,6 +10527,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
         if (exec_control & CPU_BASED_TPR_SHADOW) {
                 vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
                 vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+        } else {
+#ifdef CONFIG_X86_64
+                exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+                                CPU_BASED_CR8_STORE_EXITING;
+#endif
         }
 
         /*
@@ -11388,46 +11395,30 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
         struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-        u32 vm_inst_error = 0;
 
         /* trying to cancel vmlaunch/vmresume is a bug */
         WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
+        /*
+         * The only expected VM-instruction error is "VM entry with
+         * invalid control field(s)." Anything else indicates a
+         * problem with L0.
+         */
+        WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
+                                   VMXERR_ENTRY_INVALID_CONTROL_FIELD));
+
         leave_guest_mode(vcpu);
-        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
-                       exit_qualification);
 
-        if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
-                                 vmcs12->vm_exit_msr_store_count))
-                nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+        if (likely(!vmx->fail)) {
+                prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+                               exit_qualification);
 
-        if (unlikely(vmx->fail))
-                vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+                if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
+                                         vmcs12->vm_exit_msr_store_count))
+                        nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+        }
 
         vmx_switch_vmcs(vcpu, &vmx->vmcs01);
-
-        /*
-         * TODO: SDM says that with acknowledge interrupt on exit, bit 31 of
-         * the VM-exit interrupt information (valid interrupt) is always set to
-         * 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need
-         * kvm_cpu_has_interrupt(). See the commit message for details.
-         */
-        if (nested_exit_intr_ack_set(vcpu) &&
-                exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
-                kvm_cpu_has_interrupt(vcpu)) {
-                int irq = kvm_cpu_get_interrupt(vcpu);
-                WARN_ON(irq < 0);
-                vmcs12->vm_exit_intr_info = irq |
-                        INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
-        }
-
-        trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
-                                       vmcs12->exit_qualification,
-                                       vmcs12->idt_vectoring_info_field,
-                                       vmcs12->vm_exit_intr_info,
-                                       vmcs12->vm_exit_intr_error_code,
-                                       KVM_ISA_VMX);
-
         vm_entry_controls_reset_shadow(vmx);
         vm_exit_controls_reset_shadow(vmx);
         vmx_segment_cache_clear(vmx);
@@ -11436,8 +11427,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
         if (VMCS02_POOL_SIZE == 0)
                 nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
 
-        load_vmcs12_host_state(vcpu, vmcs12);
-
         /* Update any VMCS fields that might have changed while L2 ran */
         vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
         vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
@@ -11486,21 +11475,57 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
          */
         kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
 
-        /*
-         * Exiting from L2 to L1, we're now back to L1 which thinks it just
-         * finished a VMLAUNCH or VMRESUME instruction, so we need to set the
-         * success or failure flag accordingly.
-         */
-        if (unlikely(vmx->fail)) {
-                vmx->fail = 0;
-                nested_vmx_failValid(vcpu, vm_inst_error);
-        } else
-                nested_vmx_succeed(vcpu);
         if (enable_shadow_vmcs)
                 vmx->nested.sync_shadow_vmcs = true;
 
         /* in case we halted in L2 */
         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+        if (likely(!vmx->fail)) {
+                /*
+                 * TODO: SDM says that with acknowledge interrupt on
+                 * exit, bit 31 of the VM-exit interrupt information
+                 * (valid interrupt) is always set to 1 on
+                 * EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
+                 * need kvm_cpu_has_interrupt(). See the commit
+                 * message for details.
+                 */
+                if (nested_exit_intr_ack_set(vcpu) &&
+                    exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+                    kvm_cpu_has_interrupt(vcpu)) {
+                        int irq = kvm_cpu_get_interrupt(vcpu);
+                        WARN_ON(irq < 0);
+                        vmcs12->vm_exit_intr_info = irq |
+                                INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
+                }
+
+                trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+                                               vmcs12->exit_qualification,
+                                               vmcs12->idt_vectoring_info_field,
+                                               vmcs12->vm_exit_intr_info,
+                                               vmcs12->vm_exit_intr_error_code,
+                                               KVM_ISA_VMX);
+
+                load_vmcs12_host_state(vcpu, vmcs12);
+
+                return;
+        }
+
+        /*
+         * After an early L2 VM-entry failure, we're now back
+         * in L1 which thinks it just finished a VMLAUNCH or
+         * VMRESUME instruction, so we need to set the failure
+         * flag and the VM-instruction error field of the VMCS
+         * accordingly.
+         */
+        nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+        /*
+         * The emulated instruction was already skipped in
+         * nested_vmx_run, but the updated RIP was never
+         * written back to the vmcs01.
+         */
+        skip_emulated_instruction(vcpu);
+        vmx->fail = 0;
 }
 
 /*
@@ -11829,7 +11854,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
         struct kvm_lapic_irq irq;
         struct kvm_vcpu *vcpu;
         struct vcpu_data vcpu_info;
-        int idx, ret = -EINVAL;
+        int idx, ret = 0;
 
         if (!kvm_arch_has_assigned_device(kvm) ||
             !irq_remapping_cap(IRQ_POSTING_CAP) ||
@@ -11838,7 +11863,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 
         idx = srcu_read_lock(&kvm->irq_srcu);
         irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
-        BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+        if (guest_irq >= irq_rt->nr_rt_entries ||
+            hlist_empty(&irq_rt->map[guest_irq])) {
+                pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+                             guest_irq, irq_rt->nr_rt_entries);
+                goto out;
+        }
 
         hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
                 if (e->type != KVM_IRQ_ROUTING_MSI)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6069af86da3b..cd17b7d9a107 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7231,10 +7231,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 
         if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+                if (kvm_run->immediate_exit) {
+                        r = -EINTR;
+                        goto out;
+                }
                 kvm_vcpu_block(vcpu);
                 kvm_apic_accept_events(vcpu);
                 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                 r = -EAGAIN;
+                if (signal_pending(current)) {
+                        r = -EINTR;
+                        vcpu->run->exit_reason = KVM_EXIT_INTR;
+                        ++vcpu->stat.signal_exits;
+                }
                 goto out;
         }
 
@@ -7971,7 +7980,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
         BUG_ON(vcpu->kvm == NULL);
         kvm = vcpu->kvm;
 
-        vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
+        vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
         vcpu->arch.pv.pv_unhalted = false;
         vcpu->arch.emulate_ctxt.ops = &emulate_ops;
         if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
@@ -8452,6 +8461,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
         if (vcpu->arch.pv.pv_unhalted)
                 return true;
 
+        if (vcpu->arch.exception.pending)
+                return true;
+
         if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
             (vcpu->arch.nmi_pending &&
              kvm_x86_ops->nmi_allowed(vcpu)))
@@ -8619,6 +8631,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
                                       sizeof(val));
 }
 
+static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
+                                     sizeof(u32));
+}
+
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                                      struct kvm_async_pf *work)
 {
@@ -8646,6 +8665,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                                  struct kvm_async_pf *work)
 {
         struct x86_exception fault;
+        u32 val;
 
         if (work->wakeup_all)
                 work->arch.token = ~0; /* broadcast wakeup */
@@ -8653,15 +8673,26 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
         kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
         trace_kvm_async_pf_ready(work->arch.token, work->gva);
 
-        if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
-            !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
-                fault.vector = PF_VECTOR;
-                fault.error_code_valid = true;
-                fault.error_code = 0;
-                fault.nested_page_fault = false;
-                fault.address = work->arch.token;
-                fault.async_page_fault = true;
-                kvm_inject_page_fault(vcpu, &fault);
+        if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
+            !apf_get_user(vcpu, &val)) {
+                if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
+                    vcpu->arch.exception.pending &&
+                    vcpu->arch.exception.nr == PF_VECTOR &&
+                    !apf_put_user(vcpu, 0)) {
+                        vcpu->arch.exception.injected = false;
+                        vcpu->arch.exception.pending = false;
+                        vcpu->arch.exception.nr = 0;
+                        vcpu->arch.exception.has_error_code = false;
+                        vcpu->arch.exception.error_code = 0;
+                } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+                        fault.vector = PF_VECTOR;
+                        fault.error_code_valid = true;
+                        fault.error_code = 0;
+                        fault.nested_page_fault = false;
+                        fault.address = work->arch.token;
+                        fault.async_page_fault = true;
+                        kvm_inject_page_fault(vcpu, &fault);
+                }
         }
         vcpu->arch.apf.halted = false;
         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 4a4e180d0a35..73e97a08d3d0 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -79,9 +79,63 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name
         DECLARE_SWAIT_QUEUE_HEAD(name)
 #endif
 
-static inline int swait_active(struct swait_queue_head *q)
+/**
+ * swait_active -- locklessly test for waiters on the queue
+ * @wq: the waitqueue to test for waiters
+ *
+ * returns true if the wait list is not empty
+ *
+ * NOTE: this function is lockless and requires care, incorrect usage _will_
+ * lead to sporadic and non-obvious failure.
+ *
+ * NOTE2: this function has the same above implications as regular waitqueues.
+ *
+ * Use either while holding swait_queue_head::lock or when used for wakeups
+ * with an extra smp_mb() like:
+ *
+ *      CPU0 - waker                    CPU1 - waiter
+ *
+ *                                      for (;;) {
+ *      @cond = true;                     prepare_to_swait(&wq_head, &wait, state);
+ *      smp_mb();                         // smp_mb() from set_current_state()
+ *      if (swait_active(wq_head))        if (@cond)
+ *        wake_up(wq_head);                 break;
+ *                                        schedule();
+ *                                      }
+ *                                      finish_swait(&wq_head, &wait);
+ *
+ * Because without the explicit smp_mb() it's possible for the
+ * swait_active() load to get hoisted over the @cond store such that we'll
+ * observe an empty wait list while the waiter might not observe @cond.
+ * This, in turn, can trigger missing wakeups.
+ *
+ * Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
+ * which (when the lock is uncontended) are of roughly equal cost.
+ */
+static inline int swait_active(struct swait_queue_head *wq)
+{
+        return !list_empty(&wq->task_list);
+}
+
+/**
+ * swq_has_sleeper - check if there are any waiting processes
+ * @wq: the waitqueue to test for waiters
+ *
+ * Returns true if @wq has waiting processes
+ *
+ * Please refer to the comment for swait_active.
+ */
+static inline bool swq_has_sleeper(struct swait_queue_head *wq)
 {
-        return !list_empty(&q->task_list);
+        /*
+         * We need to be sure we are in sync with the list_add()
+         * modifications to the wait queue (task_list).
+         *
+         * This memory barrier should be paired with one on the
+         * waiting side.
+         */
+        smp_mb();
+        return swait_active(wq);
 }
 
 extern void swake_up(struct swait_queue_head *q);
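To make the ordering argument in the swait_active()/swq_has_sleeper() kernel-doc above concrete outside the kernel tree, here is a minimal stand-alone C11 sketch; it is not kernel code, and the names cond, sleepers, waker() and waiter_must_sleep() are illustrative only. The waker-side atomic_thread_fence() plays the role of the smp_mb() inside swq_has_sleeper(): without it, the load of sleepers could be hoisted above the store to cond and a wakeup could be missed.

/* Stand-alone C11 analogue of the swait_active() vs. swq_has_sleeper()
 * ordering problem described in the kernel-doc above.  Illustrative only. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool cond;      /* the condition the waiter is waiting for    */
static atomic_int  sleepers;  /* stand-in for a non-empty swait task_list   */

/* Waker side, analogous to callers converted to swq_has_sleeper(). */
static void waker(void)
{
        atomic_store_explicit(&cond, true, memory_order_relaxed);
        /* Full barrier: orders the cond store before the sleepers load,
         * the role played by smp_mb() inside swq_has_sleeper(). */
        atomic_thread_fence(memory_order_seq_cst);
        if (atomic_load_explicit(&sleepers, memory_order_relaxed))
                puts("wake the waiter");        /* swake_up() equivalent */
}

/* Waiter side, analogous to prepare_to_swait() plus the condition re-check. */
static bool waiter_must_sleep(void)
{
        atomic_fetch_add_explicit(&sleepers, 1, memory_order_relaxed);
        /* Full barrier: the kernel gets this from set_current_state(). */
        atomic_thread_fence(memory_order_seq_cst);
        return !atomic_load_explicit(&cond, memory_order_relaxed);
}

int main(void)
{
        if (waiter_must_sleep())
                puts("waiter would block here");
        waker();
        return 0;
}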
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 8ade3eb6c640..dcffedfac431 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -14,7 +14,9 @@
         ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \
         ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
         ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \
-        ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH)
+        ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\
+        ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \
+        ERSN(HYPERV)
 
 TRACE_EVENT(kvm_userspace_exit,
         TP_PROTO(__u32 reason, int errno),
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index bb298a200cd3..57bcb27dcf30 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -106,11 +106,7 @@ static void async_pf_execute(struct work_struct *work)
 
         trace_kvm_async_pf_completed(addr, gva);
 
-        /*
-         * This memory barrier pairs with prepare_to_wait's set_current_state()
-         */
-        smp_mb();
-        if (swait_active(&vcpu->wq))
+        if (swq_has_sleeper(&vcpu->wq))
                 swake_up(&vcpu->wq);
 
         mmput(mm);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f2ac53ab8243..c608ab495282 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -565,6 +565,8 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
         if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
                 return -EINVAL;
+        if (args->gsi >= KVM_MAX_IRQ_ROUTES)
+                return -EINVAL;
 
         if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
                 return kvm_irqfd_deassign(kvm, args);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6ed1c2021198..9deb5a245b83 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -674,6 +674,7 @@ out_err_no_irq_srcu:
 out_err_no_srcu:
         hardware_disable_all();
 out_err_no_disable:
+        refcount_set(&kvm->users_count, 0);
         for (i = 0; i < KVM_NR_BUSES; i++)
                 kfree(kvm_get_bus(kvm, i));
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
@@ -2186,7 +2187,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
         struct swait_queue_head *wqp;
 
         wqp = kvm_arch_vcpu_wq(vcpu);
-        if (swait_active(wqp)) {
+        if (swq_has_sleeper(wqp)) {
                 swake_up(wqp);
                 ++vcpu->stat.halt_wakeup;
                 return true;