author    Linus Torvalds <torvalds@linux-foundation.org>  2018-05-26 13:46:57 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-05-26 13:46:57 -0400
commit    ec30dcf7f425dc811ac365b5c4b0f097f98e569f
tree      3169a5bcc21b90b1fab7c4c3cb473bcf7fdc4f81
parent    bc2dbc5420e82560e650f8531ceca597441ca171
parent    696ca779a928d0e93d61c38ffc3a4d8914a9b9a0
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Radim Krčmář:

 "PPC:
   - Close a hole which could possibly lead to the host timebase getting
     out of sync.
   - Three fixes relating to PTEs and TLB entries for radix guests.
   - Fix a bug which could lead to an interrupt never getting delivered
     to the guest, if it is pending for a guest vCPU when the vCPU gets
     offlined.

  s390:
   - Fix false negatives in VSIE validity check (Cc stable)

  x86:
   - Fix time drift of VMX preemption timer when a guest uses LAPIC timer
     in periodic mode (Cc stable)
   - Unconditionally expose CPUID.IA32_ARCH_CAPABILITIES to allow
     migration from hosts that don't need retpoline mitigation (Cc stable)
   - Fix guest crashes on reboot by properly coupling CR4.OSXSAVE and
     CPUID.OSXSAVE (Cc stable)
   - Report correct RIP after Hyper-V hypercall #UD (introduced in -rc6)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: fix #UD address of failed Hyper-V hypercalls
  kvm: x86: IA32_ARCH_CAPABILITIES is always supported
  KVM: x86: Update cpuid properly when CR4.OSXAVE or CR4.PKE is changed
  x86/kvm: fix LAPIC timer drift when guest uses periodic mode
  KVM: s390: vsie: fix < 8k check for the itdba
  KVM: PPC: Book 3S HV: Do ptesync in radix guest exit path
  KVM: PPC: Book3S HV: XIVE: Resend re-routed interrupts on CPU priority change
  KVM: PPC: Book3S HV: Make radix clear pte when unmapping
  KVM: PPC: Book3S HV: Make radix use correct tlbie sequence in kvmppc_radix_tlbie_page
  KVM: PPC: Book3S HV: Snapshot timebase offset on guest entry
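The x86 LAPIC fix above re-derives the TSC deadline from the ktime-based
target expiration each period instead of accumulating a converted period.
A minimal, standalone sketch of why that matters (hypothetical frequency,
period and function names; simplified integer conversion; this is not
kernel code):

    /*
     * Standalone illustration: accumulating a converted period in a second
     * clock domain drifts, while re-deriving the deadline from the primary
     * clock each period does not.  All values here are made up.
     */
    #include <stdio.h>
    #include <stdint.h>

    #define TSC_KHZ   2399987ULL   /* hypothetical TSC frequency in kHz */
    #define PERIOD_NS  999999ULL   /* ~1 ms periodic timer */

    /* ns -> TSC cycles, truncating like a real fixed-point conversion might */
    static uint64_t nsec_to_cycles(uint64_t ns)
    {
    	return ns * TSC_KHZ / 1000000ULL;
    }

    int main(void)
    {
    	uint64_t target_ns = 0;        /* ktime-based target expiration */
    	uint64_t tsc_accumulate = 0;   /* old scheme: += converted period */
    	uint64_t tsc_rederive = 0;     /* new scheme: derived from target_ns */
    	unsigned long i;

    	for (i = 0; i < 1000000; i++) {
    		target_ns += PERIOD_NS;
    		tsc_accumulate += nsec_to_cycles(PERIOD_NS);
    		tsc_rederive = nsec_to_cycles(target_ns);
    	}
    	/* the accumulated deadline lags: each += drops a fractional cycle */
    	printf("drift after 1e6 periods: %lld cycles\n",
    	       (long long)(tsc_rederive - tsc_accumulate));
    	return 0;
    }

Compiled and run, the accumulated deadline ends up several hundred thousand
cycles behind the re-derived one, analogous to the drift described in the
comment added to advance_periodic_target_expiration() below.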
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h   |   1
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c       |   1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c  |   6
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c            |   1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S |  97
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c | 108
-rw-r--r--  arch/s390/kvm/vsie.c                    |   2
-rw-r--r--  arch/x86/kvm/cpuid.c                    |   5
-rw-r--r--  arch/x86/kvm/hyperv.c                   |  19
-rw-r--r--  arch/x86/kvm/lapic.c                    |  16
-rw-r--r--  arch/x86/kvm/x86.c                      |  17
11 files changed, 198 insertions(+), 75 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 4c02a7378d06..e7377b73cfec 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -96,6 +96,7 @@ struct kvmppc_vcore {
 	struct kvm_vcpu *runner;
 	struct kvm *kvm;
 	u64 tb_offset;		/* guest timebase - host timebase */
+	u64 tb_offset_applied;	/* timebase offset currently in force */
 	ulong lpcr;
 	u32 arch_compat;
 	ulong pcr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6bee65f3cfd3..373dc1d6ef44 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -562,6 +562,7 @@ int main(void)
 	OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
 	OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
 	OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+	OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
 	OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
 	OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
 	OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index a57eafec4dc2..361f42c8c73e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
 		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
 			     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("ptesync": : :"memory");
+	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
 static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
 	/* RIC=1 PRS=0 R=1 IS=2 */
 	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
 		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("ptesync": : :"memory");
+	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep)) {
-		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+		old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
 					      gpa, shift);
 		kvmppc_radix_tlbie_page(kvm, gpa, shift);
 		if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d07fca5121c..9963f65c212b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
 	vc->conferring_threads = 0;
+	vc->tb_offset_applied = 0;
 }
 
 static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bd63fa8a08b5..07ca1b2a7966 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 22:	ld	r8,VCORE_TB_OFFSET(r5)
 	cmpdi	r8,0
 	beq	37f
+	std	r8, VCORE_TB_OFFSET_APPL(r5)
 	mftb	r6		/* current host timebase */
 	add	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -940,18 +941,6 @@ FTR_SECTION_ELSE
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 8:
 
-	/*
-	 * Set the decrementer to the guest decrementer.
-	 */
-	ld	r8,VCPU_DEC_EXPIRES(r4)
-	/* r8 is a host timebase value here, convert to guest TB */
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r6,VCORE_TB_OFFSET(r5)
-	add	r8,r8,r6
-	mftb	r7
-	subf	r3,r7,r8
-	mtspr	SPRN_DEC,r3
-
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
 	ld	r7, VCPU_SPRG2(r4)
@@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPCR,r8
 	isync
 
+	/*
+	 * Set the decrementer to the guest decrementer.
+	 */
+	ld	r8,VCPU_DEC_EXPIRES(r4)
+	/* r8 is a host timebase value here, convert to guest TB */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r6,VCORE_TB_OFFSET_APPL(r5)
+	add	r8,r8,r6
+	mftb	r7
+	subf	r3,r7,r8
+	mtspr	SPRN_DEC,r3
+
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
 	EXTEND_HDEC(r3)
@@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 
 guest_bypass:
 	stw	r12, STACK_SLOT_TRAP(r1)
-	mr	r3, r12
+
+	/* Save DEC */
+	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
+	ld	r3, HSTATE_KVM_VCORE(r13)
+	mfspr	r5,SPRN_DEC
+	mftb	r6
+	/* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+	ld	r4, VCORE_LPCR(r3)
+	andis.	r4, r4, LPCR_LD@h
+	bne	16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	extsw	r5,r5
+16:	add	r5,r5,r6
+	/* r5 is a guest timebase value here, convert to host TB */
+	ld	r4,VCORE_TB_OFFSET_APPL(r3)
+	subf	r5,r4,r5
+	std	r5,VCPU_DEC_EXPIRES(r9)
+
 	/* Increment exit count, poke other threads to exit */
+	mr	r3, r12
 	bl	kvmhv_commence_exit
 	nop
 	ld	r9, HSTATE_KVM_VCPU(r13)
@@ -1639,23 +1659,6 @@ guest_bypass:
 	mtspr	SPRN_PURR,r3
 	mtspr	SPRN_SPURR,r4
 
-	/* Save DEC */
-	ld	r3, HSTATE_KVM_VCORE(r13)
-	mfspr	r5,SPRN_DEC
-	mftb	r6
-	/* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
-	ld	r4, VCORE_LPCR(r3)
-	andis.	r4, r4, LPCR_LD@h
-	bne	16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	extsw	r5,r5
-16:	add	r5,r5,r6
-	/* r5 is a guest timebase value here, convert to host TB */
-	ld	r4,VCORE_TB_OFFSET(r3)
-	subf	r5,r4,r5
-	std	r5,VCPU_DEC_EXPIRES(r9)
-
 BEGIN_FTR_SECTION
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
@@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	cmpwi	cr2, r0, 0
 	beq	cr2, 4f
 
+	/*
+	 * Radix: do eieio; tlbsync; ptesync sequence in case we
+	 * interrupted the guest between a tlbie and a ptesync.
+	 */
+	eieio
+	tlbsync
+	ptesync
+
 	/* Radix: Handle the case where the guest used an illegal PID */
 	LOAD_REG_ADDR(r4, mmu_base_pid)
 	lwz	r3, VCPU_GUEST_PID(r9)
@@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 27:
 	/* Subtract timebase offset from timebase */
-	ld	r8,VCORE_TB_OFFSET(r5)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
 	cmpdi	r8,0
 	beq	17f
+	li	r0, 0
+	std	r0, VCORE_TB_OFFSET_APPL(r5)
 	mftb	r6		/* current guest timebase */
 	subf	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_TB_OFFSET(r5)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
 	subf	r3, r6, r3	/* convert to host TB value */
 	std	r3, VCPU_DEC_EXPIRES(r4)
 
@@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/* Restore guest decrementer */
 	ld	r3, VCPU_DEC_EXPIRES(r4)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_TB_OFFSET(r5)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
 	add	r3, r3, r6	/* convert host TB to guest TB value */
 	mftb	r7
 	subf	r3, r7, r3
@@ -3606,12 +3619,9 @@ kvmppc_fix_pmao:
  */
 kvmhv_start_timing:
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	lbz	r6, VCORE_IN_GUEST(r5)
-	cmpwi	r6, 0
-	beq	5f				/* if in guest, need to */
-	ld	r6, VCORE_TB_OFFSET(r5)		/* subtract timebase offset */
-5:	mftb	r5
-	subf	r5, r6, r5
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
+	mftb	r5
+	subf	r5, r6, r5	/* subtract current timebase offset */
 	std	r3, VCPU_CUR_ACTIVITY(r4)
 	std	r5, VCPU_ACTIVITY_START(r4)
 	blr
@@ -3622,15 +3632,12 @@ kvmhv_start_timing:
  */
 kvmhv_accumulate_time:
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	lbz	r8, VCORE_IN_GUEST(r5)
-	cmpwi	r8, 0
-	beq	4f				/* if in guest, need to */
-	ld	r8, VCORE_TB_OFFSET(r5)		/* subtract timebase offset */
-4:	ld	r5, VCPU_CUR_ACTIVITY(r4)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
+	ld	r5, VCPU_CUR_ACTIVITY(r4)
 	ld	r6, VCPU_ACTIVITY_START(r4)
 	std	r3, VCPU_CUR_ACTIVITY(r4)
 	mftb	r7
-	subf	r7, r8, r7
+	subf	r7, r8, r7	/* subtract current timebase offset */
 	std	r7, VCPU_ACTIVITY_START(r4)
 	cmpdi	r5, 0
 	beqlr
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index c7a5deadd1cc..99c3620b40d9 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@
 #define XGLUE(a,b) a##b
 #define GLUE(a,b) XGLUE(a,b)
 
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY	1
+
 static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 {
 	u8 cppr;
@@ -205,6 +208,10 @@ skip_ipi:
 			goto skip_ipi;
 		}
 
+		/* If it's the dummy interrupt, continue searching */
+		if (hirq == XICS_DUMMY)
+			goto skip_ipi;
+
 		/* If fetching, update queue pointers */
 		if (scan_type == scan_fetch) {
 			q->idx = idx;
@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
 	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
 }
 
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		if (!qpage)
+			continue;
+
+		/* For each interrupt in the queue */
+		for (;;) {
+			entry = be32_to_cpup(qpage + idx);
+
+			/* No more ? */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * Allright, it *has* been re-routed, kill it from
+			 * the queue.
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+	next:
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1;
+		}
+	}
+}
+
 X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
 	u8 old_cppr;
 
 	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	 */
 	smp_mb();
 
-	/*
-	 * We are masking less, we need to look for pending things
-	 * to deliver and set VP pending bits accordingly to trigger
-	 * a new interrupt otherwise we might miss MFRR changes for
-	 * which we have optimized out sending an IPI signal.
-	 */
-	if (cppr > old_cppr)
-		GLUE(X_PFX,push_pending_to_hw)(xc);
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
+		GLUE(X_PFX,push_pending_to_hw)(xc);
+	} else {
+		/*
+		 * We are masking more, we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+	}
 
 	/* Apply new CPPR */
 	xc->hw_cppr = cppr;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 8961e3970901..969882b54266 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
 	if (gpa && (scb_s->ecb & ECB_TE)) {
-		if (!(gpa & ~0x1fffU)) {
+		if (!(gpa & ~0x1fffUL)) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
 			goto unpin;
 		}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ced851169730..9bffb5228f31 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ecx &= ~F(PKU);
 		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 		cpuid_mask(&entry->edx, CPUID_7_EDX);
+		/*
+		 * We emulate ARCH_CAPABILITIES in software even
+		 * if the host doesn't support it.
+		 */
+		entry->edx |= F(ARCH_CAPABILITIES);
 	} else {
 		entry->ebx = 0;
 		entry->ecx = 0;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5708e951a5c6..46ff64da44ca 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1260,14 +1260,18 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 	}
 }
 
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
 {
-	struct kvm_run *run = vcpu->run;
-
-	kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
+	kvm_hv_hypercall_set_result(vcpu, result);
+	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
+}
+
 static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
 {
 	struct eventfd_ctx *eventfd;
@@ -1350,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	/* Hypercall continuation is not supported yet */
 	if (rep_cnt || rep_idx) {
 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-		goto set_result;
+		goto out;
 	}
 
 	switch (code) {
@@ -1381,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		break;
 	}
 
-set_result:
-	kvm_hv_hypercall_set_result(vcpu, ret);
-	return 1;
+out:
+	return kvm_hv_hypercall_complete(vcpu, ret);
 }
 
 void kvm_hv_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b74c9c1405b9..3773c4625114 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic)
 
 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
 {
-	apic->lapic_timer.tscdeadline +=
-		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+	ktime_t now = ktime_get();
+	u64 tscl = rdtsc();
+	ktime_t delta;
+
+	/*
+	 * Synchronize both deadlines to the same time source or
+	 * differences in the periods (caused by differences in the
+	 * underlying clocks or numerical approximation errors) will
+	 * cause the two to drift apart over time as the errors
+	 * accumulate.
+	 */
 	apic->lapic_timer.target_expiration =
 		ktime_add_ns(apic->lapic_timer.target_expiration,
 				apic->lapic_timer.period);
+	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+		nsec_to_cycles(apic->vcpu, delta);
 }
 
 static void start_sw_period(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 22a183aac1c6..71e7cda6d014 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6671,11 +6671,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int op_64_bit;
 
-	if (kvm_hv_hypercall_enabled(vcpu->kvm)) {
-		if (!kvm_hv_hypercall(vcpu))
-			return 0;
-		goto out;
-	}
+	if (kvm_hv_hypercall_enabled(vcpu->kvm))
+		return kvm_hv_hypercall(vcpu);
 
 	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
@@ -6696,7 +6693,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
 	if (kvm_x86_ops->get_cpl(vcpu) != 0) {
 		ret = -KVM_EPERM;
-		goto out_error;
+		goto out;
 	}
 
 	switch (nr) {
@@ -6716,12 +6713,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		ret = -KVM_ENOSYS;
 		break;
 	}
-out_error:
+out:
 	if (!op_64_bit)
 		ret = (u32)ret;
 	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
 
-out:
 	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -7980,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	struct msr_data apic_base_msr;
 	int mmu_reset_needed = 0;
+	int cpuid_update_needed = 0;
 	int pending_vec, max_bits, idx;
 	struct desc_ptr dt;
 	int ret = -EINVAL;
@@ -8018,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	vcpu->arch.cr0 = sregs->cr0;
 
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+	cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+				(X86_CR4_OSXSAVE | X86_CR4_PKE));
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
-	if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+	if (cpuid_update_needed)
 		kvm_update_cpuid(vcpu);
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);