author     Linus Torvalds <torvalds@linux-foundation.org>  2018-05-26 13:46:57 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-05-26 13:46:57 -0400
commit     ec30dcf7f425dc811ac365b5c4b0f097f98e569f
tree       3169a5bcc21b90b1fab7c4c3cb473bcf7fdc4f81
parent     bc2dbc5420e82560e650f8531ceca597441ca171
parent     696ca779a928d0e93d61c38ffc3a4d8914a9b9a0
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Radim Krčmář:
"PPC:
- Close a hole which could possibly lead to the host timebase getting
out of sync.
- Three fixes relating to PTEs and TLB entries for radix guests.
- Fix a bug which could lead to an interrupt never getting delivered
to the guest, if it is pending for a guest vCPU when the vCPU gets
offlined.
s390:
- Fix false negatives in VSIE validity check (Cc stable)
x86:
- Fix time drift of VMX preemption timer when a guest uses LAPIC
timer in periodic mode (Cc stable)
- Unconditionally expose CPUID.IA32_ARCH_CAPABILITIES to allow
migration from hosts that don't need retpoline mitigation (Cc
stable)
- Fix guest crashes on reboot by properly coupling CR4.OSXSAVE and
CPUID.OSXSAVE (Cc stable)
- Report correct RIP after Hyper-V hypercall #UD (introduced in
-rc6)"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: fix #UD address of failed Hyper-V hypercalls
kvm: x86: IA32_ARCH_CAPABILITIES is always supported
KVM: x86: Update cpuid properly when CR4.OSXSAVE or CR4.PKE is changed
x86/kvm: fix LAPIC timer drift when guest uses periodic mode
KVM: s390: vsie: fix < 8k check for the itdba
KVM: PPC: Book 3S HV: Do ptesync in radix guest exit path
KVM: PPC: Book3S HV: XIVE: Resend re-routed interrupts on CPU priority change
KVM: PPC: Book3S HV: Make radix clear pte when unmapping
KVM: PPC: Book3S HV: Make radix use correct tlbie sequence in kvmppc_radix_tlbie_page
KVM: PPC: Book3S HV: Snapshot timebase offset on guest entry
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h   |   1
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c       |   1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c  |   6
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c            |   1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S |  97
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c | 108
-rw-r--r--  arch/s390/kvm/vsie.c                    |   2
-rw-r--r--  arch/x86/kvm/cpuid.c                    |   5
-rw-r--r--  arch/x86/kvm/hyperv.c                   |  19
-rw-r--r--  arch/x86/kvm/lapic.c                    |  16
-rw-r--r--  arch/x86/kvm/x86.c                      |  17
11 files changed, 198 insertions(+), 75 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 4c02a7378d06..e7377b73cfec 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -96,6 +96,7 @@ struct kvmppc_vcore {
 	struct kvm_vcpu *runner;
 	struct kvm *kvm;
 	u64 tb_offset;		/* guest timebase - host timebase */
+	u64 tb_offset_applied;	/* timebase offset currently in force */
 	ulong lpcr;
 	u32 arch_compat;
 	ulong pcr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6bee65f3cfd3..373dc1d6ef44 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -562,6 +562,7 @@ int main(void)
 	OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
 	OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
 	OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+	OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
 	OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
 	OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
 	OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index a57eafec4dc2..361f42c8c73e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
 		asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
 			     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("ptesync": : :"memory");
+	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
 static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
 	/* RIC=1 PRS=0 R=1 IS=2 */
 	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
 		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
-	asm volatile("ptesync": : :"memory");
+	asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
 }
 
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 	ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
 	if (ptep && pte_present(*ptep)) {
-		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
+		old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
 					      gpa, shift);
 		kvmppc_radix_tlbie_page(kvm, gpa, shift);
 		if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
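
Two things change above: the radix TLB flushes now end with the full "eieio; tlbsync; ptesync" sequence, and kvm_unmap_radix() clears the whole PTE (clr = ~0UL) rather than only _PAGE_PRESENT. Assuming kvmppc_radix_update_pte(kvm, ptep, clr, set, ...) atomically clears the bits in clr, sets the bits in set, and returns the old value (a hedged reading of the call site, not a quote of the kernel helper), this toy model shows what each mask leaves behind:

#include <stdio.h>

/* Toy model of the update primitive (assumed semantics: clear 'clr' bits,
 * then set 'set' bits, return the previous value). */
static unsigned long update_pte_model(unsigned long *ptep,
				      unsigned long clr, unsigned long set)
{
	unsigned long old = *ptep;

	*ptep = (old & ~clr) | set;
	return old;
}

int main(void)
{
	unsigned long pte = 0x00000f1234560107UL;	/* made-up PTE contents */

	update_pte_model(&pte, 0x1UL, 0);	/* clear one flag: stale PFN/permission bits remain */
	printf("partial clear: %#lx\n", pte);

	pte = 0x00000f1234560107UL;
	update_pte_model(&pte, ~0UL, 0);	/* clr = ~0UL: entry fully zeroed */
	printf("full clear:    %#lx\n", pte);
	return 0;
}
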
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d07fca5121c..9963f65c212b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
 	vc->conferring_threads = 0;
+	vc->tb_offset_applied = 0;
 }
 
 static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bd63fa8a08b5..07ca1b2a7966 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 22:	ld	r8,VCORE_TB_OFFSET(r5)
 	cmpdi	r8,0
 	beq	37f
+	std	r8, VCORE_TB_OFFSET_APPL(r5)
 	mftb	r6		/* current host timebase */
 	add	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -940,18 +941,6 @@ FTR_SECTION_ELSE
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
 
-	/*
-	 * Set the decrementer to the guest decrementer.
-	 */
-	ld	r8,VCPU_DEC_EXPIRES(r4)
-	/* r8 is a host timebase value here, convert to guest TB */
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r6,VCORE_TB_OFFSET(r5)
-	add	r8,r8,r6
-	mftb	r7
-	subf	r3,r7,r8
-	mtspr	SPRN_DEC,r3
-
 	ld	r5, VCPU_SPRG0(r4)
 	ld	r6, VCPU_SPRG1(r4)
 	ld	r7, VCPU_SPRG2(r4)
@@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPCR,r8
 	isync
 
+	/*
+	 * Set the decrementer to the guest decrementer.
+	 */
+	ld	r8,VCPU_DEC_EXPIRES(r4)
+	/* r8 is a host timebase value here, convert to guest TB */
+	ld	r5,HSTATE_KVM_VCORE(r13)
+	ld	r6,VCORE_TB_OFFSET_APPL(r5)
+	add	r8,r8,r6
+	mftb	r7
+	subf	r3,r7,r8
+	mtspr	SPRN_DEC,r3
+
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
 	EXTEND_HDEC(r3)
@@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 
 guest_bypass:
 	stw	r12, STACK_SLOT_TRAP(r1)
-	mr	r3, r12
+
+	/* Save DEC */
+	/* Do this before kvmhv_commence_exit so we know TB is guest TB */
+	ld	r3, HSTATE_KVM_VCORE(r13)
+	mfspr	r5,SPRN_DEC
+	mftb	r6
+	/* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+	ld	r4, VCORE_LPCR(r3)
+	andis.	r4, r4, LPCR_LD@h
+	bne	16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+	extsw	r5,r5
+16:	add	r5,r5,r6
+	/* r5 is a guest timebase value here, convert to host TB */
+	ld	r4,VCORE_TB_OFFSET_APPL(r3)
+	subf	r5,r4,r5
+	std	r5,VCPU_DEC_EXPIRES(r9)
+
 	/* Increment exit count, poke other threads to exit */
+	mr	r3, r12
 	bl	kvmhv_commence_exit
 	nop
 	ld	r9, HSTATE_KVM_VCPU(r13)
@@ -1639,23 +1659,6 @@ guest_bypass:
 	mtspr	SPRN_PURR,r3
 	mtspr	SPRN_SPURR,r4
 
-	/* Save DEC */
-	ld	r3, HSTATE_KVM_VCORE(r13)
-	mfspr	r5,SPRN_DEC
-	mftb	r6
-	/* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
-	ld	r4, VCORE_LPCR(r3)
-	andis.	r4, r4, LPCR_LD@h
-	bne	16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	extsw	r5,r5
-16:	add	r5,r5,r6
-	/* r5 is a guest timebase value here, convert to host TB */
-	ld	r4,VCORE_TB_OFFSET(r3)
-	subf	r5,r4,r5
-	std	r5,VCPU_DEC_EXPIRES(r9)
-
BEGIN_FTR_SECTION
 	b	8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
@@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	cmpwi	cr2, r0, 0
 	beq	cr2, 4f
 
+	/*
+	 * Radix: do eieio; tlbsync; ptesync sequence in case we
+	 * interrupted the guest between a tlbie and a ptesync.
+	 */
+	eieio
+	tlbsync
+	ptesync
+
 	/* Radix: Handle the case where the guest used an illegal PID */
 	LOAD_REG_ADDR(r4, mmu_base_pid)
 	lwz	r3, VCPU_GUEST_PID(r9)
@@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
27:
 	/* Subtract timebase offset from timebase */
-	ld	r8,VCORE_TB_OFFSET(r5)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
 	cmpdi	r8,0
 	beq	17f
+	li	r0, 0
+	std	r0, VCORE_TB_OFFSET_APPL(r5)
 	mftb	r6		/* current guest timebase */
 	subf	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	add	r3, r3, r5
 	ld	r4, HSTATE_KVM_VCPU(r13)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_TB_OFFSET(r5)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
 	subf	r3, r6, r3	/* convert to host TB value */
 	std	r3, VCPU_DEC_EXPIRES(r4)
 
@@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/* Restore guest decrementer */
 	ld	r3, VCPU_DEC_EXPIRES(r4)
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_TB_OFFSET(r5)
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
 	add	r3, r3, r6	/* convert host TB to guest TB value */
 	mftb	r7
 	subf	r3, r7, r3
@@ -3606,12 +3619,9 @@ kvmppc_fix_pmao:
 */
kvmhv_start_timing:
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	lbz	r6, VCORE_IN_GUEST(r5)
-	cmpwi	r6, 0
-	beq	5f			/* if in guest, need to */
-	ld	r6, VCORE_TB_OFFSET(r5)	/* subtract timebase offset */
-5:	mftb	r5
-	subf	r5, r6, r5
+	ld	r6, VCORE_TB_OFFSET_APPL(r5)
+	mftb	r5
+	subf	r5, r6, r5	/* subtract current timebase offset */
 	std	r3, VCPU_CUR_ACTIVITY(r4)
 	std	r5, VCPU_ACTIVITY_START(r4)
 	blr
@@ -3622,15 +3632,12 @@ kvmhv_start_timing:
 */
kvmhv_accumulate_time:
 	ld	r5, HSTATE_KVM_VCORE(r13)
-	lbz	r8, VCORE_IN_GUEST(r5)
-	cmpwi	r8, 0
-	beq	4f			/* if in guest, need to */
-	ld	r8, VCORE_TB_OFFSET(r5)	/* subtract timebase offset */
-4:	ld	r5, VCPU_CUR_ACTIVITY(r4)
+	ld	r8, VCORE_TB_OFFSET_APPL(r5)
+	ld	r5, VCPU_CUR_ACTIVITY(r4)
 	ld	r6, VCPU_ACTIVITY_START(r4)
 	std	r3, VCPU_CUR_ACTIVITY(r4)
 	mftb	r7
-	subf	r7, r8, r7
+	subf	r7, r8, r7	/* subtract current timebase offset */
 	std	r7, VCPU_ACTIVITY_START(r4)
 	cmpdi	r5, 0
 	beqlr
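
A way to read the VCORE_TB_OFFSET_APPL changes above: the new field records the offset actually programmed into the hardware timebase at any instant (stored on entry just before the mtspr SPRN_TBU40, zeroed on exit when the offset is backed out), so decrementer conversions always use the offset really in force rather than the one the vcore merely wants. A hedged C model of that bookkeeping (field names taken from the diff, semantics inferred from it, not kernel code):

#include <stdint.h>
#include <stdio.h>

struct vcore_model {
	uint64_t tb_offset;		/* guest timebase - host timebase */
	uint64_t tb_offset_applied;	/* offset currently in force */
};

/* DEC expiry values are stored in host TB units, so conversions must
 * use the applied offset, not the target one. */
static uint64_t host_to_guest_tb(const struct vcore_model *vc, uint64_t tb)
{
	return tb + vc->tb_offset_applied;
}

static uint64_t guest_to_host_tb(const struct vcore_model *vc, uint64_t tb)
{
	return tb - vc->tb_offset_applied;
}

static void model_guest_entry(struct vcore_model *vc)
{
	vc->tb_offset_applied = vc->tb_offset;	/* after mtspr SPRN_TBU40 */
}

static void model_guest_exit(struct vcore_model *vc)
{
	vc->tb_offset_applied = 0;		/* offset backed out of the TB */
}

int main(void)
{
	struct vcore_model vc = { .tb_offset = 0x10000000000ULL };
	uint64_t dec_expires = 12345;		/* host TB units */

	/* Before entry nothing is applied: conversion is the identity. */
	printf("pre-entry:  %llu\n", (unsigned long long)host_to_guest_tb(&vc, dec_expires));
	model_guest_entry(&vc);
	printf("in guest:   %llu\n", (unsigned long long)host_to_guest_tb(&vc, dec_expires));
	printf("round trip: %llu\n", (unsigned long long)guest_to_host_tb(&vc,
					host_to_guest_tb(&vc, dec_expires)));
	model_guest_exit(&vc);
	return 0;
}
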
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index c7a5deadd1cc..99c3620b40d9 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -11,6 +11,9 @@
 #define XGLUE(a,b) a##b
 #define GLUE(a,b) XGLUE(a,b)
 
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY	1
+
 static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 {
 	u8 cppr;
@@ -205,6 +208,10 @@ skip_ipi:
 		goto skip_ipi;
 	}
 
+	/* If it's the dummy interrupt, continue searching */
+	if (hirq == XICS_DUMMY)
+		goto skip_ipi;
+
 	/* If fetching, update queue pointers */
 	if (scan_type == scan_fetch) {
 		q->idx = idx;
@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
 	__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
 }
 
+static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
+					       struct kvmppc_xive_vcpu *xc)
+{
+	unsigned int prio;
+
+	/* For each priority that is now masked */
+	for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+		struct kvmppc_xive_irq_state *state;
+		struct kvmppc_xive_src_block *sb;
+		u32 idx, toggle, entry, irq, hw_num;
+		struct xive_irq_data *xd;
+		__be32 *qpage;
+		u16 src;
+
+		idx = q->idx;
+		toggle = q->toggle;
+		qpage = READ_ONCE(q->qpage);
+		if (!qpage)
+			continue;
+
+		/* For each interrupt in the queue */
+		for (;;) {
+			entry = be32_to_cpup(qpage + idx);
+
+			/* No more ? */
+			if ((entry >> 31) == toggle)
+				break;
+			irq = entry & 0x7fffffff;
+
+			/* Skip dummies and IPIs */
+			if (irq == XICS_DUMMY || irq == XICS_IPI)
+				goto next;
+			sb = kvmppc_xive_find_source(xive, irq, &src);
+			if (!sb)
+				goto next;
+			state = &sb->irq_state[src];
+
+			/* Has it been rerouted ? */
+			if (xc->server_num == state->act_server)
+				goto next;
+
+			/*
+			 * Allright, it *has* been re-routed, kill it from
+			 * the queue.
+			 */
+			qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+			/* Find the HW interrupt */
+			kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+			/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
+			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+				GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
+
+			/* EOI the source */
+			GLUE(X_PFX,source_eoi)(hw_num, xd);
+
+		next:
+			idx = (idx + 1) & q->msk;
+			if (idx == 0)
+				toggle ^= 1;
+		}
+	}
+}
+
 X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
 	u8 old_cppr;
 
 	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	 */
 	smp_mb();
 
-	/*
-	 * We are masking less, we need to look for pending things
-	 * to deliver and set VP pending bits accordingly to trigger
-	 * a new interrupt otherwise we might miss MFRR changes for
-	 * which we have optimized out sending an IPI signal.
-	 */
-	if (cppr > old_cppr)
-		GLUE(X_PFX,push_pending_to_hw)(xc);
+	if (cppr > old_cppr) {
+		/*
+		 * We are masking less, we need to look for pending things
+		 * to deliver and set VP pending bits accordingly to trigger
+		 * a new interrupt otherwise we might miss MFRR changes for
+		 * which we have optimized out sending an IPI signal.
+		 */
+		GLUE(X_PFX,push_pending_to_hw)(xc);
+	} else {
+		/*
+		 * We are masking more, we need to check the queue for any
+		 * interrupt that has been routed to another CPU, take
+		 * it out (replace it with the dummy) and retrigger it.
+		 *
+		 * This is necessary since those interrupts may otherwise
+		 * never be processed, at least not until this CPU restores
+		 * its CPPR.
+		 *
+		 * This is in theory racy vs. HW adding new interrupts to
+		 * the queue. In practice this works because the interesting
+		 * cases are when the guest has done a set_xive() to move the
+		 * interrupt away, which flushes the xive, followed by the
+		 * target CPU doing a H_CPPR. So any new interrupt coming into
+		 * the queue must still be routed to us and isn't a source
+		 * of concern.
+		 */
+		GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
+	}
 
 	/* Apply new CPPR */
 	xc->hw_cppr = cppr;
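
For readers unfamiliar with XIVE event queues, the scan above relies on a simple entry layout: bit 31 is a generation (toggle) bit used to detect the end of valid entries, and the low 31 bits carry the interrupt number, which is what gets overwritten with XICS_DUMMY. A small sketch of that decoding, with the layout taken from the masks in the diff (the struct and helper names here are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

#define XICS_DUMMY 1	/* marker for a removed slot, as in the patch */

struct eq_entry {
	uint32_t gen;	/* flips each time the producer wraps the queue */
	uint32_t irq;	/* interrupt number, low 31 bits */
};

static struct eq_entry eq_decode(uint32_t raw)
{
	struct eq_entry e = { .gen = raw >> 31, .irq = raw & 0x7fffffff };
	return e;
}

/* Replacing an entry keeps the generation bit so queue scanning still works. */
static uint32_t eq_make_dummy(uint32_t raw)
{
	return (raw & 0x80000000u) | XICS_DUMMY;
}

int main(void)
{
	uint32_t raw = 0x80000042u;		/* gen=1, irq=0x42 */
	struct eq_entry e = eq_decode(raw);

	printf("gen=%u irq=%#x\n", e.gen, e.irq);
	printf("dummied=%#x\n", eq_make_dummy(raw));
	return 0;
}
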
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 8961e3970901..969882b54266 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
 	gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
 	if (gpa && (scb_s->ecb & ECB_TE)) {
-		if (!(gpa & ~0x1fffU)) {
+		if (!(gpa & ~0x1fffUL)) {
 			rc = set_validity_icpt(scb_s, 0x0080U);
 			goto unpin;
 		}
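
The one-character fix is a classic integer-width bug: ~0x1fffU is an unsigned int (0xffffe000), so when it is zero-extended to 64 bits the AND discards bits 32-63 of gpa, and an itdba above 4 GiB whose bits 13-31 happen to be zero is misclassified as sitting in the low 8K. A standalone demo of the promotion:

#include <stdio.h>

int main(void)
{
	unsigned long gpa = 0x100000000UL;	/* hypothetical itdba above 4 GiB */

	/* ~0x1fffU is 32-bit 0xffffe000; zero-extended, it masks away
	 * the high half of gpa entirely. */
	printf("32-bit mask: looks below 8K? %d\n", !(gpa & ~0x1fffU));

	/* ~0x1fffUL keeps all 64 bits: 0xffffffffffffe000. */
	printf("64-bit mask: looks below 8K? %d\n", !(gpa & ~0x1fffUL));
	return 0;
}

This prints 1 for the buggy mask and 0 for the fixed one.
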
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ced851169730..9bffb5228f31 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ecx &= ~F(PKU);
 		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 		cpuid_mask(&entry->edx, CPUID_7_EDX);
+		/*
+		 * We emulate ARCH_CAPABILITIES in software even
+		 * if the host doesn't support it.
+		 */
+		entry->edx |= F(ARCH_CAPABILITIES);
 	} else {
 		entry->ebx = 0;
 		entry->ecx = 0;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 5708e951a5c6..46ff64da44ca 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1260,14 +1260,18 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 	}
 }
 
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
 {
-	struct kvm_run *run = vcpu->run;
-
-	kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
+	kvm_hv_hypercall_set_result(vcpu, result);
+	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
+}
+
 static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
 {
 	struct eventfd_ctx *eventfd;
@@ -1350,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	/* Hypercall continuation is not supported yet */
 	if (rep_cnt || rep_idx) {
 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-		goto set_result;
+		goto out;
 	}
 
 	switch (code) {
@@ -1381,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		break;
 	}
 
-set_result:
-	kvm_hv_hypercall_set_result(vcpu, ret);
-	return 1;
+out:
+	return kvm_hv_hypercall_complete(vcpu, ret);
 }
 
 void kvm_hv_init_vm(struct kvm *kvm)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b74c9c1405b9..3773c4625114 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1522,11 +1522,23 @@ static bool set_target_expiration(struct kvm_lapic *apic)
 
 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
 {
-	apic->lapic_timer.tscdeadline +=
-		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+	ktime_t now = ktime_get();
+	u64 tscl = rdtsc();
+	ktime_t delta;
+
+	/*
+	 * Synchronize both deadlines to the same time source or
+	 * differences in the periods (caused by differences in the
+	 * underlying clocks or numerical approximation errors) will
+	 * cause the two to drift apart over time as the errors
+	 * accumulate.
+	 */
 	apic->lapic_timer.target_expiration =
 		ktime_add_ns(apic->lapic_timer.target_expiration,
 			     apic->lapic_timer.period);
+	delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+		nsec_to_cycles(apic->vcpu, delta);
 }
 
 static void start_sw_period(struct kvm_lapic *apic)
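
The comment in the hunk is the whole story: the old code advanced tscdeadline by a separately converted period each tick, so conversion rounding accumulated against the ktime-based target_expiration. A self-contained toy (made-up TSC frequency, simplified truncating conversion; not the kernel's scaling code) that makes the accumulation visible:

#include <stdio.h>
#include <stdint.h>

#define TSC_KHZ 2893437ULL		/* assumed guest TSC frequency */

/* Truncating ns -> cycles conversion, like any fixed-point scaling. */
static uint64_t nsec_to_cycles(uint64_t ns)
{
	return ns * TSC_KHZ / 1000000;
}

int main(void)
{
	uint64_t period_ns = 100000;	/* 100 us periodic APIC timer */
	uint64_t target_ns = 0, tsc_old = 0, tsc_new = 0;

	for (int i = 0; i < 10000; i++) {
		target_ns += period_ns;
		/* old scheme: add a freshly converted period; error accumulates */
		tsc_old += nsec_to_cycles(period_ns);
		/* new scheme: convert the absolute distance to the shared target */
		tsc_new = nsec_to_cycles(target_ns);
	}
	printf("accumulated drift: %llu cycles\n",
	       (unsigned long long)(tsc_new - tsc_old));
	return 0;
}

With these numbers each period loses 0.7 cycles to truncation, so after 10000 periods the two deadlines disagree by 7000 cycles; deriving tscdeadline from the single ktime target keeps them locked.
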
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 22a183aac1c6..71e7cda6d014 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6671,11 +6671,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	unsigned long nr, a0, a1, a2, a3, ret;
 	int op_64_bit;
 
-	if (kvm_hv_hypercall_enabled(vcpu->kvm)) {
-		if (!kvm_hv_hypercall(vcpu))
-			return 0;
-		goto out;
-	}
+	if (kvm_hv_hypercall_enabled(vcpu->kvm))
+		return kvm_hv_hypercall(vcpu);
 
 	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
@@ -6696,7 +6693,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
 	if (kvm_x86_ops->get_cpl(vcpu) != 0) {
 		ret = -KVM_EPERM;
-		goto out_error;
+		goto out;
 	}
 
 	switch (nr) {
@@ -6716,12 +6713,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		ret = -KVM_ENOSYS;
 		break;
 	}
-out_error:
+out:
 	if (!op_64_bit)
 		ret = (u32)ret;
 	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
 
-out:
 	++vcpu->stat.hypercalls;
 	return kvm_skip_emulated_instruction(vcpu);
 }
@@ -7980,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	struct msr_data apic_base_msr;
 	int mmu_reset_needed = 0;
+	int cpuid_update_needed = 0;
 	int pending_vec, max_bits, idx;
 	struct desc_ptr dt;
 	int ret = -EINVAL;
@@ -8018,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	vcpu->arch.cr0 = sregs->cr0;
 
 	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+	cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+				(X86_CR4_OSXSAVE | X86_CR4_PKE));
 	kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
-	if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+	if (cpuid_update_needed)
 		kvm_update_cpuid(vcpu);
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
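
The __set_sregs change swaps a level check for an edge check. The old condition fired only when the new CR4 had OSXSAVE or PKE set, so clearing either bit (e.g. across a reboot) never refreshed the guest's CPUID view; XOR-ing old and new CR4 catches flips in both directions. A standalone illustration (CR4 bit positions are architectural; the rest is a toy):

#include <stdio.h>

#define X86_CR4_OSXSAVE (1UL << 18)
#define X86_CR4_PKE     (1UL << 22)

int main(void)
{
	unsigned long old_cr4 = X86_CR4_OSXSAVE;	/* OSXSAVE was enabled */
	unsigned long new_cr4 = 0;			/* reboot clears CR4 */

	/* Old test: only looks at the new value, misses the 1 -> 0 edge,
	 * leaving CPUID.OSXSAVE stale. */
	int old_check = !!(new_cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE));

	/* New test: XOR exposes any flipped bit, in either direction. */
	int new_check = !!((old_cr4 ^ new_cr4) &
			   (X86_CR4_OSXSAVE | X86_CR4_PKE));

	printf("update cpuid? old test: %d, new test: %d\n",
	       old_check, new_check);
	return 0;
}
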