author	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-19 14:27:09 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-05-19 14:27:09 -0400
commit	7beaa24ba49717419e24d1f6321e8b3c265a719c (patch)
tree	a5c5433d3c7bfc4c23e67174463ccf519c8406f0
parent	07b75260ebc2c789724c594d7eaf0194fa47b3be (diff)
parent	9842df62004f366b9fed2423e24df10542ee0dc5 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "Small release overall.

  x86:
   - miscellaneous fixes
   - AVIC support (local APIC virtualization, AMD version)

  s390:
   - polling for interrupts after a VCPU goes to halted state is now
     enabled for s390
   - use hardware provided information about facility bits that do not
     need any hypervisor activity, and other fixes for cpu models and
     facilities
   - improve perf output
   - floating interrupt controller improvements.

  MIPS:
   - miscellaneous fixes

  PPC:
   - bugfixes only

  ARM:
   - 16K page size support
   - generic firmware probing layer for timer and GIC

  Christoffer Dall (KVM-ARM maintainer) says:
    "There are a few changes in this pull request touching things
     outside KVM, but they should all carry the necessary acks and it
     made the merge process much easier to do it this way."

  though actually the irqchip maintainers' acks didn't make it into the
  patches. Marc Zyngier, who is both irqchip and KVM-ARM maintainer,
  later acked at http://mid.gmane.org/573351D1.4060303@arm.com
  ('more formally and for documentation purposes')"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (82 commits)
  KVM: MTRR: remove MSR 0x2f8
  KVM: x86: make hwapic_isr_update and hwapic_irr_update look the same
  svm: Manage vcpu load/unload when enable AVIC
  svm: Do not intercept CR8 when enable AVIC
  svm: Do not expose x2APIC when enable AVIC
  KVM: x86: Introducing kvm_x86_ops.apicv_post_state_restore
  svm: Add VMEXIT handlers for AVIC
  svm: Add interrupt injection via AVIC
  KVM: x86: Detect and Initialize AVIC support
  svm: Introduce new AVIC VMCB registers
  KVM: split kvm_vcpu_wake_up from kvm_vcpu_kick
  KVM: x86: Introducing kvm_x86_ops VCPU blocking/unblocking hooks
  KVM: x86: Introducing kvm_x86_ops VM init/destroy hooks
  KVM: x86: Rename kvm_apic_get_reg to kvm_lapic_get_reg
  KVM: x86: Misc LAPIC changes to expose helper functions
  KVM: shrink halt polling even more for invalid wakeups
  KVM: s390: set halt polling to 80 microseconds
  KVM: halt_polling: provide a way to qualify wakeups during poll
  KVM: PPC: Book3S HV: Re-enable XICS fast path for irqfd-generated interrupts
  kvm: Conditionally register IRQ bypass consumer
  ...
-rw-r--r--  Documentation/virtual/kvm/api.txt | 18
-rw-r--r--  Documentation/virtual/kvm/devices/s390_flic.txt | 14
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 2
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 43
-rw-r--r--  arch/arm/include/asm/stage2_pgtable.h | 61
-rw-r--r--  arch/arm/kvm/arm.c | 2
-rw-r--r--  arch/arm/kvm/mmu.c | 408
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 85
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 111
-rw-r--r--  arch/arm64/include/asm/pgtable-hwdef.h | 80
-rw-r--r--  arch/arm64/include/asm/pgtable.h | 15
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable-nopmd.h | 42
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable-nopud.h | 39
-rw-r--r--  arch/arm64/include/asm/stage2_pgtable.h | 142
-rw-r--r--  arch/arm64/kvm/Kconfig | 1
-rw-r--r--  arch/arm64/kvm/hyp/s2-setup.c | 8
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 4
-rw-r--r--  arch/mips/kvm/emulate.c | 89
-rw-r--r--  arch/mips/kvm/mips.c | 9
-rw-r--r--  arch/mips/kvm/tlb.c | 26
-rw-r--r--  arch/mips/kvm/trap_emul.c | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 5
-rw-r--r--  arch/powerpc/kvm/book3s.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c | 32
-rw-r--r--  arch/powerpc/kvm/book3s_xics.c | 29
-rw-r--r--  arch/powerpc/kvm/book3s_xics.h | 1
-rw-r--r--  arch/powerpc/kvm/booke.c | 1
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 22
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 11
-rw-r--r--  arch/s390/include/asm/sclp.h | 1
-rw-r--r--  arch/s390/include/asm/sigp.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/sie.h | 7
-rw-r--r--  arch/s390/kvm/Kconfig | 1
-rw-r--r--  arch/s390/kvm/interrupt.c | 47
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 61
-rw-r--r--  arch/s390/kvm/priv.c | 21
-rw-r--r--  arch/s390/kvm/sigp.c | 6
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 32
-rw-r--r--  arch/x86/include/asm/svm.h | 12
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 6
-rw-r--r--  arch/x86/include/uapi/asm/svm.h | 9
-rw-r--r--  arch/x86/kvm/ioapic.c | 2
-rw-r--r--  arch/x86/kvm/irq_comm.c | 3
-rw-r--r--  arch/x86/kvm/lapic.c | 193
-rw-r--r--  arch/x86/kvm/lapic.h | 38
-rw-r--r--  arch/x86/kvm/mmu.c | 28
-rw-r--r--  arch/x86/kvm/mtrr.c | 2
-rw-r--r--  arch/x86/kvm/svm.c | 670
-rw-r--r--  arch/x86/kvm/trace.h | 57
-rw-r--r--  arch/x86/kvm/vmx.c | 12
-rw-r--r--  arch/x86/kvm/x86.c | 64
-rw-r--r--  drivers/clocksource/arm_arch_timer.c | 11
-rw-r--r--  drivers/irqchip/irq-gic-common.c | 13
-rw-r--r--  drivers/irqchip/irq-gic-common.h | 3
-rw-r--r--  drivers/irqchip/irq-gic-v3.c | 175
-rw-r--r--  drivers/irqchip/irq-gic.c | 89
-rw-r--r--  drivers/s390/char/sclp_early.c | 6
-rw-r--r--  include/clocksource/arm_arch_timer.h | 12
-rw-r--r--  include/kvm/arm_vgic.h | 7
-rw-r--r--  include/linux/irqbypass.h | 4
-rw-r--r--  include/linux/irqchip/arm-gic-common.h | 34
-rw-r--r--  include/linux/kvm_host.h | 39
-rw-r--r--  include/trace/events/kvm.h | 11
-rw-r--r--  include/uapi/linux/kvm.h | 1
-rw-r--r--  virt/kvm/Kconfig | 3
-rw-r--r--  virt/kvm/arm/arch_timer.c | 40
-rw-r--r--  virt/kvm/arm/vgic-v2.c | 61
-rw-r--r--  virt/kvm/arm/vgic-v3.c | 47
-rw-r--r--  virt/kvm/arm/vgic.c | 50
-rw-r--r--  virt/kvm/eventfd.c | 18
-rw-r--r--  virt/kvm/kvm_main.c | 32
-rw-r--r--  virt/lib/irqbypass.c | 12
75 files changed, 2398 insertions, 850 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4d0542c5206b..a4482cce4bae 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -199,8 +199,8 @@ Type: vm ioctl
 Parameters: vcpu id (apic id on x86)
 Returns: vcpu fd on success, -1 on error
 
-This API adds a vcpu to a virtual machine. The vcpu id is a small integer
-in the range [0, max_vcpus).
+This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
+The vcpu id is an integer in the range [0, max_vcpu_id).
 
 The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
 the KVM_CHECK_EXTENSION ioctl() at run-time.
@@ -212,6 +212,12 @@ cpus max.
 If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
 same as the value returned from KVM_CAP_NR_VCPUS.
 
+The maximum possible value for max_vcpu_id can be retrieved using the
+KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id
+is the same as the value returned from KVM_CAP_MAX_VCPUS.
+
 On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
 threads in one or more virtual CPU cores. (This is because the
 hardware requires all the hardware threads in a CPU core to be in the
@@ -3788,6 +3794,14 @@ a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
 Fails if VCPU has already been created, or if the irqchip is already in the
 kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
 
+7.6 KVM_CAP_S390_RI
+
+Architectures: s390
+Parameters: none
+
+Allows use of runtime-instrumentation introduced with zEC12 processor.
+Will return -EINVAL if the machine does not support runtime-instrumentation.
+Will return -EBUSY if a VCPU has already been created.
 
 8. Other capabilities.
 ----------------------
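
The probing order documented above (KVM_CAP_NR_VCPUS, then KVM_CAP_MAX_VCPUS,
then KVM_CAP_MAX_VCPU_ID, each with a documented fallback) can be exercised
from userspace with a minimal sketch like the following. This is not part of
the patch; it only assumes the standard /dev/kvm device and the <linux/kvm.h>
capability constants referenced in the text:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm < 0)
		return 1;

	int nr_vcpus = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
	int max_vcpus = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
	int max_vcpu_id = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPU_ID);

	/* Fall back exactly as the documentation above describes. */
	if (max_vcpus <= 0)
		max_vcpus = nr_vcpus;
	if (max_vcpu_id <= 0)
		max_vcpu_id = max_vcpus;

	printf("recommended vcpus: %d, max vcpus: %d, vcpu ids: [0, %d)\n",
	       nr_vcpus, max_vcpus, max_vcpu_id);
	return 0;
}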
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
index e3e314cb83e8..6b0e115301c8 100644
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -11,6 +11,7 @@ FLIC provides support to
 - add interrupts (KVM_DEV_FLIC_ENQUEUE)
 - inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
 - purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+- purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
 - enable/disable for the guest transparent async page faults
 - register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
 
@@ -40,6 +41,11 @@ Groups:
     Simply deletes all elements from the list of currently pending floating
     interrupts. No interrupts are injected into the guest.
 
+  KVM_DEV_FLIC_CLEAR_IO_IRQ
+    Deletes one (if any) I/O interrupt for a subchannel identified by the
+    subsystem identification word passed via the buffer specified by
+    attr->addr (address) and attr->attr (length).
+
   KVM_DEV_FLIC_APF_ENABLE
     Enables async page faults for the guest. So in case of a major page fault
     the host is allowed to handle this async and continues the guest.
@@ -68,7 +74,7 @@ struct kvm_s390_io_adapter {
 
   KVM_DEV_FLIC_ADAPTER_MODIFY
     Modifies attributes of an existing I/O adapter interrupt source. Takes
-    a kvm_s390_io_adapter_req specifiying the adapter and the operation:
+    a kvm_s390_io_adapter_req specifying the adapter and the operation:
 
 struct kvm_s390_io_adapter_req {
 	__u32 id;
@@ -94,3 +100,9 @@ struct kvm_s390_io_adapter_req {
   KVM_S390_IO_ADAPTER_UNMAP
     release a userspace page for the translated address specified in addr
     from the list of mappings
+
+Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
+FLIC with an unknown group or attribute gives the error code EINVAL (instead of
+ENXIO, as specified in the API documentation). It is not possible to conclude
+that a FLIC operation is unavailable based on the error code resulting from a
+usage attempt.
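
A rough userspace sketch of the new KVM_DEV_FLIC_CLEAR_IO_IRQ group described
above (illustrative only, not from the patch: it assumes a FLIC fd obtained
via KVM_CREATE_DEVICE, and the uint32_t layout of the subsystem identification
word is an assumption; the attr.addr/attr.attr convention mirrors the text):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Clear one pending I/O interrupt for the given subchannel. */
static int flic_clear_io_irq(int flic_fd, uint32_t schid)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_FLIC_CLEAR_IO_IRQ,
		.addr = (uint64_t)(unsigned long)&schid, /* buffer address */
		.attr = sizeof(schid),                   /* buffer length */
	};

	/* Per the note above, an unknown group yields EINVAL, not ENXIO. */
	return ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);
}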
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 738d5eee91de..0df6b1fc9655 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -187,6 +187,7 @@ struct kvm_vm_stat {
 struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 hvc_exit_stat;
 	u64 wfe_exit_stat;
@@ -290,6 +291,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 static inline void kvm_arm_init_debug(void) {}
 static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index f17a8d41822c..f9a65061130b 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -47,6 +47,7 @@
 #include <linux/highmem.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
+#include <asm/stage2_pgtable.h>
 
 int create_hyp_mappings(void *from, void *to);
 int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
@@ -106,14 +107,16 @@ static inline void kvm_clean_pte(pte_t *pte)
 	clean_pte_table(pte);
 }
 
-static inline void kvm_set_s2pte_writable(pte_t *pte)
+static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
-	pte_val(*pte) |= L_PTE_S2_RDWR;
+	pte_val(pte) |= L_PTE_S2_RDWR;
+	return pte;
 }
 
-static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
+static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
 {
-	pmd_val(*pmd) |= L_PMD_S2_RDWR;
+	pmd_val(pmd) |= L_PMD_S2_RDWR;
+	return pmd;
 }
 
 static inline void kvm_set_s2pte_readonly(pte_t *pte)
@@ -136,22 +139,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 	return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY;
 }
 
-
-/* Open coded p*d_addr_end that can deal with 64bit addresses */
-#define kvm_pgd_addr_end(addr, end)					\
-({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
-	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
-})
-
-#define kvm_pud_addr_end(addr, end)	(end)
-
-#define kvm_pmd_addr_end(addr, end)					\
-({	u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK;		\
-	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
-})
-
-#define kvm_pgd_index(addr)	pgd_index(addr)
-
 static inline bool kvm_page_empty(void *ptr)
 {
 	struct page *ptr_page = virt_to_page(ptr);
@@ -160,19 +147,11 @@ static inline bool kvm_page_empty(void *ptr)
 
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
-#define kvm_pud_table_empty(kvm, pudp) (0)
-
-#define KVM_PREALLOC_LEVEL	0
+#define kvm_pud_table_empty(kvm, pudp) false
 
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
-{
-	return kvm->arch.pgd;
-}
-
-static inline unsigned int kvm_get_hwpgd_size(void)
-{
-	return PTRS_PER_S2_PGD * sizeof(pgd_t);
-}
+#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
+#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
+#define hyp_pud_table_empty(pudp) false
 
 struct kvm;
 
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
new file mode 100644
index 000000000000..460d616bb2d6
--- /dev/null
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 - ARM Ltd
+ *
+ * stage2 page table helpers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_S2_PGTABLE_H_
+#define __ARM_S2_PGTABLE_H_
+
+#define stage2_pgd_none(pgd)			pgd_none(pgd)
+#define stage2_pgd_clear(pgd)			pgd_clear(pgd)
+#define stage2_pgd_present(pgd)			pgd_present(pgd)
+#define stage2_pgd_populate(pgd, pud)		pgd_populate(NULL, pgd, pud)
+#define stage2_pud_offset(pgd, address)		pud_offset(pgd, address)
+#define stage2_pud_free(pud)			pud_free(NULL, pud)
+
+#define stage2_pud_none(pud)			pud_none(pud)
+#define stage2_pud_clear(pud)			pud_clear(pud)
+#define stage2_pud_present(pud)			pud_present(pud)
+#define stage2_pud_populate(pud, pmd)		pud_populate(NULL, pud, pmd)
+#define stage2_pmd_offset(pud, address)		pmd_offset(pud, address)
+#define stage2_pmd_free(pmd)			pmd_free(NULL, pmd)
+
+#define stage2_pud_huge(pud)			pud_huge(pud)
+
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + PGDIR_SIZE) & PGDIR_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pud_addr_end(addr, end)		(end)
+
+static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t boundary = (addr + PMD_SIZE) & PMD_MASK;
+
+	return (boundary - 1 < end - 1) ? boundary : end;
+}
+
+#define stage2_pgd_index(addr)			pgd_index(addr)
+
+#define stage2_pte_table_empty(ptep)		kvm_page_empty(ptep)
+#define stage2_pmd_table_empty(pmdp)		kvm_page_empty(pmdp)
+#define stage2_pud_table_empty(pudp)		false
+
+#endif	/* __ARM_S2_PGTABLE_H_ */
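
The open-coded stage2_*_addr_end helpers above take 64-bit phys_addr_t
arguments because, with LPAE, a guest IPA can exceed 32 bits even on 32-bit
ARM. A standalone sketch of the truncation they avoid (illustrative values
only; EX_PGDIR_SIZE is a made-up 1GB stand-in for PGDIR_SIZE):

#include <stdint.h>
#include <stdio.h>

#define EX_PGDIR_SIZE	(1ULL << 30)		/* 1GB, for illustration */
#define EX_PGDIR_MASK	(~(EX_PGDIR_SIZE - 1))

int main(void)
{
	uint64_t addr = 0x140000000ULL;	/* an IPA above 4GB */

	/* 64-bit math, as the helpers do: next boundary is 0x180000000. */
	uint64_t ok = (addr + EX_PGDIR_SIZE) & EX_PGDIR_MASK;

	/* The same computation truncated to 32 bits loses the high bit. */
	uint32_t bad = ((uint32_t)addr + (uint32_t)EX_PGDIR_SIZE) &
		       (uint32_t)EX_PGDIR_MASK;

	printf("64-bit boundary: %#llx\n", (unsigned long long)ok);
	printf("32-bit boundary: %#x\n", bad);
	return 0;
}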
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9ef013d86cc5..237d5d82f0af 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -444,7 +444,7 @@ static void update_vttbr(struct kvm *kvm)
 		kvm_next_vmid &= (1 << kvm_vmid_bits) - 1;
 
 	/* update vttbr to be used with the new vmid */
-	pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm));
+	pgd_phys = virt_to_phys(kvm->arch.pgd);
 	BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
 	vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
 	kvm->arch.vttbr = pgd_phys | vmid;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index be302128c5d7..45c43aecb8f2 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -43,11 +43,9 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+#define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
-#define kvm_pmd_huge(_x)	(pmd_huge(_x) || pmd_trans_huge(_x))
-#define kvm_pud_huge(_x)	pud_huge(_x)
-
 #define KVM_S2PTE_FLAG_IS_IOMAP	(1UL << 0)
 #define KVM_S2_FLAG_LOGGING_ACTIVE	(1UL << 1)
 
@@ -69,14 +67,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 
 static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
 {
-	/*
-	 * This function also gets called when dealing with HYP page
-	 * tables. As HYP doesn't have an associated struct kvm (and
-	 * the HYP page tables are fairly static), we don't do
-	 * anything there.
-	 */
-	if (kvm)
-		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
+	kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa);
 }
 
 /*
@@ -115,7 +106,7 @@ static bool kvm_is_device_pfn(unsigned long pfn)
  */
 static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd)
 {
-	if (!kvm_pmd_huge(*pmd))
+	if (!pmd_thp_or_huge(*pmd))
 		return;
 
 	pmd_clear(pmd);
@@ -155,29 +146,29 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
 	return p;
 }
 
-static void clear_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
+static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr)
 {
-	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0);
-	pgd_clear(pgd);
+	pud_t *pud_table __maybe_unused = stage2_pud_offset(pgd, 0UL);
+	stage2_pgd_clear(pgd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pud_free(NULL, pud_table);
+	stage2_pud_free(pud_table);
 	put_page(virt_to_page(pgd));
 }
 
-static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
+static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
-	pmd_t *pmd_table = pmd_offset(pud, 0);
-	VM_BUG_ON(pud_huge(*pud));
-	pud_clear(pud);
+	pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(pud, 0);
+	VM_BUG_ON(stage2_pud_huge(*pud));
+	stage2_pud_clear(pud);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
-	pmd_free(NULL, pmd_table);
+	stage2_pmd_free(pmd_table);
 	put_page(virt_to_page(pud));
 }
 
-static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
+static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
 {
 	pte_t *pte_table = pte_offset_kernel(pmd, 0);
-	VM_BUG_ON(kvm_pmd_huge(*pmd));
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
 	pmd_clear(pmd);
 	kvm_tlb_flush_vmid_ipa(kvm, addr);
 	pte_free_kernel(NULL, pte_table);
@@ -204,7 +195,7 @@ static void clear_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr)
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
  */
-static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
+static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t start_addr = addr;
@@ -226,21 +217,21 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	if (kvm_pte_table_empty(kvm, start_pte))
-		clear_pmd_entry(kvm, pmd, start_addr);
+	if (stage2_pte_table_empty(start_pte))
+		clear_stage2_pmd_entry(kvm, pmd, start_addr);
 }
 
-static void unmap_pmds(struct kvm *kvm, pud_t *pud,
+static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
 	pmd_t *pmd, *start_pmd;
 
-	start_pmd = pmd = pmd_offset(pud, addr);
+	start_pmd = pmd = stage2_pmd_offset(pud, addr);
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
 				pmd_t old_pmd = *pmd;
 
 				pmd_clear(pmd);
@@ -250,57 +241,64 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
 
 				put_page(virt_to_page(pmd));
 			} else {
-				unmap_ptes(kvm, pmd, addr, next);
+				unmap_stage2_ptes(kvm, pmd, addr, next);
 			}
 		}
 	} while (pmd++, addr = next, addr != end);
 
-	if (kvm_pmd_table_empty(kvm, start_pmd))
-		clear_pud_entry(kvm, pud, start_addr);
+	if (stage2_pmd_table_empty(start_pmd))
+		clear_stage2_pud_entry(kvm, pud, start_addr);
 }
 
-static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
+static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd,
 		       phys_addr_t addr, phys_addr_t end)
 {
 	phys_addr_t next, start_addr = addr;
 	pud_t *pud, *start_pud;
 
-	start_pud = pud = pud_offset(pgd, addr);
+	start_pud = pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
-			if (pud_huge(*pud)) {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud)) {
 				pud_t old_pud = *pud;
 
-				pud_clear(pud);
+				stage2_pud_clear(pud);
 				kvm_tlb_flush_vmid_ipa(kvm, addr);
-
 				kvm_flush_dcache_pud(old_pud);
-
 				put_page(virt_to_page(pud));
 			} else {
-				unmap_pmds(kvm, pud, addr, next);
+				unmap_stage2_pmds(kvm, pud, addr, next);
 			}
 		}
 	} while (pud++, addr = next, addr != end);
 
-	if (kvm_pud_table_empty(kvm, start_pud))
-		clear_pgd_entry(kvm, pgd, start_addr);
+	if (stage2_pud_table_empty(start_pud))
+		clear_stage2_pgd_entry(kvm, pgd, start_addr);
 }
 
-
-static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
-			phys_addr_t start, u64 size)
+/**
+ * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
+ * @kvm:   The VM pointer
+ * @start: The intermediate physical base address of the range to unmap
+ * @size:  The size of the area to unmap
+ *
+ * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
+ * be called while holding mmu_lock (unless for freeing the stage2 pgd before
+ * destroying the VM), otherwise another faulting VCPU may come in and mess
+ * with things behind our backs.
+ */
+static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 {
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
-	pgd = pgdp + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
-		next = kvm_pgd_addr_end(addr, end);
-		if (!pgd_none(*pgd))
-			unmap_puds(kvm, pgd, addr, next);
+		next = stage2_pgd_addr_end(addr, end);
+		if (!stage2_pgd_none(*pgd))
+			unmap_stage2_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -322,11 +320,11 @@ static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(pud, addr);
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd))
+			if (pmd_thp_or_huge(*pmd))
 				kvm_flush_dcache_pmd(*pmd);
 			else
 				stage2_flush_ptes(kvm, pmd, addr, next);
@@ -340,11 +338,11 @@ static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = pud_offset(pgd, addr);
+	pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
-			if (pud_huge(*pud))
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
+			if (stage2_pud_huge(*pud))
 				kvm_flush_dcache_pud(*pud);
 			else
 				stage2_flush_pmds(kvm, pud, addr, next);
@@ -360,9 +358,9 @@ static void stage2_flush_memslot(struct kvm *kvm,
 	phys_addr_t next;
 	pgd_t *pgd;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
-		next = kvm_pgd_addr_end(addr, end);
+		next = stage2_pgd_addr_end(addr, end);
 		stage2_flush_puds(kvm, pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
@@ -391,6 +389,100 @@ static void stage2_flush_vm(struct kvm *kvm)
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
+static void clear_hyp_pgd_entry(pgd_t *pgd)
+{
+	pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL);
+	pgd_clear(pgd);
+	pud_free(NULL, pud_table);
+	put_page(virt_to_page(pgd));
+}
+
+static void clear_hyp_pud_entry(pud_t *pud)
+{
+	pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0);
+	VM_BUG_ON(pud_huge(*pud));
+	pud_clear(pud);
+	pmd_free(NULL, pmd_table);
+	put_page(virt_to_page(pud));
+}
+
+static void clear_hyp_pmd_entry(pmd_t *pmd)
+{
+	pte_t *pte_table = pte_offset_kernel(pmd, 0);
+	VM_BUG_ON(pmd_thp_or_huge(*pmd));
+	pmd_clear(pmd);
+	pte_free_kernel(NULL, pte_table);
+	put_page(virt_to_page(pmd));
+}
+
+static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte, *start_pte;
+
+	start_pte = pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			kvm_set_pte(pte, __pte(0));
+			put_page(virt_to_page(pte));
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	if (hyp_pte_table_empty(start_pte))
+		clear_hyp_pmd_entry(pmd);
+}
+
+static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pmd_t *pmd, *start_pmd;
+
+	start_pmd = pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		/* Hyp doesn't use huge pmds */
+		if (!pmd_none(*pmd))
+			unmap_hyp_ptes(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+
+	if (hyp_pmd_table_empty(start_pmd))
+		clear_hyp_pud_entry(pud);
+}
+
+static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
+{
+	phys_addr_t next;
+	pud_t *pud, *start_pud;
+
+	start_pud = pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		/* Hyp doesn't use huge puds */
+		if (!pud_none(*pud))
+			unmap_hyp_pmds(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	if (hyp_pud_table_empty(start_pud))
+		clear_hyp_pgd_entry(pgd);
+}
+
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+	pgd_t *pgd;
+	phys_addr_t addr = start, end = start + size;
+	phys_addr_t next;
+
+	/*
+	 * We don't unmap anything from HYP, except at the hyp tear down.
+	 * Hence, we don't have to invalidate the TLBs here.
+	 */
+	pgd = pgdp + pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (!pgd_none(*pgd))
+			unmap_hyp_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
@@ -401,14 +493,14 @@ void free_boot_hyp_pgd(void)
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
 	if (boot_hyp_pgd) {
-		unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-		unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
+		unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd)
-		unmap_range(NULL, hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
+		unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
@@ -433,9 +525,9 @@ void free_hyp_pgds(void)
 
 	if (hyp_pgd) {
 		for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 		for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-			unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
+			unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
@@ -645,20 +737,6 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
 				   __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
-/* Free the HW pgd, one page at a time */
-static void kvm_free_hwpgd(void *hwpgd)
-{
-	free_pages_exact(hwpgd, kvm_get_hwpgd_size());
-}
-
-/* Allocate the HW PGD, making sure that each page gets its own refcount */
-static void *kvm_alloc_hwpgd(void)
-{
-	unsigned int size = kvm_get_hwpgd_size();
-
-	return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
-}
-
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:	The KVM struct pointer for the VM.
@@ -673,81 +751,22 @@ static void *kvm_alloc_hwpgd(void)
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
 	pgd_t *pgd;
-	void *hwpgd;
 
 	if (kvm->arch.pgd != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
 		return -EINVAL;
 	}
 
-	hwpgd = kvm_alloc_hwpgd();
-	if (!hwpgd)
+	/* Allocate the HW PGD, making sure that each page gets its own refcount */
+	pgd = alloc_pages_exact(S2_PGD_SIZE, GFP_KERNEL | __GFP_ZERO);
+	if (!pgd)
 		return -ENOMEM;
 
-	/* When the kernel uses more levels of page tables than the
-	 * guest, we allocate a fake PGD and pre-populate it to point
-	 * to the next-level page table, which will be the real
-	 * initial page table pointed to by the VTTBR.
-	 *
-	 * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
-	 * the PMD and the kernel will use folded pud.
-	 * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
-	 * pages.
-	 */
-	if (KVM_PREALLOC_LEVEL > 0) {
-		int i;
-
-		/*
-		 * Allocate fake pgd for the page table manipulation macros to
-		 * work. This is not used by the hardware and we have no
-		 * alignment requirement for this allocation.
-		 */
-		pgd = kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
-			      GFP_KERNEL | __GFP_ZERO);
-
-		if (!pgd) {
-			kvm_free_hwpgd(hwpgd);
-			return -ENOMEM;
-		}
-
-		/* Plug the HW PGD into the fake one. */
-		for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-			if (KVM_PREALLOC_LEVEL == 1)
-				pgd_populate(NULL, pgd + i,
-					     (pud_t *)hwpgd + i * PTRS_PER_PUD);
-			else if (KVM_PREALLOC_LEVEL == 2)
-				pud_populate(NULL, pud_offset(pgd, 0) + i,
-					     (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-		}
-	} else {
-		/*
-		 * Allocate actual first-level Stage-2 page table used by the
-		 * hardware for Stage-2 page table walks.
-		 */
-		pgd = (pgd_t *)hwpgd;
-	}
-
 	kvm_clean_pgd(pgd);
 	kvm->arch.pgd = pgd;
 	return 0;
 }
 
-/**
- * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
- * @kvm:   The VM pointer
- * @start: The intermediate physical base address of the range to unmap
- * @size:  The size of the area to unmap
- *
- * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
- * be called while holding mmu_lock (unless for freeing the stage2 pgd before
- * destroying the VM), otherwise another faulting VCPU may come in and mess
- * with things behind our backs.
- */
-static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
-{
-	unmap_range(kvm, kvm->arch.pgd, start, size);
-}
-
 static void stage2_unmap_memslot(struct kvm *kvm,
 				 struct kvm_memory_slot *memslot)
 {
@@ -830,10 +849,8 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 		return;
 
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-	kvm_free_hwpgd(kvm_get_hwpgd(kvm));
-	if (KVM_PREALLOC_LEVEL > 0)
-		kfree(kvm->arch.pgd);
-
+	/* Free the HW pgd, one page at a time */
+	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
 	kvm->arch.pgd = NULL;
 }
 
@@ -843,16 +860,16 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pgd_t *pgd;
 	pud_t *pud;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
-	if (WARN_ON(pgd_none(*pgd))) {
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
+	if (WARN_ON(stage2_pgd_none(*pgd))) {
 		if (!cache)
 			return NULL;
 		pud = mmu_memory_cache_alloc(cache);
-		pgd_populate(NULL, pgd, pud);
+		stage2_pgd_populate(pgd, pud);
 		get_page(virt_to_page(pgd));
 	}
 
-	return pud_offset(pgd, addr);
+	return stage2_pud_offset(pgd, addr);
 }
 
 static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
@@ -862,15 +879,15 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
 	pmd_t *pmd;
 
 	pud = stage2_get_pud(kvm, cache, addr);
-	if (pud_none(*pud)) {
+	if (stage2_pud_none(*pud)) {
 		if (!cache)
 			return NULL;
 		pmd = mmu_memory_cache_alloc(cache);
-		pud_populate(NULL, pud, pmd);
+		stage2_pud_populate(pud, pmd);
 		get_page(virt_to_page(pud));
 	}
 
-	return pmd_offset(pud, addr);
+	return stage2_pmd_offset(pud, addr);
 }
 
 static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
@@ -893,11 +910,14 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
 	VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
 
 	old_pmd = *pmd;
-	kvm_set_pmd(pmd, *new_pmd);
-	if (pmd_present(old_pmd))
+	if (pmd_present(old_pmd)) {
+		pmd_clear(pmd);
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	else
+	} else {
 		get_page(virt_to_page(pmd));
+	}
+
+	kvm_set_pmd(pmd, *new_pmd);
 	return 0;
 }
 
@@ -946,15 +966,38 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
 
 	/* Create 2nd stage page table mapping - Level 3 */
 	old_pte = *pte;
-	kvm_set_pte(pte, *new_pte);
-	if (pte_present(old_pte))
+	if (pte_present(old_pte)) {
+		kvm_set_pte(pte, __pte(0));
 		kvm_tlb_flush_vmid_ipa(kvm, addr);
-	else
+	} else {
 		get_page(virt_to_page(pte));
+	}
 
+	kvm_set_pte(pte, *new_pte);
 	return 0;
 }
 
+#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	if (pte_young(*pte)) {
+		*pte = pte_mkold(*pte);
+		return 1;
+	}
+	return 0;
+}
+#else
+static int stage2_ptep_test_and_clear_young(pte_t *pte)
+{
+	return __ptep_test_and_clear_young(pte);
+}
+#endif
+
+static int stage2_pmdp_test_and_clear_young(pmd_t *pmd)
+{
+	return stage2_ptep_test_and_clear_young((pte_t *)pmd);
+}
+
 /**
  * kvm_phys_addr_ioremap - map a device range to guest IPA
  *
@@ -978,7 +1021,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 		pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
 		if (writable)
-			kvm_set_s2pte_writable(&pte);
+			pte = kvm_s2pte_mkwrite(pte);
 
 		ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
 						KVM_NR_MEM_OBJS);
@@ -1078,12 +1121,12 @@ static void stage2_wp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end)
 	pmd_t *pmd;
 	phys_addr_t next;
 
-	pmd = pmd_offset(pud, addr);
+	pmd = stage2_pmd_offset(pud, addr);
 
 	do {
-		next = kvm_pmd_addr_end(addr, end);
+		next = stage2_pmd_addr_end(addr, end);
 		if (!pmd_none(*pmd)) {
-			if (kvm_pmd_huge(*pmd)) {
+			if (pmd_thp_or_huge(*pmd)) {
 				if (!kvm_s2pmd_readonly(pmd))
 					kvm_set_s2pmd_readonly(pmd);
 			} else {
@@ -1106,12 +1149,12 @@ static void stage2_wp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 	pud_t *pud;
 	phys_addr_t next;
 
-	pud = pud_offset(pgd, addr);
+	pud = stage2_pud_offset(pgd, addr);
 	do {
-		next = kvm_pud_addr_end(addr, end);
-		if (!pud_none(*pud)) {
+		next = stage2_pud_addr_end(addr, end);
+		if (!stage2_pud_none(*pud)) {
 			/* TODO:PUD not supported, revisit later if supported */
-			BUG_ON(kvm_pud_huge(*pud));
+			BUG_ON(stage2_pud_huge(*pud));
 			stage2_wp_pmds(pud, addr, next);
 		}
 	} while (pud++, addr = next, addr != end);
@@ -1128,7 +1171,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 	pgd_t *pgd;
 	phys_addr_t next;
 
-	pgd = kvm->arch.pgd + kvm_pgd_index(addr);
+	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		/*
 		 * Release kvm_mmu_lock periodically if the memory region is
@@ -1140,8 +1183,8 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
 		if (need_resched() || spin_needbreak(&kvm->mmu_lock))
 			cond_resched_lock(&kvm->mmu_lock);
 
-		next = kvm_pgd_addr_end(addr, end);
-		if (pgd_present(*pgd))
+		next = stage2_pgd_addr_end(addr, end);
+		if (stage2_pgd_present(*pgd))
 			stage2_wp_puds(pgd, addr, next);
 	} while (pgd++, addr = next, addr != end);
 }
@@ -1320,7 +1363,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
 		if (writable) {
-			kvm_set_s2pmd_writable(&new_pmd);
+			new_pmd = kvm_s2pmd_mkwrite(new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
 		coherent_cache_guest_page(vcpu, pfn, PMD_SIZE, fault_ipa_uncached);
@@ -1329,7 +1372,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		pte_t new_pte = pfn_pte(pfn, mem_type);
 
 		if (writable) {
-			kvm_set_s2pte_writable(&new_pte);
+			new_pte = kvm_s2pte_mkwrite(new_pte);
 			kvm_set_pfn_dirty(pfn);
 			mark_page_dirty(kvm, gfn);
 		}
@@ -1348,6 +1391,8 @@ out_unlock:
  * Resolve the access fault by making the page young again.
  * Note that because the faulting entry is guaranteed not to be
  * cached in the TLB, we don't need to invalidate anything.
+ * Only the HW Access Flag updates are supported for Stage 2 (no DBM),
+ * so there is no need for atomic (pte|pmd)_mkyoung operations.
  */
 static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 {
@@ -1364,7 +1409,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		goto out;
 
-	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
+	if (pmd_thp_or_huge(*pmd)) {	/* THP, HugeTLB */
 		*pmd = pmd_mkyoung(*pmd);
 		pfn = pmd_pfn(*pmd);
 		pfn_valid = true;
@@ -1588,25 +1633,14 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
-	if (kvm_pmd_huge(*pmd)) {	/* THP, HugeTLB */
-		if (pmd_young(*pmd)) {
-			*pmd = pmd_mkold(*pmd);
-			return 1;
-		}
-
-		return 0;
-	}
+	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
+		return stage2_pmdp_test_and_clear_young(pmd);
 
 	pte = pte_offset_kernel(pmd, gpa);
 	if (pte_none(*pte))
 		return 0;
 
-	if (pte_young(*pte)) {
-		*pte = pte_mkold(*pte);	/* Just a page... */
-		return 1;
-	}
-
-	return 0;
+	return stage2_ptep_test_and_clear_young(pte);
 }
 
 static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
@@ -1618,7 +1652,7 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 	if (!pmd || pmd_none(*pmd))	/* Nothing there */
 		return 0;
 
-	if (kvm_pmd_huge(*pmd))		/* THP, HugeTLB */
+	if (pmd_thp_or_huge(*pmd))	/* THP, HugeTLB */
 		return pmd_young(*pmd);
 
 	pte = pte_offset_kernel(pmd, gpa);
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1b3dc9df5257..2cdb6b551ac6 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -85,32 +85,37 @@
 #define HCR_HOST_VHE_FLAGS	(HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
 #define TCR_EL2_RES1		((1 << 31) | (1 << 23))
 #define TCR_EL2_TBI		(1 << 20)
-#define TCR_EL2_PS		(7 << 16)
-#define TCR_EL2_PS_40B		(2 << 16)
-#define TCR_EL2_TG0		(1 << 14)
-#define TCR_EL2_SH0		(3 << 12)
-#define TCR_EL2_ORGN0		(3 << 10)
-#define TCR_EL2_IRGN0		(3 << 8)
-#define TCR_EL2_T0SZ		0x3f
-#define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
-			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
+#define TCR_EL2_PS_SHIFT	16
+#define TCR_EL2_PS_MASK		(7 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_PS_40B		(2 << TCR_EL2_PS_SHIFT)
+#define TCR_EL2_TG0_MASK	TCR_TG0_MASK
+#define TCR_EL2_SH0_MASK	TCR_SH0_MASK
+#define TCR_EL2_ORGN0_MASK	TCR_ORGN0_MASK
+#define TCR_EL2_IRGN0_MASK	TCR_IRGN0_MASK
+#define TCR_EL2_T0SZ_MASK	0x3f
+#define TCR_EL2_MASK	(TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
+			 TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
 
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_RES1		(1 << 31)
-#define VTCR_EL2_PS_MASK	(7 << 16)
-#define VTCR_EL2_TG0_MASK	(1 << 14)
-#define VTCR_EL2_TG0_4K		(0 << 14)
-#define VTCR_EL2_TG0_64K	(1 << 14)
-#define VTCR_EL2_SH0_MASK	(3 << 12)
-#define VTCR_EL2_SH0_INNER	(3 << 12)
-#define VTCR_EL2_ORGN0_MASK	(3 << 10)
-#define VTCR_EL2_ORGN0_WBWA	(1 << 10)
-#define VTCR_EL2_IRGN0_MASK	(3 << 8)
-#define VTCR_EL2_IRGN0_WBWA	(1 << 8)
-#define VTCR_EL2_SL0_MASK	(3 << 6)
-#define VTCR_EL2_SL0_LVL1	(1 << 6)
+#define VTCR_EL2_HD		(1 << 22)
+#define VTCR_EL2_HA		(1 << 21)
+#define VTCR_EL2_PS_MASK	TCR_EL2_PS_MASK
+#define VTCR_EL2_TG0_MASK	TCR_TG0_MASK
+#define VTCR_EL2_TG0_4K		TCR_TG0_4K
+#define VTCR_EL2_TG0_16K	TCR_TG0_16K
+#define VTCR_EL2_TG0_64K	TCR_TG0_64K
+#define VTCR_EL2_SH0_MASK	TCR_SH0_MASK
+#define VTCR_EL2_SH0_INNER	TCR_SH0_INNER
+#define VTCR_EL2_ORGN0_MASK	TCR_ORGN0_MASK
+#define VTCR_EL2_ORGN0_WBWA	TCR_ORGN0_WBWA
+#define VTCR_EL2_IRGN0_MASK	TCR_IRGN0_MASK
+#define VTCR_EL2_IRGN0_WBWA	TCR_IRGN0_WBWA
+#define VTCR_EL2_SL0_SHIFT	6
+#define VTCR_EL2_SL0_MASK	(3 << VTCR_EL2_SL0_SHIFT)
+#define VTCR_EL2_SL0_LVL1	(1 << VTCR_EL2_SL0_SHIFT)
 #define VTCR_EL2_T0SZ_MASK	0x3f
 #define VTCR_EL2_T0SZ_40B	24
 #define VTCR_EL2_VS_SHIFT	19
@@ -126,35 +131,45 @@
  * (see hyp-init.S).
  *
  * Note that when using 4K pages, we concatenate two first level page tables
- * together.
+ * together. With 16K pages, we concatenate 16 first level page tables.
  *
  * The magic numbers used for VTTBR_X in this patch can be found in Tables
  * D4-23 and D4-25 in ARM DDI 0487A.b.
  */
+
+#define VTCR_EL2_T0SZ_IPA	VTCR_EL2_T0SZ_40B
+#define VTCR_EL2_COMMON_BITS	(VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
+				 VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
+
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits input (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
  */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
-				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
-#else
+#define VTCR_EL2_TGRAN_FLAGS	(VTCR_EL2_TG0_64K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC	38
+#elif defined(CONFIG_ARM64_16K_PAGES)
+/*
+ * Stage2 translation configuration:
+ * 16kB pages (TG0 = 2)
+ * 2 level page tables (SL = 1)
+ */
+#define VTCR_EL2_TGRAN_FLAGS	(VTCR_EL2_TG0_16K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC	42
+#else	/* 4K */
 /*
  * Stage2 translation configuration:
- * 40bits input (T0SZ = 24)
 * 4kB pages (TG0 = 0)
 * 3 level page tables (SL = 1)
 */
-#define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
-				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
-#define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_TGRAN_FLAGS	(VTCR_EL2_TG0_4K | VTCR_EL2_SL0_LVL1)
+#define VTTBR_X_TGRAN_MAGIC	37
 #endif
 
+#define VTCR_EL2_FLAGS		(VTCR_EL2_COMMON_BITS | VTCR_EL2_TGRAN_FLAGS)
+#define VTTBR_X			(VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA)
+
 #define VTTBR_BADDR_SHIFT (VTTBR_X - 1)
 #define VTTBR_BADDR_MASK  (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT)
 #define VTTBR_VMID_SHIFT  (UL(48))
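
For reference, the VTTBR_X values that fall out of the new macros, worked
through with the 40-bit IPA KVM uses (VTCR_EL2_T0SZ_IPA = 24). The arithmetic
is straight from the defines above; the gloss on what VTTBR_X means is an
editorial note, not text from the patch:

/*
 * VTTBR_X = VTTBR_X_TGRAN_MAGIC - VTCR_EL2_T0SZ_IPA
 *
 *   64K pages: 38 - 24 = 14
 *   16K pages: 42 - 24 = 18
 *    4K pages: 37 - 24 = 13
 *
 * VTTBR_X then feeds VTTBR_BADDR_MASK, i.e. the alignment required of the
 * (possibly concatenated) first-level stage-2 tables.
 */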
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 90a8d2336ceb..e63d23bad36e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -295,6 +295,7 @@ struct kvm_vm_stat {
 struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
 	u32 halt_attempted_poll;
+	u32 halt_poll_invalid;
 	u32 halt_wakeup;
 	u32 hvc_exit_stat;
 	u64 wfe_exit_stat;
@@ -369,6 +370,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index e8d39d4f86b6..f05ac27d033e 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -45,18 +45,6 @@
  */
 #define TRAMPOLINE_VA		(HYP_PAGE_OFFSET_MASK & PAGE_MASK)
 
-/*
- * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
- * levels in addition to the PGD and potentially the PUD which are
- * pre-allocated (we pre-allocate the fake PGD and the PUD when the Stage-2
- * tables use one level of tables less than the kernel.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define KVM_MMU_CACHE_MIN_PAGES	1
-#else
-#define KVM_MMU_CACHE_MIN_PAGES	2
-#endif
-
 #ifdef __ASSEMBLY__
 
 #include <asm/alternative.h>
@@ -91,6 +79,8 @@ alternative_endif
91#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT) 79#define KVM_PHYS_SIZE (1UL << KVM_PHYS_SHIFT)
92#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL) 80#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1UL)
93 81
82#include <asm/stage2_pgtable.h>
83
94int create_hyp_mappings(void *from, void *to); 84int create_hyp_mappings(void *from, void *to);
95int create_hyp_io_mappings(void *from, void *to, phys_addr_t); 85int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
96void free_boot_hyp_pgd(void); 86void free_boot_hyp_pgd(void);
@@ -122,19 +112,32 @@ static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
122static inline void kvm_clean_pte(pte_t *pte) {} 112static inline void kvm_clean_pte(pte_t *pte) {}
123static inline void kvm_clean_pte_entry(pte_t *pte) {} 113static inline void kvm_clean_pte_entry(pte_t *pte) {}
124 114
125static inline void kvm_set_s2pte_writable(pte_t *pte) 115static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
126{ 116{
127 pte_val(*pte) |= PTE_S2_RDWR; 117 pte_val(pte) |= PTE_S2_RDWR;
118 return pte;
128} 119}
129 120
130static inline void kvm_set_s2pmd_writable(pmd_t *pmd) 121static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
131{ 122{
132 pmd_val(*pmd) |= PMD_S2_RDWR; 123 pmd_val(pmd) |= PMD_S2_RDWR;
124 return pmd;
133} 125}
134 126
135static inline void kvm_set_s2pte_readonly(pte_t *pte) 127static inline void kvm_set_s2pte_readonly(pte_t *pte)
136{ 128{
137 pte_val(*pte) = (pte_val(*pte) & ~PTE_S2_RDWR) | PTE_S2_RDONLY; 129 pteval_t pteval;
130 unsigned long tmp;
131
132 asm volatile("// kvm_set_s2pte_readonly\n"
133 " prfm pstl1strm, %2\n"
134 "1: ldxr %0, %2\n"
135 " and %0, %0, %3 // clear PTE_S2_RDWR\n"
136 " orr %0, %0, %4 // set PTE_S2_RDONLY\n"
137 " stxr %w1, %0, %2\n"
138 " cbnz %w1, 1b\n"
139 : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*pte))
140 : "L" (~PTE_S2_RDWR), "L" (PTE_S2_RDONLY));
138} 141}
139 142
140static inline bool kvm_s2pte_readonly(pte_t *pte) 143static inline bool kvm_s2pte_readonly(pte_t *pte)
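
The rewritten kvm_set_s2pte_readonly() replaces the plain read-modify-write with an ldxr/stxr exclusive-access loop, so that a concurrent hardware update of the access/dirty bits (the HA/HD support enabled later in this pull via VTCR_EL2_HA) cannot be lost between the load and the store. A portable user-space analogue of the same pattern, using GCC built-in atomics instead of the kernel's inline asm; the S2AP bit values are copied here purely for illustration:

#include <stdint.h>

#define PTE_S2_RDONLY	(1UL << 6)	/* S2AP = 01: read-only */
#define PTE_S2_RDWR	(3UL << 6)	/* S2AP = 11: read/write */

static void set_s2pte_readonly(uint64_t *pte)
{
	uint64_t old = __atomic_load_n(pte, __ATOMIC_RELAXED);
	uint64_t new;

	/* Retry until no other agent (CPU or MMU) raced with our update;
	 * __atomic_compare_exchange_n reloads 'old' on failure. */
	do {
		new = (old & ~PTE_S2_RDWR) | PTE_S2_RDONLY;
	} while (!__atomic_compare_exchange_n(pte, &old, new, 0,
					      __ATOMIC_RELAXED,
					      __ATOMIC_RELAXED));
}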
@@ -144,69 +147,12 @@ static inline bool kvm_s2pte_readonly(pte_t *pte)
144 147
145static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) 148static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
146{ 149{
147 pmd_val(*pmd) = (pmd_val(*pmd) & ~PMD_S2_RDWR) | PMD_S2_RDONLY; 150 kvm_set_s2pte_readonly((pte_t *)pmd);
148} 151}
149 152
150static inline bool kvm_s2pmd_readonly(pmd_t *pmd) 153static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
151{ 154{
152 return (pmd_val(*pmd) & PMD_S2_RDWR) == PMD_S2_RDONLY; 155 return kvm_s2pte_readonly((pte_t *)pmd);
153}
154
155
156#define kvm_pgd_addr_end(addr, end) pgd_addr_end(addr, end)
157#define kvm_pud_addr_end(addr, end) pud_addr_end(addr, end)
158#define kvm_pmd_addr_end(addr, end) pmd_addr_end(addr, end)
159
160/*
161 * In the case where PGDIR_SHIFT is larger than KVM_PHYS_SHIFT, we can address
162 * the entire IPA input range with a single pgd entry, and we would only need
163 * one pgd entry. Note that in this case, the pgd is actually not used by
164 * the MMU for Stage-2 translations, but is merely a fake pgd used as a data
165 * structure for the kernel pgtable macros to work.
166 */
167#if PGDIR_SHIFT > KVM_PHYS_SHIFT
168#define PTRS_PER_S2_PGD_SHIFT 0
169#else
170#define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT)
171#endif
172#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
173
174#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
175
176/*
177 * If we are concatenating first level stage-2 page tables, we would have less
178 * than or equal to 16 pointers in the fake PGD, because that's what the
179 * architecture allows. In this case, (4 - CONFIG_PGTABLE_LEVELS)
180 * represents the first level for the host, and we add 1 to go to the next
181 * level (which uses concatenation) for the stage-2 tables.
182 */
183#if PTRS_PER_S2_PGD <= 16
184#define KVM_PREALLOC_LEVEL (4 - CONFIG_PGTABLE_LEVELS + 1)
185#else
186#define KVM_PREALLOC_LEVEL (0)
187#endif
188
189static inline void *kvm_get_hwpgd(struct kvm *kvm)
190{
191 pgd_t *pgd = kvm->arch.pgd;
192 pud_t *pud;
193
194 if (KVM_PREALLOC_LEVEL == 0)
195 return pgd;
196
197 pud = pud_offset(pgd, 0);
198 if (KVM_PREALLOC_LEVEL == 1)
199 return pud;
200
201 BUG_ON(KVM_PREALLOC_LEVEL != 2);
202 return pmd_offset(pud, 0);
203}
204
205static inline unsigned int kvm_get_hwpgd_size(void)
206{
207 if (KVM_PREALLOC_LEVEL > 0)
208 return PTRS_PER_S2_PGD * PAGE_SIZE;
209 return PTRS_PER_S2_PGD * sizeof(pgd_t);
210} 156}
211 157
212static inline bool kvm_page_empty(void *ptr) 158static inline bool kvm_page_empty(void *ptr)
@@ -215,23 +161,20 @@ static inline bool kvm_page_empty(void *ptr)
215 return page_count(ptr_page) == 1; 161 return page_count(ptr_page) == 1;
216} 162}
217 163
218#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep) 164#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)
219 165
220#ifdef __PAGETABLE_PMD_FOLDED 166#ifdef __PAGETABLE_PMD_FOLDED
221#define kvm_pmd_table_empty(kvm, pmdp) (0) 167#define hyp_pmd_table_empty(pmdp) (0)
222#else 168#else
223#define kvm_pmd_table_empty(kvm, pmdp) \ 169#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
224 (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
225#endif 170#endif
226 171
227#ifdef __PAGETABLE_PUD_FOLDED 172#ifdef __PAGETABLE_PUD_FOLDED
228#define kvm_pud_table_empty(kvm, pudp) (0) 173#define hyp_pud_table_empty(pudp) (0)
229#else 174#else
230#define kvm_pud_table_empty(kvm, pudp) \ 175#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
231 (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
232#endif 176#endif
233 177
234
235struct kvm; 178struct kvm;
236 179
237#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) 180#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l))
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 9786f770088d..2813748e2f24 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -207,23 +207,69 @@
207#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET) 207#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
208#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) 208#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
209#define TCR_TxSZ_WIDTH 6 209#define TCR_TxSZ_WIDTH 6
210#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24)) 210
211#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24)) 211#define TCR_IRGN0_SHIFT 8
212#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24)) 212#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
213#define TCR_IRGN_WBnWA ((UL(3) << 8) | (UL(3) << 24)) 213#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
214#define TCR_IRGN_MASK ((UL(3) << 8) | (UL(3) << 24)) 214#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
215#define TCR_ORGN_NC ((UL(0) << 10) | (UL(0) << 26)) 215#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
216#define TCR_ORGN_WBWA ((UL(1) << 10) | (UL(1) << 26)) 216#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
217#define TCR_ORGN_WT ((UL(2) << 10) | (UL(2) << 26)) 217
218#define TCR_ORGN_WBnWA ((UL(3) << 10) | (UL(3) << 26)) 218#define TCR_IRGN1_SHIFT 24
219#define TCR_ORGN_MASK ((UL(3) << 10) | (UL(3) << 26)) 219#define TCR_IRGN1_MASK (UL(3) << TCR_IRGN1_SHIFT)
220#define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) 220#define TCR_IRGN1_NC (UL(0) << TCR_IRGN1_SHIFT)
221#define TCR_TG0_4K (UL(0) << 14) 221#define TCR_IRGN1_WBWA (UL(1) << TCR_IRGN1_SHIFT)
222#define TCR_TG0_64K (UL(1) << 14) 222#define TCR_IRGN1_WT (UL(2) << TCR_IRGN1_SHIFT)
223#define TCR_TG0_16K (UL(2) << 14) 223#define TCR_IRGN1_WBnWA (UL(3) << TCR_IRGN1_SHIFT)
224#define TCR_TG1_16K (UL(1) << 30) 224
225#define TCR_TG1_4K (UL(2) << 30) 225#define TCR_IRGN_NC (TCR_IRGN0_NC | TCR_IRGN1_NC)
226#define TCR_TG1_64K (UL(3) << 30) 226#define TCR_IRGN_WBWA (TCR_IRGN0_WBWA | TCR_IRGN1_WBWA)
227#define TCR_IRGN_WT (TCR_IRGN0_WT | TCR_IRGN1_WT)
228#define TCR_IRGN_WBnWA (TCR_IRGN0_WBnWA | TCR_IRGN1_WBnWA)
229#define TCR_IRGN_MASK (TCR_IRGN0_MASK | TCR_IRGN1_MASK)
230
231
232#define TCR_ORGN0_SHIFT 10
233#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
234#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
235#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
236#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
237#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
238
239#define TCR_ORGN1_SHIFT 26
240#define TCR_ORGN1_MASK (UL(3) << TCR_ORGN1_SHIFT)
241#define TCR_ORGN1_NC (UL(0) << TCR_ORGN1_SHIFT)
242#define TCR_ORGN1_WBWA (UL(1) << TCR_ORGN1_SHIFT)
243#define TCR_ORGN1_WT (UL(2) << TCR_ORGN1_SHIFT)
244#define TCR_ORGN1_WBnWA (UL(3) << TCR_ORGN1_SHIFT)
245
246#define TCR_ORGN_NC (TCR_ORGN0_NC | TCR_ORGN1_NC)
247#define TCR_ORGN_WBWA (TCR_ORGN0_WBWA | TCR_ORGN1_WBWA)
248#define TCR_ORGN_WT (TCR_ORGN0_WT | TCR_ORGN1_WT)
249#define TCR_ORGN_WBnWA (TCR_ORGN0_WBnWA | TCR_ORGN1_WBnWA)
250#define TCR_ORGN_MASK (TCR_ORGN0_MASK | TCR_ORGN1_MASK)
251
252#define TCR_SH0_SHIFT 12
253#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
254#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
255
256#define TCR_SH1_SHIFT 28
257#define TCR_SH1_MASK (UL(3) << TCR_SH1_SHIFT)
258#define TCR_SH1_INNER (UL(3) << TCR_SH1_SHIFT)
259#define TCR_SHARED (TCR_SH0_INNER | TCR_SH1_INNER)
260
261#define TCR_TG0_SHIFT 14
262#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
263#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
264#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
265#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
266
267#define TCR_TG1_SHIFT 30
268#define TCR_TG1_MASK (UL(3) << TCR_TG1_SHIFT)
269#define TCR_TG1_16K (UL(1) << TCR_TG1_SHIFT)
270#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
271#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
272
227#define TCR_ASID16 (UL(1) << 36) 273#define TCR_ASID16 (UL(1) << 36)
228#define TCR_TBI0 (UL(1) << 37) 274#define TCR_TBI0 (UL(1) << 37)
229#define TCR_HA (UL(1) << 39) 275#define TCR_HA (UL(1) << 39)
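
Breaking each TCR field out into explicit SHIFT/MASK/value macros, instead of dual-field constants hard-coded for TTBR0 plus TTBR1, lets single-field users such as VTCR_EL2 reuse them. A typical single-field update with the new macros might look like this (an illustrative helper, not code from the patch):

/* Change only the TTBR0 inner-cacheability field, leaving the rest of
 * the register value intact. */
static inline unsigned long tcr_set_irgn0_wbwa(unsigned long tcr)
{
	return (tcr & ~TCR_IRGN0_MASK) | TCR_IRGN0_WBWA;
}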
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2da46ae9c991..1910bf47d4a3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -300,6 +300,8 @@ static inline int pmd_protnone(pmd_t pmd)
300#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) 300#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
301#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) 301#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
302 302
303#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
304
303#define __HAVE_ARCH_PMD_WRITE 305#define __HAVE_ARCH_PMD_WRITE
304#define pmd_write(pmd) pte_write(pmd_pte(pmd)) 306#define pmd_write(pmd) pte_write(pmd_pte(pmd))
305 307
@@ -554,14 +556,12 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma,
554 * Atomic pte/pmd modifications. 556 * Atomic pte/pmd modifications.
555 */ 557 */
556#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG 558#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
557static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, 559static inline int __ptep_test_and_clear_young(pte_t *ptep)
558 unsigned long address,
559 pte_t *ptep)
560{ 560{
561 pteval_t pteval; 561 pteval_t pteval;
562 unsigned int tmp, res; 562 unsigned int tmp, res;
563 563
564 asm volatile("// ptep_test_and_clear_young\n" 564 asm volatile("// __ptep_test_and_clear_young\n"
565 " prfm pstl1strm, %2\n" 565 " prfm pstl1strm, %2\n"
566 "1: ldxr %0, %2\n" 566 "1: ldxr %0, %2\n"
567 " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" 567 " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n"
@@ -574,6 +574,13 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
574 return res; 574 return res;
575} 575}
576 576
577static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
578 unsigned long address,
579 pte_t *ptep)
580{
581 return __ptep_test_and_clear_young(ptep);
582}
583
577#ifdef CONFIG_TRANSPARENT_HUGEPAGE 584#ifdef CONFIG_TRANSPARENT_HUGEPAGE
578#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG 585#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
579static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, 586static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopmd.h b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
new file mode 100644
index 000000000000..2656a0fd05a6
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopmd.h
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __ARM64_S2_PGTABLE_NOPMD_H_
18#define __ARM64_S2_PGTABLE_NOPMD_H_
19
20#include <asm/stage2_pgtable-nopud.h>
21
22#define __S2_PGTABLE_PMD_FOLDED
23
24#define S2_PMD_SHIFT S2_PUD_SHIFT
25#define S2_PTRS_PER_PMD 1
26#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT)
27#define S2_PMD_MASK (~(S2_PMD_SIZE-1))
28
29#define stage2_pud_none(pud) (0)
30#define stage2_pud_present(pud) (1)
31#define stage2_pud_clear(pud) do { } while (0)
32#define stage2_pud_populate(pud, pmd) do { } while (0)
33#define stage2_pmd_offset(pud, address) ((pmd_t *)(pud))
34
35#define stage2_pmd_free(pmd) do { } while (0)
36
37#define stage2_pmd_addr_end(addr, end) (end)
38
39#define stage2_pud_huge(pud) (0)
40#define stage2_pmd_table_empty(pmdp) (0)
41
42#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable-nopud.h b/arch/arm64/include/asm/stage2_pgtable-nopud.h
new file mode 100644
index 000000000000..5ee87b54ebf3
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable-nopud.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#ifndef __ARM64_S2_PGTABLE_NOPUD_H_
18#define __ARM64_S2_PGTABLE_NOPUD_H_
19
20#define __S2_PGTABLE_PUD_FOLDED
21
22#define S2_PUD_SHIFT S2_PGDIR_SHIFT
23#define S2_PTRS_PER_PUD 1
24#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
25#define S2_PUD_MASK (~(S2_PUD_SIZE-1))
26
27#define stage2_pgd_none(pgd) (0)
28#define stage2_pgd_present(pgd) (1)
29#define stage2_pgd_clear(pgd) do { } while (0)
30#define stage2_pgd_populate(pgd, pud) do { } while (0)
31
32#define stage2_pud_offset(pgd, address) ((pud_t *)(pgd))
33
34#define stage2_pud_free(x) do { } while (0)
35
36#define stage2_pud_addr_end(addr, end) (end)
37#define stage2_pud_table_empty(pmdp) (0)
38
39#endif
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
new file mode 100644
index 000000000000..8b68099348e5
--- /dev/null
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -0,0 +1,142 @@
1/*
2 * Copyright (C) 2016 - ARM Ltd
3 *
4 * stage2 page table helpers
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef __ARM64_S2_PGTABLE_H_
20#define __ARM64_S2_PGTABLE_H_
21
22#include <asm/pgtable.h>
23
24/*
25 * The hardware supports concatenation of up to 16 tables at stage2 entry level
26 * and we use the feature whenever possible.
27 *
28 * Now, the minimum number of bits resolved at any level is (PAGE_SHIFT - 3).
29 * On arm64, the smallest PAGE_SIZE supported is 4k, which means
30 * (PAGE_SHIFT - 3) > 4 holds for all page sizes.
31 * This implies that the total number of page table levels at stage2 expected
32 * by the hardware is actually the number of levels required for (KVM_PHYS_SHIFT - 4)
33 * in normal translations (e.g., stage1), since we cannot have another level in
34 * the range (KVM_PHYS_SHIFT, KVM_PHYS_SHIFT - 4).
35 */
36#define STAGE2_PGTABLE_LEVELS ARM64_HW_PGTABLE_LEVELS(KVM_PHYS_SHIFT - 4)
37
38/*
39 * With all the supported VA_BITs and 40bit guest IPA, the following condition
40 * is always true:
41 *
42 * STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
43 *
44 * We base our stage-2 page table walker helpers on this assumption and
45 * fall back to using the host version of the helper wherever possible.
46 * i.e., if a particular level is not folded (e.g., PUD) at stage2, we fall back
47 * to using the host version, since it is guaranteed it is not folded at host.
48 *
49 * If the condition breaks in the future, we can rearrange the host level
50 * definitions and reuse them for stage2. Till then...
51 */
52#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
53#error "Unsupported combination of guest IPA and host VA_BITS."
54#endif
55
56/* S2_PGDIR_SHIFT is the size mapped by top-level stage2 entry */
57#define S2_PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - STAGE2_PGTABLE_LEVELS)
58#define S2_PGDIR_SIZE (_AC(1, UL) << S2_PGDIR_SHIFT)
59#define S2_PGDIR_MASK (~(S2_PGDIR_SIZE - 1))
60
61/*
62 * The number of PTRS across all concatenated stage2 tables, as given by the
63 * number of bits resolved at the initial level.
64 */
65#define PTRS_PER_S2_PGD (1 << (KVM_PHYS_SHIFT - S2_PGDIR_SHIFT))
66
67/*
68 * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation
69 * levels in addition to the PGD.
70 */
71#define KVM_MMU_CACHE_MIN_PAGES (STAGE2_PGTABLE_LEVELS - 1)
72
73
74#if STAGE2_PGTABLE_LEVELS > 3
75
76#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
77#define S2_PUD_SIZE (_AC(1, UL) << S2_PUD_SHIFT)
78#define S2_PUD_MASK (~(S2_PUD_SIZE - 1))
79
80#define stage2_pgd_none(pgd) pgd_none(pgd)
81#define stage2_pgd_clear(pgd) pgd_clear(pgd)
82#define stage2_pgd_present(pgd) pgd_present(pgd)
83#define stage2_pgd_populate(pgd, pud) pgd_populate(NULL, pgd, pud)
84#define stage2_pud_offset(pgd, address) pud_offset(pgd, address)
85#define stage2_pud_free(pud) pud_free(NULL, pud)
86
87#define stage2_pud_table_empty(pudp) kvm_page_empty(pudp)
88
89static inline phys_addr_t stage2_pud_addr_end(phys_addr_t addr, phys_addr_t end)
90{
91 phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK;
92
93 return (boundary - 1 < end - 1) ? boundary : end;
94}
95
96#endif /* STAGE2_PGTABLE_LEVELS > 3 */
97
98
99#if STAGE2_PGTABLE_LEVELS > 2
100
101#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
102#define S2_PMD_SIZE (_AC(1, UL) << S2_PMD_SHIFT)
103#define S2_PMD_MASK (~(S2_PMD_SIZE - 1))
104
105#define stage2_pud_none(pud) pud_none(pud)
106#define stage2_pud_clear(pud) pud_clear(pud)
107#define stage2_pud_present(pud) pud_present(pud)
108#define stage2_pud_populate(pud, pmd) pud_populate(NULL, pud, pmd)
109#define stage2_pmd_offset(pud, address) pmd_offset(pud, address)
110#define stage2_pmd_free(pmd) pmd_free(NULL, pmd)
111
112#define stage2_pud_huge(pud) pud_huge(pud)
113#define stage2_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
114
115static inline phys_addr_t stage2_pmd_addr_end(phys_addr_t addr, phys_addr_t end)
116{
117 phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK;
118
119 return (boundary - 1 < end - 1) ? boundary : end;
120}
121
122#endif /* STAGE2_PGTABLE_LEVELS > 2 */
123
124#define stage2_pte_table_empty(ptep) kvm_page_empty(ptep)
125
126#if STAGE2_PGTABLE_LEVELS == 2
127#include <asm/stage2_pgtable-nopmd.h>
128#elif STAGE2_PGTABLE_LEVELS == 3
129#include <asm/stage2_pgtable-nopud.h>
130#endif
131
132
133#define stage2_pgd_index(addr) (((addr) >> S2_PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
134
135static inline phys_addr_t stage2_pgd_addr_end(phys_addr_t addr, phys_addr_t end)
136{
137 phys_addr_t boundary = (addr + S2_PGDIR_SIZE) & S2_PGDIR_MASK;
138
139 return (boundary - 1 < end - 1) ? boundary : end;
140}
141
142#endif /* __ARM64_S2_PGTABLE_H_ */
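
Plugging representative numbers into these macros shows how the stage2 geometry works out. The standalone sketch below reproduces the arithmetic of ARM64_HW_PGTABLE_LEVELS() and ARM64_HW_PGTABLE_LEVEL_SHIFT() for a 40-bit IPA (KVM_PHYS_SHIFT = 40); it is an illustration, not kernel code:

#include <stdio.h>

static void stage2_geometry(const char *name, int page_shift)
{
	const int ipa_bits = 40;	/* KVM_PHYS_SHIFT */
	/* STAGE2_PGTABLE_LEVELS = ARM64_HW_PGTABLE_LEVELS(ipa_bits - 4),
	 * where ARM64_HW_PGTABLE_LEVELS(va) = (va - 4) / (PAGE_SHIFT - 3) */
	int levels = ((ipa_bits - 4) - 4) / (page_shift - 3);
	/* S2_PGDIR_SHIFT = ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - levels)
	 *               = (PAGE_SHIFT - 3) * levels + 3 */
	int pgdir_shift = (page_shift - 3) * levels + 3;
	unsigned long ptrs = 1UL << (ipa_bits - pgdir_shift);

	printf("%-3s: %d levels, S2_PGDIR_SHIFT=%d, PTRS_PER_S2_PGD=%lu (%lu KB), MIN_PAGES=%d\n",
	       name, levels, pgdir_shift, ptrs,
	       ptrs * 8 / 1024,	/* 8-byte entries */
	       levels - 1);	/* KVM_MMU_CACHE_MIN_PAGES */
}

int main(void)
{
	stage2_geometry("4K", 12);	/* 3 levels, shift 30,  1024 ptrs,   8 KB */
	stage2_geometry("16K", 14);	/* 2 levels, shift 25, 32768 ptrs, 256 KB */
	stage2_geometry("64K", 16);	/* 2 levels, shift 29,  2048 ptrs,  16 KB */
	return 0;
}

The resulting table sizes (8 KB, 256 KB, 16 KB) agree with the VTTBR_X values in the kvm_arm.h hunk earlier. Note also the "- 1" in the stage2_*_addr_end() helpers above: comparing boundary - 1 < end - 1 rather than boundary < end stays correct even when the boundary wraps to zero at the very top of the address space.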
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index de7450df7629..aa2e34e99582 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM_ARM_VGIC_V3
22config KVM 22config KVM
23 bool "Kernel-based Virtual Machine (KVM) support" 23 bool "Kernel-based Virtual Machine (KVM) support"
24 depends on OF 24 depends on OF
25 depends on !ARM64_16K_PAGES
26 select MMU_NOTIFIER 25 select MMU_NOTIFIER
27 select PREEMPT_NOTIFIERS 26 select PREEMPT_NOTIFIERS
28 select ANON_INODES 27 select ANON_INODES
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index bcbe761a5a3d..b81f4091c909 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -66,6 +66,14 @@ u32 __hyp_text __init_stage2_translation(void)
66 val |= 64 - (parange > 40 ? 40 : parange); 66 val |= 64 - (parange > 40 ? 40 : parange);
67 67
68 /* 68 /*
69 * Check the availability of Hardware Access Flag / Dirty Bit
70 * Management in ID_AA64MMFR1_EL1 and enable the feature in VTCR_EL2.
71 */
72 tmp = (read_sysreg(id_aa64mmfr1_el1) >> ID_AA64MMFR1_HADBS_SHIFT) & 0xf;
73 if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && tmp)
74 val |= VTCR_EL2_HA;
75
76 /*
69 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS 77 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
70 * bit in VTCR_EL2. 78 * bit in VTCR_EL2.
71 */ 79 */
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index b76e132c87e4..6733ac575da4 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -122,6 +122,7 @@ struct kvm_vcpu_stat {
122 u32 flush_dcache_exits; 122 u32 flush_dcache_exits;
123 u32 halt_successful_poll; 123 u32 halt_successful_poll;
124 u32 halt_attempted_poll; 124 u32 halt_attempted_poll;
125 u32 halt_poll_invalid;
125 u32 halt_wakeup; 126 u32 halt_wakeup;
126}; 127};
127 128
@@ -748,7 +749,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
748 749
749uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu); 750uint32_t kvm_mips_read_count(struct kvm_vcpu *vcpu);
750void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count); 751void kvm_mips_write_count(struct kvm_vcpu *vcpu, uint32_t count);
751void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare); 752void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack);
752void kvm_mips_init_count(struct kvm_vcpu *vcpu); 753void kvm_mips_init_count(struct kvm_vcpu *vcpu);
753int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); 754int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
754int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); 755int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
@@ -813,5 +814,6 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
813static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 814static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
814static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} 815static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
815static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} 816static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
817static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
816 818
817#endif /* __MIPS_KVM_HOST_H__ */ 819#endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 8e945e866a73..396df6eb0a12 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -302,12 +302,31 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu)
302 */ 302 */
303static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) 303static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
304{ 304{
305 ktime_t expires; 305 struct mips_coproc *cop0 = vcpu->arch.cop0;
306 ktime_t expires, threshold;
307 uint32_t count, compare;
306 int running; 308 int running;
307 309
308 /* Is the hrtimer pending? */ 310 /* Calculate the biased and scaled guest CP0_Count */
311 count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
312 compare = kvm_read_c0_guest_compare(cop0);
313
314 /*
315 * Find whether CP0_Count has reached the closest timer interrupt. If
316 * not, we shouldn't inject it.
317 */
318 if ((int32_t)(count - compare) < 0)
319 return count;
320
321 /*
322 * The CP0_Count we're going to return has already reached the closest
323 * timer interrupt. Quickly check if it really is a new interrupt by
324 * looking at whether the interval until the hrtimer expiry time is
325 * less than 1/4 of the timer period.
326 */
309 expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer); 327 expires = hrtimer_get_expires(&vcpu->arch.comparecount_timer);
310 if (ktime_compare(now, expires) >= 0) { 328 threshold = ktime_add_ns(now, vcpu->arch.count_period / 4);
329 if (ktime_before(expires, threshold)) {
311 /* 330 /*
312 * Cancel it while we handle it so there's no chance of 331 * Cancel it while we handle it so there's no chance of
313 * interference with the timeout handler. 332 * interference with the timeout handler.
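
The key trick in the new check is interpreting the 32-bit difference as signed, which stays correct when CP0_Count wraps past CP0_Compare. A minimal standalone demonstration with illustrative values:

#include <stdint.h>
#include <assert.h>

static int count_reached(uint32_t count, uint32_t compare)
{
	/* Same test as above: has count reached or passed compare? */
	return (int32_t)(count - compare) >= 0;
}

int main(void)
{
	assert(!count_reached(5, 10));			/* not yet */
	assert(count_reached(10, 10));			/* exactly at compare */
	assert(count_reached(0x00000002, 0xfffffffe));	/* passed it across the wrap */
	assert(!count_reached(0xfffffffe, 0x00000002));	/* 4 ticks short, across the wrap */
	return 0;
}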
@@ -329,8 +348,7 @@ static uint32_t kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now)
329 } 348 }
330 } 349 }
331 350
332 /* Return the biased and scaled guest CP0_Count */ 351 return count;
333 return vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
334} 352}
335 353
336/** 354/**
@@ -420,32 +438,6 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
420} 438}
421 439
422/** 440/**
423 * kvm_mips_update_hrtimer() - Update next expiry time of hrtimer.
424 * @vcpu: Virtual CPU.
425 *
426 * Recalculates and updates the expiry time of the hrtimer. This can be used
427 * after timer parameters have been altered which do not depend on the time that
428 * the change occurs (in those cases kvm_mips_freeze_hrtimer() and
429 * kvm_mips_resume_hrtimer() are used directly).
430 *
431 * It is guaranteed that no timer interrupts will be lost in the process.
432 *
433 * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is running).
434 */
435static void kvm_mips_update_hrtimer(struct kvm_vcpu *vcpu)
436{
437 ktime_t now;
438 uint32_t count;
439
440 /*
441 * freeze_hrtimer() takes care of timer interrupts <= count, and
442 * resume_hrtimer() takes care of timer interrupts > count.
443 */
444 now = kvm_mips_freeze_hrtimer(vcpu, &count);
445 kvm_mips_resume_hrtimer(vcpu, now, count);
446}
447
448/**
449 * kvm_mips_write_count() - Modify the count and update timer. 441 * kvm_mips_write_count() - Modify the count and update timer.
450 * @vcpu: Virtual CPU. 442 * @vcpu: Virtual CPU.
451 * @count: Guest CP0_Count value to set. 443 * @count: Guest CP0_Count value to set.
@@ -540,23 +532,42 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz)
540 * kvm_mips_write_compare() - Modify compare and update timer. 532 * kvm_mips_write_compare() - Modify compare and update timer.
541 * @vcpu: Virtual CPU. 533 * @vcpu: Virtual CPU.
542 * @compare: New CP0_Compare value. 534 * @compare: New CP0_Compare value.
535 * @ack: Whether to acknowledge timer interrupt.
543 * 536 *
544 * Update CP0_Compare to a new value and update the timeout. 537 * Update CP0_Compare to a new value and update the timeout.
538 * If @ack, atomically acknowledge any pending timer interrupt, otherwise ensure
539 * any pending timer interrupt is preserved.
545 */ 540 */
546void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare) 541void kvm_mips_write_compare(struct kvm_vcpu *vcpu, uint32_t compare, bool ack)
547{ 542{
548 struct mips_coproc *cop0 = vcpu->arch.cop0; 543 struct mips_coproc *cop0 = vcpu->arch.cop0;
544 int dc;
545 u32 old_compare = kvm_read_c0_guest_compare(cop0);
546 ktime_t now;
547 uint32_t count;
549 548
550 /* if unchanged, must just be an ack */ 549 /* if unchanged, must just be an ack */
551 if (kvm_read_c0_guest_compare(cop0) == compare) 550 if (old_compare == compare) {
551 if (!ack)
552 return;
553 kvm_mips_callbacks->dequeue_timer_int(vcpu);
554 kvm_write_c0_guest_compare(cop0, compare);
552 return; 555 return;
556 }
557
558 /* freeze_hrtimer() takes care of timer interrupts <= count */
559 dc = kvm_mips_count_disabled(vcpu);
560 if (!dc)
561 now = kvm_mips_freeze_hrtimer(vcpu, &count);
562
563 if (ack)
564 kvm_mips_callbacks->dequeue_timer_int(vcpu);
553 565
554 /* Update compare */
555 kvm_write_c0_guest_compare(cop0, compare); 566 kvm_write_c0_guest_compare(cop0, compare);
556 567
557 /* Update timeout if count enabled */ 568 /* resume_hrtimer() takes care of timer interrupts > count */
558 if (!kvm_mips_count_disabled(vcpu)) 569 if (!dc)
559 kvm_mips_update_hrtimer(vcpu); 570 kvm_mips_resume_hrtimer(vcpu, now, count);
560} 571}
561 572
562/** 573/**
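
Note the asymmetry in the new @ack argument at the two call sites later in this pull: the MTC0 emulation path in emulate.c passes true, since a guest write to CP0_Compare acknowledges any pending timer interrupt, while the KVM_SET_ONE_REG path in trap_emul.c passes false so that restoring register state (e.g. on migration) does not silently discard a pending interrupt.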
@@ -1095,9 +1106,9 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
1095 1106
1096 /* If we are writing to COMPARE */ 1107 /* If we are writing to COMPARE */
1097 /* Clear pending timer interrupt, if any */ 1108 /* Clear pending timer interrupt, if any */
1098 kvm_mips_callbacks->dequeue_timer_int(vcpu);
1099 kvm_mips_write_compare(vcpu, 1109 kvm_mips_write_compare(vcpu,
1100 vcpu->arch.gprs[rt]); 1110 vcpu->arch.gprs[rt],
1111 true);
1101 } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { 1112 } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
1102 unsigned int old_val, val, change; 1113 unsigned int old_val, val, change;
1103 1114
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 70ef1a43c114..dc052fb5c7a2 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -56,6 +56,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, 56 { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
57 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, 57 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
58 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, 58 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
59 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU },
59 { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, 60 { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU },
60 {NULL} 61 {NULL}
61}; 62};
@@ -1079,7 +1080,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
1079 r = KVM_COALESCED_MMIO_PAGE_OFFSET; 1080 r = KVM_COALESCED_MMIO_PAGE_OFFSET;
1080 break; 1081 break;
1081 case KVM_CAP_MIPS_FPU: 1082 case KVM_CAP_MIPS_FPU:
1082 r = !!cpu_has_fpu; 1083 /* We don't handle systems with inconsistent cpu_has_fpu */
1084 r = !!raw_cpu_has_fpu;
1083 break; 1085 break;
1084 case KVM_CAP_MIPS_MSA: 1086 case KVM_CAP_MIPS_MSA:
1085 /* 1087 /*
@@ -1555,8 +1557,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
1555 1557
1556 /* Disable MSA & FPU */ 1558 /* Disable MSA & FPU */
1557 disable_msa(); 1559 disable_msa();
1558 if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) 1560 if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
1559 clear_c0_status(ST0_CU1 | ST0_FR); 1561 clear_c0_status(ST0_CU1 | ST0_FR);
1562 disable_fpu_hazard();
1563 }
1560 vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA); 1564 vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
1561 } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { 1565 } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
1562 set_c0_status(ST0_CU1); 1566 set_c0_status(ST0_CU1);
@@ -1567,6 +1571,7 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
1567 1571
1568 /* Disable FPU */ 1572 /* Disable FPU */
1569 clear_c0_status(ST0_CU1 | ST0_FR); 1573 clear_c0_status(ST0_CU1 | ST0_FR);
1574 disable_fpu_hazard();
1570 } 1575 }
1571 preempt_enable(); 1576 preempt_enable();
1572} 1577}
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index b9c52c1d35d6..ed021ae7867a 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -275,6 +275,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
275 int even; 275 int even;
276 struct kvm *kvm = vcpu->kvm; 276 struct kvm *kvm = vcpu->kvm;
277 const int flush_dcache_mask = 0; 277 const int flush_dcache_mask = 0;
278 int ret;
278 279
279 if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { 280 if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) {
280 kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); 281 kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr);
@@ -306,14 +307,18 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
306 pfn1 = kvm->arch.guest_pmap[gfn]; 307 pfn1 = kvm->arch.guest_pmap[gfn];
307 } 308 }
308 309
309 entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
310 entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | 310 entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
311 (1 << 2) | (0x1 << 1); 311 (1 << 2) | (0x1 << 1);
312 entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | 312 entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
313 (1 << 2) | (0x1 << 1); 313 (1 << 2) | (0x1 << 1);
314 314
315 return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, 315 preempt_disable();
316 flush_dcache_mask); 316 entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu));
317 ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
318 flush_dcache_mask);
319 preempt_enable();
320
321 return ret;
317} 322}
318EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault); 323EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
319 324
@@ -368,6 +373,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
368 unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; 373 unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
369 struct kvm *kvm = vcpu->kvm; 374 struct kvm *kvm = vcpu->kvm;
370 kvm_pfn_t pfn0, pfn1; 375 kvm_pfn_t pfn0, pfn1;
376 int ret;
371 377
372 if ((tlb->tlb_hi & VPN2_MASK) == 0) { 378 if ((tlb->tlb_hi & VPN2_MASK) == 0) {
373 pfn0 = 0; 379 pfn0 = 0;
@@ -394,9 +400,6 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
394 *hpa1 = pfn1 << PAGE_SHIFT; 400 *hpa1 = pfn1 << PAGE_SHIFT;
395 401
396 /* Get attributes from the Guest TLB */ 402 /* Get attributes from the Guest TLB */
397 entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
398 kvm_mips_get_kernel_asid(vcpu) :
399 kvm_mips_get_user_asid(vcpu));
400 entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) | 403 entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | (0x3 << 3) |
401 (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V); 404 (tlb->tlb_lo0 & MIPS3_PG_D) | (tlb->tlb_lo0 & MIPS3_PG_V);
402 entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) | 405 entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | (0x3 << 3) |
@@ -405,8 +408,15 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
405 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, 408 kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
406 tlb->tlb_lo0, tlb->tlb_lo1); 409 tlb->tlb_lo0, tlb->tlb_lo1);
407 410
408 return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, 411 preempt_disable();
409 tlb->tlb_mask); 412 entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ?
413 kvm_mips_get_kernel_asid(vcpu) :
414 kvm_mips_get_user_asid(vcpu));
415 ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
416 tlb->tlb_mask);
417 preempt_enable();
418
419 return ret;
410} 420}
411EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault); 421EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
412 422
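
The preempt_disable()/preempt_enable() bracket added in both fault handlers closes a migration hazard: the ASID helpers read per-CPU state, and kvm_mips_host_tlb_write() programs the current CPU's TLB, so both must run on the same CPU. A sketch of the race being closed, assuming (as the bracket implies) that the guest ASIDs read here are per-CPU:

/*
 * Without the bracket, the task could migrate between the two steps:
 *
 *	asid = kvm_mips_get_kernel_asid(vcpu);	// reads CPU A's ASID
 *	...					// preempted, moved to CPU B
 *	kvm_mips_host_tlb_write(vcpu, ...);	// writes CPU B's TLB
 *
 * leaving CPU B's TLB loaded with an ASID that belongs to CPU A.
 */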
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index fd43f0afdb9f..6ba0fafcecbc 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -547,7 +547,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
547 kvm_mips_write_count(vcpu, v); 547 kvm_mips_write_count(vcpu, v);
548 break; 548 break;
549 case KVM_REG_MIPS_CP0_COMPARE: 549 case KVM_REG_MIPS_CP0_COMPARE:
550 kvm_mips_write_compare(vcpu, v); 550 kvm_mips_write_compare(vcpu, v, false);
551 break; 551 break;
552 case KVM_REG_MIPS_CP0_CAUSE: 552 case KVM_REG_MIPS_CP0_CAUSE:
553 /* 553 /*
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d7b343170453..ec35af34a3fb 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -40,6 +40,9 @@
40#define KVM_MAX_VCORES NR_CPUS 40#define KVM_MAX_VCORES NR_CPUS
41#define KVM_USER_MEM_SLOTS 512 41#define KVM_USER_MEM_SLOTS 512
42 42
43#include <asm/cputhreads.h>
44#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
45
43#ifdef CONFIG_KVM_MMIO 46#ifdef CONFIG_KVM_MMIO
44#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 47#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
45#endif 48#endif
@@ -113,6 +116,7 @@ struct kvm_vcpu_stat {
113 u32 ext_intr_exits; 116 u32 ext_intr_exits;
114 u32 halt_successful_poll; 117 u32 halt_successful_poll;
115 u32 halt_attempted_poll; 118 u32 halt_attempted_poll;
119 u32 halt_poll_invalid;
116 u32 halt_wakeup; 120 u32 halt_wakeup;
117 u32 dbell_exits; 121 u32 dbell_exits;
118 u32 gdbell_exits; 122 u32 gdbell_exits;
@@ -724,5 +728,6 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
724static inline void kvm_arch_exit(void) {} 728static inline void kvm_arch_exit(void) {}
725static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} 729static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
726static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} 730static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
731static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
727 732
728#endif /* __POWERPC_KVM_HOST_H__ */ 733#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b34220d2aa42..47018fcbf7d6 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -54,6 +54,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
54 { "queue_intr", VCPU_STAT(queue_intr) }, 54 { "queue_intr", VCPU_STAT(queue_intr) },
55 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, 55 { "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
56 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, 56 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
57 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
57 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 58 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
58 { "pf_storage", VCPU_STAT(pf_storage) }, 59 { "pf_storage", VCPU_STAT(pf_storage) },
59 { "sp_storage", VCPU_STAT(sp_storage) }, 60 { "sp_storage", VCPU_STAT(sp_storage) },
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 84fb4fcfaa41..93243554cae9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -27,6 +27,7 @@
27#include <linux/export.h> 27#include <linux/export.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/anon_inodes.h> 29#include <linux/anon_inodes.h>
30#include <linux/cpu.h>
30#include <linux/cpumask.h> 31#include <linux/cpumask.h>
31#include <linux/spinlock.h> 32#include <linux/spinlock.h>
32#include <linux/page-flags.h> 33#include <linux/page-flags.h>
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 95bceca8f40e..8129b0db131e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -882,6 +882,24 @@ void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
882} 882}
883#endif 883#endif
884 884
885static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
886{
887 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
888 u64 msr = kvmppc_get_msr(vcpu);
889
890 kvmppc_set_msr(vcpu, msr | MSR_SE);
891 }
892}
893
894static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
895{
896 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
897 u64 msr = kvmppc_get_msr(vcpu);
898
899 kvmppc_set_msr(vcpu, msr & ~MSR_SE);
900 }
901}
902
885int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, 903int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
886 unsigned int exit_nr) 904 unsigned int exit_nr)
887{ 905{
@@ -1207,10 +1225,18 @@ program_interrupt:
1207 break; 1225 break;
1208#endif 1226#endif
1209 case BOOK3S_INTERRUPT_MACHINE_CHECK: 1227 case BOOK3S_INTERRUPT_MACHINE_CHECK:
1210 case BOOK3S_INTERRUPT_TRACE:
1211 kvmppc_book3s_queue_irqprio(vcpu, exit_nr); 1228 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1212 r = RESUME_GUEST; 1229 r = RESUME_GUEST;
1213 break; 1230 break;
1231 case BOOK3S_INTERRUPT_TRACE:
1232 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
1233 run->exit_reason = KVM_EXIT_DEBUG;
1234 r = RESUME_HOST;
1235 } else {
1236 kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
1237 r = RESUME_GUEST;
1238 }
1239 break;
1214 default: 1240 default:
1215 { 1241 {
1216 ulong shadow_srr1 = vcpu->arch.shadow_srr1; 1242 ulong shadow_srr1 = vcpu->arch.shadow_srr1;
@@ -1479,6 +1505,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1479 goto out; 1505 goto out;
1480 } 1506 }
1481 1507
1508 kvmppc_setup_debug(vcpu);
1509
1482 /* 1510 /*
1483 * Interrupts could be timers for the guest which we have to inject 1511 * Interrupts could be timers for the guest which we have to inject
1484 * again, so let's postpone them until we're in the guest and if we 1512 * again, so let's postpone them until we're in the guest and if we
@@ -1501,6 +1529,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1501 1529
1502 ret = __kvmppc_vcpu_run(kvm_run, vcpu); 1530 ret = __kvmppc_vcpu_run(kvm_run, vcpu);
1503 1531
1532 kvmppc_clear_debug(vcpu);
1533
1504 /* No need for kvm_guest_exit. It's done in handle_exit. 1534 /* No need for kvm_guest_exit. It's done in handle_exit.
1505 We also get here with interrupts enabled. */ 1535 We also get here with interrupts enabled. */
1506 1536
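
The MSR_SE dance above plugs Book3S PR into the standard guest-debug userspace API. A minimal sketch of how a VMM would drive single-stepping after this change, using the generic KVM ioctls (setup of the vcpu fd and the mmap'ed struct kvm_run is omitted for brevity):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void single_step(int vcpu_fd, struct kvm_run *run)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	ioctl(vcpu_fd, KVM_RUN, 0);
	if (run->exit_reason == KVM_EXIT_DEBUG) {
		/* exactly one guest instruction has executed */
	}
}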
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 46871d554057..a75ba38a2d81 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -92,7 +92,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
92 * we are the only setter, thus concurrent access is undefined 92 * we are the only setter, thus concurrent access is undefined
93 * to begin with. 93 * to begin with.
94 */ 94 */
95 if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL) 95 if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
96 state->asserted = 1; 96 state->asserted = 1;
97 else if (level == 0 || level == KVM_INTERRUPT_UNSET) { 97 else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
98 state->asserted = 0; 98 state->asserted = 0;
@@ -280,7 +280,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
280 if (!success) 280 if (!success)
281 goto bail; 281 goto bail;
282 282
283 XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n", 283 XICS_DBG("UPD [%04lx] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
284 icp->server_num, 284 icp->server_num,
285 old.cppr, old.mfrr, old.pending_pri, old.xisr, 285 old.cppr, old.mfrr, old.pending_pri, old.xisr,
286 old.need_resend, old.out_ee); 286 old.need_resend, old.out_ee);
@@ -336,7 +336,7 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
336 union kvmppc_icp_state old_state, new_state; 336 union kvmppc_icp_state old_state, new_state;
337 bool success; 337 bool success;
338 338
339 XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority, 339 XICS_DBG("try deliver %#x(P:%#x) to server %#lx\n", irq, priority,
340 icp->server_num); 340 icp->server_num);
341 341
342 do { 342 do {
@@ -1174,9 +1174,11 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
1174 prio = irqp->saved_priority; 1174 prio = irqp->saved_priority;
1175 } 1175 }
1176 val |= prio << KVM_XICS_PRIORITY_SHIFT; 1176 val |= prio << KVM_XICS_PRIORITY_SHIFT;
1177 if (irqp->asserted) 1177 if (irqp->lsi) {
1178 val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING; 1178 val |= KVM_XICS_LEVEL_SENSITIVE;
1179 else if (irqp->masked_pending || irqp->resend) 1179 if (irqp->asserted)
1180 val |= KVM_XICS_PENDING;
1181 } else if (irqp->masked_pending || irqp->resend)
1180 val |= KVM_XICS_PENDING; 1182 val |= KVM_XICS_PENDING;
1181 ret = 0; 1183 ret = 0;
1182 } 1184 }
@@ -1228,9 +1230,13 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
1228 irqp->priority = prio; 1230 irqp->priority = prio;
1229 irqp->resend = 0; 1231 irqp->resend = 0;
1230 irqp->masked_pending = 0; 1232 irqp->masked_pending = 0;
1233 irqp->lsi = 0;
1231 irqp->asserted = 0; 1234 irqp->asserted = 0;
1232 if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE)) 1235 if (val & KVM_XICS_LEVEL_SENSITIVE) {
1233 irqp->asserted = 1; 1236 irqp->lsi = 1;
1237 if (val & KVM_XICS_PENDING)
1238 irqp->asserted = 1;
1239 }
1234 irqp->exists = 1; 1240 irqp->exists = 1;
1235 arch_spin_unlock(&ics->lock); 1241 arch_spin_unlock(&ics->lock);
1236 local_irq_restore(flags); 1242 local_irq_restore(flags);
@@ -1249,11 +1255,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1249 return ics_deliver_irq(xics, irq, level); 1255 return ics_deliver_irq(xics, irq, level);
1250} 1256}
1251 1257
1252int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, 1258int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
1253 int irq_source_id, int level, bool line_status) 1259 struct kvm *kvm, int irq_source_id,
1260 int level, bool line_status)
1254{ 1261{
1255 if (!level)
1256 return -1;
1257 return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi, 1262 return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
1258 level, line_status); 1263 level, line_status);
1259} 1264}
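
The one-reg-style save/restore format used by xics_get_source()/xics_set_source() now round-trips the LSI property explicitly. Decoding a saved source word with the uapi constants referenced in the hunk might look like the sketch below; the 8-bit priority width is an assumption about the uapi layout, and the snippet compiles only against the powerpc uapi headers:

/* val as produced by xics_get_source() */
bool lsi     = val & KVM_XICS_LEVEL_SENSITIVE;
bool pending = val & KVM_XICS_PENDING;
u8   prio    = (val >> KVM_XICS_PRIORITY_SHIFT) & 0xff;

/* For an LSI, "pending" now means asserted; for an MSI it reflects
 * masked_pending/resend, matching the get/set logic above. */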
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 56ea44f9867f..a46b954055c4 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -39,6 +39,7 @@ struct ics_irq_state {
39 u8 saved_priority; 39 u8 saved_priority;
40 u8 resend; 40 u8 resend;
41 u8 masked_pending; 41 u8 masked_pending;
42 u8 lsi; /* level-sensitive interrupt */
42 u8 asserted; /* Only for LSI */ 43 u8 asserted; /* Only for LSI */
43 u8 exists; 44 u8 exists;
44}; 45};
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4d66f44a1657..4afae695899a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -64,6 +64,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
64 { "ext_intr", VCPU_STAT(ext_intr_exits) }, 64 { "ext_intr", VCPU_STAT(ext_intr_exits) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 68 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "doorbell", VCPU_STAT(dbell_exits) }, 69 { "doorbell", VCPU_STAT(dbell_exits) },
69 { "guest doorbell", VCPU_STAT(gdbell_exits) }, 70 { "guest doorbell", VCPU_STAT(gdbell_exits) },
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6a68730774ee..02416fea7653 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -800,9 +800,9 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
800 } 800 }
801} 801}
802 802
803int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, 803static int __kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
804 unsigned int rt, unsigned int bytes, 804 unsigned int rt, unsigned int bytes,
805 int is_default_endian) 805 int is_default_endian, int sign_extend)
806{ 806{
807 int idx, ret; 807 int idx, ret;
808 bool host_swabbed; 808 bool host_swabbed;
@@ -827,7 +827,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
827 vcpu->arch.mmio_host_swabbed = host_swabbed; 827 vcpu->arch.mmio_host_swabbed = host_swabbed;
828 vcpu->mmio_needed = 1; 828 vcpu->mmio_needed = 1;
829 vcpu->mmio_is_write = 0; 829 vcpu->mmio_is_write = 0;
830 vcpu->arch.mmio_sign_extend = 0; 830 vcpu->arch.mmio_sign_extend = sign_extend;
831 831
832 idx = srcu_read_lock(&vcpu->kvm->srcu); 832 idx = srcu_read_lock(&vcpu->kvm->srcu);
833 833
@@ -844,6 +844,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
844 844
845 return EMULATE_DO_MMIO; 845 return EMULATE_DO_MMIO;
846} 846}
847
848int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
849 unsigned int rt, unsigned int bytes,
850 int is_default_endian)
851{
852 return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 0);
853}
847EXPORT_SYMBOL_GPL(kvmppc_handle_load); 854EXPORT_SYMBOL_GPL(kvmppc_handle_load);
848 855
849/* Same as above, but sign extends */ 856/* Same as above, but sign extends */
@@ -851,12 +858,7 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
851 unsigned int rt, unsigned int bytes, 858 unsigned int rt, unsigned int bytes,
852 int is_default_endian) 859 int is_default_endian)
853{ 860{
854 int r; 861 return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
855
856 vcpu->arch.mmio_sign_extend = 1;
857 r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);
858
859 return r;
860} 862}
861 863
862int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, 864int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6da41fab70fb..37b9017c6a96 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -38,7 +38,7 @@
38 */ 38 */
39#define KVM_NR_IRQCHIPS 1 39#define KVM_NR_IRQCHIPS 1
40#define KVM_IRQCHIP_NUM_PINS 4096 40#define KVM_IRQCHIP_NUM_PINS 4096
41#define KVM_HALT_POLL_NS_DEFAULT 0 41#define KVM_HALT_POLL_NS_DEFAULT 80000
42 42
43/* s390-specific vcpu->requests bit members */ 43/* s390-specific vcpu->requests bit members */
44#define KVM_REQ_ENABLE_IBS 8 44#define KVM_REQ_ENABLE_IBS 8
@@ -247,6 +247,7 @@ struct kvm_vcpu_stat {
247 u32 exit_instruction; 247 u32 exit_instruction;
248 u32 halt_successful_poll; 248 u32 halt_successful_poll;
249 u32 halt_attempted_poll; 249 u32 halt_attempted_poll;
250 u32 halt_poll_invalid;
250 u32 halt_wakeup; 251 u32 halt_wakeup;
251 u32 instruction_lctl; 252 u32 instruction_lctl;
252 u32 instruction_lctlg; 253 u32 instruction_lctlg;
@@ -544,10 +545,6 @@ struct kvm_vcpu_arch {
544 struct kvm_s390_local_interrupt local_int; 545 struct kvm_s390_local_interrupt local_int;
545 struct hrtimer ckc_timer; 546 struct hrtimer ckc_timer;
546 struct kvm_s390_pgm_info pgm; 547 struct kvm_s390_pgm_info pgm;
547 union {
548 struct cpuid cpu_id;
549 u64 stidp_data;
550 };
551 struct gmap *gmap; 548 struct gmap *gmap;
552 struct kvm_guestdbg_info_arch guestdbg; 549 struct kvm_guestdbg_info_arch guestdbg;
553 unsigned long pfault_token; 550 unsigned long pfault_token;
@@ -605,7 +602,7 @@ struct kvm_s390_cpu_model {
605 __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; 602 __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
606 /* facility list requested by guest (in dma page) */ 603 /* facility list requested by guest (in dma page) */
607 __u64 *fac_list; 604 __u64 *fac_list;
608 struct cpuid cpu_id; 605 u64 cpuid;
609 unsigned short ibc; 606 unsigned short ibc;
610}; 607};
611 608
@@ -700,4 +697,6 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
700static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} 697static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
701static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} 698static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
702 699
700void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
701
703#endif 702#endif
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index bd7893d274fa..e4f6f73afe2f 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -69,6 +69,7 @@ struct sclp_info {
69 unsigned int max_cores; 69 unsigned int max_cores;
70 unsigned long hsa_size; 70 unsigned long hsa_size;
71 unsigned long facilities; 71 unsigned long facilities;
72 unsigned int hmfai;
72}; 73};
73extern struct sclp_info sclp; 74extern struct sclp_info sclp;
74 75
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index ec60cf7fa0a2..1c8f33fca356 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -27,6 +27,7 @@
27 27
28/* SIGP cpu status bits */ 28/* SIGP cpu status bits */
29 29
30#define SIGP_STATUS_INVALID_ORDER 0x00000002UL
30#define SIGP_STATUS_CHECK_STOP 0x00000010UL 31#define SIGP_STATUS_CHECK_STOP 0x00000010UL
31#define SIGP_STATUS_STOPPED 0x00000040UL 32#define SIGP_STATUS_STOPPED 0x00000040UL
32#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL 33#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 347fe5afa419..3b8e99ef9d58 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@
25#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5 25#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
26#define KVM_DEV_FLIC_ADAPTER_REGISTER 6 26#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
27#define KVM_DEV_FLIC_ADAPTER_MODIFY 7 27#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
28#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
28/* 29/*
29 * We can have up to 4*64k pending subchannels + 8 adapter interrupts, 30 * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
30 * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. 31 * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 5dbaa72baa64..8fb5d4a6dd25 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -16,14 +16,19 @@
16 { 0x01, "SIGP sense" }, \ 16 { 0x01, "SIGP sense" }, \
17 { 0x02, "SIGP external call" }, \ 17 { 0x02, "SIGP external call" }, \
18 { 0x03, "SIGP emergency signal" }, \ 18 { 0x03, "SIGP emergency signal" }, \
19 { 0x04, "SIGP start" }, \
19 { 0x05, "SIGP stop" }, \ 20 { 0x05, "SIGP stop" }, \
20 { 0x06, "SIGP restart" }, \ 21 { 0x06, "SIGP restart" }, \
21 { 0x09, "SIGP stop and store status" }, \ 22 { 0x09, "SIGP stop and store status" }, \
22 { 0x0b, "SIGP initial cpu reset" }, \ 23 { 0x0b, "SIGP initial cpu reset" }, \
24 { 0x0c, "SIGP cpu reset" }, \
23 { 0x0d, "SIGP set prefix" }, \ 25 { 0x0d, "SIGP set prefix" }, \
24 { 0x0e, "SIGP store status at address" }, \ 26 { 0x0e, "SIGP store status at address" }, \
25 { 0x12, "SIGP set architecture" }, \ 27 { 0x12, "SIGP set architecture" }, \
26 { 0x15, "SIGP sense running" } 28 { 0x13, "SIGP conditional emergency signal" }, \
29 { 0x15, "SIGP sense running" }, \
30 { 0x16, "SIGP set multithreading"}, \
31 { 0x17, "SIGP store additional status ait address"}
27 32
28#define icpt_prog_codes \ 33#define icpt_prog_codes \
29 { 0x0001, "Prog Operation" }, \ 34 { 0x0001, "Prog Operation" }, \
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5ea5af3c7db7..b1900239b0ab 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -28,6 +28,7 @@ config KVM
28 select HAVE_KVM_IRQCHIP 28 select HAVE_KVM_IRQCHIP
29 select HAVE_KVM_IRQFD 29 select HAVE_KVM_IRQFD
30 select HAVE_KVM_IRQ_ROUTING 30 select HAVE_KVM_IRQ_ROUTING
31 select HAVE_KVM_INVALID_WAKEUPS
31 select SRCU 32 select SRCU
32 select KVM_VFIO 33 select KVM_VFIO
33 ---help--- 34 ---help---
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 84efc2ba6a90..5a80af740d3e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -977,6 +977,11 @@ no_timer:
977 977
978void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) 978void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
979{ 979{
980 /*
981 * We cannot move this into the if, as the CPU might be already
982 * in kvm_vcpu_block without having the waitqueue set (polling)
983 */
984 vcpu->valid_wakeup = true;
980 if (swait_active(&vcpu->wq)) { 985 if (swait_active(&vcpu->wq)) {
981 /* 986 /*
982 * The vcpu gave up the cpu voluntarily, mark it as a good 987 * The vcpu gave up the cpu voluntarily, mark it as a good
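
A minimal sketch of the wakeup-qualification pattern this hunk introduces (the stub types and names below are invented for illustration, not the real kvm ones): the waker must set valid_wakeup before testing the waitqueue, because a halt-polling vcpu consumes wakeups without ever arming its waitqueue.

    #include <stdbool.h>

    /* Sketch only: why valid_wakeup is set unconditionally before the
     * swait_active() check above. A vcpu in the halt-polling window
     * never arms its waitqueue, so a waker that acted only on an armed
     * waitqueue would leave the poll loop unable to tell a real wakeup
     * apart from a spurious one. */
    struct vcpu_stub {
            bool valid_wakeup;      /* cleared in kvm_arch_vcpu_block_finish() */
            bool waitqueue_armed;   /* stand-in for swait_active(&vcpu->wq) */
    };

    static void wakeup_stub(struct vcpu_stub *v)
    {
            v->valid_wakeup = true;         /* always, for the poller's sake */
            if (v->waitqueue_armed) {
                    /* count a good wakeup and wake the thread, as above */
            }
    }
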
@@ -2034,6 +2039,27 @@ static int modify_io_adapter(struct kvm_device *dev,
2034 return ret; 2039 return ret;
2035} 2040}
2036 2041
2042static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
2043
2044{
2045 const u64 isc_mask = 0xffUL << 24; /* all iscs set */
2046 u32 schid;
2047
2048 if (attr->flags)
2049 return -EINVAL;
2050 if (attr->attr != sizeof(schid))
2051 return -EINVAL;
2052 if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid)))
2053 return -EFAULT;
2054 kfree(kvm_s390_get_io_int(kvm, isc_mask, schid));
2055 /*
2056 * If userspace is conforming to the architecture, we can have at most
2057 * one pending I/O interrupt per subchannel, so this is effectively a
2058 * clear all.
2059 */
2060 return 0;
2061}
2062
2037static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) 2063static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2038{ 2064{
2039 int r = 0; 2065 int r = 0;
@@ -2067,6 +2093,9 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2067 case KVM_DEV_FLIC_ADAPTER_MODIFY: 2093 case KVM_DEV_FLIC_ADAPTER_MODIFY:
2068 r = modify_io_adapter(dev, attr); 2094 r = modify_io_adapter(dev, attr);
2069 break; 2095 break;
2096 case KVM_DEV_FLIC_CLEAR_IO_IRQ:
2097 r = clear_io_irq(dev->kvm, attr);
2098 break;
2070 default: 2099 default:
2071 r = -EINVAL; 2100 r = -EINVAL;
2072 } 2101 }
@@ -2074,6 +2103,23 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2074 return r; 2103 return r;
2075} 2104}
2076 2105
2106static int flic_has_attr(struct kvm_device *dev,
2107 struct kvm_device_attr *attr)
2108{
2109 switch (attr->group) {
2110 case KVM_DEV_FLIC_GET_ALL_IRQS:
2111 case KVM_DEV_FLIC_ENQUEUE:
2112 case KVM_DEV_FLIC_CLEAR_IRQS:
2113 case KVM_DEV_FLIC_APF_ENABLE:
2114 case KVM_DEV_FLIC_APF_DISABLE_WAIT:
2115 case KVM_DEV_FLIC_ADAPTER_REGISTER:
2116 case KVM_DEV_FLIC_ADAPTER_MODIFY:
2117 case KVM_DEV_FLIC_CLEAR_IO_IRQ:
2118 return 0;
2119 }
2120 return -ENXIO;
2121}
2122
2077static int flic_create(struct kvm_device *dev, u32 type) 2123static int flic_create(struct kvm_device *dev, u32 type)
2078{ 2124{
2079 if (!dev) 2125 if (!dev)
@@ -2095,6 +2141,7 @@ struct kvm_device_ops kvm_flic_ops = {
2095 .name = "kvm-flic", 2141 .name = "kvm-flic",
2096 .get_attr = flic_get_attr, 2142 .get_attr = flic_get_attr,
2097 .set_attr = flic_set_attr, 2143 .set_attr = flic_set_attr,
2144 .has_attr = flic_has_attr,
2098 .create = flic_create, 2145 .create = flic_create,
2099 .destroy = flic_destroy, 2146 .destroy = flic_destroy,
2100}; 2147};
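
For context, the new KVM_DEV_FLIC_CLEAR_IO_IRQ group is driven from userspace through the usual device-attribute ioctl. A hedged sketch follows; error handling is elided, clear_one_io_irq is an invented helper name, and the field usage mirrors the checks in clear_io_irq() above (flags must be zero, attr carries sizeof(schid), addr points at the subchannel id):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* flic_fd: a FLIC device fd obtained earlier via KVM_CREATE_DEVICE. */
    static int clear_one_io_irq(int flic_fd, uint32_t schid)
    {
            struct kvm_device_attr attr = {
                    .group = KVM_DEV_FLIC_CLEAR_IO_IRQ,
                    .attr  = sizeof(schid),        /* checked in clear_io_irq() */
                    .addr  = (uint64_t)(uintptr_t)&schid,
            };

            /* At most one I/O interrupt per subchannel can be pending,
             * so one call clears everything for that subchannel. */
            return ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);
    }
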
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 668c087513e5..6d8ec3ac9dd8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -65,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
65 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, 65 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
66 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 66 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
67 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 67 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
68 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
68 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 69 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
69 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, 70 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
70 { "instruction_lctl", VCPU_STAT(instruction_lctl) }, 71 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
@@ -118,9 +119,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
118}; 119};
119 120
120/* upper facilities limit for kvm */ 121/* upper facilities limit for kvm */
121unsigned long kvm_s390_fac_list_mask[] = { 122unsigned long kvm_s390_fac_list_mask[16] = {
122 0xffe6fffbfcfdfc40UL, 123 0xffe6000000000000UL,
123 0x005e800000000000UL, 124 0x005e000000000000UL,
124}; 125};
125 126
126unsigned long kvm_s390_fac_list_mask_size(void) 127unsigned long kvm_s390_fac_list_mask_size(void)
@@ -638,6 +639,7 @@ static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
638static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) 639static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
639{ 640{
640 struct kvm_s390_vm_cpu_processor *proc; 641 struct kvm_s390_vm_cpu_processor *proc;
642 u16 lowest_ibc, unblocked_ibc;
641 int ret = 0; 643 int ret = 0;
642 644
643 mutex_lock(&kvm->lock); 645 mutex_lock(&kvm->lock);
@@ -652,9 +654,17 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
652 } 654 }
653 if (!copy_from_user(proc, (void __user *)attr->addr, 655 if (!copy_from_user(proc, (void __user *)attr->addr,
654 sizeof(*proc))) { 656 sizeof(*proc))) {
655 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, 657 kvm->arch.model.cpuid = proc->cpuid;
656 sizeof(struct cpuid)); 658 lowest_ibc = sclp.ibc >> 16 & 0xfff;
657 kvm->arch.model.ibc = proc->ibc; 659 unblocked_ibc = sclp.ibc & 0xfff;
660 if (lowest_ibc) {
661 if (proc->ibc > unblocked_ibc)
662 kvm->arch.model.ibc = unblocked_ibc;
663 else if (proc->ibc < lowest_ibc)
664 kvm->arch.model.ibc = lowest_ibc;
665 else
666 kvm->arch.model.ibc = proc->ibc;
667 }
658 memcpy(kvm->arch.model.fac_list, proc->fac_list, 668 memcpy(kvm->arch.model.fac_list, proc->fac_list,
659 S390_ARCH_FAC_LIST_SIZE_BYTE); 669 S390_ARCH_FAC_LIST_SIZE_BYTE);
660 } else 670 } else
@@ -687,7 +697,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
687 ret = -ENOMEM; 697 ret = -ENOMEM;
688 goto out; 698 goto out;
689 } 699 }
690 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); 700 proc->cpuid = kvm->arch.model.cpuid;
691 proc->ibc = kvm->arch.model.ibc; 701 proc->ibc = kvm->arch.model.ibc;
692 memcpy(&proc->fac_list, kvm->arch.model.fac_list, 702 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
693 S390_ARCH_FAC_LIST_SIZE_BYTE); 703 S390_ARCH_FAC_LIST_SIZE_BYTE);
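
The IBC clamping added to kvm_s390_set_processor() is self-contained arithmetic; here is a standalone sketch of the same logic (clamp_ibc is an invented name), with the packing of sclp.ibc read off the shifts above. A zero lowest-IBC field means the facility is absent, in which case the kernel skips the store entirely, modeled here by returning the current value:

    #include <stdint.h>

    /* Sketch of the clamp in kvm_s390_set_processor() above. */
    static uint16_t clamp_ibc(uint32_t sclp_ibc, uint16_t requested,
                              uint16_t current_ibc)
    {
            uint16_t lowest    = (sclp_ibc >> 16) & 0xfff;  /* lowest supported */
            uint16_t unblocked = sclp_ibc & 0xfff;          /* highest unblocked */

            if (!lowest)            /* no IBC: model.ibc is left untouched */
                    return current_ibc;
            if (requested > unblocked)
                    return unblocked;
            if (requested < lowest)
                    return lowest;
            return requested;
    }

With sclp.ibc == 0x00220042, for example, a requested 0x010 clamps up to 0x022 and a requested 0x050 clamps down to 0x042.
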
@@ -1081,10 +1091,13 @@ static void kvm_s390_set_crycb_format(struct kvm *kvm)
1081 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1; 1091 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1082} 1092}
1083 1093
1084static void kvm_s390_get_cpu_id(struct cpuid *cpu_id) 1094static u64 kvm_s390_get_initial_cpuid(void)
1085{ 1095{
1086 get_cpu_id(cpu_id); 1096 struct cpuid cpuid;
1087 cpu_id->version = 0xff; 1097
1098 get_cpu_id(&cpuid);
1099 cpuid.version = 0xff;
1100 return *((u64 *) &cpuid);
1088} 1101}
1089 1102
1090static void kvm_s390_crypto_init(struct kvm *kvm) 1103static void kvm_s390_crypto_init(struct kvm *kvm)
@@ -1175,7 +1188,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1175 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, 1188 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1176 S390_ARCH_FAC_LIST_SIZE_BYTE); 1189 S390_ARCH_FAC_LIST_SIZE_BYTE);
1177 1190
1178 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); 1191 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1179 kvm->arch.model.ibc = sclp.ibc & 0x0fff; 1192 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1180 1193
1181 kvm_s390_crypto_init(kvm); 1194 kvm_s390_crypto_init(kvm);
@@ -1624,7 +1637,6 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1624{ 1637{
1625 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; 1638 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1626 1639
1627 vcpu->arch.cpu_id = model->cpu_id;
1628 vcpu->arch.sie_block->ibc = model->ibc; 1640 vcpu->arch.sie_block->ibc = model->ibc;
1629 if (test_kvm_facility(vcpu->kvm, 7)) 1641 if (test_kvm_facility(vcpu->kvm, 7))
1630 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; 1642 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
@@ -1645,11 +1657,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1645 1657
1646 kvm_s390_vcpu_setup_model(vcpu); 1658 kvm_s390_vcpu_setup_model(vcpu);
1647 1659
1648 vcpu->arch.sie_block->ecb = 6; 1660 vcpu->arch.sie_block->ecb = 0x02;
1661 if (test_kvm_facility(vcpu->kvm, 9))
1662 vcpu->arch.sie_block->ecb |= 0x04;
1649 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) 1663 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1650 vcpu->arch.sie_block->ecb |= 0x10; 1664 vcpu->arch.sie_block->ecb |= 0x10;
1651 1665
1652 vcpu->arch.sie_block->ecb2 = 8; 1666 if (test_kvm_facility(vcpu->kvm, 8))
1667 vcpu->arch.sie_block->ecb2 |= 0x08;
1653 vcpu->arch.sie_block->eca = 0xC1002000U; 1668 vcpu->arch.sie_block->eca = 0xC1002000U;
1654 if (sclp.has_siif) 1669 if (sclp.has_siif)
1655 vcpu->arch.sie_block->eca |= 1; 1670 vcpu->arch.sie_block->eca |= 1;
@@ -2971,13 +2986,31 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
2971 return; 2986 return;
2972} 2987}
2973 2988
2989static inline unsigned long nonhyp_mask(int i)
2990{
2991 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
2992
2993 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
2994}
2995
2996void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
2997{
2998 vcpu->valid_wakeup = false;
2999}
3000
2974static int __init kvm_s390_init(void) 3001static int __init kvm_s390_init(void)
2975{ 3002{
3003 int i;
3004
2976 if (!sclp.has_sief2) { 3005 if (!sclp.has_sief2) {
2977 pr_info("SIE not available\n"); 3006 pr_info("SIE not available\n");
2978 return -ENODEV; 3007 return -ENODEV;
2979 } 3008 }
2980 3009
3010 for (i = 0; i < 16; i++)
3011 kvm_s390_fac_list_mask[i] |=
3012 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3013
2981 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); 3014 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2982} 3015}
2983 3016
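
The nonhyp_mask() arithmetic above is compact; the following self-contained demo (nonhyp_mask_demo is an invented name and the sample hmfai value is made up) shows what each 2-bit hmfai field turns into. My reading is that field i says how much of facility word i needs hypervisor involvement and must therefore stay masked out of kvm_s390_fac_list_mask; the arithmetic itself is checkable as written:

    #include <stdio.h>

    /* Same arithmetic as nonhyp_mask() above: the i-th 2-bit field of
     * hmfai (counted from the top) decides how much of the 48 usable
     * bits of facility word i survives the mask. */
    static unsigned long nonhyp_mask_demo(unsigned int hmfai, int i)
    {
            unsigned int fai = (hmfai << i * 2) >> 30;   /* extract field i */

            return 0x0000ffffffffffffUL >> (fai << 4);   /* drop 16 bits per step */
    }

    int main(void)
    {
            int i;

            /* 0x1b000000 packs fields 0, 1, 2, 3 into the top byte;
             * prints 0x0000ffffffffffff, 0x00000000ffffffff,
             *        0x000000000000ffff, 0x0000000000000000 */
            for (i = 0; i < 4; i++)
                    printf("word %d: 0x%016lx\n", i,
                           nonhyp_mask_demo(0x1b000000u, i));
            return 0;
    }
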
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0a1591d3d25d..95916fa7c670 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -439,7 +439,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
439 439
440static int handle_stidp(struct kvm_vcpu *vcpu) 440static int handle_stidp(struct kvm_vcpu *vcpu)
441{ 441{
442 u64 stidp_data = vcpu->arch.stidp_data; 442 u64 stidp_data = vcpu->kvm->arch.model.cpuid;
443 u64 operand2; 443 u64 operand2;
444 int rc; 444 int rc;
445 ar_t ar; 445 ar_t ar;
@@ -670,8 +670,9 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
670 if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED) 670 if (vcpu->run->s.regs.gprs[reg1] & PFMF_RESERVED)
671 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 671 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
672 672
673 /* Only provide non-quiescing support if the host supports it */ 673 /* Only provide non-quiescing support if enabled for the guest */
674 if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14)) 674 if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
675 !test_kvm_facility(vcpu->kvm, 14))
675 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); 676 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
676 677
677 /* No support for conditional-SSKE */ 678 /* No support for conditional-SSKE */
@@ -744,7 +745,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
744{ 745{
745 /* entries expected to be 1FF */ 746 /* entries expected to be 1FF */
746 int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; 747 int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
747 unsigned long *cbrlo, cbrle; 748 unsigned long *cbrlo;
748 struct gmap *gmap; 749 struct gmap *gmap;
749 int i; 750 int i;
750 751
@@ -765,17 +766,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
765 vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ 766 vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
766 cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); 767 cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
767 down_read(&gmap->mm->mmap_sem); 768 down_read(&gmap->mm->mmap_sem);
768 for (i = 0; i < entries; ++i) { 769 for (i = 0; i < entries; ++i)
769 cbrle = cbrlo[i]; 770 __gmap_zap(gmap, cbrlo[i]);
770 if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
771 /* invalid entry */
772 break;
773 /* try to free backing */
774 __gmap_zap(gmap, cbrle);
775 }
776 up_read(&gmap->mm->mmap_sem); 771 up_read(&gmap->mm->mmap_sem);
777 if (i < entries)
778 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
779 return 0; 772 return 0;
780} 773}
781 774
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 77c22d685c7a..28ea0cab1f1b 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -240,6 +240,12 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu,
240 struct kvm_s390_local_interrupt *li; 240 struct kvm_s390_local_interrupt *li;
241 int rc; 241 int rc;
242 242
243 if (!test_kvm_facility(vcpu->kvm, 9)) {
244 *reg &= 0xffffffff00000000UL;
245 *reg |= SIGP_STATUS_INVALID_ORDER;
246 return SIGP_CC_STATUS_STORED;
247 }
248
243 li = &dst_vcpu->arch.local_int; 249 li = &dst_vcpu->arch.local_int;
244 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { 250 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
245 /* running */ 251 /* running */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b7e394485a5f..e0fbe7e70dc1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -562,7 +562,6 @@ struct kvm_vcpu_arch {
562 struct { 562 struct {
563 u64 msr_val; 563 u64 msr_val;
564 u64 last_steal; 564 u64 last_steal;
565 u64 accum_steal;
566 struct gfn_to_hva_cache stime; 565 struct gfn_to_hva_cache stime;
567 struct kvm_steal_time steal; 566 struct kvm_steal_time steal;
568 } st; 567 } st;
@@ -774,6 +773,11 @@ struct kvm_arch {
774 u8 nr_reserved_ioapic_pins; 773 u8 nr_reserved_ioapic_pins;
775 774
776 bool disabled_lapic_found; 775 bool disabled_lapic_found;
776
777 /* Struct members for AVIC */
778 u32 ldr_mode;
779 struct page *avic_logical_id_table_page;
780 struct page *avic_physical_id_table_page;
777}; 781};
778 782
779struct kvm_vm_stat { 783struct kvm_vm_stat {
@@ -804,6 +808,7 @@ struct kvm_vcpu_stat {
804 u32 halt_exits; 808 u32 halt_exits;
805 u32 halt_successful_poll; 809 u32 halt_successful_poll;
806 u32 halt_attempted_poll; 810 u32 halt_attempted_poll;
811 u32 halt_poll_invalid;
807 u32 halt_wakeup; 812 u32 halt_wakeup;
808 u32 request_irq_exits; 813 u32 request_irq_exits;
809 u32 irq_exits; 814 u32 irq_exits;
@@ -848,6 +853,9 @@ struct kvm_x86_ops {
848 bool (*cpu_has_high_real_mode_segbase)(void); 853 bool (*cpu_has_high_real_mode_segbase)(void);
849 void (*cpuid_update)(struct kvm_vcpu *vcpu); 854 void (*cpuid_update)(struct kvm_vcpu *vcpu);
850 855
856 int (*vm_init)(struct kvm *kvm);
857 void (*vm_destroy)(struct kvm *kvm);
858
851 /* Create, but do not attach this VCPU */ 859 /* Create, but do not attach this VCPU */
852 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); 860 struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
853 void (*vcpu_free)(struct kvm_vcpu *vcpu); 861 void (*vcpu_free)(struct kvm_vcpu *vcpu);
@@ -914,7 +922,7 @@ struct kvm_x86_ops {
914 bool (*get_enable_apicv)(void); 922 bool (*get_enable_apicv)(void);
915 void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); 923 void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
916 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); 924 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
917 void (*hwapic_isr_update)(struct kvm *kvm, int isr); 925 void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
918 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); 926 void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
919 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); 927 void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
920 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); 928 void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
@@ -990,8 +998,13 @@ struct kvm_x86_ops {
990 */ 998 */
991 int (*pre_block)(struct kvm_vcpu *vcpu); 999 int (*pre_block)(struct kvm_vcpu *vcpu);
992 void (*post_block)(struct kvm_vcpu *vcpu); 1000 void (*post_block)(struct kvm_vcpu *vcpu);
1001
1002 void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
1003 void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
1004
993 int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, 1005 int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
994 uint32_t guest_irq, bool set); 1006 uint32_t guest_irq, bool set);
1007 void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
995}; 1008};
996 1009
997struct kvm_arch_async_pf { 1010struct kvm_arch_async_pf {
@@ -1341,7 +1354,18 @@ bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
1341void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, 1354void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
1342 struct kvm_lapic_irq *irq); 1355 struct kvm_lapic_irq *irq);
1343 1356
1344static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} 1357static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
1345static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} 1358{
1359 if (kvm_x86_ops->vcpu_blocking)
1360 kvm_x86_ops->vcpu_blocking(vcpu);
1361}
1362
1363static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
1364{
1365 if (kvm_x86_ops->vcpu_unblocking)
1366 kvm_x86_ops->vcpu_unblocking(vcpu);
1367}
1368
1369static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
1346 1370
1347#endif /* _ASM_X86_KVM_HOST_H */ 1371#endif /* _ASM_X86_KVM_HOST_H */
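
The stub-to-hook conversion above follows the usual optional-callback idiom: common code tests the function pointer, so a vendor module that does not need the hook simply leaves it NULL (per this series, the SVM/AVIC patches install vcpu_blocking/vcpu_unblocking; others need not). Reduced to its essentials, with invented stub names:

    /* Illustrative reduction of the hunk above, not kernel code. */
    struct x86_ops_stub {
            void (*vcpu_blocking)(void *vcpu);      /* NULL when unused */
            void (*vcpu_unblocking)(void *vcpu);
    };

    static struct x86_ops_stub *ops_stub;

    static inline void vcpu_blocking_stub(void *vcpu)
    {
            if (ops_stub->vcpu_blocking)            /* optional hook */
                    ops_stub->vcpu_blocking(vcpu);
    }
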
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 6136d99f537b..d0fe23ec7e98 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -78,7 +78,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
78 u32 exit_int_info; 78 u32 exit_int_info;
79 u32 exit_int_info_err; 79 u32 exit_int_info_err;
80 u64 nested_ctl; 80 u64 nested_ctl;
81 u8 reserved_4[16]; 81 u64 avic_vapic_bar;
82 u8 reserved_4[8];
82 u32 event_inj; 83 u32 event_inj;
83 u32 event_inj_err; 84 u32 event_inj_err;
84 u64 nested_cr3; 85 u64 nested_cr3;
@@ -88,7 +89,11 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
88 u64 next_rip; 89 u64 next_rip;
89 u8 insn_len; 90 u8 insn_len;
90 u8 insn_bytes[15]; 91 u8 insn_bytes[15];
91 u8 reserved_6[800]; 92 u64 avic_backing_page; /* Offset 0xe0 */
93 u8 reserved_6[8]; /* Offset 0xe8 */
94 u64 avic_logical_id; /* Offset 0xf0 */
95 u64 avic_physical_id; /* Offset 0xf8 */
96 u8 reserved_7[768];
92}; 97};
93 98
94 99
@@ -111,6 +116,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
111#define V_INTR_MASKING_SHIFT 24 116#define V_INTR_MASKING_SHIFT 24
112#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) 117#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
113 118
119#define AVIC_ENABLE_SHIFT 31
120#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
121
114#define SVM_INTERRUPT_SHADOW_MASK 1 122#define SVM_INTERRUPT_SHADOW_MASK 1
115 123
116#define SVM_IOIO_STR_SHIFT 2 124#define SVM_IOIO_STR_SHIFT 2
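
The offset comments added to vmcb_control_area are load-bearing: AVIC hardware reads these fields at fixed VMCB offsets, so they can be pinned down with compile-time checks. The kernel does not carry these asserts in this hunk; this is a sketch assuming the struct definition from <asm/svm.h> and a C11 compiler, with offsets taken from the comments above (0xf8 + 8 + 768 == 0x400, where the VMCB save area begins):

    #include <stddef.h>
    /* plus the header defining struct vmcb_control_area */

    _Static_assert(offsetof(struct vmcb_control_area, avic_backing_page) == 0xe0,
                   "AVIC backing page pointer at 0xe0");
    _Static_assert(offsetof(struct vmcb_control_area, avic_logical_id) == 0xf0,
                   "AVIC logical id table pointer at 0xf0");
    _Static_assert(offsetof(struct vmcb_control_area, avic_physical_id) == 0xf8,
                   "AVIC physical id table pointer at 0xf8");
    _Static_assert(sizeof(struct vmcb_control_area) == 0x400,
                   "control area fills the first 1K of the VMCB");
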
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index cd54147cb365..739c0c594022 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -216,9 +216,9 @@ struct kvm_cpuid_entry2 {
216 __u32 padding[3]; 216 __u32 padding[3];
217}; 217};
218 218
219#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) 219#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX (1 << 0)
220#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) 220#define KVM_CPUID_FLAG_STATEFUL_FUNC (1 << 1)
221#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) 221#define KVM_CPUID_FLAG_STATE_READ_NEXT (1 << 2)
222 222
223/* for KVM_SET_CPUID2 */ 223/* for KVM_SET_CPUID2 */
224struct kvm_cpuid2 { 224struct kvm_cpuid2 {
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 8a4add8e4639..b9e9bb2c6089 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -73,6 +73,8 @@
73#define SVM_EXIT_MWAIT_COND 0x08c 73#define SVM_EXIT_MWAIT_COND 0x08c
74#define SVM_EXIT_XSETBV 0x08d 74#define SVM_EXIT_XSETBV 0x08d
75#define SVM_EXIT_NPF 0x400 75#define SVM_EXIT_NPF 0x400
76#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
77#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
76 78
77#define SVM_EXIT_ERR -1 79#define SVM_EXIT_ERR -1
78 80
@@ -107,8 +109,10 @@
107 { SVM_EXIT_SMI, "smi" }, \ 109 { SVM_EXIT_SMI, "smi" }, \
108 { SVM_EXIT_INIT, "init" }, \ 110 { SVM_EXIT_INIT, "init" }, \
109 { SVM_EXIT_VINTR, "vintr" }, \ 111 { SVM_EXIT_VINTR, "vintr" }, \
112 { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
110 { SVM_EXIT_CPUID, "cpuid" }, \ 113 { SVM_EXIT_CPUID, "cpuid" }, \
111 { SVM_EXIT_INVD, "invd" }, \ 114 { SVM_EXIT_INVD, "invd" }, \
115 { SVM_EXIT_PAUSE, "pause" }, \
112 { SVM_EXIT_HLT, "hlt" }, \ 116 { SVM_EXIT_HLT, "hlt" }, \
113 { SVM_EXIT_INVLPG, "invlpg" }, \ 117 { SVM_EXIT_INVLPG, "invlpg" }, \
114 { SVM_EXIT_INVLPGA, "invlpga" }, \ 118 { SVM_EXIT_INVLPGA, "invlpga" }, \
@@ -127,7 +131,10 @@
127 { SVM_EXIT_MONITOR, "monitor" }, \ 131 { SVM_EXIT_MONITOR, "monitor" }, \
128 { SVM_EXIT_MWAIT, "mwait" }, \ 132 { SVM_EXIT_MWAIT, "mwait" }, \
129 { SVM_EXIT_XSETBV, "xsetbv" }, \ 133 { SVM_EXIT_XSETBV, "xsetbv" }, \
130 { SVM_EXIT_NPF, "npf" } 134 { SVM_EXIT_NPF, "npf" }, \
135 { SVM_EXIT_RSM, "rsm" }, \
136 { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
137 { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }
131 138
132 139
133#endif /* _UAPI__SVM_H */ 140#endif /* _UAPI__SVM_H */
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 9db47090ead0..5f42d038fcb4 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -443,7 +443,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
443 spin_lock(&ioapic->lock); 443 spin_lock(&ioapic->lock);
444 444
445 if (trigger_mode != IOAPIC_LEVEL_TRIG || 445 if (trigger_mode != IOAPIC_LEVEL_TRIG ||
446 kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) 446 kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
447 continue; 447 continue;
448 448
449 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 449 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 54ead79e444b..dfb4c6476877 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -382,9 +382,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
382 u32 i, nr_ioapic_pins; 382 u32 i, nr_ioapic_pins;
383 int idx; 383 int idx;
384 384
385 /* kvm->irq_routing must be read after clearing
386 * KVM_SCAN_IOAPIC. */
387 smp_mb();
388 idx = srcu_read_lock(&kvm->irq_srcu); 385 idx = srcu_read_lock(&kvm->irq_srcu);
389 table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); 386 table = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
390 nr_ioapic_pins = min_t(u32, table->nr_rt_entries, 387 nr_ioapic_pins = min_t(u32, table->nr_rt_entries,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1a2da0e5a373..bbb5b283ff63 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -59,9 +59,8 @@
59/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ 59/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
60#define apic_debug(fmt, arg...) 60#define apic_debug(fmt, arg...)
61 61
62#define APIC_LVT_NUM 6
63/* 14 is the version for Xeon and Pentium 8.4.8*/ 62/* 14 is the version for Xeon and Pentium 8.4.8*/
64#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) 63#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
65#define LAPIC_MMIO_LENGTH (1 << 12) 64#define LAPIC_MMIO_LENGTH (1 << 12)
66/* followed define is not in apicdef.h */ 65/* followed define is not in apicdef.h */
67#define APIC_SHORT_MASK 0xc0000 66#define APIC_SHORT_MASK 0xc0000
@@ -73,14 +72,6 @@
73#define APIC_BROADCAST 0xFF 72#define APIC_BROADCAST 0xFF
74#define X2APIC_BROADCAST 0xFFFFFFFFul 73#define X2APIC_BROADCAST 0xFFFFFFFFul
75 74
76#define VEC_POS(v) ((v) & (32 - 1))
77#define REG_POS(v) (((v) >> 5) << 4)
78
79static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
80{
81 *((u32 *) (apic->regs + reg_off)) = val;
82}
83
84static inline int apic_test_vector(int vec, void *bitmap) 75static inline int apic_test_vector(int vec, void *bitmap)
85{ 76{
86 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 77 return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -94,11 +85,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
94 apic_test_vector(vector, apic->regs + APIC_IRR); 85 apic_test_vector(vector, apic->regs + APIC_IRR);
95} 86}
96 87
97static inline void apic_set_vector(int vec, void *bitmap)
98{
99 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
100}
101
102static inline void apic_clear_vector(int vec, void *bitmap) 88static inline void apic_clear_vector(int vec, void *bitmap)
103{ 89{
104 clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); 90 clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -173,7 +159,7 @@ static void recalculate_apic_map(struct kvm *kvm)
173 continue; 159 continue;
174 160
175 aid = kvm_apic_id(apic); 161 aid = kvm_apic_id(apic);
176 ldr = kvm_apic_get_reg(apic, APIC_LDR); 162 ldr = kvm_lapic_get_reg(apic, APIC_LDR);
177 163
178 if (aid < ARRAY_SIZE(new->phys_map)) 164 if (aid < ARRAY_SIZE(new->phys_map))
179 new->phys_map[aid] = apic; 165 new->phys_map[aid] = apic;
@@ -182,7 +168,7 @@ static void recalculate_apic_map(struct kvm *kvm)
182 new->mode |= KVM_APIC_MODE_X2APIC; 168 new->mode |= KVM_APIC_MODE_X2APIC;
183 } else if (ldr) { 169 } else if (ldr) {
184 ldr = GET_APIC_LOGICAL_ID(ldr); 170 ldr = GET_APIC_LOGICAL_ID(ldr);
185 if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) 171 if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
186 new->mode |= KVM_APIC_MODE_XAPIC_FLAT; 172 new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
187 else 173 else
188 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; 174 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
@@ -212,7 +198,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
212{ 198{
213 bool enabled = val & APIC_SPIV_APIC_ENABLED; 199 bool enabled = val & APIC_SPIV_APIC_ENABLED;
214 200
215 apic_set_reg(apic, APIC_SPIV, val); 201 kvm_lapic_set_reg(apic, APIC_SPIV, val);
216 202
217 if (enabled != apic->sw_enabled) { 203 if (enabled != apic->sw_enabled) {
218 apic->sw_enabled = enabled; 204 apic->sw_enabled = enabled;
@@ -226,13 +212,13 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
226 212
227static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) 213static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
228{ 214{
229 apic_set_reg(apic, APIC_ID, id << 24); 215 kvm_lapic_set_reg(apic, APIC_ID, id << 24);
230 recalculate_apic_map(apic->vcpu->kvm); 216 recalculate_apic_map(apic->vcpu->kvm);
231} 217}
232 218
233static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) 219static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
234{ 220{
235 apic_set_reg(apic, APIC_LDR, id); 221 kvm_lapic_set_reg(apic, APIC_LDR, id);
236 recalculate_apic_map(apic->vcpu->kvm); 222 recalculate_apic_map(apic->vcpu->kvm);
237} 223}
238 224
@@ -240,19 +226,19 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
240{ 226{
241 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); 227 u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
242 228
243 apic_set_reg(apic, APIC_ID, id << 24); 229 kvm_lapic_set_reg(apic, APIC_ID, id << 24);
244 apic_set_reg(apic, APIC_LDR, ldr); 230 kvm_lapic_set_reg(apic, APIC_LDR, ldr);
245 recalculate_apic_map(apic->vcpu->kvm); 231 recalculate_apic_map(apic->vcpu->kvm);
246} 232}
247 233
248static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) 234static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
249{ 235{
250 return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); 236 return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
251} 237}
252 238
253static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) 239static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
254{ 240{
255 return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; 241 return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
256} 242}
257 243
258static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) 244static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
@@ -287,10 +273,10 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
287 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); 273 feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
288 if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) 274 if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
289 v |= APIC_LVR_DIRECTED_EOI; 275 v |= APIC_LVR_DIRECTED_EOI;
290 apic_set_reg(apic, APIC_LVR, v); 276 kvm_lapic_set_reg(apic, APIC_LVR, v);
291} 277}
292 278
293static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { 279static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
294 LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ 280 LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
295 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ 281 LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
296 LVT_MASK | APIC_MODE_MASK, /* LVTPC */ 282 LVT_MASK | APIC_MODE_MASK, /* LVTPC */
@@ -349,16 +335,6 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
349} 335}
350EXPORT_SYMBOL_GPL(kvm_apic_update_irr); 336EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
351 337
352static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
353{
354 apic_set_vector(vec, apic->regs + APIC_IRR);
355 /*
356 * irr_pending must be true if any interrupt is pending; set it after
357 * APIC_IRR to avoid race with apic_clear_irr
358 */
359 apic->irr_pending = true;
360}
361
362static inline int apic_search_irr(struct kvm_lapic *apic) 338static inline int apic_search_irr(struct kvm_lapic *apic)
363{ 339{
364 return find_highest_vector(apic->regs + APIC_IRR); 340 return find_highest_vector(apic->regs + APIC_IRR);
@@ -416,7 +392,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
416 * just set SVI. 392 * just set SVI.
417 */ 393 */
418 if (unlikely(vcpu->arch.apicv_active)) 394 if (unlikely(vcpu->arch.apicv_active))
419 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); 395 kvm_x86_ops->hwapic_isr_update(vcpu, vec);
420 else { 396 else {
421 ++apic->isr_count; 397 ++apic->isr_count;
422 BUG_ON(apic->isr_count > MAX_APIC_VECTOR); 398 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
@@ -464,7 +440,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
464 * and must be left alone. 440 * and must be left alone.
465 */ 441 */
466 if (unlikely(vcpu->arch.apicv_active)) 442 if (unlikely(vcpu->arch.apicv_active))
467 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, 443 kvm_x86_ops->hwapic_isr_update(vcpu,
468 apic_find_highest_isr(apic)); 444 apic_find_highest_isr(apic));
469 else { 445 else {
470 --apic->isr_count; 446 --apic->isr_count;
@@ -549,8 +525,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
549 u32 tpr, isrv, ppr, old_ppr; 525 u32 tpr, isrv, ppr, old_ppr;
550 int isr; 526 int isr;
551 527
552 old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI); 528 old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
553 tpr = kvm_apic_get_reg(apic, APIC_TASKPRI); 529 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
554 isr = apic_find_highest_isr(apic); 530 isr = apic_find_highest_isr(apic);
555 isrv = (isr != -1) ? isr : 0; 531 isrv = (isr != -1) ? isr : 0;
556 532
@@ -563,7 +539,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
563 apic, ppr, isr, isrv); 539 apic, ppr, isr, isrv);
564 540
565 if (old_ppr != ppr) { 541 if (old_ppr != ppr) {
566 apic_set_reg(apic, APIC_PROCPRI, ppr); 542 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
567 if (ppr < old_ppr) 543 if (ppr < old_ppr)
568 kvm_make_request(KVM_REQ_EVENT, apic->vcpu); 544 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
569 } 545 }
@@ -571,7 +547,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
571 547
572static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) 548static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
573{ 549{
574 apic_set_reg(apic, APIC_TASKPRI, tpr); 550 kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
575 apic_update_ppr(apic); 551 apic_update_ppr(apic);
576} 552}
577 553
@@ -601,7 +577,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
601 if (kvm_apic_broadcast(apic, mda)) 577 if (kvm_apic_broadcast(apic, mda))
602 return true; 578 return true;
603 579
604 logical_id = kvm_apic_get_reg(apic, APIC_LDR); 580 logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
605 581
606 if (apic_x2apic_mode(apic)) 582 if (apic_x2apic_mode(apic))
607 return ((logical_id >> 16) == (mda >> 16)) 583 return ((logical_id >> 16) == (mda >> 16))
@@ -610,7 +586,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
610 logical_id = GET_APIC_LOGICAL_ID(logical_id); 586 logical_id = GET_APIC_LOGICAL_ID(logical_id);
611 mda = GET_APIC_DEST_FIELD(mda); 587 mda = GET_APIC_DEST_FIELD(mda);
612 588
613 switch (kvm_apic_get_reg(apic, APIC_DFR)) { 589 switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
614 case APIC_DFR_FLAT: 590 case APIC_DFR_FLAT:
615 return (logical_id & mda) != 0; 591 return (logical_id & mda) != 0;
616 case APIC_DFR_CLUSTER: 592 case APIC_DFR_CLUSTER:
@@ -618,7 +594,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
618 && (logical_id & mda & 0xf) != 0; 594 && (logical_id & mda & 0xf) != 0;
619 default: 595 default:
620 apic_debug("Bad DFR vcpu %d: %08x\n", 596 apic_debug("Bad DFR vcpu %d: %08x\n",
621 apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); 597 apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
622 return false; 598 return false;
623 } 599 }
624} 600}
@@ -668,6 +644,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
668 return false; 644 return false;
669 } 645 }
670} 646}
647EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
671 648
672int kvm_vector_to_index(u32 vector, u32 dest_vcpus, 649int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
673 const unsigned long *bitmap, u32 bitmap_size) 650 const unsigned long *bitmap, u32 bitmap_size)
@@ -921,7 +898,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
921 898
922 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { 899 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
923 if (trig_mode) 900 if (trig_mode)
924 apic_set_vector(vector, apic->regs + APIC_TMR); 901 kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
925 else 902 else
926 apic_clear_vector(vector, apic->regs + APIC_TMR); 903 apic_clear_vector(vector, apic->regs + APIC_TMR);
927 } 904 }
@@ -929,7 +906,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
929 if (vcpu->arch.apicv_active) 906 if (vcpu->arch.apicv_active)
930 kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); 907 kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
931 else { 908 else {
932 apic_set_irr(vector, apic); 909 kvm_lapic_set_irr(vector, apic);
933 910
934 kvm_make_request(KVM_REQ_EVENT, vcpu); 911 kvm_make_request(KVM_REQ_EVENT, vcpu);
935 kvm_vcpu_kick(vcpu); 912 kvm_vcpu_kick(vcpu);
@@ -1073,8 +1050,8 @@ EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1073 1050
1074static void apic_send_ipi(struct kvm_lapic *apic) 1051static void apic_send_ipi(struct kvm_lapic *apic)
1075{ 1052{
1076 u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); 1053 u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
1077 u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2); 1054 u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
1078 struct kvm_lapic_irq irq; 1055 struct kvm_lapic_irq irq;
1079 1056
1080 irq.vector = icr_low & APIC_VECTOR_MASK; 1057 irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -1111,7 +1088,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
1111 ASSERT(apic != NULL); 1088 ASSERT(apic != NULL);
1112 1089
1113 /* if initial count is 0, current count should also be 0 */ 1090 /* if initial count is 0, current count should also be 0 */
1114 if (kvm_apic_get_reg(apic, APIC_TMICT) == 0 || 1091 if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1115 apic->lapic_timer.period == 0) 1092 apic->lapic_timer.period == 0)
1116 return 0; 1093 return 0;
1117 1094
@@ -1168,13 +1145,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1168 break; 1145 break;
1169 case APIC_PROCPRI: 1146 case APIC_PROCPRI:
1170 apic_update_ppr(apic); 1147 apic_update_ppr(apic);
1171 val = kvm_apic_get_reg(apic, offset); 1148 val = kvm_lapic_get_reg(apic, offset);
1172 break; 1149 break;
1173 case APIC_TASKPRI: 1150 case APIC_TASKPRI:
1174 report_tpr_access(apic, false); 1151 report_tpr_access(apic, false);
1175 /* fall thru */ 1152 /* fall thru */
1176 default: 1153 default:
1177 val = kvm_apic_get_reg(apic, offset); 1154 val = kvm_lapic_get_reg(apic, offset);
1178 break; 1155 break;
1179 } 1156 }
1180 1157
@@ -1186,7 +1163,7 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1186 return container_of(dev, struct kvm_lapic, dev); 1163 return container_of(dev, struct kvm_lapic, dev);
1187} 1164}
1188 1165
1189static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, 1166int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1190 void *data) 1167 void *data)
1191{ 1168{
1192 unsigned char alignment = offset & 0xf; 1169 unsigned char alignment = offset & 0xf;
@@ -1223,6 +1200,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1223 } 1200 }
1224 return 0; 1201 return 0;
1225} 1202}
1203EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
1226 1204
1227static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) 1205static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1228{ 1206{
@@ -1240,7 +1218,7 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1240 if (!apic_mmio_in_range(apic, address)) 1218 if (!apic_mmio_in_range(apic, address))
1241 return -EOPNOTSUPP; 1219 return -EOPNOTSUPP;
1242 1220
1243 apic_reg_read(apic, offset, len, data); 1221 kvm_lapic_reg_read(apic, offset, len, data);
1244 1222
1245 return 0; 1223 return 0;
1246} 1224}
@@ -1249,7 +1227,7 @@ static void update_divide_count(struct kvm_lapic *apic)
1249{ 1227{
1250 u32 tmp1, tmp2, tdcr; 1228 u32 tmp1, tmp2, tdcr;
1251 1229
1252 tdcr = kvm_apic_get_reg(apic, APIC_TDCR); 1230 tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1253 tmp1 = tdcr & 0xf; 1231 tmp1 = tdcr & 0xf;
1254 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; 1232 tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1255 apic->divide_count = 0x1 << (tmp2 & 0x7); 1233 apic->divide_count = 0x1 << (tmp2 & 0x7);
@@ -1260,7 +1238,7 @@ static void update_divide_count(struct kvm_lapic *apic)
1260 1238
1261static void apic_update_lvtt(struct kvm_lapic *apic) 1239static void apic_update_lvtt(struct kvm_lapic *apic)
1262{ 1240{
1263 u32 timer_mode = kvm_apic_get_reg(apic, APIC_LVTT) & 1241 u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1264 apic->lapic_timer.timer_mode_mask; 1242 apic->lapic_timer.timer_mode_mask;
1265 1243
1266 if (apic->lapic_timer.timer_mode != timer_mode) { 1244 if (apic->lapic_timer.timer_mode != timer_mode) {
@@ -1296,7 +1274,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
1296static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) 1274static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1297{ 1275{
1298 struct kvm_lapic *apic = vcpu->arch.apic; 1276 struct kvm_lapic *apic = vcpu->arch.apic;
1299 u32 reg = kvm_apic_get_reg(apic, APIC_LVTT); 1277 u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1300 1278
1301 if (kvm_apic_hw_enabled(apic)) { 1279 if (kvm_apic_hw_enabled(apic)) {
1302 int vec = reg & APIC_VECTOR_MASK; 1280 int vec = reg & APIC_VECTOR_MASK;
@@ -1344,7 +1322,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
1344 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { 1322 if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1345 /* lapic timer in oneshot or periodic mode */ 1323 /* lapic timer in oneshot or periodic mode */
1346 now = apic->lapic_timer.timer.base->get_time(); 1324 now = apic->lapic_timer.timer.base->get_time();
1347 apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT) 1325 apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
1348 * APIC_BUS_CYCLE_NS * apic->divide_count; 1326 * APIC_BUS_CYCLE_NS * apic->divide_count;
1349 1327
1350 if (!apic->lapic_timer.period) 1328 if (!apic->lapic_timer.period)
@@ -1376,7 +1354,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
1376 "timer initial count 0x%x, period %lldns, " 1354 "timer initial count 0x%x, period %lldns, "
1377 "expire @ 0x%016" PRIx64 ".\n", __func__, 1355 "expire @ 0x%016" PRIx64 ".\n", __func__,
1378 APIC_BUS_CYCLE_NS, ktime_to_ns(now), 1356 APIC_BUS_CYCLE_NS, ktime_to_ns(now),
1379 kvm_apic_get_reg(apic, APIC_TMICT), 1357 kvm_lapic_get_reg(apic, APIC_TMICT),
1380 apic->lapic_timer.period, 1358 apic->lapic_timer.period,
1381 ktime_to_ns(ktime_add_ns(now, 1359 ktime_to_ns(ktime_add_ns(now,
1382 apic->lapic_timer.period))); 1360 apic->lapic_timer.period)));
@@ -1425,7 +1403,7 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1425 } 1403 }
1426} 1404}
1427 1405
1428static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 1406int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1429{ 1407{
1430 int ret = 0; 1408 int ret = 0;
1431 1409
@@ -1457,7 +1435,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1457 1435
1458 case APIC_DFR: 1436 case APIC_DFR:
1459 if (!apic_x2apic_mode(apic)) { 1437 if (!apic_x2apic_mode(apic)) {
1460 apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); 1438 kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1461 recalculate_apic_map(apic->vcpu->kvm); 1439 recalculate_apic_map(apic->vcpu->kvm);
1462 } else 1440 } else
1463 ret = 1; 1441 ret = 1;
@@ -1465,17 +1443,17 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1465 1443
1466 case APIC_SPIV: { 1444 case APIC_SPIV: {
1467 u32 mask = 0x3ff; 1445 u32 mask = 0x3ff;
1468 if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) 1446 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1469 mask |= APIC_SPIV_DIRECTED_EOI; 1447 mask |= APIC_SPIV_DIRECTED_EOI;
1470 apic_set_spiv(apic, val & mask); 1448 apic_set_spiv(apic, val & mask);
1471 if (!(val & APIC_SPIV_APIC_ENABLED)) { 1449 if (!(val & APIC_SPIV_APIC_ENABLED)) {
1472 int i; 1450 int i;
1473 u32 lvt_val; 1451 u32 lvt_val;
1474 1452
1475 for (i = 0; i < APIC_LVT_NUM; i++) { 1453 for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1476 lvt_val = kvm_apic_get_reg(apic, 1454 lvt_val = kvm_lapic_get_reg(apic,
1477 APIC_LVTT + 0x10 * i); 1455 APIC_LVTT + 0x10 * i);
1478 apic_set_reg(apic, APIC_LVTT + 0x10 * i, 1456 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1479 lvt_val | APIC_LVT_MASKED); 1457 lvt_val | APIC_LVT_MASKED);
1480 } 1458 }
1481 apic_update_lvtt(apic); 1459 apic_update_lvtt(apic);
@@ -1486,14 +1464,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1486 } 1464 }
1487 case APIC_ICR: 1465 case APIC_ICR:
1488 /* No delay here, so we always clear the pending bit */ 1466 /* No delay here, so we always clear the pending bit */
1489 apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); 1467 kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
1490 apic_send_ipi(apic); 1468 apic_send_ipi(apic);
1491 break; 1469 break;
1492 1470
1493 case APIC_ICR2: 1471 case APIC_ICR2:
1494 if (!apic_x2apic_mode(apic)) 1472 if (!apic_x2apic_mode(apic))
1495 val &= 0xff000000; 1473 val &= 0xff000000;
1496 apic_set_reg(apic, APIC_ICR2, val); 1474 kvm_lapic_set_reg(apic, APIC_ICR2, val);
1497 break; 1475 break;
1498 1476
1499 case APIC_LVT0: 1477 case APIC_LVT0:
@@ -1507,7 +1485,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1507 val |= APIC_LVT_MASKED; 1485 val |= APIC_LVT_MASKED;
1508 1486
1509 val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; 1487 val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1510 apic_set_reg(apic, reg, val); 1488 kvm_lapic_set_reg(apic, reg, val);
1511 1489
1512 break; 1490 break;
1513 1491
@@ -1515,7 +1493,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1515 if (!kvm_apic_sw_enabled(apic)) 1493 if (!kvm_apic_sw_enabled(apic))
1516 val |= APIC_LVT_MASKED; 1494 val |= APIC_LVT_MASKED;
1517 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); 1495 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1518 apic_set_reg(apic, APIC_LVTT, val); 1496 kvm_lapic_set_reg(apic, APIC_LVTT, val);
1519 apic_update_lvtt(apic); 1497 apic_update_lvtt(apic);
1520 break; 1498 break;
1521 1499
@@ -1524,14 +1502,14 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1524 break; 1502 break;
1525 1503
1526 hrtimer_cancel(&apic->lapic_timer.timer); 1504 hrtimer_cancel(&apic->lapic_timer.timer);
1527 apic_set_reg(apic, APIC_TMICT, val); 1505 kvm_lapic_set_reg(apic, APIC_TMICT, val);
1528 start_apic_timer(apic); 1506 start_apic_timer(apic);
1529 break; 1507 break;
1530 1508
1531 case APIC_TDCR: 1509 case APIC_TDCR:
1532 if (val & 4) 1510 if (val & 4)
1533 apic_debug("KVM_WRITE:TDCR %x\n", val); 1511 apic_debug("KVM_WRITE:TDCR %x\n", val);
1534 apic_set_reg(apic, APIC_TDCR, val); 1512 kvm_lapic_set_reg(apic, APIC_TDCR, val);
1535 update_divide_count(apic); 1513 update_divide_count(apic);
1536 break; 1514 break;
1537 1515
@@ -1544,7 +1522,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1544 1522
1545 case APIC_SELF_IPI: 1523 case APIC_SELF_IPI:
1546 if (apic_x2apic_mode(apic)) { 1524 if (apic_x2apic_mode(apic)) {
1547 apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff)); 1525 kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1548 } else 1526 } else
1549 ret = 1; 1527 ret = 1;
1550 break; 1528 break;
@@ -1556,6 +1534,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1556 apic_debug("Local APIC Write to read-only register %x\n", reg); 1534 apic_debug("Local APIC Write to read-only register %x\n", reg);
1557 return ret; 1535 return ret;
1558} 1536}
1537EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
1559 1538
1560static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, 1539static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1561 gpa_t address, int len, const void *data) 1540 gpa_t address, int len, const void *data)
@@ -1585,14 +1564,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1585 apic_debug("%s: offset 0x%x with length 0x%x, and value is " 1564 apic_debug("%s: offset 0x%x with length 0x%x, and value is "
1586 "0x%x\n", __func__, offset, len, val); 1565 "0x%x\n", __func__, offset, len, val);
1587 1566
1588 apic_reg_write(apic, offset & 0xff0, val); 1567 kvm_lapic_reg_write(apic, offset & 0xff0, val);
1589 1568
1590 return 0; 1569 return 0;
1591} 1570}
1592 1571
1593void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) 1572void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1594{ 1573{
1595 apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); 1574 kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1596} 1575}
1597EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); 1576EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1598 1577
@@ -1604,10 +1583,10 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1604 /* hw has done the conditional check and inst decode */ 1583 /* hw has done the conditional check and inst decode */
1605 offset &= 0xff0; 1584 offset &= 0xff0;
1606 1585
1607 apic_reg_read(vcpu->arch.apic, offset, 4, &val); 1586 kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
1608 1587
1609 /* TODO: optimize to just emulate side effect w/o one more write */ 1588 /* TODO: optimize to just emulate side effect w/o one more write */
1610 apic_reg_write(vcpu->arch.apic, offset, val); 1589 kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
1611} 1590}
1612EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); 1591EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1613 1592
@@ -1667,14 +1646,14 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
1667 struct kvm_lapic *apic = vcpu->arch.apic; 1646 struct kvm_lapic *apic = vcpu->arch.apic;
1668 1647
1669 apic_set_tpr(apic, ((cr8 & 0x0f) << 4) 1648 apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
1670 | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); 1649 | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
1671} 1650}
1672 1651
1673u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) 1652u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1674{ 1653{
1675 u64 tpr; 1654 u64 tpr;
1676 1655
1677 tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); 1656 tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
1678 1657
1679 return (tpr & 0xf0) >> 4; 1658 return (tpr & 0xf0) >> 4;
1680} 1659}
@@ -1740,28 +1719,28 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
1740 kvm_apic_set_id(apic, vcpu->vcpu_id); 1719 kvm_apic_set_id(apic, vcpu->vcpu_id);
1741 kvm_apic_set_version(apic->vcpu); 1720 kvm_apic_set_version(apic->vcpu);
1742 1721
1743 for (i = 0; i < APIC_LVT_NUM; i++) 1722 for (i = 0; i < KVM_APIC_LVT_NUM; i++)
1744 apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); 1723 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
1745 apic_update_lvtt(apic); 1724 apic_update_lvtt(apic);
1746 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED)) 1725 if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
1747 apic_set_reg(apic, APIC_LVT0, 1726 kvm_lapic_set_reg(apic, APIC_LVT0,
1748 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); 1727 SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
1749 apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); 1728 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
1750 1729
1751 apic_set_reg(apic, APIC_DFR, 0xffffffffU); 1730 kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
1752 apic_set_spiv(apic, 0xff); 1731 apic_set_spiv(apic, 0xff);
1753 apic_set_reg(apic, APIC_TASKPRI, 0); 1732 kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
1754 if (!apic_x2apic_mode(apic)) 1733 if (!apic_x2apic_mode(apic))
1755 kvm_apic_set_ldr(apic, 0); 1734 kvm_apic_set_ldr(apic, 0);
1756 apic_set_reg(apic, APIC_ESR, 0); 1735 kvm_lapic_set_reg(apic, APIC_ESR, 0);
1757 apic_set_reg(apic, APIC_ICR, 0); 1736 kvm_lapic_set_reg(apic, APIC_ICR, 0);
1758 apic_set_reg(apic, APIC_ICR2, 0); 1737 kvm_lapic_set_reg(apic, APIC_ICR2, 0);
1759 apic_set_reg(apic, APIC_TDCR, 0); 1738 kvm_lapic_set_reg(apic, APIC_TDCR, 0);
1760 apic_set_reg(apic, APIC_TMICT, 0); 1739 kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1761 for (i = 0; i < 8; i++) { 1740 for (i = 0; i < 8; i++) {
1762 apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); 1741 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
1763 apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); 1742 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1764 apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); 1743 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1765 } 1744 }
1766 apic->irr_pending = vcpu->arch.apicv_active; 1745 apic->irr_pending = vcpu->arch.apicv_active;
1767 apic->isr_count = vcpu->arch.apicv_active ? 1 : 0; 1746 apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
@@ -1806,7 +1785,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
1806 1785
1807int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) 1786int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1808{ 1787{
1809 u32 reg = kvm_apic_get_reg(apic, lvt_type); 1788 u32 reg = kvm_lapic_get_reg(apic, lvt_type);
1810 int vector, mode, trig_mode; 1789 int vector, mode, trig_mode;
1811 1790
1812 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { 1791 if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
@@ -1901,14 +1880,14 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
1901 apic_update_ppr(apic); 1880 apic_update_ppr(apic);
1902 highest_irr = apic_find_highest_irr(apic); 1881 highest_irr = apic_find_highest_irr(apic);
1903 if ((highest_irr == -1) || 1882 if ((highest_irr == -1) ||
1904 ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI))) 1883 ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
1905 return -1; 1884 return -1;
1906 return highest_irr; 1885 return highest_irr;
1907} 1886}
1908 1887
1909int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) 1888int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
1910{ 1889{
1911 u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0); 1890 u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
1912 int r = 0; 1891 int r = 0;
1913 1892
1914 if (!kvm_apic_hw_enabled(vcpu->arch.apic)) 1893 if (!kvm_apic_hw_enabled(vcpu->arch.apic))
@@ -1974,7 +1953,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1974 apic_update_ppr(apic); 1953 apic_update_ppr(apic);
1975 hrtimer_cancel(&apic->lapic_timer.timer); 1954 hrtimer_cancel(&apic->lapic_timer.timer);
1976 apic_update_lvtt(apic); 1955 apic_update_lvtt(apic);
1977 apic_manage_nmi_watchdog(apic, kvm_apic_get_reg(apic, APIC_LVT0)); 1956 apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
1978 update_divide_count(apic); 1957 update_divide_count(apic);
1979 start_apic_timer(apic); 1958 start_apic_timer(apic);
1980 apic->irr_pending = true; 1959 apic->irr_pending = true;
@@ -1982,9 +1961,11 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
1982 1 : count_vectors(apic->regs + APIC_ISR); 1961 1 : count_vectors(apic->regs + APIC_ISR);
1983 apic->highest_isr_cache = -1; 1962 apic->highest_isr_cache = -1;
1984 if (vcpu->arch.apicv_active) { 1963 if (vcpu->arch.apicv_active) {
1964 if (kvm_x86_ops->apicv_post_state_restore)
1965 kvm_x86_ops->apicv_post_state_restore(vcpu);
1985 kvm_x86_ops->hwapic_irr_update(vcpu, 1966 kvm_x86_ops->hwapic_irr_update(vcpu,
1986 apic_find_highest_irr(apic)); 1967 apic_find_highest_irr(apic));
1987 kvm_x86_ops->hwapic_isr_update(vcpu->kvm, 1968 kvm_x86_ops->hwapic_isr_update(vcpu,
1988 apic_find_highest_isr(apic)); 1969 apic_find_highest_isr(apic));
1989 } 1970 }
1990 kvm_make_request(KVM_REQ_EVENT, vcpu); 1971 kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -2097,7 +2078,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2097 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) 2078 if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2098 return; 2079 return;
2099 2080
2100 tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff; 2081 tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2101 max_irr = apic_find_highest_irr(apic); 2082 max_irr = apic_find_highest_irr(apic);
2102 if (max_irr < 0) 2083 if (max_irr < 0)
2103 max_irr = 0; 2084 max_irr = 0;
@@ -2139,8 +2120,8 @@ int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2139 2120
2140 /* if this is ICR write vector before command */ 2121 /* if this is ICR write vector before command */
2141 if (reg == APIC_ICR) 2122 if (reg == APIC_ICR)
2142 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 2123 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2143 return apic_reg_write(apic, reg, (u32)data); 2124 return kvm_lapic_reg_write(apic, reg, (u32)data);
2144} 2125}
2145 2126
2146int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data) 2127int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
@@ -2157,10 +2138,10 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2157 return 1; 2138 return 1;
2158 } 2139 }
2159 2140
2160 if (apic_reg_read(apic, reg, 4, &low)) 2141 if (kvm_lapic_reg_read(apic, reg, 4, &low))
2161 return 1; 2142 return 1;
2162 if (reg == APIC_ICR) 2143 if (reg == APIC_ICR)
2163 apic_reg_read(apic, APIC_ICR2, 4, &high); 2144 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2164 2145
2165 *data = (((u64)high) << 32) | low; 2146 *data = (((u64)high) << 32) | low;
2166 2147
@@ -2176,8 +2157,8 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2176 2157
2177 /* if this is ICR write vector before command */ 2158 /* if this is ICR write vector before command */
2178 if (reg == APIC_ICR) 2159 if (reg == APIC_ICR)
2179 apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32)); 2160 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2180 return apic_reg_write(apic, reg, (u32)data); 2161 return kvm_lapic_reg_write(apic, reg, (u32)data);
2181} 2162}
2182 2163
2183int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) 2164int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
@@ -2188,10 +2169,10 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2188 if (!lapic_in_kernel(vcpu)) 2169 if (!lapic_in_kernel(vcpu))
2189 return 1; 2170 return 1;
2190 2171
2191 if (apic_reg_read(apic, reg, 4, &low)) 2172 if (kvm_lapic_reg_read(apic, reg, 4, &low))
2192 return 1; 2173 return 1;
2193 if (reg == APIC_ICR) 2174 if (reg == APIC_ICR)
2194 apic_reg_read(apic, APIC_ICR2, 4, &high); 2175 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2195 2176
2196 *data = (((u64)high) << 32) | low; 2177 *data = (((u64)high) << 32) | low;
2197 2178
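The x2APIC and Hyper-V vAPIC MSR paths above treat the 64-bit ICR MSR as the two legacy 32-bit registers: ICR2 carries the destination field (high word) and ICR the command word (low word), with the high word written first so the destination is already in place when the ICR write triggers delivery. A minimal standalone sketch of that packing, with hypothetical helper names:

#include <stdint.h>

/* Hypothetical helpers mirroring the split used above. */
static inline void icr_msr_split(uint64_t msr, uint32_t *icr2, uint32_t *icr)
{
	*icr2 = (uint32_t)(msr >> 32);	/* destination field: written first */
	*icr  = (uint32_t)msr;		/* command word: triggers delivery */
}

static inline uint64_t icr_msr_join(uint32_t icr2, uint32_t icr)
{
	return ((uint64_t)icr2 << 32) | icr;
}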
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f71183e502ee..891c6da7d4aa 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -7,6 +7,10 @@
7 7
8#define KVM_APIC_INIT 0 8#define KVM_APIC_INIT 0
9#define KVM_APIC_SIPI 1 9#define KVM_APIC_SIPI 1
10#define KVM_APIC_LVT_NUM 6
11
12#define KVM_APIC_SHORT_MASK 0xc0000
13#define KVM_APIC_DEST_MASK 0x800
10 14
11struct kvm_timer { 15struct kvm_timer {
12 struct hrtimer timer; 16 struct hrtimer timer;
@@ -59,6 +63,11 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
59void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); 63void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
60u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); 64u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
61void kvm_apic_set_version(struct kvm_vcpu *vcpu); 65void kvm_apic_set_version(struct kvm_vcpu *vcpu);
66int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val);
67int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
68 void *data);
69bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
70 int short_hand, unsigned int dest, int dest_mode);
62 71
63void __kvm_apic_update_irr(u32 *pir, void *regs); 72void __kvm_apic_update_irr(u32 *pir, void *regs);
64void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); 73void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
@@ -99,9 +108,32 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
99int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); 108int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
100void kvm_lapic_init(void); 109void kvm_lapic_init(void);
101 110
102static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) 111#define VEC_POS(v) ((v) & (32 - 1))
112#define REG_POS(v) (((v) >> 5) << 4)
113
114static inline void kvm_lapic_set_vector(int vec, void *bitmap)
115{
116 set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
117}
118
119static inline void kvm_lapic_set_irr(int vec, struct kvm_lapic *apic)
120{
121 kvm_lapic_set_vector(vec, apic->regs + APIC_IRR);
122 /*
123 * irr_pending must be true if any interrupt is pending; set it after
124 * APIC_IRR to avoid race with apic_clear_irr
125 */
126 apic->irr_pending = true;
127}
128
129static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off)
130{
131 return *((u32 *) (apic->regs + reg_off));
132}
133
134static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
103{ 135{
104 return *((u32 *) (apic->regs + reg_off)); 136 *((u32 *) (apic->regs + reg_off)) = val;
105} 137}
106 138
107extern struct static_key kvm_no_apic_vcpu; 139extern struct static_key kvm_no_apic_vcpu;
@@ -169,7 +201,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
169 201
170static inline int kvm_apic_id(struct kvm_lapic *apic) 202static inline int kvm_apic_id(struct kvm_lapic *apic)
171{ 203{
172 return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; 204 return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
173} 205}
174 206
175bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); 207bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
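The VEC_POS()/REG_POS() pair moved into this header encodes the xAPIC layout of the 256-bit IRR/ISR arrays: eight 32-bit registers spaced 16 bytes apart, so a vector's register offset is (vec >> 5) << 4 and its bit position is vec % 32. A self-contained check (user-space sketch, not kernel code):

#include <stdio.h>

#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)	/* 32 vectors per reg, regs 0x10 apart */

int main(void)
{
	unsigned int vec = 0x61;

	/* prints: vector 0x61 -> register offset 0x30, bit 1 */
	printf("vector 0x%x -> register offset 0x%x, bit %u\n",
	       vec, REG_POS(vec), VEC_POS(vec));
	return 0;
}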
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 38c0c32926c9..24e800116ab4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1909,18 +1909,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
1909 * since it has been deleted from active_mmu_pages but still can be found 1909 * since it has been deleted from active_mmu_pages but still can be found
1910 * at hash list. 1910 * at hash list.
1911 * 1911 *
1912 * for_each_gfn_indirect_valid_sp has skipped that kind of page and 1912 * for_each_gfn_valid_sp() has skipped such pages.
1913 * kvm_mmu_get_page(), the only user of for_each_gfn_sp(), has skipped
1914 * all the obsolete pages.
1915 */ 1913 */
1916#define for_each_gfn_sp(_kvm, _sp, _gfn) \ 1914#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
1917 hlist_for_each_entry(_sp, \ 1915 hlist_for_each_entry(_sp, \
1918 &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ 1916 &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
1919 if ((_sp)->gfn != (_gfn)) {} else 1917 if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \
1918 || (_sp)->role.invalid) {} else
1920 1919
1921#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ 1920#define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \
1922 for_each_gfn_sp(_kvm, _sp, _gfn) \ 1921 for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
1923 if ((_sp)->role.direct || (_sp)->role.invalid) {} else 1922 if ((_sp)->role.direct) {} else
1924 1923
1925/* @sp->gfn should be write-protected at the call site */ 1924/* @sp->gfn should be write-protected at the call site */
1926static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1925static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
@@ -1961,6 +1960,11 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { }
1961static void mmu_audit_disable(void) { } 1960static void mmu_audit_disable(void) { }
1962#endif 1961#endif
1963 1962
1963static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
1964{
1965 return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
1966}
1967
1964static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, 1968static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
1965 struct list_head *invalid_list) 1969 struct list_head *invalid_list)
1966{ 1970{
@@ -2105,11 +2109,6 @@ static void clear_sp_write_flooding_count(u64 *spte)
2105 __clear_sp_write_flooding_count(sp); 2109 __clear_sp_write_flooding_count(sp);
2106} 2110}
2107 2111
2108static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
2109{
2110 return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
2111}
2112
2113static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, 2112static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
2114 gfn_t gfn, 2113 gfn_t gfn,
2115 gva_t gaddr, 2114 gva_t gaddr,
@@ -2136,10 +2135,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
2136 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; 2135 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
2137 role.quadrant = quadrant; 2136 role.quadrant = quadrant;
2138 } 2137 }
2139 for_each_gfn_sp(vcpu->kvm, sp, gfn) { 2138 for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) {
2140 if (is_obsolete_sp(vcpu->kvm, sp))
2141 continue;
2142
2143 if (!need_sync && sp->unsync) 2139 if (!need_sync && sp->unsync)
2144 need_sync = true; 2140 need_sync = true;
2145 2141
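The reworked for_each_gfn_valid_sp() above folds the obsolete and invalid checks into the iterator itself using the `if (cond) {} else` trick, which filters entries inside a for-each expansion without requiring braces at the call site. A generic sketch of the idiom under hypothetical names:

#include <stdio.h>

/* Skip odd values inside the loop expansion; callers just supply a body. */
#define for_each_even(_i, _n)				\
	for ((_i) = 0; (_i) < (_n); (_i)++)		\
		if ((_i) & 1) {} else

int main(void)
{
	int i;

	for_each_even(i, 6)
		printf("%d\n", i);	/* prints 0, 2, 4 */
	return 0;
}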
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 3f8c732117ec..c146f3c262c3 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -44,8 +44,6 @@ static bool msr_mtrr_valid(unsigned msr)
44 case MSR_MTRRdefType: 44 case MSR_MTRRdefType:
45 case MSR_IA32_CR_PAT: 45 case MSR_IA32_CR_PAT:
46 return true; 46 return true;
47 case 0x2f8:
48 return true;
49 } 47 }
50 return false; 48 return false;
51} 49}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fafd720ce10a..2214214c786b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -14,6 +14,9 @@
14 * the COPYING file in the top-level directory. 14 * the COPYING file in the top-level directory.
15 * 15 *
16 */ 16 */
17
18#define pr_fmt(fmt) "SVM: " fmt
19
17#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
18 21
19#include "irq.h" 22#include "irq.h"
@@ -32,6 +35,7 @@
32#include <linux/trace_events.h> 35#include <linux/trace_events.h>
33#include <linux/slab.h> 36#include <linux/slab.h>
34 37
38#include <asm/apic.h>
35#include <asm/perf_event.h> 39#include <asm/perf_event.h>
36#include <asm/tlbflush.h> 40#include <asm/tlbflush.h>
37#include <asm/desc.h> 41#include <asm/desc.h>
@@ -68,6 +72,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
68#define SVM_FEATURE_DECODE_ASSIST (1 << 7) 72#define SVM_FEATURE_DECODE_ASSIST (1 << 7)
69#define SVM_FEATURE_PAUSE_FILTER (1 << 10) 73#define SVM_FEATURE_PAUSE_FILTER (1 << 10)
70 74
75#define SVM_AVIC_DOORBELL 0xc001011b
76
71#define NESTED_EXIT_HOST 0 /* Exit handled on host level */ 77#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
72#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ 78#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
73#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ 79#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
@@ -78,6 +84,18 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
78#define TSC_RATIO_MIN 0x0000000000000001ULL 84#define TSC_RATIO_MIN 0x0000000000000001ULL
79#define TSC_RATIO_MAX 0x000000ffffffffffULL 85#define TSC_RATIO_MAX 0x000000ffffffffffULL
80 86
87#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
88
89/*
90 * 0xff is broadcast, so the max index allowed for physical APIC ID
91 * table is 0xfe. APIC IDs above 0xff are reserved.
92 */
93#define AVIC_MAX_PHYSICAL_ID_COUNT 255
94
95#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
96#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
97#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
98
81static bool erratum_383_found __read_mostly; 99static bool erratum_383_found __read_mostly;
82 100
83static const u32 host_save_user_msrs[] = { 101static const u32 host_save_user_msrs[] = {
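AVIC_HPA_MASK keeps bits 51:12 of a host physical address, clearing the 12 page-offset bits and the top 12 bits. Note it must be built with a bitwise '|'; a logical '||' would collapse the inner expression to 1 and the mask to ~1ULL. A quick user-space check of the intended value:

#include <assert.h>
#include <stdint.h>

#define AVIC_HPA_MASK	(~((0xFFFULL << 52) | 0xFFFULL))

int main(void)
{
	uint64_t hpa = 0xABCDEF123456ULL;

	/* page-frame bits 51:12 survive; the offset bits are cleared */
	assert((hpa & AVIC_HPA_MASK) == 0xABCDEF123000ULL);
	return 0;
}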
@@ -162,8 +180,21 @@ struct vcpu_svm {
162 180
163 /* cached guest cpuid flags for faster access */ 181 /* cached guest cpuid flags for faster access */
164 bool nrips_enabled : 1; 182 bool nrips_enabled : 1;
183
184 u32 ldr_reg;
185 struct page *avic_backing_page;
186 u64 *avic_physical_id_cache;
187 bool avic_is_running;
165}; 188};
166 189
190#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
191#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
192
193#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
194#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
195#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
196#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
197
167static DEFINE_PER_CPU(u64, current_tsc_ratio); 198static DEFINE_PER_CPU(u64, current_tsc_ratio);
168#define TSC_RATIO_DEFAULT 0x0100000000ULL 199#define TSC_RATIO_DEFAULT 0x0100000000ULL
169 200
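The masks above define the 64-bit physical APIC ID table entry: host physical APIC ID in bits 7:0, the backing-page host physical address in bits 51:12, the is-running flag in bit 62 and the valid flag in bit 63. A sketch of assembling one entry (hypothetical standalone helper, not from this patch):

#include <stdint.h>

#define HOST_ID_MASK	0xFFULL
#define BACKING_MASK	(0xFFFFFFFFFFULL << 12)
#define IS_RUNNING_BIT	(1ULL << 62)
#define VALID_BIT	(1ULL << 63)

static uint64_t make_phys_id_entry(uint64_t backing_hpa, uint8_t host_id,
				   int running)
{
	uint64_t e = (backing_hpa & BACKING_MASK) |
		     (host_id & HOST_ID_MASK) | VALID_BIT;

	if (running)
		e |= IS_RUNNING_BIT;
	return e;
}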
@@ -205,6 +236,10 @@ module_param(npt, int, S_IRUGO);
205static int nested = true; 236static int nested = true;
206module_param(nested, int, S_IRUGO); 237module_param(nested, int, S_IRUGO);
207 238
239/* enable / disable AVIC */
240static int avic;
241module_param(avic, int, S_IRUGO);
242
208static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); 243static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
209static void svm_flush_tlb(struct kvm_vcpu *vcpu); 244static void svm_flush_tlb(struct kvm_vcpu *vcpu);
210static void svm_complete_interrupts(struct vcpu_svm *svm); 245static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -228,12 +263,18 @@ enum {
228 VMCB_SEG, /* CS, DS, SS, ES, CPL */ 263 VMCB_SEG, /* CS, DS, SS, ES, CPL */
229 VMCB_CR2, /* CR2 only */ 264 VMCB_CR2, /* CR2 only */
230 VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ 265 VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
266 VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
267 * AVIC PHYSICAL_TABLE pointer,
268 * AVIC LOGICAL_TABLE pointer
269 */
231 VMCB_DIRTY_MAX, 270 VMCB_DIRTY_MAX,
232}; 271};
233 272
234/* TPR and CR2 are always written before VMRUN */ 273/* TPR and CR2 are always written before VMRUN */
235#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) 274#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
236 275
276#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
277
237static inline void mark_all_dirty(struct vmcb *vmcb) 278static inline void mark_all_dirty(struct vmcb *vmcb)
238{ 279{
239 vmcb->control.clean = 0; 280 vmcb->control.clean = 0;
@@ -255,6 +296,23 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
255 return container_of(vcpu, struct vcpu_svm, vcpu); 296 return container_of(vcpu, struct vcpu_svm, vcpu);
256} 297}
257 298
299static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
300{
301 svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
302 mark_dirty(svm->vmcb, VMCB_AVIC);
303}
304
305static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
306{
307 struct vcpu_svm *svm = to_svm(vcpu);
308 u64 *entry = svm->avic_physical_id_cache;
309
310 if (!entry)
311 return false;
312
313 return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
314}
315
258static void recalc_intercepts(struct vcpu_svm *svm) 316static void recalc_intercepts(struct vcpu_svm *svm)
259{ 317{
260 struct vmcb_control_area *c, *h; 318 struct vmcb_control_area *c, *h;
@@ -923,6 +981,12 @@ static __init int svm_hardware_setup(void)
923 } else 981 } else
924 kvm_disable_tdp(); 982 kvm_disable_tdp();
925 983
984 if (avic && (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)))
985 avic = false;
986
987 if (avic)
988 pr_info("AVIC enabled\n");
989
926 return 0; 990 return 0;
927 991
928err: 992err:
@@ -1000,6 +1064,22 @@ static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
1000 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1064 mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
1001} 1065}
1002 1066
1067static void avic_init_vmcb(struct vcpu_svm *svm)
1068{
1069 struct vmcb *vmcb = svm->vmcb;
1070 struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
1071 phys_addr_t bpa = page_to_phys(svm->avic_backing_page);
1072 phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page);
1073 phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page);
1074
1075 vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
1076 vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
1077 vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
1078 vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
1079 vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
1080 svm->vcpu.arch.apicv_active = true;
1081}
1082
1003static void init_vmcb(struct vcpu_svm *svm) 1083static void init_vmcb(struct vcpu_svm *svm)
1004{ 1084{
1005 struct vmcb_control_area *control = &svm->vmcb->control; 1085 struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1014,7 +1094,8 @@ static void init_vmcb(struct vcpu_svm *svm)
1014 set_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1094 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1015 set_cr_intercept(svm, INTERCEPT_CR3_WRITE); 1095 set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1016 set_cr_intercept(svm, INTERCEPT_CR4_WRITE); 1096 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1017 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 1097 if (!kvm_vcpu_apicv_active(&svm->vcpu))
1098 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1018 1099
1019 set_dr_intercepts(svm); 1100 set_dr_intercepts(svm);
1020 1101
@@ -1110,9 +1191,197 @@ static void init_vmcb(struct vcpu_svm *svm)
1110 set_intercept(svm, INTERCEPT_PAUSE); 1191 set_intercept(svm, INTERCEPT_PAUSE);
1111 } 1192 }
1112 1193
1194 if (avic)
1195 avic_init_vmcb(svm);
1196
1113 mark_all_dirty(svm->vmcb); 1197 mark_all_dirty(svm->vmcb);
1114 1198
1115 enable_gif(svm); 1199 enable_gif(svm);
1200
1201}
1202
1203static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, int index)
1204{
1205 u64 *avic_physical_id_table;
1206 struct kvm_arch *vm_data = &vcpu->kvm->arch;
1207
1208 if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
1209 return NULL;
1210
1211 avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
1212
1213 return &avic_physical_id_table[index];
1214}
1215
1216/**
1217 * Note:
1218 * AVIC hardware walks the nested page table to check permissions,
1219 * but does not use the SPA address specified in the leaf page
1220 * table entry since it uses the address in the AVIC_BACKING_PAGE pointer
1221 * field of the VMCB. Therefore, we set up the
1222 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
1223 */
1224static int avic_init_access_page(struct kvm_vcpu *vcpu)
1225{
1226 struct kvm *kvm = vcpu->kvm;
1227 int ret;
1228
1229 if (kvm->arch.apic_access_page_done)
1230 return 0;
1231
1232 ret = x86_set_memory_region(kvm,
1233 APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
1234 APIC_DEFAULT_PHYS_BASE,
1235 PAGE_SIZE);
1236 if (ret)
1237 return ret;
1238
1239 kvm->arch.apic_access_page_done = true;
1240 return 0;
1241}
1242
1243static int avic_init_backing_page(struct kvm_vcpu *vcpu)
1244{
1245 int ret;
1246 u64 *entry, new_entry;
1247 int id = vcpu->vcpu_id;
1248 struct vcpu_svm *svm = to_svm(vcpu);
1249
1250 ret = avic_init_access_page(vcpu);
1251 if (ret)
1252 return ret;
1253
1254 if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
1255 return -EINVAL;
1256
1257 if (!svm->vcpu.arch.apic->regs)
1258 return -EINVAL;
1259
1260 svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
1261
1262 /* Setting AVIC backing page address in the physical APIC ID table */
1263 entry = avic_get_physical_id_entry(vcpu, id);
1264 if (!entry)
1265 return -EINVAL;
1266
1267 new_entry = READ_ONCE(*entry);
1268 new_entry = (page_to_phys(svm->avic_backing_page) &
1269 AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
1270 AVIC_PHYSICAL_ID_ENTRY_VALID_MASK;
1271 WRITE_ONCE(*entry, new_entry);
1272
1273 svm->avic_physical_id_cache = entry;
1274
1275 return 0;
1276}
1277
1278static void avic_vm_destroy(struct kvm *kvm)
1279{
1280 struct kvm_arch *vm_data = &kvm->arch;
1281
1282 if (vm_data->avic_logical_id_table_page)
1283 __free_page(vm_data->avic_logical_id_table_page);
1284 if (vm_data->avic_physical_id_table_page)
1285 __free_page(vm_data->avic_physical_id_table_page);
1286}
1287
1288static int avic_vm_init(struct kvm *kvm)
1289{
1290 int err = -ENOMEM;
1291 struct kvm_arch *vm_data = &kvm->arch;
1292 struct page *p_page;
1293 struct page *l_page;
1294
1295 if (!avic)
1296 return 0;
1297
1298 /* Allocating physical APIC ID table (4KB) */
1299 p_page = alloc_page(GFP_KERNEL);
1300 if (!p_page)
1301 goto free_avic;
1302
1303 vm_data->avic_physical_id_table_page = p_page;
1304 clear_page(page_address(p_page));
1305
1306 /* Allocating logical APIC ID table (4KB) */
1307 l_page = alloc_page(GFP_KERNEL);
1308 if (!l_page)
1309 goto free_avic;
1310
1311 vm_data->avic_logical_id_table_page = l_page;
1312 clear_page(page_address(l_page));
1313
1314 return 0;
1315
1316free_avic:
1317 avic_vm_destroy(kvm);
1318 return err;
1319}
1320
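avic_vm_init() can jump to free_avic with either table still unallocated; the single error label works because avic_vm_destroy() NULL-checks each page before freeing, so it is safe on a partially initialized kvm_arch. A generic sketch of that unwind pattern (hypothetical names):

#include <stdlib.h>

struct tables { void *phys; void *log; };

static void tables_destroy(struct tables *t)
{
	free(t->phys);	/* free(NULL) is a no-op, standing in for the */
	free(t->log);	/* NULL-guarded __free_page() calls above     */
}

static int tables_init(struct tables *t)
{
	t->phys = t->log = NULL;

	t->phys = calloc(1, 4096);	/* zeroed, like clear_page() */
	if (!t->phys)
		goto fail;
	t->log = calloc(1, 4096);
	if (!t->log)
		goto fail;
	return 0;
fail:
	tables_destroy(t);
	return -1;
}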
1321/**
1322 * This function is called during VCPU halt/unhalt.
1323 */
1324static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
1325{
1326 u64 entry;
1327 int h_physical_id = __default_cpu_present_to_apicid(vcpu->cpu);
1328 struct vcpu_svm *svm = to_svm(vcpu);
1329
1330 if (!kvm_vcpu_apicv_active(vcpu))
1331 return;
1332
1333 svm->avic_is_running = is_run;
1334
1335 /* ID = 0xff (broadcast), ID > 0xff (reserved) */
1336 if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
1337 return;
1338
1339 entry = READ_ONCE(*(svm->avic_physical_id_cache));
1340 WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
1341
1342 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1343 if (is_run)
1344 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1345 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1346}
1347
1348static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1349{
1350 u64 entry;
1351 /* ID = 0xff (broadcast), ID > 0xff (reserved) */
1352 int h_physical_id = __default_cpu_present_to_apicid(cpu);
1353 struct vcpu_svm *svm = to_svm(vcpu);
1354
1355 if (!kvm_vcpu_apicv_active(vcpu))
1356 return;
1357
1358 if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
1359 return;
1360
1361 entry = READ_ONCE(*(svm->avic_physical_id_cache));
1362 WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
1363
1364 entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
1365 entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
1366
1367 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1368 if (svm->avic_is_running)
1369 entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1370
1371 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1372}
1373
1374static void avic_vcpu_put(struct kvm_vcpu *vcpu)
1375{
1376 u64 entry;
1377 struct vcpu_svm *svm = to_svm(vcpu);
1378
1379 if (!kvm_vcpu_apicv_active(vcpu))
1380 return;
1381
1382 entry = READ_ONCE(*(svm->avic_physical_id_cache));
1383 entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
1384 WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
1116} 1385}
1117 1386
1118static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) 1387static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -1131,6 +1400,9 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
1131 1400
1132 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); 1401 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
1133 kvm_register_write(vcpu, VCPU_REGS_RDX, eax); 1402 kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
1403
1404 if (kvm_vcpu_apicv_active(vcpu) && !init_event)
1405 avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
1134} 1406}
1135 1407
1136static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) 1408static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
@@ -1169,6 +1441,17 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1169 if (!hsave_page) 1441 if (!hsave_page)
1170 goto free_page3; 1442 goto free_page3;
1171 1443
1444 if (avic) {
1445 err = avic_init_backing_page(&svm->vcpu);
1446 if (err)
1447 goto free_page4;
1448 }
1449
1450 /* We initialize this flag to true so that the is_running bit is
1451 * set the first time the vcpu is loaded.
1452 */
1453 svm->avic_is_running = true;
1454
1172 svm->nested.hsave = page_address(hsave_page); 1455 svm->nested.hsave = page_address(hsave_page);
1173 1456
1174 svm->msrpm = page_address(msrpm_pages); 1457 svm->msrpm = page_address(msrpm_pages);
@@ -1187,6 +1470,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1187 1470
1188 return &svm->vcpu; 1471 return &svm->vcpu;
1189 1472
1473free_page4:
1474 __free_page(hsave_page);
1190free_page3: 1475free_page3:
1191 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); 1476 __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1192free_page2: 1477free_page2:
@@ -1243,6 +1528,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1243 /* This assumes that the kernel never uses MSR_TSC_AUX */ 1528 /* This assumes that the kernel never uses MSR_TSC_AUX */
1244 if (static_cpu_has(X86_FEATURE_RDTSCP)) 1529 if (static_cpu_has(X86_FEATURE_RDTSCP))
1245 wrmsrl(MSR_TSC_AUX, svm->tsc_aux); 1530 wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
1531
1532 avic_vcpu_load(vcpu, cpu);
1246} 1533}
1247 1534
1248static void svm_vcpu_put(struct kvm_vcpu *vcpu) 1535static void svm_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1250,6 +1537,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1250 struct vcpu_svm *svm = to_svm(vcpu); 1537 struct vcpu_svm *svm = to_svm(vcpu);
1251 int i; 1538 int i;
1252 1539
1540 avic_vcpu_put(vcpu);
1541
1253 ++vcpu->stat.host_state_reload; 1542 ++vcpu->stat.host_state_reload;
1254 kvm_load_ldt(svm->host.ldt); 1543 kvm_load_ldt(svm->host.ldt);
1255#ifdef CONFIG_X86_64 1544#ifdef CONFIG_X86_64
@@ -1265,6 +1554,16 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1265 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); 1554 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1266} 1555}
1267 1556
1557static void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
1558{
1559 avic_set_running(vcpu, false);
1560}
1561
1562static void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
1563{
1564 avic_set_running(vcpu, true);
1565}
1566
1268static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) 1567static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1269{ 1568{
1270 return to_svm(vcpu)->vmcb->save.rflags; 1569 return to_svm(vcpu)->vmcb->save.rflags;
@@ -2673,10 +2972,11 @@ static int clgi_interception(struct vcpu_svm *svm)
2673 disable_gif(svm); 2972 disable_gif(svm);
2674 2973
2675 /* After a CLGI no interrupts should come */ 2974 /* After a CLGI no interrupts should come */
2676 svm_clear_vintr(svm); 2975 if (!kvm_vcpu_apicv_active(&svm->vcpu)) {
2677 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2976 svm_clear_vintr(svm);
2678 2977 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2679 mark_dirty(svm->vmcb, VMCB_INTR); 2978 mark_dirty(svm->vmcb, VMCB_INTR);
2979 }
2680 2980
2681 return 1; 2981 return 1;
2682} 2982}
@@ -3212,6 +3512,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
3212 case MSR_VM_IGNNE: 3512 case MSR_VM_IGNNE:
3213 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 3513 vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3214 break; 3514 break;
3515 case MSR_IA32_APICBASE:
3516 if (kvm_vcpu_apicv_active(vcpu))
3517 avic_update_vapic_bar(to_svm(vcpu), data);
3518 /* Fall through */
3215 default: 3519 default:
3216 return kvm_set_msr_common(vcpu, msr); 3520 return kvm_set_msr_common(vcpu, msr);
3217 } 3521 }
@@ -3281,6 +3585,278 @@ static int mwait_interception(struct vcpu_svm *svm)
3281 return nop_interception(svm); 3585 return nop_interception(svm);
3282} 3586}
3283 3587
3588enum avic_ipi_failure_cause {
3589 AVIC_IPI_FAILURE_INVALID_INT_TYPE,
3590 AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
3591 AVIC_IPI_FAILURE_INVALID_TARGET,
3592 AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
3593};
3594
3595static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
3596{
3597 u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
3598 u32 icrl = svm->vmcb->control.exit_info_1;
3599 u32 id = svm->vmcb->control.exit_info_2 >> 32;
3600 u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
3601 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3602
3603 trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
3604
3605 switch (id) {
3606 case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
3607 /*
3608 * AVIC hardware handles the generation of
3609 * IPIs when the specified Message Type is Fixed
3610 * (also known as fixed delivery mode) and
3611 * the Trigger Mode is edge-triggered. The hardware
3612 * also supports self and broadcast delivery modes
3613 * specified via the Destination Shorthand(DSH)
3614 * field of the ICRL. Logical and physical APIC ID
3615 * formats are supported. All other IPI types cause
3616 * a #VMEXIT, which needs to be emulated.
3617 */
3618 kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
3619 kvm_lapic_reg_write(apic, APIC_ICR, icrl);
3620 break;
3621 case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
3622 int i;
3623 struct kvm_vcpu *vcpu;
3624 struct kvm *kvm = svm->vcpu.kvm;
3625 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3626
3627 /*
3628 * At this point, we expect that the AVIC HW has already
3629 * set the appropriate IRR bits on the valid target
3630 * vcpus. So, we just need to kick the appropriate vcpu.
3631 */
3632 kvm_for_each_vcpu(i, vcpu, kvm) {
3633 bool m = kvm_apic_match_dest(vcpu, apic,
3634 icrl & KVM_APIC_SHORT_MASK,
3635 GET_APIC_DEST_FIELD(icrh),
3636 icrl & KVM_APIC_DEST_MASK);
3637
3638 if (m && !avic_vcpu_is_running(vcpu))
3639 kvm_vcpu_wake_up(vcpu);
3640 }
3641 break;
3642 }
3643 case AVIC_IPI_FAILURE_INVALID_TARGET:
3644 break;
3645 case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
3646 WARN_ONCE(1, "Invalid backing page\n");
3647 break;
3648 default:
3649 pr_err("Unknown IPI interception\n");
3650 }
3651
3652 return 1;
3653}
3654
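The handler above unpacks its four fields from the two 64-bit exit-info words; note the index extraction uses a bitwise '&'. A standalone decode sketch (hypothetical struct, mirroring the shifts and masks used above):

#include <stdint.h>

struct avic_ipi_exit {
	uint32_t icrh;	/* exit_info_1[63:32]: ICR high (destination) */
	uint32_t icrl;	/* exit_info_1[31:0]:  ICR low (command)      */
	uint32_t id;	/* exit_info_2[63:32]: failure cause          */
	uint32_t index;	/* exit_info_2[7:0]:   source APIC ID index   */
};

static struct avic_ipi_exit decode_ipi_exit(uint64_t info1, uint64_t info2)
{
	struct avic_ipi_exit e = {
		.icrh  = (uint32_t)(info1 >> 32),
		.icrl  = (uint32_t)info1,
		.id    = (uint32_t)(info2 >> 32),
		.index = (uint32_t)(info2 & 0xFF),
	};
	return e;
}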
3655static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
3656{
3657 struct kvm_arch *vm_data = &vcpu->kvm->arch;
3658 int index;
3659 u32 *logical_apic_id_table;
3660 int dlid = GET_APIC_LOGICAL_ID(ldr);
3661
3662 if (!dlid)
3663 return NULL;
3664
3665 if (flat) { /* flat */
3666 index = ffs(dlid) - 1;
3667 if (index > 7)
3668 return NULL;
3669 } else { /* cluster */
3670 int cluster = (dlid & 0xf0) >> 4;
3671 int apic = ffs(dlid & 0x0f) - 1;
3672
3673 if ((apic < 0) || (apic > 7) ||
3674 (cluster >= 0xf))
3675 return NULL;
3676 index = (cluster << 2) + apic;
3677 }
3678
3679 logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
3680
3681 return &logical_apic_id_table[index];
3682}
3683
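avic_get_logical_id_entry() maps a logical APIC ID to a table slot: in flat mode the slot is the bit position of the single LDR bit (0-7); in cluster mode the cluster number from the high nibble and the position of the member bit combine as (cluster << 2) + apic. A worked example as a user-space sketch (validation omitted):

#include <stdio.h>
#include <strings.h>	/* ffs() */

static int logical_index(unsigned int dlid, int flat)
{
	if (flat)
		return ffs(dlid) - 1;
	return (((dlid & 0xf0) >> 4) << 2) + (ffs(dlid & 0x0f) - 1);
}

int main(void)
{
	/* flat 0x08 -> slot 3; cluster 0x21 -> cluster 2, member 0 -> slot 8 */
	printf("%d %d\n", logical_index(0x08, 1), logical_index(0x21, 0));
	return 0;
}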
3684static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
3685 bool valid)
3686{
3687 bool flat;
3688 u32 *entry, new_entry;
3689
3690 flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
3691 entry = avic_get_logical_id_entry(vcpu, ldr, flat);
3692 if (!entry)
3693 return -EINVAL;
3694
3695 new_entry = READ_ONCE(*entry);
3696 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
3697 new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
3698 if (valid)
3699 new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
3700 else
3701 new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
3702 WRITE_ONCE(*entry, new_entry);
3703
3704 return 0;
3705}
3706
3707static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
3708{
3709 int ret;
3710 struct vcpu_svm *svm = to_svm(vcpu);
3711 u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
3712
3713 if (!ldr)
3714 return 1;
3715
3716 ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
3717 if (ret && svm->ldr_reg) {
3718 avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
3719 svm->ldr_reg = 0;
3720 } else {
3721 svm->ldr_reg = ldr;
3722 }
3723 return ret;
3724}
3725
3726static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
3727{
3728 u64 *old, *new;
3729 struct vcpu_svm *svm = to_svm(vcpu);
3730 u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
3731 u32 id = (apic_id_reg >> 24) & 0xff;
3732
3733 if (vcpu->vcpu_id == id)
3734 return 0;
3735
3736 old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
3737 new = avic_get_physical_id_entry(vcpu, id);
3738 if (!new || !old)
3739 return 1;
3740
3741 /* We need to move the physical_id_entry to the new offset */
3742 *new = *old;
3743 *old = 0ULL;
3744 to_svm(vcpu)->avic_physical_id_cache = new;
3745
3746 /*
3747 * Also update the guest physical APIC ID in the logical
3748 * APIC ID table entry if the LDR has already been set up.
3749 */
3750 if (svm->ldr_reg)
3751 avic_handle_ldr_update(vcpu);
3752
3753 return 0;
3754}
3755
3756static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
3757{
3758 struct vcpu_svm *svm = to_svm(vcpu);
3759 struct kvm_arch *vm_data = &vcpu->kvm->arch;
3760 u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
3761 u32 mod = (dfr >> 28) & 0xf;
3762
3763 /*
3764 * We assume that all local APICs are using the same type.
3765 * If this changes, we need to flush the AVIC logical
3766 * APIC ID table.
3767 */
3768 if (vm_data->ldr_mode == mod)
3769 return 0;
3770
3771 clear_page(page_address(vm_data->avic_logical_id_table_page));
3772 vm_data->ldr_mode = mod;
3773
3774 if (svm->ldr_reg)
3775 avic_handle_ldr_update(vcpu);
3776 return 0;
3777}
3778
3779static int avic_unaccel_trap_write(struct vcpu_svm *svm)
3780{
3781 struct kvm_lapic *apic = svm->vcpu.arch.apic;
3782 u32 offset = svm->vmcb->control.exit_info_1 &
3783 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
3784
3785 switch (offset) {
3786 case APIC_ID:
3787 if (avic_handle_apic_id_update(&svm->vcpu))
3788 return 0;
3789 break;
3790 case APIC_LDR:
3791 if (avic_handle_ldr_update(&svm->vcpu))
3792 return 0;
3793 break;
3794 case APIC_DFR:
3795 avic_handle_dfr_update(&svm->vcpu);
3796 break;
3797 default:
3798 break;
3799 }
3800
3801 kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
3802
3803 return 1;
3804}
3805
3806static bool is_avic_unaccelerated_access_trap(u32 offset)
3807{
3808 bool ret = false;
3809
3810 switch (offset) {
3811 case APIC_ID:
3812 case APIC_EOI:
3813 case APIC_RRR:
3814 case APIC_LDR:
3815 case APIC_DFR:
3816 case APIC_SPIV:
3817 case APIC_ESR:
3818 case APIC_ICR:
3819 case APIC_LVTT:
3820 case APIC_LVTTHMR:
3821 case APIC_LVTPC:
3822 case APIC_LVT0:
3823 case APIC_LVT1:
3824 case APIC_LVTERR:
3825 case APIC_TMICT:
3826 case APIC_TDCR:
3827 ret = true;
3828 break;
3829 default:
3830 break;
3831 }
3832 return ret;
3833}
3834
3835static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
3836{
3837 int ret = 0;
3838 u32 offset = svm->vmcb->control.exit_info_1 &
3839 AVIC_UNACCEL_ACCESS_OFFSET_MASK;
3840 u32 vector = svm->vmcb->control.exit_info_2 &
3841 AVIC_UNACCEL_ACCESS_VECTOR_MASK;
3842 bool write = (svm->vmcb->control.exit_info_1 >> 32) &
3843 AVIC_UNACCEL_ACCESS_WRITE_MASK;
3844 bool trap = is_avic_unaccelerated_access_trap(offset);
3845
3846 trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
3847 trap, write, vector);
3848 if (trap) {
3849 /* Handling Trap */
3850 WARN_ONCE(!write, "svm: Handling trap read.\n");
3851 ret = avic_unaccel_trap_write(svm);
3852 } else {
3853 /* Handling Fault */
3854 ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
3855 }
3856
3857 return ret;
3858}
3859
3284static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 3860static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
3285 [SVM_EXIT_READ_CR0] = cr_interception, 3861 [SVM_EXIT_READ_CR0] = cr_interception,
3286 [SVM_EXIT_READ_CR3] = cr_interception, 3862 [SVM_EXIT_READ_CR3] = cr_interception,
@@ -3344,6 +3920,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
3344 [SVM_EXIT_XSETBV] = xsetbv_interception, 3920 [SVM_EXIT_XSETBV] = xsetbv_interception,
3345 [SVM_EXIT_NPF] = pf_interception, 3921 [SVM_EXIT_NPF] = pf_interception,
3346 [SVM_EXIT_RSM] = emulate_on_interception, 3922 [SVM_EXIT_RSM] = emulate_on_interception,
3923 [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
3924 [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
3347}; 3925};
3348 3926
3349static void dump_vmcb(struct kvm_vcpu *vcpu) 3927static void dump_vmcb(struct kvm_vcpu *vcpu)
@@ -3375,10 +3953,14 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
3375 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); 3953 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3376 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); 3954 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3377 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); 3955 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3956 pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
3378 pr_err("%-20s%08x\n", "event_inj:", control->event_inj); 3957 pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3379 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); 3958 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3380 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); 3959 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3381 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); 3960 pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3961 pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
3962 pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
3963 pr_err("%-20s%016llx\n", "avic_physical_id:", control->avic_physical_id);
3382 pr_err("VMCB State Save Area:\n"); 3964 pr_err("VMCB State Save Area:\n");
3383 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3965 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3384 "es:", 3966 "es:",
@@ -3562,6 +4144,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3562{ 4144{
3563 struct vmcb_control_area *control; 4145 struct vmcb_control_area *control;
3564 4146
4147 /* The following fields are ignored when AVIC is enabled */
3565 control = &svm->vmcb->control; 4148 control = &svm->vmcb->control;
3566 control->int_vector = irq; 4149 control->int_vector = irq;
3567 control->int_ctl &= ~V_INTR_PRIO_MASK; 4150 control->int_ctl &= ~V_INTR_PRIO_MASK;
@@ -3583,11 +4166,17 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
3583 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; 4166 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3584} 4167}
3585 4168
4169static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
4170{
4171 return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
4172}
4173
3586static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 4174static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3587{ 4175{
3588 struct vcpu_svm *svm = to_svm(vcpu); 4176 struct vcpu_svm *svm = to_svm(vcpu);
3589 4177
3590 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 4178 if (svm_nested_virtualize_tpr(vcpu) ||
4179 kvm_vcpu_apicv_active(vcpu))
3591 return; 4180 return;
3592 4181
3593 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); 4182 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
@@ -3606,11 +4195,28 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
3606 4195
3607static bool svm_get_enable_apicv(void) 4196static bool svm_get_enable_apicv(void)
3608{ 4197{
3609 return false; 4198 return avic;
4199}
4200
4201static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
4202{
3610} 4203}
3611 4204
4205static void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
4206{
4207}
4208
4209/* Note: Currently only used by Hyper-V. */
3612static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) 4210static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
3613{ 4211{
4212 struct vcpu_svm *svm = to_svm(vcpu);
4213 struct vmcb *vmcb = svm->vmcb;
4214
4215 if (!avic)
4216 return;
4217
4218 vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
4219 mark_dirty(vmcb, VMCB_INTR);
3614} 4220}
3615 4221
3616static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 4222static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -3623,6 +4229,18 @@ static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
3623 return; 4229 return;
3624} 4230}
3625 4231
4232static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
4233{
4234 kvm_lapic_set_irr(vec, vcpu->arch.apic);
4235 smp_mb__after_atomic();
4236
4237 if (avic_vcpu_is_running(vcpu))
4238 wrmsrl(SVM_AVIC_DOORBELL,
4239 __default_cpu_present_to_apicid(vcpu->cpu));
4240 else
4241 kvm_vcpu_wake_up(vcpu);
4242}
4243
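svm_deliver_avic_intr() first sets the IRR bit in the backing page, then either rings the AVIC doorbell MSR (target vCPU is running, so hardware delivers directly) or falls back to a software wakeup. The ordering matters: smp_mb__after_atomic() makes the IRR update visible before the running check. A sketch with stubbed primitives standing in for the kernel calls:

#include <stdbool.h>
#include <stdint.h>

void set_irr_bit(int vec);			/* backing-page IRR update  */
void memory_barrier(void);			/* smp_mb__after_atomic()   */
bool target_is_running(void);			/* AVIC is-running bit      */
void wrmsr_doorbell(uint32_t host_apic_id);	/* SVM_AVIC_DOORBELL above  */
void software_wakeup(void);			/* kvm_vcpu_wake_up()       */

void deliver(int vec, uint32_t host_apic_id)
{
	set_irr_bit(vec);
	memory_barrier();	/* IRR must be visible before the check */
	if (target_is_running())
		wrmsr_doorbell(host_apic_id);
	else
		software_wakeup();
}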
3626static int svm_nmi_allowed(struct kvm_vcpu *vcpu) 4244static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3627{ 4245{
3628 struct vcpu_svm *svm = to_svm(vcpu); 4246 struct vcpu_svm *svm = to_svm(vcpu);
@@ -3677,6 +4295,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
3677{ 4295{
3678 struct vcpu_svm *svm = to_svm(vcpu); 4296 struct vcpu_svm *svm = to_svm(vcpu);
3679 4297
4298 if (kvm_vcpu_apicv_active(vcpu))
4299 return;
4300
3680 /* 4301 /*
3681 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes 4302 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3682 * 1, because that's a separate STGI/VMRUN intercept. The next time we 4303 * 1, because that's a separate STGI/VMRUN intercept. The next time we
@@ -3728,7 +4349,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3728{ 4349{
3729 struct vcpu_svm *svm = to_svm(vcpu); 4350 struct vcpu_svm *svm = to_svm(vcpu);
3730 4351
3731 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 4352 if (svm_nested_virtualize_tpr(vcpu))
3732 return; 4353 return;
3733 4354
3734 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { 4355 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
@@ -3742,7 +4363,8 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3742 struct vcpu_svm *svm = to_svm(vcpu); 4363 struct vcpu_svm *svm = to_svm(vcpu);
3743 u64 cr8; 4364 u64 cr8;
3744 4365
3745 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 4366 if (svm_nested_virtualize_tpr(vcpu) ||
4367 kvm_vcpu_apicv_active(vcpu))
3746 return; 4368 return;
3747 4369
3748 cr8 = kvm_get_cr8(vcpu); 4370 cr8 = kvm_get_cr8(vcpu);
@@ -4045,14 +4667,26 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
4045static void svm_cpuid_update(struct kvm_vcpu *vcpu) 4667static void svm_cpuid_update(struct kvm_vcpu *vcpu)
4046{ 4668{
4047 struct vcpu_svm *svm = to_svm(vcpu); 4669 struct vcpu_svm *svm = to_svm(vcpu);
4670 struct kvm_cpuid_entry2 *entry;
4048 4671
4049 /* Update nrips enabled cache */ 4672 /* Update nrips enabled cache */
4050 svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu); 4673 svm->nrips_enabled = !!guest_cpuid_has_nrips(&svm->vcpu);
4674
4675 if (!kvm_vcpu_apicv_active(vcpu))
4676 return;
4677
4678 entry = kvm_find_cpuid_entry(vcpu, 1, 0);
4679 if (entry)
4680 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
4051} 4681}
4052 4682
4053static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) 4683static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
4054{ 4684{
4055 switch (func) { 4685 switch (func) {
4686 case 0x1:
4687 if (avic)
4688 entry->ecx &= ~bit(X86_FEATURE_X2APIC);
4689 break;
4056 case 0x80000001: 4690 case 0x80000001:
4057 if (nested) 4691 if (nested)
4058 entry->ecx |= (1 << 2); /* Set SVM bit */ 4692 entry->ecx |= (1 << 2); /* Set SVM bit */
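Both hooks above hide x2APIC from the guest when AVIC is in use, since AVIC accelerates only xAPIC (MMIO) register accesses. The feature bit being cleared is CPUID.01H:ECX[21]; as a sketch of the masking, assuming that bit number:

#include <stdint.h>

#define X2APIC_ECX_BIT	21	/* CPUID.01H:ECX.x2APIC */

static void hide_x2apic(uint32_t *ecx)
{
	*ecx &= ~(UINT32_C(1) << X2APIC_ECX_BIT);
}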
@@ -4307,6 +4941,15 @@ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
4307{ 4941{
4308} 4942}
4309 4943
4944static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
4945{
4946 if (avic_handle_apic_id_update(vcpu) != 0)
4947 return;
4948 if (avic_handle_dfr_update(vcpu) != 0)
4949 return;
4950 avic_handle_ldr_update(vcpu);
4951}
4952
4310static struct kvm_x86_ops svm_x86_ops = { 4953static struct kvm_x86_ops svm_x86_ops = {
4311 .cpu_has_kvm_support = has_svm, 4954 .cpu_has_kvm_support = has_svm,
4312 .disabled_by_bios = is_disabled, 4955 .disabled_by_bios = is_disabled,
@@ -4322,9 +4965,14 @@ static struct kvm_x86_ops svm_x86_ops = {
4322 .vcpu_free = svm_free_vcpu, 4965 .vcpu_free = svm_free_vcpu,
4323 .vcpu_reset = svm_vcpu_reset, 4966 .vcpu_reset = svm_vcpu_reset,
4324 4967
4968 .vm_init = avic_vm_init,
4969 .vm_destroy = avic_vm_destroy,
4970
4325 .prepare_guest_switch = svm_prepare_guest_switch, 4971 .prepare_guest_switch = svm_prepare_guest_switch,
4326 .vcpu_load = svm_vcpu_load, 4972 .vcpu_load = svm_vcpu_load,
4327 .vcpu_put = svm_vcpu_put, 4973 .vcpu_put = svm_vcpu_put,
4974 .vcpu_blocking = svm_vcpu_blocking,
4975 .vcpu_unblocking = svm_vcpu_unblocking,
4328 4976
4329 .update_bp_intercept = update_bp_intercept, 4977 .update_bp_intercept = update_bp_intercept,
4330 .get_msr = svm_get_msr, 4978 .get_msr = svm_get_msr,
@@ -4382,6 +5030,9 @@ static struct kvm_x86_ops svm_x86_ops = {
4382 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, 5030 .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
4383 .load_eoi_exitmap = svm_load_eoi_exitmap, 5031 .load_eoi_exitmap = svm_load_eoi_exitmap,
4384 .sync_pir_to_irr = svm_sync_pir_to_irr, 5032 .sync_pir_to_irr = svm_sync_pir_to_irr,
5033 .hwapic_irr_update = svm_hwapic_irr_update,
5034 .hwapic_isr_update = svm_hwapic_isr_update,
5035 .apicv_post_state_restore = avic_post_state_restore,
4385 5036
4386 .set_tss_addr = svm_set_tss_addr, 5037 .set_tss_addr = svm_set_tss_addr,
4387 .get_tdp_level = get_npt_level, 5038 .get_tdp_level = get_npt_level,
@@ -4415,6 +5066,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4415 .sched_in = svm_sched_in, 5066 .sched_in = svm_sched_in,
4416 5067
4417 .pmu_ops = &amd_pmu_ops, 5068 .pmu_ops = &amd_pmu_ops,
5069 .deliver_posted_interrupt = svm_deliver_avic_intr,
4418}; 5070};
4419 5071
4420static int __init svm_init(void) 5072static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b72743c5668d..8de925031b5c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1291,6 +1291,63 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
1291 __entry->vcpu_id, __entry->timer_index) 1291 __entry->vcpu_id, __entry->timer_index)
1292); 1292);
1293 1293
1294/*
1295 * Tracepoint for AMD AVIC
1296 */
1297TRACE_EVENT(kvm_avic_incomplete_ipi,
1298 TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
1299 TP_ARGS(vcpu, icrh, icrl, id, index),
1300
1301 TP_STRUCT__entry(
1302 __field(u32, vcpu)
1303 __field(u32, icrh)
1304 __field(u32, icrl)
1305 __field(u32, id)
1306 __field(u32, index)
1307 ),
1308
1309 TP_fast_assign(
1310 __entry->vcpu = vcpu;
1311 __entry->icrh = icrh;
1312 __entry->icrl = icrl;
1313 __entry->id = id;
1314 __entry->index = index;
1315 ),
1316
1317 TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
1318 __entry->vcpu, __entry->icrh, __entry->icrl,
1319 __entry->id, __entry->index)
1320);
1321
1322TRACE_EVENT(kvm_avic_unaccelerated_access,
1323 TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
1324 TP_ARGS(vcpu, offset, ft, rw, vec),
1325
1326 TP_STRUCT__entry(
1327 __field(u32, vcpu)
1328 __field(u32, offset)
1329 __field(bool, ft)
1330 __field(bool, rw)
1331 __field(u32, vec)
1332 ),
1333
1334 TP_fast_assign(
1335 __entry->vcpu = vcpu;
1336 __entry->offset = offset;
1337 __entry->ft = ft;
1338 __entry->rw = rw;
1339 __entry->vec = vec;
1340 ),
1341
1342 TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
1343 __entry->vcpu,
1344 __entry->offset,
1345 __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
1346 __entry->ft ? "trap" : "fault",
1347 __entry->rw ? "write" : "read",
1348 __entry->vec)
1349);
1350
1294#endif /* _TRACE_KVM_H */ 1351#endif /* _TRACE_KVM_H */
1295 1352
1296#undef TRACE_INCLUDE_PATH 1353#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cb47fe3da292..e605d1ed334f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5050,8 +5050,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
5050 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); 5050 vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
5051 5051
5052 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; 5052 cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
5053 vmx_set_cr0(vcpu, cr0); /* enter rmode */
5054 vmx->vcpu.arch.cr0 = cr0; 5053 vmx->vcpu.arch.cr0 = cr0;
5054 vmx_set_cr0(vcpu, cr0); /* enter rmode */
5055 vmx_set_cr4(vcpu, 0); 5055 vmx_set_cr4(vcpu, 0);
5056 vmx_set_efer(vcpu, 0); 5056 vmx_set_efer(vcpu, 0);
5057 vmx_fpu_activate(vcpu); 5057 vmx_fpu_activate(vcpu);
@@ -8318,19 +8318,19 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
8318 vmcs_write64(APIC_ACCESS_ADDR, hpa); 8318 vmcs_write64(APIC_ACCESS_ADDR, hpa);
8319} 8319}
8320 8320
8321static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) 8321static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
8322{ 8322{
8323 u16 status; 8323 u16 status;
8324 u8 old; 8324 u8 old;
8325 8325
8326 if (isr == -1) 8326 if (max_isr == -1)
8327 isr = 0; 8327 max_isr = 0;
8328 8328
8329 status = vmcs_read16(GUEST_INTR_STATUS); 8329 status = vmcs_read16(GUEST_INTR_STATUS);
8330 old = status >> 8; 8330 old = status >> 8;
8331 if (isr != old) { 8331 if (max_isr != old) {
8332 status &= 0xff; 8332 status &= 0xff;
8333 status |= isr << 8; 8333 status |= max_isr << 8;
8334 vmcs_write16(GUEST_INTR_STATUS, status); 8334 vmcs_write16(GUEST_INTR_STATUS, status);
8335 } 8335 }
8336} 8336}
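The VMX hook above, now taking the vCPU and a max_isr argument to match the new kvm_x86_ops signature, updates only the SVI byte of the 16-bit guest interrupt status (low byte RVI, high byte SVI). A packing sketch of that update:

#include <stdint.h>

/* Guest interrupt status: RVI in bits 7:0, SVI in bits 15:8. */
static uint16_t set_svi(uint16_t status, int max_isr)
{
	if (max_isr == -1)
		max_isr = 0;
	return (uint16_t)((status & 0xff) | ((unsigned int)max_isr << 8));
}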
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 12f33e662382..c805cf494154 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -161,6 +161,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
161 { "halt_exits", VCPU_STAT(halt_exits) }, 161 { "halt_exits", VCPU_STAT(halt_exits) },
162 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, 162 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
163 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, 163 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
164 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
164 { "halt_wakeup", VCPU_STAT(halt_wakeup) }, 165 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
165 { "hypercalls", VCPU_STAT(hypercalls) }, 166 { "hypercalls", VCPU_STAT(hypercalls) },
166 { "request_irq", VCPU_STAT(request_irq_exits) }, 167 { "request_irq", VCPU_STAT(request_irq_exits) },
@@ -2002,22 +2003,8 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
2002 vcpu->arch.pv_time_enabled = false; 2003 vcpu->arch.pv_time_enabled = false;
2003} 2004}
2004 2005
2005static void accumulate_steal_time(struct kvm_vcpu *vcpu)
2006{
2007 u64 delta;
2008
2009 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2010 return;
2011
2012 delta = current->sched_info.run_delay - vcpu->arch.st.last_steal;
2013 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2014 vcpu->arch.st.accum_steal = delta;
2015}
2016
2017static void record_steal_time(struct kvm_vcpu *vcpu) 2006static void record_steal_time(struct kvm_vcpu *vcpu)
2018{ 2007{
2019 accumulate_steal_time(vcpu);
2020
2021 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) 2008 if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
2022 return; 2009 return;
2023 2010
@@ -2025,9 +2012,26 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
2025 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) 2012 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
2026 return; 2013 return;
2027 2014
2028 vcpu->arch.st.steal.steal += vcpu->arch.st.accum_steal; 2015 if (vcpu->arch.st.steal.version & 1)
2029 vcpu->arch.st.steal.version += 2; 2016 vcpu->arch.st.steal.version += 1; /* first time write, random junk */
2030 vcpu->arch.st.accum_steal = 0; 2017
2018 vcpu->arch.st.steal.version += 1;
2019
2020 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2021 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2022
2023 smp_wmb();
2024
2025 vcpu->arch.st.steal.steal += current->sched_info.run_delay -
2026 vcpu->arch.st.last_steal;
2027 vcpu->arch.st.last_steal = current->sched_info.run_delay;
2028
2029 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2030 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
2031
2032 smp_wmb();
2033
2034 vcpu->arch.st.steal.version += 1;
2031 2035
2032 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, 2036 kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime,
2033 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); 2037 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
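The rewritten writer above uses the version field as a seqcount: it bumps version to odd, publishes, updates the steal value, publishes again, then bumps version back to even, so the guest never consumes a half-updated record. A user-space sketch of the matching reader protocol, with C11 atomics standing in for the kernel barriers:

#include <stdatomic.h>
#include <stdint.h>

struct steal_time {
	_Atomic uint32_t version;
	uint64_t steal;
};

static uint64_t read_steal(const struct steal_time *st)
{
	uint32_t v;
	uint64_t s;

	do {
		v = atomic_load_explicit(&st->version, memory_order_acquire);
		s = st->steal;
		atomic_thread_fence(memory_order_acquire);
	} while ((v & 1) ||	/* odd: host is mid-update, retry */
		 v != atomic_load_explicit(&st->version,
					   memory_order_relaxed));
	return s;
}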
@@ -7752,6 +7756,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7752 kvm_page_track_init(kvm); 7756 kvm_page_track_init(kvm);
7753 kvm_mmu_init_vm(kvm); 7757 kvm_mmu_init_vm(kvm);
7754 7758
7759 if (kvm_x86_ops->vm_init)
7760 return kvm_x86_ops->vm_init(kvm);
7761
7755 return 0; 7762 return 0;
7756} 7763}
7757 7764
@@ -7873,6 +7880,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
7873 x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0); 7880 x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
7874 x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0); 7881 x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
7875 } 7882 }
7883 if (kvm_x86_ops->vm_destroy)
7884 kvm_x86_ops->vm_destroy(kvm);
7876 kvm_iommu_unmap_guest(kvm); 7885 kvm_iommu_unmap_guest(kvm);
7877 kfree(kvm->arch.vpic); 7886 kfree(kvm->arch.vpic);
7878 kfree(kvm->arch.vioapic); 7887 kfree(kvm->arch.vioapic);
@@ -8355,19 +8364,21 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
8355} 8364}
8356EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); 8365EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
8357 8366
8367bool kvm_arch_has_irq_bypass(void)
8368{
8369 return kvm_x86_ops->update_pi_irte != NULL;
8370}
8371
8358int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, 8372int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
8359 struct irq_bypass_producer *prod) 8373 struct irq_bypass_producer *prod)
8360{ 8374{
8361 struct kvm_kernel_irqfd *irqfd = 8375 struct kvm_kernel_irqfd *irqfd =
8362 container_of(cons, struct kvm_kernel_irqfd, consumer); 8376 container_of(cons, struct kvm_kernel_irqfd, consumer);
8363 8377
8364 if (kvm_x86_ops->update_pi_irte) { 8378 irqfd->producer = prod;
8365 irqfd->producer = prod;
8366 return kvm_x86_ops->update_pi_irte(irqfd->kvm,
8367 prod->irq, irqfd->gsi, 1);
8368 }
8369 8379
8370 return -EINVAL; 8380 return kvm_x86_ops->update_pi_irte(irqfd->kvm,
8381 prod->irq, irqfd->gsi, 1);
8371} 8382}
8372 8383
8373void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, 8384void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
@@ -8377,11 +8388,6 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
8377 struct kvm_kernel_irqfd *irqfd = 8388 struct kvm_kernel_irqfd *irqfd =
8378 container_of(cons, struct kvm_kernel_irqfd, consumer); 8389 container_of(cons, struct kvm_kernel_irqfd, consumer);
8379 8390
8380 if (!kvm_x86_ops->update_pi_irte) {
8381 WARN_ON(irqfd->producer != NULL);
8382 return;
8383 }
8384
8385 WARN_ON(irqfd->producer != prod); 8391 WARN_ON(irqfd->producer != prod);
8386 irqfd->producer = NULL; 8392 irqfd->producer = NULL;
8387 8393
@@ -8429,3 +8435,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
8429EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window); 8435EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
8430EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full); 8436EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
8431EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update); 8437EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
8438EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
8439EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 5152b3898155..4814446a0024 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -468,11 +468,11 @@ static struct cyclecounter cyclecounter = {
468 .mask = CLOCKSOURCE_MASK(56), 468 .mask = CLOCKSOURCE_MASK(56),
469}; 469};
470 470
471static struct timecounter timecounter; 471static struct arch_timer_kvm_info arch_timer_kvm_info;
472 472
473struct timecounter *arch_timer_get_timecounter(void) 473struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
474{ 474{
475 return &timecounter; 475 return &arch_timer_kvm_info;
476} 476}
477 477
478static void __init arch_counter_register(unsigned type) 478static void __init arch_counter_register(unsigned type)
@@ -500,7 +500,8 @@ static void __init arch_counter_register(unsigned type)
500 clocksource_register_hz(&clocksource_counter, arch_timer_rate); 500 clocksource_register_hz(&clocksource_counter, arch_timer_rate);
501 cyclecounter.mult = clocksource_counter.mult; 501 cyclecounter.mult = clocksource_counter.mult;
502 cyclecounter.shift = clocksource_counter.shift; 502 cyclecounter.shift = clocksource_counter.shift;
503 timecounter_init(&timecounter, &cyclecounter, start_count); 503 timecounter_init(&arch_timer_kvm_info.timecounter,
504 &cyclecounter, start_count);
504 505
505 /* 56 bits minimum, so we assume worst case rollover */ 506 /* 56 bits minimum, so we assume worst case rollover */
506 sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate); 507 sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
@@ -744,6 +745,8 @@ static void __init arch_timer_init(void)
744 745
745 arch_timer_register(); 746 arch_timer_register();
746 arch_timer_common_init(); 747 arch_timer_common_init();
748
749 arch_timer_kvm_info.virtual_irq = arch_timer_ppi[VIRT_PPI];
747} 750}
748 751
749static void __init arch_timer_of_init(struct device_node *np) 752static void __init arch_timer_of_init(struct device_node *np)
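The timer driver now exports a small aggregate instead of a bare timecounter, so KVM's arch timer code can read both the timecounter and the virtual timer PPI from a single arch_timer_get_kvm_info() call. Presumed shape of the structure introduced elsewhere in this series (sketch; the actual definition lives in a header not shown here):

struct arch_timer_kvm_info {
	struct timecounter timecounter;
	int virtual_irq;
};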
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index 97c0028e8388..89e7423f0ebb 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -21,6 +21,19 @@
21 21
22#include "irq-gic-common.h" 22#include "irq-gic-common.h"
23 23
24static const struct gic_kvm_info *gic_kvm_info;
25
26const struct gic_kvm_info *gic_get_kvm_info(void)
27{
28 return gic_kvm_info;
29}
30
31void gic_set_kvm_info(const struct gic_kvm_info *info)
32{
33 BUG_ON(gic_kvm_info != NULL);
34 gic_kvm_info = info;
35}
36
24void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, 37void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
25 void *data) 38 void *data)
26{ 39{
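
gic_set_kvm_info() acts as a write-once registry: the BUG_ON guarantees that only one root GIC publishes its virtualization resources, and KVM later pulls them back with gic_get_kvm_info(). A hypothetical driver-side use, with the field values standing in for firmware-provided data:

	static struct gic_kvm_info my_gic_info;	/* hypothetical driver instance */

	my_gic_info.type      = GIC_V2;
	my_gic_info.maint_irq = maint_irq;	/* parsed from DT or ACPI */
	my_gic_info.vctrl     = vctrl_res;	/* GICH region */
	my_gic_info.vcpu      = vcpu_res;	/* GICV region */
	gic_set_kvm_info(&my_gic_info);		/* a second registration would BUG() */
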
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index fff697db8e22..205e5fddf6da 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -19,6 +19,7 @@
19 19
20#include <linux/of.h> 20#include <linux/of.h>
21#include <linux/irqdomain.h> 21#include <linux/irqdomain.h>
22#include <linux/irqchip/arm-gic-common.h>
22 23
23struct gic_quirk { 24struct gic_quirk {
24 const char *desc; 25 const char *desc;
@@ -35,4 +36,6 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void));
35void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, 36void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
36 void *data); 37 void *data);
37 38
39void gic_set_kvm_info(const struct gic_kvm_info *info);
40
38#endif /* _IRQ_GIC_COMMON_H */ 41#endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 1a1ea4f733c1..fb042ba9a3db 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -15,6 +15,8 @@
15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18#define pr_fmt(fmt) "GICv3: " fmt
19
18#include <linux/acpi.h> 20#include <linux/acpi.h>
19#include <linux/cpu.h> 21#include <linux/cpu.h>
20#include <linux/cpu_pm.h> 22#include <linux/cpu_pm.h>
@@ -28,6 +30,7 @@
28#include <linux/slab.h> 30#include <linux/slab.h>
29 31
30#include <linux/irqchip.h> 32#include <linux/irqchip.h>
33#include <linux/irqchip/arm-gic-common.h>
31#include <linux/irqchip/arm-gic-v3.h> 34#include <linux/irqchip/arm-gic-v3.h>
32#include <linux/irqchip/irq-partition-percpu.h> 35#include <linux/irqchip/irq-partition-percpu.h>
33 36
@@ -59,6 +62,8 @@ struct gic_chip_data {
59static struct gic_chip_data gic_data __read_mostly; 62static struct gic_chip_data gic_data __read_mostly;
60static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE; 63static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
61 64
65static struct gic_kvm_info gic_v3_kvm_info;
66
62#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist)) 67#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist))
63#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) 68#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
64#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K) 69#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K)
@@ -1002,7 +1007,7 @@ static int get_cpu_number(struct device_node *dn)
1002} 1007}
1003 1008
1004/* Create all possible partitions at boot time */ 1009/* Create all possible partitions at boot time */
1005static void gic_populate_ppi_partitions(struct device_node *gic_node) 1010static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
1006{ 1011{
1007 struct device_node *parts_node, *child_part; 1012 struct device_node *parts_node, *child_part;
1008 int part_idx = 0, i; 1013 int part_idx = 0, i;
@@ -1089,6 +1094,30 @@ static void gic_populate_ppi_partitions(struct device_node *gic_node)
1089 } 1094 }
1090} 1095}
1091 1096
1097static void __init gic_of_setup_kvm_info(struct device_node *node)
1098{
1099 int ret;
1100 struct resource r;
1101 u32 gicv_idx;
1102
1103 gic_v3_kvm_info.type = GIC_V3;
1104
1105 gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
1106 if (!gic_v3_kvm_info.maint_irq)
1107 return;
1108
1109 if (of_property_read_u32(node, "#redistributor-regions",
1110 &gicv_idx))
1111 gicv_idx = 1;
1112
1113 gicv_idx += 3; /* Also skip GICD, GICC, GICH */
1114 ret = of_address_to_resource(node, gicv_idx, &r);
1115 if (!ret)
1116 gic_v3_kvm_info.vcpu = r;
1117
1118 gic_set_kvm_info(&gic_v3_kvm_info);
1119}
1120
1092static int __init gic_of_init(struct device_node *node, struct device_node *parent) 1121static int __init gic_of_init(struct device_node *node, struct device_node *parent)
1093{ 1122{
1094 void __iomem *dist_base; 1123 void __iomem *dist_base;
@@ -1144,6 +1173,7 @@ static int __init gic_of_init(struct device_node *node, struct device_node *pare
1144 goto out_unmap_rdist; 1173 goto out_unmap_rdist;
1145 1174
1146 gic_populate_ppi_partitions(node); 1175 gic_populate_ppi_partitions(node);
1176 gic_of_setup_kvm_info(node);
1147 return 0; 1177 return 0;
1148 1178
1149out_unmap_rdist: 1179out_unmap_rdist:
@@ -1159,19 +1189,25 @@ out_unmap_dist:
1159IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); 1189IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init);
1160 1190
1161#ifdef CONFIG_ACPI 1191#ifdef CONFIG_ACPI
1162static void __iomem *dist_base; 1192static struct
1163static struct redist_region *redist_regs __initdata; 1193{
1164static u32 nr_redist_regions __initdata; 1194 void __iomem *dist_base;
1165static bool single_redist; 1195 struct redist_region *redist_regs;
1196 u32 nr_redist_regions;
1197 bool single_redist;
1198 u32 maint_irq;
1199 int maint_irq_mode;
1200 phys_addr_t vcpu_base;
1201} acpi_data __initdata;
1166 1202
1167static void __init 1203static void __init
1168gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) 1204gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
1169{ 1205{
1170 static int count = 0; 1206 static int count = 0;
1171 1207
1172 redist_regs[count].phys_base = phys_base; 1208 acpi_data.redist_regs[count].phys_base = phys_base;
1173 redist_regs[count].redist_base = redist_base; 1209 acpi_data.redist_regs[count].redist_base = redist_base;
1174 redist_regs[count].single_redist = single_redist; 1210 acpi_data.redist_regs[count].single_redist = acpi_data.single_redist;
1175 count++; 1211 count++;
1176} 1212}
1177 1213
@@ -1199,7 +1235,7 @@ gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
1199{ 1235{
1200 struct acpi_madt_generic_interrupt *gicc = 1236 struct acpi_madt_generic_interrupt *gicc =
1201 (struct acpi_madt_generic_interrupt *)header; 1237 (struct acpi_madt_generic_interrupt *)header;
1202 u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; 1238 u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK;
1203 u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; 1239 u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2;
1204 void __iomem *redist_base; 1240 void __iomem *redist_base;
1205 1241
@@ -1216,7 +1252,7 @@ static int __init gic_acpi_collect_gicr_base(void)
1216 acpi_tbl_entry_handler redist_parser; 1252 acpi_tbl_entry_handler redist_parser;
1217 enum acpi_madt_type type; 1253 enum acpi_madt_type type;
1218 1254
1219 if (single_redist) { 1255 if (acpi_data.single_redist) {
1220 type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; 1256 type = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
1221 redist_parser = gic_acpi_parse_madt_gicc; 1257 redist_parser = gic_acpi_parse_madt_gicc;
1222 } else { 1258 } else {
@@ -1267,14 +1303,14 @@ static int __init gic_acpi_count_gicr_regions(void)
1267 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, 1303 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
1268 gic_acpi_match_gicr, 0); 1304 gic_acpi_match_gicr, 0);
1269 if (count > 0) { 1305 if (count > 0) {
1270 single_redist = false; 1306 acpi_data.single_redist = false;
1271 return count; 1307 return count;
1272 } 1308 }
1273 1309
1274 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, 1310 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
1275 gic_acpi_match_gicc, 0); 1311 gic_acpi_match_gicc, 0);
1276 if (count > 0) 1312 if (count > 0)
1277 single_redist = true; 1313 acpi_data.single_redist = true;
1278 1314
1279 return count; 1315 return count;
1280} 1316}
@@ -1294,36 +1330,117 @@ static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
1294 if (count <= 0) 1330 if (count <= 0)
1295 return false; 1331 return false;
1296 1332
1297 nr_redist_regions = count; 1333 acpi_data.nr_redist_regions = count;
1298 return true; 1334 return true;
1299} 1335}
1300 1336
1337static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
1338 const unsigned long end)
1339{
1340 struct acpi_madt_generic_interrupt *gicc =
1341 (struct acpi_madt_generic_interrupt *)header;
1342 int maint_irq_mode;
1343 static int first_madt = true;
1344
1345 /* Skip unusable CPUs */
1346 if (!(gicc->flags & ACPI_MADT_ENABLED))
1347 return 0;
1348
1349 maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
1350 ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
1351
1352 if (first_madt) {
1353 first_madt = false;
1354
1355 acpi_data.maint_irq = gicc->vgic_interrupt;
1356 acpi_data.maint_irq_mode = maint_irq_mode;
1357 acpi_data.vcpu_base = gicc->gicv_base_address;
1358
1359 return 0;
1360 }
1361
1362 /*
1363 * The maintenance interrupt and GICV should be the same for every CPU
1364 */
1365 if ((acpi_data.maint_irq != gicc->vgic_interrupt) ||
1366 (acpi_data.maint_irq_mode != maint_irq_mode) ||
1367 (acpi_data.vcpu_base != gicc->gicv_base_address))
1368 return -EINVAL;
1369
1370 return 0;
1371}
1372
1373static bool __init gic_acpi_collect_virt_info(void)
1374{
1375 int count;
1376
1377 count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
1378 gic_acpi_parse_virt_madt_gicc, 0);
1379
1380 return (count > 0);
1381}
1382
1301#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) 1383#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K)
1384#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
1385#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
1386
1387static void __init gic_acpi_setup_kvm_info(void)
1388{
1389 int irq;
1390
1391 if (!gic_acpi_collect_virt_info()) {
1392 pr_warn("Unable to get hardware information used for virtualization\n");
1393 return;
1394 }
1395
1396 gic_v3_kvm_info.type = GIC_V3;
1397
1398 irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
1399 acpi_data.maint_irq_mode,
1400 ACPI_ACTIVE_HIGH);
1401 if (irq <= 0)
1402 return;
1403
1404 gic_v3_kvm_info.maint_irq = irq;
1405
1406 if (acpi_data.vcpu_base) {
1407 struct resource *vcpu = &gic_v3_kvm_info.vcpu;
1408
1409 vcpu->flags = IORESOURCE_MEM;
1410 vcpu->start = acpi_data.vcpu_base;
1411 vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
1412 }
1413
1414 gic_set_kvm_info(&gic_v3_kvm_info);
1415}
1302 1416
1303static int __init 1417static int __init
1304gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end) 1418gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
1305{ 1419{
1306 struct acpi_madt_generic_distributor *dist; 1420 struct acpi_madt_generic_distributor *dist;
1307 struct fwnode_handle *domain_handle; 1421 struct fwnode_handle *domain_handle;
1422 size_t size;
1308 int i, err; 1423 int i, err;
1309 1424
1310 /* Get distributor base address */ 1425 /* Get distributor base address */
1311 dist = (struct acpi_madt_generic_distributor *)header; 1426 dist = (struct acpi_madt_generic_distributor *)header;
1312 dist_base = ioremap(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE); 1427 acpi_data.dist_base = ioremap(dist->base_address,
1313 if (!dist_base) { 1428 ACPI_GICV3_DIST_MEM_SIZE);
1429 if (!acpi_data.dist_base) {
1314 pr_err("Unable to map GICD registers\n"); 1430 pr_err("Unable to map GICD registers\n");
1315 return -ENOMEM; 1431 return -ENOMEM;
1316 } 1432 }
1317 1433
1318 err = gic_validate_dist_version(dist_base); 1434 err = gic_validate_dist_version(acpi_data.dist_base);
1319 if (err) { 1435 if (err) {
1320 pr_err("No distributor detected at @%p, giving up", dist_base); 1436 pr_err("No distributor detected at @%p, giving up",
1437 acpi_data.dist_base);
1321 goto out_dist_unmap; 1438 goto out_dist_unmap;
1322 } 1439 }
1323 1440
1324 redist_regs = kzalloc(sizeof(*redist_regs) * nr_redist_regions, 1441 size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions;
1325 GFP_KERNEL); 1442 acpi_data.redist_regs = kzalloc(size, GFP_KERNEL);
1326 if (!redist_regs) { 1443 if (!acpi_data.redist_regs) {
1327 err = -ENOMEM; 1444 err = -ENOMEM;
1328 goto out_dist_unmap; 1445 goto out_dist_unmap;
1329 } 1446 }
@@ -1332,29 +1449,31 @@ gic_acpi_init(struct acpi_subtable_header *header, const unsigned long end)
1332 if (err) 1449 if (err)
1333 goto out_redist_unmap; 1450 goto out_redist_unmap;
1334 1451
1335 domain_handle = irq_domain_alloc_fwnode(dist_base); 1452 domain_handle = irq_domain_alloc_fwnode(acpi_data.dist_base);
1336 if (!domain_handle) { 1453 if (!domain_handle) {
1337 err = -ENOMEM; 1454 err = -ENOMEM;
1338 goto out_redist_unmap; 1455 goto out_redist_unmap;
1339 } 1456 }
1340 1457
1341 err = gic_init_bases(dist_base, redist_regs, nr_redist_regions, 0, 1458 err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs,
1342 domain_handle); 1459 acpi_data.nr_redist_regions, 0, domain_handle);
1343 if (err) 1460 if (err)
1344 goto out_fwhandle_free; 1461 goto out_fwhandle_free;
1345 1462
1346 acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); 1463 acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle);
1464 gic_acpi_setup_kvm_info();
1465
1347 return 0; 1466 return 0;
1348 1467
1349out_fwhandle_free: 1468out_fwhandle_free:
1350 irq_domain_free_fwnode(domain_handle); 1469 irq_domain_free_fwnode(domain_handle);
1351out_redist_unmap: 1470out_redist_unmap:
1352 for (i = 0; i < nr_redist_regions; i++) 1471 for (i = 0; i < acpi_data.nr_redist_regions; i++)
1353 if (redist_regs[i].redist_base) 1472 if (acpi_data.redist_regs[i].redist_base)
1354 iounmap(redist_regs[i].redist_base); 1473 iounmap(acpi_data.redist_regs[i].redist_base);
1355 kfree(redist_regs); 1474 kfree(acpi_data.redist_regs);
1356out_dist_unmap: 1475out_dist_unmap:
1357 iounmap(dist_base); 1476 iounmap(acpi_data.dist_base);
1358 return err; 1477 return err;
1359} 1478}
1360IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 1479IRQCHIP_ACPI_DECLARE(gic_v3, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
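
Folding the scattered __initdata globals into one anonymous-struct instance keeps the section annotation in a single place and makes it obvious that the whole blob is discarded together after boot. The same pattern in isolation, with hypothetical fields:

	static struct {
		void __iomem *base;	/* mapped during probe only */
		u32 nr_regions;
	} boot_data __initdata;		/* freed along with the init sections */
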
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 1de20e14a721..b4e647179346 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -105,6 +105,8 @@ static struct static_key supports_deactivate = STATIC_KEY_INIT_TRUE;
105 105
106static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly; 106static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
107 107
108static struct gic_kvm_info gic_v2_kvm_info;
109
108#ifdef CONFIG_GIC_NON_BANKED 110#ifdef CONFIG_GIC_NON_BANKED
109static void __iomem *gic_get_percpu_base(union gic_base *base) 111static void __iomem *gic_get_percpu_base(union gic_base *base)
110{ 112{
@@ -1248,7 +1250,7 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
1248 return true; 1250 return true;
1249} 1251}
1250 1252
1251static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node) 1253static int __init gic_of_setup(struct gic_chip_data *gic, struct device_node *node)
1252{ 1254{
1253 if (!gic || !node) 1255 if (!gic || !node)
1254 return -EINVAL; 1256 return -EINVAL;
@@ -1272,6 +1274,29 @@ error:
1272 return -ENOMEM; 1274 return -ENOMEM;
1273} 1275}
1274 1276
1277static void __init gic_of_setup_kvm_info(struct device_node *node)
1278{
1279 int ret;
1280 struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
1281 struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
1282
1283 gic_v2_kvm_info.type = GIC_V2;
1284
1285 gic_v2_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
1286 if (!gic_v2_kvm_info.maint_irq)
1287 return;
1288
1289 ret = of_address_to_resource(node, 2, vctrl_res);
1290 if (ret)
1291 return;
1292
1293 ret = of_address_to_resource(node, 3, vcpu_res);
1294 if (ret)
1295 return;
1296
1297 gic_set_kvm_info(&gic_v2_kvm_info);
1298}
1299
1275int __init 1300int __init
1276gic_of_init(struct device_node *node, struct device_node *parent) 1301gic_of_init(struct device_node *node, struct device_node *parent)
1277{ 1302{
@@ -1303,8 +1328,10 @@ gic_of_init(struct device_node *node, struct device_node *parent)
1303 return ret; 1328 return ret;
1304 } 1329 }
1305 1330
1306 if (!gic_cnt) 1331 if (!gic_cnt) {
1307 gic_init_physaddr(node); 1332 gic_init_physaddr(node);
1333 gic_of_setup_kvm_info(node);
1334 }
1308 1335
1309 if (parent) { 1336 if (parent) {
1310 irq = irq_of_parse_and_map(node, 0); 1337 irq = irq_of_parse_and_map(node, 0);
@@ -1330,7 +1357,14 @@ IRQCHIP_DECLARE(pl390, "arm,pl390", gic_of_init);
1330#endif 1357#endif
1331 1358
1332#ifdef CONFIG_ACPI 1359#ifdef CONFIG_ACPI
1333static phys_addr_t cpu_phy_base __initdata; 1360static struct
1361{
1362 phys_addr_t cpu_phys_base;
1363 u32 maint_irq;
1364 int maint_irq_mode;
1365 phys_addr_t vctrl_base;
1366 phys_addr_t vcpu_base;
1367} acpi_data __initdata;
1334 1368
1335static int __init 1369static int __init
1336gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, 1370gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
@@ -1350,10 +1384,16 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
1350 * All CPU interface addresses have to be the same. 1384 * All CPU interface addresses have to be the same.
1351 */ 1385 */
1352 gic_cpu_base = processor->base_address; 1386 gic_cpu_base = processor->base_address;
1353 if (cpu_base_assigned && gic_cpu_base != cpu_phy_base) 1387 if (cpu_base_assigned && gic_cpu_base != acpi_data.cpu_phys_base)
1354 return -EINVAL; 1388 return -EINVAL;
1355 1389
1356 cpu_phy_base = gic_cpu_base; 1390 acpi_data.cpu_phys_base = gic_cpu_base;
1391 acpi_data.maint_irq = processor->vgic_interrupt;
1392 acpi_data.maint_irq_mode = (processor->flags & ACPI_MADT_VGIC_IRQ_MODE) ?
1393 ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE;
1394 acpi_data.vctrl_base = processor->gich_base_address;
1395 acpi_data.vcpu_base = processor->gicv_base_address;
1396
1357 cpu_base_assigned = 1; 1397 cpu_base_assigned = 1;
1358 return 0; 1398 return 0;
1359} 1399}
@@ -1384,6 +1424,41 @@ static bool __init gic_validate_dist(struct acpi_subtable_header *header,
1384 1424
1385#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) 1425#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K)
1386#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) 1426#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K)
1427#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
1428#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
1429
1430static void __init gic_acpi_setup_kvm_info(void)
1431{
1432 int irq;
1433 struct resource *vctrl_res = &gic_v2_kvm_info.vctrl;
1434 struct resource *vcpu_res = &gic_v2_kvm_info.vcpu;
1435
1436 gic_v2_kvm_info.type = GIC_V2;
1437
1438 if (!acpi_data.vctrl_base)
1439 return;
1440
1441 vctrl_res->flags = IORESOURCE_MEM;
1442 vctrl_res->start = acpi_data.vctrl_base;
1443 vctrl_res->end = vctrl_res->start + ACPI_GICV2_VCTRL_MEM_SIZE - 1;
1444
1445 if (!acpi_data.vcpu_base)
1446 return;
1447
1448 vcpu_res->flags = IORESOURCE_MEM;
1449 vcpu_res->start = acpi_data.vcpu_base;
1450 vcpu_res->end = vcpu_res->start + ACPI_GICV2_VCPU_MEM_SIZE - 1;
1451
1452 irq = acpi_register_gsi(NULL, acpi_data.maint_irq,
1453 acpi_data.maint_irq_mode,
1454 ACPI_ACTIVE_HIGH);
1455 if (irq <= 0)
1456 return;
1457
1458 gic_v2_kvm_info.maint_irq = irq;
1459
1460 gic_set_kvm_info(&gic_v2_kvm_info);
1461}
1387 1462
1388static int __init gic_v2_acpi_init(struct acpi_subtable_header *header, 1463static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
1389 const unsigned long end) 1464 const unsigned long end)
@@ -1401,7 +1476,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
1401 return -EINVAL; 1476 return -EINVAL;
1402 } 1477 }
1403 1478
1404 gic->raw_cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE); 1479 gic->raw_cpu_base = ioremap(acpi_data.cpu_phys_base, ACPI_GIC_CPU_IF_MEM_SIZE);
1405 if (!gic->raw_cpu_base) { 1480 if (!gic->raw_cpu_base) {
1406 pr_err("Unable to map GICC registers\n"); 1481 pr_err("Unable to map GICC registers\n");
1407 return -ENOMEM; 1482 return -ENOMEM;
@@ -1447,6 +1522,8 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
1447 if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) 1522 if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
1448 gicv2m_init(NULL, gic_data[0].domain); 1523 gicv2m_init(NULL, gic_data[0].domain);
1449 1524
1525 gic_acpi_setup_kvm_info();
1526
1450 return 0; 1527 return 0;
1451} 1528}
1452IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 1529IRQCHIP_ACPI_DECLARE(gic_v2, ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR,
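
Unlike DT, the MADT carries only base addresses for GICH/GICV, so the driver has to assume the architectural window sizes (4K virtual control interface, 8K virtual CPU interface) when it builds the resources. Constructing one such resource, following the pattern above (gich_base is a hypothetical MADT-provided address):

	struct resource vctrl = {
		.flags = IORESOURCE_MEM,
		.start = gich_base,		/* from the MADT GICC entry */
		.end   = gich_base + SZ_4K - 1,	/* architectural GICH size */
	};
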
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 6804354c42bd..0ac520dd1b21 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -49,7 +49,9 @@ struct read_info_sccb {
49 u8 _pad_117[119 - 117]; /* 117-118 */ 49 u8 _pad_117[119 - 117]; /* 117-118 */
50 u8 fac119; /* 119 */ 50 u8 fac119; /* 119 */
51 u16 hcpua; /* 120-121 */ 51 u16 hcpua; /* 120-121 */
52 u8 _pad_122[4096 - 122]; /* 122-4095 */ 52 u8 _pad_122[124 - 122]; /* 122-123 */
53 u32 hmfai; /* 124-127 */
54 u8 _pad_128[4096 - 128]; /* 128-4095 */
53} __packed __aligned(PAGE_SIZE); 55} __packed __aligned(PAGE_SIZE);
54 56
55static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata; 57static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata;
@@ -155,6 +157,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
155 sclp.mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0; 157 sclp.mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0;
156 sclp.mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0; 158 sclp.mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0;
157 sclp.mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0; 159 sclp.mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0;
160
161 sclp.hmfai = sccb->hmfai;
158} 162}
159 163
160/* 164/*
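
The SCCB edit carves a u32 out of the old padding: bytes 122-123 stay reserved, hmfai lands at bytes 124-127, and the overall 4096-byte layout is unchanged. A self-contained stand-in for the tail of the layout, checked with offsetof (offsets here are relative to byte 120):

	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	struct sccb_tail {			/* stand-in, begins at byte 120 */
		uint16_t hcpua;			/* 120-121 */
		uint8_t  _pad_122[2];		/* 122-123 */
		uint32_t hmfai;			/* 124-127 */
	};

	int main(void)
	{
		assert(offsetof(struct sccb_tail, hmfai) == 124 - 120);
		return 0;
	}
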
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 25d0914481a2..caedb74c9210 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -49,11 +49,16 @@ enum arch_timer_reg {
49 49
50#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */ 50#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */
51 51
52struct arch_timer_kvm_info {
53 struct timecounter timecounter;
54 int virtual_irq;
55};
56
52#ifdef CONFIG_ARM_ARCH_TIMER 57#ifdef CONFIG_ARM_ARCH_TIMER
53 58
54extern u32 arch_timer_get_rate(void); 59extern u32 arch_timer_get_rate(void);
55extern u64 (*arch_timer_read_counter)(void); 60extern u64 (*arch_timer_read_counter)(void);
56extern struct timecounter *arch_timer_get_timecounter(void); 61extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
57 62
58#else 63#else
59 64
@@ -67,11 +72,6 @@ static inline u64 arch_timer_read_counter(void)
67 return 0; 72 return 0;
68} 73}
69 74
70static inline struct timecounter *arch_timer_get_timecounter(void)
71{
72 return NULL;
73}
74
75#endif 75#endif
76 76
77#endif 77#endif
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 281caf847fad..be6037aa703d 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -25,6 +25,7 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <linux/types.h> 26#include <linux/types.h>
27#include <kvm/iodev.h> 27#include <kvm/iodev.h>
28#include <linux/irqchip/arm-gic-common.h>
28 29
29#define VGIC_NR_IRQS_LEGACY 256 30#define VGIC_NR_IRQS_LEGACY 256
30#define VGIC_NR_SGIS 16 31#define VGIC_NR_SGIS 16
@@ -353,15 +354,15 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
353#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) 354#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus))
354#define vgic_ready(k) ((k)->arch.vgic.ready) 355#define vgic_ready(k) ((k)->arch.vgic.ready)
355 356
356int vgic_v2_probe(struct device_node *vgic_node, 357int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
357 const struct vgic_ops **ops, 358 const struct vgic_ops **ops,
358 const struct vgic_params **params); 359 const struct vgic_params **params);
359#ifdef CONFIG_KVM_ARM_VGIC_V3 360#ifdef CONFIG_KVM_ARM_VGIC_V3
360int vgic_v3_probe(struct device_node *vgic_node, 361int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
361 const struct vgic_ops **ops, 362 const struct vgic_ops **ops,
362 const struct vgic_params **params); 363 const struct vgic_params **params);
363#else 364#else
364static inline int vgic_v3_probe(struct device_node *vgic_node, 365static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
365 const struct vgic_ops **ops, 366 const struct vgic_ops **ops,
366 const struct vgic_params **params) 367 const struct vgic_params **params)
367{ 368{
diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h
index 1551b5b2f4c2..f0f5d2671509 100644
--- a/include/linux/irqbypass.h
+++ b/include/linux/irqbypass.h
@@ -34,7 +34,7 @@ struct irq_bypass_consumer;
34/** 34/**
35 * struct irq_bypass_producer - IRQ bypass producer definition 35 * struct irq_bypass_producer - IRQ bypass producer definition
36 * @node: IRQ bypass manager private list management 36 * @node: IRQ bypass manager private list management
37 * @token: opaque token to match between producer and consumer 37 * @token: opaque token to match between producer and consumer (non-NULL)
38 * @irq: Linux IRQ number for the producer device 38 * @irq: Linux IRQ number for the producer device
39 * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) 39 * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional)
40 * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) 40 * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional)
@@ -60,7 +60,7 @@ struct irq_bypass_producer {
60/** 60/**
61 * struct irq_bypass_consumer - IRQ bypass consumer definition 61 * struct irq_bypass_consumer - IRQ bypass consumer definition
62 * @node: IRQ bypass manager private list management 62 * @node: IRQ bypass manager private list management
63 * @token: opaque token to match between producer and consumer 63 * @token: opaque token to match between producer and consumer (non-NULL)
64 * @add_producer: Connect the IRQ consumer to an IRQ producer 64 * @add_producer: Connect the IRQ consumer to an IRQ producer
65 * @del_producer: Disconnect the IRQ consumer from an IRQ producer 65 * @del_producer: Disconnect the IRQ consumer from an IRQ producer
66 * @stop: Perform any quiesce operations necessary prior to add/del (optional) 66 * @stop: Perform any quiesce operations necessary prior to add/del (optional)
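
The strengthened kernel-doc matches the new runtime checks in virt/lib/irqbypass.c at the end of this patch: a NULL token can never match a partner, so registration now rejects it up front. A hypothetical producer setup showing the convention of using the eventfd context as the match key (eventfd_ctx and host_irq are assumptions):

	struct irq_bypass_producer prod = {
		.token = (void *)eventfd_ctx,	/* must be non-NULL to match a consumer */
		.irq   = host_irq,
	};

	if (irq_bypass_register_producer(&prod))
		pr_warn("irq bypass producer registration failed\n");
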
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
new file mode 100644
index 000000000000..c647b0547bcd
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -0,0 +1,34 @@
1/*
2 * include/linux/irqchip/arm-gic-common.h
3 *
4 * Copyright (C) 2016 ARM Limited, All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
11#define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
12
13#include <linux/types.h>
14#include <linux/ioport.h>
15
16enum gic_type {
17 GIC_V2,
18 GIC_V3,
19};
20
21struct gic_kvm_info {
22 /* GIC type */
23 enum gic_type type;
24 /* Virtual CPU interface */
25 struct resource vcpu;
26 /* Interrupt number */
27 unsigned int maint_irq;
28 /* Virtual control interface */
29 struct resource vctrl;
30};
31
32const struct gic_kvm_info *gic_get_kvm_info(void);
33
34#endif /* __LINUX_IRQCHIP_ARM_GIC_COMMON_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5276fe0916fc..b1fa8f11c95b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -35,6 +35,10 @@
35 35
36#include <asm/kvm_host.h> 36#include <asm/kvm_host.h>
37 37
38#ifndef KVM_MAX_VCPU_ID
39#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
40#endif
41
38/* 42/*
39 * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used 43 * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
40 * in kvm, other bits are visible for userspace which are defined in 44 * in kvm, other bits are visible for userspace which are defined in
@@ -225,6 +229,7 @@ struct kvm_vcpu {
225 sigset_t sigset; 229 sigset_t sigset;
226 struct kvm_vcpu_stat stat; 230 struct kvm_vcpu_stat stat;
227 unsigned int halt_poll_ns; 231 unsigned int halt_poll_ns;
232 bool valid_wakeup;
228 233
229#ifdef CONFIG_HAS_IOMEM 234#ifdef CONFIG_HAS_IOMEM
230 int mmio_needed; 235 int mmio_needed;
@@ -447,12 +452,13 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
447 452
448static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) 453static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
449{ 454{
450 struct kvm_vcpu *vcpu; 455 struct kvm_vcpu *vcpu = NULL;
451 int i; 456 int i;
452 457
453 if (id < 0 || id >= KVM_MAX_VCPUS) 458 if (id < 0)
454 return NULL; 459 return NULL;
455 vcpu = kvm_get_vcpu(kvm, id); 460 if (id < KVM_MAX_VCPUS)
461 vcpu = kvm_get_vcpu(kvm, id);
456 if (vcpu && vcpu->vcpu_id == id) 462 if (vcpu && vcpu->vcpu_id == id)
457 return vcpu; 463 return vcpu;
458 kvm_for_each_vcpu(i, vcpu, kvm) 464 kvm_for_each_vcpu(i, vcpu, kvm)
@@ -651,6 +657,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
651void kvm_vcpu_block(struct kvm_vcpu *vcpu); 657void kvm_vcpu_block(struct kvm_vcpu *vcpu);
652void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); 658void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
653void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); 659void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
660void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
654void kvm_vcpu_kick(struct kvm_vcpu *vcpu); 661void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
655int kvm_vcpu_yield_to(struct kvm_vcpu *target); 662int kvm_vcpu_yield_to(struct kvm_vcpu *target);
656void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); 663void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
@@ -1091,6 +1098,11 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
1091 1098
1092static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) 1099static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
1093{ 1100{
1101 /*
1102 * Ensure the rest of the request is published to kvm_check_request's
1103 * caller. Paired with the smp_mb__after_atomic in kvm_check_request.
1104 */
1105 smp_wmb();
1094 set_bit(req, &vcpu->requests); 1106 set_bit(req, &vcpu->requests);
1095} 1107}
1096 1108
@@ -1098,6 +1110,12 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
1098{ 1110{
1099 if (test_bit(req, &vcpu->requests)) { 1111 if (test_bit(req, &vcpu->requests)) {
1100 clear_bit(req, &vcpu->requests); 1112 clear_bit(req, &vcpu->requests);
1113
1114 /*
1115 * Ensure the rest of the request is visible to kvm_check_request's
1116 * caller. Paired with the smp_wmb in kvm_make_request.
1117 */
1118 smp_mb__after_atomic();
1101 return true; 1119 return true;
1102 } else { 1120 } else {
1103 return false; 1121 return false;
@@ -1169,6 +1187,7 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val)
1169#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ 1187#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */
1170 1188
1171#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS 1189#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
1190bool kvm_arch_has_irq_bypass(void);
1172int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, 1191int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *,
1173 struct irq_bypass_producer *); 1192 struct irq_bypass_producer *);
1174void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, 1193void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *,
@@ -1179,4 +1198,18 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
1179 uint32_t guest_irq, bool set); 1198 uint32_t guest_irq, bool set);
1180#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ 1199#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
1181 1200
1201#ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
1202/* If we wake up during the poll time, was it a successful poll? */
1203static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
1204{
1205 return vcpu->valid_wakeup;
1206}
1207
1208#else
1209static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu)
1210{
1211 return true;
1212}
1213#endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
1214
1182#endif 1215#endif
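
The barrier pair turns vcpu->requests into a message-passing channel: anything written before kvm_make_request() is guaranteed visible once kvm_check_request() observes the bit. Schematically, with KVM_REQ_FOO and the payload field as hypothetical names:

	/* CPU 0 (requester): publish the payload, then the request bit. */
	vcpu->arch.foo = new_value;		/* hypothetical payload */
	kvm_make_request(KVM_REQ_FOO, vcpu);	/* smp_wmb() precedes set_bit() */

	/* CPU 1 (vCPU thread): observe the bit, then safely read the payload. */
	if (kvm_check_request(KVM_REQ_FOO, vcpu))	/* smp_mb__after_atomic() follows clear_bit() */
		handle(vcpu->arch.foo);			/* guaranteed to see new_value */
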
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index aa69253ecc7d..526fb3d2e43a 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -38,22 +38,25 @@ TRACE_EVENT(kvm_userspace_exit,
38); 38);
39 39
40TRACE_EVENT(kvm_vcpu_wakeup, 40TRACE_EVENT(kvm_vcpu_wakeup,
41 TP_PROTO(__u64 ns, bool waited), 41 TP_PROTO(__u64 ns, bool waited, bool valid),
42 TP_ARGS(ns, waited), 42 TP_ARGS(ns, waited, valid),
43 43
44 TP_STRUCT__entry( 44 TP_STRUCT__entry(
45 __field( __u64, ns ) 45 __field( __u64, ns )
46 __field( bool, waited ) 46 __field( bool, waited )
47 __field( bool, valid )
47 ), 48 ),
48 49
49 TP_fast_assign( 50 TP_fast_assign(
50 __entry->ns = ns; 51 __entry->ns = ns;
51 __entry->waited = waited; 52 __entry->waited = waited;
53 __entry->valid = valid;
52 ), 54 ),
53 55
54 TP_printk("%s time %lld ns", 56 TP_printk("%s time %lld ns, polling %s",
55 __entry->waited ? "wait" : "poll", 57 __entry->waited ? "wait" : "poll",
56 __entry->ns) 58 __entry->ns,
59 __entry->valid ? "valid" : "invalid")
57); 60);
58 61
59#if defined(CONFIG_HAVE_KVM_IRQFD) 62#if defined(CONFIG_HAVE_KVM_IRQFD)
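
With the extra field, a wakeup that cut polling short for an unrelated reason is now visible directly in the trace. Given the TP_printk format above, an invalid wakeup would render along these lines (values illustrative):

	kvm_vcpu_wakeup: poll time 48234 ns, polling invalid
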
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7f1f8032ec1..05ebf475104c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -865,6 +865,7 @@ struct kvm_ppc_smmu_info {
865#define KVM_CAP_SPAPR_TCE_64 125 865#define KVM_CAP_SPAPR_TCE_64 125
866#define KVM_CAP_ARM_PMU_V3 126 866#define KVM_CAP_ARM_PMU_V3 126
867#define KVM_CAP_VCPU_ATTRIBUTES 127 867#define KVM_CAP_VCPU_ATTRIBUTES 127
868#define KVM_CAP_MAX_VCPU_ID 128
868 869
869#ifdef KVM_CAP_IRQ_ROUTING 870#ifdef KVM_CAP_IRQ_ROUTING
870 871
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7a79b6853583..e5d6108f5e85 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -41,6 +41,9 @@ config KVM_VFIO
41config HAVE_KVM_ARCH_TLB_FLUSH_ALL 41config HAVE_KVM_ARCH_TLB_FLUSH_ALL
42 bool 42 bool
43 43
44config HAVE_KVM_INVALID_WAKEUPS
45 bool
46
44config KVM_GENERIC_DIRTYLOG_READ_PROTECT 47config KVM_GENERIC_DIRTYLOG_READ_PROTECT
45 bool 48 bool
46 49
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 9aaa35dd9144..409db3304471 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -17,7 +17,6 @@
17 */ 17 */
18 18
19#include <linux/cpu.h> 19#include <linux/cpu.h>
20#include <linux/of_irq.h>
21#include <linux/kvm.h> 20#include <linux/kvm.h>
22#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
23#include <linux/interrupt.h> 22#include <linux/interrupt.h>
@@ -438,45 +437,29 @@ static struct notifier_block kvm_timer_cpu_nb = {
438 .notifier_call = kvm_timer_cpu_notify, 437 .notifier_call = kvm_timer_cpu_notify,
439}; 438};
440 439
441static const struct of_device_id arch_timer_of_match[] = {
442 { .compatible = "arm,armv7-timer", },
443 { .compatible = "arm,armv8-timer", },
444 {},
445};
446
447int kvm_timer_hyp_init(void) 440int kvm_timer_hyp_init(void)
448{ 441{
449 struct device_node *np; 442 struct arch_timer_kvm_info *info;
450 unsigned int ppi;
451 int err; 443 int err;
452 444
453 timecounter = arch_timer_get_timecounter(); 445 info = arch_timer_get_kvm_info();
454 if (!timecounter) 446 timecounter = &info->timecounter;
455 return -ENODEV;
456 447
457 np = of_find_matching_node(NULL, arch_timer_of_match); 448 if (info->virtual_irq <= 0) {
458 if (!np) { 449 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
459 kvm_err("kvm_arch_timer: can't find DT node\n"); 450 info->virtual_irq);
460 return -ENODEV; 451 return -ENODEV;
461 } 452 }
453 host_vtimer_irq = info->virtual_irq;
462 454
463 ppi = irq_of_parse_and_map(np, 2); 455 err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
464 if (!ppi) {
465 kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
466 err = -EINVAL;
467 goto out;
468 }
469
470 err = request_percpu_irq(ppi, kvm_arch_timer_handler,
471 "kvm guest timer", kvm_get_running_vcpus()); 456 "kvm guest timer", kvm_get_running_vcpus());
472 if (err) { 457 if (err) {
473 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", 458 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
474 ppi, err); 459 host_vtimer_irq, err);
475 goto out; 460 goto out;
476 } 461 }
477 462
478 host_vtimer_irq = ppi;
479
480 err = __register_cpu_notifier(&kvm_timer_cpu_nb); 463 err = __register_cpu_notifier(&kvm_timer_cpu_nb);
481 if (err) { 464 if (err) {
482 kvm_err("Cannot register timer CPU notifier\n"); 465 kvm_err("Cannot register timer CPU notifier\n");
@@ -489,14 +472,13 @@ int kvm_timer_hyp_init(void)
489 goto out_free; 472 goto out_free;
490 } 473 }
491 474
492 kvm_info("%s IRQ%d\n", np->name, ppi); 475 kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
493 on_each_cpu(kvm_timer_init_interrupt, NULL, 1); 476 on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
494 477
495 goto out; 478 goto out;
496out_free: 479out_free:
497 free_percpu_irq(ppi, kvm_get_running_vcpus()); 480 free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
498out: 481out:
499 of_node_put(np);
500 return err; 482 return err;
501} 483}
502 484
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 67ec334ce1d0..7e826c9b2b0a 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -20,9 +20,6 @@
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/of.h>
24#include <linux/of_address.h>
25#include <linux/of_irq.h>
26 23
27#include <linux/irqchip/arm-gic.h> 24#include <linux/irqchip/arm-gic.h>
28 25
@@ -186,38 +183,39 @@ static void vgic_cpu_init_lrs(void *params)
186} 183}
187 184
188/** 185/**
189 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT 186 * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
190 * @node: pointer to the DT node 187 * @gic_kvm_info: pointer to the GIC description
191 * @ops: address of a pointer to the GICv2 operations 188 * @ops: address of a pointer to the GICv2 operations
192 * @params: address of a pointer to HW-specific parameters 189 * @params: address of a pointer to HW-specific parameters
193 * 190 *
194 * Returns 0 if a GICv2 has been found, with the low level operations 191 * Returns 0 if a GICv2 has been found, with the low level operations
195 * in *ops and the HW parameters in *params. Returns an error code 192 * in *ops and the HW parameters in *params. Returns an error code
196 * otherwise. 193 * otherwise.
197 */ 194 */
198int vgic_v2_probe(struct device_node *vgic_node, 195int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
199 const struct vgic_ops **ops, 196 const struct vgic_ops **ops,
200 const struct vgic_params **params) 197 const struct vgic_params **params)
201{ 198{
202 int ret; 199 int ret;
203 struct resource vctrl_res;
204 struct resource vcpu_res;
205 struct vgic_params *vgic = &vgic_v2_params; 200 struct vgic_params *vgic = &vgic_v2_params;
201 const struct resource *vctrl_res = &gic_kvm_info->vctrl;
202 const struct resource *vcpu_res = &gic_kvm_info->vcpu;
206 203
207 vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); 204 if (!gic_kvm_info->maint_irq) {
208 if (!vgic->maint_irq) { 205 kvm_err("error getting vgic maintenance irq\n");
209 kvm_err("error getting vgic maintenance irq from DT\n");
210 ret = -ENXIO; 206 ret = -ENXIO;
211 goto out; 207 goto out;
212 } 208 }
209 vgic->maint_irq = gic_kvm_info->maint_irq;
213 210
214 ret = of_address_to_resource(vgic_node, 2, &vctrl_res); 211 if (!gic_kvm_info->vctrl.start) {
215 if (ret) { 212 kvm_err("GICH not present in the firmware table\n");
216 kvm_err("Cannot obtain GICH resource\n"); 213 ret = -ENXIO;
217 goto out; 214 goto out;
218 } 215 }
219 216
220 vgic->vctrl_base = of_iomap(vgic_node, 2); 217 vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start,
218 resource_size(&gic_kvm_info->vctrl));
221 if (!vgic->vctrl_base) { 219 if (!vgic->vctrl_base) {
222 kvm_err("Cannot ioremap GICH\n"); 220 kvm_err("Cannot ioremap GICH\n");
223 ret = -ENOMEM; 221 ret = -ENOMEM;
@@ -228,29 +226,23 @@ int vgic_v2_probe(struct device_node *vgic_node,
228 vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; 226 vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
229 227
230 ret = create_hyp_io_mappings(vgic->vctrl_base, 228 ret = create_hyp_io_mappings(vgic->vctrl_base,
231 vgic->vctrl_base + resource_size(&vctrl_res), 229 vgic->vctrl_base + resource_size(vctrl_res),
232 vctrl_res.start); 230 vctrl_res->start);
233 if (ret) { 231 if (ret) {
234 kvm_err("Cannot map VCTRL into hyp\n"); 232 kvm_err("Cannot map VCTRL into hyp\n");
235 goto out_unmap; 233 goto out_unmap;
236 } 234 }
237 235
238 if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { 236 if (!PAGE_ALIGNED(vcpu_res->start)) {
239 kvm_err("Cannot obtain GICV resource\n");
240 ret = -ENXIO;
241 goto out_unmap;
242 }
243
244 if (!PAGE_ALIGNED(vcpu_res.start)) {
245 kvm_err("GICV physical address 0x%llx not page aligned\n", 237 kvm_err("GICV physical address 0x%llx not page aligned\n",
246 (unsigned long long)vcpu_res.start); 238 (unsigned long long)vcpu_res->start);
247 ret = -ENXIO; 239 ret = -ENXIO;
248 goto out_unmap; 240 goto out_unmap;
249 } 241 }
250 242
251 if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 243 if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
252 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", 244 kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
253 (unsigned long long)resource_size(&vcpu_res), 245 (unsigned long long)resource_size(vcpu_res),
254 PAGE_SIZE); 246 PAGE_SIZE);
255 ret = -ENXIO; 247 ret = -ENXIO;
256 goto out_unmap; 248 goto out_unmap;
@@ -259,10 +251,10 @@ int vgic_v2_probe(struct device_node *vgic_node,
259 vgic->can_emulate_gicv2 = true; 251 vgic->can_emulate_gicv2 = true;
260 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); 252 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
261 253
262 vgic->vcpu_base = vcpu_res.start; 254 vgic->vcpu_base = vcpu_res->start;
263 255
264 kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 256 kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
265 vctrl_res.start, vgic->maint_irq); 257 gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq);
266 258
267 vgic->type = VGIC_V2; 259 vgic->type = VGIC_V2;
268 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; 260 vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
@@ -276,6 +268,5 @@ int vgic_v2_probe(struct device_node *vgic_node,
276out_unmap: 268out_unmap:
277 iounmap(vgic->vctrl_base); 269 iounmap(vgic->vctrl_base);
278out: 270out:
279 of_node_put(vgic_node);
280 return ret; 271 return ret;
281} 272}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 999bdc6d9d9f..c02a1b1cf855 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -20,11 +20,9 @@
20#include <linux/kvm_host.h> 20#include <linux/kvm_host.h>
21#include <linux/interrupt.h> 21#include <linux/interrupt.h>
22#include <linux/io.h> 22#include <linux/io.h>
23#include <linux/of.h>
24#include <linux/of_address.h>
25#include <linux/of_irq.h>
26 23
27#include <linux/irqchip/arm-gic-v3.h> 24#include <linux/irqchip/arm-gic-v3.h>
25#include <linux/irqchip/arm-gic-common.h>
28 26
29#include <asm/kvm_emulate.h> 27#include <asm/kvm_emulate.h>
30#include <asm/kvm_arm.h> 28#include <asm/kvm_arm.h>
@@ -222,30 +220,24 @@ static void vgic_cpu_init_lrs(void *params)
222} 220}
223 221
224/** 222/**
225 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT 223 * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
226 * @node: pointer to the DT node 224 * @gic_kvm_info: pointer to the GIC description
227 * @ops: address of a pointer to the GICv3 operations 225 * @ops: address of a pointer to the GICv3 operations
228 * @params: address of a pointer to HW-specific parameters 226 * @params: address of a pointer to HW-specific parameters
229 * 227 *
230 * Returns 0 if a GICv3 has been found, with the low level operations 228 * Returns 0 if a GICv3 has been found, with the low level operations
231 * in *ops and the HW parameters in *params. Returns an error code 229 * in *ops and the HW parameters in *params. Returns an error code
232 * otherwise. 230 * otherwise.
233 */ 231 */
234int vgic_v3_probe(struct device_node *vgic_node, 232int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
235 const struct vgic_ops **ops, 233 const struct vgic_ops **ops,
236 const struct vgic_params **params) 234 const struct vgic_params **params)
237{ 235{
238 int ret = 0; 236 int ret = 0;
239 u32 gicv_idx;
240 struct resource vcpu_res;
241 struct vgic_params *vgic = &vgic_v3_params; 237 struct vgic_params *vgic = &vgic_v3_params;
238 const struct resource *vcpu_res = &gic_kvm_info->vcpu;
242 239
243 vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0); 240 vgic->maint_irq = gic_kvm_info->maint_irq;
244 if (!vgic->maint_irq) {
245 kvm_err("error getting vgic maintenance irq from DT\n");
246 ret = -ENXIO;
247 goto out;
248 }
249 241
250 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); 242 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
251 243
@@ -256,24 +248,19 @@ int vgic_v3_probe(struct device_node *vgic_node,
256 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; 248 vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
257 vgic->can_emulate_gicv2 = false; 249 vgic->can_emulate_gicv2 = false;
258 250
259 if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx)) 251 if (!vcpu_res->start) {
260 gicv_idx = 1;
261
262 gicv_idx += 3; /* Also skip GICD, GICC, GICH */
263 if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
264 kvm_info("GICv3: no GICV resource entry\n"); 252 kvm_info("GICv3: no GICV resource entry\n");
265 vgic->vcpu_base = 0; 253 vgic->vcpu_base = 0;
266 } else if (!PAGE_ALIGNED(vcpu_res.start)) { 254 } else if (!PAGE_ALIGNED(vcpu_res->start)) {
267 pr_warn("GICV physical address 0x%llx not page aligned\n", 255 pr_warn("GICV physical address 0x%llx not page aligned\n",
268 (unsigned long long)vcpu_res.start); 256 (unsigned long long)vcpu_res->start);
269 vgic->vcpu_base = 0; 257 vgic->vcpu_base = 0;
270 } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { 258 } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
271 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", 259 pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
272 (unsigned long long)resource_size(&vcpu_res), 260 (unsigned long long)resource_size(vcpu_res),
273 PAGE_SIZE); 261 PAGE_SIZE);
274 vgic->vcpu_base = 0;
275 } else { 262 } else {
276 vgic->vcpu_base = vcpu_res.start; 263 vgic->vcpu_base = vcpu_res->start;
277 vgic->can_emulate_gicv2 = true; 264 vgic->can_emulate_gicv2 = true;
278 kvm_register_device_ops(&kvm_arm_vgic_v2_ops, 265 kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
279 KVM_DEV_TYPE_ARM_VGIC_V2); 266 KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -286,15 +273,13 @@ int vgic_v3_probe(struct device_node *vgic_node,
286 vgic->type = VGIC_V3; 273 vgic->type = VGIC_V3;
287 vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; 274 vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
288 275
289 kvm_info("%s@%llx IRQ%d\n", vgic_node->name, 276 kvm_info("GICV base=0x%llx, IRQ=%d\n",
290 vcpu_res.start, vgic->maint_irq); 277 vgic->vcpu_base, vgic->maint_irq);
291 278
292 on_each_cpu(vgic_cpu_init_lrs, vgic, 1); 279 on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
293 280
294 *ops = &vgic_v3_ops; 281 *ops = &vgic_v3_ops;
295 *params = vgic; 282 *params = vgic;
296 283
297out:
298 of_node_put(vgic_node);
299 return ret; 284 return ret;
300} 285}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 00429b392c61..60668a7f319a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -21,9 +21,7 @@
21#include <linux/kvm_host.h> 21#include <linux/kvm_host.h>
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/io.h> 23#include <linux/io.h>
24#include <linux/of.h> 24#include <linux/irq.h>
25#include <linux/of_address.h>
26#include <linux/of_irq.h>
27#include <linux/rculist.h> 25#include <linux/rculist.h>
28#include <linux/uaccess.h> 26#include <linux/uaccess.h>
29 27
@@ -33,6 +31,7 @@
33#include <trace/events/kvm.h> 31#include <trace/events/kvm.h>
34#include <asm/kvm.h> 32#include <asm/kvm.h>
35#include <kvm/iodev.h> 33#include <kvm/iodev.h>
34#include <linux/irqchip/arm-gic-common.h>
36 35
37#define CREATE_TRACE_POINTS 36#define CREATE_TRACE_POINTS
38#include "trace.h" 37#include "trace.h"
@@ -2389,33 +2388,38 @@ static struct notifier_block vgic_cpu_nb = {
2389 .notifier_call = vgic_cpu_notify, 2388 .notifier_call = vgic_cpu_notify,
2390}; 2389};
2391 2390
2392static const struct of_device_id vgic_ids[] = { 2391static int kvm_vgic_probe(void)
2393 { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
2394 { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
2395 { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
2396 { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
2397 {},
2398};
2399
2400int kvm_vgic_hyp_init(void)
2401{ 2392{
2402 const struct of_device_id *matched_id; 2393 const struct gic_kvm_info *gic_kvm_info;
2403 const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
2404 const struct vgic_params **);
2405 struct device_node *vgic_node;
2406 int ret; 2394 int ret;
2407 2395
2408 vgic_node = of_find_matching_node_and_match(NULL, 2396 gic_kvm_info = gic_get_kvm_info();
2409 vgic_ids, &matched_id); 2397 if (!gic_kvm_info)
2410 if (!vgic_node) {
2411 kvm_err("error: no compatible GIC node found\n");
2412 return -ENODEV; 2398 return -ENODEV;
2399
2400 switch (gic_kvm_info->type) {
2401 case GIC_V2:
2402 ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic);
2403 break;
2404 case GIC_V3:
2405 ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic);
2406 break;
2407 default:
2408 ret = -ENODEV;
2413 } 2409 }
2414 2410
2415 vgic_probe = matched_id->data; 2411 return ret;
2416 ret = vgic_probe(vgic_node, &vgic_ops, &vgic); 2412}
2417 if (ret) 2413
2414int kvm_vgic_hyp_init(void)
2415{
2416 int ret;
2417
2418 ret = kvm_vgic_probe();
2419 if (ret) {
2420 kvm_err("error: KVM vGIC probing failed\n");
2418 return ret; 2421 return ret;
2422 }
2419 2423
2420 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, 2424 ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2421 "vgic", kvm_get_running_vcpus()); 2425 "vgic", kvm_get_running_vcpus());
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 46dbc0a7dfc1..e469b6012471 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -408,15 +408,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
408 */ 408 */
409 fdput(f); 409 fdput(f);
410#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS 410#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
411 irqfd->consumer.token = (void *)irqfd->eventfd; 411 if (kvm_arch_has_irq_bypass()) {
412 irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; 412 irqfd->consumer.token = (void *)irqfd->eventfd;
413 irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; 413 irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
414 irqfd->consumer.stop = kvm_arch_irq_bypass_stop; 414 irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
415 irqfd->consumer.start = kvm_arch_irq_bypass_start; 415 irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
416 ret = irq_bypass_register_consumer(&irqfd->consumer); 416 irqfd->consumer.start = kvm_arch_irq_bypass_start;
417 if (ret) 417 ret = irq_bypass_register_consumer(&irqfd->consumer);
418 pr_info("irq bypass consumer (token %p) registration fails: %d\n", 418 if (ret)
419 pr_info("irq bypass consumer (token %p) registration fails: %d\n",
419 irqfd->consumer.token, ret); 420 irqfd->consumer.token, ret);
421 }
420#endif 422#endif
421 423
422 return 0; 424 return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fd482fb9260..dd4ac9d9e8f5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2028,6 +2028,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
2028 */ 2028 */
2029 if (kvm_vcpu_check_block(vcpu) < 0) { 2029 if (kvm_vcpu_check_block(vcpu) < 0) {
2030 ++vcpu->stat.halt_successful_poll; 2030 ++vcpu->stat.halt_successful_poll;
2031 if (!vcpu_valid_wakeup(vcpu))
2032 ++vcpu->stat.halt_poll_invalid;
2031 goto out; 2033 goto out;
2032 } 2034 }
2033 cur = ktime_get(); 2035 cur = ktime_get();
@@ -2053,7 +2055,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
2053out: 2055out:
2054 block_ns = ktime_to_ns(cur) - ktime_to_ns(start); 2056 block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
2055 2057
2056 if (halt_poll_ns) { 2058 if (!vcpu_valid_wakeup(vcpu))
2059 shrink_halt_poll_ns(vcpu);
2060 else if (halt_poll_ns) {
2057 if (block_ns <= vcpu->halt_poll_ns) 2061 if (block_ns <= vcpu->halt_poll_ns)
2058 ; 2062 ;
2059 /* we had a long block, shrink polling */ 2063 /* we had a long block, shrink polling */
@@ -2066,18 +2070,14 @@ out:
2066 } else 2070 } else
2067 vcpu->halt_poll_ns = 0; 2071 vcpu->halt_poll_ns = 0;
2068 2072
2069 trace_kvm_vcpu_wakeup(block_ns, waited); 2073 trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
2074 kvm_arch_vcpu_block_finish(vcpu);
2070} 2075}
2071EXPORT_SYMBOL_GPL(kvm_vcpu_block); 2076EXPORT_SYMBOL_GPL(kvm_vcpu_block);
2072 2077
2073#ifndef CONFIG_S390 2078#ifndef CONFIG_S390
2074/* 2079void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
2075 * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
2076 */
2077void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
2078{ 2080{
2079 int me;
2080 int cpu = vcpu->cpu;
2081 struct swait_queue_head *wqp; 2081 struct swait_queue_head *wqp;
2082 2082
2083 wqp = kvm_arch_vcpu_wq(vcpu); 2083 wqp = kvm_arch_vcpu_wq(vcpu);
@@ -2086,6 +2086,18 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
2086 ++vcpu->stat.halt_wakeup; 2086 ++vcpu->stat.halt_wakeup;
2087 } 2087 }
2088 2088
2089}
2090EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
2091
2092/*
2093 * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
2094 */
2095void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
2096{
2097 int me;
2098 int cpu = vcpu->cpu;
2099
2100 kvm_vcpu_wake_up(vcpu);
2089 me = get_cpu(); 2101 me = get_cpu();
2090 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) 2102 if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
2091 if (kvm_arch_vcpu_should_kick(vcpu)) 2103 if (kvm_arch_vcpu_should_kick(vcpu))
@@ -2272,7 +2284,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
2272 int r; 2284 int r;
2273 struct kvm_vcpu *vcpu; 2285 struct kvm_vcpu *vcpu;
2274 2286
2275 if (id >= KVM_MAX_VCPUS) 2287 if (id >= KVM_MAX_VCPU_ID)
2276 return -EINVAL; 2288 return -EINVAL;
2277 2289
2278 vcpu = kvm_arch_vcpu_create(kvm, id); 2290 vcpu = kvm_arch_vcpu_create(kvm, id);
@@ -2746,6 +2758,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
2746 case KVM_CAP_MULTI_ADDRESS_SPACE: 2758 case KVM_CAP_MULTI_ADDRESS_SPACE:
2747 return KVM_ADDRESS_SPACE_NUM; 2759 return KVM_ADDRESS_SPACE_NUM;
2748#endif 2760#endif
2761 case KVM_CAP_MAX_VCPU_ID:
2762 return KVM_MAX_VCPU_ID;
2749 default: 2763 default:
2750 break; 2764 break;
2751 } 2765 }
diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index 09a03b5a21ff..52abac4bb6a2 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -89,6 +89,9 @@ int irq_bypass_register_producer(struct irq_bypass_producer *producer)
89 struct irq_bypass_producer *tmp; 89 struct irq_bypass_producer *tmp;
90 struct irq_bypass_consumer *consumer; 90 struct irq_bypass_consumer *consumer;
91 91
92 if (!producer->token)
93 return -EINVAL;
94
92 might_sleep(); 95 might_sleep();
93 96
94 if (!try_module_get(THIS_MODULE)) 97 if (!try_module_get(THIS_MODULE))
@@ -136,6 +139,9 @@ void irq_bypass_unregister_producer(struct irq_bypass_producer *producer)
136 struct irq_bypass_producer *tmp; 139 struct irq_bypass_producer *tmp;
137 struct irq_bypass_consumer *consumer; 140 struct irq_bypass_consumer *consumer;
138 141
142 if (!producer->token)
143 return;
144
139 might_sleep(); 145 might_sleep();
140 146
141 if (!try_module_get(THIS_MODULE)) 147 if (!try_module_get(THIS_MODULE))
@@ -177,7 +183,8 @@ int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer)
177 struct irq_bypass_consumer *tmp; 183 struct irq_bypass_consumer *tmp;
178 struct irq_bypass_producer *producer; 184 struct irq_bypass_producer *producer;
179 185
180 if (!consumer->add_producer || !consumer->del_producer) 186 if (!consumer->token ||
187 !consumer->add_producer || !consumer->del_producer)
181 return -EINVAL; 188 return -EINVAL;
182 189
183 might_sleep(); 190 might_sleep();
@@ -227,6 +234,9 @@ void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer)
227 struct irq_bypass_consumer *tmp; 234 struct irq_bypass_consumer *tmp;
228 struct irq_bypass_producer *producer; 235 struct irq_bypass_producer *producer;
229 236
237 if (!consumer->token)
238 return;
239
230 might_sleep(); 240 might_sleep();
231 241
232 if (!try_module_get(THIS_MODULE)) 242 if (!try_module_get(THIS_MODULE))