Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 4
-rw-r--r--  arch/arm/include/asm/kvm_asm.h | 4
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 9
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 30
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 1
-rw-r--r--  arch/arm/kvm/coproc.c | 84
-rw-r--r--  arch/arm/kvm/coproc.h | 14
-rw-r--r--  arch/arm/kvm/coproc_a15.c | 2
-rw-r--r--  arch/arm/kvm/coproc_a7.c | 2
-rw-r--r--  arch/arm/kvm/guest.c | 1
-rw-r--r--  arch/arm/kvm/interrupts_head.S | 21
-rw-r--r--  arch/arm/kvm/mmu.c | 110
-rw-r--r--  arch/arm64/include/asm/kvm_arm.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 3
-rw-r--r--  arch/arm64/include/asm/kvm_mmu.h | 22
-rw-r--r--  arch/arm64/kvm/sys_regs.c | 99
-rw-r--r--  arch/arm64/kvm/sys_regs.h | 2
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 1
-rw-r--r--  arch/mips/include/asm/kvm_host.h | 417
-rw-r--r--  arch/mips/kvm/kvm_mips_emul.c | 40
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 5
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 12
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_asm.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 1
-rw-r--r--  arch/powerpc/include/asm/tm.h | 4
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 9
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c | 28
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 159
-rw-r--r--  arch/powerpc/kvm/book3s_hv_interrupts.S | 22
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 187
-rw-r--r--  arch/powerpc/kvm/book3s_rtas.c | 7
-rw-r--r--  arch/s390/include/asm/irq.h | 1
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 98
-rw-r--r--  arch/s390/include/asm/pgtable.h | 2
-rw-r--r--  arch/s390/include/asm/processor.h | 1
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 43
-rw-r--r--  arch/s390/kernel/irq.c | 1
-rw-r--r--  arch/s390/kvm/Kconfig | 4
-rw-r--r--  arch/s390/kvm/Makefile | 2
-rw-r--r--  arch/s390/kvm/diag.c | 84
-rw-r--r--  arch/s390/kvm/interrupt.c | 704
-rw-r--r--  arch/s390/kvm/irq.h | 22
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 212
-rw-r--r--  arch/s390/kvm/kvm-s390.h | 7
-rw-r--r--  arch/s390/kvm/priv.c | 7
-rw-r--r--  arch/s390/kvm/sigp.c | 157
-rw-r--r--  arch/s390/kvm/trace.h | 46
-rw-r--r--  arch/s390/mm/fault.c | 26
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 18
-rw-r--r--  arch/x86/include/asm/vmx.h | 4
-rw-r--r--  arch/x86/include/asm/xsave.h | 2
-rw-r--r--  arch/x86/include/uapi/asm/msr-index.h | 1
-rw-r--r--  arch/x86/kernel/kvm.c | 1
-rw-r--r--  arch/x86/kernel/kvmclock.c | 2
-rw-r--r--  arch/x86/kvm/cpuid.c | 37
-rw-r--r--  arch/x86/kvm/emulate.c | 8
-rw-r--r--  arch/x86/kvm/mmu.c | 2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 7
-rw-r--r--  arch/x86/kvm/svm.c | 84
-rw-r--r--  arch/x86/kvm/vmx.c | 334
-rw-r--r--  arch/x86/kvm/x86.c | 145
-rw-r--r--  arch/x86/kvm/x86.h | 5
64 files changed, 2619 insertions, 761 deletions
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 1d3153c7eb41..816db0bf2dd8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -55,6 +55,7 @@
  * The bits we set in HCR:
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until MMU and caches are on)
  * TSW:		Trap cache operations by set/way
  * TWI:		Trap WFI
  * TWE:		Trap WFE
@@ -68,8 +69,7 @@
  */
 #define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
 			HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
-			HCR_TWE | HCR_SWIO | HCR_TIDCP)
-#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+			HCR_TVM | HCR_TWE | HCR_SWIO | HCR_TIDCP)
 
 /* System Control Register (SCTLR) bits */
 #define SCTLR_TE	(1 << 30)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 661da11f76f4..53b3c4a50d5c 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -48,7 +48,9 @@
 #define c13_TID_URO	26	/* Thread ID, User R/O */
 #define c13_TID_PRIV	27	/* Thread ID, Privileged */
 #define c14_CNTKCTL	28	/* Timer Control Register (PL1) */
-#define NR_CP15_REGS	29	/* Number of regs (incl. invalid) */
+#define c10_AMAIR0	29	/* Auxilary Memory Attribute Indirection Reg0 */
+#define c10_AMAIR1	30	/* Auxilary Memory Attribute Indirection Reg1 */
+#define NR_CP15_REGS	31	/* Number of regs (incl. invalid) */
 
 #define ARM_EXCEPTION_RESET	0
 #define ARM_EXCEPTION_UNDEFINED	1
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 098f7dd6d564..09af14999c9b 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -101,6 +101,12 @@ struct kvm_vcpu_arch {
 	/* The CPU type we expose to the VM */
 	u32 midr;
 
+	/* HYP trapping configuration */
+	u32 hcr;
+
+	/* Interrupt related fields */
+	u32 irq_lines;		/* IRQ and FIQ levels */
+
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
@@ -128,9 +134,6 @@ struct kvm_vcpu_arch {
 	/* IO related fields */
 	struct kvm_decode mmio_decode;
 
-	/* Interrupt related fields */
-	u32 irq_lines;		/* IRQ and FIQ levels */
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 2d122adcdb22..5c7aa3c1519f 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -114,11 +114,34 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= L_PMD_S2_RDWR;
 }
 
+/* Open coded p*d_addr_end that can deal with 64bit addresses */
+#define kvm_pgd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
+#define kvm_pud_addr_end(addr,end)	(end)
+
+#define kvm_pmd_addr_end(addr, end)					\
+({	u64 __boundary = ((addr) + PMD_SIZE) & PMD_MASK;		\
+	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
+})
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
-					      unsigned long size)
+#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
+	return (vcpu->arch.cp15[c1_SCTLR] & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
+{
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+
 	/*
 	 * If we are going to insert an instruction page and the icache is
 	 * either VIPT or PIPT, there is a potential problem where the host
@@ -139,9 +162,10 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index ded041711beb..85598b5d1efd 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -174,6 +174,7 @@ int main(void)
   DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
   DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc));
   DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr));
+  DEFINE(VCPU_HCR,		offsetof(struct kvm_vcpu, arch.hcr));
   DEFINE(VCPU_IRQ_LINES,	offsetof(struct kvm_vcpu, arch.irq_lines));
   DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.fault.hsr));
   DEFINE(VCPU_HxFAR,		offsetof(struct kvm_vcpu, arch.fault.hxfar));
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 78c0885d6501..c58a35116f63 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -23,6 +23,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -205,6 +206,44 @@ done:
 }
 
 /*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct coproc_params *p,
+			  const struct coproc_reg *r)
+{
+	BUG_ON(!p->is_write);
+
+	vcpu->arch.cp15[r->reg] = *vcpu_reg(vcpu, p->Rt1);
+	if (p->is_64bit)
+		vcpu->arch.cp15[r->reg + 1] = *vcpu_reg(vcpu, p->Rt2);
+
+	return true;
+}
+
+/*
+ * SCTLR accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ *
+ * Used by the cpu-specific code.
+ */
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
+		vcpu->arch.hcr &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
+
+	return true;
+}
+
+/*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
  * NAKed, so it will read the PMCR anyway.
@@ -261,33 +300,36 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_val, c1_CPACR, 0x00000000 },
 
-	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
-
-	/* TTBCR: swapped by interrupt.S. */
+	/* TTBR0/TTBR1/TTBCR: swapped by interrupt.S. */
+	{ CRm64( 2), Op1( 0), is64, access_vm_reg, reset_unknown64, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c2_TTBR0 },
+	{ CRn(2), CRm( 0), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c2_TTBR1 },
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_val, c2_TTBCR, 0x00000000 },
+			access_vm_reg, reset_val, c2_TTBCR, 0x00000000 },
+	{ CRm64( 2), Op1( 1), is64, access_vm_reg, reset_unknown64, c2_TTBR1 },
+
 
 	/* DACR: swapped by interrupt.S. */
 	{ CRn( 3), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c3_DACR },
+			access_vm_reg, reset_unknown, c3_DACR },
 
 	/* DFSR/IFSR/ADFSR/AIFSR: swapped by interrupt.S. */
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_DFSR },
+			access_vm_reg, reset_unknown, c5_DFSR },
 	{ CRn( 5), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_IFSR },
+			access_vm_reg, reset_unknown, c5_IFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c5_ADFSR },
+			access_vm_reg, reset_unknown, c5_ADFSR },
 	{ CRn( 5), CRm( 1), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c5_AIFSR },
+			access_vm_reg, reset_unknown, c5_AIFSR },
 
 	/* DFAR/IFAR: swapped by interrupt.S. */
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c6_DFAR },
+			access_vm_reg, reset_unknown, c6_DFAR },
 	{ CRn( 6), CRm( 0), Op1( 0), Op2( 2), is32,
-			NULL, reset_unknown, c6_IFAR },
+			access_vm_reg, reset_unknown, c6_IFAR },
 
 	/* PAR swapped by interrupt.S */
 	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
@@ -324,9 +366,15 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* PRRR/NMRR (aka MAIR0/MAIR1): swapped by interrupt.S. */
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 0), is32,
-			NULL, reset_unknown, c10_PRRR},
+			access_vm_reg, reset_unknown, c10_PRRR},
 	{ CRn(10), CRm( 2), Op1( 0), Op2( 1), is32,
-			NULL, reset_unknown, c10_NMRR},
+			access_vm_reg, reset_unknown, c10_NMRR},
+
+	/* AMAIR0/AMAIR1: swapped by interrupt.S. */
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 0), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR0},
+	{ CRn(10), CRm( 3), Op1( 0), Op2( 1), is32,
+			access_vm_reg, reset_unknown, c10_AMAIR1},
 
 	/* VBAR: swapped by interrupt.S. */
 	{ CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
@@ -334,7 +382,7 @@ static const struct coproc_reg cp15_regs[] = {
 
 	/* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 1), is32,
-			NULL, reset_val, c13_CID, 0x00000000 },
+			access_vm_reg, reset_val, c13_CID, 0x00000000 },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 2), is32,
 			NULL, reset_unknown, c13_TID_URW },
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 3), is32,
@@ -443,7 +491,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	struct coproc_params params;
 
-	params.CRm = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
+	params.CRn = (kvm_vcpu_get_hsr(vcpu) >> 1) & 0xf;
 	params.Rt1 = (kvm_vcpu_get_hsr(vcpu) >> 5) & 0xf;
 	params.is_write = ((kvm_vcpu_get_hsr(vcpu) & 1) == 0);
 	params.is_64bit = true;
@@ -451,7 +499,7 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	params.Op1 = (kvm_vcpu_get_hsr(vcpu) >> 16) & 0xf;
 	params.Op2 = 0;
 	params.Rt2 = (kvm_vcpu_get_hsr(vcpu) >> 10) & 0xf;
-	params.CRn = 0;
+	params.CRm = 0;
 
 	return emulate_cp15(vcpu, &params);
 }
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index 0461d5c8d3de..1a44bbe39643 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -58,8 +58,8 @@ static inline void print_cp_instr(const struct coproc_params *p)
 {
 	/* Look, we even formatted it for you to paste into the table! */
 	if (p->is_64bit) {
-		kvm_pr_unimpl(" { CRm(%2lu), Op1(%2lu), is64, func_%s },\n",
-			      p->CRm, p->Op1, p->is_write ? "write" : "read");
+		kvm_pr_unimpl(" { CRm64(%2lu), Op1(%2lu), is64, func_%s },\n",
+			      p->CRn, p->Op1, p->is_write ? "write" : "read");
 	} else {
 		kvm_pr_unimpl(" { CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
 			      " func_%s },\n",
@@ -135,13 +135,13 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 		return -1;
 	if (i1->CRn != i2->CRn)
 		return i1->CRn - i2->CRn;
-	if (i1->is_64 != i2->is_64)
-		return i2->is_64 - i1->is_64;
 	if (i1->CRm != i2->CRm)
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
 		return i1->Op1 - i2->Op1;
-	return i1->Op2 - i2->Op2;
+	if (i1->Op2 != i2->Op2)
+		return i1->Op2 - i2->Op2;
+	return i2->is_64 - i1->is_64;
 }
 
 
@@ -153,4 +153,8 @@ static inline int cmp_reg(const struct coproc_reg *i1,
 #define is64		.is_64 = true
 #define is32		.is_64 = false
 
+bool access_sctlr(struct kvm_vcpu *vcpu,
+		  const struct coproc_params *p,
+		  const struct coproc_reg *r);
+
 #endif /* __ARM_KVM_COPROC_LOCAL_H__ */
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index bb0cac1410cc..e6f4ae48bda9 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -34,7 +34,7 @@
 static const struct coproc_reg a15_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50078 },
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50078 },
 };
 
 static struct kvm_coproc_target_table a15_target_table = {
diff --git a/arch/arm/kvm/coproc_a7.c b/arch/arm/kvm/coproc_a7.c
index 1df767331588..17fc7cd479d3 100644
--- a/arch/arm/kvm/coproc_a7.c
+++ b/arch/arm/kvm/coproc_a7.c
@@ -37,7 +37,7 @@
 static const struct coproc_reg a7_regs[] = {
 	/* SCTLR: swapped by interrupt.S. */
 	{ CRn( 1), CRm( 0), Op1( 0), Op2( 0), is32,
-			NULL, reset_val, c1_SCTLR, 0x00C50878 },
+			access_sctlr, reset_val, c1_SCTLR, 0x00C50878 },
 };
 
 static struct kvm_coproc_target_table a7_target_table = {
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 2786eae10c0d..b23a59c1c522 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,6 +38,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 6f18695a09cb..76af93025574 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -303,13 +303,17 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 	mrc	p15, 0, r2, c14, c1, 0	@ CNTKCTL
 	mrrc	p15, 0, r4, r5, c7	@ PAR
+	mrc	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mrc	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \store_to_vcpu == 0
-	push	{r2,r4-r5}
+	push	{r2,r4-r7}
 	.else
 	str	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
 	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
 	strd	r4, r5, [r12]
+	str	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	str	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 .endm
 
@@ -322,15 +326,19 @@ vcpu	.req	r0		@ vcpu pointer always in r0
  */
 .macro write_cp15_state read_from_vcpu
 	.if \read_from_vcpu == 0
-	pop	{r2,r4-r5}
+	pop	{r2,r4-r7}
 	.else
 	ldr	r2, [vcpu, #CP15_OFFSET(c14_CNTKCTL)]
 	add	r12, vcpu, #CP15_OFFSET(c7_PAR)
 	ldrd	r4, r5, [r12]
+	ldr	r6, [vcpu, #CP15_OFFSET(c10_AMAIR0)]
+	ldr	r7, [vcpu, #CP15_OFFSET(c10_AMAIR1)]
 	.endif
 
 	mcr	p15, 0, r2, c14, c1, 0	@ CNTKCTL
 	mcrr	p15, 0, r4, r5, c7	@ PAR
+	mcr	p15, 0, r6, c10, c3, 0	@ AMAIR0
+	mcr	p15, 0, r7, c10, c3, 1	@ AMAIR1
 
 	.if \read_from_vcpu == 0
 	pop	{r2-r12}
@@ -597,17 +605,14 @@ vcpu	.req	r0		@ vcpu pointer always in r0
 
 /* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
 .macro configure_hyp_role operation
-	mrc	p15, 4, r2, c1, c1, 0	@ HCR
-	bic	r2, r2, #HCR_VIRT_EXCP_MASK
-	ldr	r3, =HCR_GUEST_MASK
 	.if \operation == vmentry
-	orr	r2, r2, r3
+	ldr	r2, [vcpu, #VCPU_HCR]
 	ldr	r3, [vcpu, #VCPU_IRQ_LINES]
 	orr	r2, r2, r3
 	.else
-	bic	r2, r2, r3
+	mov	r2, #0
 	.endif
-	mcr	p15, 4, r2, c1, c1, 0
+	mcr	p15, 4, r2, c1, c1, 0	@ HCR
 .endm
 
 .macro load_vcpu
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7789857d1470..80bb1e6c2c29 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -144,8 +144,9 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
+		pte = NULL;
 		if (pud_none(*pud)) {
-			addr = pud_addr_end(addr, end);
+			addr = kvm_pud_addr_end(addr, end);
 			continue;
 		}
 
@@ -155,13 +156,13 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			 * move on.
 			 */
 			clear_pud_entry(kvm, pud, addr);
-			addr = pud_addr_end(addr, end);
+			addr = kvm_pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr = pmd_addr_end(addr, end);
+			addr = kvm_pmd_addr_end(addr, end);
 			continue;
 		}
 
@@ -174,12 +175,12 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 		/*
 		 * If the pmd entry is to be cleared, walk back up the ladder
 		 */
-		if (kvm_pmd_huge(*pmd) || page_empty(pte)) {
+		if (kvm_pmd_huge(*pmd) || (pte && page_empty(pte))) {
 			clear_pmd_entry(kvm, pmd, addr);
-			next = pmd_addr_end(addr, end);
+			next = kvm_pmd_addr_end(addr, end);
 			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
-				next = pud_addr_end(addr, end);
+				next = kvm_pud_addr_end(addr, end);
 			}
 		}
 
@@ -187,6 +188,99 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 	}
 }
 
+static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pte_t *pte;
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		if (!pte_none(*pte)) {
+			hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+			kvm_flush_dcache_to_poc((void*)hva, PAGE_SIZE);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pmd_t *pmd;
+	phys_addr_t next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = kvm_pmd_addr_end(addr, end);
+		if (!pmd_none(*pmd)) {
+			if (kvm_pmd_huge(*pmd)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PMD_SIZE);
+			} else {
+				stage2_flush_ptes(kvm, pmd, addr, next);
+			}
+		}
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd,
+			      phys_addr_t addr, phys_addr_t end)
+{
+	pud_t *pud;
+	phys_addr_t next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = kvm_pud_addr_end(addr, end);
+		if (!pud_none(*pud)) {
+			if (pud_huge(*pud)) {
+				hva_t hva = gfn_to_hva(kvm, addr >> PAGE_SHIFT);
+				kvm_flush_dcache_to_poc((void*)hva, PUD_SIZE);
+			} else {
+				stage2_flush_pmds(kvm, pud, addr, next);
+			}
+		}
+	} while (pud++, addr = next, addr != end);
+}
+
+static void stage2_flush_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
+	phys_addr_t next;
+	pgd_t *pgd;
+
+	pgd = kvm->arch.pgd + pgd_index(addr);
+	do {
+		next = kvm_pgd_addr_end(addr, end);
+		stage2_flush_puds(kvm, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+/**
+ * stage2_flush_vm - Invalidate cache for pages mapped in stage 2
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the stage 2 page tables and invalidate any cache lines
+ * backing memory already mapped to the VM.
+ */
+void stage2_flush_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_flush_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * free_boot_hyp_pgd - free HYP boot page tables
  *
@@ -715,7 +809,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_icache_guest_page(kvm, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, PAGE_S2);
@@ -723,7 +817,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_icache_guest_page(kvm, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, false);
 	}
 
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 21ef48d32ff2..3d6903006a8a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -62,6 +62,7 @@
  * RW:		64bit by default, can be overriden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
+ * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
  * TWI:		Trap WFI
@@ -74,7 +75,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
 			 HCR_AMO | HCR_IMO | HCR_FMO | \
 			 HCR_SWIO | HCR_TIDCP | HCR_RW)
 #define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index b25763bc0ec4..9fcd54b1e16d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -79,7 +79,8 @@
 #define c13_TID_URW	(TPIDR_EL0 * 2)	/* Thread ID, User R/W */
 #define c13_TID_URO	(TPIDRRO_EL0 * 2)/* Thread ID, User R/O */
 #define c13_TID_PRIV	(TPIDR_EL1 * 2)	/* Thread ID, Privileged */
-#define c10_AMAIR	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR0	(AMAIR_EL1 * 2)	/* Aux Memory Attr Indirection Reg */
+#define c10_AMAIR1	(c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL	(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
 #define NR_CP15_REGS	(NR_SYS_REGS * 2)
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7f1f9408ff66..7d29847a893b 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -106,7 +106,6 @@ static inline bool kvm_is_write_fault(unsigned long esr)
 	return true;
 }
 
-static inline void kvm_clean_dcache_area(void *addr, size_t size) {}
 static inline void kvm_clean_pgd(pgd_t *pgd) {}
 static inline void kvm_clean_pmd_entry(pmd_t *pmd) {}
 static inline void kvm_clean_pte(pte_t *pte) {}
@@ -122,11 +121,25 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd)
 	pmd_val(*pmd) |= PMD_S2_RDWR;
 }
 
+#define kvm_pgd_addr_end(addr, end)	pgd_addr_end(addr, end)
+#define kvm_pud_addr_end(addr, end)	pud_addr_end(addr, end)
+#define kvm_pmd_addr_end(addr, end)	pmd_addr_end(addr, end)
+
 struct kvm;
 
-static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
-					      unsigned long size)
+#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+
+static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
+	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+}
+
+static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
+					     unsigned long size)
+{
+	if (!vcpu_has_cache_enabled(vcpu))
+		kvm_flush_dcache_to_poc((void *)hva, size);
+
 	if (!icache_is_aliasing()) {		/* PIPT */
 		flush_icache_range(hva, hva + size);
 	} else if (!icache_is_aivivt()) {	/* non ASID-tagged VIVT */
@@ -135,8 +148,9 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 	}
 }
 
-#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
 #define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
+void stage2_flush_vm(struct kvm *kvm);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 02e9d09e1d80..03244582bc55 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -27,6 +27,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/kvm_mmu.h>
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <trace/events/kvm.h>
@@ -121,6 +122,48 @@ done:
 }
 
 /*
+ * Generic accessor for VM registers. Only called as long as HCR_TVM
+ * is set.
+ */
+static bool access_vm_reg(struct kvm_vcpu *vcpu,
+			  const struct sys_reg_params *p,
+			  const struct sys_reg_desc *r)
+{
+	unsigned long val;
+
+	BUG_ON(!p->is_write);
+
+	val = *vcpu_reg(vcpu, p->Rt);
+	if (!p->is_aarch32) {
+		vcpu_sys_reg(vcpu, r->reg) = val;
+	} else {
+		vcpu_cp15(vcpu, r->reg) = val & 0xffffffffUL;
+		if (!p->is_32bit)
+			vcpu_cp15(vcpu, r->reg + 1) = val >> 32;
+	}
+	return true;
+}
+
+/*
+ * SCTLR_EL1 accessor. Only called as long as HCR_TVM is set.  If the
+ * guest enables the MMU, we stop trapping the VM sys_regs and leave
+ * it in complete control of the caches.
+ */
+static bool access_sctlr(struct kvm_vcpu *vcpu,
+			 const struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	access_vm_reg(vcpu, p, r);
+
+	if (vcpu_has_cache_enabled(vcpu)) {	/* MMU+Caches enabled? */
+		vcpu->arch.hcr_el2 &= ~HCR_TVM;
+		stage2_flush_vm(vcpu->kvm);
+	}
+
+	return true;
+}
+
+/*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
  * NAKed, so it will read the PMCR anyway.
@@ -185,32 +228,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_mpidr, MPIDR_EL1 },
 	/* SCTLR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  NULL, reset_val, SCTLR_EL1, 0x00C50078 },
+	  access_sctlr, reset_val, SCTLR_EL1, 0x00C50078 },
 	/* CPACR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010),
 	  NULL, reset_val, CPACR_EL1, 0 },
 	/* TTBR0_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, TTBR0_EL1 },
+	  access_vm_reg, reset_unknown, TTBR0_EL1 },
 	/* TTBR1_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001),
-	  NULL, reset_unknown, TTBR1_EL1 },
+	  access_vm_reg, reset_unknown, TTBR1_EL1 },
 	/* TCR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010),
-	  NULL, reset_val, TCR_EL1, 0 },
+	  access_vm_reg, reset_val, TCR_EL1, 0 },
 
 	/* AFSR0_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000),
-	  NULL, reset_unknown, AFSR0_EL1 },
+	  access_vm_reg, reset_unknown, AFSR0_EL1 },
 	/* AFSR1_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001),
-	  NULL, reset_unknown, AFSR1_EL1 },
+	  access_vm_reg, reset_unknown, AFSR1_EL1 },
 	/* ESR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000),
-	  NULL, reset_unknown, ESR_EL1 },
+	  access_vm_reg, reset_unknown, ESR_EL1 },
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
-	  NULL, reset_unknown, FAR_EL1 },
+	  access_vm_reg, reset_unknown, FAR_EL1 },
 	/* PAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
 	  NULL, reset_unknown, PAR_EL1 },
@@ -224,17 +267,17 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 
 	/* MAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000),
-	  NULL, reset_unknown, MAIR_EL1 },
+	  access_vm_reg, reset_unknown, MAIR_EL1 },
 	/* AMAIR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000),
-	  NULL, reset_amair_el1, AMAIR_EL1 },
+	  access_vm_reg, reset_amair_el1, AMAIR_EL1 },
 
 	/* VBAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000),
 	  NULL, reset_val, VBAR_EL1, 0 },
 	/* CONTEXTIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001),
-	  NULL, reset_val, CONTEXTIDR_EL1, 0 },
+	  access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
 	/* TPIDR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100),
 	  NULL, reset_unknown, TPIDR_EL1 },
@@ -305,14 +348,32 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	  NULL, reset_val, FPEXC32_EL2, 0x70 },
 };
 
-/* Trapped cp15 registers */
+/*
+ * Trapped cp15 registers. TTBR0/TTBR1 get a double encoding,
+ * depending on the way they are accessed (as a 32bit or a 64bit
+ * register).
+ */
 static const struct sys_reg_desc cp15_regs[] = {
+	{ Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_sctlr, NULL, c1_SCTLR },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR },
+	{ Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR },
+	{ Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR },
+	{ Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, c6_DFAR },
+	{ Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR },
+
 	/*
 	 * DC{C,I,CI}SW operations:
 	 */
 	{ Op1( 0), CRn( 7), CRm( 6), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(10), Op2( 2), access_dcsw },
 	{ Op1( 0), CRn( 7), CRm(14), Op2( 2), access_dcsw },
+
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 0), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 1), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(12), Op2( 2), pm_fake },
@@ -326,6 +387,14 @@ static const struct sys_reg_desc cp15_regs[] = {
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 0), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 1), pm_fake },
 	{ Op1( 0), CRn( 9), CRm(14), Op2( 2), pm_fake },
+
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR },
+	{ Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 },
+	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
+	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
+
+	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 };
 
 /* Target specific emulation tables */
@@ -437,6 +506,8 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 	int Rt2 = (hsr >> 10) & 0xf;
 
+	params.is_aarch32 = true;
+	params.is_32bit = false;
 	params.CRm = (hsr >> 1) & 0xf;
 	params.Rt = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
@@ -480,6 +551,8 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	u32 hsr = kvm_vcpu_get_hsr(vcpu);
 
+	params.is_aarch32 = true;
+	params.is_32bit = true;
 	params.CRm = (hsr >> 1) & 0xf;
 	params.Rt = (hsr >> 5) & 0xf;
 	params.is_write = ((hsr & 1) == 0);
@@ -549,6 +622,8 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	struct sys_reg_params params;
 	unsigned long esr = kvm_vcpu_get_hsr(vcpu);
 
+	params.is_aarch32 = false;
+	params.is_32bit = false;
 	params.Op0 = (esr >> 20) & 3;
 	params.Op1 = (esr >> 14) & 0x7;
 	params.CRn = (esr >> 10) & 0xf;
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index d50d3722998e..d411e251412c 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -30,6 +30,8 @@ struct sys_reg_params {
 	u8	Op2;
 	u8	Rt;
 	bool	is_write;
+	bool	is_aarch32;
+	bool	is_32bit;	/* Only valid if is_aarch32 is true */
 };
 
 struct sys_reg_desc {
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 53f44bee9ebb..6a4309bb821a 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -199,6 +199,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_IRQ_INJECT_STATUS:
+	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a995fce87791..060aaa6348d7 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -30,16 +30,16 @@
30 30
31 31
32/* Special address that contains the comm page, used for reducing # of traps */ 32/* Special address that contains the comm page, used for reducing # of traps */
33#define KVM_GUEST_COMMPAGE_ADDR 0x0 33#define KVM_GUEST_COMMPAGE_ADDR 0x0
34 34
35#define KVM_GUEST_KERNEL_MODE(vcpu) ((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \ 35#define KVM_GUEST_KERNEL_MODE(vcpu) ((kvm_read_c0_guest_status(vcpu->arch.cop0) & (ST0_EXL | ST0_ERL)) || \
36 ((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0)) 36 ((kvm_read_c0_guest_status(vcpu->arch.cop0) & KSU_USER) == 0))
37 37
38#define KVM_GUEST_KUSEG 0x00000000UL 38#define KVM_GUEST_KUSEG 0x00000000UL
39#define KVM_GUEST_KSEG0 0x40000000UL 39#define KVM_GUEST_KSEG0 0x40000000UL
40#define KVM_GUEST_KSEG23 0x60000000UL 40#define KVM_GUEST_KSEG23 0x60000000UL
41#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000) 41#define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0x60000000)
42#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) 42#define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff)
43 43
44#define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0) 44#define KVM_GUEST_CKSEG0ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG0)
45#define KVM_GUEST_CKSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1) 45#define KVM_GUEST_CKSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1)
@@ -52,17 +52,17 @@
52#define KVM_GUEST_KSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1) 52#define KVM_GUEST_KSEG1ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG1)
53#define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23) 53#define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23)
54 54
55#define KVM_INVALID_PAGE 0xdeadbeef 55#define KVM_INVALID_PAGE 0xdeadbeef
56#define KVM_INVALID_INST 0xdeadbeef 56#define KVM_INVALID_INST 0xdeadbeef
57#define KVM_INVALID_ADDR 0xdeadbeef 57#define KVM_INVALID_ADDR 0xdeadbeef
58 58
59#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL 59#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL
60 60
61#define GUEST_TICKS_PER_JIFFY (40000000/HZ) 61#define GUEST_TICKS_PER_JIFFY (40000000/HZ)
62#define MS_TO_NS(x) (x * 1E6L) 62#define MS_TO_NS(x) (x * 1E6L)
63 63
64#define CAUSEB_DC 27 64#define CAUSEB_DC 27
65#define CAUSEF_DC (_ULCAST_(1) << 27) 65#define CAUSEF_DC (_ULCAST_(1) << 27)
66 66
67struct kvm; 67struct kvm;
68struct kvm_run; 68struct kvm_run;
@@ -126,8 +126,8 @@ struct kvm_arch {
126 int commpage_tlb; 126 int commpage_tlb;
127}; 127};
128 128
129#define N_MIPS_COPROC_REGS 32 129#define N_MIPS_COPROC_REGS 32
130#define N_MIPS_COPROC_SEL 8 130#define N_MIPS_COPROC_SEL 8
131 131
132struct mips_coproc { 132struct mips_coproc {
133 unsigned long reg[N_MIPS_COPROC_REGS][N_MIPS_COPROC_SEL]; 133 unsigned long reg[N_MIPS_COPROC_REGS][N_MIPS_COPROC_SEL];
@@ -139,124 +139,124 @@ struct mips_coproc {
139/* 139/*
140 * Coprocessor 0 register names 140 * Coprocessor 0 register names
141 */ 141 */
142#define MIPS_CP0_TLB_INDEX 0 142#define MIPS_CP0_TLB_INDEX 0
143#define MIPS_CP0_TLB_RANDOM 1 143#define MIPS_CP0_TLB_RANDOM 1
144#define MIPS_CP0_TLB_LOW 2 144#define MIPS_CP0_TLB_LOW 2
145#define MIPS_CP0_TLB_LO0 2 145#define MIPS_CP0_TLB_LO0 2
146#define MIPS_CP0_TLB_LO1 3 146#define MIPS_CP0_TLB_LO1 3
147#define MIPS_CP0_TLB_CONTEXT 4 147#define MIPS_CP0_TLB_CONTEXT 4
148#define MIPS_CP0_TLB_PG_MASK 5 148#define MIPS_CP0_TLB_PG_MASK 5
149#define MIPS_CP0_TLB_WIRED 6 149#define MIPS_CP0_TLB_WIRED 6
150#define MIPS_CP0_HWRENA 7 150#define MIPS_CP0_HWRENA 7
151#define MIPS_CP0_BAD_VADDR 8 151#define MIPS_CP0_BAD_VADDR 8
152#define MIPS_CP0_COUNT 9 152#define MIPS_CP0_COUNT 9
153#define MIPS_CP0_TLB_HI 10 153#define MIPS_CP0_TLB_HI 10
154#define MIPS_CP0_COMPARE 11 154#define MIPS_CP0_COMPARE 11
155#define MIPS_CP0_STATUS 12 155#define MIPS_CP0_STATUS 12
156#define MIPS_CP0_CAUSE 13 156#define MIPS_CP0_CAUSE 13
157#define MIPS_CP0_EXC_PC 14 157#define MIPS_CP0_EXC_PC 14
158#define MIPS_CP0_PRID 15 158#define MIPS_CP0_PRID 15
159#define MIPS_CP0_CONFIG 16 159#define MIPS_CP0_CONFIG 16
160#define MIPS_CP0_LLADDR 17 160#define MIPS_CP0_LLADDR 17
161#define MIPS_CP0_WATCH_LO 18 161#define MIPS_CP0_WATCH_LO 18
162#define MIPS_CP0_WATCH_HI 19 162#define MIPS_CP0_WATCH_HI 19
163#define MIPS_CP0_TLB_XCONTEXT 20 163#define MIPS_CP0_TLB_XCONTEXT 20
164#define MIPS_CP0_ECC 26 164#define MIPS_CP0_ECC 26
165#define MIPS_CP0_CACHE_ERR 27 165#define MIPS_CP0_CACHE_ERR 27
166#define MIPS_CP0_TAG_LO 28 166#define MIPS_CP0_TAG_LO 28
167#define MIPS_CP0_TAG_HI 29 167#define MIPS_CP0_TAG_HI 29
168#define MIPS_CP0_ERROR_PC 30 168#define MIPS_CP0_ERROR_PC 30
169#define MIPS_CP0_DEBUG 23 169#define MIPS_CP0_DEBUG 23
170#define MIPS_CP0_DEPC 24 170#define MIPS_CP0_DEPC 24
171#define MIPS_CP0_PERFCNT 25 171#define MIPS_CP0_PERFCNT 25
172#define MIPS_CP0_ERRCTL 26 172#define MIPS_CP0_ERRCTL 26
173#define MIPS_CP0_DATA_LO 28 173#define MIPS_CP0_DATA_LO 28
174#define MIPS_CP0_DATA_HI 29 174#define MIPS_CP0_DATA_HI 29
175#define MIPS_CP0_DESAVE 31 175#define MIPS_CP0_DESAVE 31
176 176
177#define MIPS_CP0_CONFIG_SEL 0 177#define MIPS_CP0_CONFIG_SEL 0
178#define MIPS_CP0_CONFIG1_SEL 1 178#define MIPS_CP0_CONFIG1_SEL 1
179#define MIPS_CP0_CONFIG2_SEL 2 179#define MIPS_CP0_CONFIG2_SEL 2
180#define MIPS_CP0_CONFIG3_SEL 3 180#define MIPS_CP0_CONFIG3_SEL 3
181 181
182/* Config0 register bits */ 182/* Config0 register bits */
183#define CP0C0_M 31 183#define CP0C0_M 31
184#define CP0C0_K23 28 184#define CP0C0_K23 28
185#define CP0C0_KU 25 185#define CP0C0_KU 25
186#define CP0C0_MDU 20 186#define CP0C0_MDU 20
187#define CP0C0_MM 17 187#define CP0C0_MM 17
188#define CP0C0_BM 16 188#define CP0C0_BM 16
189#define CP0C0_BE 15 189#define CP0C0_BE 15
190#define CP0C0_AT 13 190#define CP0C0_AT 13
191#define CP0C0_AR 10 191#define CP0C0_AR 10
192#define CP0C0_MT 7 192#define CP0C0_MT 7
193#define CP0C0_VI 3 193#define CP0C0_VI 3
194#define CP0C0_K0 0 194#define CP0C0_K0 0
195 195
196/* Config1 register bits */ 196/* Config1 register bits */
197#define CP0C1_M 31 197#define CP0C1_M 31
198#define CP0C1_MMU 25 198#define CP0C1_MMU 25
199#define CP0C1_IS 22 199#define CP0C1_IS 22
200#define CP0C1_IL 19 200#define CP0C1_IL 19
201#define CP0C1_IA 16 201#define CP0C1_IA 16
202#define CP0C1_DS 13 202#define CP0C1_DS 13
203#define CP0C1_DL 10 203#define CP0C1_DL 10
204#define CP0C1_DA 7 204#define CP0C1_DA 7
205#define CP0C1_C2 6 205#define CP0C1_C2 6
206#define CP0C1_MD 5 206#define CP0C1_MD 5
207#define CP0C1_PC 4 207#define CP0C1_PC 4
208#define CP0C1_WR 3 208#define CP0C1_WR 3
209#define CP0C1_CA 2 209#define CP0C1_CA 2
210#define CP0C1_EP 1 210#define CP0C1_EP 1
211#define CP0C1_FP 0 211#define CP0C1_FP 0
212 212
213/* Config2 Register bits */ 213/* Config2 Register bits */
214#define CP0C2_M 31 214#define CP0C2_M 31
215#define CP0C2_TU 28 215#define CP0C2_TU 28
216#define CP0C2_TS 24 216#define CP0C2_TS 24
217#define CP0C2_TL 20 217#define CP0C2_TL 20
218#define CP0C2_TA 16 218#define CP0C2_TA 16
219#define CP0C2_SU 12 219#define CP0C2_SU 12
220#define CP0C2_SS 8 220#define CP0C2_SS 8
221#define CP0C2_SL 4 221#define CP0C2_SL 4
222#define CP0C2_SA 0 222#define CP0C2_SA 0
223 223
224/* Config3 Register bits */ 224/* Config3 Register bits */
225#define CP0C3_M 31 225#define CP0C3_M 31
226#define CP0C3_ISA_ON_EXC 16 226#define CP0C3_ISA_ON_EXC 16
227#define CP0C3_ULRI 13 227#define CP0C3_ULRI 13
228#define CP0C3_DSPP 10 228#define CP0C3_DSPP 10
229#define CP0C3_LPA 7 229#define CP0C3_LPA 7
230#define CP0C3_VEIC 6 230#define CP0C3_VEIC 6
231#define CP0C3_VInt 5 231#define CP0C3_VInt 5
232#define CP0C3_SP 4 232#define CP0C3_SP 4
233#define CP0C3_MT 2 233#define CP0C3_MT 2
234#define CP0C3_SM 1 234#define CP0C3_SM 1
235#define CP0C3_TL 0 235#define CP0C3_TL 0
236 236
237/* Have config1, Cacheable, noncoherent, write-back, write allocate*/ 237/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
238#define MIPS_CONFIG0 \ 238#define MIPS_CONFIG0 \
239 ((1 << CP0C0_M) | (0x3 << CP0C0_K0)) 239 ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
240 240
241/* Have config2, no coprocessor2 attached, no MDMX support attached, 241/* Have config2, no coprocessor2 attached, no MDMX support attached,
242 no performance counters, watch registers present, 242 no performance counters, watch registers present,
243 no code compression, EJTAG present, no FPU, no watch registers */ 243 no code compression, EJTAG present, no FPU, no watch registers */
244#define MIPS_CONFIG1 \ 244#define MIPS_CONFIG1 \
245((1 << CP0C1_M) | \ 245((1 << CP0C1_M) | \
246 (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \ 246 (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \
247 (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \ 247 (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \
248 (0 << CP0C1_FP)) 248 (0 << CP0C1_FP))
249 249
250/* Have config3, no tertiary/secondary caches implemented */ 250/* Have config3, no tertiary/secondary caches implemented */
251#define MIPS_CONFIG2 \ 251#define MIPS_CONFIG2 \
252((1 << CP0C2_M)) 252((1 << CP0C2_M))
253 253
254/* No config4, no DSP ASE, no large physaddr (PABITS), 254/* No config4, no DSP ASE, no large physaddr (PABITS),
255 no external interrupt controller, no vectored interrupts, 255 no external interrupt controller, no vectored interrupts,
256 no 1kb pages, no SmartMIPS ASE, no trace logic */ 256 no 1kb pages, no SmartMIPS ASE, no trace logic */
257#define MIPS_CONFIG3 \ 257#define MIPS_CONFIG3 \
258((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \ 258((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \
259 (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \ 259 (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \
260 (0 << CP0C3_SM) | (0 << CP0C3_TL)) 260 (0 << CP0C3_SM) | (0 << CP0C3_TL))
261 261
262/* MMU types, the first four entries have the same layout as the 262/* MMU types, the first four entries have the same layout as the
@@ -274,36 +274,36 @@ enum mips_mmu_types {
274/* 274/*
275 * Trap codes 275 * Trap codes
276 */ 276 */
277#define T_INT 0 /* Interrupt pending */ 277#define T_INT 0 /* Interrupt pending */
278#define T_TLB_MOD 1 /* TLB modified fault */ 278#define T_TLB_MOD 1 /* TLB modified fault */
279#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */ 279#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */
280#define T_TLB_ST_MISS 3 /* TLB miss on a store */ 280#define T_TLB_ST_MISS 3 /* TLB miss on a store */
281#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */ 281#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */
282#define T_ADDR_ERR_ST 5 /* Address error on a store */ 282#define T_ADDR_ERR_ST 5 /* Address error on a store */
283#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */ 283#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */
284#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */ 284#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */
285#define T_SYSCALL 8 /* System call */ 285#define T_SYSCALL 8 /* System call */
286#define T_BREAK 9 /* Breakpoint */ 286#define T_BREAK 9 /* Breakpoint */
287#define T_RES_INST 10 /* Reserved instruction exception */ 287#define T_RES_INST 10 /* Reserved instruction exception */
288#define T_COP_UNUSABLE 11 /* Coprocessor unusable */ 288#define T_COP_UNUSABLE 11 /* Coprocessor unusable */
289#define T_OVFLOW 12 /* Arithmetic overflow */ 289#define T_OVFLOW 12 /* Arithmetic overflow */
290 290
291/* 291/*
292 * Trap definitions added for r4000 port. 292 * Trap definitions added for r4000 port.
293 */ 293 */
294#define T_TRAP 13 /* Trap instruction */ 294#define T_TRAP 13 /* Trap instruction */
295#define T_VCEI 14 /* Virtual coherency exception */ 295#define T_VCEI 14 /* Virtual coherency exception */
296#define T_FPE 15 /* Floating point exception */ 296#define T_FPE 15 /* Floating point exception */
297#define T_WATCH 23 /* Watch address reference */ 297#define T_WATCH 23 /* Watch address reference */
298#define T_VCED 31 /* Virtual coherency data */ 298#define T_VCED 31 /* Virtual coherency data */
299 299
300/* Resume Flags */ 300/* Resume Flags */
301#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ 301#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
302#define RESUME_FLAG_HOST (1<<1) /* Resume host? */ 302#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
303 303
304#define RESUME_GUEST 0 304#define RESUME_GUEST 0
305#define RESUME_GUEST_DR RESUME_FLAG_DR 305#define RESUME_GUEST_DR RESUME_FLAG_DR
306#define RESUME_HOST RESUME_FLAG_HOST 306#define RESUME_HOST RESUME_FLAG_HOST
307 307
308enum emulation_result { 308enum emulation_result {
309 EMULATE_DONE, /* no further processing */ 309 EMULATE_DONE, /* no further processing */
@@ -313,24 +313,27 @@ enum emulation_result {
313 EMULATE_PRIV_FAIL, 313 EMULATE_PRIV_FAIL,
314}; 314};
315 315
316#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */ 316#define MIPS3_PG_G 0x00000001 /* Global; ignore ASID if in lo0 & lo1 */
317#define MIPS3_PG_V 0x00000002 /* Valid */ 317#define MIPS3_PG_V 0x00000002 /* Valid */
318#define MIPS3_PG_NV 0x00000000 318#define MIPS3_PG_NV 0x00000000
319#define MIPS3_PG_D 0x00000004 /* Dirty */ 319#define MIPS3_PG_D 0x00000004 /* Dirty */
320 320
321#define mips3_paddr_to_tlbpfn(x) \ 321#define mips3_paddr_to_tlbpfn(x) \
322 (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME) 322 (((unsigned long)(x) >> MIPS3_PG_SHIFT) & MIPS3_PG_FRAME)
323#define mips3_tlbpfn_to_paddr(x) \ 323#define mips3_tlbpfn_to_paddr(x) \
324 ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT) 324 ((unsigned long)((x) & MIPS3_PG_FRAME) << MIPS3_PG_SHIFT)
325 325
326#define MIPS3_PG_SHIFT 6 326#define MIPS3_PG_SHIFT 6
327#define MIPS3_PG_FRAME 0x3fffffc0 327#define MIPS3_PG_FRAME 0x3fffffc0
328 328
329#define VPN2_MASK 0xffffe000 329#define VPN2_MASK 0xffffe000
330#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && ((x).tlb_lo1 & MIPS3_PG_G)) 330#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \
331#define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) 331 ((x).tlb_lo1 & MIPS3_PG_G))
332#define TLB_ASID(x) ((x).tlb_hi & ASID_MASK) 332#define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK)
333#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) ? ((x).tlb_lo1 & MIPS3_PG_V) : ((x).tlb_lo0 & MIPS3_PG_V)) 333#define TLB_ASID(x) ((x).tlb_hi & ASID_MASK)
334#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \
335 ? ((x).tlb_lo1 & MIPS3_PG_V) \
336 : ((x).tlb_lo0 & MIPS3_PG_V))
334 337
335struct kvm_mips_tlb { 338struct kvm_mips_tlb {
336 long tlb_mask; 339 long tlb_mask;
@@ -339,7 +342,7 @@ struct kvm_mips_tlb {
339 long tlb_lo1; 342 long tlb_lo1;
340}; 343};
341 344
342#define KVM_MIPS_GUEST_TLB_SIZE 64 345#define KVM_MIPS_GUEST_TLB_SIZE 64
343struct kvm_vcpu_arch { 346struct kvm_vcpu_arch {
344 void *host_ebase, *guest_ebase; 347 void *host_ebase, *guest_ebase;
345 unsigned long host_stack; 348 unsigned long host_stack;
@@ -400,65 +403,67 @@ struct kvm_vcpu_arch {
400}; 403};
401 404
402 405
403#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) 406#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
404#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) 407#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
405#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) 408#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
406#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) 409#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
407#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) 410#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
408#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) 411#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
409#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) 412#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
410#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) 413#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
411#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) 414#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
412#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) 415#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
413#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val)) 416#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
414#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0]) 417#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0])
415#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val)) 418#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val))
416#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0]) 419#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0])
417#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val)) 420#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
418#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0]) 421#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0])
419#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val)) 422#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val))
420#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0]) 423#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0])
421#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val)) 424#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
422#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0]) 425#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0])
423#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val)) 426#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
424#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1]) 427#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0])
425#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val)) 428#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val))
426#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0]) 429#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1])
427#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val)) 430#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val))
428#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0]) 431#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0])
429#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val)) 432#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
430#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0]) 433#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0])
431#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val)) 434#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
432#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1]) 435#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0])
433#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val)) 436#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val))
434#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0]) 437#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1])
435#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) 438#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val))
436#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2]) 439#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0])
437#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) 440#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
438#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) 441#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
439#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) 442#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
440#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) 443#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
441#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) 444#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
442#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) 445#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
443#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) 446#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
444#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) 447#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
445#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) 448#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
446 449#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
447#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) 450#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
448#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) 451
449#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val)) 452#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
450#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val)) 453#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
451#define kvm_change_c0_guest_cause(cop0, change, val) \ 454#define kvm_set_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] |= (val))
452{ \ 455#define kvm_clear_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] &= ~(val))
453 kvm_clear_c0_guest_cause(cop0, change); \ 456#define kvm_change_c0_guest_cause(cop0, change, val) \
454 kvm_set_c0_guest_cause(cop0, ((val) & (change))); \ 457{ \
458 kvm_clear_c0_guest_cause(cop0, change); \
459 kvm_set_c0_guest_cause(cop0, ((val) & (change))); \
455} 460}
456#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) 461#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
457#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) 462#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
458#define kvm_change_c0_guest_ebase(cop0, change, val) \ 463#define kvm_change_c0_guest_ebase(cop0, change, val) \
459{ \ 464{ \
460 kvm_clear_c0_guest_ebase(cop0, change); \ 465 kvm_clear_c0_guest_ebase(cop0, change); \
461 kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ 466 kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
462} 467}
463 468
464 469
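
The kvm_change_c0_guest_cause()/kvm_change_c0_guest_ebase() helpers above use a clear-then-set read-modify-write pattern: only the bits selected by "change" are replaced with the corresponding bits from "val". A minimal standalone C model of that pattern, with a plain variable standing in for the cop0 register array (illustration only, not kernel code):

#include <assert.h>
#include <stdint.h>

/* Simplified model of kvm_change_c0_guest_cause(): clear the "change" bits,
 * then set whichever of them are requested in "val"; all other bits of the
 * register are left untouched. */
static uint32_t change_bits(uint32_t reg, uint32_t change, uint32_t val)
{
	reg &= ~change;          /* kvm_clear_c0_guest_cause(cop0, change) */
	reg |= (val & change);   /* kvm_set_c0_guest_cause(cop0, val & change) */
	return reg;
}

int main(void)
{
	uint32_t cause = 0xf0f0f0f0;

	/* Update only bits 0-7; the upper bits are preserved. */
	cause = change_bits(cause, 0x000000ff, 0x000000aa);
	assert(cause == 0xf0f0f0aa);
	return 0;
}
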
diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c
index 4b6274b47f33..e3fec99941a7 100644
--- a/arch/mips/kvm/kvm_mips_emul.c
+++ b/arch/mips/kvm/kvm_mips_emul.c
@@ -436,13 +436,6 @@ kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause,
436 sel = inst & 0x7; 436 sel = inst & 0x7;
437 co_bit = (inst >> 25) & 1; 437 co_bit = (inst >> 25) & 1;
438 438
439 /* Verify that the register is valid */
440 if (rd > MIPS_CP0_DESAVE) {
441 printk("Invalid rd: %d\n", rd);
442 er = EMULATE_FAIL;
443 goto done;
444 }
445
446 if (co_bit) { 439 if (co_bit) {
447 op = (inst) & 0xff; 440 op = (inst) & 0xff;
448 441
@@ -1542,8 +1535,15 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
1542 } 1535 }
1543 1536
1544 if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) { 1537 if ((inst & OPCODE) == SPEC3 && (inst & FUNC) == RDHWR) {
1538 int usermode = !KVM_GUEST_KERNEL_MODE(vcpu);
1545 int rd = (inst & RD) >> 11; 1539 int rd = (inst & RD) >> 11;
1546 int rt = (inst & RT) >> 16; 1540 int rt = (inst & RT) >> 16;
1541 /* If usermode, check RDHWR rd is allowed by guest HWREna */
1542 if (usermode && !(kvm_read_c0_guest_hwrena(cop0) & BIT(rd))) {
1543 kvm_debug("RDHWR %#x disallowed by HWREna @ %p\n",
1544 rd, opc);
1545 goto emulate_ri;
1546 }
1547 switch (rd) { 1547 switch (rd) {
1548 case 0: /* CPU number */ 1548 case 0: /* CPU number */
1549 arch->gprs[rt] = 0; 1549 arch->gprs[rt] = 0;
@@ -1567,31 +1567,27 @@ kvm_mips_handle_ri(unsigned long cause, uint32_t *opc,
1567 } 1567 }
1568 break; 1568 break;
1569 case 29: 1569 case 29:
1570#if 1
1571 arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0); 1570 arch->gprs[rt] = kvm_read_c0_guest_userlocal(cop0);
1572#else
1573 /* UserLocal not implemented */
1574 er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
1575#endif
1576 break; 1571 break;
1577 1572
1578 default: 1573 default:
1579 printk("RDHWR not supported\n"); 1574 kvm_debug("RDHWR %#x not supported @ %p\n", rd, opc);
1580 er = EMULATE_FAIL; 1575 goto emulate_ri;
1581 break;
1582 } 1576 }
1583 } else { 1577 } else {
1584 printk("Emulate RI not supported @ %p: %#x\n", opc, inst); 1578 kvm_debug("Emulate RI not supported @ %p: %#x\n", opc, inst);
1585 er = EMULATE_FAIL; 1579 goto emulate_ri;
1586 } 1580 }
1587 1581
1582 return EMULATE_DONE;
1583
1584emulate_ri:
1588 /* 1585 /*
1589 * Rollback PC only if emulation was unsuccessful 1586 * Rollback PC (if in branch delay slot then the PC already points to
1587 * branch target), and pass the RI exception to the guest OS.
1590 */ 1588 */
1591 if (er == EMULATE_FAIL) { 1589 vcpu->arch.pc = curr_pc;
1592 vcpu->arch.pc = curr_pc; 1590 return kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
1593 }
1594 return er;
1595} 1591}
1596 1592
1597enum emulation_result 1593enum emulation_result
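
The RDHWR handling added above gates guest-user-mode accesses on the guest's HWREna bit for the requested hardware register and otherwise reflects an RI exception back to the guest OS. A self-contained sketch of that decision, with made-up helper names (not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Sketch of the gating logic: in guest user mode, RDHWR rd is only emulated
 * when bit rd of the guest HWREna register is set; otherwise the reserved
 * instruction exception is delivered to the guest. */
enum action { EMULATE_RDHWR, DELIVER_RI_TO_GUEST };

static enum action rdhwr_action(bool guest_usermode, unsigned hwrena, unsigned rd)
{
	if (guest_usermode && !(hwrena & (1u << rd)))
		return DELIVER_RI_TO_GUEST;
	return EMULATE_RDHWR;
}

int main(void)
{
	/* HWREna with only bit 29 (UserLocal) enabled, as a Linux guest typically sets. */
	unsigned hwrena = 1u << 29;

	printf("%d\n", rdhwr_action(true, hwrena, 29));  /* 0: emulate UserLocal */
	printf("%d\n", rdhwr_action(true, hwrena, 0));   /* 1: RI passed to guest */
	printf("%d\n", rdhwr_action(false, hwrena, 0));  /* 0: guest kernel mode, emulate */
	return 0;
}
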
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 83851aabfdc8..bb1e38a23ac7 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -304,6 +304,11 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
304 return vcpu->arch.fault_dar; 304 return vcpu->arch.fault_dar;
305} 305}
306 306
307static inline bool is_kvmppc_resume_guest(int r)
308{
309 return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
310}
311
307/* Magic register values loaded into r3 and r4 before the 'sc' assembly 312/* Magic register values loaded into r3 and r4 before the 'sc' assembly
308 * instruction for the OSI hypercalls */ 313 * instruction for the OSI hypercalls */
309#define OSI_SC_MAGIC_R3 0x113724FA 314#define OSI_SC_MAGIC_R3 0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index bf0fa8b0a883..51388befeddb 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -289,6 +289,18 @@ static inline void note_hpte_modification(struct kvm *kvm,
289 if (atomic_read(&kvm->arch.hpte_mod_interest)) 289 if (atomic_read(&kvm->arch.hpte_mod_interest))
290 rev->guest_rpte |= HPTE_GR_MODIFIED; 290 rev->guest_rpte |= HPTE_GR_MODIFIED;
291} 291}
292
293/*
294 * Like kvm_memslots(), but for use in real mode when we can't do
295 * any RCU stuff (since the secondary threads are offline from the
296 * kernel's point of view), and we can't print anything.
297 * Thus we use rcu_dereference_raw() rather than rcu_dereference_check().
298 */
299static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
300{
301 return rcu_dereference_raw_notrace(kvm->memslots);
302}
303
292#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ 304#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
293 305
294#endif /* __ASM_KVM_BOOK3S_64_H__ */ 306#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index f3a91dc02c98..821725c1bf46 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -94,7 +94,7 @@ struct kvmppc_host_state {
94 unsigned long xics_phys; 94 unsigned long xics_phys;
95 u32 saved_xirr; 95 u32 saved_xirr;
96 u64 dabr; 96 u64 dabr;
97 u64 host_mmcr[3]; 97 u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
98 u32 host_pmc[8]; 98 u32 host_pmc[8];
99 u64 host_purr; 99 u64 host_purr;
100 u64 host_spurr; 100 u64 host_spurr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fcd53f0d34ba..4096f16502a9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,8 @@ extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
129 struct kvm_create_spapr_tce *args); 129 struct kvm_create_spapr_tce *args);
130extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, 130extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
131 unsigned long ioba, unsigned long tce); 131 unsigned long ioba, unsigned long tce);
132extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
133 unsigned long ioba);
132extern struct kvm_rma_info *kvm_alloc_rma(void); 134extern struct kvm_rma_info *kvm_alloc_rma(void);
133extern void kvm_release_rma(struct kvm_rma_info *ri); 135extern void kvm_release_rma(struct kvm_rma_info *ri);
134extern struct page *kvm_alloc_hpt(unsigned long nr_pages); 136extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1a36b8ede417..0dcc48af25a3 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -213,6 +213,7 @@
213#define SPRN_ACOP 0x1F /* Available Coprocessor Register */ 213#define SPRN_ACOP 0x1F /* Available Coprocessor Register */
214#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ 214#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
215#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ 215#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
216#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */
216#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ 217#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
217#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ 218#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
218#define SPRN_CTRLF 0x088 219#define SPRN_CTRLF 0x088
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index 0c9f8b74dd97..c22d704b6d41 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -7,6 +7,8 @@
7 7
8#include <uapi/asm/tm.h> 8#include <uapi/asm/tm.h>
9 9
10#ifndef __ASSEMBLY__
11
10#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 12#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
11extern void do_load_up_transact_fpu(struct thread_struct *thread); 13extern void do_load_up_transact_fpu(struct thread_struct *thread);
12extern void do_load_up_transact_altivec(struct thread_struct *thread); 14extern void do_load_up_transact_altivec(struct thread_struct *thread);
@@ -21,3 +23,5 @@ extern void tm_recheckpoint(struct thread_struct *thread,
21extern void tm_abort(uint8_t cause); 23extern void tm_abort(uint8_t cause);
22extern void tm_save_sprs(struct thread_struct *thread); 24extern void tm_save_sprs(struct thread_struct *thread);
23extern void tm_restore_sprs(struct thread_struct *thread); 25extern void tm_restore_sprs(struct thread_struct *thread);
26
27#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 303ece75b8e4..fb25ebc0af0c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -262,7 +262,14 @@ int kvmppc_mmu_hv_init(void)
262 262
263static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 263static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
264{ 264{
265 kvmppc_set_msr(vcpu, vcpu->arch.intr_msr); 265 unsigned long msr = vcpu->arch.intr_msr;
266
267 /* If transactional, change to suspend mode on IRQ delivery */
268 if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
269 msr |= MSR_TS_S;
270 else
271 msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
272 kvmppc_set_msr(vcpu, msr);
266} 273}
267 274
268/* 275/*
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 2c25f5412bdb..89e96b3e0039 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -75,3 +75,31 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
75 return H_TOO_HARD; 75 return H_TOO_HARD;
76} 76}
77EXPORT_SYMBOL_GPL(kvmppc_h_put_tce); 77EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
78
79long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
80 unsigned long ioba)
81{
82 struct kvm *kvm = vcpu->kvm;
83 struct kvmppc_spapr_tce_table *stt;
84
85 list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
86 if (stt->liobn == liobn) {
87 unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
88 struct page *page;
89 u64 *tbl;
90
91 if (ioba >= stt->window_size)
92 return H_PARAMETER;
93
94 page = stt->pages[idx / TCES_PER_PAGE];
95 tbl = (u64 *)page_address(page);
96
97 vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
98 return H_SUCCESS;
99 }
100 }
101
102 /* Didn't find the liobn, punt it to userspace */
103 return H_TOO_HARD;
104}
105EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
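
The H_GET_TCE handler above turns the I/O bus address into a TCE index and then into a backing page plus slot within that page. A quick standalone model of the arithmetic, assuming 4 KiB TCE pages (SPAPR_TCE_SHIFT == 12) and 8-byte TCE entries (illustrative only):

#include <stdio.h>
#include <stdint.h>

#define SPAPR_TCE_SHIFT 12                        /* assumed TCE page shift */
#define TCES_PER_PAGE   (4096 / sizeof(uint64_t)) /* 512 entries per backing page */

int main(void)
{
	uint64_t ioba = 0x00ABC000;               /* hypothetical I/O bus address */
	uint64_t idx  = ioba >> SPAPR_TCE_SHIFT;  /* TCE index within the table */

	printf("backing page %llu, slot %llu\n",
	       (unsigned long long)(idx / TCES_PER_PAGE),
	       (unsigned long long)(idx % TCES_PER_PAGE));
	return 0;
}
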
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 17fc9496b6ac..8227dba5af0f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -86,7 +86,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
86 86
87 /* CPU points to the first thread of the core */ 87 /* CPU points to the first thread of the core */
88 if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) { 88 if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
89#ifdef CONFIG_KVM_XICS 89#ifdef CONFIG_PPC_ICP_NATIVE
90 int real_cpu = cpu + vcpu->arch.ptid; 90 int real_cpu = cpu + vcpu->arch.ptid;
91 if (paca[real_cpu].kvm_hstate.xics_phys) 91 if (paca[real_cpu].kvm_hstate.xics_phys)
92 xics_wake_cpu(real_cpu); 92 xics_wake_cpu(real_cpu);
@@ -879,17 +879,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
879 case KVM_REG_PPC_IAMR: 879 case KVM_REG_PPC_IAMR:
880 *val = get_reg_val(id, vcpu->arch.iamr); 880 *val = get_reg_val(id, vcpu->arch.iamr);
881 break; 881 break;
882#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
883 case KVM_REG_PPC_TFHAR:
884 *val = get_reg_val(id, vcpu->arch.tfhar);
885 break;
886 case KVM_REG_PPC_TFIAR:
887 *val = get_reg_val(id, vcpu->arch.tfiar);
888 break;
889 case KVM_REG_PPC_TEXASR:
890 *val = get_reg_val(id, vcpu->arch.texasr);
891 break;
892#endif
893 case KVM_REG_PPC_FSCR: 882 case KVM_REG_PPC_FSCR:
894 *val = get_reg_val(id, vcpu->arch.fscr); 883 *val = get_reg_val(id, vcpu->arch.fscr);
895 break; 884 break;
@@ -970,6 +959,69 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
970 case KVM_REG_PPC_PPR: 959 case KVM_REG_PPC_PPR:
971 *val = get_reg_val(id, vcpu->arch.ppr); 960 *val = get_reg_val(id, vcpu->arch.ppr);
972 break; 961 break;
962#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
963 case KVM_REG_PPC_TFHAR:
964 *val = get_reg_val(id, vcpu->arch.tfhar);
965 break;
966 case KVM_REG_PPC_TFIAR:
967 *val = get_reg_val(id, vcpu->arch.tfiar);
968 break;
969 case KVM_REG_PPC_TEXASR:
970 *val = get_reg_val(id, vcpu->arch.texasr);
971 break;
972 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
973 i = id - KVM_REG_PPC_TM_GPR0;
974 *val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
975 break;
976 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
977 {
978 int j;
979 i = id - KVM_REG_PPC_TM_VSR0;
980 if (i < 32)
981 for (j = 0; j < TS_FPRWIDTH; j++)
982 val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
983 else {
984 if (cpu_has_feature(CPU_FTR_ALTIVEC))
985 val->vval = vcpu->arch.vr_tm.vr[i-32];
986 else
987 r = -ENXIO;
988 }
989 break;
990 }
991 case KVM_REG_PPC_TM_CR:
992 *val = get_reg_val(id, vcpu->arch.cr_tm);
993 break;
994 case KVM_REG_PPC_TM_LR:
995 *val = get_reg_val(id, vcpu->arch.lr_tm);
996 break;
997 case KVM_REG_PPC_TM_CTR:
998 *val = get_reg_val(id, vcpu->arch.ctr_tm);
999 break;
1000 case KVM_REG_PPC_TM_FPSCR:
1001 *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
1002 break;
1003 case KVM_REG_PPC_TM_AMR:
1004 *val = get_reg_val(id, vcpu->arch.amr_tm);
1005 break;
1006 case KVM_REG_PPC_TM_PPR:
1007 *val = get_reg_val(id, vcpu->arch.ppr_tm);
1008 break;
1009 case KVM_REG_PPC_TM_VRSAVE:
1010 *val = get_reg_val(id, vcpu->arch.vrsave_tm);
1011 break;
1012 case KVM_REG_PPC_TM_VSCR:
1013 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1014 *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
1015 else
1016 r = -ENXIO;
1017 break;
1018 case KVM_REG_PPC_TM_DSCR:
1019 *val = get_reg_val(id, vcpu->arch.dscr_tm);
1020 break;
1021 case KVM_REG_PPC_TM_TAR:
1022 *val = get_reg_val(id, vcpu->arch.tar_tm);
1023 break;
1024#endif
973 case KVM_REG_PPC_ARCH_COMPAT: 1025 case KVM_REG_PPC_ARCH_COMPAT:
974 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat); 1026 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
975 break; 1027 break;
@@ -1039,17 +1091,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1039 case KVM_REG_PPC_IAMR: 1091 case KVM_REG_PPC_IAMR:
1040 vcpu->arch.iamr = set_reg_val(id, *val); 1092 vcpu->arch.iamr = set_reg_val(id, *val);
1041 break; 1093 break;
1042#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1043 case KVM_REG_PPC_TFHAR:
1044 vcpu->arch.tfhar = set_reg_val(id, *val);
1045 break;
1046 case KVM_REG_PPC_TFIAR:
1047 vcpu->arch.tfiar = set_reg_val(id, *val);
1048 break;
1049 case KVM_REG_PPC_TEXASR:
1050 vcpu->arch.texasr = set_reg_val(id, *val);
1051 break;
1052#endif
1053 case KVM_REG_PPC_FSCR: 1094 case KVM_REG_PPC_FSCR:
1054 vcpu->arch.fscr = set_reg_val(id, *val); 1095 vcpu->arch.fscr = set_reg_val(id, *val);
1055 break; 1096 break;
@@ -1144,6 +1185,68 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1144 case KVM_REG_PPC_PPR: 1185 case KVM_REG_PPC_PPR:
1145 vcpu->arch.ppr = set_reg_val(id, *val); 1186 vcpu->arch.ppr = set_reg_val(id, *val);
1146 break; 1187 break;
1188#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1189 case KVM_REG_PPC_TFHAR:
1190 vcpu->arch.tfhar = set_reg_val(id, *val);
1191 break;
1192 case KVM_REG_PPC_TFIAR:
1193 vcpu->arch.tfiar = set_reg_val(id, *val);
1194 break;
1195 case KVM_REG_PPC_TEXASR:
1196 vcpu->arch.texasr = set_reg_val(id, *val);
1197 break;
1198 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1199 i = id - KVM_REG_PPC_TM_GPR0;
1200 vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
1201 break;
1202 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
1203 {
1204 int j;
1205 i = id - KVM_REG_PPC_TM_VSR0;
1206 if (i < 32)
1207 for (j = 0; j < TS_FPRWIDTH; j++)
1208 vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
1209 else
1210 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1211 vcpu->arch.vr_tm.vr[i-32] = val->vval;
1212 else
1213 r = -ENXIO;
1214 break;
1215 }
1216 case KVM_REG_PPC_TM_CR:
1217 vcpu->arch.cr_tm = set_reg_val(id, *val);
1218 break;
1219 case KVM_REG_PPC_TM_LR:
1220 vcpu->arch.lr_tm = set_reg_val(id, *val);
1221 break;
1222 case KVM_REG_PPC_TM_CTR:
1223 vcpu->arch.ctr_tm = set_reg_val(id, *val);
1224 break;
1225 case KVM_REG_PPC_TM_FPSCR:
1226 vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
1227 break;
1228 case KVM_REG_PPC_TM_AMR:
1229 vcpu->arch.amr_tm = set_reg_val(id, *val);
1230 break;
1231 case KVM_REG_PPC_TM_PPR:
1232 vcpu->arch.ppr_tm = set_reg_val(id, *val);
1233 break;
1234 case KVM_REG_PPC_TM_VRSAVE:
1235 vcpu->arch.vrsave_tm = set_reg_val(id, *val);
1236 break;
1237 case KVM_REG_PPC_TM_VSCR:
1238 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1239			vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
1240		else
1241			r = -ENXIO;
1242 break;
1243 case KVM_REG_PPC_TM_DSCR:
1244 vcpu->arch.dscr_tm = set_reg_val(id, *val);
1245 break;
1246 case KVM_REG_PPC_TM_TAR:
1247 vcpu->arch.tar_tm = set_reg_val(id, *val);
1248 break;
1249#endif
1147 case KVM_REG_PPC_ARCH_COMPAT: 1250 case KVM_REG_PPC_ARCH_COMPAT:
1148 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val)); 1251 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
1149 break; 1252 break;
@@ -1360,9 +1463,7 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
1360 smp_wmb(); 1463 smp_wmb();
1361#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) 1464#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
1362 if (cpu != smp_processor_id()) { 1465 if (cpu != smp_processor_id()) {
1363#ifdef CONFIG_KVM_XICS
1364 xics_wake_cpu(cpu); 1466 xics_wake_cpu(cpu);
1365#endif
1366 if (vcpu->arch.ptid) 1467 if (vcpu->arch.ptid)
1367 ++vc->n_woken; 1468 ++vc->n_woken;
1368 } 1469 }
@@ -1530,7 +1631,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1530 vcpu->arch.trap = 0; 1631 vcpu->arch.trap = 0;
1531 1632
1532 if (vcpu->arch.ceded) { 1633 if (vcpu->arch.ceded) {
1533 if (ret != RESUME_GUEST) 1634 if (!is_kvmppc_resume_guest(ret))
1534 kvmppc_end_cede(vcpu); 1635 kvmppc_end_cede(vcpu);
1535 else 1636 else
1536 kvmppc_set_timer(vcpu); 1637 kvmppc_set_timer(vcpu);
@@ -1541,7 +1642,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
1541 vc->vcore_state = VCORE_INACTIVE; 1642 vc->vcore_state = VCORE_INACTIVE;
1542 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, 1643 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
1543 arch.run_list) { 1644 arch.run_list) {
1544 if (vcpu->arch.ret != RESUME_GUEST) { 1645 if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
1545 kvmppc_remove_runnable(vc, vcpu); 1646 kvmppc_remove_runnable(vc, vcpu);
1546 wake_up(&vcpu->arch.cpu_run); 1647 wake_up(&vcpu->arch.cpu_run);
1547 } 1648 }
@@ -1731,7 +1832,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
1731 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); 1832 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
1732 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); 1833 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
1733 } 1834 }
1734 } while (r == RESUME_GUEST); 1835 } while (is_kvmppc_resume_guest(r));
1735 1836
1736 out: 1837 out:
1737 vcpu->arch.state = KVMPPC_VCPU_NOTREADY; 1838 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -2366,7 +2467,7 @@ static int kvmppc_book3s_init_hv(void)
2366 */ 2467 */
2367 r = kvmppc_core_check_processor_compat_hv(); 2468 r = kvmppc_core_check_processor_compat_hv();
2368 if (r < 0) 2469 if (r < 0)
2369 return r; 2470 return -ENODEV;
2370 2471
2371 kvm_ops_hv.owner = THIS_MODULE; 2472 kvm_ops_hv.owner = THIS_MODULE;
2372 kvmppc_hv_ops = &kvm_ops_hv; 2473 kvmppc_hv_ops = &kvm_ops_hv;
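
For the new TM one_reg ids above, KVM_REG_PPC_TM_VSR0..31 refer to the checkpointed FP/VSX registers and TM_VSR32..63 to the checkpointed Altivec registers (index minus 32), with -ENXIO when Altivec is not available. An illustrative mapping table, not kernel code:

#include <stdio.h>

/* Prints how a few KVM_REG_PPC_TM_VSR ids map onto the checkpointed
 * register storage used by the get/set handlers above. */
int main(void)
{
	for (int i = 0; i < 64; i += 21) {
		if (i < 32)
			printf("KVM_REG_PPC_TM_VSR%-2d -> vcpu->arch.fp_tm.fpr[%d][*]\n", i, i);
		else
			printf("KVM_REG_PPC_TM_VSR%-2d -> vcpu->arch.vr_tm.vr[%d]\n", i, i - 32);
	}
	return 0;
}
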
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index e873796b1a29..e18e3cfc32de 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -71,6 +71,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
71 mtmsrd r10,1 71 mtmsrd r10,1
72 72
73 /* Save host PMU registers */ 73 /* Save host PMU registers */
74BEGIN_FTR_SECTION
75 /* Work around P8 PMAE bug */
76 li r3, -1
77 clrrdi r3, r3, 10
78 mfspr r8, SPRN_MMCR2
79 mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
80 isync
81END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
74 li r3, 1 82 li r3, 1
75 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */ 83 sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
76 mfspr r7, SPRN_MMCR0 /* save MMCR0 */ 84 mfspr r7, SPRN_MMCR0 /* save MMCR0 */
@@ -87,9 +95,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
87 cmpwi r5, 0 95 cmpwi r5, 0
88 beq 31f /* skip if not */ 96 beq 31f /* skip if not */
89 mfspr r5, SPRN_MMCR1 97 mfspr r5, SPRN_MMCR1
98 mfspr r9, SPRN_SIAR
99 mfspr r10, SPRN_SDAR
90 std r7, HSTATE_MMCR(r13) 100 std r7, HSTATE_MMCR(r13)
91 std r5, HSTATE_MMCR + 8(r13) 101 std r5, HSTATE_MMCR + 8(r13)
92 std r6, HSTATE_MMCR + 16(r13) 102 std r6, HSTATE_MMCR + 16(r13)
103 std r9, HSTATE_MMCR + 24(r13)
104 std r10, HSTATE_MMCR + 32(r13)
105BEGIN_FTR_SECTION
106 mfspr r9, SPRN_SIER
107 std r8, HSTATE_MMCR + 40(r13)
108 std r9, HSTATE_MMCR + 48(r13)
109END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
93 mfspr r3, SPRN_PMC1 110 mfspr r3, SPRN_PMC1
94 mfspr r5, SPRN_PMC2 111 mfspr r5, SPRN_PMC2
95 mfspr r6, SPRN_PMC3 112 mfspr r6, SPRN_PMC3
@@ -110,6 +127,11 @@ BEGIN_FTR_SECTION
110 stw r10, HSTATE_PMC + 24(r13) 127 stw r10, HSTATE_PMC + 24(r13)
111 stw r11, HSTATE_PMC + 28(r13) 128 stw r11, HSTATE_PMC + 28(r13)
112END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) 129END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
130BEGIN_FTR_SECTION
131 mfspr r9, SPRN_SIER
132 std r8, HSTATE_MMCR + 40(r13)
133 std r9, HSTATE_MMCR + 48(r13)
134END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
11331: 13531:
114 136
115 /* 137 /*
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 37fb3caa4c80..1d6c56ad5b60 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -111,7 +111,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
111 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); 111 rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
112 ptel = rev->guest_rpte |= rcbits; 112 ptel = rev->guest_rpte |= rcbits;
113 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); 113 gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
114 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); 114 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
115 if (!memslot) 115 if (!memslot)
116 return; 116 return;
117 117
@@ -192,7 +192,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
192 /* Find the memslot (if any) for this address */ 192 /* Find the memslot (if any) for this address */
193 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 193 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
194 gfn = gpa >> PAGE_SHIFT; 194 gfn = gpa >> PAGE_SHIFT;
195 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); 195 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
196 pa = 0; 196 pa = 0;
197 is_io = ~0ul; 197 is_io = ~0ul;
198 rmap = NULL; 198 rmap = NULL;
@@ -670,7 +670,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
670 670
671 psize = hpte_page_size(v, r); 671 psize = hpte_page_size(v, r);
672 gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; 672 gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
673 memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); 673 memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
674 if (memslot) { 674 if (memslot) {
675 hva = __gfn_to_hva_memslot(memslot, gfn); 675 hva = __gfn_to_hva_memslot(memslot, gfn);
676 pte = lookup_linux_pte_and_update(pgdir, hva, 676 pte = lookup_linux_pte_and_update(pgdir, hva,
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 53d647f8e741..ffbb871c2bd8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -28,6 +28,9 @@
28#include <asm/exception-64s.h> 28#include <asm/exception-64s.h>
29#include <asm/kvm_book3s_asm.h> 29#include <asm/kvm_book3s_asm.h>
30#include <asm/mmu-hash64.h> 30#include <asm/mmu-hash64.h>
31#include <asm/tm.h>
32
33#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
31 34
32#ifdef __LITTLE_ENDIAN__ 35#ifdef __LITTLE_ENDIAN__
33#error Need to fix lppaca and SLB shadow accesses in little endian mode 36#error Need to fix lppaca and SLB shadow accesses in little endian mode
@@ -106,8 +109,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
106 ld r3, HSTATE_MMCR(r13) 109 ld r3, HSTATE_MMCR(r13)
107 ld r4, HSTATE_MMCR + 8(r13) 110 ld r4, HSTATE_MMCR + 8(r13)
108 ld r5, HSTATE_MMCR + 16(r13) 111 ld r5, HSTATE_MMCR + 16(r13)
112 ld r6, HSTATE_MMCR + 24(r13)
113 ld r7, HSTATE_MMCR + 32(r13)
109 mtspr SPRN_MMCR1, r4 114 mtspr SPRN_MMCR1, r4
110 mtspr SPRN_MMCRA, r5 115 mtspr SPRN_MMCRA, r5
116 mtspr SPRN_SIAR, r6
117 mtspr SPRN_SDAR, r7
118BEGIN_FTR_SECTION
119 ld r8, HSTATE_MMCR + 40(r13)
120 ld r9, HSTATE_MMCR + 48(r13)
121 mtspr SPRN_MMCR2, r8
122 mtspr SPRN_SIER, r9
123END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
111 mtspr SPRN_MMCR0, r3 124 mtspr SPRN_MMCR0, r3
112 isync 125 isync
11323: 12623:
@@ -597,6 +610,116 @@ BEGIN_FTR_SECTION
597 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89) 610 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
598END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 611END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
599 612
613#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
614BEGIN_FTR_SECTION
615 b skip_tm
616END_FTR_SECTION_IFCLR(CPU_FTR_TM)
617
618 /* Turn on TM/FP/VSX/VMX so we can restore them. */
619 mfmsr r5
620 li r6, MSR_TM >> 32
621 sldi r6, r6, 32
622 or r5, r5, r6
623 ori r5, r5, MSR_FP
624 oris r5, r5, (MSR_VEC | MSR_VSX)@h
625 mtmsrd r5
626
627 /*
628 * The user may change these outside of a transaction, so they must
629 * always be context switched.
630 */
631 ld r5, VCPU_TFHAR(r4)
632 ld r6, VCPU_TFIAR(r4)
633 ld r7, VCPU_TEXASR(r4)
634 mtspr SPRN_TFHAR, r5
635 mtspr SPRN_TFIAR, r6
636 mtspr SPRN_TEXASR, r7
637
638 ld r5, VCPU_MSR(r4)
639 rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
640 beq skip_tm /* TM not active in guest */
641
642 /* Make sure the failure summary is set, otherwise we'll program check
 643	 * when we trechkpt. It's possible that this might not have been set
644 * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
645 * host.
646 */
647 oris r7, r7, (TEXASR_FS)@h
648 mtspr SPRN_TEXASR, r7
649
650 /*
651 * We need to load up the checkpointed state for the guest.
652 * We need to do this early as it will blow away any GPRs, VSRs and
653 * some SPRs.
654 */
655
656 mr r31, r4
657 addi r3, r31, VCPU_FPRS_TM
658 bl .load_fp_state
659 addi r3, r31, VCPU_VRS_TM
660 bl .load_vr_state
661 mr r4, r31
662 lwz r7, VCPU_VRSAVE_TM(r4)
663 mtspr SPRN_VRSAVE, r7
664
665 ld r5, VCPU_LR_TM(r4)
666 lwz r6, VCPU_CR_TM(r4)
667 ld r7, VCPU_CTR_TM(r4)
668 ld r8, VCPU_AMR_TM(r4)
669 ld r9, VCPU_TAR_TM(r4)
670 mtlr r5
671 mtcr r6
672 mtctr r7
673 mtspr SPRN_AMR, r8
674 mtspr SPRN_TAR, r9
675
676 /*
677 * Load up PPR and DSCR values but don't put them in the actual SPRs
678 * till the last moment to avoid running with userspace PPR and DSCR for
679 * too long.
680 */
681 ld r29, VCPU_DSCR_TM(r4)
682 ld r30, VCPU_PPR_TM(r4)
683
684 std r2, PACATMSCRATCH(r13) /* Save TOC */
685
686 /* Clear the MSR RI since r1, r13 are all going to be foobar. */
687 li r5, 0
688 mtmsrd r5, 1
689
690 /* Load GPRs r0-r28 */
691 reg = 0
692 .rept 29
693 ld reg, VCPU_GPRS_TM(reg)(r31)
694 reg = reg + 1
695 .endr
696
697 mtspr SPRN_DSCR, r29
698 mtspr SPRN_PPR, r30
699
700 /* Load final GPRs */
701 ld 29, VCPU_GPRS_TM(29)(r31)
702 ld 30, VCPU_GPRS_TM(30)(r31)
703 ld 31, VCPU_GPRS_TM(31)(r31)
704
705 /* TM checkpointed state is now setup. All GPRs are now volatile. */
706 TRECHKPT
707
708 /* Now let's get back the state we need. */
709 HMT_MEDIUM
710 GET_PACA(r13)
711 ld r29, HSTATE_DSCR(r13)
712 mtspr SPRN_DSCR, r29
713 ld r4, HSTATE_KVM_VCPU(r13)
714 ld r1, HSTATE_HOST_R1(r13)
715 ld r2, PACATMSCRATCH(r13)
716
717 /* Set the MSR RI since we have our registers back. */
718 li r5, MSR_RI
719 mtmsrd r5, 1
720skip_tm:
721#endif
722
600 /* Load guest PMU registers */ 723 /* Load guest PMU registers */
601 /* R4 is live here (vcpu pointer) */ 724 /* R4 is live here (vcpu pointer) */
602 li r3, 1 725 li r3, 1
@@ -704,14 +827,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
704 ld r6, VCPU_VTB(r4) 827 ld r6, VCPU_VTB(r4)
705 mtspr SPRN_IC, r5 828 mtspr SPRN_IC, r5
706 mtspr SPRN_VTB, r6 829 mtspr SPRN_VTB, r6
707#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
708 ld r5, VCPU_TFHAR(r4)
709 ld r6, VCPU_TFIAR(r4)
710 ld r7, VCPU_TEXASR(r4)
711 mtspr SPRN_TFHAR, r5
712 mtspr SPRN_TFIAR, r6
713 mtspr SPRN_TEXASR, r7
714#endif
715 ld r8, VCPU_EBBHR(r4) 830 ld r8, VCPU_EBBHR(r4)
716 mtspr SPRN_EBBHR, r8 831 mtspr SPRN_EBBHR, r8
717 ld r5, VCPU_EBBRR(r4) 832 ld r5, VCPU_EBBRR(r4)
@@ -736,6 +851,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
736 * Set the decrementer to the guest decrementer. 851 * Set the decrementer to the guest decrementer.
737 */ 852 */
738 ld r8,VCPU_DEC_EXPIRES(r4) 853 ld r8,VCPU_DEC_EXPIRES(r4)
854 /* r8 is a host timebase value here, convert to guest TB */
855 ld r5,HSTATE_KVM_VCORE(r13)
856 ld r6,VCORE_TB_OFFSET(r5)
857 add r8,r8,r6
739 mftb r7 858 mftb r7
740 subf r3,r7,r8 859 subf r3,r7,r8
741 mtspr SPRN_DEC,r3 860 mtspr SPRN_DEC,r3
@@ -817,7 +936,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
81712: mtspr SPRN_SRR0, r10 93612: mtspr SPRN_SRR0, r10
818 mr r10,r0 937 mr r10,r0
819 mtspr SPRN_SRR1, r11 938 mtspr SPRN_SRR1, r11
820 ld r11, VCPU_INTR_MSR(r4) 939 mr r9, r4
940 bl kvmppc_msr_interrupt
8215: 9415:
822 942
823/* 943/*
@@ -1098,17 +1218,15 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
1098 mftb r6 1218 mftb r6
1099 extsw r5,r5 1219 extsw r5,r5
1100 add r5,r5,r6 1220 add r5,r5,r6
1221 /* r5 is a guest timebase value here, convert to host TB */
1222 ld r3,HSTATE_KVM_VCORE(r13)
1223 ld r4,VCORE_TB_OFFSET(r3)
1224 subf r5,r4,r5
1101 std r5,VCPU_DEC_EXPIRES(r9) 1225 std r5,VCPU_DEC_EXPIRES(r9)
1102 1226
1103BEGIN_FTR_SECTION 1227BEGIN_FTR_SECTION
1104 b 8f 1228 b 8f
1105END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) 1229END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1106 /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
1107 mfmsr r8
1108 li r0, 1
1109 rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
1110 mtmsrd r8
1111
1112 /* Save POWER8-specific registers */ 1230 /* Save POWER8-specific registers */
1113 mfspr r5, SPRN_IAMR 1231 mfspr r5, SPRN_IAMR
1114 mfspr r6, SPRN_PSPB 1232 mfspr r6, SPRN_PSPB
@@ -1122,14 +1240,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
1122 std r5, VCPU_IC(r9) 1240 std r5, VCPU_IC(r9)
1123 std r6, VCPU_VTB(r9) 1241 std r6, VCPU_VTB(r9)
1124 std r7, VCPU_TAR(r9) 1242 std r7, VCPU_TAR(r9)
1125#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1126 mfspr r5, SPRN_TFHAR
1127 mfspr r6, SPRN_TFIAR
1128 mfspr r7, SPRN_TEXASR
1129 std r5, VCPU_TFHAR(r9)
1130 std r6, VCPU_TFIAR(r9)
1131 std r7, VCPU_TEXASR(r9)
1132#endif
1133 mfspr r8, SPRN_EBBHR 1243 mfspr r8, SPRN_EBBHR
1134 std r8, VCPU_EBBHR(r9) 1244 std r8, VCPU_EBBHR(r9)
1135 mfspr r5, SPRN_EBBRR 1245 mfspr r5, SPRN_EBBRR
@@ -1387,7 +1497,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
1387 ld r8,VCORE_TB_OFFSET(r5) 1497 ld r8,VCORE_TB_OFFSET(r5)
1388 cmpdi r8,0 1498 cmpdi r8,0
1389 beq 17f 1499 beq 17f
1390 mftb r6 /* current host timebase */ 1500 mftb r6 /* current guest timebase */
1391 subf r8,r8,r6 1501 subf r8,r8,r6
1392 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ 1502 mtspr SPRN_TBU40,r8 /* update upper 40 bits */
1393 mftb r7 /* check if lower 24 bits overflowed */ 1503 mftb r7 /* check if lower 24 bits overflowed */
@@ -1557,7 +1667,7 @@ kvmppc_hdsi:
1557 mtspr SPRN_SRR0, r10 1667 mtspr SPRN_SRR0, r10
1558 mtspr SPRN_SRR1, r11 1668 mtspr SPRN_SRR1, r11
1559 li r10, BOOK3S_INTERRUPT_DATA_STORAGE 1669 li r10, BOOK3S_INTERRUPT_DATA_STORAGE
1560 ld r11, VCPU_INTR_MSR(r9) 1670 bl kvmppc_msr_interrupt
1561fast_interrupt_c_return: 1671fast_interrupt_c_return:
15626: ld r7, VCPU_CTR(r9) 16726: ld r7, VCPU_CTR(r9)
1563 lwz r8, VCPU_XER(r9) 1673 lwz r8, VCPU_XER(r9)
@@ -1626,7 +1736,7 @@ kvmppc_hisi:
16261: mtspr SPRN_SRR0, r10 17361: mtspr SPRN_SRR0, r10
1627 mtspr SPRN_SRR1, r11 1737 mtspr SPRN_SRR1, r11
1628 li r10, BOOK3S_INTERRUPT_INST_STORAGE 1738 li r10, BOOK3S_INTERRUPT_INST_STORAGE
1629 ld r11, VCPU_INTR_MSR(r9) 1739 bl kvmppc_msr_interrupt
1630 b fast_interrupt_c_return 1740 b fast_interrupt_c_return
1631 1741
16323: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ 17423: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
@@ -1669,7 +1779,7 @@ sc_1_fast_return:
1669 mtspr SPRN_SRR0,r10 1779 mtspr SPRN_SRR0,r10
1670 mtspr SPRN_SRR1,r11 1780 mtspr SPRN_SRR1,r11
1671 li r10, BOOK3S_INTERRUPT_SYSCALL 1781 li r10, BOOK3S_INTERRUPT_SYSCALL
1672 ld r11, VCPU_INTR_MSR(r9) 1782 bl kvmppc_msr_interrupt
1673 mr r4,r9 1783 mr r4,r9
1674 b fast_guest_return 1784 b fast_guest_return
1675 1785
@@ -1691,7 +1801,7 @@ hcall_real_table:
1691 .long 0 /* 0x10 - H_CLEAR_MOD */ 1801 .long 0 /* 0x10 - H_CLEAR_MOD */
1692 .long 0 /* 0x14 - H_CLEAR_REF */ 1802 .long 0 /* 0x14 - H_CLEAR_REF */
1693 .long .kvmppc_h_protect - hcall_real_table 1803 .long .kvmppc_h_protect - hcall_real_table
1694 .long 0 /* 0x1c - H_GET_TCE */ 1804 .long .kvmppc_h_get_tce - hcall_real_table
1695 .long .kvmppc_h_put_tce - hcall_real_table 1805 .long .kvmppc_h_put_tce - hcall_real_table
1696 .long 0 /* 0x24 - H_SET_SPRG0 */ 1806 .long 0 /* 0x24 - H_SET_SPRG0 */
1697 .long .kvmppc_h_set_dabr - hcall_real_table 1807 .long .kvmppc_h_set_dabr - hcall_real_table
@@ -1997,7 +2107,7 @@ machine_check_realmode:
1997 beq mc_cont 2107 beq mc_cont
1998 /* If not, deliver a machine check. SRR0/1 are already set */ 2108 /* If not, deliver a machine check. SRR0/1 are already set */
1999 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK 2109 li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
2000 ld r11, VCPU_INTR_MSR(r9) 2110 bl kvmppc_msr_interrupt
2001 b fast_interrupt_c_return 2111 b fast_interrupt_c_return
2002 2112
2003/* 2113/*
@@ -2138,8 +2248,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
2138 mfspr r6,SPRN_VRSAVE 2248 mfspr r6,SPRN_VRSAVE
2139 stw r6,VCPU_VRSAVE(r31) 2249 stw r6,VCPU_VRSAVE(r31)
2140 mtlr r30 2250 mtlr r30
2141 mtmsrd r5
2142 isync
2143 blr 2251 blr
2144 2252
2145/* 2253/*
@@ -2186,3 +2294,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
2186 */ 2294 */
2187kvmppc_bad_host_intr: 2295kvmppc_bad_host_intr:
2188 b . 2296 b .
2297
2298/*
2299 * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
2300 * from VCPU_INTR_MSR and is modified based on the required TM state changes.
2301 * r11 has the guest MSR value (in/out)
2302 * r9 has a vcpu pointer (in)
2303 * r0 is used as a scratch register
2304 */
2305kvmppc_msr_interrupt:
2306 rldicl r0, r11, 64 - MSR_TS_S_LG, 62
2307 cmpwi r0, 2 /* Check if we are in transactional state.. */
2308 ld r11, VCPU_INTR_MSR(r9)
2309 bne 1f
2310 /* ... if transactional, change to suspended */
2311 li r0, 1
23121: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
2313 blr
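
kvmppc_msr_interrupt above converts a transactional TS value into suspended and otherwise carries the guest's TS bits over into the interrupt MSR taken from VCPU_INTR_MSR. A standalone C model of that transition (bit positions are assumed here, not taken from reg.h):

#include <assert.h>
#include <stdint.h>

#define MSR_TS_LG    33                    /* assumed position of the TS field */
#define MSR_TS_MASK  (3ULL << MSR_TS_LG)
#define MSR_TS_T     (2ULL << MSR_TS_LG)   /* transactional */
#define MSR_TS_S     (1ULL << MSR_TS_LG)   /* suspended */

/* New MSR on interrupt delivery: transactional guests end up suspended,
 * any other TS value is carried over unchanged. */
static uint64_t intr_msr(uint64_t base_intr_msr, uint64_t guest_msr)
{
	uint64_t ts = guest_msr & MSR_TS_MASK;

	if (ts == MSR_TS_T)
		ts = MSR_TS_S;
	return (base_intr_msr & ~MSR_TS_MASK) | ts;
}

int main(void)
{
	uint64_t base = 0x8000000000001000ULL;   /* hypothetical intr_msr value */

	assert(intr_msr(base, MSR_TS_T) == (base | MSR_TS_S));
	assert(intr_msr(base, 0) == base);
	return 0;
}
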
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index cf95cdef73c9..7a053157483b 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -213,8 +213,11 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
213 gpa_t args_phys; 213 gpa_t args_phys;
214 int rc; 214 int rc;
215 215
216 /* r4 contains the guest physical address of the RTAS args */ 216 /*
217 args_phys = kvmppc_get_gpr(vcpu, 4); 217 * r4 contains the guest physical address of the RTAS args
218 * Mask off the top 4 bits since this is a guest real address
219 */
220 args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
218 221
219 rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); 222 rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
220 if (rc) 223 if (rc)
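
The fix above masks the top four bits off the guest real address before it is used as a guest physical address. A tiny sketch of the effect, with an assumed mask standing in for KVM_PAM and a made-up input value:

#include <stdio.h>
#include <stdint.h>

#define GUEST_PAM 0x0fffffffffffffffULL   /* assumed: clears the top 4 bits */

int main(void)
{
	uint64_t r4 = 0xc0000000deadbeefULL;  /* hypothetical guest real address in r4 */

	/* The quadrant/attribute bits are stripped, leaving the physical offset. */
	printf("args_phys = 0x%llx\n", (unsigned long long)(r4 & GUEST_PAM));
	return 0;
}
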
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index 5f8bcc5fe423..35f0faab5361 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -53,6 +53,7 @@ enum interruption_class {
53 IRQIO_PCI, 53 IRQIO_PCI,
54 IRQIO_MSI, 54 IRQIO_MSI,
55 IRQIO_VIR, 55 IRQIO_VIR,
56 IRQIO_VAI,
56 NMI_NMI, 57 NMI_NMI,
57 CPU_RST, 58 CPU_RST,
58 NR_ARCH_IRQS 59 NR_ARCH_IRQS
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9bf95bb30f1a..154b60089be9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -16,12 +16,22 @@
16#include <linux/hrtimer.h> 16#include <linux/hrtimer.h>
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/kvm_host.h> 18#include <linux/kvm_host.h>
19#include <linux/kvm.h>
19#include <asm/debug.h> 20#include <asm/debug.h>
20#include <asm/cpu.h> 21#include <asm/cpu.h>
22#include <asm/isc.h>
21 23
22#define KVM_MAX_VCPUS 64 24#define KVM_MAX_VCPUS 64
23#define KVM_USER_MEM_SLOTS 32 25#define KVM_USER_MEM_SLOTS 32
24 26
27/*
28 * These seem to be used for allocating ->chip in the routing table,
29 * which we don't use. 4096 is an out-of-thin-air value. If we need
30 * to look at ->chip later on, we'll need to revisit this.
31 */
32#define KVM_NR_IRQCHIPS 1
33#define KVM_IRQCHIP_NUM_PINS 4096
34
25struct sca_entry { 35struct sca_entry {
26 atomic_t scn; 36 atomic_t scn;
27 __u32 reserved; 37 __u32 reserved;
@@ -108,7 +118,9 @@ struct kvm_s390_sie_block {
108 __u32 fac; /* 0x01a0 */ 118 __u32 fac; /* 0x01a0 */
109 __u8 reserved1a4[20]; /* 0x01a4 */ 119 __u8 reserved1a4[20]; /* 0x01a4 */
110 __u64 cbrlo; /* 0x01b8 */ 120 __u64 cbrlo; /* 0x01b8 */
111 __u8 reserved1c0[40]; /* 0x01c0 */ 121 __u8 reserved1c0[30]; /* 0x01c0 */
122 __u64 pp; /* 0x01de */
123 __u8 reserved1e6[2]; /* 0x01e6 */
112 __u64 itdba; /* 0x01e8 */ 124 __u64 itdba; /* 0x01e8 */
113 __u8 reserved1f0[16]; /* 0x01f0 */ 125 __u8 reserved1f0[16]; /* 0x01f0 */
114} __attribute__((packed)); 126} __attribute__((packed));
@@ -171,18 +183,6 @@ struct kvm_vcpu_stat {
171 u32 diagnose_9c; 183 u32 diagnose_9c;
172}; 184};
173 185
174struct kvm_s390_io_info {
175 __u16 subchannel_id; /* 0x0b8 */
176 __u16 subchannel_nr; /* 0x0ba */
177 __u32 io_int_parm; /* 0x0bc */
178 __u32 io_int_word; /* 0x0c0 */
179};
180
181struct kvm_s390_ext_info {
182 __u32 ext_params;
183 __u64 ext_params2;
184};
185
186#define PGM_OPERATION 0x01 186#define PGM_OPERATION 0x01
187#define PGM_PRIVILEGED_OP 0x02 187#define PGM_PRIVILEGED_OP 0x02
188#define PGM_EXECUTE 0x03 188#define PGM_EXECUTE 0x03
@@ -191,27 +191,6 @@ struct kvm_s390_ext_info {
191#define PGM_SPECIFICATION 0x06 191#define PGM_SPECIFICATION 0x06
192#define PGM_DATA 0x07 192#define PGM_DATA 0x07
193 193
194struct kvm_s390_pgm_info {
195 __u16 code;
196};
197
198struct kvm_s390_prefix_info {
199 __u32 address;
200};
201
202struct kvm_s390_extcall_info {
203 __u16 code;
204};
205
206struct kvm_s390_emerg_info {
207 __u16 code;
208};
209
210struct kvm_s390_mchk_info {
211 __u64 cr14;
212 __u64 mcic;
213};
214
215struct kvm_s390_interrupt_info { 194struct kvm_s390_interrupt_info {
216 struct list_head list; 195 struct list_head list;
217 u64 type; 196 u64 type;
@@ -246,9 +225,8 @@ struct kvm_s390_float_interrupt {
246 struct list_head list; 225 struct list_head list;
247 atomic_t active; 226 atomic_t active;
248 int next_rr_cpu; 227 int next_rr_cpu;
249 unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1) 228 unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
250 / sizeof(long)]; 229 unsigned int irq_count;
251 struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
252}; 230};
253 231
254 232
@@ -265,6 +243,10 @@ struct kvm_vcpu_arch {
265 u64 stidp_data; 243 u64 stidp_data;
266 }; 244 };
267 struct gmap *gmap; 245 struct gmap *gmap;
246#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
247 unsigned long pfault_token;
248 unsigned long pfault_select;
249 unsigned long pfault_compare;
268}; 250};
269 251
270struct kvm_vm_stat { 252struct kvm_vm_stat {
@@ -274,12 +256,36 @@ struct kvm_vm_stat {
274struct kvm_arch_memory_slot { 256struct kvm_arch_memory_slot {
275}; 257};
276 258
259struct s390_map_info {
260 struct list_head list;
261 __u64 guest_addr;
262 __u64 addr;
263 struct page *page;
264};
265
266struct s390_io_adapter {
267 unsigned int id;
268 int isc;
269 bool maskable;
270 bool masked;
271 bool swap;
272 struct rw_semaphore maps_lock;
273 struct list_head maps;
274 atomic_t nr_maps;
275};
276
277#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
278#define MAX_S390_ADAPTER_MAPS 256
279
277struct kvm_arch{ 280struct kvm_arch{
278 struct sca_block *sca; 281 struct sca_block *sca;
279 debug_info_t *dbf; 282 debug_info_t *dbf;
280 struct kvm_s390_float_interrupt float_int; 283 struct kvm_s390_float_interrupt float_int;
284 struct kvm_device *flic;
281 struct gmap *gmap; 285 struct gmap *gmap;
282 int css_support; 286 int css_support;
287 int use_irqchip;
288 struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
283}; 289};
284 290
285#define KVM_HVA_ERR_BAD (-1UL) 291#define KVM_HVA_ERR_BAD (-1UL)
@@ -290,6 +296,24 @@ static inline bool kvm_is_error_hva(unsigned long addr)
290 return IS_ERR_VALUE(addr); 296 return IS_ERR_VALUE(addr);
291} 297}
292 298
299#define ASYNC_PF_PER_VCPU 64
300struct kvm_vcpu;
301struct kvm_async_pf;
302struct kvm_arch_async_pf {
303 unsigned long pfault_token;
304};
305
306bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
307
308void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
309 struct kvm_async_pf *work);
310
311void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
312 struct kvm_async_pf *work);
313
314void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
315 struct kvm_async_pf *work);
316
293extern int sie64a(struct kvm_s390_sie_block *, u64 *); 317extern int sie64a(struct kvm_s390_sie_block *, u64 *);
294extern char sie_exit; 318extern char sie_exit;
295#endif 319#endif
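
The idle_mask above is now sized with BITS_TO_LONGS(), i.e. the number of longs needed to hold one bit per VCPU; the old expression divided by sizeof(long) bytes rather than bits. A standalone illustration of the macro's behaviour (local re-definition, for demonstration only):

#include <stdio.h>

#define BITS_PER_LONG    (8 * sizeof(unsigned long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	/* On a 64-bit host: 64 bits fit in one long, 65 bits need two. */
	printf("KVM_MAX_VCPUS=64 -> %zu long(s)\n", (size_t)BITS_TO_LONGS(64));
	printf("KVM_MAX_VCPUS=65 -> %zu long(s)\n", (size_t)BITS_TO_LONGS(65));
	return 0;
}
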
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 1ab75eaacbd4..50a75d96f939 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -782,6 +782,7 @@ static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
782 * @table: pointer to the page directory 782 * @table: pointer to the page directory
783 * @asce: address space control element for gmap page table 783 * @asce: address space control element for gmap page table
784 * @crst_list: list of all crst tables used in the guest address space 784 * @crst_list: list of all crst tables used in the guest address space
785 * @pfault_enabled: defines if pfaults are applicable for the guest
785 */ 786 */
786struct gmap { 787struct gmap {
787 struct list_head list; 788 struct list_head list;
@@ -790,6 +791,7 @@ struct gmap {
790 unsigned long asce; 791 unsigned long asce;
791 void *private; 792 void *private;
792 struct list_head crst_list; 793 struct list_head crst_list;
794 bool pfault_enabled;
793}; 795};
794 796
795/** 797/**
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 0a876bc543d3..dc5fc4f90e52 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -79,6 +79,7 @@ struct thread_struct {
79 unsigned long ksp; /* kernel stack pointer */ 79 unsigned long ksp; /* kernel stack pointer */
80 mm_segment_t mm_segment; 80 mm_segment_t mm_segment;
81 unsigned long gmap_addr; /* address of last gmap fault. */ 81 unsigned long gmap_addr; /* address of last gmap fault. */
82 unsigned int gmap_pfault; /* signal of a pending guest pfault */
82 struct per_regs per_user; /* User specified PER registers */ 83 struct per_regs per_user; /* User specified PER registers */
83 struct per_event per_event; /* Cause of the last PER trap */ 84 struct per_event per_event; /* Cause of the last PER trap */
84 unsigned long per_flags; /* Flags to control debug behavior */ 85 unsigned long per_flags; /* Flags to control debug behavior */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index d25da598ec62..c003c6a73b1e 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -16,6 +16,44 @@
16 16
17#define __KVM_S390 17#define __KVM_S390
18 18
19/* Device control API: s390-specific devices */
20#define KVM_DEV_FLIC_GET_ALL_IRQS 1
21#define KVM_DEV_FLIC_ENQUEUE 2
22#define KVM_DEV_FLIC_CLEAR_IRQS 3
23#define KVM_DEV_FLIC_APF_ENABLE 4
24#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
25#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
26#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
27/*
28 * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
29 * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
30 * There are also sclp and machine checks. This gives us
31 * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
 32 * Let's round up to 8192 pages.
33 */
34#define KVM_S390_MAX_FLOAT_IRQS 266250
35#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
36
37struct kvm_s390_io_adapter {
38 __u32 id;
39 __u8 isc;
40 __u8 maskable;
41 __u8 swap;
42 __u8 pad;
43};
44
45#define KVM_S390_IO_ADAPTER_MASK 1
46#define KVM_S390_IO_ADAPTER_MAP 2
47#define KVM_S390_IO_ADAPTER_UNMAP 3
48
49struct kvm_s390_io_adapter_req {
50 __u32 id;
51 __u8 type;
52 __u8 mask;
53 __u16 pad0;
54 __u64 addr;
55};
56
19/* for KVM_GET_REGS and KVM_SET_REGS */ 57/* for KVM_GET_REGS and KVM_SET_REGS */
20struct kvm_regs { 58struct kvm_regs {
21 /* general purpose regs for s390 */ 59 /* general purpose regs for s390 */
@@ -57,4 +95,9 @@ struct kvm_sync_regs {
57#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2) 95#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
58#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3) 96#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
59#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4) 97#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
98#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
99#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
100#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
101#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
102#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
60#endif 103#endif
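
The device-control constants and the kvm_s390_io_adapter structures above are driven through the generic KVM device API. A minimal userspace sketch (illustrative only, not part of the patch; vm_fd and flic_enqueue_service are assumed names, and KVM_DEV_TYPE_FLIC is the flic type from the generic device API) that creates the floating interrupt controller and enqueues a single service-signal interrupt:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int flic_enqueue_service(int vm_fd, __u32 sclp_parm)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC };
	struct kvm_s390_irq irq = {
		.type = KVM_S390_INT_SERVICE,
		.u.ext.ext_params = sclp_parm,
	};
	struct kvm_device_attr attr = {
		.group = KVM_DEV_FLIC_ENQUEUE,
		.attr  = sizeof(irq),	/* buffer length in bytes */
		.addr  = (__u64)(unsigned long)&irq,
	};

	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;
	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}

The same KVM_SET_DEVICE_ATTR path carries KVM_DEV_FLIC_CLEAR_IRQS, the APF controls and the adapter register/modify groups; KVM_DEV_FLIC_GET_ALL_IRQS goes through KVM_GET_DEVICE_ATTR, with attr again holding the buffer length.
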
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index a770be97db4d..d42b14cc72a4 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -85,6 +85,7 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
85 [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" }, 85 [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
86 [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" }, 86 [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
87 [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, 87 [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
88 [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
88 [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"}, 89 [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
89 [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, 90 [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
90}; 91};
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 70b46eacf8e1..10d529ac9821 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -23,6 +23,10 @@ config KVM
23 select ANON_INODES 23 select ANON_INODES
24 select HAVE_KVM_CPU_RELAX_INTERCEPT 24 select HAVE_KVM_CPU_RELAX_INTERCEPT
25 select HAVE_KVM_EVENTFD 25 select HAVE_KVM_EVENTFD
26 select KVM_ASYNC_PF
27 select KVM_ASYNC_PF_SYNC
28 select HAVE_KVM_IRQCHIP
29 select HAVE_KVM_IRQ_ROUTING
26 ---help--- 30 ---help---
27 Support hosting paravirtualized guest machines using the SIE 31 Support hosting paravirtualized guest machines using the SIE
28 virtualization capability on the mainframe. This should work 32 virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 40b4c6470f88..d3adb37e93a4 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
7# as published by the Free Software Foundation. 7# as published by the Free Software Foundation.
8 8
9KVM := ../../../virt/kvm 9KVM := ../../../virt/kvm
10common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o 10common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
11 11
12ccflags-y := -Ivirt/kvm -Iarch/s390/kvm 12ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
13 13
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 6f9cfa500372..03a05ffb662f 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -18,6 +18,7 @@
18#include "kvm-s390.h" 18#include "kvm-s390.h"
19#include "trace.h" 19#include "trace.h"
20#include "trace-s390.h" 20#include "trace-s390.h"
21#include "gaccess.h"
21 22
22static int diag_release_pages(struct kvm_vcpu *vcpu) 23static int diag_release_pages(struct kvm_vcpu *vcpu)
23{ 24{
@@ -47,6 +48,87 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
47 return 0; 48 return 0;
48} 49}
49 50
51static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
52{
53 struct prs_parm {
54 u16 code;
55 u16 subcode;
56 u16 parm_len;
57 u16 parm_version;
58 u64 token_addr;
59 u64 select_mask;
60 u64 compare_mask;
61 u64 zarch;
62 };
63 struct prs_parm parm;
64 int rc;
65 u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
66 u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
67 unsigned long hva_token = KVM_HVA_ERR_BAD;
68
69 if (vcpu->run->s.regs.gprs[rx] & 7)
70 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
71 if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
72 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
73 if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
74 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
75
76 switch (parm.subcode) {
77 case 0: /* TOKEN */
78 if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
79 /*
80 * If the pagefault handshake is already activated,
81 * the token must not be changed. We have to return
82 * decimal 8 instead, as mandated in SC24-6084.
83 */
84 vcpu->run->s.regs.gprs[ry] = 8;
85 return 0;
86 }
87
88 if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
89 parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
90 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
91
92 hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
93 if (kvm_is_error_hva(hva_token))
94 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
95
96 vcpu->arch.pfault_token = parm.token_addr;
97 vcpu->arch.pfault_select = parm.select_mask;
98 vcpu->arch.pfault_compare = parm.compare_mask;
99 vcpu->run->s.regs.gprs[ry] = 0;
100 rc = 0;
101 break;
102 case 1: /*
103 * CANCEL
104 * Specification allows to let already pending tokens survive
105 * the cancel, therefore to reduce code complexity, we assume
106 * all outstanding tokens are already pending.
107 */
108 if (parm.token_addr || parm.select_mask ||
109 parm.compare_mask || parm.zarch)
110 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
111
112 vcpu->run->s.regs.gprs[ry] = 0;
113 /*
 114 * If pfault handling was not established or is already
 115 * canceled, SC24-6084 requires us to return decimal 4.
116 */
117 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
118 vcpu->run->s.regs.gprs[ry] = 4;
119 else
120 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
121
122 rc = 0;
123 break;
124 default:
125 rc = -EOPNOTSUPP;
126 break;
127 }
128
129 return rc;
130}
131
50static int __diag_time_slice_end(struct kvm_vcpu *vcpu) 132static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
51{ 133{
52 VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); 134 VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -153,6 +235,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
153 return __diag_time_slice_end(vcpu); 235 return __diag_time_slice_end(vcpu);
154 case 0x9c: 236 case 0x9c:
155 return __diag_time_slice_end_directed(vcpu); 237 return __diag_time_slice_end_directed(vcpu);
238 case 0x258:
239 return __diag_page_ref_service(vcpu);
156 case 0x308: 240 case 0x308:
157 return __diag_ipl_functions(vcpu); 241 return __diag_ipl_functions(vcpu);
158 case 0x500: 242 case 0x500:
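
For reference, this is the guest-visible parameter block that the new DIAG 0x258 handler (__diag_page_ref_service above) expects at the 8-byte-aligned address passed in the rx register. The struct name is a hypothetical guest-side view; the field constraints are exactly the ones the handler checks:

struct diag258_parm {			/* illustrative name, guest view */
	__u16 code;			/* must be 0x258 */
	__u16 subcode;			/* 0 = TOKEN, 1 = CANCEL */
	__u16 parm_len;			/* must be >= 5 */
	__u16 parm_version;		/* must be 2 */
	__u64 token_addr;		/* 8-byte aligned guest address of the token */
	__u64 select_mask;		/* compare_mask must be a subset of this */
	__u64 compare_mask;
	__u64 zarch;			/* must be 0x8000000000000000ULL */
};

Register Ry receives the return code: 0 on success, 8 if a token is already set (TOKEN), 4 if no handshake was established (CANCEL).
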
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5f79d2d79ca7..200a8f9390b6 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * handling kvm guest interrupts 2 * handling kvm guest interrupts
3 * 3 *
4 * Copyright IBM Corp. 2008 4 * Copyright IBM Corp. 2008,2014
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only) 7 * it under the terms of the GNU General Public License (version 2 only)
@@ -13,6 +13,7 @@
13#include <linux/interrupt.h> 13#include <linux/interrupt.h>
14#include <linux/kvm_host.h> 14#include <linux/kvm_host.h>
15#include <linux/hrtimer.h> 15#include <linux/hrtimer.h>
16#include <linux/mmu_context.h>
16#include <linux/signal.h> 17#include <linux/signal.h>
17#include <linux/slab.h> 18#include <linux/slab.h>
18#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
@@ -31,7 +32,7 @@ static int is_ioint(u64 type)
31 return ((type & 0xfffe0000u) != 0xfffe0000u); 32 return ((type & 0xfffe0000u) != 0xfffe0000u);
32} 33}
33 34
34static int psw_extint_disabled(struct kvm_vcpu *vcpu) 35int psw_extint_disabled(struct kvm_vcpu *vcpu)
35{ 36{
36 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); 37 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
37} 38}
@@ -78,11 +79,8 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
78 return 1; 79 return 1;
79 return 0; 80 return 0;
80 case KVM_S390_INT_SERVICE: 81 case KVM_S390_INT_SERVICE:
81 if (psw_extint_disabled(vcpu)) 82 case KVM_S390_INT_PFAULT_INIT:
82 return 0; 83 case KVM_S390_INT_PFAULT_DONE:
83 if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
84 return 1;
85 return 0;
86 case KVM_S390_INT_VIRTIO: 84 case KVM_S390_INT_VIRTIO:
87 if (psw_extint_disabled(vcpu)) 85 if (psw_extint_disabled(vcpu))
88 return 0; 86 return 0;
@@ -117,14 +115,12 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
117 115
118static void __set_cpu_idle(struct kvm_vcpu *vcpu) 116static void __set_cpu_idle(struct kvm_vcpu *vcpu)
119{ 117{
120 BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
121 atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); 118 atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
122 set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); 119 set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
123} 120}
124 121
125static void __unset_cpu_idle(struct kvm_vcpu *vcpu) 122static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
126{ 123{
127 BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
128 atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); 124 atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
129 clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); 125 clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
130} 126}
@@ -150,6 +146,8 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
150 case KVM_S390_INT_EXTERNAL_CALL: 146 case KVM_S390_INT_EXTERNAL_CALL:
151 case KVM_S390_INT_EMERGENCY: 147 case KVM_S390_INT_EMERGENCY:
152 case KVM_S390_INT_SERVICE: 148 case KVM_S390_INT_SERVICE:
149 case KVM_S390_INT_PFAULT_INIT:
150 case KVM_S390_INT_PFAULT_DONE:
153 case KVM_S390_INT_VIRTIO: 151 case KVM_S390_INT_VIRTIO:
154 if (psw_extint_disabled(vcpu)) 152 if (psw_extint_disabled(vcpu))
155 __set_cpuflag(vcpu, CPUSTAT_EXT_INT); 153 __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
@@ -223,6 +221,30 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
223 rc |= put_guest(vcpu, inti->ext.ext_params, 221 rc |= put_guest(vcpu, inti->ext.ext_params,
224 (u32 __user *)__LC_EXT_PARAMS); 222 (u32 __user *)__LC_EXT_PARAMS);
225 break; 223 break;
224 case KVM_S390_INT_PFAULT_INIT:
225 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
226 inti->ext.ext_params2);
227 rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
228 rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
229 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
230 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
231 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
232 __LC_EXT_NEW_PSW, sizeof(psw_t));
233 rc |= put_guest(vcpu, inti->ext.ext_params2,
234 (u64 __user *) __LC_EXT_PARAMS2);
235 break;
236 case KVM_S390_INT_PFAULT_DONE:
237 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
238 inti->ext.ext_params2);
239 rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
240 rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
241 rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
242 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
243 rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
244 __LC_EXT_NEW_PSW, sizeof(psw_t));
245 rc |= put_guest(vcpu, inti->ext.ext_params2,
246 (u64 __user *) __LC_EXT_PARAMS2);
247 break;
226 case KVM_S390_INT_VIRTIO: 248 case KVM_S390_INT_VIRTIO:
227 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", 249 VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
228 inti->ext.ext_params, inti->ext.ext_params2); 250 inti->ext.ext_params, inti->ext.ext_params2);
@@ -357,7 +379,7 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
357 return 1; 379 return 1;
358} 380}
359 381
360static int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) 382int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
361{ 383{
362 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 384 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
363 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; 385 struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
@@ -482,11 +504,26 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
482 struct kvm_vcpu *vcpu; 504 struct kvm_vcpu *vcpu;
483 505
484 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); 506 vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
507 vcpu->preempted = true;
485 tasklet_schedule(&vcpu->arch.tasklet); 508 tasklet_schedule(&vcpu->arch.tasklet);
486 509
487 return HRTIMER_NORESTART; 510 return HRTIMER_NORESTART;
488} 511}
489 512
513void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
514{
515 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
516 struct kvm_s390_interrupt_info *n, *inti = NULL;
517
518 spin_lock_bh(&li->lock);
519 list_for_each_entry_safe(inti, n, &li->list, list) {
520 list_del(&inti->list);
521 kfree(inti);
522 }
523 atomic_set(&li->active, 0);
524 spin_unlock_bh(&li->lock);
525}
526
490void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) 527void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
491{ 528{
492 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; 529 struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -528,6 +565,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
528 list_for_each_entry_safe(inti, n, &fi->list, list) { 565 list_for_each_entry_safe(inti, n, &fi->list, list) {
529 if (__interrupt_is_deliverable(vcpu, inti)) { 566 if (__interrupt_is_deliverable(vcpu, inti)) {
530 list_del(&inti->list); 567 list_del(&inti->list);
568 fi->irq_count--;
531 deliver = 1; 569 deliver = 1;
532 break; 570 break;
533 } 571 }
@@ -583,6 +621,7 @@ void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
583 if ((inti->type == KVM_S390_MCHK) && 621 if ((inti->type == KVM_S390_MCHK) &&
584 __interrupt_is_deliverable(vcpu, inti)) { 622 __interrupt_is_deliverable(vcpu, inti)) {
585 list_del(&inti->list); 623 list_del(&inti->list);
624 fi->irq_count--;
586 deliver = 1; 625 deliver = 1;
587 break; 626 break;
588 } 627 }
@@ -650,8 +689,10 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
650 inti = iter; 689 inti = iter;
651 break; 690 break;
652 } 691 }
653 if (inti) 692 if (inti) {
654 list_del_init(&inti->list); 693 list_del_init(&inti->list);
694 fi->irq_count--;
695 }
655 if (list_empty(&fi->list)) 696 if (list_empty(&fi->list))
656 atomic_set(&fi->active, 0); 697 atomic_set(&fi->active, 0);
657 spin_unlock(&fi->lock); 698 spin_unlock(&fi->lock);
@@ -659,53 +700,101 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
659 return inti; 700 return inti;
660} 701}
661 702
662int kvm_s390_inject_vm(struct kvm *kvm, 703static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
663 struct kvm_s390_interrupt *s390int)
664{ 704{
665 struct kvm_s390_local_interrupt *li; 705 struct kvm_s390_local_interrupt *li;
666 struct kvm_s390_float_interrupt *fi; 706 struct kvm_s390_float_interrupt *fi;
667 struct kvm_s390_interrupt_info *inti, *iter; 707 struct kvm_s390_interrupt_info *iter;
708 struct kvm_vcpu *dst_vcpu = NULL;
668 int sigcpu; 709 int sigcpu;
710 int rc = 0;
711
712 mutex_lock(&kvm->lock);
713 fi = &kvm->arch.float_int;
714 spin_lock(&fi->lock);
715 if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
716 rc = -EINVAL;
717 goto unlock_fi;
718 }
719 fi->irq_count++;
720 if (!is_ioint(inti->type)) {
721 list_add_tail(&inti->list, &fi->list);
722 } else {
723 u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
724
725 /* Keep I/O interrupts sorted in isc order. */
726 list_for_each_entry(iter, &fi->list, list) {
727 if (!is_ioint(iter->type))
728 continue;
729 if (int_word_to_isc_bits(iter->io.io_int_word)
730 <= isc_bits)
731 continue;
732 break;
733 }
734 list_add_tail(&inti->list, &iter->list);
735 }
736 atomic_set(&fi->active, 1);
737 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
738 if (sigcpu == KVM_MAX_VCPUS) {
739 do {
740 sigcpu = fi->next_rr_cpu++;
741 if (sigcpu == KVM_MAX_VCPUS)
742 sigcpu = fi->next_rr_cpu = 0;
743 } while (kvm_get_vcpu(kvm, sigcpu) == NULL);
744 }
745 dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
746 li = &dst_vcpu->arch.local_int;
747 spin_lock_bh(&li->lock);
748 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
749 if (waitqueue_active(li->wq))
750 wake_up_interruptible(li->wq);
751 kvm_get_vcpu(kvm, sigcpu)->preempted = true;
752 spin_unlock_bh(&li->lock);
753unlock_fi:
754 spin_unlock(&fi->lock);
755 mutex_unlock(&kvm->lock);
756 return rc;
757}
758
759int kvm_s390_inject_vm(struct kvm *kvm,
760 struct kvm_s390_interrupt *s390int)
761{
762 struct kvm_s390_interrupt_info *inti;
669 763
670 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 764 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
671 if (!inti) 765 if (!inti)
672 return -ENOMEM; 766 return -ENOMEM;
673 767
674 switch (s390int->type) { 768 inti->type = s390int->type;
769 switch (inti->type) {
675 case KVM_S390_INT_VIRTIO: 770 case KVM_S390_INT_VIRTIO:
676 VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx", 771 VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%llx",
677 s390int->parm, s390int->parm64); 772 s390int->parm, s390int->parm64);
678 inti->type = s390int->type;
679 inti->ext.ext_params = s390int->parm; 773 inti->ext.ext_params = s390int->parm;
680 inti->ext.ext_params2 = s390int->parm64; 774 inti->ext.ext_params2 = s390int->parm64;
681 break; 775 break;
682 case KVM_S390_INT_SERVICE: 776 case KVM_S390_INT_SERVICE:
683 VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); 777 VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
684 inti->type = s390int->type;
685 inti->ext.ext_params = s390int->parm; 778 inti->ext.ext_params = s390int->parm;
686 break; 779 break;
687 case KVM_S390_PROGRAM_INT: 780 case KVM_S390_INT_PFAULT_DONE:
688 case KVM_S390_SIGP_STOP: 781 inti->type = s390int->type;
689 case KVM_S390_INT_EXTERNAL_CALL: 782 inti->ext.ext_params2 = s390int->parm64;
690 case KVM_S390_INT_EMERGENCY: 783 break;
691 kfree(inti);
692 return -EINVAL;
693 case KVM_S390_MCHK: 784 case KVM_S390_MCHK:
694 VM_EVENT(kvm, 5, "inject: machine check parm64:%llx", 785 VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
695 s390int->parm64); 786 s390int->parm64);
696 inti->type = s390int->type;
697 inti->mchk.cr14 = s390int->parm; /* upper bits are not used */ 787 inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
698 inti->mchk.mcic = s390int->parm64; 788 inti->mchk.mcic = s390int->parm64;
699 break; 789 break;
700 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 790 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
701 if (s390int->type & IOINT_AI_MASK) 791 if (inti->type & IOINT_AI_MASK)
702 VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); 792 VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
703 else 793 else
704 VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", 794 VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
705 s390int->type & IOINT_CSSID_MASK, 795 s390int->type & IOINT_CSSID_MASK,
706 s390int->type & IOINT_SSID_MASK, 796 s390int->type & IOINT_SSID_MASK,
707 s390int->type & IOINT_SCHID_MASK); 797 s390int->type & IOINT_SCHID_MASK);
708 inti->type = s390int->type;
709 inti->io.subchannel_id = s390int->parm >> 16; 798 inti->io.subchannel_id = s390int->parm >> 16;
710 inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; 799 inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
711 inti->io.io_int_parm = s390int->parm64 >> 32; 800 inti->io.io_int_parm = s390int->parm64 >> 32;
@@ -718,43 +807,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
718 trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64, 807 trace_kvm_s390_inject_vm(s390int->type, s390int->parm, s390int->parm64,
719 2); 808 2);
720 809
721 mutex_lock(&kvm->lock); 810 return __inject_vm(kvm, inti);
722 fi = &kvm->arch.float_int;
723 spin_lock(&fi->lock);
724 if (!is_ioint(inti->type))
725 list_add_tail(&inti->list, &fi->list);
726 else {
727 u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
728
729 /* Keep I/O interrupts sorted in isc order. */
730 list_for_each_entry(iter, &fi->list, list) {
731 if (!is_ioint(iter->type))
732 continue;
733 if (int_word_to_isc_bits(iter->io.io_int_word)
734 <= isc_bits)
735 continue;
736 break;
737 }
738 list_add_tail(&inti->list, &iter->list);
739 }
740 atomic_set(&fi->active, 1);
741 sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
742 if (sigcpu == KVM_MAX_VCPUS) {
743 do {
744 sigcpu = fi->next_rr_cpu++;
745 if (sigcpu == KVM_MAX_VCPUS)
746 sigcpu = fi->next_rr_cpu = 0;
747 } while (fi->local_int[sigcpu] == NULL);
748 }
749 li = fi->local_int[sigcpu];
750 spin_lock_bh(&li->lock);
751 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
752 if (waitqueue_active(li->wq))
753 wake_up_interruptible(li->wq);
754 spin_unlock_bh(&li->lock);
755 spin_unlock(&fi->lock);
756 mutex_unlock(&kvm->lock);
757 return 0;
758} 811}
759 812
760int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 813int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -814,6 +867,10 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
814 inti->type = s390int->type; 867 inti->type = s390int->type;
815 inti->mchk.mcic = s390int->parm64; 868 inti->mchk.mcic = s390int->parm64;
816 break; 869 break;
870 case KVM_S390_INT_PFAULT_INIT:
871 inti->type = s390int->type;
872 inti->ext.ext_params2 = s390int->parm64;
873 break;
817 case KVM_S390_INT_VIRTIO: 874 case KVM_S390_INT_VIRTIO:
818 case KVM_S390_INT_SERVICE: 875 case KVM_S390_INT_SERVICE:
819 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: 876 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
@@ -837,7 +894,528 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
837 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); 894 atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
838 if (waitqueue_active(&vcpu->wq)) 895 if (waitqueue_active(&vcpu->wq))
839 wake_up_interruptible(&vcpu->wq); 896 wake_up_interruptible(&vcpu->wq);
897 vcpu->preempted = true;
840 spin_unlock_bh(&li->lock); 898 spin_unlock_bh(&li->lock);
841 mutex_unlock(&vcpu->kvm->lock); 899 mutex_unlock(&vcpu->kvm->lock);
842 return 0; 900 return 0;
843} 901}
902
903static void clear_floating_interrupts(struct kvm *kvm)
904{
905 struct kvm_s390_float_interrupt *fi;
906 struct kvm_s390_interrupt_info *n, *inti = NULL;
907
908 mutex_lock(&kvm->lock);
909 fi = &kvm->arch.float_int;
910 spin_lock(&fi->lock);
911 list_for_each_entry_safe(inti, n, &fi->list, list) {
912 list_del(&inti->list);
913 kfree(inti);
914 }
915 fi->irq_count = 0;
916 atomic_set(&fi->active, 0);
917 spin_unlock(&fi->lock);
918 mutex_unlock(&kvm->lock);
919}
920
921static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
922 u8 *addr)
923{
924 struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
925 struct kvm_s390_irq irq = {0};
926
927 irq.type = inti->type;
928 switch (inti->type) {
929 case KVM_S390_INT_PFAULT_INIT:
930 case KVM_S390_INT_PFAULT_DONE:
931 case KVM_S390_INT_VIRTIO:
932 case KVM_S390_INT_SERVICE:
933 irq.u.ext = inti->ext;
934 break;
935 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
936 irq.u.io = inti->io;
937 break;
938 case KVM_S390_MCHK:
939 irq.u.mchk = inti->mchk;
940 break;
941 default:
942 return -EINVAL;
943 }
944
945 if (copy_to_user(uptr, &irq, sizeof(irq)))
946 return -EFAULT;
947
948 return 0;
949}
950
951static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
952{
953 struct kvm_s390_interrupt_info *inti;
954 struct kvm_s390_float_interrupt *fi;
955 int ret = 0;
956 int n = 0;
957
958 mutex_lock(&kvm->lock);
959 fi = &kvm->arch.float_int;
960 spin_lock(&fi->lock);
961
962 list_for_each_entry(inti, &fi->list, list) {
963 if (len < sizeof(struct kvm_s390_irq)) {
964 /* signal userspace to try again */
965 ret = -ENOMEM;
966 break;
967 }
968 ret = copy_irq_to_user(inti, buf);
969 if (ret)
970 break;
971 buf += sizeof(struct kvm_s390_irq);
972 len -= sizeof(struct kvm_s390_irq);
973 n++;
974 }
975
976 spin_unlock(&fi->lock);
977 mutex_unlock(&kvm->lock);
978
979 return ret < 0 ? ret : n;
980}
981
982static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
983{
984 int r;
985
986 switch (attr->group) {
987 case KVM_DEV_FLIC_GET_ALL_IRQS:
988 r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
989 attr->attr);
990 break;
991 default:
992 r = -EINVAL;
993 }
994
995 return r;
996}
997
998static inline int copy_irq_from_user(struct kvm_s390_interrupt_info *inti,
999 u64 addr)
1000{
1001 struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
1002 void *target = NULL;
1003 void __user *source;
1004 u64 size;
1005
1006 if (get_user(inti->type, (u64 __user *)addr))
1007 return -EFAULT;
1008
1009 switch (inti->type) {
1010 case KVM_S390_INT_PFAULT_INIT:
1011 case KVM_S390_INT_PFAULT_DONE:
1012 case KVM_S390_INT_VIRTIO:
1013 case KVM_S390_INT_SERVICE:
1014 target = (void *) &inti->ext;
1015 source = &uptr->u.ext;
1016 size = sizeof(inti->ext);
1017 break;
1018 case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
1019 target = (void *) &inti->io;
1020 source = &uptr->u.io;
1021 size = sizeof(inti->io);
1022 break;
1023 case KVM_S390_MCHK:
1024 target = (void *) &inti->mchk;
1025 source = &uptr->u.mchk;
1026 size = sizeof(inti->mchk);
1027 break;
1028 default:
1029 return -EINVAL;
1030 }
1031
1032 if (copy_from_user(target, source, size))
1033 return -EFAULT;
1034
1035 return 0;
1036}
1037
1038static int enqueue_floating_irq(struct kvm_device *dev,
1039 struct kvm_device_attr *attr)
1040{
1041 struct kvm_s390_interrupt_info *inti = NULL;
1042 int r = 0;
1043 int len = attr->attr;
1044
1045 if (len % sizeof(struct kvm_s390_irq) != 0)
1046 return -EINVAL;
1047 else if (len > KVM_S390_FLIC_MAX_BUFFER)
1048 return -EINVAL;
1049
1050 while (len >= sizeof(struct kvm_s390_irq)) {
1051 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
1052 if (!inti)
1053 return -ENOMEM;
1054
1055 r = copy_irq_from_user(inti, attr->addr);
1056 if (r) {
1057 kfree(inti);
1058 return r;
1059 }
1060 r = __inject_vm(dev->kvm, inti);
1061 if (r) {
1062 kfree(inti);
1063 return r;
1064 }
1065 len -= sizeof(struct kvm_s390_irq);
1066 attr->addr += sizeof(struct kvm_s390_irq);
1067 }
1068
1069 return r;
1070}
1071
1072static struct s390_io_adapter *get_io_adapter(struct kvm *kvm, unsigned int id)
1073{
1074 if (id >= MAX_S390_IO_ADAPTERS)
1075 return NULL;
1076 return kvm->arch.adapters[id];
1077}
1078
1079static int register_io_adapter(struct kvm_device *dev,
1080 struct kvm_device_attr *attr)
1081{
1082 struct s390_io_adapter *adapter;
1083 struct kvm_s390_io_adapter adapter_info;
1084
1085 if (copy_from_user(&adapter_info,
1086 (void __user *)attr->addr, sizeof(adapter_info)))
1087 return -EFAULT;
1088
1089 if ((adapter_info.id >= MAX_S390_IO_ADAPTERS) ||
1090 (dev->kvm->arch.adapters[adapter_info.id] != NULL))
1091 return -EINVAL;
1092
1093 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
1094 if (!adapter)
1095 return -ENOMEM;
1096
1097 INIT_LIST_HEAD(&adapter->maps);
1098 init_rwsem(&adapter->maps_lock);
1099 atomic_set(&adapter->nr_maps, 0);
1100 adapter->id = adapter_info.id;
1101 adapter->isc = adapter_info.isc;
1102 adapter->maskable = adapter_info.maskable;
1103 adapter->masked = false;
1104 adapter->swap = adapter_info.swap;
1105 dev->kvm->arch.adapters[adapter->id] = adapter;
1106
1107 return 0;
1108}
1109
1110int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
1111{
1112 int ret;
1113 struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
1114
1115 if (!adapter || !adapter->maskable)
1116 return -EINVAL;
1117 ret = adapter->masked;
1118 adapter->masked = masked;
1119 return ret;
1120}
1121
1122static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
1123{
1124 struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
1125 struct s390_map_info *map;
1126 int ret;
1127
1128 if (!adapter || !addr)
1129 return -EINVAL;
1130
1131 map = kzalloc(sizeof(*map), GFP_KERNEL);
1132 if (!map) {
1133 ret = -ENOMEM;
1134 goto out;
1135 }
1136 INIT_LIST_HEAD(&map->list);
1137 map->guest_addr = addr;
1138 map->addr = gmap_translate(addr, kvm->arch.gmap);
1139 if (map->addr == -EFAULT) {
1140 ret = -EFAULT;
1141 goto out;
1142 }
1143 ret = get_user_pages_fast(map->addr, 1, 1, &map->page);
1144 if (ret < 0)
1145 goto out;
1146 BUG_ON(ret != 1);
1147 down_write(&adapter->maps_lock);
1148 if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
1149 list_add_tail(&map->list, &adapter->maps);
1150 ret = 0;
1151 } else {
1152 put_page(map->page);
1153 ret = -EINVAL;
1154 }
1155 up_write(&adapter->maps_lock);
1156out:
1157 if (ret)
1158 kfree(map);
1159 return ret;
1160}
1161
1162static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
1163{
1164 struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
1165 struct s390_map_info *map, *tmp;
1166 int found = 0;
1167
1168 if (!adapter || !addr)
1169 return -EINVAL;
1170
1171 down_write(&adapter->maps_lock);
1172 list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
1173 if (map->guest_addr == addr) {
1174 found = 1;
1175 atomic_dec(&adapter->nr_maps);
1176 list_del(&map->list);
1177 put_page(map->page);
1178 kfree(map);
1179 break;
1180 }
1181 }
1182 up_write(&adapter->maps_lock);
1183
1184 return found ? 0 : -EINVAL;
1185}
1186
1187void kvm_s390_destroy_adapters(struct kvm *kvm)
1188{
1189 int i;
1190 struct s390_map_info *map, *tmp;
1191
1192 for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
1193 if (!kvm->arch.adapters[i])
1194 continue;
1195 list_for_each_entry_safe(map, tmp,
1196 &kvm->arch.adapters[i]->maps, list) {
1197 list_del(&map->list);
1198 put_page(map->page);
1199 kfree(map);
1200 }
1201 kfree(kvm->arch.adapters[i]);
1202 }
1203}
1204
1205static int modify_io_adapter(struct kvm_device *dev,
1206 struct kvm_device_attr *attr)
1207{
1208 struct kvm_s390_io_adapter_req req;
1209 struct s390_io_adapter *adapter;
1210 int ret;
1211
1212 if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
1213 return -EFAULT;
1214
1215 adapter = get_io_adapter(dev->kvm, req.id);
1216 if (!adapter)
1217 return -EINVAL;
1218 switch (req.type) {
1219 case KVM_S390_IO_ADAPTER_MASK:
1220 ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask);
1221 if (ret > 0)
1222 ret = 0;
1223 break;
1224 case KVM_S390_IO_ADAPTER_MAP:
1225 ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
1226 break;
1227 case KVM_S390_IO_ADAPTER_UNMAP:
1228 ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
1229 break;
1230 default:
1231 ret = -EINVAL;
1232 }
1233
1234 return ret;
1235}
1236
1237static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1238{
1239 int r = 0;
1240 unsigned int i;
1241 struct kvm_vcpu *vcpu;
1242
1243 switch (attr->group) {
1244 case KVM_DEV_FLIC_ENQUEUE:
1245 r = enqueue_floating_irq(dev, attr);
1246 break;
1247 case KVM_DEV_FLIC_CLEAR_IRQS:
1248 r = 0;
1249 clear_floating_interrupts(dev->kvm);
1250 break;
1251 case KVM_DEV_FLIC_APF_ENABLE:
1252 dev->kvm->arch.gmap->pfault_enabled = 1;
1253 break;
1254 case KVM_DEV_FLIC_APF_DISABLE_WAIT:
1255 dev->kvm->arch.gmap->pfault_enabled = 0;
1256 /*
 1257 * Make sure no async faults are in transition when
 1258 * clearing the queues, so we don't need to worry
 1259 * about late-arriving workers.
1260 */
1261 synchronize_srcu(&dev->kvm->srcu);
1262 kvm_for_each_vcpu(i, vcpu, dev->kvm)
1263 kvm_clear_async_pf_completion_queue(vcpu);
1264 break;
1265 case KVM_DEV_FLIC_ADAPTER_REGISTER:
1266 r = register_io_adapter(dev, attr);
1267 break;
1268 case KVM_DEV_FLIC_ADAPTER_MODIFY:
1269 r = modify_io_adapter(dev, attr);
1270 break;
1271 default:
1272 r = -EINVAL;
1273 }
1274
1275 return r;
1276}
1277
1278static int flic_create(struct kvm_device *dev, u32 type)
1279{
1280 if (!dev)
1281 return -EINVAL;
1282 if (dev->kvm->arch.flic)
1283 return -EINVAL;
1284 dev->kvm->arch.flic = dev;
1285 return 0;
1286}
1287
1288static void flic_destroy(struct kvm_device *dev)
1289{
1290 dev->kvm->arch.flic = NULL;
1291 kfree(dev);
1292}
1293
1294/* s390 floating irq controller (flic) */
1295struct kvm_device_ops kvm_flic_ops = {
1296 .name = "kvm-flic",
1297 .get_attr = flic_get_attr,
1298 .set_attr = flic_set_attr,
1299 .create = flic_create,
1300 .destroy = flic_destroy,
1301};
1302
1303static unsigned long get_ind_bit(__u64 addr, unsigned long bit_nr, bool swap)
1304{
1305 unsigned long bit;
1306
1307 bit = bit_nr + (addr % PAGE_SIZE) * 8;
1308
1309 return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
1310}
1311
1312static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
1313 u64 addr)
1314{
1315 struct s390_map_info *map;
1316
1317 if (!adapter)
1318 return NULL;
1319
1320 list_for_each_entry(map, &adapter->maps, list) {
1321 if (map->guest_addr == addr)
1322 return map;
1323 }
1324 return NULL;
1325}
1326
1327static int adapter_indicators_set(struct kvm *kvm,
1328 struct s390_io_adapter *adapter,
1329 struct kvm_s390_adapter_int *adapter_int)
1330{
1331 unsigned long bit;
1332 int summary_set, idx;
1333 struct s390_map_info *info;
1334 void *map;
1335
1336 info = get_map_info(adapter, adapter_int->ind_addr);
1337 if (!info)
1338 return -1;
1339 map = page_address(info->page);
1340 bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
1341 set_bit(bit, map);
1342 idx = srcu_read_lock(&kvm->srcu);
1343 mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
1344 set_page_dirty_lock(info->page);
1345 info = get_map_info(adapter, adapter_int->summary_addr);
1346 if (!info) {
1347 srcu_read_unlock(&kvm->srcu, idx);
1348 return -1;
1349 }
1350 map = page_address(info->page);
1351 bit = get_ind_bit(info->addr, adapter_int->summary_offset,
1352 adapter->swap);
1353 summary_set = test_and_set_bit(bit, map);
1354 mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
1355 set_page_dirty_lock(info->page);
1356 srcu_read_unlock(&kvm->srcu, idx);
1357 return summary_set ? 0 : 1;
1358}
1359
1360/*
1361 * < 0 - not injected due to error
1362 * = 0 - coalesced, summary indicator already active
1363 * > 0 - injected interrupt
1364 */
1365static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
1366 struct kvm *kvm, int irq_source_id, int level,
1367 bool line_status)
1368{
1369 int ret;
1370 struct s390_io_adapter *adapter;
1371
1372 /* We're only interested in the 0->1 transition. */
1373 if (!level)
1374 return 0;
1375 adapter = get_io_adapter(kvm, e->adapter.adapter_id);
1376 if (!adapter)
1377 return -1;
1378 down_read(&adapter->maps_lock);
1379 ret = adapter_indicators_set(kvm, adapter, &e->adapter);
1380 up_read(&adapter->maps_lock);
1381 if ((ret > 0) && !adapter->masked) {
1382 struct kvm_s390_interrupt s390int = {
1383 .type = KVM_S390_INT_IO(1, 0, 0, 0),
1384 .parm = 0,
1385 .parm64 = (adapter->isc << 27) | 0x80000000,
1386 };
1387 ret = kvm_s390_inject_vm(kvm, &s390int);
1388 if (ret == 0)
1389 ret = 1;
1390 }
1391 return ret;
1392}
1393
1394int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
1395 struct kvm_kernel_irq_routing_entry *e,
1396 const struct kvm_irq_routing_entry *ue)
1397{
1398 int ret;
1399
1400 switch (ue->type) {
1401 case KVM_IRQ_ROUTING_S390_ADAPTER:
1402 e->set = set_adapter_int;
1403 e->adapter.summary_addr = ue->u.adapter.summary_addr;
1404 e->adapter.ind_addr = ue->u.adapter.ind_addr;
1405 e->adapter.summary_offset = ue->u.adapter.summary_offset;
1406 e->adapter.ind_offset = ue->u.adapter.ind_offset;
1407 e->adapter.adapter_id = ue->u.adapter.adapter_id;
1408 ret = 0;
1409 break;
1410 default:
1411 ret = -EINVAL;
1412 }
1413
1414 return ret;
1415}
1416
1417int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
1418 int irq_source_id, int level, bool line_status)
1419{
1420 return -EINVAL;
1421}
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
new file mode 100644
index 000000000000..d98e4159643d
--- /dev/null
+++ b/arch/s390/kvm/irq.h
@@ -0,0 +1,22 @@
1/*
2 * s390 irqchip routines
3 *
4 * Copyright IBM Corp. 2014
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
11 */
12#ifndef __KVM_IRQ_H
13#define __KVM_IRQ_H
14
15#include <linux/kvm_host.h>
16
17static inline int irqchip_in_kernel(struct kvm *kvm)
18{
19 return 1;
20}
21
22#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 10b5db3c9bc4..b3ecb8f5b6ce 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -153,11 +153,14 @@ int kvm_dev_ioctl_check_extension(long ext)
153#ifdef CONFIG_KVM_S390_UCONTROL 153#ifdef CONFIG_KVM_S390_UCONTROL
154 case KVM_CAP_S390_UCONTROL: 154 case KVM_CAP_S390_UCONTROL:
155#endif 155#endif
156 case KVM_CAP_ASYNC_PF:
156 case KVM_CAP_SYNC_REGS: 157 case KVM_CAP_SYNC_REGS:
157 case KVM_CAP_ONE_REG: 158 case KVM_CAP_ONE_REG:
158 case KVM_CAP_ENABLE_CAP: 159 case KVM_CAP_ENABLE_CAP:
159 case KVM_CAP_S390_CSS_SUPPORT: 160 case KVM_CAP_S390_CSS_SUPPORT:
160 case KVM_CAP_IOEVENTFD: 161 case KVM_CAP_IOEVENTFD:
162 case KVM_CAP_DEVICE_CTRL:
163 case KVM_CAP_ENABLE_CAP_VM:
161 r = 1; 164 r = 1;
162 break; 165 break;
163 case KVM_CAP_NR_VCPUS: 166 case KVM_CAP_NR_VCPUS:
@@ -186,6 +189,25 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
186 return 0; 189 return 0;
187} 190}
188 191
192static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
193{
194 int r;
195
196 if (cap->flags)
197 return -EINVAL;
198
199 switch (cap->cap) {
200 case KVM_CAP_S390_IRQCHIP:
201 kvm->arch.use_irqchip = 1;
202 r = 0;
203 break;
204 default:
205 r = -EINVAL;
206 break;
207 }
208 return r;
209}
210
189long kvm_arch_vm_ioctl(struct file *filp, 211long kvm_arch_vm_ioctl(struct file *filp,
190 unsigned int ioctl, unsigned long arg) 212 unsigned int ioctl, unsigned long arg)
191{ 213{
@@ -203,6 +225,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
203 r = kvm_s390_inject_vm(kvm, &s390int); 225 r = kvm_s390_inject_vm(kvm, &s390int);
204 break; 226 break;
205 } 227 }
228 case KVM_ENABLE_CAP: {
229 struct kvm_enable_cap cap;
230 r = -EFAULT;
231 if (copy_from_user(&cap, argp, sizeof(cap)))
232 break;
233 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
234 break;
235 }
236 case KVM_CREATE_IRQCHIP: {
237 struct kvm_irq_routing_entry routing;
238
239 r = -EINVAL;
240 if (kvm->arch.use_irqchip) {
241 /* Set up dummy routing. */
242 memset(&routing, 0, sizeof(routing));
243 kvm_set_irq_routing(kvm, &routing, 0, 0);
244 r = 0;
245 }
246 break;
247 }
206 default: 248 default:
207 r = -ENOTTY; 249 r = -ENOTTY;
208 } 250 }
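
The ordering matters here: the capability has to be enabled on the VM before KVM_CREATE_IRQCHIP installs the dummy routing table. A minimal userspace sketch, assuming vm_fd is an open VM fd:

struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_IRQCHIP };

if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) == 0)
	ioctl(vm_fd, KVM_CREATE_IRQCHIP, 0);
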
@@ -214,6 +256,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
214{ 256{
215 int rc; 257 int rc;
216 char debug_name[16]; 258 char debug_name[16];
259 static unsigned long sca_offset;
217 260
218 rc = -EINVAL; 261 rc = -EINVAL;
219#ifdef CONFIG_KVM_S390_UCONTROL 262#ifdef CONFIG_KVM_S390_UCONTROL
@@ -235,6 +278,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
235 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); 278 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
236 if (!kvm->arch.sca) 279 if (!kvm->arch.sca)
237 goto out_err; 280 goto out_err;
281 spin_lock(&kvm_lock);
282 sca_offset = (sca_offset + 16) & 0x7f0;
283 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
284 spin_unlock(&kvm_lock);
238 285
239 sprintf(debug_name, "kvm-%u", current->pid); 286 sprintf(debug_name, "kvm-%u", current->pid);
240 287
@@ -255,9 +302,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
255 if (!kvm->arch.gmap) 302 if (!kvm->arch.gmap)
256 goto out_nogmap; 303 goto out_nogmap;
257 kvm->arch.gmap->private = kvm; 304 kvm->arch.gmap->private = kvm;
305 kvm->arch.gmap->pfault_enabled = 0;
258 } 306 }
259 307
260 kvm->arch.css_support = 0; 308 kvm->arch.css_support = 0;
309 kvm->arch.use_irqchip = 0;
261 310
262 return 0; 311 return 0;
263out_nogmap: 312out_nogmap:
@@ -272,6 +321,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
272{ 321{
273 VCPU_EVENT(vcpu, 3, "%s", "free cpu"); 322 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
274 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); 323 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
324 kvm_clear_async_pf_completion_queue(vcpu);
275 if (!kvm_is_ucontrol(vcpu->kvm)) { 325 if (!kvm_is_ucontrol(vcpu->kvm)) {
276 clear_bit(63 - vcpu->vcpu_id, 326 clear_bit(63 - vcpu->vcpu_id,
277 (unsigned long *) &vcpu->kvm->arch.sca->mcn); 327 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
@@ -320,11 +370,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
320 debug_unregister(kvm->arch.dbf); 370 debug_unregister(kvm->arch.dbf);
321 if (!kvm_is_ucontrol(kvm)) 371 if (!kvm_is_ucontrol(kvm))
322 gmap_free(kvm->arch.gmap); 372 gmap_free(kvm->arch.gmap);
373 kvm_s390_destroy_adapters(kvm);
323} 374}
324 375
325/* Section: vcpu related */ 376/* Section: vcpu related */
326int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) 377int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
327{ 378{
379 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
380 kvm_clear_async_pf_completion_queue(vcpu);
328 if (kvm_is_ucontrol(vcpu->kvm)) { 381 if (kvm_is_ucontrol(vcpu->kvm)) {
329 vcpu->arch.gmap = gmap_alloc(current->mm); 382 vcpu->arch.gmap = gmap_alloc(current->mm);
330 if (!vcpu->arch.gmap) 383 if (!vcpu->arch.gmap)
@@ -385,7 +438,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
385 vcpu->arch.guest_fpregs.fpc = 0; 438 vcpu->arch.guest_fpregs.fpc = 0;
386 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); 439 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
387 vcpu->arch.sie_block->gbea = 1; 440 vcpu->arch.sie_block->gbea = 1;
441 vcpu->arch.sie_block->pp = 0;
442 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
443 kvm_clear_async_pf_completion_queue(vcpu);
388 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 444 atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
445 kvm_s390_clear_local_irqs(vcpu);
389} 446}
390 447
391int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) 448int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -466,11 +523,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
466 spin_lock_init(&vcpu->arch.local_int.lock); 523 spin_lock_init(&vcpu->arch.local_int.lock);
467 INIT_LIST_HEAD(&vcpu->arch.local_int.list); 524 INIT_LIST_HEAD(&vcpu->arch.local_int.list);
468 vcpu->arch.local_int.float_int = &kvm->arch.float_int; 525 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
469 spin_lock(&kvm->arch.float_int.lock);
470 kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
471 vcpu->arch.local_int.wq = &vcpu->wq; 526 vcpu->arch.local_int.wq = &vcpu->wq;
472 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; 527 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
473 spin_unlock(&kvm->arch.float_int.lock);
474 528
475 rc = kvm_vcpu_init(vcpu, kvm, id); 529 rc = kvm_vcpu_init(vcpu, kvm, id);
476 if (rc) 530 if (rc)
@@ -490,9 +544,7 @@ out:
490 544
491int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 545int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
492{ 546{
493 /* kvm common code refers to this, but never calls it */ 547 return kvm_cpu_has_interrupt(vcpu);
494 BUG();
495 return 0;
496} 548}
497 549
498void s390_vcpu_block(struct kvm_vcpu *vcpu) 550void s390_vcpu_block(struct kvm_vcpu *vcpu)
@@ -568,6 +620,26 @@ static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
568 r = put_user(vcpu->arch.sie_block->ckc, 620 r = put_user(vcpu->arch.sie_block->ckc,
569 (u64 __user *)reg->addr); 621 (u64 __user *)reg->addr);
570 break; 622 break;
623 case KVM_REG_S390_PFTOKEN:
624 r = put_user(vcpu->arch.pfault_token,
625 (u64 __user *)reg->addr);
626 break;
627 case KVM_REG_S390_PFCOMPARE:
628 r = put_user(vcpu->arch.pfault_compare,
629 (u64 __user *)reg->addr);
630 break;
631 case KVM_REG_S390_PFSELECT:
632 r = put_user(vcpu->arch.pfault_select,
633 (u64 __user *)reg->addr);
634 break;
635 case KVM_REG_S390_PP:
636 r = put_user(vcpu->arch.sie_block->pp,
637 (u64 __user *)reg->addr);
638 break;
639 case KVM_REG_S390_GBEA:
640 r = put_user(vcpu->arch.sie_block->gbea,
641 (u64 __user *)reg->addr);
642 break;
571 default: 643 default:
572 break; 644 break;
573 } 645 }
@@ -597,6 +669,26 @@ static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
597 r = get_user(vcpu->arch.sie_block->ckc, 669 r = get_user(vcpu->arch.sie_block->ckc,
598 (u64 __user *)reg->addr); 670 (u64 __user *)reg->addr);
599 break; 671 break;
672 case KVM_REG_S390_PFTOKEN:
673 r = get_user(vcpu->arch.pfault_token,
674 (u64 __user *)reg->addr);
675 break;
676 case KVM_REG_S390_PFCOMPARE:
677 r = get_user(vcpu->arch.pfault_compare,
678 (u64 __user *)reg->addr);
679 break;
680 case KVM_REG_S390_PFSELECT:
681 r = get_user(vcpu->arch.pfault_select,
682 (u64 __user *)reg->addr);
683 break;
684 case KVM_REG_S390_PP:
685 r = get_user(vcpu->arch.sie_block->pp,
686 (u64 __user *)reg->addr);
687 break;
688 case KVM_REG_S390_GBEA:
689 r = get_user(vcpu->arch.sie_block->gbea,
690 (u64 __user *)reg->addr);
691 break;
600 default: 692 default:
601 break; 693 break;
602 } 694 }
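
The new pfault registers migrate like the existing ones through the ONE_REG interface. A hedged sketch for one of them, assuming vcpu_fd is an open vcpu fd and get_pfault_token is an illustrative helper:

static int get_pfault_token(int vcpu_fd, __u64 *token)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_PFTOKEN,
		.addr = (__u64)(unsigned long)token,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}

KVM_SET_ONE_REG with the same id restores the value on the target; PFCOMPARE, PFSELECT, PP and GBEA follow the same pattern.
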
@@ -715,10 +807,100 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
715 return 0; 807 return 0;
716} 808}
717 809
810static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
811{
812 long rc;
813 hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
814 struct mm_struct *mm = current->mm;
815 down_read(&mm->mmap_sem);
816 rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
817 up_read(&mm->mmap_sem);
818 return rc;
819}
820
821static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
822 unsigned long token)
823{
824 struct kvm_s390_interrupt inti;
825 inti.parm64 = token;
826
827 if (start_token) {
828 inti.type = KVM_S390_INT_PFAULT_INIT;
829 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
830 } else {
831 inti.type = KVM_S390_INT_PFAULT_DONE;
832 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
833 }
834}
835
836void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
837 struct kvm_async_pf *work)
838{
839 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
840 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
841}
842
843void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
844 struct kvm_async_pf *work)
845{
846 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
847 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
848}
849
850void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
851 struct kvm_async_pf *work)
852{
853 /* s390 will always inject the page directly */
854}
855
856bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
857{
858 /*
859 * s390 will always inject the page directly,
 860 * but we still want check_async_completion to clean up
861 */
862 return true;
863}
864
865static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
866{
867 hva_t hva;
868 struct kvm_arch_async_pf arch;
869 int rc;
870
871 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
872 return 0;
873 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
874 vcpu->arch.pfault_compare)
875 return 0;
876 if (psw_extint_disabled(vcpu))
877 return 0;
878 if (kvm_cpu_has_interrupt(vcpu))
879 return 0;
880 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
881 return 0;
882 if (!vcpu->arch.gmap->pfault_enabled)
883 return 0;
884
885 hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
886 if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
887 return 0;
888
889 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
890 return rc;
891}
892
718static int vcpu_pre_run(struct kvm_vcpu *vcpu) 893static int vcpu_pre_run(struct kvm_vcpu *vcpu)
719{ 894{
720 int rc, cpuflags; 895 int rc, cpuflags;
721 896
897 /*
898 * On s390 notifications for arriving pages will be delivered directly
899 * to the guest but the house keeping for completed pfaults is
900 * handled outside the worker.
901 */
902 kvm_check_async_pf_completion(vcpu);
903
722 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 904 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
723 905
724 if (need_resched()) 906 if (need_resched())
@@ -744,7 +926,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
744 926
745static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 927static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
746{ 928{
747 int rc; 929 int rc = -1;
748 930
749 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 931 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
750 vcpu->arch.sie_block->icptcode); 932 vcpu->arch.sie_block->icptcode);
@@ -758,7 +940,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
758 current->thread.gmap_addr; 940 current->thread.gmap_addr;
759 vcpu->run->s390_ucontrol.pgm_code = 0x10; 941 vcpu->run->s390_ucontrol.pgm_code = 0x10;
760 rc = -EREMOTE; 942 rc = -EREMOTE;
761 } else { 943
944 } else if (current->thread.gmap_pfault) {
945 trace_kvm_s390_major_guest_pfault(vcpu);
946 current->thread.gmap_pfault = 0;
947 if (kvm_arch_setup_async_pf(vcpu) ||
948 (kvm_arch_fault_in_sync(vcpu) >= 0))
949 rc = 0;
950 }
951
952 if (rc == -1) {
762 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 953 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
763 trace_kvm_s390_sie_fault(vcpu); 954 trace_kvm_s390_sie_fault(vcpu);
764 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 955 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
@@ -768,7 +959,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
768 959
769 if (rc == 0) { 960 if (rc == 0) {
770 if (kvm_is_ucontrol(vcpu->kvm)) 961 if (kvm_is_ucontrol(vcpu->kvm))
771 rc = -EOPNOTSUPP; 962 /* Don't exit for host interrupts. */
963 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
772 else 964 else
773 rc = kvm_handle_sie_intercept(vcpu); 965 rc = kvm_handle_sie_intercept(vcpu);
774 } 966 }
@@ -831,8 +1023,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
831 1023
832 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); 1024 atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
833 1025
834 BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
835
836 switch (kvm_run->exit_reason) { 1026 switch (kvm_run->exit_reason) {
837 case KVM_EXIT_S390_SIEIC: 1027 case KVM_EXIT_S390_SIEIC:
838 case KVM_EXIT_UNKNOWN: 1028 case KVM_EXIT_UNKNOWN:
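
Host-side async page faults stay off until userspace opts in through the FLIC device. A sketch, assuming flic_fd is the fd returned by KVM_CREATE_DEVICE for the flic:

struct kvm_device_attr attr = { .group = KVM_DEV_FLIC_APF_ENABLE };

ioctl(flic_fd, KVM_SET_DEVICE_ATTR, &attr);	/* no payload needed */

KVM_DEV_FLIC_APF_DISABLE_WAIT is the symmetric shutdown path: it clears pfault_enabled and then drains every vcpu's async-pf completion queue, as implemented in flic_set_attr() above.
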
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 564514f410f4..3c1e2274d9ea 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -129,6 +129,7 @@ enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
129void kvm_s390_tasklet(unsigned long parm); 129void kvm_s390_tasklet(unsigned long parm);
130void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); 130void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
131void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu); 131void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
132void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
132int __must_check kvm_s390_inject_vm(struct kvm *kvm, 133int __must_check kvm_s390_inject_vm(struct kvm *kvm,
133 struct kvm_s390_interrupt *s390int); 134 struct kvm_s390_interrupt *s390int);
134int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, 135int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
@@ -136,6 +137,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
136int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); 137int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
137struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, 138struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
138 u64 cr6, u64 schid); 139 u64 cr6, u64 schid);
140int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
139 141
140/* implemented in priv.c */ 142/* implemented in priv.c */
141int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); 143int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -161,4 +163,9 @@ bool kvm_enabled_cmma(void);
161/* implemented in diag.c */ 163/* implemented in diag.c */
162int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); 164int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
163 165
166/* implemented in interrupt.c */
167int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
168int psw_extint_disabled(struct kvm_vcpu *vcpu);
169void kvm_s390_destroy_adapters(struct kvm *kvm);
170
164#endif 171#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index aacb6b129914..476e9e218f43 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -396,15 +396,10 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
396 396
397static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) 397static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
398{ 398{
399 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
400 int cpus = 0; 399 int cpus = 0;
401 int n; 400 int n;
402 401
403 spin_lock(&fi->lock); 402 cpus = atomic_read(&vcpu->kvm->online_vcpus);
404 for (n = 0; n < KVM_MAX_VCPUS; n++)
405 if (fi->local_int[n])
406 cpus++;
407 spin_unlock(&fi->lock);
408 403
409 /* deal with other level 3 hypervisors */ 404 /* deal with other level 3 hypervisors */
410 if (stsi(mem, 3, 2, 2)) 405 if (stsi(mem, 3, 2, 2))
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 87c2b3a3bd3e..26caeb530a78 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -23,29 +23,30 @@
23static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, 23static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
24 u64 *reg) 24 u64 *reg)
25{ 25{
26 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; 26 struct kvm_s390_local_interrupt *li;
27 struct kvm_vcpu *dst_vcpu = NULL;
28 int cpuflags;
27 int rc; 29 int rc;
28 30
29 if (cpu_addr >= KVM_MAX_VCPUS) 31 if (cpu_addr >= KVM_MAX_VCPUS)
30 return SIGP_CC_NOT_OPERATIONAL; 32 return SIGP_CC_NOT_OPERATIONAL;
31 33
32 spin_lock(&fi->lock); 34 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
33 if (fi->local_int[cpu_addr] == NULL) 35 if (!dst_vcpu)
34 rc = SIGP_CC_NOT_OPERATIONAL; 36 return SIGP_CC_NOT_OPERATIONAL;
35 else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) 37 li = &dst_vcpu->arch.local_int;
36 & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED))) 38
39 cpuflags = atomic_read(li->cpuflags);
40 if (!(cpuflags & (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
37 rc = SIGP_CC_ORDER_CODE_ACCEPTED; 41 rc = SIGP_CC_ORDER_CODE_ACCEPTED;
38 else { 42 else {
39 *reg &= 0xffffffff00000000UL; 43 *reg &= 0xffffffff00000000UL;
40 if (atomic_read(fi->local_int[cpu_addr]->cpuflags) 44 if (cpuflags & CPUSTAT_ECALL_PEND)
41 & CPUSTAT_ECALL_PEND)
42 *reg |= SIGP_STATUS_EXT_CALL_PENDING; 45 *reg |= SIGP_STATUS_EXT_CALL_PENDING;
43 if (atomic_read(fi->local_int[cpu_addr]->cpuflags) 46 if (cpuflags & CPUSTAT_STOPPED)
44 & CPUSTAT_STOPPED)
45 *reg |= SIGP_STATUS_STOPPED; 47 *reg |= SIGP_STATUS_STOPPED;
46 rc = SIGP_CC_STATUS_STORED; 48 rc = SIGP_CC_STATUS_STORED;
47 } 49 }
48 spin_unlock(&fi->lock);
49 50
50 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); 51 VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
51 return rc; 52 return rc;
@@ -53,12 +54,13 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
53 54
54static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) 55static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
55{ 56{
56 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
57 struct kvm_s390_local_interrupt *li; 57 struct kvm_s390_local_interrupt *li;
58 struct kvm_s390_interrupt_info *inti; 58 struct kvm_s390_interrupt_info *inti;
59 int rc; 59 struct kvm_vcpu *dst_vcpu = NULL;
60 60
61 if (cpu_addr >= KVM_MAX_VCPUS) 61 if (cpu_addr < KVM_MAX_VCPUS)
62 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
63 if (!dst_vcpu)
62 return SIGP_CC_NOT_OPERATIONAL; 64 return SIGP_CC_NOT_OPERATIONAL;
63 65
64 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 66 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@@ -68,13 +70,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
68 inti->type = KVM_S390_INT_EMERGENCY; 70 inti->type = KVM_S390_INT_EMERGENCY;
69 inti->emerg.code = vcpu->vcpu_id; 71 inti->emerg.code = vcpu->vcpu_id;
70 72
71 spin_lock(&fi->lock); 73 li = &dst_vcpu->arch.local_int;
72 li = fi->local_int[cpu_addr];
73 if (li == NULL) {
74 rc = SIGP_CC_NOT_OPERATIONAL;
75 kfree(inti);
76 goto unlock;
77 }
78 spin_lock_bh(&li->lock); 74 spin_lock_bh(&li->lock);
79 list_add_tail(&inti->list, &li->list); 75 list_add_tail(&inti->list, &li->list);
80 atomic_set(&li->active, 1); 76 atomic_set(&li->active, 1);
@@ -82,11 +78,9 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
82 if (waitqueue_active(li->wq)) 78 if (waitqueue_active(li->wq))
83 wake_up_interruptible(li->wq); 79 wake_up_interruptible(li->wq);
84 spin_unlock_bh(&li->lock); 80 spin_unlock_bh(&li->lock);
85 rc = SIGP_CC_ORDER_CODE_ACCEPTED;
86 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); 81 VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
87unlock: 82
88 spin_unlock(&fi->lock); 83 return SIGP_CC_ORDER_CODE_ACCEPTED;
89 return rc;
90} 84}
91 85
92static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, 86static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
@@ -122,12 +116,13 @@ static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
122 116
123static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) 117static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
124{ 118{
125 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
126 struct kvm_s390_local_interrupt *li; 119 struct kvm_s390_local_interrupt *li;
127 struct kvm_s390_interrupt_info *inti; 120 struct kvm_s390_interrupt_info *inti;
128 int rc; 121 struct kvm_vcpu *dst_vcpu = NULL;
129 122
130 if (cpu_addr >= KVM_MAX_VCPUS) 123 if (cpu_addr < KVM_MAX_VCPUS)
124 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
125 if (!dst_vcpu)
131 return SIGP_CC_NOT_OPERATIONAL; 126 return SIGP_CC_NOT_OPERATIONAL;
132 127
133 inti = kzalloc(sizeof(*inti), GFP_KERNEL); 128 inti = kzalloc(sizeof(*inti), GFP_KERNEL);
@@ -137,13 +132,7 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
137 inti->type = KVM_S390_INT_EXTERNAL_CALL; 132 inti->type = KVM_S390_INT_EXTERNAL_CALL;
138 inti->extcall.code = vcpu->vcpu_id; 133 inti->extcall.code = vcpu->vcpu_id;
139 134
140 spin_lock(&fi->lock); 135 li = &dst_vcpu->arch.local_int;
141 li = fi->local_int[cpu_addr];
142 if (li == NULL) {
143 rc = SIGP_CC_NOT_OPERATIONAL;
144 kfree(inti);
145 goto unlock;
146 }
147 spin_lock_bh(&li->lock); 136 spin_lock_bh(&li->lock);
148 list_add_tail(&inti->list, &li->list); 137 list_add_tail(&inti->list, &li->list);
149 atomic_set(&li->active, 1); 138 atomic_set(&li->active, 1);
@@ -151,11 +140,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
151 if (waitqueue_active(li->wq)) 140 if (waitqueue_active(li->wq))
152 wake_up_interruptible(li->wq); 141 wake_up_interruptible(li->wq);
153 spin_unlock_bh(&li->lock); 142 spin_unlock_bh(&li->lock);
154 rc = SIGP_CC_ORDER_CODE_ACCEPTED;
155 VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr); 143 VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
156unlock: 144
157 spin_unlock(&fi->lock); 145 return SIGP_CC_ORDER_CODE_ACCEPTED;
158 return rc;
159} 146}
160 147
161static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) 148static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
@@ -189,31 +176,26 @@ out:
189 176
190static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) 177static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
191{ 178{
192 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
193 struct kvm_s390_local_interrupt *li; 179 struct kvm_s390_local_interrupt *li;
180 struct kvm_vcpu *dst_vcpu = NULL;
194 int rc; 181 int rc;
195 182
196 if (cpu_addr >= KVM_MAX_VCPUS) 183 if (cpu_addr >= KVM_MAX_VCPUS)
197 return SIGP_CC_NOT_OPERATIONAL; 184 return SIGP_CC_NOT_OPERATIONAL;
198 185
199 spin_lock(&fi->lock); 186 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
200 li = fi->local_int[cpu_addr]; 187 if (!dst_vcpu)
201 if (li == NULL) { 188 return SIGP_CC_NOT_OPERATIONAL;
202 rc = SIGP_CC_NOT_OPERATIONAL; 189 li = &dst_vcpu->arch.local_int;
203 goto unlock;
204 }
205 190
206 rc = __inject_sigp_stop(li, action); 191 rc = __inject_sigp_stop(li, action);
207 192
208unlock:
209 spin_unlock(&fi->lock);
210 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); 193 VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
211 194
212 if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { 195 if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
213 /* If the CPU has already been stopped, we still have 196 /* If the CPU has already been stopped, we still have
214 * to save the status when doing stop-and-store. This 197 * to save the status when doing stop-and-store. This
215 * has to be done after unlocking all spinlocks. */ 198 * has to be done after unlocking all spinlocks. */
216 struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
217 rc = kvm_s390_store_status_unloaded(dst_vcpu, 199 rc = kvm_s390_store_status_unloaded(dst_vcpu,
218 KVM_S390_STORE_STATUS_NOADDR); 200 KVM_S390_STORE_STATUS_NOADDR);
219 } 201 }
@@ -224,6 +206,8 @@ unlock:
224static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) 206static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
225{ 207{
226 int rc; 208 int rc;
209 unsigned int i;
210 struct kvm_vcpu *v;
227 211
228 switch (parameter & 0xff) { 212 switch (parameter & 0xff) {
229 case 0: 213 case 0:
@@ -231,6 +215,11 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
231 break; 215 break;
232 case 1: 216 case 1:
233 case 2: 217 case 2:
218 kvm_for_each_vcpu(i, v, vcpu->kvm) {
219 v->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
220 kvm_clear_async_pf_completion_queue(v);
221 }
222
234 rc = SIGP_CC_ORDER_CODE_ACCEPTED; 223 rc = SIGP_CC_ORDER_CODE_ACCEPTED;
235 break; 224 break;
236 default: 225 default:
@@ -242,12 +231,18 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
242static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, 231static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
243 u64 *reg) 232 u64 *reg)
244{ 233{
245 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; 234 struct kvm_s390_local_interrupt *li;
246 struct kvm_s390_local_interrupt *li = NULL; 235 struct kvm_vcpu *dst_vcpu = NULL;
247 struct kvm_s390_interrupt_info *inti; 236 struct kvm_s390_interrupt_info *inti;
248 int rc; 237 int rc;
249 u8 tmp; 238 u8 tmp;
250 239
240 if (cpu_addr < KVM_MAX_VCPUS)
241 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
242 if (!dst_vcpu)
243 return SIGP_CC_NOT_OPERATIONAL;
244 li = &dst_vcpu->arch.local_int;
245
251 /* make sure that the new value is valid memory */ 246 /* make sure that the new value is valid memory */
252 address = address & 0x7fffe000u; 247 address = address & 0x7fffe000u;
253 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || 248 if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
@@ -261,18 +256,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
261 if (!inti) 256 if (!inti)
262 return SIGP_CC_BUSY; 257 return SIGP_CC_BUSY;
263 258
264 spin_lock(&fi->lock);
265 if (cpu_addr < KVM_MAX_VCPUS)
266 li = fi->local_int[cpu_addr];
267
268 if (li == NULL) {
269 *reg &= 0xffffffff00000000UL;
270 *reg |= SIGP_STATUS_INCORRECT_STATE;
271 rc = SIGP_CC_STATUS_STORED;
272 kfree(inti);
273 goto out_fi;
274 }
275
276 spin_lock_bh(&li->lock); 259 spin_lock_bh(&li->lock);
277 /* cpu must be in stopped state */ 260 /* cpu must be in stopped state */
278 if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { 261 if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
@@ -295,8 +278,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
295 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); 278 VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
296out_li: 279out_li:
297 spin_unlock_bh(&li->lock); 280 spin_unlock_bh(&li->lock);
298out_fi:
299 spin_unlock(&fi->lock);
300 return rc; 281 return rc;
301} 282}
302 283
@@ -334,28 +315,26 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
334static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, 315static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
335 u64 *reg) 316 u64 *reg)
336{ 317{
318 struct kvm_s390_local_interrupt *li;
319 struct kvm_vcpu *dst_vcpu = NULL;
337 int rc; 320 int rc;
338 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
339 321
340 if (cpu_addr >= KVM_MAX_VCPUS) 322 if (cpu_addr >= KVM_MAX_VCPUS)
341 return SIGP_CC_NOT_OPERATIONAL; 323 return SIGP_CC_NOT_OPERATIONAL;
342 324
343 spin_lock(&fi->lock); 325 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
344 if (fi->local_int[cpu_addr] == NULL) 326 if (!dst_vcpu)
345 rc = SIGP_CC_NOT_OPERATIONAL; 327 return SIGP_CC_NOT_OPERATIONAL;
346 else { 328 li = &dst_vcpu->arch.local_int;
347 if (atomic_read(fi->local_int[cpu_addr]->cpuflags) 329 if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
348 & CPUSTAT_RUNNING) { 330 /* running */
349 /* running */ 331 rc = SIGP_CC_ORDER_CODE_ACCEPTED;
350 rc = SIGP_CC_ORDER_CODE_ACCEPTED; 332 } else {
351 } else { 333 /* not running */
352 /* not running */ 334 *reg &= 0xffffffff00000000UL;
353 *reg &= 0xffffffff00000000UL; 335 *reg |= SIGP_STATUS_NOT_RUNNING;
354 *reg |= SIGP_STATUS_NOT_RUNNING; 336 rc = SIGP_CC_STATUS_STORED;
355 rc = SIGP_CC_STATUS_STORED;
356 }
357 } 337 }
358 spin_unlock(&fi->lock);
359 338
360 VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, 339 VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
361 rc); 340 rc);
@@ -366,26 +345,22 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
366/* Test whether the destination CPU is available and not busy */ 345/* Test whether the destination CPU is available and not busy */
367static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) 346static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
368{ 347{
369 struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
370 struct kvm_s390_local_interrupt *li; 348 struct kvm_s390_local_interrupt *li;
371 int rc = SIGP_CC_ORDER_CODE_ACCEPTED; 349 int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
350 struct kvm_vcpu *dst_vcpu = NULL;
372 351
373 if (cpu_addr >= KVM_MAX_VCPUS) 352 if (cpu_addr >= KVM_MAX_VCPUS)
374 return SIGP_CC_NOT_OPERATIONAL; 353 return SIGP_CC_NOT_OPERATIONAL;
375 354
376 spin_lock(&fi->lock); 355 dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
377 li = fi->local_int[cpu_addr]; 356 if (!dst_vcpu)
378 if (li == NULL) { 357 return SIGP_CC_NOT_OPERATIONAL;
379 rc = SIGP_CC_NOT_OPERATIONAL; 358 li = &dst_vcpu->arch.local_int;
380 goto out;
381 }
382
383 spin_lock_bh(&li->lock); 359 spin_lock_bh(&li->lock);
384 if (li->action_bits & ACTION_STOP_ON_STOP) 360 if (li->action_bits & ACTION_STOP_ON_STOP)
385 rc = SIGP_CC_BUSY; 361 rc = SIGP_CC_BUSY;
386 spin_unlock_bh(&li->lock); 362 spin_unlock_bh(&li->lock);
387out: 363
388 spin_unlock(&fi->lock);
389 return rc; 364 return rc;
390} 365}
391 366
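Every SIGP handler in this file now follows the same pattern: resolve the destination CPU address through kvm_get_vcpu() and work on that VCPU's embedded local-interrupt structure, so the floating-interrupt lock and the fi->local_int[] array are no longer needed for addressing. A condensed sketch of that shared lookup (sigp_get_dst_li() is a hypothetical helper, not part of the patch; the hunks above open-code it per handler):

/* Hypothetical helper condensing the lookup repeated in the handlers above.
 * Returns NULL when the order should complete with SIGP_CC_NOT_OPERATIONAL. */
static struct kvm_s390_local_interrupt *sigp_get_dst_li(struct kvm_vcpu *vcpu,
							u16 cpu_addr)
{
	struct kvm_vcpu *dst_vcpu;

	if (cpu_addr >= KVM_MAX_VCPUS)
		return NULL;
	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
	if (!dst_vcpu)
		return NULL;
	/* local_int is embedded in the destination vcpu, so only its own
	 * lock (li->lock) is needed from here on. */
	return &dst_vcpu->arch.local_int;
}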
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 3db76b2daed7..e8e7213d4cc5 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -30,6 +30,52 @@
30 TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \ 30 TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
31 __entry->pswmask, __entry->pswaddr, p_args) 31 __entry->pswmask, __entry->pswaddr, p_args)
32 32
33TRACE_EVENT(kvm_s390_major_guest_pfault,
34 TP_PROTO(VCPU_PROTO_COMMON),
35 TP_ARGS(VCPU_ARGS_COMMON),
36
37 TP_STRUCT__entry(
38 VCPU_FIELD_COMMON
39 ),
40
41 TP_fast_assign(
42 VCPU_ASSIGN_COMMON
43 ),
44 VCPU_TP_PRINTK("%s", "major fault, maybe applicable for pfault")
45 );
46
47TRACE_EVENT(kvm_s390_pfault_init,
48 TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
49 TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
50
51 TP_STRUCT__entry(
52 VCPU_FIELD_COMMON
53 __field(long, pfault_token)
54 ),
55
56 TP_fast_assign(
57 VCPU_ASSIGN_COMMON
58 __entry->pfault_token = pfault_token;
59 ),
60 VCPU_TP_PRINTK("init pfault token %ld", __entry->pfault_token)
61 );
62
63TRACE_EVENT(kvm_s390_pfault_done,
64 TP_PROTO(VCPU_PROTO_COMMON, long pfault_token),
65 TP_ARGS(VCPU_ARGS_COMMON, pfault_token),
66
67 TP_STRUCT__entry(
68 VCPU_FIELD_COMMON
69 __field(long, pfault_token)
70 ),
71
72 TP_fast_assign(
73 VCPU_ASSIGN_COMMON
74 __entry->pfault_token = pfault_token;
75 ),
76 VCPU_TP_PRINTK("done pfault token %ld", __entry->pfault_token)
77 );
78
33/* 79/*
34 * Tracepoints for SIE entry and exit. 80 * Tracepoints for SIE entry and exit.
35 */ 81 */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index d95265b2719f..88cef505453b 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -50,6 +50,7 @@
50#define VM_FAULT_BADMAP 0x020000 50#define VM_FAULT_BADMAP 0x020000
51#define VM_FAULT_BADACCESS 0x040000 51#define VM_FAULT_BADACCESS 0x040000
52#define VM_FAULT_SIGNAL 0x080000 52#define VM_FAULT_SIGNAL 0x080000
53#define VM_FAULT_PFAULT 0x100000
53 54
54static unsigned long store_indication __read_mostly; 55static unsigned long store_indication __read_mostly;
55 56
@@ -227,6 +228,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
227 return; 228 return;
228 } 229 }
229 case VM_FAULT_BADCONTEXT: 230 case VM_FAULT_BADCONTEXT:
231 case VM_FAULT_PFAULT:
230 do_no_context(regs); 232 do_no_context(regs);
231 break; 233 break;
232 case VM_FAULT_SIGNAL: 234 case VM_FAULT_SIGNAL:
@@ -264,6 +266,9 @@ static noinline void do_fault_error(struct pt_regs *regs, int fault)
264 */ 266 */
265static inline int do_exception(struct pt_regs *regs, int access) 267static inline int do_exception(struct pt_regs *regs, int access)
266{ 268{
269#ifdef CONFIG_PGSTE
270 struct gmap *gmap;
271#endif
267 struct task_struct *tsk; 272 struct task_struct *tsk;
268 struct mm_struct *mm; 273 struct mm_struct *mm;
269 struct vm_area_struct *vma; 274 struct vm_area_struct *vma;
@@ -304,9 +309,10 @@ static inline int do_exception(struct pt_regs *regs, int access)
304 down_read(&mm->mmap_sem); 309 down_read(&mm->mmap_sem);
305 310
306#ifdef CONFIG_PGSTE 311#ifdef CONFIG_PGSTE
307 if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { 312 gmap = (struct gmap *)
308 address = __gmap_fault(address, 313 ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
309 (struct gmap *) S390_lowcore.gmap); 314 if (gmap) {
315 address = __gmap_fault(address, gmap);
310 if (address == -EFAULT) { 316 if (address == -EFAULT) {
311 fault = VM_FAULT_BADMAP; 317 fault = VM_FAULT_BADMAP;
312 goto out_up; 318 goto out_up;
@@ -315,6 +321,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
315 fault = VM_FAULT_OOM; 321 fault = VM_FAULT_OOM;
316 goto out_up; 322 goto out_up;
317 } 323 }
324 if (gmap->pfault_enabled)
325 flags |= FAULT_FLAG_RETRY_NOWAIT;
318 } 326 }
319#endif 327#endif
320 328
@@ -371,9 +379,19 @@ retry:
371 regs, address); 379 regs, address);
372 } 380 }
373 if (fault & VM_FAULT_RETRY) { 381 if (fault & VM_FAULT_RETRY) {
382#ifdef CONFIG_PGSTE
383 if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
384 /* FAULT_FLAG_RETRY_NOWAIT has been set,
385 * mmap_sem has not been released */
386 current->thread.gmap_pfault = 1;
387 fault = VM_FAULT_PFAULT;
388 goto out_up;
389 }
390#endif
374 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 391 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
375 * of starvation. */ 392 * of starvation. */
376 flags &= ~FAULT_FLAG_ALLOW_RETRY; 393 flags &= ~(FAULT_FLAG_ALLOW_RETRY |
394 FAULT_FLAG_RETRY_NOWAIT);
377 flags |= FAULT_FLAG_TRIED; 395 flags |= FAULT_FLAG_TRIED;
378 down_read(&mm->mmap_sem); 396 down_read(&mm->mmap_sem);
379 goto retry; 397 goto retry;
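The fault handler changes above hook s390 guest faults into the pfault machinery: a fault raised while running a VCPU on a pfault-enabled gmap is first attempted without blocking, and if the mm would have to wait, the handler reports the new VM_FAULT_PFAULT code so KVM can notify the guest instead of stalling the host thread. A condensed sketch of that decision path (simplified from do_exception() above; the handle_mm_fault() signature is the one current at the time of this change, and OOM/signal handling and the retry loop are omitted):

/* Condensed pfault decision path for a guest fault (sketch only). */
static int guest_fault_sketch(struct gmap *gmap, struct vm_area_struct *vma,
			      unsigned long address, unsigned int flags)
{
	int fault;

	/* A pfault-enabled gmap asks for a non-blocking first attempt. */
	if (gmap && gmap->pfault_enabled)
		flags |= FAULT_FLAG_RETRY_NOWAIT;

	fault = handle_mm_fault(current->mm, vma, address, flags);

	if ((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_RETRY_NOWAIT)) {
		/* The mm would have had to sleep and mmap_sem is still held:
		 * record the pending pfault so KVM can inject pfault-init and
		 * let the guest run something else in the meantime. */
		current->thread.gmap_pfault = 1;
		return VM_FAULT_PFAULT;
	}
	return fault;
}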
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fdf83afbb7d9..fcaf9c961265 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -337,6 +337,11 @@ struct kvm_pmu {
337 u64 reprogram_pmi; 337 u64 reprogram_pmi;
338}; 338};
339 339
340enum {
341 KVM_DEBUGREG_BP_ENABLED = 1,
342 KVM_DEBUGREG_WONT_EXIT = 2,
343};
344
340struct kvm_vcpu_arch { 345struct kvm_vcpu_arch {
341 /* 346 /*
342 * rip and regs accesses must go through 347 * rip and regs accesses must go through
@@ -444,7 +449,6 @@ struct kvm_vcpu_arch {
444 } st; 449 } st;
445 450
446 u64 last_guest_tsc; 451 u64 last_guest_tsc;
447 u64 last_kernel_ns;
448 u64 last_host_tsc; 452 u64 last_host_tsc;
449 u64 tsc_offset_adjustment; 453 u64 tsc_offset_adjustment;
450 u64 this_tsc_nsec; 454 u64 this_tsc_nsec;
@@ -464,7 +468,7 @@ struct kvm_vcpu_arch {
464 struct mtrr_state_type mtrr_state; 468 struct mtrr_state_type mtrr_state;
465 u32 pat; 469 u32 pat;
466 470
467 int switch_db_regs; 471 unsigned switch_db_regs;
468 unsigned long db[KVM_NR_DB_REGS]; 472 unsigned long db[KVM_NR_DB_REGS];
469 unsigned long dr6; 473 unsigned long dr6;
470 unsigned long dr7; 474 unsigned long dr7;
@@ -599,6 +603,8 @@ struct kvm_arch {
599 bool use_master_clock; 603 bool use_master_clock;
600 u64 master_kernel_ns; 604 u64 master_kernel_ns;
601 cycle_t master_cycle_now; 605 cycle_t master_cycle_now;
606 struct delayed_work kvmclock_update_work;
607 struct delayed_work kvmclock_sync_work;
602 608
603 struct kvm_xen_hvm_config xen_hvm_config; 609 struct kvm_xen_hvm_config xen_hvm_config;
604 610
@@ -702,6 +708,7 @@ struct kvm_x86_ops {
702 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); 708 void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
703 u64 (*get_dr6)(struct kvm_vcpu *vcpu); 709 u64 (*get_dr6)(struct kvm_vcpu *vcpu);
704 void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); 710 void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
711 void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
705 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); 712 void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
706 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); 713 void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
707 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); 714 unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -728,8 +735,8 @@ struct kvm_x86_ops {
728 int (*nmi_allowed)(struct kvm_vcpu *vcpu); 735 int (*nmi_allowed)(struct kvm_vcpu *vcpu);
729 bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); 736 bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
730 void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); 737 void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
731 int (*enable_nmi_window)(struct kvm_vcpu *vcpu); 738 void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
732 int (*enable_irq_window)(struct kvm_vcpu *vcpu); 739 void (*enable_irq_window)(struct kvm_vcpu *vcpu);
733 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); 740 void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
734 int (*vm_has_apicv)(struct kvm *kvm); 741 int (*vm_has_apicv)(struct kvm *kvm);
735 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); 742 void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
@@ -765,6 +772,9 @@ struct kvm_x86_ops {
765 struct x86_instruction_info *info, 772 struct x86_instruction_info *info,
766 enum x86_intercept_stage stage); 773 enum x86_intercept_stage stage);
767 void (*handle_external_intr)(struct kvm_vcpu *vcpu); 774 void (*handle_external_intr)(struct kvm_vcpu *vcpu);
775 bool (*mpx_supported)(void);
776
777 int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
768}; 778};
769 779
770struct kvm_arch_async_pf { 780struct kvm_arch_async_pf {
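Two of the kvm_x86_ops changes above belong together: enable_irq_window()/enable_nmi_window() become void because they can no longer fail, and the new check_nested_events() hook is where the "L2 must exit to L1 before L1 can be injected" decision now lives. A sketch of the assumed ordering in the common injection path (req_int_win, kvm_cpu_has_injectable_intr() and the overall flow are assumptions based on x86.c; simplified, return codes omitted):

/* Sketch of the assumed ordering in the common x86 injection path. */
static void injection_order_sketch(struct kvm_vcpu *vcpu, bool req_int_win)
{
	/* 1. Let the vendor module force a nested vmexit first if needed;
	 *    this replaces the old -EBUSY return from the window callbacks. */
	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
		kvm_x86_ops->check_nested_events(vcpu, req_int_win);

	/* 2. ... inject whatever exception/NMI/interrupt is injectable ... */

	/* 3. If something is still pending, merely arm the window exit;
	 *    hence the new void return type. */
	if (vcpu->arch.nmi_pending)
		kvm_x86_ops->enable_nmi_window(vcpu);
	else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
		kvm_x86_ops->enable_irq_window(vcpu);
}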
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 2067264fb7f5..7004d21e6219 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -85,6 +85,7 @@
85#define VM_EXIT_SAVE_IA32_EFER 0x00100000 85#define VM_EXIT_SAVE_IA32_EFER 0x00100000
86#define VM_EXIT_LOAD_IA32_EFER 0x00200000 86#define VM_EXIT_LOAD_IA32_EFER 0x00200000
87#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 87#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
88#define VM_EXIT_CLEAR_BNDCFGS 0x00800000
88 89
89#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff 90#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
90 91
@@ -95,6 +96,7 @@
95#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 96#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
96#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 97#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
97#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 98#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
99#define VM_ENTRY_LOAD_BNDCFGS 0x00010000
98 100
99#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff 101#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
100 102
@@ -174,6 +176,8 @@ enum vmcs_field {
174 GUEST_PDPTR2_HIGH = 0x0000280f, 176 GUEST_PDPTR2_HIGH = 0x0000280f,
175 GUEST_PDPTR3 = 0x00002810, 177 GUEST_PDPTR3 = 0x00002810,
176 GUEST_PDPTR3_HIGH = 0x00002811, 178 GUEST_PDPTR3_HIGH = 0x00002811,
179 GUEST_BNDCFGS = 0x00002812,
180 GUEST_BNDCFGS_HIGH = 0x00002813,
177 HOST_IA32_PAT = 0x00002c00, 181 HOST_IA32_PAT = 0x00002c00,
178 HOST_IA32_PAT_HIGH = 0x00002c01, 182 HOST_IA32_PAT_HIGH = 0x00002c01,
179 HOST_IA32_EFER = 0x00002c02, 183 HOST_IA32_EFER = 0x00002c02,
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 6c1d7411eb00..d949ef28c48b 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -16,6 +16,8 @@
16#define XSTATE_Hi16_ZMM 0x80 16#define XSTATE_Hi16_ZMM 0x80
17 17
18#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) 18#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
19/* Bit 63 of XCR0 is reserved for future expansion */
20#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
19 21
20#define FXSAVE_SIZE 512 22#define FXSAVE_SIZE 512
21 23
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 4924f4be2b99..c827ace3121b 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -295,6 +295,7 @@
295#define MSR_SMI_COUNT 0x00000034 295#define MSR_SMI_COUNT 0x00000034
296#define MSR_IA32_FEATURE_CONTROL 0x0000003a 296#define MSR_IA32_FEATURE_CONTROL 0x0000003a
297#define MSR_IA32_TSC_ADJUST 0x0000003b 297#define MSR_IA32_TSC_ADJUST 0x0000003b
298#define MSR_IA32_BNDCFGS 0x00000d90
298 299
299#define FEATURE_CONTROL_LOCKED (1<<0) 300#define FEATURE_CONTROL_LOCKED (1<<0)
300#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) 301#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 713f1b3bad52..0331cb389d68 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -417,7 +417,6 @@ void kvm_disable_steal_time(void)
417#ifdef CONFIG_SMP 417#ifdef CONFIG_SMP
418static void __init kvm_smp_prepare_boot_cpu(void) 418static void __init kvm_smp_prepare_boot_cpu(void)
419{ 419{
420 WARN_ON(kvm_register_clock("primary cpu clock"));
421 kvm_guest_cpu_init(); 420 kvm_guest_cpu_init();
422 native_smp_prepare_boot_cpu(); 421 native_smp_prepare_boot_cpu();
423 kvm_spinlock_init(); 422 kvm_spinlock_init();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e6041094ff26..d9156ceecdff 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -242,7 +242,7 @@ void __init kvmclock_init(void)
242 hv_clock = __va(mem); 242 hv_clock = __va(mem);
243 memset(hv_clock, 0, size); 243 memset(hv_clock, 0, size);
244 244
245 if (kvm_register_clock("boot clock")) { 245 if (kvm_register_clock("primary cpu clock")) {
246 hv_clock = NULL; 246 hv_clock = NULL;
247 memblock_free(mem, size); 247 memblock_free(mem, size);
248 return; 248 return;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index e5503d8aec1d..bea60671ef8a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -28,7 +28,7 @@ static u32 xstate_required_size(u64 xstate_bv)
28 int feature_bit = 0; 28 int feature_bit = 0;
29 u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; 29 u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
30 30
31 xstate_bv &= ~XSTATE_FPSSE; 31 xstate_bv &= XSTATE_EXTEND_MASK;
32 while (xstate_bv) { 32 while (xstate_bv) {
33 if (xstate_bv & 0x1) { 33 if (xstate_bv & 0x1) {
34 u32 eax, ebx, ecx, edx; 34 u32 eax, ebx, ecx, edx;
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv)
43 return ret; 43 return ret;
44} 44}
45 45
46u64 kvm_supported_xcr0(void)
47{
48 u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
49
50 if (!kvm_x86_ops->mpx_supported())
51 xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
52
53 return xcr0;
54}
55
46void kvm_update_cpuid(struct kvm_vcpu *vcpu) 56void kvm_update_cpuid(struct kvm_vcpu *vcpu)
47{ 57{
48 struct kvm_cpuid_entry2 *best; 58 struct kvm_cpuid_entry2 *best;
@@ -73,9 +83,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
73 } else { 83 } else {
74 vcpu->arch.guest_supported_xcr0 = 84 vcpu->arch.guest_supported_xcr0 =
75 (best->eax | ((u64)best->edx << 32)) & 85 (best->eax | ((u64)best->edx << 32)) &
76 host_xcr0 & KVM_SUPPORTED_XCR0; 86 kvm_supported_xcr0();
77 vcpu->arch.guest_xstate_size = 87 vcpu->arch.guest_xstate_size = best->ebx =
78 xstate_required_size(vcpu->arch.guest_supported_xcr0); 88 xstate_required_size(vcpu->arch.xcr0);
79 } 89 }
80 90
81 kvm_pmu_cpuid_update(vcpu); 91 kvm_pmu_cpuid_update(vcpu);
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
210 entry->flags = 0; 220 entry->flags = 0;
211} 221}
212 222
213static bool supported_xcr0_bit(unsigned bit)
214{
215 u64 mask = ((u64)1 << bit);
216
217 return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
218}
219
220#define F(x) bit(X86_FEATURE_##x) 223#define F(x) bit(X86_FEATURE_##x)
221 224
222static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, 225static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -256,6 +259,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
256#endif 259#endif
257 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; 260 unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
258 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; 261 unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
262 unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
259 263
260 /* cpuid 1.edx */ 264 /* cpuid 1.edx */
261 const u32 kvm_supported_word0_x86_features = 265 const u32 kvm_supported_word0_x86_features =
@@ -303,7 +307,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
303 /* cpuid 7.0.ebx */ 307 /* cpuid 7.0.ebx */
304 const u32 kvm_supported_word9_x86_features = 308 const u32 kvm_supported_word9_x86_features =
305 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | 309 F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
306 F(BMI2) | F(ERMS) | f_invpcid | F(RTM); 310 F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
311 F(ADX);
307 312
308 /* all calls to cpuid_count() should be made on the same cpu */ 313 /* all calls to cpuid_count() should be made on the same cpu */
309 get_cpu(); 314 get_cpu();
@@ -436,16 +441,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
436 } 441 }
437 case 0xd: { 442 case 0xd: {
438 int idx, i; 443 int idx, i;
444 u64 supported = kvm_supported_xcr0();
439 445
440 entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0; 446 entry->eax &= supported;
441 entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32; 447 entry->edx &= supported >> 32;
442 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 448 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
443 for (idx = 1, i = 1; idx < 64; ++idx) { 449 for (idx = 1, i = 1; idx < 64; ++idx) {
450 u64 mask = ((u64)1 << idx);
444 if (*nent >= maxnent) 451 if (*nent >= maxnent)
445 goto out; 452 goto out;
446 453
447 do_cpuid_1_ent(&entry[i], function, idx); 454 do_cpuid_1_ent(&entry[i], function, idx);
448 if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) 455 if (entry[i].eax == 0 || !(supported & mask))
449 continue; 456 continue;
450 entry[i].flags |= 457 entry[i].flags |=
451 KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 458 KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
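kvm_supported_xcr0() above is the single place where host support, KVM's own mask and the vendor module's MPX capability are combined; both kvm_update_cpuid() and the CPUID 0xD enumeration now consume it. The size stored in guest_xstate_size comes from walking the extended-state bits against CPUID leaf 0xD, which is what XSTATE_EXTEND_MASK enables; a self-contained sketch of that walk (the in-tree helper shown partially above may differ in detail):

/* Sketch: size of an xsave area that can hold the components in xstate_bv.
 * Each extended component reports its size (EAX) and offset (EBX) in
 * CPUID.0xD[sub-leaf = bit number]; the area must reach offset + size. */
static u32 xstate_size_sketch(u64 xstate_bv)
{
	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;	/* legacy region + header */
	int feature_bit = 0;

	xstate_bv &= XSTATE_EXTEND_MASK;	/* drop FP/SSE and reserved bit 63 */
	while (xstate_bv) {
		if (xstate_bv & 0x1) {
			u32 eax, ebx, ecx, edx;

			cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
			ret = max(ret, eax + ebx);
		}
		xstate_bv >>= 1;
		feature_bit++;
	}
	return ret;
}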
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 07ffca0a89e9..205b17eed93c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3668,6 +3668,10 @@ static const struct gprefix pfx_vmovntpx = {
3668 I(0, em_mov), N, N, N, 3668 I(0, em_mov), N, N, N,
3669}; 3669};
3670 3670
3671static const struct gprefix pfx_0f_28_0f_29 = {
3672 I(Aligned, em_mov), I(Aligned, em_mov), N, N,
3673};
3674
3671static const struct escape escape_d9 = { { 3675static const struct escape escape_d9 = { {
3672 N, N, N, N, N, N, N, I(DstMem, em_fnstcw), 3676 N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
3673}, { 3677}, {
@@ -3870,7 +3874,9 @@ static const struct opcode twobyte_table[256] = {
3870 IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), 3874 IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write),
3871 IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), 3875 IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write),
3872 N, N, N, N, 3876 N, N, N, N,
3873 N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), 3877 GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29),
3878 GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29),
3879 N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx),
3874 N, N, N, N, 3880 N, N, N, N,
3875 /* 0x30 - 0x3F */ 3881 /* 0x30 - 0x3F */
3876 II(ImplicitOps | Priv, em_wrmsr, wrmsr), 3882 II(ImplicitOps | Priv, em_wrmsr, wrmsr),
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9b531351a587..f5704d9e5ddc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3329,7 +3329,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
3329 arch.direct_map = vcpu->arch.mmu.direct_map; 3329 arch.direct_map = vcpu->arch.mmu.direct_map;
3330 arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); 3330 arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu);
3331 3331
3332 return kvm_setup_async_pf(vcpu, gva, gfn, &arch); 3332 return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch);
3333} 3333}
3334 3334
3335static bool can_do_async_pf(struct kvm_vcpu *vcpu) 3335static bool can_do_async_pf(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index cba218a2f08d..b1e6c1bf68d3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -913,7 +913,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
913 * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't 913 * and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
914 * used by guest then tlbs are not flushed, so guest is allowed to access the 914 * used by guest then tlbs are not flushed, so guest is allowed to access the
915 * freed pages. 915 * freed pages.
916 * And we increase kvm->tlbs_dirty to delay tlbs flush in this case. 916 * We set tlbs_dirty to let the notifier know this change and delay the flush
917 * until such a case actually happens.
917 */ 918 */
918static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) 919static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
919{ 920{
@@ -942,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
942 return -EINVAL; 943 return -EINVAL;
943 944
944 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { 945 if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
945 vcpu->kvm->tlbs_dirty++; 946 vcpu->kvm->tlbs_dirty = true;
946 continue; 947 continue;
947 } 948 }
948 949
@@ -957,7 +958,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
957 958
958 if (gfn != sp->gfns[i]) { 959 if (gfn != sp->gfns[i]) {
959 drop_spte(vcpu->kvm, &sp->spt[i]); 960 drop_spte(vcpu->kvm, &sp->spt[i]);
960 vcpu->kvm->tlbs_dirty++; 961 vcpu->kvm->tlbs_dirty = true;
961 continue; 962 continue;
962 } 963 }
963 964
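tlbs_dirty is now a plain flag: sync_page() only needs to record that some sptes were zapped without a TLB flush, and the MMU-notifier side only needs to know whether that happened at all before deciding to flush. An illustrative producer/consumer pairing (the consumer function and its argument are assumptions; in the real code the flush path is also responsible for clearing the flag):

/* Producer side, as in sync_page() above: an spte was dropped but the
 * remote TLB flush was deferred. */
static void mark_tlbs_dirty(struct kvm *kvm)
{
	kvm->tlbs_dirty = true;
}

/* Consumer side (illustrative): flush if either the unmap touched live
 * mappings or a previous sync deferred its flush. */
static void invalidate_range_sketch(struct kvm *kvm, bool unmap_needed_flush)
{
	if (unmap_needed_flush || kvm->tlbs_dirty)
		kvm_flush_remote_tlbs(kvm);
}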
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2de1bc09a8d4..7f4f9c2badae 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,6 +34,7 @@
34#include <asm/perf_event.h> 34#include <asm/perf_event.h>
35#include <asm/tlbflush.h> 35#include <asm/tlbflush.h>
36#include <asm/desc.h> 36#include <asm/desc.h>
37#include <asm/debugreg.h>
37#include <asm/kvm_para.h> 38#include <asm/kvm_para.h>
38 39
39#include <asm/virtext.h> 40#include <asm/virtext.h>
@@ -303,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
303 return vmcb->control.intercept_cr & (1U << bit); 304 return vmcb->control.intercept_cr & (1U << bit);
304} 305}
305 306
306static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) 307static inline void set_dr_intercepts(struct vcpu_svm *svm)
307{ 308{
308 struct vmcb *vmcb = get_host_vmcb(svm); 309 struct vmcb *vmcb = get_host_vmcb(svm);
309 310
310 vmcb->control.intercept_dr |= (1U << bit); 311 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
312 | (1 << INTERCEPT_DR1_READ)
313 | (1 << INTERCEPT_DR2_READ)
314 | (1 << INTERCEPT_DR3_READ)
315 | (1 << INTERCEPT_DR4_READ)
316 | (1 << INTERCEPT_DR5_READ)
317 | (1 << INTERCEPT_DR6_READ)
318 | (1 << INTERCEPT_DR7_READ)
319 | (1 << INTERCEPT_DR0_WRITE)
320 | (1 << INTERCEPT_DR1_WRITE)
321 | (1 << INTERCEPT_DR2_WRITE)
322 | (1 << INTERCEPT_DR3_WRITE)
323 | (1 << INTERCEPT_DR4_WRITE)
324 | (1 << INTERCEPT_DR5_WRITE)
325 | (1 << INTERCEPT_DR6_WRITE)
326 | (1 << INTERCEPT_DR7_WRITE);
311 327
312 recalc_intercepts(svm); 328 recalc_intercepts(svm);
313} 329}
314 330
315static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) 331static inline void clr_dr_intercepts(struct vcpu_svm *svm)
316{ 332{
317 struct vmcb *vmcb = get_host_vmcb(svm); 333 struct vmcb *vmcb = get_host_vmcb(svm);
318 334
319 vmcb->control.intercept_dr &= ~(1U << bit); 335 vmcb->control.intercept_dr = 0;
320 336
321 recalc_intercepts(svm); 337 recalc_intercepts(svm);
322} 338}
@@ -1080,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm)
1080 set_cr_intercept(svm, INTERCEPT_CR4_WRITE); 1096 set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
1081 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 1097 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
1082 1098
1083 set_dr_intercept(svm, INTERCEPT_DR0_READ); 1099 set_dr_intercepts(svm);
1084 set_dr_intercept(svm, INTERCEPT_DR1_READ);
1085 set_dr_intercept(svm, INTERCEPT_DR2_READ);
1086 set_dr_intercept(svm, INTERCEPT_DR3_READ);
1087 set_dr_intercept(svm, INTERCEPT_DR4_READ);
1088 set_dr_intercept(svm, INTERCEPT_DR5_READ);
1089 set_dr_intercept(svm, INTERCEPT_DR6_READ);
1090 set_dr_intercept(svm, INTERCEPT_DR7_READ);
1091
1092 set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
1093 set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
1094 set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
1095 set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
1096 set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
1097 set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
1098 set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
1099 set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
1100 1100
1101 set_exception_intercept(svm, PF_VECTOR); 1101 set_exception_intercept(svm, PF_VECTOR);
1102 set_exception_intercept(svm, UD_VECTOR); 1102 set_exception_intercept(svm, UD_VECTOR);
@@ -1684,6 +1684,21 @@ static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
1684 mark_dirty(svm->vmcb, VMCB_DR); 1684 mark_dirty(svm->vmcb, VMCB_DR);
1685} 1685}
1686 1686
1687static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
1688{
1689 struct vcpu_svm *svm = to_svm(vcpu);
1690
1691 get_debugreg(vcpu->arch.db[0], 0);
1692 get_debugreg(vcpu->arch.db[1], 1);
1693 get_debugreg(vcpu->arch.db[2], 2);
1694 get_debugreg(vcpu->arch.db[3], 3);
1695 vcpu->arch.dr6 = svm_get_dr6(vcpu);
1696 vcpu->arch.dr7 = svm->vmcb->save.dr7;
1697
1698 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
1699 set_dr_intercepts(svm);
1700}
1701
1687static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1702static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1688{ 1703{
1689 struct vcpu_svm *svm = to_svm(vcpu); 1704 struct vcpu_svm *svm = to_svm(vcpu);
@@ -2842,6 +2857,7 @@ static int iret_interception(struct vcpu_svm *svm)
2842 clr_intercept(svm, INTERCEPT_IRET); 2857 clr_intercept(svm, INTERCEPT_IRET);
2843 svm->vcpu.arch.hflags |= HF_IRET_MASK; 2858 svm->vcpu.arch.hflags |= HF_IRET_MASK;
2844 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); 2859 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2860 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2845 return 1; 2861 return 1;
2846} 2862}
2847 2863
@@ -2974,6 +2990,17 @@ static int dr_interception(struct vcpu_svm *svm)
2974 unsigned long val; 2990 unsigned long val;
2975 int err; 2991 int err;
2976 2992
2993 if (svm->vcpu.guest_debug == 0) {
2994 /*
2995 * No more DR vmexits; force a reload of the debug registers
2996 * and reenter on this instruction. The next vmexit will
2997 * retrieve the full state of the debug registers.
2998 */
2999 clr_dr_intercepts(svm);
3000 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
3001 return 1;
3002 }
3003
2977 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) 3004 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2978 return emulate_on_interception(svm); 3005 return emulate_on_interception(svm);
2979 3006
@@ -3649,7 +3676,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3649 return ret; 3676 return ret;
3650} 3677}
3651 3678
3652static int enable_irq_window(struct kvm_vcpu *vcpu) 3679static void enable_irq_window(struct kvm_vcpu *vcpu)
3653{ 3680{
3654 struct vcpu_svm *svm = to_svm(vcpu); 3681 struct vcpu_svm *svm = to_svm(vcpu);
3655 3682
@@ -3663,16 +3690,15 @@ static int enable_irq_window(struct kvm_vcpu *vcpu)
3663 svm_set_vintr(svm); 3690 svm_set_vintr(svm);
3664 svm_inject_irq(svm, 0x0); 3691 svm_inject_irq(svm, 0x0);
3665 } 3692 }
3666 return 0;
3667} 3693}
3668 3694
3669static int enable_nmi_window(struct kvm_vcpu *vcpu) 3695static void enable_nmi_window(struct kvm_vcpu *vcpu)
3670{ 3696{
3671 struct vcpu_svm *svm = to_svm(vcpu); 3697 struct vcpu_svm *svm = to_svm(vcpu);
3672 3698
3673 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) 3699 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3674 == HF_NMI_MASK) 3700 == HF_NMI_MASK)
3675 return 0; /* IRET will cause a vm exit */ 3701 return; /* IRET will cause a vm exit */
3676 3702
3677 /* 3703 /*
3678 * Something prevents NMI from been injected. Single step over possible 3704 * Something prevents NMI from been injected. Single step over possible
@@ -3681,7 +3707,6 @@ static int enable_nmi_window(struct kvm_vcpu *vcpu)
3681 svm->nmi_singlestep = true; 3707 svm->nmi_singlestep = true;
3682 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 3708 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3683 update_db_bp_intercept(vcpu); 3709 update_db_bp_intercept(vcpu);
3684 return 0;
3685} 3710}
3686 3711
3687static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 3712static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4064,6 +4089,11 @@ static bool svm_invpcid_supported(void)
4064 return false; 4089 return false;
4065} 4090}
4066 4091
4092static bool svm_mpx_supported(void)
4093{
4094 return false;
4095}
4096
4067static bool svm_has_wbinvd_exit(void) 4097static bool svm_has_wbinvd_exit(void)
4068{ 4098{
4069 return true; 4099 return true;
@@ -4302,6 +4332,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4302 .get_dr6 = svm_get_dr6, 4332 .get_dr6 = svm_get_dr6,
4303 .set_dr6 = svm_set_dr6, 4333 .set_dr6 = svm_set_dr6,
4304 .set_dr7 = svm_set_dr7, 4334 .set_dr7 = svm_set_dr7,
4335 .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
4305 .cache_reg = svm_cache_reg, 4336 .cache_reg = svm_cache_reg,
4306 .get_rflags = svm_get_rflags, 4337 .get_rflags = svm_get_rflags,
4307 .set_rflags = svm_set_rflags, 4338 .set_rflags = svm_set_rflags,
@@ -4345,6 +4376,7 @@ static struct kvm_x86_ops svm_x86_ops = {
4345 4376
4346 .rdtscp_supported = svm_rdtscp_supported, 4377 .rdtscp_supported = svm_rdtscp_supported,
4347 .invpcid_supported = svm_invpcid_supported, 4378 .invpcid_supported = svm_invpcid_supported,
4379 .mpx_supported = svm_mpx_supported,
4348 4380
4349 .set_supported_cpuid = svm_set_supported_cpuid, 4381 .set_supported_cpuid = svm_set_supported_cpuid,
4350 4382
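The SVM half of the lazy debug-register handling is now complete: dr_interception() turns off the DR intercepts on the first guest DR access when userspace is not debugging, and svm_sync_dirty_debug_regs() later pulls the guest values back out of the hardware and re-arms the intercepts. A simplified sketch of how the run loop is expected to use KVM_DEBUGREG_WONT_EXIT around a guest entry (the real loop in x86.c uses eff_db and also restores dr6/dr7; this only shows the shape):

/* Sketch of the run-loop usage of the new flag (simplified). */
static void debugreg_run_sketch(struct kvm_vcpu *vcpu)
{
	/* Before entry: reload guest breakpoint registers only when needed. */
	if (unlikely(vcpu->arch.switch_db_regs)) {
		set_debugreg(vcpu->arch.db[0], 0);
		set_debugreg(vcpu->arch.db[1], 1);
		set_debugreg(vcpu->arch.db[2], 2);
		set_debugreg(vcpu->arch.db[3], 3);
	}

	/* ... guest runs; a DR access may clear the intercepts and set
	 * KVM_DEBUGREG_WONT_EXIT ... */

	/* After exit: if DR exits were suppressed, the hardware registers are
	 * the only up-to-date copy, so read them back and re-enable exits. */
	if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
		kvm_x86_ops->sync_dirty_debug_regs(vcpu);
}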
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 392752834751..1320e0f8e611 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -31,6 +31,7 @@
31#include <linux/ftrace_event.h> 31#include <linux/ftrace_event.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/tboot.h> 33#include <linux/tboot.h>
34#include <linux/hrtimer.h>
34#include "kvm_cache_regs.h" 35#include "kvm_cache_regs.h"
35#include "x86.h" 36#include "x86.h"
36 37
@@ -42,6 +43,7 @@
42#include <asm/i387.h> 43#include <asm/i387.h>
43#include <asm/xcr.h> 44#include <asm/xcr.h>
44#include <asm/perf_event.h> 45#include <asm/perf_event.h>
46#include <asm/debugreg.h>
45#include <asm/kexec.h> 47#include <asm/kexec.h>
46 48
47#include "trace.h" 49#include "trace.h"
@@ -110,6 +112,8 @@ module_param(nested, bool, S_IRUGO);
110 112
111#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) 113#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
112 114
115#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
116
113/* 117/*
114 * These 2 parameters are used to config the controls for Pause-Loop Exiting: 118 * These 2 parameters are used to config the controls for Pause-Loop Exiting:
115 * ple_gap: upper bound on the amount of time between two successive 119 * ple_gap: upper bound on the amount of time between two successive
@@ -202,6 +206,7 @@ struct __packed vmcs12 {
202 u64 guest_pdptr1; 206 u64 guest_pdptr1;
203 u64 guest_pdptr2; 207 u64 guest_pdptr2;
204 u64 guest_pdptr3; 208 u64 guest_pdptr3;
209 u64 guest_bndcfgs;
205 u64 host_ia32_pat; 210 u64 host_ia32_pat;
206 u64 host_ia32_efer; 211 u64 host_ia32_efer;
207 u64 host_ia32_perf_global_ctrl; 212 u64 host_ia32_perf_global_ctrl;
@@ -374,6 +379,9 @@ struct nested_vmx {
374 */ 379 */
375 struct page *apic_access_page; 380 struct page *apic_access_page;
376 u64 msr_ia32_feature_control; 381 u64 msr_ia32_feature_control;
382
383 struct hrtimer preemption_timer;
384 bool preemption_timer_expired;
377}; 385};
378 386
379#define POSTED_INTR_ON 0 387#define POSTED_INTR_ON 0
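The hrtimer and expired flag added to struct nested_vmx back the emulated VMX preemption timer: instead of relying on the hardware feature while L2 runs, a host timer fires when the count L1 programmed would have elapsed and forces the corresponding nested vmexit. A sketch of arming and expiry under the advertised rate of 2^VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE TSC cycles per tick (function names and the tsc_khz parameter are assumptions; the in-tree code reads the value from vmcs12 and the vcpu's virtual TSC rate):

/* Sketch: convert the vmcs12 preemption-timer value into nanoseconds at the
 * guest's TSC rate and arm the emulation hrtimer. */
static void start_preemption_timer_sketch(struct vcpu_vmx *vmx,
					  u64 timer_value, u32 tsc_khz)
{
	u64 ns;

	if (!tsc_khz)
		return;	/* no usable TSC rate, nothing to emulate against */

	ns = timer_value << VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; /* TSC cycles */
	ns *= 1000000;
	do_div(ns, tsc_khz);	/* cycles -> nanoseconds */

	hrtimer_start(&vmx->nested.preemption_timer, ns_to_ktime(ns),
		      HRTIMER_MODE_REL);
}

/* Expiry: flag it and kick the vcpu so the next entry converts it into a
 * preemption-timer vmexit from L2 to L1. */
static enum hrtimer_restart preemption_timer_fn_sketch(struct hrtimer *timer)
{
	struct vcpu_vmx *vmx =
		container_of(timer, struct vcpu_vmx, nested.preemption_timer);

	vmx->nested.preemption_timer_expired = true;
	kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
	kvm_vcpu_kick(&vmx->vcpu);
	return HRTIMER_NORESTART;
}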
@@ -441,6 +449,7 @@ struct vcpu_vmx {
441#endif 449#endif
442 int gs_ldt_reload_needed; 450 int gs_ldt_reload_needed;
443 int fs_reload_needed; 451 int fs_reload_needed;
452 u64 msr_host_bndcfgs;
444 } host_state; 453 } host_state;
445 struct { 454 struct {
446 int vm86_active; 455 int vm86_active;
@@ -533,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = {
533 GUEST_CS_LIMIT, 542 GUEST_CS_LIMIT,
534 GUEST_CS_BASE, 543 GUEST_CS_BASE,
535 GUEST_ES_BASE, 544 GUEST_ES_BASE,
545 GUEST_BNDCFGS,
536 CR0_GUEST_HOST_MASK, 546 CR0_GUEST_HOST_MASK,
537 CR0_READ_SHADOW, 547 CR0_READ_SHADOW,
538 CR4_READ_SHADOW, 548 CR4_READ_SHADOW,
@@ -588,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
588 FIELD64(GUEST_PDPTR1, guest_pdptr1), 598 FIELD64(GUEST_PDPTR1, guest_pdptr1),
589 FIELD64(GUEST_PDPTR2, guest_pdptr2), 599 FIELD64(GUEST_PDPTR2, guest_pdptr2),
590 FIELD64(GUEST_PDPTR3, guest_pdptr3), 600 FIELD64(GUEST_PDPTR3, guest_pdptr3),
601 FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
591 FIELD64(HOST_IA32_PAT, host_ia32_pat), 602 FIELD64(HOST_IA32_PAT, host_ia32_pat),
592 FIELD64(HOST_IA32_EFER, host_ia32_efer), 603 FIELD64(HOST_IA32_EFER, host_ia32_efer),
593 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), 604 FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -718,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
718static u64 construct_eptp(unsigned long root_hpa); 729static u64 construct_eptp(unsigned long root_hpa);
719static void kvm_cpu_vmxon(u64 addr); 730static void kvm_cpu_vmxon(u64 addr);
720static void kvm_cpu_vmxoff(void); 731static void kvm_cpu_vmxoff(void);
732static bool vmx_mpx_supported(void);
721static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); 733static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
722static void vmx_set_segment(struct kvm_vcpu *vcpu, 734static void vmx_set_segment(struct kvm_vcpu *vcpu,
723 struct kvm_segment *var, int seg); 735 struct kvm_segment *var, int seg);
@@ -728,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
728static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); 740static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
729static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); 741static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
730static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); 742static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
743static bool vmx_mpx_supported(void);
731 744
732static DEFINE_PER_CPU(struct vmcs *, vmxarea); 745static DEFINE_PER_CPU(struct vmcs *, vmxarea);
733static DEFINE_PER_CPU(struct vmcs *, current_vmcs); 746static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1047,6 +1060,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
1047 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; 1060 return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
1048} 1061}
1049 1062
1063static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
1064{
1065 return vmcs12->pin_based_vm_exec_control &
1066 PIN_BASED_VMX_PREEMPTION_TIMER;
1067}
1068
1050static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) 1069static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
1051{ 1070{
1052 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); 1071 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1710,6 +1729,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
1710 if (is_long_mode(&vmx->vcpu)) 1729 if (is_long_mode(&vmx->vcpu))
1711 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 1730 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
1712#endif 1731#endif
1732 if (boot_cpu_has(X86_FEATURE_MPX))
1733 rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
1713 for (i = 0; i < vmx->save_nmsrs; ++i) 1734 for (i = 0; i < vmx->save_nmsrs; ++i)
1714 kvm_set_shared_msr(vmx->guest_msrs[i].index, 1735 kvm_set_shared_msr(vmx->guest_msrs[i].index,
1715 vmx->guest_msrs[i].data, 1736 vmx->guest_msrs[i].data,
@@ -1747,6 +1768,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
1747#ifdef CONFIG_X86_64 1768#ifdef CONFIG_X86_64
1748 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1769 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
1749#endif 1770#endif
1771 if (vmx->host_state.msr_host_bndcfgs)
1772 wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
1750 /* 1773 /*
1751 * If the FPU is not active (through the host task or 1774 * If the FPU is not active (through the host task or
1752 * the guest vcpu), then restore the cr0.TS bit. 1775 * the guest vcpu), then restore the cr0.TS bit.
@@ -2248,9 +2271,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2248 */ 2271 */
2249 nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; 2272 nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2250 nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | 2273 nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
2251 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | 2274 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
2275 nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
2252 PIN_BASED_VMX_PREEMPTION_TIMER; 2276 PIN_BASED_VMX_PREEMPTION_TIMER;
2253 nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2254 2277
2255 /* 2278 /*
2256 * Exit controls 2279 * Exit controls
@@ -2265,15 +2288,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2265#ifdef CONFIG_X86_64 2288#ifdef CONFIG_X86_64
2266 VM_EXIT_HOST_ADDR_SPACE_SIZE | 2289 VM_EXIT_HOST_ADDR_SPACE_SIZE |
2267#endif 2290#endif
2268 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | 2291 VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
2292 nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2293 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
2269 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; 2294 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2270 if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || 2295 if (vmx_mpx_supported())
2271 !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { 2296 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
2272 nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
2273 nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
2274 }
2275 nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
2276 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
2277 2297
2278 /* entry controls */ 2298 /* entry controls */
2279 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 2299 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2287,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2287 VM_ENTRY_LOAD_IA32_PAT; 2307 VM_ENTRY_LOAD_IA32_PAT;
2288 nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | 2308 nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
2289 VM_ENTRY_LOAD_IA32_EFER); 2309 VM_ENTRY_LOAD_IA32_EFER);
2310 if (vmx_mpx_supported())
2311 nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
2290 2312
2291 /* cpu-based controls */ 2313 /* cpu-based controls */
2292 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, 2314 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2342,9 +2364,9 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2342 2364
2343 /* miscellaneous data */ 2365 /* miscellaneous data */
2344 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); 2366 rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
2345 nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | 2367 nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
2346 VMX_MISC_SAVE_EFER_LMA; 2368 nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
2347 nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT; 2369 VMX_MISC_ACTIVITY_HLT;
2348 nested_vmx_misc_high = 0; 2370 nested_vmx_misc_high = 0;
2349} 2371}
2350 2372
@@ -2479,6 +2501,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2479 case MSR_IA32_SYSENTER_ESP: 2501 case MSR_IA32_SYSENTER_ESP:
2480 data = vmcs_readl(GUEST_SYSENTER_ESP); 2502 data = vmcs_readl(GUEST_SYSENTER_ESP);
2481 break; 2503 break;
2504 case MSR_IA32_BNDCFGS:
2505 if (!vmx_mpx_supported())
2506 return 1;
2507 data = vmcs_read64(GUEST_BNDCFGS);
2508 break;
2482 case MSR_IA32_FEATURE_CONTROL: 2509 case MSR_IA32_FEATURE_CONTROL:
2483 if (!nested_vmx_allowed(vcpu)) 2510 if (!nested_vmx_allowed(vcpu))
2484 return 1; 2511 return 1;
@@ -2547,6 +2574,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2547 case MSR_IA32_SYSENTER_ESP: 2574 case MSR_IA32_SYSENTER_ESP:
2548 vmcs_writel(GUEST_SYSENTER_ESP, data); 2575 vmcs_writel(GUEST_SYSENTER_ESP, data);
2549 break; 2576 break;
2577 case MSR_IA32_BNDCFGS:
2578 if (!vmx_mpx_supported())
2579 return 1;
2580 vmcs_write64(GUEST_BNDCFGS, data);
2581 break;
2550 case MSR_IA32_TSC: 2582 case MSR_IA32_TSC:
2551 kvm_write_tsc(vcpu, msr_info); 2583 kvm_write_tsc(vcpu, msr_info);
2552 break; 2584 break;
@@ -2832,12 +2864,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2832 vmx_capability.ept, vmx_capability.vpid); 2864 vmx_capability.ept, vmx_capability.vpid);
2833 } 2865 }
2834 2866
2835 min = 0; 2867 min = VM_EXIT_SAVE_DEBUG_CONTROLS;
2836#ifdef CONFIG_X86_64 2868#ifdef CONFIG_X86_64
2837 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; 2869 min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
2838#endif 2870#endif
2839 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | 2871 opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
2840 VM_EXIT_ACK_INTR_ON_EXIT; 2872 VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
2841 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, 2873 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
2842 &_vmexit_control) < 0) 2874 &_vmexit_control) < 0)
2843 return -EIO; 2875 return -EIO;
@@ -2853,8 +2885,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
2853 !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) 2885 !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
2854 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; 2886 _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
2855 2887
2856 min = 0; 2888 min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
2857 opt = VM_ENTRY_LOAD_IA32_PAT; 2889 opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS;
2858 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, 2890 if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
2859 &_vmentry_control) < 0) 2891 &_vmentry_control) < 0)
2860 return -EIO; 2892 return -EIO;
@@ -4223,6 +4255,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
4223static u32 vmx_exec_control(struct vcpu_vmx *vmx) 4255static u32 vmx_exec_control(struct vcpu_vmx *vmx)
4224{ 4256{
4225 u32 exec_control = vmcs_config.cpu_based_exec_ctrl; 4257 u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
4258
4259 if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
4260 exec_control &= ~CPU_BASED_MOV_DR_EXITING;
4261
4226 if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { 4262 if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) {
4227 exec_control &= ~CPU_BASED_TPR_SHADOW; 4263 exec_control &= ~CPU_BASED_TPR_SHADOW;
4228#ifdef CONFIG_X86_64 4264#ifdef CONFIG_X86_64
@@ -4496,39 +4532,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
4496 PIN_BASED_NMI_EXITING; 4532 PIN_BASED_NMI_EXITING;
4497} 4533}
4498 4534
4499static int enable_irq_window(struct kvm_vcpu *vcpu) 4535static void enable_irq_window(struct kvm_vcpu *vcpu)
4500{ 4536{
4501 u32 cpu_based_vm_exec_control; 4537 u32 cpu_based_vm_exec_control;
4502 4538
4503 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
4504 /*
4505 * We get here if vmx_interrupt_allowed() said we can't
4506 * inject to L1 now because L2 must run. The caller will have
4507 * to make L2 exit right after entry, so we can inject to L1
4508 * more promptly.
4509 */
4510 return -EBUSY;
4511
4512 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 4539 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4513 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; 4540 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
4514 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 4541 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4515 return 0;
4516} 4542}
4517 4543
4518static int enable_nmi_window(struct kvm_vcpu *vcpu) 4544static void enable_nmi_window(struct kvm_vcpu *vcpu)
4519{ 4545{
4520 u32 cpu_based_vm_exec_control; 4546 u32 cpu_based_vm_exec_control;
4521 4547
4522 if (!cpu_has_virtual_nmis()) 4548 if (!cpu_has_virtual_nmis() ||
4523 return enable_irq_window(vcpu); 4549 vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
4524 4550 enable_irq_window(vcpu);
4525 if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) 4551 return;
4526 return enable_irq_window(vcpu); 4552 }
4527 4553
4528 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 4554 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
4529 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; 4555 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
4530 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); 4556 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
4531 return 0;
4532} 4557}
4533 4558
4534static void vmx_inject_irq(struct kvm_vcpu *vcpu) 4559static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4620,22 +4645,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
4620 4645
4621static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) 4646static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4622{ 4647{
4623 if (is_guest_mode(vcpu)) { 4648 if (to_vmx(vcpu)->nested.nested_run_pending)
4624 if (to_vmx(vcpu)->nested.nested_run_pending) 4649 return 0;
4625 return 0;
4626 if (nested_exit_on_nmi(vcpu)) {
4627 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
4628 NMI_VECTOR | INTR_TYPE_NMI_INTR |
4629 INTR_INFO_VALID_MASK, 0);
4630 /*
4631 * The NMI-triggered VM exit counts as injection:
4632 * clear this one and block further NMIs.
4633 */
4634 vcpu->arch.nmi_pending = 0;
4635 vmx_set_nmi_mask(vcpu, true);
4636 return 0;
4637 }
4638 }
4639 4650
4640 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) 4651 if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
4641 return 0; 4652 return 0;
@@ -4647,19 +4658,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
4647 4658
4648static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) 4659static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
4649{ 4660{
4650 if (is_guest_mode(vcpu)) { 4661 return (!to_vmx(vcpu)->nested.nested_run_pending &&
4651 if (to_vmx(vcpu)->nested.nested_run_pending) 4662 vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
4652 return 0;
4653 if (nested_exit_on_intr(vcpu)) {
4654 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
4655 0, 0);
4656 /*
4657 * fall through to normal code, but now in L1, not L2
4658 */
4659 }
4660 }
4661
4662 return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
4663 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 4663 !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
4664 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); 4664 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
4665} 4665}
@@ -5102,6 +5102,22 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5102 } 5102 }
5103 } 5103 }
5104 5104
5105 if (vcpu->guest_debug == 0) {
5106 u32 cpu_based_vm_exec_control;
5107
5108 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5109 cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING;
5110 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5111
5112 /*
5113 * No more DR vmexits; force a reload of the debug registers
5114 * and reenter on this instruction. The next vmexit will
5115 * retrieve the full state of the debug registers.
5116 */
5117 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
5118 return 1;
5119 }
5120
5105 exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 5121 exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
5106 dr = exit_qualification & DEBUG_REG_ACCESS_NUM; 5122 dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
5107 reg = DEBUG_REG_ACCESS_REG(exit_qualification); 5123 reg = DEBUG_REG_ACCESS_REG(exit_qualification);
@@ -5128,6 +5144,24 @@ static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
5128{ 5144{
5129} 5145}
5130 5146
5147static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
5148{
5149 u32 cpu_based_vm_exec_control;
5150
5151 get_debugreg(vcpu->arch.db[0], 0);
5152 get_debugreg(vcpu->arch.db[1], 1);
5153 get_debugreg(vcpu->arch.db[2], 2);
5154 get_debugreg(vcpu->arch.db[3], 3);
5155 get_debugreg(vcpu->arch.dr6, 6);
5156 vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
5157
5158 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
5159
5160 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5161 cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING;
5162 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
5163}
5164
5131static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) 5165static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
5132{ 5166{
5133 vmcs_writel(GUEST_DR7, val); 5167 vmcs_writel(GUEST_DR7, val);
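The handle_dr() hunk above and vmx_sync_dirty_debug_regs() are the two halves of a lazy debug-register scheme: when userspace is not debugging the guest, the first MOV-DR exit turns MOV-DR exiting off and sets KVM_DEBUGREG_WONT_EXIT; the next VM exit then reads the hardware values back and turns the intercept on again. Both sides open-code the same read-modify-write of the primary processor-based controls; a small helper along these lines (not part of the patch, shown only to make the pattern explicit) would capture it:

        /*
         * Sketch: toggle one bit in the primary processor-based VM-execution
         * controls, as handle_dr() and vmx_sync_dirty_debug_regs() both do
         * by hand for CPU_BASED_MOV_DR_EXITING.
         */
        static void vmcs_set_exec_bit_sketch(u32 bit, bool on)
        {
                u32 exec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);

                if (on)
                        exec |= bit;
                else
                        exec &= ~bit;
                vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec);
        }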
@@ -5727,6 +5761,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
5727 */ 5761 */
5728} 5762}
5729 5763
5764static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
5765{
5766 struct vcpu_vmx *vmx =
5767 container_of(timer, struct vcpu_vmx, nested.preemption_timer);
5768
5769 vmx->nested.preemption_timer_expired = true;
5770 kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
5771 kvm_vcpu_kick(&vmx->vcpu);
5772
5773 return HRTIMER_NORESTART;
5774}
5775
5730/* 5776/*
5731 * Emulate the VMXON instruction. 5777 * Emulate the VMXON instruction.
5732 * Currently, we just remember that VMX is active, and do not save or even 5778 * Currently, we just remember that VMX is active, and do not save or even
@@ -5791,6 +5837,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
5791 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); 5837 INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
5792 vmx->nested.vmcs02_num = 0; 5838 vmx->nested.vmcs02_num = 0;
5793 5839
5840 hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
5841 HRTIMER_MODE_REL);
5842 vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
5843
5794 vmx->nested.vmxon = true; 5844 vmx->nested.vmxon = true;
5795 5845
5796 skip_emulated_instruction(vcpu); 5846 skip_emulated_instruction(vcpu);
@@ -6767,9 +6817,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
6767 * table is L0's fault. 6817 * table is L0's fault.
6768 */ 6818 */
6769 return 0; 6819 return 0;
6770 case EXIT_REASON_PREEMPTION_TIMER:
6771 return vmcs12->pin_based_vm_exec_control &
6772 PIN_BASED_VMX_PREEMPTION_TIMER;
6773 case EXIT_REASON_WBINVD: 6820 case EXIT_REASON_WBINVD:
6774 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); 6821 return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
6775 case EXIT_REASON_XSETBV: 6822 case EXIT_REASON_XSETBV:
@@ -6785,27 +6832,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
6785 *info2 = vmcs_read32(VM_EXIT_INTR_INFO); 6832 *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
6786} 6833}
6787 6834
6788static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
6789{
6790 u64 delta_tsc_l1;
6791 u32 preempt_val_l1, preempt_val_l2, preempt_scale;
6792
6793 if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control &
6794 PIN_BASED_VMX_PREEMPTION_TIMER))
6795 return;
6796 preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
6797 MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
6798 preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
6799 delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc())
6800 - vcpu->arch.last_guest_tsc;
6801 preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
6802 if (preempt_val_l2 <= preempt_val_l1)
6803 preempt_val_l2 = 0;
6804 else
6805 preempt_val_l2 -= preempt_val_l1;
6806 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
6807}
6808
6809/* 6835/*
6810 * The guest has exited. See if we can fix it or if we need userspace 6836 * The guest has exited. See if we can fix it or if we need userspace
6811 * assistance. 6837 * assistance.
@@ -7052,6 +7078,12 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
7052 local_irq_enable(); 7078 local_irq_enable();
7053} 7079}
7054 7080
7081static bool vmx_mpx_supported(void)
7082{
7083 return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
7084 (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
7085}
7086
7055static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) 7087static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
7056{ 7088{
7057 u32 exit_intr_info; 7089 u32 exit_intr_info;
@@ -7218,8 +7250,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7218 atomic_switch_perf_msrs(vmx); 7250 atomic_switch_perf_msrs(vmx);
7219 debugctlmsr = get_debugctlmsr(); 7251 debugctlmsr = get_debugctlmsr();
7220 7252
7221 if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending)
7222 nested_adjust_preemption_timer(vcpu);
7223 vmx->__launched = vmx->loaded_vmcs->launched; 7253 vmx->__launched = vmx->loaded_vmcs->launched;
7224 asm( 7254 asm(
7225 /* Store host registers */ 7255 /* Store host registers */
@@ -7616,6 +7646,28 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
7616 kvm_inject_page_fault(vcpu, fault); 7646 kvm_inject_page_fault(vcpu, fault);
7617} 7647}
7618 7648
7649static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
7650{
7651 u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
7652 struct vcpu_vmx *vmx = to_vmx(vcpu);
7653
7654 if (vcpu->arch.virtual_tsc_khz == 0)
7655 return;
7656
7657 /* Make sure short timeouts reliably trigger an immediate vmexit.
7658 * hrtimer_start does not guarantee this. */
7659 if (preemption_timeout <= 1) {
7660 vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
7661 return;
7662 }
7663
7664 preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
7665 preemption_timeout *= 1000000;
7666 do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
7667 hrtimer_start(&vmx->nested.preemption_timer,
7668 ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
7669}
7670
7619/* 7671/*
7620 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested 7672 * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
7621 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it 7673 * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
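vmx_start_preemption_timer() above converts the vmcs12 preemption-timer value into an hrtimer timeout: the emulated timer ticks once every 2^VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE TSC cycles, so the timeout is value << rate cycles, scaled to nanoseconds by the guest's virtual_tsc_khz; vmx_get_preemption_timer_value() (further down) performs the inverse conversion when the remaining time has to be written back into vmcs12. A self-contained sketch of the arithmetic, using an assumed rate of 5 and an assumed 2 GHz guest TSC purely for the example numbers:

        /*
         * Sketch of the timer-value <-> nanoseconds conversion used for the
         * emulated VMX preemption timer.  The rate of 5 (one tick per 32 TSC
         * cycles) and the 2 GHz TSC are example assumptions, not values taken
         * from the patch.
         */
        #include <stdint.h>
        #include <stdio.h>

        #define EMULATED_PREEMPTION_TIMER_RATE 5

        static uint64_t timer_value_to_ns(uint32_t value, uint64_t tsc_khz)
        {
                uint64_t cycles = (uint64_t)value << EMULATED_PREEMPTION_TIMER_RATE;

                /* cycles / (tsc_khz * 1000) seconds == cycles * 1e6 / tsc_khz ns */
                return cycles * 1000000ULL / tsc_khz;
        }

        static uint32_t ns_to_timer_value(uint64_t ns, uint64_t tsc_khz)
        {
                uint64_t cycles = ns * tsc_khz / 1000000ULL;

                return (uint32_t)(cycles >> EMULATED_PREEMPTION_TIMER_RATE);
        }

        int main(void)
        {
                /* 1000 ticks at 2 GHz: 1000 * 32 cycles = 32000 cycles = 16000 ns */
                printf("%llu ns\n",
                       (unsigned long long)timer_value_to_ns(1000, 2000000));
                return 0;
        }

Timeouts of one tick or less skip the hrtimer entirely and fire the callback synchronously, because hrtimer_start() does not guarantee an expiry for an already-elapsed deadline.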
@@ -7629,7 +7681,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7629{ 7681{
7630 struct vcpu_vmx *vmx = to_vmx(vcpu); 7682 struct vcpu_vmx *vmx = to_vmx(vcpu);
7631 u32 exec_control; 7683 u32 exec_control;
7632 u32 exit_control;
7633 7684
7634 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); 7685 vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
7635 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); 7686 vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7687,13 +7738,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7687 7738
7688 vmcs_write64(VMCS_LINK_POINTER, -1ull); 7739 vmcs_write64(VMCS_LINK_POINTER, -1ull);
7689 7740
7690 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, 7741 exec_control = vmcs12->pin_based_vm_exec_control;
7691 (vmcs_config.pin_based_exec_ctrl | 7742 exec_control |= vmcs_config.pin_based_exec_ctrl;
7692 vmcs12->pin_based_vm_exec_control)); 7743 exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
7744 vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control);
7693 7745
7694 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) 7746 vmx->nested.preemption_timer_expired = false;
7695 vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 7747 if (nested_cpu_has_preemption_timer(vmcs12))
7696 vmcs12->vmx_preemption_timer_value); 7748 vmx_start_preemption_timer(vcpu);
7697 7749
7698 /* 7750 /*
7699 * Whether page-faults are trapped is determined by a combination of 7751 * Whether page-faults are trapped is determined by a combination of
@@ -7721,7 +7773,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7721 enable_ept ? vmcs12->page_fault_error_code_match : 0); 7773 enable_ept ? vmcs12->page_fault_error_code_match : 0);
7722 7774
7723 if (cpu_has_secondary_exec_ctrls()) { 7775 if (cpu_has_secondary_exec_ctrls()) {
7724 u32 exec_control = vmx_secondary_exec_control(vmx); 7776 exec_control = vmx_secondary_exec_control(vmx);
7725 if (!vmx->rdtscp_enabled) 7777 if (!vmx->rdtscp_enabled)
7726 exec_control &= ~SECONDARY_EXEC_RDTSCP; 7778 exec_control &= ~SECONDARY_EXEC_RDTSCP;
7727 /* Take the following fields only from vmcs12 */ 7779 /* Take the following fields only from vmcs12 */
@@ -7808,10 +7860,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7808 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER 7860 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
7809 * bits are further modified by vmx_set_efer() below. 7861 * bits are further modified by vmx_set_efer() below.
7810 */ 7862 */
7811 exit_control = vmcs_config.vmexit_ctrl; 7863 vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
7812 if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
7813 exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
7814 vm_exit_controls_init(vmx, exit_control);
7815 7864
7816 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are 7865 /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
7817 * emulated by vmx_set_efer(), below. 7866 * emulated by vmx_set_efer(), below.
@@ -7830,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7830 7879
7831 set_cr4_guest_host_mask(vmx); 7880 set_cr4_guest_host_mask(vmx);
7832 7881
7882 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
7883 vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
7884
7833 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) 7885 if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
7834 vmcs_write64(TSC_OFFSET, 7886 vmcs_write64(TSC_OFFSET,
7835 vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); 7887 vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8155,6 +8207,58 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
8155 } 8207 }
8156} 8208}
8157 8209
8210static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
8211{
8212 struct vcpu_vmx *vmx = to_vmx(vcpu);
8213
8214 if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
8215 vmx->nested.preemption_timer_expired) {
8216 if (vmx->nested.nested_run_pending)
8217 return -EBUSY;
8218 nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
8219 return 0;
8220 }
8221
8222 if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
8223 if (vmx->nested.nested_run_pending ||
8224 vcpu->arch.interrupt.pending)
8225 return -EBUSY;
8226 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
8227 NMI_VECTOR | INTR_TYPE_NMI_INTR |
8228 INTR_INFO_VALID_MASK, 0);
8229 /*
8230 * The NMI-triggered VM exit counts as injection:
8231 * clear this one and block further NMIs.
8232 */
8233 vcpu->arch.nmi_pending = 0;
8234 vmx_set_nmi_mask(vcpu, true);
8235 return 0;
8236 }
8237
8238 if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
8239 nested_exit_on_intr(vcpu)) {
8240 if (vmx->nested.nested_run_pending)
8241 return -EBUSY;
8242 nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
8243 }
8244
8245 return 0;
8246}
8247
8248static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
8249{
8250 ktime_t remaining =
8251 hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
8252 u64 value;
8253
8254 if (ktime_to_ns(remaining) <= 0)
8255 return 0;
8256
8257 value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
8258 do_div(value, 1000000);
8259 return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
8260}
8261
8158/* 8262/*
8159 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits 8263 * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
8160 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), 8264 * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
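vmx_check_nested_events() above centralizes the decision that used to be scattered through vmx_nmi_allowed(), vmx_interrupt_allowed() and enable_irq_window(): while L2 runs, an expired preemption timer, a pending NMI and a pending external interrupt are checked in that order, and each either triggers an L2-to-L1 exit or returns -EBUSY when a nested VM entry is still pending. The generic x86 code consumes that return value by forcing an immediate exit and retrying; roughly (a sketch of the caller-side contract, see the x86.c hunks below for the real code):

        /*
         * Sketch: how the generic entry path reacts to check_nested_events().
         * A non-zero return (-EBUSY) means the event has to wait until the
         * pending nested VM entry has completed, so the vcpu should exit
         * again immediately after entering the guest.
         */
        static bool nested_events_force_immediate_exit(struct kvm_vcpu *vcpu,
                                                       bool req_int_win)
        {
                if (!is_guest_mode(vcpu) || !kvm_x86_ops->check_nested_events)
                        return false;

                return kvm_x86_ops->check_nested_events(vcpu, req_int_win) != 0;
        }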
@@ -8225,10 +8329,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8225 else 8329 else
8226 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; 8330 vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
8227 8331
8228 if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && 8332 if (nested_cpu_has_preemption_timer(vmcs12)) {
8229 (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) 8333 if (vmcs12->vm_exit_controls &
8230 vmcs12->vmx_preemption_timer_value = 8334 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
8231 vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); 8335 vmcs12->vmx_preemption_timer_value =
8336 vmx_get_preemption_timer_value(vcpu);
8337 hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
8338 }
8232 8339
8233 /* 8340 /*
8234 * In some cases (usually, nested EPT), L2 is allowed to change its 8341 * In some cases (usually, nested EPT), L2 is allowed to change its
@@ -8260,6 +8367,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8260 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); 8367 vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
8261 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); 8368 vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
8262 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); 8369 vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
8370 if (vmx_mpx_supported())
8371 vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
8263 8372
8264 /* update exit information fields: */ 8373 /* update exit information fields: */
8265 8374
@@ -8369,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
8369 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); 8478 vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
8370 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); 8479 vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
8371 8480
8481 /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
8482 if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
8483 vmcs_write64(GUEST_BNDCFGS, 0);
8484
8372 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { 8485 if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
8373 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); 8486 vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
8374 vcpu->arch.pat = vmcs12->host_ia32_pat; 8487 vcpu->arch.pat = vmcs12->host_ia32_pat;
@@ -8495,6 +8608,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8495 nested_vmx_succeed(vcpu); 8608 nested_vmx_succeed(vcpu);
8496 if (enable_shadow_vmcs) 8609 if (enable_shadow_vmcs)
8497 vmx->nested.sync_shadow_vmcs = true; 8610 vmx->nested.sync_shadow_vmcs = true;
8611
8612 /* in case we halted in L2 */
8613 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
8498} 8614}
8499 8615
8500/* 8616/*
@@ -8573,6 +8689,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
8573 .get_dr6 = vmx_get_dr6, 8689 .get_dr6 = vmx_get_dr6,
8574 .set_dr6 = vmx_set_dr6, 8690 .set_dr6 = vmx_set_dr6,
8575 .set_dr7 = vmx_set_dr7, 8691 .set_dr7 = vmx_set_dr7,
8692 .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
8576 .cache_reg = vmx_cache_reg, 8693 .cache_reg = vmx_cache_reg,
8577 .get_rflags = vmx_get_rflags, 8694 .get_rflags = vmx_get_rflags,
8578 .set_rflags = vmx_set_rflags, 8695 .set_rflags = vmx_set_rflags,
@@ -8634,6 +8751,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
8634 8751
8635 .check_intercept = vmx_check_intercept, 8752 .check_intercept = vmx_check_intercept,
8636 .handle_external_intr = vmx_handle_external_intr, 8753 .handle_external_intr = vmx_handle_external_intr,
8754 .mpx_supported = vmx_mpx_supported,
8755
8756 .check_nested_events = vmx_check_nested_events,
8637}; 8757};
8638 8758
8639static int __init vmx_init(void) 8759static int __init vmx_init(void)
@@ -8721,6 +8841,8 @@ static int __init vmx_init(void)
8721 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); 8841 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
8722 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); 8842 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
8723 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); 8843 vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
8844 vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
8845
8724 memcpy(vmx_msr_bitmap_legacy_x2apic, 8846 memcpy(vmx_msr_bitmap_legacy_x2apic,
8725 vmx_msr_bitmap_legacy, PAGE_SIZE); 8847 vmx_msr_bitmap_legacy, PAGE_SIZE);
8726 memcpy(vmx_msr_bitmap_longmode_x2apic, 8848 memcpy(vmx_msr_bitmap_longmode_x2apic,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b8578432d5b..d1c55f8722c6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -595,13 +595,13 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
595 595
596int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) 596int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
597{ 597{
598 u64 xcr0; 598 u64 xcr0 = xcr;
599 u64 old_xcr0 = vcpu->arch.xcr0;
599 u64 valid_bits; 600 u64 valid_bits;
600 601
601 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ 602 /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */
602 if (index != XCR_XFEATURE_ENABLED_MASK) 603 if (index != XCR_XFEATURE_ENABLED_MASK)
603 return 1; 604 return 1;
604 xcr0 = xcr;
605 if (!(xcr0 & XSTATE_FP)) 605 if (!(xcr0 & XSTATE_FP))
606 return 1; 606 return 1;
607 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) 607 if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
@@ -616,8 +616,14 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
616 if (xcr0 & ~valid_bits) 616 if (xcr0 & ~valid_bits)
617 return 1; 617 return 1;
618 618
619 if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
620 return 1;
621
619 kvm_put_guest_xcr0(vcpu); 622 kvm_put_guest_xcr0(vcpu);
620 vcpu->arch.xcr0 = xcr0; 623 vcpu->arch.xcr0 = xcr0;
624
625 if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
626 kvm_update_cpuid(vcpu);
621 return 0; 627 return 0;
622} 628}
623 629
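The __kvm_set_xcr() hunk adds two MPX-related rules on top of the existing checks: XSTATE_BNDREGS and XSTATE_BNDCSR may only be enabled or disabled as a pair, and any change to the extended-state bits (XSTATE_EXTEND_MASK) triggers kvm_update_cpuid(), since the CPUID.0xD enumeration the guest sees depends on the enabled features. A compact sketch of the resulting validity rules (the same checks as the patch, folded into one predicate for readability):

        /*
         * Sketch of the xcr0 rules enforced in __kvm_set_xcr(): x87 is
         * mandatory, AVX requires SSE, no unsupported bits, and the two MPX
         * state components are enabled or disabled together.
         */
        static bool xcr0_is_valid_sketch(u64 xcr0, u64 valid_bits)
        {
                if (!(xcr0 & XSTATE_FP))
                        return false;
                if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                        return false;
                if (xcr0 & ~valid_bits)
                        return false;
                if (!(xcr0 & XSTATE_BNDREGS) != !(xcr0 & XSTATE_BNDCSR))
                        return false;
                return true;
        }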
@@ -753,7 +759,9 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
753 else 759 else
754 dr7 = vcpu->arch.dr7; 760 dr7 = vcpu->arch.dr7;
755 kvm_x86_ops->set_dr7(vcpu, dr7); 761 kvm_x86_ops->set_dr7(vcpu, dr7);
756 vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK); 762 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
763 if (dr7 & DR7_BP_EN_MASK)
764 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
757} 765}
758 766
759static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 767static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
@@ -879,7 +887,7 @@ static u32 msrs_to_save[] = {
879 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, 887 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
880#endif 888#endif
881 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, 889 MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
882 MSR_IA32_FEATURE_CONTROL 890 MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
883}; 891};
884 892
885static unsigned num_msrs_to_save; 893static unsigned num_msrs_to_save;
@@ -1581,7 +1589,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1581 /* With all the info we got, fill in the values */ 1589 /* With all the info we got, fill in the values */
1582 vcpu->hv_clock.tsc_timestamp = tsc_timestamp; 1590 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
1583 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; 1591 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
1584 vcpu->last_kernel_ns = kernel_ns;
1585 vcpu->last_guest_tsc = tsc_timestamp; 1592 vcpu->last_guest_tsc = tsc_timestamp;
1586 1593
1587 /* 1594 /*
@@ -1623,14 +1630,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
1623 * the others. 1630 * the others.
1624 * 1631 *
1625 * So in those cases, request a kvmclock update for all vcpus. 1632 * So in those cases, request a kvmclock update for all vcpus.
1626 * The worst case for a remote vcpu to update its kvmclock 1633 * We need to rate-limit these requests though, as they can
1627 * is then bounded by maximum nohz sleep latency. 1634 * considerably slow guests that have a large number of vcpus.
1635 * The time for a remote vcpu to update its kvmclock is bound
1636 * by the delay we use to rate-limit the updates.
1628 */ 1637 */
1629 1638
1630static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) 1639#define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100)
1640
1641static void kvmclock_update_fn(struct work_struct *work)
1631{ 1642{
1632 int i; 1643 int i;
1633 struct kvm *kvm = v->kvm; 1644 struct delayed_work *dwork = to_delayed_work(work);
1645 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1646 kvmclock_update_work);
1647 struct kvm *kvm = container_of(ka, struct kvm, arch);
1634 struct kvm_vcpu *vcpu; 1648 struct kvm_vcpu *vcpu;
1635 1649
1636 kvm_for_each_vcpu(i, vcpu, kvm) { 1650 kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -1639,6 +1653,29 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1639 } 1653 }
1640} 1654}
1641 1655
1656static void kvm_gen_kvmclock_update(struct kvm_vcpu *v)
1657{
1658 struct kvm *kvm = v->kvm;
1659
1660 set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests);
1661 schedule_delayed_work(&kvm->arch.kvmclock_update_work,
1662 KVMCLOCK_UPDATE_DELAY);
1663}
1664
1665#define KVMCLOCK_SYNC_PERIOD (300 * HZ)
1666
1667static void kvmclock_sync_fn(struct work_struct *work)
1668{
1669 struct delayed_work *dwork = to_delayed_work(work);
1670 struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
1671 kvmclock_sync_work);
1672 struct kvm *kvm = container_of(ka, struct kvm, arch);
1673
1674 schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0);
1675 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
1676 KVMCLOCK_SYNC_PERIOD);
1677}
1678
1642static bool msr_mtrr_valid(unsigned msr) 1679static bool msr_mtrr_valid(unsigned msr)
1643{ 1680{
1644 switch (msr) { 1681 switch (msr) {
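kvm_gen_kvmclock_update() no longer kicks every vcpu synchronously: it marks only the requesting vcpu and schedules kvmclock_update_work, which fans KVM_REQ_CLOCK_UPDATE out to all vcpus after a 100 ms delay, so a burst of requests on a guest with many vcpus collapses into a single pass. kvmclock_sync_work additionally re-arms itself every KVMCLOCK_SYNC_PERIOD (300 seconds) to force a periodic update. A minimal, generic sketch of this coalesce-through-delayed-work pattern (names here are illustrative, not the kvm structures):

        /*
         * Sketch of the rate-limiting pattern: many callers may request an
         * update, but the expensive fan-out runs at most once per window.
         * Queueing a delayed work that is already pending is a no-op, so a
         * burst of requests results in a single execution of update_fn().
         */
        #include <linux/workqueue.h>
        #include <linux/jiffies.h>

        #define UPDATE_DELAY    msecs_to_jiffies(100)

        static void update_fn(struct work_struct *work)
        {
                /* ... fan the update out to every consumer here ... */
        }

        static DECLARE_DELAYED_WORK(update_work, update_fn);

        static void request_update(void)
        {
                schedule_delayed_work(&update_work, UPDATE_DELAY);
        }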
@@ -2323,9 +2360,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
2323 case HV_X64_MSR_VP_INDEX: { 2360 case HV_X64_MSR_VP_INDEX: {
2324 int r; 2361 int r;
2325 struct kvm_vcpu *v; 2362 struct kvm_vcpu *v;
2326 kvm_for_each_vcpu(r, v, vcpu->kvm) 2363 kvm_for_each_vcpu(r, v, vcpu->kvm) {
2327 if (v == vcpu) 2364 if (v == vcpu) {
2328 data = r; 2365 data = r;
2366 break;
2367 }
2368 }
2329 break; 2369 break;
2330 } 2370 }
2331 case HV_X64_MSR_EOI: 2371 case HV_X64_MSR_EOI:
@@ -2617,6 +2657,7 @@ int kvm_dev_ioctl_check_extension(long ext)
2617 case KVM_CAP_KVMCLOCK_CTRL: 2657 case KVM_CAP_KVMCLOCK_CTRL:
2618 case KVM_CAP_READONLY_MEM: 2658 case KVM_CAP_READONLY_MEM:
2619 case KVM_CAP_HYPERV_TIME: 2659 case KVM_CAP_HYPERV_TIME:
2660 case KVM_CAP_IOAPIC_POLARITY_IGNORED:
2620#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT 2661#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
2621 case KVM_CAP_ASSIGN_DEV_IRQ: 2662 case KVM_CAP_ASSIGN_DEV_IRQ:
2622 case KVM_CAP_PCI_2_3: 2663 case KVM_CAP_PCI_2_3:
@@ -3043,9 +3084,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
3043 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility 3084 * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility
3044 * with old userspace. 3085 * with old userspace.
3045 */ 3086 */
3046 if (xstate_bv & ~KVM_SUPPORTED_XCR0) 3087 if (xstate_bv & ~kvm_supported_xcr0())
3047 return -EINVAL;
3048 if (xstate_bv & ~host_xcr0)
3049 return -EINVAL; 3088 return -EINVAL;
3050 memcpy(&vcpu->arch.guest_fpu.state->xsave, 3089 memcpy(&vcpu->arch.guest_fpu.state->xsave,
3051 guest_xsave->region, vcpu->arch.guest_xstate_size); 3090 guest_xsave->region, vcpu->arch.guest_xstate_size);
@@ -3898,6 +3937,23 @@ static void kvm_init_msr_list(void)
3898 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { 3937 for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) {
3899 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) 3938 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
3900 continue; 3939 continue;
3940
3941 /*
3942 * Even MSRs that are valid in the host may not be exposed
3943 * to the guests in some cases. We could work around this
3944 * in VMX with the generic MSR save/load machinery, but it
3945 * is not really worthwhile since it will really only
3946 * happen with nested virtualization.
3947 */
3948 switch (msrs_to_save[i]) {
3949 case MSR_IA32_BNDCFGS:
3950 if (!kvm_x86_ops->mpx_supported())
3951 continue;
3952 break;
3953 default:
3954 break;
3955 }
3956
3901 if (j < i) 3957 if (j < i)
3902 msrs_to_save[j] = msrs_to_save[i]; 3958 msrs_to_save[j] = msrs_to_save[i];
3903 j++; 3959 j++;
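kvm_init_msr_list() now filters msrs_to_save on two criteria: the MSR must exist on the host (rdmsr_safe()) and it must be exposable to guests (MSR_IA32_BNDCFGS is dropped when the vendor module reports no MPX support), compacting the survivors in place with the j/i two-index walk visible above. This is also why a later hunk moves the kvm_init_msr_list() call after the kvm_x86_ops assignment in kvm_arch_init(): the new filter dereferences kvm_x86_ops->mpx_supported(). The compaction idiom itself, as a standalone sketch:

        /*
         * Sketch of the in-place filtering idiom used by kvm_init_msr_list():
         * walk the array with i and copy the surviving entries down to j.
         */
        #include <stdbool.h>
        #include <stddef.h>

        static size_t filter_in_place(unsigned int *vals, size_t n,
                                      bool (*keep)(unsigned int))
        {
                size_t i, j = 0;

                for (i = 0; i < n; i++) {
                        if (!keep(vals[i]))
                                continue;
                        if (j < i)
                                vals[j] = vals[i];
                        j++;
                }
                return j;       /* number of entries kept */
        }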
@@ -4394,6 +4450,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
4394 if (!exchanged) 4450 if (!exchanged)
4395 return X86EMUL_CMPXCHG_FAILED; 4451 return X86EMUL_CMPXCHG_FAILED;
4396 4452
4453 mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
4397 kvm_mmu_pte_write(vcpu, gpa, new, bytes); 4454 kvm_mmu_pte_write(vcpu, gpa, new, bytes);
4398 4455
4399 return X86EMUL_CONTINUE; 4456 return X86EMUL_CONTINUE;
@@ -5537,9 +5594,10 @@ int kvm_arch_init(void *opaque)
5537 goto out_free_percpu; 5594 goto out_free_percpu;
5538 5595
5539 kvm_set_mmio_spte_mask(); 5596 kvm_set_mmio_spte_mask();
5540 kvm_init_msr_list();
5541 5597
5542 kvm_x86_ops = ops; 5598 kvm_x86_ops = ops;
5599 kvm_init_msr_list();
5600
5543 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, 5601 kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
5544 PT_DIRTY_MASK, PT64_NX_MASK, 0); 5602 PT_DIRTY_MASK, PT64_NX_MASK, 0);
5545 5603
@@ -5782,8 +5840,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
5782 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); 5840 kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr);
5783} 5841}
5784 5842
5785static void inject_pending_event(struct kvm_vcpu *vcpu) 5843static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5786{ 5844{
5845 int r;
5846
5787 /* try to reinject previous events if any */ 5847 /* try to reinject previous events if any */
5788 if (vcpu->arch.exception.pending) { 5848 if (vcpu->arch.exception.pending) {
5789 trace_kvm_inj_exception(vcpu->arch.exception.nr, 5849 trace_kvm_inj_exception(vcpu->arch.exception.nr,
@@ -5793,17 +5853,23 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5793 vcpu->arch.exception.has_error_code, 5853 vcpu->arch.exception.has_error_code,
5794 vcpu->arch.exception.error_code, 5854 vcpu->arch.exception.error_code,
5795 vcpu->arch.exception.reinject); 5855 vcpu->arch.exception.reinject);
5796 return; 5856 return 0;
5797 } 5857 }
5798 5858
5799 if (vcpu->arch.nmi_injected) { 5859 if (vcpu->arch.nmi_injected) {
5800 kvm_x86_ops->set_nmi(vcpu); 5860 kvm_x86_ops->set_nmi(vcpu);
5801 return; 5861 return 0;
5802 } 5862 }
5803 5863
5804 if (vcpu->arch.interrupt.pending) { 5864 if (vcpu->arch.interrupt.pending) {
5805 kvm_x86_ops->set_irq(vcpu); 5865 kvm_x86_ops->set_irq(vcpu);
5806 return; 5866 return 0;
5867 }
5868
5869 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
5870 r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
5871 if (r != 0)
5872 return r;
5807 } 5873 }
5808 5874
5809 /* try to inject new event if pending */ 5875 /* try to inject new event if pending */
@@ -5820,6 +5886,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
5820 kvm_x86_ops->set_irq(vcpu); 5886 kvm_x86_ops->set_irq(vcpu);
5821 } 5887 }
5822 } 5888 }
5889 return 0;
5823} 5890}
5824 5891
5825static void process_nmi(struct kvm_vcpu *vcpu) 5892static void process_nmi(struct kvm_vcpu *vcpu)
@@ -5924,15 +5991,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5924 goto out; 5991 goto out;
5925 } 5992 }
5926 5993
5927 inject_pending_event(vcpu); 5994 if (inject_pending_event(vcpu, req_int_win) != 0)
5928 5995 req_immediate_exit = true;
5929 /* enable NMI/IRQ window open exits if needed */ 5996 /* enable NMI/IRQ window open exits if needed */
5930 if (vcpu->arch.nmi_pending) 5997 else if (vcpu->arch.nmi_pending)
5931 req_immediate_exit = 5998 kvm_x86_ops->enable_nmi_window(vcpu);
5932 kvm_x86_ops->enable_nmi_window(vcpu) != 0;
5933 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) 5999 else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
5934 req_immediate_exit = 6000 kvm_x86_ops->enable_irq_window(vcpu);
5935 kvm_x86_ops->enable_irq_window(vcpu) != 0;
5936 6001
5937 if (kvm_lapic_enabled(vcpu)) { 6002 if (kvm_lapic_enabled(vcpu)) {
5938 /* 6003 /*
@@ -5992,12 +6057,28 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
5992 set_debugreg(vcpu->arch.eff_db[1], 1); 6057 set_debugreg(vcpu->arch.eff_db[1], 1);
5993 set_debugreg(vcpu->arch.eff_db[2], 2); 6058 set_debugreg(vcpu->arch.eff_db[2], 2);
5994 set_debugreg(vcpu->arch.eff_db[3], 3); 6059 set_debugreg(vcpu->arch.eff_db[3], 3);
6060 set_debugreg(vcpu->arch.dr6, 6);
5995 } 6061 }
5996 6062
5997 trace_kvm_entry(vcpu->vcpu_id); 6063 trace_kvm_entry(vcpu->vcpu_id);
5998 kvm_x86_ops->run(vcpu); 6064 kvm_x86_ops->run(vcpu);
5999 6065
6000 /* 6066 /*
6067 * Do this here before restoring debug registers on the host. And
6068 * since we do this before handling the vmexit, a DR access vmexit
6069 * can (a) read the correct value of the debug registers, (b) set
6070 * KVM_DEBUGREG_WONT_EXIT again.
6071 */
6072 if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
6073 int i;
6074
6075 WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
6076 kvm_x86_ops->sync_dirty_debug_regs(vcpu);
6077 for (i = 0; i < KVM_NR_DB_REGS; i++)
6078 vcpu->arch.eff_db[i] = vcpu->arch.db[i];
6079 }
6080
6081 /*
6001 * If the guest has used debug registers, at least dr7 6082 * If the guest has used debug registers, at least dr7
6002 * will be disabled while returning to the host. 6083 * will be disabled while returning to the host.
6003 * If we don't have active breakpoints in the host, we don't 6084 * If we don't have active breakpoints in the host, we don't
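The entry path now also loads DR6 whenever switch_db_regs is non-zero, and the block above resynchronizes the guest's debug registers (and clears KVM_DEBUGREG_WONT_EXIT) right after the VM exit, before the exit handler runs and before the host's breakpoints are restored further down. switch_db_regs has become a small flag word rather than a boolean; the two bits used by this series are presumably defined along these lines (values shown for orientation only; the real definitions live in the kvm_host.h part of the series, not in this hunk):

        /*
         * Assumed flag values, for illustration only; see
         * arch/x86/include/asm/kvm_host.h in this series for the real ones.
         */
        #define KVM_DEBUGREG_BP_ENABLED (1 << 0) /* guest DR7 enables breakpoints */
        #define KVM_DEBUGREG_WONT_EXIT  (1 << 1) /* MOV-DR exiting is disabled */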
@@ -6711,6 +6792,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6711{ 6792{
6712 int r; 6793 int r;
6713 struct msr_data msr; 6794 struct msr_data msr;
6795 struct kvm *kvm = vcpu->kvm;
6714 6796
6715 r = vcpu_load(vcpu); 6797 r = vcpu_load(vcpu);
6716 if (r) 6798 if (r)
@@ -6721,6 +6803,9 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
6721 kvm_write_tsc(vcpu, &msr); 6803 kvm_write_tsc(vcpu, &msr);
6722 vcpu_put(vcpu); 6804 vcpu_put(vcpu);
6723 6805
6806 schedule_delayed_work(&kvm->arch.kvmclock_sync_work,
6807 KVMCLOCK_SYNC_PERIOD);
6808
6724 return r; 6809 return r;
6725} 6810}
6726 6811
@@ -7013,6 +7098,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
7013 7098
7014 pvclock_update_vm_gtod_copy(kvm); 7099 pvclock_update_vm_gtod_copy(kvm);
7015 7100
7101 INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
7102 INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
7103
7016 return 0; 7104 return 0;
7017} 7105}
7018 7106
@@ -7050,6 +7138,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
7050 7138
7051void kvm_arch_sync_events(struct kvm *kvm) 7139void kvm_arch_sync_events(struct kvm *kvm)
7052{ 7140{
7141 cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
7142 cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
7053 kvm_free_all_assigned_devices(kvm); 7143 kvm_free_all_assigned_devices(kvm);
7054 kvm_free_pit(kvm); 7144 kvm_free_pit(kvm);
7055} 7145}
@@ -7248,6 +7338,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
7248 7338
7249int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 7339int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
7250{ 7340{
7341 if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
7342 kvm_x86_ops->check_nested_events(vcpu, false);
7343
7251 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && 7344 return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
7252 !vcpu->arch.apf.halted) 7345 !vcpu->arch.apf.halted)
7253 || !list_empty_careful(&vcpu->async_pf.done) 7346 || !list_empty_careful(&vcpu->async_pf.done)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8da5823bcde6..8c97bac9a895 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -122,9 +122,12 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
122 gva_t addr, void *val, unsigned int bytes, 122 gva_t addr, void *val, unsigned int bytes,
123 struct x86_exception *exception); 123 struct x86_exception *exception);
124 124
125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) 125#define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
126 | XSTATE_BNDREGS | XSTATE_BNDCSR)
126extern u64 host_xcr0; 127extern u64 host_xcr0;
127 128
129extern u64 kvm_supported_xcr0(void);
130
128extern unsigned int min_timer_period_us; 131extern unsigned int min_timer_period_us;
129 132
130extern struct static_key kvm_no_apic_vcpu; 133extern struct static_key kvm_no_apic_vcpu;
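KVM_SUPPORTED_XCR0 now includes the MPX state components, but whether they are actually offered to a guest also depends on the host's xcr0 and on the vendor module's mpx_supported() hook; that is what the new kvm_supported_xcr0() helper, declared here and defined elsewhere in the series (in cpuid.c), encapsulates. It is likely along these lines (a sketch, not quoted from the patch):

        /*
         * Sketch of kvm_supported_xcr0(): start from the static mask,
         * intersect with what the host has enabled, and drop the MPX states
         * when the vendor module cannot context-switch BNDCFGS.
         */
        u64 kvm_supported_xcr0(void)
        {
                u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;

                if (!kvm_x86_ops->mpx_supported())
                        xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);

                return xcr0;
        }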