author    Linus Torvalds <torvalds@linux-foundation.org> 2017-08-25 20:46:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-08-25 20:46:23 -0400
commit    67a3b5cb33633f39db8809ae56c8c1752b541daa (patch)
tree      a439b19325452b22aacee03a5ce34125f55ec5c6
parent    17e34c4fd0be14e989b08734b302cd357126fe2d (diff)
parent    47c5310a8dbe7c2cb9f0083daa43ceed76c257fa (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
 "Bugfixes for x86, PPC and s390"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: Book3S: Fix race and leak in kvm_vm_ioctl_create_spapr_tce()
  KVM, pkeys: do not use PKRU value in vcpu->arch.guest_fpu.state
  KVM: x86: simplify handling of PKRU
  KVM: x86: block guest protection keys unless the host has them enabled
  KVM: PPC: Book3S HV: Add missing barriers to XIVE code and document them
  KVM: PPC: Book3S HV: Workaround POWER9 DD1.0 bug causing IPB bit loss
  KVM: PPC: Book3S HV: Use msgsync with hypervisor doorbells on POWER9
  KVM: s390: sthyi: fix specification exception detection
  KVM: s390: sthyi: fix sthyi inline assembly
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c        | 56
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S |  3
-rw-r--r--  arch/powerpc/kvm/book3s_xive_template.c | 68
-rw-r--r--  arch/s390/kvm/sthyi.c                   |  7
-rw-r--r--  arch/x86/include/asm/fpu/internal.h     |  6
-rw-r--r--  arch/x86/include/asm/kvm_host.h         |  1
-rw-r--r--  arch/x86/kvm/cpuid.c                    |  2
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h           |  5
-rw-r--r--  arch/x86/kvm/mmu.h                      |  2
-rw-r--r--  arch/x86/kvm/svm.c                      |  7
-rw-r--r--  arch/x86/kvm/vmx.c                      | 25
-rw-r--r--  arch/x86/kvm/x86.c                      | 17
12 files changed, 135 insertions(+), 64 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index a160c14304eb..53766e2bc029 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -294,32 +294,26 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				   struct kvm_create_spapr_tce_64 *args)
 {
 	struct kvmppc_spapr_tce_table *stt = NULL;
+	struct kvmppc_spapr_tce_table *siter;
 	unsigned long npages, size;
 	int ret = -ENOMEM;
 	int i;
+	int fd = -1;
 
 	if (!args->size)
 		return -EINVAL;
 
-	/* Check this LIOBN hasn't been previously allocated */
-	list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
-		if (stt->liobn == args->liobn)
-			return -EBUSY;
-	}
-
 	size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
 	npages = kvmppc_tce_pages(size);
 	ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
-	if (ret) {
-		stt = NULL;
-		goto fail;
-	}
+	if (ret)
+		return ret;
 
 	ret = -ENOMEM;
 	stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
 		      GFP_KERNEL);
 	if (!stt)
-		goto fail;
+		goto fail_acct;
 
 	stt->liobn = args->liobn;
 	stt->page_shift = args->page_shift;
@@ -334,24 +328,42 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 		goto fail;
 	}
 
-	kvm_get_kvm(kvm);
+	ret = fd = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+				    stt, O_RDWR | O_CLOEXEC);
+	if (ret < 0)
+		goto fail;
 
 	mutex_lock(&kvm->lock);
-	list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
+	/* Check this LIOBN hasn't been previously allocated */
+	ret = 0;
+	list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+		if (siter->liobn == args->liobn) {
+			ret = -EBUSY;
+			break;
+		}
+	}
+
+	if (!ret) {
+		list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+		kvm_get_kvm(kvm);
+	}
 
 	mutex_unlock(&kvm->lock);
 
-	return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
-				stt, O_RDWR | O_CLOEXEC);
+	if (!ret)
+		return fd;
 
-fail:
-	if (stt) {
-		for (i = 0; i < npages; i++)
-			if (stt->pages[i])
-				__free_page(stt->pages[i]);
+	put_unused_fd(fd);
 
-		kfree(stt);
-	}
+ fail:
+	for (i = 0; i < npages; i++)
+		if (stt->pages[i])
+			__free_page(stt->pages[i]);
+
+	kfree(stt);
+ fail_acct:
+	kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
 	return ret;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c52184a8efdf..9c9c983b864f 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1291,6 +1291,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Hypervisor doorbell - exit only if host IPI flag set */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
 	bne	3f
+BEGIN_FTR_SECTION
+	PPC_MSGSYNC
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	beq	4f
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 4636ca6e7d38..d1ed2c41b5d2 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -16,7 +16,22 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
 	u8 cppr;
 	u16 ack;
 
-	/* XXX DD1 bug workaround: Check PIPR vs. CPPR first ! */
+	/*
+	 * Ensure any previous store to CPPR is ordered vs.
+	 * the subsequent loads from PIPR or ACK.
+	 */
+	eieio();
+
+	/*
+	 * DD1 bug workaround: If PIPR is less favored than CPPR
+	 * ignore the interrupt or we might incorrectly lose an IPB
+	 * bit.
+	 */
+	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+		u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
+		if (pipr >= xc->hw_cppr)
+			return;
+	}
 
 	/* Perform the acknowledge OS to register cycle. */
 	ack = be16_to_cpu(__x_readw(__x_tima + TM_SPC_ACK_OS_REG));
@@ -235,6 +250,11 @@ skip_ipi:
 	/*
 	 * If we found an interrupt, adjust what the guest CPPR should
 	 * be as if we had just fetched that interrupt from HW.
+	 *
+	 * Note: This can only make xc->cppr smaller as the previous
+	 * loop will only exit with hirq != 0 if prio is lower than
+	 * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+	 * for pending IPIs.
 	 */
 	if (hirq)
 		xc->cppr = prio;
@@ -381,6 +401,12 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
 	xc->cppr = cppr;
 
 	/*
+	 * Order the above update of xc->cppr with the subsequent
+	 * read of xc->mfrr inside push_pending_to_hw()
+	 */
+	smp_mb();
+
+	/*
 	 * We are masking less, we need to look for pending things
 	 * to deliver and set VP pending bits accordingly to trigger
 	 * a new interrupt otherwise we might miss MFRR changes for
@@ -420,21 +446,37 @@ X_STATIC int GLUE(X_PFX,h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr)
 	 * used to signal MFRR changes is EOId when fetched from
 	 * the queue.
 	 */
-	if (irq == XICS_IPI || irq == 0)
+	if (irq == XICS_IPI || irq == 0) {
+		/*
+		 * This barrier orders the setting of xc->cppr vs.
+		 * subsquent test of xc->mfrr done inside
+		 * scan_interrupts and push_pending_to_hw
+		 */
+		smp_mb();
 		goto bail;
+	}
 
 	/* Find interrupt source */
 	sb = kvmppc_xive_find_source(xive, irq, &src);
 	if (!sb) {
 		pr_devel(" source not found !\n");
 		rc = H_PARAMETER;
+		/* Same as above */
+		smp_mb();
 		goto bail;
 	}
 	state = &sb->irq_state[src];
 	kvmppc_xive_select_irq(state, &hw_num, &xd);
 
 	state->in_eoi = true;
-	mb();
+
+	/*
+	 * This barrier orders both setting of in_eoi above vs,
+	 * subsequent test of guest_priority, and the setting
+	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
+	 * scan_interrupts and push_pending_to_hw
+	 */
+	smp_mb();
 
 again:
 	if (state->guest_priority == MASKED) {
@@ -461,6 +503,14 @@ again:
 
 	}
 
+	/*
+	 * This barrier orders the above guest_priority check
+	 * and spin_lock/unlock with clearing in_eoi below.
+	 *
+	 * It also has to be a full mb() as it must ensure
+	 * the MMIOs done in source_eoi() are completed before
+	 * state->in_eoi is visible.
+	 */
 	mb();
 	state->in_eoi = false;
 bail:
@@ -495,6 +545,18 @@ X_STATIC int GLUE(X_PFX,h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
 	/* Locklessly write over MFRR */
 	xc->mfrr = mfrr;
 
+	/*
+	 * The load of xc->cppr below and the subsequent MMIO store
+	 * to the IPI must happen after the above mfrr update is
+	 * globally visible so that:
+	 *
+	 * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+	 *   updating xc->cppr then reading xc->mfrr.
+	 *
+	 * - The target of the IPI sees the xc->mfrr update
+	 */
+	mb();
+
 	/* Shoot the IPI if most favored than target cppr */
 	if (mfrr < xc->cppr)
 		__x_writeq(0, __x_trig_page(&xc->vp_ipi_data));
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 926b5244263e..a2e5c24f47a7 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -394,7 +394,7 @@ static int sthyi(u64 vaddr)
 		"srl %[cc],28\n"
 		: [cc] "=d" (cc)
 		: [code] "d" (code), [addr] "a" (addr)
-		: "memory", "cc");
+		: "3", "memory", "cc");
 	return cc;
 }
 
@@ -425,7 +425,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
 	trace_kvm_s390_handle_sthyi(vcpu, code, addr);
 
-	if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+	if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	if (code & 0xffff) {
@@ -433,6 +433,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
+	if (addr & ~PAGE_MASK)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
 	/*
 	 * If the page has not yet been faulted in, we want to do that
 	 * now and not after all the expensive calculations.
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..554cdb205d17 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -450,10 +450,10 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
 	return 0;
 }
 
-static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
 {
 	if (use_xsave()) {
-		copy_kernel_to_xregs(&fpstate->xsave, -1);
+		copy_kernel_to_xregs(&fpstate->xsave, mask);
 	} else {
 		if (use_fxsr())
 			copy_kernel_to_fxregs(&fpstate->fxsave);
@@ -477,7 +477,7 @@ static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
 			: : [addr] "m" (fpstate));
 	}
 
-	__copy_kernel_to_fpregs(fpstate);
+	__copy_kernel_to_fpregs(fpstate, -1);
 }
 
 extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 87ac4fba6d8e..f4d120a3e22e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -492,6 +492,7 @@ struct kvm_vcpu_arch {
 	unsigned long cr4;
 	unsigned long cr4_guest_owned_bits;
 	unsigned long cr8;
+	u32 pkru;
 	u32 hflags;
 	u64 efer;
 	u64 apic_base;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59ca2eea522c..19adbb418443 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -469,7 +469,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
 			cpuid_mask(&entry->ecx, CPUID_7_ECX);
 			/* PKU is not yet implemented for shadow paging. */
-			if (!tdp_enabled)
+			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
 				entry->ecx &= ~F(PKU);
 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 			entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 762cdf2595f9..e1e89ee4af75 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -84,11 +84,6 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }
 
-static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
-{
-	return kvm_x86_ops->get_pkru(vcpu);
-}
-
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags |= HF_GUEST_MASK;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d7d248a000dd..4b9a3ae6b725 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -185,7 +185,7 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		 * index of the protection domain, so pte_pkey * 2 is
 		 * is the index of the first bit for the domain.
 		 */
-		pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+		pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3;
 
 		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
 		offset = (pfec & ~1) +
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 56ba05312759..af256b786a70 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1777,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
-static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -5413,8 +5408,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
 
-	.get_pkru = svm_get_pkru,
-
 	.tlb_flush = svm_flush_tlb,
 
 	.run = svm_vcpu_run,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9b21b1223035..c6ef2940119b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -636,8 +636,6 @@ struct vcpu_vmx {
 
 	u64 current_tsc_ratio;
 
-	bool guest_pkru_valid;
-	u32 guest_pkru;
 	u32 host_pkru;
 
 	/*
@@ -2383,11 +2381,6 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_vmx(vcpu)->emulation_required = emulation_required(vcpu);
 }
 
-static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
-{
-	return to_vmx(vcpu)->guest_pkru;
-}
-
 static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -9020,8 +9013,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
-	if (vmx->guest_pkru_valid)
-		__write_pkru(vmx->guest_pkru);
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
+	    vcpu->arch.pkru != vmx->host_pkru)
+		__write_pkru(vcpu->arch.pkru);
 
 	atomic_switch_perf_msrs(vmx);
 	debugctlmsr = get_debugctlmsr();
@@ -9169,13 +9164,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * back on host, so it is safe to read guest PKRU from current
 	 * XSAVE.
 	 */
-	if (boot_cpu_has(X86_FEATURE_OSPKE)) {
-		vmx->guest_pkru = __read_pkru();
-		if (vmx->guest_pkru != vmx->host_pkru) {
-			vmx->guest_pkru_valid = true;
-			__write_pkru(vmx->host_pkru);
-		} else
-			vmx->guest_pkru_valid = false;
+	if (static_cpu_has(X86_FEATURE_PKU) &&
+	    kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
+		vcpu->arch.pkru = __read_pkru();
+		if (vcpu->arch.pkru != vmx->host_pkru)
+			__write_pkru(vmx->host_pkru);
 	}
 
 	/*
@@ -11682,8 +11675,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
 
-	.get_pkru = vmx_get_pkru,
-
 	.tlb_flush = vmx_flush_tlb,
 
 	.run = vmx_vcpu_run,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d734aa8c5b4f..05a5e57c6f39 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3245,7 +3245,12 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 			u32 size, offset, ecx, edx;
 			cpuid_count(XSTATE_CPUID, index,
 				    &size, &offset, &ecx, &edx);
-			memcpy(dest + offset, src, size);
+			if (feature == XFEATURE_MASK_PKRU)
+				memcpy(dest + offset, &vcpu->arch.pkru,
+				       sizeof(vcpu->arch.pkru));
+			else
+				memcpy(dest + offset, src, size);
+
 		}
 
 		valid -= feature;
@@ -3283,7 +3288,11 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 			u32 size, offset, ecx, edx;
 			cpuid_count(XSTATE_CPUID, index,
 				    &size, &offset, &ecx, &edx);
-			memcpy(dest, src + offset, size);
+			if (feature == XFEATURE_MASK_PKRU)
+				memcpy(&vcpu->arch.pkru, src + offset,
+				       sizeof(vcpu->arch.pkru));
+			else
+				memcpy(dest, src + offset, size);
 		}
 
 		valid -= feature;
@@ -7633,7 +7642,9 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 */
 	vcpu->guest_fpu_loaded = 1;
 	__kernel_fpu_begin();
-	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
+	/* PKRU is separately restored in kvm_x86_ops->run. */
+	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+				~XFEATURE_MASK_PKRU);
 	trace_kvm_fpu(1);
 }
 