author     Linus Torvalds <torvalds@linux-foundation.org>    2014-10-24 15:42:55 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2014-10-24 15:42:55 -0400
commit     96971e9aa9578322648b2de593fd4863f3d9fc39 (patch)
tree       f2320a488e63583664692e94f37edc9f07174d8a
parent     20ca57cde5557d8623af8cbf81a17733bbbce3a6 (diff)
parent     571ee1b6859869a09ed718d390aac2b9414646a2 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "This is a pretty large update. I think it is roughly as big as what
  I usually had for the _whole_ rc period.

  There are a few bad bugs where the guest can OOPS or crash the host.
  We have also started looking at attack models for nested
  virtualization; bugs that usually result in the guest ring 0 crashing
  itself become more worrisome if you have nested virtualization,
  because the nested guest might bring down the non-nested guest as
  well. For current uses of nested virtualization these do not really
  have a security impact, but you never know and bugs are bugs
  nevertheless.

  A lot of these bugs are in 3.17 too, resulting in a large number of
  stable@ Ccs. I checked that all the patches apply there with no
  conflicts"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: vfio: fix unregister kvm_device_ops of vfio
  KVM: x86: Wrong assertion on paging_tmpl.h
  kvm: fix excessive pages un-pinning in kvm_iommu_map error path.
  KVM: x86: PREFETCH and HINT_NOP should have SrcMem flag
  KVM: x86: Emulator does not decode clflush well
  KVM: emulate: avoid accessing NULL ctxt->memopp
  KVM: x86: Decoding guest instructions which cross page boundary may fail
  kvm: x86: don't kill guest on unknown exit reason
  kvm: vmx: handle invvpid vm exit gracefully
  KVM: x86: Handle errors when RIP is set during far jumps
  KVM: x86: Emulator fixes for eip canonical checks on near branches
  KVM: x86: Fix wrong masking on relative jump/call
  KVM: x86: Improve thread safety in pit
  KVM: x86: Prevent host from panicking on shared MSR writes.
  KVM: x86: Check non-canonical addresses upon WRMSR
-rw-r--r--  arch/x86/include/asm/kvm_host.h    16
-rw-r--r--  arch/x86/include/uapi/asm/vmx.h     2
-rw-r--r--  arch/x86/kvm/emulate.c            250
-rw-r--r--  arch/x86/kvm/i8254.c                2
-rw-r--r--  arch/x86/kvm/paging_tmpl.h          2
-rw-r--r--  arch/x86/kvm/svm.c                  8
-rw-r--r--  arch/x86/kvm/vmx.c                 24
-rw-r--r--  arch/x86/kvm/x86.c                 38
-rw-r--r--  include/linux/kvm_host.h            1
-rw-r--r--  virt/kvm/iommu.c                    8
-rw-r--r--  virt/kvm/kvm_main.c                 7
-rw-r--r--  virt/kvm/vfio.c                     5
-rw-r--r--  virt/kvm/vfio.h                     4
13 files changed, 282 insertions(+), 85 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
989 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); 989 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
990} 990}
991 991
992static inline u64 get_canonical(u64 la)
993{
994 return ((int64_t)la << 16) >> 16;
995}
996
997static inline bool is_noncanonical_address(u64 la)
998{
999#ifdef CONFIG_X86_64
1000 return get_canonical(la) != la;
1001#else
1002 return false;
1003#endif
1004}
1005
992#define TSS_IOPB_BASE_OFFSET 0x66 1006#define TSS_IOPB_BASE_OFFSET 0x66
993#define TSS_BASE_SIZE 0x68 1007#define TSS_BASE_SIZE 0x68
994#define TSS_IOPB_SIZE (65536 / 8) 1008#define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
1050 unsigned long address); 1064 unsigned long address);
1051 1065
1052void kvm_define_shared_msr(unsigned index, u32 msr); 1066void kvm_define_shared_msr(unsigned index, u32 msr);
1053void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); 1067int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
1054 1068
1055bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); 1069bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
1056 1070
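
Note: the helpers added above implement the x86-64 canonical-address rule for 48-bit virtual addresses (bits 63:48 must be a sign extension of bit 47), which the WRMSR and emulator fixes later in this series rely on. A minimal, standalone sketch of the same check, with an illustrative main() harness that is not part of the patch:

	#include <stdint.h>
	#include <stdio.h>

	/* Sign-extend from bit 47: a canonical address comes back unchanged. */
	static inline uint64_t get_canonical(uint64_t la)
	{
		return (uint64_t)(((int64_t)la << 16) >> 16);
	}

	static inline int is_noncanonical_address(uint64_t la)
	{
		return get_canonical(la) != la;
	}

	int main(void)
	{
		/* Highest canonical low address, first non-canonical address,
		 * and a canonical high (kernel-half) address. */
		printf("%d\n", is_noncanonical_address(0x00007fffffffffffULL)); /* 0 */
		printf("%d\n", is_noncanonical_address(0x0000800000000000ULL)); /* 1 */
		printf("%d\n", is_noncanonical_address(0xffff800000000000ULL)); /* 0 */
		return 0;
	}

The CONFIG_X86_64 guard from the patch is dropped here for brevity; on 32-bit builds the kernel version simply reports every address as canonical.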
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
67#define EXIT_REASON_EPT_MISCONFIG 49 67#define EXIT_REASON_EPT_MISCONFIG 49
68#define EXIT_REASON_INVEPT 50 68#define EXIT_REASON_INVEPT 50
69#define EXIT_REASON_PREEMPTION_TIMER 52 69#define EXIT_REASON_PREEMPTION_TIMER 52
70#define EXIT_REASON_INVVPID 53
70#define EXIT_REASON_WBINVD 54 71#define EXIT_REASON_WBINVD 54
71#define EXIT_REASON_XSETBV 55 72#define EXIT_REASON_XSETBV 55
72#define EXIT_REASON_APIC_WRITE 56 73#define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
114 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ 115 { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
115 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ 116 { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
116 { EXIT_REASON_INVD, "INVD" }, \ 117 { EXIT_REASON_INVD, "INVD" }, \
118 { EXIT_REASON_INVVPID, "INVVPID" }, \
117 { EXIT_REASON_INVPCID, "INVPCID" } 119 { EXIT_REASON_INVPCID, "INVPCID" }
118 120
119#endif /* _UAPIVMX_H */ 121#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
504 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); 504 masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
505} 505}
506 506
507static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
508{
509 register_address_increment(ctxt, &ctxt->_eip, rel);
510}
511
512static u32 desc_limit_scaled(struct desc_struct *desc) 507static u32 desc_limit_scaled(struct desc_struct *desc)
513{ 508{
514 u32 limit = get_desc_limit(desc); 509 u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
569 return emulate_exception(ctxt, NM_VECTOR, 0, false); 564 return emulate_exception(ctxt, NM_VECTOR, 0, false);
570} 565}
571 566
567static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
568 int cs_l)
569{
570 switch (ctxt->op_bytes) {
571 case 2:
572 ctxt->_eip = (u16)dst;
573 break;
574 case 4:
575 ctxt->_eip = (u32)dst;
576 break;
577 case 8:
578 if ((cs_l && is_noncanonical_address(dst)) ||
579 (!cs_l && (dst & ~(u32)-1)))
580 return emulate_gp(ctxt, 0);
581 ctxt->_eip = dst;
582 break;
583 default:
584 WARN(1, "unsupported eip assignment size\n");
585 }
586 return X86EMUL_CONTINUE;
587}
588
589static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
590{
591 return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
592}
593
594static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
595{
596 return assign_eip_near(ctxt, ctxt->_eip + rel);
597}
598
572static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) 599static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
573{ 600{
574 u16 selector; 601 u16 selector;
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
751static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, 778static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
752 unsigned size) 779 unsigned size)
753{ 780{
754 if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size)) 781 unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
755 return __do_insn_fetch_bytes(ctxt, size); 782
783 if (unlikely(done_size < size))
784 return __do_insn_fetch_bytes(ctxt, size - done_size);
756 else 785 else
757 return X86EMUL_CONTINUE; 786 return X86EMUL_CONTINUE;
758} 787}
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1416 1445
1417/* Does not support long mode */ 1446/* Does not support long mode */
1418static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, 1447static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1419 u16 selector, int seg, u8 cpl, bool in_task_switch) 1448 u16 selector, int seg, u8 cpl,
1449 bool in_task_switch,
1450 struct desc_struct *desc)
1420{ 1451{
1421 struct desc_struct seg_desc, old_desc; 1452 struct desc_struct seg_desc, old_desc;
1422 u8 dpl, rpl; 1453 u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1557 } 1588 }
1558load: 1589load:
1559 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); 1590 ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
1591 if (desc)
1592 *desc = seg_desc;
1560 return X86EMUL_CONTINUE; 1593 return X86EMUL_CONTINUE;
1561exception: 1594exception:
1562 return emulate_exception(ctxt, err_vec, err_code, true); 1595 return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
1566 u16 selector, int seg) 1599 u16 selector, int seg)
1567{ 1600{
1568 u8 cpl = ctxt->ops->cpl(ctxt); 1601 u8 cpl = ctxt->ops->cpl(ctxt);
1569 return __load_segment_descriptor(ctxt, selector, seg, cpl, false); 1602 return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
1570} 1603}
1571 1604
1572static void write_register_operand(struct operand *op) 1605static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
1960static int em_jmp_far(struct x86_emulate_ctxt *ctxt) 1993static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
1961{ 1994{
1962 int rc; 1995 int rc;
1963 unsigned short sel; 1996 unsigned short sel, old_sel;
1997 struct desc_struct old_desc, new_desc;
1998 const struct x86_emulate_ops *ops = ctxt->ops;
1999 u8 cpl = ctxt->ops->cpl(ctxt);
2000
2001 /* Assignment of RIP may only fail in 64-bit mode */
2002 if (ctxt->mode == X86EMUL_MODE_PROT64)
2003 ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
2004 VCPU_SREG_CS);
1964 2005
1965 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); 2006 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
1966 2007
1967 rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS); 2008 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
2009 &new_desc);
1968 if (rc != X86EMUL_CONTINUE) 2010 if (rc != X86EMUL_CONTINUE)
1969 return rc; 2011 return rc;
1970 2012
1971 ctxt->_eip = 0; 2013 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
1972 memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); 2014 if (rc != X86EMUL_CONTINUE) {
1973 return X86EMUL_CONTINUE; 2015 WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
2016 /* assigning eip failed; restore the old cs */
2017 ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
2018 return rc;
2019 }
2020 return rc;
1974} 2021}
1975 2022
1976static int em_grp45(struct x86_emulate_ctxt *ctxt) 2023static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
1981 case 2: /* call near abs */ { 2028 case 2: /* call near abs */ {
1982 long int old_eip; 2029 long int old_eip;
1983 old_eip = ctxt->_eip; 2030 old_eip = ctxt->_eip;
1984 ctxt->_eip = ctxt->src.val; 2031 rc = assign_eip_near(ctxt, ctxt->src.val);
2032 if (rc != X86EMUL_CONTINUE)
2033 break;
1985 ctxt->src.val = old_eip; 2034 ctxt->src.val = old_eip;
1986 rc = em_push(ctxt); 2035 rc = em_push(ctxt);
1987 break; 2036 break;
1988 } 2037 }
1989 case 4: /* jmp abs */ 2038 case 4: /* jmp abs */
1990 ctxt->_eip = ctxt->src.val; 2039 rc = assign_eip_near(ctxt, ctxt->src.val);
1991 break; 2040 break;
1992 case 5: /* jmp far */ 2041 case 5: /* jmp far */
1993 rc = em_jmp_far(ctxt); 2042 rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
2022 2071
2023static int em_ret(struct x86_emulate_ctxt *ctxt) 2072static int em_ret(struct x86_emulate_ctxt *ctxt)
2024{ 2073{
2025 ctxt->dst.type = OP_REG; 2074 int rc;
2026 ctxt->dst.addr.reg = &ctxt->_eip; 2075 unsigned long eip;
2027 ctxt->dst.bytes = ctxt->op_bytes; 2076
2028 return em_pop(ctxt); 2077 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2078 if (rc != X86EMUL_CONTINUE)
2079 return rc;
2080
2081 return assign_eip_near(ctxt, eip);
2029} 2082}
2030 2083
2031static int em_ret_far(struct x86_emulate_ctxt *ctxt) 2084static int em_ret_far(struct x86_emulate_ctxt *ctxt)
2032{ 2085{
2033 int rc; 2086 int rc;
2034 unsigned long cs; 2087 unsigned long eip, cs;
2088 u16 old_cs;
2035 int cpl = ctxt->ops->cpl(ctxt); 2089 int cpl = ctxt->ops->cpl(ctxt);
2090 struct desc_struct old_desc, new_desc;
2091 const struct x86_emulate_ops *ops = ctxt->ops;
2036 2092
2037 rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); 2093 if (ctxt->mode == X86EMUL_MODE_PROT64)
2094 ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
2095 VCPU_SREG_CS);
2096
2097 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2038 if (rc != X86EMUL_CONTINUE) 2098 if (rc != X86EMUL_CONTINUE)
2039 return rc; 2099 return rc;
2040 if (ctxt->op_bytes == 4)
2041 ctxt->_eip = (u32)ctxt->_eip;
2042 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); 2100 rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
2043 if (rc != X86EMUL_CONTINUE) 2101 if (rc != X86EMUL_CONTINUE)
2044 return rc; 2102 return rc;
2045 /* Outer-privilege level return is not implemented */ 2103 /* Outer-privilege level return is not implemented */
2046 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) 2104 if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
2047 return X86EMUL_UNHANDLEABLE; 2105 return X86EMUL_UNHANDLEABLE;
2048 rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); 2106 rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
2107 &new_desc);
2108 if (rc != X86EMUL_CONTINUE)
2109 return rc;
2110 rc = assign_eip_far(ctxt, eip, new_desc.l);
2111 if (rc != X86EMUL_CONTINUE) {
2112 WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
2113 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
2114 }
2049 return rc; 2115 return rc;
2050} 2116}
2051 2117
@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2306{ 2372{
2307 const struct x86_emulate_ops *ops = ctxt->ops; 2373 const struct x86_emulate_ops *ops = ctxt->ops;
2308 struct desc_struct cs, ss; 2374 struct desc_struct cs, ss;
2309 u64 msr_data; 2375 u64 msr_data, rcx, rdx;
2310 int usermode; 2376 int usermode;
2311 u16 cs_sel = 0, ss_sel = 0; 2377 u16 cs_sel = 0, ss_sel = 0;
2312 2378
@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2322 else 2388 else
2323 usermode = X86EMUL_MODE_PROT32; 2389 usermode = X86EMUL_MODE_PROT32;
2324 2390
2391 rcx = reg_read(ctxt, VCPU_REGS_RCX);
2392 rdx = reg_read(ctxt, VCPU_REGS_RDX);
2393
2325 cs.dpl = 3; 2394 cs.dpl = 3;
2326 ss.dpl = 3; 2395 ss.dpl = 3;
2327 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); 2396 ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2339 ss_sel = cs_sel + 8; 2408 ss_sel = cs_sel + 8;
2340 cs.d = 0; 2409 cs.d = 0;
2341 cs.l = 1; 2410 cs.l = 1;
2411 if (is_noncanonical_address(rcx) ||
2412 is_noncanonical_address(rdx))
2413 return emulate_gp(ctxt, 0);
2342 break; 2414 break;
2343 } 2415 }
2344 cs_sel |= SELECTOR_RPL_MASK; 2416 cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
2347 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); 2419 ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
2348 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); 2420 ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
2349 2421
2350 ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); 2422 ctxt->_eip = rdx;
2351 *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); 2423 *reg_write(ctxt, VCPU_REGS_RSP) = rcx;
2352 2424
2353 return X86EMUL_CONTINUE; 2425 return X86EMUL_CONTINUE;
2354} 2426}
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
2466 * Now load segment descriptors. If fault happens at this stage 2538 * Now load segment descriptors. If fault happens at this stage
2467 * it is handled in a context of new task 2539 * it is handled in a context of new task
2468 */ 2540 */
2469 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true); 2541 ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
2542 true, NULL);
2470 if (ret != X86EMUL_CONTINUE) 2543 if (ret != X86EMUL_CONTINUE)
2471 return ret; 2544 return ret;
2472 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true); 2545 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2546 true, NULL);
2473 if (ret != X86EMUL_CONTINUE) 2547 if (ret != X86EMUL_CONTINUE)
2474 return ret; 2548 return ret;
2475 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true); 2549 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2550 true, NULL);
2476 if (ret != X86EMUL_CONTINUE) 2551 if (ret != X86EMUL_CONTINUE)
2477 return ret; 2552 return ret;
2478 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true); 2553 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2554 true, NULL);
2479 if (ret != X86EMUL_CONTINUE) 2555 if (ret != X86EMUL_CONTINUE)
2480 return ret; 2556 return ret;
2481 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true); 2557 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2558 true, NULL);
2482 if (ret != X86EMUL_CONTINUE) 2559 if (ret != X86EMUL_CONTINUE)
2483 return ret; 2560 return ret;
2484 2561
@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2603 * Now load segment descriptors. If fault happenes at this stage 2680 * Now load segment descriptors. If fault happenes at this stage
2604 * it is handled in a context of new task 2681 * it is handled in a context of new task
2605 */ 2682 */
2606 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true); 2683 ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
2684 cpl, true, NULL);
2607 if (ret != X86EMUL_CONTINUE) 2685 if (ret != X86EMUL_CONTINUE)
2608 return ret; 2686 return ret;
2609 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true); 2687 ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
2688 true, NULL);
2610 if (ret != X86EMUL_CONTINUE) 2689 if (ret != X86EMUL_CONTINUE)
2611 return ret; 2690 return ret;
2612 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true); 2691 ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
2692 true, NULL);
2613 if (ret != X86EMUL_CONTINUE) 2693 if (ret != X86EMUL_CONTINUE)
2614 return ret; 2694 return ret;
2615 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true); 2695 ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
2696 true, NULL);
2616 if (ret != X86EMUL_CONTINUE) 2697 if (ret != X86EMUL_CONTINUE)
2617 return ret; 2698 return ret;
2618 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true); 2699 ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
2700 true, NULL);
2619 if (ret != X86EMUL_CONTINUE) 2701 if (ret != X86EMUL_CONTINUE)
2620 return ret; 2702 return ret;
2621 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true); 2703 ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
2704 true, NULL);
2622 if (ret != X86EMUL_CONTINUE) 2705 if (ret != X86EMUL_CONTINUE)
2623 return ret; 2706 return ret;
2624 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true); 2707 ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
2708 true, NULL);
2625 if (ret != X86EMUL_CONTINUE) 2709 if (ret != X86EMUL_CONTINUE)
2626 return ret; 2710 return ret;
2627 2711
@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
2888 2972
2889static int em_call(struct x86_emulate_ctxt *ctxt) 2973static int em_call(struct x86_emulate_ctxt *ctxt)
2890{ 2974{
2975 int rc;
2891 long rel = ctxt->src.val; 2976 long rel = ctxt->src.val;
2892 2977
2893 ctxt->src.val = (unsigned long)ctxt->_eip; 2978 ctxt->src.val = (unsigned long)ctxt->_eip;
2894 jmp_rel(ctxt, rel); 2979 rc = jmp_rel(ctxt, rel);
2980 if (rc != X86EMUL_CONTINUE)
2981 return rc;
2895 return em_push(ctxt); 2982 return em_push(ctxt);
2896} 2983}
2897 2984
@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
2900 u16 sel, old_cs; 2987 u16 sel, old_cs;
2901 ulong old_eip; 2988 ulong old_eip;
2902 int rc; 2989 int rc;
2990 struct desc_struct old_desc, new_desc;
2991 const struct x86_emulate_ops *ops = ctxt->ops;
2992 int cpl = ctxt->ops->cpl(ctxt);
2903 2993
2904 old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
2905 old_eip = ctxt->_eip; 2994 old_eip = ctxt->_eip;
2995 ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
2906 2996
2907 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2); 2997 memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
2908 if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS)) 2998 rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
2999 &new_desc);
3000 if (rc != X86EMUL_CONTINUE)
2909 return X86EMUL_CONTINUE; 3001 return X86EMUL_CONTINUE;
2910 3002
2911 ctxt->_eip = 0; 3003 rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
2912 memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes); 3004 if (rc != X86EMUL_CONTINUE)
3005 goto fail;
2913 3006
2914 ctxt->src.val = old_cs; 3007 ctxt->src.val = old_cs;
2915 rc = em_push(ctxt); 3008 rc = em_push(ctxt);
2916 if (rc != X86EMUL_CONTINUE) 3009 if (rc != X86EMUL_CONTINUE)
2917 return rc; 3010 goto fail;
2918 3011
2919 ctxt->src.val = old_eip; 3012 ctxt->src.val = old_eip;
2920 return em_push(ctxt); 3013 rc = em_push(ctxt);
3014 /* If we failed, we tainted the memory, but the very least we should
3015 restore cs */
3016 if (rc != X86EMUL_CONTINUE)
3017 goto fail;
3018 return rc;
3019fail:
3020 ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
3021 return rc;
3022
2921} 3023}
2922 3024
2923static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) 3025static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2924{ 3026{
2925 int rc; 3027 int rc;
3028 unsigned long eip;
2926 3029
2927 ctxt->dst.type = OP_REG; 3030 rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
2928 ctxt->dst.addr.reg = &ctxt->_eip; 3031 if (rc != X86EMUL_CONTINUE)
2929 ctxt->dst.bytes = ctxt->op_bytes; 3032 return rc;
2930 rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); 3033 rc = assign_eip_near(ctxt, eip);
2931 if (rc != X86EMUL_CONTINUE) 3034 if (rc != X86EMUL_CONTINUE)
2932 return rc; 3035 return rc;
2933 rsp_increment(ctxt, ctxt->src.val); 3036 rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
3254 3357
3255static int em_loop(struct x86_emulate_ctxt *ctxt) 3358static int em_loop(struct x86_emulate_ctxt *ctxt)
3256{ 3359{
3360 int rc = X86EMUL_CONTINUE;
3361
3257 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); 3362 register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
3258 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && 3363 if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
3259 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) 3364 (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
3260 jmp_rel(ctxt, ctxt->src.val); 3365 rc = jmp_rel(ctxt, ctxt->src.val);
3261 3366
3262 return X86EMUL_CONTINUE; 3367 return rc;
3263} 3368}
3264 3369
3265static int em_jcxz(struct x86_emulate_ctxt *ctxt) 3370static int em_jcxz(struct x86_emulate_ctxt *ctxt)
3266{ 3371{
3372 int rc = X86EMUL_CONTINUE;
3373
3267 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) 3374 if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
3268 jmp_rel(ctxt, ctxt->src.val); 3375 rc = jmp_rel(ctxt, ctxt->src.val);
3269 3376
3270 return X86EMUL_CONTINUE; 3377 return rc;
3271} 3378}
3272 3379
3273static int em_in(struct x86_emulate_ctxt *ctxt) 3380static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
3355 return X86EMUL_CONTINUE; 3462 return X86EMUL_CONTINUE;
3356} 3463}
3357 3464
3465static int em_clflush(struct x86_emulate_ctxt *ctxt)
3466{
3467 /* emulating clflush regardless of cpuid */
3468 return X86EMUL_CONTINUE;
3469}
3470
3358static bool valid_cr(int nr) 3471static bool valid_cr(int nr)
3359{ 3472{
3360 switch (nr) { 3473 switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
3693 X7(D(Undefined)), 3806 X7(D(Undefined)),
3694}; 3807};
3695 3808
3809static const struct gprefix pfx_0f_ae_7 = {
3810 I(SrcMem | ByteOp, em_clflush), N, N, N,
3811};
3812
3813static const struct group_dual group15 = { {
3814 N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
3815}, {
3816 N, N, N, N, N, N, N, N,
3817} };
3818
3696static const struct gprefix pfx_0f_6f_0f_7f = { 3819static const struct gprefix pfx_0f_6f_0f_7f = {
3697 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), 3820 I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
3698}; 3821};
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
3901 N, I(ImplicitOps | EmulateOnUD, em_syscall), 4024 N, I(ImplicitOps | EmulateOnUD, em_syscall),
3902 II(ImplicitOps | Priv, em_clts, clts), N, 4025 II(ImplicitOps | Priv, em_clts, clts), N,
3903 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, 4026 DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
3904 N, D(ImplicitOps | ModRM), N, N, 4027 N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
3905 /* 0x10 - 0x1F */ 4028 /* 0x10 - 0x1F */
3906 N, N, N, N, N, N, N, N, 4029 N, N, N, N, N, N, N, N,
3907 D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM), 4030 D(ImplicitOps | ModRM | SrcMem | NoAccess),
4031 N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
3908 /* 0x20 - 0x2F */ 4032 /* 0x20 - 0x2F */
3909 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read), 4033 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
3910 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read), 4034 DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
3956 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), 4080 F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
3957 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), 4081 F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
3958 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), 4082 F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
3959 D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), 4083 GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
3960 /* 0xB0 - 0xB7 */ 4084 /* 0xB0 - 0xB7 */
3961 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), 4085 I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
3962 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), 4086 I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
4473 /* Decode and fetch the destination operand: register or memory. */ 4597 /* Decode and fetch the destination operand: register or memory. */
4474 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); 4598 rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
4475 4599
4476done:
4477 if (ctxt->rip_relative) 4600 if (ctxt->rip_relative)
4478 ctxt->memopp->addr.mem.ea += ctxt->_eip; 4601 ctxt->memopp->addr.mem.ea += ctxt->_eip;
4479 4602
4603done:
4480 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK; 4604 return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
4481} 4605}
4482 4606
@@ -4726,7 +4850,7 @@ special_insn:
4726 break; 4850 break;
4727 case 0x70 ... 0x7f: /* jcc (short) */ 4851 case 0x70 ... 0x7f: /* jcc (short) */
4728 if (test_cc(ctxt->b, ctxt->eflags)) 4852 if (test_cc(ctxt->b, ctxt->eflags))
4729 jmp_rel(ctxt, ctxt->src.val); 4853 rc = jmp_rel(ctxt, ctxt->src.val);
4730 break; 4854 break;
4731 case 0x8d: /* lea r16/r32, m */ 4855 case 0x8d: /* lea r16/r32, m */
4732 ctxt->dst.val = ctxt->src.addr.mem.ea; 4856 ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
4756 break; 4880 break;
4757 case 0xe9: /* jmp rel */ 4881 case 0xe9: /* jmp rel */
4758 case 0xeb: /* jmp rel short */ 4882 case 0xeb: /* jmp rel short */
4759 jmp_rel(ctxt, ctxt->src.val); 4883 rc = jmp_rel(ctxt, ctxt->src.val);
4760 ctxt->dst.type = OP_NONE; /* Disable writeback. */ 4884 ctxt->dst.type = OP_NONE; /* Disable writeback. */
4761 break; 4885 break;
4762 case 0xf4: /* hlt */ 4886 case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
4881 break; 5005 break;
4882 case 0x80 ... 0x8f: /* jnz rel, etc*/ 5006 case 0x80 ... 0x8f: /* jnz rel, etc*/
4883 if (test_cc(ctxt->b, ctxt->eflags)) 5007 if (test_cc(ctxt->b, ctxt->eflags))
4884 jmp_rel(ctxt, ctxt->src.val); 5008 rc = jmp_rel(ctxt, ctxt->src.val);
4885 break; 5009 break;
4886 case 0x90 ... 0x9f: /* setcc r/m8 */ 5010 case 0x90 ... 0x9f: /* setcc r/m8 */
4887 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); 5011 ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
4888 break; 5012 break;
4889 case 0xae: /* clflush */
4890 break;
4891 case 0xb6 ... 0xb7: /* movzx */ 5013 case 0xb6 ... 0xb7: /* movzx */
4892 ctxt->dst.bytes = ctxt->op_bytes; 5014 ctxt->dst.bytes = ctxt->op_bytes;
4893 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val 5015 ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
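
The emulator changes above route every branch target through assign_eip_near()/assign_eip_far(), so a non-canonical RIP in 64-bit mode raises #GP instead of being latched, and far jumps/calls restore the old CS if the RIP assignment fails. A simplified, self-contained sketch of that control flow; the struct, enum values and names below are stand-ins, not the kernel's types:

	#include <stdio.h>
	#include <stdint.h>

	enum { EMUL_CONTINUE = 0, EMUL_PROPAGATE_FAULT = 1 };

	struct emul_ctxt {
		uint64_t eip;
		int op_bytes;	/* 2, 4 or 8 */
		int cs_l;	/* 64-bit code segment (CS.L set) */
	};

	static int is_noncanonical(uint64_t la)
	{
		return (uint64_t)(((int64_t)la << 16) >> 16) != la;
	}

	/* Same shape as the patch's assign_eip_far(): truncate for 16/32-bit
	 * operands, fault on a non-canonical 64-bit target instead of latching it. */
	static int assign_eip(struct emul_ctxt *ctxt, uint64_t dst)
	{
		switch (ctxt->op_bytes) {
		case 2: ctxt->eip = (uint16_t)dst; break;
		case 4: ctxt->eip = (uint32_t)dst; break;
		case 8:
			if (ctxt->cs_l && is_noncanonical(dst))
				return EMUL_PROPAGATE_FAULT;	/* emulate_gp(ctxt, 0) */
			ctxt->eip = dst;
			break;
		}
		return EMUL_CONTINUE;
	}

	/* jmp_rel() now returns a status, so callers (jcc, loop, jcxz, call rel)
	 * propagate the fault instead of silently continuing. */
	static int jmp_rel(struct emul_ctxt *ctxt, long rel)
	{
		return assign_eip(ctxt, ctxt->eip + rel);
	}

	int main(void)
	{
		struct emul_ctxt c = { .eip = 0x00007ffffffffff0ULL, .op_bytes = 8, .cs_l = 1 };

		printf("%d\n", jmp_rel(&c, 8));		/* 0: target still canonical */
		printf("%d\n", jmp_rel(&c, 0x100));	/* 1: target crosses into non-canonical space */
		return 0;
	}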
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
262 return; 262 return;
263 263
264 timer = &pit->pit_state.timer; 264 timer = &pit->pit_state.timer;
265 mutex_lock(&pit->pit_state.lock);
265 if (hrtimer_cancel(timer)) 266 if (hrtimer_cancel(timer))
266 hrtimer_start_expires(timer, HRTIMER_MODE_ABS); 267 hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
268 mutex_unlock(&pit->pit_state.lock);
267} 269}
268 270
269static void destroy_pit_timer(struct kvm_pit *pit) 271static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
298 } 298 }
299#endif 299#endif
300 walker->max_level = walker->level; 300 walker->max_level = walker->level;
301 ASSERT(!is_long_mode(vcpu) && is_pae(vcpu)); 301 ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
302 302
303 accessed_dirty = PT_GUEST_ACCESSED_MASK; 303 accessed_dirty = PT_GUEST_ACCESSED_MASK;
304 pt_access = pte_access = ACC_ALL; 304 pt_access = pte_access = ACC_ALL;
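
The one-line assertion fix above encodes the architectural invariant "long mode implies PAE": the old predicate only held for 32-bit PAE guests and fired for perfectly valid configurations. A small illustration of the boolean change, using stub fields rather than the KVM is_long_mode()/is_pae() helpers:

	#include <assert.h>
	#include <stdbool.h>

	struct vcpu_state { bool long_mode; bool pae; };

	static void check(const struct vcpu_state *v)
	{
		/* Old form (wrong): assert(!long_mode && pae);
		 * true only for 32-bit PAE guests, so it trips on 64-bit guests. */

		/* New form: it must never happen that long mode is on while PAE
		 * is off, i.e. long_mode implies pae. */
		assert(!(v->long_mode && !v->pae));
	}

	int main(void)
	{
		struct vcpu_state guest64   = { .long_mode = true,  .pae = true  };
		struct vcpu_state guest32np = { .long_mode = false, .pae = false };

		check(&guest64);	/* passes now; the old assertion would have failed */
		check(&guest32np);	/* passes now; the old assertion would have failed */
		return 0;
	}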
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
3251 msr.host_initiated = false; 3251 msr.host_initiated = false;
3252 3252
3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3254 if (svm_set_msr(&svm->vcpu, &msr)) { 3254 if (kvm_set_msr(&svm->vcpu, &msr)) {
3255 trace_kvm_msr_write_ex(ecx, data); 3255 trace_kvm_msr_write_ex(ecx, data);
3256 kvm_inject_gp(&svm->vcpu, 0); 3256 kvm_inject_gp(&svm->vcpu, 0);
3257 } else { 3257 } else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)
3551 3551
3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3553 || !svm_exit_handlers[exit_code]) { 3553 || !svm_exit_handlers[exit_code]) {
3554 kvm_run->exit_reason = KVM_EXIT_UNKNOWN; 3554 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
3555 kvm_run->hw.hardware_exit_reason = exit_code; 3555 kvm_queue_exception(vcpu, UD_VECTOR);
3556 return 0; 3556 return 1;
3557 } 3557 }
3558 3558
3559 return svm_exit_handlers[exit_code](svm); 3559 return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2659 default: 2659 default:
2660 msr = find_msr_entry(vmx, msr_index); 2660 msr = find_msr_entry(vmx, msr_index);
2661 if (msr) { 2661 if (msr) {
2662 u64 old_msr_data = msr->data;
2662 msr->data = data; 2663 msr->data = data;
2663 if (msr - vmx->guest_msrs < vmx->save_nmsrs) { 2664 if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
2664 preempt_disable(); 2665 preempt_disable();
2665 kvm_set_shared_msr(msr->index, msr->data, 2666 ret = kvm_set_shared_msr(msr->index, msr->data,
2666 msr->mask); 2667 msr->mask);
2667 preempt_enable(); 2668 preempt_enable();
2669 if (ret)
2670 msr->data = old_msr_data;
2668 } 2671 }
2669 break; 2672 break;
2670 } 2673 }
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
5291 msr.data = data; 5294 msr.data = data;
5292 msr.index = ecx; 5295 msr.index = ecx;
5293 msr.host_initiated = false; 5296 msr.host_initiated = false;
5294 if (vmx_set_msr(vcpu, &msr) != 0) { 5297 if (kvm_set_msr(vcpu, &msr) != 0) {
5295 trace_kvm_msr_write_ex(ecx, data); 5298 trace_kvm_msr_write_ex(ecx, data);
5296 kvm_inject_gp(vcpu, 0); 5299 kvm_inject_gp(vcpu, 0);
5297 return 1; 5300 return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6743 return 1; 6746 return 1;
6744} 6747}
6745 6748
6749static int handle_invvpid(struct kvm_vcpu *vcpu)
6750{
6751 kvm_queue_exception(vcpu, UD_VECTOR);
6752 return 1;
6753}
6754
6746/* 6755/*
6747 * The exit handlers return 1 if the exit was handled fully and guest execution 6756 * The exit handlers return 1 if the exit was handled fully and guest execution
6748 * may resume. Otherwise they set the kvm_run parameter to indicate what needs 6757 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
6788 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait, 6797 [EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
6789 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor, 6798 [EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
6790 [EXIT_REASON_INVEPT] = handle_invept, 6799 [EXIT_REASON_INVEPT] = handle_invept,
6800 [EXIT_REASON_INVVPID] = handle_invvpid,
6791}; 6801};
6792 6802
6793static const int kvm_vmx_max_exit_handlers = 6803static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
7023 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD: 7033 case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
7024 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE: 7034 case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
7025 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON: 7035 case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
7026 case EXIT_REASON_INVEPT: 7036 case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
7027 /* 7037 /*
7028 * VMX instructions trap unconditionally. This allows L1 to 7038 * VMX instructions trap unconditionally. This allows L1 to
7029 * emulate them for its L2 guest, i.e., allows 3-level nesting! 7039 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
7164 && kvm_vmx_exit_handlers[exit_reason]) 7174 && kvm_vmx_exit_handlers[exit_reason])
7165 return kvm_vmx_exit_handlers[exit_reason](vcpu); 7175 return kvm_vmx_exit_handlers[exit_reason](vcpu);
7166 else { 7176 else {
7167 vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; 7177 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
7168 vcpu->run->hw.hardware_exit_reason = exit_reason; 7178 kvm_queue_exception(vcpu, UD_VECTOR);
7179 return 1;
7169 } 7180 }
7170 return 0;
7171} 7181}
7172 7182
7173static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 7183static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
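
The vmx_set_msr() hunk above pairs with the kvm_set_shared_msr() change in x86.c further down: the host-side WRMSR now goes through wrmsrl_safe(), and if it faults the cached value is rolled back so the guest gets #GP instead of the host panicking. A minimal sketch of that save/attempt/rollback pattern; checked_wrmsr() is a hypothetical stand-in for wrmsrl_safe(), not a real API:

	#include <stdio.h>
	#include <stdint.h>

	/* Hypothetical stand-in for wrmsrl_safe(): 0 on success, non-zero if the
	 * write would have raised #GP on the host (here: non-canonical value). */
	static int checked_wrmsr(uint32_t msr, uint64_t val)
	{
		(void)msr;
		return (uint64_t)(((int64_t)val << 16) >> 16) != val;
	}

	struct shared_msr { uint32_t index; uint64_t cached; };

	/* Save/attempt/rollback: on failure the cache stays consistent with the
	 * hardware state and the caller can inject #GP into the guest. */
	static int set_guest_msr(struct shared_msr *m, uint64_t data)
	{
		uint64_t old = m->cached;

		m->cached = data;
		if (checked_wrmsr(m->index, data)) {
			m->cached = old;
			return 1;
		}
		return 0;
	}

	int main(void)
	{
		struct shared_msr gs_base = { .index = 0xc0000101 /* MSR_GS_BASE */ };

		printf("%d\n", set_guest_msr(&gs_base, 0x00007f0000000000ULL)); /* 0 */
		printf("%d\n", set_guest_msr(&gs_base, 0x1234567890abcdefULL)); /* 1, rolled back */
		printf("cached=%llx\n", (unsigned long long)gs_base.cached);
		return 0;
	}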
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
229 shared_msr_update(i, shared_msrs_global.msrs[i]); 229 shared_msr_update(i, shared_msrs_global.msrs[i]);
230} 230}
231 231
232void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) 232int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
233{ 233{
234 unsigned int cpu = smp_processor_id(); 234 unsigned int cpu = smp_processor_id();
235 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); 235 struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
236 int err;
236 237
237 if (((value ^ smsr->values[slot].curr) & mask) == 0) 238 if (((value ^ smsr->values[slot].curr) & mask) == 0)
238 return; 239 return 0;
239 smsr->values[slot].curr = value; 240 smsr->values[slot].curr = value;
240 wrmsrl(shared_msrs_global.msrs[slot], value); 241 err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
242 if (err)
243 return 1;
244
241 if (!smsr->registered) { 245 if (!smsr->registered) {
242 smsr->urn.on_user_return = kvm_on_user_return; 246 smsr->urn.on_user_return = kvm_on_user_return;
243 user_return_notifier_register(&smsr->urn); 247 user_return_notifier_register(&smsr->urn);
244 smsr->registered = true; 248 smsr->registered = true;
245 } 249 }
250 return 0;
246} 251}
247EXPORT_SYMBOL_GPL(kvm_set_shared_msr); 252EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
248 253
@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
987} 992}
988EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); 993EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
989 994
990
991/* 995/*
992 * Writes msr value into into the appropriate "register". 996 * Writes msr value into into the appropriate "register".
993 * Returns 0 on success, non-0 otherwise. 997 * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
995 */ 999 */
996int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) 1000int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
997{ 1001{
1002 switch (msr->index) {
1003 case MSR_FS_BASE:
1004 case MSR_GS_BASE:
1005 case MSR_KERNEL_GS_BASE:
1006 case MSR_CSTAR:
1007 case MSR_LSTAR:
1008 if (is_noncanonical_address(msr->data))
1009 return 1;
1010 break;
1011 case MSR_IA32_SYSENTER_EIP:
1012 case MSR_IA32_SYSENTER_ESP:
1013 /*
1014 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
1015 * non-canonical address is written on Intel but not on
1016 * AMD (which ignores the top 32-bits, because it does
1017 * not implement 64-bit SYSENTER).
1018 *
1019 * 64-bit code should hence be able to write a non-canonical
1020 * value on AMD. Making the address canonical ensures that
1021 * vmentry does not fail on Intel after writing a non-canonical
1022 * value, and that something deterministic happens if the guest
1023 * invokes 64-bit SYSENTER.
1024 */
1025 msr->data = get_canonical(msr->data);
1026 }
998 return kvm_x86_ops->set_msr(vcpu, msr); 1027 return kvm_x86_ops->set_msr(vcpu, msr);
999} 1028}
1029EXPORT_SYMBOL_GPL(kvm_set_msr);
1000 1030
1001/* 1031/*
1002 * Adapt set_msr() to msr_io()'s calling convention 1032 * Adapt set_msr() to msr_io()'s calling convention
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 28be31f49250..ea53b04993f2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
1080void kvm_device_put(struct kvm_device *dev); 1080void kvm_device_put(struct kvm_device *dev);
1081struct kvm_device *kvm_device_from_filp(struct file *filp); 1081struct kvm_device *kvm_device_from_filp(struct file *filp);
1082int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); 1082int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
1083void kvm_unregister_device_ops(u32 type);
1083 1084
1084extern struct kvm_device_ops kvm_mpic_ops; 1085extern struct kvm_device_ops kvm_mpic_ops;
1085extern struct kvm_device_ops kvm_xics_ops; 1086extern struct kvm_device_ops kvm_xics_ops;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e51d9f9b995f..c1e6ae989a43 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
43 gfn_t base_gfn, unsigned long npages); 43 gfn_t base_gfn, unsigned long npages);
44 44
45static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, 45static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
46 unsigned long size) 46 unsigned long npages)
47{ 47{
48 gfn_t end_gfn; 48 gfn_t end_gfn;
49 pfn_t pfn; 49 pfn_t pfn;
50 50
51 pfn = gfn_to_pfn_memslot(slot, gfn); 51 pfn = gfn_to_pfn_memslot(slot, gfn);
52 end_gfn = gfn + (size >> PAGE_SHIFT); 52 end_gfn = gfn + npages;
53 gfn += 1; 53 gfn += 1;
54 54
55 if (is_error_noslot_pfn(pfn)) 55 if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
119 * Pin all pages we are about to map in memory. This is 119 * Pin all pages we are about to map in memory. This is
120 * important because we unmap and unpin in 4kb steps later. 120 * important because we unmap and unpin in 4kb steps later.
121 */ 121 */
122 pfn = kvm_pin_pages(slot, gfn, page_size); 122 pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
123 if (is_error_noslot_pfn(pfn)) { 123 if (is_error_noslot_pfn(pfn)) {
124 gfn += 1; 124 gfn += 1;
125 continue; 125 continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
131 if (r) { 131 if (r) {
132 printk(KERN_ERR "kvm_iommu_map_address:" 132 printk(KERN_ERR "kvm_iommu_map_address:"
133 "iommu failed to map pfn=%llx\n", pfn); 133 "iommu failed to map pfn=%llx\n", pfn);
134 kvm_unpin_pages(kvm, pfn, page_size); 134 kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
135 goto unmap_pages; 135 goto unmap_pages;
136 } 136 }
137 137
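
The iommu.c fix above is purely a units problem: kvm_pin_pages() was handed a size in bytes, but the error path unpinned that many pages, over-unpinning by a factor of PAGE_SIZE. A small sketch of the corrected convention, with stub helpers; only the byte/page conversion is the point:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	/* Both helpers now take a count of pages, never a byte length. */
	static void pin_pages(unsigned long npages)   { printf("pin %lu pages\n", npages); }
	static void unpin_pages(unsigned long npages) { printf("unpin %lu pages\n", npages); }

	int main(void)
	{
		unsigned long page_size = 2UL << 20;	/* a 2 MiB large-page mapping */

		pin_pages(page_size >> PAGE_SHIFT);	/* 512 pages */
		/* On a mapping failure the same count is unpinned; passing page_size
		 * itself (the old bug) would try to unpin 2097152 pages. */
		unpin_pages(page_size >> PAGE_SHIFT);
		return 0;
	}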
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 384eaa7b02fa..25ffac9e947d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
2354 return 0; 2354 return 0;
2355} 2355}
2356 2356
2357void kvm_unregister_device_ops(u32 type)
2358{
2359 if (kvm_device_ops_table[type] != NULL)
2360 kvm_device_ops_table[type] = NULL;
2361}
2362
2357static int kvm_ioctl_create_device(struct kvm *kvm, 2363static int kvm_ioctl_create_device(struct kvm *kvm,
2358 struct kvm_create_device *cd) 2364 struct kvm_create_device *cd)
2359{ 2365{
@@ -3328,5 +3334,6 @@ void kvm_exit(void)
3328 kvm_arch_exit(); 3334 kvm_arch_exit();
3329 kvm_irqfd_exit(); 3335 kvm_irqfd_exit();
3330 free_cpumask_var(cpus_hardware_enabled); 3336 free_cpumask_var(cpus_hardware_enabled);
3337 kvm_vfio_ops_exit();
3331} 3338}
3332EXPORT_SYMBOL_GPL(kvm_exit); 3339EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 281e7cf2b8e5..620e37f741b8 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
283{ 283{
284 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO); 284 return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
285} 285}
286
287void kvm_vfio_ops_exit(void)
288{
289 kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
290}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
index 92eac75d6b62..ab88c7dc0514 100644
--- a/virt/kvm/vfio.h
+++ b/virt/kvm/vfio.h
@@ -3,11 +3,15 @@
3 3
4#ifdef CONFIG_KVM_VFIO 4#ifdef CONFIG_KVM_VFIO
5int kvm_vfio_ops_init(void); 5int kvm_vfio_ops_init(void);
6void kvm_vfio_ops_exit(void);
6#else 7#else
7static inline int kvm_vfio_ops_init(void) 8static inline int kvm_vfio_ops_init(void)
8{ 9{
9 return 0; 10 return 0;
10} 11}
12static inline void kvm_vfio_ops_exit(void)
13{
14}
11#endif 15#endif
12 16
13#endif 17#endif