author     Linus Torvalds <torvalds@linux-foundation.org>    2014-10-24 15:42:55 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2014-10-24 15:42:55 -0400
commit     96971e9aa9578322648b2de593fd4863f3d9fc39 (patch)
tree       f2320a488e63583664692e94f37edc9f07174d8a
parent     20ca57cde5557d8623af8cbf81a17733bbbce3a6 (diff)
parent     571ee1b6859869a09ed718d390aac2b9414646a2 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"This is a pretty large update. I think it is roughly as big as what I
usually had for the _whole_ rc period.
There are a few bad bugs where the guest can OOPS or crash the host.
We have also started looking at attack models for nested
virtualization; bugs that usually result in the guest ring 0 crashing
itself become more worrisome if you have nested virtualization,
because the nested guest might bring down the non-nested guest as
well. For current uses of nested virtualization these do not really
have a security impact, but you never know and bugs are bugs
nevertheless.
A lot of these bugs are in 3.17 too, resulting in a large number of
stable@ Ccs. I checked that all the patches apply there with no
conflicts"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
kvm: vfio: fix unregister kvm_device_ops of vfio
KVM: x86: Wrong assertion on paging_tmpl.h
kvm: fix excessive pages un-pinning in kvm_iommu_map error path.
KVM: x86: PREFETCH and HINT_NOP should have SrcMem flag
KVM: x86: Emulator does not decode clflush well
KVM: emulate: avoid accessing NULL ctxt->memopp
KVM: x86: Decoding guest instructions which cross page boundary may fail
kvm: x86: don't kill guest on unknown exit reason
kvm: vmx: handle invvpid vm exit gracefully
KVM: x86: Handle errors when RIP is set during far jumps
KVM: x86: Emulator fixes for eip canonical checks on near branches
KVM: x86: Fix wrong masking on relative jump/call
KVM: x86: Improve thread safety in pit
KVM: x86: Prevent host from panicking on shared MSR writes.
KVM: x86: Check non-canonical addresses upon WRMSR
-rw-r--r--   arch/x86/include/asm/kvm_host.h    16
-rw-r--r--   arch/x86/include/uapi/asm/vmx.h     2
-rw-r--r--   arch/x86/kvm/emulate.c            250
-rw-r--r--   arch/x86/kvm/i8254.c                2
-rw-r--r--   arch/x86/kvm/paging_tmpl.h          2
-rw-r--r--   arch/x86/kvm/svm.c                  8
-rw-r--r--   arch/x86/kvm/vmx.c                 24
-rw-r--r--   arch/x86/kvm/x86.c                 38
-rw-r--r--   include/linux/kvm_host.h            1
-rw-r--r--   virt/kvm/iommu.c                    8
-rw-r--r--   virt/kvm/kvm_main.c                 7
-rw-r--r--   virt/kvm/vfio.c                     5
-rw-r--r--   virt/kvm/vfio.h                     4
13 files changed, 282 insertions, 85 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7d603a71ab3a..6ed0c30d6a0c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -989,6 +989,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
 }

+static inline u64 get_canonical(u64 la)
+{
+	return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+	return get_canonical(la) != la;
+#else
+	return false;
+#endif
+}
+
 #define TSS_IOPB_BASE_OFFSET 0x66
 #define TSS_BASE_SIZE 0x68
 #define TSS_IOPB_SIZE (65536 / 8)
@@ -1050,7 +1064,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					    unsigned long address);

 void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);

 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);

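The pair of helpers this hunk adds encodes the x86-64 canonical-address rule (bits 63:48 must be a sign extension of bit 47) that several fixes in this pull lean on: the WRMSR checks in x86.c, the far-branch checks in emulate.c, and the SYSEXIT check. As a quick illustration outside the kernel, a minimal userspace sketch; only get_canonical()/is_noncanonical_address() mirror the patch, the main() harness and sample addresses are purely illustrative.

#include <stdint.h>
#include <stdio.h>

/* Same logic as the helpers added to kvm_host.h above. */
static inline uint64_t get_canonical(uint64_t la)
{
	return ((int64_t)la << 16) >> 16;
}

static inline int is_noncanonical_address(uint64_t la)
{
	return get_canonical(la) != la;
}

int main(void)
{
	/* Sample addresses, chosen only for illustration. */
	const uint64_t addrs[] = {
		0x00007fffffffe000ULL,	/* typical user address: canonical   */
		0xffff880000000000ULL,	/* kernel direct map: canonical      */
		0x0000800000000000ULL,	/* bit 47 set, bits 63:48 clear: bad */
	};

	for (unsigned i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++)
		printf("%#018llx -> %s\n", (unsigned long long)addrs[i],
		       is_noncanonical_address(addrs[i]) ? "non-canonical"
							 : "canonical");
	return 0;
}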
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420376eb..990a2fe1588d 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_MISCONFIG 49
 #define EXIT_REASON_INVEPT 50
 #define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
 #define EXIT_REASON_WBINVD 54
 #define EXIT_REASON_XSETBV 55
 #define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
 	{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
 	{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD, "INVD" }, \
+	{ EXIT_REASON_INVVPID, "INVVPID" }, \
 	{ EXIT_REASON_INVPCID, "INVPCID" }

 #endif /* _UAPIVMX_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a46207a05835..749f9fa38254 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -504,11 +504,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }

-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
 static u32 desc_limit_scaled(struct desc_struct *desc)
 {
 	u32 limit = get_desc_limit(desc);
@@ -569,6 +564,38 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }

+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+				 int cs_l)
+{
+	switch (ctxt->op_bytes) {
+	case 2:
+		ctxt->_eip = (u16)dst;
+		break;
+	case 4:
+		ctxt->_eip = (u32)dst;
+		break;
+	case 8:
+		if ((cs_l && is_noncanonical_address(dst)) ||
+		    (!cs_l && (dst & ~(u32)-1)))
+			return emulate_gp(ctxt, 0);
+		ctxt->_eip = dst;
+		break;
+	default:
+		WARN(1, "unsupported eip assignment size\n");
+	}
+	return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
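Every IP update in the emulator now funnels through the assign_eip_near()/assign_eip_far() helpers added above, which is how the "Fix wrong masking on relative jump/call" and "Emulator fixes for eip canonical checks on near branches" entries in the shortlog are implemented. A standalone sketch of the masking a near jump now gets, with illustrative values and harness (not taken from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t eip = 0xffff;	/* current _eip                     */
	int64_t  rel = 3;	/* displacement of a short jmp/call */

	uint64_t unmasked = eip + rel;			/* 0x10002: leaks into bit 16  */
	uint16_t op2      = (uint16_t)(eip + rel);	/* op_bytes == 2: wraps to 0x2 */
	uint32_t op4      = (uint32_t)(eip + rel);	/* op_bytes == 4: 0x10002      */

	printf("unmasked %#llx, 16-bit %#x, 32-bit %#x\n",
	       (unsigned long long)unmasked, op2, op4);
	return 0;
}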
@@ -751,8 +778,10 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
					       unsigned size)
 {
-	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
-		return __do_insn_fetch_bytes(ctxt, size);
+	unsigned done_size = ctxt->fetch.end - ctxt->fetch.ptr;
+
+	if (unlikely(done_size < size))
+		return __do_insn_fetch_bytes(ctxt, size - done_size);
 	else
 		return X86EMUL_CONTINUE;
 }
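The hunk above changes how many bytes the fetch helper requests when part of the instruction is already buffered; asking for the full size again is what made decoding fail for instructions that cross a page boundary ("Decoding guest instructions which cross page boundary may fail" in the shortlog). A toy model of the accounting, with an illustrative helper name and numbers:

#include <stdio.h>

/*
 * done_size bytes of the instruction are already in the fetch buffer and the
 * decoder needs `size` bytes in total; only the difference has to be read
 * from guest memory.  bytes_to_fetch() is an illustrative name, not a kernel
 * function.
 */
static unsigned int bytes_to_fetch(unsigned int done_size, unsigned int size,
				   int apply_fix)
{
	if (done_size >= size)
		return 0;			/* everything is buffered already */
	return apply_fix ? size - done_size : size;
}

int main(void)
{
	/* 2 bytes already fetched, 4 needed in total. */
	printf("old request: %u bytes\n", bytes_to_fetch(2, 4, 0));	/* 4 */
	printf("new request: %u bytes\n", bytes_to_fetch(2, 4, 1));	/* 2 */
	return 0;
}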
@@ -1416,7 +1445,9 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,

 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl, bool in_task_switch)
+				     u16 selector, int seg, u8 cpl,
+				     bool in_task_switch,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl;
@@ -1557,6 +1588,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	return emulate_exception(ctxt, err_vec, err_code, true);
@@ -1566,7 +1599,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
 }

 static void write_register_operand(struct operand *op)
@@ -1960,17 +1993,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);

 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;

-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }

 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1981,13 +2028,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
 	case 2: /* call near abs */ {
 		long int old_eip;
 		old_eip = ctxt->_eip;
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
+		if (rc != X86EMUL_CONTINUE)
+			break;
 		ctxt->src.val = old_eip;
 		rc = em_push(ctxt);
 		break;
 	}
 	case 4: /* jmp abs */
-		ctxt->_eip = ctxt->src.val;
+		rc = assign_eip_near(ctxt, ctxt->src.val);
 		break;
 	case 5: /* jmp far */
 		rc = em_jmp_far(ctxt);
@@ -2022,30 +2071,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)

 static int em_ret(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	return em_pop(ctxt);
+	int rc;
+	unsigned long eip;
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return assign_eip_near(ctxt, eip);
 }

 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;

-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
+
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }

@@ -2306,7 +2372,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
-	u64 msr_data;
+	u64 msr_data, rcx, rdx;
 	int usermode;
 	u16 cs_sel = 0, ss_sel = 0;

@@ -2322,6 +2388,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	else
 		usermode = X86EMUL_MODE_PROT32;

+	rcx = reg_read(ctxt, VCPU_REGS_RCX);
+	rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
 	cs.dpl = 3;
 	ss.dpl = 3;
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2339,6 +2408,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
+		if (is_noncanonical_address(rcx) ||
+		    is_noncanonical_address(rdx))
+			return emulate_gp(ctxt, 0);
 		break;
 	}
 	cs_sel |= SELECTOR_RPL_MASK;
@@ -2347,8 +2419,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

-	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
-	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+	ctxt->_eip = rdx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;

 	return X86EMUL_CONTINUE;
 }
@@ -2466,19 +2538,24 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
	 * Now load segment descriptors. If fault happens at this stage
	 * it is handled in a context of new task
	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;

@@ -2603,25 +2680,32 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
	 * Now load segment descriptors. If fault happenes at this stage
	 * it is handled in a context of new task
	 */
-	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+					cpl, true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
+	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+					true, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;

@@ -2888,10 +2972,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)

 static int em_call(struct x86_emulate_ctxt *ctxt)
 {
+	int rc;
 	long rel = ctxt->src.val;

 	ctxt->src.val = (unsigned long)ctxt->_eip;
-	jmp_rel(ctxt, rel);
+	rc = jmp_rel(ctxt, rel);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 	return em_push(ctxt);
 }

@@ -2900,34 +2987,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	int cpl = ctxt->ops->cpl(ctxt);

-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);

 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;

-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;

 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;

 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }

 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
+	unsigned long eip;

-	ctxt->dst.type = OP_REG;
-	ctxt->dst.addr.reg = &ctxt->_eip;
-	ctxt->dst.bytes = ctxt->op_bytes;
-	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_near(ctxt, eip);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	rsp_increment(ctxt, ctxt->src.val);
@@ -3254,20 +3357,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)

 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);

-	return X86EMUL_CONTINUE;
+	return rc;
 }

 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
+	int rc = X86EMUL_CONTINUE;
+
 	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);

-	return X86EMUL_CONTINUE;
+	return rc;
 }

 static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -3355,6 +3462,12 @@ static int em_bswap(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }

+static int em_clflush(struct x86_emulate_ctxt *ctxt)
+{
+	/* emulating clflush regardless of cpuid */
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3693,6 +3806,16 @@ static const struct opcode group11[] = {
 	X7(D(Undefined)),
 };

+static const struct gprefix pfx_0f_ae_7 = {
+	I(SrcMem | ByteOp, em_clflush), N, N, N,
+};
+
+static const struct group_dual group15 = { {
+	N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+}, {
+	N, N, N, N, N, N, N, N,
+} };
+
 static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
@@ -3901,10 +4024,11 @@ static const struct opcode twobyte_table[256] = {
 	N, I(ImplicitOps | EmulateOnUD, em_syscall),
 	II(ImplicitOps | Priv, em_clts, clts), N,
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
-	N, D(ImplicitOps | ModRM), N, N,
+	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
 	N, N, N, N, N, N, N, N,
-	D(ImplicitOps | ModRM), N, N, N, N, N, N, D(ImplicitOps | ModRM),
+	D(ImplicitOps | ModRM | SrcMem | NoAccess),
+	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, cr_read, check_cr_read),
 	DIP(ModRM | DstMem | Priv | Op3264 | NoMod, dr_read, check_dr_read),
@@ -3956,7 +4080,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
-	D(ModRM), F(DstReg | SrcMem | ModRM, em_imul),
+	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
 	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
@@ -4473,10 +4597,10 @@ done_prefixes:
 	/* Decode and fetch the destination operand: register or memory. */
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);

-done:
 	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;

+done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
 }

@@ -4726,7 +4850,7 @@ special_insn:
 		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x8d: /* lea r16/r32, m */
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4756,7 +4880,7 @@ special_insn:
 		break;
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		jmp_rel(ctxt, ctxt->src.val);
+		rc = jmp_rel(ctxt, ctxt->src.val);
 		ctxt->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4: /* hlt */
@@ -4881,13 +5005,11 @@ twobyte_insn:
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
 		if (test_cc(ctxt->b, ctxt->eflags))
-			jmp_rel(ctxt, ctxt->src.val);
+			rc = jmp_rel(ctxt, ctxt->src.val);
 		break;
 	case 0x90 ... 0x9f: /* setcc r/m8 */
 		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
 		break;
-	case 0xae: /* clflush */
-		break;
 	case 0xb6 ... 0xb7: /* movzx */
 		ctxt->dst.bytes = ctxt->op_bytes;
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (u8) ctxt->src.val
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d86471b76..298781d4cfb4 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 		return;

 	timer = &pit->pit_state.timer;
+	mutex_lock(&pit->pit_state.lock);
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
 }

 static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 806d58e3c320..fd49c867b25a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -298,7 +298,7 @@ retry_walk:
 	}
 #endif
 	walker->max_level = walker->level;
-	ASSERT(!is_long_mode(vcpu) && is_pae(vcpu));
+	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));

 	accessed_dirty = PT_GUEST_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 65510f624dfe..7527cefc5a43 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3251,7 +3251,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
 	msr.host_initiated = false;

 	svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-	if (svm_set_msr(&svm->vcpu, &msr)) {
+	if (kvm_set_msr(&svm->vcpu, &msr)) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(&svm->vcpu, 0);
 	} else {
@@ -3551,9 +3551,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)

 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
	    || !svm_exit_handlers[exit_code]) {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_code;
-		return 0;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}

 	return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0acac81f198b..a8b76c4c95e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2659,12 +2659,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	default:
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
+			u64 old_msr_data = msr->data;
 			msr->data = data;
 			if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
 				preempt_disable();
-				kvm_set_shared_msr(msr->index, msr->data,
+				ret = kvm_set_shared_msr(msr->index, msr->data,
						   msr->mask);
 				preempt_enable();
+				if (ret)
+					msr->data = old_msr_data;
 			}
 			break;
 		}
@@ -5291,7 +5294,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
 	msr.data = data;
 	msr.index = ecx;
 	msr.host_initiated = false;
-	if (vmx_set_msr(vcpu, &msr) != 0) {
+	if (kvm_set_msr(vcpu, &msr) != 0) {
 		trace_kvm_msr_write_ex(ecx, data);
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -6743,6 +6746,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	return 1;
 }

+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6788,6 +6797,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_MWAIT_INSTRUCTION] = handle_mwait,
 	[EXIT_REASON_MONITOR_INSTRUCTION] = handle_monitor,
 	[EXIT_REASON_INVEPT] = handle_invept,
+	[EXIT_REASON_INVVPID] = handle_invvpid,
 };

 static const int kvm_vmx_max_exit_handlers =
@@ -7023,7 +7033,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
 	case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
 	case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
-	case EXIT_REASON_INVEPT:
+	case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
 		/*
		 * VMX instructions trap unconditionally. This allows L1 to
		 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7164,10 +7174,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);
 	else {
-		vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-		vcpu->run->hw.hardware_exit_reason = exit_reason;
+		WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
 	}
-	return 0;
 }

 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c8f94331f8..0033df32a745 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -229,20 +229,25 @@ static void kvm_shared_msr_cpu_online(void)
 		shared_msr_update(i, shared_msrs_global.msrs[i]);
 }

-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+	int err;

 	if (((value ^ smsr->values[slot].curr) & mask) == 0)
-		return;
+		return 0;
 	smsr->values[slot].curr = value;
-	wrmsrl(shared_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+	if (err)
+		return 1;
+
 	if (!smsr->registered) {
 		smsr->urn.on_user_return = kvm_on_user_return;
 		user_return_notifier_register(&smsr->urn);
 		smsr->registered = true;
 	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

@@ -987,7 +992,6 @@ void kvm_enable_efer_bits(u64 mask)
 }
 EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

-
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -995,8 +999,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
  */
 int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
+	switch (msr->index) {
+	case MSR_FS_BASE:
+	case MSR_GS_BASE:
+	case MSR_KERNEL_GS_BASE:
+	case MSR_CSTAR:
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr->data))
+			return 1;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+	case MSR_IA32_SYSENTER_ESP:
+		/*
+		 * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+		 * non-canonical address is written on Intel but not on
+		 * AMD (which ignores the top 32-bits, because it does
+		 * not implement 64-bit SYSENTER).
+		 *
+		 * 64-bit code should hence be able to write a non-canonical
+		 * value on AMD. Making the address canonical ensures that
+		 * vmentry does not fail on Intel after writing a non-canonical
+		 * value, and that something deterministic happens if the guest
+		 * invokes 64-bit SYSENTER.
+		 */
+		msr->data = get_canonical(msr->data);
+	}
 	return kvm_x86_ops->set_msr(vcpu, msr);
 }
+EXPORT_SYMBOL_GPL(kvm_set_msr);

 /*
  * Adapt set_msr() to msr_io()'s calling convention
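The switch added above applies two different policies, as its comment explains: segment-base MSRs refuse a non-canonical value outright (the caller then injects #GP), while the SYSENTER MSRs are silently canonicalized so a later vmentry cannot fail. A small userspace sketch of that distinction; the two helpers mirror the ones added to kvm_host.h earlier in this merge, everything else (the harness, the labels in the output) is illustrative.

#include <stdint.h>
#include <stdio.h>

/* Same logic as the helpers added to kvm_host.h earlier in this merge. */
static inline uint64_t get_canonical(uint64_t la)
{
	return ((int64_t)la << 16) >> 16;
}

static inline int is_noncanonical_address(uint64_t la)
{
	return get_canonical(la) != la;
}

int main(void)
{
	uint64_t data = 0x0000800000000000ULL;	/* non-canonical write value */

	/* MSR_FS_BASE-style handling: refuse the write (caller injects #GP). */
	if (is_noncanonical_address(data))
		printf("FS_BASE-style MSR: write of %#llx rejected\n",
		       (unsigned long long)data);

	/* MSR_IA32_SYSENTER_EIP-style handling: clamp to canonical form. */
	printf("SYSENTER-style MSR: value stored as %#llx\n",
	       (unsigned long long)get_canonical(data));
	return 0;
}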
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 28be31f49250..ea53b04993f2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1080,6 +1080,7 @@ void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+void kvm_unregister_device_ops(u32 type);

 extern struct kvm_device_ops kvm_mpic_ops;
 extern struct kvm_device_ops kvm_xics_ops;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e51d9f9b995f..c1e6ae989a43 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
				gfn_t base_gfn, unsigned long npages);

 static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
-			   unsigned long size)
+			   unsigned long npages)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;

 	pfn = gfn_to_pfn_memslot(slot, gfn);
-	end_gfn = gfn + (size >> PAGE_SHIFT);
+	end_gfn = gfn + npages;
 	gfn += 1;

 	if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
		 * Pin all pages we are about to map in memory. This is
		 * important because we unmap and unpin in 4kb steps later.
		 */
-		pfn = kvm_pin_pages(slot, gfn, page_size);
+		pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
 		if (is_error_noslot_pfn(pfn)) {
 			gfn += 1;
 			continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
			       "iommu failed to map pfn=%llx\n", pfn);
-			kvm_unpin_pages(kvm, pfn, page_size);
+			kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
 			goto unmap_pages;
 		}

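The iommu.c change is a unit fix: kvm_pin_pages() and kvm_unpin_pages() now consistently take page counts, whereas the error path used to pass a byte size to the unpin side and released far too many references (the "excessive pages un-pinning" entry in the shortlog). A short arithmetic sketch with illustrative numbers:

#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB base pages, as on x86 */

int main(void)
{
	unsigned long page_size = 2UL << 20;			/* one 2 MiB mapping, in bytes */
	unsigned long npages    = page_size >> PAGE_SHIFT;	/* 512 pages                   */

	printf("correct unpin argument: %lu pages\n", npages);
	printf("old (buggy) argument:   %lu, i.e. 4096x too many\n", page_size);
	return 0;
}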
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 384eaa7b02fa..25ffac9e947d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2354,6 +2354,12 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
 	return 0;
 }

+void kvm_unregister_device_ops(u32 type)
+{
+	if (kvm_device_ops_table[type] != NULL)
+		kvm_device_ops_table[type] = NULL;
+}
+
 static int kvm_ioctl_create_device(struct kvm *kvm,
				   struct kvm_create_device *cd)
 {
@@ -3328,5 +3334,6 @@ void kvm_exit(void)
 	kvm_arch_exit();
 	kvm_irqfd_exit();
 	free_cpumask_var(cpus_hardware_enabled);
+	kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 281e7cf2b8e5..620e37f741b8 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -283,3 +283,8 @@ int kvm_vfio_ops_init(void)
 {
 	return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
 }
+
+void kvm_vfio_ops_exit(void)
+{
+	kvm_unregister_device_ops(KVM_DEV_TYPE_VFIO);
+}
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
index 92eac75d6b62..ab88c7dc0514 100644
--- a/virt/kvm/vfio.h
+++ b/virt/kvm/vfio.h
@@ -3,11 +3,15 @@

 #ifdef CONFIG_KVM_VFIO
 int kvm_vfio_ops_init(void);
+void kvm_vfio_ops_exit(void);
 #else
 static inline int kvm_vfio_ops_init(void)
 {
 	return 0;
 }
+static inline void kvm_vfio_ops_exit(void)
+{
+}
 #endif

 #endif