author     Linus Torvalds <torvalds@linux-foundation.org>  2014-08-04 15:16:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-08-04 15:16:46 -0400
commit     8533ce72718871fb528d853391746f36243273af
tree       a3ac06520e45cb6a472ed83979b0d48b6c2cec15 /arch/x86/kvm
parent     c9b88e9581828bb8bba06c5e7ee8ed1761172b6e
parent     42cbc04fd3b5e3f9b011bf9fa3ce0b3d1e10b58b
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM changes from Paolo Bonzini:
"These are the x86, MIPS and s390 changes; PPC and ARM will come in a
few days.
MIPS and s390 have little going on this release; just bugfixes, some
small, some larger.
The highlights for x86 are nested VMX improvements (Jan Kiszka),
optimizations for old processors (up to Nehalem, by me and Bandan Das),
and a lot of x86 emulator bugfixes (Nadav Amit).
Stephen Rothwell reported a trivial conflict with the tracing branch"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (104 commits)
x86/kvm: Resolve shadow warnings in macro expansion
KVM: s390: rework broken SIGP STOP interrupt handling
KVM: x86: always exit on EOIs for interrupts listed in the IOAPIC redir table
KVM: vmx: remove duplicate vmx_mpx_supported() prototype
KVM: s390: Fix memory leak on busy SIGP stop
x86/kvm: Resolve shadow warning from min macro
kvm: Resolve missing-field-initializers warnings
Replace NR_VMX_MSR with its definition
KVM: x86: Assertions to check no overrun in MSR lists
KVM: x86: set rflags.rf during fault injection
KVM: x86: Setting rflags.rf during rep-string emulation
KVM: x86: DR6/7.RTM cannot be written
KVM: nVMX: clean up nested_release_vmcs12 and code around it
KVM: nVMX: fix lifetime issues for vmcs02
KVM: x86: Defining missing x86 vectors
KVM: x86: emulator injects #DB when RFLAGS.RF is set
KVM: x86: Cleanup of rflags.rf cleaning
KVM: x86: Clear rflags.rf on emulated instructions
KVM: x86: popf emulation should not change RF
KVM: x86: Clearing rflags.rf upon skipped emulated instruction
...
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/cpuid.h    |   8
-rw-r--r--  arch/x86/kvm/emulate.c  | 494
-rw-r--r--  arch/x86/kvm/lapic.c    |   4
-rw-r--r--  arch/x86/kvm/mmutrace.h |   4
-rw-r--r--  arch/x86/kvm/pmu.c      |   9
-rw-r--r--  arch/x86/kvm/svm.c      |  57
-rw-r--r--  arch/x86/kvm/trace.h    |   6
-rw-r--r--  arch/x86/kvm/vmx.c      | 239
-rw-r--r--  arch/x86/kvm/x86.c      | 171
-rw-r--r--  arch/x86/kvm/x86.h      |  27
10 files changed, 641 insertions, 378 deletions
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f9087315e0cd..a5380590ab0e 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -95,4 +95,12 @@ static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
     best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
     return best && (best->edx & bit(X86_FEATURE_GBPAGES));
 }
+
+static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
+{
+    struct kvm_cpuid_entry2 *best;
+
+    best = kvm_find_cpuid_entry(vcpu, 7, 0);
+    return best && (best->ebx & bit(X86_FEATURE_RTM));
+}
 #endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e4e833d3d7d7..56657b0bb3bb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -162,6 +162,10 @@
 #define NoWrite ((u64)1 << 45) /* No writeback */
 #define SrcWrite ((u64)1 << 46) /* Write back src operand */
 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
+#define Intercept ((u64)1 << 48) /* Has valid intercept field */
+#define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
+#define NoBigReal ((u64)1 << 50) /* No big real mode */
+#define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
 
 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -426,6 +430,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
         .modrm_reg = ctxt->modrm_reg,
         .modrm_rm = ctxt->modrm_rm,
         .src_val = ctxt->src.val64,
+        .dst_val = ctxt->dst.val64,
         .src_bytes = ctxt->src.bytes,
         .dst_bytes = ctxt->dst.bytes,
         .ad_bytes = ctxt->ad_bytes,
@@ -511,12 +516,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc)
     return desc->g ? (limit << 12) | 0xfff : limit;
 }
 
-static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
-{
-    ctxt->has_seg_override = true;
-    ctxt->seg_override = seg;
-}
-
 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
 {
     if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
@@ -525,14 +524,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
     return ctxt->ops->get_cached_segment_base(ctxt, seg);
 }
 
-static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
-{
-    if (!ctxt->has_seg_override)
-        return 0;
-
-    return ctxt->seg_override;
-}
-
 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
                              u32 error, bool valid)
 {
@@ -651,7 +642,12 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
         if (!fetch && (desc.type & 8) && !(desc.type & 2))
             goto bad;
         lim = desc_limit_scaled(&desc);
-        if ((desc.type & 8) || !(desc.type & 4)) {
+        if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
+            (ctxt->d & NoBigReal)) {
+            /* la is between zero and 0xffff */
+            if (la > 0xffff || (u32)(la + size - 1) > 0xffff)
+                goto bad;
+        } else if ((desc.type & 8) || !(desc.type & 4)) {
             /* expand-up segment */
             if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
                 goto bad;
@@ -716,68 +712,71 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 }
 
 /*
- * Fetch the next byte of the instruction being emulated which is pointed to
- * by ctxt->_eip, then increment ctxt->_eip.
- *
- * Also prefetch the remaining bytes of the instruction without crossing page
+ * Prefetch the remaining bytes of the instruction without crossing page
  * boundary if they are not in fetch_cache yet.
  */
-static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
+static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 {
-    struct fetch_cache *fc = &ctxt->fetch;
     int rc;
-    int size, cur_size;
-
-    if (ctxt->_eip == fc->end) {
-        unsigned long linear;
-        struct segmented_address addr = { .seg = VCPU_SREG_CS,
-                                          .ea = ctxt->_eip };
-        cur_size = fc->end - fc->start;
-        size = min(15UL - cur_size,
-                   PAGE_SIZE - offset_in_page(ctxt->_eip));
-        rc = __linearize(ctxt, addr, size, false, true, &linear);
-        if (unlikely(rc != X86EMUL_CONTINUE))
-            return rc;
-        rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
-                              size, &ctxt->exception);
-        if (unlikely(rc != X86EMUL_CONTINUE))
-            return rc;
-        fc->end += size;
-    }
-    *dest = fc->data[ctxt->_eip - fc->start];
-    ctxt->_eip++;
-    return X86EMUL_CONTINUE;
-}
+    unsigned size;
+    unsigned long linear;
+    int cur_size = ctxt->fetch.end - ctxt->fetch.data;
+    struct segmented_address addr = { .seg = VCPU_SREG_CS,
+                                      .ea = ctxt->eip + cur_size };
+
+    size = 15UL ^ cur_size;
+    rc = __linearize(ctxt, addr, size, false, true, &linear);
+    if (unlikely(rc != X86EMUL_CONTINUE))
+        return rc;
 
-static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
-                         void *dest, unsigned size)
-{
-    int rc;
+    size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
 
-    /* x86 instructions are limited to 15 bytes. */
-    if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
+    /*
+     * One instruction can only straddle two pages,
+     * and one has been loaded at the beginning of
+     * x86_decode_insn.  So, if not enough bytes
+     * still, we must have hit the 15-byte boundary.
+     */
+    if (unlikely(size < op_size))
         return X86EMUL_UNHANDLEABLE;
-    while (size--) {
-        rc = do_insn_fetch_byte(ctxt, dest++);
-        if (rc != X86EMUL_CONTINUE)
+    rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
+                          size, &ctxt->exception);
+    if (unlikely(rc != X86EMUL_CONTINUE))
         return rc;
-    }
+    ctxt->fetch.end += size;
     return X86EMUL_CONTINUE;
 }
 
+static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
+                                               unsigned size)
+{
+    if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
+        return __do_insn_fetch_bytes(ctxt, size);
+    else
+        return X86EMUL_CONTINUE;
+}
+
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _ctxt) \
-({  unsigned long _x; \
-    rc = do_insn_fetch(_ctxt, &_x, sizeof(_type)); \
+({  _type _x; \
+    \
+    rc = do_insn_fetch_bytes(_ctxt, sizeof(_type)); \
     if (rc != X86EMUL_CONTINUE) \
         goto done; \
-    (_type)_x; \
+    ctxt->_eip += sizeof(_type); \
+    _x = *(_type __aligned(1) *) ctxt->fetch.ptr; \
+    ctxt->fetch.ptr += sizeof(_type); \
+    _x; \
 })
 
 #define insn_fetch_arr(_arr, _size, _ctxt) \
-({  rc = do_insn_fetch(_ctxt, _arr, (_size)); \
+({ \
+    rc = do_insn_fetch_bytes(_ctxt, _size); \
     if (rc != X86EMUL_CONTINUE) \
         goto done; \
+    ctxt->_eip += (_size); \
+    memcpy(_arr, ctxt->fetch.ptr, _size); \
+    ctxt->fetch.ptr += (_size); \
 })
 
 /*
@@ -1063,19 +1062,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                         struct operand *op)
 {
     u8 sib;
-    int index_reg = 0, base_reg = 0, scale;
+    int index_reg, base_reg, scale;
     int rc = X86EMUL_CONTINUE;
     ulong modrm_ea = 0;
 
-    if (ctxt->rex_prefix) {
-        ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */
-        index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */
-        ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
-    }
+    ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
+    index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
+    base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
 
-    ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
+    ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
     ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
-    ctxt->modrm_rm |= (ctxt->modrm & 0x07);
+    ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
     ctxt->modrm_seg = VCPU_SREG_DS;
 
     if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
@@ -1093,7 +1090,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
         if (ctxt->d & Mmx) {
             op->type = OP_MM;
             op->bytes = 8;
-            op->addr.xmm = ctxt->modrm_rm & 7;
+            op->addr.mm = ctxt->modrm_rm & 7;
             return rc;
         }
         fetch_register_operand(op);
@@ -1190,6 +1187,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
         }
     }
     op->addr.mem.ea = modrm_ea;
+    if (ctxt->ad_bytes != 8)
+        ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+
 done:
     return rc;
 }
@@ -1220,12 +1220,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
     long sv = 0, mask;
 
     if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
-        mask = ~(ctxt->dst.bytes * 8 - 1);
+        mask = ~((long)ctxt->dst.bytes * 8 - 1);
 
         if (ctxt->src.bytes == 2)
             sv = (s16)ctxt->src.val & (s16)mask;
         else if (ctxt->src.bytes == 4)
             sv = (s32)ctxt->src.val & (s32)mask;
+        else
+            sv = (s64)ctxt->src.val & (s64)mask;
 
         ctxt->dst.addr.mem.ea += (sv >> 3);
     }
@@ -1315,8 +1317,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
         in_page = (ctxt->eflags & EFLG_DF) ?
             offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
             PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
-        n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
-                count);
+        n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
         if (n == 0)
             n = 1;
         rc->pos = rc->end = 0;
@@ -1358,17 +1359,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                      u16 selector, struct desc_ptr *dt)
 {
     const struct x86_emulate_ops *ops = ctxt->ops;
+    u32 base3 = 0;
 
     if (selector & 1 << 2) {
         struct desc_struct desc;
         u16 sel;
 
         memset (dt, 0, sizeof *dt);
-        if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
+        if (!ops->get_segment(ctxt, &sel, &desc, &base3,
+                              VCPU_SREG_LDTR))
             return;
 
         dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
-        dt->address = get_desc_base(&desc);
+        dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
     } else
         ops->get_gdt(ctxt, dt);
 }
@@ -1422,6 +1425,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
     ulong desc_addr;
     int ret;
     u16 dummy;
+    u32 base3 = 0;
 
     memset(&seg_desc, 0, sizeof seg_desc);
 
@@ -1538,9 +1542,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
         ret = write_segment_descriptor(ctxt, selector, &seg_desc);
         if (ret != X86EMUL_CONTINUE)
             return ret;
+    } else if (ctxt->mode == X86EMUL_MODE_PROT64) {
+        ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
+                                  sizeof(base3), &ctxt->exception);
+        if (ret != X86EMUL_CONTINUE)
+            return ret;
     }
 load:
-    ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+    ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
     return X86EMUL_CONTINUE;
 exception:
     emulate_exception(ctxt, err_vec, err_code, true);
@@ -1575,34 +1584,28 @@ static void write_register_operand(struct operand *op)
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
 {
-    int rc;
-
     switch (op->type) {
     case OP_REG:
         write_register_operand(op);
         break;
     case OP_MEM:
         if (ctxt->lock_prefix)
-            rc = segmented_cmpxchg(ctxt,
+            return segmented_cmpxchg(ctxt,
+                                     op->addr.mem,
+                                     &op->orig_val,
+                                     &op->val,
+                                     op->bytes);
+        else
+            return segmented_write(ctxt,
                                    op->addr.mem,
-                                   &op->orig_val,
                                    &op->val,
                                    op->bytes);
-        else
-            rc = segmented_write(ctxt,
-                                 op->addr.mem,
-                                 &op->val,
-                                 op->bytes);
-        if (rc != X86EMUL_CONTINUE)
-            return rc;
         break;
     case OP_MEM_STR:
-        rc = segmented_write(ctxt,
+        return segmented_write(ctxt,
                                op->addr.mem,
                                op->data,
                                op->bytes * op->count);
-        if (rc != X86EMUL_CONTINUE)
-            return rc;
         break;
     case OP_XMM:
         write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
@@ -1671,7 +1674,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
         return rc;
 
     change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
-        | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
+        | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
 
     switch(ctxt->mode) {
     case X86EMUL_MODE_PROT64:
@@ -1754,6 +1757,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
     if (rc != X86EMUL_CONTINUE)
         return rc;
 
+    if (ctxt->modrm_reg == VCPU_SREG_SS)
+        ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+
     rc = load_segment_descriptor(ctxt, (u16)selector, seg);
     return rc;
 }
@@ -1991,6 +1997,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
     u64 old = ctxt->dst.orig_val64;
 
+    if (ctxt->dst.bytes == 16)
+        return X86EMUL_UNHANDLEABLE;
+
     if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
         ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
         *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
@@ -2017,6 +2026,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
     int rc;
     unsigned long cs;
+    int cpl = ctxt->ops->cpl(ctxt);
 
     rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
     if (rc != X86EMUL_CONTINUE)
@@ -2026,6 +2036,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
     rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
     if (rc != X86EMUL_CONTINUE)
         return rc;
+    /* Outer-privilege level return is not implemented */
+    if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
+        return X86EMUL_UNHANDLEABLE;
     rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
     return rc;
 }
@@ -2044,8 +2057,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
     /* Save real source value, then compare EAX against destination. */
+    ctxt->dst.orig_val = ctxt->dst.val;
+    ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
     ctxt->src.orig_val = ctxt->src.val;
-    ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
+    ctxt->src.val = ctxt->dst.orig_val;
     fastop(ctxt, em_cmp);
 
     if (ctxt->eflags & EFLG_ZF) {
@@ -2055,6 +2070,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
         /* Failure: write the value we saw to EAX. */
         ctxt->dst.type = OP_REG;
         ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+        ctxt->dst.val = ctxt->dst.orig_val;
     }
     return X86EMUL_CONTINUE;
 }
@@ -2194,7 +2210,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
     *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
     if (efer & EFER_LMA) {
 #ifdef CONFIG_X86_64
-        *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;
+        *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
 
         ops->get_msr(ctxt,
                      ctxt->mode == X86EMUL_MODE_PROT64 ?
@@ -2202,14 +2218,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
         ctxt->_eip = msr_data;
 
         ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
-        ctxt->eflags &= ~(msr_data | EFLG_RF);
+        ctxt->eflags &= ~msr_data;
 #endif
     } else {
         /* legacy mode */
         ops->get_msr(ctxt, MSR_STAR, &msr_data);
         ctxt->_eip = (u32)msr_data;
 
-        ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+        ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
     }
 
     return X86EMUL_CONTINUE;
@@ -2258,7 +2274,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
         break;
     }
 
-    ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+    ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
     cs_sel = (u16)msr_data;
     cs_sel &= ~SELECTOR_RPL_MASK;
     ss_sel = cs_sel + 8;
@@ -2964,7 +2980,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
 
 static int em_mov(struct x86_emulate_ctxt *ctxt)
 {
-    memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
+    memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
     return X86EMUL_CONTINUE;
 }
 
@@ -3221,7 +3237,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
 {
-    ctxt->dst.bytes = 2;
+    if (ctxt->dst.type == OP_MEM)
+        ctxt->dst.bytes = 2;
     ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
     return X86EMUL_CONTINUE;
 }
@@ -3496,7 +3513,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
     u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
     if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
-        (rcx > 3))
+        ctxt->ops->check_pmc(ctxt, rcx))
         return emulate_gp(ctxt, 0);
 
     return X86EMUL_CONTINUE;
@@ -3521,9 +3538,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 }
 
 #define D(_y) { .flags = (_y) }
-#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
-#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
-                          .check_perm = (_p) }
+#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
+#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
+                          .intercept = x86_intercept_##_i, .check_perm = (_p) }
 #define N D(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
@@ -3532,10 +3549,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
 #define II(_f, _e, _i) \
-    { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
+    { .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
 #define IIP(_f, _e, _i, _p) \
-    { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
-      .check_perm = (_p) }
+    { .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
+      .intercept = x86_intercept_##_i, .check_perm = (_p) }
 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
 
 #define D2bv(_f) D((_f) | ByteOp), D(_f)
@@ -3634,8 +3651,8 @@ static const struct opcode group6[] = {
 };
 
 static const struct group_dual group7 = { {
-    II(Mov | DstMem | Priv, em_sgdt, sgdt),
-    II(Mov | DstMem | Priv, em_sidt, sidt),
+    II(Mov | DstMem, em_sgdt, sgdt),
+    II(Mov | DstMem, em_sidt, sidt),
     II(SrcMem | Priv, em_lgdt, lgdt),
     II(SrcMem | Priv, em_lidt, lidt),
     II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
@@ -3899,7 +3916,7 @@ static const struct opcode twobyte_table[256] = {
     N, N,
     N, N, N, N, N, N, N, N,
     /* 0x40 - 0x4F */
-    X16(D(DstReg | SrcMem | ModRM | Mov)),
+    X16(D(DstReg | SrcMem | ModRM)),
     /* 0x50 - 0x5F */
     N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
     /* 0x60 - 0x6F */
@@ -4061,12 +4078,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
     mem_common:
         *op = ctxt->memop;
         ctxt->memopp = op;
-        if ((ctxt->d & BitOp) && op == &ctxt->dst)
+        if (ctxt->d & BitOp)
             fetch_bit_operand(ctxt);
         op->orig_val = op->val;
         break;
     case OpMem64:
-        ctxt->memop.bytes = 8;
+        ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
         goto mem_common;
     case OpAcc:
         op->type = OP_REG;
@@ -4150,7 +4167,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
         op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
         op->addr.mem.ea =
             register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
-        op->addr.mem.seg = seg_override(ctxt);
+        op->addr.mem.seg = ctxt->seg_override;
         op->val = 0;
         op->count = 1;
         break;
@@ -4161,7 +4178,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
             register_address(ctxt,
                              reg_read(ctxt, VCPU_REGS_RBX) +
                              (reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
-        op->addr.mem.seg = seg_override(ctxt);
+        op->addr.mem.seg = ctxt->seg_override;
         op->val = 0;
         break;
     case OpImmFAddr:
@@ -4208,16 +4225,22 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
     int mode = ctxt->mode;
     int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
     bool op_prefix = false;
+    bool has_seg_override = false;
     struct opcode opcode;
 
     ctxt->memop.type = OP_NONE;
     ctxt->memopp = NULL;
     ctxt->_eip = ctxt->eip;
-    ctxt->fetch.start = ctxt->_eip;
-    ctxt->fetch.end = ctxt->fetch.start + insn_len;
+    ctxt->fetch.ptr = ctxt->fetch.data;
+    ctxt->fetch.end = ctxt->fetch.data + insn_len;
     ctxt->opcode_len = 1;
     if (insn_len > 0)
         memcpy(ctxt->fetch.data, insn, insn_len);
+    else {
+        rc = __do_insn_fetch_bytes(ctxt, 1);
+        if (rc != X86EMUL_CONTINUE)
+            return rc;
+    }
 
     switch (mode) {
     case X86EMUL_MODE_REAL:
@@ -4261,11 +4284,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
         case 0x2e: /* CS override */
         case 0x36: /* SS override */
         case 0x3e: /* DS override */
-            set_seg_override(ctxt, (ctxt->b >> 3) & 3);
+            has_seg_override = true;
+            ctxt->seg_override = (ctxt->b >> 3) & 3;
             break;
         case 0x64: /* FS override */
         case 0x65: /* GS override */
-            set_seg_override(ctxt, ctxt->b & 7);
+            has_seg_override = true;
+            ctxt->seg_override = ctxt->b & 7;
             break;
         case 0x40 ... 0x4f: /* REX */
             if (mode != X86EMUL_MODE_PROT64)
@@ -4314,6 +4339,13 @@ done_prefixes:
     if (ctxt->d & ModRM)
         ctxt->modrm = insn_fetch(u8, ctxt);
 
+    /* vex-prefix instructions are not implemented */
+    if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
+        (mode == X86EMUL_MODE_PROT64 ||
+        (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+        ctxt->d = NotImpl;
+    }
+
     while (ctxt->d & GroupMask) {
         switch (ctxt->d & GroupMask) {
         case Group:
@@ -4356,49 +4388,59 @@ done_prefixes:
         ctxt->d |= opcode.flags;
     }
 
-    ctxt->execute = opcode.u.execute;
-    ctxt->check_perm = opcode.check_perm;
-    ctxt->intercept = opcode.intercept;
-
     /* Unrecognised? */
-    if (ctxt->d == 0 || (ctxt->d & NotImpl))
+    if (ctxt->d == 0)
         return EMULATION_FAILED;
 
-    if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
-        return EMULATION_FAILED;
+    ctxt->execute = opcode.u.execute;
 
-    if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-        ctxt->op_bytes = 8;
+    if (unlikely(ctxt->d &
+                 (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+        /*
+         * These are copied unconditionally here, and checked unconditionally
+         * in x86_emulate_insn.
+         */
+        ctxt->check_perm = opcode.check_perm;
+        ctxt->intercept = opcode.intercept;
+
+        if (ctxt->d & NotImpl)
+            return EMULATION_FAILED;
+
+        if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
+            return EMULATION_FAILED;
 
-    if (ctxt->d & Op3264) {
-        if (mode == X86EMUL_MODE_PROT64)
+        if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
             ctxt->op_bytes = 8;
-        else
-            ctxt->op_bytes = 4;
-    }
 
-    if (ctxt->d & Sse)
-        ctxt->op_bytes = 16;
-    else if (ctxt->d & Mmx)
-        ctxt->op_bytes = 8;
+        if (ctxt->d & Op3264) {
+            if (mode == X86EMUL_MODE_PROT64)
+                ctxt->op_bytes = 8;
+            else
+                ctxt->op_bytes = 4;
+        }
+
+        if (ctxt->d & Sse)
+            ctxt->op_bytes = 16;
+        else if (ctxt->d & Mmx)
+            ctxt->op_bytes = 8;
+    }
 
     /* ModRM and SIB bytes. */
     if (ctxt->d & ModRM) {
         rc = decode_modrm(ctxt, &ctxt->memop);
-        if (!ctxt->has_seg_override)
-            set_seg_override(ctxt, ctxt->modrm_seg);
+        if (!has_seg_override) {
+            has_seg_override = true;
+            ctxt->seg_override = ctxt->modrm_seg;
+        }
     } else if (ctxt->d & MemAbs)
         rc = decode_abs(ctxt, &ctxt->memop);
     if (rc != X86EMUL_CONTINUE)
         goto done;
 
-    if (!ctxt->has_seg_override)
-        set_seg_override(ctxt, VCPU_SREG_DS);
-
-    ctxt->memop.addr.mem.seg = seg_override(ctxt);
+    if (!has_seg_override)
+        ctxt->seg_override = VCPU_SREG_DS;
 
-    if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
-        ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+    ctxt->memop.addr.mem.seg = ctxt->seg_override;
 
     /*
      * Decode and fetch the source operand: register, memory
@@ -4420,7 +4462,7 @@ done_prefixes:
     rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
 done:
-    if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
+    if (ctxt->rip_relative)
         ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
     return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4495,6 +4537,16 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
     return X86EMUL_CONTINUE;
 }
 
+void init_decode_cache(struct x86_emulate_ctxt *ctxt)
+{
+    memset(&ctxt->rip_relative, 0,
+           (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+
+    ctxt->io_read.pos = 0;
+    ctxt->io_read.end = 0;
+    ctxt->mem_read.end = 0;
+}
+
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
     const struct x86_emulate_ops *ops = ctxt->ops;
@@ -4503,12 +4555,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
     ctxt->mem_read.pos = 0;
 
-    if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
-        (ctxt->d & Undefined)) {
-        rc = emulate_ud(ctxt);
-        goto done;
-    }
-
     /* LOCK prefix is allowed only with some instructions */
     if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
         rc = emulate_ud(ctxt);
@@ -4520,69 +4566,82 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
         goto done;
     }
 
-    if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
-        || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
-        rc = emulate_ud(ctxt);
-        goto done;
-    }
-
-    if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
-        rc = emulate_nm(ctxt);
-        goto done;
-    }
+    if (unlikely(ctxt->d &
+                 (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
+        if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+            (ctxt->d & Undefined)) {
+            rc = emulate_ud(ctxt);
+            goto done;
+        }
 
-    if (ctxt->d & Mmx) {
-        rc = flush_pending_x87_faults(ctxt);
-        if (rc != X86EMUL_CONTINUE)
+        if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
+            || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+            rc = emulate_ud(ctxt);
             goto done;
-        /*
-         * Now that we know the fpu is exception safe, we can fetch
-         * operands from it.
-         */
-        fetch_possible_mmx_operand(ctxt, &ctxt->src);
-        fetch_possible_mmx_operand(ctxt, &ctxt->src2);
-        if (!(ctxt->d & Mov))
-            fetch_possible_mmx_operand(ctxt, &ctxt->dst);
-    }
+        }
 
-    if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
-        rc = emulator_check_intercept(ctxt, ctxt->intercept,
-                                      X86_ICPT_PRE_EXCEPT);
-        if (rc != X86EMUL_CONTINUE)
+        if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+            rc = emulate_nm(ctxt);
             goto done;
         }
 
-    /* Privileged instruction can be executed only in CPL=0 */
-    if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
-        rc = emulate_gp(ctxt, 0);
+        if (ctxt->d & Mmx) {
+            rc = flush_pending_x87_faults(ctxt);
+            if (rc != X86EMUL_CONTINUE)
                 goto done;
-    }
+            /*
+             * Now that we know the fpu is exception safe, we can fetch
+             * operands from it.
+             */
+            fetch_possible_mmx_operand(ctxt, &ctxt->src);
+            fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+            if (!(ctxt->d & Mov))
+                fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+        }
 
-    /* Instruction can only be executed in protected mode */
-    if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-        rc = emulate_ud(ctxt);
-        goto done;
-    }
+        if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+            rc = emulator_check_intercept(ctxt, ctxt->intercept,
+                                          X86_ICPT_PRE_EXCEPT);
+            if (rc != X86EMUL_CONTINUE)
+                goto done;
+        }
 
-    /* Do instruction specific permission checks */
-    if (ctxt->check_perm) {
-        rc = ctxt->check_perm(ctxt);
-        if (rc != X86EMUL_CONTINUE)
+        /* Privileged instruction can be executed only in CPL=0 */
+        if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
+            if (ctxt->d & PrivUD)
+                rc = emulate_ud(ctxt);
+            else
+                rc = emulate_gp(ctxt, 0);
             goto done;
         }
 
-    if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
-        rc = emulator_check_intercept(ctxt, ctxt->intercept,
-                                      X86_ICPT_POST_EXCEPT);
-        if (rc != X86EMUL_CONTINUE)
+        /* Instruction can only be executed in protected mode */
+        if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+            rc = emulate_ud(ctxt);
             goto done;
         }
 
-    if (ctxt->rep_prefix && (ctxt->d & String)) {
-        /* All REP prefixes have the same first termination condition */
-        if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
-            ctxt->eip = ctxt->_eip;
+        /* Do instruction specific permission checks */
+        if (ctxt->d & CheckPerm) {
+            rc = ctxt->check_perm(ctxt);
+            if (rc != X86EMUL_CONTINUE)
                 goto done;
+        }
+
+        if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+            rc = emulator_check_intercept(ctxt, ctxt->intercept,
+                                          X86_ICPT_POST_EXCEPT);
+            if (rc != X86EMUL_CONTINUE)
+                goto done;
+        }
+
+        if (ctxt->rep_prefix && (ctxt->d & String)) {
+            /* All REP prefixes have the same first termination condition */
+            if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
+                ctxt->eip = ctxt->_eip;
+                ctxt->eflags &= ~EFLG_RF;
+                goto done;
+            }
         }
     }
 
@@ -4616,13 +4675,18 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 special_insn:
 
-    if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+    if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
         rc = emulator_check_intercept(ctxt, ctxt->intercept,
                                       X86_ICPT_POST_MEMACCESS);
         if (rc != X86EMUL_CONTINUE)
             goto done;
     }
 
+    if (ctxt->rep_prefix && (ctxt->d & String))
+        ctxt->eflags |= EFLG_RF;
+    else
+        ctxt->eflags &= ~EFLG_RF;
+
     if (ctxt->execute) {
         if (ctxt->d & Fastop) {
             void (*fop)(struct fastop *) = (void *)ctxt->execute;
@@ -4657,8 +4721,9 @@ special_insn:
         break;
     case 0x90 ... 0x97: /* nop / xchg reg, rax */
         if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
-            break;
-        rc = em_xchg(ctxt);
+            ctxt->dst.type = OP_NONE;
+        else
+            rc = em_xchg(ctxt);
         break;
     case 0x98: /* cbw/cwde/cdqe */
         switch (ctxt->op_bytes) {
@@ -4709,17 +4774,17 @@ special_insn:
         goto done;
 
 writeback:
-    if (!(ctxt->d & NoWrite)) {
-        rc = writeback(ctxt, &ctxt->dst);
-        if (rc != X86EMUL_CONTINUE)
-            goto done;
-    }
     if (ctxt->d & SrcWrite) {
         BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
         rc = writeback(ctxt, &ctxt->src);
         if (rc != X86EMUL_CONTINUE)
             goto done;
     }
+    if (!(ctxt->d & NoWrite)) {
+        rc = writeback(ctxt, &ctxt->dst);
+        if (rc != X86EMUL_CONTINUE)
+            goto done;
+    }
 
     /*
      * restore dst type in case the decoding will be reused
@@ -4761,6 +4826,7 @@ writeback:
             }
             goto done; /* skip rip writeback */
         }
+        ctxt->eflags &= ~EFLG_RF;
     }
 
     ctxt->eip = ctxt->_eip;
@@ -4793,8 +4859,10 @@ twobyte_insn:
         ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
         break;
     case 0x40 ... 0x4f: /* cmov */
-        ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val;
-        if (!test_cc(ctxt->b, ctxt->eflags))
+        if (test_cc(ctxt->b, ctxt->eflags))
+            ctxt->dst.val = ctxt->src.val;
+        else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
+                 ctxt->op_bytes != 4)
             ctxt->dst.type = OP_NONE; /* no writeback */
         break;
     case 0x80 ... 0x8f: /* jnz rel, etc*/
@@ -4818,8 +4886,8 @@ twobyte_insn:
         break;
     case 0xc3: /* movnti */
         ctxt->dst.bytes = ctxt->op_bytes;
-        ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
-                        (u64) ctxt->src.val;
+        ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
+                        (u32) ctxt->src.val;
         break;
     default:
         goto cannot_emulate;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 006911858174..3855103f71fd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1451,7 +1451,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
     vcpu->arch.apic_arb_prio = 0;
     vcpu->arch.apic_attention = 0;
 
-    apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+    apic_debug("%s: vcpu=%p, id=%d, base_msr="
                "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
                vcpu, kvm_apic_id(apic),
                vcpu->arch.apic_base, apic->base_address);
@@ -1895,7 +1895,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
         /* evaluate pending_events before reading the vector */
         smp_rmb();
         sipi_vector = apic->sipi_vector;
-        pr_debug("vcpu %d received sipi with vector # %x\n",
+        apic_debug("vcpu %d received sipi with vector # %x\n",
                  vcpu->vcpu_id, sipi_vector);
         kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 2e5652b62fd6..5aaf35641768 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -22,7 +22,7 @@
     __entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({ \
-    const char *ret = trace_seq_buffer_ptr(p); \
+    const u32 saved_len = p->len; \
     static const char *access_str[] = { \
         "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux" \
     }; \
@@ -41,7 +41,7 @@
            role.nxe ? "" : "!", \
            __entry->root_count, \
            __entry->unsync ? "unsync" : "sync", 0); \
-    ret; \
+    p->buffer + saved_len; \
 })
 
 #define kvm_mmu_trace_pferr_flags \
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cbecaa90399c..3dd6accb64ec 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -428,6 +428,15 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
     return 1;
 }
 
+int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+{
+    struct kvm_pmu *pmu = &vcpu->arch.pmu;
+    bool fixed = pmc & (1u << 30);
+    pmc &= ~(3u << 30);
+    return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
+        (fixed && pmc >= pmu->nr_arch_fixed_counters);
+}
+
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
 {
     struct kvm_pmu *pmu = &vcpu->arch.pmu;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b5e994ad0135..ddf742768ecf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -486,14 +486,14 @@ static int is_external_interrupt(u32 info) | |||
486 | return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); | 486 | return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); |
487 | } | 487 | } |
488 | 488 | ||
489 | static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | 489 | static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) |
490 | { | 490 | { |
491 | struct vcpu_svm *svm = to_svm(vcpu); | 491 | struct vcpu_svm *svm = to_svm(vcpu); |
492 | u32 ret = 0; | 492 | u32 ret = 0; |
493 | 493 | ||
494 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) | 494 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
495 | ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; | 495 | ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; |
496 | return ret & mask; | 496 | return ret; |
497 | } | 497 | } |
498 | 498 | ||
499 | static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | 499 | static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) |
@@ -1415,7 +1415,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
1415 | var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; | 1415 | var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; |
1416 | var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; | 1416 | var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; |
1417 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 1417 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
1418 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 1418 | |
1419 | /* | ||
1420 | * AMD CPUs circa 2014 track the G bit for all segments except CS. | ||
1421 | * However, the SVM spec states that the G bit is not observed by the | ||
1422 | * CPU, and some VMware virtual CPUs drop the G bit for all segments. | ||
1423 | * So let's synthesize a legal G bit for all segments, this helps | ||
1424 | * running KVM nested. It also helps cross-vendor migration, because | ||
1425 | * Intel's vmentry has a check on the 'G' bit. | ||
1426 | */ | ||
1427 | var->g = s->limit > 0xfffff; | ||
1419 | 1428 | ||
1420 | /* | 1429 | /* |
1421 | * AMD's VMCB does not have an explicit unusable field, so emulate it | 1430 | * AMD's VMCB does not have an explicit unusable field, so emulate it |
@@ -1424,14 +1433,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
1424 | var->unusable = !var->present || (var->type == 0); | 1433 | var->unusable = !var->present || (var->type == 0); |
1425 | 1434 | ||
1426 | switch (seg) { | 1435 | switch (seg) { |
1427 | case VCPU_SREG_CS: | ||
1428 | /* | ||
1429 | * SVM always stores 0 for the 'G' bit in the CS selector in | ||
1430 | * the VMCB on a VMEXIT. This hurts cross-vendor migration: | ||
1431 | * Intel's VMENTRY has a check on the 'G' bit. | ||
1432 | */ | ||
1433 | var->g = s->limit > 0xfffff; | ||
1434 | break; | ||
1435 | case VCPU_SREG_TR: | 1436 | case VCPU_SREG_TR: |
1436 | /* | 1437 | /* |
1437 | * Work around a bug where the busy flag in the tr selector | 1438 | * Work around a bug where the busy flag in the tr selector |
@@ -2116,22 +2117,27 @@ static void nested_svm_unmap(struct page *page) | |||
2116 | 2117 | ||
2117 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) | 2118 | static int nested_svm_intercept_ioio(struct vcpu_svm *svm) |
2118 | { | 2119 | { |
2119 | unsigned port; | 2120 | unsigned port, size, iopm_len; |
2120 | u8 val, bit; | 2121 | u16 val, mask; |
2122 | u8 start_bit; | ||
2121 | u64 gpa; | 2123 | u64 gpa; |
2122 | 2124 | ||
2123 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) | 2125 | if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) |
2124 | return NESTED_EXIT_HOST; | 2126 | return NESTED_EXIT_HOST; |
2125 | 2127 | ||
2126 | port = svm->vmcb->control.exit_info_1 >> 16; | 2128 | port = svm->vmcb->control.exit_info_1 >> 16; |
2129 | size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> | ||
2130 | SVM_IOIO_SIZE_SHIFT; | ||
2127 | gpa = svm->nested.vmcb_iopm + (port / 8); | 2131 | gpa = svm->nested.vmcb_iopm + (port / 8); |
2128 | bit = port % 8; | 2132 | start_bit = port % 8; |
2129 | val = 0; | 2133 | iopm_len = (start_bit + size > 8) ? 2 : 1; |
2134 | mask = (0xf >> (4 - size)) << start_bit; | ||
2135 | val = 0; | ||
2130 | 2136 | ||
2131 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) | 2137 | if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len)) |
2132 | val &= (1 << bit); | 2138 | return NESTED_EXIT_DONE; |
2133 | 2139 | ||
2134 | return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; | 2140 | return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; |
2135 | } | 2141 | } |
2136 | 2142 | ||
2137 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) | 2143 | static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) |
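The reworked nested_svm_intercept_ioio() now honours the access size: the I/O permission map has one bit per port, a 2- or 4-byte access covers several consecutive bits, and a check that straddles a byte boundary must read two bytes of the map. The following user-space sketch models the same bit arithmetic against an in-memory bitmap; the kernel instead reads the byte(s) from the nested guest's IOPM with kvm_read_guest(), and the map size here is only an illustrative choice.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IOPM_BYTES (65536 / 8 + 1)	/* one bit per port, plus a pad byte */

/* True when any byte of an access of 'size' (1, 2 or 4) hits a set bit. */
static bool iopm_intercepted(const uint8_t *iopm, uint16_t port, unsigned size)
{
	unsigned start_bit = port % 8;
	unsigned len = (start_bit + size > 8) ? 2 : 1;	  /* bytes to fetch */
	uint16_t mask = (0xf >> (4 - size)) << start_bit; /* covers 'size' bits */
	uint16_t val = 0;

	memcpy(&val, iopm + port / 8, len);	/* assumes little endian */
	return val & mask;
}

int main(void)
{
	static uint8_t iopm[IOPM_BYTES];

	iopm[0x71 / 8] |= 1 << (0x71 % 8);	/* intercept port 0x71 only */

	printf("outb 0x70: %d\n", iopm_intercepted(iopm, 0x70, 1));	/* 0 */
	printf("outw 0x70: %d\n", iopm_intercepted(iopm, 0x70, 2));	/* 1, spans 0x71 */
	return 0;
}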
@@ -4205,7 +4211,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, | |||
4205 | if (info->intercept == x86_intercept_cr_write) | 4211 | if (info->intercept == x86_intercept_cr_write) |
4206 | icpt_info.exit_code += info->modrm_reg; | 4212 | icpt_info.exit_code += info->modrm_reg; |
4207 | 4213 | ||
4208 | if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0) | 4214 | if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 || |
4215 | info->intercept == x86_intercept_clts) | ||
4209 | break; | 4216 | break; |
4210 | 4217 | ||
4211 | intercept = svm->nested.intercept; | 4218 | intercept = svm->nested.intercept; |
@@ -4250,14 +4257,14 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, | |||
4250 | u64 exit_info; | 4257 | u64 exit_info; |
4251 | u32 bytes; | 4258 | u32 bytes; |
4252 | 4259 | ||
4253 | exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16; | ||
4254 | |||
4255 | if (info->intercept == x86_intercept_in || | 4260 | if (info->intercept == x86_intercept_in || |
4256 | info->intercept == x86_intercept_ins) { | 4261 | info->intercept == x86_intercept_ins) { |
4257 | exit_info |= SVM_IOIO_TYPE_MASK; | 4262 | exit_info = ((info->src_val & 0xffff) << 16) | |
4258 | bytes = info->src_bytes; | 4263 | SVM_IOIO_TYPE_MASK; |
4259 | } else { | ||
4260 | bytes = info->dst_bytes; | 4264 | bytes = info->dst_bytes; |
4265 | } else { | ||
4266 | exit_info = (info->dst_val & 0xffff) << 16; | ||
4267 | bytes = info->src_bytes; | ||
4261 | } | 4268 | } |
4262 | 4269 | ||
4263 | if (info->intercept == x86_intercept_outs || | 4270 | if (info->intercept == x86_intercept_outs || |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 33574c95220d..e850a7d332be 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -721,10 +721,10 @@ TRACE_EVENT(kvm_emulate_insn, | |||
721 | ), | 721 | ), |
722 | 722 | ||
723 | TP_fast_assign( | 723 | TP_fast_assign( |
724 | __entry->rip = vcpu->arch.emulate_ctxt.fetch.start; | ||
725 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); | 724 | __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); |
726 | __entry->len = vcpu->arch.emulate_ctxt._eip | 725 | __entry->len = vcpu->arch.emulate_ctxt.fetch.ptr |
727 | - vcpu->arch.emulate_ctxt.fetch.start; | 726 | - vcpu->arch.emulate_ctxt.fetch.data; |
727 | __entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len; | ||
728 | memcpy(__entry->insn, | 728 | memcpy(__entry->insn, |
729 | vcpu->arch.emulate_ctxt.fetch.data, | 729 | vcpu->arch.emulate_ctxt.fetch.data, |
730 | 15); | 730 | 15); |
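With fetch.start gone, the trace point recovers the instruction length from how far the fetch pointer advanced past the cache base, then derives the starting RIP by subtracting that length from the post-decode _eip. A toy sketch of the arithmetic; the structure and field names are simplified stand-ins for the emulator context:

#include <stdint.h>
#include <stdio.h>

struct fetch_sketch {
	uint8_t data[15];	/* bytes fetched so far */
	uint8_t *ptr;		/* first byte not yet consumed */
};

int main(void)
{
	struct fetch_sketch fetch;
	uint64_t eip_after_decode = 0x401005;	/* example value */

	fetch.ptr = fetch.data + 5;		/* decoder consumed 5 bytes */

	unsigned len = (unsigned)(fetch.ptr - fetch.data);	/* 5 */
	uint64_t rip = eip_after_decode - len;			/* 0x401000 */

	printf("len=%u rip=%#llx\n", len, (unsigned long long)rip);
	return 0;
}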
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 801332edefc3..e618f34bde2d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -383,6 +383,9 @@ struct nested_vmx { | |||
383 | 383 | ||
384 | struct hrtimer preemption_timer; | 384 | struct hrtimer preemption_timer; |
385 | bool preemption_timer_expired; | 385 | bool preemption_timer_expired; |
386 | |||
387 | /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ | ||
388 | u64 vmcs01_debugctl; | ||
386 | }; | 389 | }; |
387 | 390 | ||
388 | #define POSTED_INTR_ON 0 | 391 | #define POSTED_INTR_ON 0 |
@@ -740,7 +743,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); | |||
740 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); | 743 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); |
741 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | 744 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); |
742 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | 745 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); |
743 | static bool vmx_mpx_supported(void); | ||
744 | 746 | ||
745 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 747 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
746 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 748 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -820,7 +822,6 @@ static const u32 vmx_msr_index[] = { | |||
820 | #endif | 822 | #endif |
821 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, | 823 | MSR_EFER, MSR_TSC_AUX, MSR_STAR, |
822 | }; | 824 | }; |
823 | #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) | ||
824 | 825 | ||
825 | static inline bool is_page_fault(u32 intr_info) | 826 | static inline bool is_page_fault(u32 intr_info) |
826 | { | 827 | { |
@@ -1940,7 +1941,7 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | |||
1940 | vmcs_writel(GUEST_RFLAGS, rflags); | 1941 | vmcs_writel(GUEST_RFLAGS, rflags); |
1941 | } | 1942 | } |
1942 | 1943 | ||
1943 | static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | 1944 | static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu) |
1944 | { | 1945 | { |
1945 | u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 1946 | u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
1946 | int ret = 0; | 1947 | int ret = 0; |
@@ -1950,7 +1951,7 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | |||
1950 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | 1951 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) |
1951 | ret |= KVM_X86_SHADOW_INT_MOV_SS; | 1952 | ret |= KVM_X86_SHADOW_INT_MOV_SS; |
1952 | 1953 | ||
1953 | return ret & mask; | 1954 | return ret; |
1954 | } | 1955 | } |
1955 | 1956 | ||
1956 | static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | 1957 | static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) |
@@ -2239,10 +2240,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) | |||
2239 | * or other means. | 2240 | * or other means. |
2240 | */ | 2241 | */ |
2241 | static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; | 2242 | static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; |
2243 | static u32 nested_vmx_true_procbased_ctls_low; | ||
2242 | static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; | 2244 | static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; |
2243 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; | 2245 | static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; |
2244 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; | 2246 | static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; |
2247 | static u32 nested_vmx_true_exit_ctls_low; | ||
2245 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; | 2248 | static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; |
2249 | static u32 nested_vmx_true_entry_ctls_low; | ||
2246 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; | 2250 | static u32 nested_vmx_misc_low, nested_vmx_misc_high; |
2247 | static u32 nested_vmx_ept_caps; | 2251 | static u32 nested_vmx_ept_caps; |
2248 | static __init void nested_vmx_setup_ctls_msrs(void) | 2252 | static __init void nested_vmx_setup_ctls_msrs(void) |
@@ -2265,21 +2269,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2265 | /* pin-based controls */ | 2269 | /* pin-based controls */ |
2266 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, | 2270 | rdmsr(MSR_IA32_VMX_PINBASED_CTLS, |
2267 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); | 2271 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); |
2268 | /* | ||
2269 | * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is | ||
2270 | * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. | ||
2271 | */ | ||
2272 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 2272 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
2273 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | | 2273 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | |
2274 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; | 2274 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; |
2275 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | 2275 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | |
2276 | PIN_BASED_VMX_PREEMPTION_TIMER; | 2276 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2277 | 2277 | ||
2278 | /* | 2278 | /* exit controls */ |
2279 | * Exit controls | ||
2280 | * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and | ||
2281 | * 17 must be 1. | ||
2282 | */ | ||
2283 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, | 2279 | rdmsr(MSR_IA32_VMX_EXIT_CTLS, |
2284 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); | 2280 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); |
2285 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; | 2281 | nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; |
@@ -2296,10 +2292,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2296 | if (vmx_mpx_supported()) | 2292 | if (vmx_mpx_supported()) |
2297 | nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; | 2293 | nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; |
2298 | 2294 | ||
2295 | /* We support free control of debug control saving. */ | ||
2296 | nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low & | ||
2297 | ~VM_EXIT_SAVE_DEBUG_CONTROLS; | ||
2298 | |||
2299 | /* entry controls */ | 2299 | /* entry controls */ |
2300 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2300 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
2301 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); | 2301 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); |
2302 | /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ | ||
2303 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; | 2302 | nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; |
2304 | nested_vmx_entry_ctls_high &= | 2303 | nested_vmx_entry_ctls_high &= |
2305 | #ifdef CONFIG_X86_64 | 2304 | #ifdef CONFIG_X86_64 |
@@ -2311,10 +2310,14 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2311 | if (vmx_mpx_supported()) | 2310 | if (vmx_mpx_supported()) |
2312 | nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; | 2311 | nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; |
2313 | 2312 | ||
2313 | /* We support free control of debug control loading. */ | ||
2314 | nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low & | ||
2315 | ~VM_ENTRY_LOAD_DEBUG_CONTROLS; | ||
2316 | |||
2314 | /* cpu-based controls */ | 2317 | /* cpu-based controls */ |
2315 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2318 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
2316 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); | 2319 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); |
2317 | nested_vmx_procbased_ctls_low = 0; | 2320 | nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
2318 | nested_vmx_procbased_ctls_high &= | 2321 | nested_vmx_procbased_ctls_high &= |
2319 | CPU_BASED_VIRTUAL_INTR_PENDING | | 2322 | CPU_BASED_VIRTUAL_INTR_PENDING | |
2320 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | | 2323 | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | |
@@ -2335,7 +2338,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2335 | * can use it to avoid exits to L1 - even when L0 runs L2 | 2338 | * can use it to avoid exits to L1 - even when L0 runs L2 |
2336 | * without MSR bitmaps. | 2339 | * without MSR bitmaps. |
2337 | */ | 2340 | */ |
2338 | nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; | 2341 | nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | |
2342 | CPU_BASED_USE_MSR_BITMAPS; | ||
2343 | |||
2344 | /* We support free control of CR3 access interception. */ | ||
2345 | nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & | ||
2346 | ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); | ||
2339 | 2347 | ||
2340 | /* secondary cpu-based controls */ | 2348 | /* secondary cpu-based controls */ |
2341 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, | 2349 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, |
@@ -2394,7 +2402,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2394 | * guest, and the VMCS structure we give it - not about the | 2402 | * guest, and the VMCS structure we give it - not about the |
2395 | * VMX support of the underlying hardware. | 2403 | * VMX support of the underlying hardware. |
2396 | */ | 2404 | */ |
2397 | *pdata = VMCS12_REVISION | | 2405 | *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | |
2398 | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | | 2406 | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | |
2399 | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); | 2407 | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); |
2400 | break; | 2408 | break; |
@@ -2404,16 +2412,25 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2404 | nested_vmx_pinbased_ctls_high); | 2412 | nested_vmx_pinbased_ctls_high); |
2405 | break; | 2413 | break; |
2406 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: | 2414 | case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: |
2415 | *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low, | ||
2416 | nested_vmx_procbased_ctls_high); | ||
2417 | break; | ||
2407 | case MSR_IA32_VMX_PROCBASED_CTLS: | 2418 | case MSR_IA32_VMX_PROCBASED_CTLS: |
2408 | *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, | 2419 | *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, |
2409 | nested_vmx_procbased_ctls_high); | 2420 | nested_vmx_procbased_ctls_high); |
2410 | break; | 2421 | break; |
2411 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: | 2422 | case MSR_IA32_VMX_TRUE_EXIT_CTLS: |
2423 | *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low, | ||
2424 | nested_vmx_exit_ctls_high); | ||
2425 | break; | ||
2412 | case MSR_IA32_VMX_EXIT_CTLS: | 2426 | case MSR_IA32_VMX_EXIT_CTLS: |
2413 | *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, | 2427 | *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, |
2414 | nested_vmx_exit_ctls_high); | 2428 | nested_vmx_exit_ctls_high); |
2415 | break; | 2429 | break; |
2416 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: | 2430 | case MSR_IA32_VMX_TRUE_ENTRY_CTLS: |
2431 | *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low, | ||
2432 | nested_vmx_entry_ctls_high); | ||
2433 | break; | ||
2417 | case MSR_IA32_VMX_ENTRY_CTLS: | 2434 | case MSR_IA32_VMX_ENTRY_CTLS: |
2418 | *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, | 2435 | *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, |
2419 | nested_vmx_entry_ctls_high); | 2436 | nested_vmx_entry_ctls_high); |
@@ -2442,7 +2459,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2442 | *pdata = -1ULL; | 2459 | *pdata = -1ULL; |
2443 | break; | 2460 | break; |
2444 | case MSR_IA32_VMX_VMCS_ENUM: | 2461 | case MSR_IA32_VMX_VMCS_ENUM: |
2445 | *pdata = 0x1f; | 2462 | *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ |
2446 | break; | 2463 | break; |
2447 | case MSR_IA32_VMX_PROCBASED_CTLS2: | 2464 | case MSR_IA32_VMX_PROCBASED_CTLS2: |
2448 | *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, | 2465 | *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, |
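Advertising VMX_BASIC_TRUE_CTLS (bit 55) tells L1 to also consult the TRUE_*_CTLS MSRs, whose low halves may drop always-on bits such as the CR3-exiting and debug-control bits relaxed above. Each control MSR packs the must-be-1 bits in bits 31:0 and the may-be-1 bits in bits 63:32. A self-contained sketch of that packing and of the check a guest-supplied control word has to pass, mirroring what vmx_control_msr()/vmx_control_verify() do; the specific bits used are made-up examples:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Low half: bits that must be 1.  High half: bits that may be 1. */
static uint64_t control_msr(uint32_t low, uint32_t high)
{
	return low | ((uint64_t)high << 32);
}

/* A control word is legal iff it keeps every must-be-1 bit set and
 * sets nothing outside the may-be-1 set. */
static bool control_verify(uint32_t control, uint32_t low, uint32_t high)
{
	return ((control & high) == control) && ((control & low) == low);
}

int main(void)
{
	/* Made-up example: bit 3 is always-on in the plain MSR but relaxed
	 * in its TRUE variant; bits 3 and 7 may be set. */
	uint32_t plain_low = 1u << 3, true_low = 0, high = (1u << 3) | (1u << 7);

	printf("true msr: %#llx\n", (unsigned long long)control_msr(true_low, high));
	printf("0x80 vs plain: %d\n", control_verify(0x80, plain_low, high));	/* 0 */
	printf("0x80 vs true:  %d\n", control_verify(0x80, true_low, high));	/* 1 */
	return 0;
}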
@@ -3653,7 +3670,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
3653 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); | 3670 | vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); |
3654 | 3671 | ||
3655 | out: | 3672 | out: |
3656 | vmx->emulation_required |= emulation_required(vcpu); | 3673 | vmx->emulation_required = emulation_required(vcpu); |
3657 | } | 3674 | } |
3658 | 3675 | ||
3659 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | 3676 | static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) |
@@ -4422,7 +4439,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4422 | vmx->vcpu.arch.pat = host_pat; | 4439 | vmx->vcpu.arch.pat = host_pat; |
4423 | } | 4440 | } |
4424 | 4441 | ||
4425 | for (i = 0; i < NR_VMX_MSR; ++i) { | 4442 | for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) { |
4426 | u32 index = vmx_msr_index[i]; | 4443 | u32 index = vmx_msr_index[i]; |
4427 | u32 data_low, data_high; | 4444 | u32 data_low, data_high; |
4428 | int j = vmx->nmsrs; | 4445 | int j = vmx->nmsrs; |
@@ -4873,7 +4890,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4873 | if (!(vcpu->guest_debug & | 4890 | if (!(vcpu->guest_debug & |
4874 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 4891 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { |
4875 | vcpu->arch.dr6 &= ~15; | 4892 | vcpu->arch.dr6 &= ~15; |
4876 | vcpu->arch.dr6 |= dr6; | 4893 | vcpu->arch.dr6 |= dr6 | DR6_RTM; |
4877 | if (!(dr6 & ~DR6_RESERVED)) /* icebp */ | 4894 | if (!(dr6 & ~DR6_RESERVED)) /* icebp */ |
4878 | skip_emulated_instruction(vcpu); | 4895 | skip_emulated_instruction(vcpu); |
4879 | 4896 | ||
@@ -5039,7 +5056,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
5039 | reg = (exit_qualification >> 8) & 15; | 5056 | reg = (exit_qualification >> 8) & 15; |
5040 | switch ((exit_qualification >> 4) & 3) { | 5057 | switch ((exit_qualification >> 4) & 3) { |
5041 | case 0: /* mov to cr */ | 5058 | case 0: /* mov to cr */ |
5042 | val = kvm_register_read(vcpu, reg); | 5059 | val = kvm_register_readl(vcpu, reg); |
5043 | trace_kvm_cr_write(cr, val); | 5060 | trace_kvm_cr_write(cr, val); |
5044 | switch (cr) { | 5061 | switch (cr) { |
5045 | case 0: | 5062 | case 0: |
@@ -5056,7 +5073,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) | |||
5056 | return 1; | 5073 | return 1; |
5057 | case 8: { | 5074 | case 8: { |
5058 | u8 cr8_prev = kvm_get_cr8(vcpu); | 5075 | u8 cr8_prev = kvm_get_cr8(vcpu); |
5059 | u8 cr8 = kvm_register_read(vcpu, reg); | 5076 | u8 cr8 = (u8)val; |
5060 | err = kvm_set_cr8(vcpu, cr8); | 5077 | err = kvm_set_cr8(vcpu, cr8); |
5061 | kvm_complete_insn_gp(vcpu, err); | 5078 | kvm_complete_insn_gp(vcpu, err); |
5062 | if (irqchip_in_kernel(vcpu->kvm)) | 5079 | if (irqchip_in_kernel(vcpu->kvm)) |
@@ -5132,7 +5149,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
5132 | return 0; | 5149 | return 0; |
5133 | } else { | 5150 | } else { |
5134 | vcpu->arch.dr7 &= ~DR7_GD; | 5151 | vcpu->arch.dr7 &= ~DR7_GD; |
5135 | vcpu->arch.dr6 |= DR6_BD; | 5152 | vcpu->arch.dr6 |= DR6_BD | DR6_RTM; |
5136 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); | 5153 | vmcs_writel(GUEST_DR7, vcpu->arch.dr7); |
5137 | kvm_queue_exception(vcpu, DB_VECTOR); | 5154 | kvm_queue_exception(vcpu, DB_VECTOR); |
5138 | return 1; | 5155 | return 1; |
@@ -5165,7 +5182,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
5165 | return 1; | 5182 | return 1; |
5166 | kvm_register_write(vcpu, reg, val); | 5183 | kvm_register_write(vcpu, reg, val); |
5167 | } else | 5184 | } else |
5168 | if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg))) | 5185 | if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) |
5169 | return 1; | 5186 | return 1; |
5170 | 5187 | ||
5171 | skip_emulated_instruction(vcpu); | 5188 | skip_emulated_instruction(vcpu); |
@@ -5621,7 +5638,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5621 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 5638 | cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
5622 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; | 5639 | intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; |
5623 | 5640 | ||
5624 | while (!guest_state_valid(vcpu) && count-- != 0) { | 5641 | while (vmx->emulation_required && count-- != 0) { |
5625 | if (intr_window_requested && vmx_interrupt_allowed(vcpu)) | 5642 | if (intr_window_requested && vmx_interrupt_allowed(vcpu)) |
5626 | return handle_interrupt_window(&vmx->vcpu); | 5643 | return handle_interrupt_window(&vmx->vcpu); |
5627 | 5644 | ||
@@ -5655,7 +5672,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) | |||
5655 | schedule(); | 5672 | schedule(); |
5656 | } | 5673 | } |
5657 | 5674 | ||
5658 | vmx->emulation_required = emulation_required(vcpu); | ||
5659 | out: | 5675 | out: |
5660 | return ret; | 5676 | return ret; |
5661 | } | 5677 | } |
@@ -5754,22 +5770,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr) | |||
5754 | 5770 | ||
5755 | /* | 5771 | /* |
5756 | * Free all VMCSs saved for this vcpu, except the one pointed by | 5772 | * Free all VMCSs saved for this vcpu, except the one pointed by |
5757 | * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one | 5773 | * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs |
5758 | * currently used, if running L2), and vmcs01 when running L2. | 5774 | * must be &vmx->vmcs01. |
5759 | */ | 5775 | */ |
5760 | static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) | 5776 | static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) |
5761 | { | 5777 | { |
5762 | struct vmcs02_list *item, *n; | 5778 | struct vmcs02_list *item, *n; |
5779 | |||
5780 | WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01); | ||
5763 | list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { | 5781 | list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { |
5764 | if (vmx->loaded_vmcs != &item->vmcs02) | 5782 | /* |
5765 | free_loaded_vmcs(&item->vmcs02); | 5783 | * Something will leak if the above WARN triggers. Better than |
5784 | * a use-after-free. | ||
5785 | */ | ||
5786 | if (vmx->loaded_vmcs == &item->vmcs02) | ||
5787 | continue; | ||
5788 | |||
5789 | free_loaded_vmcs(&item->vmcs02); | ||
5766 | list_del(&item->list); | 5790 | list_del(&item->list); |
5767 | kfree(item); | 5791 | kfree(item); |
5792 | vmx->nested.vmcs02_num--; | ||
5768 | } | 5793 | } |
5769 | vmx->nested.vmcs02_num = 0; | ||
5770 | |||
5771 | if (vmx->loaded_vmcs != &vmx->vmcs01) | ||
5772 | free_loaded_vmcs(&vmx->vmcs01); | ||
5773 | } | 5794 | } |
5774 | 5795 | ||
5775 | /* | 5796 | /* |
@@ -5918,7 +5939,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
5918 | * which replaces physical address width with 32 | 5939 | * which replaces physical address width with 32 |
5919 | * | 5940 | * |
5920 | */ | 5941 | */ |
5921 | if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { | 5942 | if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { |
5922 | nested_vmx_failInvalid(vcpu); | 5943 | nested_vmx_failInvalid(vcpu); |
5923 | skip_emulated_instruction(vcpu); | 5944 | skip_emulated_instruction(vcpu); |
5924 | return 1; | 5945 | return 1; |
@@ -5936,7 +5957,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
5936 | vmx->nested.vmxon_ptr = vmptr; | 5957 | vmx->nested.vmxon_ptr = vmptr; |
5937 | break; | 5958 | break; |
5938 | case EXIT_REASON_VMCLEAR: | 5959 | case EXIT_REASON_VMCLEAR: |
5939 | if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { | 5960 | if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { |
5940 | nested_vmx_failValid(vcpu, | 5961 | nested_vmx_failValid(vcpu, |
5941 | VMXERR_VMCLEAR_INVALID_ADDRESS); | 5962 | VMXERR_VMCLEAR_INVALID_ADDRESS); |
5942 | skip_emulated_instruction(vcpu); | 5963 | skip_emulated_instruction(vcpu); |
@@ -5951,7 +5972,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, | |||
5951 | } | 5972 | } |
5952 | break; | 5973 | break; |
5953 | case EXIT_REASON_VMPTRLD: | 5974 | case EXIT_REASON_VMPTRLD: |
5954 | if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { | 5975 | if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { |
5955 | nested_vmx_failValid(vcpu, | 5976 | nested_vmx_failValid(vcpu, |
5956 | VMXERR_VMPTRLD_INVALID_ADDRESS); | 5977 | VMXERR_VMPTRLD_INVALID_ADDRESS); |
5957 | skip_emulated_instruction(vcpu); | 5978 | skip_emulated_instruction(vcpu); |
@@ -6086,20 +6107,27 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) | |||
6086 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) | 6107 | static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) |
6087 | { | 6108 | { |
6088 | u32 exec_control; | 6109 | u32 exec_control; |
6110 | if (vmx->nested.current_vmptr == -1ull) | ||
6111 | return; | ||
6112 | |||
6113 | /* current_vmptr and current_vmcs12 are always set/reset together */ | ||
6114 | if (WARN_ON(vmx->nested.current_vmcs12 == NULL)) | ||
6115 | return; | ||
6116 | |||
6089 | if (enable_shadow_vmcs) { | 6117 | if (enable_shadow_vmcs) { |
6090 | if (vmx->nested.current_vmcs12 != NULL) { | 6118 | /* copy to memory all shadowed fields in case |
6091 | /* copy to memory all shadowed fields in case | 6119 | they were modified */ |
6092 | they were modified */ | 6120 | copy_shadow_to_vmcs12(vmx); |
6093 | copy_shadow_to_vmcs12(vmx); | 6121 | vmx->nested.sync_shadow_vmcs = false; |
6094 | vmx->nested.sync_shadow_vmcs = false; | 6122 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); |
6095 | exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); | 6123 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; |
6096 | exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; | 6124 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
6097 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 6125 | vmcs_write64(VMCS_LINK_POINTER, -1ull); |
6098 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | ||
6099 | } | ||
6100 | } | 6126 | } |
6101 | kunmap(vmx->nested.current_vmcs12_page); | 6127 | kunmap(vmx->nested.current_vmcs12_page); |
6102 | nested_release_page(vmx->nested.current_vmcs12_page); | 6128 | nested_release_page(vmx->nested.current_vmcs12_page); |
6129 | vmx->nested.current_vmptr = -1ull; | ||
6130 | vmx->nested.current_vmcs12 = NULL; | ||
6103 | } | 6131 | } |
6104 | 6132 | ||
6105 | /* | 6133 | /* |
@@ -6110,12 +6138,9 @@ static void free_nested(struct vcpu_vmx *vmx) | |||
6110 | { | 6138 | { |
6111 | if (!vmx->nested.vmxon) | 6139 | if (!vmx->nested.vmxon) |
6112 | return; | 6140 | return; |
6141 | |||
6113 | vmx->nested.vmxon = false; | 6142 | vmx->nested.vmxon = false; |
6114 | if (vmx->nested.current_vmptr != -1ull) { | 6143 | nested_release_vmcs12(vmx); |
6115 | nested_release_vmcs12(vmx); | ||
6116 | vmx->nested.current_vmptr = -1ull; | ||
6117 | vmx->nested.current_vmcs12 = NULL; | ||
6118 | } | ||
6119 | if (enable_shadow_vmcs) | 6144 | if (enable_shadow_vmcs) |
6120 | free_vmcs(vmx->nested.current_shadow_vmcs); | 6145 | free_vmcs(vmx->nested.current_shadow_vmcs); |
6121 | /* Unpin physical memory we referred to in current vmcs02 */ | 6146 | /* Unpin physical memory we referred to in current vmcs02 */ |
@@ -6152,11 +6177,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) | |||
6152 | if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) | 6177 | if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) |
6153 | return 1; | 6178 | return 1; |
6154 | 6179 | ||
6155 | if (vmptr == vmx->nested.current_vmptr) { | 6180 | if (vmptr == vmx->nested.current_vmptr) |
6156 | nested_release_vmcs12(vmx); | 6181 | nested_release_vmcs12(vmx); |
6157 | vmx->nested.current_vmptr = -1ull; | ||
6158 | vmx->nested.current_vmcs12 = NULL; | ||
6159 | } | ||
6160 | 6182 | ||
6161 | page = nested_get_page(vcpu, vmptr); | 6183 | page = nested_get_page(vcpu, vmptr); |
6162 | if (page == NULL) { | 6184 | if (page == NULL) { |
@@ -6384,7 +6406,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) | |||
6384 | return 1; | 6406 | return 1; |
6385 | 6407 | ||
6386 | /* Decode instruction info and find the field to read */ | 6408 | /* Decode instruction info and find the field to read */ |
6387 | field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); | 6409 | field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); |
6388 | /* Read the field, zero-extended to a u64 field_value */ | 6410 | /* Read the field, zero-extended to a u64 field_value */ |
6389 | if (!vmcs12_read_any(vcpu, field, &field_value)) { | 6411 | if (!vmcs12_read_any(vcpu, field, &field_value)) { |
6390 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); | 6412 | nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); |
@@ -6397,7 +6419,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) | |||
6397 | * on the guest's mode (32 or 64 bit), not on the given field's length. | 6419 | * on the guest's mode (32 or 64 bit), not on the given field's length. |
6398 | */ | 6420 | */ |
6399 | if (vmx_instruction_info & (1u << 10)) { | 6421 | if (vmx_instruction_info & (1u << 10)) { |
6400 | kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), | 6422 | kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), |
6401 | field_value); | 6423 | field_value); |
6402 | } else { | 6424 | } else { |
6403 | if (get_vmx_mem_address(vcpu, exit_qualification, | 6425 | if (get_vmx_mem_address(vcpu, exit_qualification, |
@@ -6434,21 +6456,21 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) | |||
6434 | return 1; | 6456 | return 1; |
6435 | 6457 | ||
6436 | if (vmx_instruction_info & (1u << 10)) | 6458 | if (vmx_instruction_info & (1u << 10)) |
6437 | field_value = kvm_register_read(vcpu, | 6459 | field_value = kvm_register_readl(vcpu, |
6438 | (((vmx_instruction_info) >> 3) & 0xf)); | 6460 | (((vmx_instruction_info) >> 3) & 0xf)); |
6439 | else { | 6461 | else { |
6440 | if (get_vmx_mem_address(vcpu, exit_qualification, | 6462 | if (get_vmx_mem_address(vcpu, exit_qualification, |
6441 | vmx_instruction_info, &gva)) | 6463 | vmx_instruction_info, &gva)) |
6442 | return 1; | 6464 | return 1; |
6443 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, | 6465 | if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, |
6444 | &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { | 6466 | &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { |
6445 | kvm_inject_page_fault(vcpu, &e); | 6467 | kvm_inject_page_fault(vcpu, &e); |
6446 | return 1; | 6468 | return 1; |
6447 | } | 6469 | } |
6448 | } | 6470 | } |
6449 | 6471 | ||
6450 | 6472 | ||
6451 | field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); | 6473 | field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); |
6452 | if (vmcs_field_readonly(field)) { | 6474 | if (vmcs_field_readonly(field)) { |
6453 | nested_vmx_failValid(vcpu, | 6475 | nested_vmx_failValid(vcpu, |
6454 | VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); | 6476 | VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); |
@@ -6498,9 +6520,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) | |||
6498 | skip_emulated_instruction(vcpu); | 6520 | skip_emulated_instruction(vcpu); |
6499 | return 1; | 6521 | return 1; |
6500 | } | 6522 | } |
6501 | if (vmx->nested.current_vmptr != -1ull) | ||
6502 | nested_release_vmcs12(vmx); | ||
6503 | 6523 | ||
6524 | nested_release_vmcs12(vmx); | ||
6504 | vmx->nested.current_vmptr = vmptr; | 6525 | vmx->nested.current_vmptr = vmptr; |
6505 | vmx->nested.current_vmcs12 = new_vmcs12; | 6526 | vmx->nested.current_vmcs12 = new_vmcs12; |
6506 | vmx->nested.current_vmcs12_page = page; | 6527 | vmx->nested.current_vmcs12_page = page; |
@@ -6571,7 +6592,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) | |||
6571 | } | 6592 | } |
6572 | 6593 | ||
6573 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); | 6594 | vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); |
6574 | type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); | 6595 | type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); |
6575 | 6596 | ||
6576 | types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; | 6597 | types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; |
6577 | 6598 | ||
@@ -6751,7 +6772,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, | |||
6751 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6772 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6752 | int cr = exit_qualification & 15; | 6773 | int cr = exit_qualification & 15; |
6753 | int reg = (exit_qualification >> 8) & 15; | 6774 | int reg = (exit_qualification >> 8) & 15; |
6754 | unsigned long val = kvm_register_read(vcpu, reg); | 6775 | unsigned long val = kvm_register_readl(vcpu, reg); |
6755 | 6776 | ||
6756 | switch ((exit_qualification >> 4) & 3) { | 6777 | switch ((exit_qualification >> 4) & 3) { |
6757 | case 0: /* mov to cr */ | 6778 | case 0: /* mov to cr */ |
@@ -7112,7 +7133,26 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) | |||
7112 | if (max_irr == -1) | 7133 | if (max_irr == -1) |
7113 | return; | 7134 | return; |
7114 | 7135 | ||
7115 | vmx_set_rvi(max_irr); | 7136 | /* |
7137 | * If a vmexit is needed, vmx_check_nested_events handles it. | ||
7138 | */ | ||
7139 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | ||
7140 | return; | ||
7141 | |||
7142 | if (!is_guest_mode(vcpu)) { | ||
7143 | vmx_set_rvi(max_irr); | ||
7144 | return; | ||
7145 | } | ||
7146 | |||
7147 | /* | ||
7148 | * Fall back to pre-APICv interrupt injection since L2 | ||
7149 | * is run without virtual interrupt delivery. | ||
7150 | */ | ||
7151 | if (!kvm_event_needs_reinjection(vcpu) && | ||
7152 | vmx_interrupt_allowed(vcpu)) { | ||
7153 | kvm_queue_interrupt(vcpu, max_irr, false); | ||
7154 | vmx_inject_irq(vcpu); | ||
7155 | } | ||
7116 | } | 7156 | } |
7117 | 7157 | ||
7118 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) | 7158 | static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) |
@@ -7520,13 +7560,31 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
7520 | vmx_complete_interrupts(vmx); | 7560 | vmx_complete_interrupts(vmx); |
7521 | } | 7561 | } |
7522 | 7562 | ||
7563 | static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) | ||
7564 | { | ||
7565 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
7566 | int cpu; | ||
7567 | |||
7568 | if (vmx->loaded_vmcs == &vmx->vmcs01) | ||
7569 | return; | ||
7570 | |||
7571 | cpu = get_cpu(); | ||
7572 | vmx->loaded_vmcs = &vmx->vmcs01; | ||
7573 | vmx_vcpu_put(vcpu); | ||
7574 | vmx_vcpu_load(vcpu, cpu); | ||
7575 | vcpu->cpu = cpu; | ||
7576 | put_cpu(); | ||
7577 | } | ||
7578 | |||
7523 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | 7579 | static void vmx_free_vcpu(struct kvm_vcpu *vcpu) |
7524 | { | 7580 | { |
7525 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7581 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7526 | 7582 | ||
7527 | free_vpid(vmx); | 7583 | free_vpid(vmx); |
7528 | free_loaded_vmcs(vmx->loaded_vmcs); | 7584 | leave_guest_mode(vcpu); |
7585 | vmx_load_vmcs01(vcpu); | ||
7529 | free_nested(vmx); | 7586 | free_nested(vmx); |
7587 | free_loaded_vmcs(vmx->loaded_vmcs); | ||
7530 | kfree(vmx->guest_msrs); | 7588 | kfree(vmx->guest_msrs); |
7531 | kvm_vcpu_uninit(vcpu); | 7589 | kvm_vcpu_uninit(vcpu); |
7532 | kmem_cache_free(kvm_vcpu_cache, vmx); | 7590 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -7548,6 +7606,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
7548 | goto free_vcpu; | 7606 | goto free_vcpu; |
7549 | 7607 | ||
7550 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); | 7608 | vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); |
7609 | BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) | ||
7610 | > PAGE_SIZE); | ||
7611 | |||
7551 | err = -ENOMEM; | 7612 | err = -ENOMEM; |
7552 | if (!vmx->guest_msrs) { | 7613 | if (!vmx->guest_msrs) { |
7553 | goto uninit_vcpu; | 7614 | goto uninit_vcpu; |
@@ -7836,7 +7897,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7836 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); | 7897 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); |
7837 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); | 7898 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); |
7838 | 7899 | ||
7839 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); | 7900 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { |
7901 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | ||
7902 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); | ||
7903 | } else { | ||
7904 | kvm_set_dr(vcpu, 7, vcpu->arch.dr7); | ||
7905 | vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); | ||
7906 | } | ||
7840 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 7907 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
7841 | vmcs12->vm_entry_intr_info_field); | 7908 | vmcs12->vm_entry_intr_info_field); |
7842 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 7909 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, |
@@ -7846,7 +7913,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7846 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | 7913 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, |
7847 | vmcs12->guest_interruptibility_info); | 7914 | vmcs12->guest_interruptibility_info); |
7848 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); | 7915 | vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); |
7849 | kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); | ||
7850 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); | 7916 | vmx_set_rflags(vcpu, vmcs12->guest_rflags); |
7851 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, | 7917 | vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, |
7852 | vmcs12->guest_pending_dbg_exceptions); | 7918 | vmcs12->guest_pending_dbg_exceptions); |
@@ -8113,14 +8179,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8113 | } | 8179 | } |
8114 | 8180 | ||
8115 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && | 8181 | if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && |
8116 | !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { | 8182 | !PAGE_ALIGNED(vmcs12->msr_bitmap)) { |
8117 | /*TODO: Also verify bits beyond physical address width are 0*/ | 8183 | /*TODO: Also verify bits beyond physical address width are 0*/ |
8118 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 8184 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8119 | return 1; | 8185 | return 1; |
8120 | } | 8186 | } |
8121 | 8187 | ||
8122 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && | 8188 | if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && |
8123 | !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { | 8189 | !PAGE_ALIGNED(vmcs12->apic_access_addr)) { |
8124 | /*TODO: Also verify bits beyond physical address width are 0*/ | 8190 | /*TODO: Also verify bits beyond physical address width are 0*/ |
8125 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 8191 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8126 | return 1; | 8192 | return 1; |
@@ -8136,15 +8202,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8136 | } | 8202 | } |
8137 | 8203 | ||
8138 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, | 8204 | if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, |
8139 | nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || | 8205 | nested_vmx_true_procbased_ctls_low, |
8206 | nested_vmx_procbased_ctls_high) || | ||
8140 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, | 8207 | !vmx_control_verify(vmcs12->secondary_vm_exec_control, |
8141 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || | 8208 | nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || |
8142 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, | 8209 | !vmx_control_verify(vmcs12->pin_based_vm_exec_control, |
8143 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || | 8210 | nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || |
8144 | !vmx_control_verify(vmcs12->vm_exit_controls, | 8211 | !vmx_control_verify(vmcs12->vm_exit_controls, |
8145 | nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || | 8212 | nested_vmx_true_exit_ctls_low, |
8213 | nested_vmx_exit_ctls_high) || | ||
8146 | !vmx_control_verify(vmcs12->vm_entry_controls, | 8214 | !vmx_control_verify(vmcs12->vm_entry_controls, |
8147 | nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) | 8215 | nested_vmx_true_entry_ctls_low, |
8216 | nested_vmx_entry_ctls_high)) | ||
8148 | { | 8217 | { |
8149 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 8218 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
8150 | return 1; | 8219 | return 1; |
@@ -8221,6 +8290,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8221 | 8290 | ||
8222 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); | 8291 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); |
8223 | 8292 | ||
8293 | if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) | ||
8294 | vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
8295 | |||
8224 | cpu = get_cpu(); | 8296 | cpu = get_cpu(); |
8225 | vmx->loaded_vmcs = vmcs02; | 8297 | vmx->loaded_vmcs = vmcs02; |
8226 | vmx_vcpu_put(vcpu); | 8298 | vmx_vcpu_put(vcpu); |
@@ -8398,7 +8470,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
8398 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 8470 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
8399 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); | 8471 | vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); |
8400 | 8472 | ||
8401 | kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); | ||
8402 | vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); | 8473 | vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); |
8403 | vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); | 8474 | vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); |
8404 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); | 8475 | vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); |
@@ -8477,9 +8548,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, | |||
8477 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8548 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
8478 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); | 8549 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
8479 | 8550 | ||
8551 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { | ||
8552 | kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); | ||
8553 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
8554 | } | ||
8555 | |||
8480 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 8556 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
8481 | * the relevant bit asks not to trap the change */ | 8557 | * the relevant bit asks not to trap the change */ |
8482 | vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); | ||
8483 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) | 8558 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) |
8484 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); | 8559 | vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); |
8485 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) | 8560 | if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) |
@@ -8670,7 +8745,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
8670 | unsigned long exit_qualification) | 8745 | unsigned long exit_qualification) |
8671 | { | 8746 | { |
8672 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8747 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8673 | int cpu; | ||
8674 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 8748 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
8675 | 8749 | ||
8676 | /* trying to cancel vmlaunch/vmresume is a bug */ | 8750 | /* trying to cancel vmlaunch/vmresume is a bug */ |
@@ -8695,12 +8769,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, | |||
8695 | vmcs12->vm_exit_intr_error_code, | 8769 | vmcs12->vm_exit_intr_error_code, |
8696 | KVM_ISA_VMX); | 8770 | KVM_ISA_VMX); |
8697 | 8771 | ||
8698 | cpu = get_cpu(); | 8772 | vmx_load_vmcs01(vcpu); |
8699 | vmx->loaded_vmcs = &vmx->vmcs01; | ||
8700 | vmx_vcpu_put(vcpu); | ||
8701 | vmx_vcpu_load(vcpu, cpu); | ||
8702 | vcpu->cpu = cpu; | ||
8703 | put_cpu(); | ||
8704 | 8773 | ||
8705 | vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); | 8774 | vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); |
8706 | vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); | 8775 | vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); |
@@ -8890,7 +8959,7 @@ static int __init vmx_init(void) | |||
8890 | 8959 | ||
8891 | rdmsrl_safe(MSR_EFER, &host_efer); | 8960 | rdmsrl_safe(MSR_EFER, &host_efer); |
8892 | 8961 | ||
8893 | for (i = 0; i < NR_VMX_MSR; ++i) | 8962 | for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) |
8894 | kvm_define_shared_msr(i, vmx_msr_index[i]); | 8963 | kvm_define_shared_msr(i, vmx_msr_index[i]); |
8895 | 8964 | ||
8896 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); | 8965 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef432f891d30..b86d329b953a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -87,6 +87,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); | |||
87 | 87 | ||
88 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); | 88 | static void update_cr8_intercept(struct kvm_vcpu *vcpu); |
89 | static void process_nmi(struct kvm_vcpu *vcpu); | 89 | static void process_nmi(struct kvm_vcpu *vcpu); |
90 | static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | ||
90 | 91 | ||
91 | struct kvm_x86_ops *kvm_x86_ops; | 92 | struct kvm_x86_ops *kvm_x86_ops; |
92 | EXPORT_SYMBOL_GPL(kvm_x86_ops); | 93 | EXPORT_SYMBOL_GPL(kvm_x86_ops); |
@@ -211,6 +212,7 @@ static void shared_msr_update(unsigned slot, u32 msr) | |||
211 | 212 | ||
212 | void kvm_define_shared_msr(unsigned slot, u32 msr) | 213 | void kvm_define_shared_msr(unsigned slot, u32 msr) |
213 | { | 214 | { |
215 | BUG_ON(slot >= KVM_NR_SHARED_MSRS); | ||
214 | if (slot >= shared_msrs_global.nr) | 216 | if (slot >= shared_msrs_global.nr) |
215 | shared_msrs_global.nr = slot + 1; | 217 | shared_msrs_global.nr = slot + 1; |
216 | shared_msrs_global.msrs[slot] = msr; | 218 | shared_msrs_global.msrs[slot] = msr; |
@@ -310,6 +312,31 @@ static int exception_class(int vector) | |||
310 | return EXCPT_BENIGN; | 312 | return EXCPT_BENIGN; |
311 | } | 313 | } |
312 | 314 | ||
315 | #define EXCPT_FAULT 0 | ||
316 | #define EXCPT_TRAP 1 | ||
317 | #define EXCPT_ABORT 2 | ||
318 | #define EXCPT_INTERRUPT 3 | ||
319 | |||
320 | static int exception_type(int vector) | ||
321 | { | ||
322 | unsigned int mask; | ||
323 | |||
324 | if (WARN_ON(vector > 31 || vector == NMI_VECTOR)) | ||
325 | return EXCPT_INTERRUPT; | ||
326 | |||
327 | mask = 1 << vector; | ||
328 | |||
329 | /* #DB is trap, as instruction watchpoints are handled elsewhere */ | ||
330 | if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR))) | ||
331 | return EXCPT_TRAP; | ||
332 | |||
333 | if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR))) | ||
334 | return EXCPT_ABORT; | ||
335 | |||
336 | /* Reserved exceptions will result in fault */ | ||
337 | return EXCPT_FAULT; | ||
338 | } | ||
339 | |||
313 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, | 340 | static void kvm_multiple_exception(struct kvm_vcpu *vcpu, |
314 | unsigned nr, bool has_error, u32 error_code, | 341 | unsigned nr, bool has_error, u32 error_code, |
315 | bool reinject) | 342 | bool reinject) |
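exception_type() matters for the RF and RIP handling elsewhere in this series: traps (#DB, #BP, #OF) report the address of the next instruction, while faults report the faulting one, so injection has to treat them differently. A stand-alone copy of the classification, with the x86 vector numbers spelled out; the kernel version's WARN_ON is dropped here:

#include <stdio.h>

enum { EXCPT_FAULT, EXCPT_TRAP, EXCPT_ABORT, EXCPT_INTERRUPT };

/* x86 exception vector numbers used below */
#define DB_VECTOR   1
#define NMI_VECTOR  2
#define BP_VECTOR   3
#define OF_VECTOR   4
#define DF_VECTOR   8
#define MC_VECTOR  18

static int exception_type(int vector)
{
	unsigned int mask;

	if (vector > 31 || vector == NMI_VECTOR)
		return EXCPT_INTERRUPT;		/* not an exception vector */

	mask = 1u << vector;

	/* #DB, #BP and #OF report the *next* instruction: traps. */
	if (mask & ((1u << DB_VECTOR) | (1u << BP_VECTOR) | (1u << OF_VECTOR)))
		return EXCPT_TRAP;

	if (mask & ((1u << DF_VECTOR) | (1u << MC_VECTOR)))
		return EXCPT_ABORT;

	return EXCPT_FAULT;			/* #GP, #PF, ... */
}

int main(void)
{
	printf("#DB -> %d, #PF -> %d, #DF -> %d\n",
	       exception_type(1), exception_type(14), exception_type(8));
	return 0;
}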
@@ -758,6 +785,15 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu) | |||
758 | vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; | 785 | vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; |
759 | } | 786 | } |
760 | 787 | ||
788 | static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) | ||
789 | { | ||
790 | u64 fixed = DR6_FIXED_1; | ||
791 | |||
792 | if (!guest_cpuid_has_rtm(vcpu)) | ||
793 | fixed |= DR6_RTM; | ||
794 | return fixed; | ||
795 | } | ||
796 | |||
761 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | 797 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) |
762 | { | 798 | { |
763 | switch (dr) { | 799 | switch (dr) { |
@@ -773,7 +809,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
773 | case 6: | 809 | case 6: |
774 | if (val & 0xffffffff00000000ULL) | 810 | if (val & 0xffffffff00000000ULL) |
775 | return -1; /* #GP */ | 811 | return -1; /* #GP */ |
776 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 812 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); |
777 | kvm_update_dr6(vcpu); | 813 | kvm_update_dr6(vcpu); |
778 | break; | 814 | break; |
779 | case 5: | 815 | case 5: |
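DR6 bit 16 (DR6.RTM) is active-low: with TSX it reads as 1 unless the debug exception was raised inside a transactional region, and without RTM it reads as 1 like the neighbouring reserved bits, which is why the fixed mask now depends on guest CPUID. A sketch of the resulting DR6 write behaviour, with the architectural constants written out and the CPUID lookup reduced to a boolean parameter:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define DR6_FIXED_1  0xfffe0ff0ULL	/* bits that always read as 1 */
#define DR6_RTM      (1ULL << 16)	/* reads as 0 only for a TSX #DB */
#define DR6_VOLATILE 0x0001e00fULL	/* B0-B3, BD, BS, BT and RTM */

/* What DR6 should read as after the guest writes 'val'. */
static uint64_t dr6_after_write(uint64_t val, bool guest_has_rtm)
{
	uint64_t fixed = DR6_FIXED_1;

	if (!guest_has_rtm)
		fixed |= DR6_RTM;	/* without TSX, bit 16 is stuck at 1 */
	return (val & DR6_VOLATILE) | fixed;
}

int main(void)
{
	printf("no RTM: %#llx\n", (unsigned long long)dr6_after_write(0, false));
	printf("RTM:    %#llx\n", (unsigned long long)dr6_after_write(0, true));
	return 0;
}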
@@ -1215,6 +1251,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1215 | unsigned long flags; | 1251 | unsigned long flags; |
1216 | s64 usdiff; | 1252 | s64 usdiff; |
1217 | bool matched; | 1253 | bool matched; |
1254 | bool already_matched; | ||
1218 | u64 data = msr->data; | 1255 | u64 data = msr->data; |
1219 | 1256 | ||
1220 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1257 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
@@ -1279,6 +1316,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1279 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); | 1316 | pr_debug("kvm: adjusted tsc offset by %llu\n", delta); |
1280 | } | 1317 | } |
1281 | matched = true; | 1318 | matched = true; |
1319 | already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation); | ||
1282 | } else { | 1320 | } else { |
1283 | /* | 1321 | /* |
1284 | * We split periods of matched TSC writes into generations. | 1322 | * We split periods of matched TSC writes into generations. |
@@ -1294,7 +1332,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1294 | kvm->arch.cur_tsc_write = data; | 1332 | kvm->arch.cur_tsc_write = data; |
1295 | kvm->arch.cur_tsc_offset = offset; | 1333 | kvm->arch.cur_tsc_offset = offset; |
1296 | matched = false; | 1334 | matched = false; |
1297 | pr_debug("kvm: new tsc generation %u, clock %llu\n", | 1335 | pr_debug("kvm: new tsc generation %llu, clock %llu\n", |
1298 | kvm->arch.cur_tsc_generation, data); | 1336 | kvm->arch.cur_tsc_generation, data); |
1299 | } | 1337 | } |
1300 | 1338 | ||
@@ -1319,10 +1357,11 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1319 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | 1357 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); |
1320 | 1358 | ||
1321 | spin_lock(&kvm->arch.pvclock_gtod_sync_lock); | 1359 | spin_lock(&kvm->arch.pvclock_gtod_sync_lock); |
1322 | if (matched) | 1360 | if (!matched) { |
1323 | kvm->arch.nr_vcpus_matched_tsc++; | ||
1324 | else | ||
1325 | kvm->arch.nr_vcpus_matched_tsc = 0; | 1361 | kvm->arch.nr_vcpus_matched_tsc = 0; |
1362 | } else if (!already_matched) { | ||
1363 | kvm->arch.nr_vcpus_matched_tsc++; | ||
1364 | } | ||
1326 | 1365 | ||
1327 | kvm_track_tsc_matching(vcpu); | 1366 | kvm_track_tsc_matching(vcpu); |
1328 | spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); | 1367 | spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); |
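The new already_matched flag keeps nr_vcpus_matched_tsc from counting the same vCPU twice within one TSC generation; only a write that opens a new generation resets the counter. A small sketch of that bookkeeping, with the generation comparison reduced to a boolean:

#include <stdbool.h>
#include <stdio.h>

struct tsc_sync_sketch {
	unsigned nr_matched;	/* vCPUs counted in the current generation */
};

/*
 * matched:         the write landed in the current generation.
 * already_matched: this vCPU was counted for that generation before.
 */
static void account_tsc_write(struct tsc_sync_sketch *s,
			      bool matched, bool already_matched)
{
	if (!matched)
		s->nr_matched = 0;	/* a new generation starts over */
	else if (!already_matched)
		s->nr_matched++;	/* count each vCPU only once */
}

int main(void)
{
	struct tsc_sync_sketch s = { 0 };

	account_tsc_write(&s, true, false);	/* first vCPU joins: 1 */
	account_tsc_write(&s, true, true);	/* same vCPU again: still 1 */
	account_tsc_write(&s, false, false);	/* unmatched write: reset to 0 */
	printf("nr_matched=%u\n", s.nr_matched);
	return 0;
}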
@@ -2032,6 +2071,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2032 | data &= ~(u64)0x40; /* ignore flush filter disable */ | 2071 | data &= ~(u64)0x40; /* ignore flush filter disable */ |
2033 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ | 2072 | data &= ~(u64)0x100; /* ignore ignne emulation enable */ |
2034 | data &= ~(u64)0x8; /* ignore TLB cache disable */ | 2073 | data &= ~(u64)0x8; /* ignore TLB cache disable */ |
2074 | data &= ~(u64)0x40000; /* ignore Mc status write enable */ | ||
2035 | if (data != 0) { | 2075 | if (data != 0) { |
2036 | vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", | 2076 | vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", |
2037 | data); | 2077 | data); |
@@ -2974,9 +3014,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, | |||
2974 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; | 3014 | vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; |
2975 | events->interrupt.nr = vcpu->arch.interrupt.nr; | 3015 | events->interrupt.nr = vcpu->arch.interrupt.nr; |
2976 | events->interrupt.soft = 0; | 3016 | events->interrupt.soft = 0; |
2977 | events->interrupt.shadow = | 3017 | events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); |
2978 | kvm_x86_ops->get_interrupt_shadow(vcpu, | ||
2979 | KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); | ||
2980 | 3018 | ||
2981 | events->nmi.injected = vcpu->arch.nmi_injected; | 3019 | events->nmi.injected = vcpu->arch.nmi_injected; |
2982 | events->nmi.pending = vcpu->arch.nmi_pending != 0; | 3020 | events->nmi.pending = vcpu->arch.nmi_pending != 0; |
@@ -4082,7 +4120,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, | |||
4082 | 4120 | ||
4083 | if (gpa == UNMAPPED_GVA) | 4121 | if (gpa == UNMAPPED_GVA) |
4084 | return X86EMUL_PROPAGATE_FAULT; | 4122 | return X86EMUL_PROPAGATE_FAULT; |
4085 | ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); | 4123 | ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data, |
4124 | offset, toread); | ||
4086 | if (ret < 0) { | 4125 | if (ret < 0) { |
4087 | r = X86EMUL_IO_NEEDED; | 4126 | r = X86EMUL_IO_NEEDED; |
4088 | goto out; | 4127 | goto out; |
@@ -4103,10 +4142,24 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt, | |||
4103 | { | 4142 | { |
4104 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); | 4143 | struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); |
4105 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; | 4144 | u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; |
4145 | unsigned offset; | ||
4146 | int ret; | ||
4106 | 4147 | ||
4107 | return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, | 4148 | /* Inline kvm_read_guest_virt_helper for speed. */ |
4108 | access | PFERR_FETCH_MASK, | 4149 | gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK, |
4109 | exception); | 4150 | exception); |
4151 | if (unlikely(gpa == UNMAPPED_GVA)) | ||
4152 | return X86EMUL_PROPAGATE_FAULT; | ||
4153 | |||
4154 | offset = addr & (PAGE_SIZE-1); | ||
4155 | if (WARN_ON(offset + bytes > PAGE_SIZE)) | ||
4156 | bytes = (unsigned)PAGE_SIZE - offset; | ||
4157 | ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val, | ||
4158 | offset, bytes); | ||
4159 | if (unlikely(ret < 0)) | ||
4160 | return X86EMUL_IO_NEEDED; | ||
4161 | |||
4162 | return X86EMUL_CONTINUE; | ||
4110 | } | 4163 | } |
4111 | 4164 | ||
4112 | int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, | 4165 | int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, |
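The new kvm_fetch_guest_virt body open-codes the read helper for the common single-page case: translate the linear address once, derive the in-page offset, clamp the length so the fetch never crosses the page, and read straight from that guest page. A self-contained model of the offset and clamping arithmetic only (the page size and harness are illustrative):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/*
 * Given a guest physical address and a requested length, compute the
 * page frame, the in-page offset and the number of bytes that fit in
 * that page -- mirroring "offset = addr & (PAGE_SIZE-1)" and the
 * WARN_ON clamp in the hunk.
 */
static unsigned int fetch_bytes_in_page(uint64_t gpa, unsigned int bytes,
					uint64_t *pfn, unsigned int *offset)
{
	*pfn = gpa / PAGE_SIZE;			/* gpa >> PAGE_SHIFT */
	*offset = gpa & (PAGE_SIZE - 1);
	if (*offset + bytes > PAGE_SIZE)	/* would cross the page */
		bytes = PAGE_SIZE - *offset;
	return bytes;
}

int main(void)
{
	uint64_t pfn;
	unsigned int off;
	unsigned int n = fetch_bytes_in_page(0x1ffe, 15, &pfn, &off);

	/* prints: pfn=1 offset=4094 bytes=2 */
	printf("pfn=%llu offset=%u bytes=%u\n", (unsigned long long)pfn, off, n);
	return 0;
}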
@@ -4730,7 +4783,6 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector, | |||
4730 | if (desc->g) | 4783 | if (desc->g) |
4731 | var.limit = (var.limit << 12) | 0xfff; | 4784 | var.limit = (var.limit << 12) | 0xfff; |
4732 | var.type = desc->type; | 4785 | var.type = desc->type; |
4733 | var.present = desc->p; | ||
4734 | var.dpl = desc->dpl; | 4786 | var.dpl = desc->dpl; |
4735 | var.db = desc->d; | 4787 | var.db = desc->d; |
4736 | var.s = desc->s; | 4788 | var.s = desc->s; |
@@ -4762,6 +4814,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, | |||
4762 | return kvm_set_msr(emul_to_vcpu(ctxt), &msr); | 4814 | return kvm_set_msr(emul_to_vcpu(ctxt), &msr); |
4763 | } | 4815 | } |
4764 | 4816 | ||
4817 | static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, | ||
4818 | u32 pmc) | ||
4819 | { | ||
4820 | return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc); | ||
4821 | } | ||
4822 | |||
4765 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, | 4823 | static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, |
4766 | u32 pmc, u64 *pdata) | 4824 | u32 pmc, u64 *pdata) |
4767 | { | 4825 | { |
@@ -4838,6 +4896,7 @@ static const struct x86_emulate_ops emulate_ops = { | |||
4838 | .set_dr = emulator_set_dr, | 4896 | .set_dr = emulator_set_dr, |
4839 | .set_msr = emulator_set_msr, | 4897 | .set_msr = emulator_set_msr, |
4840 | .get_msr = emulator_get_msr, | 4898 | .get_msr = emulator_get_msr, |
4899 | .check_pmc = emulator_check_pmc, | ||
4841 | .read_pmc = emulator_read_pmc, | 4900 | .read_pmc = emulator_read_pmc, |
4842 | .halt = emulator_halt, | 4901 | .halt = emulator_halt, |
4843 | .wbinvd = emulator_wbinvd, | 4902 | .wbinvd = emulator_wbinvd, |
@@ -4850,7 +4909,7 @@ static const struct x86_emulate_ops emulate_ops = { | |||
4850 | 4909 | ||
4851 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | 4910 | static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) |
4852 | { | 4911 | { |
4853 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); | 4912 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); |
4854 | /* | 4913 | /* |
4855 | * an sti; sti; sequence only disable interrupts for the first | 4914 | * an sti; sti; sequence only disable interrupts for the first |
4856 | * instruction. So, if the last instruction, be it emulated or | 4915 | * instruction. So, if the last instruction, be it emulated or |
@@ -4858,8 +4917,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) | |||
4858 | * means that the last instruction is an sti. We should not | 4917 | * means that the last instruction is an sti. We should not |
4859 | * leave the flag on in this case. The same goes for mov ss | 4918 | * leave the flag on in this case. The same goes for mov ss |
4860 | */ | 4919 | */ |
4861 | if (!(int_shadow & mask)) | 4920 | if (int_shadow & mask) |
4921 | mask = 0; | ||
4922 | if (unlikely(int_shadow || mask)) { | ||
4862 | kvm_x86_ops->set_interrupt_shadow(vcpu, mask); | 4923 | kvm_x86_ops->set_interrupt_shadow(vcpu, mask); |
4924 | if (!mask) | ||
4925 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
4926 | } | ||
4863 | } | 4927 | } |
4864 | 4928 | ||
4865 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) | 4929 | static void inject_emulated_exception(struct kvm_vcpu *vcpu) |
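The rewritten toggle_interruptibility skips the vendor callback entirely when neither the old nor the new interrupt shadow is set, and raises KVM_REQ_EVENT as soon as the shadow is actually dropped. A compact model of that decision, with the KVM hooks replaced by hypothetical globals:

#include <stdio.h>

static unsigned int hw_shadow;	/* stands in for ->get/set_interrupt_shadow */
static int event_requested;	/* stands in for kvm_make_request(KVM_REQ_EVENT) */

static void toggle_interruptibility(unsigned int mask)
{
	unsigned int int_shadow = hw_shadow;

	/* A repeated STI/MOV SS does not extend the shadow: drop the new mask. */
	if (int_shadow & mask)
		mask = 0;
	/* Only touch the shadow state when something actually changes. */
	if (int_shadow || mask) {
		hw_shadow = mask;
		if (!mask)
			event_requested = 1;	/* shadow just expired: re-check events */
	}
}

int main(void)
{
	hw_shadow = 1;			/* shadow left over from a previous STI */
	toggle_interruptibility(1);	/* the next instruction is an STI again */
	printf("shadow=%u event=%d\n", hw_shadow, event_requested);	/* shadow=0 event=1 */
	return 0;
}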
@@ -4874,19 +4938,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) | |||
4874 | kvm_queue_exception(vcpu, ctxt->exception.vector); | 4938 | kvm_queue_exception(vcpu, ctxt->exception.vector); |
4875 | } | 4939 | } |
4876 | 4940 | ||
4877 | static void init_decode_cache(struct x86_emulate_ctxt *ctxt) | ||
4878 | { | ||
4879 | memset(&ctxt->opcode_len, 0, | ||
4880 | (void *)&ctxt->_regs - (void *)&ctxt->opcode_len); | ||
4881 | |||
4882 | ctxt->fetch.start = 0; | ||
4883 | ctxt->fetch.end = 0; | ||
4884 | ctxt->io_read.pos = 0; | ||
4885 | ctxt->io_read.end = 0; | ||
4886 | ctxt->mem_read.pos = 0; | ||
4887 | ctxt->mem_read.end = 0; | ||
4888 | } | ||
4889 | |||
4890 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) | 4941 | static void init_emulate_ctxt(struct kvm_vcpu *vcpu) |
4891 | { | 4942 | { |
4892 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; | 4943 | struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; |
@@ -5085,23 +5136,22 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, | |||
5085 | return dr6; | 5136 | return dr6; |
5086 | } | 5137 | } |
5087 | 5138 | ||
5088 | static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) | 5139 | static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) |
5089 | { | 5140 | { |
5090 | struct kvm_run *kvm_run = vcpu->run; | 5141 | struct kvm_run *kvm_run = vcpu->run; |
5091 | 5142 | ||
5092 | /* | 5143 | /* |
5093 | * Use the "raw" value to see if TF was passed to the processor. | 5144 | * rflags is the old, "raw" value of the flags. The new value has |
5094 | * Note that the new value of the flags has not been saved yet. | 5145 | * not been saved yet. |
5095 | * | 5146 | * |
5096 | * This is correct even for TF set by the guest, because "the | 5147 | * This is correct even for TF set by the guest, because "the |
5097 | * processor will not generate this exception after the instruction | 5148 | * processor will not generate this exception after the instruction |
5098 | * that sets the TF flag". | 5149 | * that sets the TF flag". |
5099 | */ | 5150 | */ |
5100 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5101 | |||
5102 | if (unlikely(rflags & X86_EFLAGS_TF)) { | 5151 | if (unlikely(rflags & X86_EFLAGS_TF)) { |
5103 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { | 5152 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { |
5104 | kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; | 5153 | kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | |
5154 | DR6_RTM; | ||
5105 | kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; | 5155 | kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; |
5106 | kvm_run->debug.arch.exception = DB_VECTOR; | 5156 | kvm_run->debug.arch.exception = DB_VECTOR; |
5107 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 5157 | kvm_run->exit_reason = KVM_EXIT_DEBUG; |
@@ -5114,7 +5164,7 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) | |||
5114 | * cleared by the processor". | 5164 | * cleared by the processor". |
5115 | */ | 5165 | */ |
5116 | vcpu->arch.dr6 &= ~15; | 5166 | vcpu->arch.dr6 &= ~15; |
5117 | vcpu->arch.dr6 |= DR6_BS; | 5167 | vcpu->arch.dr6 |= DR6_BS | DR6_RTM; |
5118 | kvm_queue_exception(vcpu, DB_VECTOR); | 5168 | kvm_queue_exception(vcpu, DB_VECTOR); |
5119 | } | 5169 | } |
5120 | } | 5170 | } |
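kvm_vcpu_check_singlestep now receives the pre-emulation rflags from its caller instead of re-reading them, and the synthesized #DB payload gains DR6_RTM, the DR6 bit that reads as 1 whenever the debug exception did not occur inside an RTM transaction. A reduced model of the TF decision (the DR6 constants below are illustrative; the real masks live in the KVM headers):

#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_TF	0x100ul
#define DR6_FIXED_1	0xfffe0ff0ull	/* illustrative "reads as one" mask */
#define DR6_BS		(1ull << 14)	/* single-step */
#define DR6_RTM		(1ull << 16)	/* not inside an RTM transaction */

/* Returns the DR6 value to report if the old TF demands a single-step #DB. */
static uint64_t singlestep_dr6(unsigned long old_rflags)
{
	if (old_rflags & X86_EFLAGS_TF)
		return DR6_FIXED_1 | DR6_BS | DR6_RTM;
	return 0;	/* TF was clear before the emulated instruction: no trap */
}

int main(void)
{
	printf("%#llx\n", (unsigned long long)singlestep_dr6(X86_EFLAGS_TF));
	printf("%#llx\n", (unsigned long long)singlestep_dr6(0));
	return 0;
}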
@@ -5133,7 +5183,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | |||
5133 | vcpu->arch.eff_db); | 5183 | vcpu->arch.eff_db); |
5134 | 5184 | ||
5135 | if (dr6 != 0) { | 5185 | if (dr6 != 0) { |
5136 | kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; | 5186 | kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM; |
5137 | kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + | 5187 | kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + |
5138 | get_segment_base(vcpu, VCPU_SREG_CS); | 5188 | get_segment_base(vcpu, VCPU_SREG_CS); |
5139 | 5189 | ||
@@ -5144,14 +5194,15 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) | |||
5144 | } | 5194 | } |
5145 | } | 5195 | } |
5146 | 5196 | ||
5147 | if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { | 5197 | if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) && |
5198 | !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) { | ||
5148 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, | 5199 | dr6 = kvm_vcpu_check_hw_bp(eip, 0, |
5149 | vcpu->arch.dr7, | 5200 | vcpu->arch.dr7, |
5150 | vcpu->arch.db); | 5201 | vcpu->arch.db); |
5151 | 5202 | ||
5152 | if (dr6 != 0) { | 5203 | if (dr6 != 0) { |
5153 | vcpu->arch.dr6 &= ~15; | 5204 | vcpu->arch.dr6 &= ~15; |
5154 | vcpu->arch.dr6 |= dr6; | 5205 | vcpu->arch.dr6 |= dr6 | DR6_RTM; |
5155 | kvm_queue_exception(vcpu, DB_VECTOR); | 5206 | kvm_queue_exception(vcpu, DB_VECTOR); |
5156 | *r = EMULATE_DONE; | 5207 | *r = EMULATE_DONE; |
5157 | return true; | 5208 | return true; |
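For code breakpoints the check is now skipped while RFLAGS.RF is set, matching the rule that RF suppresses instruction-breakpoint faults for one instruction, and a hit is reported with DR6_RTM ORed in as well. A small sketch of the new guard (the predicate name is made up for the example):

#include <stdbool.h>
#include <stdio.h>

#define X86_EFLAGS_RF 0x10000ul

/* Should DR7 code breakpoints be evaluated for the next instruction at all? */
static bool should_check_code_breakpoints(bool dr7_bp_enabled,
					  unsigned long rflags)
{
	return dr7_bp_enabled && !(rflags & X86_EFLAGS_RF);
}

int main(void)
{
	/* RF set, e.g. right after a fault was injected: breakpoints are skipped. */
	printf("%d\n", should_check_code_breakpoints(true, X86_EFLAGS_RF));	/* 0 */
	printf("%d\n", should_check_code_breakpoints(true, 0));			/* 1 */
	return 0;
}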
@@ -5215,6 +5266,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
5215 | 5266 | ||
5216 | if (emulation_type & EMULTYPE_SKIP) { | 5267 | if (emulation_type & EMULTYPE_SKIP) { |
5217 | kvm_rip_write(vcpu, ctxt->_eip); | 5268 | kvm_rip_write(vcpu, ctxt->_eip); |
5269 | if (ctxt->eflags & X86_EFLAGS_RF) | ||
5270 | kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF); | ||
5218 | return EMULATE_DONE; | 5271 | return EMULATE_DONE; |
5219 | } | 5272 | } |
5220 | 5273 | ||
@@ -5265,13 +5318,22 @@ restart: | |||
5265 | r = EMULATE_DONE; | 5318 | r = EMULATE_DONE; |
5266 | 5319 | ||
5267 | if (writeback) { | 5320 | if (writeback) { |
5321 | unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); | ||
5268 | toggle_interruptibility(vcpu, ctxt->interruptibility); | 5322 | toggle_interruptibility(vcpu, ctxt->interruptibility); |
5269 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5270 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; | 5323 | vcpu->arch.emulate_regs_need_sync_to_vcpu = false; |
5271 | kvm_rip_write(vcpu, ctxt->eip); | 5324 | kvm_rip_write(vcpu, ctxt->eip); |
5272 | if (r == EMULATE_DONE) | 5325 | if (r == EMULATE_DONE) |
5273 | kvm_vcpu_check_singlestep(vcpu, &r); | 5326 | kvm_vcpu_check_singlestep(vcpu, rflags, &r); |
5274 | kvm_set_rflags(vcpu, ctxt->eflags); | 5327 | __kvm_set_rflags(vcpu, ctxt->eflags); |
5328 | |||
5329 | /* | ||
5330 | * For STI, interrupts are shadowed; so KVM_REQ_EVENT will | ||
5331 | * do nothing, and it will be requested again as soon as | ||
5332 | * the shadow expires. But we still need to check here, | ||
5333 | * because POPF has no interrupt shadow. | ||
5334 | */ | ||
5335 | if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF)) | ||
5336 | kvm_make_request(KVM_REQ_EVENT, vcpu); | ||
5275 | } else | 5337 | } else |
5276 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; | 5338 | vcpu->arch.emulate_regs_need_sync_to_vcpu = true; |
5277 | 5339 | ||
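The writeback path now requests KVM_REQ_EVENT only when emulation turned IF from 0 to 1; as the comment in the hunk explains, STI is covered by the interrupt shadow, but POPF is not, so the transition has to be detected here. The test is simply "(new & ~old) & IF"; a tiny demonstration:

#include <stdio.h>

#define X86_EFLAGS_IF 0x200ul

/* True when the emulated instruction just enabled interrupts. */
static int if_just_enabled(unsigned long old_rflags, unsigned long new_rflags)
{
	return !!((new_rflags & ~old_rflags) & X86_EFLAGS_IF);
}

int main(void)
{
	printf("%d\n", if_just_enabled(0, X86_EFLAGS_IF));		/* POPF set IF: 1 */
	printf("%d\n", if_just_enabled(X86_EFLAGS_IF, X86_EFLAGS_IF));	/* IF unchanged: 0 */
	return 0;
}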
@@ -5662,7 +5724,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
5662 | u64 param, ingpa, outgpa, ret; | 5724 | u64 param, ingpa, outgpa, ret; |
5663 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; | 5725 | uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; |
5664 | bool fast, longmode; | 5726 | bool fast, longmode; |
5665 | int cs_db, cs_l; | ||
5666 | 5727 | ||
5667 | /* | 5728 | /* |
5668 | * hypercall generates UD from non zero cpl and real mode | 5729 | * hypercall generates UD from non zero cpl and real mode |
@@ -5673,8 +5734,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) | |||
5673 | return 0; | 5734 | return 0; |
5674 | } | 5735 | } |
5675 | 5736 | ||
5676 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | 5737 | longmode = is_64_bit_mode(vcpu); |
5677 | longmode = is_long_mode(vcpu) && cs_l == 1; | ||
5678 | 5738 | ||
5679 | if (!longmode) { | 5739 | if (!longmode) { |
5680 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | | 5740 | param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | |
@@ -5739,7 +5799,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) | |||
5739 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | 5799 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) |
5740 | { | 5800 | { |
5741 | unsigned long nr, a0, a1, a2, a3, ret; | 5801 | unsigned long nr, a0, a1, a2, a3, ret; |
5742 | int r = 1; | 5802 | int op_64_bit, r = 1; |
5743 | 5803 | ||
5744 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) | 5804 | if (kvm_hv_hypercall_enabled(vcpu->kvm)) |
5745 | return kvm_hv_hypercall(vcpu); | 5805 | return kvm_hv_hypercall(vcpu); |
@@ -5752,7 +5812,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5752 | 5812 | ||
5753 | trace_kvm_hypercall(nr, a0, a1, a2, a3); | 5813 | trace_kvm_hypercall(nr, a0, a1, a2, a3); |
5754 | 5814 | ||
5755 | if (!is_long_mode(vcpu)) { | 5815 | op_64_bit = is_64_bit_mode(vcpu); |
5816 | if (!op_64_bit) { | ||
5756 | nr &= 0xFFFFFFFF; | 5817 | nr &= 0xFFFFFFFF; |
5757 | a0 &= 0xFFFFFFFF; | 5818 | a0 &= 0xFFFFFFFF; |
5758 | a1 &= 0xFFFFFFFF; | 5819 | a1 &= 0xFFFFFFFF; |
@@ -5778,6 +5839,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) | |||
5778 | break; | 5839 | break; |
5779 | } | 5840 | } |
5780 | out: | 5841 | out: |
5842 | if (!op_64_bit) | ||
5843 | ret = (u32)ret; | ||
5781 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); | 5844 | kvm_register_write(vcpu, VCPU_REGS_RAX, ret); |
5782 | ++vcpu->stat.hypercalls; | 5845 | ++vcpu->stat.hypercalls; |
5783 | return r; | 5846 | return r; |
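Outside 64-bit mode the hypercall number and arguments were already truncated to 32 bits; the out: path now truncates the return value as well before writing it back to RAX, so a 32-bit caller sees a clean 32-bit result. A minimal model of the clamp:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the "if (!op_64_bit) ret = (u32)ret;" added in the hunk. */
static uint64_t hypercall_ret(uint64_t ret, int op_64_bit)
{
	if (!op_64_bit)
		ret = (uint32_t)ret;
	return ret;
}

int main(void)
{
	uint64_t ret = 0xffffffffffffffffull;	/* a 64-bit, -1 style error code */

	printf("%#llx\n", (unsigned long long)hypercall_ret(ret, 0));	/* 0xffffffff */
	printf("%#llx\n", (unsigned long long)hypercall_ret(ret, 1));	/* full 64 bits */
	return 0;
}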
@@ -5856,6 +5919,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) | |||
5856 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | 5919 | trace_kvm_inj_exception(vcpu->arch.exception.nr, |
5857 | vcpu->arch.exception.has_error_code, | 5920 | vcpu->arch.exception.has_error_code, |
5858 | vcpu->arch.exception.error_code); | 5921 | vcpu->arch.exception.error_code); |
5922 | |||
5923 | if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT) | ||
5924 | __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | | ||
5925 | X86_EFLAGS_RF); | ||
5926 | |||
5859 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, | 5927 | kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, |
5860 | vcpu->arch.exception.has_error_code, | 5928 | vcpu->arch.exception.has_error_code, |
5861 | vcpu->arch.exception.error_code, | 5929 | vcpu->arch.exception.error_code, |
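When a fault-class exception is injected, RFLAGS.RF is now set first, mirroring the processor's behaviour of setting RF in the saved flags image for faults so that an instruction breakpoint does not refire when the faulting instruction is restarted. A sketch of that rule; exception_type() and EXCPT_FAULT are used as in the hunk, but the classification table below is only illustrative:

#include <stdio.h>

#define X86_EFLAGS_RF	0x10000ul
#define DB_VECTOR	1
#define BP_VECTOR	3
#define PF_VECTOR	14

enum { EXCPT_FAULT, EXCPT_TRAP };

/* Illustrative classifier: #DB and #BP count as traps here, the rest as faults. */
static int exception_type(int vector)
{
	return (vector == DB_VECTOR || vector == BP_VECTOR) ? EXCPT_TRAP
							    : EXCPT_FAULT;
}

static unsigned long rflags_for_injection(unsigned long rflags, int vector)
{
	if (exception_type(vector) == EXCPT_FAULT)
		rflags |= X86_EFLAGS_RF;
	return rflags;
}

int main(void)
{
	printf("%#lx\n", rflags_for_injection(0x2, PF_VECTOR));	/* RF added for #PF */
	printf("%#lx\n", rflags_for_injection(0x2, DB_VECTOR));	/* unchanged for #DB */
	return 0;
}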
@@ -6847,9 +6915,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6847 | atomic_set(&vcpu->arch.nmi_queued, 0); | 6915 | atomic_set(&vcpu->arch.nmi_queued, 0); |
6848 | vcpu->arch.nmi_pending = 0; | 6916 | vcpu->arch.nmi_pending = 0; |
6849 | vcpu->arch.nmi_injected = false; | 6917 | vcpu->arch.nmi_injected = false; |
6918 | kvm_clear_interrupt_queue(vcpu); | ||
6919 | kvm_clear_exception_queue(vcpu); | ||
6850 | 6920 | ||
6851 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 6921 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
6852 | vcpu->arch.dr6 = DR6_FIXED_1; | 6922 | vcpu->arch.dr6 = DR6_INIT; |
6853 | kvm_update_dr6(vcpu); | 6923 | kvm_update_dr6(vcpu); |
6854 | vcpu->arch.dr7 = DR7_FIXED_1; | 6924 | vcpu->arch.dr7 = DR7_FIXED_1; |
6855 | kvm_update_dr7(vcpu); | 6925 | kvm_update_dr7(vcpu); |
@@ -7405,12 +7475,17 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) | |||
7405 | } | 7475 | } |
7406 | EXPORT_SYMBOL_GPL(kvm_get_rflags); | 7476 | EXPORT_SYMBOL_GPL(kvm_get_rflags); |
7407 | 7477 | ||
7408 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 7478 | static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
7409 | { | 7479 | { |
7410 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && | 7480 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && |
7411 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) | 7481 | kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) |
7412 | rflags |= X86_EFLAGS_TF; | 7482 | rflags |= X86_EFLAGS_TF; |
7413 | kvm_x86_ops->set_rflags(vcpu, rflags); | 7483 | kvm_x86_ops->set_rflags(vcpu, rflags); |
7484 | } | ||
7485 | |||
7486 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | ||
7487 | { | ||
7488 | __kvm_set_rflags(vcpu, rflags); | ||
7414 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 7489 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
7415 | } | 7490 | } |
7416 | EXPORT_SYMBOL_GPL(kvm_set_rflags); | 7491 | EXPORT_SYMBOL_GPL(kvm_set_rflags); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 8c97bac9a895..306a1b77581f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -47,6 +47,16 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu) | |||
47 | #endif | 47 | #endif |
48 | } | 48 | } |
49 | 49 | ||
50 | static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) | ||
51 | { | ||
52 | int cs_db, cs_l; | ||
53 | |||
54 | if (!is_long_mode(vcpu)) | ||
55 | return false; | ||
56 | kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); | ||
57 | return cs_l; | ||
58 | } | ||
59 | |||
50 | static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) | 60 | static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) |
51 | { | 61 | { |
52 | return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; | 62 | return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; |
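The new is_64_bit_mode() distinguishes true 64-bit code from a 32-bit compatibility-mode segment running under a long-mode kernel: EFER long mode alone is not enough, the current CS.L bit must be set too, which is exactly why the hypercall paths above switch to it. A condensed restatement of the check over a hypothetical flattened vCPU state:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical flattened view of the state the real helper queries. */
struct vcpu_mode {
	bool efer_lma;	/* is_long_mode(): EFER.LMA is set */
	bool cs_l;	/* the current CS descriptor has the L bit set */
};

static bool is_64_bit_mode(const struct vcpu_mode *m)
{
	if (!m->efer_lma)
		return false;	/* legacy or real mode */
	return m->cs_l;		/* long mode, but 64-bit only if CS.L = 1 */
}

int main(void)
{
	struct vcpu_mode compat = { .efer_lma = true, .cs_l = false };
	struct vcpu_mode full64 = { .efer_lma = true, .cs_l = true };

	printf("%d %d\n", is_64_bit_mode(&compat), is_64_bit_mode(&full64));	/* 0 1 */
	return 0;
}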
@@ -108,6 +118,23 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) | |||
108 | return false; | 118 | return false; |
109 | } | 119 | } |
110 | 120 | ||
121 | static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, | ||
122 | enum kvm_reg reg) | ||
123 | { | ||
124 | unsigned long val = kvm_register_read(vcpu, reg); | ||
125 | |||
126 | return is_64_bit_mode(vcpu) ? val : (u32)val; | ||
127 | } | ||
128 | |||
129 | static inline void kvm_register_writel(struct kvm_vcpu *vcpu, | ||
130 | enum kvm_reg reg, | ||
131 | unsigned long val) | ||
132 | { | ||
133 | if (!is_64_bit_mode(vcpu)) | ||
134 | val = (u32)val; | ||
135 | return kvm_register_write(vcpu, reg, val); | ||
136 | } | ||
137 | |||
111 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | 138 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); |
112 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | 139 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); |
113 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); | 140 | int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); |
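kvm_register_readl() and kvm_register_writel() wrap the plain register accessors with the same mode check: outside 64-bit mode only the low 32 bits of a general-purpose register are read or written back. A stand-alone sketch of that truncation, with the vCPU register file reduced to a plain array:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum kvm_reg { VCPU_REGS_RAX, VCPU_REGS_RCX, NR_REGS };

struct vcpu { uint64_t regs[NR_REGS]; bool is_64_bit_mode; };

static uint64_t register_readl(struct vcpu *v, enum kvm_reg reg)
{
	uint64_t val = v->regs[reg];

	return v->is_64_bit_mode ? val : (uint32_t)val;
}

static void register_writel(struct vcpu *v, enum kvm_reg reg, uint64_t val)
{
	if (!v->is_64_bit_mode)
		val = (uint32_t)val;
	v->regs[reg] = val;
}

int main(void)
{
	struct vcpu v = { .regs = { 0xdeadbeefcafebabeull }, .is_64_bit_mode = false };

	printf("%#llx\n", (unsigned long long)register_readl(&v, VCPU_REGS_RAX));	/* 0xcafebabe */
	register_writel(&v, VCPU_REGS_RCX, 0x1122334455667788ull);
	printf("%#llx\n", (unsigned long long)v.regs[VCPU_REGS_RCX]);			/* 0x55667788 */
	return 0;
}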