author     Linus Torvalds <torvalds@linux-foundation.org>  2014-08-04 15:16:46 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-08-04 15:16:46 -0400
commit     8533ce72718871fb528d853391746f36243273af (patch)
tree       a3ac06520e45cb6a472ed83979b0d48b6c2cec15 /arch/x86/kvm
parent     c9b88e9581828bb8bba06c5e7ee8ed1761172b6e (diff)
parent     42cbc04fd3b5e3f9b011bf9fa3ce0b3d1e10b58b (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM changes from Paolo Bonzini:
 "These are the x86, MIPS and s390 changes; PPC and ARM will come in a
  few days.

  MIPS and s390 have little going on this release; just bugfixes, some
  small, some larger.

  The highlights for x86 are nested VMX improvements (Jan Kiszka),
  optimizations for old processors (up to Nehalem, by me and Bandan
  Das), and a lot of x86 emulator bugfixes (Nadav Amit).

  Stephen Rothwell reported a trivial conflict with the tracing branch"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (104 commits)
  x86/kvm: Resolve shadow warnings in macro expansion
  KVM: s390: rework broken SIGP STOP interrupt handling
  KVM: x86: always exit on EOIs for interrupts listed in the IOAPIC redir table
  KVM: vmx: remove duplicate vmx_mpx_supported() prototype
  KVM: s390: Fix memory leak on busy SIGP stop
  x86/kvm: Resolve shadow warning from min macro
  kvm: Resolve missing-field-initializers warnings
  Replace NR_VMX_MSR with its definition
  KVM: x86: Assertions to check no overrun in MSR lists
  KVM: x86: set rflags.rf during fault injection
  KVM: x86: Setting rflags.rf during rep-string emulation
  KVM: x86: DR6/7.RTM cannot be written
  KVM: nVMX: clean up nested_release_vmcs12 and code around it
  KVM: nVMX: fix lifetime issues for vmcs02
  KVM: x86: Defining missing x86 vectors
  KVM: x86: emulator injects #DB when RFLAGS.RF is set
  KVM: x86: Cleanup of rflags.rf cleaning
  KVM: x86: Clear rflags.rf on emulated instructions
  KVM: x86: popf emulation should not change RF
  KVM: x86: Clearing rflags.rf upon skipped emulated instruction
  ...
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/cpuid.h      8
-rw-r--r--  arch/x86/kvm/emulate.c  494
-rw-r--r--  arch/x86/kvm/lapic.c      4
-rw-r--r--  arch/x86/kvm/mmutrace.h   4
-rw-r--r--  arch/x86/kvm/pmu.c        9
-rw-r--r--  arch/x86/kvm/svm.c       57
-rw-r--r--  arch/x86/kvm/trace.h      6
-rw-r--r--  arch/x86/kvm/vmx.c      239
-rw-r--r--  arch/x86/kvm/x86.c      171
-rw-r--r--  arch/x86/kvm/x86.h       27
10 files changed, 641 insertions(+), 378 deletions(-)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index f9087315e0cd..a5380590ab0e 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -95,4 +95,12 @@ static inline bool guest_cpuid_has_gbpages(struct kvm_vcpu *vcpu)
 	best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
 	return best && (best->edx & bit(X86_FEATURE_GBPAGES));
 }
+
+static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_RTM));
+}
 #endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e4e833d3d7d7..56657b0bb3bb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -162,6 +162,10 @@
 #define NoWrite     ((u64)1 << 45)  /* No writeback */
 #define SrcWrite    ((u64)1 << 46)  /* Write back src operand */
 #define NoMod       ((u64)1 << 47)  /* Mod field is ignored */
+#define Intercept   ((u64)1 << 48)  /* Has valid intercept field */
+#define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
+#define NoBigReal   ((u64)1 << 50)  /* No big real mode */
+#define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -426,6 +430,7 @@ static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
 		.modrm_reg  = ctxt->modrm_reg,
 		.modrm_rm   = ctxt->modrm_rm,
 		.src_val    = ctxt->src.val64,
+		.dst_val    = ctxt->dst.val64,
 		.src_bytes  = ctxt->src.bytes,
 		.dst_bytes  = ctxt->dst.bytes,
 		.ad_bytes   = ctxt->ad_bytes,
@@ -511,12 +516,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc)
 	return desc->g ? (limit << 12) | 0xfff : limit;
 }
 
-static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
-{
-	ctxt->has_seg_override = true;
-	ctxt->seg_override = seg;
-}
-
 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
 {
 	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
@@ -525,14 +524,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
 	return ctxt->ops->get_cached_segment_base(ctxt, seg);
 }
 
-static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
-{
-	if (!ctxt->has_seg_override)
-		return 0;
-
-	return ctxt->seg_override;
-}
-
 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
 			     u32 error, bool valid)
 {
@@ -651,7 +642,12 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 		if (!fetch && (desc.type & 8) && !(desc.type & 2))
 			goto bad;
 		lim = desc_limit_scaled(&desc);
-		if ((desc.type & 8) || !(desc.type & 4)) {
+		if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
+		    (ctxt->d & NoBigReal)) {
+			/* la is between zero and 0xffff */
+			if (la > 0xffff || (u32)(la + size - 1) > 0xffff)
+				goto bad;
+		} else if ((desc.type & 8) || !(desc.type & 4)) {
 			/* expand-up segment */
 			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
 				goto bad;
@@ -716,68 +712,71 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 }
 
 /*
- * Fetch the next byte of the instruction being emulated which is pointed to
- * by ctxt->_eip, then increment ctxt->_eip.
- *
- * Also prefetch the remaining bytes of the instruction without crossing page
+ * Prefetch the remaining bytes of the instruction without crossing page
  * boundary if they are not in fetch_cache yet.
 */
-static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, u8 *dest)
+static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
 {
-	struct fetch_cache *fc = &ctxt->fetch;
 	int rc;
-	int size, cur_size;
-
-	if (ctxt->_eip == fc->end) {
-		unsigned long linear;
-		struct segmented_address addr = { .seg = VCPU_SREG_CS,
-						  .ea = ctxt->_eip };
-		cur_size = fc->end - fc->start;
-		size = min(15UL - cur_size,
-			   PAGE_SIZE - offset_in_page(ctxt->_eip));
-		rc = __linearize(ctxt, addr, size, false, true, &linear);
-		if (unlikely(rc != X86EMUL_CONTINUE))
-			return rc;
-		rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size,
-				      size, &ctxt->exception);
-		if (unlikely(rc != X86EMUL_CONTINUE))
-			return rc;
-		fc->end += size;
-	}
-	*dest = fc->data[ctxt->_eip - fc->start];
-	ctxt->_eip++;
-	return X86EMUL_CONTINUE;
-}
+	unsigned size;
+	unsigned long linear;
+	int cur_size = ctxt->fetch.end - ctxt->fetch.data;
+	struct segmented_address addr = { .seg = VCPU_SREG_CS,
+					  .ea = ctxt->eip + cur_size };
+
+	size = 15UL ^ cur_size;
+	rc = __linearize(ctxt, addr, size, false, true, &linear);
+	if (unlikely(rc != X86EMUL_CONTINUE))
+		return rc;
 
-static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
-			 void *dest, unsigned size)
-{
-	int rc;
+	size = min_t(unsigned, size, PAGE_SIZE - offset_in_page(linear));
 
-	/* x86 instructions are limited to 15 bytes. */
-	if (unlikely(ctxt->_eip + size - ctxt->eip > 15))
+	/*
+	 * One instruction can only straddle two pages,
+	 * and one has been loaded at the beginning of
+	 * x86_decode_insn.  So, if not enough bytes
+	 * still, we must have hit the 15-byte boundary.
+	 */
+	if (unlikely(size < op_size))
 		return X86EMUL_UNHANDLEABLE;
-	while (size--) {
-		rc = do_insn_fetch_byte(ctxt, dest++);
-		if (rc != X86EMUL_CONTINUE)
+	rc = ctxt->ops->fetch(ctxt, linear, ctxt->fetch.end,
+			      size, &ctxt->exception);
+	if (unlikely(rc != X86EMUL_CONTINUE))
 		return rc;
-	}
+	ctxt->fetch.end += size;
 	return X86EMUL_CONTINUE;
 }
 
+static __always_inline int do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt,
+					       unsigned size)
+{
+	if (unlikely(ctxt->fetch.end - ctxt->fetch.ptr < size))
+		return __do_insn_fetch_bytes(ctxt, size);
+	else
+		return X86EMUL_CONTINUE;
+}
+
 /* Fetch next part of the instruction being emulated. */
 #define insn_fetch(_type, _ctxt)					\
-({	unsigned long _x;						\
-	rc = do_insn_fetch(_ctxt, &_x, sizeof(_type));			\
+({	_type _x;							\
+									\
+	rc = do_insn_fetch_bytes(_ctxt, sizeof(_type));			\
 	if (rc != X86EMUL_CONTINUE)					\
 		goto done;						\
-	(_type)_x;							\
+	ctxt->_eip += sizeof(_type);					\
+	_x = *(_type __aligned(1) *) ctxt->fetch.ptr;			\
+	ctxt->fetch.ptr += sizeof(_type);				\
+	_x;								\
 })
 
 #define insn_fetch_arr(_arr, _size, _ctxt)				\
-({	rc = do_insn_fetch(_ctxt, _arr, (_size));			\
+({									\
+	rc = do_insn_fetch_bytes(_ctxt, _size);				\
 	if (rc != X86EMUL_CONTINUE)					\
 		goto done;						\
+	ctxt->_eip += (_size);						\
+	memcpy(_arr, ctxt->fetch.ptr, _size);				\
+	ctxt->fetch.ptr += (_size);					\
 })
 
 /*
@@ -1063,19 +1062,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			struct operand *op)
 {
 	u8 sib;
-	int index_reg = 0, base_reg = 0, scale;
+	int index_reg, base_reg, scale;
 	int rc = X86EMUL_CONTINUE;
 	ulong modrm_ea = 0;
 
-	if (ctxt->rex_prefix) {
-		ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1;	/* REX.R */
-		index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */
-		ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
-	}
+	ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
+	index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
+	base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
 
-	ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
+	ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
 	ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
-	ctxt->modrm_rm |= (ctxt->modrm & 0x07);
+	ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
 	ctxt->modrm_seg = VCPU_SREG_DS;
 
 	if (ctxt->modrm_mod == 3 || (ctxt->d & NoMod)) {
@@ -1093,7 +1090,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		if (ctxt->d & Mmx) {
 			op->type = OP_MM;
 			op->bytes = 8;
-			op->addr.xmm = ctxt->modrm_rm & 7;
+			op->addr.mm = ctxt->modrm_rm & 7;
 			return rc;
 		}
 		fetch_register_operand(op);
@@ -1190,6 +1187,9 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		}
 	}
 	op->addr.mem.ea = modrm_ea;
+	if (ctxt->ad_bytes != 8)
+		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+
 done:
 	return rc;
 }
@@ -1220,12 +1220,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt)
 	long sv = 0, mask;
 
 	if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) {
-		mask = ~(ctxt->dst.bytes * 8 - 1);
+		mask = ~((long)ctxt->dst.bytes * 8 - 1);
 
 		if (ctxt->src.bytes == 2)
 			sv = (s16)ctxt->src.val & (s16)mask;
 		else if (ctxt->src.bytes == 4)
 			sv = (s32)ctxt->src.val & (s32)mask;
+		else
+			sv = (s64)ctxt->src.val & (s64)mask;
 
 		ctxt->dst.addr.mem.ea += (sv >> 3);
 	}
@@ -1315,8 +1317,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 		in_page = (ctxt->eflags & EFLG_DF) ?
 			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
 			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
-		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
-			count);
+		n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
 		if (n == 0)
 			n = 1;
 		rc->pos = rc->end = 0;
@@ -1358,17 +1359,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, struct desc_ptr *dt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
+	u32 base3 = 0;
 
 	if (selector & 1 << 2) {
 		struct desc_struct desc;
 		u16 sel;
 
 		memset (dt, 0, sizeof *dt);
-		if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR))
+		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
+				      VCPU_SREG_LDTR))
 			return;
 
 		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
-		dt->address = get_desc_base(&desc);
+		dt->address = get_desc_base(&desc) | ((u64)base3 << 32);
 	} else
 		ops->get_gdt(ctxt, dt);
 }
@@ -1422,6 +1425,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	ulong desc_addr;
 	int ret;
 	u16 dummy;
+	u32 base3 = 0;
 
 	memset(&seg_desc, 0, sizeof seg_desc);
 
@@ -1538,9 +1542,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 		ret = write_segment_descriptor(ctxt, selector, &seg_desc);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
+	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
+		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
+				sizeof(base3), &ctxt->exception);
+		if (ret != X86EMUL_CONTINUE)
+			return ret;
 	}
 load:
-	ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
 	return X86EMUL_CONTINUE;
 exception:
 	emulate_exception(ctxt, err_vec, err_code, true);
@@ -1575,34 +1584,28 @@ static void write_register_operand(struct operand *op)
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
 {
-	int rc;
-
 	switch (op->type) {
 	case OP_REG:
 		write_register_operand(op);
 		break;
 	case OP_MEM:
 		if (ctxt->lock_prefix)
-			rc = segmented_cmpxchg(ctxt,
+			return segmented_cmpxchg(ctxt,
+						 op->addr.mem,
+						 &op->orig_val,
+						 &op->val,
+						 op->bytes);
+		else
+			return segmented_write(ctxt,
 					       op->addr.mem,
-					       &op->orig_val,
 					       &op->val,
 					       op->bytes);
-		else
-			rc = segmented_write(ctxt,
-					     op->addr.mem,
-					     &op->val,
-					     op->bytes);
-		if (rc != X86EMUL_CONTINUE)
-			return rc;
 		break;
 	case OP_MEM_STR:
-		rc = segmented_write(ctxt,
+		return segmented_write(ctxt,
 				     op->addr.mem,
 				     op->data,
 				     op->bytes * op->count);
-		if (rc != X86EMUL_CONTINUE)
-			return rc;
 		break;
 	case OP_XMM:
 		write_sse_reg(ctxt, &op->vec_val, op->addr.xmm);
@@ -1671,7 +1674,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 		return rc;
 
 	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
-		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
+		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
 
 	switch(ctxt->mode) {
 	case X86EMUL_MODE_PROT64:
@@ -1754,6 +1757,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
+	if (ctxt->modrm_reg == VCPU_SREG_SS)
+		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+
 	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
 	return rc;
 }
@@ -1991,6 +1997,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
 	u64 old = ctxt->dst.orig_val64;
 
+	if (ctxt->dst.bytes == 16)
+		return X86EMUL_UNHANDLEABLE;
+
 	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
 	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
 		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
@@ -2017,6 +2026,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 	unsigned long cs;
+	int cpl = ctxt->ops->cpl(ctxt);
 
 	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
@@ -2026,6 +2036,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
+	/* Outer-privilege level return is not implemented */
+	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
+		return X86EMUL_UNHANDLEABLE;
 	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
 	return rc;
 }
@@ -2044,8 +2057,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
 	/* Save real source value, then compare EAX against destination. */
+	ctxt->dst.orig_val = ctxt->dst.val;
+	ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX);
 	ctxt->src.orig_val = ctxt->src.val;
-	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
+	ctxt->src.val = ctxt->dst.orig_val;
 	fastop(ctxt, em_cmp);
 
 	if (ctxt->eflags & EFLG_ZF) {
@@ -2055,6 +2070,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 		/* Failure: write the value we saw to EAX. */
 		ctxt->dst.type = OP_REG;
 		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+		ctxt->dst.val = ctxt->dst.orig_val;
 	}
 	return X86EMUL_CONTINUE;
 }
@@ -2194,7 +2210,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
 	if (efer & EFER_LMA) {
 #ifdef CONFIG_X86_64
-		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;
+		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags;
 
 		ops->get_msr(ctxt,
 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
@@ -2202,14 +2218,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 		ctxt->_eip = msr_data;
 
 		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
-		ctxt->eflags &= ~(msr_data | EFLG_RF);
+		ctxt->eflags &= ~msr_data;
 #endif
 	} else {
 		/* legacy mode */
 		ops->get_msr(ctxt, MSR_STAR, &msr_data);
 		ctxt->_eip = (u32)msr_data;
 
-		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+		ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
 	}
 
 	return X86EMUL_CONTINUE;
@@ -2258,7 +2274,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 		break;
 	}
 
-	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+	ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
 	cs_sel = (u16)msr_data;
 	cs_sel &= ~SELECTOR_RPL_MASK;
 	ss_sel = cs_sel + 8;
@@ -2964,7 +2980,7 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
 
 static int em_mov(struct x86_emulate_ctxt *ctxt)
 {
-	memcpy(ctxt->dst.valptr, ctxt->src.valptr, ctxt->op_bytes);
+	memcpy(ctxt->dst.valptr, ctxt->src.valptr, sizeof(ctxt->src.valptr));
 	return X86EMUL_CONTINUE;
 }
 
@@ -3221,7 +3237,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt)
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->dst.bytes = 2;
+	if (ctxt->dst.type == OP_MEM)
+		ctxt->dst.bytes = 2;
 	ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0);
 	return X86EMUL_CONTINUE;
 }
@@ -3496,7 +3513,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
 	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
 	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
-	    (rcx > 3))
+	    ctxt->ops->check_pmc(ctxt, rcx))
 		return emulate_gp(ctxt, 0);
 
 	return X86EMUL_CONTINUE;
@@ -3521,9 +3538,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 }
 
 #define D(_y) { .flags = (_y) }
-#define DI(_y, _i) { .flags = (_y), .intercept = x86_intercept_##_i }
-#define DIP(_y, _i, _p) { .flags = (_y), .intercept = x86_intercept_##_i, \
-		      .check_perm = (_p) }
+#define DI(_y, _i) { .flags = (_y)|Intercept, .intercept = x86_intercept_##_i }
+#define DIP(_y, _i, _p) { .flags = (_y)|Intercept|CheckPerm, \
+		      .intercept = x86_intercept_##_i, .check_perm = (_p) }
 #define N    D(NotImpl)
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
@@ -3532,10 +3549,10 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
 #define II(_f, _e, _i) \
-	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
+	{ .flags = (_f)|Intercept, .u.execute = (_e), .intercept = x86_intercept_##_i }
 #define IIP(_f, _e, _i, _p) \
-	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i, \
-	  .check_perm = (_p) }
+	{ .flags = (_f)|Intercept|CheckPerm, .u.execute = (_e), \
+	  .intercept = x86_intercept_##_i, .check_perm = (_p) }
 #define GP(_f, _g) { .flags = ((_f) | Prefix), .u.gprefix = (_g) }
 
 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
@@ -3634,8 +3651,8 @@ static const struct opcode group6[] = {
 };
 
 static const struct group_dual group7 = { {
-	II(Mov | DstMem | Priv,			em_sgdt, sgdt),
-	II(Mov | DstMem | Priv,			em_sidt, sidt),
+	II(Mov | DstMem,			em_sgdt, sgdt),
+	II(Mov | DstMem,			em_sidt, sidt),
 	II(SrcMem | Priv,			em_lgdt, lgdt),
 	II(SrcMem | Priv,			em_lidt, lidt),
 	II(SrcNone | DstMem | Mov,		em_smsw, smsw), N,
@@ -3899,7 +3916,7 @@ static const struct opcode twobyte_table[256] = {
 	N, N,
 	N, N, N, N, N, N, N, N,
 	/* 0x40 - 0x4F */
-	X16(D(DstReg | SrcMem | ModRM | Mov)),
+	X16(D(DstReg | SrcMem | ModRM)),
 	/* 0x50 - 0x5F */
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
 	/* 0x60 - 0x6F */
@@ -4061,12 +4078,12 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	mem_common:
 		*op = ctxt->memop;
 		ctxt->memopp = op;
-		if ((ctxt->d & BitOp) && op == &ctxt->dst)
+		if (ctxt->d & BitOp)
 			fetch_bit_operand(ctxt);
 		op->orig_val = op->val;
 		break;
 	case OpMem64:
-		ctxt->memop.bytes = 8;
+		ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8;
 		goto mem_common;
 	case OpAcc:
 		op->type = OP_REG;
@@ -4150,7 +4167,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
 			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
-		op->addr.mem.seg = seg_override(ctxt);
+		op->addr.mem.seg = ctxt->seg_override;
 		op->val = 0;
 		op->count = 1;
 		break;
@@ -4161,7 +4178,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt,
 				reg_read(ctxt, VCPU_REGS_RBX) +
 				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
-		op->addr.mem.seg = seg_override(ctxt);
+		op->addr.mem.seg = ctxt->seg_override;
 		op->val = 0;
 		break;
 	case OpImmFAddr:
@@ -4208,16 +4225,22 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	int mode = ctxt->mode;
 	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
 	bool op_prefix = false;
+	bool has_seg_override = false;
 	struct opcode opcode;
 
 	ctxt->memop.type = OP_NONE;
 	ctxt->memopp = NULL;
 	ctxt->_eip = ctxt->eip;
-	ctxt->fetch.start = ctxt->_eip;
-	ctxt->fetch.end = ctxt->fetch.start + insn_len;
+	ctxt->fetch.ptr = ctxt->fetch.data;
+	ctxt->fetch.end = ctxt->fetch.data + insn_len;
 	ctxt->opcode_len = 1;
 	if (insn_len > 0)
 		memcpy(ctxt->fetch.data, insn, insn_len);
+	else {
+		rc = __do_insn_fetch_bytes(ctxt, 1);
+		if (rc != X86EMUL_CONTINUE)
+			return rc;
+	}
 
 	switch (mode) {
 	case X86EMUL_MODE_REAL:
@@ -4261,11 +4284,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 		case 0x2e:	/* CS override */
 		case 0x36:	/* SS override */
 		case 0x3e:	/* DS override */
-			set_seg_override(ctxt, (ctxt->b >> 3) & 3);
+			has_seg_override = true;
+			ctxt->seg_override = (ctxt->b >> 3) & 3;
 			break;
 		case 0x64:	/* FS override */
 		case 0x65:	/* GS override */
-			set_seg_override(ctxt, ctxt->b & 7);
+			has_seg_override = true;
+			ctxt->seg_override = ctxt->b & 7;
 			break;
 		case 0x40 ... 0x4f: /* REX */
 			if (mode != X86EMUL_MODE_PROT64)
@@ -4314,6 +4339,13 @@ done_prefixes:
 	if (ctxt->d & ModRM)
 		ctxt->modrm = insn_fetch(u8, ctxt);
 
+	/* vex-prefix instructions are not implemented */
+	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
+	    (mode == X86EMUL_MODE_PROT64 ||
+	     (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+		ctxt->d = NotImpl;
+	}
+
 	while (ctxt->d & GroupMask) {
 		switch (ctxt->d & GroupMask) {
 		case Group:
@@ -4356,49 +4388,59 @@ done_prefixes:
 		ctxt->d |= opcode.flags;
 	}
 
-	ctxt->execute = opcode.u.execute;
-	ctxt->check_perm = opcode.check_perm;
-	ctxt->intercept = opcode.intercept;
-
 	/* Unrecognised? */
-	if (ctxt->d == 0 || (ctxt->d & NotImpl))
+	if (ctxt->d == 0)
 		return EMULATION_FAILED;
 
-	if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
-		return EMULATION_FAILED;
+	ctxt->execute = opcode.u.execute;
 
-	if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-		ctxt->op_bytes = 8;
+	if (unlikely(ctxt->d &
+		     (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+		/*
+		 * These are copied unconditionally here, and checked unconditionally
+		 * in x86_emulate_insn.
+		 */
+		ctxt->check_perm = opcode.check_perm;
+		ctxt->intercept = opcode.intercept;
+
+		if (ctxt->d & NotImpl)
+			return EMULATION_FAILED;
+
+		if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
+			return EMULATION_FAILED;
 
-	if (ctxt->d & Op3264) {
-		if (mode == X86EMUL_MODE_PROT64)
+		if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
 			ctxt->op_bytes = 8;
-		else
-			ctxt->op_bytes = 4;
-	}
 
-	if (ctxt->d & Sse)
-		ctxt->op_bytes = 16;
-	else if (ctxt->d & Mmx)
-		ctxt->op_bytes = 8;
+		if (ctxt->d & Op3264) {
+			if (mode == X86EMUL_MODE_PROT64)
+				ctxt->op_bytes = 8;
+			else
+				ctxt->op_bytes = 4;
+		}
+
+		if (ctxt->d & Sse)
+			ctxt->op_bytes = 16;
+		else if (ctxt->d & Mmx)
+			ctxt->op_bytes = 8;
+	}
 
 	/* ModRM and SIB bytes. */
 	if (ctxt->d & ModRM) {
 		rc = decode_modrm(ctxt, &ctxt->memop);
-		if (!ctxt->has_seg_override)
-			set_seg_override(ctxt, ctxt->modrm_seg);
+		if (!has_seg_override) {
+			has_seg_override = true;
+			ctxt->seg_override = ctxt->modrm_seg;
+		}
 	} else if (ctxt->d & MemAbs)
 		rc = decode_abs(ctxt, &ctxt->memop);
 	if (rc != X86EMUL_CONTINUE)
 		goto done;
 
-	if (!ctxt->has_seg_override)
-		set_seg_override(ctxt, VCPU_SREG_DS);
-
-	ctxt->memop.addr.mem.seg = seg_override(ctxt);
+	if (!has_seg_override)
+		ctxt->seg_override = VCPU_SREG_DS;
 
-	if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
-		ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+	ctxt->memop.addr.mem.seg = ctxt->seg_override;
 
 	/*
 	 * Decode and fetch the source operand: register, memory
@@ -4420,7 +4462,7 @@ done_prefixes:
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
 done:
-	if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
+	if (ctxt->rip_relative)
 		ctxt->memopp->addr.mem.ea += ctxt->_eip;
 
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4495,6 +4537,16 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
 	return X86EMUL_CONTINUE;
 }
 
+void init_decode_cache(struct x86_emulate_ctxt *ctxt)
+{
+	memset(&ctxt->rip_relative, 0,
+	       (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+
+	ctxt->io_read.pos = 0;
+	ctxt->io_read.end = 0;
+	ctxt->mem_read.end = 0;
+}
+
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
 	const struct x86_emulate_ops *ops = ctxt->ops;
@@ -4503,12 +4555,6 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	ctxt->mem_read.pos = 0;
 
-	if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
-			(ctxt->d & Undefined)) {
-		rc = emulate_ud(ctxt);
-		goto done;
-	}
-
 	/* LOCK prefix is allowed only with some instructions */
 	if (ctxt->lock_prefix && (!(ctxt->d & Lock) || ctxt->dst.type != OP_MEM)) {
 		rc = emulate_ud(ctxt);
@@ -4520,69 +4566,82 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		goto done;
 	}
 
-	if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
-	    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
-		rc = emulate_ud(ctxt);
-		goto done;
-	}
-
-	if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
-		rc = emulate_nm(ctxt);
-		goto done;
-	}
-
-	if (ctxt->d & Mmx) {
-		rc = flush_pending_x87_faults(ctxt);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-		/*
-		 * Now that we know the fpu is exception safe, we can fetch
-		 * operands from it.
-		 */
-		fetch_possible_mmx_operand(ctxt, &ctxt->src);
-		fetch_possible_mmx_operand(ctxt, &ctxt->src2);
-		if (!(ctxt->d & Mov))
-			fetch_possible_mmx_operand(ctxt, &ctxt->dst);
-	}
-
-	if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
-		rc = emulator_check_intercept(ctxt, ctxt->intercept,
-					      X86_ICPT_PRE_EXCEPT);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-	}
-
-	/* Privileged instruction can be executed only in CPL=0 */
-	if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
-		rc = emulate_gp(ctxt, 0);
-		goto done;
-	}
-
-	/* Instruction can only be executed in protected mode */
-	if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-		rc = emulate_ud(ctxt);
-		goto done;
-	}
-
-	/* Do instruction specific permission checks */
-	if (ctxt->check_perm) {
-		rc = ctxt->check_perm(ctxt);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-	}
-
-	if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
-		rc = emulator_check_intercept(ctxt, ctxt->intercept,
-					      X86_ICPT_POST_EXCEPT);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-	}
-
-	if (ctxt->rep_prefix && (ctxt->d & String)) {
-		/* All REP prefixes have the same first termination condition */
-		if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
-			ctxt->eip = ctxt->_eip;
-			goto done;
+	if (unlikely(ctxt->d &
+		     (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
+		if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
+				(ctxt->d & Undefined)) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
+		if (((ctxt->d & (Sse|Mmx)) && ((ops->get_cr(ctxt, 0) & X86_CR0_EM)))
+		    || ((ctxt->d & Sse) && !(ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
+		if ((ctxt->d & (Sse|Mmx)) && (ops->get_cr(ctxt, 0) & X86_CR0_TS)) {
+			rc = emulate_nm(ctxt);
+			goto done;
+		}
+
+		if (ctxt->d & Mmx) {
+			rc = flush_pending_x87_faults(ctxt);
+			if (rc != X86EMUL_CONTINUE)
+				goto done;
+			/*
+			 * Now that we know the fpu is exception safe, we can fetch
+			 * operands from it.
+			 */
+			fetch_possible_mmx_operand(ctxt, &ctxt->src);
+			fetch_possible_mmx_operand(ctxt, &ctxt->src2);
+			if (!(ctxt->d & Mov))
+				fetch_possible_mmx_operand(ctxt, &ctxt->dst);
+		}
+
+		if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+			rc = emulator_check_intercept(ctxt, ctxt->intercept,
+						      X86_ICPT_PRE_EXCEPT);
+			if (rc != X86EMUL_CONTINUE)
+				goto done;
+		}
+
+		/* Privileged instruction can be executed only in CPL=0 */
+		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
+			if (ctxt->d & PrivUD)
+				rc = emulate_ud(ctxt);
+			else
+				rc = emulate_gp(ctxt, 0);
+			goto done;
+		}
+
+		/* Instruction can only be executed in protected mode */
+		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
+		/* Do instruction specific permission checks */
+		if (ctxt->d & CheckPerm) {
+			rc = ctxt->check_perm(ctxt);
+			if (rc != X86EMUL_CONTINUE)
+				goto done;
+		}
+
+		if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
+			rc = emulator_check_intercept(ctxt, ctxt->intercept,
+						      X86_ICPT_POST_EXCEPT);
+			if (rc != X86EMUL_CONTINUE)
+				goto done;
+		}
+
+		if (ctxt->rep_prefix && (ctxt->d & String)) {
+			/* All REP prefixes have the same first termination condition */
+			if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
+				ctxt->eip = ctxt->_eip;
+				ctxt->eflags &= ~EFLG_RF;
+				goto done;
+			}
 		}
 	}
 
@@ -4616,13 +4675,18 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 special_insn:
 
-	if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+	if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
 		rc = emulator_check_intercept(ctxt, ctxt->intercept,
 					      X86_ICPT_POST_MEMACCESS);
 		if (rc != X86EMUL_CONTINUE)
 			goto done;
 	}
 
+	if (ctxt->rep_prefix && (ctxt->d & String))
+		ctxt->eflags |= EFLG_RF;
+	else
+		ctxt->eflags &= ~EFLG_RF;
+
 	if (ctxt->execute) {
 		if (ctxt->d & Fastop) {
 			void (*fop)(struct fastop *) = (void *)ctxt->execute;
@@ -4657,8 +4721,9 @@ special_insn:
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
 		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
-			break;
-		rc = em_xchg(ctxt);
+			ctxt->dst.type = OP_NONE;
+		else
+			rc = em_xchg(ctxt);
 		break;
 	case 0x98: /* cbw/cwde/cdqe */
 		switch (ctxt->op_bytes) {
@@ -4709,17 +4774,17 @@ special_insn:
 		goto done;
 
 writeback:
-	if (!(ctxt->d & NoWrite)) {
-		rc = writeback(ctxt, &ctxt->dst);
-		if (rc != X86EMUL_CONTINUE)
-			goto done;
-	}
 	if (ctxt->d & SrcWrite) {
 		BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR);
 		rc = writeback(ctxt, &ctxt->src);
 		if (rc != X86EMUL_CONTINUE)
 			goto done;
 	}
+	if (!(ctxt->d & NoWrite)) {
+		rc = writeback(ctxt, &ctxt->dst);
+		if (rc != X86EMUL_CONTINUE)
+			goto done;
+	}
 
 	/*
 	 * restore dst type in case the decoding will be reused
@@ -4761,6 +4826,7 @@ writeback:
 			}
 			goto done; /* skip rip writeback */
 		}
+		ctxt->eflags &= ~EFLG_RF;
 	}
 
 	ctxt->eip = ctxt->_eip;
@@ -4793,8 +4859,10 @@ twobyte_insn:
 		ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val);
 		break;
 	case 0x40 ... 0x4f:	/* cmov */
-		ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val;
-		if (!test_cc(ctxt->b, ctxt->eflags))
+		if (test_cc(ctxt->b, ctxt->eflags))
+			ctxt->dst.val = ctxt->src.val;
+		else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
+			 ctxt->op_bytes != 4)
 			ctxt->dst.type = OP_NONE; /* no writeback */
 		break;
 	case 0x80 ... 0x8f: /* jnz rel, etc*/
@@ -4818,8 +4886,8 @@ twobyte_insn:
 		break;
 	case 0xc3:		/* movnti */
 		ctxt->dst.bytes = ctxt->op_bytes;
-		ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val :
-			(u64) ctxt->src.val;
+		ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
+			(u32) ctxt->src.val;
 		break;
 	default:
 		goto cannot_emulate;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 006911858174..3855103f71fd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1451,7 +1451,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.apic_arb_prio = 0;
 	vcpu->arch.apic_attention = 0;
 
-	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
+	apic_debug("%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 		   vcpu, kvm_apic_id(apic),
 		   vcpu->arch.apic_base, apic->base_address);
@@ -1895,7 +1895,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 		/* evaluate pending_events before reading the vector */
 		smp_rmb();
 		sipi_vector = apic->sipi_vector;
-		pr_debug("vcpu %d received sipi with vector # %x\n",
+		apic_debug("vcpu %d received sipi with vector # %x\n",
 			 vcpu->vcpu_id, sipi_vector);
 		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h
index 2e5652b62fd6..5aaf35641768 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/kvm/mmutrace.h
@@ -22,7 +22,7 @@
 	__entry->unsync = sp->unsync;
 
 #define KVM_MMU_PAGE_PRINTK() ({				        \
-	const char *ret = trace_seq_buffer_ptr(p);			\
+	const u32 saved_len = p->len;					\
 	static const char *access_str[] = {			        \
 		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"  \
 	};							        \
@@ -41,7 +41,7 @@
 		 role.nxe ? "" : "!",				        \
 		 __entry->root_count,				        \
 		 __entry->unsync ? "unsync" : "sync", 0);	        \
-	ret;								\
+	p->buffer + saved_len;						\
 	})
 
 #define kvm_mmu_trace_pferr_flags       \
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index cbecaa90399c..3dd6accb64ec 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -428,6 +428,15 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 1;
 }
 
+int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	bool fixed = pmc & (1u << 30);
+	pmc &= ~(3u << 30);
+	return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
+		(fixed && pmc >= pmu->nr_arch_fixed_counters);
+}
+
 int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b5e994ad0135..ddf742768ecf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -486,14 +486,14 @@ static int is_external_interrupt(u32 info)
 	return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
 }
 
-static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 ret = 0;
 
 	if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
-		ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
-	return ret & mask;
+		ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
+	return ret;
 }
 
 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
@@ -1415,7 +1415,16 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
 	var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
 	var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
-	var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
+
+	/*
+	 * AMD CPUs circa 2014 track the G bit for all segments except CS.
+	 * However, the SVM spec states that the G bit is not observed by the
+	 * CPU, and some VMware virtual CPUs drop the G bit for all segments.
+	 * So let's synthesize a legal G bit for all segments, this helps
+	 * running KVM nested. It also helps cross-vendor migration, because
+	 * Intel's vmentry has a check on the 'G' bit.
+	 */
+	var->g = s->limit > 0xfffff;
 
 	/*
 	 * AMD's VMCB does not have an explicit unusable field, so emulate it
@@ -1424,14 +1433,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->unusable = !var->present || (var->type == 0);
 
 	switch (seg) {
-	case VCPU_SREG_CS:
-		/*
-		 * SVM always stores 0 for the 'G' bit in the CS selector in
-		 * the VMCB on a VMEXIT. This hurts cross-vendor migration:
-		 * Intel's VMENTRY has a check on the 'G' bit.
-		 */
-		var->g = s->limit > 0xfffff;
-		break;
 	case VCPU_SREG_TR:
 		/*
 		 * Work around a bug where the busy flag in the tr selector
@@ -2116,22 +2117,27 @@ static void nested_svm_unmap(struct page *page)
 
 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
 {
-	unsigned port;
-	u8 val, bit;
+	unsigned port, size, iopm_len;
+	u16 val, mask;
+	u8 start_bit;
 	u64 gpa;
 
 	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
 		return NESTED_EXIT_HOST;
 
 	port = svm->vmcb->control.exit_info_1 >> 16;
+	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
+		SVM_IOIO_SIZE_SHIFT;
 	gpa  = svm->nested.vmcb_iopm + (port / 8);
-	bit  = port % 8;
-	val  = 0;
+	start_bit = port % 8;
+	iopm_len = (start_bit + size > 8) ? 2 : 1;
+	mask = (0xf >> (4 - size)) << start_bit;
+	val = 0;
 
-	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
-		val &= (1 << bit);
+	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
+		return NESTED_EXIT_DONE;
 
-	return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
+	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
 }
 
 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
@@ -4205,7 +4211,8 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 		if (info->intercept == x86_intercept_cr_write)
 			icpt_info.exit_code += info->modrm_reg;
 
-		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
+		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
+		    info->intercept == x86_intercept_clts)
 			break;
 
 		intercept = svm->nested.intercept;
@@ -4250,14 +4257,14 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 		u64 exit_info;
 		u32 bytes;
 
-		exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
-
 		if (info->intercept == x86_intercept_in ||
 		    info->intercept == x86_intercept_ins) {
-			exit_info |= SVM_IOIO_TYPE_MASK;
-			bytes = info->src_bytes;
-		} else {
+			exit_info = ((info->src_val & 0xffff) << 16) |
+				SVM_IOIO_TYPE_MASK;
 			bytes = info->dst_bytes;
+		} else {
+			exit_info = (info->dst_val & 0xffff) << 16;
+			bytes = info->src_bytes;
 		}
 
 		if (info->intercept == x86_intercept_outs ||
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 33574c95220d..e850a7d332be 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -721,10 +721,10 @@ TRACE_EVENT(kvm_emulate_insn,
 		),
 
 	TP_fast_assign(
-		__entry->rip = vcpu->arch.emulate_ctxt.fetch.start;
 		__entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS);
-		__entry->len = vcpu->arch.emulate_ctxt._eip
-			       - vcpu->arch.emulate_ctxt.fetch.start;
+		__entry->len = vcpu->arch.emulate_ctxt.fetch.ptr
+			       - vcpu->arch.emulate_ctxt.fetch.data;
+		__entry->rip = vcpu->arch.emulate_ctxt._eip - __entry->len;
 		memcpy(__entry->insn,
 		       vcpu->arch.emulate_ctxt.fetch.data,
 		       15);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 801332edefc3..e618f34bde2d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -383,6 +383,9 @@ struct nested_vmx {
 
 	struct hrtimer preemption_timer;
 	bool preemption_timer_expired;
+
+	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
+	u64 vmcs01_debugctl;
 };
 
 #define POSTED_INTR_ON 0
@@ -740,7 +743,6 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var);
 static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu);
 static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx);
 static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx);
-static bool vmx_mpx_supported(void);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -820,7 +822,6 @@ static const u32 vmx_msr_index[] = {
 #endif
 	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
 };
-#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
 
 static inline bool is_page_fault(u32 intr_info)
 {
@@ -1940,7 +1941,7 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }
 
-static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
+static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
 	int ret = 0;
@@ -1950,7 +1951,7 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 	if (interruptibility & GUEST_INTR_STATE_MOV_SS)
 		ret |= KVM_X86_SHADOW_INT_MOV_SS;
 
-	return ret & mask;
+	return ret;
 }
 
 static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
@@ -2239,10 +2240,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
  * or other means.
  */
 static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high;
+static u32 nested_vmx_true_procbased_ctls_low;
 static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high;
 static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
+static u32 nested_vmx_true_exit_ctls_low;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
+static u32 nested_vmx_true_entry_ctls_low;
 static u32 nested_vmx_misc_low, nested_vmx_misc_high;
 static u32 nested_vmx_ept_caps;
 static __init void nested_vmx_setup_ctls_msrs(void)
@@ -2265,21 +2269,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	/* pin-based controls */
 	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
 		nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high);
-	/*
-	 * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is
-	 * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR.
-	 */
 	nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
 		PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS;
 	nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
 
-	/*
-	 * Exit controls
-	 * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
-	 * 17 must be 1.
-	 */
+	/* exit controls */
 	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
2284 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); 2280 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
2285 nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; 2281 nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
@@ -2296,10 +2292,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2296 if (vmx_mpx_supported()) 2292 if (vmx_mpx_supported())
2297 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 2293 nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
2298 2294
2295 /* We support free control of debug control saving. */
2296 nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low &
2297 ~VM_EXIT_SAVE_DEBUG_CONTROLS;
2298
2299 /* entry controls */ 2299 /* entry controls */
2300 rdmsr(MSR_IA32_VMX_ENTRY_CTLS, 2300 rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
2301 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); 2301 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high);
2302 /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
2303 nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; 2302 nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
2304 nested_vmx_entry_ctls_high &= 2303 nested_vmx_entry_ctls_high &=
2305#ifdef CONFIG_X86_64 2304#ifdef CONFIG_X86_64
@@ -2311,10 +2310,14 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2311 if (vmx_mpx_supported()) 2310 if (vmx_mpx_supported())
2312 nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; 2311 nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
2313 2312
2313 /* We support free control of debug control loading. */
2314 nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low &
2315 ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
2316
2314 /* cpu-based controls */ 2317 /* cpu-based controls */
2315 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, 2318 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
2316 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); 2319 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
2317 nested_vmx_procbased_ctls_low = 0; 2320 nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
2318 nested_vmx_procbased_ctls_high &= 2321 nested_vmx_procbased_ctls_high &=
2319 CPU_BASED_VIRTUAL_INTR_PENDING | 2322 CPU_BASED_VIRTUAL_INTR_PENDING |
2320 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | 2323 CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
@@ -2335,7 +2338,12 @@ static __init void nested_vmx_setup_ctls_msrs(void)
2335 * can use it to avoid exits to L1 - even when L0 runs L2 2338 * can use it to avoid exits to L1 - even when L0 runs L2
2336 * without MSR bitmaps. 2339 * without MSR bitmaps.
2337 */ 2340 */
2338 nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; 2341 nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
2342 CPU_BASED_USE_MSR_BITMAPS;
2343
2344 /* We support free control of CR3 access interception. */
2345 nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low &
2346 ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
2339 2347
2340 /* secondary cpu-based controls */ 2348 /* secondary cpu-based controls */
2341 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, 2349 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -2394,7 +2402,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2394 * guest, and the VMCS structure we give it - not about the 2402 * guest, and the VMCS structure we give it - not about the
2395 * VMX support of the underlying hardware. 2403 * VMX support of the underlying hardware.
2396 */ 2404 */
2397 *pdata = VMCS12_REVISION | 2405 *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
2398 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | 2406 ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
2399 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); 2407 (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
2400 break; 2408 break;
@@ -2404,16 +2412,25 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2404 nested_vmx_pinbased_ctls_high); 2412 nested_vmx_pinbased_ctls_high);
2405 break; 2413 break;
2406 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: 2414 case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
2415 *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low,
2416 nested_vmx_procbased_ctls_high);
2417 break;
2407 case MSR_IA32_VMX_PROCBASED_CTLS: 2418 case MSR_IA32_VMX_PROCBASED_CTLS:
2408 *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, 2419 *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low,
2409 nested_vmx_procbased_ctls_high); 2420 nested_vmx_procbased_ctls_high);
2410 break; 2421 break;
2411 case MSR_IA32_VMX_TRUE_EXIT_CTLS: 2422 case MSR_IA32_VMX_TRUE_EXIT_CTLS:
2423 *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low,
2424 nested_vmx_exit_ctls_high);
2425 break;
2412 case MSR_IA32_VMX_EXIT_CTLS: 2426 case MSR_IA32_VMX_EXIT_CTLS:
2413 *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, 2427 *pdata = vmx_control_msr(nested_vmx_exit_ctls_low,
2414 nested_vmx_exit_ctls_high); 2428 nested_vmx_exit_ctls_high);
2415 break; 2429 break;
2416 case MSR_IA32_VMX_TRUE_ENTRY_CTLS: 2430 case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
2431 *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low,
2432 nested_vmx_entry_ctls_high);
2433 break;
2417 case MSR_IA32_VMX_ENTRY_CTLS: 2434 case MSR_IA32_VMX_ENTRY_CTLS:
2418 *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, 2435 *pdata = vmx_control_msr(nested_vmx_entry_ctls_low,
2419 nested_vmx_entry_ctls_high); 2436 nested_vmx_entry_ctls_high);
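These MSR reads expose the new "true" capability MSRs to L1. A VMX control capability MSR packs a must-be-1 mask in its low 32 bits and a may-be-1 mask in its high 32 bits; the nested_vmx_true_*_ctls_low variants relax the must-be-1 mask (debug-control load/save, CR3 load/store exiting) so L1 may legitimately leave those controls clear. A stand-alone sketch of the packing and of the check that nested_vmx_run applies later; this mirrors the semantics only, with made-up masks, not the kernel's exact expressions:

/*
 * control_msr() models vmx_control_msr(); control_ok() models the
 * vmx_control_verify() semantics: all required bits set, no
 * unsupported bits set.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static uint64_t control_msr(uint32_t must_be_1, uint32_t may_be_1)
{
        return (uint64_t)must_be_1 | ((uint64_t)may_be_1 << 32);
}

static bool control_ok(uint32_t control, uint32_t must_be_1, uint32_t may_be_1)
{
        return (control & must_be_1) == must_be_1 &&   /* required bits present   */
               (control & ~may_be_1) == 0;             /* nothing unsupported set */
}

int main(void)
{
        /* Illustrative masks only, not real VMX control encodings. */
        uint32_t entry_low  = 0x000011ff;      /* must-be-1 per the plain MSR */
        uint32_t entry_high = 0x0003ffff;
        uint32_t load_dbg   = 1u << 2;         /* stands in for LOAD_DEBUG_CONTROLS */
        uint32_t true_low   = entry_low & ~load_dbg;

        printf("TRUE_ENTRY_CTLS = %#llx\n",
               (unsigned long long)control_msr(true_low, entry_high));
        /* Leaving the debug-controls bit clear passes against the true mask... */
        printf("ok vs true mask : %d\n", control_ok(entry_low & ~load_dbg, true_low, entry_high));
        /* ...but would fail against the plain must-be-1 mask. */
        printf("ok vs plain mask: %d\n", control_ok(entry_low & ~load_dbg, entry_low, entry_high));
        return 0;
}
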
@@ -2442,7 +2459,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
2442 *pdata = -1ULL; 2459 *pdata = -1ULL;
2443 break; 2460 break;
2444 case MSR_IA32_VMX_VMCS_ENUM: 2461 case MSR_IA32_VMX_VMCS_ENUM:
2445 *pdata = 0x1f; 2462 *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
2446 break; 2463 break;
2447 case MSR_IA32_VMX_PROCBASED_CTLS2: 2464 case MSR_IA32_VMX_PROCBASED_CTLS2:
2448 *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, 2465 *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low,
@@ -3653,7 +3670,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
3653 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); 3670 vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var));
3654 3671
3655out: 3672out:
3656 vmx->emulation_required |= emulation_required(vcpu); 3673 vmx->emulation_required = emulation_required(vcpu);
3657} 3674}
3658 3675
3659static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) 3676static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -4422,7 +4439,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
4422 vmx->vcpu.arch.pat = host_pat; 4439 vmx->vcpu.arch.pat = host_pat;
4423 } 4440 }
4424 4441
4425 for (i = 0; i < NR_VMX_MSR; ++i) { 4442 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
4426 u32 index = vmx_msr_index[i]; 4443 u32 index = vmx_msr_index[i];
4427 u32 data_low, data_high; 4444 u32 data_low, data_high;
4428 int j = vmx->nmsrs; 4445 int j = vmx->nmsrs;
@@ -4873,7 +4890,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
4873 if (!(vcpu->guest_debug & 4890 if (!(vcpu->guest_debug &
4874 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { 4891 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
4875 vcpu->arch.dr6 &= ~15; 4892 vcpu->arch.dr6 &= ~15;
4876 vcpu->arch.dr6 |= dr6; 4893 vcpu->arch.dr6 |= dr6 | DR6_RTM;
4877 if (!(dr6 & ~DR6_RESERVED)) /* icebp */ 4894 if (!(dr6 & ~DR6_RESERVED)) /* icebp */
4878 skip_emulated_instruction(vcpu); 4895 skip_emulated_instruction(vcpu);
4879 4896
@@ -5039,7 +5056,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
5039 reg = (exit_qualification >> 8) & 15; 5056 reg = (exit_qualification >> 8) & 15;
5040 switch ((exit_qualification >> 4) & 3) { 5057 switch ((exit_qualification >> 4) & 3) {
5041 case 0: /* mov to cr */ 5058 case 0: /* mov to cr */
5042 val = kvm_register_read(vcpu, reg); 5059 val = kvm_register_readl(vcpu, reg);
5043 trace_kvm_cr_write(cr, val); 5060 trace_kvm_cr_write(cr, val);
5044 switch (cr) { 5061 switch (cr) {
5045 case 0: 5062 case 0:
@@ -5056,7 +5073,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
5056 return 1; 5073 return 1;
5057 case 8: { 5074 case 8: {
5058 u8 cr8_prev = kvm_get_cr8(vcpu); 5075 u8 cr8_prev = kvm_get_cr8(vcpu);
5059 u8 cr8 = kvm_register_read(vcpu, reg); 5076 u8 cr8 = (u8)val;
5060 err = kvm_set_cr8(vcpu, cr8); 5077 err = kvm_set_cr8(vcpu, cr8);
5061 kvm_complete_insn_gp(vcpu, err); 5078 kvm_complete_insn_gp(vcpu, err);
5062 if (irqchip_in_kernel(vcpu->kvm)) 5079 if (irqchip_in_kernel(vcpu->kvm))
@@ -5132,7 +5149,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5132 return 0; 5149 return 0;
5133 } else { 5150 } else {
5134 vcpu->arch.dr7 &= ~DR7_GD; 5151 vcpu->arch.dr7 &= ~DR7_GD;
5135 vcpu->arch.dr6 |= DR6_BD; 5152 vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
5136 vmcs_writel(GUEST_DR7, vcpu->arch.dr7); 5153 vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
5137 kvm_queue_exception(vcpu, DB_VECTOR); 5154 kvm_queue_exception(vcpu, DB_VECTOR);
5138 return 1; 5155 return 1;
@@ -5165,7 +5182,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
5165 return 1; 5182 return 1;
5166 kvm_register_write(vcpu, reg, val); 5183 kvm_register_write(vcpu, reg, val);
5167 } else 5184 } else
5168 if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg))) 5185 if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
5169 return 1; 5186 return 1;
5170 5187
5171 skip_emulated_instruction(vcpu); 5188 skip_emulated_instruction(vcpu);
@@ -5621,7 +5638,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5621 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 5638 cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
5622 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; 5639 intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
5623 5640
5624 while (!guest_state_valid(vcpu) && count-- != 0) { 5641 while (vmx->emulation_required && count-- != 0) {
5625 if (intr_window_requested && vmx_interrupt_allowed(vcpu)) 5642 if (intr_window_requested && vmx_interrupt_allowed(vcpu))
5626 return handle_interrupt_window(&vmx->vcpu); 5643 return handle_interrupt_window(&vmx->vcpu);
5627 5644
@@ -5655,7 +5672,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
5655 schedule(); 5672 schedule();
5656 } 5673 }
5657 5674
5658 vmx->emulation_required = emulation_required(vcpu);
5659out: 5675out:
5660 return ret; 5676 return ret;
5661} 5677}
@@ -5754,22 +5770,27 @@ static void nested_free_vmcs02(struct vcpu_vmx *vmx, gpa_t vmptr)
5754 5770
5755/* 5771/*
5756 * Free all VMCSs saved for this vcpu, except the one pointed by 5772 * Free all VMCSs saved for this vcpu, except the one pointed by
5757 * vmx->loaded_vmcs. These include the VMCSs in vmcs02_pool (except the one 5773 * vmx->loaded_vmcs. We must be running L1, so vmx->loaded_vmcs
5758 * currently used, if running L2), and vmcs01 when running L2. 5774 * must be &vmx->vmcs01.
5759 */ 5775 */
5760static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx) 5776static void nested_free_all_saved_vmcss(struct vcpu_vmx *vmx)
5761{ 5777{
5762 struct vmcs02_list *item, *n; 5778 struct vmcs02_list *item, *n;
5779
5780 WARN_ON(vmx->loaded_vmcs != &vmx->vmcs01);
5763 list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) { 5781 list_for_each_entry_safe(item, n, &vmx->nested.vmcs02_pool, list) {
5764 if (vmx->loaded_vmcs != &item->vmcs02) 5782 /*
5765 free_loaded_vmcs(&item->vmcs02); 5783 * Something will leak if the above WARN triggers. Better than
5784 * a use-after-free.
5785 */
5786 if (vmx->loaded_vmcs == &item->vmcs02)
5787 continue;
5788
5789 free_loaded_vmcs(&item->vmcs02);
5766 list_del(&item->list); 5790 list_del(&item->list);
5767 kfree(item); 5791 kfree(item);
5792 vmx->nested.vmcs02_num--;
5768 } 5793 }
5769 vmx->nested.vmcs02_num = 0;
5770
5771 if (vmx->loaded_vmcs != &vmx->vmcs01)
5772 free_loaded_vmcs(&vmx->vmcs01);
5773} 5794}
5774 5795
5775/* 5796/*
@@ -5918,7 +5939,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5918 * which replaces physical address width with 32 5939 * which replaces physical address width with 32
5919 * 5940 *
5920 */ 5941 */
5921 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { 5942 if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
5922 nested_vmx_failInvalid(vcpu); 5943 nested_vmx_failInvalid(vcpu);
5923 skip_emulated_instruction(vcpu); 5944 skip_emulated_instruction(vcpu);
5924 return 1; 5945 return 1;
@@ -5936,7 +5957,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5936 vmx->nested.vmxon_ptr = vmptr; 5957 vmx->nested.vmxon_ptr = vmptr;
5937 break; 5958 break;
5938 case EXIT_REASON_VMCLEAR: 5959 case EXIT_REASON_VMCLEAR:
5939 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { 5960 if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
5940 nested_vmx_failValid(vcpu, 5961 nested_vmx_failValid(vcpu,
5941 VMXERR_VMCLEAR_INVALID_ADDRESS); 5962 VMXERR_VMCLEAR_INVALID_ADDRESS);
5942 skip_emulated_instruction(vcpu); 5963 skip_emulated_instruction(vcpu);
@@ -5951,7 +5972,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
5951 } 5972 }
5952 break; 5973 break;
5953 case EXIT_REASON_VMPTRLD: 5974 case EXIT_REASON_VMPTRLD:
5954 if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { 5975 if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
5955 nested_vmx_failValid(vcpu, 5976 nested_vmx_failValid(vcpu,
5956 VMXERR_VMPTRLD_INVALID_ADDRESS); 5977 VMXERR_VMPTRLD_INVALID_ADDRESS);
5957 skip_emulated_instruction(vcpu); 5978 skip_emulated_instruction(vcpu);
@@ -6086,20 +6107,27 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
6086static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) 6107static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
6087{ 6108{
6088 u32 exec_control; 6109 u32 exec_control;
6110 if (vmx->nested.current_vmptr == -1ull)
6111 return;
6112
6113 /* current_vmptr and current_vmcs12 are always set/reset together */
6114 if (WARN_ON(vmx->nested.current_vmcs12 == NULL))
6115 return;
6116
6089 if (enable_shadow_vmcs) { 6117 if (enable_shadow_vmcs) {
6090 if (vmx->nested.current_vmcs12 != NULL) { 6118 /* copy to memory all shadowed fields in case
6091 /* copy to memory all shadowed fields in case 6119 they were modified */
6092 they were modified */ 6120 copy_shadow_to_vmcs12(vmx);
6093 copy_shadow_to_vmcs12(vmx); 6121 vmx->nested.sync_shadow_vmcs = false;
6094 vmx->nested.sync_shadow_vmcs = false; 6122 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
6095 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); 6123 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
6096 exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; 6124 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
6097 vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); 6125 vmcs_write64(VMCS_LINK_POINTER, -1ull);
6098 vmcs_write64(VMCS_LINK_POINTER, -1ull);
6099 }
6100 } 6126 }
6101 kunmap(vmx->nested.current_vmcs12_page); 6127 kunmap(vmx->nested.current_vmcs12_page);
6102 nested_release_page(vmx->nested.current_vmcs12_page); 6128 nested_release_page(vmx->nested.current_vmcs12_page);
6129 vmx->nested.current_vmptr = -1ull;
6130 vmx->nested.current_vmcs12 = NULL;
6103} 6131}
6104 6132
6105/* 6133/*
@@ -6110,12 +6138,9 @@ static void free_nested(struct vcpu_vmx *vmx)
6110{ 6138{
6111 if (!vmx->nested.vmxon) 6139 if (!vmx->nested.vmxon)
6112 return; 6140 return;
6141
6113 vmx->nested.vmxon = false; 6142 vmx->nested.vmxon = false;
6114 if (vmx->nested.current_vmptr != -1ull) { 6143 nested_release_vmcs12(vmx);
6115 nested_release_vmcs12(vmx);
6116 vmx->nested.current_vmptr = -1ull;
6117 vmx->nested.current_vmcs12 = NULL;
6118 }
6119 if (enable_shadow_vmcs) 6144 if (enable_shadow_vmcs)
6120 free_vmcs(vmx->nested.current_shadow_vmcs); 6145 free_vmcs(vmx->nested.current_shadow_vmcs);
6121 /* Unpin physical memory we referred to in current vmcs02 */ 6146 /* Unpin physical memory we referred to in current vmcs02 */
@@ -6152,11 +6177,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
6152 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr)) 6177 if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMCLEAR, &vmptr))
6153 return 1; 6178 return 1;
6154 6179
6155 if (vmptr == vmx->nested.current_vmptr) { 6180 if (vmptr == vmx->nested.current_vmptr)
6156 nested_release_vmcs12(vmx); 6181 nested_release_vmcs12(vmx);
6157 vmx->nested.current_vmptr = -1ull;
6158 vmx->nested.current_vmcs12 = NULL;
6159 }
6160 6182
6161 page = nested_get_page(vcpu, vmptr); 6183 page = nested_get_page(vcpu, vmptr);
6162 if (page == NULL) { 6184 if (page == NULL) {
@@ -6384,7 +6406,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
6384 return 1; 6406 return 1;
6385 6407
6386 /* Decode instruction info and find the field to read */ 6408 /* Decode instruction info and find the field to read */
6387 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 6409 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
6388 /* Read the field, zero-extended to a u64 field_value */ 6410 /* Read the field, zero-extended to a u64 field_value */
6389 if (!vmcs12_read_any(vcpu, field, &field_value)) { 6411 if (!vmcs12_read_any(vcpu, field, &field_value)) {
6390 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); 6412 nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
@@ -6397,7 +6419,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
6397 * on the guest's mode (32 or 64 bit), not on the given field's length. 6419 * on the guest's mode (32 or 64 bit), not on the given field's length.
6398 */ 6420 */
6399 if (vmx_instruction_info & (1u << 10)) { 6421 if (vmx_instruction_info & (1u << 10)) {
6400 kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), 6422 kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf),
6401 field_value); 6423 field_value);
6402 } else { 6424 } else {
6403 if (get_vmx_mem_address(vcpu, exit_qualification, 6425 if (get_vmx_mem_address(vcpu, exit_qualification,
@@ -6434,21 +6456,21 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
6434 return 1; 6456 return 1;
6435 6457
6436 if (vmx_instruction_info & (1u << 10)) 6458 if (vmx_instruction_info & (1u << 10))
6437 field_value = kvm_register_read(vcpu, 6459 field_value = kvm_register_readl(vcpu,
6438 (((vmx_instruction_info) >> 3) & 0xf)); 6460 (((vmx_instruction_info) >> 3) & 0xf));
6439 else { 6461 else {
6440 if (get_vmx_mem_address(vcpu, exit_qualification, 6462 if (get_vmx_mem_address(vcpu, exit_qualification,
6441 vmx_instruction_info, &gva)) 6463 vmx_instruction_info, &gva))
6442 return 1; 6464 return 1;
6443 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, 6465 if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
6444 &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { 6466 &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
6445 kvm_inject_page_fault(vcpu, &e); 6467 kvm_inject_page_fault(vcpu, &e);
6446 return 1; 6468 return 1;
6447 } 6469 }
6448 } 6470 }
6449 6471
6450 6472
6451 field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); 6473 field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
6452 if (vmcs_field_readonly(field)) { 6474 if (vmcs_field_readonly(field)) {
6453 nested_vmx_failValid(vcpu, 6475 nested_vmx_failValid(vcpu,
6454 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); 6476 VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
@@ -6498,9 +6520,8 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
6498 skip_emulated_instruction(vcpu); 6520 skip_emulated_instruction(vcpu);
6499 return 1; 6521 return 1;
6500 } 6522 }
6501 if (vmx->nested.current_vmptr != -1ull)
6502 nested_release_vmcs12(vmx);
6503 6523
6524 nested_release_vmcs12(vmx);
6504 vmx->nested.current_vmptr = vmptr; 6525 vmx->nested.current_vmptr = vmptr;
6505 vmx->nested.current_vmcs12 = new_vmcs12; 6526 vmx->nested.current_vmcs12 = new_vmcs12;
6506 vmx->nested.current_vmcs12_page = page; 6527 vmx->nested.current_vmcs12_page = page;
@@ -6571,7 +6592,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
6571 } 6592 }
6572 6593
6573 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); 6594 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
6574 type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); 6595 type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
6575 6596
6576 types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; 6597 types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
6577 6598
@@ -6751,7 +6772,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
6751 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); 6772 unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
6752 int cr = exit_qualification & 15; 6773 int cr = exit_qualification & 15;
6753 int reg = (exit_qualification >> 8) & 15; 6774 int reg = (exit_qualification >> 8) & 15;
6754 unsigned long val = kvm_register_read(vcpu, reg); 6775 unsigned long val = kvm_register_readl(vcpu, reg);
6755 6776
6756 switch ((exit_qualification >> 4) & 3) { 6777 switch ((exit_qualification >> 4) & 3) {
6757 case 0: /* mov to cr */ 6778 case 0: /* mov to cr */
@@ -7112,7 +7133,26 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
7112 if (max_irr == -1) 7133 if (max_irr == -1)
7113 return; 7134 return;
7114 7135
7115 vmx_set_rvi(max_irr); 7136 /*
7137 * If a vmexit is needed, vmx_check_nested_events handles it.
7138 */
7139 if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
7140 return;
7141
7142 if (!is_guest_mode(vcpu)) {
7143 vmx_set_rvi(max_irr);
7144 return;
7145 }
7146
7147 /*
7148 * Fall back to pre-APICv interrupt injection since L2
7149 * is run without virtual interrupt delivery.
7150 */
7151 if (!kvm_event_needs_reinjection(vcpu) &&
7152 vmx_interrupt_allowed(vcpu)) {
7153 kvm_queue_interrupt(vcpu, max_irr, false);
7154 vmx_inject_irq(vcpu);
7155 }
7116} 7156}
7117 7157
7118static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 7158static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
@@ -7520,13 +7560,31 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
7520 vmx_complete_interrupts(vmx); 7560 vmx_complete_interrupts(vmx);
7521} 7561}
7522 7562
7563static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
7564{
7565 struct vcpu_vmx *vmx = to_vmx(vcpu);
7566 int cpu;
7567
7568 if (vmx->loaded_vmcs == &vmx->vmcs01)
7569 return;
7570
7571 cpu = get_cpu();
7572 vmx->loaded_vmcs = &vmx->vmcs01;
7573 vmx_vcpu_put(vcpu);
7574 vmx_vcpu_load(vcpu, cpu);
7575 vcpu->cpu = cpu;
7576 put_cpu();
7577}
7578
7523static void vmx_free_vcpu(struct kvm_vcpu *vcpu) 7579static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
7524{ 7580{
7525 struct vcpu_vmx *vmx = to_vmx(vcpu); 7581 struct vcpu_vmx *vmx = to_vmx(vcpu);
7526 7582
7527 free_vpid(vmx); 7583 free_vpid(vmx);
7528 free_loaded_vmcs(vmx->loaded_vmcs); 7584 leave_guest_mode(vcpu);
7585 vmx_load_vmcs01(vcpu);
7529 free_nested(vmx); 7586 free_nested(vmx);
7587 free_loaded_vmcs(vmx->loaded_vmcs);
7530 kfree(vmx->guest_msrs); 7588 kfree(vmx->guest_msrs);
7531 kvm_vcpu_uninit(vcpu); 7589 kvm_vcpu_uninit(vcpu);
7532 kmem_cache_free(kvm_vcpu_cache, vmx); 7590 kmem_cache_free(kvm_vcpu_cache, vmx);
@@ -7548,6 +7606,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
7548 goto free_vcpu; 7606 goto free_vcpu;
7549 7607
7550 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); 7608 vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
7609 BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
7610 > PAGE_SIZE);
7611
7551 err = -ENOMEM; 7612 err = -ENOMEM;
7552 if (!vmx->guest_msrs) { 7613 if (!vmx->guest_msrs) {
7553 goto uninit_vcpu; 7614 goto uninit_vcpu;
@@ -7836,7 +7897,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7836 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); 7897 vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
7837 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); 7898 vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
7838 7899
7839 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); 7900 if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
7901 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
7902 vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
7903 } else {
7904 kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
7905 vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
7906 }
7840 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 7907 vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
7841 vmcs12->vm_entry_intr_info_field); 7908 vmcs12->vm_entry_intr_info_field);
7842 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, 7909 vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
@@ -7846,7 +7913,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
7846 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 7913 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
7847 vmcs12->guest_interruptibility_info); 7914 vmcs12->guest_interruptibility_info);
7848 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); 7915 vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
7849 kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
7850 vmx_set_rflags(vcpu, vmcs12->guest_rflags); 7916 vmx_set_rflags(vcpu, vmcs12->guest_rflags);
7851 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 7917 vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
7852 vmcs12->guest_pending_dbg_exceptions); 7918 vmcs12->guest_pending_dbg_exceptions);
@@ -8113,14 +8179,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8113 } 8179 }
8114 8180
8115 if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && 8181 if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
8116 !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { 8182 !PAGE_ALIGNED(vmcs12->msr_bitmap)) {
8117 /*TODO: Also verify bits beyond physical address width are 0*/ 8183 /*TODO: Also verify bits beyond physical address width are 0*/
8118 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8184 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8119 return 1; 8185 return 1;
8120 } 8186 }
8121 8187
8122 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && 8188 if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
8123 !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { 8189 !PAGE_ALIGNED(vmcs12->apic_access_addr)) {
8124 /*TODO: Also verify bits beyond physical address width are 0*/ 8190 /*TODO: Also verify bits beyond physical address width are 0*/
8125 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8191 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8126 return 1; 8192 return 1;
@@ -8136,15 +8202,18 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8136 } 8202 }
8137 8203
8138 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, 8204 if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
8139 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || 8205 nested_vmx_true_procbased_ctls_low,
8206 nested_vmx_procbased_ctls_high) ||
8140 !vmx_control_verify(vmcs12->secondary_vm_exec_control, 8207 !vmx_control_verify(vmcs12->secondary_vm_exec_control,
8141 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || 8208 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) ||
8142 !vmx_control_verify(vmcs12->pin_based_vm_exec_control, 8209 !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
8143 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || 8210 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) ||
8144 !vmx_control_verify(vmcs12->vm_exit_controls, 8211 !vmx_control_verify(vmcs12->vm_exit_controls,
8145 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || 8212 nested_vmx_true_exit_ctls_low,
8213 nested_vmx_exit_ctls_high) ||
8146 !vmx_control_verify(vmcs12->vm_entry_controls, 8214 !vmx_control_verify(vmcs12->vm_entry_controls,
8147 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) 8215 nested_vmx_true_entry_ctls_low,
8216 nested_vmx_entry_ctls_high))
8148 { 8217 {
8149 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); 8218 nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
8150 return 1; 8219 return 1;
@@ -8221,6 +8290,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
8221 8290
8222 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); 8291 vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
8223 8292
8293 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
8294 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8295
8224 cpu = get_cpu(); 8296 cpu = get_cpu();
8225 vmx->loaded_vmcs = vmcs02; 8297 vmx->loaded_vmcs = vmcs02;
8226 vmx_vcpu_put(vcpu); 8298 vmx_vcpu_put(vcpu);
@@ -8398,7 +8470,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8398 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); 8470 vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
8399 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); 8471 vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
8400 8472
8401 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
8402 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); 8473 vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
8403 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); 8474 vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP);
8404 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); 8475 vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
@@ -8477,9 +8548,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
8477 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | 8548 (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
8478 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); 8549 (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
8479 8550
8551 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) {
8552 kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
8553 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8554 }
8555
8480 /* TODO: These cannot have changed unless we have MSR bitmaps and 8556 /* TODO: These cannot have changed unless we have MSR bitmaps and
8481 * the relevant bit asks not to trap the change */ 8557 * the relevant bit asks not to trap the change */
8482 vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
8483 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) 8558 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
8484 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); 8559 vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
8485 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) 8560 if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
@@ -8670,7 +8745,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8670 unsigned long exit_qualification) 8745 unsigned long exit_qualification)
8671{ 8746{
8672 struct vcpu_vmx *vmx = to_vmx(vcpu); 8747 struct vcpu_vmx *vmx = to_vmx(vcpu);
8673 int cpu;
8674 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 8748 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
8675 8749
8676 /* trying to cancel vmlaunch/vmresume is a bug */ 8750 /* trying to cancel vmlaunch/vmresume is a bug */
@@ -8695,12 +8769,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
8695 vmcs12->vm_exit_intr_error_code, 8769 vmcs12->vm_exit_intr_error_code,
8696 KVM_ISA_VMX); 8770 KVM_ISA_VMX);
8697 8771
8698 cpu = get_cpu(); 8772 vmx_load_vmcs01(vcpu);
8699 vmx->loaded_vmcs = &vmx->vmcs01;
8700 vmx_vcpu_put(vcpu);
8701 vmx_vcpu_load(vcpu, cpu);
8702 vcpu->cpu = cpu;
8703 put_cpu();
8704 8773
8705 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); 8774 vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
8706 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); 8775 vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
@@ -8890,7 +8959,7 @@ static int __init vmx_init(void)
8890 8959
8891 rdmsrl_safe(MSR_EFER, &host_efer); 8960 rdmsrl_safe(MSR_EFER, &host_efer);
8892 8961
8893 for (i = 0; i < NR_VMX_MSR; ++i) 8962 for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
8894 kvm_define_shared_msr(i, vmx_msr_index[i]); 8963 kvm_define_shared_msr(i, vmx_msr_index[i]);
8895 8964
8896 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); 8965 vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef432f891d30..b86d329b953a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,7 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
87 87
88static void update_cr8_intercept(struct kvm_vcpu *vcpu); 88static void update_cr8_intercept(struct kvm_vcpu *vcpu);
89static void process_nmi(struct kvm_vcpu *vcpu); 89static void process_nmi(struct kvm_vcpu *vcpu);
90static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
90 91
91struct kvm_x86_ops *kvm_x86_ops; 92struct kvm_x86_ops *kvm_x86_ops;
92EXPORT_SYMBOL_GPL(kvm_x86_ops); 93EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -211,6 +212,7 @@ static void shared_msr_update(unsigned slot, u32 msr)
211 212
212void kvm_define_shared_msr(unsigned slot, u32 msr) 213void kvm_define_shared_msr(unsigned slot, u32 msr)
213{ 214{
215 BUG_ON(slot >= KVM_NR_SHARED_MSRS);
214 if (slot >= shared_msrs_global.nr) 216 if (slot >= shared_msrs_global.nr)
215 shared_msrs_global.nr = slot + 1; 217 shared_msrs_global.nr = slot + 1;
216 shared_msrs_global.msrs[slot] = msr; 218 shared_msrs_global.msrs[slot] = msr;
@@ -310,6 +312,31 @@ static int exception_class(int vector)
310 return EXCPT_BENIGN; 312 return EXCPT_BENIGN;
311} 313}
312 314
315#define EXCPT_FAULT 0
316#define EXCPT_TRAP 1
317#define EXCPT_ABORT 2
318#define EXCPT_INTERRUPT 3
319
320static int exception_type(int vector)
321{
322 unsigned int mask;
323
324 if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
325 return EXCPT_INTERRUPT;
326
327 mask = 1 << vector;
328
329 /* #DB is trap, as instruction watchpoints are handled elsewhere */
330 if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
331 return EXCPT_TRAP;
332
333 if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
334 return EXCPT_ABORT;
335
336 /* Reserved exceptions will result in fault */
337 return EXCPT_FAULT;
338}
339
313static void kvm_multiple_exception(struct kvm_vcpu *vcpu, 340static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
314 unsigned nr, bool has_error, u32 error_code, 341 unsigned nr, bool has_error, u32 error_code,
315 bool reinject) 342 bool reinject)
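The exception_type() helper added here feeds the later inject_pending_event() change: only faults get RFLAGS.RF forced on during injection, while traps and aborts do not. A compilable user-space mirror of the classification, with the NMI/WARN special case dropped for brevity; vector numbers follow the architectural assignments:

#include <stdio.h>
#include <stddef.h>

#define DB_VECTOR  1
#define BP_VECTOR  3
#define OF_VECTOR  4
#define DF_VECTOR  8
#define MC_VECTOR 18

enum excpt_class { EXCPT_FAULT, EXCPT_TRAP, EXCPT_ABORT, EXCPT_INTERRUPT };

static enum excpt_class exception_type(int vector)
{
        unsigned int mask;

        if (vector > 31)
                return EXCPT_INTERRUPT;
        mask = 1u << vector;
        if (mask & ((1u << DB_VECTOR) | (1u << BP_VECTOR) | (1u << OF_VECTOR)))
                return EXCPT_TRAP;
        if (mask & ((1u << DF_VECTOR) | (1u << MC_VECTOR)))
                return EXCPT_ABORT;
        return EXCPT_FAULT;     /* #GP, #PF, #UD, ... and reserved vectors */
}

int main(void)
{
        static const char *name[] = { "fault", "trap", "abort", "interrupt" };
        int vectors[] = { 1, 3, 6, 8, 13, 14, 18, 32 };

        for (size_t i = 0; i < sizeof(vectors) / sizeof(vectors[0]); i++)
                printf("vector %2d -> %s\n", vectors[i], name[exception_type(vectors[i])]);
        return 0;
}
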
@@ -758,6 +785,15 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu)
758 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; 785 vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
759} 786}
760 787
788static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
789{
790 u64 fixed = DR6_FIXED_1;
791
792 if (!guest_cpuid_has_rtm(vcpu))
793 fixed |= DR6_RTM;
794 return fixed;
795}
796
761static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) 797static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
762{ 798{
763 switch (dr) { 799 switch (dr) {
@@ -773,7 +809,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
773 case 6: 809 case 6:
774 if (val & 0xffffffff00000000ULL) 810 if (val & 0xffffffff00000000ULL)
775 return -1; /* #GP */ 811 return -1; /* #GP */
776 vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; 812 vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
777 kvm_update_dr6(vcpu); 813 kvm_update_dr6(vcpu);
778 break; 814 break;
779 case 5: 815 case 5:
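kvm_dr6_fixed() captures the fact that DR6 bit 16 (RTM) is active-low and only writable when the guest's CPUID reports RTM; on other guests it joins the fixed-1 mask, which is why DR6_RTM shows up throughout this series. A small sketch of the resulting MOV-to-DR6 behaviour; the constant values follow the accompanying header change, which is not part of this diff, so treat them as assumptions:

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define DR6_FIXED_1  0xfffe0ff0ull   /* assumed: always-1 bits, RTM excluded */
#define DR6_RTM      0x00010000ull   /* assumed: reads as 1 when RTM is absent */
#define DR6_VOLATILE 0x0001e00full   /* assumed: B0-B3, BD, BS, BT, RTM */

static uint64_t dr6_fixed(bool guest_has_rtm)
{
        uint64_t fixed = DR6_FIXED_1;

        if (!guest_has_rtm)
                fixed |= DR6_RTM;
        return fixed;
}

static uint64_t set_dr6(uint64_t val, bool guest_has_rtm)
{
        /* Mirrors __kvm_set_dr(): keep volatile bits, force fixed bits. */
        return (val & DR6_VOLATILE) | dr6_fixed(guest_has_rtm);
}

int main(void)
{
        /* Guest writes 0: without RTM, bit 16 sticks along with the other fixed bits. */
        printf("no RTM: %#llx\n", (unsigned long long)set_dr6(0, false));
        /* With RTM exposed, bit 16 is writable and stays clear here. */
        printf("RTM   : %#llx\n", (unsigned long long)set_dr6(0, true));
        return 0;
}
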
@@ -1215,6 +1251,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1215 unsigned long flags; 1251 unsigned long flags;
1216 s64 usdiff; 1252 s64 usdiff;
1217 bool matched; 1253 bool matched;
1254 bool already_matched;
1218 u64 data = msr->data; 1255 u64 data = msr->data;
1219 1256
1220 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); 1257 raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
@@ -1279,6 +1316,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1279 pr_debug("kvm: adjusted tsc offset by %llu\n", delta); 1316 pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
1280 } 1317 }
1281 matched = true; 1318 matched = true;
1319 already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
1282 } else { 1320 } else {
1283 /* 1321 /*
1284 * We split periods of matched TSC writes into generations. 1322 * We split periods of matched TSC writes into generations.
@@ -1294,7 +1332,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1294 kvm->arch.cur_tsc_write = data; 1332 kvm->arch.cur_tsc_write = data;
1295 kvm->arch.cur_tsc_offset = offset; 1333 kvm->arch.cur_tsc_offset = offset;
1296 matched = false; 1334 matched = false;
1297 pr_debug("kvm: new tsc generation %u, clock %llu\n", 1335 pr_debug("kvm: new tsc generation %llu, clock %llu\n",
1298 kvm->arch.cur_tsc_generation, data); 1336 kvm->arch.cur_tsc_generation, data);
1299 } 1337 }
1300 1338
@@ -1319,10 +1357,11 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
1319 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); 1357 raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
1320 1358
1321 spin_lock(&kvm->arch.pvclock_gtod_sync_lock); 1359 spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
1322 if (matched) 1360 if (!matched) {
1323 kvm->arch.nr_vcpus_matched_tsc++;
1324 else
1325 kvm->arch.nr_vcpus_matched_tsc = 0; 1361 kvm->arch.nr_vcpus_matched_tsc = 0;
1362 } else if (!already_matched) {
1363 kvm->arch.nr_vcpus_matched_tsc++;
1364 }
1326 1365
1327 kvm_track_tsc_matching(vcpu); 1366 kvm_track_tsc_matching(vcpu);
1328 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock); 1367 spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
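The matched/already_matched change fixes the nr_vcpus_matched_tsc bookkeeping: a vCPU that rewrites the same TSC value within the current generation no longer inflates the count. A condensed user-space model of the update (the generation handling is folded into one helper here, whereas the kernel spreads it across kvm_write_tsc):

#include <stdio.h>
#include <stdbool.h>

struct kvm_arch_sketch {
        unsigned long long cur_tsc_generation;
        int nr_vcpus_matched_tsc;
};

struct vcpu_sketch {
        unsigned long long this_tsc_generation;
};

static void record_tsc_write(struct kvm_arch_sketch *kvm, struct vcpu_sketch *vcpu,
                             bool matched)
{
        bool already_matched =
                (vcpu->this_tsc_generation == kvm->cur_tsc_generation);

        if (!matched) {
                kvm->cur_tsc_generation++;      /* unmatched write starts a new generation */
                kvm->nr_vcpus_matched_tsc = 0;
        } else if (!already_matched) {
                kvm->nr_vcpus_matched_tsc++;    /* count each vCPU at most once */
        }
        vcpu->this_tsc_generation = kvm->cur_tsc_generation;
}

int main(void)
{
        struct kvm_arch_sketch kvm = { .cur_tsc_generation = 1 };
        struct vcpu_sketch v0 = { 0 }, v1 = { 0 };

        record_tsc_write(&kvm, &v0, false);  /* v0 starts a new generation   */
        record_tsc_write(&kvm, &v1, true);   /* v1 matches -> count becomes 1 */
        record_tsc_write(&kvm, &v1, true);   /* repeated write -> still 1     */
        printf("matched vcpus: %d\n", kvm.nr_vcpus_matched_tsc);
        return 0;
}
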
@@ -2032,6 +2071,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
2032 data &= ~(u64)0x40; /* ignore flush filter disable */ 2071 data &= ~(u64)0x40; /* ignore flush filter disable */
2033 data &= ~(u64)0x100; /* ignore ignne emulation enable */ 2072 data &= ~(u64)0x100; /* ignore ignne emulation enable */
2034 data &= ~(u64)0x8; /* ignore TLB cache disable */ 2073 data &= ~(u64)0x8; /* ignore TLB cache disable */
2074 data &= ~(u64)0x40000; /* ignore Mc status write enable */
2035 if (data != 0) { 2075 if (data != 0) {
2036 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", 2076 vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
2037 data); 2077 data);
@@ -2974,9 +3014,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
2974 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; 3014 vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
2975 events->interrupt.nr = vcpu->arch.interrupt.nr; 3015 events->interrupt.nr = vcpu->arch.interrupt.nr;
2976 events->interrupt.soft = 0; 3016 events->interrupt.soft = 0;
2977 events->interrupt.shadow = 3017 events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
2978 kvm_x86_ops->get_interrupt_shadow(vcpu,
2979 KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
2980 3018
2981 events->nmi.injected = vcpu->arch.nmi_injected; 3019 events->nmi.injected = vcpu->arch.nmi_injected;
2982 events->nmi.pending = vcpu->arch.nmi_pending != 0; 3020 events->nmi.pending = vcpu->arch.nmi_pending != 0;
@@ -4082,7 +4120,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
4082 4120
4083 if (gpa == UNMAPPED_GVA) 4121 if (gpa == UNMAPPED_GVA)
4084 return X86EMUL_PROPAGATE_FAULT; 4122 return X86EMUL_PROPAGATE_FAULT;
4085 ret = kvm_read_guest(vcpu->kvm, gpa, data, toread); 4123 ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
4124 offset, toread);
4086 if (ret < 0) { 4125 if (ret < 0) {
4087 r = X86EMUL_IO_NEEDED; 4126 r = X86EMUL_IO_NEEDED;
4088 goto out; 4127 goto out;
@@ -4103,10 +4142,24 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
4103{ 4142{
4104 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 4143 struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
4105 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; 4144 u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
4145 unsigned offset;
4146 int ret;
4106 4147
4107 return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 4148 /* Inline kvm_read_guest_virt_helper for speed. */
4108 access | PFERR_FETCH_MASK, 4149 gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
4109 exception); 4150 exception);
4151 if (unlikely(gpa == UNMAPPED_GVA))
4152 return X86EMUL_PROPAGATE_FAULT;
4153
4154 offset = addr & (PAGE_SIZE-1);
4155 if (WARN_ON(offset + bytes > PAGE_SIZE))
4156 bytes = (unsigned)PAGE_SIZE - offset;
4157 ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
4158 offset, bytes);
4159 if (unlikely(ret < 0))
4160 return X86EMUL_IO_NEEDED;
4161
4162 return X86EMUL_CONTINUE;
4110} 4163}
4111 4164
4112int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt, 4165int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
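The inlined fetch path above translates the address once and then performs a single in-page read, so the byte count has to be clamped to what remains of the 4 KiB page containing the address (the WARN_ON documents that callers are never expected to cross a page). A sketch of just that clamping, with the translation stubbed out:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

static size_t in_page_fetch_len(uint64_t gva, size_t bytes)
{
        size_t offset = (size_t)(gva & (PAGE_SIZE - 1));

        if (offset + bytes > PAGE_SIZE)       /* read would cross the page */
                bytes = PAGE_SIZE - offset;   /* truncate to the page end  */
        return bytes;
}

int main(void)
{
        /* A 15-byte fetch starting 5 bytes before a page boundary. */
        printf("%zu bytes readable in this page\n", in_page_fetch_len(0x7ffb, 15));
        return 0;
}
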
@@ -4730,7 +4783,6 @@ static void emulator_set_segment(struct x86_emulate_ctxt *ctxt, u16 selector,
4730 if (desc->g) 4783 if (desc->g)
4731 var.limit = (var.limit << 12) | 0xfff; 4784 var.limit = (var.limit << 12) | 0xfff;
4732 var.type = desc->type; 4785 var.type = desc->type;
4733 var.present = desc->p;
4734 var.dpl = desc->dpl; 4786 var.dpl = desc->dpl;
4735 var.db = desc->d; 4787 var.db = desc->d;
4736 var.s = desc->s; 4788 var.s = desc->s;
@@ -4762,6 +4814,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
4762 return kvm_set_msr(emul_to_vcpu(ctxt), &msr); 4814 return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
4763} 4815}
4764 4816
4817static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
4818 u32 pmc)
4819{
4820 return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
4821}
4822
4765static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, 4823static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
4766 u32 pmc, u64 *pdata) 4824 u32 pmc, u64 *pdata)
4767{ 4825{
@@ -4838,6 +4896,7 @@ static const struct x86_emulate_ops emulate_ops = {
4838 .set_dr = emulator_set_dr, 4896 .set_dr = emulator_set_dr,
4839 .set_msr = emulator_set_msr, 4897 .set_msr = emulator_set_msr,
4840 .get_msr = emulator_get_msr, 4898 .get_msr = emulator_get_msr,
4899 .check_pmc = emulator_check_pmc,
4841 .read_pmc = emulator_read_pmc, 4900 .read_pmc = emulator_read_pmc,
4842 .halt = emulator_halt, 4901 .halt = emulator_halt,
4843 .wbinvd = emulator_wbinvd, 4902 .wbinvd = emulator_wbinvd,
@@ -4850,7 +4909,7 @@ static const struct x86_emulate_ops emulate_ops = {
4850 4909
4851static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) 4910static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4852{ 4911{
4853 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); 4912 u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
4854 /* 4913 /*
4855 * an sti; sti; sequence only disable interrupts for the first 4914 * an sti; sti; sequence only disable interrupts for the first
4856 * instruction. So, if the last instruction, be it emulated or 4915 * instruction. So, if the last instruction, be it emulated or
@@ -4858,8 +4917,13 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
4858 * means that the last instruction is an sti. We should not 4917 * means that the last instruction is an sti. We should not
4859 * leave the flag on in this case. The same goes for mov ss 4918 * leave the flag on in this case. The same goes for mov ss
4860 */ 4919 */
4861 if (!(int_shadow & mask)) 4920 if (int_shadow & mask)
4921 mask = 0;
4922 if (unlikely(int_shadow || mask)) {
4862 kvm_x86_ops->set_interrupt_shadow(vcpu, mask); 4923 kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
4924 if (!mask)
4925 kvm_make_request(KVM_REQ_EVENT, vcpu);
4926 }
4863} 4927}
4864 4928
4865static void inject_emulated_exception(struct kvm_vcpu *vcpu) 4929static void inject_emulated_exception(struct kvm_vcpu *vcpu)
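The reworked toggle_interruptibility() drops the shadow for sti;sti / mov ss;mov ss sequences as before, but now skips the vendor callback entirely when there is no shadow to change, and requests event re-evaluation itself when a shadow is being cleared. A behavioural sketch, with the kvm_x86_ops hooks replaced by stand-ins:

#include <stdio.h>
#include <stdbool.h>

static unsigned int hw_shadow;          /* models the VMCS/VMCB shadow state */
static bool event_request;              /* models KVM_REQ_EVENT              */

static unsigned int get_interrupt_shadow(void) { return hw_shadow; }

static void set_interrupt_shadow(unsigned int mask)
{
        hw_shadow = mask;
        printf("  vendor callback: shadow <- %u\n", mask);
}

static void toggle_interruptibility(unsigned int mask)
{
        unsigned int int_shadow = get_interrupt_shadow();

        if (int_shadow & mask)          /* back-to-back sti / mov ss: drop the shadow */
                mask = 0;
        if (int_shadow || mask) {       /* skip the callback when both are zero */
                set_interrupt_shadow(mask);
                if (!mask)              /* shadow went away: recheck pending events */
                        event_request = true;
        }
}

int main(void)
{
        printf("no shadow, none requested (no callback expected):\n");
        toggle_interruptibility(0);
        hw_shadow = 1;
        printf("shadow set, emulation requests it again (sti;sti case):\n");
        toggle_interruptibility(1);
        printf("event request pending: %d\n", event_request);
        return 0;
}
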
@@ -4874,19 +4938,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
4874 kvm_queue_exception(vcpu, ctxt->exception.vector); 4938 kvm_queue_exception(vcpu, ctxt->exception.vector);
4875} 4939}
4876 4940
4877static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
4878{
4879 memset(&ctxt->opcode_len, 0,
4880 (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
4881
4882 ctxt->fetch.start = 0;
4883 ctxt->fetch.end = 0;
4884 ctxt->io_read.pos = 0;
4885 ctxt->io_read.end = 0;
4886 ctxt->mem_read.pos = 0;
4887 ctxt->mem_read.end = 0;
4888}
4889
4890static void init_emulate_ctxt(struct kvm_vcpu *vcpu) 4941static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
4891{ 4942{
4892 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; 4943 struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
@@ -5085,23 +5136,22 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
5085 return dr6; 5136 return dr6;
5086} 5137}
5087 5138
5088static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r) 5139static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
5089{ 5140{
5090 struct kvm_run *kvm_run = vcpu->run; 5141 struct kvm_run *kvm_run = vcpu->run;
5091 5142
5092 /* 5143 /*
5093 * Use the "raw" value to see if TF was passed to the processor. 5144 * rflags is the old, "raw" value of the flags. The new value has
5094 * Note that the new value of the flags has not been saved yet. 5145 * not been saved yet.
5095 * 5146 *
5096 * This is correct even for TF set by the guest, because "the 5147 * This is correct even for TF set by the guest, because "the
5097 * processor will not generate this exception after the instruction 5148 * processor will not generate this exception after the instruction
5098 * that sets the TF flag". 5149 * that sets the TF flag".
5099 */ 5150 */
5100 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5101
5102 if (unlikely(rflags & X86_EFLAGS_TF)) { 5151 if (unlikely(rflags & X86_EFLAGS_TF)) {
5103 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { 5152 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
5104 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1; 5153 kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
5154 DR6_RTM;
5105 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; 5155 kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
5106 kvm_run->debug.arch.exception = DB_VECTOR; 5156 kvm_run->debug.arch.exception = DB_VECTOR;
5107 kvm_run->exit_reason = KVM_EXIT_DEBUG; 5157 kvm_run->exit_reason = KVM_EXIT_DEBUG;
@@ -5114,7 +5164,7 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
5114 * cleared by the processor". 5164 * cleared by the processor".
5115 */ 5165 */
5116 vcpu->arch.dr6 &= ~15; 5166 vcpu->arch.dr6 &= ~15;
5117 vcpu->arch.dr6 |= DR6_BS; 5167 vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
5118 kvm_queue_exception(vcpu, DB_VECTOR); 5168 kvm_queue_exception(vcpu, DB_VECTOR);
5119 } 5169 }
5120 } 5170 }
@@ -5133,7 +5183,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5133 vcpu->arch.eff_db); 5183 vcpu->arch.eff_db);
5134 5184
5135 if (dr6 != 0) { 5185 if (dr6 != 0) {
5136 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1; 5186 kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
5137 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) + 5187 kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
5138 get_segment_base(vcpu, VCPU_SREG_CS); 5188 get_segment_base(vcpu, VCPU_SREG_CS);
5139 5189
@@ -5144,14 +5194,15 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
5144 } 5194 }
5145 } 5195 }
5146 5196
5147 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) { 5197 if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
5198 !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
5148 dr6 = kvm_vcpu_check_hw_bp(eip, 0, 5199 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
5149 vcpu->arch.dr7, 5200 vcpu->arch.dr7,
5150 vcpu->arch.db); 5201 vcpu->arch.db);
5151 5202
5152 if (dr6 != 0) { 5203 if (dr6 != 0) {
5153 vcpu->arch.dr6 &= ~15; 5204 vcpu->arch.dr6 &= ~15;
5154 vcpu->arch.dr6 |= dr6; 5205 vcpu->arch.dr6 |= dr6 | DR6_RTM;
5155 kvm_queue_exception(vcpu, DB_VECTOR); 5206 kvm_queue_exception(vcpu, DB_VECTOR);
5156 *r = EMULATE_DONE; 5207 *r = EMULATE_DONE;
5157 return true; 5208 return true;
@@ -5215,6 +5266,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
5215 5266
5216 if (emulation_type & EMULTYPE_SKIP) { 5267 if (emulation_type & EMULTYPE_SKIP) {
5217 kvm_rip_write(vcpu, ctxt->_eip); 5268 kvm_rip_write(vcpu, ctxt->_eip);
5269 if (ctxt->eflags & X86_EFLAGS_RF)
5270 kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
5218 return EMULATE_DONE; 5271 return EMULATE_DONE;
5219 } 5272 }
5220 5273
@@ -5265,13 +5318,22 @@ restart:
5265 r = EMULATE_DONE; 5318 r = EMULATE_DONE;
5266 5319
5267 if (writeback) { 5320 if (writeback) {
5321 unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
5268 toggle_interruptibility(vcpu, ctxt->interruptibility); 5322 toggle_interruptibility(vcpu, ctxt->interruptibility);
5269 kvm_make_request(KVM_REQ_EVENT, vcpu);
5270 vcpu->arch.emulate_regs_need_sync_to_vcpu = false; 5323 vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
5271 kvm_rip_write(vcpu, ctxt->eip); 5324 kvm_rip_write(vcpu, ctxt->eip);
5272 if (r == EMULATE_DONE) 5325 if (r == EMULATE_DONE)
5273 kvm_vcpu_check_singlestep(vcpu, &r); 5326 kvm_vcpu_check_singlestep(vcpu, rflags, &r);
5274 kvm_set_rflags(vcpu, ctxt->eflags); 5327 __kvm_set_rflags(vcpu, ctxt->eflags);
5328
5329 /*
5330 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
5331 * do nothing, and it will be requested again as soon as
5332 * the shadow expires. But we still need to check here,
5333 * because POPF has no interrupt shadow.
5334 */
5335 if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
5336 kvm_make_request(KVM_REQ_EVENT, vcpu);
5275 } else 5337 } else
5276 vcpu->arch.emulate_regs_need_sync_to_vcpu = true; 5338 vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
5277 5339
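The comment in the hunk above explains why the writeback path still checks for interrupts becoming enabled: POPF sets IF with no interrupt shadow, so no later shadow expiry will raise KVM_REQ_EVENT. The check boils down to looking for bits that are set in the new flags but were clear in the old ones. A tiny sketch of that condition:

#include <stdio.h>
#include <stdbool.h>

#define X86_EFLAGS_IF 0x200ul

static bool if_just_enabled(unsigned long old_rflags, unsigned long new_rflags)
{
        /* Bits newly set by the emulated instruction, filtered down to IF. */
        return (new_rflags & ~old_rflags) & X86_EFLAGS_IF;
}

int main(void)
{
        printf("popf enabling IF : %d\n", if_just_enabled(0x2, 0x202));   /* request event */
        printf("popf keeping IF  : %d\n", if_just_enabled(0x202, 0x202)); /* nothing to do */
        return 0;
}
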
@@ -5662,7 +5724,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5662 u64 param, ingpa, outgpa, ret; 5724 u64 param, ingpa, outgpa, ret;
5663 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; 5725 uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
5664 bool fast, longmode; 5726 bool fast, longmode;
5665 int cs_db, cs_l;
5666 5727
5667 /* 5728 /*
5668 * hypercall generates UD from non zero cpl and real mode 5729 * hypercall generates UD from non zero cpl and real mode
@@ -5673,8 +5734,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
5673 return 0; 5734 return 0;
5674 } 5735 }
5675 5736
5676 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); 5737 longmode = is_64_bit_mode(vcpu);
5677 longmode = is_long_mode(vcpu) && cs_l == 1;
5678 5738
5679 if (!longmode) { 5739 if (!longmode) {
5680 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | 5740 param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
@@ -5739,7 +5799,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
5739int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) 5799int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5740{ 5800{
5741 unsigned long nr, a0, a1, a2, a3, ret; 5801 unsigned long nr, a0, a1, a2, a3, ret;
5742 int r = 1; 5802 int op_64_bit, r = 1;
5743 5803
5744 if (kvm_hv_hypercall_enabled(vcpu->kvm)) 5804 if (kvm_hv_hypercall_enabled(vcpu->kvm))
5745 return kvm_hv_hypercall(vcpu); 5805 return kvm_hv_hypercall(vcpu);
@@ -5752,7 +5812,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5752 5812
5753 trace_kvm_hypercall(nr, a0, a1, a2, a3); 5813 trace_kvm_hypercall(nr, a0, a1, a2, a3);
5754 5814
5755 if (!is_long_mode(vcpu)) { 5815 op_64_bit = is_64_bit_mode(vcpu);
5816 if (!op_64_bit) {
5756 nr &= 0xFFFFFFFF; 5817 nr &= 0xFFFFFFFF;
5757 a0 &= 0xFFFFFFFF; 5818 a0 &= 0xFFFFFFFF;
5758 a1 &= 0xFFFFFFFF; 5819 a1 &= 0xFFFFFFFF;
@@ -5778,6 +5839,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
5778 break; 5839 break;
5779 } 5840 }
5780out: 5841out:
5842 if (!op_64_bit)
5843 ret = (u32)ret;
5781 kvm_register_write(vcpu, VCPU_REGS_RAX, ret); 5844 kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
5782 ++vcpu->stat.hypercalls; 5845 ++vcpu->stat.hypercalls;
5783 return r; 5846 return r;
@@ -5856,6 +5919,11 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
5856 trace_kvm_inj_exception(vcpu->arch.exception.nr, 5919 trace_kvm_inj_exception(vcpu->arch.exception.nr,
5857 vcpu->arch.exception.has_error_code, 5920 vcpu->arch.exception.has_error_code,
5858 vcpu->arch.exception.error_code); 5921 vcpu->arch.exception.error_code);
5922
5923 if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
5924 __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
5925 X86_EFLAGS_RF);
5926
5859 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, 5927 kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
5860 vcpu->arch.exception.has_error_code, 5928 vcpu->arch.exception.has_error_code,
5861 vcpu->arch.exception.error_code, 5929 vcpu->arch.exception.error_code,
@@ -6847,9 +6915,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
6847 atomic_set(&vcpu->arch.nmi_queued, 0); 6915 atomic_set(&vcpu->arch.nmi_queued, 0);
6848 vcpu->arch.nmi_pending = 0; 6916 vcpu->arch.nmi_pending = 0;
6849 vcpu->arch.nmi_injected = false; 6917 vcpu->arch.nmi_injected = false;
6918 kvm_clear_interrupt_queue(vcpu);
6919 kvm_clear_exception_queue(vcpu);
6850 6920
6851 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); 6921 memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
6852 vcpu->arch.dr6 = DR6_FIXED_1; 6922 vcpu->arch.dr6 = DR6_INIT;
6853 kvm_update_dr6(vcpu); 6923 kvm_update_dr6(vcpu);
6854 vcpu->arch.dr7 = DR7_FIXED_1; 6924 vcpu->arch.dr7 = DR7_FIXED_1;
6855 kvm_update_dr7(vcpu); 6925 kvm_update_dr7(vcpu);
@@ -7405,12 +7475,17 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
7405} 7475}
7406EXPORT_SYMBOL_GPL(kvm_get_rflags); 7476EXPORT_SYMBOL_GPL(kvm_get_rflags);
7407 7477
7408void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) 7478static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7409{ 7479{
7410 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && 7480 if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
7411 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) 7481 kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
7412 rflags |= X86_EFLAGS_TF; 7482 rflags |= X86_EFLAGS_TF;
7413 kvm_x86_ops->set_rflags(vcpu, rflags); 7483 kvm_x86_ops->set_rflags(vcpu, rflags);
7484}
7485
7486void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
7487{
7488 __kvm_set_rflags(vcpu, rflags);
7414 kvm_make_request(KVM_REQ_EVENT, vcpu); 7489 kvm_make_request(KVM_REQ_EVENT, vcpu);
7415} 7490}
7416EXPORT_SYMBOL_GPL(kvm_set_rflags); 7491EXPORT_SYMBOL_GPL(kvm_set_rflags);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8c97bac9a895..306a1b77581f 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -47,6 +47,16 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu)
47#endif 47#endif
48} 48}
49 49
50static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
51{
52 int cs_db, cs_l;
53
54 if (!is_long_mode(vcpu))
55 return false;
56 kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
57 return cs_l;
58}
59
50static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) 60static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
51{ 61{
52 return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; 62 return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
@@ -108,6 +118,23 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
108 return false; 118 return false;
109} 119}
110 120
121static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu,
122 enum kvm_reg reg)
123{
124 unsigned long val = kvm_register_read(vcpu, reg);
125
126 return is_64_bit_mode(vcpu) ? val : (u32)val;
127}
128
129static inline void kvm_register_writel(struct kvm_vcpu *vcpu,
130 enum kvm_reg reg,
131 unsigned long val)
132{
133 if (!is_64_bit_mode(vcpu))
134 val = (u32)val;
135 return kvm_register_write(vcpu, reg, val);
136}
137
111void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); 138void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
112void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); 139void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
113int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); 140int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
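The new x86.h helpers make register accesses used for instruction handling honour the current execution mode: outside 64-bit code (real mode, protected mode, or compatibility mode in a long-mode guest) only the low 32 bits of a GPR are architecturally meaningful, so reads are masked and writes are zero-extended. A sketch of the read side, with the vCPU reduced to the two bits that drive the decision:

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

struct vcpu_sketch {
        bool long_mode;   /* stands in for is_long_mode() / EFER.LMA */
        bool cs_l;        /* CS.L: 64-bit code segment */
        uint64_t reg;
};

static bool is_64_bit_mode(const struct vcpu_sketch *v)
{
        return v->long_mode && v->cs_l;
}

static uint64_t register_readl(const struct vcpu_sketch *v)
{
        uint64_t val = v->reg;

        /* Truncate to 32 bits unless the vCPU is executing 64-bit code. */
        return is_64_bit_mode(v) ? val : (uint32_t)val;
}

int main(void)
{
        struct vcpu_sketch compat = { .long_mode = true, .cs_l = false,
                                      .reg = 0xdeadbeefcafef00dull };
        struct vcpu_sketch full64 = { .long_mode = true, .cs_l = true,
                                      .reg = 0xdeadbeefcafef00dull };

        printf("compat-mode read: %#llx\n", (unsigned long long)register_readl(&compat));
        printf("64-bit-mode read: %#llx\n", (unsigned long long)register_readl(&full64));
        return 0;
}
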