path: root/arch/x86/kvm/emulate.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-13 12:55:09 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-02-13 12:55:09 -0500
commit	b9085bcbf5f43adf60533f9b635b2e7faeed0fe9 (patch)
tree	e397abf5682a45c096e75b3d0fa99c8e228425fc /arch/x86/kvm/emulate.c
parent	c7d7b98671552abade78834c522b7308bda73c0d (diff)
parent	6557bada461afeaa920a189fae2cff7c8fdce39f (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM update from Paolo Bonzini:
 "Fairly small update, but there are some interesting new features.

  Common:
     Optional support for adding a small amount of polling on each HLT
     instruction executed in the guest (or equivalent for other
     architectures). This can improve latency up to 50% on some
     scenarios (e.g. O_DSYNC writes or TCP_RR netperf tests). This also
     has to be enabled manually for now, but the plan is to auto-tune
     this in the future.

  ARM/ARM64:
     The highlights are support for GICv3 emulation and dirty page
     tracking.

  s390:
     Several optimizations and bugfixes. Also a first: a feature
     exposed by KVM (UUID and long guest name in /proc/sysinfo) before
     it is available in IBM's hypervisor! :)

  MIPS:
     Bugfixes.

  x86:
     Support for PML (page modification logging, a new feature in
     Broadwell Xeons that speeds up dirty page tracking), nested
     virtualization improvements (nested APICv---a nice optimization),
     usual round of emulation fixes.

     There is also a new option to reduce latency of the TSC deadline
     timer in the guest; this needs to be tuned manually.

     Some commits are common between this pull and Catalin's; I see you
     have already included his tree.

  Powerpc:
     Nothing yet. The KVM/PPC changes will come in through the PPC
     maintainers, because I haven't received them yet and I might end
     up being offline for some part of next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (130 commits)
  KVM: ia64: drop kvm.h from installed user headers
  KVM: x86: fix build with !CONFIG_SMP
  KVM: x86: emulate: correct page fault error code for NoWrite instructions
  KVM: Disable compat ioctl for s390
  KVM: s390: add cpu model support
  KVM: s390: use facilities and cpu_id per KVM
  KVM: s390/CPACF: Choose crypto control block format
  s390/kernel: Update /proc/sysinfo file with Extended Name and UUID
  KVM: s390: reenable LPP facility
  KVM: s390: floating irqs: fix user triggerable endless loop
  kvm: add halt_poll_ns module parameter
  kvm: remove KVM_MMIO_SIZE
  KVM: MIPS: Don't leak FPU/DSP to guest
  KVM: MIPS: Disable HTW while in guest
  KVM: nVMX: Enable nested posted interrupt processing
  KVM: nVMX: Enable nested virtual interrupt delivery
  KVM: nVMX: Enable nested apic register virtualization
  KVM: nVMX: Make nested control MSRs per-cpu
  KVM: nVMX: Enable nested virtualize x2apic mode
  KVM: nVMX: Prepare for using hardware MSR bitmap
  ...
Diffstat (limited to 'arch/x86/kvm/emulate.c')
-rw-r--r--	arch/x86/kvm/emulate.c	230
1 file changed, 156 insertions(+), 74 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index de12c1d379f1..e0b794a84c35 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -86,6 +86,7 @@
 #define DstAcc (OpAcc << DstShift)
 #define DstDI (OpDI << DstShift)
 #define DstMem64 (OpMem64 << DstShift)
+#define DstMem16 (OpMem16 << DstShift)
 #define DstImmUByte (OpImmUByte << DstShift)
 #define DstDX (OpDX << DstShift)
 #define DstAccLo (OpAccLo << DstShift)
@@ -124,6 +125,7 @@
 #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */
 #define Escape (5<<15) /* Escape to coprocessor instruction */
 #define InstrDual (6<<15) /* Alternate instruction decoding of mod == 3 */
+#define ModeDual (7<<15) /* Different instruction for 32/64 bit */
 #define Sse (1<<18) /* SSE Vector instruction */
 /* Generic ModRM decode. */
 #define ModRM (1<<19)
@@ -165,10 +167,10 @@
 #define NoMod ((u64)1 << 47) /* Mod field is ignored */
 #define Intercept ((u64)1 << 48) /* Has valid intercept field */
 #define CheckPerm ((u64)1 << 49) /* Has valid check_perm field */
-#define NoBigReal ((u64)1 << 50) /* No big real mode */
 #define PrivUD ((u64)1 << 51) /* #UD instead of #GP on CPL > 0 */
 #define NearBranch ((u64)1 << 52) /* Near branches */
 #define No16 ((u64)1 << 53) /* No 16 bit operand */
+#define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */
 
 #define DstXacc (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -213,6 +215,7 @@ struct opcode {
 		const struct gprefix *gprefix;
 		const struct escape *esc;
 		const struct instr_dual *idual;
+		const struct mode_dual *mdual;
 		void (*fastop)(struct fastop *fake);
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -240,6 +243,11 @@ struct instr_dual {
 	struct opcode mod3;
 };
 
+struct mode_dual {
+	struct opcode mode32;
+	struct opcode mode64;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -262,6 +270,13 @@ struct instr_dual {
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 #define EFLG_RESERVED_ONE_MASK 2
 
+enum x86_transfer_type {
+	X86_TRANSFER_NONE,
+	X86_TRANSFER_CALL_JMP,
+	X86_TRANSFER_RET,
+	X86_TRANSFER_TASK_SWITCH,
+};
+
 static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
 {
 	if (!(ctxt->regs_valid & (1 << nr))) {
@@ -669,9 +684,13 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
 		}
 		if (addr.ea > lim)
 			goto bad;
-		*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-		if (size > *max_size)
-			goto bad;
+		if (lim == 0xffffffff)
+			*max_size = ~0u;
+		else {
+			*max_size = (u64)lim + 1 - addr.ea;
+			if (size > *max_size)
+				goto bad;
+		}
 		la &= (u32)-1;
 		break;
 	}
@@ -722,19 +741,26 @@ static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
 			  const struct desc_struct *cs_desc)
 {
 	enum x86emul_mode mode = ctxt->mode;
+	int rc;
 
 #ifdef CONFIG_X86_64
-	if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
-		u64 efer = 0;
+	if (ctxt->mode >= X86EMUL_MODE_PROT16) {
+		if (cs_desc->l) {
+			u64 efer = 0;
 
-		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-		if (efer & EFER_LMA)
-			mode = X86EMUL_MODE_PROT64;
+			ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+			if (efer & EFER_LMA)
+				mode = X86EMUL_MODE_PROT64;
+		} else
+			mode = X86EMUL_MODE_PROT32; /* temporary value */
 	}
 #endif
 	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
 		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-	return assign_eip(ctxt, dst, mode);
+	rc = assign_eip(ctxt, dst, mode);
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->mode = mode;
+	return rc;
 }
 
 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -1057,8 +1083,6 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
 	asm volatile("fnstcw %0": "+m"(fcw));
 	ctxt->ops->put_fpu(ctxt);
 
-	/* force 2 byte destination */
-	ctxt->dst.bytes = 2;
 	ctxt->dst.val = fcw;
 
 	return X86EMUL_CONTINUE;
@@ -1075,8 +1099,6 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
 	asm volatile("fnstsw %0": "+m"(fsw));
 	ctxt->ops->put_fpu(ctxt);
 
-	/* force 2 byte destination */
-	ctxt->dst.bytes = 2;
 	ctxt->dst.val = fsw;
 
 	return X86EMUL_CONTINUE;
@@ -1223,6 +1245,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 		else {
 			modrm_ea += reg_read(ctxt, base_reg);
 			adjust_modrm_seg(ctxt, base_reg);
+			/* Increment ESP on POP [ESP] */
+			if ((ctxt->d & IncSP) &&
+			    base_reg == VCPU_REGS_RSP)
+				modrm_ea += ctxt->op_bytes;
 		}
 		if (index_reg != 4)
 			modrm_ea += reg_read(ctxt, index_reg) << scale;
@@ -1435,10 +1461,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 		ops->get_gdt(ctxt, dt);
 }
 
-/* allowed just for 8 bytes segments */
-static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				   u16 selector, struct desc_struct *desc,
-				   ulong *desc_addr_p)
+static int get_descriptor_ptr(struct x86_emulate_ctxt *ctxt,
+			      u16 selector, ulong *desc_addr_p)
 {
 	struct desc_ptr dt;
 	u16 index = selector >> 3;
@@ -1449,8 +1473,34 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (dt.size < index * 8 + 7)
 		return emulate_gp(ctxt, selector & 0xfffc);
 
-	*desc_addr_p = addr = dt.address + index * 8;
-	return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
+	addr = dt.address + index * 8;
+
+#ifdef CONFIG_X86_64
+	if (addr >> 32 != 0) {
+		u64 efer = 0;
+
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+		if (!(efer & EFER_LMA))
+			addr &= (u32)-1;
+	}
+#endif
+
+	*desc_addr_p = addr;
+	return X86EMUL_CONTINUE;
+}
+
+/* allowed just for 8 bytes segments */
+static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+				   u16 selector, struct desc_struct *desc,
+				   ulong *desc_addr_p)
+{
+	int rc;
+
+	rc = get_descriptor_ptr(ctxt, selector, desc_addr_p);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
 				   &ctxt->exception);
 }
 
@@ -1458,16 +1508,13 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				    u16 selector, struct desc_struct *desc)
 {
-	struct desc_ptr dt;
-	u16 index = selector >> 3;
+	int rc;
 	ulong addr;
 
-	get_descriptor_table_ptr(ctxt, selector, &dt);
-
-	if (dt.size < index * 8 + 7)
-		return emulate_gp(ctxt, selector & 0xfffc);
+	rc = get_descriptor_ptr(ctxt, selector, &addr);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
 
-	addr = dt.address + index * 8;
 	return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
 				    &ctxt->exception);
 }
@@ -1475,7 +1522,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, int seg, u8 cpl,
-				     bool in_task_switch,
+				     enum x86_transfer_type transfer,
 				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
@@ -1529,11 +1576,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 		return ret;
 
 	err_code = selector & 0xfffc;
-	err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
+	err_vec = (transfer == X86_TRANSFER_TASK_SWITCH) ? TS_VECTOR :
+						   GP_VECTOR;
 
 	/* can't load system descriptor into segment selector */
-	if (seg <= VCPU_SREG_GS && !seg_desc.s)
+	if (seg <= VCPU_SREG_GS && !seg_desc.s) {
+		if (transfer == X86_TRANSFER_CALL_JMP)
+			return X86EMUL_UNHANDLEABLE;
 		goto exception;
+	}
 
 	if (!seg_desc.p) {
 		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
@@ -1605,10 +1656,13 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 	if (seg_desc.s) {
 		/* mark segment as accessed */
-		seg_desc.type |= 1;
-		ret = write_segment_descriptor(ctxt, selector, &seg_desc);
-		if (ret != X86EMUL_CONTINUE)
-			return ret;
+		if (!(seg_desc.type & 1)) {
+			seg_desc.type |= 1;
+			ret = write_segment_descriptor(ctxt, selector,
+						       &seg_desc);
+			if (ret != X86EMUL_CONTINUE)
+				return ret;
+		}
 	} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
 		ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
 					  sizeof(base3), &ctxt->exception);
@@ -1631,7 +1685,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl,
+					 X86_TRANSFER_NONE, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1828,12 +1883,14 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 	unsigned long selector;
 	int rc;
 
-	rc = emulate_pop(ctxt, &selector, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &selector, 2);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
 	if (ctxt->modrm_reg == VCPU_SREG_SS)
 		ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
+	if (ctxt->op_bytes > 2)
+		rsp_increment(ctxt, ctxt->op_bytes - 2);
 
 	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
 	return rc;
@@ -2007,6 +2064,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 
 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
 	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+	ctxt->ops->set_nmi_mask(ctxt, false);
 
 	return rc;
 }
@@ -2041,7 +2099,8 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+				       X86_TRANSFER_CALL_JMP,
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -2130,7 +2189,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl,
+				       X86_TRANSFER_RET,
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -2163,12 +2223,15 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 	fastop(ctxt, em_cmp);
 
 	if (ctxt->eflags & EFLG_ZF) {
-		/* Success: write back to memory. */
+		/* Success: write back to memory; no update of EAX */
+		ctxt->src.type = OP_NONE;
 		ctxt->dst.val = ctxt->src.orig_val;
 	} else {
 		/* Failure: write the value we saw to EAX. */
-		ctxt->dst.type = OP_REG;
-		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+		ctxt->src.type = OP_REG;
+		ctxt->src.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
+		ctxt->src.val = ctxt->dst.orig_val;
+		/* Create write-cycle to dest by writing the same value */
 		ctxt->dst.val = ctxt->dst.orig_val;
 	}
 	return X86EMUL_CONTINUE;
@@ -2556,23 +2619,23 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * it is handled in a context of new task
 	 */
 	ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2694,31 +2757,31 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * it is handled in a context of new task
 	 */
 	ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
-					cpl, true, NULL);
+					cpl, X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 	ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
-					true, NULL);
+					X86_TRANSFER_TASK_SWITCH, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2739,7 +2802,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	save_state_to_tss32(ctxt, &tss_seg);
@@ -2748,13 +2810,11 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
 			     ldt_sel_offset - eip_offset, &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
@@ -2765,7 +2825,6 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 				     sizeof tss_seg.prev_task_link,
 				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
-			/* FIXME: need to provide precise fault address */
 			return ret;
 	}
 
@@ -2999,15 +3058,16 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	struct desc_struct old_desc, new_desc;
 	const struct x86_emulate_ops *ops = ctxt->ops;
 	int cpl = ctxt->ops->cpl(ctxt);
+	enum x86emul_mode prev_mode = ctxt->mode;
 
 	old_eip = ctxt->_eip;
 	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
-				       &new_desc);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+				       X86_TRANSFER_CALL_JMP, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
-		return X86EMUL_CONTINUE;
+		return rc;
 
 	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
@@ -3022,11 +3082,14 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	rc = em_push(ctxt);
 	/* If we failed, we tainted the memory, but the very least we should
 	   restore cs */
-	if (rc != X86EMUL_CONTINUE)
+	if (rc != X86EMUL_CONTINUE) {
+		pr_warn_once("faulting far call emulation tainted memory\n");
 		goto fail;
+	}
 	return rc;
 fail:
 	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	ctxt->mode = prev_mode;
 	return rc;
 
 }
@@ -3477,6 +3540,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt)
 	return X86EMUL_CONTINUE;
 }
 
+static int em_movsxd(struct x86_emulate_ctxt *ctxt)
+{
+	ctxt->dst.val = (s32) ctxt->src.val;
+	return X86EMUL_CONTINUE;
+}
+
 static bool valid_cr(int nr)
 {
 	switch (nr) {
@@ -3676,6 +3745,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
 #define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
+#define MD(_f, _m) { .flags = ((_f) | ModeDual), .u.mdual = (_m) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3738,7 +3808,7 @@ static const struct opcode group1[] = {
 };
 
 static const struct opcode group1A[] = {
-	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
+	I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
 };
 
 static const struct opcode group2[] = {
@@ -3854,7 +3924,7 @@ static const struct gprefix pfx_0f_e7 = {
 };
 
 static const struct escape escape_d9 = { {
-	N, N, N, N, N, N, N, I(DstMem, em_fnstcw),
+	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstcw),
 }, {
 	/* 0xC0 - 0xC7 */
 	N, N, N, N, N, N, N, N,
@@ -3896,7 +3966,7 @@ static const struct escape escape_db = { {
 } };
 
 static const struct escape escape_dd = { {
-	N, N, N, N, N, N, N, I(DstMem, em_fnstsw),
+	N, N, N, N, N, N, N, I(DstMem16 | Mov, em_fnstsw),
 }, {
 	/* 0xC0 - 0xC7 */
 	N, N, N, N, N, N, N, N,
@@ -3920,6 +3990,10 @@ static const struct instr_dual instr_dual_0f_c3 = {
 	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
 };
 
+static const struct mode_dual mode_dual_63 = {
+	N, I(DstReg | SrcMem32 | ModRM | Mov, em_movsxd)
+};
+
 static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	F6ALU(Lock, em_add),
@@ -3954,7 +4028,7 @@ static const struct opcode opcode_table[256] = {
 	/* 0x60 - 0x67 */
 	I(ImplicitOps | Stack | No64, em_pusha),
 	I(ImplicitOps | Stack | No64, em_popa),
-	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
+	N, MD(ModRM, &mode_dual_63),
 	N, N, N, N,
 	/* 0x68 - 0x6F */
 	I(SrcImm | Mov | Stack, em_push),
@@ -4010,8 +4084,8 @@ static const struct opcode opcode_table[256] = {
 	G(ByteOp, group11), G(0, group11),
 	/* 0xC8 - 0xCF */
 	I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
-	I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
-	I(ImplicitOps | Stack, em_ret_far),
+	I(ImplicitOps | SrcImmU16, em_ret_far_imm),
+	I(ImplicitOps, em_ret_far),
 	D(ImplicitOps), DI(SrcImmByte, intn),
 	D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
 	/* 0xD0 - 0xD7 */
@@ -4108,7 +4182,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),
 	GD(0, &group15), F(DstReg | SrcMem | ModRM, em_imul),
 	/* 0xB0 - 0xB7 */
-	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg),
+	I2bv(DstMem | SrcReg | ModRM | Lock | PageTable | SrcWrite, em_cmpxchg),
 	I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
 	I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
@@ -4174,6 +4248,8 @@ static const struct opcode opcode_map_0f_38[256] = {
 #undef I
 #undef GP
 #undef EXT
+#undef MD
+#undef ID
 
 #undef D2bv
 #undef D2bvIP
@@ -4563,6 +4639,12 @@ done_prefixes:
 		else
 			opcode = opcode.u.idual->mod012;
 		break;
+	case ModeDual:
+		if (ctxt->mode == X86EMUL_MODE_PROT64)
+			opcode = opcode.u.mdual->mode64;
+		else
+			opcode = opcode.u.mdual->mode32;
+		break;
 	default:
 		return EMULATION_FAILED;
 	}
@@ -4860,8 +4942,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 		/* optimisation - avoid slow emulated read if Mov */
 		rc = segmented_read(ctxt, ctxt->dst.addr.mem,
 				   &ctxt->dst.val, ctxt->dst.bytes);
-		if (rc != X86EMUL_CONTINUE)
+		if (rc != X86EMUL_CONTINUE) {
+			if (!(ctxt->d & NoWrite) &&
+			    rc == X86EMUL_PROPAGATE_FAULT &&
+			    ctxt->exception.vector == PF_VECTOR)
+				ctxt->exception.error_code |= PFERR_WRITE_MASK;
 			goto done;
+		}
 	}
 	ctxt->dst.orig_val = ctxt->dst.val;
 
@@ -4899,11 +4986,6 @@ special_insn:
 		goto threebyte_insn;
 
 	switch (ctxt->b) {
-	case 0x63:		/* movsxd */
-		if (ctxt->mode != X86EMUL_MODE_PROT64)
-			goto cannot_emulate;
-		ctxt->dst.val = (s32) ctxt->src.val;
-		break;
 	case 0x70 ... 0x7f: /* jcc (short) */
 		if (test_cc(ctxt->b, ctxt->eflags))
 			rc = jmp_rel(ctxt, ctxt->src.val);