author	Avi Kivity <avi.kivity@gmail.com>	2013-01-04 09:18:48 -0500
committer	Marcelo Tosatti <mtosatti@redhat.com>	2013-01-09 14:39:17 -0500
commit	e28bbd44dad134046ef9463cbb8c1cf81f53de5e (patch)
tree	ba58057fd3ad51430fffe57e41d7bc3be62b91db
parent	b09408d00fd82be80289a329dd94d1a0d6b77dc2 (diff)
KVM: x86 emulator: framework for streamlining arithmetic opcodes
We emulate arithmetic opcodes by executing a "similar" instruction (same operation, different operands) on the cpu. This ensures accurate emulation, especially wrt. eflags. However, the prologue and epilogue around the opcode are fairly long, consisting of a switch (for the operand size) and code to load and save the operands. This is repeated for every opcode.

This patch introduces an alternative way to emulate arithmetic opcodes. Instead of the above, we have four (three on i386) functions consisting of just the opcode and a ret; one for each operand size. For example:

   .align 8
   em_notb:
	not %al
	ret

   .align 8
   em_notw:
	not %ax
	ret

   .align 8
   em_notl:
	not %eax
	ret

   .align 8
   em_notq:
	not %rax
	ret

The prologue and epilogue are shared across all opcodes. Note that the functions use a special calling convention; notably, eflags is an input/output parameter and is not clobbered. Rather than dispatching the four functions through a jump table, the functions are declared with a constant size (8 bytes), so their addresses can be calculated directly.

Acked-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi.kivity@gmail.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
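For context, beyond the patch text: the dispatch relies on each sized variant sitting exactly FASTOP_SIZE (8) bytes after the previous one, so the operand size alone determines the entry point. A minimal user-space sketch of that calculation follows; it is not part of the patch, and __builtin_ctzl() merely stands in for the kernel's __ffs():

   #include <stdio.h>

   #define FASTOP_SIZE 8

   /* stand-in for the kernel's __ffs(): index of the lowest set bit */
   static unsigned long ffs_index(unsigned long x)
   {
           return (unsigned long)__builtin_ctzl(x);
   }

   int main(void)
   {
           unsigned long bytes;

           /* operand sizes 1, 2, 4, 8 map to entry offsets 0, 8, 16, 24 */
           for (bytes = 1; bytes <= 8; bytes <<= 1)
                   printf("%lu-byte operand -> entry at +%lu bytes\n",
                          bytes, ffs_index(bytes) * FASTOP_SIZE);
           return 0;
   }

Calling the size-appropriate variant thus reduces to "base address + __ffs(bytes) * 8", which is exactly what the fastop() helper added by the diff below does.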
-rw-r--r--	arch/x86/kvm/emulate.c	41
1 file changed, 41 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 53c5ad6851d1..dd71567d7c71 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -149,6 +149,7 @@
 #define Aligned     ((u64)1 << 41)  /* Explicitly aligned (e.g. MOVDQA) */
 #define Unaligned   ((u64)1 << 42)  /* Explicitly unaligned (e.g. MOVDQU) */
 #define Avx         ((u64)1 << 43)  /* Advanced Vector Extensions */
+#define Fastop      ((u64)1 << 44)  /* Use opcode::u.fastop */
 
 #define X2(x...) x, x
 #define X3(x...) X2(x), x
@@ -159,6 +160,27 @@
 #define X8(x...) X4(x), X4(x)
 #define X16(x...) X8(x), X8(x)
 
+#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
+#define FASTOP_SIZE 8
+
+/*
+ * fastop functions have a special calling convention:
+ *
+ * dst:    [rdx]:rax  (in/out)
+ * src:    rbx        (in/out)
+ * src2:   rcx        (in)
+ * flags:  rflags     (in/out)
+ *
+ * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
+ * different operand sizes can be reached by calculation, rather than a jump
+ * table (which would be bigger than the code).
+ *
+ * fastop functions are declared as taking a never-defined fastop parameter,
+ * so they can't be called from C directly.
+ */
+
+struct fastop;
+
 struct opcode {
 	u64 flags : 56;
 	u64 intercept : 8;
@@ -168,6 +190,7 @@ struct opcode {
 		const struct group_dual *gdual;
 		const struct gprefix *gprefix;
 		const struct escape *esc;
+		void (*fastop)(struct fastop *fake);
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 };
@@ -3646,6 +3669,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
+#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
 #define II(_f, _e, _i) \
 	{ .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i }
 #define IIP(_f, _e, _i, _p) \
@@ -4502,6 +4526,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
 }
 
+static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
+{
+	ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
+	fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE;
+	asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
+	    : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags)
+	    : "c"(ctxt->src2.val), [fastop]"S"(fop));
+	ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
+	return X86EMUL_CONTINUE;
+}
 
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
@@ -4631,6 +4665,13 @@ special_insn:
 	}
 
 	if (ctxt->execute) {
+		if (ctxt->d & Fastop) {
+			void (*fop)(struct fastop *) = (void *)ctxt->execute;
+			rc = fastop(ctxt, fop);
+			if (rc != X86EMUL_CONTINUE)
+				goto done;
+			goto writeback;
+		}
 		rc = ctxt->execute(ctxt);
 		if (rc != X86EMUL_CONTINUE)
 			goto done;
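This commit only adds the framework; the fastop bodies themselves are left to later patches. For illustration only, here is a hedged sketch of what such a body could look like inside emulate.c, matching the layout fastop() expects. The em_not label, the section directives, and the table flags below are illustrative assumptions, not code taken from this commit, and the sketch covers x86_64 only:

   /*
    * Hypothetical sketch: four sized NOT variants, each padded to
    * FASTOP_SIZE (8) bytes, so fastop() can reach any of them by adding
    * __ffs(bytes) * FASTOP_SIZE to the em_not entry point.
    */
   asm(".pushsection .text, \"ax\"\n"
       ".global em_not\n"
       ".align 8\n"
       "em_not: not %al\n\tret\n"   /* 1-byte operand, offset  0 */
       ".align 8\n"
       "not %ax\n\tret\n"           /* 2-byte operand, offset  8 */
       ".align 8\n"
       "not %eax\n\tret\n"          /* 4-byte operand, offset 16 */
       ".align 8\n"
       "not %rax\n\tret\n"          /* 8-byte operand, offset 24 */
       ".popsection");

   /* never called from C directly; only reachable through fastop() */
   extern void em_not(struct fastop *fake);

A decode-table entry could then switch from I() to the new F() macro, e.g. F(DstMem | SrcNone | Lock, em_not) with flags chosen for illustration: setting the Fastop flag makes x86_emulate_insn() route the instruction through fastop() rather than calling ctxt->execute() as a regular emulation callback.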