aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lguest/i386_head.S
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2009-06-13 00:27:03 -0400
committerRusty Russell <rusty@rustcorp.com.au>2009-06-12 08:57:03 -0400
commit61f4bc83fea248a3092beb7ba43daa5629615513 (patch)
tree5ce12fc0676f93a49f743dab1c60f8e1ca991ec3 /arch/x86/lguest/i386_head.S
parenta32a8813d0173163ba44d8f9556e0d89fdc4fb46 (diff)
lguest: optimize by coding restore_flags and irq_enable in assembler.
The downside of the last patch which made restore_flags and irq_enable check interrupts is that they are now too big to be patched directly into the callsites, so the C versions are always used. But the C versions go via PV_CALLEE_SAVE_REGS_THUNK which saves all the registers. In fact, we don't need any registers in the fast path, so we can do better than this if we actually code them in assembler. The results are in the noise, but since it's about the same amount of code, it's worth applying. 1GB Guest->Host: input(suppressed),output(suppressed) Before: Seconds: 0:16.53 Packets: 377268,753673 Interrupts: 22461,24297 Notifications: 1(5245),21303(732370) Net IRQs triggered: 377023(245),42578(711095) After: Seconds: 0:16.48 Packets: 377289,753673 Interrupts: 22281,24465 Notifications: 1(5245),21296(732377) Net IRQs triggered: 377060(229),42564(711109) Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Diffstat (limited to 'arch/x86/lguest/i386_head.S')
-rw-r--r--arch/x86/lguest/i386_head.S58
1 files changed, 57 insertions, 1 deletions
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 3e0c5545d59c..a9c8cfe61cd4 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -47,7 +47,63 @@ ENTRY(lguest_entry)
47 47
48LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) 48LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled)
49LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) 49LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax)
50/*:*/ 50
51/*G:033 But using those wrappers is inefficient (we'll see why that doesn't
52 * matter for save_fl and irq_disable later). If we write our routines
53 * carefully in assembler, we can avoid clobbering any registers and avoid
54 * jumping through the wrapper functions.
55 *
56 * I skipped over our first piece of assembler, but this one is worth studying
57 * in a bit more detail so I'll describe in easy stages. First, the routine
58 * to enable interrupts: */
59ENTRY(lg_irq_enable)
60 /* The reverse of irq_disable, this sets lguest_data.irq_enabled to
61 * X86_EFLAGS_IF (ie. "Interrupts enabled"). */
62 movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled
63 /* But now we need to check if the Host wants to know: there might have
64 * been interrupts waiting to be delivered, in which case it will have
65 * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we
66 * jump to send_interrupts, otherwise we're done. */
67 testl $0, lguest_data+LGUEST_DATA_irq_pending
68 jnz send_interrupts
69 /* One cool thing about x86 is that you can do many things without using
70 * a register. In this case, the normal path hasn't needed to save or
71 * restore any registers at all! */
72 ret
73send_interrupts:
74 /* OK, now we need a register: eax is used for the hypercall number,
75 * which is LHCALL_SEND_INTERRUPTS.
76 *
77 * We used not to bother with this pending detection at all, which was
78 * much simpler. Sooner or later the Host would realize it had to
79 * send us an interrupt. But that turns out to make performance 7
80 * times worse on a simple tcp benchmark. So now we do this the hard
81 * way. */
82 pushl %eax
83 movl $LHCALL_SEND_INTERRUPTS, %eax
84 /* This is a vmcall instruction (same thing that KVM uses). Older
85 * assembler versions might not know the "vmcall" instruction, so we
86 * create one manually here. */
87 .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */
88 popl %eax
89 ret
90
91/* Finally, the "popf" or "restore flags" routine. The %eax register holds the
92 * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're
93 * enabling interrupts again, if it's 0 we're leaving them off. */
94ENTRY(lg_restore_fl)
95 /* This is just "lguest_data.irq_enabled = flags;" */
96 movl %eax, lguest_data+LGUEST_DATA_irq_enabled
97 /* Now, if the %eax value has enabled interrupts and
98 * lguest_data.irq_pending is set, we want to tell the Host so it can
99 * deliver any outstanding interrupts. Fortunately, both values will
100 * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl"
101 * instruction will AND them together for us. If both are set, we
102 * jump to send_interrupts. */
103 testl lguest_data+LGUEST_DATA_irq_pending, %eax
104 jnz send_interrupts
105 /* Again, the normal path has used no extra registers. Clever, huh? */
106 ret
51 107
52/* These demark the EIP range where host should never deliver interrupts. */ 108/* These demark the EIP range where host should never deliver interrupts. */
53.global lguest_noirq_start 109.global lguest_noirq_start