diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2009-01-28 17:35:04 -0500 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2009-01-30 17:51:44 -0500 |
commit | 9104a18dcdd8dfefdddca8ce44988563f13ed3c4 (patch) | |
tree | 9e0b32d6340eb8335ca3ee1fb0f13c203b47c734 | |
parent | b8aa287f77be943e37a84fa4657e27df95269bfb (diff) |
x86/paravirt: selectively save/restore regs around pvops calls
Impact: Optimization
Each asm paravirt-ops call says what registers are available for
clobbering. This patch makes use of this to selectively save/restore
registers around each pvops call. In many cases this significantly
shrinks code size.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 100 |
1 files changed, 65 insertions, 35 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 961d10c12f16..dcce961262bf 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -12,19 +12,29 @@ | |||
12 | #define CLBR_EAX (1 << 0) | 12 | #define CLBR_EAX (1 << 0) |
13 | #define CLBR_ECX (1 << 1) | 13 | #define CLBR_ECX (1 << 1) |
14 | #define CLBR_EDX (1 << 2) | 14 | #define CLBR_EDX (1 << 2) |
15 | #define CLBR_EDI (1 << 3) | ||
15 | 16 | ||
16 | #ifdef CONFIG_X86_64 | 17 | #ifdef CONFIG_X86_32 |
17 | #define CLBR_RSI (1 << 3) | 18 | /* CLBR_ANY should match all regs platform has. For i386, that's just it */ |
18 | #define CLBR_RDI (1 << 4) | 19 | #define CLBR_ANY ((1 << 4) - 1) |
20 | #else | ||
21 | #define CLBR_RAX CLBR_EAX | ||
22 | #define CLBR_RCX CLBR_ECX | ||
23 | #define CLBR_RDX CLBR_EDX | ||
24 | #define CLBR_RDI CLBR_EDI | ||
25 | #define CLBR_RSI (1 << 4) | ||
19 | #define CLBR_R8 (1 << 5) | 26 | #define CLBR_R8 (1 << 5) |
20 | #define CLBR_R9 (1 << 6) | 27 | #define CLBR_R9 (1 << 6) |
21 | #define CLBR_R10 (1 << 7) | 28 | #define CLBR_R10 (1 << 7) |
22 | #define CLBR_R11 (1 << 8) | 29 | #define CLBR_R11 (1 << 8) |
23 | #define CLBR_ANY ((1 << 9) - 1) | 30 | #define CLBR_ANY ((1 << 9) - 1) |
31 | |||
32 | #define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ | ||
33 | CLBR_RCX | CLBR_R8 | CLBR_R9) | ||
34 | #define CLBR_RET_REG (CLBR_RAX | CLBR_RDX) | ||
35 | #define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) | ||
36 | |||
24 | #include <asm/desc_defs.h> | 37 | #include <asm/desc_defs.h> |
25 | #else | ||
26 | /* CLBR_ANY should match all regs platform has. For i386, that's just it */ | ||
27 | #define CLBR_ANY ((1 << 3) - 1) | ||
28 | #endif /* X86_64 */ | 38 | #endif /* X86_64 */ |
29 | 39 | ||
30 | #ifndef __ASSEMBLY__ | 40 | #ifndef __ASSEMBLY__ |
@@ -1530,33 +1540,49 @@ static inline unsigned long __raw_local_irq_save(void) | |||
1530 | .popsection | 1540 | .popsection |
1531 | 1541 | ||
1532 | 1542 | ||
1543 | #define COND_PUSH(set, mask, reg) \ | ||
1544 | .if ((~set) & mask); push %reg; .endif | ||
1545 | #define COND_POP(set, mask, reg) \ | ||
1546 | .if ((~set) & mask); pop %reg; .endif | ||
1547 | |||
1533 | #ifdef CONFIG_X86_64 | 1548 | #ifdef CONFIG_X86_64 |
1534 | #define PV_SAVE_REGS \ | 1549 | |
1535 | push %rax; \ | 1550 | #define PV_SAVE_REGS(set) \ |
1536 | push %rcx; \ | 1551 | COND_PUSH(set, CLBR_RAX, rax); \ |
1537 | push %rdx; \ | 1552 | COND_PUSH(set, CLBR_RCX, rcx); \ |
1538 | push %rsi; \ | 1553 | COND_PUSH(set, CLBR_RDX, rdx); \ |
1539 | push %rdi; \ | 1554 | COND_PUSH(set, CLBR_RSI, rsi); \ |
1540 | push %r8; \ | 1555 | COND_PUSH(set, CLBR_RDI, rdi); \ |
1541 | push %r9; \ | 1556 | COND_PUSH(set, CLBR_R8, r8); \ |
1542 | push %r10; \ | 1557 | COND_PUSH(set, CLBR_R9, r9); \ |
1543 | push %r11 | 1558 | COND_PUSH(set, CLBR_R10, r10); \ |
1544 | #define PV_RESTORE_REGS \ | 1559 | COND_PUSH(set, CLBR_R11, r11) |
1545 | pop %r11; \ | 1560 | #define PV_RESTORE_REGS(set) \ |
1546 | pop %r10; \ | 1561 | COND_POP(set, CLBR_R11, r11); \ |
1547 | pop %r9; \ | 1562 | COND_POP(set, CLBR_R10, r10); \ |
1548 | pop %r8; \ | 1563 | COND_POP(set, CLBR_R9, r9); \ |
1549 | pop %rdi; \ | 1564 | COND_POP(set, CLBR_R8, r8); \ |
1550 | pop %rsi; \ | 1565 | COND_POP(set, CLBR_RDI, rdi); \ |
1551 | pop %rdx; \ | 1566 | COND_POP(set, CLBR_RSI, rsi); \ |
1552 | pop %rcx; \ | 1567 | COND_POP(set, CLBR_RDX, rdx); \ |
1553 | pop %rax | 1568 | COND_POP(set, CLBR_RCX, rcx); \ |
1569 | COND_POP(set, CLBR_RAX, rax) | ||
1570 | |||
1554 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) | 1571 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) |
1555 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) | 1572 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) |
1556 | #define PARA_INDIRECT(addr) *addr(%rip) | 1573 | #define PARA_INDIRECT(addr) *addr(%rip) |
1557 | #else | 1574 | #else |
1558 | #define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx | 1575 | #define PV_SAVE_REGS(set) \ |
1559 | #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax | 1576 | COND_PUSH(set, CLBR_EAX, eax); \ |
1577 | COND_PUSH(set, CLBR_EDI, edi); \ | ||
1578 | COND_PUSH(set, CLBR_ECX, ecx); \ | ||
1579 | COND_PUSH(set, CLBR_EDX, edx) | ||
1580 | #define PV_RESTORE_REGS(set) \ | ||
1581 | COND_POP(set, CLBR_EDX, edx); \ | ||
1582 | COND_POP(set, CLBR_ECX, ecx); \ | ||
1583 | COND_POP(set, CLBR_EDI, edi); \ | ||
1584 | COND_POP(set, CLBR_EAX, eax) | ||
1585 | |||
1560 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) | 1586 | #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) |
1561 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) | 1587 | #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) |
1562 | #define PARA_INDIRECT(addr) *%cs:addr | 1588 | #define PARA_INDIRECT(addr) *%cs:addr |
@@ -1568,15 +1594,15 @@ static inline unsigned long __raw_local_irq_save(void) | |||
1568 | 1594 | ||
1569 | #define DISABLE_INTERRUPTS(clobbers) \ | 1595 | #define DISABLE_INTERRUPTS(clobbers) \ |
1570 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ | 1596 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ |
1571 | PV_SAVE_REGS; \ | 1597 | PV_SAVE_REGS(clobbers); \ |
1572 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ | 1598 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ |
1573 | PV_RESTORE_REGS;) \ | 1599 | PV_RESTORE_REGS(clobbers);) |
1574 | 1600 | ||
1575 | #define ENABLE_INTERRUPTS(clobbers) \ | 1601 | #define ENABLE_INTERRUPTS(clobbers) \ |
1576 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ | 1602 | PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ |
1577 | PV_SAVE_REGS; \ | 1603 | PV_SAVE_REGS(clobbers); \ |
1578 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ | 1604 | call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ |
1579 | PV_RESTORE_REGS;) | 1605 | PV_RESTORE_REGS(clobbers);) |
1580 | 1606 | ||
1581 | #define USERGS_SYSRET32 \ | 1607 | #define USERGS_SYSRET32 \ |
1582 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ | 1608 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ |
@@ -1606,11 +1632,15 @@ static inline unsigned long __raw_local_irq_save(void) | |||
1606 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ | 1632 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ |
1607 | swapgs) | 1633 | swapgs) |
1608 | 1634 | ||
1635 | /* | ||
1636 | * Note: swapgs is very special, and in practice is either going to be | |
1637 | * implemented with a single "swapgs" instruction or something very | ||
1638 | * special. Either way, we don't need to save any registers for | ||
1639 | * it. | ||
1640 | */ | ||
1609 | #define SWAPGS \ | 1641 | #define SWAPGS \ |
1610 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ | 1642 | PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ |
1611 | PV_SAVE_REGS; \ | 1643 | call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ |
1612 | call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ | ||
1613 | PV_RESTORE_REGS \ | ||
1614 | ) | 1644 | ) |
1615 | 1645 | ||
1616 | #define GET_CR2_INTO_RCX \ | 1646 | #define GET_CR2_INTO_RCX \ |