author     Jeremy Fitzhardinge <jeremy@goop.org>        2008-06-25 00:19:28 -0400
committer  Ingo Molnar <mingo@elte.hu>                  2008-07-08 07:15:52 -0400
commit     2be29982a08009c731307f4a39053b70ac4700da (patch)
tree       f01fcf847f36b9da5cd87cefa7ee6c13d193077d
parent     c7245da6ae7e5208504ff027c4e0eec69b788651 (diff)
x86/paravirt: add sysret/sysexit pvops for returning to 32-bit compatibility userspace
In a 64-bit kernel, we need separate sysret/sysexit operations to
return to a 32-bit compat userspace.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citirx.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
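
After this change a 64-bit kernel exposes three separate return-to-userspace hooks in struct pv_cpu_ops rather than the single usergs_sysret it had before. The sketch below is a condensed summary of those hooks and the native instruction sequences they default to, abridged from the header and entry code in this patch (the real struct has many more members); all three are jump targets entered with user register state already restored, never ordinary calls.

        /* Native implementations live in assembly (entry_64.S, ia32entry.S);
         * each ends in a sysret/sysexit, so control never returns here. */
        extern void native_irq_enable_sysexit(void); /* swapgs; sti; sysexit -- sysenter return to compat userspace */
        extern void native_usergs_sysret32(void);    /* swapgs; sysretl      -- syscall return to compat userspace  */
        extern void native_usergs_sysret64(void);    /* swapgs; sysretq      -- syscall return to 64-bit userspace  */

On native hardware pv_cpu_ops points at these, and paravirt_patch_64.c can patch the two- or three-instruction sequences directly into the call sites; a hypervisor backend can instead supply its own implementations (see the illustrative sketch after the diff).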
 arch/x86/ia32/ia32entry.S           | 21
 arch/x86/kernel/asm-offsets_64.c    |  4
 arch/x86/kernel/entry_64.S          |  4
 arch/x86/kernel/paravirt.c          | 12
 arch/x86/kernel/paravirt_patch_64.c |  9
 include/asm-x86/irqflags.h          | 14
 include/asm-x86/paravirt.h          | 56
 7 files changed, 89 insertions(+), 31 deletions(-)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3aefbce2de48..2a4c42427d9a 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -61,6 +61,19 @@
 	CFI_UNDEFINED	r15
 	.endm
 
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_usergs_sysret32)
+	swapgs
+	sysretl
+ENDPROC(native_usergs_sysret32)
+
+ENTRY(native_irq_enable_sysexit)
+	swapgs
+	sti
+	sysexit
+ENDPROC(native_irq_enable_sysexit)
+#endif
+
 /*
  * 32bit SYSENTER instruction entry.
  *
@@ -151,10 +164,7 @@ sysenter_do_call:
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_REGISTER rsp,rcx
 	TRACE_IRQS_ON
-	swapgs
-	sti	/* sti only takes effect after the next instruction */
-	/* sysexit */
-	.byte	0xf, 0x35
+	ENABLE_INTERRUPTS_SYSEXIT32
 
 sysenter_tracesys:
 	CFI_RESTORE_STATE
@@ -254,8 +264,7 @@ cstar_do_call:
 	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
-	swapgs
-	sysretl
+	USERGS_SYSRET32
 
 cstar_tracesys:
 	CFI_RESTORE_STATE
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index a19aba8c5bb3..06c451af979a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,9 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_usergs_sysret, pv_cpu_ops, usergs_sysret);
+	OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
+	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 18447a373fbd..880ffe510a11 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,7 +59,7 @@
 #endif
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret)
+ENTRY(native_usergs_sysret64)
 	swapgs
 	sysretq
 #endif /* CONFIG_PARAVIRT */
@@ -275,7 +275,7 @@ sysret_check:
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER rflags,r11*/
 	movq	%gs:pda_oldrsp, %rsp
-	USERGS_SYSRET
+	USERGS_SYSRET64
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index b0b17f0bc7e9..bf1067e89cad 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -141,7 +141,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
 		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -193,7 +194,8 @@ static void native_flush_tlb_single(unsigned long addr)
 /* These are in entry.S */
 extern void native_iret(void);
 extern void native_irq_enable_sysexit(void);
-extern void native_usergs_sysret(void);
+extern void native_usergs_sysret32(void);
+extern void native_usergs_sysret64(void);
 
 static int __init print_banner(void)
 {
@@ -329,10 +331,10 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
-#ifdef CONFIG_X86_32
 	.irq_enable_sysexit = native_irq_enable_sysexit,
-#else
-	.usergs_sysret = native_usergs_sysret,
+#ifdef CONFIG_X86_64
+	.usergs_sysret32 = native_usergs_sysret32,
+	.usergs_sysret64 = native_usergs_sysret64,
 #endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index d4c0712a3e64..061d01df9ae6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -14,8 +14,9 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
-/* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, usergs_sysret, "swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
+DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +36,9 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, usergs_sysret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
+		PATCH_SITE(pv_cpu_ops, usergs_sysret32);
+		PATCH_SITE(pv_cpu_ops, usergs_sysret64);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 544836c96b61..ea9bd2635d59 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -112,9 +112,17 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define USERGS_SYSRET	\
+#define USERGS_SYSRET64	\
 			swapgs;	\
 			sysretq;
+#define USERGS_SYSRET32	\
+			swapgs;	\
+			sysretl
+#define ENABLE_INTERRUPTS_SYSEXIT32	\
+			swapgs;	\
+			sti;	\
+			sysexit
+
 #else
 #define INTERRUPT_RETURN		iret
 #define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index dad5b4186f51..33f72f8fe757 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -141,9 +141,32 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(int counter);
 	unsigned long long (*read_tscp)(unsigned int *aux);
 
-	/* These three are jmp to, not actually called. */
+	/*
+	 * Atomically enable interrupts and return to userspace.  This
+	 * is only ever used to return to 32-bit processes; in a
+	 * 64-bit kernel, it's used for 32-on-64 compat processes, but
+	 * never native 64-bit processes.  (Jump, not call.)
+	 */
 	void (*irq_enable_sysexit)(void);
-	void (*usergs_sysret)(void);
+
+	/*
+	 * Switch to usermode gs and return to 64-bit usermode using
+	 * sysret.  Only used in 64-bit kernels to return to 64-bit
+	 * processes.  Usermode register state, including %rsp, must
+	 * already be restored.
+	 */
+	void (*usergs_sysret64)(void);
+
+	/*
+	 * Switch to usermode gs and return to 32-bit usermode using
+	 * sysret.  Used to return to 32-on-64 compat processes.
+	 * Other usermode register state, including %esp, must already
+	 * be restored.
+	 */
+	void (*usergs_sysret32)(void);
+
+	/* Normal iret.  Jump to this with the standard iret stack
+	   frame set up. */
 	void (*iret)(void);
 
 	void (*swapgs)(void);
@@ -1481,18 +1504,24 @@ static inline unsigned long __raw_local_irq_save(void)
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS;)
 
-#define ENABLE_INTERRUPTS_SYSEXIT	\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
+#define USERGS_SYSRET32	\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),	\
 		  CLBR_NONE,	\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
-
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
 
 #ifdef CONFIG_X86_32
 #define GET_CR0_INTO_EAX	\
 	push %ecx; push %edx;	\
 	call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0);	\
 	pop %edx; pop %ecx
-#else
+
+#define ENABLE_INTERRUPTS_SYSEXIT	\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
+		  CLBR_NONE,	\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+
+
+#else /* !CONFIG_X86_32 */
 #define SWAPGS	\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,	\
 		  PV_SAVE_REGS;	\
@@ -1505,11 +1534,16 @@ static inline unsigned long __raw_local_irq_save(void)
 		  movq %rax, %rcx;	\
 		  xorq %rax, %rax;
 
-#define USERGS_SYSRET	\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret),	\
+#define USERGS_SYSRET64	\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,	\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret))
-#endif
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#define ENABLE_INTERRUPTS_SYSEXIT32	\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
+		  CLBR_NONE,	\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
+#endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_PARAVIRT */
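
For context, the point of routing these returns through pv_cpu_ops is that a paravirtualized guest can substitute its own exit paths; xen-devel is Cc'd as the immediate consumer, but no guest-side implementation is part of this patch. The sketch below is purely illustrative under that assumption: the hv_* names are placeholders, not symbols introduced here.

        #include <linux/init.h>
        #include <asm/paravirt.h>

        /* Placeholder guest return paths; a real backend implements these in
         * assembly with hypervisor-specific sequences instead of sysret/sysexit. */
        extern void hv_iret(void);
        extern void hv_sysexit(void);
        extern void hv_sysret32(void);
        extern void hv_sysret64(void);

        /* Illustrative setup hook: point the new pv_cpu_ops members at the
         * guest's own return paths in place of the native_* defaults. */
        static void __init example_paravirt_setup(void)
        {
                pv_cpu_ops.iret               = hv_iret;
                pv_cpu_ops.irq_enable_sysexit = hv_sysexit;
                pv_cpu_ops.usergs_sysret32    = hv_sysret32;
                pv_cpu_ops.usergs_sysret64    = hv_sysret64;
        }

Because the default patching rule in paravirt_patch_default() treats all of these as jumps rather than calls, an override like this keeps the property that the patched call sites simply jmp to whatever the hooks point at.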