aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jeremy Fitzhardinge <jeremy@goop.org> 2008-06-25 00:19:28 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-07-08 07:15:52 -0400
commit 2be29982a08009c731307f4a39053b70ac4700da (patch)
tree f01fcf847f36b9da5cd87cefa7ee6c13d193077d
parent c7245da6ae7e5208504ff027c4e0eec69b788651 (diff)
x86/paravirt: add sysret/sysexit pvops for returning to 32-bit compatibility userspace
In a 64-bit system, we need separate sysret/sysexit operations to return to a 32-bit userspace. Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: xen-devel <xen-devel@lists.xensource.com> Cc: Stephen Tweedie <sct@redhat.com> Cc: Eduardo Habkost <ehabkost@redhat.com> Cc: Mark McLoughlin <markmc@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/ia32/ia32entry.S 21
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c 4
-rw-r--r-- arch/x86/kernel/entry_64.S 4
-rw-r--r-- arch/x86/kernel/paravirt.c 12
-rw-r--r-- arch/x86/kernel/paravirt_patch_64.c 9
-rw-r--r-- include/asm-x86/irqflags.h 14
-rw-r--r-- include/asm-x86/paravirt.h 56
7 files changed, 89 insertions, 31 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3aefbce2de48..2a4c42427d9a 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -61,6 +61,19 @@
61 CFI_UNDEFINED r15 61 CFI_UNDEFINED r15
62 .endm 62 .endm
63 63
64#ifdef CONFIG_PARAVIRT
65ENTRY(native_usergs_sysret32)
66 swapgs
67 sysretl
68ENDPROC(native_usergs_sysret32)
69
70ENTRY(native_irq_enable_sysexit)
71 swapgs
72 sti
73 sysexit
74ENDPROC(native_irq_enable_sysexit)
75#endif
76
64/* 77/*
65 * 32bit SYSENTER instruction entry. 78 * 32bit SYSENTER instruction entry.
66 * 79 *
@@ -151,10 +164,7 @@ sysenter_do_call:
151 CFI_ADJUST_CFA_OFFSET -8 164 CFI_ADJUST_CFA_OFFSET -8
152 CFI_REGISTER rsp,rcx 165 CFI_REGISTER rsp,rcx
153 TRACE_IRQS_ON 166 TRACE_IRQS_ON
154 swapgs 167 ENABLE_INTERRUPTS_SYSEXIT32
155 sti /* sti only takes effect after the next instruction */
156 /* sysexit */
157 .byte 0xf, 0x35
158 168
159sysenter_tracesys: 169sysenter_tracesys:
160 CFI_RESTORE_STATE 170 CFI_RESTORE_STATE
@@ -254,8 +264,7 @@ cstar_do_call:
254 TRACE_IRQS_ON 264 TRACE_IRQS_ON
255 movl RSP-ARGOFFSET(%rsp),%esp 265 movl RSP-ARGOFFSET(%rsp),%esp
256 CFI_RESTORE rsp 266 CFI_RESTORE rsp
257 swapgs 267 USERGS_SYSRET32
258 sysretl
259 268
260cstar_tracesys: 269cstar_tracesys:
261 CFI_RESTORE_STATE 270 CFI_RESTORE_STATE
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index a19aba8c5bb3..06c451af979a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,9 @@ int main(void)
62 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); 62 OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
63 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); 63 OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
64 OFFSET(PV_CPU_iret, pv_cpu_ops, iret); 64 OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
65 OFFSET(PV_CPU_usergs_sysret, pv_cpu_ops, usergs_sysret); 65 OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
66 OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
67 OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
66 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); 68 OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
67 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); 69 OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
68#endif 70#endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 18447a373fbd..880ffe510a11 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,7 +59,7 @@
59#endif 59#endif
60 60
61#ifdef CONFIG_PARAVIRT 61#ifdef CONFIG_PARAVIRT
62ENTRY(native_usergs_sysret) 62ENTRY(native_usergs_sysret64)
63 swapgs 63 swapgs
64 sysretq 64 sysretq
65#endif /* CONFIG_PARAVIRT */ 65#endif /* CONFIG_PARAVIRT */
@@ -275,7 +275,7 @@ sysret_check:
275 RESTORE_ARGS 0,-ARG_SKIP,1 275 RESTORE_ARGS 0,-ARG_SKIP,1
276 /*CFI_REGISTER rflags,r11*/ 276 /*CFI_REGISTER rflags,r11*/
277 movq %gs:pda_oldrsp, %rsp 277 movq %gs:pda_oldrsp, %rsp
278 USERGS_SYSRET 278 USERGS_SYSRET64
279 279
280 CFI_RESTORE_STATE 280 CFI_RESTORE_STATE
281 /* Handle reschedules */ 281 /* Handle reschedules */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index b0b17f0bc7e9..bf1067e89cad 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -141,7 +141,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
141 ret = paravirt_patch_nop(); 141 ret = paravirt_patch_nop();
142 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || 142 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
143 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || 143 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
144 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret)) 144 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
145 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
145 /* If operation requires a jmp, then jmp */ 146 /* If operation requires a jmp, then jmp */
146 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); 147 ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
147 else 148 else
@@ -193,7 +194,8 @@ static void native_flush_tlb_single(unsigned long addr)
193/* These are in entry.S */ 194/* These are in entry.S */
194extern void native_iret(void); 195extern void native_iret(void);
195extern void native_irq_enable_sysexit(void); 196extern void native_irq_enable_sysexit(void);
196extern void native_usergs_sysret(void); 197extern void native_usergs_sysret32(void);
198extern void native_usergs_sysret64(void);
197 199
198static int __init print_banner(void) 200static int __init print_banner(void)
199{ 201{
@@ -329,10 +331,10 @@ struct pv_cpu_ops pv_cpu_ops = {
329 .write_idt_entry = native_write_idt_entry, 331 .write_idt_entry = native_write_idt_entry,
330 .load_sp0 = native_load_sp0, 332 .load_sp0 = native_load_sp0,
331 333
332#ifdef CONFIG_X86_32
333 .irq_enable_sysexit = native_irq_enable_sysexit, 334 .irq_enable_sysexit = native_irq_enable_sysexit,
334#else 335#ifdef CONFIG_X86_64
335 .usergs_sysret = native_usergs_sysret, 336 .usergs_sysret32 = native_usergs_sysret32,
337 .usergs_sysret64 = native_usergs_sysret64,
336#endif 338#endif
337 .iret = native_iret, 339 .iret = native_iret,
338 .swapgs = native_swapgs, 340 .swapgs = native_swapgs,
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index d4c0712a3e64..061d01df9ae6 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -14,8 +14,9 @@ DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
14DEF_NATIVE(pv_cpu_ops, clts, "clts"); 14DEF_NATIVE(pv_cpu_ops, clts, "clts");
15DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd"); 15DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
16 16
17/* the three commands give us more control to how to return from a syscall */ 17DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "swapgs; sti; sysexit");
18DEF_NATIVE(pv_cpu_ops, usergs_sysret, "swapgs; sysretq;"); 18DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
19DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); 20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
20 21
21unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 22unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +36,9 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
35 PATCH_SITE(pv_irq_ops, irq_enable); 36 PATCH_SITE(pv_irq_ops, irq_enable);
36 PATCH_SITE(pv_irq_ops, irq_disable); 37 PATCH_SITE(pv_irq_ops, irq_disable);
37 PATCH_SITE(pv_cpu_ops, iret); 38 PATCH_SITE(pv_cpu_ops, iret);
38 PATCH_SITE(pv_cpu_ops, usergs_sysret); 39 PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
40 PATCH_SITE(pv_cpu_ops, usergs_sysret32);
41 PATCH_SITE(pv_cpu_ops, usergs_sysret64);
39 PATCH_SITE(pv_cpu_ops, swapgs); 42 PATCH_SITE(pv_cpu_ops, swapgs);
40 PATCH_SITE(pv_mmu_ops, read_cr2); 43 PATCH_SITE(pv_mmu_ops, read_cr2);
41 PATCH_SITE(pv_mmu_ops, read_cr3); 44 PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index 544836c96b61..ea9bd2635d59 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -112,9 +112,17 @@ static inline unsigned long __raw_local_irq_save(void)
112 112
113#ifdef CONFIG_X86_64 113#ifdef CONFIG_X86_64
114#define INTERRUPT_RETURN iretq 114#define INTERRUPT_RETURN iretq
115#define USERGS_SYSRET \ 115#define USERGS_SYSRET64 \
116 swapgs; \ 116 swapgs; \
117 sysretq; 117 sysretq;
118#define USERGS_SYSRET32 \
119 swapgs; \
120 sysretl
121#define ENABLE_INTERRUPTS_SYSEXIT32 \
122 swapgs; \
123 sti; \
124 sysexit
125
118#else 126#else
119#define INTERRUPT_RETURN iret 127#define INTERRUPT_RETURN iret
120#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit 128#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index dad5b4186f51..33f72f8fe757 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -141,9 +141,32 @@ struct pv_cpu_ops {
141 u64 (*read_pmc)(int counter); 141 u64 (*read_pmc)(int counter);
142 unsigned long long (*read_tscp)(unsigned int *aux); 142 unsigned long long (*read_tscp)(unsigned int *aux);
143 143
144 /* These three are jmp to, not actually called. */ 144 /*
145 * Atomically enable interrupts and return to userspace. This
146 * is only ever used to return to 32-bit processes; in a
147 * 64-bit kernel, it's used for 32-on-64 compat processes, but
148 * never native 64-bit processes. (Jump, not call.)
149 */
145 void (*irq_enable_sysexit)(void); 150 void (*irq_enable_sysexit)(void);
146 void (*usergs_sysret)(void); 151
152 /*
153 * Switch to usermode gs and return to 64-bit usermode using
154 * sysret. Only used in 64-bit kernels to return to 64-bit
155 * processes. Usermode register state, including %rsp, must
156 * already be restored.
157 */
158 void (*usergs_sysret64)(void);
159
160 /*
161 * Switch to usermode gs and return to 32-bit usermode using
162 * sysret. Used to return to 32-on-64 compat processes.
163 * Other usermode register state, including %esp, must already
164 * be restored.
165 */
166 void (*usergs_sysret32)(void);
167
168 /* Normal iret. Jump to this with the standard iret stack
169 frame set up. */
147 void (*iret)(void); 170 void (*iret)(void);
148 171
149 void (*swapgs)(void); 172 void (*swapgs)(void);
@@ -1481,18 +1504,24 @@ static inline unsigned long __raw_local_irq_save(void)
1481 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ 1504 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
1482 PV_RESTORE_REGS;) 1505 PV_RESTORE_REGS;)
1483 1506
1484#define ENABLE_INTERRUPTS_SYSEXIT \ 1507#define USERGS_SYSRET32 \
1485 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \ 1508 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \
1486 CLBR_NONE, \ 1509 CLBR_NONE, \
1487 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit)) 1510 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret32))
1488
1489 1511
1490#ifdef CONFIG_X86_32 1512#ifdef CONFIG_X86_32
1491#define GET_CR0_INTO_EAX \ 1513#define GET_CR0_INTO_EAX \
1492 push %ecx; push %edx; \ 1514 push %ecx; push %edx; \
1493 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ 1515 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
1494 pop %edx; pop %ecx 1516 pop %edx; pop %ecx
1495#else 1517
1518#define ENABLE_INTERRUPTS_SYSEXIT \
1519 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1520 CLBR_NONE, \
1521 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1522
1523
1524#else /* !CONFIG_X86_32 */
1496#define SWAPGS \ 1525#define SWAPGS \
1497 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ 1526 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
1498 PV_SAVE_REGS; \ 1527 PV_SAVE_REGS; \
@@ -1505,11 +1534,16 @@ static inline unsigned long __raw_local_irq_save(void)
1505 movq %rax, %rcx; \ 1534 movq %rax, %rcx; \
1506 xorq %rax, %rax; 1535 xorq %rax, %rax;
1507 1536
1508#define USERGS_SYSRET \ 1537#define USERGS_SYSRET64 \
1509 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret), \ 1538 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
1510 CLBR_NONE, \ 1539 CLBR_NONE, \
1511 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret)) 1540 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
1512#endif 1541
1542#define ENABLE_INTERRUPTS_SYSEXIT32 \
1543 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
1544 CLBR_NONE, \
1545 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
1546#endif /* CONFIG_X86_32 */
1513 1547
1514#endif /* __ASSEMBLY__ */ 1548#endif /* __ASSEMBLY__ */
1515#endif /* CONFIG_PARAVIRT */ 1549#endif /* CONFIG_PARAVIRT */