author	Jeremy Fitzhardinge <jeremy@goop.org>	2008-06-25 00:19:26 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-08 07:13:15 -0400
commit	d75cd22fdd5f7d203fb60014d426942df33dd9a6
tree	0613fca9d594eab9a0679f80510fa11b48b31571
parent	e04e0a630d8b5c621b3a8e70ff20db737d3a5728
x86/paravirt: split sysret and sysexit
Don't conflate sysret and sysexit; they're different instructions with
different semantics, and may be in use at the same time (at least
within the same kernel, depending on whether it's an Intel or AMD
system).

sysexit - just return to userspace; does no register restoration of
any kind, and must explicitly and atomically enable interrupts.

sysret - reloads flags from r11, so there's no need to explicitly
enable interrupts on 64-bit; responsible for restoring usermode %gs.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
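For quick reference, these are the two native return paths that were
previously hidden behind the single ENABLE_INTERRUPTS_SYSCALL_RET hook
(instruction sequences taken verbatim from the entry_32.S and
entry_64.S hunks below):

	/* 32-bit SYSENTER return path: native_irq_enable_sysexit */
	sti			/* enable interrupts; sti's one-instruction   */
	sysexit			/* interrupt shadow makes enable+exit atomic; */
				/* sysexit itself restores no registers       */

	/* 64-bit SYSCALL return path: native_usersp_sysret */
	movq %gs:pda_oldrsp, %rsp	/* reload user stack pointer from the PDA */
	swapgs				/* restore usermode %gs                   */
	sysretq				/* reloads rflags from %r11, so no sti    */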
-rw-r--r--	arch/x86/kernel/asm-offsets_32.c    |  2 +-
-rw-r--r--	arch/x86/kernel/asm-offsets_64.c    |  2 +-
-rw-r--r--	arch/x86/kernel/entry_32.S          |  8 ++++----
-rw-r--r--	arch/x86/kernel/entry_64.S          |  4 ++--
-rw-r--r--	arch/x86/kernel/paravirt.c          | 12 +++++++++---
-rw-r--r--	arch/x86/kernel/paravirt_patch_32.c |  4 ++--
-rw-r--r--	arch/x86/kernel/paravirt_patch_64.c |  4 ++--
-rw-r--r--	arch/x86/kernel/vmi_32.c            |  4 ++--
-rw-r--r--	arch/x86/xen/enlighten.c            |  2 +-
-rw-r--r--	include/asm-x86/irqflags.h          |  4 ++--
-rw-r--r--	include/asm-x86/paravirt.h          | 15 ++++++++++-----
11 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 92588083950f..6649d09ad88f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,7 +111,7 @@ void foo(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f126c05d6170..27ac2deca465 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,7 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 159a1c76d2bd..53393c306e11 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -58,7 +58,7 @@
  * for paravirtualization.  The following will never clobber any registers:
  *	INTERRUPT_RETURN (aka. "iret")
  *	GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *	ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
+ *	ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_irq_enable_sysexit)
 	sti
 	sysexit
-END(native_irq_enable_syscall_ret)
+END(native_irq_enable_sysexit)
 #endif
 
 KPROBE_ENTRY(int3)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6d1101469e97..0056bc4c61a9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,7 +59,7 @@
 #endif
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_usersp_sysret)
 	movq	%gs:pda_oldrsp,%rsp
 	swapgs
 	sysretq
@@ -275,7 +275,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	USERSP_SYSRET
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 78c9a1b9e6b0..565ee7a990ea 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -140,7 +140,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -191,7 +192,8 @@ static void native_flush_tlb_single(unsigned long addr)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_syscall_ret(void);
+extern void native_irq_enable_sysexit(void);
+extern void native_usersp_sysret(void);
 
 static int __init print_banner(void)
 {
@@ -327,7 +329,11 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
-	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
+#ifdef CONFIG_X86_32
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+#else
+	.usersp_sysret = native_usersp_sysret,
+#endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
 
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 82fc5fcab4f4..58262218781b 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, restore_fl);
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 7d904e138d7e..4a170552b852 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, usersp_sysret);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 956f38927aa7..946bf13b44ab 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 				      insns, ip);
 	case PARAVIRT_PATCH(pv_cpu_ops.iret):
 		return patch_internal(VMI_CALL_IRET, len, insns, ip);
-	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
 		return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 	default:
 		break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
 	 * the backend.  They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d62f14e20708..119c88fa769d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1089,7 +1089,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = xen_sysexit,
+	.irq_enable_sysexit = xen_sysexit,
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
diff --git a/include/asm-x86/irqflags.h b/include/asm-x86/irqflags.h
index c242527f970e..99ee5256a7e3 100644
--- a/include/asm-x86/irqflags.h
+++ b/include/asm-x86/irqflags.h
@@ -112,13 +112,13 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #ifdef CONFIG_X86_64
 #define INTERRUPT_RETURN	iretq
-#define ENABLE_INTERRUPTS_SYSCALL_RET			\
+#define USERSP_SYSRET					\
 			movq	%gs:pda_oldrsp, %rsp;	\
 			swapgs;				\
 			sysretq;
 #else
 #define INTERRUPT_RETURN		iret
-#define ENABLE_INTERRUPTS_SYSCALL_RET	sti; sysexit
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
 #define GET_CR0_INTO_EAX		movl %cr0, %eax
 #endif
 
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 82cdcde4b222..2668903b70f5 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -141,8 +141,9 @@ struct pv_cpu_ops {
 	u64 (*read_pmc)(int counter);
 	unsigned long long (*read_tscp)(unsigned int *aux);
 
-	/* These two are jmp to, not actually called. */
-	void (*irq_enable_syscall_ret)(void);
+	/* These three are jmp to, not actually called. */
+	void (*irq_enable_sysexit)(void);
+	void (*usersp_sysret)(void);
 	void (*iret)(void);
 
 	void (*swapgs)(void);
@@ -1480,10 +1481,10 @@ static inline unsigned long __raw_local_irq_save(void)
 		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);	\
 		  PV_RESTORE_REGS;)
 
-#define ENABLE_INTERRUPTS_SYSCALL_RET					\
-	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_syscall_ret),\
+#define ENABLE_INTERRUPTS_SYSEXIT					\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),	\
 		  CLBR_NONE,						\
-		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_syscall_ret))
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 
 
 #ifdef CONFIG_X86_32
@@ -1504,6 +1505,10 @@ static inline unsigned long __raw_local_irq_save(void)
 	movq %rax, %rcx;					\
 	xorq %rax, %rax;
 
+#define USERSP_SYSRET						\
+	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usersp_sysret),	\
+		  CLBR_NONE,					\
+		  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usersp_sysret))
 #endif
 
 #endif	/* __ASSEMBLY__ */