author	Jeremy Fitzhardinge <jeremy@goop.org>	2008-06-25 00:19:26 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-08 07:13:15 -0400
commit	d75cd22fdd5f7d203fb60014d426942df33dd9a6 (patch)
tree	0613fca9d594eab9a0679f80510fa11b48b31571 /arch/x86/kernel
parent	e04e0a630d8b5c621b3a8e70ff20db737d3a5728 (diff)
x86/paravirt: split sysret and sysexit
Don't conflate sysret and sysexit; they're different instructions with
different semantics, and they may be in use at the same time (at least
within the same kernel, depending on whether it's an Intel or AMD
system).

sysexit - just returns to userspace and does no register restoration of
any kind; interrupts must be enabled explicitly and atomically on the
way out (hence the native "sti; sysexit" pair).

sysret - reloads flags from %r11, so there is no need to explicitly
enable interrupts on 64-bit; it is also responsible for restoring
usermode %gs.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: xen-devel <xen-devel@lists.xensource.com>
Cc: Stephen Tweedie <sct@redhat.com>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/asm-offsets_32.c	2
-rw-r--r--	arch/x86/kernel/asm-offsets_64.c	2
-rw-r--r--	arch/x86/kernel/entry_32.S	8
-rw-r--r--	arch/x86/kernel/entry_64.S	4
-rw-r--r--	arch/x86/kernel/paravirt.c	12
-rw-r--r--	arch/x86/kernel/paravirt_patch_32.c	4
-rw-r--r--	arch/x86/kernel/paravirt_patch_64.c	4
-rw-r--r--	arch/x86/kernel/vmi_32.c	4
8 files changed, 23 insertions, 17 deletions
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 92588083950f..6649d09ad88f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,7 +111,7 @@ void foo(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
 	OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index f126c05d6170..27ac2deca465 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -62,7 +62,7 @@ int main(void)
 	OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
 	OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
 	OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
-	OFFSET(PV_CPU_irq_enable_syscall_ret, pv_cpu_ops, irq_enable_syscall_ret);
+	OFFSET(PV_CPU_usersp_sysret, pv_cpu_ops, usersp_sysret);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
 #endif
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 159a1c76d2bd..53393c306e11 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -58,7 +58,7 @@
  * for paravirtualization. The following will never clobber any registers:
  *   INTERRUPT_RETURN (aka. "iret")
  *   GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
- *   ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
+ *   ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
  *
  * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
  * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
@@ -349,7 +349,7 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
@@ -874,10 +874,10 @@ ENTRY(native_iret)
 .previous
 END(native_iret)
 
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_irq_enable_sysexit)
 	sti
 	sysexit
-END(native_irq_enable_syscall_ret)
+END(native_irq_enable_sysexit)
 #endif
 
 KPROBE_ENTRY(int3)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 6d1101469e97..0056bc4c61a9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -59,7 +59,7 @@
 #endif
 
 #ifdef CONFIG_PARAVIRT
-ENTRY(native_irq_enable_syscall_ret)
+ENTRY(native_usersp_sysret)
 	movq	%gs:pda_oldrsp,%rsp
 	swapgs
 	sysretq
@@ -275,7 +275,7 @@ sysret_check:
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
 	/*CFI_REGISTER	rflags,r11*/
-	ENABLE_INTERRUPTS_SYSCALL_RET
+	USERSP_SYSRET
 
 	CFI_RESTORE_STATE
 	/* Handle reschedules */
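
(For context: under CONFIG_PARAVIRT, USERSP_SYSRET above is a patchable
site that normally gets replaced with the native sequence; without
CONFIG_PARAVIRT it should expand to the same three instructions as
native_usersp_sysret. A rough sketch, assuming the macro sits alongside
the other irqflags/paravirt helpers of this era:

	#ifndef CONFIG_PARAVIRT
	/* Assumed native expansion, mirroring native_usersp_sysret. */
	#define USERSP_SYSRET			\
		movq	%gs:pda_oldrsp, %rsp;	\
		swapgs;				\
		sysretq
	#endif
)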
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 78c9a1b9e6b0..565ee7a990ea 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -140,7 +140,8 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
 		/* If the operation is a nop, then nop the callsite */
 		ret = paravirt_patch_nop();
 	else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
-		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret))
+		 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
+		 type == PARAVIRT_PATCH(pv_cpu_ops.usersp_sysret))
 		/* If operation requires a jmp, then jmp */
 		ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
 	else
@@ -191,7 +192,8 @@ static void native_flush_tlb_single(unsigned long addr)
 
 /* These are in entry.S */
 extern void native_iret(void);
-extern void native_irq_enable_syscall_ret(void);
+extern void native_irq_enable_sysexit(void);
+extern void native_usersp_sysret(void);
 
 static int __init print_banner(void)
 {
@@ -327,7 +329,11 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.write_idt_entry = native_write_idt_entry,
 	.load_sp0 = native_load_sp0,
 
-	.irq_enable_syscall_ret = native_irq_enable_syscall_ret,
+#ifdef CONFIG_X86_32
+	.irq_enable_sysexit = native_irq_enable_sysexit,
+#else
+	.usersp_sysret = native_usersp_sysret,
+#endif
 	.iret = native_iret,
 	.swapgs = native_swapgs,
 
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 82fc5fcab4f4..58262218781b 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -5,7 +5,7 @@ DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
 DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
 DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
 DEF_NATIVE(pv_cpu_ops, iret, "iret");
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "sti; sysexit");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
@@ -29,7 +29,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, restore_fl);
 		PATCH_SITE(pv_irq_ops, save_fl);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 7d904e138d7e..4a170552b852 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -15,7 +15,7 @@ DEF_NATIVE(pv_cpu_ops, clts, "clts");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 /* the three commands give us more control to how to return from a syscall */
-DEF_NATIVE(pv_cpu_ops, irq_enable_syscall_ret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
+DEF_NATIVE(pv_cpu_ops, usersp_sysret, "movq %gs:" __stringify(pda_oldrsp) ", %rsp; swapgs; sysretq;");
 DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
 
 unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
@@ -35,7 +35,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_cpu_ops, iret);
-		PATCH_SITE(pv_cpu_ops, irq_enable_syscall_ret);
+		PATCH_SITE(pv_cpu_ops, usersp_sysret);
 		PATCH_SITE(pv_cpu_ops, swapgs);
 		PATCH_SITE(pv_mmu_ops, read_cr2);
 		PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 956f38927aa7..946bf13b44ab 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -151,7 +151,7 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
 				      insns, ip);
 	case PARAVIRT_PATCH(pv_cpu_ops.iret):
 		return patch_internal(VMI_CALL_IRET, len, insns, ip);
-	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_syscall_ret):
+	case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
 		return patch_internal(VMI_CALL_SYSEXIT, len, insns, ip);
 	default:
 		break;
@@ -896,7 +896,7 @@ static inline int __init activate_vmi(void)
 	 * the backend. They are performance critical anyway, so requiring
 	 * a patch is not a big problem.
 	 */
-	pv_cpu_ops.irq_enable_syscall_ret = (void *)0xfeedbab0;
+	pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
 	pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
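
With the ops split, a 64-bit backend only needs to provide the sysret
path, while 32-bit code (native or VMI, as above) provides only the
sysexit one. A hypothetical illustration - the backend function name
below is made up, not part of this patch:

	/* Hypothetical 64-bit hypervisor backend hook; hv_sysret is an
	 * illustrative name, not an API introduced by this patch. */
	extern void hv_sysret(void);

	static void __init hv_init_cpu_ops(void)
	{
		pv_cpu_ops.usersp_sysret = hv_sysret;
	}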