about summary refs log tree commit diff stats
path: root/arch/x86/ia32
diff options
context:
space:
mode:
author Denys Vlasenko <dvlasenk@redhat.com> 2015-02-26 17:40:32 -0500
committer Ingo Molnar <mingo@kernel.org> 2015-03-04 16:50:51 -0500
commit b87cf63e2a5fbe3b368d5f5e5708e585b0fb3f84 (patch)
tree a99e7a8ebef93d88347bc931fe2cb5b0ea5d47eb /arch/x86/ia32
parent 050273d19b94f2adf9d35979cee949d6b6a9df84 (diff)
x86/asm/entry: Add comments about various syscall instructions
SYSCALL/SYSRET and SYSENTER/SYSEXIT have weird semantics.
Moreover, they differ in 32- and 64-bit mode.

What is saved? What is not? Is rsp set? Are interrupts disabled?
People tend to not remember these details well enough.

This patch adds comments which explain in detail
what registers are modified by each of these instructions.
The comments are placed immediately before corresponding
entry and exit points.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/a94b98b63527797c871a81402ff5060b18fa880a.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/ia32')
-rw-r--r-- arch/x86/ia32/ia32entry.S 133
1 files changed, 83 insertions, 50 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index e99f8a5be2df..b5670564a1fb 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -99,22 +99,25 @@ ENDPROC(native_irq_enable_sysexit)
99/* 99/*
100 * 32bit SYSENTER instruction entry. 100 * 32bit SYSENTER instruction entry.
101 * 101 *
102 * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
103 * IF and VM in rflags are cleared (IOW: interrupts are off).
104 * SYSENTER does not save anything on the stack,
105 * and does not save old rip (!!!) and rflags.
106 *
102 * Arguments: 107 * Arguments:
103 * %eax System call number. 108 * eax system call number
104 * %ebx Arg1 109 * ebx arg1
105 * %ecx Arg2 110 * ecx arg2
106 * %edx Arg3 111 * edx arg3
107 * %esi Arg4 112 * esi arg4
108 * %edi Arg5 113 * edi arg5
109 * %ebp user stack 114 * ebp user stack
110 * 0(%ebp) Arg6 115 * 0(%ebp) arg6
111 * 116 *
112 * Interrupts off.
113 *
114 * This is purely a fast path. For anything complicated we use the int 0x80 117 * This is purely a fast path. For anything complicated we use the int 0x80
115 * path below. Set up a complete hardware stack frame to share code 118 * path below. We set up a complete hardware stack frame to share code
116 * with the int 0x80 path. 119 * with the int 0x80 path.
117 */ 120 */
118 ENTRY(ia32_sysenter_target) 121 ENTRY(ia32_sysenter_target)
119 CFI_STARTPROC32 simple 122 CFI_STARTPROC32 simple
120 CFI_SIGNAL_FRAME 123 CFI_SIGNAL_FRAME
@@ -128,6 +131,7 @@ ENTRY(ia32_sysenter_target)
128 * disabled irqs, here we enable it straight after entry: 131 * disabled irqs, here we enable it straight after entry:
129 */ 132 */
130 ENABLE_INTERRUPTS(CLBR_NONE) 133 ENABLE_INTERRUPTS(CLBR_NONE)
134 /* Construct iret frame (ss,rsp,rflags,cs,rip) */
131 movl %ebp,%ebp /* zero extension */ 135 movl %ebp,%ebp /* zero extension */
132 pushq_cfi $__USER32_DS 136 pushq_cfi $__USER32_DS
133 /*CFI_REL_OFFSET ss,0*/ 137 /*CFI_REL_OFFSET ss,0*/
@@ -140,14 +144,19 @@ ENTRY(ia32_sysenter_target)
140 pushq_cfi $__USER32_CS 144 pushq_cfi $__USER32_CS
141 /*CFI_REL_OFFSET cs,0*/ 145 /*CFI_REL_OFFSET cs,0*/
142 movl %eax, %eax 146 movl %eax, %eax
147 /* Store thread_info->sysenter_return in rip stack slot */
143 pushq_cfi %r10 148 pushq_cfi %r10
144 CFI_REL_OFFSET rip,0 149 CFI_REL_OFFSET rip,0
150 /* Store orig_ax */
145 pushq_cfi %rax 151 pushq_cfi %rax
152 /* Construct the rest of "struct pt_regs" */
146 cld 153 cld
147 ALLOC_PT_GPREGS_ON_STACK 154 ALLOC_PT_GPREGS_ON_STACK
148 SAVE_C_REGS_EXCEPT_R891011 155 SAVE_C_REGS_EXCEPT_R891011
149 /* no need to do an access_ok check here because rbp has been 156 /*
150 32bit zero extended */ 157 * no need to do an access_ok check here because rbp has been
158 * 32bit zero extended
159 */
151 ASM_STAC 160 ASM_STAC
152 1: movl (%rbp),%ebp 161 1: movl (%rbp),%ebp
153 _ASM_EXTABLE(1b,ia32_badarg) 162 _ASM_EXTABLE(1b,ia32_badarg)
@@ -184,6 +193,7 @@ sysexit_from_sys_call:
184 movl RIP(%rsp),%edx /* User %eip */ 193 movl RIP(%rsp),%edx /* User %eip */
185 CFI_REGISTER rip,rdx 194 CFI_REGISTER rip,rdx
186 RESTORE_RSI_RDI 195 RESTORE_RSI_RDI
196 /* pop everything except ss,rsp,rflags slots */
187 REMOVE_PT_GPREGS_FROM_STACK 3*8 197 REMOVE_PT_GPREGS_FROM_STACK 3*8
188 xorq %r8,%r8 198 xorq %r8,%r8
189 xorq %r9,%r9 199 xorq %r9,%r9
@@ -194,6 +204,10 @@ sysexit_from_sys_call:
194 popq_cfi %rcx /* User %esp */ 204 popq_cfi %rcx /* User %esp */
195 CFI_REGISTER rsp,rcx 205 CFI_REGISTER rsp,rcx
196 TRACE_IRQS_ON 206 TRACE_IRQS_ON
207 /*
208 * 32bit SYSEXIT restores eip from edx, esp from ecx.
209 * cs and ss are loaded from MSRs.
210 */
197 ENABLE_INTERRUPTS_SYSEXIT32 211 ENABLE_INTERRUPTS_SYSEXIT32
198 212
199 CFI_RESTORE_STATE 213 CFI_RESTORE_STATE
@@ -274,23 +288,33 @@ ENDPROC(ia32_sysenter_target)
274/* 288/*
275 * 32bit SYSCALL instruction entry. 289 * 32bit SYSCALL instruction entry.
276 * 290 *
291 * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
292 * then loads new ss, cs, and rip from previously programmed MSRs.
293 * rflags gets masked by a value from another MSR (so CLD and CLAC
294 * are not needed). SYSCALL does not save anything on the stack
295 * and does not change rsp.
296 *
297 * Note: rflags saving+masking-with-MSR happens only in Long mode
298 * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
299 * Don't get confused: rflags saving+masking depends on Long Mode Active bit
300 * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
301 * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
302 *
277 * Arguments: 303 * Arguments:
278 * %eax System call number. 304 * eax system call number
279 * %ebx Arg1 305 * ecx return address
280 * %ecx return EIP 306 * ebx arg1
281 * %edx Arg3 307 * ebp arg2 (note: not saved in the stack frame, should not be touched)
282 * %esi Arg4 308 * edx arg3
283 * %edi Arg5 309 * esi arg4
284 * %ebp Arg2 [note: not saved in the stack frame, should not be touched] 310 * edi arg5
285 * %esp user stack 311 * esp user stack
286 * 0(%esp) Arg6 312 * 0(%esp) arg6
287 * 313 *
288 * Interrupts off.
289 *
290 * This is purely a fast path. For anything complicated we use the int 0x80 314 * This is purely a fast path. For anything complicated we use the int 0x80
291 * path below. Set up a complete hardware stack frame to share code 315 * path below. We set up a complete hardware stack frame to share code
292 * with the int 0x80 path. 316 * with the int 0x80 path.
293 */ 317 */
294 ENTRY(ia32_cstar_target) 318 ENTRY(ia32_cstar_target)
295 CFI_STARTPROC32 simple 319 CFI_STARTPROC32 simple
296 CFI_SIGNAL_FRAME 320 CFI_SIGNAL_FRAME
@@ -306,7 +330,7 @@ ENTRY(ia32_cstar_target)
306 * disabled irqs and here we enable it straight after entry: 330 * disabled irqs and here we enable it straight after entry:
307 */ 331 */
308 ENABLE_INTERRUPTS(CLBR_NONE) 332 ENABLE_INTERRUPTS(CLBR_NONE)
309 ALLOC_PT_GPREGS_ON_STACK 8 333 ALLOC_PT_GPREGS_ON_STACK 8 /* +8: space for orig_ax */
310 SAVE_C_REGS_EXCEPT_RCX_R891011 334 SAVE_C_REGS_EXCEPT_RCX_R891011
311 movl %eax,%eax /* zero extension */ 335 movl %eax,%eax /* zero extension */
312 movq %rax,ORIG_RAX(%rsp) 336 movq %rax,ORIG_RAX(%rsp)
@@ -320,9 +344,11 @@ ENTRY(ia32_cstar_target)
320 /*CFI_REL_OFFSET rflags,EFLAGS*/ 344 /*CFI_REL_OFFSET rflags,EFLAGS*/
321 movq %r8,RSP(%rsp) 345 movq %r8,RSP(%rsp)
322 CFI_REL_OFFSET rsp,RSP 346 CFI_REL_OFFSET rsp,RSP
323 /* no need to do an access_ok check here because r8 has been 347 /* iret stack frame is complete now */
324 32bit zero extended */ 348 /*
325 /* hardware stack frame is complete now */ 349 * no need to do an access_ok check here because r8 has been
350 * 32bit zero extended
351 */
326 ASM_STAC 352 ASM_STAC
327 1: movl (%r8),%r9d 353 1: movl (%r8),%r9d
328 _ASM_EXTABLE(1b,ia32_badarg) 354 _ASM_EXTABLE(1b,ia32_badarg)
@@ -355,8 +381,15 @@ sysretl_from_sys_call:
355 TRACE_IRQS_ON 381 TRACE_IRQS_ON
356 movl RSP(%rsp),%esp 382 movl RSP(%rsp),%esp
357 CFI_RESTORE rsp 383 CFI_RESTORE rsp
384 /*
385 * 64bit->32bit SYSRET restores eip from ecx,
386 * eflags from r11 (but RF and VM bits are forced to 0),
387 * cs and ss are loaded from MSRs.
388 * (Note: 32bit->32bit SYSRET is different: since r11
389 * does not exist, it merely sets eflags.IF=1).
390 */
358 USERGS_SYSRET32 391 USERGS_SYSRET32
359 392
360 #ifdef CONFIG_AUDITSYSCALL 393 #ifdef CONFIG_AUDITSYSCALL
361 cstar_auditsys: 394 cstar_auditsys:
362 CFI_RESTORE_STATE 395 CFI_RESTORE_STATE
@@ -394,26 +427,26 @@ ia32_badarg:
394 jmp ia32_sysret 427 jmp ia32_sysret
395 CFI_ENDPROC 428 CFI_ENDPROC
396 429
397/* 430/*
398 * Emulated IA32 system calls via int 0x80. 431 * Emulated IA32 system calls via int 0x80.
399 * 432 *
400 * Arguments: 433 * Arguments:
401 * %eax System call number. 434 * eax system call number
402 * %ebx Arg1 435 * ebx arg1
403 * %ecx Arg2 436 * ecx arg2
404 * %edx Arg3 437 * edx arg3
405 * %esi Arg4 438 * esi arg4
406 * %edi Arg5 439 * edi arg5
407 * %ebp Arg6 [note: not saved in the stack frame, should not be touched] 440 * ebp arg6 (note: not saved in the stack frame, should not be touched)
408 * 441 *
409 * Notes: 442 * Notes:
410 * Uses the same stack frame as the x86-64 version. 443 * Uses the same stack frame as the x86-64 version.
411 * All registers except %eax must be saved (but ptrace may violate that) 444 * All registers except eax must be saved (but ptrace may violate that).
412 * Arguments are zero extended. For system calls that want sign extension and 445 * Arguments are zero extended. For system calls that want sign extension and
413 * take long arguments a wrapper is needed. Most calls can just be called 446 * take long arguments a wrapper is needed. Most calls can just be called
414 * directly. 447 * directly.
415 * Assumes it is only called from user space and entered with interrupts off. 448 * Assumes it is only called from user space and entered with interrupts off.
416 */ 449 */
417 450
418 ENTRY(ia32_syscall) 451 ENTRY(ia32_syscall)
419 CFI_STARTPROC32 simple 452 CFI_STARTPROC32 simple
@@ -432,7 +465,7 @@ ENTRY(ia32_syscall)
432 */ 465 */
433 ENABLE_INTERRUPTS(CLBR_NONE) 466 ENABLE_INTERRUPTS(CLBR_NONE)
434 movl %eax,%eax 467 movl %eax,%eax
435 pushq_cfi %rax 468 pushq_cfi %rax /* store orig_ax */
436 cld 469 cld
437 /* note the registers are not zero extended to the sf. 470 /* note the registers are not zero extended to the sf.
438 this could be a problem. */ 471 this could be a problem. */