diff options
author | Denys Vlasenko <dvlasenk@redhat.com> | 2015-02-26 17:40:27 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-03-04 16:50:49 -0500 |
commit | 76f5df43cab5e765c0bd42289103e8f625813ae1 (patch) | |
tree | f83868c1222fe87789a149bd66ae726f919a9f3d /arch/x86/ia32/ia32entry.S | |
parent | 6e1327bd2b20ccb387fcddc0caa605cb253cc458 (diff) |
x86/asm/entry/64: Always allocate a complete "struct pt_regs" on the kernel stack
The 64-bit entry code was using six fewer stack slots by not
saving/restoring registers which are callee-preserved according
to the C ABI, and was not allocating space for them.
Only when syscalls needed a complete "struct pt_regs" was
the complete area allocated and filled in.
As an additional twist, on interrupt entry a "slightly less
truncated pt_regs" trick is used, to make nested interrupt
stacks easier to unwind.
This proved to be a source of significant obfuscation and subtle
bugs. For example, 'stub_fork' had to pop the return address,
extend the struct, save registers, and push the return address back,
which is ugly. 'ia32_ptregs_common' pops the return address and
"returns" via a jmp insn, throwing a wrench into the CPU return stack cache.
This patch changes the code to always allocate a complete
"struct pt_regs" on the kernel stack. The saving of registers
is still done lazily.
"Partial pt_regs" trick on interrupt stack is retained.
Macros which manipulate "struct pt_regs" on stack are reworked:
- ALLOC_PT_GPREGS_ON_STACK allocates the structure.
- SAVE_C_REGS saves to it those registers which are clobbered
by C code.
- SAVE_EXTRA_REGS saves to it all other registers.
- Corresponding RESTORE_* and REMOVE_PT_GPREGS_FROM_STACK macros
reverse it.
'ia32_ptregs_common', 'stub_fork' and friends lost their ugly dance
with the return pointer.
LOAD_ARGS32 in ia32entry.S now uses symbolic stack offsets
instead of magic numbers.
'error_entry' and 'save_paranoid' now use SAVE_C_REGS +
SAVE_EXTRA_REGS instead of having it open-coded yet again.
The patch was run-tested: 64-bit executables, 32-bit executables,
and strace all work.
Timing tests did not show a measurable difference in 32-bit
and 64-bit syscalls.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Will Drewry <wad@chromium.org>
Link: http://lkml.kernel.org/r/1423778052-21038-2-git-send-email-dvlasenk@redhat.com
Link: http://lkml.kernel.org/r/b89763d354aa23e670b9bdf3a40ae320320a7c2e.1424989793.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/ia32/ia32entry.S')
-rw-r--r-- | arch/x86/ia32/ia32entry.S | 47 |
1 files changed, 25 insertions, 22 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 156ebcab4ada..f4bed4971673 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -62,12 +62,12 @@ | |||
62 | */ | 62 | */ |
63 | .macro LOAD_ARGS32 offset, _r9=0 | 63 | .macro LOAD_ARGS32 offset, _r9=0 |
64 | .if \_r9 | 64 | .if \_r9 |
65 | movl \offset+16(%rsp),%r9d | 65 | movl \offset+R9(%rsp),%r9d |
66 | .endif | 66 | .endif |
67 | movl \offset+40(%rsp),%ecx | 67 | movl \offset+RCX(%rsp),%ecx |
68 | movl \offset+48(%rsp),%edx | 68 | movl \offset+RDX(%rsp),%edx |
69 | movl \offset+56(%rsp),%esi | 69 | movl \offset+RSI(%rsp),%esi |
70 | movl \offset+64(%rsp),%edi | 70 | movl \offset+RDI(%rsp),%edi |
71 | movl %eax,%eax /* zero extension */ | 71 | movl %eax,%eax /* zero extension */ |
72 | .endm | 72 | .endm |
73 | 73 | ||
@@ -144,7 +144,8 @@ ENTRY(ia32_sysenter_target) | |||
144 | CFI_REL_OFFSET rip,0 | 144 | CFI_REL_OFFSET rip,0 |
145 | pushq_cfi %rax | 145 | pushq_cfi %rax |
146 | cld | 146 | cld |
147 | SAVE_ARGS 0,1,0 | 147 | ALLOC_PT_GPREGS_ON_STACK |
148 | SAVE_C_REGS_EXCEPT_R891011 | ||
148 | /* no need to do an access_ok check here because rbp has been | 149 | /* no need to do an access_ok check here because rbp has been |
149 | 32bit zero extended */ | 150 | 32bit zero extended */ |
150 | ASM_STAC | 151 | ASM_STAC |
@@ -182,7 +183,8 @@ sysexit_from_sys_call: | |||
182 | andl $~0x200,EFLAGS-ARGOFFSET(%rsp) | 183 | andl $~0x200,EFLAGS-ARGOFFSET(%rsp) |
183 | movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ | 184 | movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ |
184 | CFI_REGISTER rip,rdx | 185 | CFI_REGISTER rip,rdx |
185 | RESTORE_ARGS 0,24,0,0,0,0 | 186 | RESTORE_RSI_RDI |
187 | REMOVE_PT_GPREGS_FROM_STACK 3*8 | ||
186 | xorq %r8,%r8 | 188 | xorq %r8,%r8 |
187 | xorq %r9,%r9 | 189 | xorq %r9,%r9 |
188 | xorq %r10,%r10 | 190 | xorq %r10,%r10 |
@@ -256,13 +258,13 @@ sysenter_tracesys: | |||
256 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 258 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
257 | jz sysenter_auditsys | 259 | jz sysenter_auditsys |
258 | #endif | 260 | #endif |
259 | SAVE_REST | 261 | SAVE_EXTRA_REGS |
260 | CLEAR_RREGS | 262 | CLEAR_RREGS |
261 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ | 263 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ |
262 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 264 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
263 | call syscall_trace_enter | 265 | call syscall_trace_enter |
264 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 266 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ |
265 | RESTORE_REST | 267 | RESTORE_EXTRA_REGS |
266 | cmpq $(IA32_NR_syscalls-1),%rax | 268 | cmpq $(IA32_NR_syscalls-1),%rax |
267 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ | 269 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ |
268 | jmp sysenter_do_call | 270 | jmp sysenter_do_call |
@@ -304,7 +306,8 @@ ENTRY(ia32_cstar_target) | |||
304 | * disabled irqs and here we enable it straight after entry: | 306 | * disabled irqs and here we enable it straight after entry: |
305 | */ | 307 | */ |
306 | ENABLE_INTERRUPTS(CLBR_NONE) | 308 | ENABLE_INTERRUPTS(CLBR_NONE) |
307 | SAVE_ARGS 8,0,0 | 309 | ALLOC_PT_GPREGS_ON_STACK 8 |
310 | SAVE_C_REGS_EXCEPT_RCX_R891011 | ||
308 | movl %eax,%eax /* zero extension */ | 311 | movl %eax,%eax /* zero extension */ |
309 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 312 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
310 | movq %rcx,RIP-ARGOFFSET(%rsp) | 313 | movq %rcx,RIP-ARGOFFSET(%rsp) |
@@ -341,7 +344,7 @@ cstar_dispatch: | |||
341 | jnz sysretl_audit | 344 | jnz sysretl_audit |
342 | sysretl_from_sys_call: | 345 | sysretl_from_sys_call: |
343 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 346 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
344 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 | 347 | RESTORE_RSI_RDI_RDX |
345 | movl RIP-ARGOFFSET(%rsp),%ecx | 348 | movl RIP-ARGOFFSET(%rsp),%ecx |
346 | CFI_REGISTER rip,rcx | 349 | CFI_REGISTER rip,rcx |
347 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 350 | movl EFLAGS-ARGOFFSET(%rsp),%r11d |
@@ -372,13 +375,13 @@ cstar_tracesys: | |||
372 | jz cstar_auditsys | 375 | jz cstar_auditsys |
373 | #endif | 376 | #endif |
374 | xchgl %r9d,%ebp | 377 | xchgl %r9d,%ebp |
375 | SAVE_REST | 378 | SAVE_EXTRA_REGS |
376 | CLEAR_RREGS 0, r9 | 379 | CLEAR_RREGS 0, r9 |
377 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 380 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
378 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 381 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
379 | call syscall_trace_enter | 382 | call syscall_trace_enter |
380 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ | 383 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ |
381 | RESTORE_REST | 384 | RESTORE_EXTRA_REGS |
382 | xchgl %ebp,%r9d | 385 | xchgl %ebp,%r9d |
383 | cmpq $(IA32_NR_syscalls-1),%rax | 386 | cmpq $(IA32_NR_syscalls-1),%rax |
384 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ | 387 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ |
@@ -433,7 +436,8 @@ ENTRY(ia32_syscall) | |||
433 | cld | 436 | cld |
434 | /* note the registers are not zero extended to the sf. | 437 | /* note the registers are not zero extended to the sf. |
435 | this could be a problem. */ | 438 | this could be a problem. */ |
436 | SAVE_ARGS 0,1,0 | 439 | ALLOC_PT_GPREGS_ON_STACK |
440 | SAVE_C_REGS_EXCEPT_R891011 | ||
437 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 441 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
438 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 442 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
439 | jnz ia32_tracesys | 443 | jnz ia32_tracesys |
@@ -446,16 +450,16 @@ ia32_sysret: | |||
446 | movq %rax,RAX-ARGOFFSET(%rsp) | 450 | movq %rax,RAX-ARGOFFSET(%rsp) |
447 | ia32_ret_from_sys_call: | 451 | ia32_ret_from_sys_call: |
448 | CLEAR_RREGS -ARGOFFSET | 452 | CLEAR_RREGS -ARGOFFSET |
449 | jmp int_ret_from_sys_call | 453 | jmp int_ret_from_sys_call |
450 | 454 | ||
451 | ia32_tracesys: | 455 | ia32_tracesys: |
452 | SAVE_REST | 456 | SAVE_EXTRA_REGS |
453 | CLEAR_RREGS | 457 | CLEAR_RREGS |
454 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 458 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
455 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 459 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
456 | call syscall_trace_enter | 460 | call syscall_trace_enter |
457 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 461 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ |
458 | RESTORE_REST | 462 | RESTORE_EXTRA_REGS |
459 | cmpq $(IA32_NR_syscalls-1),%rax | 463 | cmpq $(IA32_NR_syscalls-1),%rax |
460 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ | 464 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ |
461 | jmp ia32_do_call | 465 | jmp ia32_do_call |
@@ -492,7 +496,6 @@ GLOBAL(stub32_clone) | |||
492 | 496 | ||
493 | ALIGN | 497 | ALIGN |
494 | ia32_ptregs_common: | 498 | ia32_ptregs_common: |
495 | popq %r11 | ||
496 | CFI_ENDPROC | 499 | CFI_ENDPROC |
497 | CFI_STARTPROC32 simple | 500 | CFI_STARTPROC32 simple |
498 | CFI_SIGNAL_FRAME | 501 | CFI_SIGNAL_FRAME |
@@ -507,9 +510,9 @@ ia32_ptregs_common: | |||
507 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 510 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ |
508 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 511 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET |
509 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | 512 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ |
510 | SAVE_REST | 513 | SAVE_EXTRA_REGS 8 |
511 | call *%rax | 514 | call *%rax |
512 | RESTORE_REST | 515 | RESTORE_EXTRA_REGS 8 |
513 | jmp ia32_sysret /* misbalances the return cache */ | 516 | ret |
514 | CFI_ENDPROC | 517 | CFI_ENDPROC |
515 | END(ia32_ptregs_common) | 518 | END(ia32_ptregs_common) |