From 15e8f348db372dec21229fda5d52ae6ee7e64666 Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Mon, 23 Jun 2008 20:41:12 -0700
Subject: x86_64: remove bogus optimization in sysret_signal

This short-circuit path in sysret_signal looks wrong to me.  AFAICT, in
practice the branch is never taken--and if it were, it would go wrong.

To wit, try loading a module whose init function does
set_thread_flag(TIF_IRET), and see insmod crash (presumably with a wrong
user stack pointer).

This is because the FIXUP_TOP_OF_STACK work hasn't been done yet when we
jump around the call to ptregscall_common and get to int_with_check--where
it expects the user RSP,SS,CS and EFLAGS to have been stored by
FIXUP_TOP_OF_STACK.

I don't think it's normally possible to get to sysret_signal with no
_TIF_DO_NOTIFY_MASK bits set anyway, so these two instructions are already
superfluous.  If it ever did happen, it is harmless to call do_notify_resume
with nothing for it to do.

Signed-off-by: Roland McGrath
---
 arch/x86/kernel/entry_64.S | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8410e26f4183..a169225869cc 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -402,16 +402,12 @@ sysret_careful:
 sysret_signal:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
-	testl $_TIF_DO_NOTIFY_MASK,%edx
-	jz    1f
-
-	/* Really a signal */
 	/* edx:	work flags (arg3) */
 	leaq do_notify_resume(%rip),%rax
 	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
 	xorl %esi,%esi			# oldset -> arg2
 	call ptregscall_common
-1:	movl $_TIF_WORK_MASK,%edi
+	movl $_TIF_WORK_MASK,%edi
 	/* Use IRET because user could have changed frame. This
	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
 	DISABLE_INTERRUPTS(CLBR_NONE)
--
cgit v1.2.2
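
A note on the reproducer mentioned in the changelog above: the crash can be
provoked with a trivial module whose init function sets TIF_IRET, so that
insmod's own syscall leaves the kernel through sysret_signal without any
_TIF_DO_NOTIFY_MASK work pending.  The sketch below is illustrative only and
is not part of the patch; the file and symbol names are made up, and it
assumes a tree of this era where TIF_IRET is still defined.

/*
 * tif_iret_repro.c -- hypothetical module reproducing the crash described
 * above: set TIF_IRET from module init so the broken exit path is taken
 * before FIXUP_TOP_OF_STACK has run.
 */
#include <linux/module.h>
#include <linux/thread_info.h>

static int __init tif_iret_repro_init(void)
{
	set_thread_flag(TIF_IRET);	/* force the sysret_signal path on return */
	return 0;
}

static void __exit tif_iret_repro_exit(void)
{
}

module_init(tif_iret_repro_init);
module_exit(tif_iret_repro_exit);
MODULE_LICENSE("GPL");
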
From 86a1c34a929f30fde8ad01ea8245df61ddcf58b7 Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Mon, 23 Jun 2008 15:37:04 -0700
Subject: x86_64 syscall audit fast-path

This adds a fast path for 64-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.

This path does not need to save and restore all registers as the general
case of tracing does.  Avoiding the iret return path when syscall audit is
enabled helps performance a lot.

Signed-off-by: Roland McGrath
---
 arch/x86/kernel/entry_64.S | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/auditsc.c           |  3 ++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a169225869cc..db7d34a89d2e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -53,6 +53,12 @@
 #include
 #include

+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_X86_64	(EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_64BIT 0x80000000
+#define __AUDIT_ARCH_LE	   0x40000000
+
 	.code64

 #ifdef CONFIG_FTRACE
@@ -351,6 +357,7 @@ ENTRY(system_call_after_swapgs)
 	GET_THREAD_INFO(%rcx)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
 	jnz tracesys
+system_call_fastpath:
 	cmpq $__NR_syscall_max,%rax
 	ja badsys
 	movq %r10,%rcx
@@ -402,6 +409,10 @@ sysret_careful:
 sysret_signal:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+#ifdef CONFIG_AUDITSYSCALL
+	bt $TIF_SYSCALL_AUDIT,%edx
+	jc sysret_audit
+#endif
 	/* edx:	work flags (arg3) */
 	leaq do_notify_resume(%rip),%rax
 	leaq -ARGOFFSET(%rsp),%rdi	# &pt_regs -> arg1
@@ -418,8 +429,45 @@ badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 	jmp ret_from_sys_call

+#ifdef CONFIG_AUDITSYSCALL
+	/*
+	 * Fast path for syscall audit without full syscall trace.
+	 * We just call audit_syscall_entry() directly, and then
+	 * jump back to the normal fast path.
+	 */
+auditsys:
+	movq %r10,%r9			/* 6th arg: 4th syscall arg */
+	movq %rdx,%r8			/* 5th arg: 3rd syscall arg */
+	movq %rsi,%rcx			/* 4th arg: 2nd syscall arg */
+	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
+	movq %rax,%rsi			/* 2nd arg: syscall number */
+	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
+	call audit_syscall_entry
+	LOAD_ARGS 0			/* reload call-clobbered registers */
+	jmp system_call_fastpath
+
+	/*
+	 * Return fast path for syscall audit.  Call audit_syscall_exit()
+	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
+	 * masked off.
+	 */
+sysret_audit:
+	movq %rax,%rsi		/* second arg, syscall return value */
+	cmpq $0,%rax		/* is it < 0? */
+	setl %al		/* 1 if so, 0 if not */
+	movzbl %al,%edi		/* zero-extend that into %edi */
+	inc %edi	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+	call audit_syscall_exit
+	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+	jmp sysret_check
+#endif	/* CONFIG_AUDITSYSCALL */
+
 	/* Do syscall tracing */
 tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+	jz auditsys
+#endif
 	SAVE_REST
 	movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
 	FIXUP_TOP_OF_STACK %rdi
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aae04d7..4699950e65bd 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
 	struct audit_context *context = tsk->audit_context;
 	enum audit_state     state;

-	BUG_ON(!context);
+	if (unlikely(!context))
+		return;

 	/*
 	 * This happens only on certain architectures that make system
--
cgit v1.2.2
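
For readers who do not want to decode the register shuffling, here is a
C-level sketch, not part of the patch, of what the auditsys/sysret_audit
fast path above does.  The prototypes and the AUDITSC_SUCCESS/AUDITSC_FAILURE
encoding follow the argument comments in the assembly; audit_fastpath() and
do_syscall are made-up names used only for illustration.

#define AUDITSC_SUCCESS		1
#define AUDITSC_FAILURE		2
#define AUDIT_ARCH_X86_64	0xc000003e /* EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE */

extern void audit_syscall_entry(int arch, int major, unsigned long a1,
				unsigned long a2, unsigned long a3,
				unsigned long a4);
extern void audit_syscall_exit(int valid, long return_code);

static long audit_fastpath(int nr, unsigned long a1, unsigned long a2,
			   unsigned long a3, unsigned long a4,
			   long (*do_syscall)(void))
{
	long ret;

	/* auditsys: the syscall registers are moved into the SysV argument
	 * registers, then the normal fast path is resumed. */
	audit_syscall_entry(AUDIT_ARCH_X86_64, nr, a1, a2, a3, a4);
	ret = do_syscall();		/* jmp system_call_fastpath */
	/* sysret_audit: the cmpq/setl/inc sequence encodes "failure iff ret < 0" */
	audit_syscall_exit(ret < 0 ? AUDITSC_FAILURE : AUDITSC_SUCCESS, ret);
	return ret;
}
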
From 5cbf1565f29eb57a86a305b08836613508e294d7 Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Tue, 24 Jun 2008 01:13:31 -0700
Subject: x86_64 ia32 syscall audit fast-path

This adds fast paths for 32-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.

These paths do not need to save and restore all registers as the general
case of tracing does.  Avoiding the iret return path when syscall audit is
enabled helps performance a lot.

Signed-off-by: Roland McGrath
---
 arch/x86/ia32/ia32entry.S  | 91 ++++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/kernel/entry_64.S |  1 +
 2 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 23d146ce676b..021d71bc69b5 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -15,6 +15,16 @@
 #include
 #include

+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE	   0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+#define sysexit_audit int_ret_from_sys_call
+#define sysretl_audit int_ret_from_sys_call
+#endif
+
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)

 .macro IA32_ARG_FIXUP noebp=0
@@ -148,13 +158,15 @@ ENTRY(ia32_sysenter_target)
 	ja ia32_badsys
 sysenter_do_call:
 	IA32_ARG_FIXUP 1
+sysenter_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz sysexit_audit
+sysexit_from_sys_call:
 	andl $~TS_COMPAT,TI_status(%r10)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl $~0x200,EFLAGS-R11(%rsp)
@@ -170,9 +182,63 @@ sysenter_do_call:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS_SYSEXIT32

-sysenter_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+	.macro auditsys_entry_common
+	movl %esi,%r9d			/* 6th arg: 4th syscall arg */
+	movl %edx,%r8d			/* 5th arg: 3rd syscall arg */
+	/* (already in %ecx)		   4th arg: 2nd syscall arg */
+	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
+	movl %eax,%esi			/* 2nd arg: syscall number */
+	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
+	call audit_syscall_entry
+	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
+	cmpl $(IA32_NR_syscalls-1),%eax
+	ja ia32_badsys
+	movl %ebx,%edi			/* reload 1st syscall arg */
+	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
+	movl RDX-ARGOFFSET(%rsp),%edx	/* reload 3rd syscall arg */
+	movl RSI-ARGOFFSET(%rsp),%ecx	/* reload 4th syscall arg */
+	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
+	.endm
+
+	.macro auditsys_exit exit
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jnz int_ret_from_sys_call
+	TRACE_IRQS_ON
+	sti
+	movl %eax,%esi		/* second arg, syscall return value */
+	cmpl $0,%eax		/* is it < 0? */
+	setl %al		/* 1 if so, 0 if not */
+	movzbl %al,%edi		/* zero-extend that into %edi */
+	inc %edi	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+	call audit_syscall_exit
+	GET_THREAD_INFO(%r10)
+	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
+	movl RBP-ARGOFFSET(%rsp),%ebp	/* reload user register value */
+	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+	cli
+	TRACE_IRQS_OFF
+	testl %edi,TI_flags(%r10)
+	jnz int_with_check
+	jmp \exit
+	.endm
+
+sysenter_auditsys:
 	CFI_RESTORE_STATE
+	auditsys_entry_common
+	movl %ebp,%r9d		/* reload 6th syscall arg */
+	jmp sysenter_dispatch
+
+sysexit_audit:
+	auditsys_exit sysexit_from_sys_call
+#endif
+
+sysenter_tracesys:
 	xchgl %r9d,%ebp
+#ifdef CONFIG_AUDITSYSCALL
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jz sysenter_auditsys
+#endif
 	SAVE_REST
 	CLEAR_RREGS
 	movq %r9,R9(%rsp)
@@ -252,13 +318,15 @@ cstar_do_call:
 	cmpl $IA32_NR_syscalls-1,%eax
 	ja ia32_badsys
 	IA32_ARG_FIXUP 1
+cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz sysretl_audit
+sysretl_from_sys_call:
 	andl $~TS_COMPAT,TI_status(%r10)
 	RESTORE_ARGS 1,-ARG_SKIP,1,1,1
 	movl RIP-ARGOFFSET(%rsp),%ecx
@@ -270,8 +338,23 @@ cstar_do_call:
 	CFI_RESTORE rsp
 	USERGS_SYSRET32

-cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+cstar_auditsys:
 	CFI_RESTORE_STATE
+	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
+	auditsys_entry_common
+	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
+	jmp cstar_dispatch
+
+sysretl_audit:
+	auditsys_exit sysretl_from_sys_call
+#endif
+
+cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jz cstar_auditsys
+#endif
 	xchgl %r9d,%ebp
 	SAVE_REST
 	CLEAR_RREGS
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index db7d34a89d2e..89434d439605 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -492,6 +492,7 @@ tracesys:
 	 * Has correct top of stack, but partial stack frame.
 	 */
 	.globl int_ret_from_sys_call
+	.globl int_with_check
 int_ret_from_sys_call:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
--
cgit v1.2.2
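
The AUDIT_ARCH_I386 token defined above, like AUDIT_ARCH_X86_64 in the
previous patch, is just an ELF machine number from <linux/elf-em.h> with the
word-size and endianness bits or'd in, and it is the value audit userspace
sees in the arch= field of a record.  A small stand-alone sketch, not part of
the patch, assuming EM_386 == 3 and EM_X86_64 == 62, showing how the
constants decompose:

#include <stdio.h>

#define __AUDIT_ARCH_64BIT	0x80000000u
#define __AUDIT_ARCH_LE		0x40000000u
#define EM_386			3
#define EM_X86_64		62

int main(void)
{
	/* Prints 0x40000003 and 0xc000003e respectively. */
	printf("AUDIT_ARCH_I386   = 0x%08x\n", EM_386 | __AUDIT_ARCH_LE);
	printf("AUDIT_ARCH_X86_64 = 0x%08x\n",
	       EM_X86_64 | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE);
	return 0;
}
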
From af0575bba0f46dd9054d46e0a88c57afad3bf4d2 Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Tue, 24 Jun 2008 04:16:52 -0700
Subject: i386 syscall audit fast-path

This adds fast paths for 32-bit syscall entry and exit when
TIF_SYSCALL_AUDIT is set, but no other kind of syscall tracing.

These paths do not need to save and restore all registers as the general
case of tracing does.  Avoiding the iret return path when syscall audit is
enabled helps performance a lot.

Signed-off-by: Roland McGrath
---
 arch/x86/kernel/entry_32.S | 55 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cdfd94cc6b14..109792bc7cfa 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,16 @@
 #include
 #include

+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE	   0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+#define sysenter_audit	syscall_trace_entry
+#define sysexit_audit	syscall_exit_work
+#endif
+
 /*
  * We use macros for low-level operations which need to be overridden
  * for paravirtualization.  The following will never clobber any registers:
@@ -333,7 +343,8 @@ sysenter_past_esp:

 	/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
 	testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
-	jnz syscall_trace_entry
+	jnz sysenter_audit
+sysenter_do_call:
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
@@ -343,7 +354,8 @@ sysenter_past_esp:
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
-	jne syscall_exit_work
+	jne sysexit_audit
+sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
 	movl PT_EIP(%esp), %edx
 	movl PT_OLDESP(%esp), %ecx
@@ -351,6 +363,45 @@ sysenter_past_esp:
 	TRACE_IRQS_ON
 1:	mov  PT_FS(%esp), %fs
 	ENABLE_INTERRUPTS_SYSEXIT
+
+#ifdef CONFIG_AUDITSYSCALL
+sysenter_audit:
+	testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
+	jnz syscall_trace_entry
+	addl $4,%esp
+	CFI_ADJUST_CFA_OFFSET -4
+	/* %esi already in 8(%esp)	   6th arg: 4th syscall arg */
+	/* %edx already in 4(%esp)	   5th arg: 3rd syscall arg */
+	/* %ecx already in 0(%esp)	   4th arg: 2nd syscall arg */
+	movl %ebx,%ecx			/* 3rd arg: 1st syscall arg */
+	movl %eax,%edx			/* 2nd arg: syscall number */
+	movl $AUDIT_ARCH_I386,%eax	/* 1st arg: audit arch */
+	call audit_syscall_entry
+	pushl %ebx
+	CFI_ADJUST_CFA_OFFSET 4
+	movl PT_EAX(%esp),%eax		/* reload syscall number */
+	jmp sysenter_do_call
+
+sysexit_audit:
+	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+	jne syscall_exit_work
+	TRACE_IRQS_ON
+	ENABLE_INTERRUPTS(CLBR_ANY)
+	movl %eax,%edx		/* second arg, syscall return value */
+	cmpl $0,%eax		/* is it < 0? */
+	setl %al		/* 1 if so, 0 if not */
+	movzbl %al,%eax		/* zero-extend that */
+	inc %eax	/* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+	call audit_syscall_exit
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_OFF
+	movl TI_flags(%ebp), %ecx
+	testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
+	jne syscall_exit_work
+	movl PT_EAX(%esp),%eax	/* reload syscall return value */
+	jmp sysenter_exit
+#endif
+
 	CFI_ENDPROC
 .pushsection .fixup,"ax"
 2:	movl $0,PT_FS(%esp)
--
cgit v1.2.2
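
One subtle point in sysenter_audit above: only %eax, %edx and %ecx are loaded
before calling audit_syscall_entry(), while the 4th-6th arguments are taken
from 0/4/8(%esp) as the comments note.  That relies on the i386 kernel being
built with -mregparm=3, so the first three C arguments travel in
%eax/%edx/%ecx and the rest come off the stack.  A declaration-level sketch,
not part of the patch and with a made-up attribute macro name, of the
convention being relied on:

/* The real kernel gets this calling convention globally from -mregparm=3. */
#define i386_kernel_call __attribute__((regparm(3)))

/* arch -> %eax, major -> %edx, a1 -> %ecx; a2/a3/a4 are read off the stack,
 * where the sysenter entry code already left the user's %ecx/%edx/%esi. */
extern void i386_kernel_call audit_syscall_entry(int arch, int major,
						 unsigned long a1,
						 unsigned long a2,
						 unsigned long a3,
						 unsigned long a4);
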