diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-08-20 11:13:37 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-08-25 05:32:48 -0400 |
commit | 3302caddf10ad50710dbb7a94ccbdb3ad5bf1412 (patch) | |
tree | 4a25e5f4dffa272d4c86450290bbeff3c7be6005 | |
parent | 01e09a28167c338684606b70797422da3bbb6650 (diff) |
ARM: entry: efficiency cleanups
Make the "fast" syscall return path fast again. The addition of IRQ
tracing and context tracking has made this path grossly inefficient.
We can do much better when these options are enabled if we save the
syscall return code on the stack - we then don't need to save a bunch
of registers around every single callout to C code.
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r-- | arch/arm/include/asm/assembler.h | 16 | ||||
-rw-r--r-- | arch/arm/include/asm/thread_info.h | 20 | ||||
-rw-r--r-- | arch/arm/kernel/entry-common.S | 61 | ||||
-rw-r--r-- | arch/arm/kernel/signal.c | 6 |
4 files changed, 71 insertions, 32 deletions
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 742495eb5526..5a5504f90d5f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h | |||
@@ -108,29 +108,37 @@ | |||
108 | .endm | 108 | .endm |
109 | #endif | 109 | #endif |
110 | 110 | ||
111 | .macro asm_trace_hardirqs_off | 111 | .macro asm_trace_hardirqs_off, save=1 |
112 | #if defined(CONFIG_TRACE_IRQFLAGS) | 112 | #if defined(CONFIG_TRACE_IRQFLAGS) |
113 | .if \save | ||
113 | stmdb sp!, {r0-r3, ip, lr} | 114 | stmdb sp!, {r0-r3, ip, lr} |
115 | .endif | ||
114 | bl trace_hardirqs_off | 116 | bl trace_hardirqs_off |
117 | .if \save | ||
115 | ldmia sp!, {r0-r3, ip, lr} | 118 | ldmia sp!, {r0-r3, ip, lr} |
119 | .endif | ||
116 | #endif | 120 | #endif |
117 | .endm | 121 | .endm |
118 | 122 | ||
119 | .macro asm_trace_hardirqs_on, cond=al | 123 | .macro asm_trace_hardirqs_on, cond=al, save=1 |
120 | #if defined(CONFIG_TRACE_IRQFLAGS) | 124 | #if defined(CONFIG_TRACE_IRQFLAGS) |
121 | /* | 125 | /* |
122 | * actually the registers should be pushed and pop'd conditionally, but | 126 | * actually the registers should be pushed and pop'd conditionally, but |
123 | * after bl the flags are certainly clobbered | 127 | * after bl the flags are certainly clobbered |
124 | */ | 128 | */ |
129 | .if \save | ||
125 | stmdb sp!, {r0-r3, ip, lr} | 130 | stmdb sp!, {r0-r3, ip, lr} |
131 | .endif | ||
126 | bl\cond trace_hardirqs_on | 132 | bl\cond trace_hardirqs_on |
133 | .if \save | ||
127 | ldmia sp!, {r0-r3, ip, lr} | 134 | ldmia sp!, {r0-r3, ip, lr} |
135 | .endif | ||
128 | #endif | 136 | #endif |
129 | .endm | 137 | .endm |
130 | 138 | ||
131 | .macro disable_irq | 139 | .macro disable_irq, save=1 |
132 | disable_irq_notrace | 140 | disable_irq_notrace |
133 | asm_trace_hardirqs_off | 141 | asm_trace_hardirqs_off \save |
134 | .endm | 142 | .endm |
135 | 143 | ||
136 | .macro enable_irq | 144 | .macro enable_irq |
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..71e0ffcedf8e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h | |||
@@ -136,22 +136,18 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |||
136 | 136 | ||
137 | /* | 137 | /* |
138 | * thread information flags: | 138 | * thread information flags: |
139 | * TIF_SYSCALL_TRACE - syscall trace active | ||
140 | * TIF_SYSCAL_AUDIT - syscall auditing active | ||
141 | * TIF_SIGPENDING - signal pending | ||
142 | * TIF_NEED_RESCHED - rescheduling necessary | ||
143 | * TIF_NOTIFY_RESUME - callback before returning to user | ||
144 | * TIF_USEDFPU - FPU was used by this task this quantum (SMP) | 139 | * TIF_USEDFPU - FPU was used by this task this quantum (SMP) |
145 | * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED | 140 | * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED |
146 | */ | 141 | */ |
147 | #define TIF_SIGPENDING 0 | 142 | #define TIF_SIGPENDING 0 /* signal pending */ |
148 | #define TIF_NEED_RESCHED 1 | 143 | #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ |
149 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ | 144 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ |
150 | #define TIF_UPROBE 7 | 145 | #define TIF_UPROBE 3 /* breakpointed or singlestepping */ |
151 | #define TIF_SYSCALL_TRACE 8 | 146 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ |
152 | #define TIF_SYSCALL_AUDIT 9 | 147 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ |
153 | #define TIF_SYSCALL_TRACEPOINT 10 | 148 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ |
154 | #define TIF_SECCOMP 11 /* seccomp syscall filtering active */ | 149 | #define TIF_SECCOMP 7 /* seccomp syscall filtering active */ |
150 | |||
155 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | 151 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ |
156 | #define TIF_USING_IWMMXT 17 | 152 | #define TIF_USING_IWMMXT 17 |
157 | #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ | 153 | #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ |
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..dd3721d1185e 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S | |||
@@ -24,35 +24,55 @@ | |||
24 | 24 | ||
25 | 25 | ||
26 | .align 5 | 26 | .align 5 |
27 | #if !(IS_ENABLED(CONFIG_TRACE_IRQFLAGS) || IS_ENABLED(CONFIG_CONTEXT_TRACKING)) | ||
27 | /* | 28 | /* |
28 | * This is the fast syscall return path. We do as little as | 29 | * This is the fast syscall return path. We do as little as possible here, |
29 | * possible here, and this includes saving r0 back into the SVC | 30 | * such as avoiding writing r0 to the stack. We only use this path if we |
30 | * stack. | 31 | * have tracing and context tracking disabled - the overheads from those |
32 | * features make this path too inefficient. | ||
31 | */ | 33 | */ |
32 | ret_fast_syscall: | 34 | ret_fast_syscall: |
33 | UNWIND(.fnstart ) | 35 | UNWIND(.fnstart ) |
34 | UNWIND(.cantunwind ) | 36 | UNWIND(.cantunwind ) |
35 | disable_irq @ disable interrupts | 37 | disable_irq_notrace @ disable interrupts |
36 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | 38 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing |
37 | tst r1, #_TIF_SYSCALL_WORK | 39 | tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK |
38 | bne __sys_trace_return | ||
39 | tst r1, #_TIF_WORK_MASK | ||
40 | bne fast_work_pending | 40 | bne fast_work_pending |
41 | asm_trace_hardirqs_on | ||
42 | 41 | ||
43 | /* perform architecture specific actions before user return */ | 42 | /* perform architecture specific actions before user return */ |
44 | arch_ret_to_user r1, lr | 43 | arch_ret_to_user r1, lr |
45 | ct_user_enter | ||
46 | 44 | ||
47 | restore_user_regs fast = 1, offset = S_OFF | 45 | restore_user_regs fast = 1, offset = S_OFF |
48 | UNWIND(.fnend ) | 46 | UNWIND(.fnend ) |
47 | ENDPROC(ret_fast_syscall) | ||
49 | 48 | ||
50 | /* | 49 | /* Ok, we need to do extra processing, enter the slow path. */ |
51 | * Ok, we need to do extra processing, enter the slow path. | ||
52 | */ | ||
53 | fast_work_pending: | 50 | fast_work_pending: |
54 | str r0, [sp, #S_R0+S_OFF]! @ returned r0 | 51 | str r0, [sp, #S_R0+S_OFF]! @ returned r0 |
55 | work_pending: | 52 | /* fall through to work_pending */ |
53 | #else | ||
54 | /* | ||
55 | * The "replacement" ret_fast_syscall for when tracing or context tracking | ||
56 | * is enabled. As we will need to call out to some C functions, we save | ||
57 | * r0 first to avoid needing to save registers around each C function call. | ||
58 | */ | ||
59 | ret_fast_syscall: | ||
60 | UNWIND(.fnstart ) | ||
61 | UNWIND(.cantunwind ) | ||
62 | str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 | ||
63 | disable_irq_notrace @ disable interrupts | ||
64 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | ||
65 | tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | ||
66 | beq no_work_pending | ||
67 | UNWIND(.fnend ) | ||
68 | ENDPROC(ret_fast_syscall) | ||
69 | |||
70 | /* Slower path - fall through to work_pending */ | ||
71 | #endif | ||
72 | |||
73 | tst r1, #_TIF_SYSCALL_WORK | ||
74 | bne __sys_trace_return_nosave | ||
75 | slow_work_pending: | ||
56 | mov r0, sp @ 'regs' | 76 | mov r0, sp @ 'regs' |
57 | mov r2, why @ 'syscall' | 77 | mov r2, why @ 'syscall' |
58 | bl do_work_pending | 78 | bl do_work_pending |
@@ -64,16 +84,19 @@ work_pending: | |||
64 | 84 | ||
65 | /* | 85 | /* |
66 | * "slow" syscall return path. "why" tells us if this was a real syscall. | 86 | * "slow" syscall return path. "why" tells us if this was a real syscall. |
87 | * IRQs may be enabled here, so always disable them. Note that we use the | ||
88 | * "notrace" version to avoid calling into the tracing code unnecessarily. | ||
89 | * do_work_pending() will update this state if necessary. | ||
67 | */ | 90 | */ |
68 | ENTRY(ret_to_user) | 91 | ENTRY(ret_to_user) |
69 | ret_slow_syscall: | 92 | ret_slow_syscall: |
70 | disable_irq @ disable interrupts | 93 | disable_irq_notrace @ disable interrupts |
71 | ENTRY(ret_to_user_from_irq) | 94 | ENTRY(ret_to_user_from_irq) |
72 | ldr r1, [tsk, #TI_FLAGS] | 95 | ldr r1, [tsk, #TI_FLAGS] |
73 | tst r1, #_TIF_WORK_MASK | 96 | tst r1, #_TIF_WORK_MASK |
74 | bne work_pending | 97 | bne slow_work_pending |
75 | no_work_pending: | 98 | no_work_pending: |
76 | asm_trace_hardirqs_on | 99 | asm_trace_hardirqs_on save = 0 |
77 | 100 | ||
78 | /* perform architecture specific actions before user return */ | 101 | /* perform architecture specific actions before user return */ |
79 | arch_ret_to_user r1, lr | 102 | arch_ret_to_user r1, lr |
@@ -251,6 +274,12 @@ __sys_trace_return: | |||
251 | bl syscall_trace_exit | 274 | bl syscall_trace_exit |
252 | b ret_slow_syscall | 275 | b ret_slow_syscall |
253 | 276 | ||
277 | __sys_trace_return_nosave: | ||
278 | asm_trace_hardirqs_off save=0 | ||
279 | mov r0, sp | ||
280 | bl syscall_trace_exit | ||
281 | b ret_slow_syscall | ||
282 | |||
254 | .align 5 | 283 | .align 5 |
255 | #ifdef CONFIG_ALIGNMENT_TRAP | 284 | #ifdef CONFIG_ALIGNMENT_TRAP |
256 | .type __cr_alignment, #object | 285 | .type __cr_alignment, #object |
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 423663e23791..b6cda06b455f 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c | |||
@@ -562,6 +562,12 @@ static int do_signal(struct pt_regs *regs, int syscall) | |||
562 | asmlinkage int | 562 | asmlinkage int |
563 | do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) | 563 | do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) |
564 | { | 564 | { |
565 | /* | ||
566 | * The assembly code enters us with IRQs off, but it hasn't | ||
567 | * informed the tracing code of that for efficiency reasons. | ||
568 | * Update the trace code with the current status. | ||
569 | */ | ||
570 | trace_hardirqs_off(); | ||
565 | do { | 571 | do { |
566 | if (likely(thread_flags & _TIF_NEED_RESCHED)) { | 572 | if (likely(thread_flags & _TIF_NEED_RESCHED)) { |
567 | schedule(); | 573 | schedule(); |