diff options
author | David Woodhouse <dwmw2@infradead.org> | 2005-11-15 13:52:18 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2006-01-08 22:49:01 -0500 |
commit | 401d1f029bebb7153ca704997772113dc36d9527 (patch) | |
tree | 01a3d649ac591d7a1fb910ce29a7223ffb629f9a /arch/powerpc/kernel/entry_64.S | |
parent | 1cd8e506209223ed10da805d99be55e268f4023c (diff) |
[PATCH] syscall entry/exit revamp
This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.
The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.
The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.
The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...
Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.
It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/kernel/entry_64.S')
-rw-r--r-- | arch/powerpc/kernel/entry_64.S | 214 |
1 files changed, 115 insertions, 99 deletions
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index bce33a38399..0bff31f166d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -113,9 +113,7 @@ system_call_common: | |||
113 | addi r9,r1,STACK_FRAME_OVERHEAD | 113 | addi r9,r1,STACK_FRAME_OVERHEAD |
114 | #endif | 114 | #endif |
115 | clrrdi r11,r1,THREAD_SHIFT | 115 | clrrdi r11,r1,THREAD_SHIFT |
116 | li r12,0 | ||
117 | ld r10,TI_FLAGS(r11) | 116 | ld r10,TI_FLAGS(r11) |
118 | stb r12,TI_SC_NOERR(r11) | ||
119 | andi. r11,r10,_TIF_SYSCALL_T_OR_A | 117 | andi. r11,r10,_TIF_SYSCALL_T_OR_A |
120 | bne- syscall_dotrace | 118 | bne- syscall_dotrace |
121 | syscall_dotrace_cont: | 119 | syscall_dotrace_cont: |
@@ -144,24 +142,12 @@ system_call: /* label this so stack traces look sane */ | |||
144 | bctrl /* Call handler */ | 142 | bctrl /* Call handler */ |
145 | 143 | ||
146 | syscall_exit: | 144 | syscall_exit: |
145 | std r3,RESULT(r1) | ||
147 | #ifdef SHOW_SYSCALLS | 146 | #ifdef SHOW_SYSCALLS |
148 | std r3,GPR3(r1) | ||
149 | bl .do_show_syscall_exit | 147 | bl .do_show_syscall_exit |
150 | ld r3,GPR3(r1) | 148 | ld r3,RESULT(r1) |
151 | #endif | 149 | #endif |
152 | std r3,RESULT(r1) | ||
153 | ld r5,_CCR(r1) | ||
154 | li r10,-_LAST_ERRNO | ||
155 | cmpld r3,r10 | ||
156 | clrrdi r12,r1,THREAD_SHIFT | 150 | clrrdi r12,r1,THREAD_SHIFT |
157 | bge- syscall_error | ||
158 | syscall_error_cont: | ||
159 | |||
160 | /* check for syscall tracing or audit */ | ||
161 | ld r9,TI_FLAGS(r12) | ||
162 | andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) | ||
163 | bne- syscall_exit_trace | ||
164 | syscall_exit_trace_cont: | ||
165 | 151 | ||
166 | /* disable interrupts so current_thread_info()->flags can't change, | 152 | /* disable interrupts so current_thread_info()->flags can't change, |
167 | and so that we don't get interrupted after loading SRR0/1. */ | 153 | and so that we don't get interrupted after loading SRR0/1. */ |
@@ -173,8 +159,13 @@ syscall_exit_trace_cont: | |||
173 | rotldi r10,r10,16 | 159 | rotldi r10,r10,16 |
174 | mtmsrd r10,1 | 160 | mtmsrd r10,1 |
175 | ld r9,TI_FLAGS(r12) | 161 | ld r9,TI_FLAGS(r12) |
176 | andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED) | 162 | li r11,-_LAST_ERRNO |
163 | andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_SAVE_NVGPRS|_TIF_NOERROR) | ||
177 | bne- syscall_exit_work | 164 | bne- syscall_exit_work |
165 | cmpld r3,r11 | ||
166 | ld r5,_CCR(r1) | ||
167 | bge- syscall_error | ||
168 | syscall_error_cont: | ||
178 | ld r7,_NIP(r1) | 169 | ld r7,_NIP(r1) |
179 | stdcx. r0,0,r1 /* to clear the reservation */ | 170 | stdcx. r0,0,r1 /* to clear the reservation */ |
180 | andi. r6,r8,MSR_PR | 171 | andi. r6,r8,MSR_PR |
@@ -193,21 +184,12 @@ syscall_exit_trace_cont: | |||
193 | rfid | 184 | rfid |
194 | b . /* prevent speculative execution */ | 185 | b . /* prevent speculative execution */ |
195 | 186 | ||
196 | syscall_enosys: | 187 | syscall_error: |
197 | li r3,-ENOSYS | ||
198 | std r3,RESULT(r1) | ||
199 | clrrdi r12,r1,THREAD_SHIFT | ||
200 | ld r5,_CCR(r1) | ||
201 | |||
202 | syscall_error: | ||
203 | lbz r11,TI_SC_NOERR(r12) | ||
204 | cmpwi 0,r11,0 | ||
205 | bne- syscall_error_cont | ||
206 | neg r3,r3 | ||
207 | oris r5,r5,0x1000 /* Set SO bit in CR */ | 188 | oris r5,r5,0x1000 /* Set SO bit in CR */ |
189 | neg r3,r3 | ||
208 | std r5,_CCR(r1) | 190 | std r5,_CCR(r1) |
209 | b syscall_error_cont | 191 | b syscall_error_cont |
210 | 192 | ||
211 | /* Traced system call support */ | 193 | /* Traced system call support */ |
212 | syscall_dotrace: | 194 | syscall_dotrace: |
213 | bl .save_nvgprs | 195 | bl .save_nvgprs |
@@ -225,21 +207,69 @@ syscall_dotrace: | |||
225 | ld r10,TI_FLAGS(r10) | 207 | ld r10,TI_FLAGS(r10) |
226 | b syscall_dotrace_cont | 208 | b syscall_dotrace_cont |
227 | 209 | ||
228 | syscall_exit_trace: | 210 | syscall_enosys: |
229 | std r3,GPR3(r1) | 211 | li r3,-ENOSYS |
230 | bl .save_nvgprs | 212 | b syscall_exit |
213 | |||
214 | syscall_exit_work: | ||
215 | /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. | ||
216 | If TIF_NOERROR is set, just save r3 as it is. */ | ||
217 | |||
218 | andi. r0,r9,_TIF_RESTOREALL | ||
219 | bne- 2f | ||
220 | cmpld r3,r11 /* r10 is -LAST_ERRNO */ | ||
221 | blt+ 1f | ||
222 | andi. r0,r9,_TIF_NOERROR | ||
223 | bne- 1f | ||
224 | ld r5,_CCR(r1) | ||
225 | neg r3,r3 | ||
226 | oris r5,r5,0x1000 /* Set SO bit in CR */ | ||
227 | std r5,_CCR(r1) | ||
228 | 1: std r3,GPR3(r1) | ||
229 | 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) | ||
230 | beq 4f | ||
231 | |||
232 | /* Clear per-syscall TIF flags if any are set, but _leave_ | ||
233 | _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that | ||
234 | yet. */ | ||
235 | |||
236 | li r11,_TIF_PERSYSCALL_MASK | ||
237 | addi r12,r12,TI_FLAGS | ||
238 | 3: ldarx r10,0,r12 | ||
239 | andc r10,r10,r11 | ||
240 | stdcx. r10,0,r12 | ||
241 | bne- 3b | ||
242 | subi r12,r12,TI_FLAGS | ||
243 | |||
244 | 4: bl save_nvgprs | ||
245 | /* Anything else left to do? */ | ||
246 | andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) | ||
247 | beq .ret_from_except_lite | ||
248 | |||
249 | /* Re-enable interrupts */ | ||
250 | mfmsr r10 | ||
251 | ori r10,r10,MSR_EE | ||
252 | mtmsrd r10,1 | ||
253 | |||
254 | andi. r0,r9,_TIF_SAVE_NVGPRS | ||
255 | bne save_user_nvgprs | ||
256 | |||
257 | /* If tracing, re-enable interrupts and do it */ | ||
258 | save_user_nvgprs_cont: | ||
259 | andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) | ||
260 | beq 5f | ||
261 | |||
231 | addi r3,r1,STACK_FRAME_OVERHEAD | 262 | addi r3,r1,STACK_FRAME_OVERHEAD |
232 | bl .do_syscall_trace_leave | 263 | bl .do_syscall_trace_leave |
233 | REST_NVGPRS(r1) | 264 | REST_NVGPRS(r1) |
234 | ld r3,GPR3(r1) | ||
235 | ld r5,_CCR(r1) | ||
236 | clrrdi r12,r1,THREAD_SHIFT | 265 | clrrdi r12,r1,THREAD_SHIFT |
237 | b syscall_exit_trace_cont | ||
238 | 266 | ||
239 | /* Stuff to do on exit from a system call. */ | 267 | /* Disable interrupts again and handle other work if any */ |
240 | syscall_exit_work: | 268 | 5: mfmsr r10 |
241 | std r3,GPR3(r1) | 269 | rldicl r10,r10,48,1 |
242 | std r5,_CCR(r1) | 270 | rotldi r10,r10,16 |
271 | mtmsrd r10,1 | ||
272 | |||
243 | b .ret_from_except_lite | 273 | b .ret_from_except_lite |
244 | 274 | ||
245 | /* Save non-volatile GPRs, if not already saved. */ | 275 | /* Save non-volatile GPRs, if not already saved. */ |
@@ -252,6 +282,52 @@ _GLOBAL(save_nvgprs) | |||
252 | std r0,_TRAP(r1) | 282 | std r0,_TRAP(r1) |
253 | blr | 283 | blr |
254 | 284 | ||
285 | |||
286 | save_user_nvgprs: | ||
287 | ld r10,TI_SIGFRAME(r12) | ||
288 | andi. r0,r9,_TIF_32BIT | ||
289 | beq- save_user_nvgprs_64 | ||
290 | |||
291 | /* 32-bit save to userspace */ | ||
292 | |||
293 | .macro savewords start, end | ||
294 | 1: stw \start,4*(\start)(r10) | ||
295 | .section __ex_table,"a" | ||
296 | .align 3 | ||
297 | .llong 1b,save_user_nvgprs_fault | ||
298 | .previous | ||
299 | .if \end - \start | ||
300 | savewords "(\start+1)",\end | ||
301 | .endif | ||
302 | .endm | ||
303 | savewords 14,31 | ||
304 | b save_user_nvgprs_cont | ||
305 | |||
306 | save_user_nvgprs_64: | ||
307 | /* 64-bit save to userspace */ | ||
308 | |||
309 | .macro savelongs start, end | ||
310 | 1: std \start,8*(\start)(r10) | ||
311 | .section __ex_table,"a" | ||
312 | .align 3 | ||
313 | .llong 1b,save_user_nvgprs_fault | ||
314 | .previous | ||
315 | .if \end - \start | ||
316 | savelongs "(\start+1)",\end | ||
317 | .endif | ||
318 | .endm | ||
319 | savelongs 14,31 | ||
320 | b save_user_nvgprs_cont | ||
321 | |||
322 | save_user_nvgprs_fault: | ||
323 | li r3,11 /* SIGSEGV */ | ||
324 | ld r4,TI_TASK(r12) | ||
325 | bl .force_sigsegv | ||
326 | |||
327 | clrrdi r12,r1,THREAD_SHIFT | ||
328 | ld r9,TI_FLAGS(r12) | ||
329 | b save_user_nvgprs_cont | ||
330 | |||
255 | /* | 331 | /* |
256 | * The sigsuspend and rt_sigsuspend system calls can call do_signal | 332 | * The sigsuspend and rt_sigsuspend system calls can call do_signal |
257 | * and thus put the process into the stopped state where we might | 333 | * and thus put the process into the stopped state where we might |
@@ -260,35 +336,6 @@ _GLOBAL(save_nvgprs) | |||
260 | * the C code. Similarly, fork, vfork and clone need the full | 336 | * the C code. Similarly, fork, vfork and clone need the full |
261 | * register state on the stack so that it can be copied to the child. | 337 | * register state on the stack so that it can be copied to the child. |
262 | */ | 338 | */ |
263 | _GLOBAL(ppc32_sigsuspend) | ||
264 | bl .save_nvgprs | ||
265 | bl .compat_sys_sigsuspend | ||
266 | b 70f | ||
267 | |||
268 | _GLOBAL(ppc64_rt_sigsuspend) | ||
269 | bl .save_nvgprs | ||
270 | bl .sys_rt_sigsuspend | ||
271 | b 70f | ||
272 | |||
273 | _GLOBAL(ppc32_rt_sigsuspend) | ||
274 | bl .save_nvgprs | ||
275 | bl .compat_sys_rt_sigsuspend | ||
276 | 70: cmpdi 0,r3,0 | ||
277 | /* If it returned an error, we need to return via syscall_exit to set | ||
278 | the SO bit in cr0 and potentially stop for ptrace. */ | ||
279 | bne syscall_exit | ||
280 | /* If sigsuspend() returns zero, we are going into a signal handler. We | ||
281 | may need to call audit_syscall_exit() to mark the exit from sigsuspend() */ | ||
282 | #ifdef CONFIG_AUDITSYSCALL | ||
283 | ld r3,PACACURRENT(r13) | ||
284 | ld r4,AUDITCONTEXT(r3) | ||
285 | cmpdi 0,r4,0 | ||
286 | beq .ret_from_except /* No audit_context: Leave immediately. */ | ||
287 | li r4, 2 /* AUDITSC_FAILURE */ | ||
288 | li r5,-4 /* It's always -EINTR */ | ||
289 | bl .audit_syscall_exit | ||
290 | #endif | ||
291 | b .ret_from_except | ||
292 | 339 | ||
293 | _GLOBAL(ppc_fork) | 340 | _GLOBAL(ppc_fork) |
294 | bl .save_nvgprs | 341 | bl .save_nvgprs |
@@ -305,37 +352,6 @@ _GLOBAL(ppc_clone) | |||
305 | bl .sys_clone | 352 | bl .sys_clone |
306 | b syscall_exit | 353 | b syscall_exit |
307 | 354 | ||
308 | _GLOBAL(ppc32_swapcontext) | ||
309 | bl .save_nvgprs | ||
310 | bl .compat_sys_swapcontext | ||
311 | b 80f | ||
312 | |||
313 | _GLOBAL(ppc64_swapcontext) | ||
314 | bl .save_nvgprs | ||
315 | bl .sys_swapcontext | ||
316 | b 80f | ||
317 | |||
318 | _GLOBAL(ppc32_sigreturn) | ||
319 | bl .compat_sys_sigreturn | ||
320 | b 80f | ||
321 | |||
322 | _GLOBAL(ppc32_rt_sigreturn) | ||
323 | bl .compat_sys_rt_sigreturn | ||
324 | b 80f | ||
325 | |||
326 | _GLOBAL(ppc64_rt_sigreturn) | ||
327 | bl .sys_rt_sigreturn | ||
328 | |||
329 | 80: cmpdi 0,r3,0 | ||
330 | blt syscall_exit | ||
331 | clrrdi r4,r1,THREAD_SHIFT | ||
332 | ld r4,TI_FLAGS(r4) | ||
333 | andi. r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) | ||
334 | beq+ 81f | ||
335 | addi r3,r1,STACK_FRAME_OVERHEAD | ||
336 | bl .do_syscall_trace_leave | ||
337 | 81: b .ret_from_except | ||
338 | |||
339 | _GLOBAL(ret_from_fork) | 355 | _GLOBAL(ret_from_fork) |
340 | bl .schedule_tail | 356 | bl .schedule_tail |
341 | REST_NVGPRS(r1) | 357 | REST_NVGPRS(r1) |