diff options
Diffstat (limited to 'arch/x86/ia32/ia32entry.S')
-rw-r--r-- | arch/x86/ia32/ia32entry.S | 485 |
1 files changed, 287 insertions, 198 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 156ebcab4ada..a821b1cd4fa7 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -30,24 +30,13 @@ | |||
30 | 30 | ||
31 | .section .entry.text, "ax" | 31 | .section .entry.text, "ax" |
32 | 32 | ||
33 | .macro IA32_ARG_FIXUP noebp=0 | 33 | /* clobbers %rax */ |
34 | movl %edi,%r8d | 34 | .macro CLEAR_RREGS _r9=rax |
35 | .if \noebp | ||
36 | .else | ||
37 | movl %ebp,%r9d | ||
38 | .endif | ||
39 | xchg %ecx,%esi | ||
40 | movl %ebx,%edi | ||
41 | movl %edx,%edx /* zero extension */ | ||
42 | .endm | ||
43 | |||
44 | /* clobbers %eax */ | ||
45 | .macro CLEAR_RREGS offset=0, _r9=rax | ||
46 | xorl %eax,%eax | 35 | xorl %eax,%eax |
47 | movq %rax,\offset+R11(%rsp) | 36 | movq %rax,R11(%rsp) |
48 | movq %rax,\offset+R10(%rsp) | 37 | movq %rax,R10(%rsp) |
49 | movq %\_r9,\offset+R9(%rsp) | 38 | movq %\_r9,R9(%rsp) |
50 | movq %rax,\offset+R8(%rsp) | 39 | movq %rax,R8(%rsp) |
51 | .endm | 40 | .endm |
52 | 41 | ||
53 | /* | 42 | /* |
@@ -60,14 +49,14 @@ | |||
60 | * If it's -1 to make us punt the syscall, then (u32)-1 is still | 49 | * If it's -1 to make us punt the syscall, then (u32)-1 is still |
61 | * an appropriately invalid value. | 50 | * an appropriately invalid value. |
62 | */ | 51 | */ |
63 | .macro LOAD_ARGS32 offset, _r9=0 | 52 | .macro LOAD_ARGS32 _r9=0 |
64 | .if \_r9 | 53 | .if \_r9 |
65 | movl \offset+16(%rsp),%r9d | 54 | movl R9(%rsp),%r9d |
66 | .endif | 55 | .endif |
67 | movl \offset+40(%rsp),%ecx | 56 | movl RCX(%rsp),%ecx |
68 | movl \offset+48(%rsp),%edx | 57 | movl RDX(%rsp),%edx |
69 | movl \offset+56(%rsp),%esi | 58 | movl RSI(%rsp),%esi |
70 | movl \offset+64(%rsp),%edi | 59 | movl RDI(%rsp),%edi |
71 | movl %eax,%eax /* zero extension */ | 60 | movl %eax,%eax /* zero extension */ |
72 | .endm | 61 | .endm |
73 | 62 | ||
@@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit) | |||
99 | /* | 88 | /* |
100 | * 32bit SYSENTER instruction entry. | 89 | * 32bit SYSENTER instruction entry. |
101 | * | 90 | * |
91 | * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs. | ||
92 | * IF and VM in rflags are cleared (IOW: interrupts are off). | ||
93 | * SYSENTER does not save anything on the stack, | ||
94 | * and does not save old rip (!!!) and rflags. | ||
95 | * | ||
102 | * Arguments: | 96 | * Arguments: |
103 | * %eax System call number. | 97 | * eax system call number |
104 | * %ebx Arg1 | 98 | * ebx arg1 |
105 | * %ecx Arg2 | 99 | * ecx arg2 |
106 | * %edx Arg3 | 100 | * edx arg3 |
107 | * %esi Arg4 | 101 | * esi arg4 |
108 | * %edi Arg5 | 102 | * edi arg5 |
109 | * %ebp user stack | 103 | * ebp user stack |
110 | * 0(%ebp) Arg6 | 104 | * 0(%ebp) arg6 |
111 | * | 105 | * |
112 | * Interrupts off. | ||
113 | * | ||
114 | * This is purely a fast path. For anything complicated we use the int 0x80 | 106 | * This is purely a fast path. For anything complicated we use the int 0x80 |
115 | * path below. Set up a complete hardware stack frame to share code | 107 | * path below. We set up a complete hardware stack frame to share code |
116 | * with the int 0x80 path. | 108 | * with the int 0x80 path. |
117 | */ | 109 | */ |
118 | ENTRY(ia32_sysenter_target) | 110 | ENTRY(ia32_sysenter_target) |
119 | CFI_STARTPROC32 simple | 111 | CFI_STARTPROC32 simple |
120 | CFI_SIGNAL_FRAME | 112 | CFI_SIGNAL_FRAME |
121 | CFI_DEF_CFA rsp,0 | 113 | CFI_DEF_CFA rsp,0 |
122 | CFI_REGISTER rsp,rbp | 114 | CFI_REGISTER rsp,rbp |
123 | SWAPGS_UNSAFE_STACK | 115 | |
124 | movq PER_CPU_VAR(kernel_stack), %rsp | ||
125 | addq $(KERNEL_STACK_OFFSET),%rsp | ||
126 | /* | 116 | /* |
127 | * No need to follow this irqs on/off section: the syscall | 117 | * Interrupts are off on entry. |
128 | * disabled irqs, here we enable it straight after entry: | 118 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, |
119 | * it is too small to ever cause noticeable irq latency. | ||
129 | */ | 120 | */ |
121 | SWAPGS_UNSAFE_STACK | ||
122 | movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp | ||
130 | ENABLE_INTERRUPTS(CLBR_NONE) | 123 | ENABLE_INTERRUPTS(CLBR_NONE) |
131 | movl %ebp,%ebp /* zero extension */ | 124 | |
132 | pushq_cfi $__USER32_DS | 125 | /* Zero-extending 32-bit regs, do not remove */ |
133 | /*CFI_REL_OFFSET ss,0*/ | 126 | movl %ebp, %ebp |
134 | pushq_cfi %rbp | ||
135 | CFI_REL_OFFSET rsp,0 | ||
136 | pushfq_cfi | ||
137 | /*CFI_REL_OFFSET rflags,0*/ | ||
138 | movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d | ||
139 | CFI_REGISTER rip,r10 | ||
140 | pushq_cfi $__USER32_CS | ||
141 | /*CFI_REL_OFFSET cs,0*/ | ||
142 | movl %eax, %eax | 127 | movl %eax, %eax |
143 | pushq_cfi %r10 | 128 | |
144 | CFI_REL_OFFSET rip,0 | 129 | movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d |
145 | pushq_cfi %rax | 130 | CFI_REGISTER rip,r10 |
131 | |||
132 | /* Construct struct pt_regs on stack */ | ||
133 | pushq_cfi $__USER32_DS /* pt_regs->ss */ | ||
134 | pushq_cfi %rbp /* pt_regs->sp */ | ||
135 | CFI_REL_OFFSET rsp,0 | ||
136 | pushfq_cfi /* pt_regs->flags */ | ||
137 | pushq_cfi $__USER32_CS /* pt_regs->cs */ | ||
138 | pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */ | ||
139 | CFI_REL_OFFSET rip,0 | ||
140 | pushq_cfi_reg rax /* pt_regs->orig_ax */ | ||
141 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
142 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
143 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
144 | pushq_cfi_reg rcx /* pt_regs->cx */ | ||
145 | pushq_cfi_reg rax /* pt_regs->ax */ | ||
146 | cld | 146 | cld |
147 | SAVE_ARGS 0,1,0 | 147 | sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ |
148 | /* no need to do an access_ok check here because rbp has been | 148 | CFI_ADJUST_CFA_OFFSET 10*8 |
149 | 32bit zero extended */ | 149 | |
150 | /* | ||
151 | * no need to do an access_ok check here because rbp has been | ||
152 | * 32bit zero extended | ||
153 | */ | ||
150 | ASM_STAC | 154 | ASM_STAC |
151 | 1: movl (%rbp),%ebp | 155 | 1: movl (%rbp),%ebp |
152 | _ASM_EXTABLE(1b,ia32_badarg) | 156 | _ASM_EXTABLE(1b,ia32_badarg) |
@@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target) | |||
157 | * ourselves. To save a few cycles, we can check whether | 161 | * ourselves. To save a few cycles, we can check whether |
158 | * NT was set instead of doing an unconditional popfq. | 162 | * NT was set instead of doing an unconditional popfq. |
159 | */ | 163 | */ |
160 | testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp) | 164 | testl $X86_EFLAGS_NT,EFLAGS(%rsp) |
161 | jnz sysenter_fix_flags | 165 | jnz sysenter_fix_flags |
162 | sysenter_flags_fixed: | 166 | sysenter_flags_fixed: |
163 | 167 | ||
164 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 168 | orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
165 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 169 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
166 | CFI_REMEMBER_STATE | 170 | CFI_REMEMBER_STATE |
167 | jnz sysenter_tracesys | 171 | jnz sysenter_tracesys |
168 | cmpq $(IA32_NR_syscalls-1),%rax | 172 | cmpq $(IA32_NR_syscalls-1),%rax |
169 | ja ia32_badsys | 173 | ja ia32_badsys |
170 | sysenter_do_call: | 174 | sysenter_do_call: |
171 | IA32_ARG_FIXUP | 175 | /* 32bit syscall -> 64bit C ABI argument conversion */ |
176 | movl %edi,%r8d /* arg5 */ | ||
177 | movl %ebp,%r9d /* arg6 */ | ||
178 | xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ | ||
179 | movl %ebx,%edi /* arg1 */ | ||
180 | movl %edx,%edx /* arg3 (zero extension) */ | ||
172 | sysenter_dispatch: | 181 | sysenter_dispatch: |
173 | call *ia32_sys_call_table(,%rax,8) | 182 | call *ia32_sys_call_table(,%rax,8) |
174 | movq %rax,RAX-ARGOFFSET(%rsp) | 183 | movq %rax,RAX(%rsp) |
175 | DISABLE_INTERRUPTS(CLBR_NONE) | 184 | DISABLE_INTERRUPTS(CLBR_NONE) |
176 | TRACE_IRQS_OFF | 185 | TRACE_IRQS_OFF |
177 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 186 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
178 | jnz sysexit_audit | 187 | jnz sysexit_audit |
179 | sysexit_from_sys_call: | 188 | sysexit_from_sys_call: |
180 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 189 | /* |
181 | /* clear IF, that popfq doesn't enable interrupts early */ | 190 | * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an |
182 | andl $~0x200,EFLAGS-ARGOFFSET(%rsp) | 191 | * NMI between STI and SYSEXIT has poorly specified behavior, |
160 | 183 | movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */ | 192 | * and an NMI followed by an IRQ with usergs is fatal. So |
184 | CFI_REGISTER rip,rdx | 193 | * we just pretend we're using SYSEXIT but we really use |
185 | RESTORE_ARGS 0,24,0,0,0,0 | 194 | * SYSRETL instead. |
195 | * | ||
196 | * This code path is still called 'sysexit' because it pairs | ||
197 | * with 'sysenter' and it uses the SYSENTER calling convention. | ||
198 | */ | ||
199 | andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) | ||
200 | movl RIP(%rsp),%ecx /* User %eip */ | ||
201 | CFI_REGISTER rip,rcx | ||
202 | RESTORE_RSI_RDI | ||
203 | xorl %edx,%edx /* avoid info leaks */ | ||
186 | xorq %r8,%r8 | 204 | xorq %r8,%r8 |
187 | xorq %r9,%r9 | 205 | xorq %r9,%r9 |
188 | xorq %r10,%r10 | 206 | xorq %r10,%r10 |
189 | xorq %r11,%r11 | 207 | movl EFLAGS(%rsp),%r11d /* User eflags */ |
190 | popfq_cfi | ||
191 | /*CFI_RESTORE rflags*/ | 208 | /*CFI_RESTORE rflags*/ |
192 | popq_cfi %rcx /* User %esp */ | ||
193 | CFI_REGISTER rsp,rcx | ||
194 | TRACE_IRQS_ON | 209 | TRACE_IRQS_ON |
195 | ENABLE_INTERRUPTS_SYSEXIT32 | 210 | |
211 | /* | ||
212 | * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT, | ||
213 | * since it avoids a dicey window with interrupts enabled. | ||
214 | */ | ||
215 | movl RSP(%rsp),%esp | ||
216 | |||
217 | /* | ||
218 | * USERGS_SYSRET32 does: | ||
219 | * gsbase = user's gs base | ||
220 | * eip = ecx | ||
221 | * rflags = r11 | ||
222 | * cs = __USER32_CS | ||
223 | * ss = __USER_DS | ||
224 | * | ||
225 | * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does: | ||
226 | * | ||
227 | * pop %ebp | ||
228 | * pop %edx | ||
229 | * pop %ecx | ||
230 | * | ||
231 | * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to | ||
232 | * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's | ||
233 | * address (already known to user code), and R12-R15 are | ||
234 | * callee-saved and therefore don't contain any interesting | ||
235 | * kernel data. | ||
236 | */ | ||
237 | USERGS_SYSRET32 | ||
196 | 238 | ||
197 | CFI_RESTORE_STATE | 239 | CFI_RESTORE_STATE |
198 | 240 | ||
@@ -205,18 +247,18 @@ sysexit_from_sys_call: | |||
205 | movl %ebx,%esi /* 2nd arg: 1st syscall arg */ | 247 | movl %ebx,%esi /* 2nd arg: 1st syscall arg */ |
206 | movl %eax,%edi /* 1st arg: syscall number */ | 248 | movl %eax,%edi /* 1st arg: syscall number */ |
207 | call __audit_syscall_entry | 249 | call __audit_syscall_entry |
208 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | 250 | movl RAX(%rsp),%eax /* reload syscall number */ |
209 | cmpq $(IA32_NR_syscalls-1),%rax | 251 | cmpq $(IA32_NR_syscalls-1),%rax |
210 | ja ia32_badsys | 252 | ja ia32_badsys |
211 | movl %ebx,%edi /* reload 1st syscall arg */ | 253 | movl %ebx,%edi /* reload 1st syscall arg */ |
212 | movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ | 254 | movl RCX(%rsp),%esi /* reload 2nd syscall arg */ |
213 | movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ | 255 | movl RDX(%rsp),%edx /* reload 3rd syscall arg */ |
214 | movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ | 256 | movl RSI(%rsp),%ecx /* reload 4th syscall arg */ |
215 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ | 257 | movl RDI(%rsp),%r8d /* reload 5th syscall arg */ |
216 | .endm | 258 | .endm |
217 | 259 | ||
218 | .macro auditsys_exit exit | 260 | .macro auditsys_exit exit |
219 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 261 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
220 | jnz ia32_ret_from_sys_call | 262 | jnz ia32_ret_from_sys_call |
221 | TRACE_IRQS_ON | 263 | TRACE_IRQS_ON |
222 | ENABLE_INTERRUPTS(CLBR_NONE) | 264 | ENABLE_INTERRUPTS(CLBR_NONE) |
@@ -227,13 +269,13 @@ sysexit_from_sys_call: | |||
227 | 1: setbe %al /* 1 if error, 0 if not */ | 269 | 1: setbe %al /* 1 if error, 0 if not */ |
228 | movzbl %al,%edi /* zero-extend that into %edi */ | 270 | movzbl %al,%edi /* zero-extend that into %edi */ |
229 | call __audit_syscall_exit | 271 | call __audit_syscall_exit |
230 | movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ | 272 | movq RAX(%rsp),%rax /* reload syscall return value */ |
231 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | 273 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi |
232 | DISABLE_INTERRUPTS(CLBR_NONE) | 274 | DISABLE_INTERRUPTS(CLBR_NONE) |
233 | TRACE_IRQS_OFF | 275 | TRACE_IRQS_OFF |
234 | testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 276 | testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
235 | jz \exit | 277 | jz \exit |
236 | CLEAR_RREGS -ARGOFFSET | 278 | CLEAR_RREGS |
237 | jmp int_with_check | 279 | jmp int_with_check |
238 | .endm | 280 | .endm |
239 | 281 | ||
@@ -253,16 +295,16 @@ sysenter_fix_flags: | |||
253 | 295 | ||
254 | sysenter_tracesys: | 296 | sysenter_tracesys: |
255 | #ifdef CONFIG_AUDITSYSCALL | 297 | #ifdef CONFIG_AUDITSYSCALL |
256 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 298 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
257 | jz sysenter_auditsys | 299 | jz sysenter_auditsys |
258 | #endif | 300 | #endif |
259 | SAVE_REST | 301 | SAVE_EXTRA_REGS |
260 | CLEAR_RREGS | 302 | CLEAR_RREGS |
261 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ | 303 | movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */ |
262 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 304 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
263 | call syscall_trace_enter | 305 | call syscall_trace_enter |
264 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 306 | LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ |
265 | RESTORE_REST | 307 | RESTORE_EXTRA_REGS |
266 | cmpq $(IA32_NR_syscalls-1),%rax | 308 | cmpq $(IA32_NR_syscalls-1),%rax |
267 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ | 309 | ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ |
268 | jmp sysenter_do_call | 310 | jmp sysenter_do_call |
@@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target) | |||
272 | /* | 314 | /* |
273 | * 32bit SYSCALL instruction entry. | 315 | * 32bit SYSCALL instruction entry. |
274 | * | 316 | * |
317 | * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11, | ||
318 | * then loads new ss, cs, and rip from previously programmed MSRs. | ||
319 | * rflags gets masked by a value from another MSR (so CLD and CLAC | ||
320 | * are not needed). SYSCALL does not save anything on the stack | ||
321 | * and does not change rsp. | ||
322 | * | ||
323 | * Note: rflags saving+masking-with-MSR happens only in Long mode | ||
324 | * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it). | ||
325 | * Don't get confused: rflags saving+masking depends on Long Mode Active bit | ||
326 | * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes | ||
327 | * or target CS descriptor's L bit (SYSCALL does not read segment descriptors). | ||
328 | * | ||
275 | * Arguments: | 329 | * Arguments: |
276 | * %eax System call number. | 330 | * eax system call number |
277 | * %ebx Arg1 | 331 | * ecx return address |
278 | * %ecx return EIP | 332 | * ebx arg1 |
279 | * %edx Arg3 | 333 | * ebp arg2 (note: not saved in the stack frame, should not be touched) |
280 | * %esi Arg4 | 334 | * edx arg3 |
281 | * %edi Arg5 | 335 | * esi arg4 |
282 | * %ebp Arg2 [note: not saved in the stack frame, should not be touched] | 336 | * edi arg5 |
283 | * %esp user stack | 337 | * esp user stack |
284 | * 0(%esp) Arg6 | 338 | * 0(%esp) arg6 |
285 | * | 339 | * |
286 | * Interrupts off. | ||
287 | * | ||
288 | * This is purely a fast path. For anything complicated we use the int 0x80 | 340 | * This is purely a fast path. For anything complicated we use the int 0x80 |
289 | * path below. Set up a complete hardware stack frame to share code | 341 | * path below. We set up a complete hardware stack frame to share code |
290 | * with the int 0x80 path. | 342 | * with the int 0x80 path. |
291 | */ | 343 | */ |
292 | ENTRY(ia32_cstar_target) | 344 | ENTRY(ia32_cstar_target) |
293 | CFI_STARTPROC32 simple | 345 | CFI_STARTPROC32 simple |
294 | CFI_SIGNAL_FRAME | 346 | CFI_SIGNAL_FRAME |
295 | CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET | 347 | CFI_DEF_CFA rsp,0 |
296 | CFI_REGISTER rip,rcx | 348 | CFI_REGISTER rip,rcx |
297 | /*CFI_REGISTER rflags,r11*/ | 349 | /*CFI_REGISTER rflags,r11*/ |
350 | |||
351 | /* | ||
352 | * Interrupts are off on entry. | ||
353 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, | ||
354 | * it is too small to ever cause noticeable irq latency. | ||
355 | */ | ||
298 | SWAPGS_UNSAFE_STACK | 356 | SWAPGS_UNSAFE_STACK |
299 | movl %esp,%r8d | 357 | movl %esp,%r8d |
300 | CFI_REGISTER rsp,r8 | 358 | CFI_REGISTER rsp,r8 |
301 | movq PER_CPU_VAR(kernel_stack),%rsp | 359 | movq PER_CPU_VAR(kernel_stack),%rsp |
302 | /* | ||
303 | * No need to follow this irqs on/off section: the syscall | ||
304 | * disabled irqs and here we enable it straight after entry: | ||
305 | */ | ||
306 | ENABLE_INTERRUPTS(CLBR_NONE) | 360 | ENABLE_INTERRUPTS(CLBR_NONE) |
307 | SAVE_ARGS 8,0,0 | 361 | |
308 | movl %eax,%eax /* zero extension */ | 362 | /* Zero-extending 32-bit regs, do not remove */ |
309 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 363 | movl %eax,%eax |
310 | movq %rcx,RIP-ARGOFFSET(%rsp) | 364 | |
311 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 365 | /* Construct struct pt_regs on stack */ |
312 | movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */ | 366 | pushq_cfi $__USER32_DS /* pt_regs->ss */ |
367 | pushq_cfi %r8 /* pt_regs->sp */ | ||
368 | CFI_REL_OFFSET rsp,0 | ||
369 | pushq_cfi %r11 /* pt_regs->flags */ | ||
370 | pushq_cfi $__USER32_CS /* pt_regs->cs */ | ||
371 | pushq_cfi %rcx /* pt_regs->ip */ | ||
372 | CFI_REL_OFFSET rip,0 | ||
373 | pushq_cfi_reg rax /* pt_regs->orig_ax */ | ||
374 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
375 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
376 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
377 | pushq_cfi_reg rbp /* pt_regs->cx */ | ||
313 | movl %ebp,%ecx | 378 | movl %ebp,%ecx |
314 | movq $__USER32_CS,CS-ARGOFFSET(%rsp) | 379 | pushq_cfi_reg rax /* pt_regs->ax */ |
315 | movq $__USER32_DS,SS-ARGOFFSET(%rsp) | 380 | sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ |
316 | movq %r11,EFLAGS-ARGOFFSET(%rsp) | 381 | CFI_ADJUST_CFA_OFFSET 10*8 |
317 | /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 382 | |
318 | movq %r8,RSP-ARGOFFSET(%rsp) | 383 | /* |
319 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 384 | * no need to do an access_ok check here because r8 has been |
320 | /* no need to do an access_ok check here because r8 has been | 385 | * 32bit zero extended |
321 | 32bit zero extended */ | 386 | */ |
322 | /* hardware stack frame is complete now */ | ||
323 | ASM_STAC | 387 | ASM_STAC |
324 | 1: movl (%r8),%r9d | 388 | 1: movl (%r8),%r9d |
325 | _ASM_EXTABLE(1b,ia32_badarg) | 389 | _ASM_EXTABLE(1b,ia32_badarg) |
326 | ASM_CLAC | 390 | ASM_CLAC |
327 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 391 | orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
328 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 392 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
329 | CFI_REMEMBER_STATE | 393 | CFI_REMEMBER_STATE |
330 | jnz cstar_tracesys | 394 | jnz cstar_tracesys |
331 | cmpq $IA32_NR_syscalls-1,%rax | 395 | cmpq $IA32_NR_syscalls-1,%rax |
332 | ja ia32_badsys | 396 | ja ia32_badsys |
333 | cstar_do_call: | 397 | cstar_do_call: |
334 | IA32_ARG_FIXUP 1 | 398 | /* 32bit syscall -> 64bit C ABI argument conversion */ |
399 | movl %edi,%r8d /* arg5 */ | ||
400 | /* r9 already loaded */ /* arg6 */ | ||
401 | xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ | ||
402 | movl %ebx,%edi /* arg1 */ | ||
403 | movl %edx,%edx /* arg3 (zero extension) */ | ||
335 | cstar_dispatch: | 404 | cstar_dispatch: |
336 | call *ia32_sys_call_table(,%rax,8) | 405 | call *ia32_sys_call_table(,%rax,8) |
337 | movq %rax,RAX-ARGOFFSET(%rsp) | 406 | movq %rax,RAX(%rsp) |
338 | DISABLE_INTERRUPTS(CLBR_NONE) | 407 | DISABLE_INTERRUPTS(CLBR_NONE) |
339 | TRACE_IRQS_OFF | 408 | TRACE_IRQS_OFF |
340 | testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 409 | testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
341 | jnz sysretl_audit | 410 | jnz sysretl_audit |
342 | sysretl_from_sys_call: | 411 | sysretl_from_sys_call: |
343 | andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 412 | andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
344 | RESTORE_ARGS 0,-ARG_SKIP,0,0,0 | 413 | RESTORE_RSI_RDI_RDX |
345 | movl RIP-ARGOFFSET(%rsp),%ecx | 414 | movl RIP(%rsp),%ecx |
346 | CFI_REGISTER rip,rcx | 415 | CFI_REGISTER rip,rcx |
347 | movl EFLAGS-ARGOFFSET(%rsp),%r11d | 416 | movl EFLAGS(%rsp),%r11d |
348 | /*CFI_REGISTER rflags,r11*/ | 417 | /*CFI_REGISTER rflags,r11*/ |
349 | xorq %r10,%r10 | 418 | xorq %r10,%r10 |
350 | xorq %r9,%r9 | 419 | xorq %r9,%r9 |
351 | xorq %r8,%r8 | 420 | xorq %r8,%r8 |
352 | TRACE_IRQS_ON | 421 | TRACE_IRQS_ON |
353 | movl RSP-ARGOFFSET(%rsp),%esp | 422 | movl RSP(%rsp),%esp |
354 | CFI_RESTORE rsp | 423 | CFI_RESTORE rsp |
424 | /* | ||
425 | * 64bit->32bit SYSRET restores eip from ecx, | ||
426 | * eflags from r11 (but RF and VM bits are forced to 0), | ||
427 | * cs and ss are loaded from MSRs. | ||
428 | * (Note: 32bit->32bit SYSRET is different: since r11 | ||
429 | * does not exist, it merely sets eflags.IF=1). | ||
430 | */ | ||
355 | USERGS_SYSRET32 | 431 | USERGS_SYSRET32 |
356 | 432 | ||
357 | #ifdef CONFIG_AUDITSYSCALL | 433 | #ifdef CONFIG_AUDITSYSCALL |
358 | cstar_auditsys: | 434 | cstar_auditsys: |
359 | CFI_RESTORE_STATE | 435 | CFI_RESTORE_STATE |
360 | movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ | 436 | movl %r9d,R9(%rsp) /* register to be clobbered by call */ |
361 | auditsys_entry_common | 437 | auditsys_entry_common |
362 | movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ | 438 | movl R9(%rsp),%r9d /* reload 6th syscall arg */ |
363 | jmp cstar_dispatch | 439 | jmp cstar_dispatch |
364 | 440 | ||
365 | sysretl_audit: | 441 | sysretl_audit: |
@@ -368,17 +444,17 @@ sysretl_audit: | |||
368 | 444 | ||
369 | cstar_tracesys: | 445 | cstar_tracesys: |
370 | #ifdef CONFIG_AUDITSYSCALL | 446 | #ifdef CONFIG_AUDITSYSCALL |
371 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 447 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
372 | jz cstar_auditsys | 448 | jz cstar_auditsys |
373 | #endif | 449 | #endif |
374 | xchgl %r9d,%ebp | 450 | xchgl %r9d,%ebp |
375 | SAVE_REST | 451 | SAVE_EXTRA_REGS |
376 | CLEAR_RREGS 0, r9 | 452 | CLEAR_RREGS r9 |
377 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 453 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
378 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 454 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
379 | call syscall_trace_enter | 455 | call syscall_trace_enter |
380 | LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ | 456 | LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */ |
381 | RESTORE_REST | 457 | RESTORE_EXTRA_REGS |
382 | xchgl %ebp,%r9d | 458 | xchgl %ebp,%r9d |
383 | cmpq $(IA32_NR_syscalls-1),%rax | 459 | cmpq $(IA32_NR_syscalls-1),%rax |
384 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ | 460 | ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ |
@@ -391,78 +467,94 @@ ia32_badarg: | |||
391 | jmp ia32_sysret | 467 | jmp ia32_sysret |
392 | CFI_ENDPROC | 468 | CFI_ENDPROC |
393 | 469 | ||
394 | /* | 470 | /* |
395 | * Emulated IA32 system calls via int 0x80. | 471 | * Emulated IA32 system calls via int 0x80. |
396 | * | 472 | * |
397 | * Arguments: | 473 | * Arguments: |
398 | * %eax System call number. | 474 | * eax system call number |
399 | * %ebx Arg1 | 475 | * ebx arg1 |
400 | * %ecx Arg2 | 476 | * ecx arg2 |
401 | * %edx Arg3 | 477 | * edx arg3 |
402 | * %esi Arg4 | 478 | * esi arg4 |
403 | * %edi Arg5 | 479 | * edi arg5 |
404 | * %ebp Arg6 [note: not saved in the stack frame, should not be touched] | 480 | * ebp arg6 (note: not saved in the stack frame, should not be touched) |
405 | * | 481 | * |
406 | * Notes: | 482 | * Notes: |
407 | * Uses the same stack frame as the x86-64 version. | 483 | * Uses the same stack frame as the x86-64 version. |
408 | * All registers except %eax must be saved (but ptrace may violate that) | 484 | * All registers except eax must be saved (but ptrace may violate that). |
409 | * Arguments are zero extended. For system calls that want sign extension and | 485 | * Arguments are zero extended. For system calls that want sign extension and |
410 | * take long arguments a wrapper is needed. Most calls can just be called | 486 | * take long arguments a wrapper is needed. Most calls can just be called |
411 | * directly. | 487 | * directly. |
412 | * Assumes it is only called from user space and entered with interrupts off. | 488 | * Assumes it is only called from user space and entered with interrupts off. |
413 | */ | 489 | */ |
414 | 490 | ||
415 | ENTRY(ia32_syscall) | 491 | ENTRY(ia32_syscall) |
416 | CFI_STARTPROC32 simple | 492 | CFI_STARTPROC32 simple |
417 | CFI_SIGNAL_FRAME | 493 | CFI_SIGNAL_FRAME |
418 | CFI_DEF_CFA rsp,SS+8-RIP | 494 | CFI_DEF_CFA rsp,5*8 |
419 | /*CFI_REL_OFFSET ss,SS-RIP*/ | 495 | /*CFI_REL_OFFSET ss,4*8 */ |
420 | CFI_REL_OFFSET rsp,RSP-RIP | 496 | CFI_REL_OFFSET rsp,3*8 |
421 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ | 497 | /*CFI_REL_OFFSET rflags,2*8 */ |
422 | /*CFI_REL_OFFSET cs,CS-RIP*/ | 498 | /*CFI_REL_OFFSET cs,1*8 */ |
423 | CFI_REL_OFFSET rip,RIP-RIP | 499 | CFI_REL_OFFSET rip,0*8 |
424 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 500 | |
425 | SWAPGS | ||
426 | /* | 501 | /* |
427 | * No need to follow this irqs on/off section: the syscall | 502 | * Interrupts are off on entry. |
428 | * disabled irqs and here we enable it straight after entry: | 503 | * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON, |
504 | * it is too small to ever cause noticeable irq latency. | ||
429 | */ | 505 | */ |
506 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
507 | SWAPGS | ||
430 | ENABLE_INTERRUPTS(CLBR_NONE) | 508 | ENABLE_INTERRUPTS(CLBR_NONE) |
431 | movl %eax,%eax | 509 | |
432 | pushq_cfi %rax | 510 | /* Zero-extending 32-bit regs, do not remove */ |
511 | movl %eax,%eax | ||
512 | |||
513 | /* Construct struct pt_regs on stack (iret frame is already on stack) */ | ||
514 | pushq_cfi_reg rax /* pt_regs->orig_ax */ | ||
515 | pushq_cfi_reg rdi /* pt_regs->di */ | ||
516 | pushq_cfi_reg rsi /* pt_regs->si */ | ||
517 | pushq_cfi_reg rdx /* pt_regs->dx */ | ||
518 | pushq_cfi_reg rcx /* pt_regs->cx */ | ||
519 | pushq_cfi_reg rax /* pt_regs->ax */ | ||
433 | cld | 520 | cld |
434 | /* note the registers are not zero extended to the sf. | 521 | sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */ |
435 | this could be a problem. */ | 522 | CFI_ADJUST_CFA_OFFSET 10*8 |
436 | SAVE_ARGS 0,1,0 | 523 | |
437 | orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 524 | orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS) |
438 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) | 525 | testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS) |
439 | jnz ia32_tracesys | 526 | jnz ia32_tracesys |
440 | cmpq $(IA32_NR_syscalls-1),%rax | 527 | cmpq $(IA32_NR_syscalls-1),%rax |
441 | ja ia32_badsys | 528 | ja ia32_badsys |
442 | ia32_do_call: | 529 | ia32_do_call: |
443 | IA32_ARG_FIXUP | 530 | /* 32bit syscall -> 64bit C ABI argument conversion */ |
531 | movl %edi,%r8d /* arg5 */ | ||
532 | movl %ebp,%r9d /* arg6 */ | ||
533 | xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */ | ||
534 | movl %ebx,%edi /* arg1 */ | ||
535 | movl %edx,%edx /* arg3 (zero extension) */ | ||
444 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative | 536 | call *ia32_sys_call_table(,%rax,8) # xxx: rip relative |
445 | ia32_sysret: | 537 | ia32_sysret: |
446 | movq %rax,RAX-ARGOFFSET(%rsp) | 538 | movq %rax,RAX(%rsp) |
447 | ia32_ret_from_sys_call: | 539 | ia32_ret_from_sys_call: |
448 | CLEAR_RREGS -ARGOFFSET | 540 | CLEAR_RREGS |
449 | jmp int_ret_from_sys_call | 541 | jmp int_ret_from_sys_call |
450 | 542 | ||
451 | ia32_tracesys: | 543 | ia32_tracesys: |
452 | SAVE_REST | 544 | SAVE_EXTRA_REGS |
453 | CLEAR_RREGS | 545 | CLEAR_RREGS |
454 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 546 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
455 | movq %rsp,%rdi /* &pt_regs -> arg1 */ | 547 | movq %rsp,%rdi /* &pt_regs -> arg1 */ |
456 | call syscall_trace_enter | 548 | call syscall_trace_enter |
457 | LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ | 549 | LOAD_ARGS32 /* reload args from stack in case ptrace changed it */ |
458 | RESTORE_REST | 550 | RESTORE_EXTRA_REGS |
459 | cmpq $(IA32_NR_syscalls-1),%rax | 551 | cmpq $(IA32_NR_syscalls-1),%rax |
460 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ | 552 | ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ |
461 | jmp ia32_do_call | 553 | jmp ia32_do_call |
462 | END(ia32_syscall) | 554 | END(ia32_syscall) |
463 | 555 | ||
464 | ia32_badsys: | 556 | ia32_badsys: |
465 | movq $0,ORIG_RAX-ARGOFFSET(%rsp) | 557 | movq $0,ORIG_RAX(%rsp) |
466 | movq $-ENOSYS,%rax | 558 | movq $-ENOSYS,%rax |
467 | jmp ia32_sysret | 559 | jmp ia32_sysret |
468 | 560 | ||
@@ -479,8 +571,6 @@ GLOBAL(\label) | |||
479 | 571 | ||
480 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn | 572 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn |
481 | PTREGSCALL stub32_sigreturn, sys32_sigreturn | 573 | PTREGSCALL stub32_sigreturn, sys32_sigreturn |
482 | PTREGSCALL stub32_execve, compat_sys_execve | ||
483 | PTREGSCALL stub32_execveat, compat_sys_execveat | ||
484 | PTREGSCALL stub32_fork, sys_fork | 574 | PTREGSCALL stub32_fork, sys_fork |
485 | PTREGSCALL stub32_vfork, sys_vfork | 575 | PTREGSCALL stub32_vfork, sys_vfork |
486 | 576 | ||
@@ -492,24 +582,23 @@ GLOBAL(stub32_clone) | |||
492 | 582 | ||
493 | ALIGN | 583 | ALIGN |
494 | ia32_ptregs_common: | 584 | ia32_ptregs_common: |
495 | popq %r11 | ||
496 | CFI_ENDPROC | 585 | CFI_ENDPROC |
497 | CFI_STARTPROC32 simple | 586 | CFI_STARTPROC32 simple |
498 | CFI_SIGNAL_FRAME | 587 | CFI_SIGNAL_FRAME |
499 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET | 588 | CFI_DEF_CFA rsp,SIZEOF_PTREGS |
500 | CFI_REL_OFFSET rax,RAX-ARGOFFSET | 589 | CFI_REL_OFFSET rax,RAX |
501 | CFI_REL_OFFSET rcx,RCX-ARGOFFSET | 590 | CFI_REL_OFFSET rcx,RCX |
502 | CFI_REL_OFFSET rdx,RDX-ARGOFFSET | 591 | CFI_REL_OFFSET rdx,RDX |
503 | CFI_REL_OFFSET rsi,RSI-ARGOFFSET | 592 | CFI_REL_OFFSET rsi,RSI |
504 | CFI_REL_OFFSET rdi,RDI-ARGOFFSET | 593 | CFI_REL_OFFSET rdi,RDI |
505 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 594 | CFI_REL_OFFSET rip,RIP |
506 | /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ | 595 | /* CFI_REL_OFFSET cs,CS*/ |
507 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | 596 | /* CFI_REL_OFFSET rflags,EFLAGS*/ |
508 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | 597 | CFI_REL_OFFSET rsp,RSP |
509 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | 598 | /* CFI_REL_OFFSET ss,SS*/ |
510 | SAVE_REST | 599 | SAVE_EXTRA_REGS 8 |
511 | call *%rax | 600 | call *%rax |
512 | RESTORE_REST | 601 | RESTORE_EXTRA_REGS 8 |
513 | jmp ia32_sysret /* misbalances the return cache */ | 602 | ret |
514 | CFI_ENDPROC | 603 | CFI_ENDPROC |
515 | END(ia32_ptregs_common) | 604 | END(ia32_ptregs_common) |