Diffstat (limited to 'arch/x86_64/kernel/entry.S')
-rw-r--r-- | arch/x86_64/kernel/entry.S | 920 |
1 files changed, 920 insertions, 0 deletions
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
new file mode 100644
index 000000000000..e126284db7a8
--- /dev/null
+++ b/arch/x86_64/kernel/entry.S
@@ -0,0 +1,920 @@
1 | /* | ||
2 | * linux/arch/x86_64/entry.S | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
6 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | ||
7 | * | ||
8 | * $Id$ | ||
9 | */ | ||
10 | |||
11 | /* | ||
12 | * entry.S contains the system-call and fault low-level handling routines. | ||
13 | * | ||
14 | * NOTE: This code handles signal recognition, which happens every time | ||
15 | * after an interrupt and after each system call. | ||
16 | * | ||
17 | * Normal syscalls and interrupts don't save a full stack frame, this is | ||
18 | * only done for syscall tracing, signals or fork/exec et.al. | ||
19 | * | ||
20 | * A note on terminology: | ||
21 | * - top of stack: architecture-defined interrupt frame from SS to RIP | ||
22 | * at the top of the kernel process stack. | ||
23 | * - partial stack frame: partially saved registers up to R11. | ||
24 | * - full stack frame: like the partial stack frame, but with all registers saved. | ||
25 | * | ||
26 | * TODO: | ||
27 | * - schedule it carefully for the final hardware. | ||
28 | */ | ||
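For reference, a minimal C sketch (illustrative only; the struct name is made up) of the architecture-defined interrupt frame the comment above calls the top of stack, lowest address first:

	/* Illustrative struct: the CPU pushes SS, RSP, RFLAGS, CS, RIP on a
	 * ring transition, so read lowest-address-first the frame is RIP..SS. */
	struct iret_frame {
		unsigned long rip;	/* return instruction pointer */
		unsigned long cs;	/* code segment */
		unsigned long eflags;	/* saved RFLAGS */
		unsigned long rsp;	/* user stack pointer */
		unsigned long ss;	/* stack segment */
	};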
29 | |||
30 | #define ASSEMBLY 1 | ||
31 | #include <linux/config.h> | ||
32 | #include <linux/linkage.h> | ||
33 | #include <asm/segment.h> | ||
34 | #include <asm/smp.h> | ||
35 | #include <asm/cache.h> | ||
36 | #include <asm/errno.h> | ||
37 | #include <asm/dwarf2.h> | ||
38 | #include <asm/calling.h> | ||
39 | #include <asm/offset.h> | ||
40 | #include <asm/msr.h> | ||
41 | #include <asm/unistd.h> | ||
42 | #include <asm/thread_info.h> | ||
43 | #include <asm/hw_irq.h> | ||
44 | |||
45 | .code64 | ||
46 | |||
47 | #ifdef CONFIG_PREEMPT | ||
48 | #define preempt_stop cli | ||
49 | #else | ||
50 | #define preempt_stop | ||
51 | #define retint_kernel retint_restore_args | ||
52 | #endif | ||
53 | |||
54 | /* | ||
55 | * C code is not supposed to know about the undefined top of stack. Every time | ||
56 | * a C function with a pt_regs argument is called from the SYSCALL-based | ||
57 | * fast path, FIXUP_TOP_OF_STACK is needed. | ||
58 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible pt_regs | ||
59 | * manipulation. | ||
60 | */ | ||
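A conceptual C restatement of that fixup (a sketch only; the PDA accessor name is invented, and the authoritative code is the FIXUP_TOP_OF_STACK macro below):

	/* On the SYSCALL fast path the RSP/SS/CS/RCX/EFLAGS slots of the frame
	 * are undefined, so fill them in before handing pt_regs to C code. */
	static void fixup_top_of_stack(struct pt_regs *regs)
	{
		regs->rsp    = read_pda_oldrsp();	/* user rsp stashed in the PDA (hypothetical accessor) */
		regs->ss     = __USER_DS;
		regs->cs     = __USER_CS;
		regs->rcx    = -1;			/* rcx was clobbered by SYSCALL */
		regs->eflags = regs->r11;		/* SYSCALL saved RFLAGS in r11 */
	}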
61 | |||
62 | /* %rsp:at FRAMEEND */ | ||
63 | .macro FIXUP_TOP_OF_STACK tmp | ||
64 | movq %gs:pda_oldrsp,\tmp | ||
65 | movq \tmp,RSP(%rsp) | ||
66 | movq $__USER_DS,SS(%rsp) | ||
67 | movq $__USER_CS,CS(%rsp) | ||
68 | movq $-1,RCX(%rsp) | ||
69 | movq R11(%rsp),\tmp /* get eflags */ | ||
70 | movq \tmp,EFLAGS(%rsp) | ||
71 | .endm | ||
72 | |||
73 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 | ||
74 | movq RSP-\offset(%rsp),\tmp | ||
75 | movq \tmp,%gs:pda_oldrsp | ||
76 | movq EFLAGS-\offset(%rsp),\tmp | ||
77 | movq \tmp,R11-\offset(%rsp) | ||
78 | .endm | ||
79 | |||
80 | .macro FAKE_STACK_FRAME child_rip | ||
81 | /* push in order ss, rsp, eflags, cs, rip */ | ||
82 | xorq %rax, %rax | ||
83 | pushq %rax /* ss */ | ||
84 | CFI_ADJUST_CFA_OFFSET 8 | ||
85 | pushq %rax /* rsp */ | ||
86 | CFI_ADJUST_CFA_OFFSET 8 | ||
87 | CFI_OFFSET rip,0 | ||
88 | pushq $(1<<9) /* eflags - interrupts on */ | ||
89 | CFI_ADJUST_CFA_OFFSET 8 | ||
90 | pushq $__KERNEL_CS /* cs */ | ||
91 | CFI_ADJUST_CFA_OFFSET 8 | ||
92 | pushq \child_rip /* rip */ | ||
93 | CFI_ADJUST_CFA_OFFSET 8 | ||
94 | CFI_OFFSET rip,0 | ||
95 | pushq %rax /* orig rax */ | ||
96 | CFI_ADJUST_CFA_OFFSET 8 | ||
97 | .endm | ||
98 | |||
99 | .macro UNFAKE_STACK_FRAME | ||
100 | addq $8*6, %rsp | ||
101 | CFI_ADJUST_CFA_OFFSET -(6*8) | ||
102 | .endm | ||
103 | |||
104 | .macro CFI_DEFAULT_STACK | ||
105 | CFI_ADJUST_CFA_OFFSET (SS) | ||
106 | CFI_OFFSET r15,R15-SS | ||
107 | CFI_OFFSET r14,R14-SS | ||
108 | CFI_OFFSET r13,R13-SS | ||
109 | CFI_OFFSET r12,R12-SS | ||
110 | CFI_OFFSET rbp,RBP-SS | ||
111 | CFI_OFFSET rbx,RBX-SS | ||
112 | CFI_OFFSET r11,R11-SS | ||
113 | CFI_OFFSET r10,R10-SS | ||
114 | CFI_OFFSET r9,R9-SS | ||
115 | CFI_OFFSET r8,R8-SS | ||
116 | CFI_OFFSET rax,RAX-SS | ||
117 | CFI_OFFSET rcx,RCX-SS | ||
118 | CFI_OFFSET rdx,RDX-SS | ||
119 | CFI_OFFSET rsi,RSI-SS | ||
120 | CFI_OFFSET rdi,RDI-SS | ||
121 | CFI_OFFSET rsp,RSP-SS | ||
122 | CFI_OFFSET rip,RIP-SS | ||
123 | .endm | ||
124 | /* | ||
125 | * A newly forked process directly context switches into this. | ||
126 | */ | ||
127 | /* rdi: prev */ | ||
128 | ENTRY(ret_from_fork) | ||
129 | CFI_STARTPROC | ||
130 | CFI_DEFAULT_STACK | ||
131 | call schedule_tail | ||
132 | GET_THREAD_INFO(%rcx) | ||
133 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) | ||
134 | jnz rff_trace | ||
135 | rff_action: | ||
136 | RESTORE_REST | ||
137 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | ||
138 | je int_ret_from_sys_call | ||
139 | testl $_TIF_IA32,threadinfo_flags(%rcx) | ||
140 | jnz int_ret_from_sys_call | ||
141 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | ||
142 | jmp ret_from_sys_call | ||
143 | rff_trace: | ||
144 | movq %rsp,%rdi | ||
145 | call syscall_trace_leave | ||
146 | GET_THREAD_INFO(%rcx) | ||
147 | jmp rff_action | ||
148 | CFI_ENDPROC | ||
149 | |||
150 | /* | ||
151 | * System call entry. Up to 6 arguments in registers are supported. | ||
152 | * | ||
153 | * SYSCALL does not save anything on the stack and does not change the | ||
154 | * stack pointer. | ||
155 | */ | ||
156 | |||
157 | /* | ||
158 | * Register setup: | ||
159 | * rax system call number | ||
160 | * rdi arg0 | ||
161 | * rcx return address for syscall/sysret, C arg3 | ||
162 | * rsi arg1 | ||
163 | * rdx arg2 | ||
164 | * r10 arg3 (--> moved to rcx for C) | ||
165 | * r8 arg4 | ||
166 | * r9 arg5 | ||
167 | * r11 eflags for syscall/sysret, temporary for C | ||
168 | * r12-r15,rbp,rbx saved by C code, not touched. | ||
169 | * | ||
170 | * Interrupts are off on entry. | ||
171 | * Only called from user space. | ||
172 | * | ||
173 | * XXX: if we had a free scratch register we could save the RSP into the stack frame | ||
174 | * and report it properly in ps. Unfortunately we don't have one. | ||
175 | */ | ||
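For illustration, a user-space sketch of the register convention listed above (not kernel code; the wrapper name is invented). The SYSCALL instruction itself clobbers rcx and r11, which is why those two are special-cased:

	/* Illustrative wrapper: number in rax, args in rdi/rsi/rdx
	 * (a fourth argument would go in r10, not rcx). */
	static long raw_syscall3(long nr, long a0, long a1, long a2)
	{
		long ret;
		asm volatile("syscall"
			     : "=a" (ret)
			     : "0" (nr), "D" (a0), "S" (a1), "d" (a2)
			     : "rcx", "r11", "memory");
		return ret;
	}

	/* e.g. raw_syscall3(__NR_write, 1, (long)"hi\n", 3); */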
176 | |||
177 | ENTRY(system_call) | ||
178 | CFI_STARTPROC | ||
179 | swapgs | ||
180 | movq %rsp,%gs:pda_oldrsp | ||
181 | movq %gs:pda_kernelstack,%rsp | ||
182 | sti | ||
183 | SAVE_ARGS 8,1 | ||
184 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | ||
185 | movq %rcx,RIP-ARGOFFSET(%rsp) | ||
186 | GET_THREAD_INFO(%rcx) | ||
187 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) | ||
188 | jnz tracesys | ||
189 | cmpq $__NR_syscall_max,%rax | ||
190 | ja badsys | ||
191 | movq %r10,%rcx | ||
192 | call *sys_call_table(,%rax,8) # XXX: rip relative | ||
193 | movq %rax,RAX-ARGOFFSET(%rsp) | ||
194 | /* | ||
195 | * Syscall return path ending with SYSRET (fast path) | ||
196 | * Has incomplete stack frame and undefined top of stack. | ||
197 | */ | ||
198 | .globl ret_from_sys_call | ||
199 | ret_from_sys_call: | ||
200 | movl $_TIF_WORK_MASK,%edi | ||
201 | /* edi: flagmask */ | ||
202 | sysret_check: | ||
203 | GET_THREAD_INFO(%rcx) | ||
204 | cli | ||
205 | movl threadinfo_flags(%rcx),%edx | ||
206 | andl %edi,%edx | ||
207 | jnz sysret_careful | ||
208 | movq RIP-ARGOFFSET(%rsp),%rcx | ||
209 | RESTORE_ARGS 0,-ARG_SKIP,1 | ||
210 | movq %gs:pda_oldrsp,%rsp | ||
211 | swapgs | ||
212 | sysretq | ||
213 | |||
214 | /* Handle reschedules */ | ||
215 | /* edx: work, edi: workmask */ | ||
216 | sysret_careful: | ||
217 | bt $TIF_NEED_RESCHED,%edx | ||
218 | jnc sysret_signal | ||
219 | sti | ||
220 | pushq %rdi | ||
221 | call schedule | ||
222 | popq %rdi | ||
223 | jmp sysret_check | ||
224 | |||
225 | /* Handle a signal */ | ||
226 | sysret_signal: | ||
227 | sti | ||
228 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | ||
229 | jz 1f | ||
230 | |||
231 | /* Really a signal */ | ||
232 | /* edx: work flags (arg3) */ | ||
233 | leaq do_notify_resume(%rip),%rax | ||
234 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | ||
235 | xorl %esi,%esi # oldset -> arg2 | ||
236 | call ptregscall_common | ||
237 | 1: movl $_TIF_NEED_RESCHED,%edi | ||
238 | jmp sysret_check | ||
239 | |||
240 | /* Do syscall tracing */ | ||
241 | tracesys: | ||
242 | SAVE_REST | ||
243 | movq $-ENOSYS,RAX(%rsp) | ||
244 | FIXUP_TOP_OF_STACK %rdi | ||
245 | movq %rsp,%rdi | ||
246 | call syscall_trace_enter | ||
247 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | ||
248 | RESTORE_REST | ||
249 | cmpq $__NR_syscall_max,%rax | ||
250 | ja 1f | ||
251 | movq %r10,%rcx /* fixup for C */ | ||
252 | call *sys_call_table(,%rax,8) | ||
253 | movq %rax,RAX-ARGOFFSET(%rsp) | ||
254 | 1: SAVE_REST | ||
255 | movq %rsp,%rdi | ||
256 | call syscall_trace_leave | ||
257 | RESTORE_TOP_OF_STACK %rbx | ||
258 | RESTORE_REST | ||
259 | jmp ret_from_sys_call | ||
260 | |||
261 | badsys: | ||
262 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | ||
263 | jmp ret_from_sys_call | ||
264 | |||
265 | /* | ||
266 | * Syscall return path ending with IRET. | ||
267 | * Has correct top of stack, but partial stack frame. | ||
268 | */ | ||
269 | ENTRY(int_ret_from_sys_call) | ||
270 | cli | ||
271 | testl $3,CS-ARGOFFSET(%rsp) | ||
272 | je retint_restore_args | ||
273 | movl $_TIF_ALLWORK_MASK,%edi | ||
274 | /* edi: mask to check */ | ||
275 | int_with_check: | ||
276 | GET_THREAD_INFO(%rcx) | ||
277 | movl threadinfo_flags(%rcx),%edx | ||
278 | andl %edi,%edx | ||
279 | jnz int_careful | ||
280 | jmp retint_swapgs | ||
281 | |||
282 | /* Either reschedule or signal or syscall exit tracking needed. */ | ||
283 | /* First do a reschedule test. */ | ||
284 | /* edx: work, edi: workmask */ | ||
285 | int_careful: | ||
286 | bt $TIF_NEED_RESCHED,%edx | ||
287 | jnc int_very_careful | ||
288 | sti | ||
289 | pushq %rdi | ||
290 | call schedule | ||
291 | popq %rdi | ||
292 | jmp int_with_check | ||
293 | |||
294 | /* handle signals and tracing -- both require a full stack frame */ | ||
295 | int_very_careful: | ||
296 | sti | ||
297 | SAVE_REST | ||
298 | /* Check for syscall exit trace */ | ||
299 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | ||
300 | jz int_signal | ||
301 | pushq %rdi | ||
302 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | ||
303 | call syscall_trace_leave | ||
304 | popq %rdi | ||
305 | btr $TIF_SYSCALL_TRACE,%edi | ||
306 | btr $TIF_SYSCALL_AUDIT,%edi | ||
307 | btr $TIF_SINGLESTEP,%edi | ||
308 | jmp int_restore_rest | ||
309 | |||
310 | int_signal: | ||
311 | testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx | ||
312 | jz 1f | ||
313 | movq %rsp,%rdi # &ptregs -> arg1 | ||
314 | xorl %esi,%esi # oldset -> arg2 | ||
315 | call do_notify_resume | ||
316 | 1: movl $_TIF_NEED_RESCHED,%edi | ||
317 | int_restore_rest: | ||
318 | RESTORE_REST | ||
319 | jmp int_with_check | ||
320 | CFI_ENDPROC | ||
321 | |||
322 | /* | ||
323 | * Certain special system calls need to save a full stack frame. | ||
324 | */ | ||
325 | |||
326 | .macro PTREGSCALL label,func,arg | ||
327 | .globl \label | ||
328 | \label: | ||
329 | leaq \func(%rip),%rax | ||
330 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | ||
331 | jmp ptregscall_common | ||
332 | .endm | ||
333 | |||
334 | PTREGSCALL stub_clone, sys_clone, %r8 | ||
335 | PTREGSCALL stub_fork, sys_fork, %rdi | ||
336 | PTREGSCALL stub_vfork, sys_vfork, %rdi | ||
337 | PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx | ||
338 | PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx | ||
339 | PTREGSCALL stub_iopl, sys_iopl, %rsi | ||
340 | |||
341 | ENTRY(ptregscall_common) | ||
342 | CFI_STARTPROC | ||
343 | popq %r11 | ||
344 | CFI_ADJUST_CFA_OFFSET -8 | ||
345 | SAVE_REST | ||
346 | movq %r11, %r15 | ||
347 | FIXUP_TOP_OF_STACK %r11 | ||
348 | call *%rax | ||
349 | RESTORE_TOP_OF_STACK %r11 | ||
350 | movq %r15, %r11 | ||
351 | RESTORE_REST | ||
352 | pushq %r11 | ||
353 | CFI_ADJUST_CFA_OFFSET 8 | ||
354 | ret | ||
355 | CFI_ENDPROC | ||
356 | |||
357 | ENTRY(stub_execve) | ||
358 | CFI_STARTPROC | ||
359 | popq %r11 | ||
360 | CFI_ADJUST_CFA_OFFSET -8 | ||
361 | SAVE_REST | ||
362 | movq %r11, %r15 | ||
363 | FIXUP_TOP_OF_STACK %r11 | ||
364 | call sys_execve | ||
365 | GET_THREAD_INFO(%rcx) | ||
366 | bt $TIF_IA32,threadinfo_flags(%rcx) | ||
367 | jc exec_32bit | ||
368 | RESTORE_TOP_OF_STACK %r11 | ||
369 | movq %r15, %r11 | ||
370 | RESTORE_REST | ||
371 | push %r11 | ||
372 | ret | ||
373 | |||
374 | exec_32bit: | ||
375 | CFI_ADJUST_CFA_OFFSET REST_SKIP | ||
376 | movq %rax,RAX(%rsp) | ||
377 | RESTORE_REST | ||
378 | jmp int_ret_from_sys_call | ||
379 | CFI_ENDPROC | ||
380 | |||
381 | /* | ||
382 | * sigreturn is special because it needs to restore all registers on return. | ||
383 | * This cannot be done with SYSRET, so use the IRET return path instead. | ||
384 | */ | ||
385 | ENTRY(stub_rt_sigreturn) | ||
386 | CFI_STARTPROC | ||
387 | addq $8, %rsp | ||
388 | SAVE_REST | ||
389 | movq %rsp,%rdi | ||
390 | FIXUP_TOP_OF_STACK %r11 | ||
391 | call sys_rt_sigreturn | ||
392 | movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer | ||
393 | RESTORE_REST | ||
394 | jmp int_ret_from_sys_call | ||
395 | CFI_ENDPROC | ||
396 | |||
397 | /* | ||
398 | * Interrupt entry/exit. | ||
399 | * | ||
400 | * Interrupt entry points save only callee-clobbered registers in the fast path. | ||
401 | * | ||
402 | * Entry runs with interrupts off. | ||
403 | */ | ||
404 | |||
405 | /* 0(%rsp): interrupt number */ | ||
406 | .macro interrupt func | ||
407 | CFI_STARTPROC simple | ||
408 | CFI_DEF_CFA rsp,(SS-RDI) | ||
409 | CFI_REL_OFFSET rsp,(RSP-ORIG_RAX) | ||
410 | CFI_REL_OFFSET rip,(RIP-ORIG_RAX) | ||
411 | cld | ||
412 | #ifdef CONFIG_DEBUG_INFO | ||
413 | SAVE_ALL | ||
414 | movq %rsp,%rdi | ||
415 | /* | ||
416 | * Set up a stack frame pointer. This allows gdb to trace | ||
417 | * back to the original stack. | ||
418 | */ | ||
419 | movq %rsp,%rbp | ||
420 | CFI_DEF_CFA_REGISTER rbp | ||
421 | #else | ||
422 | SAVE_ARGS | ||
423 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | ||
424 | #endif | ||
425 | testl $3,CS(%rdi) | ||
426 | je 1f | ||
427 | swapgs | ||
428 | 1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count | ||
429 | movq %gs:pda_irqstackptr,%rax | ||
430 | cmoveq %rax,%rsp | ||
431 | pushq %rdi # save old stack | ||
432 | call \func | ||
433 | .endm | ||
434 | |||
435 | ENTRY(common_interrupt) | ||
436 | interrupt do_IRQ | ||
437 | /* 0(%rsp): oldrsp-ARGOFFSET */ | ||
438 | ret_from_intr: | ||
439 | popq %rdi | ||
440 | cli | ||
441 | subl $1,%gs:pda_irqcount | ||
442 | #ifdef CONFIG_DEBUG_INFO | ||
443 | movq RBP(%rdi),%rbp | ||
444 | #endif | ||
445 | leaq ARGOFFSET(%rdi),%rsp | ||
446 | exit_intr: | ||
447 | GET_THREAD_INFO(%rcx) | ||
448 | testl $3,CS-ARGOFFSET(%rsp) | ||
449 | je retint_kernel | ||
450 | |||
451 | /* Interrupt came from user space */ | ||
452 | /* | ||
453 | * Has a correct top of stack, but a partial stack frame | ||
454 | * %rcx: thread info. Interrupts off. | ||
455 | */ | ||
456 | retint_with_reschedule: | ||
457 | movl $_TIF_WORK_MASK,%edi | ||
458 | retint_check: | ||
459 | movl threadinfo_flags(%rcx),%edx | ||
460 | andl %edi,%edx | ||
461 | jnz retint_careful | ||
462 | retint_swapgs: | ||
463 | cli | ||
464 | swapgs | ||
465 | retint_restore_args: | ||
466 | cli | ||
467 | RESTORE_ARGS 0,8,0 | ||
468 | iret_label: | ||
469 | iretq | ||
470 | |||
471 | .section __ex_table,"a" | ||
472 | .quad iret_label,bad_iret | ||
473 | .previous | ||
474 | .section .fixup,"ax" | ||
475 | /* force a signal here? this matches i386 behaviour */ | ||
476 | /* running with kernel gs */ | ||
477 | bad_iret: | ||
478 | movq $-9999,%rdi /* better code? */ | ||
479 | jmp do_exit | ||
480 | .previous | ||
481 | |||
482 | /* edi: workmask, edx: work */ | ||
483 | retint_careful: | ||
484 | bt $TIF_NEED_RESCHED,%edx | ||
485 | jnc retint_signal | ||
486 | sti | ||
487 | pushq %rdi | ||
488 | call schedule | ||
489 | popq %rdi | ||
490 | GET_THREAD_INFO(%rcx) | ||
491 | cli | ||
492 | jmp retint_check | ||
493 | |||
494 | retint_signal: | ||
495 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | ||
496 | jz retint_swapgs | ||
497 | sti | ||
498 | SAVE_REST | ||
499 | movq $-1,ORIG_RAX(%rsp) | ||
500 | xorq %rsi,%rsi # oldset | ||
501 | movq %rsp,%rdi # &pt_regs | ||
502 | call do_notify_resume | ||
503 | RESTORE_REST | ||
504 | cli | ||
505 | movl $_TIF_NEED_RESCHED,%edi | ||
506 | GET_THREAD_INFO(%rcx) | ||
507 | jmp retint_check | ||
508 | |||
509 | #ifdef CONFIG_PREEMPT | ||
510 | /* Returning to kernel space. Check if we need preemption */ | ||
511 | /* rcx: threadinfo. interrupts off. */ | ||
512 | .p2align | ||
513 | retint_kernel: | ||
514 | cmpl $0,threadinfo_preempt_count(%rcx) | ||
515 | jnz retint_restore_args | ||
516 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) | ||
517 | jnc retint_restore_args | ||
518 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | ||
519 | jnc retint_restore_args | ||
520 | call preempt_schedule_irq | ||
521 | jmp exit_intr | ||
522 | #endif | ||
523 | CFI_ENDPROC | ||
524 | |||
525 | /* | ||
526 | * APIC interrupts. | ||
527 | */ | ||
528 | .macro apicinterrupt num,func | ||
529 | pushq $\num-256 | ||
530 | interrupt \func | ||
531 | jmp ret_from_intr | ||
532 | CFI_ENDPROC | ||
533 | .endm | ||
534 | |||
535 | ENTRY(thermal_interrupt) | ||
536 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | ||
537 | |||
538 | #ifdef CONFIG_SMP | ||
539 | ENTRY(reschedule_interrupt) | ||
540 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | ||
541 | |||
542 | ENTRY(invalidate_interrupt) | ||
543 | apicinterrupt INVALIDATE_TLB_VECTOR,smp_invalidate_interrupt | ||
544 | |||
545 | ENTRY(call_function_interrupt) | ||
546 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | ||
547 | #endif | ||
548 | |||
549 | #ifdef CONFIG_X86_LOCAL_APIC | ||
550 | ENTRY(apic_timer_interrupt) | ||
551 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | ||
552 | |||
553 | ENTRY(error_interrupt) | ||
554 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | ||
555 | |||
556 | ENTRY(spurious_interrupt) | ||
557 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | ||
558 | #endif | ||
559 | |||
560 | /* | ||
561 | * Exception entry points. | ||
562 | */ | ||
563 | .macro zeroentry sym | ||
564 | pushq $0 /* push error code/oldrax */ | ||
565 | pushq %rax /* push real oldrax to the rdi slot */ | ||
566 | leaq \sym(%rip),%rax | ||
567 | jmp error_entry | ||
568 | .endm | ||
569 | |||
570 | .macro errorentry sym | ||
571 | pushq %rax | ||
572 | leaq \sym(%rip),%rax | ||
573 | jmp error_entry | ||
574 | .endm | ||
575 | |||
576 | /* error code is on the stack already */ | ||
577 | /* handle NMI-like exceptions that can happen everywhere */ | ||
578 | .macro paranoidentry sym | ||
579 | SAVE_ALL | ||
580 | cld | ||
581 | movl $1,%ebx | ||
582 | movl $MSR_GS_BASE,%ecx | ||
583 | rdmsr | ||
584 | testl %edx,%edx | ||
585 | js 1f | ||
586 | swapgs | ||
587 | xorl %ebx,%ebx | ||
588 | 1: movq %rsp,%rdi | ||
589 | movq ORIG_RAX(%rsp),%rsi | ||
590 | movq $-1,ORIG_RAX(%rsp) | ||
591 | call \sym | ||
592 | .endm | ||
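The rdmsr/js pair above is what decides whether swapgs is needed: rdmsr returns MSR_GS_BASE in edx:eax, and a set sign bit in edx means the base is already a kernel address (kernel addresses live in the negative half of the canonical space). A conceptual C sketch of the same test, with an invented helper name:

	/* %ebx then carries the "no swapgs needed" flag for the paranoid exit path. */
	static int gs_base_is_kernel(void)
	{
		unsigned long base = read_msr_gs_base();	/* hypothetical rdmsr wrapper */
		return (long)base < 0;				/* negative => kernel half => no swapgs */
	}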
593 | |||
594 | /* | ||
595 | * Exception entry point. This expects an error code/orig_rax on the stack | ||
596 | * and the exception handler in %rax. | ||
597 | */ | ||
598 | ENTRY(error_entry) | ||
599 | CFI_STARTPROC simple | ||
600 | CFI_DEF_CFA rsp,(SS-RDI) | ||
601 | CFI_REL_OFFSET rsp,(RSP-RDI) | ||
602 | CFI_REL_OFFSET rip,(RIP-RDI) | ||
603 | /* rdi slot contains rax, oldrax contains error code */ | ||
604 | cld | ||
605 | subq $14*8,%rsp | ||
606 | CFI_ADJUST_CFA_OFFSET (14*8) | ||
607 | movq %rsi,13*8(%rsp) | ||
608 | CFI_REL_OFFSET rsi,RSI | ||
609 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | ||
610 | movq %rdx,12*8(%rsp) | ||
611 | CFI_REL_OFFSET rdx,RDX | ||
612 | movq %rcx,11*8(%rsp) | ||
613 | CFI_REL_OFFSET rcx,RCX | ||
614 | movq %rsi,10*8(%rsp) /* store rax */ | ||
615 | CFI_REL_OFFSET rax,RAX | ||
616 | movq %r8, 9*8(%rsp) | ||
617 | CFI_REL_OFFSET r8,R8 | ||
618 | movq %r9, 8*8(%rsp) | ||
619 | CFI_REL_OFFSET r9,R9 | ||
620 | movq %r10,7*8(%rsp) | ||
621 | CFI_REL_OFFSET r10,R10 | ||
622 | movq %r11,6*8(%rsp) | ||
623 | CFI_REL_OFFSET r11,R11 | ||
624 | movq %rbx,5*8(%rsp) | ||
625 | CFI_REL_OFFSET rbx,RBX | ||
626 | movq %rbp,4*8(%rsp) | ||
627 | CFI_REL_OFFSET rbp,RBP | ||
628 | movq %r12,3*8(%rsp) | ||
629 | CFI_REL_OFFSET r12,R12 | ||
630 | movq %r13,2*8(%rsp) | ||
631 | CFI_REL_OFFSET r13,R13 | ||
632 | movq %r14,1*8(%rsp) | ||
633 | CFI_REL_OFFSET r14,R14 | ||
634 | movq %r15,(%rsp) | ||
635 | CFI_REL_OFFSET r15,R15 | ||
636 | xorl %ebx,%ebx | ||
637 | testl $3,CS(%rsp) | ||
638 | je error_kernelspace | ||
639 | error_swapgs: | ||
640 | swapgs | ||
641 | error_sti: | ||
642 | movq %rdi,RDI(%rsp) | ||
643 | movq %rsp,%rdi | ||
644 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
645 | movq $-1,ORIG_RAX(%rsp) | ||
646 | call *%rax | ||
647 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
648 | error_exit: | ||
649 | movl %ebx,%eax | ||
650 | RESTORE_REST | ||
651 | cli | ||
652 | GET_THREAD_INFO(%rcx) | ||
653 | testl %eax,%eax | ||
654 | jne retint_kernel | ||
655 | movl threadinfo_flags(%rcx),%edx | ||
656 | movl $_TIF_WORK_MASK,%edi | ||
657 | andl %edi,%edx | ||
658 | jnz retint_careful | ||
659 | swapgs | ||
660 | RESTORE_ARGS 0,8,0 | ||
661 | iretq | ||
662 | CFI_ENDPROC | ||
663 | |||
664 | error_kernelspace: | ||
665 | incl %ebx | ||
666 | /* There are two places in the kernel that can potentially fault with | ||
667 | usergs. Handle them here. The exception handlers after | ||
668 | iret run with kernel gs again, so don't set the user space flag. | ||
669 | B-stepping K8s sometimes report a truncated RIP for IRET | ||
670 | exceptions returning to compat mode. Check for these here too. */ | ||
671 | leaq iret_label(%rip),%rbp | ||
672 | cmpq %rbp,RIP(%rsp) | ||
673 | je error_swapgs | ||
674 | movl %ebp,%ebp /* zero extend */ | ||
675 | cmpq %rbp,RIP(%rsp) | ||
676 | je error_swapgs | ||
677 | cmpq $gs_change,RIP(%rsp) | ||
678 | je error_swapgs | ||
679 | jmp error_sti | ||
680 | |||
681 | /* Reload gs selector with exception handling */ | ||
682 | /* edi: new selector */ | ||
683 | ENTRY(load_gs_index) | ||
684 | pushf | ||
685 | cli | ||
686 | swapgs | ||
687 | gs_change: | ||
688 | movl %edi,%gs | ||
689 | 2: mfence /* workaround */ | ||
690 | swapgs | ||
691 | popf | ||
692 | ret | ||
693 | |||
694 | .section __ex_table,"a" | ||
695 | .align 8 | ||
696 | .quad gs_change,bad_gs | ||
697 | .previous | ||
698 | .section .fixup,"ax" | ||
699 | /* running with kernelgs */ | ||
700 | bad_gs: | ||
701 | swapgs /* switch back to user gs */ | ||
702 | xorl %eax,%eax | ||
703 | movl %eax,%gs | ||
704 | jmp 2b | ||
705 | .previous | ||
706 | |||
707 | /* | ||
708 | * Create a kernel thread. | ||
709 | * | ||
710 | * C extern interface: | ||
711 | * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | ||
712 | * | ||
713 | * asm input arguments: | ||
714 | * rdi: fn, rsi: arg, rdx: flags | ||
715 | */ | ||
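A hypothetical caller of the interface documented above, assuming kernel context (the worker function and flag choice are made up for illustration):

	static int worker(void *arg)
	{
		/* illustrative: runs in the new kernel thread */
		return 0;
	}

	static void spawn_worker(void)
	{
		long pid = kernel_thread(worker, NULL, CLONE_FS | CLONE_FILES);
		if (pid < 0)
			printk("kernel_thread failed: %ld\n", pid);
	}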
716 | ENTRY(kernel_thread) | ||
717 | CFI_STARTPROC | ||
718 | FAKE_STACK_FRAME $child_rip | ||
719 | SAVE_ALL | ||
720 | |||
721 | # rdi: flags, rsi: usp, rdx: will be &pt_regs | ||
722 | movq %rdx,%rdi | ||
723 | orq kernel_thread_flags(%rip),%rdi | ||
724 | movq $-1, %rsi | ||
725 | movq %rsp, %rdx | ||
726 | |||
727 | xorl %r8d,%r8d | ||
728 | xorl %r9d,%r9d | ||
729 | |||
730 | # clone now | ||
731 | call do_fork | ||
732 | movq %rax,RAX(%rsp) | ||
733 | xorl %edi,%edi | ||
734 | |||
735 | /* | ||
736 | * It isn't worth checking for a reschedule here, | ||
737 | * so internally to the x86_64 port you can rely on kernel_thread() | ||
738 | * not to reschedule the child before returning; this avoids the need | ||
739 | * for hacks, for example to fork off the per-CPU idle tasks. | ||
740 | * [Hopefully no generic code relies on the reschedule -AK] | ||
741 | */ | ||
742 | RESTORE_ALL | ||
743 | UNFAKE_STACK_FRAME | ||
744 | ret | ||
745 | CFI_ENDPROC | ||
746 | |||
747 | |||
748 | child_rip: | ||
749 | /* | ||
750 | * Here we are in the child and the registers are set as they were | ||
751 | * at kernel_thread() invocation in the parent. | ||
752 | */ | ||
753 | movq %rdi, %rax | ||
754 | movq %rsi, %rdi | ||
755 | call *%rax | ||
756 | # exit | ||
757 | xorq %rdi, %rdi | ||
758 | call do_exit | ||
759 | |||
760 | /* | ||
761 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | ||
762 | * | ||
763 | * C extern interface: | ||
764 | * extern long execve(char *name, char **argv, char **envp) | ||
765 | * | ||
766 | * asm input arguments: | ||
767 | * rdi: name, rsi: argv, rdx: envp | ||
768 | * | ||
769 | * We want to fall back into: | ||
770 | * extern long sys_execve(char *name, char **argv, char **envp, struct pt_regs regs) | ||
771 | * | ||
772 | * do_sys_execve asm fallback arguments: | ||
773 | * rdi: name, rsi: argv, rdx: envp, fake frame on the stack | ||
774 | */ | ||
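A hypothetical use of the in-kernel execve() declared above (path and argument vectors are illustrative):

	static void run_init(void)
	{
		static char *argv[] = { "/sbin/init", NULL };
		static char *envp[] = { "HOME=/", "TERM=linux", NULL };

		execve("/sbin/init", argv, envp);
		/* only reached if execve() failed */
	}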
775 | ENTRY(execve) | ||
776 | CFI_STARTPROC | ||
777 | FAKE_STACK_FRAME $0 | ||
778 | SAVE_ALL | ||
779 | call sys_execve | ||
780 | movq %rax, RAX(%rsp) | ||
781 | RESTORE_REST | ||
782 | testq %rax,%rax | ||
783 | je int_ret_from_sys_call | ||
784 | RESTORE_ARGS | ||
785 | UNFAKE_STACK_FRAME | ||
786 | ret | ||
787 | CFI_ENDPROC | ||
788 | |||
789 | ENTRY(page_fault) | ||
790 | errorentry do_page_fault | ||
791 | |||
792 | ENTRY(coprocessor_error) | ||
793 | zeroentry do_coprocessor_error | ||
794 | |||
795 | ENTRY(simd_coprocessor_error) | ||
796 | zeroentry do_simd_coprocessor_error | ||
797 | |||
798 | ENTRY(device_not_available) | ||
799 | zeroentry math_state_restore | ||
800 | |||
801 | /* runs on exception stack */ | ||
802 | ENTRY(debug) | ||
803 | CFI_STARTPROC | ||
804 | pushq $0 | ||
805 | CFI_ADJUST_CFA_OFFSET 8 | ||
806 | paranoidentry do_debug | ||
807 | /* switch back to process stack to restore the state ptrace touched */ | ||
808 | movq %rax,%rsp | ||
809 | testl $3,CS(%rsp) | ||
810 | jnz paranoid_userspace | ||
811 | jmp paranoid_exit | ||
812 | CFI_ENDPROC | ||
813 | |||
814 | /* runs on exception stack */ | ||
815 | ENTRY(nmi) | ||
816 | CFI_STARTPROC | ||
817 | pushq $-1 | ||
818 | CFI_ADJUST_CFA_OFFSET 8 | ||
819 | paranoidentry do_nmi | ||
820 | /* ebx: no swapgs flag */ | ||
821 | paranoid_exit: | ||
822 | testl %ebx,%ebx /* swapgs needed? */ | ||
823 | jnz paranoid_restore | ||
824 | paranoid_swapgs: | ||
825 | cli | ||
826 | swapgs | ||
827 | paranoid_restore: | ||
828 | RESTORE_ALL 8 | ||
829 | iretq | ||
830 | paranoid_userspace: | ||
831 | cli | ||
832 | GET_THREAD_INFO(%rcx) | ||
833 | movl threadinfo_flags(%rcx),%edx | ||
834 | testl $_TIF_NEED_RESCHED,%edx | ||
835 | jnz paranoid_resched | ||
836 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | ||
837 | jnz paranoid_signal | ||
838 | jmp paranoid_swapgs | ||
839 | paranoid_resched: | ||
840 | sti | ||
841 | call schedule | ||
842 | jmp paranoid_exit | ||
843 | paranoid_signal: | ||
844 | sti | ||
845 | xorl %esi,%esi /* oldset */ | ||
846 | movq %rsp,%rdi /* &pt_regs */ | ||
847 | call do_notify_resume | ||
848 | jmp paranoid_exit | ||
849 | CFI_ENDPROC | ||
850 | |||
851 | ENTRY(int3) | ||
852 | zeroentry do_int3 | ||
853 | |||
854 | ENTRY(overflow) | ||
855 | zeroentry do_overflow | ||
856 | |||
857 | ENTRY(bounds) | ||
858 | zeroentry do_bounds | ||
859 | |||
860 | ENTRY(invalid_op) | ||
861 | zeroentry do_invalid_op | ||
862 | |||
863 | ENTRY(coprocessor_segment_overrun) | ||
864 | zeroentry do_coprocessor_segment_overrun | ||
865 | |||
866 | ENTRY(reserved) | ||
867 | zeroentry do_reserved | ||
868 | |||
869 | /* runs on exception stack */ | ||
870 | ENTRY(double_fault) | ||
871 | CFI_STARTPROC | ||
872 | paranoidentry do_double_fault | ||
873 | movq %rax,%rsp | ||
874 | testl $3,CS(%rsp) | ||
875 | jnz paranoid_userspace | ||
876 | jmp paranoid_exit | ||
877 | CFI_ENDPROC | ||
878 | |||
879 | ENTRY(invalid_TSS) | ||
880 | errorentry do_invalid_TSS | ||
881 | |||
882 | ENTRY(segment_not_present) | ||
883 | errorentry do_segment_not_present | ||
884 | |||
885 | /* runs on exception stack */ | ||
886 | ENTRY(stack_segment) | ||
887 | CFI_STARTPROC | ||
888 | paranoidentry do_stack_segment | ||
889 | movq %rax,%rsp | ||
890 | testl $3,CS(%rsp) | ||
891 | jnz paranoid_userspace | ||
892 | jmp paranoid_exit | ||
893 | CFI_ENDPROC | ||
894 | |||
895 | ENTRY(general_protection) | ||
896 | errorentry do_general_protection | ||
897 | |||
898 | ENTRY(alignment_check) | ||
899 | errorentry do_alignment_check | ||
900 | |||
901 | ENTRY(divide_error) | ||
902 | zeroentry do_divide_error | ||
903 | |||
904 | ENTRY(spurious_interrupt_bug) | ||
905 | zeroentry do_spurious_interrupt_bug | ||
906 | |||
907 | #ifdef CONFIG_X86_MCE | ||
908 | /* runs on exception stack */ | ||
909 | ENTRY(machine_check) | ||
910 | CFI_STARTPROC | ||
911 | pushq $0 | ||
912 | CFI_ADJUST_CFA_OFFSET 8 | ||
913 | paranoidentry do_machine_check | ||
914 | jmp paranoid_exit | ||
915 | CFI_ENDPROC | ||
916 | #endif | ||
917 | |||
918 | ENTRY(call_debug) | ||
919 | zeroentry do_call_debug | ||
920 | |||