diff options
-rw-r--r-- | arch/x86/kernel/entry_64.S | 300 |
1 files changed, 166 insertions, 134 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index dbf06a0ef3d5..5a12432ccdf9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -242,6 +242,78 @@ ENTRY(native_usergs_sysret64) | |||
242 | CFI_REL_OFFSET rsp,RSP | 242 | CFI_REL_OFFSET rsp,RSP |
243 | /*CFI_REL_OFFSET ss,SS*/ | 243 | /*CFI_REL_OFFSET ss,SS*/ |
244 | .endm | 244 | .endm |
245 | |||
246 | /* | ||
247 | * initial frame state for interrupts and exceptions | ||
248 | */ | ||
249 | .macro _frame ref | ||
250 | CFI_STARTPROC simple | ||
251 | CFI_SIGNAL_FRAME | ||
252 | CFI_DEF_CFA rsp,SS+8-\ref | ||
253 | /*CFI_REL_OFFSET ss,SS-\ref*/ | ||
254 | CFI_REL_OFFSET rsp,RSP-\ref | ||
255 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | ||
256 | /*CFI_REL_OFFSET cs,CS-\ref*/ | ||
257 | CFI_REL_OFFSET rip,RIP-\ref | ||
258 | .endm | ||
259 | |||
260 | /* | ||
261 | * initial frame state for interrupts (and exceptions without error code) | ||
262 | */ | ||
263 | #define INTR_FRAME _frame RIP | ||
264 | /* | ||
265 | * initial frame state for exceptions with error code (and interrupts | ||
266 | * with vector already pushed) | ||
267 | */ | ||
268 | #define XCPT_FRAME _frame ORIG_RAX | ||
269 | |||
270 | /* save partial stack frame */ | ||
271 | ENTRY(save_args) | ||
272 | XCPT_FRAME | ||
273 | cld | ||
274 | movq %rdi, 8*8+16(%rsp) | ||
275 | CFI_REL_OFFSET rdi, 8*8+16 | ||
276 | movq %rsi, 7*8+16(%rsp) | ||
277 | CFI_REL_OFFSET rsi, 7*8+16 | ||
278 | movq %rdx, 6*8+16(%rsp) | ||
279 | CFI_REL_OFFSET rdx, 6*8+16 | ||
280 | movq %rcx, 5*8+16(%rsp) | ||
281 | CFI_REL_OFFSET rcx, 5*8+16 | ||
282 | movq %rax, 4*8+16(%rsp) | ||
283 | CFI_REL_OFFSET rax, 4*8+16 | ||
284 | movq %r8, 3*8+16(%rsp) | ||
285 | CFI_REL_OFFSET r8, 3*8+16 | ||
286 | movq %r9, 2*8+16(%rsp) | ||
287 | CFI_REL_OFFSET r9, 2*8+16 | ||
288 | movq %r10, 1*8+16(%rsp) | ||
289 | CFI_REL_OFFSET r10, 1*8+16 | ||
290 | movq %r11, 0*8+16(%rsp) | ||
291 | CFI_REL_OFFSET r11, 0*8+16 | ||
292 | leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ | ||
293 | movq %rbp, 8(%rsp) /* push %rbp */ | ||
294 | leaq 8(%rsp), %rbp /* mov %rsp, %rbp */ | |
295 | testl $3, CS(%rdi) | ||
296 | je 1f | ||
297 | SWAPGS | ||
298 | /* | ||
299 | * irqcount is used to check if a CPU is already on an interrupt stack | ||
300 | * or not. While this is essentially redundant with preempt_count it is | ||
301 | * a little cheaper to use a separate counter in the PDA (short of | ||
302 | * moving irq_enter into assembly, which would be too much work) | ||
303 | */ | ||
304 | 1: incl %gs:pda_irqcount | ||
305 | jne 2f | ||
306 | pop %rax /* move return address... */ | ||
307 | mov %gs:pda_irqstackptr,%rsp | ||
308 | push %rax /* ... to the new stack */ | ||
309 | /* | ||
310 | * We entered an interrupt context - irqs are off: | ||
311 | */ | ||
312 | 2: TRACE_IRQS_OFF | ||
313 | ret | ||
314 | CFI_ENDPROC | ||
315 | END(save_args) | ||
316 | |||
245 | /* | 317 | /* |
246 | * A newly forked process directly context switches into this. | 318 | * A newly forked process directly context switches into this. |
247 | */ | 319 | */ |
@@ -608,26 +680,6 @@ ENTRY(stub_rt_sigreturn) | |||
608 | END(stub_rt_sigreturn) | 680 | END(stub_rt_sigreturn) |
609 | 681 | ||
610 | /* | 682 | /* |
611 | * initial frame state for interrupts and exceptions | ||
612 | */ | ||
613 | .macro _frame ref | ||
614 | CFI_STARTPROC simple | ||
615 | CFI_SIGNAL_FRAME | ||
616 | CFI_DEF_CFA rsp,SS+8-\ref | ||
617 | /*CFI_REL_OFFSET ss,SS-\ref*/ | ||
618 | CFI_REL_OFFSET rsp,RSP-\ref | ||
619 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | ||
620 | /*CFI_REL_OFFSET cs,CS-\ref*/ | ||
621 | CFI_REL_OFFSET rip,RIP-\ref | ||
622 | .endm | ||
623 | |||
624 | /* initial frame state for interrupts (and exceptions without error code) */ | ||
625 | #define INTR_FRAME _frame RIP | ||
626 | /* initial frame state for exceptions with error code (and interrupts with | ||
627 | vector already pushed) */ | ||
628 | #define XCPT_FRAME _frame ORIG_RAX | ||
629 | |||
630 | /* | ||
631 | * Build the entry stubs and pointer table with some assembler magic. | 683 | * Build the entry stubs and pointer table with some assembler magic. |
632 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | 684 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
633 | * single cache line on all modern x86 implementations. | 685 | * single cache line on all modern x86 implementations. |
@@ -667,46 +719,19 @@ END(irq_entries_start) | |||
667 | END(interrupt) | 719 | END(interrupt) |
668 | .previous | 720 | .previous |
669 | 721 | ||
670 | /* | 722 | /* |
671 | * Interrupt entry/exit. | 723 | * Interrupt entry/exit. |
672 | * | 724 | * |
673 | * Interrupt entry points save only callee clobbered registers in fast path. | 725 | * Interrupt entry points save only callee clobbered registers in fast path. |
674 | * | 726 | * |
675 | * Entry runs with interrupts off. | 727 | * Entry runs with interrupts off. |
676 | */ | 728 | */ |
677 | 729 | ||
678 | /* 0(%rsp): ~(interrupt number) */ | 730 | /* 0(%rsp): ~(interrupt number) */ |
679 | .macro interrupt func | 731 | .macro interrupt func |
680 | cld | 732 | subq $10*8, %rsp |
681 | SAVE_ARGS | 733 | CFI_ADJUST_CFA_OFFSET 10*8 |
682 | leaq -ARGOFFSET(%rsp),%rdi /* arg1 for handler */ | 734 | call save_args |
683 | pushq %rbp | ||
684 | /* | ||
685 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
686 | * other, to fill pt_regs properly. This is because bx comes right | ||
687 | * before the last saved register in that structure, and not bp. If the | ||
688 | * base pointer were in the place bx is today, this would not be needed. | ||
689 | */ | ||
690 | movq %rbp, -8(%rsp) | ||
691 | CFI_ADJUST_CFA_OFFSET 8 | ||
692 | CFI_REL_OFFSET rbp, 0 | ||
693 | movq %rsp,%rbp | ||
694 | CFI_DEF_CFA_REGISTER rbp | ||
695 | testl $3,CS(%rdi) | ||
696 | je 1f | ||
697 | SWAPGS | ||
698 | /* irqcount is used to check if a CPU is already on an interrupt | ||
699 | stack or not. While this is essentially redundant with preempt_count | ||
700 | it is a little cheaper to use a separate counter in the PDA | ||
701 | (short of moving irq_enter into assembly, which would be too | ||
702 | much work) */ | ||
703 | 1: incl %gs:pda_irqcount | ||
704 | cmoveq %gs:pda_irqstackptr,%rsp | ||
705 | push %rbp # backlink for old unwinder | ||
706 | /* | ||
707 | * We entered an interrupt context - irqs are off: | ||
708 | */ | ||
709 | TRACE_IRQS_OFF | ||
710 | call \func | 735 | call \func |
711 | .endm | 736 | .endm |
712 | 737 | ||
@@ -852,6 +877,8 @@ END(common_interrupt) | |||
852 | /* | 877 | /* |
853 | * APIC interrupts. | 878 | * APIC interrupts. |
854 | */ | 879 | */ |
880 | .p2align 5 | ||
881 | |||
855 | .macro apicinterrupt num,func | 882 | .macro apicinterrupt num,func |
856 | INTR_FRAME | 883 | INTR_FRAME |
857 | pushq $~(\num) | 884 | pushq $~(\num) |
@@ -922,24 +949,29 @@ END(spurious_interrupt) | |||
922 | .macro zeroentry sym | 949 | .macro zeroentry sym |
923 | INTR_FRAME | 950 | INTR_FRAME |
924 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 951 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
925 | pushq $0 /* push error code/oldrax */ | 952 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
926 | CFI_ADJUST_CFA_OFFSET 8 | 953 | CFI_ADJUST_CFA_OFFSET 8 |
927 | pushq %rax /* push real oldrax to the rdi slot */ | 954 | subq $15*8,%rsp |
928 | CFI_ADJUST_CFA_OFFSET 8 | 955 | CFI_ADJUST_CFA_OFFSET 15*8 |
929 | CFI_REL_OFFSET rax,0 | 956 | call error_entry |
930 | leaq \sym(%rip),%rax | 957 | movq %rsp,%rdi /* pt_regs pointer */ |
931 | jmp error_entry | 958 | xorl %esi,%esi /* no error code */ |
959 | call \sym | ||
960 | jmp error_exit /* %ebx: no swapgs flag */ | ||
932 | CFI_ENDPROC | 961 | CFI_ENDPROC |
933 | .endm | 962 | .endm |
934 | 963 | ||
935 | .macro errorentry sym | 964 | .macro errorentry sym |
936 | XCPT_FRAME | 965 | XCPT_FRAME |
937 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 966 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
938 | pushq %rax | 967 | subq $15*8,%rsp |
939 | CFI_ADJUST_CFA_OFFSET 8 | 968 | CFI_ADJUST_CFA_OFFSET 15*8 |
940 | CFI_REL_OFFSET rax,0 | 969 | call error_entry |
941 | leaq \sym(%rip),%rax | 970 | movq %rsp,%rdi /* pt_regs pointer */ |
942 | jmp error_entry | 971 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
972 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ | ||
973 | call \sym | ||
974 | jmp error_exit /* %ebx: no swapgs flag */ | ||
943 | CFI_ENDPROC | 975 | CFI_ENDPROC |
944 | .endm | 976 | .endm |
945 | 977 | ||
@@ -1043,93 +1075,93 @@ paranoid_schedule\trace: | |||
1043 | .endm | 1075 | .endm |
1044 | 1076 | ||
1045 | /* | 1077 | /* |
1046 | * Exception entry point. This expects an error code/orig_rax on the stack | 1078 | * Exception entry point. This expects an error code/orig_rax on the stack. |
1047 | * and the exception handler in %rax. | 1079 | * returns "no swapgs flag" in %ebx. |
1048 | */ | 1080 | */ |
1049 | KPROBE_ENTRY(error_entry) | 1081 | KPROBE_ENTRY(error_entry) |
1050 | _frame RDI | 1082 | _frame RDI |
1051 | CFI_REL_OFFSET rax,0 | 1083 | CFI_ADJUST_CFA_OFFSET 15*8 |
1052 | /* rdi slot contains rax, oldrax contains error code */ | 1084 | /* oldrax contains error code */ |
1053 | cld | 1085 | cld |
1054 | subq $14*8,%rsp | 1086 | movq %rdi,14*8+8(%rsp) |
1055 | CFI_ADJUST_CFA_OFFSET (14*8) | 1087 | CFI_REL_OFFSET rdi,RDI+8 |
1056 | movq %rsi,13*8(%rsp) | 1088 | movq %rsi,13*8+8(%rsp) |
1057 | CFI_REL_OFFSET rsi,RSI | 1089 | CFI_REL_OFFSET rsi,RSI+8 |
1058 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | 1090 | movq %rdx,12*8+8(%rsp) |
1059 | CFI_REGISTER rax,rsi | 1091 | CFI_REL_OFFSET rdx,RDX+8 |
1060 | movq %rdx,12*8(%rsp) | 1092 | movq %rcx,11*8+8(%rsp) |
1061 | CFI_REL_OFFSET rdx,RDX | 1093 | CFI_REL_OFFSET rcx,RCX+8 |
1062 | movq %rcx,11*8(%rsp) | 1094 | movq %rax,10*8+8(%rsp) |
1063 | CFI_REL_OFFSET rcx,RCX | 1095 | CFI_REL_OFFSET rax,RAX+8 |
1064 | movq %rsi,10*8(%rsp) /* store rax */ | 1096 | movq %r8, 9*8+8(%rsp) |
1065 | CFI_REL_OFFSET rax,RAX | 1097 | CFI_REL_OFFSET r8,R8+8 |
1066 | movq %r8, 9*8(%rsp) | 1098 | movq %r9, 8*8+8(%rsp) |
1067 | CFI_REL_OFFSET r8,R8 | 1099 | CFI_REL_OFFSET r9,R9+8 |
1068 | movq %r9, 8*8(%rsp) | 1100 | movq %r10,7*8+8(%rsp) |
1069 | CFI_REL_OFFSET r9,R9 | 1101 | CFI_REL_OFFSET r10,R10+8 |
1070 | movq %r10,7*8(%rsp) | 1102 | movq %r11,6*8+8(%rsp) |
1071 | CFI_REL_OFFSET r10,R10 | 1103 | CFI_REL_OFFSET r11,R11+8 |
1072 | movq %r11,6*8(%rsp) | 1104 | movq %rbx,5*8+8(%rsp) |
1073 | CFI_REL_OFFSET r11,R11 | 1105 | CFI_REL_OFFSET rbx,RBX+8 |
1074 | movq %rbx,5*8(%rsp) | 1106 | movq %rbp,4*8+8(%rsp) |
1075 | CFI_REL_OFFSET rbx,RBX | 1107 | CFI_REL_OFFSET rbp,RBP+8 |
1076 | movq %rbp,4*8(%rsp) | 1108 | movq %r12,3*8+8(%rsp) |
1077 | CFI_REL_OFFSET rbp,RBP | 1109 | CFI_REL_OFFSET r12,R12+8 |
1078 | movq %r12,3*8(%rsp) | 1110 | movq %r13,2*8+8(%rsp) |
1079 | CFI_REL_OFFSET r12,R12 | 1111 | CFI_REL_OFFSET r13,R13+8 |
1080 | movq %r13,2*8(%rsp) | 1112 | movq %r14,1*8+8(%rsp) |
1081 | CFI_REL_OFFSET r13,R13 | 1113 | CFI_REL_OFFSET r14,R14+8 |
1082 | movq %r14,1*8(%rsp) | 1114 | movq %r15,0*8+8(%rsp) |
1083 | CFI_REL_OFFSET r14,R14 | 1115 | CFI_REL_OFFSET r15,R15+8 |
1084 | movq %r15,(%rsp) | ||
1085 | CFI_REL_OFFSET r15,R15 | ||
1086 | xorl %ebx,%ebx | 1116 | xorl %ebx,%ebx |
1087 | testl $3,CS(%rsp) | 1117 | testl $3,CS+8(%rsp) |
1088 | je error_kernelspace | 1118 | je error_kernelspace |
1089 | error_swapgs: | 1119 | error_swapgs: |
1090 | SWAPGS | 1120 | SWAPGS |
1091 | error_sti: | 1121 | error_sti: |
1092 | TRACE_IRQS_OFF | 1122 | TRACE_IRQS_OFF |
1093 | movq %rdi,RDI(%rsp) | 1123 | ret |
1094 | CFI_REL_OFFSET rdi,RDI | 1124 | CFI_ENDPROC |
1095 | movq %rsp,%rdi | 1125 | |
1096 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 1126 | /* |
1097 | movq $-1,ORIG_RAX(%rsp) | 1127 | * There are two places in the kernel that can potentially fault with |
1098 | call *%rax | 1128 | * usergs. Handle them here. The exception handlers after iret run with |
1099 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | 1129 | * kernel gs again, so don't set the user space flag. B stepping K8s |
1100 | error_exit: | 1130 | * sometimes report an truncated RIP for IRET exceptions returning to |
1131 | * compat mode. Check for these here too. | ||
1132 | */ | ||
1133 | error_kernelspace: | ||
1134 | incl %ebx | ||
1135 | leaq irq_return(%rip),%rcx | ||
1136 | cmpq %rcx,RIP+8(%rsp) | ||
1137 | je error_swapgs | ||
1138 | movl %ecx,%ecx /* zero extend */ | ||
1139 | cmpq %rcx,RIP+8(%rsp) | ||
1140 | je error_swapgs | ||
1141 | cmpq $gs_change,RIP+8(%rsp) | ||
1142 | je error_swapgs | ||
1143 | jmp error_sti | ||
1144 | KPROBE_END(error_entry) | ||
1145 | |||
1146 | |||
1147 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1148 | KPROBE_ENTRY(error_exit) | ||
1149 | _frame R15 | ||
1101 | movl %ebx,%eax | 1150 | movl %ebx,%eax |
1102 | RESTORE_REST | 1151 | RESTORE_REST |
1103 | DISABLE_INTERRUPTS(CLBR_NONE) | 1152 | DISABLE_INTERRUPTS(CLBR_NONE) |
1104 | TRACE_IRQS_OFF | 1153 | TRACE_IRQS_OFF |
1105 | GET_THREAD_INFO(%rcx) | 1154 | GET_THREAD_INFO(%rcx) |
1106 | testl %eax,%eax | 1155 | testl %eax,%eax |
1107 | jne retint_kernel | 1156 | jne retint_kernel |
1108 | LOCKDEP_SYS_EXIT_IRQ | 1157 | LOCKDEP_SYS_EXIT_IRQ |
1109 | movl TI_flags(%rcx),%edx | 1158 | movl TI_flags(%rcx),%edx |
1110 | movl $_TIF_WORK_MASK,%edi | 1159 | movl $_TIF_WORK_MASK,%edi |
1111 | andl %edi,%edx | 1160 | andl %edi,%edx |
1112 | jnz retint_careful | 1161 | jnz retint_careful |
1113 | jmp retint_swapgs | 1162 | jmp retint_swapgs |
1114 | CFI_ENDPROC | 1163 | CFI_ENDPROC |
1115 | 1164 | KPROBE_END(error_exit) | |
1116 | error_kernelspace: | ||
1117 | incl %ebx | ||
1118 | /* There are two places in the kernel that can potentially fault with | ||
1119 | usergs. Handle them here. The exception handlers after | ||
1120 | iret run with kernel gs again, so don't set the user space flag. | ||
1121 | B stepping K8s sometimes report an truncated RIP for IRET | ||
1122 | exceptions returning to compat mode. Check for these here too. */ | ||
1123 | leaq irq_return(%rip),%rcx | ||
1124 | cmpq %rcx,RIP(%rsp) | ||
1125 | je error_swapgs | ||
1126 | movl %ecx,%ecx /* zero extend */ | ||
1127 | cmpq %rcx,RIP(%rsp) | ||
1128 | je error_swapgs | ||
1129 | cmpq $gs_change,RIP(%rsp) | ||
1130 | je error_swapgs | ||
1131 | jmp error_sti | ||
1132 | KPROBE_END(error_entry) | ||
1133 | 1165 | ||
1134 | /* Reload gs selector with exception handling */ | 1166 | /* Reload gs selector with exception handling */ |
1135 | /* edi: new selector */ | 1167 | /* edi: new selector */ |