author		Alexander van Heukelum <heukelum@mailshack.com>	2008-11-18 19:18:11 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-11-20 04:49:57 -0500
commit		d99015b1abbad743aa049b439c1e1dede6d0fa49 (patch)
tree		3fc93803067d8ac375cf423b9686bd30b802e727 /arch/x86/kernel
parent		c032a2de4c1a82187e9a754511043be47c8a92b5 (diff)
x86: move entry_64.S register saving out of the macros
Here is a combined patch that moves "save_args" out-of-line for
the interrupt macro and moves "error_entry" mostly out-of-line
for the zeroentry and errorentry macros.

The save_args function becomes really straightforward and easy to
understand, with the possible exception of the stack switch code,
which now needs to copy the return address of the calling function
to the new stack. Normal interrupts arrive with ((~vector)-0x80)
on the stack, which gets adjusted in common_interrupt:

<common_interrupt>:
(5)  addq $0xffffffffffffff80,(%rsp)		/* -> ~(vector) */
(4)  sub $0x50,%rsp				/* space for registers */
(5)  callq ffffffff80211290 <save_args>
(5)  callq ffffffff80214290 <do_IRQ>
<ret_from_intr>:
     ...

An apic interrupt stub now looks like this:

<thermal_interrupt>:
(5)  pushq $0xffffffffffffff05			/* ~(vector) */
(4)  sub $0x50,%rsp				/* space for registers */
(5)  callq ffffffff80211290 <save_args>
(5)  callq ffffffff80212b8f <smp_thermal_interrupt>
(5)  jmpq ffffffff80211f93 <ret_from_intr>

Similarly the exception handler register saving function becomes
simpler, without the need of any parameter shuffling. The stub for
an exception without errorcode looks like this:

<overflow>:
(6)  callq *0x1cad12(%rip)	# ffffffff803dd448 <pv_irq_ops+0x38>
(2)  pushq $0xffffffffffffffff			/* no syscall */
(4)  sub $0x78,%rsp				/* space for registers */
(5)  callq ffffffff8030e3b0 <error_entry>
(3)  mov %rsp,%rdi				/* pt_regs pointer */
(2)  xor %esi,%esi				/* no error code */
(5)  callq ffffffff80213446 <do_overflow>
(5)  jmpq ffffffff8030e460 <error_exit>

And one for an exception with errorcode like this:

<segment_not_present>:
(6)  callq *0x1cab92(%rip)	# ffffffff803dd448 <pv_irq_ops+0x38>
(4)  sub $0x78,%rsp				/* space for registers */
(5)  callq ffffffff8030e3b0 <error_entry>
(3)  mov %rsp,%rdi				/* pt_regs pointer */
(5)  mov 0x78(%rsp),%rsi			/* load error code */
(9)  movq $0xffffffffffffffff,0x78(%rsp)	/* no syscall */
(5)  callq ffffffff80213209 <do_segment_not_present>
(5)  jmpq ffffffff8030e460 <error_exit>

Unfortunately, this last type is more than 32 bytes. But the total
space savings due to this patch are about 2500 bytes on an SMP
configuration, and I think the code is clearer than it was before.

The tested kernels were non-paravirt ones (i.e., without the
indirect call at the top of the exception handlers).

Anyhow, I tested this patch on top of a recent -tip. The machine
was a 2x4-core Xeon at 2333 MHz. Measured were the delays between
(almost-)adjacent rdtsc instructions. The graphs show how much time
is spent outside of the program as a function of the measured delay.
The area under the graph represents the total time spent outside
the program. Eight instances of the rdtsctest were started, each
pinned to a single cpu. The histograms are added. For each kernel
two measurements were done: one in mostly idle condition, the other
while running "bonnie++ -f", bound to cpu 0. Each measurement took
40 minutes runtime. See the attached graphs for the results. The
graphs overlap almost everywhere, but there are small differences.

Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
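The rdtsctest source is not part of this commit; the following minimal C
sketch only illustrates the measurement method described above — back-to-back
rdtsc reads on a pinned CPU, with the deltas histogrammed. The bucket and
iteration counts are arbitrary choices, not values from the original test.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <sched.h>
#include <x86intrin.h>		/* __rdtsc() */

#define BUCKETS 4096
static uint64_t hist[BUCKETS];

int main(void)
{
	/* Pin to CPU 0; the original test started one pinned
	 * instance per CPU and summed the histograms. */
	cpu_set_t set;
	CPU_ZERO(&set);
	CPU_SET(0, &set);
	sched_setaffinity(0, sizeof(set), &set);

	uint64_t prev = __rdtsc();
	for (long i = 0; i < 100000000L; i++) {
		uint64_t now = __rdtsc();
		uint64_t delta = now - prev;	/* large deltas = time
						 * spent outside the
						 * program (IRQs etc.) */
		prev = now;
		hist[delta < BUCKETS ? delta : BUCKETS - 1]++;
	}
	for (int b = 0; b < BUCKETS; b++)
		if (hist[b])
			printf("%d cycles: %llu\n", b,
			       (unsigned long long)hist[b]);
	return 0;
}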
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/entry_64.S	| 300
1 file changed, 166 insertions(+), 134 deletions(-)
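To make the offsets in the diff below easier to follow: zeroentry/errorentry
now reserve 15*8 bytes and call error_entry, which fills in a full pt_regs
frame, while save_args fills only the partial (caller-clobbered) part. Below
is a sketch of that layout, with field order per the kernel's x86-64 struct
pt_regs of this era; the struct name itself is illustrative, not a kernel
identifier.

/* Offsets are from %rsp after the "subq $15*8,%rsp" in the entry
 * macros; inside error_entry its own return address sits on top,
 * hence the "+8" on every offset there. */
struct pt_regs_frame {
	unsigned long r15;	/*  0*8  saved by error_entry only */
	unsigned long r14;	/*  1*8 */
	unsigned long r13;	/*  2*8 */
	unsigned long r12;	/*  3*8 */
	unsigned long rbp;	/*  4*8 */
	unsigned long rbx;	/*  5*8 */
	unsigned long r11;	/*  6*8  save_args covers r11..rdi */
	unsigned long r10;	/*  7*8 */
	unsigned long r9;	/*  8*8 */
	unsigned long r8;	/*  9*8 */
	unsigned long rax;	/* 10*8 */
	unsigned long rcx;	/* 11*8 */
	unsigned long rdx;	/* 12*8 */
	unsigned long rsi;	/* 13*8 */
	unsigned long rdi;	/* 14*8 */
	unsigned long orig_rax;	/* 15*8  error code, then set to -1 */
	/* hardware-pushed exception frame follows: */
	unsigned long rip, cs, eflags, rsp, ss;
};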
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index dbf06a0ef3d5..5a12432ccdf9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -242,6 +242,78 @@ ENTRY(native_usergs_sysret64)
 	CFI_REL_OFFSET rsp,RSP
 	/*CFI_REL_OFFSET ss,SS*/
 	.endm
+
+/*
+ * initial frame state for interrupts and exceptions
+ */
+	.macro _frame ref
+	CFI_STARTPROC simple
+	CFI_SIGNAL_FRAME
+	CFI_DEF_CFA rsp,SS+8-\ref
+	/*CFI_REL_OFFSET ss,SS-\ref*/
+	CFI_REL_OFFSET rsp,RSP-\ref
+	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
+	/*CFI_REL_OFFSET cs,CS-\ref*/
+	CFI_REL_OFFSET rip,RIP-\ref
+	.endm
+
+/*
+ * initial frame state for interrupts (and exceptions without error code)
+ */
+#define INTR_FRAME	_frame RIP
+/*
+ * initial frame state for exceptions with error code (and interrupts
+ * with vector already pushed)
+ */
+#define XCPT_FRAME	_frame ORIG_RAX
+
+/* save partial stack frame */
+ENTRY(save_args)
+	XCPT_FRAME
+	cld
+	movq %rdi, 8*8+16(%rsp)
+	CFI_REL_OFFSET rdi, 8*8+16
+	movq %rsi, 7*8+16(%rsp)
+	CFI_REL_OFFSET rsi, 7*8+16
+	movq %rdx, 6*8+16(%rsp)
+	CFI_REL_OFFSET rdx, 6*8+16
+	movq %rcx, 5*8+16(%rsp)
+	CFI_REL_OFFSET rcx, 5*8+16
+	movq %rax, 4*8+16(%rsp)
+	CFI_REL_OFFSET rax, 4*8+16
+	movq %r8, 3*8+16(%rsp)
+	CFI_REL_OFFSET r8, 3*8+16
+	movq %r9, 2*8+16(%rsp)
+	CFI_REL_OFFSET r9, 2*8+16
+	movq %r10, 1*8+16(%rsp)
+	CFI_REL_OFFSET r10, 1*8+16
+	movq %r11, 0*8+16(%rsp)
+	CFI_REL_OFFSET r11, 0*8+16
+	leaq -ARGOFFSET+16(%rsp),%rdi	/* arg1 for handler */
+	movq %rbp, 8(%rsp)		/* push %rbp */
+	leaq 8(%rsp), %rbp		/* mov %rsp, %ebp */
+	testl $3, CS(%rdi)
+	je 1f
+	SWAPGS
+	/*
+	 * irqcount is used to check if a CPU is already on an interrupt stack
+	 * or not. While this is essentially redundant with preempt_count it is
+	 * a little cheaper to use a separate counter in the PDA (short of
+	 * moving irq_enter into assembly, which would be too much work)
+	 */
+1:	incl %gs:pda_irqcount
+	jne 2f
+	pop %rax			/* move return address... */
+	mov %gs:pda_irqstackptr,%rsp
+	push %rax			/* ... to the new stack */
+	/*
+	 * We entered an interrupt context - irqs are off:
+	 */
+2:	TRACE_IRQS_OFF
+	ret
+	CFI_ENDPROC
+END(save_args)
+
 /*
  * A newly forked process directly context switches into this.
  */
@@ -608,26 +680,6 @@ ENTRY(stub_rt_sigreturn)
 END(stub_rt_sigreturn)
 
 /*
- * initial frame state for interrupts and exceptions
- */
-	.macro _frame ref
-	CFI_STARTPROC simple
-	CFI_SIGNAL_FRAME
-	CFI_DEF_CFA rsp,SS+8-\ref
-	/*CFI_REL_OFFSET ss,SS-\ref*/
-	CFI_REL_OFFSET rsp,RSP-\ref
-	/*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
-	/*CFI_REL_OFFSET cs,CS-\ref*/
-	CFI_REL_OFFSET rip,RIP-\ref
-	.endm
-
-/* initial frame state for interrupts (and exceptions without error code) */
-#define INTR_FRAME	_frame RIP
-/* initial frame state for exceptions with error code (and interrupts with
-   vector already pushed) */
-#define XCPT_FRAME	_frame ORIG_RAX
-
-/*
  * Build the entry stubs and pointer table with some assembler magic.
  * We pack 7 stubs into a single 32-byte chunk, which will fit in a
  * single cache line on all modern x86 implementations.
@@ -667,46 +719,19 @@ END(irq_entries_start)
 END(interrupt)
 .previous
 
 /*
  * Interrupt entry/exit.
  *
  * Interrupt entry points save only callee clobbered registers in fast path.
  *
  * Entry runs with interrupts off.
  */
 
 /* 0(%rsp): ~(interrupt number) */
 	.macro interrupt func
-	cld
-	SAVE_ARGS
-	leaq -ARGOFFSET(%rsp),%rdi	/* arg1 for handler */
-	pushq %rbp
-	/*
-	 * Save rbp twice: One is for marking the stack frame, as usual, and the
-	 * other, to fill pt_regs properly. This is because bx comes right
-	 * before the last saved register in that structure, and not bp. If the
-	 * base pointer were in the place bx is today, this would not be needed.
-	 */
-	movq %rbp, -8(%rsp)
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rbp, 0
-	movq %rsp,%rbp
-	CFI_DEF_CFA_REGISTER rbp
-	testl $3,CS(%rdi)
-	je 1f
-	SWAPGS
-	/* irqcount is used to check if a CPU is already on an interrupt
-	   stack or not. While this is essentially redundant with preempt_count
-	   it is a little cheaper to use a separate counter in the PDA
-	   (short of moving irq_enter into assembly, which would be too
-	   much work) */
-1:	incl %gs:pda_irqcount
-	cmoveq %gs:pda_irqstackptr,%rsp
-	push %rbp			# backlink for old unwinder
-	/*
-	 * We entered an interrupt context - irqs are off:
-	 */
-	TRACE_IRQS_OFF
+	subq $10*8, %rsp
+	CFI_ADJUST_CFA_OFFSET 10*8
+	call save_args
 	call \func
 	.endm
 
@@ -852,6 +877,8 @@ END(common_interrupt)
 /*
  * APIC interrupts.
  */
+	.p2align 5
+
 	.macro apicinterrupt num,func
 	INTR_FRAME
 	pushq $~(\num)
@@ -922,24 +949,29 @@ END(spurious_interrupt)
 	.macro zeroentry sym
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq $0	/* push error code/oldrax */
+	pushq $-1	/* ORIG_RAX: no syscall to restart */
 	CFI_ADJUST_CFA_OFFSET 8
-	pushq %rax	/* push real oldrax to the rdi slot */
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rax,0
-	leaq \sym(%rip),%rax
-	jmp error_entry
+	subq $15*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call error_entry
+	movq %rsp,%rdi		/* pt_regs pointer */
+	xorl %esi,%esi		/* no error code */
+	call \sym
+	jmp error_exit		/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 	.endm
 
 	.macro errorentry sym
 	XCPT_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq %rax
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rax,0
-	leaq \sym(%rip),%rax
-	jmp error_entry
+	subq $15*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 15*8
+	call error_entry
+	movq %rsp,%rdi			/* pt_regs pointer */
+	movq ORIG_RAX(%rsp),%rsi	/* get error code */
+	movq $-1,ORIG_RAX(%rsp)		/* no syscall to restart */
+	call \sym
+	jmp error_exit			/* %ebx: no swapgs flag */
 	CFI_ENDPROC
 	.endm
 
@@ -1043,93 +1075,93 @@ paranoid_schedule\trace:
 	.endm
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack
- * and the exception handler in %rax.
+ * Exception entry point. This expects an error code/orig_rax on the stack.
+ * returns in "no swapgs flag" in %ebx.
  */
 KPROBE_ENTRY(error_entry)
 	_frame RDI
-	CFI_REL_OFFSET rax,0
-	/* rdi slot contains rax, oldrax contains error code */
+	CFI_ADJUST_CFA_OFFSET 15*8
+	/* oldrax contains error code */
 	cld
-	subq $14*8,%rsp
-	CFI_ADJUST_CFA_OFFSET (14*8)
-	movq %rsi,13*8(%rsp)
-	CFI_REL_OFFSET rsi,RSI
-	movq 14*8(%rsp),%rsi	/* load rax from rdi slot */
-	CFI_REGISTER rax,rsi
-	movq %rdx,12*8(%rsp)
-	CFI_REL_OFFSET rdx,RDX
-	movq %rcx,11*8(%rsp)
-	CFI_REL_OFFSET rcx,RCX
-	movq %rsi,10*8(%rsp)	/* store rax */
-	CFI_REL_OFFSET rax,RAX
-	movq %r8, 9*8(%rsp)
-	CFI_REL_OFFSET r8,R8
-	movq %r9, 8*8(%rsp)
-	CFI_REL_OFFSET r9,R9
-	movq %r10,7*8(%rsp)
-	CFI_REL_OFFSET r10,R10
-	movq %r11,6*8(%rsp)
-	CFI_REL_OFFSET r11,R11
-	movq %rbx,5*8(%rsp)
-	CFI_REL_OFFSET rbx,RBX
-	movq %rbp,4*8(%rsp)
-	CFI_REL_OFFSET rbp,RBP
-	movq %r12,3*8(%rsp)
-	CFI_REL_OFFSET r12,R12
-	movq %r13,2*8(%rsp)
-	CFI_REL_OFFSET r13,R13
-	movq %r14,1*8(%rsp)
-	CFI_REL_OFFSET r14,R14
-	movq %r15,(%rsp)
-	CFI_REL_OFFSET r15,R15
+	movq %rdi,14*8+8(%rsp)
+	CFI_REL_OFFSET rdi,RDI+8
+	movq %rsi,13*8+8(%rsp)
+	CFI_REL_OFFSET rsi,RSI+8
+	movq %rdx,12*8+8(%rsp)
+	CFI_REL_OFFSET rdx,RDX+8
+	movq %rcx,11*8+8(%rsp)
+	CFI_REL_OFFSET rcx,RCX+8
+	movq %rax,10*8+8(%rsp)
+	CFI_REL_OFFSET rax,RAX+8
+	movq %r8, 9*8+8(%rsp)
+	CFI_REL_OFFSET r8,R8+8
+	movq %r9, 8*8+8(%rsp)
+	CFI_REL_OFFSET r9,R9+8
+	movq %r10,7*8+8(%rsp)
+	CFI_REL_OFFSET r10,R10+8
+	movq %r11,6*8+8(%rsp)
+	CFI_REL_OFFSET r11,R11+8
+	movq %rbx,5*8+8(%rsp)
+	CFI_REL_OFFSET rbx,RBX+8
+	movq %rbp,4*8+8(%rsp)
+	CFI_REL_OFFSET rbp,RBP+8
+	movq %r12,3*8+8(%rsp)
+	CFI_REL_OFFSET r12,R12+8
+	movq %r13,2*8+8(%rsp)
+	CFI_REL_OFFSET r13,R13+8
+	movq %r14,1*8+8(%rsp)
+	CFI_REL_OFFSET r14,R14+8
+	movq %r15,0*8+8(%rsp)
+	CFI_REL_OFFSET r15,R15+8
 	xorl %ebx,%ebx
-	testl $3,CS(%rsp)
+	testl $3,CS+8(%rsp)
 	je error_kernelspace
 error_swapgs:
 	SWAPGS
error_sti:
 	TRACE_IRQS_OFF
-	movq %rdi,RDI(%rsp)
-	CFI_REL_OFFSET rdi,RDI
-	movq %rsp,%rdi
-	movq ORIG_RAX(%rsp),%rsi	/* get error code */
-	movq $-1,ORIG_RAX(%rsp)
-	call *%rax
-	/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
+	ret
+	CFI_ENDPROC
+
+/*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report an truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
+ */
+error_kernelspace:
+	incl %ebx
+	leaq irq_return(%rip),%rcx
+	cmpq %rcx,RIP+8(%rsp)
+	je error_swapgs
+	movl %ecx,%ecx			/* zero extend */
+	cmpq %rcx,RIP+8(%rsp)
+	je error_swapgs
+	cmpq $gs_change,RIP+8(%rsp)
+	je error_swapgs
+	jmp error_sti
+KPROBE_END(error_entry)
+
+
+/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+KPROBE_ENTRY(error_exit)
+	_frame R15
 	movl %ebx,%eax
 	RESTORE_REST
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	testl %eax,%eax
 	jne retint_kernel
 	LOCKDEP_SYS_EXIT_IRQ
 	movl TI_flags(%rcx),%edx
 	movl $_TIF_WORK_MASK,%edi
 	andl %edi,%edx
 	jnz retint_careful
 	jmp retint_swapgs
 	CFI_ENDPROC
-
-error_kernelspace:
-	incl %ebx
-	/* There are two places in the kernel that can potentially fault with
-	   usergs. Handle them here. The exception handlers after
-	   iret run with kernel gs again, so don't set the user space flag.
-	   B stepping K8s sometimes report an truncated RIP for IRET
-	   exceptions returning to compat mode. Check for these here too. */
-	leaq irq_return(%rip),%rcx
-	cmpq %rcx,RIP(%rsp)
-	je error_swapgs
-	movl %ecx,%ecx			/* zero extend */
-	cmpq %rcx,RIP(%rsp)
-	je error_swapgs
-	cmpq $gs_change,RIP(%rsp)
-	je error_swapgs
-	jmp error_sti
-KPROBE_END(error_entry)
+KPROBE_END(error_exit)
 
 	/* Reload gs selector with exception handling */
 	/* edi: new selector */