diff options
author | Tejun Heo <tj@kernel.org> | 2009-02-09 08:17:40 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-02-09 18:42:00 -0500 |
commit | ccbeed3a05908d201b47b6c3dd1a373138bba566 (patch) | |
tree | e834d548c70426aa3885dd2169be1a79be4a617b /arch/x86/kernel | |
parent | d9a89a26e02ef9ed03f74a755a8b4d8f3a066622 (diff) |
x86: make lazy %gs optional on x86_32
Impact: pt_regs changed, lazy gs handling made optional, add slight
overhead to SAVE_ALL, simplifies error_code path a bit
On x86_32, %gs hasn't been used by the kernel and has been handled lazily. pt_regs
doesn't have a place for it and gs is saved/loaded only when necessary.
In preparation for stack protector support, this patch makes lazy %gs
handling optional by doing the following.
* Add CONFIG_X86_32_LAZY_GS and a place for gs in pt_regs.
* Save and restore %gs along with other registers in entry_32.S unless
LAZY_GS is set. Note that this unfortunately adds "pushl $0" on SAVE_ALL
even when LAZY_GS is set. However, it adds no overhead to the common exit
path and simplifies the entry path with error code.
* Define different user_gs accessors depending on LAZY_GS and add
lazy_save_gs() and lazy_load_gs() which are noop if !LAZY_GS. The
lazy_*_gs() ops are used to save, load and clear %gs lazily.
* Define ELF_CORE_COPY_KERNEL_REGS() which always reads %gs directly.
xen and lguest changes need to be verified.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/asm-offsets_32.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 132 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 5 |
4 files changed, 117 insertions, 25 deletions
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index ee4df08feee..fbf2f33e308 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -75,6 +75,7 @@ void foo(void) | |||
75 | OFFSET(PT_DS, pt_regs, ds); | 75 | OFFSET(PT_DS, pt_regs, ds); |
76 | OFFSET(PT_ES, pt_regs, es); | 76 | OFFSET(PT_ES, pt_regs, es); |
77 | OFFSET(PT_FS, pt_regs, fs); | 77 | OFFSET(PT_FS, pt_regs, fs); |
78 | OFFSET(PT_GS, pt_regs, gs); | ||
78 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); | 79 | OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); |
79 | OFFSET(PT_EIP, pt_regs, ip); | 80 | OFFSET(PT_EIP, pt_regs, ip); |
80 | OFFSET(PT_CS, pt_regs, cs); | 81 | OFFSET(PT_CS, pt_regs, cs); |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c461925d3b6..82e6868bee4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -30,12 +30,13 @@ | |||
30 | * 1C(%esp) - %ds | 30 | * 1C(%esp) - %ds |
31 | * 20(%esp) - %es | 31 | * 20(%esp) - %es |
32 | * 24(%esp) - %fs | 32 | * 24(%esp) - %fs |
33 | * 28(%esp) - orig_eax | 33 | * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS |
34 | * 2C(%esp) - %eip | 34 | * 2C(%esp) - orig_eax |
35 | * 30(%esp) - %cs | 35 | * 30(%esp) - %eip |
36 | * 34(%esp) - %eflags | 36 | * 34(%esp) - %cs |
37 | * 38(%esp) - %oldesp | 37 | * 38(%esp) - %eflags |
38 | * 3C(%esp) - %oldss | 38 | * 3C(%esp) - %oldesp |
39 | * 40(%esp) - %oldss | ||
39 | * | 40 | * |
40 | * "current" is in register %ebx during any slow entries. | 41 | * "current" is in register %ebx during any slow entries. |
41 | */ | 42 | */ |
@@ -101,8 +102,99 @@ | |||
101 | #define resume_userspace_sig resume_userspace | 102 | #define resume_userspace_sig resume_userspace |
102 | #endif | 103 | #endif |
103 | 104 | ||
105 | /* | ||
106 | * User gs save/restore | ||
107 | * | ||
108 | * %gs is used for userland TLS and kernel only uses it for stack | ||
109 | * canary which is required to be at %gs:20 by gcc. Read the comment | ||
110 | * at the top of stackprotector.h for more info. | ||
111 | * | ||
112 | * Local labels 98 and 99 are used. | ||
113 | */ | ||
114 | #ifdef CONFIG_X86_32_LAZY_GS | ||
115 | |||
116 | /* unfortunately push/pop can't be no-op */ | ||
117 | .macro PUSH_GS | ||
118 | pushl $0 | ||
119 | CFI_ADJUST_CFA_OFFSET 4 | ||
120 | .endm | ||
121 | .macro POP_GS pop=0 | ||
122 | addl $(4 + \pop), %esp | ||
123 | CFI_ADJUST_CFA_OFFSET -(4 + \pop) | ||
124 | .endm | ||
125 | .macro POP_GS_EX | ||
126 | .endm | ||
127 | |||
128 | /* all the rest are no-op */ | ||
129 | .macro PTGS_TO_GS | ||
130 | .endm | ||
131 | .macro PTGS_TO_GS_EX | ||
132 | .endm | ||
133 | .macro GS_TO_REG reg | ||
134 | .endm | ||
135 | .macro REG_TO_PTGS reg | ||
136 | .endm | ||
137 | .macro SET_KERNEL_GS reg | ||
138 | .endm | ||
139 | |||
140 | #else /* CONFIG_X86_32_LAZY_GS */ | ||
141 | |||
142 | .macro PUSH_GS | ||
143 | pushl %gs | ||
144 | CFI_ADJUST_CFA_OFFSET 4 | ||
145 | /*CFI_REL_OFFSET gs, 0*/ | ||
146 | .endm | ||
147 | |||
148 | .macro POP_GS pop=0 | ||
149 | 98: popl %gs | ||
150 | CFI_ADJUST_CFA_OFFSET -4 | ||
151 | /*CFI_RESTORE gs*/ | ||
152 | .if \pop <> 0 | ||
153 | add $\pop, %esp | ||
154 | CFI_ADJUST_CFA_OFFSET -\pop | ||
155 | .endif | ||
156 | .endm | ||
157 | .macro POP_GS_EX | ||
158 | .pushsection .fixup, "ax" | ||
159 | 99: movl $0, (%esp) | ||
160 | jmp 98b | ||
161 | .section __ex_table, "a" | ||
162 | .align 4 | ||
163 | .long 98b, 99b | ||
164 | .popsection | ||
165 | .endm | ||
166 | |||
167 | .macro PTGS_TO_GS | ||
168 | 98: mov PT_GS(%esp), %gs | ||
169 | .endm | ||
170 | .macro PTGS_TO_GS_EX | ||
171 | .pushsection .fixup, "ax" | ||
172 | 99: movl $0, PT_GS(%esp) | ||
173 | jmp 98b | ||
174 | .section __ex_table, "a" | ||
175 | .align 4 | ||
176 | .long 98b, 99b | ||
177 | .popsection | ||
178 | .endm | ||
179 | |||
180 | .macro GS_TO_REG reg | ||
181 | movl %gs, \reg | ||
182 | /*CFI_REGISTER gs, \reg*/ | ||
183 | .endm | ||
184 | .macro REG_TO_PTGS reg | ||
185 | movl \reg, PT_GS(%esp) | ||
186 | /*CFI_REL_OFFSET gs, PT_GS*/ | ||
187 | .endm | ||
188 | .macro SET_KERNEL_GS reg | ||
189 | xorl \reg, \reg | ||
190 | movl \reg, %gs | ||
191 | .endm | ||
192 | |||
193 | #endif /* CONFIG_X86_32_LAZY_GS */ | ||
194 | |||
104 | .macro SAVE_ALL | 195 | .macro SAVE_ALL |
105 | cld | 196 | cld |
197 | PUSH_GS | ||
106 | pushl %fs | 198 | pushl %fs |
107 | CFI_ADJUST_CFA_OFFSET 4 | 199 | CFI_ADJUST_CFA_OFFSET 4 |
108 | /*CFI_REL_OFFSET fs, 0;*/ | 200 | /*CFI_REL_OFFSET fs, 0;*/ |
@@ -138,6 +230,7 @@ | |||
138 | movl %edx, %es | 230 | movl %edx, %es |
139 | movl $(__KERNEL_PERCPU), %edx | 231 | movl $(__KERNEL_PERCPU), %edx |
140 | movl %edx, %fs | 232 | movl %edx, %fs |
233 | SET_KERNEL_GS %edx | ||
141 | .endm | 234 | .endm |
142 | 235 | ||
143 | .macro RESTORE_INT_REGS | 236 | .macro RESTORE_INT_REGS |
@@ -164,7 +257,7 @@ | |||
164 | CFI_RESTORE eax | 257 | CFI_RESTORE eax |
165 | .endm | 258 | .endm |
166 | 259 | ||
167 | .macro RESTORE_REGS | 260 | .macro RESTORE_REGS pop=0 |
168 | RESTORE_INT_REGS | 261 | RESTORE_INT_REGS |
169 | 1: popl %ds | 262 | 1: popl %ds |
170 | CFI_ADJUST_CFA_OFFSET -4 | 263 | CFI_ADJUST_CFA_OFFSET -4 |
@@ -175,6 +268,7 @@ | |||
175 | 3: popl %fs | 268 | 3: popl %fs |
176 | CFI_ADJUST_CFA_OFFSET -4 | 269 | CFI_ADJUST_CFA_OFFSET -4 |
177 | /*CFI_RESTORE fs;*/ | 270 | /*CFI_RESTORE fs;*/ |
271 | POP_GS \pop | ||
178 | .pushsection .fixup, "ax" | 272 | .pushsection .fixup, "ax" |
179 | 4: movl $0, (%esp) | 273 | 4: movl $0, (%esp) |
180 | jmp 1b | 274 | jmp 1b |
@@ -188,6 +282,7 @@ | |||
188 | .long 2b, 5b | 282 | .long 2b, 5b |
189 | .long 3b, 6b | 283 | .long 3b, 6b |
190 | .popsection | 284 | .popsection |
285 | POP_GS_EX | ||
191 | .endm | 286 | .endm |
192 | 287 | ||
193 | .macro RING0_INT_FRAME | 288 | .macro RING0_INT_FRAME |
@@ -368,6 +463,7 @@ sysenter_exit: | |||
368 | xorl %ebp,%ebp | 463 | xorl %ebp,%ebp |
369 | TRACE_IRQS_ON | 464 | TRACE_IRQS_ON |
370 | 1: mov PT_FS(%esp), %fs | 465 | 1: mov PT_FS(%esp), %fs |
466 | PTGS_TO_GS | ||
371 | ENABLE_INTERRUPTS_SYSEXIT | 467 | ENABLE_INTERRUPTS_SYSEXIT |
372 | 468 | ||
373 | #ifdef CONFIG_AUDITSYSCALL | 469 | #ifdef CONFIG_AUDITSYSCALL |
@@ -416,6 +512,7 @@ sysexit_audit: | |||
416 | .align 4 | 512 | .align 4 |
417 | .long 1b,2b | 513 | .long 1b,2b |
418 | .popsection | 514 | .popsection |
515 | PTGS_TO_GS_EX | ||
419 | ENDPROC(ia32_sysenter_target) | 516 | ENDPROC(ia32_sysenter_target) |
420 | 517 | ||
421 | # system call handler stub | 518 | # system call handler stub |
@@ -458,8 +555,7 @@ restore_all: | |||
458 | restore_nocheck: | 555 | restore_nocheck: |
459 | TRACE_IRQS_IRET | 556 | TRACE_IRQS_IRET |
460 | restore_nocheck_notrace: | 557 | restore_nocheck_notrace: |
461 | RESTORE_REGS | 558 | RESTORE_REGS 4 # skip orig_eax/error_code |
462 | addl $4, %esp # skip orig_eax/error_code | ||
463 | CFI_ADJUST_CFA_OFFSET -4 | 559 | CFI_ADJUST_CFA_OFFSET -4 |
464 | irq_return: | 560 | irq_return: |
465 | INTERRUPT_RETURN | 561 | INTERRUPT_RETURN |
@@ -1078,7 +1174,10 @@ ENTRY(page_fault) | |||
1078 | CFI_ADJUST_CFA_OFFSET 4 | 1174 | CFI_ADJUST_CFA_OFFSET 4 |
1079 | ALIGN | 1175 | ALIGN |
1080 | error_code: | 1176 | error_code: |
1081 | /* the function address is in %fs's slot on the stack */ | 1177 | /* the function address is in %gs's slot on the stack */ |
1178 | pushl %fs | ||
1179 | CFI_ADJUST_CFA_OFFSET 4 | ||
1180 | /*CFI_REL_OFFSET fs, 0*/ | ||
1082 | pushl %es | 1181 | pushl %es |
1083 | CFI_ADJUST_CFA_OFFSET 4 | 1182 | CFI_ADJUST_CFA_OFFSET 4 |
1084 | /*CFI_REL_OFFSET es, 0*/ | 1183 | /*CFI_REL_OFFSET es, 0*/ |
@@ -1107,20 +1206,15 @@ error_code: | |||
1107 | CFI_ADJUST_CFA_OFFSET 4 | 1206 | CFI_ADJUST_CFA_OFFSET 4 |
1108 | CFI_REL_OFFSET ebx, 0 | 1207 | CFI_REL_OFFSET ebx, 0 |
1109 | cld | 1208 | cld |
1110 | pushl %fs | ||
1111 | CFI_ADJUST_CFA_OFFSET 4 | ||
1112 | /*CFI_REL_OFFSET fs, 0*/ | ||
1113 | movl $(__KERNEL_PERCPU), %ecx | 1209 | movl $(__KERNEL_PERCPU), %ecx |
1114 | movl %ecx, %fs | 1210 | movl %ecx, %fs |
1115 | UNWIND_ESPFIX_STACK | 1211 | UNWIND_ESPFIX_STACK |
1116 | popl %ecx | 1212 | GS_TO_REG %ecx |
1117 | CFI_ADJUST_CFA_OFFSET -4 | 1213 | movl PT_GS(%esp), %edi # get the function address |
1118 | /*CFI_REGISTER es, ecx*/ | ||
1119 | movl PT_FS(%esp), %edi # get the function address | ||
1120 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 1214 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
1121 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 1215 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
1122 | mov %ecx, PT_FS(%esp) | 1216 | REG_TO_PTGS %ecx |
1123 | /*CFI_REL_OFFSET fs, ES*/ | 1217 | SET_KERNEL_GS %ecx |
1124 | movl $(__USER_DS), %ecx | 1218 | movl $(__USER_DS), %ecx |
1125 | movl %ecx, %ds | 1219 | movl %ecx, %ds |
1126 | movl %ecx, %es | 1220 | movl %ecx, %es |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d58a340e1be..86122fa2a1b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
539 | * used %fs or %gs (it does not today), or if the kernel is | 539 | * used %fs or %gs (it does not today), or if the kernel is |
540 | * running inside of a hypervisor layer. | 540 | * running inside of a hypervisor layer. |
541 | */ | 541 | */ |
542 | savesegment(gs, prev->gs); | 542 | lazy_save_gs(prev->gs); |
543 | 543 | ||
544 | /* | 544 | /* |
545 | * Load the per-thread Thread-Local Storage descriptor. | 545 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -585,7 +585,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
585 | * Restore %gs if needed (which is common) | 585 | * Restore %gs if needed (which is common) |
586 | */ | 586 | */ |
587 | if (prev->gs | next->gs) | 587 | if (prev->gs | next->gs) |
588 | loadsegment(gs, next->gs); | 588 | lazy_load_gs(next->gs); |
589 | 589 | ||
590 | percpu_write(current_task, next_p); | 590 | percpu_write(current_task, next_p); |
591 | 591 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 508b6b57d0c..7ec39ab37a2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value) | |||
75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) | 75 | static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) |
76 | { | 76 | { |
77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); | 77 | BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); |
78 | regno >>= 2; | 78 | return &regs->bx + (regno >> 2); |
79 | if (regno > FS) | ||
80 | --regno; | ||
81 | return &regs->bx + regno; | ||
82 | } | 79 | } |
83 | 80 | ||
84 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) | 81 | static u16 get_segment_reg(struct task_struct *task, unsigned long offset) |