author     Jeremy Fitzhardinge <jeremy@goop.org>    2006-12-06 20:14:02 -0500
committer  Andi Kleen <andi@basil.nowhere.org>      2006-12-06 20:14:02 -0500
commit     f95d47caae5302a63d92be9a0292abc90e2a14e1
tree       cfa963975d104c56aba28df6c941759175ed4b98  /arch/i386/kernel/entry.S
parent     62111195800d80c66cdc69063ea3145878c99fbf
[PATCH] i386: Use %gs as the PDA base-segment in the kernel
This patch is the meat of the PDA change. It makes several related
changes:
1: Most significantly, %gs is now used in the kernel. This means that on
entry, the old value of %gs is saved away, and it is reloaded with
__KERNEL_PDA.
2: entry.S constructs the stack in the shape of struct pt_regs, and this
is passed around the kernel so that the process's saved register
state can be accessed.
Unfortunately, struct pt_regs doesn't currently have space for %gs
(or %fs). This patch extends pt_regs to add space for %gs (no space
is allocated for %fs, since it won't be used and would only
complicate the code in entry.S to work around the unused slot); a
sketch of the resulting layout appears after this list.
3: Because %gs is now saved on the stack like %ds, %es and the integer
registers, there are a number of places where it no longer needs to
be handled specially, namely the context switch and saving/restoring
register state in a signal context.
4: And since kernel threads run in kernel space and call normal kernel
code, they need to be created with their %gs == __KERNEL_PDA.
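
For orientation, the pt_regs shape that item 2 describes can be read off
the stack-offset comment at the top of the diff below (0x24(%esp) now
holds %gs, and everything from orig_eax upward shifts by 4). The ptrace.h
hunk itself is outside this entry.S-only diffstat, so the following C
sketch of the resulting layout is illustrative only, assuming the usual
i386 field names (ebx..eax, xds/xes, plus a new xgs slot backing the
PT_GS offset used in the code):

	struct pt_regs {		/* offsets as in the entry.S comment */
		long ebx;		/* 0x00 */
		long ecx;		/* 0x04 */
		long edx;		/* 0x08 */
		long esi;		/* 0x0C */
		long edi;		/* 0x10 */
		long ebp;		/* 0x14 */
		long eax;		/* 0x18 */
		int  xds;		/* 0x1C */
		int  xes;		/* 0x20 */
		int  xgs;		/* 0x24 - new: saved %gs; no slot for %fs */
		long orig_eax;		/* 0x28 */
		long eip;		/* 0x2C */
		int  xcs;		/* 0x30 */
		long eflags;		/* 0x34 */
		long esp;		/* 0x38 - "oldesp" */
		int  xss;		/* 0x3C - "oldss" */
	};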
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Chuck Ebbert <76306.1226@compuserve.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386/kernel/entry.S')
 arch/i386/kernel/entry.S |   70
 1 file changed, 48 insertions(+), 22 deletions(-)
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 0069bf01603e..b99d4a160078 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -30,12 +30,13 @@
  *	18(%esp) - %eax
  *	1C(%esp) - %ds
  *	20(%esp) - %es
- *	24(%esp) - orig_eax
- *	28(%esp) - %eip
- *	2C(%esp) - %cs
- *	30(%esp) - %eflags
- *	34(%esp) - %oldesp
- *	38(%esp) - %oldss
+ *	24(%esp) - %gs
+ *	28(%esp) - orig_eax
+ *	2C(%esp) - %eip
+ *	30(%esp) - %cs
+ *	34(%esp) - %eflags
+ *	38(%esp) - %oldesp
+ *	3C(%esp) - %oldss
  *
  * "current" is in register %ebx during any slow entries.
  */
@@ -92,6 +93,9 @@ VM_MASK = 0x00020000
 
 #define SAVE_ALL \
 	cld; \
+	pushl %gs; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	/*CFI_REL_OFFSET gs, 0;*/\
 	pushl %es; \
 	CFI_ADJUST_CFA_OFFSET 4;\
 	/*CFI_REL_OFFSET es, 0;*/\
@@ -121,7 +125,9 @@ VM_MASK = 0x00020000
 	CFI_REL_OFFSET ebx, 0;\
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
-	movl %edx, %es;
+	movl %edx, %es; \
+	movl $(__KERNEL_PDA), %edx; \
+	movl %edx, %gs
 
 #define RESTORE_INT_REGS \
 	popl %ebx;	\
@@ -154,17 +160,22 @@ VM_MASK = 0x00020000
 2:	popl %es;	\
 	CFI_ADJUST_CFA_OFFSET -4;\
 	/*CFI_RESTORE es;*/\
-.section .fixup,"ax";	\
-3:	movl $0,(%esp);	\
-	jmp 1b;		\
+3:	popl %gs;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	/*CFI_RESTORE gs;*/\
+.pushsection .fixup,"ax";	\
 4:	movl $0,(%esp);	\
+	jmp 1b;		\
+5:	movl $0,(%esp);	\
 	jmp 2b;		\
-.previous;		\
+6:	movl $0,(%esp);	\
+	jmp 3b;		\
 .section __ex_table,"a";\
 	.align 4;	\
-	.long 1b,3b;	\
-	.long 2b,4b;	\
-.previous
+	.long 1b,4b;	\
+	.long 2b,5b;	\
+	.long 3b,6b;	\
+.popsection
 
 #define RING0_INT_FRAME \
 	CFI_STARTPROC simple;\
@@ -231,6 +242,7 @@ check_userspace:
 	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
 	cmpl $USER_RPL, %eax
 	jb resume_kernel		# not returning to v8086 or userspace
+
 ENTRY(resume_userspace)
 	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
@@ -327,9 +339,16 @@ sysenter_past_esp:
 	movl PT_OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
+1:	mov  PT_GS(%esp), %gs
 	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
-
+.pushsection .fixup,"ax"
+2:	movl $0,PT_GS(%esp)
+	jmp 1b
+.section __ex_table,"a"
+	.align 4
+	.long 1b,2b
+.popsection
 
 	# system call handler stub
 ENTRY(system_call)
@@ -375,7 +394,7 @@ restore_nocheck:
 	TRACE_IRQS_IRET
 restore_nocheck_notrace:
 	RESTORE_REGS
-	addl $4, %esp
+	addl $4, %esp			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 1:	INTERRUPT_RETURN
 .section .fixup,"ax"
@@ -588,6 +607,10 @@ KPROBE_ENTRY(page_fault)
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 error_code:
+	/* the function address is in %gs's slot on the stack */
+	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0*/
 	pushl %ds
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ds, 0*/
@@ -613,18 +636,20 @@ error_code:
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_REL_OFFSET ebx, 0
 	cld
-	pushl %es
+	pushl %gs
 	CFI_ADJUST_CFA_OFFSET 4
-	/*CFI_REL_OFFSET es, 0*/
+	/*CFI_REL_OFFSET gs, 0*/
+	movl $(__KERNEL_PDA), %ecx
+	movl %ecx, %gs
 	UNWIND_ESPFIX_STACK
 	popl %ecx
 	CFI_ADJUST_CFA_OFFSET -4
 	/*CFI_REGISTER es, ecx*/
-	movl PT_ES(%esp), %edi		# get the function address
+	movl PT_GS(%esp), %edi		# get the function address
 	movl PT_ORIG_EAX(%esp), %edx	# get the error code
-	movl $-1, PT_ORIG_EAX(%esp)
-	movl %ecx, PT_ES(%esp)
-	/*CFI_REL_OFFSET es, ES*/
+	movl $-1, PT_ORIG_EAX(%esp)	# no syscall to restart
+	mov  %ecx, PT_GS(%esp)
+	/*CFI_REL_OFFSET gs, ES*/
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %es
@@ -936,6 +961,7 @@ ENTRY(arch_unwind_init_running)
 	movl %ebx, PT_EAX(%edx)
 	movl $__USER_DS, PT_DS(%edx)
 	movl $__USER_DS, PT_ES(%edx)
+	movl $0, PT_GS(%edx)
 	movl %ebx, PT_ORIG_EAX(%edx)
 	movl %ecx, PT_EIP(%edx)
 	movl 12(%esp), %ecx