diff options
author | Stas Sergeev <stsp@aknet.ru> | 2006-12-06 20:14:01 -0500 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-12-06 20:14:01 -0500 |
commit | be44d2aabce2d62f72d5751d1871b6212bf7a1c7 (patch) | |
tree | 3f190dd5b5747ee83b50c4596b4801ce6c6b551c /arch/i386/kernel | |
parent | bb81a09e55eaf7e5f798468ab971469b6f66a259 (diff) |
[PATCH] i386: espfix cleanup
Clean up the espfix code:
- Introduced PER_CPU() macro to be used from asm
- Introduced GET_DESC_BASE() macro to be used from asm
- Rewrote the fixup code in asm, as calling C code with an altered %ss
appeared to be unsafe
- No longer altering the stack from a .fixup section
- The 16-bit per-cpu stack is no longer used; instead, the stack segment base
is patched so that the high word of the kernel and user %esp
are the same.
- Added the limit-patching for the espfix segment. (Chuck Ebbert)
[jeremy@goop.org: use the x86 scaling addressing mode rather than shifting]
Signed-off-by: Stas Sergeev <stsp@aknet.ru>
Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Zachary Amsden <zach@vmware.com>
Acked-by: Chuck Ebbert <76306.1226@compuserve.com>
Acked-by: Jan Beulich <jbeulich@novell.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r-- | arch/i386/kernel/asm-offsets.c | 5 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/common.c | 11 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 73 | ||||
-rw-r--r-- | arch/i386/kernel/head.S | 2 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 57 |
5 files changed, 55 insertions, 93 deletions
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index c80271f8f084..e94d910a28bd 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -58,6 +58,11 @@ void foo(void) | |||
58 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); | 58 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); |
59 | BLANK(); | 59 | BLANK(); |
60 | 60 | ||
61 | OFFSET(GDS_size, Xgt_desc_struct, size); | ||
62 | OFFSET(GDS_address, Xgt_desc_struct, address); | ||
63 | OFFSET(GDS_pad, Xgt_desc_struct, pad); | ||
64 | BLANK(); | ||
65 | |||
61 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | 66 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); |
62 | OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); | 67 | OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); |
63 | BLANK(); | 68 | BLANK(); |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d9f3e3c31f05..5532fc4e1bf0 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -24,9 +24,6 @@ | |||
24 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | 24 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); |
25 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); | 25 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); |
26 | 26 | ||
27 | DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); | ||
28 | EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); | ||
29 | |||
30 | static int cachesize_override __cpuinitdata = -1; | 27 | static int cachesize_override __cpuinitdata = -1; |
31 | static int disable_x86_fxsr __cpuinitdata; | 28 | static int disable_x86_fxsr __cpuinitdata; |
32 | static int disable_x86_serial_nr __cpuinitdata = 1; | 29 | static int disable_x86_serial_nr __cpuinitdata = 1; |
@@ -603,7 +600,6 @@ void __cpuinit cpu_init(void) | |||
603 | struct tss_struct * t = &per_cpu(init_tss, cpu); | 600 | struct tss_struct * t = &per_cpu(init_tss, cpu); |
604 | struct thread_struct *thread = &current->thread; | 601 | struct thread_struct *thread = &current->thread; |
605 | struct desc_struct *gdt; | 602 | struct desc_struct *gdt; |
606 | __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); | ||
607 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | 603 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
608 | 604 | ||
609 | if (cpu_test_and_set(cpu, cpu_initialized)) { | 605 | if (cpu_test_and_set(cpu, cpu_initialized)) { |
@@ -651,13 +647,6 @@ old_gdt: | |||
651 | * and set up the GDT descriptor: | 647 | * and set up the GDT descriptor: |
652 | */ | 648 | */ |
653 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); | 649 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); |
654 | |||
655 | /* Set up GDT entry for 16bit stack */ | ||
656 | *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= | ||
657 | ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | | ||
658 | ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | | ||
659 | (CPU_16BIT_STACK_SIZE - 1); | ||
660 | |||
661 | cpu_gdt_descr->size = GDT_SIZE - 1; | 650 | cpu_gdt_descr->size = GDT_SIZE - 1; |
662 | cpu_gdt_descr->address = (unsigned long)gdt; | 651 | cpu_gdt_descr->address = (unsigned long)gdt; |
663 | 652 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5a63d6fdb70e..c38d801ba0bb 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/smp.h> | 48 | #include <asm/smp.h> |
49 | #include <asm/page.h> | 49 | #include <asm/page.h> |
50 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
51 | #include <asm/percpu.h> | ||
51 | #include <asm/dwarf2.h> | 52 | #include <asm/dwarf2.h> |
52 | #include "irq_vectors.h" | 53 | #include "irq_vectors.h" |
53 | 54 | ||
@@ -418,23 +419,18 @@ ldt_ss: | |||
418 | * This is an "official" bug of all the x86-compatible | 419 | * This is an "official" bug of all the x86-compatible |
419 | * CPUs, which we can try to work around to make | 420 | * CPUs, which we can try to work around to make |
420 | * dosemu and wine happy. */ | 421 | * dosemu and wine happy. */ |
421 | subl $8, %esp # reserve space for switch16 pointer | 422 | movl OLDESP(%esp), %eax |
422 | CFI_ADJUST_CFA_OFFSET 8 | 423 | movl %esp, %edx |
424 | call patch_espfix_desc | ||
425 | pushl $__ESPFIX_SS | ||
426 | CFI_ADJUST_CFA_OFFSET 4 | ||
427 | pushl %eax | ||
428 | CFI_ADJUST_CFA_OFFSET 4 | ||
423 | DISABLE_INTERRUPTS | 429 | DISABLE_INTERRUPTS |
424 | TRACE_IRQS_OFF | 430 | TRACE_IRQS_OFF |
425 | movl %esp, %eax | 431 | lss (%esp), %esp |
426 | /* Set up the 16bit stack frame with switch32 pointer on top, | 432 | CFI_ADJUST_CFA_OFFSET -8 |
427 | * and a switch16 pointer on top of the current frame. */ | 433 | jmp restore_nocheck |
428 | call setup_x86_bogus_stack | ||
429 | CFI_ADJUST_CFA_OFFSET -8 # frame has moved | ||
430 | TRACE_IRQS_IRET | ||
431 | RESTORE_REGS | ||
432 | lss 20+4(%esp), %esp # switch to 16bit stack | ||
433 | 1: INTERRUPT_RETURN | ||
434 | .section __ex_table,"a" | ||
435 | .align 4 | ||
436 | .long 1b,iret_exc | ||
437 | .previous | ||
438 | CFI_ENDPROC | 434 | CFI_ENDPROC |
439 | 435 | ||
440 | # perform work that needs to be done immediately before resumption | 436 | # perform work that needs to be done immediately before resumption |
@@ -524,30 +520,30 @@ syscall_badsys: | |||
524 | CFI_ENDPROC | 520 | CFI_ENDPROC |
525 | 521 | ||
526 | #define FIXUP_ESPFIX_STACK \ | 522 | #define FIXUP_ESPFIX_STACK \ |
527 | movl %esp, %eax; \ | 523 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
528 | /* switch to 32bit stack using the pointer on top of 16bit stack */ \ | 524 | GET_THREAD_INFO(%ebp); \ |
529 | lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \ | 525 | movl TI_cpu(%ebp), %ebx; \ |
530 | /* copy data from 16bit stack to 32bit stack */ \ | 526 | PER_CPU(cpu_gdt_descr, %ebx); \ |
531 | call fixup_x86_bogus_stack; \ | 527 | movl GDS_address(%ebx), %ebx; \ |
532 | /* put ESP to the proper location */ \ | 528 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
533 | movl %eax, %esp; | 529 | addl %esp, %eax; \ |
534 | #define UNWIND_ESPFIX_STACK \ | 530 | pushl $__KERNEL_DS; \ |
531 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
535 | pushl %eax; \ | 532 | pushl %eax; \ |
536 | CFI_ADJUST_CFA_OFFSET 4; \ | 533 | CFI_ADJUST_CFA_OFFSET 4; \ |
534 | lss (%esp), %esp; \ | ||
535 | CFI_ADJUST_CFA_OFFSET -8; | ||
536 | #define UNWIND_ESPFIX_STACK \ | ||
537 | movl %ss, %eax; \ | 537 | movl %ss, %eax; \ |
538 | /* see if on 16bit stack */ \ | 538 | /* see if on espfix stack */ \ |
539 | cmpw $__ESPFIX_SS, %ax; \ | 539 | cmpw $__ESPFIX_SS, %ax; \ |
540 | je 28f; \ | 540 | jne 27f; \ |
541 | 27: popl %eax; \ | 541 | movl $__KERNEL_DS, %eax; \ |
542 | CFI_ADJUST_CFA_OFFSET -4; \ | ||
543 | .section .fixup,"ax"; \ | ||
544 | 28: movl $__KERNEL_DS, %eax; \ | ||
545 | movl %eax, %ds; \ | 542 | movl %eax, %ds; \ |
546 | movl %eax, %es; \ | 543 | movl %eax, %es; \ |
547 | /* switch to 32bit stack */ \ | 544 | /* switch to normal stack */ \ |
548 | FIXUP_ESPFIX_STACK; \ | 545 | FIXUP_ESPFIX_STACK; \ |
549 | jmp 27b; \ | 546 | 27:; |
550 | .previous | ||
551 | 547 | ||
552 | /* | 548 | /* |
553 | * Build the entry stubs and pointer table with | 549 | * Build the entry stubs and pointer table with |
@@ -614,7 +610,6 @@ error_code: | |||
614 | pushl %eax | 610 | pushl %eax |
615 | CFI_ADJUST_CFA_OFFSET 4 | 611 | CFI_ADJUST_CFA_OFFSET 4 |
616 | CFI_REL_OFFSET eax, 0 | 612 | CFI_REL_OFFSET eax, 0 |
617 | xorl %eax, %eax | ||
618 | pushl %ebp | 613 | pushl %ebp |
619 | CFI_ADJUST_CFA_OFFSET 4 | 614 | CFI_ADJUST_CFA_OFFSET 4 |
620 | CFI_REL_OFFSET ebp, 0 | 615 | CFI_REL_OFFSET ebp, 0 |
@@ -627,7 +622,6 @@ error_code: | |||
627 | pushl %edx | 622 | pushl %edx |
628 | CFI_ADJUST_CFA_OFFSET 4 | 623 | CFI_ADJUST_CFA_OFFSET 4 |
629 | CFI_REL_OFFSET edx, 0 | 624 | CFI_REL_OFFSET edx, 0 |
630 | decl %eax # eax = -1 | ||
631 | pushl %ecx | 625 | pushl %ecx |
632 | CFI_ADJUST_CFA_OFFSET 4 | 626 | CFI_ADJUST_CFA_OFFSET 4 |
633 | CFI_REL_OFFSET ecx, 0 | 627 | CFI_REL_OFFSET ecx, 0 |
@@ -644,7 +638,7 @@ error_code: | |||
644 | /*CFI_REGISTER es, ecx*/ | 638 | /*CFI_REGISTER es, ecx*/ |
645 | movl ES(%esp), %edi # get the function address | 639 | movl ES(%esp), %edi # get the function address |
646 | movl ORIG_EAX(%esp), %edx # get the error code | 640 | movl ORIG_EAX(%esp), %edx # get the error code |
647 | movl %eax, ORIG_EAX(%esp) | 641 | movl $-1, ORIG_EAX(%esp) |
648 | movl %ecx, ES(%esp) | 642 | movl %ecx, ES(%esp) |
649 | /*CFI_REL_OFFSET es, ES*/ | 643 | /*CFI_REL_OFFSET es, ES*/ |
650 | movl $(__USER_DS), %ecx | 644 | movl $(__USER_DS), %ecx |
@@ -754,7 +748,7 @@ KPROBE_ENTRY(nmi) | |||
754 | cmpw $__ESPFIX_SS, %ax | 748 | cmpw $__ESPFIX_SS, %ax |
755 | popl %eax | 749 | popl %eax |
756 | CFI_ADJUST_CFA_OFFSET -4 | 750 | CFI_ADJUST_CFA_OFFSET -4 |
757 | je nmi_16bit_stack | 751 | je nmi_espfix_stack |
758 | cmpl $sysenter_entry,(%esp) | 752 | cmpl $sysenter_entry,(%esp) |
759 | je nmi_stack_fixup | 753 | je nmi_stack_fixup |
760 | pushl %eax | 754 | pushl %eax |
@@ -797,7 +791,7 @@ nmi_debug_stack_check: | |||
797 | FIX_STACK(24,nmi_stack_correct, 1) | 791 | FIX_STACK(24,nmi_stack_correct, 1) |
798 | jmp nmi_stack_correct | 792 | jmp nmi_stack_correct |
799 | 793 | ||
800 | nmi_16bit_stack: | 794 | nmi_espfix_stack: |
801 | /* We have a RING0_INT_FRAME here. | 795 | /* We have a RING0_INT_FRAME here. |
802 | * | 796 | * |
803 | * create the pointer to lss back | 797 | * create the pointer to lss back |
@@ -806,7 +800,6 @@ nmi_16bit_stack: | |||
806 | CFI_ADJUST_CFA_OFFSET 4 | 800 | CFI_ADJUST_CFA_OFFSET 4 |
807 | pushl %esp | 801 | pushl %esp |
808 | CFI_ADJUST_CFA_OFFSET 4 | 802 | CFI_ADJUST_CFA_OFFSET 4 |
809 | movzwl %sp, %esp | ||
810 | addw $4, (%esp) | 803 | addw $4, (%esp) |
811 | /* copy the iret frame of 12 bytes */ | 804 | /* copy the iret frame of 12 bytes */ |
812 | .rept 3 | 805 | .rept 3 |
@@ -817,11 +810,11 @@ nmi_16bit_stack: | |||
817 | CFI_ADJUST_CFA_OFFSET 4 | 810 | CFI_ADJUST_CFA_OFFSET 4 |
818 | SAVE_ALL | 811 | SAVE_ALL |
819 | FIXUP_ESPFIX_STACK # %eax == %esp | 812 | FIXUP_ESPFIX_STACK # %eax == %esp |
820 | CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved | ||
821 | xorl %edx,%edx # zero error code | 813 | xorl %edx,%edx # zero error code |
822 | call do_nmi | 814 | call do_nmi |
823 | RESTORE_REGS | 815 | RESTORE_REGS |
824 | lss 12+4(%esp), %esp # back to 16bit stack | 816 | lss 12+4(%esp), %esp # back to espfix stack |
817 | CFI_ADJUST_CFA_OFFSET -24 | ||
825 | 1: INTERRUPT_RETURN | 818 | 1: INTERRUPT_RETURN |
826 | CFI_ENDPROC | 819 | CFI_ENDPROC |
827 | .section __ex_table,"a" | 820 | .section __ex_table,"a" |
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index ca31f18d277c..b1f1df11fcc6 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -584,7 +584,7 @@ ENTRY(cpu_gdt_table) | |||
584 | .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ | 584 | .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ |
585 | .quad 0x004092000000ffff /* 0xc8 APM DS data */ | 585 | .quad 0x004092000000ffff /* 0xc8 APM DS data */ |
586 | 586 | ||
587 | .quad 0x0000920000000000 /* 0xd0 - ESPFIX 16-bit SS */ | 587 | .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ |
588 | .quad 0x0000000000000000 /* 0xd8 - unused */ | 588 | .quad 0x0000000000000000 /* 0xd8 - unused */ |
589 | .quad 0x0000000000000000 /* 0xe0 - unused */ | 589 | .quad 0x0000000000000000 /* 0xe0 - unused */ |
590 | .quad 0x0000000000000000 /* 0xe8 - unused */ | 590 | .quad 0x0000000000000000 /* 0xe8 - unused */ |
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 56655ea8d98f..f9bb1f89d687 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -1088,49 +1088,24 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | |||
1088 | #endif | 1088 | #endif |
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | fastcall void setup_x86_bogus_stack(unsigned char * stk) | 1091 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, |
1092 | unsigned long kesp) | ||
1092 | { | 1093 | { |
1093 | unsigned long *switch16_ptr, *switch32_ptr; | ||
1094 | struct pt_regs *regs; | ||
1095 | unsigned long stack_top, stack_bot; | ||
1096 | unsigned short iret_frame16_off; | ||
1097 | int cpu = smp_processor_id(); | 1094 | int cpu = smp_processor_id(); |
1098 | /* reserve the space on 32bit stack for the magic switch16 pointer */ | 1095 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
1099 | memmove(stk, stk + 8, sizeof(struct pt_regs)); | 1096 | struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; |
1100 | switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs)); | 1097 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; |
1101 | regs = (struct pt_regs *)stk; | 1098 | unsigned long new_kesp = kesp - base; |
1102 | /* now the switch32 on 16bit stack */ | 1099 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; |
1103 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | 1100 | __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; |
1104 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | 1101 | /* Set up base for espfix segment */ |
1105 | switch32_ptr = (unsigned long *)(stack_top - 8); | 1102 | desc &= 0x00f0ff0000000000ULL; |
1106 | iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20; | 1103 | desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | |
1107 | /* copy iret frame on 16bit stack */ | 1104 | ((((__u64)base) << 32) & 0xff00000000000000ULL) | |
1108 | memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20); | 1105 | ((((__u64)base) << 32) & 0xff00000000000000ULL) | |
1109 | /* fill in the switch pointers */ | 1106 | (lim_pages & 0xffff); |
1110 | switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off; | 1107 | *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; |
1111 | switch16_ptr[1] = __ESPFIX_SS; | 1108 | return new_kesp; |
1112 | switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) + | ||
1113 | 8 - CPU_16BIT_STACK_SIZE; | ||
1114 | switch32_ptr[1] = __KERNEL_DS; | ||
1115 | } | ||
1116 | |||
1117 | fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp) | ||
1118 | { | ||
1119 | unsigned long *switch32_ptr; | ||
1120 | unsigned char *stack16, *stack32; | ||
1121 | unsigned long stack_top, stack_bot; | ||
1122 | int len; | ||
1123 | int cpu = smp_processor_id(); | ||
1124 | stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu); | ||
1125 | stack_top = stack_bot + CPU_16BIT_STACK_SIZE; | ||
1126 | switch32_ptr = (unsigned long *)(stack_top - 8); | ||
1127 | /* copy the data from 16bit stack to 32bit stack */ | ||
1128 | len = CPU_16BIT_STACK_SIZE - 8 - sp; | ||
1129 | stack16 = (unsigned char *)(stack_bot + sp); | ||
1130 | stack32 = (unsigned char *) | ||
1131 | (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len); | ||
1132 | memcpy(stack32, stack16, len); | ||
1133 | return stack32; | ||
1134 | } | 1109 | } |
1135 | 1110 | ||
1136 | /* | 1111 | /* |