diff options
author | Jeremy Fitzhardinge <jeremy@goop.org> | 2007-02-13 07:26:20 -0500 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-02-13 07:26:20 -0500 |
commit | 464d1a78fbf8cf6c7fd970e7b3e2db50a320ce28 (patch) | |
tree | 536d8a92976e675b484b35dec88d40c97fab8ac8 /arch/i386/kernel/process.c | |
parent | 54413927f022292aeccadd268fbf1c0b42129945 (diff) |
[PATCH] i386: Convert i386 PDA code to use %fs
Convert the PDA code to use %fs rather than %gs as the segment for
per-processor data. This is because some processors show a small but
measurable performance gain for reloading a NULL segment selector (as %fs
generally is in user-space) versus a non-NULL one (as %gs generally is).
On modern processors the difference is very small, perhaps undetectable.
Some old AMD "K6 3D+" processors are noticeably slower when %fs is used
rather than %gs; I have no idea why this might be, but I think they're
sufficiently rare that it doesn't matter much.
This patch also fixes the math emulator, which had not been adjusted to
match the changed struct pt_regs.
[frederik.deweerdt@gmail.com: fixit with gdb]
[mingo@elte.hu: Fix KVM too]
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Ian Campbell <Ian.Campbell@XenSource.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Zachary Amsden <zach@vmware.com>
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Diffstat (limited to 'arch/i386/kernel/process.c')
-rw-r--r-- | arch/i386/kernel/process.c | 24 |
1 files changed, 11 insertions, 13 deletions
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c641056233a6..23ae198dbbc3 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -308,8 +308,8 @@ void show_regs(struct pt_regs * regs) | |||
308 | regs->eax,regs->ebx,regs->ecx,regs->edx); | 308 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
309 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | 309 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
310 | regs->esi, regs->edi, regs->ebp); | 310 | regs->esi, regs->edi, regs->ebp); |
311 | printk(" DS: %04x ES: %04x GS: %04x\n", | 311 | printk(" DS: %04x ES: %04x FS: %04x\n", |
312 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); | 312 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); |
313 | 313 | ||
314 | cr0 = read_cr0(); | 314 | cr0 = read_cr0(); |
315 | cr2 = read_cr2(); | 315 | cr2 = read_cr2(); |
@@ -340,7 +340,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
340 | 340 | ||
341 | regs.xds = __USER_DS; | 341 | regs.xds = __USER_DS; |
342 | regs.xes = __USER_DS; | 342 | regs.xes = __USER_DS; |
343 | regs.xgs = __KERNEL_PDA; | 343 | regs.xfs = __KERNEL_PDA; |
344 | regs.orig_eax = -1; | 344 | regs.orig_eax = -1; |
345 | regs.eip = (unsigned long) kernel_thread_helper; | 345 | regs.eip = (unsigned long) kernel_thread_helper; |
346 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 346 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
@@ -425,7 +425,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
425 | 425 | ||
426 | p->thread.eip = (unsigned long) ret_from_fork; | 426 | p->thread.eip = (unsigned long) ret_from_fork; |
427 | 427 | ||
428 | savesegment(fs,p->thread.fs); | 428 | savesegment(gs,p->thread.gs); |
429 | 429 | ||
430 | tsk = current; | 430 | tsk = current; |
431 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 431 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -501,8 +501,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) | |||
501 | dump->regs.eax = regs->eax; | 501 | dump->regs.eax = regs->eax; |
502 | dump->regs.ds = regs->xds; | 502 | dump->regs.ds = regs->xds; |
503 | dump->regs.es = regs->xes; | 503 | dump->regs.es = regs->xes; |
504 | savesegment(fs,dump->regs.fs); | 504 | dump->regs.fs = regs->xfs; |
505 | dump->regs.gs = regs->xgs; | 505 | savesegment(gs,dump->regs.gs); |
506 | dump->regs.orig_eax = regs->orig_eax; | 506 | dump->regs.orig_eax = regs->orig_eax; |
507 | dump->regs.eip = regs->eip; | 507 | dump->regs.eip = regs->eip; |
508 | dump->regs.cs = regs->xcs; | 508 | dump->regs.cs = regs->xcs; |
@@ -653,7 +653,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
653 | load_esp0(tss, next); | 653 | load_esp0(tss, next); |
654 | 654 | ||
655 | /* | 655 | /* |
656 | * Save away %fs. No need to save %gs, as it was saved on the | 656 | * Save away %gs. No need to save %fs, as it was saved on the |
657 | * stack on entry. No need to save %es and %ds, as those are | 657 | * stack on entry. No need to save %es and %ds, as those are |
658 | * always kernel segments while inside the kernel. Doing this | 658 | * always kernel segments while inside the kernel. Doing this |
659 | * before setting the new TLS descriptors avoids the situation | 659 | * before setting the new TLS descriptors avoids the situation |
@@ -662,7 +662,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
662 | * used %fs or %gs (it does not today), or if the kernel is | 662 | * used %fs or %gs (it does not today), or if the kernel is |
663 | * running inside of a hypervisor layer. | 663 | * running inside of a hypervisor layer. |
664 | */ | 664 | */ |
665 | savesegment(fs, prev->fs); | 665 | savesegment(gs, prev->gs); |
666 | 666 | ||
667 | /* | 667 | /* |
668 | * Load the per-thread Thread-Local Storage descriptor. | 668 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -670,12 +670,10 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
670 | load_TLS(next, cpu); | 670 | load_TLS(next, cpu); |
671 | 671 | ||
672 | /* | 672 | /* |
673 | * Restore %fs if needed. | 673 | * Restore %gs if needed (which is common) |
674 | * | ||
675 | * Glibc normally makes %fs be zero. | ||
676 | */ | 674 | */ |
677 | if (unlikely(prev->fs | next->fs)) | 675 | if (prev->gs | next->gs) |
678 | loadsegment(fs, next->fs); | 676 | loadsegment(gs, next->gs); |
679 | 677 | ||
680 | write_pda(pcurrent, next_p); | 678 | write_pda(pcurrent, next_p); |
681 | 679 | ||