Diffstat (limited to 'arch/i386/kernel/process.c')
 -rw-r--r--  arch/i386/kernel/process.c | 102
 1 file changed, 86 insertions(+), 16 deletions(-)
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index c641056233a6..bea304d48cdb 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -38,6 +38,7 @@
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/personality.h>
+#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -48,6 +49,7 @@
 #include <asm/i387.h>
 #include <asm/desc.h>
 #include <asm/vm86.h>
+#include <asm/idle.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
@@ -80,6 +82,42 @@ void (*pm_idle)(void);
 EXPORT_SYMBOL(pm_idle);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+        atomic_notifier_chain_register(&idle_notifier, n);
+}
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+        atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+
+static DEFINE_PER_CPU(volatile unsigned long, idle_state);
+
+void enter_idle(void)
+{
+        /* needs to be atomic w.r.t. interrupts, not against other CPUs */
+        __set_bit(0, &__get_cpu_var(idle_state));
+        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+}
+
+static void __exit_idle(void)
+{
+        /* needs to be atomic w.r.t. interrupts, not against other CPUs */
+        if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0)
+                return;
+        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+}
+
+void exit_idle(void)
+{
+        if (current->pid)
+                return;
+        __exit_idle();
+}
+
 void disable_hlt(void)
 {
         hlt_counter++;
@@ -130,6 +168,7 @@ EXPORT_SYMBOL(default_idle);
  */
 static void poll_idle (void)
 {
+        local_irq_enable();
         cpu_relax();
 }
 
@@ -173,6 +212,7 @@ void cpu_idle(void)
 
         /* endless idle loop with no priority at all */
         while (1) {
+                tick_nohz_stop_sched_tick();
                 while (!need_resched()) {
                         void (*idle)(void);
 
@@ -189,8 +229,18 @@ void cpu_idle(void)
                                 play_dead();
 
                         __get_cpu_var(irq_stat).idle_timestamp = jiffies;
+
+                        /*
+                         * Idle routines should keep interrupts disabled
+                         * from here on, until they go to idle.
+                         * Otherwise, idle callbacks can misfire.
+                         */
+                        local_irq_disable();
+                        enter_idle();
                         idle();
+                        __exit_idle();
                 }
+                tick_nohz_restart_sched_tick();
                 preempt_enable_no_resched();
                 schedule();
                 preempt_disable();
@@ -243,7 +293,11 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
                 __monitor((void *)&current_thread_info()->flags, 0, 0);
                 smp_mb();
                 if (!need_resched())
-                        __mwait(eax, ecx);
+                        __sti_mwait(eax, ecx);
+                else
+                        local_irq_enable();
+        } else {
+                local_irq_enable();
         }
 }
 
@@ -308,8 +362,8 @@ void show_regs(struct pt_regs * regs)
                 regs->eax,regs->ebx,regs->ecx,regs->edx);
         printk("ESI: %08lx EDI: %08lx EBP: %08lx",
                 regs->esi, regs->edi, regs->ebp);
-        printk(" DS: %04x ES: %04x GS: %04x\n",
-                0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
+        printk(" DS: %04x ES: %04x FS: %04x\n",
+                0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
 
         cr0 = read_cr0();
         cr2 = read_cr2();
@@ -340,7 +394,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 
         regs.xds = __USER_DS;
         regs.xes = __USER_DS;
-        regs.xgs = __KERNEL_PDA;
+        regs.xfs = __KERNEL_PDA;
         regs.orig_eax = -1;
         regs.eip = (unsigned long) kernel_thread_helper;
         regs.xcs = __KERNEL_CS | get_kernel_rpl();
@@ -425,7 +479,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
 
         p->thread.eip = (unsigned long) ret_from_fork;
 
-        savesegment(fs,p->thread.fs);
+        savesegment(gs,p->thread.gs);
 
         tsk = current;
         if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -501,8 +555,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump)
         dump->regs.eax = regs->eax;
         dump->regs.ds = regs->xds;
         dump->regs.es = regs->xes;
-        savesegment(fs,dump->regs.fs);
-        dump->regs.gs = regs->xgs;
+        dump->regs.fs = regs->xfs;
+        savesegment(gs,dump->regs.gs);
         dump->regs.orig_eax = regs->orig_eax;
         dump->regs.eip = regs->eip;
         dump->regs.cs = regs->xcs;
@@ -653,7 +707,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
         load_esp0(tss, next);
 
         /*
-         * Save away %fs. No need to save %gs, as it was saved on the
+         * Save away %gs. No need to save %fs, as it was saved on the
          * stack on entry. No need to save %es and %ds, as those are
          * always kernel segments while inside the kernel. Doing this
          * before setting the new TLS descriptors avoids the situation
@@ -662,7 +716,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
         * used %fs or %gs (it does not today), or if the kernel is
         * running inside of a hypervisor layer.
         */
-        savesegment(fs, prev->fs);
+        savesegment(gs, prev->gs);
 
         /*
          * Load the per-thread Thread-Local Storage descriptor.
@@ -670,14 +724,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
         load_TLS(next, cpu);
 
         /*
-         * Restore %fs if needed.
-         *
-         * Glibc normally makes %fs be zero.
+         * Restore IOPL if needed. In normal use, the flags restore
+         * in the switch assembly will handle this. But if the kernel
+         * is running virtualized at a non-zero CPL, the popf will
+         * not restore flags, so it must be done in a separate step.
          */
-        if (unlikely(prev->fs | next->fs))
-                loadsegment(fs, next->fs);
-
-        write_pda(pcurrent, next_p);
+        if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
+                set_iopl_mask(next->iopl);
 
         /*
          * Now maybe handle debug registers and/or IO bitmaps
@@ -688,6 +741,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
 
         disable_tsc(prev_p, next_p);
 
+        /*
+         * Leave lazy mode, flushing any hypercalls made here.
+         * This must be done before restoring TLS segments so
+         * the GDT and LDT are properly updated, and must be
+         * done before math_state_restore, so the TS bit is up
+         * to date.
+         */
+        arch_leave_lazy_cpu_mode();
+
         /* If the task has used fpu the last 5 timeslices, just do a full
          * restore of the math state immediately to avoid the trap; the
          * chances of needing FPU soon are obviously high now
@@ -695,6 +757,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
         if (next_p->fpu_counter > 5)
                 math_state_restore();
 
+        /*
+         * Restore %gs if needed (which is common)
+         */
+        if (prev->gs | next->gs)
+                loadsegment(gs, next->gs);
+
+        write_pda(pcurrent, next_p);
+
         return prev_p;
 }
 
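
For context, here is a minimal sketch (not part of the patch) of how kernel code might hook the idle notifier chain that this change introduces for i386. The identifiers my_idle_entries, my_idle_event, my_idle_nb and my_idle_watch_init are hypothetical; the pieces they rely on (idle_notifier_register(), IDLE_START/IDLE_END, <asm/idle.h>) are the ones added or referenced by the diff above, and it is assumed that <asm/idle.h> carries their declarations, as the diff's own include suggests. Since enter_idle() and __exit_idle() invoke the chain with interrupts disabled, the callback must not sleep. The patch does not EXPORT_SYMBOL these functions, so such a consumer would have to be built into the kernel rather than loaded as a module.

/*
 * Hypothetical built-in consumer of the idle notifier API added above.
 */
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <asm/idle.h>

/* Per-CPU count of how often each CPU entered its idle routine. */
static DEFINE_PER_CPU(unsigned long, my_idle_entries);

static int my_idle_event(struct notifier_block *nb, unsigned long action,
                         void *data)
{
        /* Invoked from enter_idle()/__exit_idle() with interrupts off. */
        if (action == IDLE_START)
                __get_cpu_var(my_idle_entries)++;
        /* IDLE_END signals that the CPU has left its idle routine. */
        return NOTIFY_OK;
}

static struct notifier_block my_idle_nb = {
        .notifier_call = my_idle_event,
};

static int __init my_idle_watch_init(void)
{
        idle_notifier_register(&my_idle_nb);
        return 0;
}
device_initcall(my_idle_watch_init);

Note that, per the diff, exit_idle() only forwards the IDLE_END event when called from the idle task itself (pid 0), and __exit_idle() bails out unless the per-CPU idle_state bit is still set, so a registered callback should see balanced IDLE_START/IDLE_END pairs.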