author     Nick Piggin <nickpiggin@yahoo.com.au>        2005-11-09 00:39:04 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>        2005-11-09 10:56:33 -0500
commit     64c7c8f88559624abdbe12b5da6502e8879f8d28 (patch)
tree       02f85a35ddd0f24dec70e5d6ecd61073578fd8d6
parent     5bfb5d690f36d316a5f3b4f7775fda996faa6b12 (diff)
[PATCH] sched: resched and cpu_idle rework
Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce
confusion, and make their semantics rigid.  Improves efficiency of
resched_task and some cpu_idle routines.

* In resched_task:
- TIF_NEED_RESCHED is only cleared with the task's runqueue lock held,
  and as we hold it during resched_task, there is no need for an atomic
  test and set there.  The only other time this should be set is when
  the task's quantum expires, in the timer interrupt - this is protected
  against because the rq lock is irq-safe.

- If TIF_NEED_RESCHED is set, then we don't need to do anything.  It
  won't get unset until the task gets schedule()d off.

- If we are running on the same CPU as the task we resched, then set
  TIF_NEED_RESCHED and no further action is required.

- If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set
  after TIF_NEED_RESCHED has been set, then we need to send an IPI.

Using these rules, we are able to remove the test and set operation in
resched_task, and make clear the previously vague semantics of
POLLING_NRFLAG.

* In idle routines:
- Enter cpu_idle with preempt disabled.  When the need_resched()
  condition becomes true, explicitly call schedule().  This makes things
  a bit clearer (IMO), but haven't updated all architectures yet.

- Many do a test and clear of TIF_NEED_RESCHED for some reason.
  According to the resched_task rules, this isn't needed (and actually
  breaks the assumption that TIF_NEED_RESCHED is only cleared with the
  runqueue lock held).  So remove that.  Generally one less locked
  memory op when switching to the idle thread.

- Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the
  innermost polling idle loops.  The above resched_task semantics allow
  it to be set until before the last time need_resched() is checked
  before going into a halt requiring interrupt wakeup.

  Many idle routines simply never enter such a halt, and so
  POLLING_NRFLAG can be always left set, completely eliminating resched
  IPIs when rescheduling the idle task.

  POLLING_NRFLAG width can be increased, to reduce the chance of resched
  IPIs.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--   Documentation/sched-arch.txt               89
-rw-r--r--   arch/alpha/kernel/process.c                10
-rw-r--r--   arch/arm/kernel/process.c                  14
-rw-r--r--   arch/i386/kernel/apm.c                     20
-rw-r--r--   arch/i386/kernel/process.c                 64
-rw-r--r--   arch/ia64/kernel/process.c                 32
-rw-r--r--   arch/parisc/kernel/process.c                2
-rw-r--r--   arch/powerpc/platforms/iseries/setup.c     10
-rw-r--r--   arch/powerpc/platforms/pseries/setup.c     12
-rw-r--r--   arch/ppc/kernel/idle.c                     20
-rw-r--r--   arch/ppc64/kernel/idle.c                   11
-rw-r--r--   arch/s390/kernel/process.c                 13
-rw-r--r--   arch/sh/kernel/process.c                   12
-rw-r--r--   arch/sh64/kernel/process.c                 14
-rw-r--r--   arch/sparc/kernel/process.c                35
-rw-r--r--   arch/sparc64/kernel/process.c              20
-rw-r--r--   arch/sparc64/kernel/smp.c                  13
-rw-r--r--   arch/x86_64/kernel/process.c               67
-rw-r--r--   drivers/acpi/processor_idle.c              37
-rw-r--r--   kernel/sched.c                             21
20 files changed, 296 insertions, 220 deletions
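A minimal sketch of the idle-loop shape the message above converges the
architectures toward, using only primitives that appear in the hunks below
(set_thread_flag, need_resched, cpu_relax, schedule, preempt_enable_no_resched,
preempt_disable); illustrative only, not a file touched by this patch:

	/* Sketch only: target loop shape for a purely polling idler. */
	void cpu_idle(void)			/* entered with preempt disabled */
	{
		set_thread_flag(TIF_POLLING_NRFLAG);	/* polling: no resched IPI needed */

		while (1) {
			while (!need_resched())
				cpu_relax();	/* poll; never clear TIF_NEED_RESCHED here */

			preempt_enable_no_resched();
			schedule();		/* only schedule() consumes TIF_NEED_RESCHED */
			preempt_disable();
		}
	}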
diff --git a/Documentation/sched-arch.txt b/Documentation/sched-arch.txt
new file mode 100644
index 000000000000..941615a9769b
--- /dev/null
+++ b/Documentation/sched-arch.txt
@@ -0,0 +1,89 @@
+	CPU Scheduler implementation hints for architecture specific code
+
+	Nick Piggin, 2005
+
+Context switch
+==============
+1. Runqueue locking
+By default, the switch_to arch function is called with the runqueue
+locked. This is usually not a problem unless switch_to may need to
+take the runqueue lock. This is usually due to a wake up operation in
+the context switch. See include/asm-ia64/system.h for an example.
+
+To request the scheduler call switch_to with the runqueue unlocked,
+you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
+(typically the one where switch_to is defined).
+
+Unlocked context switches introduce only a very minor performance
+penalty to the core scheduler implementation in the CONFIG_SMP case.
+
+2. Interrupt status
+By default, the switch_to arch function is called with interrupts
+disabled. Interrupts may be enabled over the call if it is likely to
+introduce a significant interrupt latency, by adding the line
+`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
+unlocked context switches. This define also implies
+`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
+example.
+
+
+CPU idle
+========
+Your cpu_idle routines need to obey the following rules:
+
+1. Preempt should now be disabled over idle routines. It should only
+   be enabled to call schedule() and then disabled again.
+
+2. need_resched/TIF_NEED_RESCHED is only ever set, and will never
+   be cleared until the running task has called schedule(). Idle
+   threads need only ever query need_resched, and may never set or
+   clear it.
+
+3. When cpu_idle finds (need_resched() == 'true'), it should call
+   schedule(). It should not call schedule() otherwise.
+
+4. The only time interrupts need to be disabled when checking
+   need_resched is if we are about to sleep the processor until
+   the next interrupt (this doesn't provide any protection of
+   need_resched, it prevents losing an interrupt).
+
+	4a. A common problem with this type of sleep appears to be:
+	        local_irq_disable();
+	        if (!need_resched()) {
+	                local_irq_enable();
+	                *** resched interrupt arrives here ***
+	                __asm__("sleep until next interrupt");
+	        }
+
+5. TIF_POLLING_NRFLAG can be set by idle routines that do not
+   need an interrupt to wake them up when need_resched goes high.
+   In other words, they must be periodically polling need_resched,
+   although it may be reasonable to do some background work or enter
+   a low CPU priority.
+
+	5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
+	    an interrupt sleep, it needs to be cleared and then a memory
+	    barrier issued (followed by a test of need_resched with
+	    interrupts disabled, as explained in 4).
+
+arch/i386/kernel/process.c has examples of both polling and
+sleeping idle functions.
+
+
+Possible arch/ problems
+=======================
+
+Possible arch problems I found (and either tried to fix or didn't):
+
+h8300 - Is such sleeping racy vs interrupts? (See #4a.)
+	The H8/300 manual I found indicates yes, however disabling IRQs
+	over the sleep means only NMIs can wake it up, so it can't be
+	fixed easily without spin waiting.
+
+ia64 - Is the safe_halt call racy vs interrupts? (Does it sleep?) (See #4a.)
+
+sh64 - Is sleeping racy vs interrupts? (See #4a.)
+
+sparc - IRQs are on at this point(?); change local_irq_save to _disable.
+      - TODO: needs secondary CPUs to disable preempt (See #1.)
+
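A minimal sketch of how rules 4, 4a and 5a combine for an idle routine that
normally polls but may halt; it mirrors the new i386/x86_64 default_idle()
further down, and the function name halting_idle is made up for illustration:

	static void halting_idle(void)		/* hypothetical name */
	{
		clear_thread_flag(TIF_POLLING_NRFLAG);	/* 5a: stop advertising "no IPI needed" */
		smp_mb__after_clear_bit();		/* 5a: make the clear visible before testing */

		while (!need_resched()) {
			local_irq_disable();		/* 4: close the race shown in 4a */
			if (!need_resched())
				safe_halt();		/* resched IPI or timer wakes us */
			else
				local_irq_enable();
		}

		set_thread_flag(TIF_POLLING_NRFLAG);	/* back to pure polling */
	}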
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index eb20c3afff58..a8682612abc0 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -43,21 +43,17 @@
 #include "proto.h"
 #include "pci_impl.h"
 
-void default_idle(void)
-{
-	barrier();
-}
-
 void
 cpu_idle(void)
 {
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
 	while (1) {
-		void (*idle)(void) = default_idle;
 		/* FIXME -- EV6 and LCA45 know how to power down
 		   the CPU. */
 
 		while (!need_resched())
-			idle();
+			cpu_relax();
 		schedule();
 	}
 }
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 93dd92cc12f8..c0f6a119de3b 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -86,12 +86,16 @@ EXPORT_SYMBOL(pm_power_off);
  */
 void default_idle(void)
 {
-	local_irq_disable();
-	if (!need_resched() && !hlt_counter) {
-		timer_dyn_reprogram();
-		arch_idle();
+	if (hlt_counter)
+		cpu_relax();
+	else {
+		local_irq_disable();
+		if (!need_resched()) {
+			timer_dyn_reprogram();
+			arch_idle();
+		}
+		local_irq_enable();
 	}
-	local_irq_enable();
 }
 
 /*
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index 86e80c551478..003548b8735f 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -769,8 +769,26 @@ static int set_system_power_state(u_short state)
 static int apm_do_idle(void)
 {
 	u32 eax;
+	u8 ret = 0;
+	int idled = 0;
+	int polling;
+
+	polling = test_thread_flag(TIF_POLLING_NRFLAG);
+	if (polling) {
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+		smp_mb__after_clear_bit();
+	}
+	if (!need_resched()) {
+		idled = 1;
+		ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
+	}
+	if (polling)
+		set_thread_flag(TIF_POLLING_NRFLAG);
+
+	if (!idled)
+		return 0;
 
-	if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
+	if (ret) {
 		static unsigned long t;
 
 		/* This always fails on some SMP boards running UP kernels.
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 5296e284ea36..1cb261f225d5 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -99,14 +99,22 @@ EXPORT_SYMBOL(enable_hlt);
  */
 void default_idle(void)
 {
+	local_irq_enable();
+
 	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
-		local_irq_disable();
-		if (!need_resched())
-			safe_halt();
-		else
-			local_irq_enable();
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+		smp_mb__after_clear_bit();
+		while (!need_resched()) {
+			local_irq_disable();
+			if (!need_resched())
+				safe_halt();
+			else
+				local_irq_enable();
+		}
+		set_thread_flag(TIF_POLLING_NRFLAG);
 	} else {
-		cpu_relax();
+		while (!need_resched())
+			cpu_relax();
 	}
 }
 #ifdef CONFIG_APM_MODULE
@@ -120,29 +128,14 @@ EXPORT_SYMBOL(default_idle);
  */
 static void poll_idle (void)
 {
-	int oldval;
-
 	local_irq_enable();
 
-	/*
-	 * Deal with another CPU just having chosen a thread to
-	 * run here:
-	 */
-	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-	if (!oldval) {
-		set_thread_flag(TIF_POLLING_NRFLAG);
-		asm volatile(
-			"2:"
-			"testl %0, %1;"
-			"rep; nop;"
-			"je 2b;"
-			: : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
-
-		clear_thread_flag(TIF_POLLING_NRFLAG);
-	} else {
-		set_need_resched();
-	}
+	asm volatile(
+		"2:"
+		"testl %0, %1;"
+		"rep; nop;"
+		"je 2b;"
+		: : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -181,6 +174,8 @@ void cpu_idle(void)
 {
 	int cpu = smp_processor_id();
 
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
@@ -246,15 +241,12 @@ static void mwait_idle(void)
 {
 	local_irq_enable();
 
-	if (!need_resched()) {
-		set_thread_flag(TIF_POLLING_NRFLAG);
-		do {
-			__monitor((void *)&current_thread_info()->flags, 0, 0);
-			if (need_resched())
-				break;
-			__mwait(0, 0);
-		} while (!need_resched());
-		clear_thread_flag(TIF_POLLING_NRFLAG);
+	while (!need_resched()) {
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (need_resched())
+			break;
+		__mwait(0, 0);
 	}
 }
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 4c621fc3c3b9..640d6908f8ec 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -197,11 +197,15 @@ void
 default_idle (void)
 {
 	local_irq_enable();
-	while (!need_resched())
-		if (can_do_pal_halt)
-			safe_halt();
-		else
+	while (!need_resched()) {
+		if (can_do_pal_halt) {
+			local_irq_disable();
+			if (!need_resched())
+				safe_halt();
+			local_irq_enable();
+		} else
 			cpu_relax();
+	}
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -263,16 +267,16 @@ void __attribute__((noreturn))
 cpu_idle (void)
 {
 	void (*mark_idle)(int) = ia64_mark_idle;
+	int cpu = smp_processor_id();
+	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	/* endless idle loop with no priority at all */
 	while (1) {
+		if (!need_resched()) {
+			void (*idle)(void);
 #ifdef CONFIG_SMP
-		if (!need_resched())
 			min_xtp();
 #endif
-		while (!need_resched()) {
-			void (*idle)(void);
-
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
@@ -284,19 +288,17 @@ cpu_idle (void)
 			if (!idle)
 				idle = default_idle;
 			(*idle)();
-		}
-
-		if (mark_idle)
-			(*mark_idle)(0);
-
+			if (mark_idle)
+				(*mark_idle)(0);
 #ifdef CONFIG_SMP
 		normal_xtp();
 #endif
+		}
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
 		check_pgt_cache();
-		if (cpu_is_offline(smp_processor_id()))
+		if (cpu_is_offline(cpu))
 			play_dead();
 	}
 }
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index f482f78de435..fee4f1f09adc 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -88,6 +88,8 @@ void default_idle(void)
  */
 void cpu_idle(void)
 {
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched())
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 0130f2619dac..7f8f0cda6a74 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -703,13 +703,10 @@ static void iseries_shared_idle(void)
 static void iseries_dedicated_idle(void)
 {
 	long oldval;
+	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-		if (!oldval) {
-			set_thread_flag(TIF_POLLING_NRFLAG);
-
+		if (!need_resched()) {
 			while (!need_resched()) {
 				ppc64_runlatch_off();
 				HMT_low();
@@ -722,9 +719,6 @@ static void iseries_dedicated_idle(void)
 			}
 
 			HMT_medium();
-			clear_thread_flag(TIF_POLLING_NRFLAG);
-		} else {
-			set_need_resched();
 		}
 
 		ppc64_runlatch_on();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 4854f5eb5c3d..a093a0d4dd69 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -469,6 +469,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
 		 * more.
 		 */
 		clear_thread_flag(TIF_POLLING_NRFLAG);
+		smp_mb__after_clear_bit();
 
 		/*
 		 * SMT dynamic mode. Cede will result in this thread going
@@ -481,6 +482,7 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
 			cede_processor();
 		else
 			local_irq_enable();
+		set_thread_flag(TIF_POLLING_NRFLAG);
 	} else {
 		/*
 		 * Give the HV an opportunity at the processor, since we are
@@ -492,11 +494,11 @@ static inline void dedicated_idle_sleep(unsigned int cpu)
 
 static void pseries_dedicated_idle(void)
 {
-	long oldval;
 	struct paca_struct *lpaca = get_paca();
 	unsigned int cpu = smp_processor_id();
 	unsigned long start_snooze;
 	unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
+	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
 		/*
@@ -505,10 +507,7 @@ static void pseries_dedicated_idle(void)
 		 */
 		lpaca->lppaca.idle = 1;
 
-		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-		if (!oldval) {
-			set_thread_flag(TIF_POLLING_NRFLAG);
-
+		if (!need_resched()) {
 			start_snooze = __get_tb() +
 				*smt_snooze_delay * tb_ticks_per_usec;
 
@@ -531,9 +530,6 @@ static void pseries_dedicated_idle(void)
 			}
 
 			HMT_medium();
-			clear_thread_flag(TIF_POLLING_NRFLAG);
-		} else {
-			set_need_resched();
 		}
 
 		lpaca->lppaca.idle = 0;
diff --git a/arch/ppc/kernel/idle.c b/arch/ppc/kernel/idle.c
index a6141f05c919..3c4e4cb61074 100644
--- a/arch/ppc/kernel/idle.c
+++ b/arch/ppc/kernel/idle.c
@@ -63,18 +63,18 @@ void cpu_idle(void)
 	int cpu = smp_processor_id();
 
 	for (;;) {
-		if (ppc_md.idle != NULL)
-			ppc_md.idle();
-		else
-			default_idle();
-		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
-			cpu_die();
-		if (need_resched()) {
-			preempt_enable_no_resched();
-			schedule();
-			preempt_disable();
+		while (need_resched()) {
+			if (ppc_md.idle != NULL)
+				ppc_md.idle();
+			else
+				default_idle();
 		}
 
+		if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
+			cpu_die();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
 	}
 }
 
diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c
index 909ea669af91..715bc0e71e0f 100644
--- a/arch/ppc64/kernel/idle.c
+++ b/arch/ppc64/kernel/idle.c
@@ -34,15 +34,11 @@ extern void power4_idle(void);
 
 void default_idle(void)
 {
-	long oldval;
 	unsigned int cpu = smp_processor_id();
+	set_thread_flag(TIF_POLLING_NRFLAG);
 
 	while (1) {
-		oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-		if (!oldval) {
-			set_thread_flag(TIF_POLLING_NRFLAG);
-
+		if (!need_resched()) {
 			while (!need_resched() && !cpu_is_offline(cpu)) {
 				ppc64_runlatch_off();
 
@@ -55,9 +51,6 @@ void default_idle(void)
 			}
 
 			HMT_medium();
-			clear_thread_flag(TIF_POLLING_NRFLAG);
-		} else {
-			set_need_resched();
 		}
 
 		ppc64_runlatch_on();
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 66ca5757e368..78b64fe5e7c2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -99,14 +99,15 @@ void default_idle(void)
 {
 	int cpu, rc;
 
+	/* CPU is going idle. */
+	cpu = smp_processor_id();
+
 	local_irq_disable();
 	if (need_resched()) {
 		local_irq_enable();
 		return;
 	}
 
-	/* CPU is going idle. */
-	cpu = smp_processor_id();
 	rc = notifier_call_chain(&idle_chain, CPU_IDLE, (void *)(long) cpu);
 	if (rc != NOTIFY_OK && rc != NOTIFY_DONE)
 		BUG();
@@ -119,7 +120,7 @@ void default_idle(void)
 	__ctl_set_bit(8, 15);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	if (cpu_is_offline(smp_processor_id()))
+	if (cpu_is_offline(cpu))
 		cpu_die();
 #endif
 
diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c
index 1cbc26b796ad..fd4f240b833d 100644
--- a/arch/sh/kernel/process.c
+++ b/arch/sh/kernel/process.c
@@ -51,14 +51,13 @@ void enable_hlt(void)
 
 EXPORT_SYMBOL(enable_hlt);
 
-void default_idle(void)
+void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
 		if (hlt_counter) {
-			while (1)
-				if (need_resched())
-					break;
+			while (!need_resched())
+				cpu_relax();
 		} else {
 			while (!need_resched())
 				cpu_sleep();
@@ -70,11 +69,6 @@ void default_idle(void)
 		}
 	}
 }
 
-void cpu_idle(void)
-{
-	default_idle();
-}
-
 void machine_restart(char * __unused)
 {
 	/* SR.BL=1 and invoke address error to let CPU reset (manual reset) */
diff --git a/arch/sh64/kernel/process.c b/arch/sh64/kernel/process.c
index 0c09537449b3..b95d04141855 100644
--- a/arch/sh64/kernel/process.c
+++ b/arch/sh64/kernel/process.c
@@ -307,23 +307,19 @@ __setup("hlt", hlt_setup);
 
 static inline void hlt(void)
 {
-	if (hlt_counter)
-		return;
-
 	__asm__ __volatile__ ("sleep" : : : "memory");
 }
 
 /*
  * The idle loop on a uniprocessor SH..
  */
-void default_idle(void)
+void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
 		if (hlt_counter) {
-			while (1)
-				if (need_resched())
-					break;
+			while (!need_resched())
+				cpu_relax();
 		} else {
 			local_irq_disable();
 			while (!need_resched()) {
@@ -338,11 +334,7 @@ void default_idle(void)
 			schedule();
 			preempt_disable();
 		}
-}
 
-void cpu_idle(void)
-{
-	default_idle();
 }
 
 void machine_restart(char * __unused)
diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index c39f4d01096d..ea8647411462 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -67,13 +67,6 @@ extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *);
 struct task_struct *last_task_used_math = NULL;
 struct thread_info *current_set[NR_CPUS];
 
-/*
- * default_idle is new in 2.5. XXX Review, currently stolen from sparc64.
- */
-void default_idle(void)
-{
-}
-
 #ifndef CONFIG_SMP
 
 #define SUN4C_FAULT_HIGH 100
@@ -92,12 +85,11 @@ void cpu_idle(void)
 		static unsigned long fps;
 		unsigned long now;
 		unsigned long faults;
-		unsigned long flags;
 
 		extern unsigned long sun4c_kernel_faults;
 		extern void sun4c_grow_kernel_ring(void);
 
-		local_irq_save(flags);
+		local_irq_disable();
 		now = jiffies;
 		count -= (now - last_jiffies);
 		last_jiffies = now;
@@ -113,13 +105,16 @@ void cpu_idle(void)
 				sun4c_grow_kernel_ring();
 			}
 		}
-		local_irq_restore(flags);
+		local_irq_enable();
 	}
 
-	while((!need_resched()) && pm_idle) {
-		(*pm_idle)();
+	if (pm_idle) {
+		while (!need_resched())
+			(*pm_idle)();
+	} else {
+		while (!need_resched())
+			cpu_relax();
 	}
-
 	preempt_enable_no_resched();
 	schedule();
 	preempt_disable();
@@ -132,15 +127,15 @@ void cpu_idle(void)
 /* This is being executed in task 0 'user space'. */
 void cpu_idle(void)
 {
+	set_thread_flag(TIF_POLLING_NRFLAG);
 	/* endless idle loop with no priority at all */
 	while(1) {
-		if(need_resched()) {
-			preempt_enable_no_resched();
-			schedule();
-			preempt_disable();
-			check_pgt_cache();
-		}
-		barrier(); /* or else gcc optimizes... */
+		while (!need_resched())
+			cpu_relax();
+		preempt_enable_no_resched();
+		schedule();
+		preempt_disable();
+		check_pgt_cache();
 	}
 }
 
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2f89206e008f..02f9dec1d459 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -85,23 +85,31 @@ void cpu_idle(void)
 
 /*
  * the idle loop on a UltraMultiPenguin...
+ *
+ * TIF_POLLING_NRFLAG is set because we do not sleep the cpu
+ * inside of the idler task, so an interrupt is not needed
+ * to get a clean fast response.
+ *
+ * XXX Reverify this assumption... -DaveM
+ *
+ * Addendum: We do want it to do something for the signal
+ *           delivery case, we detect that by just seeing
+ *           if we are trying to send this to an idler or not.
  */
-#define idle_me_harder()	(cpu_data(smp_processor_id()).idle_volume += 1)
-#define unidle_me()		(cpu_data(smp_processor_id()).idle_volume = 0)
 void cpu_idle(void)
 {
+	cpuinfo_sparc *cpuinfo = &local_cpu_data();
 	set_thread_flag(TIF_POLLING_NRFLAG);
+
 	while(1) {
 		if (need_resched()) {
-			unidle_me();
-			clear_thread_flag(TIF_POLLING_NRFLAG);
+			cpuinfo->idle_volume = 0;
 			preempt_enable_no_resched();
 			schedule();
 			preempt_disable();
-			set_thread_flag(TIF_POLLING_NRFLAG);
 			check_pgt_cache();
 		}
-		idle_me_harder();
+		cpuinfo->idle_volume++;
 
 		/* The store ordering is so that IRQ handlers on
 		 * other cpus see our increasing idleness for the buddy
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 8aca4b1dc04e..797a65493fb8 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1152,20 +1152,9 @@ void __init smp_cpus_done(unsigned int max_cpus)
 	       (bogosum/(5000/HZ))%100);
 }
 
-/* This needn't do anything as we do not sleep the cpu
- * inside of the idler task, so an interrupt is not needed
- * to get a clean fast response.
- *
- * XXX Reverify this assumption... -DaveM
- *
- * Addendum: We do want it to do something for the signal
- *           delivery case, we detect that by just seeing
- *           if we are trying to send this to an idler or not.
- */
 void smp_send_reschedule(int cpu)
 {
-	if (cpu_data(cpu).idle_volume == 0)
-		smp_receive_signal(cpu);
+	smp_receive_signal(cpu);
 }
 
 /* This is a nop because we capture all other cpus
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 571f9fe490ce..59be85d9a4bc 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -86,12 +86,22 @@ EXPORT_SYMBOL(enable_hlt);
  */
 void default_idle(void)
 {
+	local_irq_enable();
+
 	if (!atomic_read(&hlt_counter)) {
-		local_irq_disable();
-		if (!need_resched())
-			safe_halt();
-		else
-			local_irq_enable();
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+		smp_mb__after_clear_bit();
+		while (!need_resched()) {
+			local_irq_disable();
+			if (!need_resched())
+				safe_halt();
+			else
+				local_irq_enable();
+		}
+		set_thread_flag(TIF_POLLING_NRFLAG);
+	} else {
+		while (!need_resched())
+			cpu_relax();
 	}
 }
 
@@ -102,30 +112,16 @@ void default_idle(void)
  */
 static void poll_idle (void)
 {
-	int oldval;
-
 	local_irq_enable();
 
-	/*
-	 * Deal with another CPU just having chosen a thread to
-	 * run here:
-	 */
-	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
-
-	if (!oldval) {
-		set_thread_flag(TIF_POLLING_NRFLAG);
-		asm volatile(
-			"2:"
-			"testl %0,%1;"
-			"rep; nop;"
-			"je 2b;"
-			: :
-			"i" (_TIF_NEED_RESCHED),
-			"m" (current_thread_info()->flags));
-		clear_thread_flag(TIF_POLLING_NRFLAG);
-	} else {
-		set_need_resched();
-	}
+	asm volatile(
+		"2:"
+		"testl %0,%1;"
+		"rep; nop;"
+		"je 2b;"
+		: :
+		"i" (_TIF_NEED_RESCHED),
+		"m" (current_thread_info()->flags));
 }
 
 void cpu_idle_wait(void)
@@ -187,6 +183,8 @@ static inline void play_dead(void)
  */
 void cpu_idle (void)
 {
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
@@ -221,15 +219,12 @@ static void mwait_idle(void)
 {
 	local_irq_enable();
 
-	if (!need_resched()) {
-		set_thread_flag(TIF_POLLING_NRFLAG);
-		do {
-			__monitor((void *)&current_thread_info()->flags, 0, 0);
-			if (need_resched())
-				break;
-			__mwait(0, 0);
-		} while (!need_resched());
-		clear_thread_flag(TIF_POLLING_NRFLAG);
+	while (!need_resched()) {
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (need_resched())
+			break;
+		__mwait(0, 0);
 	}
 }
 
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 161db4acfb91..573b6a97bb1f 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -167,6 +167,19 @@ acpi_processor_power_activate(struct acpi_processor *pr,
 	return;
 }
 
+static void acpi_safe_halt(void)
+{
+	int polling = test_thread_flag(TIF_POLLING_NRFLAG);
+	if (polling) {
+		clear_thread_flag(TIF_POLLING_NRFLAG);
+		smp_mb__after_clear_bit();
+	}
+	if (!need_resched())
+		safe_halt();
+	if (polling)
+		set_thread_flag(TIF_POLLING_NRFLAG);
+}
+
 static atomic_t c3_cpu_count;
 
 static void acpi_processor_idle(void)
@@ -177,7 +190,7 @@ static void acpi_processor_idle(void)
 	int sleep_ticks = 0;
 	u32 t1, t2 = 0;
 
-	pr = processors[raw_smp_processor_id()];
+	pr = processors[smp_processor_id()];
 	if (!pr)
 		return;
 
@@ -197,8 +210,13 @@ static void acpi_processor_idle(void)
 	}
 
 	cx = pr->power.state;
-	if (!cx)
-		goto easy_out;
+	if (!cx) {
+		if (pm_idle_save)
+			pm_idle_save();
+		else
+			acpi_safe_halt();
+		return;
+	}
 
 	/*
 	 * Check BM Activity
@@ -278,7 +296,8 @@ static void acpi_processor_idle(void)
 		if (pm_idle_save)
 			pm_idle_save();
 		else
-			safe_halt();
+			acpi_safe_halt();
+
 		/*
 		 * TBD: Can't get time duration while in C1, as resumes
 		 * go to an ISR rather than here. Need to instrument
@@ -414,16 +433,6 @@ static void acpi_processor_idle(void)
 	 */
 	if (next_state != pr->power.state)
 		acpi_processor_power_activate(pr, next_state);
-
-	return;
-
- easy_out:
-	/* do C1 instead of busy loop */
-	if (pm_idle_save)
-		pm_idle_save();
-	else
-		safe_halt();
-	return;
 }
 
 static int acpi_processor_set_power_policy(struct acpi_processor *pr)
diff --git a/kernel/sched.c b/kernel/sched.c
index 0f2def822296..ac3f5cc3bb51 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -864,21 +864,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 #ifdef CONFIG_SMP
 static void resched_task(task_t *p)
 {
-	int need_resched, nrpolling;
+	int cpu;
 
 	assert_spin_locked(&task_rq(p)->lock);
 
-	/* minimise the chance of sending an interrupt to poll_idle() */
-	nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
-	need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
-	nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);
+	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+		return;
+
+	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+
+	cpu = task_cpu(p);
+	if (cpu == smp_processor_id())
+		return;
 
-	if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
-		smp_send_reschedule(task_cpu(p));
+	/* NEED_RESCHED must be visible before we test POLLING_NRFLAG */
+	smp_mb();
+	if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
+		smp_send_reschedule(cpu);
 }
 #else
 static inline void resched_task(task_t *p)
 {
+	assert_spin_locked(&task_rq(p)->lock);
 	set_tsk_need_resched(p);
 }
 #endif
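The resched_task() rewrite above pairs with the idle-side halt pattern this
patch adds elsewhere (acpi_safe_halt, the new i386/x86_64 default_idle). A
condensed sketch of the two ordering-sensitive sequences, assembled from the
hunks rather than quoted from any one file:

	/*
	 * Waker side (resched_task, runqueue lock held):
	 *	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
	 *	smp_mb();
	 *	if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG))
	 *		smp_send_reschedule(cpu);
	 *
	 * Idle side (before an interrupt-wakeup halt):
	 *	clear_thread_flag(TIF_POLLING_NRFLAG);
	 *	smp_mb__after_clear_bit();
	 *	if (!need_resched())
	 *		safe_halt();
	 *
	 * If the waker still observes TIF_POLLING_NRFLAG set, the idler's
	 * need_resched() test after its own barrier sees TIF_NEED_RESCHED and
	 * skips the halt; otherwise the waker sends the IPI.  The paired
	 * barriers keep the two sides from missing each other.
	 */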