aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2008-04-25 11:39:01 -0400
committerIngo Molnar <mingo@elte.hu>2008-04-26 18:01:45 -0400
commit7f424a8b08c26dc14ac5c17164014539ac9a5c65 (patch)
treef1f7268ce5215fe4909cb801313a8997d52d1e1e
parentc3bf9bc243092c53946fd6d8ebd6dc2f4e572d48 (diff)
fix idle (arch, acpi and apm) and lockdep
OK, so 25-mm1 gave a lockdep error which made me look into this. The first thing that I noticed was the horrible mess; the second thing I saw was hacks like: 71e93d15612c61c2e26a169567becf088e71b8ff The problem is that arch idle routines are somewhat inconsitent with their IRQ state handling and instead of fixing _that_, we go paper over the problem. So the thing I've tried to do is set a standard for idle routines and fix them all up to adhere to that. So the rules are: idle routines are entered with IRQs disabled idle routines will exit with IRQs enabled Nearly all already did this in one form or another. Merge the 32 and 64 bit bits so they no longer have different bugs. As for the actual lockdep warning; __sti_mwait() did a plainly un-annotated irq-enable. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Tested-by: Bob Copeland <me@bobcopeland.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/kernel/apm_32.c3
-rw-r--r--arch/x86/kernel/process.c117
-rw-r--r--arch/x86/kernel/process_32.c118
-rw-r--r--arch/x86/kernel/process_64.c123
-rw-r--r--drivers/acpi/processor_idle.c19
-rw-r--r--include/asm-x86/processor.h1
6 files changed, 137 insertions, 244 deletions
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0999c7..e4ea362e8480 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
904 original_pm_idle(); 904 original_pm_idle();
905 else 905 else
906 default_idle(); 906 default_idle();
907 local_irq_disable();
907 jiffies_since_last_check = jiffies - last_jiffies; 908 jiffies_since_last_check = jiffies - last_jiffies;
908 if (jiffies_since_last_check > idle_period) 909 if (jiffies_since_last_check > idle_period)
909 goto recalc; 910 goto recalc;
@@ -911,6 +912,8 @@ recalc:
911 912
912 if (apm_idle_done) 913 if (apm_idle_done)
913 apm_do_busy(); 914 apm_do_busy();
915
916 local_irq_enable();
914} 917}
915 918
916/** 919/**
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d716539d..67e9b4a1e89d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
4#include <linux/smp.h> 4#include <linux/smp.h>
5#include <linux/slab.h> 5#include <linux/slab.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/module.h>
8#include <linux/pm.h>
7 9
8struct kmem_cache *task_xstate_cachep; 10struct kmem_cache *task_xstate_cachep;
9 11
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
42 __alignof__(union thread_xstate), 44 __alignof__(union thread_xstate),
43 SLAB_PANIC, NULL); 45 SLAB_PANIC, NULL);
44} 46}
47
48static void do_nothing(void *unused)
49{
50}
51
52/*
53 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
54 * pm_idle and update to new pm_idle value. Required while changing pm_idle
55 * handler on SMP systems.
56 *
57 * Caller must have changed pm_idle to the new value before the call. Old
58 * pm_idle value will not be used by any CPU after the return of this function.
59 */
60void cpu_idle_wait(void)
61{
62 smp_mb();
63 /* kick all the CPUs so that they exit out of pm_idle */
64 smp_call_function(do_nothing, NULL, 0, 1);
65}
66EXPORT_SYMBOL_GPL(cpu_idle_wait);
67
68/*
69 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
70 * which can obviate IPI to trigger checking of need_resched.
71 * We execute MONITOR against need_resched and enter optimized wait state
72 * through MWAIT. Whenever someone changes need_resched, we would be woken
73 * up from MWAIT (without an IPI).
74 *
75 * New with Core Duo processors, MWAIT can take some hints based on CPU
76 * capability.
77 */
78void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
79{
80 if (!need_resched()) {
81 __monitor((void *)&current_thread_info()->flags, 0, 0);
82 smp_mb();
83 if (!need_resched())
84 __mwait(ax, cx);
85 }
86}
87
88/* Default MONITOR/MWAIT with no hints, used for default C1 state */
89static void mwait_idle(void)
90{
91 if (!need_resched()) {
92 __monitor((void *)&current_thread_info()->flags, 0, 0);
93 smp_mb();
94 if (!need_resched())
95 __sti_mwait(0, 0);
96 else
97 local_irq_enable();
98 } else
99 local_irq_enable();
100}
101
102
103static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
104{
105 if (force_mwait)
106 return 1;
107 /* Any C1 states supported? */
108 return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
109}
110
111/*
112 * On SMP it's slightly faster (but much more power-consuming!)
113 * to poll the ->work.need_resched flag instead of waiting for the
114 * cross-CPU IPI to arrive. Use this option with caution.
115 */
116static void poll_idle(void)
117{
118 local_irq_enable();
119 cpu_relax();
120}
121
122void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
123{
124 static int selected;
125
126 if (selected)
127 return;
128#ifdef CONFIG_X86_SMP
129 if (pm_idle == poll_idle && smp_num_siblings > 1) {
130 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
131 " performance may degrade.\n");
132 }
133#endif
134 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
135 /*
136 * Skip, if setup has overridden idle.
137 * One CPU supports mwait => All CPUs supports mwait
138 */
139 if (!pm_idle) {
140 printk(KERN_INFO "using mwait in idle threads.\n");
141 pm_idle = mwait_idle;
142 }
143 }
144 selected = 1;
145}
146
147static int __init idle_setup(char *str)
148{
149 if (!strcmp(str, "poll")) {
150 printk("using polling idle threads.\n");
151 pm_idle = poll_idle;
152 } else if (!strcmp(str, "mwait"))
153 force_mwait = 1;
154 else
155 return -1;
156
157 boot_option_idle_override = 1;
158 return 0;
159}
160early_param("idle", idle_setup);
161
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77de848bd1fb..f8476dfbb60d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
111 */ 111 */
112 smp_mb(); 112 smp_mb();
113 113
114 local_irq_disable(); 114 if (!need_resched())
115 if (!need_resched()) {
116 safe_halt(); /* enables interrupts racelessly */ 115 safe_halt(); /* enables interrupts racelessly */
117 local_irq_disable(); 116 else
118 } 117 local_irq_enable();
119 local_irq_enable();
120 current_thread_info()->status |= TS_POLLING; 118 current_thread_info()->status |= TS_POLLING;
121 } else { 119 } else {
122 local_irq_enable(); 120 local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
128EXPORT_SYMBOL(default_idle); 126EXPORT_SYMBOL(default_idle);
129#endif 127#endif
130 128
131/*
132 * On SMP it's slightly faster (but much more power-consuming!)
133 * to poll the ->work.need_resched flag instead of waiting for the
134 * cross-CPU IPI to arrive. Use this option with caution.
135 */
136static void poll_idle(void)
137{
138 local_irq_enable();
139 cpu_relax();
140}
141
142#ifdef CONFIG_HOTPLUG_CPU 129#ifdef CONFIG_HOTPLUG_CPU
143#include <asm/nmi.h> 130#include <asm/nmi.h>
144/* We don't actually take CPU down, just spin without interrupts. */ 131/* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
196 if (cpu_is_offline(cpu)) 183 if (cpu_is_offline(cpu))
197 play_dead(); 184 play_dead();
198 185
186 local_irq_disable();
199 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 187 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
200 idle(); 188 idle();
201 } 189 }
@@ -206,104 +194,6 @@ void cpu_idle(void)
206 } 194 }
207} 195}
208 196
209static void do_nothing(void *unused)
210{
211}
212
213/*
214 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
215 * pm_idle and update to new pm_idle value. Required while changing pm_idle
216 * handler on SMP systems.
217 *
218 * Caller must have changed pm_idle to the new value before the call. Old
219 * pm_idle value will not be used by any CPU after the return of this function.
220 */
221void cpu_idle_wait(void)
222{
223 smp_mb();
224 /* kick all the CPUs so that they exit out of pm_idle */
225 smp_call_function(do_nothing, NULL, 0, 1);
226}
227EXPORT_SYMBOL_GPL(cpu_idle_wait);
228
229/*
230 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
231 * which can obviate IPI to trigger checking of need_resched.
232 * We execute MONITOR against need_resched and enter optimized wait state
233 * through MWAIT. Whenever someone changes need_resched, we would be woken
234 * up from MWAIT (without an IPI).
235 *
236 * New with Core Duo processors, MWAIT can take some hints based on CPU
237 * capability.
238 */
239void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
240{
241 if (!need_resched()) {
242 __monitor((void *)&current_thread_info()->flags, 0, 0);
243 smp_mb();
244 if (!need_resched())
245 __sti_mwait(ax, cx);
246 else
247 local_irq_enable();
248 } else
249 local_irq_enable();
250}
251
252/* Default MONITOR/MWAIT with no hints, used for default C1 state */
253static void mwait_idle(void)
254{
255 local_irq_enable();
256 mwait_idle_with_hints(0, 0);
257}
258
259static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
260{
261 if (force_mwait)
262 return 1;
263 /* Any C1 states supported? */
264 return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
265}
266
267void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
268{
269 static int selected;
270
271 if (selected)
272 return;
273#ifdef CONFIG_X86_SMP
274 if (pm_idle == poll_idle && smp_num_siblings > 1) {
275 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
276 " performance may degrade.\n");
277 }
278#endif
279 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
280 /*
281 * Skip, if setup has overridden idle.
282 * One CPU supports mwait => All CPUs supports mwait
283 */
284 if (!pm_idle) {
285 printk(KERN_INFO "using mwait in idle threads.\n");
286 pm_idle = mwait_idle;
287 }
288 }
289 selected = 1;
290}
291
292static int __init idle_setup(char *str)
293{
294 if (!strcmp(str, "poll")) {
295 printk("using polling idle threads.\n");
296 pm_idle = poll_idle;
297 } else if (!strcmp(str, "mwait"))
298 force_mwait = 1;
299 else
300 return -1;
301
302 boot_option_idle_override = 1;
303 return 0;
304}
305early_param("idle", idle_setup);
306
307void __show_registers(struct pt_regs *regs, int all) 197void __show_registers(struct pt_regs *regs, int all)
308{ 198{
309 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 199 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 131c2ee7ac56..e2319f39988b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
106 * test NEED_RESCHED: 106 * test NEED_RESCHED:
107 */ 107 */
108 smp_mb(); 108 smp_mb();
109 local_irq_disable(); 109 if (!need_resched())
110 if (!need_resched()) {
111 safe_halt(); /* enables interrupts racelessly */ 110 safe_halt(); /* enables interrupts racelessly */
112 local_irq_disable(); 111 else
113 } 112 local_irq_enable();
114 local_irq_enable();
115 current_thread_info()->status |= TS_POLLING; 113 current_thread_info()->status |= TS_POLLING;
116} 114}
117 115
118/*
119 * On SMP it's slightly faster (but much more power-consuming!)
120 * to poll the ->need_resched flag instead of waiting for the
121 * cross-CPU IPI to arrive. Use this option with caution.
122 */
123static void poll_idle(void)
124{
125 local_irq_enable();
126 cpu_relax();
127}
128
129#ifdef CONFIG_HOTPLUG_CPU 116#ifdef CONFIG_HOTPLUG_CPU
130DECLARE_PER_CPU(int, cpu_state); 117DECLARE_PER_CPU(int, cpu_state);
131 118
@@ -192,110 +179,6 @@ void cpu_idle(void)
192 } 179 }
193} 180}
194 181
195static void do_nothing(void *unused)
196{
197}
198
199/*
200 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
201 * pm_idle and update to new pm_idle value. Required while changing pm_idle
202 * handler on SMP systems.
203 *
204 * Caller must have changed pm_idle to the new value before the call. Old
205 * pm_idle value will not be used by any CPU after the return of this function.
206 */
207void cpu_idle_wait(void)
208{
209 smp_mb();
210 /* kick all the CPUs so that they exit out of pm_idle */
211 smp_call_function(do_nothing, NULL, 0, 1);
212}
213EXPORT_SYMBOL_GPL(cpu_idle_wait);
214
215/*
216 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
217 * which can obviate IPI to trigger checking of need_resched.
218 * We execute MONITOR against need_resched and enter optimized wait state
219 * through MWAIT. Whenever someone changes need_resched, we would be woken
220 * up from MWAIT (without an IPI).
221 *
222 * New with Core Duo processors, MWAIT can take some hints based on CPU
223 * capability.
224 */
225void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
226{
227 if (!need_resched()) {
228 __monitor((void *)&current_thread_info()->flags, 0, 0);
229 smp_mb();
230 if (!need_resched())
231 __mwait(ax, cx);
232 }
233}
234
235/* Default MONITOR/MWAIT with no hints, used for default C1 state */
236static void mwait_idle(void)
237{
238 if (!need_resched()) {
239 __monitor((void *)&current_thread_info()->flags, 0, 0);
240 smp_mb();
241 if (!need_resched())
242 __sti_mwait(0, 0);
243 else
244 local_irq_enable();
245 } else {
246 local_irq_enable();
247 }
248}
249
250
251static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
252{
253 if (force_mwait)
254 return 1;
255 /* Any C1 states supported? */
256 return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
257}
258
259void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
260{
261 static int selected;
262
263 if (selected)
264 return;
265#ifdef CONFIG_X86_SMP
266 if (pm_idle == poll_idle && smp_num_siblings > 1) {
267 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
268 " performance may degrade.\n");
269 }
270#endif
271 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
272 /*
273 * Skip, if setup has overridden idle.
274 * One CPU supports mwait => All CPUs supports mwait
275 */
276 if (!pm_idle) {
277 printk(KERN_INFO "using mwait in idle threads.\n");
278 pm_idle = mwait_idle;
279 }
280 }
281 selected = 1;
282}
283
284static int __init idle_setup(char *str)
285{
286 if (!strcmp(str, "poll")) {
287 printk("using polling idle threads.\n");
288 pm_idle = poll_idle;
289 } else if (!strcmp(str, "mwait"))
290 force_mwait = 1;
291 else
292 return -1;
293
294 boot_option_idle_override = 1;
295 return 0;
296}
297early_param("idle", idle_setup);
298
299/* Prints also some state that isn't saved in the pt_regs */ 182/* Prints also some state that isn't saved in the pt_regs */
300void __show_regs(struct pt_regs * regs) 183void __show_regs(struct pt_regs * regs)
301{ 184{
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 788da9781f80..0d90ff5fd117 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -418,13 +418,12 @@ static void acpi_processor_idle(void)
418 418
419 cx = pr->power.state; 419 cx = pr->power.state;
420 if (!cx || acpi_idle_suspend) { 420 if (!cx || acpi_idle_suspend) {
421 if (pm_idle_save) 421 if (pm_idle_save) {
422 pm_idle_save(); 422 pm_idle_save(); /* enables IRQs */
423 else 423 } else {
424 acpi_safe_halt(); 424 acpi_safe_halt();
425
426 if (irqs_disabled())
427 local_irq_enable(); 425 local_irq_enable();
426 }
428 427
429 return; 428 return;
430 } 429 }
@@ -520,10 +519,12 @@ static void acpi_processor_idle(void)
520 * Use the appropriate idle routine, the one that would 519 * Use the appropriate idle routine, the one that would
521 * be used without acpi C-states. 520 * be used without acpi C-states.
522 */ 521 */
523 if (pm_idle_save) 522 if (pm_idle_save) {
524 pm_idle_save(); 523 pm_idle_save(); /* enables IRQs */
525 else 524 } else {
526 acpi_safe_halt(); 525 acpi_safe_halt();
526 local_irq_enable();
527 }
527 528
528 /* 529 /*
529 * TBD: Can't get time duration while in C1, as resumes 530 * TBD: Can't get time duration while in C1, as resumes
@@ -534,8 +535,6 @@ static void acpi_processor_idle(void)
534 * skew otherwise. 535 * skew otherwise.
535 */ 536 */
536 sleep_ticks = 0xFFFFFFFF; 537 sleep_ticks = 0xFFFFFFFF;
537 if (irqs_disabled())
538 local_irq_enable();
539 538
540 break; 539 break;
541 540
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 117343b0c271..2e7974ec77ec 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -722,6 +722,7 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
722 722
723static inline void __sti_mwait(unsigned long eax, unsigned long ecx) 723static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
724{ 724{
725 trace_hardirqs_on();
725 /* "mwait %eax, %ecx;" */ 726 /* "mwait %eax, %ecx;" */
726 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" 727 asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
727 :: "a" (eax), "c" (ecx)); 728 :: "a" (eax), "c" (ecx));