-rw-r--r--	Documentation/kernel-parameters.txt	11
-rw-r--r--	arch/arm/kernel/process.c	13
-rw-r--r--	arch/arm/mach-davinci/cpuidle.c	84
-rw-r--r--	arch/arm64/kernel/process.c	13
-rw-r--r--	arch/blackfin/kernel/process.c	7
-rw-r--r--	arch/cris/kernel/process.c	11
-rw-r--r--	arch/ia64/kernel/process.c	3
-rw-r--r--	arch/ia64/kernel/setup.c	1
-rw-r--r--	arch/m32r/kernel/process.c	51
-rw-r--r--	arch/microblaze/kernel/process.c	3
-rw-r--r--	arch/mn10300/kernel/process.c	7
-rw-r--r--	arch/openrisc/kernel/idle.c	5
-rw-r--r--	arch/sh/kernel/idle.c	12
-rw-r--r--	arch/sparc/include/asm/processor_32.h	1
-rw-r--r--	arch/sparc/kernel/apc.c	3
-rw-r--r--	arch/sparc/kernel/leon_pmc.c	5
-rw-r--r--	arch/sparc/kernel/pmc.c	3
-rw-r--r--	arch/sparc/kernel/process_32.c	7
-rw-r--r--	arch/unicore32/kernel/process.c	5
-rw-r--r--	arch/x86/Kconfig	1
-rw-r--r--	arch/x86/include/asm/mwait.h	3
-rw-r--r--	arch/x86/include/asm/processor.h	18
-rw-r--r--	arch/x86/include/uapi/asm/msr-index.h	3
-rw-r--r--	arch/x86/kernel/apm_32.c	57
-rw-r--r--	arch/x86/kernel/cpu/bugs.c	27
-rw-r--r--	arch/x86/kernel/cpu/proc.c	2
-rw-r--r--	arch/x86/kernel/process.c	116
-rw-r--r--	arch/x86/kernel/smpboot.c	2
-rw-r--r--	arch/x86/xen/setup.c	5
-rw-r--r--	drivers/acpi/processor_idle.c	49
-rw-r--r--	drivers/idle/intel_idle.c	278
-rw-r--r--	include/linux/cpuidle.h	22
-rw-r--r--	include/linux/pm.h	1
-rw-r--r--	tools/power/x86/turbostat/turbostat.8	36
-rw-r--r--	tools/power/x86/turbostat/turbostat.c	48
35 files changed, 360 insertions, 553 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 41c5d9ecd9bb..4c5b3f993bbb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1039,16 +1039,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Claim all unknown PCI IDE storage controllers.
 
 	idle=		[X86]
-			Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
+			Format: idle=poll, idle=halt, idle=nomwait
 			Poll forces a polling idle loop that can slightly
 			improve the performance of waking up a idle CPU, but
 			will use a lot of power and make the system run hot.
 			Not recommended.
-			idle=mwait: On systems which support MONITOR/MWAIT but
-			the kernel chose to not use it because it doesn't save
-			as much power as a normal idle loop, use the
-			MONITOR/MWAIT idle loop anyways. Performance should be
-			the same as idle=poll.
 			idle=halt: Halt is forced to be used for CPU idle.
 			In such case C2/C3 won't be used again.
 			idle=nomwait: Disable mwait for CPU C-states
@@ -1891,10 +1886,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
 
-	no-hlt		[BUGS=X86-32] Tells the kernel that the hlt
-			instruction doesn't work correctly and not to
-			use it.
-
 	no_file_caps	Tells the kernel not to honor file capabilities. The
 			only way then for a file to be executed with privilege
 			is to be setuid root or executed by root.
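
The idle= settings documented above only take effect when passed on the kernel command line. As a purely illustrative example (the GRUB defaults file below is an assumed setup, not part of this patch), disabling MWAIT-based C-states after this change would look like:

	# /etc/default/grub (illustrative path; any bootloader that appends
	# to the kernel command line works the same way)
	GRUB_CMDLINE_LINUX_DEFAULT="quiet idle=nomwait"

After regenerating the bootloader configuration and rebooting, the active option is visible in /proc/cmdline.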
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index c6dec5fc20aa..047d3e40e470 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -172,14 +172,9 @@ static void default_idle(void)
 	local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.  The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -210,10 +205,10 @@ void cpu_idle(void)
 		} else if (!need_resched()) {
 			stop_critical_timings();
 			if (cpuidle_idle_call())
-				pm_idle();
+				default_idle();
 			start_critical_timings();
 			/*
-			 * pm_idle functions must always
+			 * default_idle functions must always
 			 * return with IRQs enabled.
 			 */
 			WARN_ON(irqs_disabled());
diff --git a/arch/arm/mach-davinci/cpuidle.c b/arch/arm/mach-davinci/cpuidle.c
index 9107691adbdb..5ac9e9384b15 100644
--- a/arch/arm/mach-davinci/cpuidle.c
+++ b/arch/arm/mach-davinci/cpuidle.c
@@ -25,35 +25,44 @@
 
 #define DAVINCI_CPUIDLE_MAX_STATES	2
 
-struct davinci_ops {
-	void (*enter) (u32 flags);
-	void (*exit) (u32 flags);
-	u32 flags;
-};
+static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device);
+static void __iomem *ddr2_reg_base;
+static bool ddr2_pdown;
+
+static void davinci_save_ddr_power(int enter, bool pdown)
+{
+	u32 val;
+
+	val = __raw_readl(ddr2_reg_base + DDR2_SDRCR_OFFSET);
+
+	if (enter) {
+		if (pdown)
+			val |= DDR2_SRPD_BIT;
+		else
+			val &= ~DDR2_SRPD_BIT;
+		val |= DDR2_LPMODEN_BIT;
+	} else {
+		val &= ~(DDR2_SRPD_BIT | DDR2_LPMODEN_BIT);
+	}
+
+	__raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET);
+}
 
 /* Actual code that puts the SoC in different idle states */
 static int davinci_enter_idle(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv,
 						int index)
 {
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct davinci_ops *ops = cpuidle_get_statedata(state_usage);
-
-	if (ops && ops->enter)
-		ops->enter(ops->flags);
+	davinci_save_ddr_power(1, ddr2_pdown);
 
 	index = cpuidle_wrap_enter(dev, drv, index,
 				arm_cpuidle_simple_enter);
 
-	if (ops && ops->exit)
-		ops->exit(ops->flags);
+	davinci_save_ddr_power(0, ddr2_pdown);
 
 	return index;
 }
 
-/* fields in davinci_ops.flags */
-#define DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN	BIT(0)
-
 static struct cpuidle_driver davinci_idle_driver = {
 	.name			= "cpuidle-davinci",
 	.owner			= THIS_MODULE,
@@ -70,45 +79,6 @@ static struct cpuidle_driver davinci_idle_driver = {
 	.state_count		= DAVINCI_CPUIDLE_MAX_STATES,
 };
 
-static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device);
-static void __iomem *ddr2_reg_base;
-
-static void davinci_save_ddr_power(int enter, bool pdown)
-{
-	u32 val;
-
-	val = __raw_readl(ddr2_reg_base + DDR2_SDRCR_OFFSET);
-
-	if (enter) {
-		if (pdown)
-			val |= DDR2_SRPD_BIT;
-		else
-			val &= ~DDR2_SRPD_BIT;
-		val |= DDR2_LPMODEN_BIT;
-	} else {
-		val &= ~(DDR2_SRPD_BIT | DDR2_LPMODEN_BIT);
-	}
-
-	__raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET);
-}
-
-static void davinci_c2state_enter(u32 flags)
-{
-	davinci_save_ddr_power(1, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN));
-}
-
-static void davinci_c2state_exit(u32 flags)
-{
-	davinci_save_ddr_power(0, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN));
-}
-
-static struct davinci_ops davinci_states[DAVINCI_CPUIDLE_MAX_STATES] = {
-	[1] = {
-		.enter	= davinci_c2state_enter,
-		.exit	= davinci_c2state_exit,
-	},
-};
-
 static int __init davinci_cpuidle_probe(struct platform_device *pdev)
 {
 	int ret;
@@ -124,11 +94,7 @@ static int __init davinci_cpuidle_probe(struct platform_device *pdev)
 
 	ddr2_reg_base = pdata->ddr2_ctlr_base;
 
-	if (pdata->ddr2_pdown)
-		davinci_states[1].flags |= DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN;
-	cpuidle_set_statedata(&device->states_usage[1], &davinci_states[1]);
-
-	device->state_count = DAVINCI_CPUIDLE_MAX_STATES;
+	ddr2_pdown = pdata->ddr2_pdown;
 
 	ret = cpuidle_register_driver(&davinci_idle_driver);
 	if (ret) {
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index cb0956bc96ed..c7002d40a9b0 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -97,14 +97,9 @@ static void default_idle(void)
 	local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL_GPL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.  The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -122,10 +117,10 @@ void cpu_idle(void)
 		local_irq_disable();
 		if (!need_resched()) {
 			stop_critical_timings();
-			pm_idle();
+			default_idle();
 			start_critical_timings();
 			/*
-			 * pm_idle functions should always return
+			 * default_idle functions should always return
 			 * with IRQs enabled.
 			 */
 			WARN_ON(irqs_disabled());
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 3e16ad9b0a99..8061426b7df5 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -39,12 +39,6 @@ int nr_l1stack_tasks;
 void *l1_stack_base;
 unsigned long l1_stack_len;
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void) = NULL;
-EXPORT_SYMBOL(pm_idle);
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
@@ -81,7 +75,6 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
 
 #ifdef CONFIG_HOTPLUG_CPU
 		if (cpu_is_offline(smp_processor_id()))
diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c
index 7f65be6f7f17..104ff4dd9b98 100644
--- a/arch/cris/kernel/process.c
+++ b/arch/cris/kernel/process.c
@@ -54,11 +54,6 @@ void enable_hlt(void)
 
 EXPORT_SYMBOL(enable_hlt);
 
-/*
- * The following aren't currently used.
- */
-void (*pm_idle)(void);
-
 extern void default_idle(void);
 
 void (*pm_power_off)(void);
@@ -77,16 +72,12 @@ void cpu_idle (void)
 	while (1) {
 		rcu_idle_enter();
 		while (!need_resched()) {
-			void (*idle)(void);
 			/*
 			 * Mark this as an RCU critical section so that
 			 * synchronize_kernel() in the unload path waits
 			 * for our completion.
 			 */
-			idle = pm_idle;
-			if (!idle)
-				idle = default_idle;
-			idle();
+			default_idle();
 		}
 		rcu_idle_exit();
 		schedule_preempt_disabled();
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 31360cbbd5f8..e34f565f595a 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -57,8 +57,6 @@ void (*ia64_mark_idle)(int);
 
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
 void (*pm_power_off) (void);
 EXPORT_SYMBOL(pm_power_off);
 
@@ -301,7 +299,6 @@ cpu_idle (void)
 			if (mark_idle)
 				(*mark_idle)(1);
 
-			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
 			(*idle)();
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index aaefd9b94f2f..2029cc0d2fc6 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -1051,7 +1051,6 @@ cpu_init (void)
 		max_num_phys_stacked = num_phys_stacked;
 	}
 	platform_cpu_init();
-	pm_idle = default_idle;
 }
 
 void __init
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 765d0f57c787..bde899e155d3 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -44,36 +44,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return tsk->thread.lr;
 }
 
-/*
- * Powermanagement idle function, if any..
- */
-static void (*pm_idle)(void) = NULL;
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
 /*
- * We use this is we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
-	/* M32R_FIXME: Please use "cpu_sleep" mode. */
-	cpu_relax();
-}
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
-	/* M32R_FIXME */
-	cpu_relax();
-}
-
-/*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
  * low exit latency (ie sit in a loop waiting for
@@ -84,14 +58,8 @@ void cpu_idle (void)
 	/* endless idle loop with no priority at all */
 	while (1) {
 		rcu_idle_enter();
-		while (!need_resched()) {
-			void (*idle)(void) = pm_idle;
-
-			if (!idle)
-				idle = default_idle;
-
-			idle();
-		}
+		while (!need_resched())
+			cpu_relax();
 		rcu_idle_exit();
 		schedule_preempt_disabled();
 	}
@@ -120,21 +88,6 @@ void machine_power_off(void)
 	/* M32R_FIXME */
 }
 
-static int __init idle_setup (char *str)
-{
-	if (!strncmp(str, "poll", 4)) {
-		printk("using poll in idle threads.\n");
-		pm_idle = poll_idle;
-	} else if (!strncmp(str, "sleep", 4)) {
-		printk("using sleep in idle threads.\n");
-		pm_idle = default_idle;
-	}
-
-	return 1;
-}
-
-__setup("idle=", idle_setup);
-
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index a5b74f729e5b..6ff2dcff3410 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -41,7 +41,6 @@ void show_regs(struct pt_regs *regs)
 		regs->msr, regs->ear, regs->esr, regs->fsr);
 }
 
-void (*pm_idle)(void);
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
@@ -98,8 +97,6 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
-
 		if (!idle)
 			idle = default_idle;
 
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index eb09f5a552ff..84f4e97e3074 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -37,12 +37,6 @@
 #include "internal.h"
 
 /*
- * power management idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
-/*
  * return saved PC of a blocked thread.
  */
 unsigned long thread_saved_pc(struct task_struct *tsk)
@@ -113,7 +107,6 @@ void cpu_idle(void)
 		void (*idle)(void);
 
 		smp_rmb();
-		idle = pm_idle;
 		if (!idle) {
 #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
 			idle = poll_idle;
diff --git a/arch/openrisc/kernel/idle.c b/arch/openrisc/kernel/idle.c
index 7d618feb1b72..5e8a3b6d6bc6 100644
--- a/arch/openrisc/kernel/idle.c
+++ b/arch/openrisc/kernel/idle.c
@@ -39,11 +39,6 @@
 
 void (*powersave) (void) = NULL;
 
-static inline void pm_idle(void)
-{
-	barrier();
-}
-
 void cpu_idle(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);
diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c
index 0c910163caa3..3d5a1b387cc0 100644
--- a/arch/sh/kernel/idle.c
+++ b/arch/sh/kernel/idle.c
@@ -22,7 +22,7 @@
 #include <asm/smp.h>
 #include <asm/bl_bit.h>
 
-void (*pm_idle)(void);
+static void (*sh_idle)(void);
 
 static int hlt_counter;
 
@@ -103,9 +103,9 @@ void cpu_idle(void)
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
 			if (cpuidle_idle_call())
-				pm_idle();
+				sh_idle();
 			/*
-			 * Sanity check to ensure that pm_idle() returns
+			 * Sanity check to ensure that sh_idle() returns
 			 * with IRQs enabled
 			 */
 			WARN_ON(irqs_disabled());
@@ -123,13 +123,13 @@ void __init select_idle_routine(void)
 	/*
 	 * If a platform has set its own idle routine, leave it alone.
 	 */
-	if (pm_idle)
+	if (sh_idle)
 		return;
 
 	if (hlt_works())
-		pm_idle = default_idle;
+		sh_idle = default_idle;
 	else
-		pm_idle = poll_idle;
+		sh_idle = poll_idle;
 }
 
 void stop_this_cpu(void *unused)
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index c1e01914fd98..2c7baa4c4505 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -118,6 +118,7 @@ extern unsigned long get_wchan(struct task_struct *);
 extern struct task_struct *last_task_used_math;
 
 #define cpu_relax()	barrier()
+extern void (*sparc_idle)(void);
 
 #endif
 
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
index 348fa1aeabce..eefda32b595e 100644
--- a/arch/sparc/kernel/apc.c
+++ b/arch/sparc/kernel/apc.c
@@ -20,6 +20,7 @@
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
 #include <asm/apc.h>
+#include <asm/processor.h>
 
 /* Debugging
  *
@@ -158,7 +159,7 @@ static int apc_probe(struct platform_device *op)
 
 	/* Assign power management IDLE handler */
 	if (!apc_no_idle)
-		pm_idle = apc_swift_idle;
+		sparc_idle = apc_swift_idle;
 
 	printk(KERN_INFO "%s: power management initialized%s\n",
 	       APC_DEVNAME, apc_no_idle ? " (CPU idle disabled)" : "");
diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c
index 4e174321097d..708bca435219 100644
--- a/arch/sparc/kernel/leon_pmc.c
+++ b/arch/sparc/kernel/leon_pmc.c
@@ -9,6 +9,7 @@
 #include <asm/leon_amba.h>
 #include <asm/cpu_type.h>
 #include <asm/leon.h>
+#include <asm/processor.h>
 
 /* List of Systems that need fixup instructions around power-down instruction */
 unsigned int pmc_leon_fixup_ids[] = {
@@ -69,9 +70,9 @@ static int __init leon_pmc_install(void)
 	if (sparc_cpu_model == sparc_leon) {
 		/* Assign power management IDLE handler */
 		if (pmc_leon_need_fixup())
-			pm_idle = pmc_leon_idle_fixup;
+			sparc_idle = pmc_leon_idle_fixup;
 		else
-			pm_idle = pmc_leon_idle;
+			sparc_idle = pmc_leon_idle;
 
 		printk(KERN_INFO "leon: power management initialized\n");
 	}
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
index dcbb62f63068..8b7297faca79 100644
--- a/arch/sparc/kernel/pmc.c
+++ b/arch/sparc/kernel/pmc.c
@@ -17,6 +17,7 @@
 #include <asm/oplib.h>
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
+#include <asm/processor.h>
 
 /* Debug
  *
@@ -63,7 +64,7 @@ static int pmc_probe(struct platform_device *op)
 
 #ifndef PMC_NO_IDLE
 	/* Assign power management IDLE handler */
-	pm_idle = pmc_swift_idle;
+	sparc_idle = pmc_swift_idle;
 #endif
 
 	printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME);
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index be8e862badaf..62eede13831a 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -43,8 +43,7 @@
  * Power management idle function
  * Set in pm platform drivers (apc.c and pmc.c)
  */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
+void (*sparc_idle)(void);
 
 /*
  * Power-off handler instantiation for pm.h compliance
@@ -75,8 +74,8 @@ void cpu_idle(void)
 	/* endless idle loop with no priority at all */
 	for (;;) {
 		while (!need_resched()) {
-			if (pm_idle)
-				(*pm_idle)();
+			if (sparc_idle)
+				(*sparc_idle)();
 			else
 				cpu_relax();
 		}
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index 62bad9fed03e..872d7e22d847 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -45,11 +45,6 @@ static const char * const processor_modes[] = {
45 "UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR" 45 "UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR"
46}; 46};
47 47
48/*
49 * The idle thread, has rather strange semantics for calling pm_idle,
50 * but this is what x86 does and we need to do the same, so that
51 * things like cpuidle get called in the same way.
52 */
53void cpu_idle(void) 48void cpu_idle(void)
54{ 49{
55 /* endless idle loop with no priority at all */ 50 /* endless idle loop with no priority at all */
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4f7c2da2f9f8..c03309f697f1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1922,6 +1922,7 @@ config APM_DO_ENABLE
 	  this feature.
 
 config APM_CPU_IDLE
+	depends on CPU_IDLE
 	bool "Make CPU Idle calls when idle"
 	---help---
 	  Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index bcdff997668c..2f366d0ac6b4 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -4,7 +4,8 @@
 #define MWAIT_SUBSTATE_MASK	0xf
 #define MWAIT_CSTATE_MASK	0xf
 #define MWAIT_SUBSTATE_SIZE	4
-#define MWAIT_MAX_NUM_CSTATES	8
+#define MWAIT_HINT2CSTATE(hint)	(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)
 
 #define CPUID_MWAIT_LEAF		5
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
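
The two helpers added above replace the old MWAIT_MAX_NUM_CSTATES bound: an MWAIT hint encodes the target C-state in its top nibble and the sub-state in its bottom nibble. A minimal user-space sketch of the decode (the printf harness is illustrative only; the macros are copied from the hunk above):

	#include <stdio.h>

	#define MWAIT_SUBSTATE_MASK	0xf
	#define MWAIT_CSTATE_MASK	0xf
	#define MWAIT_SUBSTATE_SIZE	4
	#define MWAIT_HINT2CSTATE(hint)	(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
	#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)

	int main(void)
	{
		unsigned int hint = 0x21;	/* hypothetical hint: C-state 2, sub-state 1 */

		/* top nibble selects the C-state, bottom nibble the sub-state */
		printf("cstate=%u substate=%u\n",
		       MWAIT_HINT2CSTATE(hint), MWAIT_HINT2SUBSTATE(hint));
		return 0;	/* prints: cstate=2 substate=1 */
	}

The same packing convention is what intel_idle's new flg2MWAIT()/MWAIT2flg() macros (further down in this commit) rely on when they store the hint in the top byte of each state's flags.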
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc85..b9e7d279f8ef 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -89,7 +89,6 @@ struct cpuinfo_x86 {
 	char			wp_works_ok;	/* It doesn't on 386's */
 
 	/* Problems on some 486Dx4's and old 386's: */
-	char			hlt_works_ok;
 	char			hard_math;
 	char			rfu;
 	char			fdiv_bug;
@@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 
 extern const struct seq_operations cpuinfo_op;
 
-static inline int hlt_works(int cpu)
-{
-#ifdef CONFIG_X86_32
-	return cpu_data(cpu).hlt_works_ok;
-#else
-	return 1;
-#endif
-}
-
 #define cache_line_size()	(boot_cpu_data.x86_cache_alignment)
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
@@ -725,7 +715,7 @@ extern unsigned long boot_option_idle_override;
 extern bool			amd_e400_c1e_detected;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
-			 IDLE_POLL, IDLE_FORCE_MWAIT};
+			 IDLE_POLL};
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
@@ -998,7 +988,11 @@ extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
-bool set_pm_idle_to_default(void);
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void);
+#else
+#define xen_set_default_idle 0
+#endif
 
 void stop_this_cpu(void *dummy);
 
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 433a59fb1a74..8d013f5153bc 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -103,6 +103,8 @@
 #define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
 
+#define MSR_IA32_POWER_CTL		0x000001fc
+
 #define MSR_IA32_MC0_CTL		0x00000400
 #define MSR_IA32_MC0_STATUS		0x00000401
 #define MSR_IA32_MC0_ADDR		0x00000402
@@ -272,6 +274,7 @@
 #define MSR_IA32_PLATFORM_ID		0x00000017
 #define MSR_IA32_EBL_CR_POWERON		0x0000002a
 #define MSR_EBC_FREQUENCY_ID		0x0000002c
+#define MSR_SMI_COUNT			0x00000034
 #define MSR_IA32_FEATURE_CONTROL	0x0000003a
 #define MSR_IA32_TSC_ADJUST		0x0000003b
 
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e43503..9f4bc6a1164d 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -232,6 +232,7 @@
 #include <linux/acpi.h>
 #include <linux/syscore_ops.h>
 #include <linux/i8253.h>
+#include <linux/cpuidle.h>
 
 #include <asm/uaccess.h>
 #include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
  * idle percentage above which bios idle calls are done
  */
 #ifdef CONFIG_APM_CPU_IDLE
-#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
 #define DEFAULT_IDLE_THRESHOLD	95
 #else
 #define DEFAULT_IDLE_THRESHOLD	100
 #endif
 #define DEFAULT_IDLE_PERIOD	(100 / 3)
 
+static int apm_cpu_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index);
+
+static struct cpuidle_driver apm_idle_driver = {
+	.name = "apm_idle",
+	.owner = THIS_MODULE,
+	.en_core_tk_irqen = 1,
+	.states = {
+		{ /* entry 0 is for polling */ },
+		{ /* entry 1 is for APM idle */
+			.name = "APM",
+			.desc = "APM idle",
+			.flags = CPUIDLE_FLAG_TIME_VALID,
+			.exit_latency = 250,	/* WAG */
+			.target_residency = 500,	/* WAG */
+			.enter = &apm_cpu_idle
+		},
+	},
+	.state_count = 2,
+};
+
+static struct cpuidle_device apm_cpuidle_device;
+
 /*
  * Local variables
  */
@@ -377,7 +400,6 @@ static struct {
 static int clock_slowed;
 static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
 static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
 static int suspends_pending;
 static int standbys_pending;
 static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
 #define IDLE_CALC_LIMIT	(HZ * 100)
 #define IDLE_LEAKY_MAX	16
 
-static void (*original_pm_idle)(void) __read_mostly;
-
 /**
  * apm_cpu_idle		-	cpu idling for APM capable Linux
  *
@@ -894,7 +914,8 @@ static void (*original_pm_idle)(void) __read_mostly;
  * Furthermore it calls the system default idle routine.
  */
 
-static void apm_cpu_idle(void)
+static int apm_cpu_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index)
 {
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
@@ -904,7 +925,6 @@ static void apm_cpu_idle(void)
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
 	unsigned int bucket;
 
-	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
@@ -950,10 +970,7 @@ recalc:
 				break;
 			}
 		}
-		if (original_pm_idle)
-			original_pm_idle();
-		else
-			default_idle();
+		default_idle();
 		local_irq_disable();
 		jiffies_since_last_check = jiffies - last_jiffies;
 		if (jiffies_since_last_check > idle_period)
@@ -963,7 +980,7 @@ recalc:
 	if (apm_idle_done)
 		apm_do_busy();
 
-	local_irq_enable();
+	return index;
 }
 
 /**
@@ -2381,9 +2398,9 @@ static int __init apm_init(void)
 	if (HZ != 100)
 		idle_period = (idle_period * HZ) / 100;
 	if (idle_threshold < 100) {
-		original_pm_idle = pm_idle;
-		pm_idle  = apm_cpu_idle;
-		set_pm_idle = 1;
+		if (!cpuidle_register_driver(&apm_idle_driver))
+			if (cpuidle_register_device(&apm_cpuidle_device))
+				cpuidle_unregister_driver(&apm_idle_driver);
 	}
 
 	return 0;
@@ -2393,15 +2410,9 @@ static void __exit apm_exit(void)
 {
 	int error;
 
-	if (set_pm_idle) {
-		pm_idle = original_pm_idle;
-		/*
-		 * We are about to unload the current idle thread pm callback
-		 * (pm_idle), Wait for all processors to update cached/local
-		 * copies of pm_idle before proceeding.
-		 */
-		kick_all_cpus_sync();
-	}
+	cpuidle_unregister_device(&apm_cpuidle_device);
+	cpuidle_unregister_driver(&apm_idle_driver);
+
 	if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
 	    && (apm_info.connection_version > 0x0100)) {
 		error = apm_engage_power_management(APM_DEVICE_ALL, 0);
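
The apm_32.c conversion above follows the standard cpuidle registration pattern: register the driver first, then the device, and unwind in reverse order on failure or module exit. A minimal sketch of that pattern against the same kernel APIs (my_idle_driver/my_idle_device and the empty state table are illustrative names, not part of this patch):

	#include <linux/cpuidle.h>
	#include <linux/module.h>

	static struct cpuidle_driver my_idle_driver = {
		.name = "my_idle",	/* hypothetical driver name */
		.owner = THIS_MODULE,
	};
	static struct cpuidle_device my_idle_device;

	static int __init my_idle_init(void)
	{
		int ret = cpuidle_register_driver(&my_idle_driver);

		if (ret)
			return ret;
		ret = cpuidle_register_device(&my_idle_device);
		if (ret)
			cpuidle_unregister_driver(&my_idle_driver);	/* unwind */
		return ret;
	}

	static void __exit my_idle_exit(void)
	{
		/* tear down in reverse order of registration */
		cpuidle_unregister_device(&my_idle_device);
		cpuidle_unregister_driver(&my_idle_driver);
	}

	module_init(my_idle_init);
	module_exit(my_idle_exit);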
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 92dfec986a48..af6455e3fcc9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
 #include <asm/paravirt.h>
 #include <asm/alternative.h>
 
-static int __init no_halt(char *s)
-{
-	WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
-	boot_cpu_data.hlt_works_ok = 0;
-	return 1;
-}
-
-__setup("no-hlt", no_halt);
-
 static int __init no_387(char *s)
 {
 	boot_cpu_data.hard_math = 0;
@@ -89,23 +80,6 @@ static void __init check_fpu(void)
89 pr_warn("Hmm, FPU with FDIV bug\n"); 80 pr_warn("Hmm, FPU with FDIV bug\n");
90} 81}
91 82
92static void __init check_hlt(void)
93{
94 if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
95 return;
96
97 pr_info("Checking 'hlt' instruction... ");
98 if (!boot_cpu_data.hlt_works_ok) {
99 pr_cont("disabled\n");
100 return;
101 }
102 halt();
103 halt();
104 halt();
105 halt();
106 pr_cont("OK\n");
107}
108
109/* 83/*
110 * Check whether we are able to run this kernel safely on SMP. 84 * Check whether we are able to run this kernel safely on SMP.
111 * 85 *
@@ -129,7 +103,6 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_hlt();
 	init_utsname()->machine[1] =
 	    '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 3286a92e662a..e280253f6f94 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
 	seq_printf(m,
 		   "fdiv_bug\t: %s\n"
-		   "hlt_bug\t\t: %s\n"
 		   "f00f_bug\t: %s\n"
 		   "coma_bug\t: %s\n"
 		   "fpu\t\t: %s\n"
@@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
36 "cpuid level\t: %d\n" 35 "cpuid level\t: %d\n"
37 "wp\t\t: %s\n", 36 "wp\t\t: %s\n",
38 c->fdiv_bug ? "yes" : "no", 37 c->fdiv_bug ? "yes" : "no",
39 c->hlt_works_ok ? "no" : "yes",
40 c->f00f_bug ? "yes" : "no", 38 c->f00f_bug ? "yes" : "no",
41 c->coma_bug ? "yes" : "no", 39 c->coma_bug ? "yes" : "no",
42 c->hard_math ? "yes" : "no", 40 c->hard_math ? "yes" : "no",
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dcfc1f410dc4..14ae10031ff0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(pm_idle);
-#endif
+static void (*x86_idle)(void);
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
 			rcu_idle_enter();
 
 			if (cpuidle_idle_call())
-				pm_idle();
+				x86_idle();
 
 			rcu_idle_exit();
 			start_critical_timings();
@@ -394,14 +388,16 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
-bool set_pm_idle_to_default(void)
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void)
 {
-	bool ret = !!pm_idle;
+	bool ret = !!x86_idle;
 
-	pm_idle = default_idle;
+	x86_idle = default_idle;
 
 	return ret;
 }
+#endif
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -411,29 +407,8 @@ void stop_this_cpu(void *dummy)
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
 
-	for (;;) {
-		if (hlt_works(smp_processor_id()))
-			halt();
-	}
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-	if (!need_resched()) {
-		trace_cpu_idle_rcuidle(1, smp_processor_id());
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__sti_mwait(0, 0);
-		else
-			local_irq_enable();
-		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
-	} else
-		local_irq_enable();
+	for (;;)
+		halt();
 }
 
 /*
@@ -450,53 +425,6 @@ static void poll_idle(void)
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
-/*
- * mwait selection logic:
- *
- * It depends on the CPU. For AMD CPUs that support MWAIT this is
- * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
- * then depend on a clock divisor and current Pstate of the core. If
- * all cores of a processor are in halt state (C1) the processor can
- * enter the C1E (C1 enhanced) state. If mwait is used this will never
- * happen.
- *
- * idle=mwait overrides this decision and forces the usage of mwait.
- */
-
-#define MWAIT_INFO			0x05
-#define MWAIT_ECX_EXTENDED_INFO		0x01
-#define MWAIT_EDX_C1			0xf0
-
-int mwait_usable(const struct cpuinfo_x86 *c)
-{
-	u32 eax, ebx, ecx, edx;
-
-	/* Use mwait if idle=mwait boot option is given */
-	if (boot_option_idle_override == IDLE_FORCE_MWAIT)
-		return 1;
-
-	/*
-	 * Any idle= boot option other than idle=mwait means that we must not
-	 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
-	 */
-	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
-		return 0;
-
-	if (c->cpuid_level < MWAIT_INFO)
-		return 0;
-
-	cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
-	/* Check, whether EDX has extended info about MWAIT */
-	if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
-		return 1;
-
-	/*
-	 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
-	 * C1 supports MWAIT
-	 */
-	return (edx & MWAIT_EDX_C1);
-}
-
 bool amd_e400_c1e_detected;
 EXPORT_SYMBOL(amd_e400_c1e_detected);
 
@@ -561,31 +489,24 @@ static void amd_e400_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+	if (x86_idle == poll_idle && smp_num_siblings > 1)
 		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
-	}
 #endif
-	if (pm_idle)
+	if (x86_idle)
 		return;
 
-	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-		/*
-		 * One CPU supports mwait => All CPUs supports mwait
-		 */
-		pr_info("using mwait in idle threads\n");
-		pm_idle = mwait_idle;
-	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
+	if (cpu_has_amd_erratum(amd_erratum_400)) {
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
-		pm_idle = amd_e400_idle;
+		x86_idle = amd_e400_idle;
 	} else
-		pm_idle = default_idle;
+		x86_idle = default_idle;
 }
 
 void __init init_amd_e400_c1e_mask(void)
 {
 	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
-	if (pm_idle == amd_e400_idle)
+	if (x86_idle == amd_e400_idle)
 		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
 }
 
@@ -596,11 +517,8 @@ static int __init idle_setup(char *str)
 
 	if (!strcmp(str, "poll")) {
 		pr_info("using polling idle threads\n");
-		pm_idle = poll_idle;
+		x86_idle = poll_idle;
 		boot_option_idle_override = IDLE_POLL;
-	} else if (!strcmp(str, "mwait")) {
-		boot_option_idle_override = IDLE_FORCE_MWAIT;
-		WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
 	} else if (!strcmp(str, "halt")) {
 		/*
 		 * When the boot option of idle=halt is added, halt is
@@ -609,7 +527,7 @@ static int __init idle_setup(char *str)
 		 * To continue to load the CPU idle driver, don't touch
 		 * the boot_option_idle_override.
 		 */
-		pm_idle = default_idle;
+		x86_idle = default_idle;
 		boot_option_idle_override = IDLE_HALT;
 	} else if (!strcmp(str, "nomwait")) {
 		/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ed0fe385289d..a6ceaedc396a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void)
 	void *mwait_ptr;
 	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
+	if (!this_cpu_has(X86_FEATURE_MWAIT))
 		return;
 	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8971a26d21ab..94eac5c85cdc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -556,12 +556,9 @@ void __init xen_arch_setup(void)
 				      COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
 	/* Set up idle, making sure it calls safe_halt() pvop */
-#ifdef CONFIG_X86_32
-	boot_cpu_data.hlt_works_ok = 1;
-#endif
 	disable_cpuidle();
 	disable_cpufreq();
-	WARN_ON(set_pm_idle_to_default());
+	WARN_ON(xen_set_default_idle());
 	fiddle_vdso();
 #ifdef CONFIG_NUMA
 	numa_off = 1;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index e606e3603d81..fc95308e9a11 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -28,19 +28,12 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
-#include <linux/moduleparam.h>
 #include <linux/sched.h>	/* need_resched() */
-#include <linux/pm_qos.h>
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
-#include <linux/irqflags.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -52,23 +45,14 @@
 #include <asm/apic.h>
 #endif
 
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
 #include <acpi/acpi_bus.h>
 #include <acpi/processor.h>
-#include <asm/processor.h>
 
 #define PREFIX "ACPI: "
 
 #define ACPI_PROCESSOR_CLASS            "processor"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_idle");
-#define PM_TIMER_TICK_NS		(1000000000ULL/ACPI_PM_TIMER_FREQUENCY)
-#define C2_OVERHEAD			1	/* 1us */
-#define C3_OVERHEAD			1	/* 1us */
-#define PM_TIMER_TICKS_TO_US(p)		\
-	(((p) * 1000)/(ACPI_PM_TIMER_FREQUENCY/1000))
 
 static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
 module_param(max_cstate, uint, 0000);
@@ -82,10 +66,11 @@ module_param(latency_factor, uint, 0644);
 
 static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device);
 
+static struct acpi_processor_cx *acpi_cstate[CPUIDLE_STATE_MAX];
+
 static int disabled_by_idle_boot_param(void)
 {
 	return boot_option_idle_override == IDLE_POLL ||
-		boot_option_idle_override == IDLE_FORCE_MWAIT ||
 		boot_option_idle_override == IDLE_HALT;
 }
 
@@ -737,8 +722,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 			      struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	pr = __this_cpu_read(processors);
 
@@ -761,8 +745,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
  */
 static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 {
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	ACPI_FLUSH_CPU_CACHE();
 
@@ -792,8 +775,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 			      struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	pr = __this_cpu_read(processors);
 
@@ -851,8 +833,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 			      struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	pr = __this_cpu_read(processors);
 
@@ -944,13 +925,13 @@ struct cpuidle_driver acpi_idle_driver = {
  * device i.e. per-cpu data
  *
  * @pr: the ACPI processor
+ * @dev : the cpuidle device
  */
-static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
+static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
+					   struct cpuidle_device *dev)
 {
 	int i, count = CPUIDLE_DRIVER_STATE_START;
 	struct acpi_processor_cx *cx;
-	struct cpuidle_state_usage *state_usage;
-	struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id);
 
 	if (!pr->flags.power_setup_done)
 		return -EINVAL;
@@ -969,7 +950,6 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
 
 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
 		cx = &pr->power.states[i];
-		state_usage = &dev->states_usage[count];
 
 		if (!cx->valid)
 			continue;
@@ -980,8 +960,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
 		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
 			continue;
 #endif
-
-		cpuidle_set_statedata(state_usage, cx);
+		acpi_cstate[count] = cx;
 
 		count++;
 		if (count == CPUIDLE_STATE_MAX)
@@ -1105,7 +1084,7 @@ int acpi_processor_hotplug(struct acpi_processor *pr)
 		cpuidle_disable_device(dev);
 		acpi_processor_get_power_info(pr);
 		if (pr->flags.power) {
-			acpi_processor_setup_cpuidle_cx(pr);
+			acpi_processor_setup_cpuidle_cx(pr, dev);
 			ret = cpuidle_enable_device(dev);
 		}
 		cpuidle_resume_and_unlock();
@@ -1163,8 +1142,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 			continue;
 		acpi_processor_get_power_info(_pr);
 		if (_pr->flags.power) {
-			acpi_processor_setup_cpuidle_cx(_pr);
 			dev = per_cpu(acpi_cpuidle_device, cpu);
+			acpi_processor_setup_cpuidle_cx(_pr, dev);
 			cpuidle_enable_device(dev);
 		}
 	}
@@ -1233,7 +1212,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr)
 			return -ENOMEM;
 		per_cpu(acpi_cpuidle_device, pr->id) = dev;
 
-		acpi_processor_setup_cpuidle_cx(pr);
+		acpi_processor_setup_cpuidle_cx(pr, dev);
 
 		/* Register per-cpu cpuidle_device. Cpuidle driver
 		 * must already be registered before registering device
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 2df9414a72f7..5d6675013864 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = {
 	.en_core_tk_irqen = 1,
 };
 /* intel_idle.max_cstate=0 disables driver */
-static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
+static int max_cstate = CPUIDLE_STATE_MAX - 1;
 
 static unsigned int mwait_substates;
 
@@ -90,6 +90,7 @@ struct idle_cpu {
 	 * Indicate which enable bits to clear here.
 	 */
 	unsigned long auto_demotion_disable_flags;
+	bool disable_promotion_to_c1e;
 };
 
 static const struct idle_cpu *icpu;
@@ -109,162 +110,206 @@ static struct cpuidle_state *cpuidle_state_table;
 #define CPUIDLE_FLAG_TLB_FLUSHED	0x10000
 
 /*
+ * MWAIT takes an 8-bit "hint" in EAX "suggesting"
+ * the C-state (top nibble) and sub-state (bottom nibble)
+ * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
+ *
+ * We store the hint at the top of our "flags" for each state.
+ */
+#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
+#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+
+/*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
  * Thus C0 is a dummy.
  */
-static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
-	{ /* MWAIT C0 */ },
-	{ /* MWAIT C1 */
+static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = {
+	{
 		.name = "C1-NHM",
 		.desc = "MWAIT 0x00",
-		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
 		.exit_latency = 3,
 		.target_residency = 6,
 		.enter = &intel_idle },
-	{ /* MWAIT C2 */
+	{
+		.name = "C1E-NHM",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
 		.name = "C3-NHM",
 		.desc = "MWAIT 0x10",
-		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 20,
 		.target_residency = 80,
 		.enter = &intel_idle },
-	{ /* MWAIT C3 */
+	{
 		.name = "C6-NHM",
 		.desc = "MWAIT 0x20",
-		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 200,
 		.target_residency = 800,
 		.enter = &intel_idle },
+	{
+		.enter = NULL }
 };
 
-static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
-	{ /* MWAIT C0 */ },
-	{ /* MWAIT C1 */
+static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = {
+	{
 		.name = "C1-SNB",
 		.desc = "MWAIT 0x00",
-		.flags = CPUIDLE_FLAG_TIME_VALID,
-		.exit_latency = 1,
-		.target_residency = 1,
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 2,
+		.target_residency = 2,
 		.enter = &intel_idle },
-	{ /* MWAIT C2 */
+	{
+		.name = "C1E-SNB",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
 		.name = "C3-SNB",
 		.desc = "MWAIT 0x10",
-		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 80,
 		.target_residency = 211,
 		.enter = &intel_idle },
-	{ /* MWAIT C3 */
+	{
 		.name = "C6-SNB",
 		.desc = "MWAIT 0x20",
-		.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 104,
162 .target_residency = 345, 187 .target_residency = 345,
163 .enter = &intel_idle }, 188 .enter = &intel_idle },
164 { /* MWAIT C4 */ 189 {
165 .name = "C7-SNB", 190 .name = "C7-SNB",
166 .desc = "MWAIT 0x30", 191 .desc = "MWAIT 0x30",
167 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, 192 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
168 .exit_latency = 109, 193 .exit_latency = 109,
169 .target_residency = 345, 194 .target_residency = 345,
170 .enter = &intel_idle }, 195 .enter = &intel_idle },
196 {
197 .enter = NULL }
171}; 198};
172 199
173static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { 200static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = {
174 { /* MWAIT C0 */ }, 201 {
175 { /* MWAIT C1 */
176 .name = "C1-IVB", 202 .name = "C1-IVB",
177 .desc = "MWAIT 0x00", 203 .desc = "MWAIT 0x00",
178 .flags = CPUIDLE_FLAG_TIME_VALID, 204 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
179 .exit_latency = 1, 205 .exit_latency = 1,
180 .target_residency = 1, 206 .target_residency = 1,
181 .enter = &intel_idle }, 207 .enter = &intel_idle },
182 { /* MWAIT C2 */ 208 {
209 .name = "C1E-IVB",
210 .desc = "MWAIT 0x01",
211 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
212 .exit_latency = 10,
213 .target_residency = 20,
214 .enter = &intel_idle },
215 {
183 .name = "C3-IVB", 216 .name = "C3-IVB",
184 .desc = "MWAIT 0x10", 217 .desc = "MWAIT 0x10",
185 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, 218 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
186 .exit_latency = 59, 219 .exit_latency = 59,
187 .target_residency = 156, 220 .target_residency = 156,
188 .enter = &intel_idle }, 221 .enter = &intel_idle },
189 { /* MWAIT C3 */ 222 {
190 .name = "C6-IVB", 223 .name = "C6-IVB",
191 .desc = "MWAIT 0x20", 224 .desc = "MWAIT 0x20",
192 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, 225 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
193 .exit_latency = 80, 226 .exit_latency = 80,
194 .target_residency = 300, 227 .target_residency = 300,
195 .enter = &intel_idle }, 228 .enter = &intel_idle },
196 { /* MWAIT C4 */ 229 {
197 .name = "C7-IVB", 230 .name = "C7-IVB",
198 .desc = "MWAIT 0x30", 231 .desc = "MWAIT 0x30",
199 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, 232 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
200 .exit_latency = 87, 233 .exit_latency = 87,
201 .target_residency = 300, 234 .target_residency = 300,
202 .enter = &intel_idle }, 235 .enter = &intel_idle },
236 {
237 .enter = NULL }
203}; 238};
204 239
205static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { 240static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = {
206 { /* MWAIT C0 */ }, 241 {
207 { /* MWAIT C1 */ 242 .name = "C1-HSW",
208 .name = "C1-ATM",
209 .desc = "MWAIT 0x00", 243 .desc = "MWAIT 0x00",
210 .flags = CPUIDLE_FLAG_TIME_VALID, 244 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
211 .exit_latency = 1, 245 .exit_latency = 2,
212 .target_residency = 4, 246 .target_residency = 2,
247 .enter = &intel_idle },
248 {
249 .name = "C1E-HSW",
250 .desc = "MWAIT 0x01",
251 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
252 .exit_latency = 10,
253 .target_residency = 20,
254 .enter = &intel_idle },
255 {
256 .name = "C3-HSW",
257 .desc = "MWAIT 0x10",
258 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
259 .exit_latency = 33,
260 .target_residency = 100,
261 .enter = &intel_idle },
262 {
263 .name = "C6-HSW",
264 .desc = "MWAIT 0x20",
265 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
266 .exit_latency = 133,
267 .target_residency = 400,
268 .enter = &intel_idle },
269 {
270 .name = "C7s-HSW",
271 .desc = "MWAIT 0x32",
272 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
273 .exit_latency = 166,
274 .target_residency = 500,
275 .enter = &intel_idle },
276 {
277 .enter = NULL }
278};
279
280static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = {
281 {
282 .name = "C1E-ATM",
283 .desc = "MWAIT 0x00",
284 .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
285 .exit_latency = 10,
286 .target_residency = 20,
213 .enter = &intel_idle }, 287 .enter = &intel_idle },
214 { /* MWAIT C2 */ 288 {
215 .name = "C2-ATM", 289 .name = "C2-ATM",
216 .desc = "MWAIT 0x10", 290 .desc = "MWAIT 0x10",
217 .flags = CPUIDLE_FLAG_TIME_VALID, 291 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,
218 .exit_latency = 20, 292 .exit_latency = 20,
219 .target_residency = 80, 293 .target_residency = 80,
220 .enter = &intel_idle }, 294 .enter = &intel_idle },
221 { /* MWAIT C3 */ }, 295 {
222 { /* MWAIT C4 */
223 .name = "C4-ATM", 296 .name = "C4-ATM",
224 .desc = "MWAIT 0x30", 297 .desc = "MWAIT 0x30",
225 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, 298 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
226 .exit_latency = 100, 299 .exit_latency = 100,
227 .target_residency = 400, 300 .target_residency = 400,
228 .enter = &intel_idle }, 301 .enter = &intel_idle },
229 { /* MWAIT C5 */ }, 302 {
230 { /* MWAIT C6 */
231 .name = "C6-ATM", 303 .name = "C6-ATM",
232 .desc = "MWAIT 0x52", 304 .desc = "MWAIT 0x52",
233 .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, 305 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
234 .exit_latency = 140, 306 .exit_latency = 140,
235 .target_residency = 560, 307 .target_residency = 560,
236 .enter = &intel_idle }, 308 .enter = &intel_idle },
309 {
310 .enter = NULL }
237}; 311};
238 312
239static long get_driver_data(int cstate)
240{
241 int driver_data;
242 switch (cstate) {
243
244 case 1: /* MWAIT C1 */
245 driver_data = 0x00;
246 break;
247 case 2: /* MWAIT C2 */
248 driver_data = 0x10;
249 break;
250 case 3: /* MWAIT C3 */
251 driver_data = 0x20;
252 break;
253 case 4: /* MWAIT C4 */
254 driver_data = 0x30;
255 break;
256 case 5: /* MWAIT C5 */
257 driver_data = 0x40;
258 break;
259 case 6: /* MWAIT C6 */
260 driver_data = 0x52;
261 break;
262 default:
263 driver_data = 0x00;
264 }
265 return driver_data;
266}
267
268/** 313/**
269 * intel_idle 314 * intel_idle
270 * @dev: cpuidle_device 315 * @dev: cpuidle_device
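With the MWAIT hint carried in bits 31:24 of each state's .flags, the per-index get_driver_data() table removed above becomes unnecessary. A minimal, runnable sketch of the round trip through the two new macros; the 0x02 flag bit stands in for one of the low CPUIDLE flags and is illustrative only:

#include <assert.h>
#include <stdio.h>

#define MWAIT2flg(eax)   (((eax) & 0xFF) << 24)
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)

int main(void)
{
	/* pack the C6-ATM hint (0x52) next to an ordinary flag bit */
	unsigned int flags = MWAIT2flg(0x52) | 0x02;
	unsigned int hint = flg2MWAIT(flags);

	assert(hint == 0x52);                  /* hint survives the trip */
	printf("C-state nibble %u, sub-state nibble %u\n",
	       (hint >> 4) & 0xF, hint & 0xF); /* prints 5 and 2 */
	return 0;
}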
@@ -278,8 +323,7 @@ static int intel_idle(struct cpuidle_device *dev,
278{ 323{
279 unsigned long ecx = 1; /* break on interrupt flag */ 324 unsigned long ecx = 1; /* break on interrupt flag */
280 struct cpuidle_state *state = &drv->states[index]; 325 struct cpuidle_state *state = &drv->states[index];
281 struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; 326 unsigned long eax = flg2MWAIT(state->flags);
282 unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
283 unsigned int cstate; 327 unsigned int cstate;
284 int cpu = smp_processor_id(); 328 int cpu = smp_processor_id();
285 329
@@ -362,10 +406,19 @@ static void auto_demotion_disable(void *dummy)
362 msr_bits &= ~(icpu->auto_demotion_disable_flags); 406 msr_bits &= ~(icpu->auto_demotion_disable_flags);
363 wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); 407 wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
364} 408}
409static void c1e_promotion_disable(void *dummy)
410{
411 unsigned long long msr_bits;
412
413 rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
414 msr_bits &= ~0x2;
415 wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
416}
365 417
366static const struct idle_cpu idle_cpu_nehalem = { 418static const struct idle_cpu idle_cpu_nehalem = {
367 .state_table = nehalem_cstates, 419 .state_table = nehalem_cstates,
368 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 420 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
421 .disable_promotion_to_c1e = true,
369}; 422};
370 423
371static const struct idle_cpu idle_cpu_atom = { 424static const struct idle_cpu idle_cpu_atom = {
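c1e_promotion_disable() clears bit 1 of MSR_IA32_POWER_CTL on each CPU; this is the same bit that turbostat decodes later in this patch. A user-space sketch for checking it, assuming the msr driver is loaded and that MSR_IA32_POWER_CTL is address 0x1fc:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t msr;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	/* the msr driver maps the pread offset to the MSR address */
	if (fd < 0 || pread(fd, &msr, sizeof(msr), 0x1fc) != sizeof(msr)) {
		perror("rdmsr 0x1fc");
		return 1;
	}
	printf("C1E promotion: %sabled\n", (msr & 0x2) ? "EN" : "DIS");
	close(fd);
	return 0;
}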
@@ -379,10 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = {
379 432
380static const struct idle_cpu idle_cpu_snb = { 433static const struct idle_cpu idle_cpu_snb = {
381 .state_table = snb_cstates, 434 .state_table = snb_cstates,
435 .disable_promotion_to_c1e = true,
382}; 436};
383 437
384static const struct idle_cpu idle_cpu_ivb = { 438static const struct idle_cpu idle_cpu_ivb = {
385 .state_table = ivb_cstates, 439 .state_table = ivb_cstates,
440 .disable_promotion_to_c1e = true,
441};
442
443static const struct idle_cpu idle_cpu_hsw = {
444 .state_table = hsw_cstates,
445 .disable_promotion_to_c1e = true,
386}; 446};
387 447
388#define ICPU(model, cpu) \ 448#define ICPU(model, cpu) \
@@ -402,6 +462,9 @@ static const struct x86_cpu_id intel_idle_ids[] = {
402 ICPU(0x2d, idle_cpu_snb), 462 ICPU(0x2d, idle_cpu_snb),
403 ICPU(0x3a, idle_cpu_ivb), 463 ICPU(0x3a, idle_cpu_ivb),
404 ICPU(0x3e, idle_cpu_ivb), 464 ICPU(0x3e, idle_cpu_ivb),
465 ICPU(0x3c, idle_cpu_hsw),
466 ICPU(0x3f, idle_cpu_hsw),
467 ICPU(0x45, idle_cpu_hsw),
405 {} 468 {}
406}; 469};
407MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); 470MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
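The three new model numbers (0x3c, 0x3f, 0x45) are Haswell variants, consumed at probe time through the x86 CPU match API. A kernel-context sketch of that pattern, close to what this driver's probe path does but condensed for illustration:

/* x86_match_cpu() walks intel_idle_ids[] comparing vendor, family
 * and model; .driver_data carries the per-model struct idle_cpu. */
static int intel_idle_probe_sketch(void)
{
	const struct x86_cpu_id *id = x86_match_cpu(intel_idle_ids);

	if (!id)
		return -ENODEV;	/* model not in the table */

	icpu = (const struct idle_cpu *)id->driver_data;
	cpuidle_state_table = icpu->state_table;
	return 0;
}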
@@ -484,32 +547,31 @@ static int intel_idle_cpuidle_driver_init(void)
484 547
485 drv->state_count = 1; 548 drv->state_count = 1;
486 549
487 for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { 550 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
488 int num_substates; 551 int num_substates, mwait_hint, mwait_cstate, mwait_substate;
489 552
490 if (cstate > max_cstate) { 553 if (cpuidle_state_table[cstate].enter == NULL)
554 break;
555
556 if (cstate + 1 > max_cstate) {
491 printk(PREFIX "max_cstate %d reached\n", 557 printk(PREFIX "max_cstate %d reached\n",
492 max_cstate); 558 max_cstate);
493 break; 559 break;
494 } 560 }
495 561
562 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
563 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
564 mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
565
496 /* does the state exist in CPUID.MWAIT? */ 566 /* does the state exist in CPUID.MWAIT? */
497 num_substates = (mwait_substates >> ((cstate) * 4)) 567 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
498 & MWAIT_SUBSTATE_MASK; 568 & MWAIT_SUBSTATE_MASK;
499 if (num_substates == 0) 569
500 continue; 570 /* if sub-state in table is not enumerated by CPUID */
501 /* is the state not enabled? */ 571 if ((mwait_substate + 1) > num_substates)
502 if (cpuidle_state_table[cstate].enter == NULL) {
503 /* does the driver not know about the state? */
504 if (*cpuidle_state_table[cstate].name == '\0')
505 pr_debug(PREFIX "unaware of model 0x%x"
506 " MWAIT %d please"
507 " contact lenb@kernel.org\n",
508 boot_cpu_data.x86_model, cstate);
509 continue; 572 continue;
510 }
511 573
512 if ((cstate > 2) && 574 if (((mwait_cstate + 1) > 2) &&
513 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 575 !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
514 mark_tsc_unstable("TSC halts in idle" 576 mark_tsc_unstable("TSC halts in idle"
515 " states deeper than C2"); 577 " states deeper than C2");
@@ -523,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void)
523 if (icpu->auto_demotion_disable_flags) 585 if (icpu->auto_demotion_disable_flags)
524 on_each_cpu(auto_demotion_disable, NULL, 1); 586 on_each_cpu(auto_demotion_disable, NULL, 1);
525 587
588 if (icpu->disable_promotion_to_c1e) /* each-cpu is redundant */
589 on_each_cpu(c1e_promotion_disable, NULL, 1);
590
526 return 0; 591 return 0;
527} 592}
528 593
@@ -541,25 +606,28 @@ static int intel_idle_cpu_init(int cpu)
541 606
542 dev->state_count = 1; 607 dev->state_count = 1;
543 608
544 for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) { 609 for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
545 int num_substates; 610 int num_substates, mwait_hint, mwait_cstate, mwait_substate;
611
612 if (cpuidle_state_table[cstate].enter == NULL)
613 continue;
546 614
547 if (cstate > max_cstate) { 615 if (cstate + 1 > max_cstate) {
548 printk(PREFIX "max_cstate %d reached\n", max_cstate); 616 printk(PREFIX "max_cstate %d reached\n", max_cstate);
549 break; 617 break;
550 } 618 }
551 619
620 mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
621 mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
622 mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
623
552 /* does the state exist in CPUID.MWAIT? */ 624 /* does the state exist in CPUID.MWAIT? */
553 num_substates = (mwait_substates >> ((cstate) * 4)) 625 num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
554 & MWAIT_SUBSTATE_MASK; 626 & MWAIT_SUBSTATE_MASK;
555 if (num_substates == 0)
556 continue;
557 /* is the state not enabled? */
558 if (cpuidle_state_table[cstate].enter == NULL)
559 continue;
560 627
561 dev->states_usage[dev->state_count].driver_data = 628 /* if sub-state in table is not enumerated by CPUID */
562 (void *)get_driver_data(cstate); 629 if ((mwait_substate + 1) > num_substates)
630 continue;
563 631
564 dev->state_count += 1; 632 dev->state_count += 1;
565 } 633 }
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 24cd1037b6d6..480c14dc1ddd 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -32,8 +32,6 @@ struct cpuidle_driver;
32 ****************************/ 32 ****************************/
33 33
34struct cpuidle_state_usage { 34struct cpuidle_state_usage {
35 void *driver_data;
36
37 unsigned long long disable; 35 unsigned long long disable;
38 unsigned long long usage; 36 unsigned long long usage;
39 unsigned long long time; /* in US */ 37 unsigned long long time; /* in US */
@@ -62,26 +60,6 @@ struct cpuidle_state {
62 60
63#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) 61#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
64 62
65/**
66 * cpuidle_get_statedata - retrieves private driver state data
67 * @st_usage: the state usage statistics
68 */
69static inline void *cpuidle_get_statedata(struct cpuidle_state_usage *st_usage)
70{
71 return st_usage->driver_data;
72}
73
74/**
75 * cpuidle_set_statedata - stores private driver state data
76 * @st_usage: the state usage statistics
77 * @data: the private data
78 */
79static inline void
80cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data)
81{
82 st_usage->driver_data = data;
83}
84
85struct cpuidle_device { 63struct cpuidle_device {
86 unsigned int registered:1; 64 unsigned int registered:1;
87 unsigned int enabled:1; 65 unsigned int enabled:1;
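Removing these accessors is what drives the two driver changes earlier in this diff: per-state constants now live in the driver-owned cpuidle_state rather than in per-device usage slots. Schematically, using the intel_idle names from this patch:

/* before: pointer stashed per device, per state */
eax = (unsigned long)cpuidle_get_statedata(&dev->states_usage[index]);

/* after: constant encoded once in the shared state flags */
eax = flg2MWAIT(drv->states[index].flags);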
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 03d7bb145311..97bcf23e045a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -31,7 +31,6 @@
31/* 31/*
32 * Callbacks for platform drivers to implement. 32 * Callbacks for platform drivers to implement.
33 */ 33 */
34extern void (*pm_idle)(void);
35extern void (*pm_power_off)(void); 34extern void (*pm_power_off)(void);
36extern void (*pm_power_off_prepare)(void); 35extern void (*pm_power_off_prepare)(void);
37 36
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 0d7dc2cfefb5..b4ddb748356c 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -31,8 +31,6 @@ The \fB-S\fP option limits output to a 1-line System Summary for each interval.
31.PP 31.PP
32The \fB-v\fP option increases verbosity. 32The \fB-v\fP option increases verbosity.
33.PP 33.PP
34The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34"
35.PP
36The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. 34The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter.
37.PP 35.PP
38The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. 36The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter.
@@ -186,26 +184,24 @@ This is a weighted average, where the weight is %c0. ie. it is the total number
186un-halted cycles elapsed per time divided by the number of CPUs. 184un-halted cycles elapsed per time divided by the number of CPUs.
187.SH SMI COUNTING EXAMPLE 185.SH SMI COUNTING EXAMPLE
188On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter. 186On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
189Using the -m option, you can display how many SMIs have fired since reset, or if there 187This counter is shown by default under the "SMI" column.
190are SMIs during the measurement interval, you can display the delta using the -d option.
191.nf 188.nf
192[root@x980 ~]# turbostat -m 0x34 189[root@x980 ~]# turbostat
193cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6 190cor CPU %c0 GHz TSC SMI %c1 %c3 %c6 CTMP %pc3 %pc6
194 1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55 191 0.11 1.91 3.38 0 1.84 0.26 97.79 29 0.82 83.87
195 0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55 192 0 0 0.40 1.63 3.38 0 10.27 0.12 89.20 20 0.82 83.88
196 0 6 0.14 1.63 3.38 0x00000056 5.30 193 0 6 0.06 1.63 3.38 0 10.61
197 1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52 194 1 2 0.37 2.63 3.38 0 0.02 0.10 99.51 22
198 1 8 0.10 1.65 3.38 0x00000056 18.05 195 1 8 0.01 1.62 3.38 0 0.39
199 2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50 196 2 4 0.07 1.62 3.38 0 0.04 0.07 99.82 23
200 2 10 0.10 1.63 3.38 0x00000056 6.93 197 2 10 0.02 1.62 3.38 0 0.09
201 8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16 198 8 1 0.23 1.64 3.38 0 0.10 1.07 98.60 24
202 8 7 0.08 1.64 3.38 0x00000056 5.12 199 8 7 0.02 1.64 3.38 0 0.31
203 9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38 200 9 3 0.03 1.62 3.38 0 0.03 0.05 99.89 29
204 9 9 0.09 1.68 3.38 0x00000056 9.32 201 9 9 0.02 1.62 3.38 0 0.05
205 10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23 202 10 5 0.07 1.62 3.38 0 0.08 0.12 99.73 27
206 10 11 1.72 1.65 3.38 0x00000056 15.05 203 10 11 0.03 1.62 3.38 0 0.13
207^C 204^C
208[root@x980 ~]#
209.fi 205.fi
210.SH NOTES 206.SH NOTES
211 207
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ce6d46038f74..6f3214ed4444 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -58,6 +58,7 @@ unsigned int extra_msr_offset32;
58unsigned int extra_msr_offset64; 58unsigned int extra_msr_offset64;
59unsigned int extra_delta_offset32; 59unsigned int extra_delta_offset32;
60unsigned int extra_delta_offset64; 60unsigned int extra_delta_offset64;
61int do_smi;
61double bclk; 62double bclk;
62unsigned int show_pkg; 63unsigned int show_pkg;
63unsigned int show_core; 64unsigned int show_core;
@@ -99,6 +100,7 @@ struct thread_data {
99 unsigned long long extra_delta64; 100 unsigned long long extra_delta64;
100 unsigned long long extra_msr32; 101 unsigned long long extra_msr32;
101 unsigned long long extra_delta32; 102 unsigned long long extra_delta32;
103 unsigned int smi_count;
102 unsigned int cpu_id; 104 unsigned int cpu_id;
103 unsigned int flags; 105 unsigned int flags;
104#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 106#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
@@ -248,6 +250,8 @@ void print_header(void)
248 if (has_aperf) 250 if (has_aperf)
249 outp += sprintf(outp, " GHz"); 251 outp += sprintf(outp, " GHz");
250 outp += sprintf(outp, " TSC"); 252 outp += sprintf(outp, " TSC");
253 if (do_smi)
254 outp += sprintf(outp, " SMI");
251 if (extra_delta_offset32) 255 if (extra_delta_offset32)
252 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); 256 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
253 if (extra_delta_offset64) 257 if (extra_delta_offset64)
@@ -314,6 +318,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
314 extra_msr_offset32, t->extra_msr32); 318 extra_msr_offset32, t->extra_msr32);
315 fprintf(stderr, "msr0x%x: %016llX\n", 319 fprintf(stderr, "msr0x%x: %016llX\n",
316 extra_msr_offset64, t->extra_msr64); 320 extra_msr_offset64, t->extra_msr64);
321 if (do_smi)
322 fprintf(stderr, "SMI: %08X\n", t->smi_count);
317 } 323 }
318 324
319 if (c) { 325 if (c) {
@@ -352,6 +358,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
352 * RAM_W: %5.2 358 * RAM_W: %5.2
353 * GHz: "GHz" 3 columns %3.2 359 * GHz: "GHz" 3 columns %3.2
354 * TSC: "TSC" 3 columns %3.2 360 * TSC: "TSC" 3 columns %3.2
361 * SMI: "SMI" 4 columns %4d
355 * percentage " %pc3" %6.2 362 * percentage " %pc3" %6.2
356 * Perf Status percentage: %5.2 363 * Perf Status percentage: %5.2
357 * "CTMP" 4 columns %4d 364 * "CTMP" 4 columns %4d
@@ -431,6 +438,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
431 /* TSC */ 438 /* TSC */
432 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); 439 outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
433 440
441 /* SMI */
442 if (do_smi)
443 outp += sprintf(outp, "%4d", t->smi_count);
444
434 /* delta */ 445 /* delta */
435 if (extra_delta_offset32) 446 if (extra_delta_offset32)
436 outp += sprintf(outp, " %11llu", t->extra_delta32); 447 outp += sprintf(outp, " %11llu", t->extra_delta32);
@@ -645,6 +656,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
645 */ 656 */
646 old->extra_msr32 = new->extra_msr32; 657 old->extra_msr32 = new->extra_msr32;
647 old->extra_msr64 = new->extra_msr64; 658 old->extra_msr64 = new->extra_msr64;
659
660 if (do_smi)
661 old->smi_count = new->smi_count - old->smi_count;
648} 662}
649 663
650int delta_cpu(struct thread_data *t, struct core_data *c, 664int delta_cpu(struct thread_data *t, struct core_data *c,
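delta_thread() turns the running SMI total into a per-interval count in place. Because smi_count is a 32-bit unsigned value, the subtraction is modulo 2^32 and stays correct even if the counter wraps between samples; a minimal, runnable illustration:

#include <stdint.h>
#include <stdio.h>

static uint32_t smi_delta(uint32_t new_count, uint32_t old_count)
{
	/* unsigned subtraction: well-defined even when new < old */
	return new_count - old_count;
}

int main(void)
{
	/* hypothetical samples on either side of a 32-bit wrap */
	printf("%u\n", smi_delta(2, 0xFFFFFFFFu));	/* prints 3 */
	return 0;
}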
@@ -672,6 +686,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
672 t->mperf = 0; 686 t->mperf = 0;
673 t->c1 = 0; 687 t->c1 = 0;
674 688
689 t->smi_count = 0;
675 t->extra_delta32 = 0; 690 t->extra_delta32 = 0;
676 t->extra_delta64 = 0; 691 t->extra_delta64 = 0;
677 692
@@ -802,6 +817,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
802 return -4; 817 return -4;
803 } 818 }
804 819
820 if (do_smi) {
821 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
822 return -5;
823 t->smi_count = msr & 0xFFFFFFFF;
824 }
805 if (extra_delta_offset32) { 825 if (extra_delta_offset32) {
806 if (get_msr(cpu, extra_delta_offset32, &msr)) 826 if (get_msr(cpu, extra_delta_offset32, &msr))
807 return -5; 827 return -5;
@@ -908,8 +928,7 @@ void print_verbose_header(void)
908 928
909 get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); 929 get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
910 930
911 if (verbose) 931 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
912 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
913 932
914 ratio = (msr >> 40) & 0xFF; 933 ratio = (msr >> 40) & 0xFF;
915 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 934 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
@@ -919,13 +938,16 @@ void print_verbose_header(void)
919 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", 938 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
920 ratio, bclk, ratio * bclk); 939 ratio, bclk, ratio * bclk);
921 940
941 get_msr(0, MSR_IA32_POWER_CTL, &msr);
942 fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
943 msr, msr & 0x2 ? "EN" : "DIS");
944
922 if (!do_ivt_turbo_ratio_limit) 945 if (!do_ivt_turbo_ratio_limit)
923 goto print_nhm_turbo_ratio_limits; 946 goto print_nhm_turbo_ratio_limits;
924 947
925 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 948 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
926 949
927 if (verbose) 950 fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
928 fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
929 951
930 ratio = (msr >> 56) & 0xFF; 952 ratio = (msr >> 56) & 0xFF;
931 if (ratio) 953 if (ratio)
@@ -1016,8 +1038,7 @@ print_nhm_turbo_ratio_limits:
1016 1038
1017 get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); 1039 get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1018 1040
1019 if (verbose) 1041 fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1020 fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1021 1042
1022 ratio = (msr >> 56) & 0xFF; 1043 ratio = (msr >> 56) & 0xFF;
1023 if (ratio) 1044 if (ratio)
@@ -1397,6 +1418,9 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
1397 case 0x2D: /* SNB Xeon */ 1418 case 0x2D: /* SNB Xeon */
1398 case 0x3A: /* IVB */ 1419 case 0x3A: /* IVB */
1399 case 0x3E: /* IVB Xeon */ 1420 case 0x3E: /* IVB Xeon */
1421 case 0x3C: /* HSW */
1422 case 0x3F: /* HSW */
1423 case 0x45: /* HSW */
1400 return 1; 1424 return 1;
1401 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 1425 case 0x2E: /* Nehalem-EX Xeon - Beckton */
1402 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 1426 case 0x2F: /* Westmere-EX Xeon - Eagleton */
@@ -1488,6 +1512,9 @@ void rapl_probe(unsigned int family, unsigned int model)
1488 switch (model) { 1512 switch (model) {
1489 case 0x2A: 1513 case 0x2A:
1490 case 0x3A: 1514 case 0x3A:
1515 case 0x3C: /* HSW */
1516 case 0x3F: /* HSW */
1517 case 0x45: /* HSW */
1491 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; 1518 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
1492 break; 1519 break;
1493 case 0x2D: 1520 case 0x2D:
@@ -1724,6 +1751,9 @@ int is_snb(unsigned int family, unsigned int model)
1724 case 0x2D: 1751 case 0x2D:
1725 case 0x3A: /* IVB */ 1752 case 0x3A: /* IVB */
1726 case 0x3E: /* IVB Xeon */ 1753 case 0x3E: /* IVB Xeon */
1754 case 0x3C: /* HSW */
1755 case 0x3F: /* HSW */
1756 case 0x45: /* HSW */
1727 return 1; 1757 return 1;
1728 } 1758 }
1729 return 0; 1759 return 0;
@@ -1883,6 +1913,7 @@ void check_cpuid()
1883 1913
1884 do_nehalem_platform_info = genuine_intel && has_invariant_tsc; 1914 do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
1885 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ 1915 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
1916 do_smi = do_nhm_cstates;
1886 do_snb_cstates = is_snb(family, model); 1917 do_snb_cstates = is_snb(family, model);
1887 bclk = discover_bclk(family, model); 1918 bclk = discover_bclk(family, model);
1888 1919
@@ -2219,9 +2250,6 @@ void cmdline(int argc, char **argv)
2219 case 'c': 2250 case 'c':
2220 sscanf(optarg, "%x", &extra_delta_offset32); 2251 sscanf(optarg, "%x", &extra_delta_offset32);
2221 break; 2252 break;
2222 case 's':
2223 extra_delta_offset32 = 0x34; /* SMI counter */
2224 break;
2225 case 'C': 2253 case 'C':
2226 sscanf(optarg, "%x", &extra_delta_offset64); 2254 sscanf(optarg, "%x", &extra_delta_offset64);
2227 break; 2255 break;
@@ -2248,7 +2276,7 @@ int main(int argc, char **argv)
2248 cmdline(argc, argv); 2276 cmdline(argc, argv);
2249 2277
2250 if (verbose) 2278 if (verbose)
2251 fprintf(stderr, "turbostat v3.0 November 23, 2012" 2279 fprintf(stderr, "turbostat v3.2 February 11, 2013"
2252 " - Len Brown <lenb@kernel.org>\n"); 2280 " - Len Brown <lenb@kernel.org>\n");
2253 2281
2254 turbostat_init(); 2282 turbostat_init();