Diffstat (limited to 'arch/x86/kernel/process.c')

 -rw-r--r--  arch/x86/kernel/process.c  | 225
 1 file changed, 206 insertions, 19 deletions
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba370dc8685b..7fc4d5b0a6a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -6,8 +6,16 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
+#include <linux/clockchips.h>
+#include <asm/system.h>
+
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
+static int force_mwait __cpuinitdata;
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
@@ -45,6 +53,76 @@ void arch_task_cache_init(void)
                                   SLAB_PANIC, NULL);
 }
 
+/*
+ * Idle related variables and functions
+ */
+unsigned long boot_option_idle_override = 0;
+EXPORT_SYMBOL(boot_option_idle_override);
+
+/*
+ * Powermanagement idle function, if any..
+ */
+void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
+
+#ifdef CONFIG_X86_32
+/*
+ * This halt magic was a workaround for ancient floppy DMA
+ * wreckage. It should be safe to remove.
+ */
+static int hlt_counter;
+void disable_hlt(void)
+{
+        hlt_counter++;
+}
+EXPORT_SYMBOL(disable_hlt);
+
+void enable_hlt(void)
+{
+        hlt_counter--;
+}
+EXPORT_SYMBOL(enable_hlt);
+
+static inline int hlt_use_halt(void)
+{
+        return (!hlt_counter && boot_cpu_data.hlt_works_ok);
+}
+#else
+static inline int hlt_use_halt(void)
+{
+        return 1;
+}
+#endif
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+void default_idle(void)
+{
+        if (hlt_use_halt()) {
+                current_thread_info()->status &= ~TS_POLLING;
+                /*
+                 * TS_POLLING-cleared state must be visible before we
+                 * test NEED_RESCHED:
+                 */
+                smp_mb();
+
+                if (!need_resched())
+                        safe_halt();    /* enables interrupts racelessly */
+                else
+                        local_irq_enable();
+                current_thread_info()->status |= TS_POLLING;
+        } else {
+                local_irq_enable();
+                /* loop is done by the caller */
+                cpu_relax();
+        }
+}
+#ifdef CONFIG_APM_MODULE
+EXPORT_SYMBOL(default_idle);
+#endif
+
 static void do_nothing(void *unused)
 {
 }
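The default_idle() added above only works because of the ordering protocol around TS_POLLING: the idle CPU clears the flag, executes a full barrier (the smp_mb()), and only then tests need_resched() before halting; a waker sets need_resched first and checks TS_POLLING second. The two barriers guarantee that at least one side observes the other's store, so a wakeup cannot be lost in the window before HLT. Below is a minimal userspace sketch of that handshake using C11 atomics; the two-thread setup and all names (polling, work_pending, idler) are illustrative, not part of the patch.

/* Minimal sketch of the TS_POLLING / need_resched ordering, in userspace.
 * Build: cc -std=c11 -pthread ts_polling_sketch.c */
#include <stdatomic.h>
#include <stdio.h>
#include <pthread.h>

static atomic_int polling = 1;       /* analogue of TS_POLLING */
static atomic_int work_pending = 0;  /* analogue of TIF_NEED_RESCHED */

static void *idler(void *arg)
{
        /* Tell wakers we are no longer polling... */
        atomic_store_explicit(&polling, 0, memory_order_relaxed);
        /* ...and make that visible before testing work_pending
         * (this fence plays the role of the smp_mb() above). */
        atomic_thread_fence(memory_order_seq_cst);

        if (!atomic_load_explicit(&work_pending, memory_order_relaxed))
                puts("idler: no work pending, would execute HLT now");
        else
                puts("idler: work arrived in the window, skip the halt");
        return NULL;
}

int main(void)
{
        pthread_t t;
        pthread_create(&t, NULL, idler, NULL);

        /* Waker side: publish work first, then check whether the idle
         * thread still polls (if not, the kernel would send an IPI). */
        atomic_store_explicit(&work_pending, 1, memory_order_relaxed);
        atomic_thread_fence(memory_order_seq_cst);
        if (!atomic_load_explicit(&polling, memory_order_relaxed))
                puts("waker: target stopped polling, an IPI would be needed");

        pthread_join(t, NULL);
        return 0;
}

If either fence is removed, both loads may observe stale values and the idler would halt with work already pending, which is exactly the lost-wakeup race the barrier in default_idle() closes.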
@@ -61,7 +139,7 @@ void cpu_idle_wait(void)
 {
         smp_mb();
         /* kick all the CPUs so that they exit out of pm_idle */
-        smp_call_function(do_nothing, NULL, 0, 1);
+        smp_call_function(do_nothing, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
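cpu_idle_wait() works by poking every CPU with a do-nothing cross-call: each CPU returns from whatever pm_idle routine it was executing and re-reads the (possibly updated) pm_idle pointer on the next idle-loop iteration. The hunk itself only tracks an API change: smp_call_function() lost its unused nonatomic argument in this series, so the wait flag moves from the fourth to the third position. Below is a rough userspace analogue of the "kick idlers with a no-op" idea, with threads standing in for CPUs and a no-op signal for the IPI; everything here (thread count, signal choice, timeouts) is illustrative, not from the patch.

/* Userspace analogue of cpu_idle_wait(): knock workers out of a stale
 * idle routine with a no-op signal.  Build: cc -pthread idle_kick_sketch.c */
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static void do_nothing(int sig) { (void)sig; }   /* the "dummy IPI" */

/* Parks for up to 5s; our fake IPI interrupts the sleep early. */
static void old_idle(void) { sleep(5); }
static void new_idle(void) { usleep(1000); }

static void (*_Atomic pm_idle)(void) = old_idle;
static atomic_int stop;

static void *cpu(void *arg)
{
        while (!atomic_load(&stop))
                atomic_load(&pm_idle)();         /* re-read every iteration */
        return NULL;
}

int main(void)
{
        enum { NCPUS = 4 };
        pthread_t t[NCPUS];
        int i;

        signal(SIGUSR1, do_nothing);             /* install before spawning */
        for (i = 0; i < NCPUS; i++)
                pthread_create(&t[i], NULL, cpu, NULL);
        sleep(1);                                /* let them block in old_idle */

        atomic_store(&pm_idle, new_idle);        /* switch the idle routine */
        for (i = 0; i < NCPUS; i++)
                pthread_kill(t[i], SIGUSR1);     /* kick: exit old_idle() early */

        sleep(1);
        atomic_store(&stop, 1);
        for (i = 0; i < NCPUS; i++) {
                pthread_kill(t[i], SIGUSR1);
                pthread_join(t[i], NULL);
        }
        puts("all workers left the old idle routine");
        return 0;
}

As in the kernel, the kick carries no payload; its only job is to break the target out of a blocking idle call so the next loop iteration picks up the new routine.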
@@ -122,54 +200,163 @@ static void poll_idle(void)
  *
  * idle=mwait overrides this decision and forces the usage of mwait.
  */
+static int __cpuinitdata force_mwait;
+
+#define MWAIT_INFO                      0x05
+#define MWAIT_ECX_EXTENDED_INFO         0x01
+#define MWAIT_EDX_C1                    0xf0
+
 static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
 {
+        u32 eax, ebx, ecx, edx;
+
         if (force_mwait)
                 return 1;
 
-        if (c->x86_vendor == X86_VENDOR_AMD) {
-                switch(c->x86) {
-                case 0x10:
-                case 0x11:
-                        return 0;
-                }
-        }
+        if (c->cpuid_level < MWAIT_INFO)
+                return 0;
+
+        cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
+        /* Check whether EDX has extended info about MWAIT */
+        if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
+                return 1;
+
+        /*
+         * edx enumerates MONITOR/MWAIT extensions. Check whether
+         * C1 supports MWAIT
+         */
+        return (edx & MWAIT_EDX_C1);
+}
+
+/*
+ * Check for AMD CPUs, which have potentially C1E support
+ */
+static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
+{
+        if (c->x86_vendor != X86_VENDOR_AMD)
+                return 0;
+
+        if (c->x86 < 0x0F)
+                return 0;
+
+        /* Family 0x0f models < rev F do not have C1E */
+        if (c->x86 == 0x0f && c->x86_model < 0x40)
+                return 0;
+
         return 1;
 }
 
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+/*
+ * C1E aware idle routine. We check for C1E active in the interrupt
+ * pending message MSR. If we detect C1E, then we handle it the same
+ * way as C3 power states (local apic timer and TSC stop)
+ */
+static void c1e_idle(void)
 {
-        static int selected;
+        static cpumask_t c1e_mask = CPU_MASK_NONE;
+        static int c1e_detected;
 
-        if (selected)
+        if (need_resched())
                 return;
+
+        if (!c1e_detected) {
+                u32 lo, hi;
+
+                rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+                if (lo & K8_INTP_C1E_ACTIVE_MASK) {
+                        c1e_detected = 1;
+                        mark_tsc_unstable("TSC halt in C1E");
+                        printk(KERN_INFO "System has C1E enabled\n");
+                }
+        }
+
+        if (c1e_detected) {
+                int cpu = smp_processor_id();
+
+                if (!cpu_isset(cpu, c1e_mask)) {
+                        cpu_set(cpu, c1e_mask);
+                        /*
+                         * Force broadcast so ACPI can not interfere. Needs
+                         * to run with interrupts enabled as it uses
+                         * smp_call_function.
+                         */
+                        local_irq_enable();
+                        clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
+                                           &cpu);
+                        printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
+                               cpu);
+                        local_irq_disable();
+                }
+                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+
+                default_idle();
+
+                /*
+                 * The switch back from broadcast mode needs to be
+                 * called with interrupts disabled.
+                 */
+                local_irq_disable();
+                clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+                local_irq_enable();
+        } else
+                default_idle();
+}
+
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
 #ifdef CONFIG_X86_SMP
         if (pm_idle == poll_idle && smp_num_siblings > 1) {
                 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
                        " performance may degrade.\n");
         }
 #endif
+        if (pm_idle)
+                return;
+
         if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
                 /*
-                 * Skip, if setup has overridden idle.
                  * One CPU supports mwait => All CPUs support mwait
                  */
-                if (!pm_idle) {
-                        printk(KERN_INFO "using mwait in idle threads.\n");
-                        pm_idle = mwait_idle;
-                }
-        }
-        selected = 1;
+                printk(KERN_INFO "using mwait in idle threads.\n");
+                pm_idle = mwait_idle;
+        } else if (check_c1e_idle(c)) {
+                printk(KERN_INFO "using C1E aware idle routine\n");
+                pm_idle = c1e_idle;
+        } else
+                pm_idle = default_idle;
 }
 
 static int __init idle_setup(char *str)
 {
+        if (!str)
+                return -EINVAL;
+
         if (!strcmp(str, "poll")) {
                 printk("using polling idle threads.\n");
                 pm_idle = poll_idle;
         } else if (!strcmp(str, "mwait"))
                 force_mwait = 1;
-        else
+        else if (!strcmp(str, "halt")) {
+                /*
+                 * When the boot option of idle=halt is added, halt is
+                 * forced to be used for CPU idle. In such case CPU C2/C3
+                 * won't be used again.
+                 * To continue to load the CPU idle driver, don't touch
+                 * the boot_option_idle_override.
+                 */
+                pm_idle = default_idle;
+                idle_halt = 1;
+                return 0;
+        } else if (!strcmp(str, "nomwait")) {
+                /*
+                 * If the boot option of "idle=nomwait" is added,
+                 * it means that mwait will be disabled for CPU C2/C3
+                 * states. In such case it won't touch the variable
+                 * of boot_option_idle_override.
+                 */
+                idle_nomwait = 1;
+                return 0;
+        } else
                 return -1;
 
         boot_option_idle_override = 1;
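The CPUID side of the new mwait_usable() can be verified from userspace, since leaf 0x05 is readable at any privilege level: ECX bit 0 reports whether EDX carries the sub-state enumeration at all, and EDX[7:4] counts the MWAIT sub-states available for C1, which is exactly what MWAIT_EDX_C1 (0xf0) masks. Here is a small probe; GCC's <cpuid.h> helper and the build line are assumptions of this sketch, not part of the patch.

/* Userspace re-run of the mwait_usable() CPUID checks (x86 only).
 * Build: gcc -o mwait_probe mwait_probe.c */
#include <stdio.h>
#include <cpuid.h>

#define MWAIT_INFO              0x05  /* CPUID leaf for MONITOR/MWAIT */
#define MWAIT_ECX_EXTENDED_INFO 0x01  /* ECX[0]: extended info in EDX valid */
#define MWAIT_EDX_C1            0xf0  /* EDX[7:4]: # of C1 MWAIT sub-states */

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 if the leaf is above the CPU's maximum,
         * mirroring the kernel's c->cpuid_level < MWAIT_INFO bail-out. */
        if (!__get_cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx)) {
                puts("CPUID leaf 5 unavailable: MWAIT not usable");
                return 1;
        }
        if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) {
                puts("no extended MWAIT info: kernel would assume usable");
                return 0;
        }
        printf("C1 MWAIT sub-states: %u -> MWAIT %s for idle\n",
               (edx & MWAIT_EDX_C1) >> 4,
               (edx & MWAIT_EDX_C1) ? "usable" : "not usable");
        return 0;
}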

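Finally, the reworked idle_setup() gives "idle=" four values with deliberately different override semantics: "poll" and "mwait" set boot_option_idle_override, while "halt" and "nomwait" return early without touching it, so the ACPI idle driver can still load and only the C-state usage is restricted. Below is a standalone sketch of just that dispatch, with the kernel globals replaced by plain ints; the main()/printf scaffolding is purely illustrative.

/* Sketch of the "idle=" option dispatch above.  Build: cc idle_setup_sketch.c */
#include <stdio.h>
#include <string.h>

static int idle_halt, idle_nomwait, force_mwait, poll, override;

static int idle_setup(const char *str)
{
        if (!str)
                return -1;
        if (!strcmp(str, "poll"))
                poll = 1;
        else if (!strcmp(str, "mwait"))
                force_mwait = 1;
        else if (!strcmp(str, "halt")) {
                idle_halt = 1;       /* force HLT, skip C2/C3 */
                return 0;            /* deliberately leave override alone */
        } else if (!strcmp(str, "nomwait")) {
                idle_nomwait = 1;    /* disable MWAIT for C2/C3 states */
                return 0;
        } else
                return -1;

        override = 1;                /* only "poll" and "mwait" reach this */
        return 0;
}

int main(int argc, char **argv)
{
        const char *opt = argc > 1 ? argv[1] : "halt";

        if (idle_setup(opt) < 0)
                fprintf(stderr, "unknown idle=%s\n", opt);
        printf("idle=%s -> halt=%d nomwait=%d mwait=%d poll=%d override=%d\n",
               opt, idle_halt, idle_nomwait, force_mwait, poll, override);
        return 0;
}

Running it with each of the four values shows which flags a given "idle=" option sets, and that only "poll" and "mwait" flip the override.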