diff options
author | Ashok Raj <ashok.raj@intel.com> | 2005-10-30 17:59:54 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 20:37:14 -0500 |
commit | c32b6b8e524d2c337767d312814484d9289550cf (patch) | |
tree | 02e634b0b48db6eccc8774369366daa1893921ea | |
parent | d434fca737bee0862625c2377b987a7713b6b487 (diff) |
[PATCH] create and destroy cpufreq sysfs entries based on cpu notifiers
cpufreq entries in sysfs should only be populated when the CPU is in the online state.
When we boot with maxcpus=x and then bring up the other cpus by echoing
to the sysfs online file, these entries should be created when the CPU comes online and destroyed when
CPU_DEAD is notified. Same treatment as cache entries under sysfs.
We place the processor in the lowest frequency, so hw managed P-State
transitions can still work on the other threads to save power.
Primary goal was to just make these directories appear/disappear dynamically.
There is one thing in this patch I had to do, which I really don't like myself, but
it is probably best if someone handling the cpufreq infrastructure could give
this code the right treatment if this is not acceptable. I guess it's probably
good for the first cut.
- Converting lock_cpu_hotplug()/unlock_cpu_hotplug() to disable/enable preempt.
The locking was smack in the middle of the notification path, when the
hotplug is already holding the lock. I tried another solution to avoid this,
namely not taking locks if we know we are called from the notification path. That solution
was getting very ugly and I decided this was probably good for this iteration
until someone who understands cpufreq could do a better job than me.
(akpm: export cpucontrol to GPL modules: drivers/cpufreq/cpufreq_stats.c now
does lock_cpu_hotplug())
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Zwane Mwaikambo <zwane@holomorphy.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 69 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_stats.c | 42 | ||||
-rw-r--r-- | kernel/cpu.c | 1 |
3 files changed, 103 insertions, 9 deletions
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 346906a96e8b..6c6121b85a54 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c | |||
@@ -4,6 +4,9 @@ | |||
4 | * Copyright (C) 2001 Russell King | 4 | * Copyright (C) 2001 Russell King |
5 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> | 5 | * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> |
6 | * | 6 | * |
7 | * Oct 2005 - Ashok Raj <ashok.raj@intel.com> | ||
8 | * Added handling for CPU hotplug | ||
9 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | 10 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License version 2 as | 11 | * it under the terms of the GNU General Public License version 2 as |
9 | * published by the Free Software Foundation. | 12 | * published by the Free Software Foundation. |
@@ -567,6 +570,9 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) | |||
567 | unsigned long flags; | 570 | unsigned long flags; |
568 | unsigned int j; | 571 | unsigned int j; |
569 | 572 | ||
573 | if (cpu_is_offline(cpu)) | ||
574 | return 0; | ||
575 | |||
570 | cpufreq_debug_disable_ratelimit(); | 576 | cpufreq_debug_disable_ratelimit(); |
571 | dprintk("adding CPU %u\n", cpu); | 577 | dprintk("adding CPU %u\n", cpu); |
572 | 578 | ||
@@ -673,7 +679,7 @@ err_out: | |||
673 | 679 | ||
674 | nomem_out: | 680 | nomem_out: |
675 | module_put(cpufreq_driver->owner); | 681 | module_put(cpufreq_driver->owner); |
676 | module_out: | 682 | module_out: |
677 | cpufreq_debug_enable_ratelimit(); | 683 | cpufreq_debug_enable_ratelimit(); |
678 | return ret; | 684 | return ret; |
679 | } | 685 | } |
@@ -762,7 +768,6 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) | |||
762 | down(&data->lock); | 768 | down(&data->lock); |
763 | if (cpufreq_driver->target) | 769 | if (cpufreq_driver->target) |
764 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); | 770 | __cpufreq_governor(data, CPUFREQ_GOV_STOP); |
765 | cpufreq_driver->target = NULL; | ||
766 | up(&data->lock); | 771 | up(&data->lock); |
767 | 772 | ||
768 | kobject_unregister(&data->kobj); | 773 | kobject_unregister(&data->kobj); |
@@ -1109,17 +1114,30 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, | |||
1109 | unsigned int relation) | 1114 | unsigned int relation) |
1110 | { | 1115 | { |
1111 | int retval = -EINVAL; | 1116 | int retval = -EINVAL; |
1112 | lock_cpu_hotplug(); | 1117 | |
1118 | /* | ||
1119 | * Converted the lock_cpu_hotplug to preempt_disable() | ||
1120 | * and preempt_enable(). This is a bit kludgy and relies on how cpu | ||
1121 | * hotplug works. All we need is a guarantee that cpu hotplug won't make | ||
1122 | * progress on any cpu. Once we do preempt_disable(), this would ensure | ||
1123 | * that hotplug threads don't get onto this cpu, thereby delaying | ||
1124 | * the cpu remove process. | ||
1125 | * | ||
1126 | * We removed the lock_cpu_hotplug since we need to call this function | ||
1127 | * via cpu hotplug callbacks, which result in locking the cpu hotplug | ||
1128 | * thread itself. Agree this is not very clean, cpufreq community | ||
1129 | * could improve this if required. - Ashok Raj <ashok.raj@intel.com> | ||
1130 | */ | ||
1131 | preempt_disable(); | ||
1113 | dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, | 1132 | dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, |
1114 | target_freq, relation); | 1133 | target_freq, relation); |
1115 | if (cpu_online(policy->cpu) && cpufreq_driver->target) | 1134 | if (cpu_online(policy->cpu) && cpufreq_driver->target) |
1116 | retval = cpufreq_driver->target(policy, target_freq, relation); | 1135 | retval = cpufreq_driver->target(policy, target_freq, relation); |
1117 | unlock_cpu_hotplug(); | 1136 | preempt_enable(); |
1118 | return retval; | 1137 | return retval; |
1119 | } | 1138 | } |
1120 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); | 1139 | EXPORT_SYMBOL_GPL(__cpufreq_driver_target); |
1121 | 1140 | ||
1122 | |||
1123 | int cpufreq_driver_target(struct cpufreq_policy *policy, | 1141 | int cpufreq_driver_target(struct cpufreq_policy *policy, |
1124 | unsigned int target_freq, | 1142 | unsigned int target_freq, |
1125 | unsigned int relation) | 1143 | unsigned int relation) |
@@ -1406,6 +1424,45 @@ int cpufreq_update_policy(unsigned int cpu) | |||
1406 | } | 1424 | } |
1407 | EXPORT_SYMBOL(cpufreq_update_policy); | 1425 | EXPORT_SYMBOL(cpufreq_update_policy); |
1408 | 1426 | ||
1427 | static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, | ||
1428 | unsigned long action, void *hcpu) | ||
1429 | { | ||
1430 | unsigned int cpu = (unsigned long)hcpu; | ||
1431 | struct cpufreq_policy *policy; | ||
1432 | struct sys_device *sys_dev; | ||
1433 | |||
1434 | sys_dev = get_cpu_sysdev(cpu); | ||
1435 | |||
1436 | if (sys_dev) { | ||
1437 | switch (action) { | ||
1438 | case CPU_ONLINE: | ||
1439 | cpufreq_add_dev(sys_dev); | ||
1440 | break; | ||
1441 | case CPU_DOWN_PREPARE: | ||
1442 | /* | ||
1443 | * We attempt to put this cpu in lowest frequency | ||
1444 | * possible before going down. This will permit | ||
1445 | * hardware-managed P-State to switch other related | ||
1446 | * threads to min or higher speeds if possible. | ||
1447 | */ | ||
1448 | policy = cpufreq_cpu_data[cpu]; | ||
1449 | if (policy) { | ||
1450 | cpufreq_driver_target(policy, policy->min, | ||
1451 | CPUFREQ_RELATION_H); | ||
1452 | } | ||
1453 | break; | ||
1454 | case CPU_DEAD: | ||
1455 | cpufreq_remove_dev(sys_dev); | ||
1456 | break; | ||
1457 | } | ||
1458 | } | ||
1459 | return NOTIFY_OK; | ||
1460 | } | ||
1461 | |||
1462 | static struct notifier_block cpufreq_cpu_notifier = | ||
1463 | { | ||
1464 | .notifier_call = cpufreq_cpu_callback, | ||
1465 | }; | ||
1409 | 1466 | ||
1410 | /********************************************************************* | 1467 | /********************************************************************* |
1411 | * REGISTER / UNREGISTER CPUFREQ DRIVER * | 1468 | * REGISTER / UNREGISTER CPUFREQ DRIVER * |
@@ -1466,6 +1523,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) | |||
1466 | } | 1523 | } |
1467 | 1524 | ||
1468 | if (!ret) { | 1525 | if (!ret) { |
1526 | register_cpu_notifier(&cpufreq_cpu_notifier); | ||
1469 | dprintk("driver %s up and running\n", driver_data->name); | 1527 | dprintk("driver %s up and running\n", driver_data->name); |
1470 | cpufreq_debug_enable_ratelimit(); | 1528 | cpufreq_debug_enable_ratelimit(); |
1471 | } | 1529 | } |
@@ -1497,6 +1555,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) | |||
1497 | dprintk("unregistering driver %s\n", driver->name); | 1555 | dprintk("unregistering driver %s\n", driver->name); |
1498 | 1556 | ||
1499 | sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); | 1557 | sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); |
1558 | unregister_cpu_notifier(&cpufreq_cpu_notifier); | ||
1500 | 1559 | ||
1501 | spin_lock_irqsave(&cpufreq_driver_lock, flags); | 1560 | spin_lock_irqsave(&cpufreq_driver_lock, flags); |
1502 | cpufreq_driver = NULL; | 1561 | cpufreq_driver = NULL; |
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 741b6b191e6a..3597f25d5efa 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/percpu.h> | 19 | #include <linux/percpu.h> |
20 | #include <linux/kobject.h> | 20 | #include <linux/kobject.h> |
21 | #include <linux/spinlock.h> | 21 | #include <linux/spinlock.h> |
22 | #include <linux/notifier.h> | ||
22 | #include <asm/cputime.h> | 23 | #include <asm/cputime.h> |
23 | 24 | ||
24 | static spinlock_t cpufreq_stats_lock; | 25 | static spinlock_t cpufreq_stats_lock; |
@@ -298,6 +299,27 @@ cpufreq_stat_notifier_trans (struct notifier_block *nb, unsigned long val, | |||
298 | return 0; | 299 | return 0; |
299 | } | 300 | } |
300 | 301 | ||
302 | static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, | ||
303 | unsigned long action, void *hcpu) | ||
304 | { | ||
305 | unsigned int cpu = (unsigned long)hcpu; | ||
306 | |||
307 | switch (action) { | ||
308 | case CPU_ONLINE: | ||
309 | cpufreq_update_policy(cpu); | ||
310 | break; | ||
311 | case CPU_DEAD: | ||
312 | cpufreq_stats_free_table(cpu); | ||
313 | break; | ||
314 | } | ||
315 | return NOTIFY_OK; | ||
316 | } | ||
317 | |||
318 | static struct notifier_block cpufreq_stat_cpu_notifier = | ||
319 | { | ||
320 | .notifier_call = cpufreq_stat_cpu_callback, | ||
321 | }; | ||
322 | |||
301 | static struct notifier_block notifier_policy_block = { | 323 | static struct notifier_block notifier_policy_block = { |
302 | .notifier_call = cpufreq_stat_notifier_policy | 324 | .notifier_call = cpufreq_stat_notifier_policy |
303 | }; | 325 | }; |
@@ -311,6 +333,7 @@ __init cpufreq_stats_init(void) | |||
311 | { | 333 | { |
312 | int ret; | 334 | int ret; |
313 | unsigned int cpu; | 335 | unsigned int cpu; |
336 | |||
314 | spin_lock_init(&cpufreq_stats_lock); | 337 | spin_lock_init(&cpufreq_stats_lock); |
315 | if ((ret = cpufreq_register_notifier(&notifier_policy_block, | 338 | if ((ret = cpufreq_register_notifier(&notifier_policy_block, |
316 | CPUFREQ_POLICY_NOTIFIER))) | 339 | CPUFREQ_POLICY_NOTIFIER))) |
@@ -323,20 +346,31 @@ __init cpufreq_stats_init(void) | |||
323 | return ret; | 346 | return ret; |
324 | } | 347 | } |
325 | 348 | ||
326 | for_each_cpu(cpu) | 349 | register_cpu_notifier(&cpufreq_stat_cpu_notifier); |
327 | cpufreq_update_policy(cpu); | 350 | lock_cpu_hotplug(); |
351 | for_each_online_cpu(cpu) { | ||
352 | cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE, | ||
353 | (void *)(long)cpu); | ||
354 | } | ||
355 | unlock_cpu_hotplug(); | ||
328 | return 0; | 356 | return 0; |
329 | } | 357 | } |
330 | static void | 358 | static void |
331 | __exit cpufreq_stats_exit(void) | 359 | __exit cpufreq_stats_exit(void) |
332 | { | 360 | { |
333 | unsigned int cpu; | 361 | unsigned int cpu; |
362 | |||
334 | cpufreq_unregister_notifier(&notifier_policy_block, | 364 | cpufreq_unregister_notifier(&notifier_policy_block, |
335 | CPUFREQ_POLICY_NOTIFIER); | 364 | CPUFREQ_POLICY_NOTIFIER); |
336 | cpufreq_unregister_notifier(&notifier_trans_block, | 366 | cpufreq_unregister_notifier(&notifier_trans_block, |
337 | CPUFREQ_TRANSITION_NOTIFIER); | 366 | CPUFREQ_TRANSITION_NOTIFIER); |
338 | for_each_cpu(cpu) | 367 | unregister_cpu_notifier(&cpufreq_stat_cpu_notifier); |
339 | cpufreq_stats_free_table(cpu); | 368 | lock_cpu_hotplug(); |
369 | for_each_online_cpu(cpu) { | ||
370 | cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_DEAD, | ||
371 | (void *)(long)cpu); | ||
372 | } | ||
373 | unlock_cpu_hotplug(); | ||
340 | } | 374 | } |
341 | 375 | ||
342 | MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>"); | 376 | MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>"); |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 53d8263ae12e..3619e939182e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | /* This protects CPUs going up and down... */ | 18 | /* This protects CPUs going up and down... */ |
19 | DECLARE_MUTEX(cpucontrol); | 19 | DECLARE_MUTEX(cpucontrol); |
20 | EXPORT_SYMBOL_GPL(cpucontrol); | ||
20 | 21 | ||
21 | static struct notifier_block *cpu_chain; | 22 | static struct notifier_block *cpu_chain; |
22 | 23 | ||