aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-07-08 12:36:46 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-07-08 12:36:46 -0400
commit b4b21cac88caa4078f5755b0bd3770af5fe9c146 (patch)
tree fb5cccef61d0ed80adf4b0d1d03ddb3e80ff701d
parent 728b690fd5c185c639a5db0819bd6e0385b14188 (diff)
parent a2e1b4c31257c07f148a89eb7eea7ca959fd0642 (diff)
Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq
* 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq:
  [CPUFREQ] Powernow-k8: support family 0xf with 2 low p-states
  [CPUFREQ] fix (utter) cpufreq_add_dev mess
  [CPUFREQ] Cleanup locking in conservative governor
  [CPUFREQ] Cleanup locking in ondemand governor
  [CPUFREQ] Mark policy_rwsem as going static in cpufreq.c wont be exported
  [CPUFREQ] Eliminate the recent lockdep warnings in cpufreq
-rw-r--r-- Documentation/feature-removal-schedule.txt 10
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/powernow-k8.c 30
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/powernow-k8.h 3
-rw-r--r-- drivers/cpufreq/cpufreq.c 69
-rw-r--r-- drivers/cpufreq/cpufreq_conservative.c 49
-rw-r--r-- drivers/cpufreq/cpufreq_ondemand.c 77
6 files changed, 115 insertions, 123 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index f8cd450be9aa..09e031c55887 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -458,3 +458,13 @@ Why: Remove the old legacy 32bit machine check code. This has been
458 but the old version has been kept around for easier testing. Note this 458 but the old version has been kept around for easier testing. Note this
459 doesn't impact the old P5 and WinChip machine check handlers. 459 doesn't impact the old P5 and WinChip machine check handlers.
460Who: Andi Kleen <andi@firstfloor.org> 460Who: Andi Kleen <andi@firstfloor.org>
461
462----------------------------
463
464What: lock_policy_rwsem_* and unlock_policy_rwsem_* will no longer be
465 an exported interface.
466When: 2.6.33
467Why: cpu_policy_rwsem has a new cleaner definition making it local to
468 cpufreq core and contained inside cpufreq.c. Other dependent
469 drivers should not use it in order to safely avoid lockdep issues.
470Who: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 81cbe64ed6b4..ae068f59603f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -299,7 +299,7 @@ static int transition_pstate(struct powernow_k8_data *data, u32 pstate)
299static int transition_fid_vid(struct powernow_k8_data *data, 299static int transition_fid_vid(struct powernow_k8_data *data,
300 u32 reqfid, u32 reqvid) 300 u32 reqfid, u32 reqvid)
301{ 301{
302 if (core_voltage_pre_transition(data, reqvid)) 302 if (core_voltage_pre_transition(data, reqvid, reqfid))
303 return 1; 303 return 1;
304 304
305 if (core_frequency_transition(data, reqfid)) 305 if (core_frequency_transition(data, reqfid))
@@ -327,17 +327,20 @@ static int transition_fid_vid(struct powernow_k8_data *data,
327 327
328/* Phase 1 - core voltage transition ... setup voltage */ 328/* Phase 1 - core voltage transition ... setup voltage */
329static int core_voltage_pre_transition(struct powernow_k8_data *data, 329static int core_voltage_pre_transition(struct powernow_k8_data *data,
330 u32 reqvid) 330 u32 reqvid, u32 reqfid)
331{ 331{
332 u32 rvosteps = data->rvo; 332 u32 rvosteps = data->rvo;
333 u32 savefid = data->currfid; 333 u32 savefid = data->currfid;
334 u32 maxvid, lo; 334 u32 maxvid, lo, rvomult = 1;
335 335
336 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, " 336 dprintk("ph1 (cpu%d): start, currfid 0x%x, currvid 0x%x, "
337 "reqvid 0x%x, rvo 0x%x\n", 337 "reqvid 0x%x, rvo 0x%x\n",
338 smp_processor_id(), 338 smp_processor_id(),
339 data->currfid, data->currvid, reqvid, data->rvo); 339 data->currfid, data->currvid, reqvid, data->rvo);
340 340
341 if ((savefid < LO_FID_TABLE_TOP) && (reqfid < LO_FID_TABLE_TOP))
342 rvomult = 2;
343 rvosteps *= rvomult;
341 rdmsr(MSR_FIDVID_STATUS, lo, maxvid); 344 rdmsr(MSR_FIDVID_STATUS, lo, maxvid);
342 maxvid = 0x1f & (maxvid >> 16); 345 maxvid = 0x1f & (maxvid >> 16);
343 dprintk("ph1 maxvid=0x%x\n", maxvid); 346 dprintk("ph1 maxvid=0x%x\n", maxvid);
@@ -351,7 +354,8 @@ static int core_voltage_pre_transition(struct powernow_k8_data *data,
351 return 1; 354 return 1;
352 } 355 }
353 356
354 while ((rvosteps > 0) && ((data->rvo + data->currvid) > reqvid)) { 357 while ((rvosteps > 0) &&
358 ((rvomult * data->rvo + data->currvid) > reqvid)) {
355 if (data->currvid == maxvid) { 359 if (data->currvid == maxvid) {
356 rvosteps = 0; 360 rvosteps = 0;
357 } else { 361 } else {
@@ -384,13 +388,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
384 u32 vcoreqfid, vcocurrfid, vcofiddiff; 388 u32 vcoreqfid, vcocurrfid, vcofiddiff;
385 u32 fid_interval, savevid = data->currvid; 389 u32 fid_interval, savevid = data->currvid;
386 390
387 if ((reqfid < HI_FID_TABLE_BOTTOM) &&
388 (data->currfid < HI_FID_TABLE_BOTTOM)) {
389 printk(KERN_ERR PFX "ph2: illegal lo-lo transition "
390 "0x%x 0x%x\n", reqfid, data->currfid);
391 return 1;
392 }
393
394 if (data->currfid == reqfid) { 391 if (data->currfid == reqfid) {
395 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n", 392 printk(KERN_ERR PFX "ph2 null fid transition 0x%x\n",
396 data->currfid); 393 data->currfid);
@@ -407,6 +404,9 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid)
407 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid 404 vcofiddiff = vcocurrfid > vcoreqfid ? vcocurrfid - vcoreqfid
408 : vcoreqfid - vcocurrfid; 405 : vcoreqfid - vcocurrfid;
409 406
407 if ((reqfid <= LO_FID_TABLE_TOP) && (data->currfid <= LO_FID_TABLE_TOP))
408 vcofiddiff = 0;
409
410 while (vcofiddiff > 2) { 410 while (vcofiddiff > 2) {
411 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2); 411 (data->currfid & 1) ? (fid_interval = 1) : (fid_interval = 2);
412 412
@@ -1081,14 +1081,6 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data,
1081 return 0; 1081 return 0;
1082 } 1082 }
1083 1083
1084 if ((fid < HI_FID_TABLE_BOTTOM) &&
1085 (data->currfid < HI_FID_TABLE_BOTTOM)) {
1086 printk(KERN_ERR PFX
1087 "ignoring illegal change in lo freq table-%x to 0x%x\n",
1088 data->currfid, fid);
1089 return 1;
1090 }
1091
1092 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n", 1084 dprintk("cpu %d, changing to fid 0x%x, vid 0x%x\n",
1093 smp_processor_id(), fid, vid); 1085 smp_processor_id(), fid, vid);
1094 freqs.old = find_khz_freq_from_fid(data->currfid); 1086 freqs.old = find_khz_freq_from_fid(data->currfid);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index c9c1190b5e1f..02ce824073cb 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -215,7 +215,8 @@ struct pst_s {
215 215
216#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg) 216#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "powernow-k8", msg)
217 217
218static int core_voltage_pre_transition(struct powernow_k8_data *data, u32 reqvid); 218static int core_voltage_pre_transition(struct powernow_k8_data *data,
219 u32 reqvid, u32 regfid);
219static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid); 220static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvid);
220static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); 221static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid);
221 222
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6e2ec0b18948..c668ac855f71 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -761,6 +761,10 @@ static struct kobj_type ktype_cpufreq = {
761 * cpufreq_add_dev - add a CPU device 761 * cpufreq_add_dev - add a CPU device
762 * 762 *
763 * Adds the cpufreq interface for a CPU device. 763 * Adds the cpufreq interface for a CPU device.
764 *
765 * The Oracle says: try running cpufreq registration/unregistration concurrently
766 * with cpu hotplugging and all hell will break loose. Tried to clean this
767 * mess up, but more thorough testing is needed. - Mathieu
764 */ 768 */
765static int cpufreq_add_dev(struct sys_device *sys_dev) 769static int cpufreq_add_dev(struct sys_device *sys_dev)
766{ 770{
@@ -804,15 +808,12 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
804 goto nomem_out; 808 goto nomem_out;
805 } 809 }
806 if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) { 810 if (!alloc_cpumask_var(&policy->cpus, GFP_KERNEL)) {
807 kfree(policy);
808 ret = -ENOMEM; 811 ret = -ENOMEM;
809 goto nomem_out; 812 goto err_free_policy;
810 } 813 }
811 if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) { 814 if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
812 free_cpumask_var(policy->cpus);
813 kfree(policy);
814 ret = -ENOMEM; 815 ret = -ENOMEM;
815 goto nomem_out; 816 goto err_free_cpumask;
816 } 817 }
817 818
818 policy->cpu = cpu; 819 policy->cpu = cpu;
@@ -820,7 +821,8 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
820 821
821 /* Initially set CPU itself as the policy_cpu */ 822 /* Initially set CPU itself as the policy_cpu */
822 per_cpu(policy_cpu, cpu) = cpu; 823 per_cpu(policy_cpu, cpu) = cpu;
823 lock_policy_rwsem_write(cpu); 824 ret = (lock_policy_rwsem_write(cpu) < 0);
825 WARN_ON(ret);
824 826
825 init_completion(&policy->kobj_unregister); 827 init_completion(&policy->kobj_unregister);
826 INIT_WORK(&policy->update, handle_update); 828 INIT_WORK(&policy->update, handle_update);
@@ -833,7 +835,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
833 ret = cpufreq_driver->init(policy); 835 ret = cpufreq_driver->init(policy);
834 if (ret) { 836 if (ret) {
835 dprintk("initialization failed\n"); 837 dprintk("initialization failed\n");
836 goto err_out; 838 goto err_unlock_policy;
837 } 839 }
838 policy->user_policy.min = policy->min; 840 policy->user_policy.min = policy->min;
839 policy->user_policy.max = policy->max; 841 policy->user_policy.max = policy->max;
@@ -858,15 +860,21 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
858 /* Check for existing affected CPUs. 860 /* Check for existing affected CPUs.
859 * They may not be aware of it due to CPU Hotplug. 861 * They may not be aware of it due to CPU Hotplug.
860 */ 862 */
861 managed_policy = cpufreq_cpu_get(j); /* FIXME: Where is this released? What about error paths? */ 863 managed_policy = cpufreq_cpu_get(j);
862 if (unlikely(managed_policy)) { 864 if (unlikely(managed_policy)) {
863 865
864 /* Set proper policy_cpu */ 866 /* Set proper policy_cpu */
865 unlock_policy_rwsem_write(cpu); 867 unlock_policy_rwsem_write(cpu);
866 per_cpu(policy_cpu, cpu) = managed_policy->cpu; 868 per_cpu(policy_cpu, cpu) = managed_policy->cpu;
867 869
868 if (lock_policy_rwsem_write(cpu) < 0) 870 if (lock_policy_rwsem_write(cpu) < 0) {
869 goto err_out_driver_exit; 871 /* Should not go through policy unlock path */
872 if (cpufreq_driver->exit)
873 cpufreq_driver->exit(policy);
874 ret = -EBUSY;
875 cpufreq_cpu_put(managed_policy);
876 goto err_free_cpumask;
877 }
870 878
871 spin_lock_irqsave(&cpufreq_driver_lock, flags); 879 spin_lock_irqsave(&cpufreq_driver_lock, flags);
872 cpumask_copy(managed_policy->cpus, policy->cpus); 880 cpumask_copy(managed_policy->cpus, policy->cpus);
@@ -877,12 +885,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
877 ret = sysfs_create_link(&sys_dev->kobj, 885 ret = sysfs_create_link(&sys_dev->kobj,
878 &managed_policy->kobj, 886 &managed_policy->kobj,
879 "cpufreq"); 887 "cpufreq");
880 if (ret) 888 if (!ret)
881 goto err_out_driver_exit; 889 cpufreq_cpu_put(managed_policy);
882 890 /*
883 cpufreq_debug_enable_ratelimit(); 891 * Success. We only needed to be added to the mask.
884 ret = 0; 892 * Call driver->exit() because only the cpu parent of
885 goto err_out_driver_exit; /* call driver->exit() */ 893 * the kobj needed to call init().
894 */
895 goto out_driver_exit; /* call driver->exit() */
886 } 896 }
887 } 897 }
888#endif 898#endif
@@ -892,25 +902,25 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
892 ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, 902 ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
893 "cpufreq"); 903 "cpufreq");
894 if (ret) 904 if (ret)
895 goto err_out_driver_exit; 905 goto out_driver_exit;
896 906
897 /* set up files for this cpu device */ 907 /* set up files for this cpu device */
898 drv_attr = cpufreq_driver->attr; 908 drv_attr = cpufreq_driver->attr;
899 while ((drv_attr) && (*drv_attr)) { 909 while ((drv_attr) && (*drv_attr)) {
900 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); 910 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
901 if (ret) 911 if (ret)
902 goto err_out_driver_exit; 912 goto err_out_kobj_put;
903 drv_attr++; 913 drv_attr++;
904 } 914 }
905 if (cpufreq_driver->get) { 915 if (cpufreq_driver->get) {
906 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); 916 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
907 if (ret) 917 if (ret)
908 goto err_out_driver_exit; 918 goto err_out_kobj_put;
909 } 919 }
910 if (cpufreq_driver->target) { 920 if (cpufreq_driver->target) {
911 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); 921 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
912 if (ret) 922 if (ret)
913 goto err_out_driver_exit; 923 goto err_out_kobj_put;
914 } 924 }
915 925
916 spin_lock_irqsave(&cpufreq_driver_lock, flags); 926 spin_lock_irqsave(&cpufreq_driver_lock, flags);
@@ -928,12 +938,14 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
928 continue; 938 continue;
929 939
930 dprintk("CPU %u already managed, adding link\n", j); 940 dprintk("CPU %u already managed, adding link\n", j);
931 cpufreq_cpu_get(cpu); 941 managed_policy = cpufreq_cpu_get(cpu);
932 cpu_sys_dev = get_cpu_sysdev(j); 942 cpu_sys_dev = get_cpu_sysdev(j);
933 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, 943 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
934 "cpufreq"); 944 "cpufreq");
935 if (ret) 945 if (ret) {
946 cpufreq_cpu_put(managed_policy);
936 goto err_out_unregister; 947 goto err_out_unregister;
948 }
937 } 949 }
938 950
939 policy->governor = NULL; /* to assure that the starting sequence is 951 policy->governor = NULL; /* to assure that the starting sequence is
@@ -965,17 +977,20 @@ err_out_unregister:
965 per_cpu(cpufreq_cpu_data, j) = NULL; 977 per_cpu(cpufreq_cpu_data, j) = NULL;
966 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 978 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
967 979
980err_out_kobj_put:
968 kobject_put(&policy->kobj); 981 kobject_put(&policy->kobj);
969 wait_for_completion(&policy->kobj_unregister); 982 wait_for_completion(&policy->kobj_unregister);
970 983
971err_out_driver_exit: 984out_driver_exit:
972 if (cpufreq_driver->exit) 985 if (cpufreq_driver->exit)
973 cpufreq_driver->exit(policy); 986 cpufreq_driver->exit(policy);
974 987
975err_out: 988err_unlock_policy:
976 unlock_policy_rwsem_write(cpu); 989 unlock_policy_rwsem_write(cpu);
990err_free_cpumask:
991 free_cpumask_var(policy->cpus);
992err_free_policy:
977 kfree(policy); 993 kfree(policy);
978
979nomem_out: 994nomem_out:
980 module_put(cpufreq_driver->owner); 995 module_put(cpufreq_driver->owner);
981module_out: 996module_out:
@@ -1070,8 +1085,6 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
1070 spin_unlock_irqrestore(&cpufreq_driver_lock, flags); 1085 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1071#endif 1086#endif
1072 1087
1073 unlock_policy_rwsem_write(cpu);
1074
1075 if (cpufreq_driver->target) 1088 if (cpufreq_driver->target)
1076 __cpufreq_governor(data, CPUFREQ_GOV_STOP); 1089 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1077 1090
@@ -1088,6 +1101,8 @@ static int __cpufreq_remove_dev(struct sys_device *sys_dev)
1088 if (cpufreq_driver->exit) 1101 if (cpufreq_driver->exit)
1089 cpufreq_driver->exit(data); 1102 cpufreq_driver->exit(data);
1090 1103
1104 unlock_policy_rwsem_write(cpu);
1105
1091 free_cpumask_var(data->related_cpus); 1106 free_cpumask_var(data->related_cpus);
1092 free_cpumask_var(data->cpus); 1107 free_cpumask_var(data->cpus);
1093 kfree(data); 1108 kfree(data);
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 7fc58af748b4..57490502b21c 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -63,22 +63,20 @@ struct cpu_dbs_info_s {
63 unsigned int down_skip; 63 unsigned int down_skip;
64 unsigned int requested_freq; 64 unsigned int requested_freq;
65 int cpu; 65 int cpu;
66 unsigned int enable:1; 66 /*
67 * percpu mutex that serializes governor limit change with
68 * do_dbs_timer invocation. We do not want do_dbs_timer to run
69 * when user is changing the governor or limits.
70 */
71 struct mutex timer_mutex;
67}; 72};
68static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 73static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
69 74
70static unsigned int dbs_enable; /* number of CPUs using this policy */ 75static unsigned int dbs_enable; /* number of CPUs using this policy */
71 76
72/* 77/*
73 * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug 78 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
74 * lock and dbs_mutex. cpu_hotplug lock should always be held before 79 * different CPUs. It protects dbs_enable in governor start/stop.
75 * dbs_mutex. If any function that can potentially take cpu_hotplug lock
76 * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
77 * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
78 * is recursive for the same process. -Venki
79 * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it
80 * would deadlock with cancel_delayed_work_sync(), which is needed for proper
81 * raceless workqueue teardown.
82 */ 80 */
83static DEFINE_MUTEX(dbs_mutex); 81static DEFINE_MUTEX(dbs_mutex);
84 82
@@ -143,9 +141,6 @@ dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
143 141
144 struct cpufreq_policy *policy; 142 struct cpufreq_policy *policy;
145 143
146 if (!this_dbs_info->enable)
147 return 0;
148
149 policy = this_dbs_info->cur_policy; 144 policy = this_dbs_info->cur_policy;
150 145
151 /* 146 /*
@@ -488,18 +483,12 @@ static void do_dbs_timer(struct work_struct *work)
488 483
489 delay -= jiffies % delay; 484 delay -= jiffies % delay;
490 485
491 if (lock_policy_rwsem_write(cpu) < 0) 486 mutex_lock(&dbs_info->timer_mutex);
492 return;
493
494 if (!dbs_info->enable) {
495 unlock_policy_rwsem_write(cpu);
496 return;
497 }
498 487
499 dbs_check_cpu(dbs_info); 488 dbs_check_cpu(dbs_info);
500 489
501 queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay); 490 queue_delayed_work_on(cpu, kconservative_wq, &dbs_info->work, delay);
502 unlock_policy_rwsem_write(cpu); 491 mutex_unlock(&dbs_info->timer_mutex);
503} 492}
504 493
505static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 494static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
@@ -508,7 +497,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
508 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 497 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
509 delay -= jiffies % delay; 498 delay -= jiffies % delay;
510 499
511 dbs_info->enable = 1;
512 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); 500 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
513 queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work, 501 queue_delayed_work_on(dbs_info->cpu, kconservative_wq, &dbs_info->work,
514 delay); 502 delay);
@@ -516,7 +504,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
516 504
517static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 505static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
518{ 506{
519 dbs_info->enable = 0;
520 cancel_delayed_work_sync(&dbs_info->work); 507 cancel_delayed_work_sync(&dbs_info->work);
521} 508}
522 509
@@ -535,9 +522,6 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
535 if ((!cpu_online(cpu)) || (!policy->cur)) 522 if ((!cpu_online(cpu)) || (!policy->cur))
536 return -EINVAL; 523 return -EINVAL;
537 524
538 if (this_dbs_info->enable) /* Already enabled */
539 break;
540
541 mutex_lock(&dbs_mutex); 525 mutex_lock(&dbs_mutex);
542 526
543 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); 527 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
@@ -561,6 +545,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
561 this_dbs_info->down_skip = 0; 545 this_dbs_info->down_skip = 0;
562 this_dbs_info->requested_freq = policy->cur; 546 this_dbs_info->requested_freq = policy->cur;
563 547
548 mutex_init(&this_dbs_info->timer_mutex);
564 dbs_enable++; 549 dbs_enable++;
565 /* 550 /*
566 * Start the timerschedule work, when this governor 551 * Start the timerschedule work, when this governor
@@ -590,17 +575,19 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
590 &dbs_cpufreq_notifier_block, 575 &dbs_cpufreq_notifier_block,
591 CPUFREQ_TRANSITION_NOTIFIER); 576 CPUFREQ_TRANSITION_NOTIFIER);
592 } 577 }
593 dbs_timer_init(this_dbs_info);
594
595 mutex_unlock(&dbs_mutex); 578 mutex_unlock(&dbs_mutex);
596 579
580 dbs_timer_init(this_dbs_info);
581
597 break; 582 break;
598 583
599 case CPUFREQ_GOV_STOP: 584 case CPUFREQ_GOV_STOP:
600 mutex_lock(&dbs_mutex);
601 dbs_timer_exit(this_dbs_info); 585 dbs_timer_exit(this_dbs_info);
586
587 mutex_lock(&dbs_mutex);
602 sysfs_remove_group(&policy->kobj, &dbs_attr_group); 588 sysfs_remove_group(&policy->kobj, &dbs_attr_group);
603 dbs_enable--; 589 dbs_enable--;
590 mutex_destroy(&this_dbs_info->timer_mutex);
604 591
605 /* 592 /*
606 * Stop the timerschedule work, when this governor 593 * Stop the timerschedule work, when this governor
@@ -616,7 +603,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
616 break; 603 break;
617 604
618 case CPUFREQ_GOV_LIMITS: 605 case CPUFREQ_GOV_LIMITS:
619 mutex_lock(&dbs_mutex); 606 mutex_lock(&this_dbs_info->timer_mutex);
620 if (policy->max < this_dbs_info->cur_policy->cur) 607 if (policy->max < this_dbs_info->cur_policy->cur)
621 __cpufreq_driver_target( 608 __cpufreq_driver_target(
622 this_dbs_info->cur_policy, 609 this_dbs_info->cur_policy,
@@ -625,7 +612,7 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
625 __cpufreq_driver_target( 612 __cpufreq_driver_target(
626 this_dbs_info->cur_policy, 613 this_dbs_info->cur_policy,
627 policy->min, CPUFREQ_RELATION_L); 614 policy->min, CPUFREQ_RELATION_L);
628 mutex_unlock(&dbs_mutex); 615 mutex_unlock(&this_dbs_info->timer_mutex);
629 616
630 break; 617 break;
631 } 618 }
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 1911d1729353..d6ba14276bb1 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -70,23 +70,21 @@ struct cpu_dbs_info_s {
70 unsigned int freq_lo_jiffies; 70 unsigned int freq_lo_jiffies;
71 unsigned int freq_hi_jiffies; 71 unsigned int freq_hi_jiffies;
72 int cpu; 72 int cpu;
73 unsigned int enable:1, 73 unsigned int sample_type:1;
74 sample_type:1; 74 /*
75 * percpu mutex that serializes governor limit change with
76 * do_dbs_timer invocation. We do not want do_dbs_timer to run
77 * when user is changing the governor or limits.
78 */
79 struct mutex timer_mutex;
75}; 80};
76static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 81static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
77 82
78static unsigned int dbs_enable; /* number of CPUs using this policy */ 83static unsigned int dbs_enable; /* number of CPUs using this policy */
79 84
80/* 85/*
81 * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug 86 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
82 * lock and dbs_mutex. cpu_hotplug lock should always be held before 87 * different CPUs. It protects dbs_enable in governor start/stop.
83 * dbs_mutex. If any function that can potentially take cpu_hotplug lock
84 * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then
85 * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock
86 * is recursive for the same process. -Venki
87 * DEADLOCK ALERT! (2) : do_dbs_timer() must not take the dbs_mutex, because it
88 * would deadlock with cancel_delayed_work_sync(), which is needed for proper
89 * raceless workqueue teardown.
90 */ 88 */
91static DEFINE_MUTEX(dbs_mutex); 89static DEFINE_MUTEX(dbs_mutex);
92 90
@@ -192,13 +190,18 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
192 return freq_hi; 190 return freq_hi;
193} 191}
194 192
193static void ondemand_powersave_bias_init_cpu(int cpu)
194{
195 struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
196 dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
197 dbs_info->freq_lo = 0;
198}
199
195static void ondemand_powersave_bias_init(void) 200static void ondemand_powersave_bias_init(void)
196{ 201{
197 int i; 202 int i;
198 for_each_online_cpu(i) { 203 for_each_online_cpu(i) {
199 struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i); 204 ondemand_powersave_bias_init_cpu(i);
200 dbs_info->freq_table = cpufreq_frequency_get_table(i);
201 dbs_info->freq_lo = 0;
202 } 205 }
203} 206}
204 207
@@ -240,12 +243,10 @@ static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
240 unsigned int input; 243 unsigned int input;
241 int ret; 244 int ret;
242 ret = sscanf(buf, "%u", &input); 245 ret = sscanf(buf, "%u", &input);
246 if (ret != 1)
247 return -EINVAL;
243 248
244 mutex_lock(&dbs_mutex); 249 mutex_lock(&dbs_mutex);
245 if (ret != 1) {
246 mutex_unlock(&dbs_mutex);
247 return -EINVAL;
248 }
249 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); 250 dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
250 mutex_unlock(&dbs_mutex); 251 mutex_unlock(&dbs_mutex);
251 252
@@ -259,13 +260,12 @@ static ssize_t store_up_threshold(struct cpufreq_policy *unused,
259 int ret; 260 int ret;
260 ret = sscanf(buf, "%u", &input); 261 ret = sscanf(buf, "%u", &input);
261 262
262 mutex_lock(&dbs_mutex);
263 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || 263 if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
264 input < MIN_FREQUENCY_UP_THRESHOLD) { 264 input < MIN_FREQUENCY_UP_THRESHOLD) {
265 mutex_unlock(&dbs_mutex);
266 return -EINVAL; 265 return -EINVAL;
267 } 266 }
268 267
268 mutex_lock(&dbs_mutex);
269 dbs_tuners_ins.up_threshold = input; 269 dbs_tuners_ins.up_threshold = input;
270 mutex_unlock(&dbs_mutex); 270 mutex_unlock(&dbs_mutex);
271 271
@@ -363,9 +363,6 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
363 struct cpufreq_policy *policy; 363 struct cpufreq_policy *policy;
364 unsigned int j; 364 unsigned int j;
365 365
366 if (!this_dbs_info->enable)
367 return;
368
369 this_dbs_info->freq_lo = 0; 366 this_dbs_info->freq_lo = 0;
370 policy = this_dbs_info->cur_policy; 367 policy = this_dbs_info->cur_policy;
371 368
@@ -493,14 +490,7 @@ static void do_dbs_timer(struct work_struct *work)
493 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 490 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
494 491
495 delay -= jiffies % delay; 492 delay -= jiffies % delay;
496 493 mutex_lock(&dbs_info->timer_mutex);
497 if (lock_policy_rwsem_write(cpu) < 0)
498 return;
499
500 if (!dbs_info->enable) {
501 unlock_policy_rwsem_write(cpu);
502 return;
503 }
504 494
505 /* Common NORMAL_SAMPLE setup */ 495 /* Common NORMAL_SAMPLE setup */
506 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 496 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
@@ -517,7 +507,7 @@ static void do_dbs_timer(struct work_struct *work)
517 dbs_info->freq_lo, CPUFREQ_RELATION_H); 507 dbs_info->freq_lo, CPUFREQ_RELATION_H);
518 } 508 }
519 queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); 509 queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
520 unlock_policy_rwsem_write(cpu); 510 mutex_unlock(&dbs_info->timer_mutex);
521} 511}
522 512
523static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 513static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
@@ -526,8 +516,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
526 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); 516 int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
527 delay -= jiffies % delay; 517 delay -= jiffies % delay;
528 518
529 dbs_info->enable = 1;
530 ondemand_powersave_bias_init();
531 dbs_info->sample_type = DBS_NORMAL_SAMPLE; 519 dbs_info->sample_type = DBS_NORMAL_SAMPLE;
532 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); 520 INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
533 queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, 521 queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
@@ -536,7 +524,6 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
536 524
537static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 525static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
538{ 526{
539 dbs_info->enable = 0;
540 cancel_delayed_work_sync(&dbs_info->work); 527 cancel_delayed_work_sync(&dbs_info->work);
541} 528}
542 529
@@ -555,19 +542,15 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
555 if ((!cpu_online(cpu)) || (!policy->cur)) 542 if ((!cpu_online(cpu)) || (!policy->cur))
556 return -EINVAL; 543 return -EINVAL;
557 544
558 if (this_dbs_info->enable) /* Already enabled */
559 break;
560
561 mutex_lock(&dbs_mutex); 545 mutex_lock(&dbs_mutex);
562 dbs_enable++;
563 546
564 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); 547 rc = sysfs_create_group(&policy->kobj, &dbs_attr_group);
565 if (rc) { 548 if (rc) {
566 dbs_enable--;
567 mutex_unlock(&dbs_mutex); 549 mutex_unlock(&dbs_mutex);
568 return rc; 550 return rc;
569 } 551 }
570 552
553 dbs_enable++;
571 for_each_cpu(j, policy->cpus) { 554 for_each_cpu(j, policy->cpus) {
572 struct cpu_dbs_info_s *j_dbs_info; 555 struct cpu_dbs_info_s *j_dbs_info;
573 j_dbs_info = &per_cpu(cpu_dbs_info, j); 556 j_dbs_info = &per_cpu(cpu_dbs_info, j);
@@ -581,6 +564,8 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
581 } 564 }
582 } 565 }
583 this_dbs_info->cpu = cpu; 566 this_dbs_info->cpu = cpu;
567 ondemand_powersave_bias_init_cpu(cpu);
568 mutex_init(&this_dbs_info->timer_mutex);
584 /* 569 /*
585 * Start the timerschedule work, when this governor 570 * Start the timerschedule work, when this governor
586 * is used for first time 571 * is used for first time
@@ -598,29 +583,31 @@ static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
598 max(min_sampling_rate, 583 max(min_sampling_rate,
599 latency * LATENCY_MULTIPLIER); 584 latency * LATENCY_MULTIPLIER);
600 } 585 }
601 dbs_timer_init(this_dbs_info);
602
603 mutex_unlock(&dbs_mutex); 586 mutex_unlock(&dbs_mutex);
587
588 dbs_timer_init(this_dbs_info);
604 break; 589 break;
605 590
606 case CPUFREQ_GOV_STOP: 591 case CPUFREQ_GOV_STOP:
607 mutex_lock(&dbs_mutex);
608 dbs_timer_exit(this_dbs_info); 592 dbs_timer_exit(this_dbs_info);
593
594 mutex_lock(&dbs_mutex);
609 sysfs_remove_group(&policy->kobj, &dbs_attr_group); 595 sysfs_remove_group(&policy->kobj, &dbs_attr_group);
596 mutex_destroy(&this_dbs_info->timer_mutex);
610 dbs_enable--; 597 dbs_enable--;
611 mutex_unlock(&dbs_mutex); 598 mutex_unlock(&dbs_mutex);
612 599
613 break; 600 break;
614 601
615 case CPUFREQ_GOV_LIMITS: 602 case CPUFREQ_GOV_LIMITS:
616 mutex_lock(&dbs_mutex); 603 mutex_lock(&this_dbs_info->timer_mutex);
617 if (policy->max < this_dbs_info->cur_policy->cur) 604 if (policy->max < this_dbs_info->cur_policy->cur)
618 __cpufreq_driver_target(this_dbs_info->cur_policy, 605 __cpufreq_driver_target(this_dbs_info->cur_policy,
619 policy->max, CPUFREQ_RELATION_H); 606 policy->max, CPUFREQ_RELATION_H);
620 else if (policy->min > this_dbs_info->cur_policy->cur) 607 else if (policy->min > this_dbs_info->cur_policy->cur)
621 __cpufreq_driver_target(this_dbs_info->cur_policy, 608 __cpufreq_driver_target(this_dbs_info->cur_policy,
622 policy->min, CPUFREQ_RELATION_L); 609 policy->min, CPUFREQ_RELATION_L);
623 mutex_unlock(&dbs_mutex); 610 mutex_unlock(&this_dbs_info->timer_mutex);
624 break; 611 break;
625 } 612 }
626 return 0; 613 return 0;