aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/cpufreq
diff options
context:
space:
mode:
authorViresh Kumar <viresh.kumar@linaro.org>2016-02-10 00:30:25 -0500
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-03-09 08:40:59 -0500
commitc54df0718423ea2941151d8516eb76ca6a32a4b4 (patch)
tree815146ab0eff04a46129e523a8255ba3ca07ab83 /drivers/cpufreq
parent68e80dae09033d778b98dc88e5bfe8fdade188e5 (diff)
cpufreq: governor: Create and traverse list of policy_dbs to avoid deadlock
The dbs_data_mutex lock is currently used in two places. First, cpufreq_governor_dbs() uses it to guarantee mutual exclusion between invocations of governor operations from the core. Second, it is used by ondemand governor's update_sampling_rate() to ensure the stability of data structures walked by it. The second usage is quite problematic, because update_sampling_rate() is called from a governor sysfs attribute's ->store callback and that leads to a deadlock scenario involving cpufreq_governor_exit() which runs under dbs_data_mutex. Thus it is better to rework the code so update_sampling_rate() doesn't need to acquire dbs_data_mutex. To that end, rework update_sampling_rate() to walk a list of policy_dbs objects supported by the dbs_data one it has been called for (instead of walking cpu_dbs_info object for all CPUs). The list manipulation is protected with dbs_data->mutex which also is held around the execution of update_sampling_rate(), it is not necessary to hold dbs_data_mutex in that function any more. Reported-by: Juri Lelli <juri.lelli@arm.com> Reported-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/cpufreq_governor.c22
-rw-r--r--drivers/cpufreq/cpufreq_governor.h7
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c89
3 files changed, 54 insertions, 64 deletions
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 00cb468d3b6a..2f35270fbd43 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -385,9 +385,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
385 ret = -EINVAL; 385 ret = -EINVAL;
386 goto free_policy_dbs_info; 386 goto free_policy_dbs_info;
387 } 387 }
388 dbs_data->usage_count++;
389 policy_dbs->dbs_data = dbs_data; 388 policy_dbs->dbs_data = dbs_data;
390 policy->governor_data = policy_dbs; 389 policy->governor_data = policy_dbs;
390
391 mutex_lock(&dbs_data->mutex);
392 dbs_data->usage_count++;
393 list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
394 mutex_unlock(&dbs_data->mutex);
395
391 return 0; 396 return 0;
392 } 397 }
393 398
@@ -397,7 +402,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
397 goto free_policy_dbs_info; 402 goto free_policy_dbs_info;
398 } 403 }
399 404
400 dbs_data->usage_count = 1; 405 INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
401 mutex_init(&dbs_data->mutex); 406 mutex_init(&dbs_data->mutex);
402 407
403 ret = gov->init(dbs_data, !policy->governor->initialized); 408 ret = gov->init(dbs_data, !policy->governor->initialized);
@@ -418,9 +423,12 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
418 if (!have_governor_per_policy()) 423 if (!have_governor_per_policy())
419 gov->gdbs_data = dbs_data; 424 gov->gdbs_data = dbs_data;
420 425
421 policy_dbs->dbs_data = dbs_data;
422 policy->governor_data = policy_dbs; 426 policy->governor_data = policy_dbs;
423 427
428 policy_dbs->dbs_data = dbs_data;
429 dbs_data->usage_count = 1;
430 list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
431
424 gov->kobj_type.sysfs_ops = &governor_sysfs_ops; 432 gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
425 ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type, 433 ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
426 get_governor_parent_kobj(policy), 434 get_governor_parent_kobj(policy),
@@ -448,12 +456,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy)
448 struct dbs_governor *gov = dbs_governor_of(policy); 456 struct dbs_governor *gov = dbs_governor_of(policy);
449 struct policy_dbs_info *policy_dbs = policy->governor_data; 457 struct policy_dbs_info *policy_dbs = policy->governor_data;
450 struct dbs_data *dbs_data = policy_dbs->dbs_data; 458 struct dbs_data *dbs_data = policy_dbs->dbs_data;
459 int count;
451 460
452 /* State should be equivalent to INIT */ 461 /* State should be equivalent to INIT */
453 if (policy_dbs->policy) 462 if (policy_dbs->policy)
454 return -EBUSY; 463 return -EBUSY;
455 464
456 if (!--dbs_data->usage_count) { 465 mutex_lock(&dbs_data->mutex);
466 list_del(&policy_dbs->list);
467 count = --dbs_data->usage_count;
468 mutex_unlock(&dbs_data->mutex);
469
470 if (!count) {
457 kobject_put(&dbs_data->kobj); 471 kobject_put(&dbs_data->kobj);
458 472
459 policy->governor_data = NULL; 473 policy->governor_data = NULL;
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 0eb66a6c9503..8bf4775ce03c 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -73,7 +73,11 @@ struct dbs_data {
73 unsigned int up_threshold; 73 unsigned int up_threshold;
74 74
75 struct kobject kobj; 75 struct kobject kobj;
76 /* Protect concurrent updates to governor tunables from sysfs */ 76 struct list_head policy_dbs_list;
77 /*
78 * Protect concurrent updates to governor tunables from sysfs,
79 * policy_dbs_list and usage_count.
80 */
77 struct mutex mutex; 81 struct mutex mutex;
78}; 82};
79 83
@@ -125,6 +129,7 @@ struct policy_dbs_info {
125 struct work_struct work; 129 struct work_struct work;
126 /* dbs_data may be shared between multiple policy objects */ 130 /* dbs_data may be shared between multiple policy objects */
127 struct dbs_data *dbs_data; 131 struct dbs_data *dbs_data;
132 struct list_head list;
128}; 133};
129 134
130static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs, 135static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index e36792f60348..38301c6b31c7 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -226,84 +226,55 @@ static struct dbs_governor od_dbs_gov;
226 * @new_rate: new sampling rate 226 * @new_rate: new sampling rate
227 * 227 *
228 * If new rate is smaller than the old, simply updating 228 * If new rate is smaller than the old, simply updating
229 * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the 229 * dbs.sampling_rate might not be appropriate. For example, if the
230 * original sampling_rate was 1 second and the requested new sampling rate is 10 230 * original sampling_rate was 1 second and the requested new sampling rate is 10
231 * ms because the user needs immediate reaction from ondemand governor, but not 231 * ms because the user needs immediate reaction from ondemand governor, but not
232 * sure if higher frequency will be required or not, then, the governor may 232 * sure if higher frequency will be required or not, then, the governor may
233 * change the sampling rate too late; up to 1 second later. Thus, if we are 233 * change the sampling rate too late; up to 1 second later. Thus, if we are
234 * reducing the sampling rate, we need to make the new value effective 234 * reducing the sampling rate, we need to make the new value effective
235 * immediately. 235 * immediately.
236 *
237 * On the other hand, if new rate is larger than the old, then we may evaluate
238 * the load too soon, and it might we worth updating sample_delay_ns then as
239 * well.
240 *
241 * This must be called with dbs_data->mutex held, otherwise traversing
242 * policy_dbs_list isn't safe.
236 */ 243 */
237static void update_sampling_rate(struct dbs_data *dbs_data, 244static void update_sampling_rate(struct dbs_data *dbs_data,
238 unsigned int new_rate) 245 unsigned int new_rate)
239{ 246{
240 struct cpumask cpumask; 247 struct policy_dbs_info *policy_dbs;
241 int cpu;
242 248
243 dbs_data->sampling_rate = new_rate = max(new_rate, 249 dbs_data->sampling_rate = new_rate = max(new_rate,
244 dbs_data->min_sampling_rate); 250 dbs_data->min_sampling_rate);
245 251
246 /* 252 /*
247 * Lock governor so that governor start/stop can't execute in parallel. 253 * We are operating under dbs_data->mutex and so the list and its
254 * entries can't be freed concurrently.
248 */ 255 */
249 mutex_lock(&dbs_data_mutex); 256 list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
250 257 mutex_lock(&policy_dbs->timer_mutex);
251 cpumask_copy(&cpumask, cpu_online_mask);
252
253 for_each_cpu(cpu, &cpumask) {
254 struct cpufreq_policy *policy;
255 struct od_cpu_dbs_info_s *dbs_info;
256 struct cpu_dbs_info *cdbs;
257 struct policy_dbs_info *policy_dbs;
258
259 dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
260 cdbs = &dbs_info->cdbs;
261 policy_dbs = cdbs->policy_dbs;
262
263 /* 258 /*
264 * A valid policy_dbs and policy_dbs->policy means governor 259 * On 32-bit architectures this may race with the
265 * hasn't stopped or exited yet. 260 * sample_delay_ns read in dbs_update_util_handler(), but that
261 * really doesn't matter. If the read returns a value that's
262 * too big, the sample will be skipped, but the next invocation
263 * of dbs_update_util_handler() (when the update has been
264 * completed) will take a sample. If the returned value is too
265 * small, the sample will be taken immediately, but that isn't a
266 * problem, as we want the new rate to take effect immediately
267 * anyway.
268 *
269 * If this runs in parallel with dbs_work_handler(), we may end
270 * up overwriting the sample_delay_ns value that it has just
271 * written, but the difference should not be too big and it will
272 * be corrected next time a sample is taken, so it shouldn't be
273 * significant.
266 */ 274 */
267 if (!policy_dbs || !policy_dbs->policy) 275 gov_update_sample_delay(policy_dbs, new_rate);
268 continue; 276 mutex_unlock(&policy_dbs->timer_mutex);
269
270 policy = policy_dbs->policy;
271
272 /* clear all CPUs of this policy */
273 cpumask_andnot(&cpumask, &cpumask, policy->cpus);
274
275 /*
276 * Update sampling rate for CPUs whose policy is governed by
277 * dbs_data. In case of governor_per_policy, only a single
278 * policy will be governed by dbs_data, otherwise there can be
279 * multiple policies that are governed by the same dbs_data.
280 */
281 if (dbs_data == policy_dbs->dbs_data) {
282 mutex_lock(&policy_dbs->timer_mutex);
283 /*
284 * On 32-bit architectures this may race with the
285 * sample_delay_ns read in dbs_update_util_handler(),
286 * but that really doesn't matter. If the read returns
287 * a value that's too big, the sample will be skipped,
288 * but the next invocation of dbs_update_util_handler()
289 * (when the update has been completed) will take a
290 * sample. If the returned value is too small, the
291 * sample will be taken immediately, but that isn't a
292 * problem, as we want the new rate to take effect
293 * immediately anyway.
294 *
295 * If this runs in parallel with dbs_work_handler(), we
296 * may end up overwriting the sample_delay_ns value that
297 * it has just written, but the difference should not be
298 * too big and it will be corrected next time a sample
299 * is taken, so it shouldn't be significant.
300 */
301 gov_update_sample_delay(policy_dbs, new_rate);
302 mutex_unlock(&policy_dbs->timer_mutex);
303 }
304 } 277 }
305
306 mutex_unlock(&dbs_data_mutex);
307} 278}
308 279
309static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf, 280static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,