author     Viresh Kumar <viresh.kumar@linaro.org>           2016-02-10 00:30:25 -0500
committer  Rafael J. Wysocki <rafael.j.wysocki@intel.com>   2016-03-09 08:40:59 -0500
commit     c54df0718423ea2941151d8516eb76ca6a32a4b4 (patch)
tree       815146ab0eff04a46129e523a8255ba3ca07ab83 /drivers/cpufreq
parent     68e80dae09033d778b98dc88e5bfe8fdade188e5 (diff)
cpufreq: governor: Create and traverse list of policy_dbs to avoid deadlock
The dbs_data_mutex lock is currently used in two places. First,
cpufreq_governor_dbs() uses it to guarantee mutual exclusion between
invocations of governor operations from the core. Second, the ondemand
governor's update_sampling_rate() uses it to ensure the stability of the
data structures it walks.
The second usage is quite problematic, because update_sampling_rate() is
called from a governor sysfs attribute's ->store callback, and that leads
to a deadlock scenario involving cpufreq_governor_exit(), which runs
under dbs_data_mutex. It is therefore better to rework the code so that
update_sampling_rate() does not need to acquire dbs_data_mutex.
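The deadlock pattern can be reproduced in ordinary userspace code. The
following is a minimal sketch using POSIX threads in place of the kernel
primitives; the names sysfs_store, governor_exit and sysfs_active are
hypothetical stand-ins for the real call paths, and the timed lock exists
only so that the demo terminates instead of hanging the way the kernel
would:

/*
 * Userspace analogy of the deadlock (all names are illustrative, not the
 * actual kernel symbols).  Build with:  gcc -pthread deadlock-demo.c
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_mutex_t dbs_data_mutex = PTHREAD_MUTEX_INITIALIZER; /* global governor lock */
static pthread_mutex_t sysfs_active = PTHREAD_MUTEX_INITIALIZER;   /* models an active sysfs reference */

/* Models a sysfs ->store callback calling the old update_sampling_rate(). */
static void *sysfs_store(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&sysfs_active);      /* the store callback is now in flight */
        sleep(1);                               /* let governor_exit() take its lock first */
        pthread_mutex_lock(&dbs_data_mutex);    /* old update_sampling_rate(): blocks here */
        pthread_mutex_unlock(&dbs_data_mutex);
        pthread_mutex_unlock(&sysfs_active);
        return NULL;
}

/* Models cpufreq_governor_exit(), which runs under dbs_data_mutex. */
static void *governor_exit(void *arg)
{
        struct timespec deadline;

        (void)arg;
        pthread_mutex_lock(&dbs_data_mutex);
        sleep(1);
        /* kobject_put() must wait for the in-flight store, which is waiting for us. */
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 3;
        if (pthread_mutex_timedlock(&sysfs_active, &deadline))
                puts("deadlock: the sysfs store never completes");
        else
                pthread_mutex_unlock(&sysfs_active);
        pthread_mutex_unlock(&dbs_data_mutex);
        return NULL;
}

int main(void)
{
        pthread_t store, exit_path;

        pthread_create(&store, NULL, sysfs_store, NULL);
        pthread_create(&exit_path, NULL, governor_exit, NULL);
        pthread_join(exit_path, NULL);
        /* In the kernel neither side times out, so both paths would hang forever. */
        pthread_join(store, NULL);
        return 0;
}

The actual fix below removes the second lock acquisition entirely rather
than reordering the locks.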
To that end, rework update_sampling_rate() to walk a list of the
policy_dbs objects associated with the dbs_data object it has been called
for (instead of walking the cpu_dbs_info objects for all CPUs). Since the
list manipulation is protected by dbs_data->mutex, which is also held
around the execution of update_sampling_rate(), it is no longer necessary
to hold dbs_data_mutex in that function.
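For illustration only, the reworked structure can be sketched in
self-contained userspace C. The plain next pointer stands in for the
kernel's struct list_head, pthread mutexes stand in for the kernel
mutexes, and attach_policy() is a hypothetical helper; this is a sketch of
the locking pattern, not the kernel implementation:

/* Userspace sketch of the per-dbs_data policy list (illustrative names only). */
#include <pthread.h>
#include <stdio.h>

struct policy_dbs_info {
        pthread_mutex_t timer_mutex;            /* protects sample_delay_ns */
        unsigned long long sample_delay_ns;
        struct policy_dbs_info *next;           /* link in the dbs_data list */
};

struct dbs_data {
        unsigned int sampling_rate;             /* in microseconds, as in cpufreq */
        unsigned int usage_count;
        pthread_mutex_t mutex;                  /* protects the list and usage_count */
        struct policy_dbs_info *policy_dbs_list;
};

/* Roughly what happens when a policy joins a shared dbs_data at init time. */
static void attach_policy(struct dbs_data *dd, struct policy_dbs_info *pd)
{
        pthread_mutex_lock(&dd->mutex);
        dd->usage_count++;
        pd->next = dd->policy_dbs_list;
        dd->policy_dbs_list = pd;
        pthread_mutex_unlock(&dd->mutex);
}

/*
 * Called with dd->mutex already held by the sysfs store path, so the list
 * cannot change underneath us; only each policy's timer_mutex is taken here.
 */
static void update_sampling_rate(struct dbs_data *dd, unsigned int new_rate)
{
        struct policy_dbs_info *pd;

        dd->sampling_rate = new_rate;
        for (pd = dd->policy_dbs_list; pd; pd = pd->next) {
                pthread_mutex_lock(&pd->timer_mutex);
                pd->sample_delay_ns = (unsigned long long)new_rate * 1000;
                pthread_mutex_unlock(&pd->timer_mutex);
        }
}

int main(void)
{
        struct dbs_data dd = {
                .sampling_rate = 20000,
                .mutex = PTHREAD_MUTEX_INITIALIZER,
        };
        struct policy_dbs_info pd = {
                .timer_mutex = PTHREAD_MUTEX_INITIALIZER,
        };

        attach_policy(&dd, &pd);

        pthread_mutex_lock(&dd.mutex);          /* what the store callback would hold */
        update_sampling_rate(&dd, 10000);
        pthread_mutex_unlock(&dd.mutex);

        printf("usage_count=%u sample_delay_ns=%llu\n",
               dd.usage_count, pd.sample_delay_ns);
        return 0;
}

Keeping the list per dbs_data means a rate update only ever visits the
policies that actually share that tunable set, whether or not the governor
is per-policy.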
Reported-by: Juri Lelli <juri.lelli@arm.com>
Reported-by: Shilpasri G Bhat <shilpa.bhat@linux.vnet.ibm.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--  drivers/cpufreq/cpufreq_governor.c  22
-rw-r--r--  drivers/cpufreq/cpufreq_governor.h   7
-rw-r--r--  drivers/cpufreq/cpufreq_ondemand.c  89
3 files changed, 54 insertions(+), 64 deletions(-)
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 00cb468d3b6a..2f35270fbd43 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -385,9 +385,14 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
                         ret = -EINVAL;
                         goto free_policy_dbs_info;
                 }
-                dbs_data->usage_count++;
                 policy_dbs->dbs_data = dbs_data;
                 policy->governor_data = policy_dbs;
+
+                mutex_lock(&dbs_data->mutex);
+                dbs_data->usage_count++;
+                list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
+                mutex_unlock(&dbs_data->mutex);
+
                 return 0;
         }
 
@@ -397,7 +402,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
                 goto free_policy_dbs_info;
         }
 
-        dbs_data->usage_count = 1;
+        INIT_LIST_HEAD(&dbs_data->policy_dbs_list);
         mutex_init(&dbs_data->mutex);
 
         ret = gov->init(dbs_data, !policy->governor->initialized);
@@ -418,9 +423,12 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy)
         if (!have_governor_per_policy())
                 gov->gdbs_data = dbs_data;
 
-        policy_dbs->dbs_data = dbs_data;
         policy->governor_data = policy_dbs;
 
+        policy_dbs->dbs_data = dbs_data;
+        dbs_data->usage_count = 1;
+        list_add(&policy_dbs->list, &dbs_data->policy_dbs_list);
+
         gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
         ret = kobject_init_and_add(&dbs_data->kobj, &gov->kobj_type,
                                    get_governor_parent_kobj(policy),
@@ -448,12 +456,18 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy)
         struct dbs_governor *gov = dbs_governor_of(policy);
         struct policy_dbs_info *policy_dbs = policy->governor_data;
         struct dbs_data *dbs_data = policy_dbs->dbs_data;
+        int count;
 
         /* State should be equivalent to INIT */
         if (policy_dbs->policy)
                 return -EBUSY;
 
-        if (!--dbs_data->usage_count) {
+        mutex_lock(&dbs_data->mutex);
+        list_del(&policy_dbs->list);
+        count = --dbs_data->usage_count;
+        mutex_unlock(&dbs_data->mutex);
+
+        if (!count) {
                 kobject_put(&dbs_data->kobj);
 
                 policy->governor_data = NULL;
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 0eb66a6c9503..8bf4775ce03c 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -73,7 +73,11 @@ struct dbs_data {
         unsigned int up_threshold;
 
         struct kobject kobj;
-        /* Protect concurrent updates to governor tunables from sysfs */
+        struct list_head policy_dbs_list;
+        /*
+         * Protect concurrent updates to governor tunables from sysfs,
+         * policy_dbs_list and usage_count.
+         */
         struct mutex mutex;
 };
 
@@ -125,6 +129,7 @@ struct policy_dbs_info {
         struct work_struct work;
         /* dbs_data may be shared between multiple policy objects */
         struct dbs_data *dbs_data;
+        struct list_head list;
 };
 
 static inline void gov_update_sample_delay(struct policy_dbs_info *policy_dbs,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index e36792f60348..38301c6b31c7 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -226,84 +226,55 @@ static struct dbs_governor od_dbs_gov;
  * @new_rate: new sampling rate
  *
  * If new rate is smaller than the old, simply updating
- * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the
+ * dbs.sampling_rate might not be appropriate. For example, if the
  * original sampling_rate was 1 second and the requested new sampling rate is 10
  * ms because the user needs immediate reaction from ondemand governor, but not
  * sure if higher frequency will be required or not, then, the governor may
  * change the sampling rate too late; up to 1 second later. Thus, if we are
  * reducing the sampling rate, we need to make the new value effective
  * immediately.
+ *
+ * On the other hand, if new rate is larger than the old, then we may evaluate
+ * the load too soon, and it might we worth updating sample_delay_ns then as
+ * well.
+ *
+ * This must be called with dbs_data->mutex held, otherwise traversing
+ * policy_dbs_list isn't safe.
  */
 static void update_sampling_rate(struct dbs_data *dbs_data,
                 unsigned int new_rate)
 {
-        struct cpumask cpumask;
-        int cpu;
+        struct policy_dbs_info *policy_dbs;
 
         dbs_data->sampling_rate = new_rate = max(new_rate,
                         dbs_data->min_sampling_rate);
 
         /*
-         * Lock governor so that governor start/stop can't execute in parallel.
+         * We are operating under dbs_data->mutex and so the list and its
+         * entries can't be freed concurrently.
          */
-        mutex_lock(&dbs_data_mutex);
-
-        cpumask_copy(&cpumask, cpu_online_mask);
-
-        for_each_cpu(cpu, &cpumask) {
-                struct cpufreq_policy *policy;
-                struct od_cpu_dbs_info_s *dbs_info;
-                struct cpu_dbs_info *cdbs;
-                struct policy_dbs_info *policy_dbs;
-
-                dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
-                cdbs = &dbs_info->cdbs;
-                policy_dbs = cdbs->policy_dbs;
-
+        list_for_each_entry(policy_dbs, &dbs_data->policy_dbs_list, list) {
+                mutex_lock(&policy_dbs->timer_mutex);
                 /*
-                 * A valid policy_dbs and policy_dbs->policy means governor
-                 * hasn't stopped or exited yet.
+                 * On 32-bit architectures this may race with the
+                 * sample_delay_ns read in dbs_update_util_handler(), but that
+                 * really doesn't matter. If the read returns a value that's
+                 * too big, the sample will be skipped, but the next invocation
+                 * of dbs_update_util_handler() (when the update has been
+                 * completed) will take a sample. If the returned value is too
+                 * small, the sample will be taken immediately, but that isn't a
+                 * problem, as we want the new rate to take effect immediately
+                 * anyway.
+                 *
+                 * If this runs in parallel with dbs_work_handler(), we may end
+                 * up overwriting the sample_delay_ns value that it has just
+                 * written, but the difference should not be too big and it will
+                 * be corrected next time a sample is taken, so it shouldn't be
+                 * significant.
                  */
-                if (!policy_dbs || !policy_dbs->policy)
-                        continue;
-
-                policy = policy_dbs->policy;
-
-                /* clear all CPUs of this policy */
-                cpumask_andnot(&cpumask, &cpumask, policy->cpus);
-
-                /*
-                 * Update sampling rate for CPUs whose policy is governed by
-                 * dbs_data. In case of governor_per_policy, only a single
-                 * policy will be governed by dbs_data, otherwise there can be
-                 * multiple policies that are governed by the same dbs_data.
-                 */
-                if (dbs_data == policy_dbs->dbs_data) {
-                        mutex_lock(&policy_dbs->timer_mutex);
-                        /*
-                         * On 32-bit architectures this may race with the
-                         * sample_delay_ns read in dbs_update_util_handler(),
-                         * but that really doesn't matter. If the read returns
-                         * a value that's too big, the sample will be skipped,
-                         * but the next invocation of dbs_update_util_handler()
-                         * (when the update has been completed) will take a
-                         * sample. If the returned value is too small, the
-                         * sample will be taken immediately, but that isn't a
-                         * problem, as we want the new rate to take effect
-                         * immediately anyway.
-                         *
-                         * If this runs in parallel with dbs_work_handler(), we
-                         * may end up overwriting the sample_delay_ns value that
-                         * it has just written, but the difference should not be
-                         * too big and it will be corrected next time a sample
-                         * is taken, so it shouldn't be significant.
-                         */
-                        gov_update_sample_delay(policy_dbs, new_rate);
-                        mutex_unlock(&policy_dbs->timer_mutex);
-                }
+                gov_update_sample_delay(policy_dbs, new_rate);
+                mutex_unlock(&policy_dbs->timer_mutex);
         }
-
-        mutex_unlock(&dbs_data_mutex);
 }
 
 static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,