diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-08-12 05:52:16 -0400 |
---|---|---|
committer | Eduardo Valentin <edubezval@gmail.com> | 2015-08-14 21:21:41 -0400 |
commit | 02373d7c69b4270bbab930f8a81b0721be794347 (patch) | |
tree | 4cfe2d1f1621c42bb2ec89eee199fdd1326681eb /drivers/thermal | |
parent | cf736ea6f902c26e03895dc7f5ccbc55cdc68e6e (diff) |
thermal: cpu_cooling: fix lockdep problems in cpu_cooling
A recent change to the cpu_cooling code introduced a AB-BA deadlock
scenario between the cpufreq_policy_notifier_list rwsem and the
cooling_cpufreq_lock. This is caused by cooling_cpufreq_lock being held
before the registration/removal of the notifier block (an operation
which takes the rwsem), and the notifier code itself which takes the
locks in the reverse order:
======================================================
[ INFO: possible circular locking dependency detected ]
3.18.0+ #1453 Not tainted
-------------------------------------------------------
rc.local/770 is trying to acquire lock:
(cooling_cpufreq_lock){+.+.+.}, at: [<c04abfc4>] cpufreq_thermal_notifier+0x34/0xfc
but task is already holding lock:
((cpufreq_policy_notifier_list).rwsem){++++.+}, at: [<c0042f04>] __blocking_notifier_call_chain+0x34/0x68
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 ((cpufreq_policy_notifier_list).rwsem){++++.+}:
[<c06bc3b0>] down_write+0x44/0x9c
[<c0043444>] blocking_notifier_chain_register+0x28/0xd8
[<c04ad610>] cpufreq_register_notifier+0x68/0x90
[<c04abe4c>] __cpufreq_cooling_register.part.1+0x120/0x180
[<c04abf44>] __cpufreq_cooling_register+0x98/0xa4
[<c04abf8c>] cpufreq_cooling_register+0x18/0x1c
[<bf0046f8>] imx_thermal_probe+0x1c0/0x470 [imx_thermal]
[<c037cef8>] platform_drv_probe+0x50/0xac
[<c037b710>] driver_probe_device+0x114/0x234
[<c037b8cc>] __driver_attach+0x9c/0xa0
[<c0379d68>] bus_for_each_dev+0x5c/0x90
[<c037b204>] driver_attach+0x24/0x28
[<c037ae7c>] bus_add_driver+0xe0/0x1d8
[<c037c0cc>] driver_register+0x80/0xfc
[<c037cd80>] __platform_driver_register+0x50/0x64
[<bf007018>] 0xbf007018
[<c0008a5c>] do_one_initcall+0x88/0x1d8
[<c0095da4>] load_module+0x1768/0x1ef8
[<c0096614>] SyS_init_module+0xe0/0xf4
[<c000ec00>] ret_fast_syscall+0x0/0x48
-> #0 (cooling_cpufreq_lock){+.+.+.}:
[<c00619f8>] lock_acquire+0xb0/0x124
[<c06ba3b4>] mutex_lock_nested+0x5c/0x3d8
[<c04abfc4>] cpufreq_thermal_notifier+0x34/0xfc
[<c0042bf4>] notifier_call_chain+0x4c/0x8c
[<c0042f20>] __blocking_notifier_call_chain+0x50/0x68
[<c0042f58>] blocking_notifier_call_chain+0x20/0x28
[<c04ae62c>] cpufreq_set_policy+0x7c/0x1d0
[<c04af3cc>] store_scaling_governor+0x74/0x9c
[<c04ad418>] store+0x90/0xc0
[<c0175384>] sysfs_kf_write+0x54/0x58
[<c01746b4>] kernfs_fop_write+0xdc/0x190
[<c010dcc0>] vfs_write+0xac/0x1b4
[<c010dfec>] SyS_write+0x44/0x90
[<c000ec00>] ret_fast_syscall+0x0/0x48
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock((cpufreq_policy_notifier_list).rwsem);
lock(cooling_cpufreq_lock);
lock((cpufreq_policy_notifier_list).rwsem);
lock(cooling_cpufreq_lock);
*** DEADLOCK ***
7 locks held by rc.local/770:
#0: (sb_writers#6){.+.+.+}, at: [<c010dda0>] vfs_write+0x18c/0x1b4
#1: (&of->mutex){+.+.+.}, at: [<c0174678>] kernfs_fop_write+0xa0/0x190
#2: (s_active#52){.+.+.+}, at: [<c0174680>] kernfs_fop_write+0xa8/0x190
#3: (cpu_hotplug.lock){++++++}, at: [<c0026a60>] get_online_cpus+0x34/0x90
#4: (cpufreq_rwsem){.+.+.+}, at: [<c04ad3e0>] store+0x58/0xc0
#5: (&policy->rwsem){+.+.+.}, at: [<c04ad3f8>] store+0x70/0xc0
#6: ((cpufreq_policy_notifier_list).rwsem){++++.+}, at: [<c0042f04>] __blocking_notifier_call_chain+0x34/0x68
stack backtrace:
CPU: 0 PID: 770 Comm: rc.local Not tainted 3.18.0+ #1453
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
Backtrace:
[<c00121c8>] (dump_backtrace) from [<c0012360>] (show_stack+0x18/0x1c)
r6:c0b85a80 r5:c0b75630 r4:00000000 r3:00000000
[<c0012348>] (show_stack) from [<c06b6c48>] (dump_stack+0x7c/0x98)
[<c06b6bcc>] (dump_stack) from [<c06b42a4>] (print_circular_bug+0x28c/0x2d8)
r4:c0b85a80 r3:d0071d40
[<c06b4018>] (print_circular_bug) from [<c00613b0>] (__lock_acquire+0x1acc/0x1bb0)
r10:c0b50660 r8:c09e6d80 r7:d0071d40 r6:c11d0f0c r5:00000007 r4:d0072240
[<c005f8e4>] (__lock_acquire) from [<c00619f8>] (lock_acquire+0xb0/0x124)
r10:00000000 r9:c04abfc4 r8:00000000 r7:00000000 r6:00000000 r5:c0a06f0c
r4:00000000
[<c0061948>] (lock_acquire) from [<c06ba3b4>] (mutex_lock_nested+0x5c/0x3d8)
r10:ec853800 r9:c0a06ed4 r8:d0071d40 r7:c0a06ed4 r6:c11d0f0c r5:00000000
r4:c04abfc4
[<c06ba358>] (mutex_lock_nested) from [<c04abfc4>] (cpufreq_thermal_notifier+0x34/0xfc)
r10:ec853800 r9:ec85380c r8:d00d7d3c r7:c0a06ed4 r6:d00d7d3c r5:00000000
r4:fffffffe
[<c04abf90>] (cpufreq_thermal_notifier) from [<c0042bf4>] (notifier_call_chain+0x4c/0x8c)
r7:00000000 r6:00000000 r5:00000000 r4:fffffffe
[<c0042ba8>] (notifier_call_chain) from [<c0042f20>] (__blocking_notifier_call_chain+0x50/0x68)
r8:c0a072a4 r7:00000000 r6:d00d7d3c r5:ffffffff r4:c0a06fc8 r3:ffffffff
[<c0042ed0>] (__blocking_notifier_call_chain) from [<c0042f58>] (blocking_notifier_call_chain+0x20/0x28)
r7:ec98b540 r6:c13ebc80 r5:ed76e600 r4:d00d7d3c
[<c0042f38>] (blocking_notifier_call_chain) from [<c04ae62c>] (cpufreq_set_policy+0x7c/0x1d0)
[<c04ae5b0>] (cpufreq_set_policy) from [<c04af3cc>] (store_scaling_governor+0x74/0x9c)
r7:ec98b540 r6:0000000c r5:ec98b540 r4:ed76e600
[<c04af358>] (store_scaling_governor) from [<c04ad418>] (store+0x90/0xc0)
r6:0000000c r5:ed76e6d4 r4:ed76e600
[<c04ad388>] (store) from [<c0175384>] (sysfs_kf_write+0x54/0x58)
r8:0000000c r7:d00d7f78 r6:ec98b540 r5:0000000c r4:ec853800 r3:0000000c
[<c0175330>] (sysfs_kf_write) from [<c01746b4>] (kernfs_fop_write+0xdc/0x190)
r6:ec98b540 r5:00000000 r4:00000000 r3:c0175330
[<c01745d8>] (kernfs_fop_write) from [<c010dcc0>] (vfs_write+0xac/0x1b4)
r10:0162aa70 r9:d00d6000 r8:0000000c r7:d00d7f78 r6:0162aa70 r5:0000000c
r4:eccde500
[<c010dc14>] (vfs_write) from [<c010dfec>] (SyS_write+0x44/0x90)
r10:0162aa70 r8:0000000c r7:eccde500 r6:eccde500 r5:00000000 r4:00000000
[<c010dfa8>] (SyS_write) from [<c000ec00>] (ret_fast_syscall+0x0/0x48)
r10:00000000 r8:c000edc4 r7:00000004 r6:000216cc r5:0000000c r4:0162aa70
Solve this by moving to finer grained locking - use one mutex to protect
the cpufreq_dev_list as a whole, and a separate lock to ensure correct
ordering of cpufreq notifier registration and removal.
cooling_list_lock is taken within cooling_cpufreq_lock on
(un)registration to preserve the behavior of the code, i.e. to
atomically add/remove to the list and (un)register the notifier.
Fixes: 2dcd851fe4b4 ("thermal: cpu_cooling: Update always cpufreq policy with
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Diffstat (limited to 'drivers/thermal')
-rw-r--r-- | drivers/thermal/cpu_cooling.c | 31 |
1 files changed, 20 insertions, 11 deletions
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 6509c61b9648..5ae0524bed19 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c | |||
@@ -107,6 +107,9 @@ struct cpufreq_cooling_device { | |||
107 | static DEFINE_IDR(cpufreq_idr); | 107 | static DEFINE_IDR(cpufreq_idr); |
108 | static DEFINE_MUTEX(cooling_cpufreq_lock); | 108 | static DEFINE_MUTEX(cooling_cpufreq_lock); |
109 | 109 | ||
110 | static unsigned int cpufreq_dev_count; | ||
111 | |||
112 | static DEFINE_MUTEX(cooling_list_lock); | ||
110 | static LIST_HEAD(cpufreq_dev_list); | 113 | static LIST_HEAD(cpufreq_dev_list); |
111 | 114 | ||
112 | /** | 115 | /** |
@@ -185,14 +188,14 @@ unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq) | |||
185 | { | 188 | { |
186 | struct cpufreq_cooling_device *cpufreq_dev; | 189 | struct cpufreq_cooling_device *cpufreq_dev; |
187 | 190 | ||
188 | mutex_lock(&cooling_cpufreq_lock); | 191 | mutex_lock(&cooling_list_lock); |
189 | list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { | 192 | list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { |
190 | if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { | 193 | if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) { |
191 | mutex_unlock(&cooling_cpufreq_lock); | 194 | mutex_unlock(&cooling_list_lock); |
192 | return get_level(cpufreq_dev, freq); | 195 | return get_level(cpufreq_dev, freq); |
193 | } | 196 | } |
194 | } | 197 | } |
195 | mutex_unlock(&cooling_cpufreq_lock); | 198 | mutex_unlock(&cooling_list_lock); |
196 | 199 | ||
197 | pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu); | 200 | pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu); |
198 | return THERMAL_CSTATE_INVALID; | 201 | return THERMAL_CSTATE_INVALID; |
@@ -221,7 +224,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, | |||
221 | switch (event) { | 224 | switch (event) { |
222 | 225 | ||
223 | case CPUFREQ_ADJUST: | 226 | case CPUFREQ_ADJUST: |
224 | mutex_lock(&cooling_cpufreq_lock); | 227 | mutex_lock(&cooling_list_lock); |
225 | list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { | 228 | list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { |
226 | if (!cpumask_test_cpu(policy->cpu, | 229 | if (!cpumask_test_cpu(policy->cpu, |
227 | &cpufreq_dev->allowed_cpus)) | 230 | &cpufreq_dev->allowed_cpus)) |
@@ -233,7 +236,7 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, | |||
233 | cpufreq_verify_within_limits(policy, 0, | 236 | cpufreq_verify_within_limits(policy, 0, |
234 | max_freq); | 237 | max_freq); |
235 | } | 238 | } |
236 | mutex_unlock(&cooling_cpufreq_lock); | 239 | mutex_unlock(&cooling_list_lock); |
237 | break; | 240 | break; |
238 | default: | 241 | default: |
239 | return NOTIFY_DONE; | 242 | return NOTIFY_DONE; |
@@ -866,12 +869,14 @@ __cpufreq_cooling_register(struct device_node *np, | |||
866 | 869 | ||
867 | mutex_lock(&cooling_cpufreq_lock); | 870 | mutex_lock(&cooling_cpufreq_lock); |
868 | 871 | ||
872 | mutex_lock(&cooling_list_lock); | ||
873 | list_add(&cpufreq_dev->node, &cpufreq_dev_list); | ||
874 | mutex_unlock(&cooling_list_lock); | ||
875 | |||
869 | /* Register the notifier for first cpufreq cooling device */ | 876 | /* Register the notifier for first cpufreq cooling device */ |
870 | if (list_empty(&cpufreq_dev_list)) | 877 | if (!cpufreq_dev_count++) |
871 | cpufreq_register_notifier(&thermal_cpufreq_notifier_block, | 878 | cpufreq_register_notifier(&thermal_cpufreq_notifier_block, |
872 | CPUFREQ_POLICY_NOTIFIER); | 879 | CPUFREQ_POLICY_NOTIFIER); |
873 | list_add(&cpufreq_dev->node, &cpufreq_dev_list); | ||
874 | |||
875 | mutex_unlock(&cooling_cpufreq_lock); | 880 | mutex_unlock(&cooling_cpufreq_lock); |
876 | 881 | ||
877 | return cool_dev; | 882 | return cool_dev; |
@@ -1013,13 +1018,17 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) | |||
1013 | return; | 1018 | return; |
1014 | 1019 | ||
1015 | cpufreq_dev = cdev->devdata; | 1020 | cpufreq_dev = cdev->devdata; |
1016 | mutex_lock(&cooling_cpufreq_lock); | ||
1017 | list_del(&cpufreq_dev->node); | ||
1018 | 1021 | ||
1019 | /* Unregister the notifier for the last cpufreq cooling device */ | 1022 | /* Unregister the notifier for the last cpufreq cooling device */ |
1020 | if (list_empty(&cpufreq_dev_list)) | 1023 | mutex_lock(&cooling_cpufreq_lock); |
1024 | if (!--cpufreq_dev_count) | ||
1021 | cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, | 1025 | cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block, |
1022 | CPUFREQ_POLICY_NOTIFIER); | 1026 | CPUFREQ_POLICY_NOTIFIER); |
1027 | |||
1028 | mutex_lock(&cooling_list_lock); | ||
1029 | list_del(&cpufreq_dev->node); | ||
1030 | mutex_unlock(&cooling_list_lock); | ||
1031 | |||
1023 | mutex_unlock(&cooling_cpufreq_lock); | 1032 | mutex_unlock(&cooling_cpufreq_lock); |
1024 | 1033 | ||
1025 | thermal_cooling_device_unregister(cpufreq_dev->cool_dev); | 1034 | thermal_cooling_device_unregister(cpufreq_dev->cool_dev); |