diff options
author | Robert Richter <robert.richter@amd.com> | 2011-05-26 12:39:35 -0400 |
---|---|---|
committer | Robert Richter <robert.richter@amd.com> | 2011-05-31 10:33:34 -0400 |
commit | 130c5ce716c9bfd1c2a2ec840a746eb7ff9ce1e6 (patch) | |
tree | 9d5ae217dd7236c73c12ee1bed9111ad6782acab | |
parent | 6ac6519b93065625119a347be1cbcc1b89edb773 (diff) |
oprofile: Fix locking dependency in sync_start()
This fixes the A->B/B->A locking dependency, see the warning below.
The function task_exit_notify() is called with (task_exit_notifier)
.rwsem set and then calls sync_buffer() which locks buffer_mutex. In
sync_start() the buffer_mutex was set to prevent notifier functions to
be started before sync_start() is finished. But when registering the
notifier, (task_exit_notifier).rwsem is locked too, but now in
different order than in sync_buffer(). In theory this causes a locking
dependency, what does not occur in practice since task_exit_notify()
is always called after the notifier is registered which means the lock
is already released.
However, after checking the notifier functions it turned out the
buffer_mutex in sync_start() is unnecessary. This is because
sync_buffer() may be called from the notifiers even if sync_start()
did not finish yet, the buffers are already allocated but empty. No
need to protect this with the mutex.
So we fix this theoretical locking dependency by removing buffer_mutex
in sync_start(). This is similar to the implementation before commit:
750d857 oprofile: fix crash when accessing freed task structs
which introduced the locking dependency.
Lockdep warning:
oprofiled/4447 is trying to acquire lock:
(buffer_mutex){+.+...}, at: [<ffffffffa0000e55>] sync_buffer+0x31/0x3ec [oprofile]
but task is already holding lock:
((task_exit_notifier).rwsem){++++..}, at: [<ffffffff81058026>] __blocking_notifier_call_chain+0x39/0x67
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 ((task_exit_notifier).rwsem){++++..}:
[<ffffffff8106557f>] lock_acquire+0xf8/0x11e
[<ffffffff81463a2b>] down_write+0x44/0x67
[<ffffffff810581c0>] blocking_notifier_chain_register+0x52/0x8b
[<ffffffff8105a6ac>] profile_event_register+0x2d/0x2f
[<ffffffffa00013c1>] sync_start+0x47/0xc6 [oprofile]
[<ffffffffa00001bb>] oprofile_setup+0x60/0xa5 [oprofile]
[<ffffffffa00014e3>] event_buffer_open+0x59/0x8c [oprofile]
[<ffffffff810cd3b9>] __dentry_open+0x1eb/0x308
[<ffffffff810cd59d>] nameidata_to_filp+0x60/0x67
[<ffffffff810daad6>] do_last+0x5be/0x6b2
[<ffffffff810dbc33>] path_openat+0xc7/0x360
[<ffffffff810dbfc5>] do_filp_open+0x3d/0x8c
[<ffffffff810ccfd2>] do_sys_open+0x110/0x1a9
[<ffffffff810cd09e>] sys_open+0x20/0x22
[<ffffffff8146ad4b>] system_call_fastpath+0x16/0x1b
-> #0 (buffer_mutex){+.+...}:
[<ffffffff81064dfb>] __lock_acquire+0x1085/0x1711
[<ffffffff8106557f>] lock_acquire+0xf8/0x11e
[<ffffffff814634f0>] mutex_lock_nested+0x63/0x309
[<ffffffffa0000e55>] sync_buffer+0x31/0x3ec [oprofile]
[<ffffffffa0001226>] task_exit_notify+0x16/0x1a [oprofile]
[<ffffffff81467b96>] notifier_call_chain+0x37/0x63
[<ffffffff8105803d>] __blocking_notifier_call_chain+0x50/0x67
[<ffffffff81058068>] blocking_notifier_call_chain+0x14/0x16
[<ffffffff8105a718>] profile_task_exit+0x1a/0x1c
[<ffffffff81039e8f>] do_exit+0x2a/0x6fc
[<ffffffff8103a5e4>] do_group_exit+0x83/0xae
[<ffffffff8103a626>] sys_exit_group+0x17/0x1b
[<ffffffff8146ad4b>] system_call_fastpath+0x16/0x1b
other info that might help us debug this:
1 lock held by oprofiled/4447:
#0: ((task_exit_notifier).rwsem){++++..}, at: [<ffffffff81058026>] __blocking_notifier_call_chain+0x39/0x67
stack backtrace:
Pid: 4447, comm: oprofiled Not tainted 2.6.39-00007-gcf4d8d4 #10
Call Trace:
[<ffffffff81063193>] print_circular_bug+0xae/0xbc
[<ffffffff81064dfb>] __lock_acquire+0x1085/0x1711
[<ffffffffa0000e55>] ? sync_buffer+0x31/0x3ec [oprofile]
[<ffffffff8106557f>] lock_acquire+0xf8/0x11e
[<ffffffffa0000e55>] ? sync_buffer+0x31/0x3ec [oprofile]
[<ffffffff81062627>] ? mark_lock+0x42f/0x552
[<ffffffffa0000e55>] ? sync_buffer+0x31/0x3ec [oprofile]
[<ffffffff814634f0>] mutex_lock_nested+0x63/0x309
[<ffffffffa0000e55>] ? sync_buffer+0x31/0x3ec [oprofile]
[<ffffffffa0000e55>] sync_buffer+0x31/0x3ec [oprofile]
[<ffffffff81058026>] ? __blocking_notifier_call_chain+0x39/0x67
[<ffffffff81058026>] ? __blocking_notifier_call_chain+0x39/0x67
[<ffffffffa0001226>] task_exit_notify+0x16/0x1a [oprofile]
[<ffffffff81467b96>] notifier_call_chain+0x37/0x63
[<ffffffff8105803d>] __blocking_notifier_call_chain+0x50/0x67
[<ffffffff81058068>] blocking_notifier_call_chain+0x14/0x16
[<ffffffff8105a718>] profile_task_exit+0x1a/0x1c
[<ffffffff81039e8f>] do_exit+0x2a/0x6fc
[<ffffffff81465031>] ? retint_swapgs+0xe/0x13
[<ffffffff8103a5e4>] do_group_exit+0x83/0xae
[<ffffffff8103a626>] sys_exit_group+0x17/0x1b
[<ffffffff8146ad4b>] system_call_fastpath+0x16/0x1b
Reported-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Carl Love <carll@us.ibm.com>
Cc: <stable@kernel.org> # .36+
Signed-off-by: Robert Richter <robert.richter@amd.com>
-rw-r--r-- | drivers/oprofile/buffer_sync.c | 8 |
1 files changed, 2 insertions, 6 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 04250aa16f51..f34b5b29fb95 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c | |||
@@ -155,8 +155,6 @@ int sync_start(void) | |||
155 | if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) | 155 | if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) |
156 | return -ENOMEM; | 156 | return -ENOMEM; |
157 | 157 | ||
158 | mutex_lock(&buffer_mutex); | ||
159 | |||
160 | err = task_handoff_register(&task_free_nb); | 158 | err = task_handoff_register(&task_free_nb); |
161 | if (err) | 159 | if (err) |
162 | goto out1; | 160 | goto out1; |
@@ -173,7 +171,6 @@ int sync_start(void) | |||
173 | start_cpu_work(); | 171 | start_cpu_work(); |
174 | 172 | ||
175 | out: | 173 | out: |
176 | mutex_unlock(&buffer_mutex); | ||
177 | return err; | 174 | return err; |
178 | out4: | 175 | out4: |
179 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); | 176 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); |
@@ -190,14 +187,13 @@ out1: | |||
190 | 187 | ||
191 | void sync_stop(void) | 188 | void sync_stop(void) |
192 | { | 189 | { |
193 | /* flush buffers */ | ||
194 | mutex_lock(&buffer_mutex); | ||
195 | end_cpu_work(); | 190 | end_cpu_work(); |
196 | unregister_module_notifier(&module_load_nb); | 191 | unregister_module_notifier(&module_load_nb); |
197 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); | 192 | profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); |
198 | profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); | 193 | profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); |
199 | task_handoff_unregister(&task_free_nb); | 194 | task_handoff_unregister(&task_free_nb); |
200 | mutex_unlock(&buffer_mutex); | 195 | barrier(); /* do all of the above first */ |
196 | |||
201 | flush_cpu_work(); | 197 | flush_cpu_work(); |
202 | 198 | ||
203 | free_all_tasks(); | 199 | free_all_tasks(); |