diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2017-02-20 00:17:03 -0500 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2017-02-21 14:34:53 -0500 |
commit | ace0c791e6c3cf5ef37cad2df69f0d90ccc40ffb (patch) | |
tree | 949d07fc86ef5bafcdcd4b2dc1a8f7ad6af02e01 /fs/proc/proc_sysctl.c | |
parent | fea6d2a610c899bb7fd8e95fcbf46900b886e5a3 (diff) |
proc/sysctl: Don't grab i_lock under sysctl_lock.
Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes:
> This patch has locking problem. I've got lockdep splat under LTP.
>
> [ 6633.115456] ======================================================
> [ 6633.115502] [ INFO: possible circular locking dependency detected ]
> [ 6633.115553] 4.9.10-debug+ #9 Tainted: G L
> [ 6633.115584] -------------------------------------------------------
> [ 6633.115627] ksm02/284980 is trying to acquire lock:
> [ 6633.115659] (&sb->s_type->i_lock_key#4){+.+...}, at: [<ffffffff816bc1ce>] igrab+0x1e/0x80
> [ 6633.115834] but task is already holding lock:
> [ 6633.115882] (sysctl_lock){+.+...}, at: [<ffffffff817e379b>] unregister_sysctl_table+0x6b/0x110
> [ 6633.116026] which lock already depends on the new lock.
> [ 6633.116026]
> [ 6633.116080]
> [ 6633.116080] the existing dependency chain (in reverse order) is:
> [ 6633.116117]
> -> #2 (sysctl_lock){+.+...}:
> -> #1 (&(&dentry->d_lockref.lock)->rlock){+.+...}:
> -> #0 (&sb->s_type->i_lock_key#4){+.+...}:
>
> d_lock nests inside i_lock
> sysctl_lock nests inside d_lock in d_compare
>
> This patch adds i_lock nesting inside sysctl_lock.
Al Viro <viro@ZenIV.linux.org.uk> replied:
> Once ->unregistering is set, you can drop sysctl_lock just fine. So I'd
> try something like this - use rcu_read_lock() in proc_sys_prune_dcache(),
> drop sysctl_lock() before it and regain after. Make sure that no inodes
> are added to the list ones ->unregistering has been set and use RCU list
> primitives for modifying the inode list, with sysctl_lock still used to
> serialize its modifications.
>
> Freeing struct inode is RCU-delayed (see proc_destroy_inode()), so doing
> igrab() is safe there. Since we don't drop inode reference until after we'd
> passed beyond it in the list, list_for_each_entry_rcu() should be fine.
I agree with Al Viro's analsysis of the situtation.
Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering")
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Tested-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r-- | fs/proc/proc_sysctl.c | 31 |
1 files changed, 18 insertions, 13 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 8efb1e10b025..3e64c6502dc8 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -266,21 +266,19 @@ static void proc_sys_prune_dcache(struct ctl_table_header *head) | |||
266 | struct inode *inode, *prev = NULL; | 266 | struct inode *inode, *prev = NULL; |
267 | struct proc_inode *ei; | 267 | struct proc_inode *ei; |
268 | 268 | ||
269 | list_for_each_entry(ei, &head->inodes, sysctl_inodes) { | 269 | rcu_read_lock(); |
270 | list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) { | ||
270 | inode = igrab(&ei->vfs_inode); | 271 | inode = igrab(&ei->vfs_inode); |
271 | if (inode) { | 272 | if (inode) { |
272 | spin_unlock(&sysctl_lock); | 273 | rcu_read_unlock(); |
273 | iput(prev); | 274 | iput(prev); |
274 | prev = inode; | 275 | prev = inode; |
275 | d_prune_aliases(inode); | 276 | d_prune_aliases(inode); |
276 | spin_lock(&sysctl_lock); | 277 | rcu_read_lock(); |
277 | } | 278 | } |
278 | } | 279 | } |
279 | if (prev) { | 280 | rcu_read_unlock(); |
280 | spin_unlock(&sysctl_lock); | 281 | iput(prev); |
281 | iput(prev); | ||
282 | spin_lock(&sysctl_lock); | ||
283 | } | ||
284 | } | 282 | } |
285 | 283 | ||
286 | /* called under sysctl_lock, will reacquire if has to wait */ | 284 | /* called under sysctl_lock, will reacquire if has to wait */ |
@@ -296,10 +294,10 @@ static void start_unregistering(struct ctl_table_header *p) | |||
296 | p->unregistering = &wait; | 294 | p->unregistering = &wait; |
297 | spin_unlock(&sysctl_lock); | 295 | spin_unlock(&sysctl_lock); |
298 | wait_for_completion(&wait); | 296 | wait_for_completion(&wait); |
299 | spin_lock(&sysctl_lock); | ||
300 | } else { | 297 | } else { |
301 | /* anything non-NULL; we'll never dereference it */ | 298 | /* anything non-NULL; we'll never dereference it */ |
302 | p->unregistering = ERR_PTR(-EINVAL); | 299 | p->unregistering = ERR_PTR(-EINVAL); |
300 | spin_unlock(&sysctl_lock); | ||
303 | } | 301 | } |
304 | /* | 302 | /* |
305 | * Prune dentries for unregistered sysctls: namespaced sysctls | 303 | * Prune dentries for unregistered sysctls: namespaced sysctls |
@@ -310,6 +308,7 @@ static void start_unregistering(struct ctl_table_header *p) | |||
310 | * do not remove from the list until nobody holds it; walking the | 308 | * do not remove from the list until nobody holds it; walking the |
311 | * list in do_sysctl() relies on that. | 309 | * list in do_sysctl() relies on that. |
312 | */ | 310 | */ |
311 | spin_lock(&sysctl_lock); | ||
313 | erase_header(p); | 312 | erase_header(p); |
314 | } | 313 | } |
315 | 314 | ||
@@ -455,11 +454,17 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
455 | inode->i_ino = get_next_ino(); | 454 | inode->i_ino = get_next_ino(); |
456 | 455 | ||
457 | ei = PROC_I(inode); | 456 | ei = PROC_I(inode); |
458 | ei->sysctl = head; | ||
459 | ei->sysctl_entry = table; | ||
460 | 457 | ||
461 | spin_lock(&sysctl_lock); | 458 | spin_lock(&sysctl_lock); |
462 | list_add(&ei->sysctl_inodes, &head->inodes); | 459 | if (unlikely(head->unregistering)) { |
460 | spin_unlock(&sysctl_lock); | ||
461 | iput(inode); | ||
462 | inode = NULL; | ||
463 | goto out; | ||
464 | } | ||
465 | ei->sysctl = head; | ||
466 | ei->sysctl_entry = table; | ||
467 | list_add_rcu(&ei->sysctl_inodes, &head->inodes); | ||
463 | head->count++; | 468 | head->count++; |
464 | spin_unlock(&sysctl_lock); | 469 | spin_unlock(&sysctl_lock); |
465 | 470 | ||
@@ -487,7 +492,7 @@ out: | |||
487 | void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) | 492 | void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) |
488 | { | 493 | { |
489 | spin_lock(&sysctl_lock); | 494 | spin_lock(&sysctl_lock); |
490 | list_del(&PROC_I(inode)->sysctl_inodes); | 495 | list_del_rcu(&PROC_I(inode)->sysctl_inodes); |
491 | if (!--head->count) | 496 | if (!--head->count) |
492 | kfree_rcu(head, rcu); | 497 | kfree_rcu(head, rcu); |
493 | spin_unlock(&sysctl_lock); | 498 | spin_unlock(&sysctl_lock); |