aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc/proc_sysctl.c
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2017-02-20 00:17:03 -0500
committerEric W. Biederman <ebiederm@xmission.com>2017-02-21 14:34:53 -0500
commitace0c791e6c3cf5ef37cad2df69f0d90ccc40ffb (patch)
tree949d07fc86ef5bafcdcd4b2dc1a8f7ad6af02e01 /fs/proc/proc_sysctl.c
parentfea6d2a610c899bb7fd8e95fcbf46900b886e5a3 (diff)
proc/sysctl: Don't grab i_lock under sysctl_lock.
Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes: > This patch has locking problem. I've got lockdep splat under LTP. > > [ 6633.115456] ====================================================== > [ 6633.115502] [ INFO: possible circular locking dependency detected ] > [ 6633.115553] 4.9.10-debug+ #9 Tainted: G L > [ 6633.115584] ------------------------------------------------------- > [ 6633.115627] ksm02/284980 is trying to acquire lock: > [ 6633.115659] (&sb->s_type->i_lock_key#4){+.+...}, at: [<ffffffff816bc1ce>] igrab+0x1e/0x80 > [ 6633.115834] but task is already holding lock: > [ 6633.115882] (sysctl_lock){+.+...}, at: [<ffffffff817e379b>] unregister_sysctl_table+0x6b/0x110 > [ 6633.116026] which lock already depends on the new lock. > [ 6633.116026] > [ 6633.116080] > [ 6633.116080] the existing dependency chain (in reverse order) is: > [ 6633.116117] > -> #2 (sysctl_lock){+.+...}: > -> #1 (&(&dentry->d_lockref.lock)->rlock){+.+...}: > -> #0 (&sb->s_type->i_lock_key#4){+.+...}: > > d_lock nests inside i_lock > sysctl_lock nests inside d_lock in d_compare > > This patch adds i_lock nesting inside sysctl_lock. Al Viro <viro@ZenIV.linux.org.uk> replied: > Once ->unregistering is set, you can drop sysctl_lock just fine. So I'd > try something like this - use rcu_read_lock() in proc_sys_prune_dcache(), > drop sysctl_lock() before it and regain after. Make sure that no inodes > are added to the list ones ->unregistering has been set and use RCU list > primitives for modifying the inode list, with sysctl_lock still used to > serialize its modifications. > > Freeing struct inode is RCU-delayed (see proc_destroy_inode()), so doing > igrab() is safe there. Since we don't drop inode reference until after we'd > passed beyond it in the list, list_for_each_entry_rcu() should be fine. I agree with Al Viro's analsysis of the situtation. Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering") Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Tested-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r--fs/proc/proc_sysctl.c31
1 files changed, 18 insertions, 13 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 8efb1e10b025..3e64c6502dc8 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -266,21 +266,19 @@ static void proc_sys_prune_dcache(struct ctl_table_header *head)
266 struct inode *inode, *prev = NULL; 266 struct inode *inode, *prev = NULL;
267 struct proc_inode *ei; 267 struct proc_inode *ei;
268 268
269 list_for_each_entry(ei, &head->inodes, sysctl_inodes) { 269 rcu_read_lock();
270 list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) {
270 inode = igrab(&ei->vfs_inode); 271 inode = igrab(&ei->vfs_inode);
271 if (inode) { 272 if (inode) {
272 spin_unlock(&sysctl_lock); 273 rcu_read_unlock();
273 iput(prev); 274 iput(prev);
274 prev = inode; 275 prev = inode;
275 d_prune_aliases(inode); 276 d_prune_aliases(inode);
276 spin_lock(&sysctl_lock); 277 rcu_read_lock();
277 } 278 }
278 } 279 }
279 if (prev) { 280 rcu_read_unlock();
280 spin_unlock(&sysctl_lock); 281 iput(prev);
281 iput(prev);
282 spin_lock(&sysctl_lock);
283 }
284} 282}
285 283
286/* called under sysctl_lock, will reacquire if has to wait */ 284/* called under sysctl_lock, will reacquire if has to wait */
@@ -296,10 +294,10 @@ static void start_unregistering(struct ctl_table_header *p)
296 p->unregistering = &wait; 294 p->unregistering = &wait;
297 spin_unlock(&sysctl_lock); 295 spin_unlock(&sysctl_lock);
298 wait_for_completion(&wait); 296 wait_for_completion(&wait);
299 spin_lock(&sysctl_lock);
300 } else { 297 } else {
301 /* anything non-NULL; we'll never dereference it */ 298 /* anything non-NULL; we'll never dereference it */
302 p->unregistering = ERR_PTR(-EINVAL); 299 p->unregistering = ERR_PTR(-EINVAL);
300 spin_unlock(&sysctl_lock);
303 } 301 }
304 /* 302 /*
305 * Prune dentries for unregistered sysctls: namespaced sysctls 303 * Prune dentries for unregistered sysctls: namespaced sysctls
@@ -310,6 +308,7 @@ static void start_unregistering(struct ctl_table_header *p)
310 * do not remove from the list until nobody holds it; walking the 308 * do not remove from the list until nobody holds it; walking the
311 * list in do_sysctl() relies on that. 309 * list in do_sysctl() relies on that.
312 */ 310 */
311 spin_lock(&sysctl_lock);
313 erase_header(p); 312 erase_header(p);
314} 313}
315 314
@@ -455,11 +454,17 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
455 inode->i_ino = get_next_ino(); 454 inode->i_ino = get_next_ino();
456 455
457 ei = PROC_I(inode); 456 ei = PROC_I(inode);
458 ei->sysctl = head;
459 ei->sysctl_entry = table;
460 457
461 spin_lock(&sysctl_lock); 458 spin_lock(&sysctl_lock);
462 list_add(&ei->sysctl_inodes, &head->inodes); 459 if (unlikely(head->unregistering)) {
460 spin_unlock(&sysctl_lock);
461 iput(inode);
462 inode = NULL;
463 goto out;
464 }
465 ei->sysctl = head;
466 ei->sysctl_entry = table;
467 list_add_rcu(&ei->sysctl_inodes, &head->inodes);
463 head->count++; 468 head->count++;
464 spin_unlock(&sysctl_lock); 469 spin_unlock(&sysctl_lock);
465 470
@@ -487,7 +492,7 @@ out:
487void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) 492void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
488{ 493{
489 spin_lock(&sysctl_lock); 494 spin_lock(&sysctl_lock);
490 list_del(&PROC_I(inode)->sysctl_inodes); 495 list_del_rcu(&PROC_I(inode)->sysctl_inodes);
491 if (!--head->count) 496 if (!--head->count)
492 kfree_rcu(head, rcu); 497 kfree_rcu(head, rcu);
493 spin_unlock(&sysctl_lock); 498 spin_unlock(&sysctl_lock);