aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2013-07-19 19:13:55 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2013-07-19 20:58:58 -0400
commitacfec9a5a892f98461f52ed5770de99a3e571ae2 (patch)
treefa9b1cc174ce4214689719aee18285c82ce9d623
parentba57ea64cb1820deb37637de0fdb107f0dc90089 (diff)
livelock avoidance in sget()
Eric Sandeen has found a nasty livelock in sget() - take a mount(2) about to fail. The superblock is on ->fs_supers, ->s_umount is held exclusive, ->s_active is 1. Along comes two more processes, trying to mount the same thing; sget() in each is picking that superblock, bumping ->s_count and trying to grab ->s_umount. ->s_active is 3 now. Original mount(2) finally gets to deactivate_locked_super() on failure; ->s_active is 2, superblock is still ->fs_supers because shutdown will *not* happen until ->s_active hits 0. ->s_umount is dropped and now we have two processes chasing each other: s_active = 2, A acquired ->s_umount, B blocked A sees that the damn thing is stillborn, does deactivate_locked_super() s_active = 1, A drops ->s_umount, B gets it A restarts the search and finds the same superblock. And bumps it ->s_active. s_active = 2, B holds ->s_umount, A blocked on trying to get it ... and we are in the earlier situation with A and B switched places. The root cause, of course, is that ->s_active should not grow until we'd got MS_BORN. Then failing ->mount() will have deactivate_locked_super() shut the damn thing down. Fortunately, it's easy to do - the key point is that grab_super() is called only for superblocks currently on ->fs_supers, so it can bump ->s_count and grab ->s_umount first, then check MS_BORN and bump ->s_active; we must never increment ->s_count for superblocks past ->kill_sb(), but grab_super() is never called for those. The bug is pretty old; we would've caught it by now, if not for accidental exclusion between sget() for block filesystems; the things like cgroup or e.g. mtd-based filesystems don't have anything of that sort, so they get bitten. The right way to deal with that is obviously to fix sget()... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/super.c25
1 files changed, 10 insertions, 15 deletions
diff --git a/fs/super.c b/fs/super.c
index 7465d4364208..68307c029228 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super);
336 * and want to turn it into a full-blown active reference. grab_super() 336 * and want to turn it into a full-blown active reference. grab_super()
337 * is called with sb_lock held and drops it. Returns 1 in case of 337 * is called with sb_lock held and drops it. Returns 1 in case of
338 * success, 0 if we had failed (superblock contents was already dead or 338 * success, 0 if we had failed (superblock contents was already dead or
339 * dying when grab_super() had been called). 339 * dying when grab_super() had been called). Note that this is only
340 * called for superblocks not in rundown mode (== ones still on ->fs_supers
341 * of their type), so increment of ->s_count is OK here.
340 */ 342 */
341static int grab_super(struct super_block *s) __releases(sb_lock) 343static int grab_super(struct super_block *s) __releases(sb_lock)
342{ 344{
343 if (atomic_inc_not_zero(&s->s_active)) {
344 spin_unlock(&sb_lock);
345 return 1;
346 }
347 /* it's going away */
348 s->s_count++; 345 s->s_count++;
349 spin_unlock(&sb_lock); 346 spin_unlock(&sb_lock);
350 /* wait for it to die */
351 down_write(&s->s_umount); 347 down_write(&s->s_umount);
348 if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
349 put_super(s);
350 return 1;
351 }
352 up_write(&s->s_umount); 352 up_write(&s->s_umount);
353 put_super(s); 353 put_super(s);
354 return 0; 354 return 0;
@@ -463,11 +463,6 @@ retry:
463 destroy_super(s); 463 destroy_super(s);
464 s = NULL; 464 s = NULL;
465 } 465 }
466 down_write(&old->s_umount);
467 if (unlikely(!(old->s_flags & MS_BORN))) {
468 deactivate_locked_super(old);
469 goto retry;
470 }
471 return old; 466 return old;
472 } 467 }
473 } 468 }
@@ -660,10 +655,10 @@ restart:
660 if (hlist_unhashed(&sb->s_instances)) 655 if (hlist_unhashed(&sb->s_instances))
661 continue; 656 continue;
662 if (sb->s_bdev == bdev) { 657 if (sb->s_bdev == bdev) {
663 if (grab_super(sb)) /* drops sb_lock */ 658 if (!grab_super(sb))
664 return sb;
665 else
666 goto restart; 659 goto restart;
660 up_write(&sb->s_umount);
661 return sb;
667 } 662 }
668 } 663 }
669 spin_unlock(&sb_lock); 664 spin_unlock(&sb_lock);