diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2013-07-19 19:13:55 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2013-07-19 20:58:58 -0400 |
commit | acfec9a5a892f98461f52ed5770de99a3e571ae2 (patch) | |
tree | fa9b1cc174ce4214689719aee18285c82ce9d623 | |
parent | ba57ea64cb1820deb37637de0fdb107f0dc90089 (diff) |
livelock avoidance in sget()
Eric Sandeen has found a nasty livelock in sget() - take a mount(2) about
to fail. The superblock is on ->fs_supers, ->s_umount is held exclusive,
->s_active is 1. Along comes two more processes, trying to mount the same
thing; sget() in each is picking that superblock, bumping ->s_count and
trying to grab ->s_umount. ->s_active is 3 now. Original mount(2)
finally gets to deactivate_locked_super() on failure; ->s_active is 2,
superblock is still ->fs_supers because shutdown will *not* happen until
->s_active hits 0. ->s_umount is dropped and now we have two processes
chasing each other:
s_active = 2, A acquired ->s_umount, B blocked
A sees that the damn thing is stillborn, does deactivate_locked_super()
s_active = 1, A drops ->s_umount, B gets it
A restarts the search and finds the same superblock. And bumps it ->s_active.
s_active = 2, B holds ->s_umount, A blocked on trying to get it
... and we are in the earlier situation with A and B switched places.
The root cause, of course, is that ->s_active should not grow until we'd
got MS_BORN. Then failing ->mount() will have deactivate_locked_super()
shut the damn thing down. Fortunately, it's easy to do - the key point
is that grab_super() is called only for superblocks currently on ->fs_supers,
so it can bump ->s_count and grab ->s_umount first, then check MS_BORN and
bump ->s_active; we must never increment ->s_count for superblocks past
->kill_sb(), but grab_super() is never called for those.
The bug is pretty old; we would've caught it by now, if not for accidental
exclusion between sget() for block filesystems; the things like cgroup or
e.g. mtd-based filesystems don't have anything of that sort, so they get
bitten. The right way to deal with that is obviously to fix sget()...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/super.c | 25 |
1 files changed, 10 insertions, 15 deletions
diff --git a/fs/super.c b/fs/super.c index 7465d4364208..68307c029228 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super); | |||
336 | * and want to turn it into a full-blown active reference. grab_super() | 336 | * and want to turn it into a full-blown active reference. grab_super() |
337 | * is called with sb_lock held and drops it. Returns 1 in case of | 337 | * is called with sb_lock held and drops it. Returns 1 in case of |
338 | * success, 0 if we had failed (superblock contents was already dead or | 338 | * success, 0 if we had failed (superblock contents was already dead or |
339 | * dying when grab_super() had been called). | 339 | * dying when grab_super() had been called). Note that this is only |
340 | * called for superblocks not in rundown mode (== ones still on ->fs_supers | ||
341 | * of their type), so increment of ->s_count is OK here. | ||
340 | */ | 342 | */ |
341 | static int grab_super(struct super_block *s) __releases(sb_lock) | 343 | static int grab_super(struct super_block *s) __releases(sb_lock) |
342 | { | 344 | { |
343 | if (atomic_inc_not_zero(&s->s_active)) { | ||
344 | spin_unlock(&sb_lock); | ||
345 | return 1; | ||
346 | } | ||
347 | /* it's going away */ | ||
348 | s->s_count++; | 345 | s->s_count++; |
349 | spin_unlock(&sb_lock); | 346 | spin_unlock(&sb_lock); |
350 | /* wait for it to die */ | ||
351 | down_write(&s->s_umount); | 347 | down_write(&s->s_umount); |
348 | if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { | ||
349 | put_super(s); | ||
350 | return 1; | ||
351 | } | ||
352 | up_write(&s->s_umount); | 352 | up_write(&s->s_umount); |
353 | put_super(s); | 353 | put_super(s); |
354 | return 0; | 354 | return 0; |
@@ -463,11 +463,6 @@ retry: | |||
463 | destroy_super(s); | 463 | destroy_super(s); |
464 | s = NULL; | 464 | s = NULL; |
465 | } | 465 | } |
466 | down_write(&old->s_umount); | ||
467 | if (unlikely(!(old->s_flags & MS_BORN))) { | ||
468 | deactivate_locked_super(old); | ||
469 | goto retry; | ||
470 | } | ||
471 | return old; | 466 | return old; |
472 | } | 467 | } |
473 | } | 468 | } |
@@ -660,10 +655,10 @@ restart: | |||
660 | if (hlist_unhashed(&sb->s_instances)) | 655 | if (hlist_unhashed(&sb->s_instances)) |
661 | continue; | 656 | continue; |
662 | if (sb->s_bdev == bdev) { | 657 | if (sb->s_bdev == bdev) { |
663 | if (grab_super(sb)) /* drops sb_lock */ | 658 | if (!grab_super(sb)) |
664 | return sb; | ||
665 | else | ||
666 | goto restart; | 659 | goto restart; |
660 | up_write(&sb->s_umount); | ||
661 | return sb; | ||
667 | } | 662 | } |
668 | } | 663 | } |
669 | spin_unlock(&sb_lock); | 664 | spin_unlock(&sb_lock); |