aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2010-03-22 15:22:31 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2010-05-21 18:31:14 -0400
commit1712ac8fda7d8bc4dc921f5777b7423aacad7263 (patch)
tree1493bc7166ace04d9ac2f4d5383eaab1d43e17c4
parentb20bd1a5e78af267dc4b6e1ffed48d5d776302c5 (diff)
Saner locking around deactivate_super()
Make sure that s_umount is acquired *before* we drop the final active reference; we still have the fast path (atomic_dec_unless) and we have gotten rid of the window between the moment when s_active hits zero and s_umount is acquired. Which simplifies the living hell out of grab_super() and inotify pin_to_kill() stuff. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/notify/inotify/inotify.c76
-rw-r--r--fs/super.c45
2 files changed, 30 insertions, 91 deletions
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 441ef136af22..27b75ebc7460 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -511,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
511 * done. Cleanup is just deactivate_super(). However, that leaves a messy 511 * done. Cleanup is just deactivate_super(). However, that leaves a messy
512 * case - what if we *are* racing with umount() and active references to 512 * case - what if we *are* racing with umount() and active references to
513 * superblock can't be acquired anymore? We can bump ->s_count, grab 513 * superblock can't be acquired anymore? We can bump ->s_count, grab
514 * ->s_umount, which will almost certainly wait until the superblock is shut 514 * ->s_umount, which will wait until the superblock is shut down and the
515 * down and the watch in question is pining for fjords. That's fine, but 515 * watch in question is pining for fjords.
516 * there is a problem - we might have hit the window between ->s_active
517 * getting to 0 (i.e. the moment when superblock is past the point of no return
518 * and is heading for shutdown) and the moment when deactivate_super() acquires
519 * ->s_umount. We could just do drop_super() yield() and retry, but that's
520 * rather antisocial and this stuff is luser-triggerable. OTOH, having grabbed
521 * ->s_umount and having found that we'd got there first (i.e. that ->s_root is
522 * non-NULL) we know that we won't race with inotify_umount_inodes(). So we
523 * could grab a reference to watch and do the rest as above, just with
524 * drop_super() instead of deactivate_super(), right? Wrong. We had to drop
525 * ih->mutex before we could grab ->s_umount. So the watch could've been gone
526 * already.
527 *
528 * That still can be dealt with - we need to save watch->wd, do idr_find()
529 * and compare its result with our pointer. If they match, we either have
530 * the damn thing still alive or we'd lost not one but two races at once,
531 * the watch had been killed and a new one got created with the same ->wd
532 * at the same address. That couldn't have happened in inotify_destroy(),
533 * but inotify_rm_wd() could run into that. Still, "new one got created"
534 * is not a problem - we have every right to kill it or leave it alone,
535 * whatever's more convenient.
536 *
537 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
538 * "grab it and kill it" check. If it's been our original watch, we are
539 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
540 * race and kill the fscker anyway; we are safe since we know that its
541 * superblock won't be going away.
542 * 516 *
543 * And yes, this is far beyond mere "not very pretty"; so's the entire 517 * And yes, this is far beyond mere "not very pretty"; so's the entire
544 * concept of inotify to start with. 518 * concept of inotify to start with.
@@ -552,14 +526,10 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
552 * Called with ih->mutex held, drops it. Possible return values: 526 * Called with ih->mutex held, drops it. Possible return values:
553 * 0 - nothing to do, it has died 527 * 0 - nothing to do, it has died
554 * 1 - remove it, drop the reference and deactivate_super() 528 * 1 - remove it, drop the reference and deactivate_super()
555 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
556 * that variant, since it involved a lot of PITA, but that's the best that
557 * could've been done.
558 */ 529 */
559static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) 530static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
560{ 531{
561 struct super_block *sb = watch->inode->i_sb; 532 struct super_block *sb = watch->inode->i_sb;
562 s32 wd = watch->wd;
563 533
564 if (atomic_inc_not_zero(&sb->s_active)) { 534 if (atomic_inc_not_zero(&sb->s_active)) {
565 get_inotify_watch(watch); 535 get_inotify_watch(watch);
@@ -571,36 +541,16 @@ static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
571 spin_unlock(&sb_lock); 541 spin_unlock(&sb_lock);
572 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ 542 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
573 down_read(&sb->s_umount); 543 down_read(&sb->s_umount);
574 if (likely(!sb->s_root)) { 544 /* fs is already shut down; the watch is dead */
575 /* fs is already shut down; the watch is dead */ 545 drop_super(sb);
576 drop_super(sb); 546 return 0;
577 return 0;
578 }
579 /* raced with the final deactivate_super() */
580 mutex_lock(&ih->mutex);
581 if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
582 /* the watch is dead */
583 mutex_unlock(&ih->mutex);
584 drop_super(sb);
585 return 0;
586 }
587 /* still alive or freed and reused with the same sb and wd; kill */
588 get_inotify_watch(watch);
589 mutex_unlock(&ih->mutex);
590 return 2;
591} 547}
592 548
593static void unpin_and_kill(struct inotify_watch *watch, int how) 549static void unpin_and_kill(struct inotify_watch *watch)
594{ 550{
595 struct super_block *sb = watch->inode->i_sb; 551 struct super_block *sb = watch->inode->i_sb;
596 put_inotify_watch(watch); 552 put_inotify_watch(watch);
597 switch (how) { 553 deactivate_super(sb);
598 case 1:
599 deactivate_super(sb);
600 break;
601 case 2:
602 drop_super(sb);
603 }
604} 554}
605 555
606/** 556/**
@@ -622,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih)
622 struct list_head *watches; 572 struct list_head *watches;
623 struct super_block *sb; 573 struct super_block *sb;
624 struct inode *inode; 574 struct inode *inode;
625 int how;
626 575
627 mutex_lock(&ih->mutex); 576 mutex_lock(&ih->mutex);
628 watches = &ih->watches; 577 watches = &ih->watches;
@@ -632,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih)
632 } 581 }
633 watch = list_first_entry(watches, struct inotify_watch, h_list); 582 watch = list_first_entry(watches, struct inotify_watch, h_list);
634 sb = watch->inode->i_sb; 583 sb = watch->inode->i_sb;
635 how = pin_to_kill(ih, watch); 584 if (!pin_to_kill(ih, watch))
636 if (!how)
637 continue; 585 continue;
638 586
639 inode = watch->inode; 587 inode = watch->inode;
@@ -648,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih)
648 596
649 mutex_unlock(&ih->mutex); 597 mutex_unlock(&ih->mutex);
650 mutex_unlock(&inode->inotify_mutex); 598 mutex_unlock(&inode->inotify_mutex);
651 unpin_and_kill(watch, how); 599 unpin_and_kill(watch);
652 } 600 }
653 601
654 /* free this handle: the put matching the get in inotify_init() */ 602 /* free this handle: the put matching the get in inotify_init() */
@@ -851,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
851 struct inotify_watch *watch; 799 struct inotify_watch *watch;
852 struct super_block *sb; 800 struct super_block *sb;
853 struct inode *inode; 801 struct inode *inode;
854 int how;
855 802
856 mutex_lock(&ih->mutex); 803 mutex_lock(&ih->mutex);
857 watch = idr_find(&ih->idr, wd); 804 watch = idr_find(&ih->idr, wd);
@@ -860,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
860 return -EINVAL; 807 return -EINVAL;
861 } 808 }
862 sb = watch->inode->i_sb; 809 sb = watch->inode->i_sb;
863 how = pin_to_kill(ih, watch); 810 if (!pin_to_kill(ih, watch))
864 if (!how)
865 return 0; 811 return 0;
866 812
867 inode = watch->inode; 813 inode = watch->inode;
@@ -875,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
875 821
876 mutex_unlock(&ih->mutex); 822 mutex_unlock(&ih->mutex);
877 mutex_unlock(&inode->inotify_mutex); 823 mutex_unlock(&inode->inotify_mutex);
878 unpin_and_kill(watch, how); 824 unpin_and_kill(watch);
879 825
880 return 0; 826 return 0;
881} 827}
diff --git a/fs/super.c b/fs/super.c
index bc734f8b3e18..157657b32798 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -178,53 +178,48 @@ void put_super(struct super_block *sb)
178 178
179 179
180/** 180/**
181 * deactivate_super - drop an active reference to superblock 181 * deactivate_locked_super - drop an active reference to superblock
182 * @s: superblock to deactivate 182 * @s: superblock to deactivate
183 * 183 *
184 * Drops an active reference to superblock, acquiring a temprory one if 184 * Drops an active reference to superblock, converting it into a temprory
185 * there is no active references left. In that case we lock superblock, 185 * one if there is no other active references left. In that case we
186 * tell fs driver to shut it down and drop the temporary reference we 186 * tell fs driver to shut it down and drop the temporary reference we
187 * had just acquired. 187 * had just acquired.
188 *
189 * Caller holds exclusive lock on superblock; that lock is released.
188 */ 190 */
189void deactivate_super(struct super_block *s) 191void deactivate_locked_super(struct super_block *s)
190{ 192{
191 struct file_system_type *fs = s->s_type; 193 struct file_system_type *fs = s->s_type;
192 if (atomic_dec_and_test(&s->s_active)) { 194 if (atomic_dec_and_test(&s->s_active)) {
193 vfs_dq_off(s, 0); 195 vfs_dq_off(s, 0);
194 down_write(&s->s_umount);
195 fs->kill_sb(s); 196 fs->kill_sb(s);
196 put_filesystem(fs); 197 put_filesystem(fs);
197 put_super(s); 198 put_super(s);
199 } else {
200 up_write(&s->s_umount);
198 } 201 }
199} 202}
200 203
201EXPORT_SYMBOL(deactivate_super); 204EXPORT_SYMBOL(deactivate_locked_super);
202 205
203/** 206/**
204 * deactivate_locked_super - drop an active reference to superblock 207 * deactivate_super - drop an active reference to superblock
205 * @s: superblock to deactivate 208 * @s: superblock to deactivate
206 * 209 *
207 * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that 210 * Variant of deactivate_locked_super(), except that superblock is *not*
208 * it does not unlock it until it's all over. As the result, it's safe to 211 * locked by caller. If we are going to drop the final active reference,
209 * use to dispose of new superblock on ->get_sb() failure exits - nobody 212 * lock will be acquired prior to that.
210 * will see the sucker until it's all over. Equivalent using up_write +
211 * deactivate_super is safe for that purpose only if superblock is either
212 * safe to use or has NULL ->s_root when we unlock.
213 */ 213 */
214void deactivate_locked_super(struct super_block *s) 214void deactivate_super(struct super_block *s)
215{ 215{
216 struct file_system_type *fs = s->s_type; 216 if (!atomic_add_unless(&s->s_active, -1, 1)) {
217 if (atomic_dec_and_test(&s->s_active)) { 217 down_write(&s->s_umount);
218 vfs_dq_off(s, 0); 218 deactivate_locked_super(s);
219 fs->kill_sb(s);
220 put_filesystem(fs);
221 put_super(s);
222 } else {
223 up_write(&s->s_umount);
224 } 219 }
225} 220}
226 221
227EXPORT_SYMBOL(deactivate_locked_super); 222EXPORT_SYMBOL(deactivate_super);
228 223
229/** 224/**
230 * grab_super - acquire an active reference 225 * grab_super - acquire an active reference
@@ -247,12 +242,10 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
247 /* it's going away */ 242 /* it's going away */
248 s->s_count++; 243 s->s_count++;
249 spin_unlock(&sb_lock); 244 spin_unlock(&sb_lock);
250 /* usually that'll be enough for it to die... */ 245 /* wait for it to die */
251 down_write(&s->s_umount); 246 down_write(&s->s_umount);
252 up_write(&s->s_umount); 247 up_write(&s->s_umount);
253 put_super(s); 248 put_super(s);
254 /* ... but in case it wasn't, let's at least yield() */
255 yield();
256 return 0; 249 return 0;
257} 250}
258 251