author    Al Viro <viro@zeniv.linux.org.uk>    2010-03-22 15:22:31 -0400
committer Al Viro <viro@zeniv.linux.org.uk>    2010-05-21 18:31:14 -0400
commit    1712ac8fda7d8bc4dc921f5777b7423aacad7263
tree      1493bc7166ace04d9ac2f4d5383eaab1d43e17c4
parent    b20bd1a5e78af267dc4b6e1ffed48d5d776302c5
Saner locking around deactivate_super()
Make sure that s_umount is acquired *before* we drop the final
active reference; we still have the fast path (atomic_dec_unless)
and we have gotten rid of the window between the moment when
s_active hits zero and s_umount is acquired. Which simplifies
the living hell out of grab_super() and inotify pin_to_kill()
stuff.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
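
For reference, here is how the two superblock helpers read once this patch is applied, reconstructed from the fs/super.c hunk below (annotating comments added here; the "atomic_dec_unless" fast path mentioned above is spelled atomic_add_unless(&s->s_active, -1, 1) in the code):

void deactivate_locked_super(struct super_block *s)
{
        struct file_system_type *fs = s->s_type;
        if (atomic_dec_and_test(&s->s_active)) {
                /* final active reference dropped while holding ->s_umount */
                vfs_dq_off(s, 0);
                fs->kill_sb(s);
                put_filesystem(fs);
                put_super(s);
        } else {
                /* not the last one; just release the lock the caller held */
                up_write(&s->s_umount);
        }
}

void deactivate_super(struct super_block *s)
{
        /* fast path: drop a non-final active reference without touching ->s_umount */
        if (!atomic_add_unless(&s->s_active, -1, 1)) {
                /* possibly the final reference: take ->s_umount *before* dropping it */
                down_write(&s->s_umount);
                deactivate_locked_super(s);
        }
}

With this ordering there is no longer a window in which ->s_active is already zero but ->s_umount is not yet held, which is what lets grab_super() and the inotify pin_to_kill() path below become so much simpler.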
 fs/notify/inotify/inotify.c |   76
 fs/super.c                  |   45
 2 files changed, 30 insertions(+), 91 deletions(-)
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 441ef136af22..27b75ebc7460 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -511,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
  * done. Cleanup is just deactivate_super(). However, that leaves a messy
  * case - what if we *are* racing with umount() and active references to
  * superblock can't be acquired anymore? We can bump ->s_count, grab
- * ->s_umount, which will almost certainly wait until the superblock is shut
- * down and the watch in question is pining for fjords. That's fine, but
- * there is a problem - we might have hit the window between ->s_active
- * getting to 0 (i.e. the moment when superblock is past the point of no return
- * and is heading for shutdown) and the moment when deactivate_super() acquires
- * ->s_umount. We could just do drop_super() yield() and retry, but that's
- * rather antisocial and this stuff is luser-triggerable. OTOH, having grabbed
- * ->s_umount and having found that we'd got there first (i.e. that ->s_root is
- * non-NULL) we know that we won't race with inotify_umount_inodes(). So we
- * could grab a reference to watch and do the rest as above, just with
- * drop_super() instead of deactivate_super(), right? Wrong. We had to drop
- * ih->mutex before we could grab ->s_umount. So the watch could've been gone
- * already.
- *
- * That still can be dealt with - we need to save watch->wd, do idr_find()
- * and compare its result with our pointer. If they match, we either have
- * the damn thing still alive or we'd lost not one but two races at once,
- * the watch had been killed and a new one got created with the same ->wd
- * at the same address. That couldn't have happened in inotify_destroy(),
- * but inotify_rm_wd() could run into that. Still, "new one got created"
- * is not a problem - we have every right to kill it or leave it alone,
- * whatever's more convenient.
- *
- * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
- * "grab it and kill it" check. If it's been our original watch, we are
- * fine, if it's a newcomer - nevermind, just pretend that we'd won the
- * race and kill the fscker anyway; we are safe since we know that its
- * superblock won't be going away.
+ * ->s_umount, which will wait until the superblock is shut down and the
+ * watch in question is pining for fjords.
  *
  * And yes, this is far beyond mere "not very pretty"; so's the entire
  * concept of inotify to start with.
@@ -552,14 +526,10 @@ EXPORT_SYMBOL_GPL(inotify_init_watch);
  * Called with ih->mutex held, drops it. Possible return values:
  * 0 - nothing to do, it has died
  * 1 - remove it, drop the reference and deactivate_super()
- * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
- *     that variant, since it involved a lot of PITA, but that's the best that
- *     could've been done.
  */
 static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
 {
         struct super_block *sb = watch->inode->i_sb;
-        s32 wd = watch->wd;
 
         if (atomic_inc_not_zero(&sb->s_active)) {
                 get_inotify_watch(watch);
@@ -571,36 +541,16 @@ static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
         spin_unlock(&sb_lock);
         mutex_unlock(&ih->mutex);       /* can't grab ->s_umount under it */
         down_read(&sb->s_umount);
-        if (likely(!sb->s_root)) {
-                /* fs is already shut down; the watch is dead */
-                drop_super(sb);
-                return 0;
-        }
-        /* raced with the final deactivate_super() */
-        mutex_lock(&ih->mutex);
-        if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
-                /* the watch is dead */
-                mutex_unlock(&ih->mutex);
-                drop_super(sb);
-                return 0;
-        }
-        /* still alive or freed and reused with the same sb and wd; kill */
-        get_inotify_watch(watch);
-        mutex_unlock(&ih->mutex);
-        return 2;
+        /* fs is already shut down; the watch is dead */
+        drop_super(sb);
+        return 0;
 }
 
-static void unpin_and_kill(struct inotify_watch *watch, int how)
+static void unpin_and_kill(struct inotify_watch *watch)
 {
         struct super_block *sb = watch->inode->i_sb;
         put_inotify_watch(watch);
-        switch (how) {
-        case 1:
-                deactivate_super(sb);
-                break;
-        case 2:
-                drop_super(sb);
-        }
+        deactivate_super(sb);
 }
 
 /**
@@ -622,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih)
         struct list_head *watches;
         struct super_block *sb;
         struct inode *inode;
-        int how;
 
         mutex_lock(&ih->mutex);
         watches = &ih->watches;
@@ -632,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih)
                 }
                 watch = list_first_entry(watches, struct inotify_watch, h_list);
                 sb = watch->inode->i_sb;
-                how = pin_to_kill(ih, watch);
-                if (!how)
+                if (!pin_to_kill(ih, watch))
                         continue;
 
                 inode = watch->inode;
@@ -648,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih)
 
                 mutex_unlock(&ih->mutex);
                 mutex_unlock(&inode->inotify_mutex);
-                unpin_and_kill(watch, how);
+                unpin_and_kill(watch);
         }
 
         /* free this handle: the put matching the get in inotify_init() */
@@ -851,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
         struct inotify_watch *watch;
         struct super_block *sb;
         struct inode *inode;
-        int how;
 
         mutex_lock(&ih->mutex);
         watch = idr_find(&ih->idr, wd);
@@ -860,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
                 return -EINVAL;
         }
         sb = watch->inode->i_sb;
-        how = pin_to_kill(ih, watch);
-        if (!how)
+        if (!pin_to_kill(ih, watch))
                 return 0;
 
         inode = watch->inode;
@@ -875,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 
         mutex_unlock(&ih->mutex);
         mutex_unlock(&inode->inotify_mutex);
-        unpin_and_kill(watch, how);
+        unpin_and_kill(watch);
 
         return 0;
 }
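
After the hunks above, pin_to_kill() has only two outcomes: it either takes an active reference and returns 1, or rides out the shutdown by waiting on ->s_umount and returns 0. The old "return 2, then drop_super()" case is gone, so unpin_and_kill() no longer needs a "how" argument and both callers reduce to the following pattern (a sketch stitched together from the diff, with unrelated details omitted):

        sb = watch->inode->i_sb;
        if (!pin_to_kill(ih, watch))
                return 0;               /* or continue; the watch is already dead */

        /* ... tear the watch down ... */

        mutex_unlock(&ih->mutex);
        mutex_unlock(&inode->inotify_mutex);
        unpin_and_kill(watch);          /* puts the watch, then deactivate_super(sb) */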
diff --git a/fs/super.c b/fs/super.c
index bc734f8b3e18..157657b32798 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -178,53 +178,48 @@ void put_super(struct super_block *sb)
 
 
 /**
- * deactivate_super - drop an active reference to superblock
+ * deactivate_locked_super - drop an active reference to superblock
  * @s: superblock to deactivate
  *
- * Drops an active reference to superblock, acquiring a temprory one if
- * there is no active references left. In that case we lock superblock,
+ * Drops an active reference to superblock, converting it into a temprory
+ * one if there is no other active references left. In that case we
  * tell fs driver to shut it down and drop the temporary reference we
  * had just acquired.
+ *
+ * Caller holds exclusive lock on superblock; that lock is released.
  */
-void deactivate_super(struct super_block *s)
+void deactivate_locked_super(struct super_block *s)
 {
         struct file_system_type *fs = s->s_type;
         if (atomic_dec_and_test(&s->s_active)) {
                 vfs_dq_off(s, 0);
-                down_write(&s->s_umount);
                 fs->kill_sb(s);
                 put_filesystem(fs);
                 put_super(s);
+        } else {
+                up_write(&s->s_umount);
         }
 }
 
-EXPORT_SYMBOL(deactivate_super);
+EXPORT_SYMBOL(deactivate_locked_super);
 
 /**
- * deactivate_locked_super - drop an active reference to superblock
+ * deactivate_super - drop an active reference to superblock
  * @s: superblock to deactivate
  *
- * Equivalent of up_write(&s->s_umount); deactivate_super(s);, except that
- * it does not unlock it until it's all over. As the result, it's safe to
- * use to dispose of new superblock on ->get_sb() failure exits - nobody
- * will see the sucker until it's all over. Equivalent using up_write +
- * deactivate_super is safe for that purpose only if superblock is either
- * safe to use or has NULL ->s_root when we unlock.
+ * Variant of deactivate_locked_super(), except that superblock is *not*
+ * locked by caller. If we are going to drop the final active reference,
+ * lock will be acquired prior to that.
  */
-void deactivate_locked_super(struct super_block *s)
+void deactivate_super(struct super_block *s)
 {
-        struct file_system_type *fs = s->s_type;
-        if (atomic_dec_and_test(&s->s_active)) {
-                vfs_dq_off(s, 0);
-                fs->kill_sb(s);
-                put_filesystem(fs);
-                put_super(s);
-        } else {
-                up_write(&s->s_umount);
+        if (!atomic_add_unless(&s->s_active, -1, 1)) {
+                down_write(&s->s_umount);
+                deactivate_locked_super(s);
         }
 }
 
-EXPORT_SYMBOL(deactivate_locked_super);
+EXPORT_SYMBOL(deactivate_super);
 
 /**
  * grab_super - acquire an active reference
@@ -247,12 +242,10 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
         /* it's going away */
         s->s_count++;
         spin_unlock(&sb_lock);
-        /* usually that'll be enough for it to die... */
+        /* wait for it to die */
         down_write(&s->s_umount);
         up_write(&s->s_umount);
         put_super(s);
-        /* ... but in case it wasn't, let's at least yield() */
-        yield();
         return 0;
 }
 
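
For completeness, the slow path of grab_super() now relies on the same guarantee: if ->s_active has already hit zero, the final deactivate_super() is guaranteed to be holding ->s_umount, so a bare down_write()/up_write() pair is enough to wait for the shutdown to finish and there is nothing left to yield() for. A rough sketch of the whole function after this patch; the fast-path lines at the top are not part of this hunk and are filled in as an assumption from the surrounding file:

static int grab_super(struct super_block *s) __releases(sb_lock)
{
        /* assumed fast path: superblock still has active references, take one */
        if (atomic_inc_not_zero(&s->s_active)) {
                spin_unlock(&sb_lock);
                return 1;
        }
        /* it's going away */
        s->s_count++;
        spin_unlock(&sb_lock);
        /* wait for it to die */
        down_write(&s->s_umount);
        up_write(&s->s_umount);
        put_super(s);
        return 0;
}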