diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2010-03-22 15:22:31 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2010-05-21 18:31:14 -0400 |
commit | 1712ac8fda7d8bc4dc921f5777b7423aacad7263 (patch) | |
tree | 1493bc7166ace04d9ac2f4d5383eaab1d43e17c4 /fs/notify/inotify | |
parent | b20bd1a5e78af267dc4b6e1ffed48d5d776302c5 (diff) |
Saner locking around deactivate_super()
Make sure that s_umount is acquired *before* we drop the final
active reference; we still have the fast path (atomic_dec_unless)
and we have gotten rid of the window between the moment when
s_active hits zero and s_umount is acquired. Which simplifies
the living hell out of grab_super() and inotify pin_to_kill()
stuff.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/notify/inotify')
-rw-r--r-- | fs/notify/inotify/inotify.c | 76 |
1 files changed, 11 insertions, 65 deletions
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c index 441ef136af22..27b75ebc7460 100644 --- a/fs/notify/inotify/inotify.c +++ b/fs/notify/inotify/inotify.c | |||
@@ -511,34 +511,8 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); | |||
511 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | 511 | * done. Cleanup is just deactivate_super(). However, that leaves a messy |
512 | * case - what if we *are* racing with umount() and active references to | 512 | * case - what if we *are* racing with umount() and active references to |
513 | * superblock can't be acquired anymore? We can bump ->s_count, grab | 513 | * superblock can't be acquired anymore? We can bump ->s_count, grab |
514 | * ->s_umount, which will almost certainly wait until the superblock is shut | 514 | * ->s_umount, which will wait until the superblock is shut down and the |
515 | * down and the watch in question is pining for fjords. That's fine, but | 515 | * watch in question is pining for fjords. |
516 | * there is a problem - we might have hit the window between ->s_active | ||
517 | * getting to 0 (i.e. the moment when superblock is past the point of no return | ||
518 | * and is heading for shutdown) and the moment when deactivate_super() acquires | ||
519 | * ->s_umount. We could just do drop_super() yield() and retry, but that's | ||
520 | * rather antisocial and this stuff is luser-triggerable. OTOH, having grabbed | ||
521 | * ->s_umount and having found that we'd got there first (i.e. that ->s_root is | ||
522 | * non-NULL) we know that we won't race with inotify_umount_inodes(). So we | ||
523 | * could grab a reference to watch and do the rest as above, just with | ||
524 | * drop_super() instead of deactivate_super(), right? Wrong. We had to drop | ||
525 | * ih->mutex before we could grab ->s_umount. So the watch could've been gone | ||
526 | * already. | ||
527 | * | ||
528 | * That still can be dealt with - we need to save watch->wd, do idr_find() | ||
529 | * and compare its result with our pointer. If they match, we either have | ||
530 | * the damn thing still alive or we'd lost not one but two races at once, | ||
531 | * the watch had been killed and a new one got created with the same ->wd | ||
532 | * at the same address. That couldn't have happened in inotify_destroy(), | ||
533 | * but inotify_rm_wd() could run into that. Still, "new one got created" | ||
534 | * is not a problem - we have every right to kill it or leave it alone, | ||
535 | * whatever's more convenient. | ||
536 | * | ||
537 | * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as | ||
538 | * "grab it and kill it" check. If it's been our original watch, we are | ||
539 | * fine, if it's a newcomer - nevermind, just pretend that we'd won the | ||
540 | * race and kill the fscker anyway; we are safe since we know that its | ||
541 | * superblock won't be going away. | ||
542 | * | 516 | * |
543 | * And yes, this is far beyond mere "not very pretty"; so's the entire | 517 | * And yes, this is far beyond mere "not very pretty"; so's the entire |
544 | * concept of inotify to start with. | 518 | * concept of inotify to start with. |
@@ -552,14 +526,10 @@ EXPORT_SYMBOL_GPL(inotify_init_watch); | |||
552 | * Called with ih->mutex held, drops it. Possible return values: | 526 | * Called with ih->mutex held, drops it. Possible return values: |
553 | * 0 - nothing to do, it has died | 527 | * 0 - nothing to do, it has died |
554 | * 1 - remove it, drop the reference and deactivate_super() | 528 | * 1 - remove it, drop the reference and deactivate_super() |
555 | * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid | ||
556 | * that variant, since it involved a lot of PITA, but that's the best that | ||
557 | * could've been done. | ||
558 | */ | 529 | */ |
559 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | 530 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) |
560 | { | 531 | { |
561 | struct super_block *sb = watch->inode->i_sb; | 532 | struct super_block *sb = watch->inode->i_sb; |
562 | s32 wd = watch->wd; | ||
563 | 533 | ||
564 | if (atomic_inc_not_zero(&sb->s_active)) { | 534 | if (atomic_inc_not_zero(&sb->s_active)) { |
565 | get_inotify_watch(watch); | 535 | get_inotify_watch(watch); |
@@ -571,36 +541,16 @@ static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | |||
571 | spin_unlock(&sb_lock); | 541 | spin_unlock(&sb_lock); |
572 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | 542 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ |
573 | down_read(&sb->s_umount); | 543 | down_read(&sb->s_umount); |
574 | if (likely(!sb->s_root)) { | 544 | /* fs is already shut down; the watch is dead */ |
575 | /* fs is already shut down; the watch is dead */ | 545 | drop_super(sb); |
576 | drop_super(sb); | 546 | return 0; |
577 | return 0; | ||
578 | } | ||
579 | /* raced with the final deactivate_super() */ | ||
580 | mutex_lock(&ih->mutex); | ||
581 | if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { | ||
582 | /* the watch is dead */ | ||
583 | mutex_unlock(&ih->mutex); | ||
584 | drop_super(sb); | ||
585 | return 0; | ||
586 | } | ||
587 | /* still alive or freed and reused with the same sb and wd; kill */ | ||
588 | get_inotify_watch(watch); | ||
589 | mutex_unlock(&ih->mutex); | ||
590 | return 2; | ||
591 | } | 547 | } |
592 | 548 | ||
593 | static void unpin_and_kill(struct inotify_watch *watch, int how) | 549 | static void unpin_and_kill(struct inotify_watch *watch) |
594 | { | 550 | { |
595 | struct super_block *sb = watch->inode->i_sb; | 551 | struct super_block *sb = watch->inode->i_sb; |
596 | put_inotify_watch(watch); | 552 | put_inotify_watch(watch); |
597 | switch (how) { | 553 | deactivate_super(sb); |
598 | case 1: | ||
599 | deactivate_super(sb); | ||
600 | break; | ||
601 | case 2: | ||
602 | drop_super(sb); | ||
603 | } | ||
604 | } | 554 | } |
605 | 555 | ||
606 | /** | 556 | /** |
@@ -622,7 +572,6 @@ void inotify_destroy(struct inotify_handle *ih) | |||
622 | struct list_head *watches; | 572 | struct list_head *watches; |
623 | struct super_block *sb; | 573 | struct super_block *sb; |
624 | struct inode *inode; | 574 | struct inode *inode; |
625 | int how; | ||
626 | 575 | ||
627 | mutex_lock(&ih->mutex); | 576 | mutex_lock(&ih->mutex); |
628 | watches = &ih->watches; | 577 | watches = &ih->watches; |
@@ -632,8 +581,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
632 | } | 581 | } |
633 | watch = list_first_entry(watches, struct inotify_watch, h_list); | 582 | watch = list_first_entry(watches, struct inotify_watch, h_list); |
634 | sb = watch->inode->i_sb; | 583 | sb = watch->inode->i_sb; |
635 | how = pin_to_kill(ih, watch); | 584 | if (!pin_to_kill(ih, watch)) |
636 | if (!how) | ||
637 | continue; | 585 | continue; |
638 | 586 | ||
639 | inode = watch->inode; | 587 | inode = watch->inode; |
@@ -648,7 +596,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
648 | 596 | ||
649 | mutex_unlock(&ih->mutex); | 597 | mutex_unlock(&ih->mutex); |
650 | mutex_unlock(&inode->inotify_mutex); | 598 | mutex_unlock(&inode->inotify_mutex); |
651 | unpin_and_kill(watch, how); | 599 | unpin_and_kill(watch); |
652 | } | 600 | } |
653 | 601 | ||
654 | /* free this handle: the put matching the get in inotify_init() */ | 602 | /* free this handle: the put matching the get in inotify_init() */ |
@@ -851,7 +799,6 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
851 | struct inotify_watch *watch; | 799 | struct inotify_watch *watch; |
852 | struct super_block *sb; | 800 | struct super_block *sb; |
853 | struct inode *inode; | 801 | struct inode *inode; |
854 | int how; | ||
855 | 802 | ||
856 | mutex_lock(&ih->mutex); | 803 | mutex_lock(&ih->mutex); |
857 | watch = idr_find(&ih->idr, wd); | 804 | watch = idr_find(&ih->idr, wd); |
@@ -860,8 +807,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
860 | return -EINVAL; | 807 | return -EINVAL; |
861 | } | 808 | } |
862 | sb = watch->inode->i_sb; | 809 | sb = watch->inode->i_sb; |
863 | how = pin_to_kill(ih, watch); | 810 | if (!pin_to_kill(ih, watch)) |
864 | if (!how) | ||
865 | return 0; | 811 | return 0; |
866 | 812 | ||
867 | inode = watch->inode; | 813 | inode = watch->inode; |
@@ -875,7 +821,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
875 | 821 | ||
876 | mutex_unlock(&ih->mutex); | 822 | mutex_unlock(&ih->mutex); |
877 | mutex_unlock(&inode->inotify_mutex); | 823 | mutex_unlock(&inode->inotify_mutex); |
878 | unpin_and_kill(watch, how); | 824 | unpin_and_kill(watch); |
879 | 825 | ||
880 | return 0; | 826 | return 0; |
881 | } | 827 | } |