diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/inotify.c | 150 |
1 files changed, 144 insertions, 6 deletions
diff --git a/fs/inotify.c b/fs/inotify.c index 690e72595e6e..7bbed1b89825 100644 --- a/fs/inotify.c +++ b/fs/inotify.c | |||
@@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch) | |||
106 | } | 106 | } |
107 | EXPORT_SYMBOL_GPL(get_inotify_watch); | 107 | EXPORT_SYMBOL_GPL(get_inotify_watch); |
108 | 108 | ||
109 | int pin_inotify_watch(struct inotify_watch *watch) | ||
110 | { | ||
111 | struct super_block *sb = watch->inode->i_sb; | ||
112 | spin_lock(&sb_lock); | ||
113 | if (sb->s_count >= S_BIAS) { | ||
114 | atomic_inc(&sb->s_active); | ||
115 | spin_unlock(&sb_lock); | ||
116 | atomic_inc(&watch->count); | ||
117 | return 1; | ||
118 | } | ||
119 | spin_unlock(&sb_lock); | ||
120 | return 0; | ||
121 | } | ||
122 | |||
109 | /** | 123 | /** |
110 | * put_inotify_watch - decrements the ref count on a given watch. cleans up | 124 | * put_inotify_watch - decrements the ref count on a given watch. cleans up |
111 | * watch references if the count reaches zero. inotify_watch is freed by | 125 | * watch references if the count reaches zero. inotify_watch is freed by |
@@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch) | |||
124 | } | 138 | } |
125 | EXPORT_SYMBOL_GPL(put_inotify_watch); | 139 | EXPORT_SYMBOL_GPL(put_inotify_watch); |
126 | 140 | ||
141 | void unpin_inotify_watch(struct inotify_watch *watch) | ||
142 | { | ||
143 | struct super_block *sb = watch->inode->i_sb; | ||
144 | put_inotify_watch(watch); | ||
145 | deactivate_super(sb); | ||
146 | } | ||
147 | |||
127 | /* | 148 | /* |
128 | * inotify_handle_get_wd - returns the next WD for use by the given handle | 149 | * inotify_handle_get_wd - returns the next WD for use by the given handle |
129 | * | 150 | * |
@@ -479,6 +500,112 @@ void inotify_init_watch(struct inotify_watch *watch) | |||
479 | } | 500 | } |
480 | EXPORT_SYMBOL_GPL(inotify_init_watch); | 501 | EXPORT_SYMBOL_GPL(inotify_init_watch); |
481 | 502 | ||
503 | /* | ||
504 | * Watch removals suck violently. To kick the watch out we need (in this | ||
505 | * order) inode->inotify_mutex and ih->mutex. That's fine if we have | ||
506 | * a hold on inode; however, for all other cases we need to make damn sure | ||
507 | * we don't race with umount. We can *NOT* just grab a reference to a | ||
508 | * watch - inotify_unmount_inodes() will happily sail past it and we'll end | ||
509 | * with reference to inode potentially outliving its superblock. Ideally | ||
510 | * we just want to grab an active reference to superblock if we can; that | ||
511 | * will make sure we won't go into inotify_unmount_inodes() until we are | ||
512 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | ||
513 | * case - what if we *are* racing with umount() and active references to | ||
514 | * superblock can't be acquired anymore? We can bump ->s_count, grab | ||
515 | * ->s_umount, which will almost certainly wait until the superblock is shut | ||
516 | * down and the watch in question is pining for fjords. That's fine, but | ||
517 | * there is a problem - we might have hit the window between ->s_active | ||
518 | * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock | ||
519 | * is past the point of no return and is heading for shutdown) and the | ||
520 | * moment when deactivate_super() acquires ->s_umount. We could just do | ||
521 | * drop_super(), yield() and retry, but that's rather antisocial and this | ||
522 | * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having | ||
523 | * found that we'd got there first (i.e. that ->s_root is non-NULL) we know | ||
524 | * that we won't race with inotify_unmount_inodes(). So we could grab a | ||
525 | * reference to watch and do the rest as above, just with drop_super() instead | ||
526 | * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we | ||
527 | * could grab ->s_umount. So the watch could've been gone already. | ||
528 | * | ||
529 | * That still can be dealt with - we need to save watch->wd, do idr_find() | ||
530 | * and compare its result with our pointer. If they match, we either have | ||
531 | * the damn thing still alive or we'd lost not one but two races at once, | ||
532 | * the watch had been killed and a new one got created with the same ->wd | ||
533 | * at the same address. That couldn't have happened in inotify_destroy(), | ||
534 | * but inotify_rm_wd() could run into that. Still, "new one got created" | ||
535 | * is not a problem - we have every right to kill it or leave it alone, | ||
536 | * whatever's more convenient. | ||
537 | * | ||
538 | * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as | ||
539 | * "grab it and kill it" check. If it's been our original watch, we are | ||
540 | * fine, if it's a newcomer - nevermind, just pretend that we'd won the | ||
541 | * race and kill the fscker anyway; we are safe since we know that its | ||
542 | * superblock won't be going away. | ||
543 | * | ||
544 | * And yes, this is far beyond mere "not very pretty"; so's the entire | ||
545 | * concept of inotify to start with. | ||
546 | */ | ||
547 | |||
548 | /** | ||
549 | * pin_to_kill - pin the watch down for removal | ||
550 | * @ih: inotify handle | ||
551 | * @watch: watch to kill | ||
552 | * | ||
553 | * Called with ih->mutex held, drops it. Possible return values: | ||
554 | * 0 - nothing to do, it has died | ||
555 | * 1 - remove it, drop the reference and deactivate_super() | ||
556 | * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid | ||
557 | * that variant, since it involved a lot of PITA, but that's the best that | ||
558 | * could've been done. | ||
559 | */ | ||
560 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | ||
561 | { | ||
562 | struct super_block *sb = watch->inode->i_sb; | ||
563 | s32 wd = watch->wd; | ||
564 | |||
565 | spin_lock(&sb_lock); | ||
566 | if (sb->s_count >= S_BIAS) { | ||
567 | atomic_inc(&sb->s_active); | ||
568 | spin_unlock(&sb_lock); | ||
569 | get_inotify_watch(watch); | ||
570 | mutex_unlock(&ih->mutex); | ||
571 | return 1; /* the best outcome */ | ||
572 | } | ||
573 | sb->s_count++; | ||
574 | spin_unlock(&sb_lock); | ||
575 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | ||
576 | down_read(&sb->s_umount); | ||
577 | if (likely(!sb->s_root)) { | ||
578 | /* fs is already shut down; the watch is dead */ | ||
579 | drop_super(sb); | ||
580 | return 0; | ||
581 | } | ||
582 | /* raced with the final deactivate_super() */ | ||
583 | mutex_lock(&ih->mutex); | ||
584 | if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { | ||
585 | /* the watch is dead */ | ||
586 | mutex_unlock(&ih->mutex); | ||
587 | drop_super(sb); | ||
588 | return 0; | ||
589 | } | ||
590 | /* still alive or freed and reused with the same sb and wd; kill */ | ||
591 | get_inotify_watch(watch); | ||
592 | mutex_unlock(&ih->mutex); | ||
593 | return 2; | ||
594 | } | ||
595 | |||
596 | static void unpin_and_kill(struct inotify_watch *watch, int how) | ||
597 | { | ||
598 | struct super_block *sb = watch->inode->i_sb; | ||
599 | put_inotify_watch(watch); | ||
600 | switch (how) { | ||
601 | case 1: | ||
602 | deactivate_super(sb); | ||
603 | break; | ||
604 | case 2: | ||
605 | drop_super(sb); | ||
606 | } | ||
607 | } | ||
608 | |||
482 | /** | 609 | /** |
483 | * inotify_destroy - clean up and destroy an inotify instance | 610 | * inotify_destroy - clean up and destroy an inotify instance |
484 | * @ih: inotify handle | 611 | * @ih: inotify handle |
@@ -490,11 +617,15 @@ void inotify_destroy(struct inotify_handle *ih) | |||
490 | * pretty. We cannot do a simple iteration over the list, because we | 617 | * pretty. We cannot do a simple iteration over the list, because we |
491 | * do not know the inode until we iterate to the watch. But we need to | 618 | * do not know the inode until we iterate to the watch. But we need to |
492 | * hold inode->inotify_mutex before ih->mutex. The following works. | 619 | * hold inode->inotify_mutex before ih->mutex. The following works. |
620 | * | ||
621 | * AV: it had to become even uglier to start working ;-/ | ||
493 | */ | 622 | */ |
494 | while (1) { | 623 | while (1) { |
495 | struct inotify_watch *watch; | 624 | struct inotify_watch *watch; |
496 | struct list_head *watches; | 625 | struct list_head *watches; |
626 | struct super_block *sb; | ||
497 | struct inode *inode; | 627 | struct inode *inode; |
628 | int how; | ||
498 | 629 | ||
499 | mutex_lock(&ih->mutex); | 630 | mutex_lock(&ih->mutex); |
500 | watches = &ih->watches; | 631 | watches = &ih->watches; |
@@ -503,8 +634,10 @@ void inotify_destroy(struct inotify_handle *ih) | |||
503 | break; | 634 | break; |
504 | } | 635 | } |
505 | watch = list_first_entry(watches, struct inotify_watch, h_list); | 636 | watch = list_first_entry(watches, struct inotify_watch, h_list); |
506 | get_inotify_watch(watch); | 637 | sb = watch->inode->i_sb; |
507 | mutex_unlock(&ih->mutex); | 638 | how = pin_to_kill(ih, watch); |
639 | if (!how) | ||
640 | continue; | ||
508 | 641 | ||
509 | inode = watch->inode; | 642 | inode = watch->inode; |
510 | mutex_lock(&inode->inotify_mutex); | 643 | mutex_lock(&inode->inotify_mutex); |
@@ -518,7 +651,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
518 | 651 | ||
519 | mutex_unlock(&ih->mutex); | 652 | mutex_unlock(&ih->mutex); |
520 | mutex_unlock(&inode->inotify_mutex); | 653 | mutex_unlock(&inode->inotify_mutex); |
521 | put_inotify_watch(watch); | 654 | unpin_and_kill(watch, how); |
522 | } | 655 | } |
523 | 656 | ||
524 | /* free this handle: the put matching the get in inotify_init() */ | 657 | /* free this handle: the put matching the get in inotify_init() */ |
@@ -719,7 +852,9 @@ void inotify_evict_watch(struct inotify_watch *watch) | |||
719 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | 852 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) |
720 | { | 853 | { |
721 | struct inotify_watch *watch; | 854 | struct inotify_watch *watch; |
855 | struct super_block *sb; | ||
722 | struct inode *inode; | 856 | struct inode *inode; |
857 | int how; | ||
723 | 858 | ||
724 | mutex_lock(&ih->mutex); | 859 | mutex_lock(&ih->mutex); |
725 | watch = idr_find(&ih->idr, wd); | 860 | watch = idr_find(&ih->idr, wd); |
@@ -727,9 +862,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
727 | mutex_unlock(&ih->mutex); | 862 | mutex_unlock(&ih->mutex); |
728 | return -EINVAL; | 863 | return -EINVAL; |
729 | } | 864 | } |
730 | get_inotify_watch(watch); | 865 | sb = watch->inode->i_sb; |
866 | how = pin_to_kill(ih, watch); | ||
867 | if (!how) | ||
868 | return 0; | ||
869 | |||
731 | inode = watch->inode; | 870 | inode = watch->inode; |
732 | mutex_unlock(&ih->mutex); | ||
733 | 871 | ||
734 | mutex_lock(&inode->inotify_mutex); | 872 | mutex_lock(&inode->inotify_mutex); |
735 | mutex_lock(&ih->mutex); | 873 | mutex_lock(&ih->mutex); |
@@ -740,7 +878,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
740 | 878 | ||
741 | mutex_unlock(&ih->mutex); | 879 | mutex_unlock(&ih->mutex); |
742 | mutex_unlock(&inode->inotify_mutex); | 880 | mutex_unlock(&inode->inotify_mutex); |
743 | put_inotify_watch(watch); | 881 | unpin_and_kill(watch, how); |
744 | 882 | ||
745 | return 0; | 883 | return 0; |
746 | } | 884 | } |