Diffstat (limited to 'fs/inotify.c')
-rw-r--r--	fs/inotify.c	152
1 files changed, 146 insertions, 6 deletions
diff --git a/fs/inotify.c b/fs/inotify.c
index 690e72595e6e..dae3f28f30d4 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch)
 }
 EXPORT_SYMBOL_GPL(get_inotify_watch);
 
+int pin_inotify_watch(struct inotify_watch *watch)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	spin_lock(&sb_lock);
+	if (sb->s_count >= S_BIAS) {
+		atomic_inc(&sb->s_active);
+		spin_unlock(&sb_lock);
+		atomic_inc(&watch->count);
+		return 1;
+	}
+	spin_unlock(&sb_lock);
+	return 0;
+}
+
 /**
  * put_inotify_watch - decrements the ref count on a given watch. cleans up
  * watch references if the count reaches zero. inotify_watch is freed by
@@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch)
 }
 EXPORT_SYMBOL_GPL(put_inotify_watch);
 
+void unpin_inotify_watch(struct inotify_watch *watch)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	put_inotify_watch(watch);
+	deactivate_super(sb);
+}
+
 /*
  * inotify_handle_get_wd - returns the next WD for use by the given handle
  *
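
The pin_inotify_watch()/unpin_inotify_watch() pair added by the two hunks above is the interface for external watch owners: pin succeeds (returns 1) only while the superblock can still take an active reference, and unpin drops both the watch reference and that active reference. A minimal usage sketch, not part of this patch; example_remove_watch() is a hypothetical caller used only for illustration:

/* Sketch only: example_remove_watch() is hypothetical, not in the patch. */
static void example_remove_watch(struct inotify_handle *ih,
				 struct inotify_watch *watch)
{
	if (!pin_inotify_watch(watch))
		return;	/* umount is past the point of no return; the watch is dying anyway */
	/*
	 * The superblock now holds an extra active reference and we hold a
	 * watch reference, so watch->inode cannot outlive its superblock
	 * while we work on the watch.
	 */
	inotify_rm_wd(ih, watch->wd);
	/* Drop our watch reference and the superblock's active reference. */
	unpin_inotify_watch(watch);
}
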
@@ -407,11 +428,13 @@ void inotify_unmount_inodes(struct list_head *list)
 		watches = &inode->inotify_watches;
 		list_for_each_entry_safe(watch, next_w, watches, i_list) {
 			struct inotify_handle *ih= watch->ih;
+			get_inotify_watch(watch);
 			mutex_lock(&ih->mutex);
 			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
						 NULL, NULL);
 			inotify_remove_watch_locked(ih, watch);
 			mutex_unlock(&ih->mutex);
+			put_inotify_watch(watch);
 		}
 		mutex_unlock(&inode->inotify_mutex);
 		iput(inode);
@@ -479,6 +502,112 @@ void inotify_init_watch(struct inotify_watch *watch)
 }
 EXPORT_SYMBOL_GPL(inotify_init_watch);
 
+/*
+ * Watch removals suck violently. To kick the watch out we need (in this
+ * order) inode->inotify_mutex and ih->mutex. That's fine if we have
+ * a hold on inode; however, for all other cases we need to make damn sure
+ * we don't race with umount. We can *NOT* just grab a reference to a
+ * watch - inotify_unmount_inodes() will happily sail past it and we'll end
+ * with reference to inode potentially outliving its superblock. Ideally
+ * we just want to grab an active reference to superblock if we can; that
+ * will make sure we won't go into inotify_umount_inodes() until we are
+ * done. Cleanup is just deactivate_super(). However, that leaves a messy
+ * case - what if we *are* racing with umount() and active references to
+ * superblock can't be acquired anymore? We can bump ->s_count, grab
+ * ->s_umount, which will almost certainly wait until the superblock is shut
+ * down and the watch in question is pining for fjords. That's fine, but
+ * there is a problem - we might have hit the window between ->s_active
+ * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
+ * is past the point of no return and is heading for shutdown) and the
+ * moment when deactivate_super() acquires ->s_umount. We could just do
+ * drop_super() yield() and retry, but that's rather antisocial and this
+ * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
+ * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
+ * that we won't race with inotify_umount_inodes(). So we could grab a
+ * reference to watch and do the rest as above, just with drop_super() instead
+ * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
+ * could grab ->s_umount. So the watch could've been gone already.
+ *
+ * That still can be dealt with - we need to save watch->wd, do idr_find()
+ * and compare its result with our pointer. If they match, we either have
+ * the damn thing still alive or we'd lost not one but two races at once,
+ * the watch had been killed and a new one got created with the same ->wd
+ * at the same address. That couldn't have happened in inotify_destroy(),
+ * but inotify_rm_wd() could run into that. Still, "new one got created"
+ * is not a problem - we have every right to kill it or leave it alone,
+ * whatever's more convenient.
+ *
+ * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
+ * "grab it and kill it" check. If it's been our original watch, we are
+ * fine, if it's a newcomer - nevermind, just pretend that we'd won the
+ * race and kill the fscker anyway; we are safe since we know that its
+ * superblock won't be going away.
+ *
+ * And yes, this is far beyond mere "not very pretty"; so's the entire
+ * concept of inotify to start with.
+ */
+
+/**
+ * pin_to_kill - pin the watch down for removal
+ * @ih: inotify handle
+ * @watch: watch to kill
+ *
+ * Called with ih->mutex held, drops it. Possible return values:
+ * 0 - nothing to do, it has died
+ * 1 - remove it, drop the reference and deactivate_super()
+ * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
+ * that variant, since it involved a lot of PITA, but that's the best that
+ * could've been done.
+ */
+static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	s32 wd = watch->wd;
+
+	spin_lock(&sb_lock);
+	if (sb->s_count >= S_BIAS) {
+		atomic_inc(&sb->s_active);
+		spin_unlock(&sb_lock);
+		get_inotify_watch(watch);
+		mutex_unlock(&ih->mutex);
+		return 1;	/* the best outcome */
+	}
+	sb->s_count++;
+	spin_unlock(&sb_lock);
+	mutex_unlock(&ih->mutex);	/* can't grab ->s_umount under it */
+	down_read(&sb->s_umount);
+	if (likely(!sb->s_root)) {
+		/* fs is already shut down; the watch is dead */
+		drop_super(sb);
+		return 0;
+	}
+	/* raced with the final deactivate_super() */
+	mutex_lock(&ih->mutex);
+	if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
+		/* the watch is dead */
+		mutex_unlock(&ih->mutex);
+		drop_super(sb);
+		return 0;
+	}
+	/* still alive or freed and reused with the same sb and wd; kill */
+	get_inotify_watch(watch);
+	mutex_unlock(&ih->mutex);
+	return 2;
+}
+
+static void unpin_and_kill(struct inotify_watch *watch, int how)
+{
+	struct super_block *sb = watch->inode->i_sb;
+	put_inotify_watch(watch);
+	switch (how) {
+	case 1:
+		deactivate_super(sb);
+		break;
+	case 2:
+		drop_super(sb);
+	}
+}
+
 /**
  * inotify_destroy - clean up and destroy an inotify instance
  * @ih: inotify handle
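
The comment block and the pin_to_kill() kernel-doc in the hunk above define a three-way contract (0 = the watch is already dead, 1 = unpin via deactivate_super(), 2 = unpin via drop_super()), with unpin_and_kill() dispatching on that value. A condensed sketch of the caller pattern that the inotify_destroy() and inotify_rm_wd() hunks below follow; example_kill_watch() is illustrative only and abbreviates the actual teardown:

/* Illustrative caller pattern; the real callers are in the hunks below. */
static void example_kill_watch(struct inotify_handle *ih,
			       struct inotify_watch *watch)
{
	struct inode *inode;
	int how;

	/* Precondition: ih->mutex held and 'watch' was found under it. */
	how = pin_to_kill(ih, watch);	/* always drops ih->mutex */
	if (!how)
		return;			/* superblock shut down; watch is gone */

	inode = watch->inode;		/* safe: sb is pinned, inode can't go away */
	mutex_lock(&inode->inotify_mutex);
	mutex_lock(&ih->mutex);
	/* recheck: ih->mutex was dropped, the watch may have been removed */
	if (idr_find(&ih->idr, watch->wd) == watch)
		inotify_remove_watch_locked(ih, watch);
	mutex_unlock(&ih->mutex);
	mutex_unlock(&inode->inotify_mutex);

	unpin_and_kill(watch, how);	/* put ref + deactivate_super() or drop_super() */
}
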
@@ -490,11 +619,15 @@ void inotify_destroy(struct inotify_handle *ih)
 	 * pretty. We cannot do a simple iteration over the list, because we
 	 * do not know the inode until we iterate to the watch. But we need to
 	 * hold inode->inotify_mutex before ih->mutex. The following works.
+	 *
+	 * AV: it had to become even uglier to start working ;-/
 	 */
 	while (1) {
 		struct inotify_watch *watch;
 		struct list_head *watches;
+		struct super_block *sb;
 		struct inode *inode;
+		int how;
 
 		mutex_lock(&ih->mutex);
 		watches = &ih->watches;
@@ -503,8 +636,10 @@ void inotify_destroy(struct inotify_handle *ih)
 			break;
 		}
 		watch = list_first_entry(watches, struct inotify_watch, h_list);
-		get_inotify_watch(watch);
-		mutex_unlock(&ih->mutex);
+		sb = watch->inode->i_sb;
+		how = pin_to_kill(ih, watch);
+		if (!how)
+			continue;
 
 		inode = watch->inode;
 		mutex_lock(&inode->inotify_mutex);
@@ -518,7 +653,7 @@ void inotify_destroy(struct inotify_handle *ih)
 
 		mutex_unlock(&ih->mutex);
 		mutex_unlock(&inode->inotify_mutex);
-		put_inotify_watch(watch);
+		unpin_and_kill(watch, how);
 	}
 
 	/* free this handle: the put matching the get in inotify_init() */
@@ -719,7 +854,9 @@ void inotify_evict_watch(struct inotify_watch *watch)
 int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 {
 	struct inotify_watch *watch;
+	struct super_block *sb;
 	struct inode *inode;
+	int how;
 
 	mutex_lock(&ih->mutex);
 	watch = idr_find(&ih->idr, wd);
@@ -727,9 +864,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 		mutex_unlock(&ih->mutex);
 		return -EINVAL;
 	}
-	get_inotify_watch(watch);
+	sb = watch->inode->i_sb;
+	how = pin_to_kill(ih, watch);
+	if (!how)
+		return 0;
+
 	inode = watch->inode;
-	mutex_unlock(&ih->mutex);
 
 	mutex_lock(&inode->inotify_mutex);
 	mutex_lock(&ih->mutex);
@@ -740,7 +880,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
 
 	mutex_unlock(&ih->mutex);
 	mutex_unlock(&inode->inotify_mutex);
-	put_inotify_watch(watch);
+	unpin_and_kill(watch, how);
 
 	return 0;
 }