aboutsummaryrefslogtreecommitdiffstats
path: root/fs/inotify.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/inotify.c')
-rw-r--r--fs/inotify.c152
1 files changed, 146 insertions, 6 deletions
diff --git a/fs/inotify.c b/fs/inotify.c
index 690e72595e6e..dae3f28f30d4 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch)
106} 106}
107EXPORT_SYMBOL_GPL(get_inotify_watch); 107EXPORT_SYMBOL_GPL(get_inotify_watch);
108 108
109int pin_inotify_watch(struct inotify_watch *watch)
110{
111 struct super_block *sb = watch->inode->i_sb;
112 spin_lock(&sb_lock);
113 if (sb->s_count >= S_BIAS) {
114 atomic_inc(&sb->s_active);
115 spin_unlock(&sb_lock);
116 atomic_inc(&watch->count);
117 return 1;
118 }
119 spin_unlock(&sb_lock);
120 return 0;
121}
122
109/** 123/**
110 * put_inotify_watch - decrements the ref count on a given watch. cleans up 124 * put_inotify_watch - decrements the ref count on a given watch. cleans up
111 * watch references if the count reaches zero. inotify_watch is freed by 125 * watch references if the count reaches zero. inotify_watch is freed by
@@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch)
124} 138}
125EXPORT_SYMBOL_GPL(put_inotify_watch); 139EXPORT_SYMBOL_GPL(put_inotify_watch);
126 140
141void unpin_inotify_watch(struct inotify_watch *watch)
142{
143 struct super_block *sb = watch->inode->i_sb;
144 put_inotify_watch(watch);
145 deactivate_super(sb);
146}
147
127/* 148/*
128 * inotify_handle_get_wd - returns the next WD for use by the given handle 149 * inotify_handle_get_wd - returns the next WD for use by the given handle
129 * 150 *
@@ -407,11 +428,13 @@ void inotify_unmount_inodes(struct list_head *list)
407 watches = &inode->inotify_watches; 428 watches = &inode->inotify_watches;
408 list_for_each_entry_safe(watch, next_w, watches, i_list) { 429 list_for_each_entry_safe(watch, next_w, watches, i_list) {
409 struct inotify_handle *ih= watch->ih; 430 struct inotify_handle *ih= watch->ih;
431 get_inotify_watch(watch);
410 mutex_lock(&ih->mutex); 432 mutex_lock(&ih->mutex);
411 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, 433 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
412 NULL, NULL); 434 NULL, NULL);
413 inotify_remove_watch_locked(ih, watch); 435 inotify_remove_watch_locked(ih, watch);
414 mutex_unlock(&ih->mutex); 436 mutex_unlock(&ih->mutex);
437 put_inotify_watch(watch);
415 } 438 }
416 mutex_unlock(&inode->inotify_mutex); 439 mutex_unlock(&inode->inotify_mutex);
417 iput(inode); 440 iput(inode);
@@ -479,6 +502,112 @@ void inotify_init_watch(struct inotify_watch *watch)
479} 502}
480EXPORT_SYMBOL_GPL(inotify_init_watch); 503EXPORT_SYMBOL_GPL(inotify_init_watch);
481 504
505/*
506 * Watch removals suck violently. To kick the watch out we need (in this
507 * order) inode->inotify_mutex and ih->mutex. That's fine if we have
508 * a hold on inode; however, for all other cases we need to make damn sure
509 * we don't race with umount. We can *NOT* just grab a reference to a
510 * watch - inotify_unmount_inodes() will happily sail past it and we'll end
511 * with reference to inode potentially outliving its superblock. Ideally
512 * we just want to grab an active reference to superblock if we can; that
513 * will make sure we won't go into inotify_umount_inodes() until we are
514 * done. Cleanup is just deactivate_super(). However, that leaves a messy
515 * case - what if we *are* racing with umount() and active references to
516 * superblock can't be acquired anymore? We can bump ->s_count, grab
517 * ->s_umount, which will almost certainly wait until the superblock is shut
518 * down and the watch in question is pining for fjords. That's fine, but
519 * there is a problem - we might have hit the window between ->s_active
520 * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
521 * is past the point of no return and is heading for shutdown) and the
522 * moment when deactivate_super() acquires ->s_umount. We could just do
523 * drop_super() yield() and retry, but that's rather antisocial and this
524 * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
525 * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
526 * that we won't race with inotify_umount_inodes(). So we could grab a
527 * reference to watch and do the rest as above, just with drop_super() instead
528 * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
529 * could grab ->s_umount. So the watch could've been gone already.
530 *
531 * That still can be dealt with - we need to save watch->wd, do idr_find()
532 * and compare its result with our pointer. If they match, we either have
533 * the damn thing still alive or we'd lost not one but two races at once,
534 * the watch had been killed and a new one got created with the same ->wd
535 * at the same address. That couldn't have happened in inotify_destroy(),
536 * but inotify_rm_wd() could run into that. Still, "new one got created"
537 * is not a problem - we have every right to kill it or leave it alone,
538 * whatever's more convenient.
539 *
540 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
541 * "grab it and kill it" check. If it's been our original watch, we are
542 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
543 * race and kill the fscker anyway; we are safe since we know that its
544 * superblock won't be going away.
545 *
546 * And yes, this is far beyond mere "not very pretty"; so's the entire
547 * concept of inotify to start with.
548 */
549
550/**
551 * pin_to_kill - pin the watch down for removal
552 * @ih: inotify handle
553 * @watch: watch to kill
554 *
555 * Called with ih->mutex held, drops it. Possible return values:
556 * 0 - nothing to do, it has died
557 * 1 - remove it, drop the reference and deactivate_super()
558 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
559 * that variant, since it involved a lot of PITA, but that's the best that
560 * could've been done.
561 */
562static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
563{
564 struct super_block *sb = watch->inode->i_sb;
565 s32 wd = watch->wd;
566
567 spin_lock(&sb_lock);
568 if (sb->s_count >= S_BIAS) {
569 atomic_inc(&sb->s_active);
570 spin_unlock(&sb_lock);
571 get_inotify_watch(watch);
572 mutex_unlock(&ih->mutex);
573 return 1; /* the best outcome */
574 }
575 sb->s_count++;
576 spin_unlock(&sb_lock);
577 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
578 down_read(&sb->s_umount);
579 if (likely(!sb->s_root)) {
580 /* fs is already shut down; the watch is dead */
581 drop_super(sb);
582 return 0;
583 }
584 /* raced with the final deactivate_super() */
585 mutex_lock(&ih->mutex);
586 if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
587 /* the watch is dead */
588 mutex_unlock(&ih->mutex);
589 drop_super(sb);
590 return 0;
591 }
592 /* still alive or freed and reused with the same sb and wd; kill */
593 get_inotify_watch(watch);
594 mutex_unlock(&ih->mutex);
595 return 2;
596}
597
598static void unpin_and_kill(struct inotify_watch *watch, int how)
599{
600 struct super_block *sb = watch->inode->i_sb;
601 put_inotify_watch(watch);
602 switch (how) {
603 case 1:
604 deactivate_super(sb);
605 break;
606 case 2:
607 drop_super(sb);
608 }
609}
610
482/** 611/**
483 * inotify_destroy - clean up and destroy an inotify instance 612 * inotify_destroy - clean up and destroy an inotify instance
484 * @ih: inotify handle 613 * @ih: inotify handle
@@ -490,11 +619,15 @@ void inotify_destroy(struct inotify_handle *ih)
490 * pretty. We cannot do a simple iteration over the list, because we 619 * pretty. We cannot do a simple iteration over the list, because we
491 * do not know the inode until we iterate to the watch. But we need to 620 * do not know the inode until we iterate to the watch. But we need to
492 * hold inode->inotify_mutex before ih->mutex. The following works. 621 * hold inode->inotify_mutex before ih->mutex. The following works.
622 *
623 * AV: it had to become even uglier to start working ;-/
493 */ 624 */
494 while (1) { 625 while (1) {
495 struct inotify_watch *watch; 626 struct inotify_watch *watch;
496 struct list_head *watches; 627 struct list_head *watches;
628 struct super_block *sb;
497 struct inode *inode; 629 struct inode *inode;
630 int how;
498 631
499 mutex_lock(&ih->mutex); 632 mutex_lock(&ih->mutex);
500 watches = &ih->watches; 633 watches = &ih->watches;
@@ -503,8 +636,10 @@ void inotify_destroy(struct inotify_handle *ih)
503 break; 636 break;
504 } 637 }
505 watch = list_first_entry(watches, struct inotify_watch, h_list); 638 watch = list_first_entry(watches, struct inotify_watch, h_list);
506 get_inotify_watch(watch); 639 sb = watch->inode->i_sb;
507 mutex_unlock(&ih->mutex); 640 how = pin_to_kill(ih, watch);
641 if (!how)
642 continue;
508 643
509 inode = watch->inode; 644 inode = watch->inode;
510 mutex_lock(&inode->inotify_mutex); 645 mutex_lock(&inode->inotify_mutex);
@@ -518,7 +653,7 @@ void inotify_destroy(struct inotify_handle *ih)
518 653
519 mutex_unlock(&ih->mutex); 654 mutex_unlock(&ih->mutex);
520 mutex_unlock(&inode->inotify_mutex); 655 mutex_unlock(&inode->inotify_mutex);
521 put_inotify_watch(watch); 656 unpin_and_kill(watch, how);
522 } 657 }
523 658
524 /* free this handle: the put matching the get in inotify_init() */ 659 /* free this handle: the put matching the get in inotify_init() */
@@ -719,7 +854,9 @@ void inotify_evict_watch(struct inotify_watch *watch)
719int inotify_rm_wd(struct inotify_handle *ih, u32 wd) 854int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
720{ 855{
721 struct inotify_watch *watch; 856 struct inotify_watch *watch;
857 struct super_block *sb;
722 struct inode *inode; 858 struct inode *inode;
859 int how;
723 860
724 mutex_lock(&ih->mutex); 861 mutex_lock(&ih->mutex);
725 watch = idr_find(&ih->idr, wd); 862 watch = idr_find(&ih->idr, wd);
@@ -727,9 +864,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
727 mutex_unlock(&ih->mutex); 864 mutex_unlock(&ih->mutex);
728 return -EINVAL; 865 return -EINVAL;
729 } 866 }
730 get_inotify_watch(watch); 867 sb = watch->inode->i_sb;
868 how = pin_to_kill(ih, watch);
869 if (!how)
870 return 0;
871
731 inode = watch->inode; 872 inode = watch->inode;
732 mutex_unlock(&ih->mutex);
733 873
734 mutex_lock(&inode->inotify_mutex); 874 mutex_lock(&inode->inotify_mutex);
735 mutex_lock(&ih->mutex); 875 mutex_lock(&ih->mutex);
@@ -740,7 +880,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
740 880
741 mutex_unlock(&ih->mutex); 881 mutex_unlock(&ih->mutex);
742 mutex_unlock(&inode->inotify_mutex); 882 mutex_unlock(&inode->inotify_mutex);
743 put_inotify_watch(watch); 883 unpin_and_kill(watch, how);
744 884
745 return 0; 885 return 0;
746} 886}