aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/inotify.c150
1 files changed, 144 insertions, 6 deletions
diff --git a/fs/inotify.c b/fs/inotify.c
index 690e72595e6e..7bbed1b89825 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch)
106} 106}
107EXPORT_SYMBOL_GPL(get_inotify_watch); 107EXPORT_SYMBOL_GPL(get_inotify_watch);
108 108
109int pin_inotify_watch(struct inotify_watch *watch)
110{
111 struct super_block *sb = watch->inode->i_sb;
112 spin_lock(&sb_lock);
113 if (sb->s_count >= S_BIAS) {
114 atomic_inc(&sb->s_active);
115 spin_unlock(&sb_lock);
116 atomic_inc(&watch->count);
117 return 1;
118 }
119 spin_unlock(&sb_lock);
120 return 0;
121}
122
109/** 123/**
110 * put_inotify_watch - decrements the ref count on a given watch. cleans up 124 * put_inotify_watch - decrements the ref count on a given watch. cleans up
111 * watch references if the count reaches zero. inotify_watch is freed by 125 * watch references if the count reaches zero. inotify_watch is freed by
@@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch)
124} 138}
125EXPORT_SYMBOL_GPL(put_inotify_watch); 139EXPORT_SYMBOL_GPL(put_inotify_watch);
126 140
141void unpin_inotify_watch(struct inotify_watch *watch)
142{
143 struct super_block *sb = watch->inode->i_sb;
144 put_inotify_watch(watch);
145 deactivate_super(sb);
146}
147
127/* 148/*
128 * inotify_handle_get_wd - returns the next WD for use by the given handle 149 * inotify_handle_get_wd - returns the next WD for use by the given handle
129 * 150 *
@@ -479,6 +500,112 @@ void inotify_init_watch(struct inotify_watch *watch)
479} 500}
480EXPORT_SYMBOL_GPL(inotify_init_watch); 501EXPORT_SYMBOL_GPL(inotify_init_watch);
481 502
503/*
504 * Watch removals suck violently. To kick the watch out we need (in this
505 * order) inode->inotify_mutex and ih->mutex. That's fine if we have
506 * a hold on inode; however, for all other cases we need to make damn sure
507 * we don't race with umount. We can *NOT* just grab a reference to a
508 * watch - inotify_unmount_inodes() will happily sail past it and we'll end
509 * with reference to inode potentially outliving its superblock. Ideally
510 * we just want to grab an active reference to superblock if we can; that
511 * will make sure we won't go into inotify_unmount_inodes() until we are
512 * done. Cleanup is just deactivate_super(). However, that leaves a messy
513 * case - what if we *are* racing with umount() and active references to
514 * superblock can't be acquired anymore? We can bump ->s_count, grab
515 * ->s_umount, which will almost certainly wait until the superblock is shut
516 * down and the watch in question is pining for fjords. That's fine, but
517 * there is a problem - we might have hit the window between ->s_active
518 * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
519 * is past the point of no return and is heading for shutdown) and the
520 * moment when deactivate_super() acquires ->s_umount. We could just do
521 * drop_super(), yield() and retry, but that's rather antisocial and this
522 * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
523 * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
524 * that we won't race with inotify_unmount_inodes(). So we could grab a
525 * reference to watch and do the rest as above, just with drop_super() instead
526 * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
527 * could grab ->s_umount. So the watch could've been gone already.
528 *
529 * That still can be dealt with - we need to save watch->wd, do idr_find()
530 * and compare its result with our pointer. If they match, we either have
531 * the damn thing still alive or we'd lost not one but two races at once,
532 * the watch had been killed and a new one got created with the same ->wd
533 * at the same address. That couldn't have happened in inotify_destroy(),
534 * but inotify_rm_wd() could run into that. Still, "new one got created"
535 * is not a problem - we have every right to kill it or leave it alone,
536 * whatever's more convenient.
537 *
538 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
539 * "grab it and kill it" check. If it's been our original watch, we are
540 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
541 * race and kill the fscker anyway; we are safe since we know that its
542 * superblock won't be going away.
543 *
544 * And yes, this is far beyond mere "not very pretty"; so's the entire
545 * concept of inotify to start with.
546 */
547
548/**
549 * pin_to_kill - pin the watch down for removal
550 * @ih: inotify handle
551 * @watch: watch to kill
552 *
553 * Called with ih->mutex held, drops it. Possible return values:
554 * 0 - nothing to do, it has died
555 * 1 - remove it, drop the reference and deactivate_super()
556 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
557 * that variant, since it involved a lot of PITA, but that's the best that
558 * could've been done.
559 */
560static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
561{
562 struct super_block *sb = watch->inode->i_sb;
563 s32 wd = watch->wd;
564
565 spin_lock(&sb_lock);
566 if (sb->s_count >= S_BIAS) {
567 atomic_inc(&sb->s_active);
568 spin_unlock(&sb_lock);
569 get_inotify_watch(watch);
570 mutex_unlock(&ih->mutex);
571 return 1; /* the best outcome */
572 }
573 sb->s_count++;
574 spin_unlock(&sb_lock);
575 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
576 down_read(&sb->s_umount);
577 if (likely(!sb->s_root)) {
578 /* fs is already shut down; the watch is dead */
579 drop_super(sb);
580 return 0;
581 }
582 /* raced with the final deactivate_super() */
583 mutex_lock(&ih->mutex);
584 if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
585 /* the watch is dead */
586 mutex_unlock(&ih->mutex);
587 drop_super(sb);
588 return 0;
589 }
590 /* still alive or freed and reused with the same sb and wd; kill */
591 get_inotify_watch(watch);
592 mutex_unlock(&ih->mutex);
593 return 2;
594}
595
596static void unpin_and_kill(struct inotify_watch *watch, int how)
597{
598 struct super_block *sb = watch->inode->i_sb;
599 put_inotify_watch(watch);
600 switch (how) {
601 case 1:
602 deactivate_super(sb);
603 break;
604 case 2:
605 drop_super(sb);
606 }
607}
608
482/** 609/**
483 * inotify_destroy - clean up and destroy an inotify instance 610 * inotify_destroy - clean up and destroy an inotify instance
484 * @ih: inotify handle 611 * @ih: inotify handle
@@ -490,11 +617,15 @@ void inotify_destroy(struct inotify_handle *ih)
490 * pretty. We cannot do a simple iteration over the list, because we 617 * pretty. We cannot do a simple iteration over the list, because we
491 * do not know the inode until we iterate to the watch. But we need to 618 * do not know the inode until we iterate to the watch. But we need to
492 * hold inode->inotify_mutex before ih->mutex. The following works. 619 * hold inode->inotify_mutex before ih->mutex. The following works.
620 *
621 * AV: it had to become even uglier to start working ;-/
493 */ 622 */
494 while (1) { 623 while (1) {
495 struct inotify_watch *watch; 624 struct inotify_watch *watch;
496 struct list_head *watches; 625 struct list_head *watches;
626 struct super_block *sb;
497 struct inode *inode; 627 struct inode *inode;
628 int how;
498 629
499 mutex_lock(&ih->mutex); 630 mutex_lock(&ih->mutex);
500 watches = &ih->watches; 631 watches = &ih->watches;
@@ -503,8 +634,10 @@ void inotify_destroy(struct inotify_handle *ih)
503 break; 634 break;
504 } 635 }
505 watch = list_first_entry(watches, struct inotify_watch, h_list); 636 watch = list_first_entry(watches, struct inotify_watch, h_list);
506 get_inotify_watch(watch); 637 sb = watch->inode->i_sb;
507 mutex_unlock(&ih->mutex); 638 how = pin_to_kill(ih, watch);
639 if (!how)
640 continue;
508 641
509 inode = watch->inode; 642 inode = watch->inode;
510 mutex_lock(&inode->inotify_mutex); 643 mutex_lock(&inode->inotify_mutex);
@@ -518,7 +651,7 @@ void inotify_destroy(struct inotify_handle *ih)
518 651
519 mutex_unlock(&ih->mutex); 652 mutex_unlock(&ih->mutex);
520 mutex_unlock(&inode->inotify_mutex); 653 mutex_unlock(&inode->inotify_mutex);
521 put_inotify_watch(watch); 654 unpin_and_kill(watch, how);
522 } 655 }
523 656
524 /* free this handle: the put matching the get in inotify_init() */ 657 /* free this handle: the put matching the get in inotify_init() */
@@ -719,7 +852,9 @@ void inotify_evict_watch(struct inotify_watch *watch)
719int inotify_rm_wd(struct inotify_handle *ih, u32 wd) 852int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
720{ 853{
721 struct inotify_watch *watch; 854 struct inotify_watch *watch;
855 struct super_block *sb;
722 struct inode *inode; 856 struct inode *inode;
857 int how;
723 858
724 mutex_lock(&ih->mutex); 859 mutex_lock(&ih->mutex);
725 watch = idr_find(&ih->idr, wd); 860 watch = idr_find(&ih->idr, wd);
@@ -727,9 +862,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
727 mutex_unlock(&ih->mutex); 862 mutex_unlock(&ih->mutex);
728 return -EINVAL; 863 return -EINVAL;
729 } 864 }
730 get_inotify_watch(watch); 865 sb = watch->inode->i_sb;
866 how = pin_to_kill(ih, watch);
867 if (!how)
868 return 0;
869
731 inode = watch->inode; 870 inode = watch->inode;
732 mutex_unlock(&ih->mutex);
733 871
734 mutex_lock(&inode->inotify_mutex); 872 mutex_lock(&inode->inotify_mutex);
735 mutex_lock(&ih->mutex); 873 mutex_lock(&ih->mutex);
@@ -740,7 +878,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
740 878
741 mutex_unlock(&ih->mutex); 879 mutex_unlock(&ih->mutex);
742 mutex_unlock(&inode->inotify_mutex); 880 mutex_unlock(&inode->inotify_mutex);
743 put_inotify_watch(watch); 881 unpin_and_kill(watch, how);
744 882
745 return 0; 883 return 0;
746} 884}