diff options
Diffstat (limited to 'fs/inotify.c')
| -rw-r--r-- | fs/inotify.c | 150 |
1 files changed, 144 insertions, 6 deletions
diff --git a/fs/inotify.c b/fs/inotify.c index 690e72595e6e..7bbed1b89825 100644 --- a/fs/inotify.c +++ b/fs/inotify.c | |||
| @@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch) | |||
| 106 | } | 106 | } |
| 107 | EXPORT_SYMBOL_GPL(get_inotify_watch); | 107 | EXPORT_SYMBOL_GPL(get_inotify_watch); |
| 108 | 108 | ||
| 109 | int pin_inotify_watch(struct inotify_watch *watch) /* try to pin @watch together with an active reference on its inode's superblock; returns 1 if both were taken, 0 if nothing was taken */ | ||
| 110 | { | ||
| 111 | struct super_block *sb = watch->inode->i_sb; | ||
| 112 | spin_lock(&sb_lock); /* the ->s_count test and the ->s_active bump are both done under sb_lock */ | ||
| 113 | if (sb->s_count >= S_BIAS) { /* per the watch-removal notes in this file, ->s_count below S_BIAS means the superblock is heading for shutdown */ | ||
| 114 | atomic_inc(&sb->s_active); | ||
| 115 | spin_unlock(&sb_lock); | ||
| 116 | atomic_inc(&watch->count); /* NOTE(review): presumably the same count that put_inotify_watch() drops in unpin_inotify_watch() - confirm */ | ||
| 117 | return 1; | ||
| 118 | } | ||
| 119 | spin_unlock(&sb_lock); | ||
| 120 | return 0; /* no active reference could be taken; neither the superblock nor the watch was pinned */ | ||
| 121 | } | ||
| 122 | |||
| 109 | /** | 123 | /** |
| 110 | * put_inotify_watch - decrements the ref count on a given watch. cleans up | 124 | * put_inotify_watch - decrements the ref count on a given watch. cleans up |
| 111 | * watch references if the count reaches zero. inotify_watch is freed by | 125 | * watch references if the count reaches zero. inotify_watch is freed by |
| @@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch) | |||
| 124 | } | 138 | } |
| 125 | EXPORT_SYMBOL_GPL(put_inotify_watch); | 139 | EXPORT_SYMBOL_GPL(put_inotify_watch); |
| 126 | 140 | ||
| 141 | void unpin_inotify_watch(struct inotify_watch *watch) /* release a pin: put the watch reference, then call deactivate_super() on its superblock */ | ||
| 142 | { | ||
| 143 | struct super_block *sb = watch->inode->i_sb; /* read before the put below, which may clean the watch up if the count reaches zero */ | ||
| 144 | put_inotify_watch(watch); | ||
| 145 | deactivate_super(sb); | ||
| 146 | } | ||
| 147 | |||
| 127 | /* | 148 | /* |
| 128 | * inotify_handle_get_wd - returns the next WD for use by the given handle | 149 | * inotify_handle_get_wd - returns the next WD for use by the given handle |
| 129 | * | 150 | * |
| @@ -479,6 +500,112 @@ void inotify_init_watch(struct inotify_watch *watch) | |||
| 479 | } | 500 | } |
| 480 | EXPORT_SYMBOL_GPL(inotify_init_watch); | 501 | EXPORT_SYMBOL_GPL(inotify_init_watch); |
| 481 | 502 | ||
| 503 | /* | ||
| 504 | * Watch removals suck violently. To kick the watch out we need (in this | ||
| 505 | * order) inode->inotify_mutex and ih->mutex. That's fine if we have | ||
| 506 | * a hold on inode; however, for all other cases we need to make damn sure | ||
| 507 | * we don't race with umount. We can *NOT* just grab a reference to a | ||
| 508 | * watch - inotify_unmount_inodes() will happily sail past it and we'll end | ||
| 509 | * with reference to inode potentially outliving its superblock. Ideally | ||
| 510 | * we just want to grab an active reference to superblock if we can; that | ||
| 511 | * will make sure we won't go into inotify_unmount_inodes() until we are | ||
| 512 | * done. Cleanup is just deactivate_super(). However, that leaves a messy | ||
| 513 | * case - what if we *are* racing with umount() and active references to | ||
| 514 | * superblock can't be acquired anymore? We can bump ->s_count, grab | ||
| 515 | * ->s_umount, which will almost certainly wait until the superblock is shut | ||
| 516 | * down and the watch in question is pining for fjords. That's fine, but | ||
| 517 | * there is a problem - we might have hit the window between ->s_active | ||
| 518 | * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock | ||
| 519 | * is past the point of no return and is heading for shutdown) and the | ||
| 520 | * moment when deactivate_super() acquires ->s_umount. We could just do | ||
| 521 | * drop_super(), yield() and retry, but that's rather antisocial and this | ||
| 522 | * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having | ||
| 523 | * found that we'd got there first (i.e. that ->s_root is non-NULL) we know | ||
| 524 | * that we won't race with inotify_unmount_inodes(). So we could grab a | ||
| 525 | * reference to watch and do the rest as above, just with drop_super() instead | ||
| 526 | * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we | ||
| 527 | * could grab ->s_umount. So the watch could've been gone already. | ||
| 528 | * | ||
| 529 | * That still can be dealt with - we need to save watch->wd, do idr_find() | ||
| 530 | * and compare its result with our pointer. If they match, we either have | ||
| 531 | * the damn thing still alive or we'd lost not one but two races at once, | ||
| 532 | * the watch had been killed and a new one got created with the same ->wd | ||
| 533 | * at the same address. That couldn't have happened in inotify_destroy(), | ||
| 534 | * but inotify_rm_wd() could run into that. Still, "new one got created" | ||
| 535 | * is not a problem - we have every right to kill it or leave it alone, | ||
| 536 | * whatever's more convenient. | ||
| 537 | * | ||
| 538 | * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as | ||
| 539 | * "grab it and kill it" check. If it's been our original watch, we are | ||
| 540 | * fine, if it's a newcomer - nevermind, just pretend that we'd won the | ||
| 541 | * race and kill the fscker anyway; we are safe since we know that its | ||
| 542 | * superblock won't be going away. | ||
| 543 | * | ||
| 544 | * And yes, this is far beyond mere "not very pretty"; so's the entire | ||
| 545 | * concept of inotify to start with. | ||
| 546 | */ | ||
| 547 | |||
| 548 | /** | ||
| 549 | * pin_to_kill - pin the watch down for removal | ||
| 550 | * @ih: inotify handle | ||
| 551 | * @watch: watch to kill | ||
| 552 | * | ||
| 553 | * Called with ih->mutex held; every return path drops it. Possible return values: | ||
| 554 | * 0 - nothing to do, it has died; the superblock was already passed to drop_super() | ||
| 555 | * 1 - remove it, drop the reference and deactivate_super(); sb->s_active and the watch were both bumped | ||
| 556 | * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid | ||
| 557 | * that variant, since it involved a lot of PITA, but that's the best that | ||
| 558 | * could've been done. sb->s_umount is still held for read on return. | ||
| 559 | */ | ||
| 560 | static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch) | ||
| 561 | { | ||
| 562 | struct super_block *sb = watch->inode->i_sb; | ||
| 563 | s32 wd = watch->wd; /* saved now: ih->mutex is dropped before the recheck below, so @watch may be gone by then */ | ||
| 564 | |||
| 565 | spin_lock(&sb_lock); | ||
| 566 | if (sb->s_count >= S_BIAS) { /* an active reference can still be taken */ | ||
| 567 | atomic_inc(&sb->s_active); | ||
| 568 | spin_unlock(&sb_lock); | ||
| 569 | get_inotify_watch(watch); | ||
| 570 | mutex_unlock(&ih->mutex); | ||
| 571 | return 1; /* the best outcome */ | ||
| 572 | } | ||
| 573 | sb->s_count++; /* bumped under sb_lock before sleeping on ->s_umount; released by drop_super() below or by the caller */ | ||
| 574 | spin_unlock(&sb_lock); | ||
| 575 | mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */ | ||
| 576 | down_read(&sb->s_umount); | ||
| 577 | if (likely(!sb->s_root)) { | ||
| 578 | /* fs is already shut down; the watch is dead */ | ||
| 579 | drop_super(sb); | ||
| 580 | return 0; | ||
| 581 | } | ||
| 582 | /* raced with the final deactivate_super() */ | ||
| 583 | mutex_lock(&ih->mutex); | ||
| 584 | if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) { /* wd no longer maps to this pointer, or the watch found there sits on another superblock */ | ||
| 585 | /* the watch is dead */ | ||
| 586 | mutex_unlock(&ih->mutex); | ||
| 587 | drop_super(sb); | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | /* still alive or freed and reused with the same sb and wd; kill */ | ||
| 591 | get_inotify_watch(watch); | ||
| 592 | mutex_unlock(&ih->mutex); | ||
| 593 | return 2; /* ->s_umount (read) and the ->s_count bump stay held for the caller */ | ||
| 594 | } | ||
| 595 | |||
| 596 | static void unpin_and_kill(struct inotify_watch *watch, int how) /* release what pin_to_kill() left held: the watch reference, then the superblock hold selected by @how (its return value) */ | ||
| 597 | { | ||
| 598 | struct super_block *sb = watch->inode->i_sb; /* read before the put below, which may clean the watch up if the count reaches zero */ | ||
| 599 | put_inotify_watch(watch); | ||
| 600 | switch (how) { | ||
| 601 | case 1: /* pin_to_kill() bumped ->s_active */ | ||
| 602 | deactivate_super(sb); | ||
| 603 | break; | ||
| 604 | case 2: /* pin_to_kill() bumped ->s_count and returned with ->s_umount held for read */ | ||
| 605 | drop_super(sb); | ||
| 606 | } /* any other @how: only the watch reference is dropped */ | ||
| 607 | } | ||
| 608 | |||
| 482 | /** | 609 | /** |
| 483 | * inotify_destroy - clean up and destroy an inotify instance | 610 | * inotify_destroy - clean up and destroy an inotify instance |
| 484 | * @ih: inotify handle | 611 | * @ih: inotify handle |
| @@ -490,11 +617,15 @@ void inotify_destroy(struct inotify_handle *ih) | |||
| 490 | * pretty. We cannot do a simple iteration over the list, because we | 617 | * pretty. We cannot do a simple iteration over the list, because we |
| 491 | * do not know the inode until we iterate to the watch. But we need to | 618 | * do not know the inode until we iterate to the watch. But we need to |
| 492 | * hold inode->inotify_mutex before ih->mutex. The following works. | 619 | * hold inode->inotify_mutex before ih->mutex. The following works. |
| 620 | * | ||
| 621 | * AV: it had to become even uglier to start working ;-/ | ||
| 493 | */ | 622 | */ |
| 494 | while (1) { | 623 | while (1) { |
| 495 | struct inotify_watch *watch; | 624 | struct inotify_watch *watch; |
| 496 | struct list_head *watches; | 625 | struct list_head *watches; |
| 626 | struct super_block *sb; | ||
| 497 | struct inode *inode; | 627 | struct inode *inode; |
| 628 | int how; | ||
| 498 | 629 | ||
| 499 | mutex_lock(&ih->mutex); | 630 | mutex_lock(&ih->mutex); |
| 500 | watches = &ih->watches; | 631 | watches = &ih->watches; |
| @@ -503,8 +634,10 @@ void inotify_destroy(struct inotify_handle *ih) | |||
| 503 | break; | 634 | break; |
| 504 | } | 635 | } |
| 505 | watch = list_first_entry(watches, struct inotify_watch, h_list); | 636 | watch = list_first_entry(watches, struct inotify_watch, h_list); |
| 506 | get_inotify_watch(watch); | 637 | sb = watch->inode->i_sb; |
| 507 | mutex_unlock(&ih->mutex); | 638 | how = pin_to_kill(ih, watch); |
| 639 | if (!how) | ||
| 640 | continue; | ||
| 508 | 641 | ||
| 509 | inode = watch->inode; | 642 | inode = watch->inode; |
| 510 | mutex_lock(&inode->inotify_mutex); | 643 | mutex_lock(&inode->inotify_mutex); |
| @@ -518,7 +651,7 @@ void inotify_destroy(struct inotify_handle *ih) | |||
| 518 | 651 | ||
| 519 | mutex_unlock(&ih->mutex); | 652 | mutex_unlock(&ih->mutex); |
| 520 | mutex_unlock(&inode->inotify_mutex); | 653 | mutex_unlock(&inode->inotify_mutex); |
| 521 | put_inotify_watch(watch); | 654 | unpin_and_kill(watch, how); |
| 522 | } | 655 | } |
| 523 | 656 | ||
| 524 | /* free this handle: the put matching the get in inotify_init() */ | 657 | /* free this handle: the put matching the get in inotify_init() */ |
| @@ -719,7 +852,9 @@ void inotify_evict_watch(struct inotify_watch *watch) | |||
| 719 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | 852 | int inotify_rm_wd(struct inotify_handle *ih, u32 wd) |
| 720 | { | 853 | { |
| 721 | struct inotify_watch *watch; | 854 | struct inotify_watch *watch; |
| 855 | struct super_block *sb; | ||
| 722 | struct inode *inode; | 856 | struct inode *inode; |
| 857 | int how; | ||
| 723 | 858 | ||
| 724 | mutex_lock(&ih->mutex); | 859 | mutex_lock(&ih->mutex); |
| 725 | watch = idr_find(&ih->idr, wd); | 860 | watch = idr_find(&ih->idr, wd); |
| @@ -727,9 +862,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
| 727 | mutex_unlock(&ih->mutex); | 862 | mutex_unlock(&ih->mutex); |
| 728 | return -EINVAL; | 863 | return -EINVAL; |
| 729 | } | 864 | } |
| 730 | get_inotify_watch(watch); | 865 | sb = watch->inode->i_sb; |
| 866 | how = pin_to_kill(ih, watch); | ||
| 867 | if (!how) | ||
| 868 | return 0; | ||
| 869 | |||
| 731 | inode = watch->inode; | 870 | inode = watch->inode; |
| 732 | mutex_unlock(&ih->mutex); | ||
| 733 | 871 | ||
| 734 | mutex_lock(&inode->inotify_mutex); | 872 | mutex_lock(&inode->inotify_mutex); |
| 735 | mutex_lock(&ih->mutex); | 873 | mutex_lock(&ih->mutex); |
| @@ -740,7 +878,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd) | |||
| 740 | 878 | ||
| 741 | mutex_unlock(&ih->mutex); | 879 | mutex_unlock(&ih->mutex); |
| 742 | mutex_unlock(&inode->inotify_mutex); | 880 | mutex_unlock(&inode->inotify_mutex); |
| 743 | put_inotify_watch(watch); | 881 | unpin_and_kill(watch, how); |
| 744 | 882 | ||
| 745 | return 0; | 883 | return 0; |
| 746 | } | 884 | } |
