diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 92 |
1 files changed, 70 insertions, 22 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1c..a02480de9759 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab( | |||
53 | { | 53 | { |
54 | struct inode *inode = VFS_I(ip); | 54 | struct inode *inode = VFS_I(ip); |
55 | 55 | ||
56 | ASSERT(rcu_read_lock_held()); | ||
57 | |||
58 | /* | ||
59 | * check for stale RCU freed inode | ||
60 | * | ||
61 | * If the inode has been reallocated, it doesn't matter if it's not in | ||
62 | * the AG we are walking - we are walking for writeback, so if it | ||
63 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
64 | * it back anyway. If it has been reallocated and still being | ||
65 | * initialised, the XFS_INEW check below will catch it. | ||
66 | */ | ||
67 | spin_lock(&ip->i_flags_lock); | ||
68 | if (!ip->i_ino) | ||
69 | goto out_unlock_noent; | ||
70 | |||
71 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
72 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
73 | goto out_unlock_noent; | ||
74 | spin_unlock(&ip->i_flags_lock); | ||
75 | |||
56 | /* nothing to sync during shutdown */ | 76 | /* nothing to sync during shutdown */ |
57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 77 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
58 | return EFSCORRUPTED; | 78 | return EFSCORRUPTED; |
59 | 79 | ||
60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
62 | return ENOENT; | ||
63 | |||
64 | /* If we can't grab the inode, it must be on its way to reclaim. */ | 80 | /* If we can't grab the inode, it must be on its way to reclaim. */ |
65 | if (!igrab(inode)) | 81 | if (!igrab(inode)) |
66 | return ENOENT; | 82 | return ENOENT; |
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab( | |||
72 | 88 | ||
73 | /* inode is valid */ | 89 | /* inode is valid */ |
74 | return 0; | 90 | return 0; |
91 | |||
92 | out_unlock_noent: | ||
93 | spin_unlock(&ip->i_flags_lock); | ||
94 | return ENOENT; | ||
75 | } | 95 | } |
76 | 96 | ||
77 | STATIC int | 97 | STATIC int |
@@ -98,12 +118,12 @@ restart: | |||
98 | int error = 0; | 118 | int error = 0; |
99 | int i; | 119 | int i; |
100 | 120 | ||
101 | read_lock(&pag->pag_ici_lock); | 121 | rcu_read_lock(); |
102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 122 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
103 | (void **)batch, first_index, | 123 | (void **)batch, first_index, |
104 | XFS_LOOKUP_BATCH); | 124 | XFS_LOOKUP_BATCH); |
105 | if (!nr_found) { | 125 | if (!nr_found) { |
106 | read_unlock(&pag->pag_ici_lock); | 126 | rcu_read_unlock(); |
107 | break; | 127 | break; |
108 | } | 128 | } |
109 | 129 | ||
@@ -118,18 +138,26 @@ restart: | |||
118 | batch[i] = NULL; | 138 | batch[i] = NULL; |
119 | 139 | ||
120 | /* | 140 | /* |
121 | * Update the index for the next lookup. Catch overflows | 141 | * Update the index for the next lookup. Catch |
122 | * into the next AG range which can occur if we have inodes | 142 | * overflows into the next AG range which can occur if |
123 | * in the last block of the AG and we are currently | 143 | * we have inodes in the last block of the AG and we |
124 | * pointing to the last inode. | 144 | * are currently pointing to the last inode. |
145 | * | ||
146 | * Because we may see inodes that are from the wrong AG | ||
147 | * due to RCU freeing and reallocation, only update the | ||
148 | * index if it lies in this AG. It was a race that led | ||
149 | * us to see this inode, so another lookup from the | ||
150 | * same index will not find it again. | ||
125 | */ | 151 | */ |
152 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
153 | continue; | ||
126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 154 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 155 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
128 | done = 1; | 156 | done = 1; |
129 | } | 157 | } |
130 | 158 | ||
131 | /* unlock now we've grabbed the inodes. */ | 159 | /* unlock now we've grabbed the inodes. */ |
132 | read_unlock(&pag->pag_ici_lock); | 160 | rcu_read_unlock(); |
133 | 161 | ||
134 | for (i = 0; i < nr_found; i++) { | 162 | for (i = 0; i < nr_found; i++) { |
135 | if (!batch[i]) | 163 | if (!batch[i]) |
@@ -592,12 +620,12 @@ xfs_inode_set_reclaim_tag( | |||
592 | struct xfs_perag *pag; | 620 | struct xfs_perag *pag; |
593 | 621 | ||
594 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); | 622 | pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); |
595 | write_lock(&pag->pag_ici_lock); | 623 | spin_lock(&pag->pag_ici_lock); |
596 | spin_lock(&ip->i_flags_lock); | 624 | spin_lock(&ip->i_flags_lock); |
597 | __xfs_inode_set_reclaim_tag(pag, ip); | 625 | __xfs_inode_set_reclaim_tag(pag, ip); |
598 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); | 626 | __xfs_iflags_set(ip, XFS_IRECLAIMABLE); |
599 | spin_unlock(&ip->i_flags_lock); | 627 | spin_unlock(&ip->i_flags_lock); |
600 | write_unlock(&pag->pag_ici_lock); | 628 | spin_unlock(&pag->pag_ici_lock); |
601 | xfs_perag_put(pag); | 629 | xfs_perag_put(pag); |
602 | } | 630 | } |
603 | 631 | ||
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab( | |||
639 | struct xfs_inode *ip, | 667 | struct xfs_inode *ip, |
640 | int flags) | 668 | int flags) |
641 | { | 669 | { |
670 | ASSERT(rcu_read_lock_held()); | ||
671 | |||
672 | /* quick check for stale RCU freed inode */ | ||
673 | if (!ip->i_ino) | ||
674 | return 1; | ||
642 | 675 | ||
643 | /* | 676 | /* |
644 | * do some unlocked checks first to avoid unnecceary lock traffic. | 677 | * do some unlocked checks first to avoid unnecessary lock traffic. |
645 | * The first is a flush lock check, the second is an already in reclaim | 678 | * The first is a flush lock check, the second is an already in reclaim |
646 | * check. Only do these checks if we are not going to block on locks. | 679 | * check. Only do these checks if we are not going to block on locks. |
647 | */ | 680 | */ |
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab( | |||
654 | * The radix tree lock here protects a thread in xfs_iget from racing | 687 | * The radix tree lock here protects a thread in xfs_iget from racing |
655 | * with us starting reclaim on the inode. Once we have the | 688 | * with us starting reclaim on the inode. Once we have the |
656 | * XFS_IRECLAIM flag set it will not touch us. | 689 | * XFS_IRECLAIM flag set it will not touch us. |
690 | * | ||
691 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
692 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
693 | * aren't candidates for reclaim at all, so we must check the | ||
694 | * XFS_IRECLAIMABLE flag is set first before proceeding to reclaim. | ||
657 | */ | 695 | */ |
658 | spin_lock(&ip->i_flags_lock); | 696 | spin_lock(&ip->i_flags_lock); |
659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 697 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || |
660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | 698 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { |
661 | /* ignore as it is already under reclaim */ | 699 | /* not a reclaim candidate. */ |
662 | spin_unlock(&ip->i_flags_lock); | 700 | spin_unlock(&ip->i_flags_lock); |
663 | return 1; | 701 | return 1; |
664 | } | 702 | } |
@@ -795,12 +833,12 @@ reclaim: | |||
795 | * added to the tree assert that it's been there before to catch | 833 | * added to the tree assert that it's been there before to catch |
796 | * problems with the inode life time early on. | 834 | * problems with the inode life time early on. |
797 | */ | 835 | */ |
798 | write_lock(&pag->pag_ici_lock); | 836 | spin_lock(&pag->pag_ici_lock); |
799 | if (!radix_tree_delete(&pag->pag_ici_root, | 837 | if (!radix_tree_delete(&pag->pag_ici_root, |
800 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 838 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
801 | ASSERT(0); | 839 | ASSERT(0); |
802 | __xfs_inode_clear_reclaim(pag, ip); | 840 | __xfs_inode_clear_reclaim(pag, ip); |
803 | write_unlock(&pag->pag_ici_lock); | 841 | spin_unlock(&pag->pag_ici_lock); |
804 | 842 | ||
805 | /* | 843 | /* |
806 | * Here we do an (almost) spurious inode lock in order to coordinate | 844 | * Here we do an (almost) spurious inode lock in order to coordinate |
@@ -864,14 +902,14 @@ restart: | |||
864 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | 902 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; |
865 | int i; | 903 | int i; |
866 | 904 | ||
867 | write_lock(&pag->pag_ici_lock); | 905 | rcu_read_lock(); |
868 | nr_found = radix_tree_gang_lookup_tag( | 906 | nr_found = radix_tree_gang_lookup_tag( |
869 | &pag->pag_ici_root, | 907 | &pag->pag_ici_root, |
870 | (void **)batch, first_index, | 908 | (void **)batch, first_index, |
871 | XFS_LOOKUP_BATCH, | 909 | XFS_LOOKUP_BATCH, |
872 | XFS_ICI_RECLAIM_TAG); | 910 | XFS_ICI_RECLAIM_TAG); |
873 | if (!nr_found) { | 911 | if (!nr_found) { |
874 | write_unlock(&pag->pag_ici_lock); | 912 | rcu_read_unlock(); |
875 | break; | 913 | break; |
876 | } | 914 | } |
877 | 915 | ||
@@ -891,14 +929,24 @@ restart: | |||
891 | * occur if we have inodes in the last block of | 929 | * occur if we have inodes in the last block of |
892 | * the AG and we are currently pointing to the | 930 | * the AG and we are currently pointing to the |
893 | * last inode. | 931 | * last inode. |
932 | * | ||
933 | * Because we may see inodes that are from the | ||
934 | * wrong AG due to RCU freeing and | ||
935 | * reallocation, only update the index if it | ||
936 | * lies in this AG. It was a race that led us | ||
937 | * to see this inode, so another lookup from | ||
938 | * the same index will not find it again. | ||
894 | */ | 939 | */ |
940 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
941 | pag->pag_agno) | ||
942 | continue; | ||
895 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 943 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
896 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 944 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
897 | done = 1; | 945 | done = 1; |
898 | } | 946 | } |
899 | 947 | ||
900 | /* unlock now we've grabbed the inodes. */ | 948 | /* unlock now we've grabbed the inodes. */ |
901 | write_unlock(&pag->pag_ici_lock); | 949 | rcu_read_unlock(); |
902 | 950 | ||
903 | for (i = 0; i < nr_found; i++) { | 951 | for (i = 0; i < nr_found; i++) { |
904 | if (!batch[i]) | 952 | if (!batch[i]) |