diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 84 |
-rw-r--r-- | fs/xfs/xfs_iget.c | 47 |
-rw-r--r-- | fs/xfs/xfs_inode.c | 52 |
3 files changed, 141 insertions, 42 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index afb0d7cfad1c..fd38682da851 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab( | |||
53 | { | 53 | { |
54 | struct inode *inode = VFS_I(ip); | 54 | struct inode *inode = VFS_I(ip); |
55 | 55 | ||
56 | ASSERT(rcu_read_lock_held()); | ||
57 | |||
58 | /* | ||
59 | * check for stale RCU freed inode | ||
60 | * | ||
61 | * If the inode has been reallocated, it doesn't matter if it's not in | ||
62 | * the AG we are walking - we are walking for writeback, so if it | ||
63 | * passes all the "valid inode" checks and is dirty, then we'll write | ||
64 | * it back anyway. If it has been reallocated and is still being | ||
65 | * initialised, the XFS_INEW check below will catch it. | ||
66 | */ | ||
67 | spin_lock(&ip->i_flags_lock); | ||
68 | if (!ip->i_ino) | ||
69 | goto out_unlock_noent; | ||
70 | |||
71 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
72 | if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
73 | goto out_unlock_noent; | ||
74 | spin_unlock(&ip->i_flags_lock); | ||
75 | |||
56 | /* nothing to sync during shutdown */ | 76 | /* nothing to sync during shutdown */ |
57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 77 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
58 | return EFSCORRUPTED; | 78 | return EFSCORRUPTED; |
59 | 79 | ||
60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
62 | return ENOENT; | ||
63 | |||
64 | /* If we can't grab the inode, it must be on its way to reclaim. */ | 80 | /* If we can't grab the inode, it must be on its way to reclaim. */ |
65 | if (!igrab(inode)) | 81 | if (!igrab(inode)) |
66 | return ENOENT; | 82 | return ENOENT; |
@@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab( | |||
72 | 88 | ||
73 | /* inode is valid */ | 89 | /* inode is valid */ |
74 | return 0; | 90 | return 0; |
91 | |||
92 | out_unlock_noent: | ||
93 | spin_unlock(&ip->i_flags_lock); | ||
94 | return ENOENT; | ||
75 | } | 95 | } |
76 | 96 | ||
77 | STATIC int | 97 | STATIC int |
@@ -98,12 +118,12 @@ restart: | |||
98 | int error = 0; | 118 | int error = 0; |
99 | int i; | 119 | int i; |
100 | 120 | ||
101 | read_lock(&pag->pag_ici_lock); | 121 | rcu_read_lock(); |
102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 122 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
103 | (void **)batch, first_index, | 123 | (void **)batch, first_index, |
104 | XFS_LOOKUP_BATCH); | 124 | XFS_LOOKUP_BATCH); |
105 | if (!nr_found) { | 125 | if (!nr_found) { |
106 | read_unlock(&pag->pag_ici_lock); | 126 | rcu_read_unlock(); |
107 | break; | 127 | break; |
108 | } | 128 | } |
109 | 129 | ||
@@ -118,18 +138,26 @@ restart: | |||
118 | batch[i] = NULL; | 138 | batch[i] = NULL; |
119 | 139 | ||
120 | /* | 140 | /* |
121 | * Update the index for the next lookup. Catch overflows | 141 | * Update the index for the next lookup. Catch |
122 | * into the next AG range which can occur if we have inodes | 142 | * overflows into the next AG range which can occur if |
123 | * in the last block of the AG and we are currently | 143 | * we have inodes in the last block of the AG and we |
124 | * pointing to the last inode. | 144 | * are currently pointing to the last inode. |
145 | * | ||
146 | * Because we may see inodes that are from the wrong AG | ||
147 | * due to RCU freeing and reallocation, only update the | ||
148 | * index if it lies in this AG. It was a race that led | ||
149 | * us to see this inode, so another lookup from the | ||
150 | * same index will not find it again. | ||
125 | */ | 151 | */ |
152 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) | ||
153 | continue; | ||
126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 154 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 155 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
128 | done = 1; | 156 | done = 1; |
129 | } | 157 | } |
130 | 158 | ||
131 | /* unlock now we've grabbed the inodes. */ | 159 | /* unlock now we've grabbed the inodes. */ |
132 | read_unlock(&pag->pag_ici_lock); | 160 | rcu_read_unlock(); |
133 | 161 | ||
134 | for (i = 0; i < nr_found; i++) { | 162 | for (i = 0; i < nr_found; i++) { |
135 | if (!batch[i]) | 163 | if (!batch[i]) |
@@ -639,9 +667,14 @@ xfs_reclaim_inode_grab( | |||
639 | struct xfs_inode *ip, | 667 | struct xfs_inode *ip, |
640 | int flags) | 668 | int flags) |
641 | { | 669 | { |
670 | ASSERT(rcu_read_lock_held()); | ||
671 | |||
672 | /* quick check for stale RCU freed inode */ | ||
673 | if (!ip->i_ino) | ||
674 | return 1; | ||
642 | 675 | ||
643 | /* | 676 | /* |
644 | * do some unlocked checks first to avoid unnecceary lock traffic. | 677 | * do some unlocked checks first to avoid unnecessary lock traffic. |
645 | * The first is a flush lock check, the second is an already-in-reclaim | 678 | * The first is a flush lock check, the second is an already-in-reclaim |
646 | * check. Only do these checks if we are not going to block on locks. | 679 | * check. Only do these checks if we are not going to block on locks. |
647 | */ | 680 | */ |
@@ -654,11 +687,16 @@ xfs_reclaim_inode_grab( | |||
654 | * The radix tree lock here protects a thread in xfs_iget from racing | 687 | * The radix tree lock here protects a thread in xfs_iget from racing |
655 | * with us starting reclaim on the inode. Once we have the | 688 | * with us starting reclaim on the inode. Once we have the |
656 | * XFS_IRECLAIM flag set it will not touch us. | 689 | * XFS_IRECLAIM flag set it will not touch us. |
690 | * | ||
691 | * Due to RCU lookup, we may find inodes that have been freed and only | ||
692 | * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that | ||
693 | * aren't candidates for reclaim at all, so we must check that | ||
694 | * XFS_IRECLAIMABLE is set first before proceeding to reclaim. | ||
657 | */ | 695 | */ |
658 | spin_lock(&ip->i_flags_lock); | 696 | spin_lock(&ip->i_flags_lock); |
659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | 697 | if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || |
660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | 698 | __xfs_iflags_test(ip, XFS_IRECLAIM)) { |
661 | /* ignore as it is already under reclaim */ | 699 | /* not a reclaim candidate. */ |
662 | spin_unlock(&ip->i_flags_lock); | 700 | spin_unlock(&ip->i_flags_lock); |
663 | return 1; | 701 | return 1; |
664 | } | 702 | } |
@@ -864,14 +902,14 @@ restart: | |||
864 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | 902 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; |
865 | int i; | 903 | int i; |
866 | 904 | ||
867 | write_lock(&pag->pag_ici_lock); | 905 | rcu_read_lock(); |
868 | nr_found = radix_tree_gang_lookup_tag( | 906 | nr_found = radix_tree_gang_lookup_tag( |
869 | &pag->pag_ici_root, | 907 | &pag->pag_ici_root, |
870 | (void **)batch, first_index, | 908 | (void **)batch, first_index, |
871 | XFS_LOOKUP_BATCH, | 909 | XFS_LOOKUP_BATCH, |
872 | XFS_ICI_RECLAIM_TAG); | 910 | XFS_ICI_RECLAIM_TAG); |
873 | if (!nr_found) { | 911 | if (!nr_found) { |
874 | write_unlock(&pag->pag_ici_lock); | 912 | rcu_read_unlock(); |
875 | break; | 913 | break; |
876 | } | 914 | } |
877 | 915 | ||
@@ -891,14 +929,24 @@ restart: | |||
891 | * occur if we have inodes in the last block of | 929 | * occur if we have inodes in the last block of |
892 | * the AG and we are currently pointing to the | 930 | * the AG and we are currently pointing to the |
893 | * last inode. | 931 | * last inode. |
932 | * | ||
933 | * Because we may see inodes that are from the | ||
934 | * wrong AG due to RCU freeing and | ||
935 | * reallocation, only update the index if it | ||
936 | * lies in this AG. It was a race that led us | ||
937 | * to see this inode, so another lookup from | ||
938 | * the same index will not find it again. | ||
894 | */ | 939 | */ |
940 | if (XFS_INO_TO_AGNO(mp, ip->i_ino) != | ||
941 | pag->pag_agno) | ||
942 | continue; | ||
895 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 943 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
896 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 944 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
897 | done = 1; | 945 | done = 1; |
898 | } | 946 | } |
899 | 947 | ||
900 | /* unlock now we've grabbed the inodes. */ | 948 | /* unlock now we've grabbed the inodes. */ |
901 | write_unlock(&pag->pag_ici_lock); | 949 | rcu_read_unlock(); |
902 | 950 | ||
903 | for (i = 0; i < nr_found; i++) { | 951 | for (i = 0; i < nr_found; i++) { |
904 | if (!batch[i]) | 952 | if (!batch[i]) |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 9fae47556604..04ed09b907b8 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -80,6 +80,7 @@ xfs_inode_alloc( | |||
80 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
82 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
83 | ASSERT(ip->i_ino == 0); | ||
83 | 84 | ||
84 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
85 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | 86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, |
@@ -98,9 +99,6 @@ xfs_inode_alloc( | |||
98 | ip->i_size = 0; | 99 | ip->i_size = 0; |
99 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
100 | 101 | ||
101 | /* prevent anyone from using this yet */ | ||
102 | VFS_I(ip)->i_state = I_NEW; | ||
103 | |||
104 | return ip; | 102 | return ip; |
105 | } | 103 | } |
106 | 104 | ||
@@ -159,6 +157,16 @@ xfs_inode_free( | |||
159 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 157 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
160 | ASSERT(completion_done(&ip->i_flush)); | 158 | ASSERT(completion_done(&ip->i_flush)); |
161 | 159 | ||
160 | /* | ||
161 | * Because we use RCU freeing we need to ensure the inode always | ||
162 | * appears to be reclaimed with an invalid inode number when in the | ||
163 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
164 | * races. | ||
165 | */ | ||
166 | spin_lock(&ip->i_flags_lock); | ||
167 | ip->i_flags = XFS_IRECLAIM; | ||
168 | ip->i_ino = 0; | ||
169 | spin_unlock(&ip->i_flags_lock); | ||
162 | call_rcu((struct rcu_head *)&VFS_I(ip)->i_dentry, __xfs_inode_free); | 170 | call_rcu((struct rcu_head *)&VFS_I(ip)->i_dentry, __xfs_inode_free); |
163 | } | 171 | } |
164 | 172 | ||
@@ -169,14 +177,29 @@ static int | |||
169 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
170 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
171 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
180 | xfs_ino_t ino, | ||
172 | int flags, | 181 | int flags, |
173 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
174 | { | 183 | { |
175 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
176 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
177 | int error; | 186 | int error; |
178 | 187 | ||
188 | /* | ||
189 | * check for re-use of an inode within an RCU grace period due to the | ||
190 | * radix tree nodes not being updated yet. We monitor for this by | ||
191 | * setting the inode number to zero before freeing the inode structure. | ||
192 | * If the inode has been reallocated and set up, then the inode number | ||
193 | * will not match, so check for that, too. | ||
194 | */ | ||
179 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
196 | if (ip->i_ino != ino) { | ||
197 | trace_xfs_iget_skip(ip); | ||
198 | XFS_STATS_INC(xs_ig_frecycle); | ||
199 | error = EAGAIN; | ||
200 | goto out_error; | ||
201 | } | ||
202 | |||
180 | 203 | ||
181 | /* | 204 | /* |
182 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
@@ -219,7 +242,7 @@ xfs_iget_cache_hit( | |||
219 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
220 | 243 | ||
221 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
222 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
223 | 246 | ||
224 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
225 | if (error) { | 248 | if (error) { |
@@ -227,7 +250,7 @@ xfs_iget_cache_hit( | |||
227 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
228 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
229 | */ | 252 | */ |
230 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
231 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
232 | 255 | ||
233 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~XFS_INEW; |
@@ -261,7 +284,7 @@ xfs_iget_cache_hit( | |||
261 | 284 | ||
262 | /* We've got a live one. */ | 285 | /* We've got a live one. */ |
263 | spin_unlock(&ip->i_flags_lock); | 286 | spin_unlock(&ip->i_flags_lock); |
264 | read_unlock(&pag->pag_ici_lock); | 287 | rcu_read_unlock(); |
265 | trace_xfs_iget_hit(ip); | 288 | trace_xfs_iget_hit(ip); |
266 | } | 289 | } |
267 | 290 | ||
@@ -275,7 +298,7 @@ xfs_iget_cache_hit( | |||
275 | 298 | ||
276 | out_error: | 299 | out_error: |
277 | spin_unlock(&ip->i_flags_lock); | 300 | spin_unlock(&ip->i_flags_lock); |
278 | read_unlock(&pag->pag_ici_lock); | 301 | rcu_read_unlock(); |
279 | return error; | 302 | return error; |
280 | } | 303 | } |
281 | 304 | ||
@@ -397,7 +420,7 @@ xfs_iget( | |||
397 | xfs_agino_t agino; | 420 | xfs_agino_t agino; |
398 | 421 | ||
399 | /* reject inode numbers outside existing AGs */ | 422 | /* reject inode numbers outside existing AGs */ |
400 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 423 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
401 | return EINVAL; | 424 | return EINVAL; |
402 | 425 | ||
403 | /* get the perag structure and ensure that it's inode capable */ | 426 | /* get the perag structure and ensure that it's inode capable */ |
@@ -406,15 +429,15 @@ xfs_iget( | |||
406 | 429 | ||
407 | again: | 430 | again: |
408 | error = 0; | 431 | error = 0; |
409 | read_lock(&pag->pag_ici_lock); | 432 | rcu_read_lock(); |
410 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 433 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
411 | 434 | ||
412 | if (ip) { | 435 | if (ip) { |
413 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 436 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
414 | if (error) | 437 | if (error) |
415 | goto out_error_or_again; | 438 | goto out_error_or_again; |
416 | } else { | 439 | } else { |
417 | read_unlock(&pag->pag_ici_lock); | 440 | rcu_read_unlock(); |
418 | XFS_STATS_INC(xs_ig_missed); | 441 | XFS_STATS_INC(xs_ig_missed); |
419 | 442 | ||
420 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 443 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 108c7a085f94..43ffd9079106 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2000,17 +2000,33 @@ xfs_ifree_cluster( | |||
2000 | */ | 2000 | */ |
2001 | for (i = 0; i < ninodes; i++) { | 2001 | for (i = 0; i < ninodes; i++) { |
2002 | retry: | 2002 | retry: |
2003 | read_lock(&pag->pag_ici_lock); | 2003 | rcu_read_lock(); |
2004 | ip = radix_tree_lookup(&pag->pag_ici_root, | 2004 | ip = radix_tree_lookup(&pag->pag_ici_root, |
2005 | XFS_INO_TO_AGINO(mp, (inum + i))); | 2005 | XFS_INO_TO_AGINO(mp, (inum + i))); |
2006 | 2006 | ||
2007 | /* Inode not in memory or stale, nothing to do */ | 2007 | /* Inode not in memory, nothing to do */ |
2008 | if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) { | 2008 | if (!ip) { |
2009 | read_unlock(&pag->pag_ici_lock); | 2009 | rcu_read_unlock(); |
2010 | continue; | 2010 | continue; |
2011 | } | 2011 | } |
2012 | 2012 | ||
2013 | /* | 2013 | /* |
2014 | * because this is an RCU protected lookup, we could | ||
2015 | * find a recently freed or even reallocated inode | ||
2016 | * during the lookup. We need to check under the | ||
2017 | * i_flags_lock for a valid inode here. Skip it if it | ||
2018 | * is not valid, the wrong inode or stale. | ||
2019 | */ | ||
2020 | spin_lock(&ip->i_flags_lock); | ||
2021 | if (ip->i_ino != inum + i || | ||
2022 | __xfs_iflags_test(ip, XFS_ISTALE)) { | ||
2023 | spin_unlock(&ip->i_flags_lock); | ||
2024 | rcu_read_unlock(); | ||
2025 | continue; | ||
2026 | } | ||
2027 | spin_unlock(&ip->i_flags_lock); | ||
2028 | |||
2029 | /* | ||
2014 | * Don't try to lock/unlock the current inode, but we | 2030 | * Don't try to lock/unlock the current inode, but we |
2015 | * _cannot_ skip the other inodes that we did not find | 2031 | * _cannot_ skip the other inodes that we did not find |
2016 | * in the list attached to the buffer and are not | 2032 | * in the list attached to the buffer and are not |
@@ -2019,11 +2035,11 @@ retry: | |||
2019 | */ | 2035 | */ |
2020 | if (ip != free_ip && | 2036 | if (ip != free_ip && |
2021 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | 2037 | !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { |
2022 | read_unlock(&pag->pag_ici_lock); | 2038 | rcu_read_unlock(); |
2023 | delay(1); | 2039 | delay(1); |
2024 | goto retry; | 2040 | goto retry; |
2025 | } | 2041 | } |
2026 | read_unlock(&pag->pag_ici_lock); | 2042 | rcu_read_unlock(); |
2027 | 2043 | ||
2028 | xfs_iflock(ip); | 2044 | xfs_iflock(ip); |
2029 | xfs_iflags_set(ip, XFS_ISTALE); | 2045 | xfs_iflags_set(ip, XFS_ISTALE); |
@@ -2629,7 +2645,7 @@ xfs_iflush_cluster( | |||
2629 | 2645 | ||
2630 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | 2646 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); |
2631 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | 2647 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; |
2632 | read_lock(&pag->pag_ici_lock); | 2648 | rcu_read_lock(); |
2633 | /* really need a gang lookup range call here */ | 2649 | /* really need a gang lookup range call here */ |
2634 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | 2650 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, |
2635 | first_index, inodes_per_cluster); | 2651 | first_index, inodes_per_cluster); |
@@ -2640,9 +2656,21 @@ xfs_iflush_cluster( | |||
2640 | iq = ilist[i]; | 2656 | iq = ilist[i]; |
2641 | if (iq == ip) | 2657 | if (iq == ip) |
2642 | continue; | 2658 | continue; |
2643 | /* if the inode lies outside this cluster, we're done. */ | 2659 | |
2644 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | 2660 | /* |
2645 | break; | 2661 | * because this is an RCU protected lookup, we could find a |
2662 | * recently freed or even reallocated inode during the lookup. | ||
2663 | * We need to check under the i_flags_lock for a valid inode | ||
2664 | * here. Skip it if it is not valid or the wrong inode. | ||
2665 | */ | ||
2666 | spin_lock(&ip->i_flags_lock); | ||
2667 | if (!ip->i_ino || | ||
2668 | (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) { | ||
2669 | spin_unlock(&ip->i_flags_lock); | ||
2670 | continue; | ||
2671 | } | ||
2672 | spin_unlock(&ip->i_flags_lock); | ||
2673 | |||
2646 | /* | 2674 | /* |
2647 | * Do an un-protected check to see if the inode is dirty and | 2675 | * Do an un-protected check to see if the inode is dirty and |
2648 | * is a candidate for flushing. These checks will be repeated | 2676 | * is a candidate for flushing. These checks will be repeated |
@@ -2692,7 +2720,7 @@ xfs_iflush_cluster( | |||
2692 | } | 2720 | } |
2693 | 2721 | ||
2694 | out_free: | 2722 | out_free: |
2695 | read_unlock(&pag->pag_ici_lock); | 2723 | rcu_read_unlock(); |
2696 | kmem_free(ilist); | 2724 | kmem_free(ilist); |
2697 | out_put: | 2725 | out_put: |
2698 | xfs_perag_put(pag); | 2726 | xfs_perag_put(pag); |
@@ -2704,7 +2732,7 @@ cluster_corrupt_out: | |||
2704 | * Corruption detected in the clustering loop. Invalidate the | 2732 | * Corruption detected in the clustering loop. Invalidate the |
2705 | * inode buffer and shut down the filesystem. | 2733 | * inode buffer and shut down the filesystem. |
2706 | */ | 2734 | */ |
2707 | read_unlock(&pag->pag_ici_lock); | 2735 | rcu_read_unlock(); |
2708 | /* | 2736 | /* |
2709 | * Clean up the buffer. If it was B_DELWRI, just release it -- | 2737 | * Clean up the buffer. If it was B_DELWRI, just release it -- |
2710 | * brelse can handle it with no problems. If not, shut down the | 2738 | * brelse can handle it with no problems. If not, shut down the |