diff options
author | Alex Elder <aelder@sgi.com> | 2011-01-10 22:35:55 -0500 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2011-01-10 22:35:55 -0500 |
commit | 92f1c008ae79e32b83c0607d184b194f302bb3ee (patch) | |
tree | 070980c581ca39a050a1b86a50fe4c52437cdba1 /fs/xfs/xfs_iget.c | |
parent | e54be894eae10eca9892e965cc9532f5d5a11767 (diff) | |
parent | d0eb2f38b250b7d6c993adf81b0e4ded0565497e (diff) |
Merge branch 'master' into for-linus-merged
This merge pulls the XFS master branch into the latest Linus master.
This results in a merge conflict whose best fix is not obvious.
I manually fixed the conflict, in "fs/xfs/xfs_iget.c".
Dave Chinner had done work that resulted in RCU freeing of inodes
separate from what Nick Piggin had done, and their results differed
slightly in xfs_inode_free(). The fix updates Nick's call_rcu()
with the use of VFS_I(), while incorporating needed updates to some
XFS inode fields implemented in Dave's series. Dave's RCU callback
function has also been removed.
Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r-- | fs/xfs/xfs_iget.c | 79 |
1 files changed, 61 insertions, 18 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index d7de5a3f7867..cb9b6d1469f7 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -43,6 +43,17 @@ | |||
43 | 43 | ||
44 | 44 | ||
45 | /* | 45 | /* |
46 | * Define xfs inode iolock lockdep classes. We need to ensure that all active | ||
47 | * inodes are considered the same for lockdep purposes, including inodes that | ||
48 | * are recycled through the XFS_IRECLAIMABLE state. This is the only way to | ||
49 | * guarantee the locks are considered the same when there are multiple lock | ||
50 | * initialisation sites. Also, define a reclaimable inode class so it is | ||
51 | * obvious in lockdep reports which class the report is against. | ||
52 | */ | ||
53 | static struct lock_class_key xfs_iolock_active; | ||
54 | struct lock_class_key xfs_iolock_reclaimable; | ||
55 | |||
56 | /* | ||
46 | * Allocate and initialise an xfs_inode. | 57 | * Allocate and initialise an xfs_inode. |
47 | */ | 58 | */ |
48 | STATIC struct xfs_inode * | 59 | STATIC struct xfs_inode * |
@@ -69,8 +80,11 @@ xfs_inode_alloc( | |||
69 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 80 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
70 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 81 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
71 | ASSERT(completion_done(&ip->i_flush)); | 82 | ASSERT(completion_done(&ip->i_flush)); |
83 | ASSERT(ip->i_ino == 0); | ||
72 | 84 | ||
73 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 85 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
86 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
87 | &xfs_iolock_active, "xfs_iolock_active"); | ||
74 | 88 | ||
75 | /* initialise the xfs inode */ | 89 | /* initialise the xfs inode */ |
76 | ip->i_ino = ino; | 90 | ip->i_ino = ino; |
@@ -85,9 +99,6 @@ xfs_inode_alloc( | |||
85 | ip->i_size = 0; | 99 | ip->i_size = 0; |
86 | ip->i_new_size = 0; | 100 | ip->i_new_size = 0; |
87 | 101 | ||
88 | /* prevent anyone from using this yet */ | ||
89 | VFS_I(ip)->i_state = I_NEW; | ||
90 | |||
91 | return ip; | 102 | return ip; |
92 | } | 103 | } |
93 | 104 | ||
@@ -145,7 +156,18 @@ xfs_inode_free( | |||
145 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 156 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
146 | ASSERT(completion_done(&ip->i_flush)); | 157 | ASSERT(completion_done(&ip->i_flush)); |
147 | 158 | ||
148 | call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback); | 159 | /* |
160 | * Because we use RCU freeing we need to ensure the inode always | ||
161 | * appears to be reclaimed with an invalid inode number when in the | ||
162 | * free state. The ip->i_flags_lock provides the barrier against lookup | ||
163 | * races. | ||
164 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | ||
166 | ip->i_flags = XFS_IRECLAIM; | ||
167 | ip->i_ino = 0; | ||
168 | spin_unlock(&ip->i_flags_lock); | ||
169 | |||
170 | call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); | ||
149 | } | 171 | } |
150 | 172 | ||
151 | /* | 173 | /* |
@@ -155,14 +177,29 @@ static int | |||
155 | xfs_iget_cache_hit( | 177 | xfs_iget_cache_hit( |
156 | struct xfs_perag *pag, | 178 | struct xfs_perag *pag, |
157 | struct xfs_inode *ip, | 179 | struct xfs_inode *ip, |
180 | xfs_ino_t ino, | ||
158 | int flags, | 181 | int flags, |
159 | int lock_flags) __releases(pag->pag_ici_lock) | 182 | int lock_flags) __releases(RCU) |
160 | { | 183 | { |
161 | struct inode *inode = VFS_I(ip); | 184 | struct inode *inode = VFS_I(ip); |
162 | struct xfs_mount *mp = ip->i_mount; | 185 | struct xfs_mount *mp = ip->i_mount; |
163 | int error; | 186 | int error; |
164 | 187 | ||
188 | /* | ||
189 | * check for re-use of an inode within an RCU grace period due to the | ||
190 | * radix tree nodes not being updated yet. We monitor for this by | ||
191 | * setting the inode number to zero before freeing the inode structure. | ||
192 | * If the inode has been reallocated and set up, then the inode number | ||
193 | * will not match, so check for that, too. | ||
194 | */ | ||
165 | spin_lock(&ip->i_flags_lock); | 195 | spin_lock(&ip->i_flags_lock); |
196 | if (ip->i_ino != ino) { | ||
197 | trace_xfs_iget_skip(ip); | ||
198 | XFS_STATS_INC(xs_ig_frecycle); | ||
199 | error = EAGAIN; | ||
200 | goto out_error; | ||
201 | } | ||
202 | |||
166 | 203 | ||
167 | /* | 204 | /* |
168 | * If we are racing with another cache hit that is currently | 205 | * If we are racing with another cache hit that is currently |
@@ -205,7 +242,7 @@ xfs_iget_cache_hit( | |||
205 | ip->i_flags |= XFS_IRECLAIM; | 242 | ip->i_flags |= XFS_IRECLAIM; |
206 | 243 | ||
207 | spin_unlock(&ip->i_flags_lock); | 244 | spin_unlock(&ip->i_flags_lock); |
208 | read_unlock(&pag->pag_ici_lock); | 245 | rcu_read_unlock(); |
209 | 246 | ||
210 | error = -inode_init_always(mp->m_super, inode); | 247 | error = -inode_init_always(mp->m_super, inode); |
211 | if (error) { | 248 | if (error) { |
@@ -213,7 +250,7 @@ xfs_iget_cache_hit( | |||
213 | * Re-initializing the inode failed, and we are in deep | 250 | * Re-initializing the inode failed, and we are in deep |
214 | * trouble. Try to re-add it to the reclaim list. | 251 | * trouble. Try to re-add it to the reclaim list. |
215 | */ | 252 | */ |
216 | read_lock(&pag->pag_ici_lock); | 253 | rcu_read_lock(); |
217 | spin_lock(&ip->i_flags_lock); | 254 | spin_lock(&ip->i_flags_lock); |
218 | 255 | ||
219 | ip->i_flags &= ~XFS_INEW; | 256 | ip->i_flags &= ~XFS_INEW; |
@@ -223,14 +260,20 @@ xfs_iget_cache_hit( | |||
223 | goto out_error; | 260 | goto out_error; |
224 | } | 261 | } |
225 | 262 | ||
226 | write_lock(&pag->pag_ici_lock); | 263 | spin_lock(&pag->pag_ici_lock); |
227 | spin_lock(&ip->i_flags_lock); | 264 | spin_lock(&ip->i_flags_lock); |
228 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); | 265 | ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM); |
229 | ip->i_flags |= XFS_INEW; | 266 | ip->i_flags |= XFS_INEW; |
230 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); | 267 | __xfs_inode_clear_reclaim_tag(mp, pag, ip); |
231 | inode->i_state = I_NEW; | 268 | inode->i_state = I_NEW; |
269 | |||
270 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
271 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | ||
272 | lockdep_set_class_and_name(&ip->i_iolock.mr_lock, | ||
273 | &xfs_iolock_active, "xfs_iolock_active"); | ||
274 | |||
232 | spin_unlock(&ip->i_flags_lock); | 275 | spin_unlock(&ip->i_flags_lock); |
233 | write_unlock(&pag->pag_ici_lock); | 276 | spin_unlock(&pag->pag_ici_lock); |
234 | } else { | 277 | } else { |
235 | /* If the VFS inode is being torn down, pause and try again. */ | 278 | /* If the VFS inode is being torn down, pause and try again. */ |
236 | if (!igrab(inode)) { | 279 | if (!igrab(inode)) { |
@@ -241,7 +284,7 @@ xfs_iget_cache_hit( | |||
241 | 284 | ||
242 | /* We've got a live one. */ | 285 | /* We've got a live one. */ |
243 | spin_unlock(&ip->i_flags_lock); | 286 | spin_unlock(&ip->i_flags_lock); |
244 | read_unlock(&pag->pag_ici_lock); | 287 | rcu_read_unlock(); |
245 | trace_xfs_iget_hit(ip); | 288 | trace_xfs_iget_hit(ip); |
246 | } | 289 | } |
247 | 290 | ||
@@ -255,7 +298,7 @@ xfs_iget_cache_hit( | |||
255 | 298 | ||
256 | out_error: | 299 | out_error: |
257 | spin_unlock(&ip->i_flags_lock); | 300 | spin_unlock(&ip->i_flags_lock); |
258 | read_unlock(&pag->pag_ici_lock); | 301 | rcu_read_unlock(); |
259 | return error; | 302 | return error; |
260 | } | 303 | } |
261 | 304 | ||
@@ -308,7 +351,7 @@ xfs_iget_cache_miss( | |||
308 | BUG(); | 351 | BUG(); |
309 | } | 352 | } |
310 | 353 | ||
311 | write_lock(&pag->pag_ici_lock); | 354 | spin_lock(&pag->pag_ici_lock); |
312 | 355 | ||
313 | /* insert the new inode */ | 356 | /* insert the new inode */ |
314 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); | 357 | error = radix_tree_insert(&pag->pag_ici_root, agino, ip); |
@@ -323,14 +366,14 @@ xfs_iget_cache_miss( | |||
323 | ip->i_udquot = ip->i_gdquot = NULL; | 366 | ip->i_udquot = ip->i_gdquot = NULL; |
324 | xfs_iflags_set(ip, XFS_INEW); | 367 | xfs_iflags_set(ip, XFS_INEW); |
325 | 368 | ||
326 | write_unlock(&pag->pag_ici_lock); | 369 | spin_unlock(&pag->pag_ici_lock); |
327 | radix_tree_preload_end(); | 370 | radix_tree_preload_end(); |
328 | 371 | ||
329 | *ipp = ip; | 372 | *ipp = ip; |
330 | return 0; | 373 | return 0; |
331 | 374 | ||
332 | out_preload_end: | 375 | out_preload_end: |
333 | write_unlock(&pag->pag_ici_lock); | 376 | spin_unlock(&pag->pag_ici_lock); |
334 | radix_tree_preload_end(); | 377 | radix_tree_preload_end(); |
335 | if (lock_flags) | 378 | if (lock_flags) |
336 | xfs_iunlock(ip, lock_flags); | 379 | xfs_iunlock(ip, lock_flags); |
@@ -377,7 +420,7 @@ xfs_iget( | |||
377 | xfs_agino_t agino; | 420 | xfs_agino_t agino; |
378 | 421 | ||
379 | /* reject inode numbers outside existing AGs */ | 422 | /* reject inode numbers outside existing AGs */ |
380 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) | 423 | if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
381 | return EINVAL; | 424 | return EINVAL; |
382 | 425 | ||
383 | /* get the perag structure and ensure that it's inode capable */ | 426 | /* get the perag structure and ensure that it's inode capable */ |
@@ -386,15 +429,15 @@ xfs_iget( | |||
386 | 429 | ||
387 | again: | 430 | again: |
388 | error = 0; | 431 | error = 0; |
389 | read_lock(&pag->pag_ici_lock); | 432 | rcu_read_lock(); |
390 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); | 433 | ip = radix_tree_lookup(&pag->pag_ici_root, agino); |
391 | 434 | ||
392 | if (ip) { | 435 | if (ip) { |
393 | error = xfs_iget_cache_hit(pag, ip, flags, lock_flags); | 436 | error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags); |
394 | if (error) | 437 | if (error) |
395 | goto out_error_or_again; | 438 | goto out_error_or_again; |
396 | } else { | 439 | } else { |
397 | read_unlock(&pag->pag_ici_lock); | 440 | rcu_read_unlock(); |
398 | XFS_STATS_INC(xs_ig_missed); | 441 | XFS_STATS_INC(xs_ig_missed); |
399 | 442 | ||
400 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, | 443 | error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, |