aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Dave Chinner <dchinner@redhat.com> 2012-03-06 23:50:25 -0500
committer: Ben Myers <bpm@sgi.com> 2012-03-15 14:16:42 -0400
commit: f30d500f809eca67a21704347ab14bb35877b5ee (patch)
tree: 0e4c8267a9087f0c70994346443221d3efb6f321 /fs
parent: 8d2a5e6ee37f205b3d76c223d4f3f88ba1d06220 (diff)
xfs: fix inode lookup race
When we get concurrent lookups of the same inode that is not in the
per-AG inode cache, there is a race condition that triggers warnings
in unlock_new_inode() indicating that we are initialising an inode
that isn't in the correct state for a new inode.

When we do an inode lookup via a file handle or a bulkstat, we don't
serialise lookups at a higher level through the dentry cache (i.e.
pathless lookup), and so we can get concurrent lookups of the same
inode.

The race condition is between the insertion of the inode into the
cache in the case of a cache miss and a concurrent lookup:

Thread 1                        Thread 2
xfs_iget()
  xfs_iget_cache_miss()
    xfs_iread()
    lock radix tree
      radix_tree_insert()
                                rcu_read_lock
                                radix_tree_lookup
                                lock inode flags
                                XFS_INEW not set
                                igrab()
                                unlock inode flags
                                rcu_read_unlock
                                use uninitialised inode
                                .....
      lock inode flags
      set XFS_INEW
      unlock inode flags
    unlock radix tree
  xfs_setup_inode()
    inode flags = I_NEW
    unlock_new_inode()
      WARNING as inode flags != I_NEW

This can lead to inode corruption, inode list corruption, etc, and
is generally a bad thing to occur.

Fix this by setting XFS_INEW before inserting the inode into the
radix tree. This will ensure any concurrent lookup will find the new
inode with XFS_INEW set and that forces the lookup to wait until the
XFS_INEW flag is removed before allowing the lookup to succeed.

cc: <stable@vger.kernel.org> # for 3.0.x, 3.2.x
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/xfs_iget.c 18
1 files changed, 12 insertions, 6 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index af3f30a3d9c2..a98cb4524e6c 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -349,9 +349,20 @@ xfs_iget_cache_miss(
349 BUG(); 349 BUG();
350 } 350 }
351 351
352 spin_lock(&pag->pag_ici_lock); 352 /*
353 * These values must be set before inserting the inode into the radix
354 * tree as the moment it is inserted a concurrent lookup (allowed by the
355 * RCU locking mechanism) can find it and that lookup must see that this
356 * is an inode currently under construction (i.e. that XFS_INEW is set).
357 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
358 * memory barrier that ensures this detection works correctly at lookup
359 * time.
360 */
361 ip->i_udquot = ip->i_gdquot = NULL;
362 xfs_iflags_set(ip, XFS_INEW);
353 363
354 /* insert the new inode */ 364 /* insert the new inode */
365 spin_lock(&pag->pag_ici_lock);
355 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 366 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
356 if (unlikely(error)) { 367 if (unlikely(error)) {
357 WARN_ON(error != -EEXIST); 368 WARN_ON(error != -EEXIST);
@@ -359,11 +370,6 @@ xfs_iget_cache_miss(
359 error = EAGAIN; 370 error = EAGAIN;
360 goto out_preload_end; 371 goto out_preload_end;
361 } 372 }
362
363 /* These values _must_ be set before releasing the radix tree lock! */
364 ip->i_udquot = ip->i_gdquot = NULL;
365 xfs_iflags_set(ip, XFS_INEW);
366
367 spin_unlock(&pag->pag_ici_lock); 373 spin_unlock(&pag->pag_ici_lock);
368 radix_tree_preload_end(); 374 radix_tree_preload_end();
369 375