aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_dir2_readdir.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2015-08-18 20:33:00 -0400
committerDave Chinner <david@fromorbit.com>2015-08-18 20:33:00 -0400
commitdbad7c993053d8f482a5f76270a93307537efd8e (patch)
tree9c393b25f4acd9c4ad4b3ee38239bc7290651625 /fs/xfs/xfs_dir2_readdir.c
parent0952c8183c1575a78dc416b5e168987ff98728bb (diff)
xfs: stop holding ILOCK over filldir callbacks
The recent change to the readdir locking made in 40194ec ("xfs: reinstate the ilock in xfs_readdir") for CXFS directory sanity was probably the wrong thing to do. Deep in the readdir code we can take page faults in the filldir callback, and so taking a page fault while holding an inode ilock creates a new set of locking issues that lockdep warns all over the place about. The locking order for regular inodes w.r.t. page faults is io_lock -> pagefault -> mmap_sem -> ilock. The directory readdir code now triggers ilock -> page fault -> mmap_sem. While we cannot deadlock at this point, it inverts all the locking patterns that lockdep normally sees on XFS inodes, and so triggers lockdep. We worked around this with commit 93a8614 ("xfs: fix directory inode iolock lockdep false positive"), but that then just moved the lockdep warning to deeper in the page fault path and triggered on security inode locks. Fixing the shmem issue there just moved the lockdep reports somewhere else, and now we are getting false positives from filesystem freezing annotations getting confused. Further, if we enter memory reclaim in a readdir path, we now get lockdep warnings about potential deadlocks because the ilock is held when we enter reclaim. This, again, is different to a regular file in that we never allow memory reclaim to run while holding the ilock for regular files. Hence lockdep now throws ilock->kmalloc->reclaim->ilock warnings. Basically, the problem is that the ilock is being used to protect the directory data and the inode metadata, whereas for a regular file the iolock protects the data and the ilock protects the metadata. From the VFS perspective, the i_mutex serialises all accesses to the directory data, and so not holding the ilock for readdir doesn't matter. The issue is that CXFS doesn't access directory data via the VFS, so it has no "data serialisation" mechanism. 
Hence we need to hold the IOLOCK in the correct places to provide this low-level directory data access serialisation. The ilock can then be used just when the extent list needs to be read, just like we do for regular files. The directory modification code can take the iolock exclusive when the ilock is also taken, and this then ensures that readdir is correctly excluded while modifications are in progress. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_dir2_readdir.c')
-rw-r--r--fs/xfs/xfs_dir2_readdir.c11
1 files changed, 8 insertions, 3 deletions
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 098cd78fe708..a989a9c7edb7 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -171,6 +171,7 @@ xfs_dir2_block_getdents(
171 int wantoff; /* starting block offset */ 171 int wantoff; /* starting block offset */
172 xfs_off_t cook; 172 xfs_off_t cook;
173 struct xfs_da_geometry *geo = args->geo; 173 struct xfs_da_geometry *geo = args->geo;
174 int lock_mode;
174 175
175 /* 176 /*
176 * If the block number in the offset is out of range, we're done. 177 * If the block number in the offset is out of range, we're done.
@@ -178,7 +179,9 @@ xfs_dir2_block_getdents(
178 if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) 179 if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
179 return 0; 180 return 0;
180 181
182 lock_mode = xfs_ilock_data_map_shared(dp);
181 error = xfs_dir3_block_read(NULL, dp, &bp); 183 error = xfs_dir3_block_read(NULL, dp, &bp);
184 xfs_iunlock(dp, lock_mode);
182 if (error) 185 if (error)
183 return error; 186 return error;
184 187
@@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents(
529 * current buffer, need to get another one. 532 * current buffer, need to get another one.
530 */ 533 */
531 if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { 534 if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
535 int lock_mode;
532 536
537 lock_mode = xfs_ilock_data_map_shared(dp);
533 error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, 538 error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
534 &curoff, &bp); 539 &curoff, &bp);
540 xfs_iunlock(dp, lock_mode);
535 if (error || !map_info->map_valid) 541 if (error || !map_info->map_valid)
536 break; 542 break;
537 543
@@ -653,7 +659,6 @@ xfs_readdir(
653 struct xfs_da_args args = { NULL }; 659 struct xfs_da_args args = { NULL };
654 int rval; 660 int rval;
655 int v; 661 int v;
656 uint lock_mode;
657 662
658 trace_xfs_readdir(dp); 663 trace_xfs_readdir(dp);
659 664
@@ -666,7 +671,7 @@ xfs_readdir(
666 args.dp = dp; 671 args.dp = dp;
667 args.geo = dp->i_mount->m_dir_geo; 672 args.geo = dp->i_mount->m_dir_geo;
668 673
669 lock_mode = xfs_ilock_data_map_shared(dp); 674 xfs_ilock(dp, XFS_IOLOCK_SHARED);
670 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 675 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
671 rval = xfs_dir2_sf_getdents(&args, ctx); 676 rval = xfs_dir2_sf_getdents(&args, ctx);
672 else if ((rval = xfs_dir2_isblock(&args, &v))) 677 else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -675,7 +680,7 @@ xfs_readdir(
675 rval = xfs_dir2_block_getdents(&args, ctx); 680 rval = xfs_dir2_block_getdents(&args, ctx);
676 else 681 else
677 rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); 682 rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
678 xfs_iunlock(dp, lock_mode); 683 xfs_iunlock(dp, XFS_IOLOCK_SHARED);
679 684
680 return rval; 685 return rval;
681} 686}