summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2015-08-18 20:33:00 -0400
committerDave Chinner <david@fromorbit.com>2015-08-18 20:33:00 -0400
commitdbad7c993053d8f482a5f76270a93307537efd8e (patch)
tree9c393b25f4acd9c4ad4b3ee38239bc7290651625 /fs/xfs
parent0952c8183c1575a78dc416b5e168987ff98728bb (diff)
xfs: stop holding ILOCK over filldir callbacks
The recent change to the readdir locking made in 40194ec ("xfs: reinstate the ilock in xfs_readdir") for CXFS directory sanity was probably the wrong thing to do. Deep in the readdir code we can take page faults in the filldir callback, and so taking a page fault while holding an inode ilock creates a new set of locking issues that lockdep warns all over the place about. The locking order for regular inodes w.r.t. page faults is io_lock -> pagefault -> mmap_sem -> ilock. The directory readdir code now triggers ilock -> page fault -> mmap_sem. While we cannot deadlock at this point, it inverts all the locking patterns that lockdep normally sees on XFS inodes, and so triggers lockdep. We worked around this with commit 93a8614 ("xfs: fix directory inode iolock lockdep false positive"), but that then just moved the lockdep warning to deeper in the page fault path and triggered on security inode locks. Fixing the shmem issue there just moved the lockdep reports somewhere else, and now we are getting false positives from filesystem freezing annotations getting confused. Further, if we enter memory reclaim in a readdir path, we now get lockdep warning about potential deadlocks because the ilock is held when we enter reclaim. This, again, is different to a regular file in that we never allow memory reclaim to run while holding the ilock for regular files. Hence lockdep now throws ilock->kmalloc->reclaim->ilock warnings. Basically, the problem is that the ilock is being used to protect the directory data and the inode metadata, whereas for a regular file the iolock protects the data and the ilock protects the metadata. From the VFS perspective, the i_mutex serialises all accesses to the directory data, and so not holding the ilock for readdir doesn't matter. The issue is that CXFS doesn't access directory data via the VFS, so it has no "data serialisaton" mechanism. Hence we need to hold the IOLOCK in the correct places to provide this low level directory data access serialisation. The ilock can then be used just when the extent list needs to be read, just like we do for regular files. The directory modification code can take the iolock exclusive when the ilock is also taken, and this then ensures that readdir is correct excluded while modifications are in progress. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c3
-rw-r--r--fs/xfs/xfs_dir2_readdir.c11
-rw-r--r--fs/xfs/xfs_inode.c34
-rw-r--r--fs/xfs/xfs_symlink.c7
4 files changed, 36 insertions, 19 deletions
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index a69fb3a1e161..42799d88ecc0 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -362,6 +362,7 @@ xfs_dir_lookup(
362 struct xfs_da_args *args; 362 struct xfs_da_args *args;
363 int rval; 363 int rval;
364 int v; /* type-checking value */ 364 int v; /* type-checking value */
365 int lock_mode;
365 366
366 ASSERT(S_ISDIR(dp->i_d.di_mode)); 367 ASSERT(S_ISDIR(dp->i_d.di_mode));
367 XFS_STATS_INC(xs_dir_lookup); 368 XFS_STATS_INC(xs_dir_lookup);
@@ -387,6 +388,7 @@ xfs_dir_lookup(
387 if (ci_name) 388 if (ci_name)
388 args->op_flags |= XFS_DA_OP_CILOOKUP; 389 args->op_flags |= XFS_DA_OP_CILOOKUP;
389 390
391 lock_mode = xfs_ilock_data_map_shared(dp);
390 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { 392 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
391 rval = xfs_dir2_sf_lookup(args); 393 rval = xfs_dir2_sf_lookup(args);
392 goto out_check_rval; 394 goto out_check_rval;
@@ -419,6 +421,7 @@ out_check_rval:
419 } 421 }
420 } 422 }
421out_free: 423out_free:
424 xfs_iunlock(dp, lock_mode);
422 kmem_free(args); 425 kmem_free(args);
423 return rval; 426 return rval;
424} 427}
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 098cd78fe708..a989a9c7edb7 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -171,6 +171,7 @@ xfs_dir2_block_getdents(
171 int wantoff; /* starting block offset */ 171 int wantoff; /* starting block offset */
172 xfs_off_t cook; 172 xfs_off_t cook;
173 struct xfs_da_geometry *geo = args->geo; 173 struct xfs_da_geometry *geo = args->geo;
174 int lock_mode;
174 175
175 /* 176 /*
176 * If the block number in the offset is out of range, we're done. 177 * If the block number in the offset is out of range, we're done.
@@ -178,7 +179,9 @@ xfs_dir2_block_getdents(
178 if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk) 179 if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
179 return 0; 180 return 0;
180 181
182 lock_mode = xfs_ilock_data_map_shared(dp);
181 error = xfs_dir3_block_read(NULL, dp, &bp); 183 error = xfs_dir3_block_read(NULL, dp, &bp);
184 xfs_iunlock(dp, lock_mode);
182 if (error) 185 if (error)
183 return error; 186 return error;
184 187
@@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents(
529 * current buffer, need to get another one. 532 * current buffer, need to get another one.
530 */ 533 */
531 if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { 534 if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
535 int lock_mode;
532 536
537 lock_mode = xfs_ilock_data_map_shared(dp);
533 error = xfs_dir2_leaf_readbuf(args, bufsize, map_info, 538 error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
534 &curoff, &bp); 539 &curoff, &bp);
540 xfs_iunlock(dp, lock_mode);
535 if (error || !map_info->map_valid) 541 if (error || !map_info->map_valid)
536 break; 542 break;
537 543
@@ -653,7 +659,6 @@ xfs_readdir(
653 struct xfs_da_args args = { NULL }; 659 struct xfs_da_args args = { NULL };
654 int rval; 660 int rval;
655 int v; 661 int v;
656 uint lock_mode;
657 662
658 trace_xfs_readdir(dp); 663 trace_xfs_readdir(dp);
659 664
@@ -666,7 +671,7 @@ xfs_readdir(
666 args.dp = dp; 671 args.dp = dp;
667 args.geo = dp->i_mount->m_dir_geo; 672 args.geo = dp->i_mount->m_dir_geo;
668 673
669 lock_mode = xfs_ilock_data_map_shared(dp); 674 xfs_ilock(dp, XFS_IOLOCK_SHARED);
670 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 675 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
671 rval = xfs_dir2_sf_getdents(&args, ctx); 676 rval = xfs_dir2_sf_getdents(&args, ctx);
672 else if ((rval = xfs_dir2_isblock(&args, &v))) 677 else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -675,7 +680,7 @@ xfs_readdir(
675 rval = xfs_dir2_block_getdents(&args, ctx); 680 rval = xfs_dir2_block_getdents(&args, ctx);
676 else 681 else
677 rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); 682 rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
678 xfs_iunlock(dp, lock_mode); 683 xfs_iunlock(dp, XFS_IOLOCK_SHARED);
679 684
680 return rval; 685 return rval;
681} 686}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 657f6123b445..65792660b043 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -663,30 +663,29 @@ xfs_lookup(
663{ 663{
664 xfs_ino_t inum; 664 xfs_ino_t inum;
665 int error; 665 int error;
666 uint lock_mode;
667 666
668 trace_xfs_lookup(dp, name); 667 trace_xfs_lookup(dp, name);
669 668
670 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 669 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
671 return -EIO; 670 return -EIO;
672 671
673 lock_mode = xfs_ilock_data_map_shared(dp); 672 xfs_ilock(dp, XFS_IOLOCK_SHARED);
674 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 673 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
675 xfs_iunlock(dp, lock_mode);
676
677 if (error) 674 if (error)
678 goto out; 675 goto out_unlock;
679 676
680 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp); 677 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
681 if (error) 678 if (error)
682 goto out_free_name; 679 goto out_free_name;
683 680
681 xfs_iunlock(dp, XFS_IOLOCK_SHARED);
684 return 0; 682 return 0;
685 683
686out_free_name: 684out_free_name:
687 if (ci_name) 685 if (ci_name)
688 kmem_free(ci_name->name); 686 kmem_free(ci_name->name);
689out: 687out_unlock:
688 xfs_iunlock(dp, XFS_IOLOCK_SHARED);
690 *ipp = NULL; 689 *ipp = NULL;
691 return error; 690 return error;
692} 691}
@@ -1183,7 +1182,8 @@ xfs_create(
1183 goto out_trans_cancel; 1182 goto out_trans_cancel;
1184 1183
1185 1184
1186 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 1185 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
1186 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
1187 unlock_dp_on_error = true; 1187 unlock_dp_on_error = true;
1188 1188
1189 xfs_bmap_init(&free_list, &first_block); 1189 xfs_bmap_init(&free_list, &first_block);
@@ -1222,7 +1222,7 @@ xfs_create(
1222 * the transaction cancel unlocking dp so don't do it explicitly in the 1222 * the transaction cancel unlocking dp so don't do it explicitly in the
1223 * error path. 1223 * error path.
1224 */ 1224 */
1225 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 1225 xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1226 unlock_dp_on_error = false; 1226 unlock_dp_on_error = false;
1227 1227
1228 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1228 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1295,7 +1295,7 @@ xfs_create(
1295 xfs_qm_dqrele(pdqp); 1295 xfs_qm_dqrele(pdqp);
1296 1296
1297 if (unlock_dp_on_error) 1297 if (unlock_dp_on_error)
1298 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1298 xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1299 return error; 1299 return error;
1300} 1300}
1301 1301
@@ -1443,10 +1443,11 @@ xfs_link(
1443 if (error) 1443 if (error)
1444 goto error_return; 1444 goto error_return;
1445 1445
1446 xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
1446 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1447 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1447 1448
1448 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1449 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1449 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); 1450 xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1450 1451
1451 /* 1452 /*
1452 * If we are using project inheritance, we only allow hard link 1453 * If we are using project inheritance, we only allow hard link
@@ -2549,9 +2550,10 @@ xfs_remove(
2549 goto out_trans_cancel; 2550 goto out_trans_cancel;
2550 } 2551 }
2551 2552
2553 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
2552 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2554 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
2553 2555
2554 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 2556 xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
2555 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2557 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2556 2558
2557 /* 2559 /*
@@ -2932,6 +2934,12 @@ xfs_rename(
2932 * whether the target directory is the same as the source 2934 * whether the target directory is the same as the source
2933 * directory, we can lock from 2 to 4 inodes. 2935 * directory, we can lock from 2 to 4 inodes.
2934 */ 2936 */
2937 if (!new_parent)
2938 xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
2939 else
2940 xfs_lock_two_inodes(src_dp, target_dp,
2941 XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
2942
2935 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 2943 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
2936 2944
2937 /* 2945 /*
@@ -2939,9 +2947,9 @@ xfs_rename(
2939 * we can rely on either trans_commit or trans_cancel to unlock 2947 * we can rely on either trans_commit or trans_cancel to unlock
2940 * them. 2948 * them.
2941 */ 2949 */
2942 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); 2950 xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
2943 if (new_parent) 2951 if (new_parent)
2944 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); 2952 xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
2945 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2953 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
2946 if (target_ip) 2954 if (target_ip)
2947 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2955 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 4be27b0210af..7a01486eff06 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -240,7 +240,8 @@ xfs_symlink(
240 if (error) 240 if (error)
241 goto out_trans_cancel; 241 goto out_trans_cancel;
242 242
243 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); 243 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
244 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
244 unlock_dp_on_error = true; 245 unlock_dp_on_error = true;
245 246
246 /* 247 /*
@@ -288,7 +289,7 @@ xfs_symlink(
288 * the transaction cancel unlocking dp so don't do it explicitly in the 289 * the transaction cancel unlocking dp so don't do it explicitly in the
289 * error path. 290 * error path.
290 */ 291 */
291 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); 292 xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
292 unlock_dp_on_error = false; 293 unlock_dp_on_error = false;
293 294
294 /* 295 /*
@@ -421,7 +422,7 @@ out_release_inode:
421 xfs_qm_dqrele(pdqp); 422 xfs_qm_dqrele(pdqp);
422 423
423 if (unlock_dp_on_error) 424 if (unlock_dp_on_error)
424 xfs_iunlock(dp, XFS_ILOCK_EXCL); 425 xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
425 return error; 426 return error;
426} 427}
427 428