aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2016-11-29 22:33:25 -0500
committer: Dave Chinner <david@fromorbit.com> 2016-11-29 22:33:25 -0500
commit: 6552321831dce87ff5c466a55b58d472732caadc (patch)
tree: 84f3de7b89690c84fd13e2efa7a85d4918d342e3 /fs/xfs/xfs_inode.c
parent: f8319483f57f1ca22370f4150bb990aca7728a67 (diff)
xfs: remove i_iolock and use i_rwsem in the VFS inode instead
This patch drops the XFS-own i_iolock and uses the VFS i_rwsem which recently replaced i_mutex instead. This means we only have to take one lock instead of two in many fast path operations, and we can also shrink the xfs_inode structure. Thanks to the xfs_ilock family there is very little churn, the only thing of note is that we need to switch to use the lock_two_directory helper for taking the i_rwsem on two inodes in a few places to make sure our lock order matches the one used in the VFS.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- fs/xfs/xfs_inode.c 82
1 files changed, 33 insertions, 49 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4e560e6a12c1..e9ab42d8965b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -142,31 +142,31 @@ xfs_ilock_attr_map_shared(
142} 142}
143 143
144/* 144/*
145 * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and 145 * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
146 * the i_lock. This routine allows various combinations of the locks to be 146 * multi-reader locks: i_mmap_lock and the i_lock. This routine allows
147 * obtained. 147 * various combinations of the locks to be obtained.
148 * 148 *
149 * The 3 locks should always be ordered so that the IO lock is obtained first, 149 * The 3 locks should always be ordered so that the IO lock is obtained first,
150 * the mmap lock second and the ilock last in order to prevent deadlock. 150 * the mmap lock second and the ilock last in order to prevent deadlock.
151 * 151 *
152 * Basic locking order: 152 * Basic locking order:
153 * 153 *
154 * i_iolock -> i_mmap_lock -> page_lock -> i_ilock 154 * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock
155 * 155 *
156 * mmap_sem locking order: 156 * mmap_sem locking order:
157 * 157 *
158 * i_iolock -> page lock -> mmap_sem 158 * i_rwsem -> page lock -> mmap_sem
159 * mmap_sem -> i_mmap_lock -> page_lock 159 * mmap_sem -> i_mmap_lock -> page_lock
160 * 160 *
161 * The difference in mmap_sem locking order mean that we cannot hold the 161 * The difference in mmap_sem locking order mean that we cannot hold the
162 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can 162 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
163 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem 163 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
164 * in get_user_pages() to map the user pages into the kernel address space for 164 * in get_user_pages() to map the user pages into the kernel address space for
165 * direct IO. Similarly the i_iolock cannot be taken inside a page fault because 165 * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because
166 * page faults already hold the mmap_sem. 166 * page faults already hold the mmap_sem.
167 * 167 *
168 * Hence to serialise fully against both syscall and mmap based IO, we need to 168 * Hence to serialise fully against both syscall and mmap based IO, we need to
169 * take both the i_iolock and the i_mmap_lock. These locks should *only* be both 169 * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both
170 * taken in places where we need to invalidate the page cache in a race 170 * taken in places where we need to invalidate the page cache in a race
171 * free manner (e.g. truncate, hole punch and other extent manipulation 171 * free manner (e.g. truncate, hole punch and other extent manipulation
172 * functions). 172 * functions).
@@ -191,10 +191,13 @@ xfs_ilock(
191 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 191 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
192 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); 192 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
193 193
194 if (lock_flags & XFS_IOLOCK_EXCL) 194 if (lock_flags & XFS_IOLOCK_EXCL) {
195 mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 195 down_write_nested(&VFS_I(ip)->i_rwsem,
196 else if (lock_flags & XFS_IOLOCK_SHARED) 196 XFS_IOLOCK_DEP(lock_flags));
197 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 197 } else if (lock_flags & XFS_IOLOCK_SHARED) {
198 down_read_nested(&VFS_I(ip)->i_rwsem,
199 XFS_IOLOCK_DEP(lock_flags));
200 }
198 201
199 if (lock_flags & XFS_MMAPLOCK_EXCL) 202 if (lock_flags & XFS_MMAPLOCK_EXCL)
200 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); 203 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
@@ -240,10 +243,10 @@ xfs_ilock_nowait(
240 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); 243 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
241 244
242 if (lock_flags & XFS_IOLOCK_EXCL) { 245 if (lock_flags & XFS_IOLOCK_EXCL) {
243 if (!mrtryupdate(&ip->i_iolock)) 246 if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
244 goto out; 247 goto out;
245 } else if (lock_flags & XFS_IOLOCK_SHARED) { 248 } else if (lock_flags & XFS_IOLOCK_SHARED) {
246 if (!mrtryaccess(&ip->i_iolock)) 249 if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
247 goto out; 250 goto out;
248 } 251 }
249 252
@@ -271,9 +274,9 @@ out_undo_mmaplock:
271 mrunlock_shared(&ip->i_mmaplock); 274 mrunlock_shared(&ip->i_mmaplock);
272out_undo_iolock: 275out_undo_iolock:
273 if (lock_flags & XFS_IOLOCK_EXCL) 276 if (lock_flags & XFS_IOLOCK_EXCL)
274 mrunlock_excl(&ip->i_iolock); 277 up_write(&VFS_I(ip)->i_rwsem);
275 else if (lock_flags & XFS_IOLOCK_SHARED) 278 else if (lock_flags & XFS_IOLOCK_SHARED)
276 mrunlock_shared(&ip->i_iolock); 279 up_read(&VFS_I(ip)->i_rwsem);
277out: 280out:
278 return 0; 281 return 0;
279} 282}
@@ -310,9 +313,9 @@ xfs_iunlock(
310 ASSERT(lock_flags != 0); 313 ASSERT(lock_flags != 0);
311 314
312 if (lock_flags & XFS_IOLOCK_EXCL) 315 if (lock_flags & XFS_IOLOCK_EXCL)
313 mrunlock_excl(&ip->i_iolock); 316 up_write(&VFS_I(ip)->i_rwsem);
314 else if (lock_flags & XFS_IOLOCK_SHARED) 317 else if (lock_flags & XFS_IOLOCK_SHARED)
315 mrunlock_shared(&ip->i_iolock); 318 up_read(&VFS_I(ip)->i_rwsem);
316 319
317 if (lock_flags & XFS_MMAPLOCK_EXCL) 320 if (lock_flags & XFS_MMAPLOCK_EXCL)
318 mrunlock_excl(&ip->i_mmaplock); 321 mrunlock_excl(&ip->i_mmaplock);
@@ -345,7 +348,7 @@ xfs_ilock_demote(
345 if (lock_flags & XFS_MMAPLOCK_EXCL) 348 if (lock_flags & XFS_MMAPLOCK_EXCL)
346 mrdemote(&ip->i_mmaplock); 349 mrdemote(&ip->i_mmaplock);
347 if (lock_flags & XFS_IOLOCK_EXCL) 350 if (lock_flags & XFS_IOLOCK_EXCL)
348 mrdemote(&ip->i_iolock); 351 downgrade_write(&VFS_I(ip)->i_rwsem);
349 352
350 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); 353 trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
351} 354}
@@ -370,8 +373,9 @@ xfs_isilocked(
370 373
371 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 374 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
372 if (!(lock_flags & XFS_IOLOCK_SHARED)) 375 if (!(lock_flags & XFS_IOLOCK_SHARED))
373 return !!ip->i_iolock.mr_writer; 376 return !debug_locks ||
374 return rwsem_is_locked(&ip->i_iolock.mr_lock); 377 lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0);
378 return rwsem_is_locked(&VFS_I(ip)->i_rwsem);
375 } 379 }
376 380
377 ASSERT(0); 381 ASSERT(0);
@@ -421,11 +425,7 @@ xfs_lock_inumorder(int lock_mode, int subclass)
421 425
422 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { 426 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
423 ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); 427 ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
424 ASSERT(xfs_lockdep_subclass_ok(subclass +
425 XFS_IOLOCK_PARENT_VAL));
426 class += subclass << XFS_IOLOCK_SHIFT; 428 class += subclass << XFS_IOLOCK_SHIFT;
427 if (lock_mode & XFS_IOLOCK_PARENT)
428 class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
429 } 429 }
430 430
431 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { 431 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
@@ -477,8 +477,6 @@ xfs_lock_inodes(
477 XFS_ILOCK_EXCL)); 477 XFS_ILOCK_EXCL));
478 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | 478 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
479 XFS_ILOCK_SHARED))); 479 XFS_ILOCK_SHARED)));
480 ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
481 inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
482 ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || 480 ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
483 inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); 481 inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
484 ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || 482 ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
@@ -581,10 +579,8 @@ xfs_lock_two_inodes(
581 int attempts = 0; 579 int attempts = 0;
582 xfs_log_item_t *lp; 580 xfs_log_item_t *lp;
583 581
584 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { 582 ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
585 ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); 583 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
586 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
587 } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
588 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); 584 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
589 585
590 ASSERT(ip0->i_ino != ip1->i_ino); 586 ASSERT(ip0->i_ino != ip1->i_ino);
@@ -715,7 +711,6 @@ xfs_lookup(
715 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 711 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
716 return -EIO; 712 return -EIO;
717 713
718 xfs_ilock(dp, XFS_IOLOCK_SHARED);
719 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); 714 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
720 if (error) 715 if (error)
721 goto out_unlock; 716 goto out_unlock;
@@ -724,14 +719,12 @@ xfs_lookup(
724 if (error) 719 if (error)
725 goto out_free_name; 720 goto out_free_name;
726 721
727 xfs_iunlock(dp, XFS_IOLOCK_SHARED);
728 return 0; 722 return 0;
729 723
730out_free_name: 724out_free_name:
731 if (ci_name) 725 if (ci_name)
732 kmem_free(ci_name->name); 726 kmem_free(ci_name->name);
733out_unlock: 727out_unlock:
734 xfs_iunlock(dp, XFS_IOLOCK_SHARED);
735 *ipp = NULL; 728 *ipp = NULL;
736 return error; 729 return error;
737} 730}
@@ -1215,8 +1208,7 @@ xfs_create(
1215 if (error) 1208 if (error)
1216 goto out_release_inode; 1209 goto out_release_inode;
1217 1210
1218 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | 1211 xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
1219 XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
1220 unlock_dp_on_error = true; 1212 unlock_dp_on_error = true;
1221 1213
1222 xfs_defer_init(&dfops, &first_block); 1214 xfs_defer_init(&dfops, &first_block);
@@ -1252,7 +1244,7 @@ xfs_create(
1252 * the transaction cancel unlocking dp so don't do it explicitly in the 1244 * the transaction cancel unlocking dp so don't do it explicitly in the
1253 * error path. 1245 * error path.
1254 */ 1246 */
1255 xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1247 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1256 unlock_dp_on_error = false; 1248 unlock_dp_on_error = false;
1257 1249
1258 error = xfs_dir_createname(tp, dp, name, ip->i_ino, 1250 error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1325,7 +1317,7 @@ xfs_create(
1325 xfs_qm_dqrele(pdqp); 1317 xfs_qm_dqrele(pdqp);
1326 1318
1327 if (unlock_dp_on_error) 1319 if (unlock_dp_on_error)
1328 xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1320 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1329 return error; 1321 return error;
1330} 1322}
1331 1323
@@ -1466,11 +1458,10 @@ xfs_link(
1466 if (error) 1458 if (error)
1467 goto std_return; 1459 goto std_return;
1468 1460
1469 xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
1470 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); 1461 xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
1471 1462
1472 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); 1463 xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
1473 xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 1464 xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
1474 1465
1475 /* 1466 /*
1476 * If we are using project inheritance, we only allow hard link 1467 * If we are using project inheritance, we only allow hard link
@@ -2579,10 +2570,9 @@ xfs_remove(
2579 goto std_return; 2570 goto std_return;
2580 } 2571 }
2581 2572
2582 xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
2583 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); 2573 xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
2584 2574
2585 xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 2575 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2586 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2576 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2587 2577
2588 /* 2578 /*
@@ -2963,12 +2953,6 @@ xfs_rename(
2963 * whether the target directory is the same as the source 2953 * whether the target directory is the same as the source
2964 * directory, we can lock from 2 to 4 inodes. 2954 * directory, we can lock from 2 to 4 inodes.
2965 */ 2955 */
2966 if (!new_parent)
2967 xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
2968 else
2969 xfs_lock_two_inodes(src_dp, target_dp,
2970 XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
2971
2972 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); 2956 xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
2973 2957
2974 /* 2958 /*
@@ -2976,9 +2960,9 @@ xfs_rename(
2976 * we can rely on either trans_commit or trans_cancel to unlock 2960 * we can rely on either trans_commit or trans_cancel to unlock
2977 * them. 2961 * them.
2978 */ 2962 */
2979 xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 2963 xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
2980 if (new_parent) 2964 if (new_parent)
2981 xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); 2965 xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
2982 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2966 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
2983 if (target_ip) 2967 if (target_ip)
2984 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2968 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);