diff options
author | Christoph Hellwig <hch@lst.de> | 2016-11-29 22:33:25 -0500 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2016-11-29 22:33:25 -0500 |
commit | 6552321831dce87ff5c466a55b58d472732caadc (patch) | |
tree | 84f3de7b89690c84fd13e2efa7a85d4918d342e3 /fs/xfs/xfs_inode.c | |
parent | f8319483f57f1ca22370f4150bb990aca7728a67 (diff) |
xfs: remove i_iolock and use i_rwsem in the VFS inode instead
This patch drops the XFS-own i_iolock and uses the VFS i_rwsem which
recently replaced i_mutex instead. This means we only have to take
one lock instead of two in many fast path operations, and we can
also shrink the xfs_inode structure. Thanks to the xfs_ilock family
there is very little churn, the only thing of note is that we need
to switch to using the lock_two_directory helper for taking the i_rwsem
on two inodes in a few places to make sure our lock order matches
the one used in the VFS.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@fb.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 82 |
1 file changed, 33 insertions, 49 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 4e560e6a12c1..e9ab42d8965b 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -142,31 +142,31 @@ xfs_ilock_attr_map_shared( | |||
142 | } | 142 | } |
143 | 143 | ||
144 | /* | 144 | /* |
145 | * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and | 145 | * In addition to i_rwsem in the VFS inode, the xfs inode contains 2 |
146 | * the i_lock. This routine allows various combinations of the locks to be | 146 | * multi-reader locks: i_mmap_lock and the i_lock. This routine allows |
147 | * obtained. | 147 | * various combinations of the locks to be obtained. |
148 | * | 148 | * |
149 | * The 3 locks should always be ordered so that the IO lock is obtained first, | 149 | * The 3 locks should always be ordered so that the IO lock is obtained first, |
150 | * the mmap lock second and the ilock last in order to prevent deadlock. | 150 | * the mmap lock second and the ilock last in order to prevent deadlock. |
151 | * | 151 | * |
152 | * Basic locking order: | 152 | * Basic locking order: |
153 | * | 153 | * |
154 | * i_iolock -> i_mmap_lock -> page_lock -> i_ilock | 154 | * i_rwsem -> i_mmap_lock -> page_lock -> i_ilock |
155 | * | 155 | * |
156 | * mmap_sem locking order: | 156 | * mmap_sem locking order: |
157 | * | 157 | * |
158 | * i_iolock -> page lock -> mmap_sem | 158 | * i_rwsem -> page lock -> mmap_sem |
159 | * mmap_sem -> i_mmap_lock -> page_lock | 159 | * mmap_sem -> i_mmap_lock -> page_lock |
160 | * | 160 | * |
161 | * The difference in mmap_sem locking order mean that we cannot hold the | 161 | * The difference in mmap_sem locking order mean that we cannot hold the |
162 | * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can | 162 | * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can |
163 | * fault in pages during copy in/out (for buffered IO) or require the mmap_sem | 163 | * fault in pages during copy in/out (for buffered IO) or require the mmap_sem |
164 | * in get_user_pages() to map the user pages into the kernel address space for | 164 | * in get_user_pages() to map the user pages into the kernel address space for |
165 | * direct IO. Similarly the i_iolock cannot be taken inside a page fault because | 165 | * direct IO. Similarly the i_rwsem cannot be taken inside a page fault because |
166 | * page faults already hold the mmap_sem. | 166 | * page faults already hold the mmap_sem. |
167 | * | 167 | * |
168 | * Hence to serialise fully against both syscall and mmap based IO, we need to | 168 | * Hence to serialise fully against both syscall and mmap based IO, we need to |
169 | * take both the i_iolock and the i_mmap_lock. These locks should *only* be both | 169 | * take both the i_rwsem and the i_mmap_lock. These locks should *only* be both |
170 | * taken in places where we need to invalidate the page cache in a race | 170 | * taken in places where we need to invalidate the page cache in a race |
171 | * free manner (e.g. truncate, hole punch and other extent manipulation | 171 | * free manner (e.g. truncate, hole punch and other extent manipulation |
172 | * functions). | 172 | * functions). |
@@ -191,10 +191,13 @@ xfs_ilock( | |||
191 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); | 191 | (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); |
192 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); | 192 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); |
193 | 193 | ||
194 | if (lock_flags & XFS_IOLOCK_EXCL) | 194 | if (lock_flags & XFS_IOLOCK_EXCL) { |
195 | mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | 195 | down_write_nested(&VFS_I(ip)->i_rwsem, |
196 | else if (lock_flags & XFS_IOLOCK_SHARED) | 196 | XFS_IOLOCK_DEP(lock_flags)); |
197 | mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); | 197 | } else if (lock_flags & XFS_IOLOCK_SHARED) { |
198 | down_read_nested(&VFS_I(ip)->i_rwsem, | ||
199 | XFS_IOLOCK_DEP(lock_flags)); | ||
200 | } | ||
198 | 201 | ||
199 | if (lock_flags & XFS_MMAPLOCK_EXCL) | 202 | if (lock_flags & XFS_MMAPLOCK_EXCL) |
200 | mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); | 203 | mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags)); |
@@ -240,10 +243,10 @@ xfs_ilock_nowait( | |||
240 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); | 243 | ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0); |
241 | 244 | ||
242 | if (lock_flags & XFS_IOLOCK_EXCL) { | 245 | if (lock_flags & XFS_IOLOCK_EXCL) { |
243 | if (!mrtryupdate(&ip->i_iolock)) | 246 | if (!down_write_trylock(&VFS_I(ip)->i_rwsem)) |
244 | goto out; | 247 | goto out; |
245 | } else if (lock_flags & XFS_IOLOCK_SHARED) { | 248 | } else if (lock_flags & XFS_IOLOCK_SHARED) { |
246 | if (!mrtryaccess(&ip->i_iolock)) | 249 | if (!down_read_trylock(&VFS_I(ip)->i_rwsem)) |
247 | goto out; | 250 | goto out; |
248 | } | 251 | } |
249 | 252 | ||
@@ -271,9 +274,9 @@ out_undo_mmaplock: | |||
271 | mrunlock_shared(&ip->i_mmaplock); | 274 | mrunlock_shared(&ip->i_mmaplock); |
272 | out_undo_iolock: | 275 | out_undo_iolock: |
273 | if (lock_flags & XFS_IOLOCK_EXCL) | 276 | if (lock_flags & XFS_IOLOCK_EXCL) |
274 | mrunlock_excl(&ip->i_iolock); | 277 | up_write(&VFS_I(ip)->i_rwsem); |
275 | else if (lock_flags & XFS_IOLOCK_SHARED) | 278 | else if (lock_flags & XFS_IOLOCK_SHARED) |
276 | mrunlock_shared(&ip->i_iolock); | 279 | up_read(&VFS_I(ip)->i_rwsem); |
277 | out: | 280 | out: |
278 | return 0; | 281 | return 0; |
279 | } | 282 | } |
@@ -310,9 +313,9 @@ xfs_iunlock( | |||
310 | ASSERT(lock_flags != 0); | 313 | ASSERT(lock_flags != 0); |
311 | 314 | ||
312 | if (lock_flags & XFS_IOLOCK_EXCL) | 315 | if (lock_flags & XFS_IOLOCK_EXCL) |
313 | mrunlock_excl(&ip->i_iolock); | 316 | up_write(&VFS_I(ip)->i_rwsem); |
314 | else if (lock_flags & XFS_IOLOCK_SHARED) | 317 | else if (lock_flags & XFS_IOLOCK_SHARED) |
315 | mrunlock_shared(&ip->i_iolock); | 318 | up_read(&VFS_I(ip)->i_rwsem); |
316 | 319 | ||
317 | if (lock_flags & XFS_MMAPLOCK_EXCL) | 320 | if (lock_flags & XFS_MMAPLOCK_EXCL) |
318 | mrunlock_excl(&ip->i_mmaplock); | 321 | mrunlock_excl(&ip->i_mmaplock); |
@@ -345,7 +348,7 @@ xfs_ilock_demote( | |||
345 | if (lock_flags & XFS_MMAPLOCK_EXCL) | 348 | if (lock_flags & XFS_MMAPLOCK_EXCL) |
346 | mrdemote(&ip->i_mmaplock); | 349 | mrdemote(&ip->i_mmaplock); |
347 | if (lock_flags & XFS_IOLOCK_EXCL) | 350 | if (lock_flags & XFS_IOLOCK_EXCL) |
348 | mrdemote(&ip->i_iolock); | 351 | downgrade_write(&VFS_I(ip)->i_rwsem); |
349 | 352 | ||
350 | trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); | 353 | trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_); |
351 | } | 354 | } |
@@ -370,8 +373,9 @@ xfs_isilocked( | |||
370 | 373 | ||
371 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { | 374 | if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { |
372 | if (!(lock_flags & XFS_IOLOCK_SHARED)) | 375 | if (!(lock_flags & XFS_IOLOCK_SHARED)) |
373 | return !!ip->i_iolock.mr_writer; | 376 | return !debug_locks || |
374 | return rwsem_is_locked(&ip->i_iolock.mr_lock); | 377 | lockdep_is_held_type(&VFS_I(ip)->i_rwsem, 0); |
378 | return rwsem_is_locked(&VFS_I(ip)->i_rwsem); | ||
375 | } | 379 | } |
376 | 380 | ||
377 | ASSERT(0); | 381 | ASSERT(0); |
@@ -421,11 +425,7 @@ xfs_lock_inumorder(int lock_mode, int subclass) | |||
421 | 425 | ||
422 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { | 426 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { |
423 | ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); | 427 | ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS); |
424 | ASSERT(xfs_lockdep_subclass_ok(subclass + | ||
425 | XFS_IOLOCK_PARENT_VAL)); | ||
426 | class += subclass << XFS_IOLOCK_SHIFT; | 428 | class += subclass << XFS_IOLOCK_SHIFT; |
427 | if (lock_mode & XFS_IOLOCK_PARENT) | ||
428 | class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT; | ||
429 | } | 429 | } |
430 | 430 | ||
431 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { | 431 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) { |
@@ -477,8 +477,6 @@ xfs_lock_inodes( | |||
477 | XFS_ILOCK_EXCL)); | 477 | XFS_ILOCK_EXCL)); |
478 | ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | | 478 | ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED | |
479 | XFS_ILOCK_SHARED))); | 479 | XFS_ILOCK_SHARED))); |
480 | ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) || | ||
481 | inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1); | ||
482 | ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || | 480 | ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) || |
483 | inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); | 481 | inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1); |
484 | ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || | 482 | ASSERT(!(lock_mode & XFS_ILOCK_EXCL) || |
@@ -581,10 +579,8 @@ xfs_lock_two_inodes( | |||
581 | int attempts = 0; | 579 | int attempts = 0; |
582 | xfs_log_item_t *lp; | 580 | xfs_log_item_t *lp; |
583 | 581 | ||
584 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) { | 582 | ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))); |
585 | ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))); | 583 | if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) |
586 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); | ||
587 | } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) | ||
588 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); | 584 | ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))); |
589 | 585 | ||
590 | ASSERT(ip0->i_ino != ip1->i_ino); | 586 | ASSERT(ip0->i_ino != ip1->i_ino); |
@@ -715,7 +711,6 @@ xfs_lookup( | |||
715 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 711 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
716 | return -EIO; | 712 | return -EIO; |
717 | 713 | ||
718 | xfs_ilock(dp, XFS_IOLOCK_SHARED); | ||
719 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); | 714 | error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); |
720 | if (error) | 715 | if (error) |
721 | goto out_unlock; | 716 | goto out_unlock; |
@@ -724,14 +719,12 @@ xfs_lookup( | |||
724 | if (error) | 719 | if (error) |
725 | goto out_free_name; | 720 | goto out_free_name; |
726 | 721 | ||
727 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | ||
728 | return 0; | 722 | return 0; |
729 | 723 | ||
730 | out_free_name: | 724 | out_free_name: |
731 | if (ci_name) | 725 | if (ci_name) |
732 | kmem_free(ci_name->name); | 726 | kmem_free(ci_name->name); |
733 | out_unlock: | 727 | out_unlock: |
734 | xfs_iunlock(dp, XFS_IOLOCK_SHARED); | ||
735 | *ipp = NULL; | 728 | *ipp = NULL; |
736 | return error; | 729 | return error; |
737 | } | 730 | } |
@@ -1215,8 +1208,7 @@ xfs_create( | |||
1215 | if (error) | 1208 | if (error) |
1216 | goto out_release_inode; | 1209 | goto out_release_inode; |
1217 | 1210 | ||
1218 | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL | | 1211 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
1219 | XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT); | ||
1220 | unlock_dp_on_error = true; | 1212 | unlock_dp_on_error = true; |
1221 | 1213 | ||
1222 | xfs_defer_init(&dfops, &first_block); | 1214 | xfs_defer_init(&dfops, &first_block); |
@@ -1252,7 +1244,7 @@ xfs_create( | |||
1252 | * the transaction cancel unlocking dp so don't do it explicitly in the | 1244 | * the transaction cancel unlocking dp so don't do it explicitly in the |
1253 | * error path. | 1245 | * error path. |
1254 | */ | 1246 | */ |
1255 | xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1247 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
1256 | unlock_dp_on_error = false; | 1248 | unlock_dp_on_error = false; |
1257 | 1249 | ||
1258 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, | 1250 | error = xfs_dir_createname(tp, dp, name, ip->i_ino, |
@@ -1325,7 +1317,7 @@ xfs_create( | |||
1325 | xfs_qm_dqrele(pdqp); | 1317 | xfs_qm_dqrele(pdqp); |
1326 | 1318 | ||
1327 | if (unlock_dp_on_error) | 1319 | if (unlock_dp_on_error) |
1328 | xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1320 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
1329 | return error; | 1321 | return error; |
1330 | } | 1322 | } |
1331 | 1323 | ||
@@ -1466,11 +1458,10 @@ xfs_link( | |||
1466 | if (error) | 1458 | if (error) |
1467 | goto std_return; | 1459 | goto std_return; |
1468 | 1460 | ||
1469 | xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
1470 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); | 1461 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); |
1471 | 1462 | ||
1472 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); | 1463 | xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); |
1473 | xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 1464 | xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); |
1474 | 1465 | ||
1475 | /* | 1466 | /* |
1476 | * If we are using project inheritance, we only allow hard link | 1467 | * If we are using project inheritance, we only allow hard link |
@@ -2579,10 +2570,9 @@ xfs_remove( | |||
2579 | goto std_return; | 2570 | goto std_return; |
2580 | } | 2571 | } |
2581 | 2572 | ||
2582 | xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
2583 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); | 2573 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); |
2584 | 2574 | ||
2585 | xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 2575 | xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); |
2586 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 2576 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); |
2587 | 2577 | ||
2588 | /* | 2578 | /* |
@@ -2963,12 +2953,6 @@ xfs_rename( | |||
2963 | * whether the target directory is the same as the source | 2953 | * whether the target directory is the same as the source |
2964 | * directory, we can lock from 2 to 4 inodes. | 2954 | * directory, we can lock from 2 to 4 inodes. |
2965 | */ | 2955 | */ |
2966 | if (!new_parent) | ||
2967 | xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
2968 | else | ||
2969 | xfs_lock_two_inodes(src_dp, target_dp, | ||
2970 | XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); | ||
2971 | |||
2972 | xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); | 2956 | xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); |
2973 | 2957 | ||
2974 | /* | 2958 | /* |
@@ -2976,9 +2960,9 @@ xfs_rename( | |||
2976 | * we can rely on either trans_commit or trans_cancel to unlock | 2960 | * we can rely on either trans_commit or trans_cancel to unlock |
2977 | * them. | 2961 | * them. |
2978 | */ | 2962 | */ |
2979 | xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 2963 | xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); |
2980 | if (new_parent) | 2964 | if (new_parent) |
2981 | xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); | 2965 | xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); |
2982 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); | 2966 | xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); |
2983 | if (target_ip) | 2967 | if (target_ip) |
2984 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); | 2968 | xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); |