aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-01-11 19:37:10 -0500
committerDave Chinner <david@fromorbit.com>2011-01-11 19:37:10 -0500
commit487f84f3f80bc6f00c59725e822653d3ec174b85 (patch)
treed84117b479269b0a96a3041f67006d25b44300c3
parent4c5cfd1b4157fb75d43b44a147c2feba6422fc4f (diff)
xfs: introduce xfs_rw_lock() helpers for locking the inode
We need to obtain the i_mutex, i_iolock and i_ilock during the read and write paths. Add a set of wrapper functions to neatly encapsulate the lock ordering and shared/exclusive semantics to make the locking easier to follow and get right. Note that this changes some of the exclusive locking serialisation in that serialisation will occur against the i_mutex instead of the XFS_IOLOCK_EXCL. This does not change any behaviour, and it is arguably more efficient to use the mutex for such serialisation than the rw_sem. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c131
1 files changed, 80 insertions, 51 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index c47d7dc0a307..b5e13fbb7386 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -41,6 +41,40 @@
41static const struct vm_operations_struct xfs_file_vm_ops; 41static const struct vm_operations_struct xfs_file_vm_ops;
42 42
43/* 43/*
44 * Locking primitives for read and write IO paths to ensure we consistently use
45 * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
46 */
47static inline void
48xfs_rw_ilock(
49 struct xfs_inode *ip,
50 int type)
51{
52 if (type & XFS_IOLOCK_EXCL)
53 mutex_lock(&VFS_I(ip)->i_mutex);
54 xfs_ilock(ip, type);
55}
56
57static inline void
58xfs_rw_iunlock(
59 struct xfs_inode *ip,
60 int type)
61{
62 xfs_iunlock(ip, type);
63 if (type & XFS_IOLOCK_EXCL)
64 mutex_unlock(&VFS_I(ip)->i_mutex);
65}
66
67static inline void
68xfs_rw_ilock_demote(
69 struct xfs_inode *ip,
70 int type)
71{
72 xfs_ilock_demote(ip, type);
73 if (type & XFS_IOLOCK_EXCL)
74 mutex_unlock(&VFS_I(ip)->i_mutex);
75}
76
77/*
44 * xfs_iozero 78 * xfs_iozero
45 * 79 *
46 * xfs_iozero clears the specified range of buffer supplied, 80 * xfs_iozero clears the specified range of buffer supplied,
@@ -262,22 +296,21 @@ xfs_file_aio_read(
262 if (XFS_FORCED_SHUTDOWN(mp)) 296 if (XFS_FORCED_SHUTDOWN(mp))
263 return -EIO; 297 return -EIO;
264 298
265 if (unlikely(ioflags & IO_ISDIRECT))
266 mutex_lock(&inode->i_mutex);
267 xfs_ilock(ip, XFS_IOLOCK_SHARED);
268
269 if (unlikely(ioflags & IO_ISDIRECT)) { 299 if (unlikely(ioflags & IO_ISDIRECT)) {
300 xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
301
270 if (inode->i_mapping->nrpages) { 302 if (inode->i_mapping->nrpages) {
271 ret = -xfs_flushinval_pages(ip, 303 ret = -xfs_flushinval_pages(ip,
272 (iocb->ki_pos & PAGE_CACHE_MASK), 304 (iocb->ki_pos & PAGE_CACHE_MASK),
273 -1, FI_REMAPF_LOCKED); 305 -1, FI_REMAPF_LOCKED);
306 if (ret) {
307 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
308 return ret;
309 }
274 } 310 }
275 mutex_unlock(&inode->i_mutex); 311 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
276 if (ret) { 312 } else
277 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 313 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
278 return ret;
279 }
280 }
281 314
282 trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); 315 trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
283 316
@@ -285,7 +318,7 @@ xfs_file_aio_read(
285 if (ret > 0) 318 if (ret > 0)
286 XFS_STATS_ADD(xs_read_bytes, ret); 319 XFS_STATS_ADD(xs_read_bytes, ret);
287 320
288 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 321 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
289 return ret; 322 return ret;
290} 323}
291 324
@@ -309,7 +342,7 @@ xfs_file_splice_read(
309 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 342 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
310 return -EIO; 343 return -EIO;
311 344
312 xfs_ilock(ip, XFS_IOLOCK_SHARED); 345 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
313 346
314 trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 347 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
315 348
@@ -317,7 +350,7 @@ xfs_file_splice_read(
317 if (ret > 0) 350 if (ret > 0)
318 XFS_STATS_ADD(xs_read_bytes, ret); 351 XFS_STATS_ADD(xs_read_bytes, ret);
319 352
320 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 353 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
321 return ret; 354 return ret;
322} 355}
323 356
@@ -338,10 +371,10 @@ xfs_aio_write_isize_update(
338 *ppos = isize; 371 *ppos = isize;
339 372
340 if (*ppos > ip->i_size) { 373 if (*ppos > ip->i_size) {
341 xfs_ilock(ip, XFS_ILOCK_EXCL); 374 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
342 if (*ppos > ip->i_size) 375 if (*ppos > ip->i_size)
343 ip->i_size = *ppos; 376 ip->i_size = *ppos;
344 xfs_iunlock(ip, XFS_ILOCK_EXCL); 377 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
345 } 378 }
346} 379}
347 380
@@ -356,14 +389,22 @@ xfs_aio_write_newsize_update(
356 struct xfs_inode *ip) 389 struct xfs_inode *ip)
357{ 390{
358 if (ip->i_new_size) { 391 if (ip->i_new_size) {
359 xfs_ilock(ip, XFS_ILOCK_EXCL); 392 xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
360 ip->i_new_size = 0; 393 ip->i_new_size = 0;
361 if (ip->i_d.di_size > ip->i_size) 394 if (ip->i_d.di_size > ip->i_size)
362 ip->i_d.di_size = ip->i_size; 395 ip->i_d.di_size = ip->i_size;
363 xfs_iunlock(ip, XFS_ILOCK_EXCL); 396 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
364 } 397 }
365} 398}
366 399
400/*
401 * xfs_file_splice_write() does not use xfs_rw_ilock() because
402 * generic_file_splice_write() takes the i_mutex itself. This, in theory,
 403 could cause lock inversions between the aio_write path and the splice path
404 * if someone is doing concurrent splice(2) based writes and write(2) based
405 * writes to the same inode. The only real way to fix this is to re-implement
406 * the generic code here with correct locking orders.
407 */
367STATIC ssize_t 408STATIC ssize_t
368xfs_file_splice_write( 409xfs_file_splice_write(
369 struct pipe_inode_info *pipe, 410 struct pipe_inode_info *pipe,
@@ -604,7 +645,6 @@ xfs_file_aio_write(
604 xfs_fsize_t new_size; 645 xfs_fsize_t new_size;
605 int iolock; 646 int iolock;
606 size_t ocount = 0, count; 647 size_t ocount = 0, count;
607 int need_i_mutex;
608 648
609 XFS_STATS_INC(xs_write_calls); 649 XFS_STATS_INC(xs_write_calls);
610 650
@@ -631,21 +671,17 @@ xfs_file_aio_write(
631relock: 671relock:
632 if (ioflags & IO_ISDIRECT) { 672 if (ioflags & IO_ISDIRECT) {
633 iolock = XFS_IOLOCK_SHARED; 673 iolock = XFS_IOLOCK_SHARED;
634 need_i_mutex = 0;
635 } else { 674 } else {
636 iolock = XFS_IOLOCK_EXCL; 675 iolock = XFS_IOLOCK_EXCL;
637 need_i_mutex = 1;
638 mutex_lock(&inode->i_mutex);
639 } 676 }
640 677
641 xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
642
643start: 678start:
679 xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock);
644 ret = generic_write_checks(file, &pos, &count, 680 ret = generic_write_checks(file, &pos, &count,
645 S_ISBLK(inode->i_mode)); 681 S_ISBLK(inode->i_mode));
646 if (ret) { 682 if (ret) {
647 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); 683 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
648 goto out_unlock_mutex; 684 return ret;
649 } 685 }
650 686
651 if (ioflags & IO_ISDIRECT) { 687 if (ioflags & IO_ISDIRECT) {
@@ -654,16 +690,20 @@ start:
654 mp->m_rtdev_targp : mp->m_ddev_targp; 690 mp->m_rtdev_targp : mp->m_ddev_targp;
655 691
656 if ((pos & target->bt_smask) || (count & target->bt_smask)) { 692 if ((pos & target->bt_smask) || (count & target->bt_smask)) {
657 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); 693 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
658 return XFS_ERROR(-EINVAL); 694 return XFS_ERROR(-EINVAL);
659 } 695 }
660 696
661 if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { 697 /*
662 xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); 698 * For direct I/O, if there are cached pages or we're extending
699 * the file, we need IOLOCK_EXCL until we're sure the bytes at
700 * the new EOF have been zeroed and/or the cached pages are
701 * flushed out. Upgrade the I/O lock and start again.
702 */
703 if (iolock != XFS_IOLOCK_EXCL &&
704 (mapping->nrpages || pos > ip->i_size)) {
705 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock);
663 iolock = XFS_IOLOCK_EXCL; 706 iolock = XFS_IOLOCK_EXCL;
664 need_i_mutex = 1;
665 mutex_lock(&inode->i_mutex);
666 xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
667 goto start; 707 goto start;
668 } 708 }
669 } 709 }
@@ -687,11 +727,11 @@ start:
687 if (pos > ip->i_size) { 727 if (pos > ip->i_size) {
688 ret = -xfs_zero_eof(ip, pos, ip->i_size); 728 ret = -xfs_zero_eof(ip, pos, ip->i_size);
689 if (ret) { 729 if (ret) {
690 xfs_iunlock(ip, XFS_ILOCK_EXCL); 730 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
691 goto out_unlock_internal; 731 goto out_unlock_internal;
692 } 732 }
693 } 733 }
694 xfs_iunlock(ip, XFS_ILOCK_EXCL); 734 xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
695 735
696 /* 736 /*
697 * If we're writing the file then make sure to clear the 737 * If we're writing the file then make sure to clear the
@@ -708,7 +748,7 @@ start:
708 748
709 if ((ioflags & IO_ISDIRECT)) { 749 if ((ioflags & IO_ISDIRECT)) {
710 if (mapping->nrpages) { 750 if (mapping->nrpages) {
711 WARN_ON(need_i_mutex == 0); 751 WARN_ON(iolock != XFS_IOLOCK_EXCL);
712 ret = -xfs_flushinval_pages(ip, 752 ret = -xfs_flushinval_pages(ip,
713 (pos & PAGE_CACHE_MASK), 753 (pos & PAGE_CACHE_MASK),
714 -1, FI_REMAPF_LOCKED); 754 -1, FI_REMAPF_LOCKED);
@@ -716,13 +756,10 @@ start:
716 goto out_unlock_internal; 756 goto out_unlock_internal;
717 } 757 }
718 758
719 if (need_i_mutex) { 759 if (iolock == XFS_IOLOCK_EXCL) {
720 /* demote the lock now the cached pages are gone */ 760 /* demote the lock now the cached pages are gone */
721 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); 761 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
722 mutex_unlock(&inode->i_mutex);
723
724 iolock = XFS_IOLOCK_SHARED; 762 iolock = XFS_IOLOCK_SHARED;
725 need_i_mutex = 0;
726 } 763 }
727 764
728 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); 765 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
@@ -740,7 +777,7 @@ start:
740 count -= ret; 777 count -= ret;
741 778
742 ioflags &= ~IO_ISDIRECT; 779 ioflags &= ~IO_ISDIRECT;
743 xfs_iunlock(ip, iolock); 780 xfs_rw_iunlock(ip, iolock);
744 goto relock; 781 goto relock;
745 } 782 }
746 } else { 783 } else {
@@ -775,14 +812,9 @@ write_retry:
775 loff_t end = pos + ret - 1; 812 loff_t end = pos + ret - 1;
776 int error, error2; 813 int error, error2;
777 814
778 xfs_iunlock(ip, iolock); 815 xfs_rw_iunlock(ip, iolock);
779 if (need_i_mutex)
780 mutex_unlock(&inode->i_mutex);
781
782 error = filemap_write_and_wait_range(mapping, pos, end); 816 error = filemap_write_and_wait_range(mapping, pos, end);
783 if (need_i_mutex) 817 xfs_rw_ilock(ip, iolock);
784 mutex_lock(&inode->i_mutex);
785 xfs_ilock(ip, iolock);
786 818
787 error2 = -xfs_file_fsync(file, 819 error2 = -xfs_file_fsync(file,
788 (file->f_flags & __O_SYNC) ? 0 : 1); 820 (file->f_flags & __O_SYNC) ? 0 : 1);
@@ -794,10 +826,7 @@ write_retry:
794 826
795 out_unlock_internal: 827 out_unlock_internal:
796 xfs_aio_write_newsize_update(ip); 828 xfs_aio_write_newsize_update(ip);
797 xfs_iunlock(ip, iolock); 829 xfs_rw_iunlock(ip, iolock);
798 out_unlock_mutex:
799 if (need_i_mutex)
800 mutex_unlock(&inode->i_mutex);
801 return ret; 830 return ret;
802} 831}
803 832