diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 131 |
1 files changed, 80 insertions, 51 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index c47d7dc0a307..b5e13fbb7386 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -41,6 +41,40 @@ | |||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 41 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 42 | ||
43 | /* | 43 | /* |
44 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
45 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
46 | */ | ||
47 | static inline void | ||
48 | xfs_rw_ilock( | ||
49 | struct xfs_inode *ip, | ||
50 | int type) | ||
51 | { | ||
52 | if (type & XFS_IOLOCK_EXCL) | ||
53 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
54 | xfs_ilock(ip, type); | ||
55 | } | ||
56 | |||
57 | static inline void | ||
58 | xfs_rw_iunlock( | ||
59 | struct xfs_inode *ip, | ||
60 | int type) | ||
61 | { | ||
62 | xfs_iunlock(ip, type); | ||
63 | if (type & XFS_IOLOCK_EXCL) | ||
64 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
65 | } | ||
66 | |||
67 | static inline void | ||
68 | xfs_rw_ilock_demote( | ||
69 | struct xfs_inode *ip, | ||
70 | int type) | ||
71 | { | ||
72 | xfs_ilock_demote(ip, type); | ||
73 | if (type & XFS_IOLOCK_EXCL) | ||
74 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
75 | } | ||
76 | |||
77 | /* | ||
44 | * xfs_iozero | 78 | * xfs_iozero |
45 | * | 79 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 80 | * xfs_iozero clears the specified range of buffer supplied, |
@@ -262,22 +296,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 296 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 297 | return -EIO; |
264 | 298 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 299 | if (unlikely(ioflags & IO_ISDIRECT)) { |
300 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
301 | |||
270 | if (inode->i_mapping->nrpages) { | 302 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 303 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 304 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 305 | -1, FI_REMAPF_LOCKED); |
306 | if (ret) { | ||
307 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
308 | return ret; | ||
309 | } | ||
274 | } | 310 | } |
275 | mutex_unlock(&inode->i_mutex); | 311 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 312 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 313 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 314 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 315 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 316 | ||
@@ -285,7 +318,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 318 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 319 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 320 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 321 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 322 | return ret; |
290 | } | 323 | } |
291 | 324 | ||
@@ -309,7 +342,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 342 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 343 | return -EIO; |
311 | 344 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 345 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 346 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 347 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 348 | ||
@@ -317,7 +350,7 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 350 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 351 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 352 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 353 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 354 | return ret; |
322 | } | 355 | } |
323 | 356 | ||
@@ -338,10 +371,10 @@ xfs_aio_write_isize_update( | |||
338 | *ppos = isize; | 371 | *ppos = isize; |
339 | 372 | ||
340 | if (*ppos > ip->i_size) { | 373 | if (*ppos > ip->i_size) { |
341 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 374 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); |
342 | if (*ppos > ip->i_size) | 375 | if (*ppos > ip->i_size) |
343 | ip->i_size = *ppos; | 376 | ip->i_size = *ppos; |
344 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 377 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
345 | } | 378 | } |
346 | } | 379 | } |
347 | 380 | ||
@@ -356,14 +389,22 @@ xfs_aio_write_newsize_update( | |||
356 | struct xfs_inode *ip) | 389 | struct xfs_inode *ip) |
357 | { | 390 | { |
358 | if (ip->i_new_size) { | 391 | if (ip->i_new_size) { |
359 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 392 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); |
360 | ip->i_new_size = 0; | 393 | ip->i_new_size = 0; |
361 | if (ip->i_d.di_size > ip->i_size) | 394 | if (ip->i_d.di_size > ip->i_size) |
362 | ip->i_d.di_size = ip->i_size; | 395 | ip->i_d.di_size = ip->i_size; |
363 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 396 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
364 | } | 397 | } |
365 | } | 398 | } |
366 | 399 | ||
400 | /* | ||
401 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
402 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
403 | * could cause lock inversions between the aio_write path and the splice path | |
404 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
405 | * writes to the same inode. The only real way to fix this is to re-implement | ||
406 | * the generic code here with correct locking orders. | ||
407 | */ | ||
367 | STATIC ssize_t | 408 | STATIC ssize_t |
368 | xfs_file_splice_write( | 409 | xfs_file_splice_write( |
369 | struct pipe_inode_info *pipe, | 410 | struct pipe_inode_info *pipe, |
@@ -604,7 +645,6 @@ xfs_file_aio_write( | |||
604 | xfs_fsize_t new_size; | 645 | xfs_fsize_t new_size; |
605 | int iolock; | 646 | int iolock; |
606 | size_t ocount = 0, count; | 647 | size_t ocount = 0, count; |
607 | int need_i_mutex; | ||
608 | 648 | ||
609 | XFS_STATS_INC(xs_write_calls); | 649 | XFS_STATS_INC(xs_write_calls); |
610 | 650 | ||
@@ -631,21 +671,17 @@ xfs_file_aio_write( | |||
631 | relock: | 671 | relock: |
632 | if (ioflags & IO_ISDIRECT) { | 672 | if (ioflags & IO_ISDIRECT) { |
633 | iolock = XFS_IOLOCK_SHARED; | 673 | iolock = XFS_IOLOCK_SHARED; |
634 | need_i_mutex = 0; | ||
635 | } else { | 674 | } else { |
636 | iolock = XFS_IOLOCK_EXCL; | 675 | iolock = XFS_IOLOCK_EXCL; |
637 | need_i_mutex = 1; | ||
638 | mutex_lock(&inode->i_mutex); | ||
639 | } | 676 | } |
640 | 677 | ||
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | ||
642 | |||
643 | start: | 678 | start: |
679 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL|iolock); | ||
644 | ret = generic_write_checks(file, &pos, &count, | 680 | ret = generic_write_checks(file, &pos, &count, |
645 | S_ISBLK(inode->i_mode)); | 681 | S_ISBLK(inode->i_mode)); |
646 | if (ret) { | 682 | if (ret) { |
647 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 683 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); |
648 | goto out_unlock_mutex; | 684 | return ret; |
649 | } | 685 | } |
650 | 686 | ||
651 | if (ioflags & IO_ISDIRECT) { | 687 | if (ioflags & IO_ISDIRECT) { |
@@ -654,16 +690,20 @@ start: | |||
654 | mp->m_rtdev_targp : mp->m_ddev_targp; | 690 | mp->m_rtdev_targp : mp->m_ddev_targp; |
655 | 691 | ||
656 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 692 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { |
657 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 693 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); |
658 | return XFS_ERROR(-EINVAL); | 694 | return XFS_ERROR(-EINVAL); |
659 | } | 695 | } |
660 | 696 | ||
661 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | 697 | /* |
662 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 698 | * For direct I/O, if there are cached pages or we're extending |
699 | * the file, we need IOLOCK_EXCL until we're sure the bytes at | ||
700 | * the new EOF have been zeroed and/or the cached pages are | ||
701 | * flushed out. Upgrade the I/O lock and start again. | ||
702 | */ | ||
703 | if (iolock != XFS_IOLOCK_EXCL && | ||
704 | (mapping->nrpages || pos > ip->i_size)) { | ||
705 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL|iolock); | ||
663 | iolock = XFS_IOLOCK_EXCL; | 706 | iolock = XFS_IOLOCK_EXCL; |
664 | need_i_mutex = 1; | ||
665 | mutex_lock(&inode->i_mutex); | ||
666 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | ||
667 | goto start; | 707 | goto start; |
668 | } | 708 | } |
669 | } | 709 | } |
@@ -687,11 +727,11 @@ start: | |||
687 | if (pos > ip->i_size) { | 727 | if (pos > ip->i_size) { |
688 | ret = -xfs_zero_eof(ip, pos, ip->i_size); | 728 | ret = -xfs_zero_eof(ip, pos, ip->i_size); |
689 | if (ret) { | 729 | if (ret) { |
690 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 730 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
691 | goto out_unlock_internal; | 731 | goto out_unlock_internal; |
692 | } | 732 | } |
693 | } | 733 | } |
694 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 734 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
695 | 735 | ||
696 | /* | 736 | /* |
697 | * If we're writing the file then make sure to clear the | 737 | * If we're writing the file then make sure to clear the |
@@ -708,7 +748,7 @@ start: | |||
708 | 748 | ||
709 | if ((ioflags & IO_ISDIRECT)) { | 749 | if ((ioflags & IO_ISDIRECT)) { |
710 | if (mapping->nrpages) { | 750 | if (mapping->nrpages) { |
711 | WARN_ON(need_i_mutex == 0); | 751 | WARN_ON(iolock != XFS_IOLOCK_EXCL); |
712 | ret = -xfs_flushinval_pages(ip, | 752 | ret = -xfs_flushinval_pages(ip, |
713 | (pos & PAGE_CACHE_MASK), | 753 | (pos & PAGE_CACHE_MASK), |
714 | -1, FI_REMAPF_LOCKED); | 754 | -1, FI_REMAPF_LOCKED); |
@@ -716,13 +756,10 @@ start: | |||
716 | goto out_unlock_internal; | 756 | goto out_unlock_internal; |
717 | } | 757 | } |
718 | 758 | ||
719 | if (need_i_mutex) { | 759 | if (iolock == XFS_IOLOCK_EXCL) { |
720 | /* demote the lock now the cached pages are gone */ | 760 | /* demote the lock now the cached pages are gone */ |
721 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 761 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
722 | mutex_unlock(&inode->i_mutex); | ||
723 | |||
724 | iolock = XFS_IOLOCK_SHARED; | 762 | iolock = XFS_IOLOCK_SHARED; |
725 | need_i_mutex = 0; | ||
726 | } | 763 | } |
727 | 764 | ||
728 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 765 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); |
@@ -740,7 +777,7 @@ start: | |||
740 | count -= ret; | 777 | count -= ret; |
741 | 778 | ||
742 | ioflags &= ~IO_ISDIRECT; | 779 | ioflags &= ~IO_ISDIRECT; |
743 | xfs_iunlock(ip, iolock); | 780 | xfs_rw_iunlock(ip, iolock); |
744 | goto relock; | 781 | goto relock; |
745 | } | 782 | } |
746 | } else { | 783 | } else { |
@@ -775,14 +812,9 @@ write_retry: | |||
775 | loff_t end = pos + ret - 1; | 812 | loff_t end = pos + ret - 1; |
776 | int error, error2; | 813 | int error, error2; |
777 | 814 | ||
778 | xfs_iunlock(ip, iolock); | 815 | xfs_rw_iunlock(ip, iolock); |
779 | if (need_i_mutex) | ||
780 | mutex_unlock(&inode->i_mutex); | ||
781 | |||
782 | error = filemap_write_and_wait_range(mapping, pos, end); | 816 | error = filemap_write_and_wait_range(mapping, pos, end); |
783 | if (need_i_mutex) | 817 | xfs_rw_ilock(ip, iolock); |
784 | mutex_lock(&inode->i_mutex); | ||
785 | xfs_ilock(ip, iolock); | ||
786 | 818 | ||
787 | error2 = -xfs_file_fsync(file, | 819 | error2 = -xfs_file_fsync(file, |
788 | (file->f_flags & __O_SYNC) ? 0 : 1); | 820 | (file->f_flags & __O_SYNC) ? 0 : 1); |
@@ -794,10 +826,7 @@ write_retry: | |||
794 | 826 | ||
795 | out_unlock_internal: | 827 | out_unlock_internal: |
796 | xfs_aio_write_newsize_update(ip); | 828 | xfs_aio_write_newsize_update(ip); |
797 | xfs_iunlock(ip, iolock); | 829 | xfs_rw_iunlock(ip, iolock); |
798 | out_unlock_mutex: | ||
799 | if (need_i_mutex) | ||
800 | mutex_unlock(&inode->i_mutex); | ||
801 | return ret; | 830 | return ret; |
802 | } | 831 | } |
803 | 832 | ||