diff options
author | Christoph Hellwig <hch@infradead.org> | 2011-12-18 15:00:12 -0500 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2012-01-17 16:10:19 -0500 |
commit | 2813d682e8e6a278f94817429afd46b30875bb6e (patch) | |
tree | d865b04ec89076b692a922b7f5fced9be0458f47 /fs/xfs/xfs_file.c | |
parent | ce7ae151ddada3dbf67301464343c154903166b3 (diff) |
xfs: remove the i_new_size field in struct xfs_inode
Now that we use the VFS i_size field throughout XFS there is no need for the
i_new_size field any more given that the VFS i_size field gets updated
in ->write_end before unlocking the page, and thus is always up to date when
writeback could see a page. Removing i_new_size also has the advantage that
we will never have to trim back di_size during a failed buffered write,
given that it never gets updated past i_size.
Note that currently the generic direct I/O code only updates i_size after
calling our end_io handler, which requires a small workaround to make
sure di_size actually makes it to disk. I hope to fix this properly in
the generic code.
A downside is that we lose the support for parallel non-overlapping O_DIRECT
appending writes that was recently added. I don't think keeping the complex
and fragile i_new_size infrastructure for this is a good tradeoff - if we
really care about parallel appending writers we should investigate turning
the iolock into a range lock, which would also allow for parallel
non-overlapping buffered writers.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r-- | fs/xfs/xfs_file.c | 72 |
1 files changed, 10 insertions, 62 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 86d5dc260464..632313926788 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -413,27 +413,6 @@ xfs_file_splice_read( | |||
413 | } | 413 | } |
414 | 414 | ||
415 | /* | 415 | /* |
416 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
417 | * part of the I/O may have been written to disk before the error occurred. In | ||
418 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
419 | * file size and now needs to be truncated back. | ||
420 | */ | ||
421 | STATIC void | ||
422 | xfs_aio_write_newsize_update( | ||
423 | struct xfs_inode *ip, | ||
424 | xfs_fsize_t new_size) | ||
425 | { | ||
426 | if (new_size == ip->i_new_size) { | ||
427 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
428 | if (new_size == ip->i_new_size) | ||
429 | ip->i_new_size = 0; | ||
430 | if (ip->i_d.di_size > i_size_read(VFS_I(ip))) | ||
431 | ip->i_d.di_size = i_size_read(VFS_I(ip)); | ||
432 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
433 | } | ||
434 | } | ||
435 | |||
436 | /* | ||
437 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | 416 | * xfs_file_splice_write() does not use xfs_rw_ilock() because |
438 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | 417 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, |
439 | * couuld cause lock inversions between the aio_write path and the splice path | 418 | * couuld cause lock inversions between the aio_write path and the splice path |
@@ -451,7 +430,6 @@ xfs_file_splice_write( | |||
451 | { | 430 | { |
452 | struct inode *inode = outfilp->f_mapping->host; | 431 | struct inode *inode = outfilp->f_mapping->host; |
453 | struct xfs_inode *ip = XFS_I(inode); | 432 | struct xfs_inode *ip = XFS_I(inode); |
454 | xfs_fsize_t new_size; | ||
455 | int ioflags = 0; | 433 | int ioflags = 0; |
456 | ssize_t ret; | 434 | ssize_t ret; |
457 | 435 | ||
@@ -465,20 +443,12 @@ xfs_file_splice_write( | |||
465 | 443 | ||
466 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 444 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
467 | 445 | ||
468 | new_size = *ppos + count; | ||
469 | |||
470 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
471 | if (new_size > i_size_read(inode)) | ||
472 | ip->i_new_size = new_size; | ||
473 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
474 | |||
475 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 446 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
476 | 447 | ||
477 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 448 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
478 | if (ret > 0) | 449 | if (ret > 0) |
479 | XFS_STATS_ADD(xs_write_bytes, ret); | 450 | XFS_STATS_ADD(xs_write_bytes, ret); |
480 | 451 | ||
481 | xfs_aio_write_newsize_update(ip, new_size); | ||
482 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 452 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
483 | return ret; | 453 | return ret; |
484 | } | 454 | } |
@@ -673,16 +643,13 @@ xfs_file_aio_write_checks( | |||
673 | struct file *file, | 643 | struct file *file, |
674 | loff_t *pos, | 644 | loff_t *pos, |
675 | size_t *count, | 645 | size_t *count, |
676 | xfs_fsize_t *new_sizep, | ||
677 | int *iolock) | 646 | int *iolock) |
678 | { | 647 | { |
679 | struct inode *inode = file->f_mapping->host; | 648 | struct inode *inode = file->f_mapping->host; |
680 | struct xfs_inode *ip = XFS_I(inode); | 649 | struct xfs_inode *ip = XFS_I(inode); |
681 | xfs_fsize_t new_size; | ||
682 | int error = 0; | 650 | int error = 0; |
683 | 651 | ||
684 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | 652 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); |
685 | *new_sizep = 0; | ||
686 | restart: | 653 | restart: |
687 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); | 654 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
688 | if (error) { | 655 | if (error) { |
@@ -697,15 +664,13 @@ restart: | |||
697 | /* | 664 | /* |
698 | * If the offset is beyond the size of the file, we need to zero any | 665 | * If the offset is beyond the size of the file, we need to zero any |
699 | * blocks that fall between the existing EOF and the start of this | 666 | * blocks that fall between the existing EOF and the start of this |
700 | * write. There is no need to issue zeroing if another in-flght IO ends | 667 | * write. If zeroing is needed and we are currently holding the |
701 | * at or before this one If zeronig is needed and we are currently | 668 | * iolock shared, we need to update it to exclusive which involves |
702 | * holding the iolock shared, we need to update it to exclusive which | 669 | * dropping all locks and relocking to maintain correct locking order. |
703 | * involves dropping all locks and relocking to maintain correct locking | 670 | * If we do this, restart the function to ensure all checks and values |
704 | * order. If we do this, restart the function to ensure all checks and | 671 | * are still valid. |
705 | * values are still valid. | ||
706 | */ | 672 | */ |
707 | if ((ip->i_new_size && *pos > ip->i_new_size) || | 673 | if (*pos > i_size_read(inode)) { |
708 | (!ip->i_new_size && *pos > i_size_read(inode))) { | ||
709 | if (*iolock == XFS_IOLOCK_SHARED) { | 674 | if (*iolock == XFS_IOLOCK_SHARED) { |
710 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | 675 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); |
711 | *iolock = XFS_IOLOCK_EXCL; | 676 | *iolock = XFS_IOLOCK_EXCL; |
@@ -714,19 +679,6 @@ restart: | |||
714 | } | 679 | } |
715 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); | 680 | error = -xfs_zero_eof(ip, *pos, i_size_read(inode)); |
716 | } | 681 | } |
717 | |||
718 | /* | ||
719 | * If this IO extends beyond EOF, we may need to update ip->i_new_size. | ||
720 | * We have already zeroed space beyond EOF (if necessary). Only update | ||
721 | * ip->i_new_size if this IO ends beyond any other in-flight writes. | ||
722 | */ | ||
723 | new_size = *pos + *count; | ||
724 | if (new_size > i_size_read(inode)) { | ||
725 | if (new_size > ip->i_new_size) | ||
726 | ip->i_new_size = new_size; | ||
727 | *new_sizep = new_size; | ||
728 | } | ||
729 | |||
730 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | 682 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
731 | if (error) | 683 | if (error) |
732 | return error; | 684 | return error; |
@@ -772,7 +724,6 @@ xfs_file_dio_aio_write( | |||
772 | unsigned long nr_segs, | 724 | unsigned long nr_segs, |
773 | loff_t pos, | 725 | loff_t pos, |
774 | size_t ocount, | 726 | size_t ocount, |
775 | xfs_fsize_t *new_size, | ||
776 | int *iolock) | 727 | int *iolock) |
777 | { | 728 | { |
778 | struct file *file = iocb->ki_filp; | 729 | struct file *file = iocb->ki_filp; |
@@ -817,7 +768,7 @@ xfs_file_dio_aio_write( | |||
817 | xfs_rw_ilock(ip, *iolock); | 768 | xfs_rw_ilock(ip, *iolock); |
818 | } | 769 | } |
819 | 770 | ||
820 | ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); | 771 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
821 | if (ret) | 772 | if (ret) |
822 | return ret; | 773 | return ret; |
823 | 774 | ||
@@ -855,7 +806,6 @@ xfs_file_buffered_aio_write( | |||
855 | unsigned long nr_segs, | 806 | unsigned long nr_segs, |
856 | loff_t pos, | 807 | loff_t pos, |
857 | size_t ocount, | 808 | size_t ocount, |
858 | xfs_fsize_t *new_size, | ||
859 | int *iolock) | 809 | int *iolock) |
860 | { | 810 | { |
861 | struct file *file = iocb->ki_filp; | 811 | struct file *file = iocb->ki_filp; |
@@ -869,7 +819,7 @@ xfs_file_buffered_aio_write( | |||
869 | *iolock = XFS_IOLOCK_EXCL; | 819 | *iolock = XFS_IOLOCK_EXCL; |
870 | xfs_rw_ilock(ip, *iolock); | 820 | xfs_rw_ilock(ip, *iolock); |
871 | 821 | ||
872 | ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); | 822 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
873 | if (ret) | 823 | if (ret) |
874 | return ret; | 824 | return ret; |
875 | 825 | ||
@@ -909,7 +859,6 @@ xfs_file_aio_write( | |||
909 | ssize_t ret; | 859 | ssize_t ret; |
910 | int iolock; | 860 | int iolock; |
911 | size_t ocount = 0; | 861 | size_t ocount = 0; |
912 | xfs_fsize_t new_size = 0; | ||
913 | 862 | ||
914 | XFS_STATS_INC(xs_write_calls); | 863 | XFS_STATS_INC(xs_write_calls); |
915 | 864 | ||
@@ -929,10 +878,10 @@ xfs_file_aio_write( | |||
929 | 878 | ||
930 | if (unlikely(file->f_flags & O_DIRECT)) | 879 | if (unlikely(file->f_flags & O_DIRECT)) |
931 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, | 880 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
932 | ocount, &new_size, &iolock); | 881 | ocount, &iolock); |
933 | else | 882 | else |
934 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | 883 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, |
935 | ocount, &new_size, &iolock); | 884 | ocount, &iolock); |
936 | 885 | ||
937 | if (ret <= 0) | 886 | if (ret <= 0) |
938 | goto out_unlock; | 887 | goto out_unlock; |
@@ -953,7 +902,6 @@ xfs_file_aio_write( | |||
953 | } | 902 | } |
954 | 903 | ||
955 | out_unlock: | 904 | out_unlock: |
956 | xfs_aio_write_newsize_update(ip, new_size); | ||
957 | xfs_rw_iunlock(ip, iolock); | 905 | xfs_rw_iunlock(ip, iolock); |
958 | return ret; | 906 | return ret; |
959 | } | 907 | } |