diff options
author | Eryu Guan <eguan@redhat.com> | 2017-09-21 14:26:18 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2017-09-26 13:55:19 -0400 |
commit | ee70daaba82d70766d0723b743d9fdeb3b06102a (patch) | |
tree | 4ebdc08030551694139a99df64765a723fd08d5b | |
parent | 546e7be8244dc050effef0555df5b8d94d10dafc (diff) |
xfs: update i_size after unwritten conversion in dio completion
Since commit d531d91d6990 ("xfs: always use unwritten extents for
direct I/O writes"), we start allocating unwritten extents for all
direct writes to allow appending aio in XFS.
But for dio writes that could extend file size we update the in-core
inode size first, then convert the unwritten extents to real
allocations at dio completion time in xfs_dio_write_end_io(). Thus a
racing direct read could see the new i_size and find the unwritten
extents first and read zeros instead of actual data, if the direct
writer also takes a shared iolock.
Fix it by updating the in-core inode size after the unwritten extent
conversion. To do this, introduce a new boolean argument to
xfs_iomap_write_unwritten() to tell if we want to update in-core
i_size or not.
Suggested-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/xfs_aops.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 33 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_pnfs.c | 2 |
5 files changed, 28 insertions, 19 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 29172609f2a3..f18e5932aec4 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -343,7 +343,8 @@ xfs_end_io( | |||
343 | error = xfs_reflink_end_cow(ip, offset, size); | 343 | error = xfs_reflink_end_cow(ip, offset, size); |
344 | break; | 344 | break; |
345 | case XFS_IO_UNWRITTEN: | 345 | case XFS_IO_UNWRITTEN: |
346 | error = xfs_iomap_write_unwritten(ip, offset, size); | 346 | /* writeback should never update isize */ |
347 | error = xfs_iomap_write_unwritten(ip, offset, size, false); | ||
347 | break; | 348 | break; |
348 | default: | 349 | default: |
349 | ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); | 350 | ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 350b6d43ba23..309e26c9dddb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -434,7 +434,6 @@ xfs_dio_write_end_io( | |||
434 | struct inode *inode = file_inode(iocb->ki_filp); | 434 | struct inode *inode = file_inode(iocb->ki_filp); |
435 | struct xfs_inode *ip = XFS_I(inode); | 435 | struct xfs_inode *ip = XFS_I(inode); |
436 | loff_t offset = iocb->ki_pos; | 436 | loff_t offset = iocb->ki_pos; |
437 | bool update_size = false; | ||
438 | int error = 0; | 437 | int error = 0; |
439 | 438 | ||
440 | trace_xfs_end_io_direct_write(ip, offset, size); | 439 | trace_xfs_end_io_direct_write(ip, offset, size); |
@@ -445,6 +444,21 @@ xfs_dio_write_end_io( | |||
445 | if (size <= 0) | 444 | if (size <= 0) |
446 | return size; | 445 | return size; |
447 | 446 | ||
447 | if (flags & IOMAP_DIO_COW) { | ||
448 | error = xfs_reflink_end_cow(ip, offset, size); | ||
449 | if (error) | ||
450 | return error; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * Unwritten conversion updates the in-core isize after extent | ||
455 | * conversion but before updating the on-disk size. Updating isize any | ||
456 | * earlier allows a racing dio read to find unwritten extents before | ||
457 | * they are converted. | ||
458 | */ | ||
459 | if (flags & IOMAP_DIO_UNWRITTEN) | ||
460 | return xfs_iomap_write_unwritten(ip, offset, size, true); | ||
461 | |||
448 | /* | 462 | /* |
449 | * We need to update the in-core inode size here so that we don't end up | 463 | * We need to update the in-core inode size here so that we don't end up |
450 | * with the on-disk inode size being outside the in-core inode size. We | 464 | * with the on-disk inode size being outside the in-core inode size. We |
@@ -459,20 +473,11 @@ xfs_dio_write_end_io( | |||
459 | spin_lock(&ip->i_flags_lock); | 473 | spin_lock(&ip->i_flags_lock); |
460 | if (offset + size > i_size_read(inode)) { | 474 | if (offset + size > i_size_read(inode)) { |
461 | i_size_write(inode, offset + size); | 475 | i_size_write(inode, offset + size); |
462 | update_size = true; | 476 | spin_unlock(&ip->i_flags_lock); |
463 | } | ||
464 | spin_unlock(&ip->i_flags_lock); | ||
465 | |||
466 | if (flags & IOMAP_DIO_COW) { | ||
467 | error = xfs_reflink_end_cow(ip, offset, size); | ||
468 | if (error) | ||
469 | return error; | ||
470 | } | ||
471 | |||
472 | if (flags & IOMAP_DIO_UNWRITTEN) | ||
473 | error = xfs_iomap_write_unwritten(ip, offset, size); | ||
474 | else if (update_size) | ||
475 | error = xfs_setfilesize(ip, offset, size); | 477 | error = xfs_setfilesize(ip, offset, size); |
478 | } else { | ||
479 | spin_unlock(&ip->i_flags_lock); | ||
480 | } | ||
476 | 481 | ||
477 | return error; | 482 | return error; |
478 | } | 483 | } |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a1909bc064e9..f179bdf1644d 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -829,7 +829,8 @@ int | |||
829 | xfs_iomap_write_unwritten( | 829 | xfs_iomap_write_unwritten( |
830 | xfs_inode_t *ip, | 830 | xfs_inode_t *ip, |
831 | xfs_off_t offset, | 831 | xfs_off_t offset, |
832 | xfs_off_t count) | 832 | xfs_off_t count, |
833 | bool update_isize) | ||
833 | { | 834 | { |
834 | xfs_mount_t *mp = ip->i_mount; | 835 | xfs_mount_t *mp = ip->i_mount; |
835 | xfs_fileoff_t offset_fsb; | 836 | xfs_fileoff_t offset_fsb; |
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten( | |||
840 | xfs_trans_t *tp; | 841 | xfs_trans_t *tp; |
841 | xfs_bmbt_irec_t imap; | 842 | xfs_bmbt_irec_t imap; |
842 | struct xfs_defer_ops dfops; | 843 | struct xfs_defer_ops dfops; |
844 | struct inode *inode = VFS_I(ip); | ||
843 | xfs_fsize_t i_size; | 845 | xfs_fsize_t i_size; |
844 | uint resblks; | 846 | uint resblks; |
845 | int error; | 847 | int error; |
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten( | |||
899 | i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); | 901 | i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); |
900 | if (i_size > offset + count) | 902 | if (i_size > offset + count) |
901 | i_size = offset + count; | 903 | i_size = offset + count; |
902 | 904 | if (update_isize && i_size > i_size_read(inode)) | |
905 | i_size_write(inode, i_size); | ||
903 | i_size = xfs_new_eof(ip, i_size); | 906 | i_size = xfs_new_eof(ip, i_size); |
904 | if (i_size) { | 907 | if (i_size) { |
905 | ip->i_d.di_size = i_size; | 908 | ip->i_d.di_size = i_size; |
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 00db3ecea084..ee535065c5d0 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, | |||
27 | struct xfs_bmbt_irec *, int); | 27 | struct xfs_bmbt_irec *, int); |
28 | int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, | 28 | int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, |
29 | struct xfs_bmbt_irec *); | 29 | struct xfs_bmbt_irec *); |
30 | int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); | 30 | int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); |
31 | 31 | ||
32 | void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, | 32 | void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, |
33 | struct xfs_bmbt_irec *); | 33 | struct xfs_bmbt_irec *); |
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 2f2dc3c09ad0..4246876df7b7 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c | |||
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks( | |||
274 | (end - 1) >> PAGE_SHIFT); | 274 | (end - 1) >> PAGE_SHIFT); |
275 | WARN_ON_ONCE(error); | 275 | WARN_ON_ONCE(error); |
276 | 276 | ||
277 | error = xfs_iomap_write_unwritten(ip, start, length); | 277 | error = xfs_iomap_write_unwritten(ip, start, length, false); |
278 | if (error) | 278 | if (error) |
279 | goto out_drop_iolock; | 279 | goto out_drop_iolock; |
280 | } | 280 | } |