aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2015-04-16 07:59:34 -0400
committerDave Chinner <david@fromorbit.com>2015-04-16 07:59:34 -0400
commit6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5 (patch)
tree142857ff25d5d43393bdbc9c1ff18bffba4d7257 /fs/xfs
parentd5cc2e3f968ff60f247fdef15b04fac788ef46d2 (diff)
xfs: handle DIO overwrite EOF update completion correctly
Currently a DIO overwrite that extends the EOF (e.g. sub-block IO or write into allocated blocks beyond EOF) requires a transaction for the EOF update. This is done in IO completion context, but we aren't explicitly handling this situation properly and so it can run in interrupt context. Ensure that we defer IO that spans EOF correctly to the DIO completion workqueue, and now that we have an ioend in IO completion we can use the common ioend completion path to do all the work. Note: we do not preallocate the append transaction as we can have multiple mapping and allocation calls per direct IO. Hence preallocating can still leave us with nested transactions by attempting to map and allocate more blocks after we've preallocated an append transaction. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_aops.c61
-rw-r--r--fs/xfs/xfs_trace.h1
2 files changed, 31 insertions, 31 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 60d6466d72f6..a59443db1de9 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1293,7 +1293,7 @@ xfs_map_direct(
1293 imap); 1293 imap);
1294 } 1294 }
1295 1295
1296 if (ioend->io_type == XFS_IO_UNWRITTEN) 1296 if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
1297 set_buffer_defer_completion(bh_result); 1297 set_buffer_defer_completion(bh_result);
1298} 1298}
1299 1299
@@ -1535,8 +1535,10 @@ xfs_end_io_direct_write(
1535 struct xfs_mount *mp = ip->i_mount; 1535 struct xfs_mount *mp = ip->i_mount;
1536 struct xfs_ioend *ioend = private; 1536 struct xfs_ioend *ioend = private;
1537 1537
1538 trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL);
1539
1538 if (XFS_FORCED_SHUTDOWN(mp)) 1540 if (XFS_FORCED_SHUTDOWN(mp))
1539 goto out_destroy_ioend; 1541 goto out_end_io;
1540 1542
1541 /* 1543 /*
1542 * dio completion end_io functions are only called on writes if more 1544 * dio completion end_io functions are only called on writes if more
@@ -1557,40 +1559,37 @@ xfs_end_io_direct_write(
1557 ioend->io_offset = offset; 1559 ioend->io_offset = offset;
1558 1560
1559 /* 1561 /*
1560 * While the generic direct I/O code updates the inode size, it does 1562 * The ioend tells us whether we are doing unwritten extent conversion
1561 * so only after the end_io handler is called, which means our 1563 * or an append transaction that updates the on-disk file size. These
1562 * end_io handler thinks the on-disk size is outside the in-core 1564 * cases are the only cases where we should *potentially* be needing
1563 * size. To prevent this just update it a little bit earlier here. 1565 * to update the VFS inode size. When the ioend indicates this, we
1566 * are *guaranteed* to be running in non-interrupt context.
1567 *
1568 * We need to update the in-core inode size here so that we don't end up
1569 * with the on-disk inode size being outside the in-core inode size.
1570 * While we can do this in the process context after the IO has
1571 * completed, this does not work for AIO and hence we always update
1572 * the in-core inode size here if necessary.
1564 */ 1573 */
1565 if (offset + size > i_size_read(inode)) 1574 if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) {
1566 i_size_write(inode, offset + size); 1575 if (offset + size > i_size_read(inode))
1576 i_size_write(inode, offset + size);
1577 } else
1578 ASSERT(offset + size <= i_size_read(inode));
1567 1579
1568 /* 1580 /*
1569 * For direct I/O we do not know if we need to allocate blocks or not, 1581 * If we are doing an append IO that needs to update the EOF on disk,
1570 * so we can't preallocate an append transaction, as that results in 1582 * do the transaction reserve now so we can use common end io
1571 * nested reservations and log space deadlocks. Hence allocate the 1583 * processing. Stashing the error (if there is one) in the ioend will
1572 * transaction here. While this is sub-optimal and can block IO 1584 * result in the ioend processing passing on the error if it is
1573 * completion for some time, we're stuck with doing it this way until 1585 * possible as we can't return it from here.
1574 * we can pass the ioend to the direct IO allocation callbacks and
1575 * avoid nesting that way.
1576 */ 1586 */
1577 if (ioend->io_type == XFS_IO_UNWRITTEN) { 1587 if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend))
1578 xfs_iomap_write_unwritten(ip, offset, size); 1588 ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
1579 } else if (offset + size > ip->i_d.di_size) {
1580 struct xfs_trans *tp;
1581 int error;
1582
1583 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
1584 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
1585 if (error) {
1586 xfs_trans_cancel(tp, 0);
1587 goto out_destroy_ioend;
1588 }
1589 1589
1590 xfs_setfilesize(ip, tp, offset, size); 1590out_end_io:
1591 } 1591 xfs_end_io(&ioend->io_work);
1592out_destroy_ioend: 1592 return;
1593 xfs_destroy_ioend(ioend);
1594} 1593}
1595 1594
1596STATIC ssize_t 1595STATIC ssize_t
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2de8556ffac2..0ae50e9847bb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1220DEFINE_IOMAP_EVENT(xfs_gbmap_direct); 1220DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
1221DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); 1221DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
1222DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); 1222DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
1223DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
1223 1224
1224DECLARE_EVENT_CLASS(xfs_simple_io_class, 1225DECLARE_EVENT_CLASS(xfs_simple_io_class,
1225 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 1226 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),