aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
author: Dave Chinner <dchinner@redhat.com> 2015-04-16 08:00:00 -0400
committer: Dave Chinner <david@fromorbit.com> 2015-04-16 08:00:00 -0400
commit: a06c277a13c3620c8ee9304891758f2fcff9c4a4 (patch)
tree: 4ab229dc91e9876a5a42df21f51b61c5977b7735 /fs/xfs
parent: 6dfa1b67e3b3a9bf536e2fb9ed99001c219822a5 (diff)
xfs: DIO writes within EOF don't need an ioend
DIO writes that lie entirely within EOF have nothing to do in IO completion. In this case, we don't need no steekin' ioend, and so we can avoid allocating an ioend until we have a mapping that spans EOF.

This means that IO completion has two contexts - deferred completion to the dio workqueue that uses an ioend, and interrupt completion that does nothing because there is nothing that can be done in this context.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/xfs_aops.c 69
-rw-r--r-- fs/xfs/xfs_trace.h 1
2 files changed, 40 insertions, 30 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a59443db1de9..c02a47453137 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1234,15 +1234,19 @@ xfs_vm_releasepage(
1234} 1234}
1235 1235
1236/* 1236/*
1237 * When we map a DIO buffer, we need to attach an ioend that describes the type 1237 * When we map a DIO buffer, we may need to attach an ioend that describes the
1238 * of write IO we are doing. This passes to the completion function the 1238 * type of write IO we are doing. This passes to the completion function the
1239 * operations it needs to perform. 1239 * operations it needs to perform. If the mapping is for an overwrite wholly
1240 * within the EOF then we don't need an ioend and so we don't allocate one.
1241 * This avoids the unnecessary overhead of allocating and freeing ioends for
1242 * workloads that don't require transactions on IO completion.
1240 * 1243 *
1241 * If we get multiple mappings in a single IO, we might be mapping different 1244 * If we get multiple mappings in a single IO, we might be mapping different
1242 * types. But because the direct IO can only have a single private pointer, we 1245 * types. But because the direct IO can only have a single private pointer, we
1243 * need to ensure that: 1246 * need to ensure that:
1244 * 1247 *
1245 * a) the ioend spans the entire region of the IO; and 1248 * a) i) the ioend spans the entire region of unwritten mappings; or
1249 * ii) the ioend spans all the mappings that cross or are beyond EOF; and
1246 * b) if it contains unwritten extents, it is *permanently* marked as such 1250 * b) if it contains unwritten extents, it is *permanently* marked as such
1247 * 1251 *
1248 * We could do this by chaining ioends like buffered IO does, but we only 1252 * We could do this by chaining ioends like buffered IO does, but we only
@@ -1283,21 +1287,23 @@ xfs_map_direct(
1283 trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, 1287 trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
1284 ioend->io_size, ioend->io_type, 1288 ioend->io_size, ioend->io_type,
1285 imap); 1289 imap);
1286 } else { 1290 } else if (type == XFS_IO_UNWRITTEN ||
1291 offset + size > i_size_read(inode)) {
1287 ioend = xfs_alloc_ioend(inode, type); 1292 ioend = xfs_alloc_ioend(inode, type);
1288 ioend->io_offset = offset; 1293 ioend->io_offset = offset;
1289 ioend->io_size = size; 1294 ioend->io_size = size;
1295
1290 bh_result->b_private = ioend; 1296 bh_result->b_private = ioend;
1297 set_buffer_defer_completion(bh_result);
1291 1298
1292 trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, 1299 trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
1293 imap); 1300 imap);
1301 } else {
1302 trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
1303 imap);
1294 } 1304 }
1295
1296 if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
1297 set_buffer_defer_completion(bh_result);
1298} 1305}
1299 1306
1300
1301/* 1307/*
1302 * If this is O_DIRECT or the mpage code calling tell them how large the mapping 1308 * If this is O_DIRECT or the mpage code calling tell them how large the mapping
1303 * is, so that we can avoid repeated get_blocks calls. 1309 * is, so that we can avoid repeated get_blocks calls.
@@ -1519,9 +1525,11 @@ xfs_get_blocks_direct(
1519/* 1525/*
1520 * Complete a direct I/O write request. 1526 * Complete a direct I/O write request.
1521 * 1527 *
1522 * If the private argument is non-NULL __xfs_get_blocks signals us that we 1528 * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
1523 * need to issue a transaction to convert the range from unwritten to written 1529 * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
1524 * extents. 1530 * wholly within the EOF and so there is nothing for us to do. Note that in this
1531 * case the completion can be called in interrupt context, whereas if we have an
1532 * ioend we will always be called in task context (i.e. from a workqueue).
1525 */ 1533 */
1526STATIC void 1534STATIC void
1527xfs_end_io_direct_write( 1535xfs_end_io_direct_write(
@@ -1535,7 +1543,13 @@ xfs_end_io_direct_write(
1535 struct xfs_mount *mp = ip->i_mount; 1543 struct xfs_mount *mp = ip->i_mount;
1536 struct xfs_ioend *ioend = private; 1544 struct xfs_ioend *ioend = private;
1537 1545
1538 trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL); 1546 trace_xfs_gbmap_direct_endio(ip, offset, size,
1547 ioend ? ioend->io_type : 0, NULL);
1548
1549 if (!ioend) {
1550 ASSERT(offset + size <= i_size_read(inode));
1551 return;
1552 }
1539 1553
1540 if (XFS_FORCED_SHUTDOWN(mp)) 1554 if (XFS_FORCED_SHUTDOWN(mp))
1541 goto out_end_io; 1555 goto out_end_io;
@@ -1548,12 +1562,12 @@ xfs_end_io_direct_write(
1548 1562
1549 /* 1563 /*
1550 * The ioend only maps whole blocks, while the IO may be sector aligned. 1564 * The ioend only maps whole blocks, while the IO may be sector aligned.
1551 * Hence the ioend offset/size may not match the IO offset/size exactly, 1565 * Hence the ioend offset/size may not match the IO offset/size exactly.
1552 * but should span it completely. Write the IO sizes into the ioend so 1566 * Because we don't map overwrites within EOF into the ioend, the offset
1553 * that completion processing does the right thing. 1567 * may not match, but only if the endio spans EOF. Either way, write
1568 * the IO sizes into the ioend so that completion processing does the
1569 * right thing.
1554 */ 1570 */
1555 ASSERT(size <= ioend->io_size);
1556 ASSERT(offset >= ioend->io_offset);
1557 ASSERT(offset + size <= ioend->io_offset + ioend->io_size); 1571 ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
1558 ioend->io_size = size; 1572 ioend->io_size = size;
1559 ioend->io_offset = offset; 1573 ioend->io_offset = offset;
@@ -1562,20 +1576,15 @@ xfs_end_io_direct_write(
1562 * The ioend tells us whether we are doing unwritten extent conversion 1576 * The ioend tells us whether we are doing unwritten extent conversion
1563 * or an append transaction that updates the on-disk file size. These 1577 * or an append transaction that updates the on-disk file size. These
1564 * cases are the only cases where we should *potentially* be needing 1578 * cases are the only cases where we should *potentially* be needing
1565 * to update the VFS inode size. When the ioend indicates this, we 1579 * to update the VFS inode size.
1566 * are *guaranteed* to be running in non-interrupt context.
1567 * 1580 *
1568 * We need to update the in-core inode size here so that we don't end up 1581 * We need to update the in-core inode size here so that we don't end up
1569 * with the on-disk inode size being outside the in-core inode size. 1582 * with the on-disk inode size being outside the in-core inode size. We
1570 * While we can do this in the process context after the IO has 1583 * have no other method of updating EOF for AIO, so always do it here
1571 * completed, this does not work for AIO and hence we always update 1584 * if necessary.
1572 * the in-core inode size here if necessary.
1573 */ 1585 */
1574 if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) { 1586 if (offset + size > i_size_read(inode))
1575 if (offset + size > i_size_read(inode)) 1587 i_size_write(inode, offset + size);
1576 i_size_write(inode, offset + size);
1577 } else
1578 ASSERT(offset + size <= i_size_read(inode));
1579 1588
1580 /* 1589 /*
1581 * If we are doing an append IO that needs to update the EOF on disk, 1590 * If we are doing an append IO that needs to update the EOF on disk,
@@ -1584,7 +1593,7 @@ xfs_end_io_direct_write(
1584 * result in the ioend processing passing on the error if it is 1593 * result in the ioend processing passing on the error if it is
1585 * possible as we can't return it from here. 1594 * possible as we can't return it from here.
1586 */ 1595 */
1587 if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend)) 1596 if (ioend->io_type == XFS_IO_OVERWRITE)
1588 ioend->io_error = xfs_setfilesize_trans_alloc(ioend); 1597 ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
1589 1598
1590out_end_io: 1599out_end_io:
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0ae50e9847bb..4e0a5773eee4 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1220DEFINE_IOMAP_EVENT(xfs_gbmap_direct); 1220DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
1221DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); 1221DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
1222DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); 1222DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
1223DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
1223DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); 1224DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
1224 1225
1225DECLARE_EVENT_CLASS(xfs_simple_io_class, 1226DECLARE_EVENT_CLASS(xfs_simple_io_class,