aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2015-11-02 20:27:22 -0500
committerDave Chinner <david@fromorbit.com>2015-11-02 20:27:22 -0500
commit3e12dbbdbd8809f0455920e42fdbf9eddc002651 (patch)
tree2c9a717a37830f88d7daa544c01447b0a198c11d
parent1f93e4a96c9109378204c147b3eec0d0e8100fde (diff)
xfs: fix inode size update overflow in xfs_map_direct()
Both direct IO and DAX pass an offset and count into get_blocks that will overflow a s64 variable when an IO goes into the last supported block in a file (i.e. at offset 2^63 - 1FSB bytes). This can be seen from the tracing: xfs_get_blocks_alloc: [...] offset 0x7ffffffffffff000 count 4096 xfs_gbmap_direct: [...] offset 0x7ffffffffffff000 count 4096 xfs_gbmap_direct_none:[...] offset 0x7ffffffffffff000 count 4096 0x7ffffffffffff000 + 4096 = 0x8000000000000000, and hence that overflows the s64 offset and we fail to detect the need for a filesize update and an ioend is not allocated. This is *mostly* avoided for direct IO because such extending IOs occur with full block allocation, and so the "IS_UNWRITTEN()" check still evaluates as true and we get an ioend that way. However, doing single sector extending IOs to this last block will expose the fact that file size updates will not occur after the first allocating direct IO as the overflow will then be exposed. There is one further complexity: the DAX page fault path also exposes the same issue in block allocation. However, page faults cannot extend the file size, so in this case we want to allocate the block but do not want to allocate an ioend to enable file size update at IO completion. Hence we now need to distinguish between the direct IO path allocation and dax fault path allocation to avoid leaking ioend structures. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--fs/xfs/xfs_aops.c50
-rw-r--r--fs/xfs/xfs_aops.h2
-rw-r--r--fs/xfs/xfs_file.c6
3 files changed, 49 insertions, 9 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 50ab2879b9da..e747d6ad5d18 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1250,13 +1250,28 @@ xfs_vm_releasepage(
1250 * the DIO. There is only going to be one reference to the ioend and its life 1250 * the DIO. There is only going to be one reference to the ioend and its life
1251 * cycle is constrained by the DIO completion code. hence we don't need 1251 * cycle is constrained by the DIO completion code. hence we don't need
1252 * reference counting here. 1252 * reference counting here.
1253 *
1254 * Note that for DIO, an IO to the highest supported file block offset (i.e.
1255 * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
1256 * bit variable. Hence if we see this overflow, we have to assume that the IO is
1257 * extending the file size. We won't know for sure until IO completion is run
1258 * and the actual max write offset is communicated to the IO completion
1259 * routine.
1260 *
1261 * For DAX page faults, we are preparing to never see unwritten extents here,
1262 * nor should we ever extend the inode size. Hence we will soon have nothing to
1263 * do here for this case, ensuring we don't have to provide an IO completion
1264 * callback to free an ioend that we don't actually need for a fault into the
1265 * page at offset (2^63 - 1FSB) bytes.
1253 */ 1266 */
1267
1254static void 1268static void
1255xfs_map_direct( 1269xfs_map_direct(
1256 struct inode *inode, 1270 struct inode *inode,
1257 struct buffer_head *bh_result, 1271 struct buffer_head *bh_result,
1258 struct xfs_bmbt_irec *imap, 1272 struct xfs_bmbt_irec *imap,
1259 xfs_off_t offset) 1273 xfs_off_t offset,
1274 bool dax_fault)
1260{ 1275{
1261 struct xfs_ioend *ioend; 1276 struct xfs_ioend *ioend;
1262 xfs_off_t size = bh_result->b_size; 1277 xfs_off_t size = bh_result->b_size;
@@ -1269,6 +1284,16 @@ xfs_map_direct(
1269 1284
1270 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); 1285 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
1271 1286
1287 /* XXX: preparation for removing unwritten extents in DAX */
1288#if 0
1289 if (dax_fault) {
1290 ASSERT(type == XFS_IO_OVERWRITE);
1291 trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
1292 imap);
1293 return;
1294 }
1295#endif
1296
1272 if (bh_result->b_private) { 1297 if (bh_result->b_private) {
1273 ioend = bh_result->b_private; 1298 ioend = bh_result->b_private;
1274 ASSERT(ioend->io_size > 0); 1299 ASSERT(ioend->io_size > 0);
@@ -1283,7 +1308,8 @@ xfs_map_direct(
1283 ioend->io_size, ioend->io_type, 1308 ioend->io_size, ioend->io_type,
1284 imap); 1309 imap);
1285 } else if (type == XFS_IO_UNWRITTEN || 1310 } else if (type == XFS_IO_UNWRITTEN ||
1286 offset + size > i_size_read(inode)) { 1311 offset + size > i_size_read(inode) ||
1312 offset + size < 0) {
1287 ioend = xfs_alloc_ioend(inode, type); 1313 ioend = xfs_alloc_ioend(inode, type);
1288 ioend->io_offset = offset; 1314 ioend->io_offset = offset;
1289 ioend->io_size = size; 1315 ioend->io_size = size;
@@ -1345,7 +1371,8 @@ __xfs_get_blocks(
1345 sector_t iblock, 1371 sector_t iblock,
1346 struct buffer_head *bh_result, 1372 struct buffer_head *bh_result,
1347 int create, 1373 int create,
1348 bool direct) 1374 bool direct,
1375 bool dax_fault)
1349{ 1376{
1350 struct xfs_inode *ip = XFS_I(inode); 1377 struct xfs_inode *ip = XFS_I(inode);
1351 struct xfs_mount *mp = ip->i_mount; 1378 struct xfs_mount *mp = ip->i_mount;
@@ -1458,7 +1485,8 @@ __xfs_get_blocks(
1458 set_buffer_unwritten(bh_result); 1485 set_buffer_unwritten(bh_result);
1459 /* direct IO needs special help */ 1486 /* direct IO needs special help */
1460 if (create && direct) 1487 if (create && direct)
1461 xfs_map_direct(inode, bh_result, &imap, offset); 1488 xfs_map_direct(inode, bh_result, &imap, offset,
1489 dax_fault);
1462 } 1490 }
1463 1491
1464 /* 1492 /*
@@ -1505,7 +1533,7 @@ xfs_get_blocks(
1505 struct buffer_head *bh_result, 1533 struct buffer_head *bh_result,
1506 int create) 1534 int create)
1507{ 1535{
1508 return __xfs_get_blocks(inode, iblock, bh_result, create, false); 1536 return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
1509} 1537}
1510 1538
1511int 1539int
@@ -1515,7 +1543,17 @@ xfs_get_blocks_direct(
1515 struct buffer_head *bh_result, 1543 struct buffer_head *bh_result,
1516 int create) 1544 int create)
1517{ 1545{
1518 return __xfs_get_blocks(inode, iblock, bh_result, create, true); 1546 return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
1547}
1548
1549int
1550xfs_get_blocks_dax_fault(
1551 struct inode *inode,
1552 sector_t iblock,
1553 struct buffer_head *bh_result,
1554 int create)
1555{
1556 return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
1519} 1557}
1520 1558
1521static void 1559static void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 86afd1ac7895..d39ba25ccc98 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -58,6 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
58 struct buffer_head *map_bh, int create); 58 struct buffer_head *map_bh, int create);
59int xfs_get_blocks_direct(struct inode *inode, sector_t offset, 59int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
60 struct buffer_head *map_bh, int create); 60 struct buffer_head *map_bh, int create);
61int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
62 struct buffer_head *map_bh, int create);
61void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); 63void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate);
62 64
63extern void xfs_count_page_state(struct page *, int *, int *); 65extern void xfs_count_page_state(struct page *, int *, int *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e78feb400e22..27abe1c92184 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1503,7 +1503,7 @@ xfs_filemap_page_mkwrite(
1503 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1503 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1504 1504
1505 if (IS_DAX(inode)) { 1505 if (IS_DAX(inode)) {
1506 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, 1506 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault,
1507 xfs_end_io_dax_write); 1507 xfs_end_io_dax_write);
1508 } else { 1508 } else {
1509 ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks); 1509 ret = __block_page_mkwrite(vma, vmf, xfs_get_blocks);
@@ -1538,7 +1538,7 @@ xfs_filemap_fault(
1538 * changes to xfs_get_blocks_direct() to map unwritten extent 1538 * changes to xfs_get_blocks_direct() to map unwritten extent
1539 * ioend for conversion on read-only mappings. 1539 * ioend for conversion on read-only mappings.
1540 */ 1540 */
1541 ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); 1541 ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL);
1542 } else 1542 } else
1543 ret = filemap_fault(vma, vmf); 1543 ret = filemap_fault(vma, vmf);
1544 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1544 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1565,7 +1565,7 @@ xfs_filemap_pmd_fault(
1565 sb_start_pagefault(inode->i_sb); 1565 sb_start_pagefault(inode->i_sb);
1566 file_update_time(vma->vm_file); 1566 file_update_time(vma->vm_file);
1567 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1567 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1568 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, 1568 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault,
1569 xfs_end_io_dax_write); 1569 xfs_end_io_dax_write);
1570 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1570 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1571 sb_end_pagefault(inode->i_sb); 1571 sb_end_pagefault(inode->i_sb);