author      Christoph Hellwig <hch@lst.de>      2016-06-20 19:53:44 -0400
committer   Dave Chinner <david@fromorbit.com>  2016-06-20 19:53:44 -0400
commit      68a9f5e7007c1afa2cf6830b690a90d0187c0684 (patch)
tree        986de78ca7e20e49604faecccc95685ce52b4090
parent      f0c6bcba74ac51cb77aadb33ad35cb2dc1ad1506 (diff)
xfs: implement iomap based buffered write path
Convert XFS to use the new iomap based multipage write path. This involves
implementing the ->iomap_begin and ->iomap_end methods, and switching the
buffered file write, page_mkwrite and xfs_iozero paths to the new iomap
helpers.

With this change __xfs_get_blocks will never be used for buffered writes,
and the code handling them can be removed.

Based on earlier code from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
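For orientation, here is a minimal sketch (not part of the patch below) of how a
filesystem plugs into the iomap based buffered write path that this commit switches
XFS over to: it fills in an iomap_ops with ->iomap_begin/->iomap_end and hands that
to the generic helpers such as iomap_file_buffered_write(). The sketch assumes the
iomap API as it stands at this point in the series; the myfs_* names are
hypothetical, and the callbacks only report a hole rather than doing real block
mapping.

/*
 * Minimal sketch, not from this commit: provide ->iomap_begin/->iomap_end
 * and pass the iomap_ops to the generic helpers instead of get_block
 * callbacks.  The "myfs_*" names are hypothetical.
 */
#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/iomap.h>

static int
myfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
                unsigned flags, struct iomap *iomap)
{
        /*
         * Look up (and, for IOMAP_WRITE, allocate or reserve) the extent
         * covering offset.  This stub just reports an unmapped hole.
         */
        iomap->blkno = IOMAP_NULL_BLOCK;
        iomap->type = IOMAP_HOLE;
        iomap->offset = offset;
        iomap->length = length;
        return 0;
}

static int
myfs_iomap_end(struct inode *inode, loff_t offset, loff_t length,
                ssize_t written, unsigned flags, struct iomap *iomap)
{
        /* Release anything a short write left unused; nothing to do here. */
        return 0;
}

static struct iomap_ops myfs_iomap_ops = {
        .iomap_begin            = myfs_iomap_begin,
        .iomap_end              = myfs_iomap_end,
};

/* The buffered write path then reduces to a single helper call: */
static ssize_t
myfs_buffered_write(struct kiocb *iocb, struct iov_iter *from)
{
        return iomap_file_buffered_write(iocb, from, &myfs_iomap_ops);
}

The real implementations added by this patch are xfs_file_iomap_begin() and
xfs_file_iomap_end() in fs/xfs/xfs_iomap.c below.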
-rw-r--r--  fs/xfs/Kconfig       1
-rw-r--r--  fs/xfs/xfs_aops.c  212
-rw-r--r--  fs/xfs/xfs_file.c   71
-rw-r--r--  fs/xfs/xfs_iomap.c 144
-rw-r--r--  fs/xfs/xfs_iomap.h   5
-rw-r--r--  fs/xfs/xfs_iops.c    9
-rw-r--r--  fs/xfs/xfs_trace.h   3
7 files changed, 187 insertions, 258 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5d47b4df61ea..35faf128f36d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -4,6 +4,7 @@ config XFS_FS
         depends on (64BIT || LBDAF)
         select EXPORTFS
         select LIBCRC32C
+        select FS_IOMAP
         help
           XFS is a high performance journaling filesystem which originated
           on the SGI IRIX platform. It is completely multi-threaded, can
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..2ac9f7e5f504 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1427,216 +1427,6 @@ xfs_vm_direct_IO(
                 xfs_get_blocks_direct, endio, NULL, flags);
 }

-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-        struct inode            *inode,
-        loff_t                  start,
-        loff_t                  end)
-{
-        struct xfs_inode        *ip = XFS_I(inode);
-        xfs_fileoff_t           start_fsb;
-        xfs_fileoff_t           end_fsb;
-        int                     error;
-
-        start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-        end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-        if (end_fsb <= start_fsb)
-                return;
-
-        xfs_ilock(ip, XFS_ILOCK_EXCL);
-        error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-                                                end_fsb - start_fsb);
-        if (error) {
-                /* something screwed, just bail */
-                if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                        xfs_alert(ip->i_mount,
-                "xfs_vm_write_failed: unable to clean up ino %lld",
-                                        ip->i_ino);
-                }
-        }
-        xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-        struct inode            *inode,
-        struct page             *page,
-        loff_t                  pos,
-        unsigned                len)
-{
-        loff_t                  block_offset;
-        loff_t                  block_start;
-        loff_t                  block_end;
-        loff_t                  from = pos & (PAGE_SIZE - 1);
-        loff_t                  to = from + len;
-        struct buffer_head      *bh, *head;
-        struct xfs_mount        *mp = XFS_I(inode)->i_mount;
-
-        /*
-         * The request pos offset might be 32 or 64 bit, this is all fine
-         * on 64-bit platform.  However, for 64-bit pos request on 32-bit
-         * platform, the high 32-bit will be masked off if we evaluate the
-         * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-         * 0xfffff000 as an unsigned long, hence the result is incorrect
-         * which could cause the following ASSERT failed in most cases.
-         * In order to avoid this, we can evaluate the block_offset of the
-         * start of the page by using shifts rather than masks the mismatch
-         * problem.
-         */
-        block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-        ASSERT(block_offset + from == pos);
-
-        head = page_buffers(page);
-        block_start = 0;
-        for (bh = head; bh != head || !block_start;
-             bh = bh->b_this_page, block_start = block_end,
-                                   block_offset += bh->b_size) {
-                block_end = block_start + bh->b_size;
-
-                /* skip buffers before the write */
-                if (block_end <= from)
-                        continue;
-
-                /* if the buffer is after the write, we're done */
-                if (block_start >= to)
-                        break;
-
-                /*
-                 * Process delalloc and unwritten buffers beyond EOF. We can
-                 * encounter unwritten buffers in the event that a file has
-                 * post-EOF unwritten extents and an extending write happens to
-                 * fail (e.g., an unaligned write that also involves a delalloc
-                 * to the same page).
-                 */
-                if (!buffer_delay(bh) && !buffer_unwritten(bh))
-                        continue;
-
-                if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-                    block_offset < i_size_read(inode))
-                        continue;
-
-                if (buffer_delay(bh))
-                        xfs_vm_kill_delalloc_range(inode, block_offset,
-                                                   block_offset + bh->b_size);
-
-                /*
-                 * This buffer does not contain data anymore. make sure anyone
-                 * who finds it knows that for certain.
-                 */
-                clear_buffer_delay(bh);
-                clear_buffer_uptodate(bh);
-                clear_buffer_mapped(bh);
-                clear_buffer_new(bh);
-                clear_buffer_dirty(bh);
-                clear_buffer_unwritten(bh);
-        }
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-        struct file             *file,
-        struct address_space    *mapping,
-        loff_t                  pos,
-        unsigned                len,
-        unsigned                flags,
-        struct page             **pagep,
-        void                    **fsdata)
-{
-        pgoff_t                 index = pos >> PAGE_SHIFT;
-        struct page             *page;
-        int                     status;
-        struct xfs_mount        *mp = XFS_I(mapping->host)->i_mount;
-
-        ASSERT(len <= PAGE_SIZE);
-
-        page = grab_cache_page_write_begin(mapping, index, flags);
-        if (!page)
-                return -ENOMEM;
-
-        status = __block_write_begin(page, pos, len, xfs_get_blocks);
-        if (xfs_mp_fail_writes(mp))
-                status = -EIO;
-        if (unlikely(status)) {
-                struct inode    *inode = mapping->host;
-                size_t          isize = i_size_read(inode);
-
-                xfs_vm_write_failed(inode, page, pos, len);
-                unlock_page(page);
-
-                /*
-                 * If the write is beyond EOF, we only want to kill blocks
-                 * allocated in this write, not blocks that were previously
-                 * written successfully.
-                 */
-                if (xfs_mp_fail_writes(mp))
-                        isize = 0;
-                if (pos + len > isize) {
-                        ssize_t start = max_t(ssize_t, pos, isize);
-
-                        truncate_pagecache_range(inode, start, pos + len);
-                }
-
-                put_page(page);
-                page = NULL;
-        }
-
-        *pagep = page;
-        return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-        struct file             *file,
-        struct address_space    *mapping,
-        loff_t                  pos,
-        unsigned                len,
-        unsigned                copied,
-        struct page             *page,
-        void                    *fsdata)
-{
-        int                     ret;
-
-        ASSERT(len <= PAGE_SIZE);
-
-        ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-        if (unlikely(ret < len)) {
-                struct inode    *inode = mapping->host;
-                size_t          isize = i_size_read(inode);
-                loff_t          to = pos + len;
-
-                if (to > isize) {
-                        /* only kill blocks in this write beyond EOF */
-                        if (pos > isize)
-                                isize = pos;
-                        xfs_vm_kill_delalloc_range(inode, isize, to);
-                        truncate_pagecache_range(inode, isize, to);
-                }
-        }
-        return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
         struct address_space    *mapping,
@@ -1747,8 +1537,6 @@ const struct address_space_operations xfs_address_space_operations = {
         .set_page_dirty         = xfs_vm_set_page_dirty,
         .releasepage            = xfs_vm_releasepage,
         .invalidatepage         = xfs_vm_invalidatepage,
-        .write_begin            = xfs_vm_write_begin,
-        .write_end              = xfs_vm_write_end,
         .bmap                   = xfs_vm_bmap,
         .direct_IO              = xfs_vm_direct_IO,
         .migratepage            = buffer_migrate_page,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 47fc63295422..7316d3841c53 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"

 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -79,57 +80,27 @@ xfs_rw_ilock_demote(
         inode_unlock(VFS_I(ip));
 }

-/*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
- */
-int
-xfs_iozero(
-        struct xfs_inode        *ip,    /* inode                        */
-        loff_t                  pos,    /* offset in file               */
-        size_t                  count)  /* size of data to zero         */
+static int
+xfs_dax_zero_range(
+        struct inode            *inode,
+        loff_t                  pos,
+        size_t                  count)
 {
-        struct page             *page;
-        struct address_space    *mapping;
         int                     status = 0;

-
-        mapping = VFS_I(ip)->i_mapping;
         do {
                 unsigned offset, bytes;
-                void *fsdata;

                 offset = (pos & (PAGE_SIZE -1)); /* Within page */
                 bytes = PAGE_SIZE - offset;
                 if (bytes > count)
                         bytes = count;

-                if (IS_DAX(VFS_I(ip))) {
-                        status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-                                                     xfs_get_blocks_direct);
-                        if (status)
-                                break;
-                } else {
-                        status = pagecache_write_begin(NULL, mapping, pos, bytes,
-                                                AOP_FLAG_UNINTERRUPTIBLE,
-                                                &page, &fsdata);
-                        if (status)
-                                break;
-
-                        zero_user(page, offset, bytes);
+                status = dax_zero_page_range(inode, pos, bytes,
+                                xfs_get_blocks_direct);
+                if (status)
+                        break;

-                        status = pagecache_write_end(NULL, mapping, pos, bytes,
-                                                bytes, page, fsdata);
-                        WARN_ON(status <= 0); /* can't return less than zero! */
-                        status = 0;
-                }
                 pos += bytes;
                 count -= bytes;
         } while (count);
@@ -137,6 +108,24 @@ xfs_iozero(
         return status;
 }

+/*
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
+ */
+int
+xfs_iozero(
+        struct xfs_inode        *ip,
+        loff_t                  pos,
+        size_t                  count)
+{
+        struct inode            *inode = VFS_I(ip);
+
+        if (IS_DAX(VFS_I(ip)))
+                return xfs_dax_zero_range(inode, pos, count);
+        else
+                return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops);
+}
+
 int
 xfs_update_prealloc_flags(
         struct xfs_inode        *ip,
@@ -841,7 +830,7 @@ xfs_file_buffered_aio_write(
 write_retry:
         trace_xfs_file_buffered_write(ip, iov_iter_count(from),
                                 iocb->ki_pos, 0);
-        ret = generic_perform_write(file, from, iocb->ki_pos);
+        ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
         if (likely(ret >= 0))
                 iocb->ki_pos += ret;

@@ -1553,7 +1542,7 @@ xfs_filemap_page_mkwrite(
         if (IS_DAX(inode)) {
                 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
         } else {
-                ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+                ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
                 ret = block_page_mkwrite_return(ret);
         }

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2f3719461cbd..620fc9120444 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -967,3 +967,147 @@ xfs_bmbt_to_iomap(
         iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
         iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
 }
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+        return !nimaps ||
+                imap->br_startblock == HOLESTARTBLOCK ||
+                imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+        struct inode            *inode,
+        loff_t                  offset,
+        loff_t                  length,
+        unsigned                flags,
+        struct iomap            *iomap)
+{
+        struct xfs_inode        *ip = XFS_I(inode);
+        struct xfs_mount        *mp = ip->i_mount;
+        struct xfs_bmbt_irec    imap;
+        xfs_fileoff_t           offset_fsb, end_fsb;
+        int                     nimaps = 1, error = 0;
+
+        if (XFS_FORCED_SHUTDOWN(mp))
+                return -EIO;
+
+        xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+        ASSERT(offset <= mp->m_super->s_maxbytes);
+        if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+                length = mp->m_super->s_maxbytes - offset;
+        offset_fsb = XFS_B_TO_FSBT(mp, offset);
+        end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+                               &nimaps, XFS_BMAPI_ENTIRE);
+        if (error) {
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                return error;
+        }
+
+        if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+                /*
+                 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+                 * pages to keep the chunks of work done here somewhat
+                 * symmetric with the work writeback does.  This is a
+                 * completely arbitrary number pulled out of thin air as a
+                 * best guess for initial testing.
+                 *
+                 * Note that the value needs to be less than 32 bits wide until
+                 * the lower level functions are updated.
+                 */
+                length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+                if (xfs_get_extsz_hint(ip)) {
+                        /*
+                         * xfs_iomap_write_direct() expects the shared lock. It
+                         * is unlocked on return.
+                         */
+                        xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+                        error = xfs_iomap_write_direct(ip, offset, length, &imap,
+                                        nimaps);
+                } else {
+                        error = xfs_iomap_write_delay(ip, offset, length, &imap);
+                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                }
+
+                if (error)
+                        return error;
+
+                trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+                xfs_bmbt_to_iomap(ip, iomap, &imap);
+        } else if (nimaps) {
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+                xfs_bmbt_to_iomap(ip, iomap, &imap);
+        } else {
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+                iomap->blkno = IOMAP_NULL_BLOCK;
+                iomap->type = IOMAP_HOLE;
+                iomap->offset = offset;
+                iomap->length = length;
+        }
+
+        return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+        struct xfs_inode        *ip,
+        loff_t                  offset,
+        loff_t                  length,
+        ssize_t                 written)
+{
+        struct xfs_mount        *mp = ip->i_mount;
+        xfs_fileoff_t           start_fsb;
+        xfs_fileoff_t           end_fsb;
+        int                     error = 0;
+
+        start_fsb = XFS_B_TO_FSB(mp, offset + written);
+        end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+        /*
+         * Trim back delalloc blocks if we didn't manage to write the whole
+         * range reserved.
+         *
+         * We don't need to care about racing delalloc as we hold i_mutex
+         * across the reserve/allocate/unreserve calls. If there are delalloc
+         * blocks in the range, they are ours.
+         */
+        if (start_fsb < end_fsb) {
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
+                error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+                                               end_fsb - start_fsb);
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+                if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+                        xfs_alert(mp, "%s: unable to clean up ino %lld",
+                                __func__, ip->i_ino);
+                        return error;
+                }
+        }
+
+        return 0;
+}
+
+static int
+xfs_file_iomap_end(
+        struct inode            *inode,
+        loff_t                  offset,
+        loff_t                  length,
+        ssize_t                 written,
+        unsigned                flags,
+        struct iomap            *iomap)
+{
+        if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+                return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+                                length, written);
+        return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+        .iomap_begin            = xfs_file_iomap_begin,
+        .iomap_end              = xfs_file_iomap_end,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 718f07c5c0d2..e066d045e2ff 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,7 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__

-struct iomap;
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;

@@ -33,4 +34,6 @@ int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                 struct xfs_bmbt_irec *);

+extern struct iomap_ops xfs_iomap_ops;
+
 #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1a5ca4b4a866..5d1fdae4e39b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -38,6 +38,7 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"

 #include <linux/capability.h>
 #include <linux/xattr.h>
@@ -822,8 +823,8 @@ xfs_setattr_size(
                 error = dax_truncate_page(inode, newsize,
                                 xfs_get_blocks_direct);
         } else {
-                error = block_truncate_page(inode->i_mapping, newsize,
-                                xfs_get_blocks);
+                error = iomap_truncate_page(inode, newsize,
+                                &did_zeroing, &xfs_iomap_ops);
         }
 }

@@ -838,8 +839,8 @@ xfs_setattr_size(
          * problem. Note that this includes any block zeroing we did above;
          * otherwise those blocks may not be zeroed after a crash.
          */
-        if (newsize > ip->i_d.di_size &&
-            (oldsize != ip->i_d.di_size || did_zeroing)) {
+        if (did_zeroing ||
+            (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
                 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                                       ip->i_d.di_size, newsize);
                 if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ea94ee0fe5ea..bb24ce7b0280 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);

 DECLARE_EVENT_CLASS(xfs_simple_io_class,
         TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),