author		Christoph Hellwig <hch@lst.de>		2016-06-20 19:53:44 -0400
committer	Dave Chinner <david@fromorbit.com>	2016-06-20 19:53:44 -0400
commit		68a9f5e7007c1afa2cf6830b690a90d0187c0684
tree		986de78ca7e20e49604faecccc95685ce52b4090
parent		f0c6bcba74ac51cb77aadb33ad35cb2dc1ad1506
xfs: implement iomap based buffered write path
Convert XFS to use the new iomap based multipage write path. This involves
implementing the ->iomap_begin and ->iomap_end methods, and switching the
buffered file write, page_mkwrite and xfs_iozero paths to the new iomap
helpers.
With this change __xfs_get_blocks will never be used for buffered writes,
and the code handling them can be removed.
Based on earlier code from Dave Chinner.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
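
For orientation before reading the diff: the patch adds an iomap_ops instance in fs/xfs/xfs_iomap.c and points the buffered write paths at the generic iomap helpers. The following is a condensed sketch assembled from the hunks below, not standalone-compilable kernel code; every name in it comes from this patch:

	/* new ops vector handed to the generic iomap code (fs/xfs/xfs_iomap.c) */
	struct iomap_ops xfs_iomap_ops = {
		.iomap_begin	= xfs_file_iomap_begin,	/* map, or allocate, blocks for the range */
		.iomap_end	= xfs_file_iomap_end,	/* punch out delalloc blocks left unwritten */
	};

	/* call sites converted by this patch */
	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);	/* was generic_perform_write() */
	ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);		/* was block_page_mkwrite() */
	ret = iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops);	/* xfs_iozero(), non-DAX case */
	error = iomap_truncate_page(inode, newsize, &did_zeroing, &xfs_iomap_ops);	/* was block_truncate_page() */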
-rw-r--r--	fs/xfs/Kconfig		|    1
-rw-r--r--	fs/xfs/xfs_aops.c	|  212
-rw-r--r--	fs/xfs/xfs_file.c	|   71
-rw-r--r--	fs/xfs/xfs_iomap.c	|  144
-rw-r--r--	fs/xfs/xfs_iomap.h	|    5
-rw-r--r--	fs/xfs/xfs_iops.c	|    9
-rw-r--r--	fs/xfs/xfs_trace.h	|    3
7 files changed, 187 insertions(+), 258 deletions(-)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5d47b4df61ea..35faf128f36d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -4,6 +4,7 @@ config XFS_FS
 	depends on (64BIT || LBDAF)
 	select EXPORTFS
 	select LIBCRC32C
+	select FS_IOMAP
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform. It is completely multi-threaded, can
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..2ac9f7e5f504 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1427,216 +1427,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform. However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1537,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 47fc63295422..7316d3841c53 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -79,57 +80,27 @@ xfs_rw_ilock_demote(
 		inode_unlock(VFS_I(ip));
 }
 
-/*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
- */
-int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+static int
+xfs_dax_zero_range(
+	struct inode		*inode,
+	loff_t			pos,
+	size_t			count)
 {
-	struct page		*page;
-	struct address_space	*mapping;
 	int			status = 0;
 
-
-	mapping = VFS_I(ip)->i_mapping;
 	do {
 		unsigned offset, bytes;
-		void *fsdata;
 
 		offset = (pos & (PAGE_SIZE -1)); /* Within page */
 		bytes = PAGE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
 
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-						     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
+		status = dax_zero_page_range(inode, pos, bytes,
+				xfs_get_blocks_direct);
+		if (status)
+			break;
 
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
 		pos += bytes;
 		count -= bytes;
 	} while (count);
@@ -137,6 +108,24 @@ xfs_iozero(
 	return status;
 }
 
+/*
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
+ */
+int
+xfs_iozero(
+	struct xfs_inode	*ip,
+	loff_t			pos,
+	size_t			count)
+{
+	struct inode		*inode = VFS_I(ip);
+
+	if (IS_DAX(VFS_I(ip)))
+		return xfs_dax_zero_range(inode, pos, count);
+	else
+		return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops);
+}
+
 int
 xfs_update_prealloc_flags(
 	struct xfs_inode	*ip,
@@ -841,7 +830,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
 
@@ -1553,7 +1542,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
 
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2f3719461cbd..620fc9120444 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -967,3 +967,147 @@ xfs_bmbt_to_iomap(
 	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
 	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
 }
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return !nimaps ||
+		imap->br_startblock == HOLESTARTBLOCK ||
+		imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			nimaps = 1, error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+		length = mp->m_super->s_maxbytes - offset;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return error;
+	}
+
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+		/*
+		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+		 * pages to keep the chunks of work done where somewhat symmetric
+		 * with the work writeback does. This is a completely arbitrary
+		 * number pulled out of thin air as a best guess for initial
+		 * testing.
+		 *
+		 * Note that the values needs to be less than 32-bits wide until
+		 * the lower level functions are updated.
+		 */
+		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+		if (xfs_get_extsz_hint(ip)) {
+			/*
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
+			 */
+			xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+			error = xfs_iomap_write_direct(ip, offset, length, &imap,
+					nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, length, &imap);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+
+		if (error)
+			return error;
+
+		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else if (nimaps) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+		iomap->offset = offset;
+		iomap->length = length;
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+
+	start_fsb = XFS_B_TO_FSB(mp, offset + written);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	/*
+	 * Trim back delalloc blocks if we didn't manage to write the whole
+	 * range reserved.
+	 *
+	 * We don't need to care about racing delalloc as we hold i_mutex
+	 * across the reserve/allocate/unreserve calls. If there are delalloc
+	 * blocks in the range, they are ours.
+	 */
+	if (start_fsb < end_fsb) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+					       end_fsb - start_fsb);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert(mp, "%s: unable to clean up ino %lld",
+				__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+				length, written);
+	return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+	.iomap_begin		= xfs_file_iomap_begin,
+	.iomap_end		= xfs_file_iomap_end,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 718f07c5c0d2..e066d045e2ff 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,7 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
-struct iomap;
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
@@ -33,4 +34,6 @@ int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
 		struct xfs_bmbt_irec *);
 
+extern struct iomap_ops xfs_iomap_ops;
+
 #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1a5ca4b4a866..5d1fdae4e39b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -38,6 +38,7 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
@@ -822,8 +823,8 @@ xfs_setattr_size(
 			error = dax_truncate_page(inode, newsize,
 					xfs_get_blocks_direct);
 		} else {
-			error = block_truncate_page(inode->i_mapping, newsize,
-					xfs_get_blocks);
+			error = iomap_truncate_page(inode, newsize,
+					&did_zeroing, &xfs_iomap_ops);
 		}
 	}
 
@@ -838,8 +839,8 @@ xfs_setattr_size(
 	 * problem. Note that this includes any block zeroing we did above;
 	 * otherwise those blocks may not be zeroed after a crash.
	 */
-	if (newsize > ip->i_d.di_size &&
-	    (oldsize != ip->i_d.di_size || did_zeroing)) {
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						      ip->i_d.di_size, newsize);
 		if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ea94ee0fe5ea..bb24ce7b0280 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),