about summary refs log tree commit diff stats
path: root/fs/iomap.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/iomap.c')
-rw-r--r-- fs/iomap.c 53
1 file changed, 41 insertions, 12 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 64ce240217a1..3ffb776fbebe 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -142,13 +142,14 @@ static void
142iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop, 142iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
143 loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp) 143 loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
144{ 144{
145 loff_t orig_pos = *pos;
146 loff_t isize = i_size_read(inode);
145 unsigned block_bits = inode->i_blkbits; 147 unsigned block_bits = inode->i_blkbits;
146 unsigned block_size = (1 << block_bits); 148 unsigned block_size = (1 << block_bits);
147 unsigned poff = offset_in_page(*pos); 149 unsigned poff = offset_in_page(*pos);
148 unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length); 150 unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
149 unsigned first = poff >> block_bits; 151 unsigned first = poff >> block_bits;
150 unsigned last = (poff + plen - 1) >> block_bits; 152 unsigned last = (poff + plen - 1) >> block_bits;
151 unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;
152 153
153 /* 154 /*
154 * If the block size is smaller than the page size we need to check the 155 * If the block size is smaller than the page size we need to check the
@@ -183,8 +184,12 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
183 * handle both halves separately so that we properly zero data in the 184 * handle both halves separately so that we properly zero data in the
184 * page cache for blocks that are entirely outside of i_size. 185 * page cache for blocks that are entirely outside of i_size.
185 */ 186 */
186 if (first <= end && last > end) 187 if (orig_pos <= isize && orig_pos + length > isize) {
187 plen -= (last - end) * block_size; 188 unsigned end = offset_in_page(isize - 1) >> block_bits;
189
190 if (first <= end && last > end)
191 plen -= (last - end) * block_size;
192 }
188 193
189 *offp = poff; 194 *offp = poff;
190 *lenp = plen; 195 *lenp = plen;
@@ -1580,7 +1585,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
1580 struct bio *bio; 1585 struct bio *bio;
1581 bool need_zeroout = false; 1586 bool need_zeroout = false;
1582 bool use_fua = false; 1587 bool use_fua = false;
1583 int nr_pages, ret; 1588 int nr_pages, ret = 0;
1584 size_t copied = 0; 1589 size_t copied = 0;
1585 1590
1586 if ((pos | length | align) & ((1 << blkbits) - 1)) 1591 if ((pos | length | align) & ((1 << blkbits) - 1))
@@ -1596,12 +1601,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
1596 1601
1597 if (iomap->flags & IOMAP_F_NEW) { 1602 if (iomap->flags & IOMAP_F_NEW) {
1598 need_zeroout = true; 1603 need_zeroout = true;
1599 } else { 1604 } else if (iomap->type == IOMAP_MAPPED) {
1600 /* 1605 /*
1601 * Use a FUA write if we need datasync semantics, this 1606 * Use a FUA write if we need datasync semantics, this is a pure
1602 * is a pure data IO that doesn't require any metadata 1607 * data IO that doesn't require any metadata updates (including
1603 * updates and the underlying device supports FUA. This 1608 * after IO completion such as unwritten extent conversion) and
1604 * allows us to avoid cache flushes on IO completion. 1609 * the underlying device supports FUA. This allows us to avoid
1610 * cache flushes on IO completion.
1605 */ 1611 */
1606 if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) && 1612 if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
1607 (dio->flags & IOMAP_DIO_WRITE_FUA) && 1613 (dio->flags & IOMAP_DIO_WRITE_FUA) &&
@@ -1644,8 +1650,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
1644 1650
1645 ret = bio_iov_iter_get_pages(bio, &iter); 1651 ret = bio_iov_iter_get_pages(bio, &iter);
1646 if (unlikely(ret)) { 1652 if (unlikely(ret)) {
1653 /*
1654 * We have to stop part way through an IO. We must fall
1655 * through to the sub-block tail zeroing here, otherwise
1656 * this short IO may expose stale data in the tail of
1657 * the block we haven't written data to.
1658 */
1647 bio_put(bio); 1659 bio_put(bio);
1648 return copied ? copied : ret; 1660 goto zero_tail;
1649 } 1661 }
1650 1662
1651 n = bio->bi_iter.bi_size; 1663 n = bio->bi_iter.bi_size;
@@ -1676,13 +1688,21 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
1676 dio->submit.cookie = submit_bio(bio); 1688 dio->submit.cookie = submit_bio(bio);
1677 } while (nr_pages); 1689 } while (nr_pages);
1678 1690
1679 if (need_zeroout) { 1691 /*
1692 * We need to zeroout the tail of a sub-block write if the extent type
1693 * requires zeroing or the write extends beyond EOF. If we don't zero
1694 * the block tail in the latter case, we can expose stale data via mmap
1695 * reads of the EOF block.
1696 */
1697zero_tail:
1698 if (need_zeroout ||
1699 ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
1680 /* zero out from the end of the write to the end of the block */ 1700 /* zero out from the end of the write to the end of the block */
1681 pad = pos & (fs_block_size - 1); 1701 pad = pos & (fs_block_size - 1);
1682 if (pad) 1702 if (pad)
1683 iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); 1703 iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
1684 } 1704 }
1685 return copied; 1705 return copied ? copied : ret;
1686} 1706}
1687 1707
1688static loff_t 1708static loff_t
@@ -1857,6 +1877,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
1857 dio->wait_for_completion = true; 1877 dio->wait_for_completion = true;
1858 ret = 0; 1878 ret = 0;
1859 } 1879 }
1880
1881 /*
1882 * Splicing to pipes can fail on a full pipe. We have to
1883 * swallow this to make it look like a short IO,
1884 * otherwise the higher splice layers will completely
1885 * mishandle the error and stop moving data.
1886 */
1887 if (ret == -EFAULT)
1888 ret = 0;
1860 break; 1889 break;
1861 } 1890 }
1862 pos += ret; 1891 pos += ret;