aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-09-06 15:13:17 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-09-06 15:13:17 -0400
commit 11e9739813a753748dd54c905e779dc277efc495 (patch)
tree d34bc2f327d815814ee3b8df72694068ef596637 /fs
parent 925e0ea47cacc285286550dd48ff4b51cdd911ef (diff)
parent 41b9d7263ea1e270019c5d04fa0ab15db50b9725 (diff)
Merge tag 'xfs-for-linus-3.17-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner: "The fixes all address recently discovered data corruption issues. The original Direct IO issue was discovered by Chris Mason @ Facebook on a production workload which mixed buffered reads with direct IO reads and writes to the same file. The fix for that exposed other issues with page invalidation (exposed by millions of fsx operations) failing due to dirty buffers beyond EOF. Finally, the collapse_range code could also cause problems due to racing writeback changing the extent map while it was being shifted around. The commits for that problem are simple mitigation fixes that prevent the problem from occurring. A more robust fix for 3.18 that addresses the underlying problem is currently being worked on by Brian. Summary of fixes: - a direct IO read/buffered read data corruption - the associated fallout from the DIO data corruption fix - collapse range bugs that are potential data corruption issues" * tag 'xfs-for-linus-3.17-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: xfs: trim eofblocks before collapse range xfs: xfs_file_collapse_range is delalloc challenged xfs: don't log inode unless extent shift makes extent modifications xfs: use ranged writeback and invalidation for direct IO xfs: don't zero partial page cache pages during O_DIRECT writes xfs: don't zero partial page cache pages during O_DIRECT writes xfs: don't dirty buffers beyond EOF
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 18
-rw-r--r-- fs/xfs/xfs_aops.c 61
-rw-r--r-- fs/xfs/xfs_bmap_util.c 20
-rw-r--r-- fs/xfs/xfs_file.c 27
4 files changed, 114 insertions, 12 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index de2d26d32844..86df952d3e24 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5424,7 +5424,7 @@ xfs_bmap_shift_extents(
5424 struct xfs_bmap_free *flist, 5424 struct xfs_bmap_free *flist,
5425 int num_exts) 5425 int num_exts)
5426{ 5426{
5427 struct xfs_btree_cur *cur; 5427 struct xfs_btree_cur *cur = NULL;
5428 struct xfs_bmbt_rec_host *gotp; 5428 struct xfs_bmbt_rec_host *gotp;
5429 struct xfs_bmbt_irec got; 5429 struct xfs_bmbt_irec got;
5430 struct xfs_bmbt_irec left; 5430 struct xfs_bmbt_irec left;
@@ -5435,7 +5435,7 @@ xfs_bmap_shift_extents(
5435 int error = 0; 5435 int error = 0;
5436 int i; 5436 int i;
5437 int whichfork = XFS_DATA_FORK; 5437 int whichfork = XFS_DATA_FORK;
5438 int logflags; 5438 int logflags = 0;
5439 xfs_filblks_t blockcount = 0; 5439 xfs_filblks_t blockcount = 0;
5440 int total_extents; 5440 int total_extents;
5441 5441
@@ -5478,16 +5478,11 @@ xfs_bmap_shift_extents(
5478 } 5478 }
5479 } 5479 }
5480 5480
5481 /* We are going to change core inode */
5482 logflags = XFS_ILOG_CORE;
5483 if (ifp->if_flags & XFS_IFBROOT) { 5481 if (ifp->if_flags & XFS_IFBROOT) {
5484 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); 5482 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5485 cur->bc_private.b.firstblock = *firstblock; 5483 cur->bc_private.b.firstblock = *firstblock;
5486 cur->bc_private.b.flist = flist; 5484 cur->bc_private.b.flist = flist;
5487 cur->bc_private.b.flags = 0; 5485 cur->bc_private.b.flags = 0;
5488 } else {
5489 cur = NULL;
5490 logflags |= XFS_ILOG_DEXT;
5491 } 5486 }
5492 5487
5493 /* 5488 /*
@@ -5545,11 +5540,14 @@ xfs_bmap_shift_extents(
5545 blockcount = left.br_blockcount + 5540 blockcount = left.br_blockcount +
5546 got.br_blockcount; 5541 got.br_blockcount;
5547 xfs_iext_remove(ip, *current_ext, 1, 0); 5542 xfs_iext_remove(ip, *current_ext, 1, 0);
5543 logflags |= XFS_ILOG_CORE;
5548 if (cur) { 5544 if (cur) {
5549 error = xfs_btree_delete(cur, &i); 5545 error = xfs_btree_delete(cur, &i);
5550 if (error) 5546 if (error)
5551 goto del_cursor; 5547 goto del_cursor;
5552 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); 5548 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5549 } else {
5550 logflags |= XFS_ILOG_DEXT;
5553 } 5551 }
5554 XFS_IFORK_NEXT_SET(ip, whichfork, 5552 XFS_IFORK_NEXT_SET(ip, whichfork,
5555 XFS_IFORK_NEXTENTS(ip, whichfork) - 1); 5553 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
@@ -5575,6 +5573,7 @@ xfs_bmap_shift_extents(
5575 got.br_startoff = startoff; 5573 got.br_startoff = startoff;
5576 } 5574 }
5577 5575
5576 logflags |= XFS_ILOG_CORE;
5578 if (cur) { 5577 if (cur) {
5579 error = xfs_bmbt_update(cur, got.br_startoff, 5578 error = xfs_bmbt_update(cur, got.br_startoff,
5580 got.br_startblock, 5579 got.br_startblock,
@@ -5582,6 +5581,8 @@ xfs_bmap_shift_extents(
5582 got.br_state); 5581 got.br_state);
5583 if (error) 5582 if (error)
5584 goto del_cursor; 5583 goto del_cursor;
5584 } else {
5585 logflags |= XFS_ILOG_DEXT;
5585 } 5586 }
5586 5587
5587 (*current_ext)++; 5588 (*current_ext)++;
@@ -5597,6 +5598,7 @@ del_cursor:
5597 xfs_btree_del_cursor(cur, 5598 xfs_btree_del_cursor(cur,
5598 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); 5599 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5599 5600
5600 xfs_trans_log_inode(tp, ip, logflags); 5601 if (logflags)
5602 xfs_trans_log_inode(tp, ip, logflags);
5601 return error; 5603 return error;
5602} 5604}
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11e9b4caa54f..b984647c24db 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1753,11 +1753,72 @@ xfs_vm_readpages(
1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1754} 1754}
1755 1755
1756/*
1757 * This is basically a copy of __set_page_dirty_buffers() with one
1758 * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
1759 * dirty, we'll never be able to clean them because we don't write buffers
1760 * beyond EOF, and that means we can't invalidate pages that span EOF
1761 * that have been marked dirty. Further, the dirty state can leak into
1762 * the file interior if the file is extended, resulting in all sorts of
1763 * bad things happening as the state does not match the underlying data.
1764 *
1765 * XXX: this really indicates that bufferheads in XFS need to die. Warts like
1766 * this only exist because of bufferheads and how the generic code manages them.
1767 */
1768STATIC int
1769xfs_vm_set_page_dirty(
1770 struct page *page)
1771{
1772 struct address_space *mapping = page->mapping;
1773 struct inode *inode = mapping->host;
1774 loff_t end_offset;
1775 loff_t offset;
1776 int newly_dirty;
1777
1778 if (unlikely(!mapping))
1779 return !TestSetPageDirty(page);
1780
1781 end_offset = i_size_read(inode);
1782 offset = page_offset(page);
1783
1784 spin_lock(&mapping->private_lock);
1785 if (page_has_buffers(page)) {
1786 struct buffer_head *head = page_buffers(page);
1787 struct buffer_head *bh = head;
1788
1789 do {
1790 if (offset < end_offset)
1791 set_buffer_dirty(bh);
1792 bh = bh->b_this_page;
1793 offset += 1 << inode->i_blkbits;
1794 } while (bh != head);
1795 }
1796 newly_dirty = !TestSetPageDirty(page);
1797 spin_unlock(&mapping->private_lock);
1798
1799 if (newly_dirty) {
1800 /* sigh - __set_page_dirty() is static, so copy it here, too */
1801 unsigned long flags;
1802
1803 spin_lock_irqsave(&mapping->tree_lock, flags);
1804 if (page->mapping) { /* Race with truncate? */
1805 WARN_ON_ONCE(!PageUptodate(page));
1806 account_page_dirtied(page, mapping);
1807 radix_tree_tag_set(&mapping->page_tree,
1808 page_index(page), PAGECACHE_TAG_DIRTY);
1809 }
1810 spin_unlock_irqrestore(&mapping->tree_lock, flags);
1811 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1812 }
1813 return newly_dirty;
1814}
1815
1756const struct address_space_operations xfs_address_space_operations = { 1816const struct address_space_operations xfs_address_space_operations = {
1757 .readpage = xfs_vm_readpage, 1817 .readpage = xfs_vm_readpage,
1758 .readpages = xfs_vm_readpages, 1818 .readpages = xfs_vm_readpages,
1759 .writepage = xfs_vm_writepage, 1819 .writepage = xfs_vm_writepage,
1760 .writepages = xfs_vm_writepages, 1820 .writepages = xfs_vm_writepages,
1821 .set_page_dirty = xfs_vm_set_page_dirty,
1761 .releasepage = xfs_vm_releasepage, 1822 .releasepage = xfs_vm_releasepage,
1762 .invalidatepage = xfs_vm_invalidatepage, 1823 .invalidatepage = xfs_vm_invalidatepage,
1763 .write_begin = xfs_vm_write_begin, 1824 .write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2f1e30d39a35..1707980f9a4b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1470,6 +1470,26 @@ xfs_collapse_file_space(
1470 start_fsb = XFS_B_TO_FSB(mp, offset + len); 1470 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1471 shift_fsb = XFS_B_TO_FSB(mp, len); 1471 shift_fsb = XFS_B_TO_FSB(mp, len);
1472 1472
1473 /*
1474 * Writeback the entire file and force remove any post-eof blocks. The
1475 * writeback prevents changes to the extent list via concurrent
1476 * writeback and the eofblocks trim prevents the extent shift algorithm
1477 * from running into a post-eof delalloc extent.
1478 *
1479 * XXX: This is a temporary fix until the extent shift loop below is
1480 * converted to use offsets and lookups within the ILOCK rather than
1481 * carrying around the index into the extent list for the next
1482 * iteration.
1483 */
1484 error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
1485 if (error)
1486 return error;
1487 if (xfs_can_free_eofblocks(ip, true)) {
1488 error = xfs_free_eofblocks(mp, ip, false);
1489 if (error)
1490 return error;
1491 }
1492
1473 error = xfs_free_file_space(ip, offset, len); 1493 error = xfs_free_file_space(ip, offset, len);
1474 if (error) 1494 if (error)
1475 return error; 1495 return error;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 076b1708d134..de5368c803f9 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -291,12 +291,22 @@ xfs_file_read_iter(
291 if (inode->i_mapping->nrpages) { 291 if (inode->i_mapping->nrpages) {
292 ret = filemap_write_and_wait_range( 292 ret = filemap_write_and_wait_range(
293 VFS_I(ip)->i_mapping, 293 VFS_I(ip)->i_mapping,
294 pos, -1); 294 pos, pos + size - 1);
295 if (ret) { 295 if (ret) {
296 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); 296 xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
297 return ret; 297 return ret;
298 } 298 }
299 truncate_pagecache_range(VFS_I(ip), pos, -1); 299
300 /*
301 * Invalidate whole pages. This can return an error if
302 * we fail to invalidate a page, but this should never
303 * happen on XFS. Warn if it does fail.
304 */
305 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
306 pos >> PAGE_CACHE_SHIFT,
307 (pos + size - 1) >> PAGE_CACHE_SHIFT);
308 WARN_ON_ONCE(ret);
309 ret = 0;
300 } 310 }
301 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); 311 xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
302 } 312 }
@@ -632,10 +642,19 @@ xfs_file_dio_aio_write(
632 642
633 if (mapping->nrpages) { 643 if (mapping->nrpages) {
634 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 644 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
635 pos, -1); 645 pos, pos + count - 1);
636 if (ret) 646 if (ret)
637 goto out; 647 goto out;
638 truncate_pagecache_range(VFS_I(ip), pos, -1); 648 /*
649 * Invalidate whole pages. This can return an error if
650 * we fail to invalidate a page, but this should never
651 * happen on XFS. Warn if it does fail.
652 */
653 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
654 pos >> PAGE_CACHE_SHIFT,
655 (pos + count - 1) >> PAGE_CACHE_SHIFT);
656 WARN_ON_ONCE(ret);
657 ret = 0;
639 } 658 }
640 659
641 /* 660 /*