aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_aops.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2014-09-01 22:12:51 -0400
committerDave Chinner <david@fromorbit.com>2014-09-01 22:12:51 -0400
commit22e757a49cf010703fcb9c9b4ef793248c39b0c2 (patch)
tree7bbd8018010a887e386642eae10d947b4561bcb8 /fs/xfs/xfs_aops.c
parent52addcf9d6669fa439387610bc65c92fa0980cef (diff)
xfs: don't dirty buffers beyond EOF
generic/263 is failing fsx at this point with a page spanning EOF that cannot be invalidated. The operations are:

1190 mapwrite 0x52c00 thru 0x5e569 (0xb96a bytes)
1191 mapread 0x5c000 thru 0x5d636 (0x1637 bytes)
1192 write 0x5b600 thru 0x771ff (0x1bc00 bytes)

where 1190 extends EOF from 0x54000 to 0x5e569. When the direct IO write attempts to invalidate the cached page over this range, it fails with -EBUSY and so any attempt to do page invalidation fails.

The real question is this: Why can't that page be invalidated after it has been written to disk and cleaned?

Well, there's data on the first two buffers in the page (1k block size, 4k page), but the third buffer on the page (i.e. beyond EOF) is failing drop_buffers because its bh->b_state == 0x3, which is BH_Uptodate | BH_Dirty. IOWs, there are dirty buffers beyond EOF. Say what?

OK, set_buffer_dirty() is called on all buffers from __set_page_dirty_buffers(), regardless of whether the buffer is beyond EOF or not, which means that when we get to ->writepage, we have buffers marked dirty beyond EOF that we need to clean. So, we need to implement our own .set_page_dirty method that doesn't dirty buffers beyond EOF.

This is messy because the buffer code is not meant to be shared and it has interesting locking issues on the buffer dirty bits. So just copy and paste it and then modify it to suit what we need.

Note: the solution that the other filesystems and the generic block code use - marking the buffers clean in ->writepage - does not work for XFS. It still leaves dirty buffers beyond EOF and invalidations still fail. Hence rather than play whack-a-mole, this patch simply prevents those buffers from being dirtied in the first place.

cc: <stable@kernel.org>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--fs/xfs/xfs_aops.c61
1 files changed, 61 insertions, 0 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11e9b4caa54f..b984647c24db 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1753,11 +1753,72 @@ xfs_vm_readpages(
1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1753 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1754} 1754}
1755 1755
1756/*
1757 * This is basically a copy of __set_page_dirty_buffers() with one
1758 * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
1759 * dirty, we'll never be able to clean them because we don't write buffers
1760 * beyond EOF, and that means we can't invalidate pages that span EOF
1761 * that have been marked dirty. Further, the dirty state can leak into
1762 * the file interior if the file is extended, resulting in all sorts of
1763 * bad things happening as the state does not match the underlying data.
1764 *
1765 * XXX: this really indicates that bufferheads in XFS need to die. Warts like
1766 * this only exist because of bufferheads and how the generic code manages them.
1767 */
1768STATIC int
1769xfs_vm_set_page_dirty(
1770 struct page *page)
1771{
1772 struct address_space *mapping = page->mapping;
1773 struct inode *inode = mapping->host;
1774 loff_t end_offset;
1775 loff_t offset;
1776 int newly_dirty;
1777
1778 if (unlikely(!mapping))
1779 return !TestSetPageDirty(page);
1780
1781 end_offset = i_size_read(inode);
1782 offset = page_offset(page);
1783
1784 spin_lock(&mapping->private_lock);
1785 if (page_has_buffers(page)) {
1786 struct buffer_head *head = page_buffers(page);
1787 struct buffer_head *bh = head;
1788
1789 do {
1790 if (offset < end_offset)
1791 set_buffer_dirty(bh);
1792 bh = bh->b_this_page;
1793 offset += 1 << inode->i_blkbits;
1794 } while (bh != head);
1795 }
1796 newly_dirty = !TestSetPageDirty(page);
1797 spin_unlock(&mapping->private_lock);
1798
1799 if (newly_dirty) {
1800 /* sigh - __set_page_dirty() is static, so copy it here, too */
1801 unsigned long flags;
1802
1803 spin_lock_irqsave(&mapping->tree_lock, flags);
1804 if (page->mapping) { /* Race with truncate? */
1805 WARN_ON_ONCE(!PageUptodate(page));
1806 account_page_dirtied(page, mapping);
1807 radix_tree_tag_set(&mapping->page_tree,
1808 page_index(page), PAGECACHE_TAG_DIRTY);
1809 }
1810 spin_unlock_irqrestore(&mapping->tree_lock, flags);
1811 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
1812 }
1813 return newly_dirty;
1814}
1815
1756const struct address_space_operations xfs_address_space_operations = { 1816const struct address_space_operations xfs_address_space_operations = {
1757 .readpage = xfs_vm_readpage, 1817 .readpage = xfs_vm_readpage,
1758 .readpages = xfs_vm_readpages, 1818 .readpages = xfs_vm_readpages,
1759 .writepage = xfs_vm_writepage, 1819 .writepage = xfs_vm_writepage,
1760 .writepages = xfs_vm_writepages, 1820 .writepages = xfs_vm_writepages,
1821 .set_page_dirty = xfs_vm_set_page_dirty,
1761 .releasepage = xfs_vm_releasepage, 1822 .releasepage = xfs_vm_releasepage,
1762 .invalidatepage = xfs_vm_invalidatepage, 1823 .invalidatepage = xfs_vm_invalidatepage,
1763 .write_begin = xfs_vm_write_begin, 1824 .write_begin = xfs_vm_write_begin,