aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_aops.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2014-03-07 00:19:14 -0500
committerDave Chinner <david@fromorbit.com>2014-03-07 00:19:14 -0500
commita49935f200e24e95fffcc705014c4b60ad78ff1f (patch)
treed6d42597cc8f9e4a48cd41005307ceda143f3cfd /fs/xfs/xfs_aops.c
parente480a7239723afe579060239564884d1fa4c9325 (diff)
xfs: xfs_check_page_type buffer checks need help
xfs_aops_discard_page() was introduced in the following commit: xfs: truncate delalloc extents when IO fails in writeback ... to clean up left over delalloc ranges after I/O failure in ->writepage(). generic/224 tests for this scenario and occasionally reproduces panics on sub-4k blocksize filesystems. The cause of this is failure to clean up the delalloc range on a page where the first buffer does not match one of the expected states of xfs_check_page_type(). If a buffer is not unwritten, delayed or dirty&mapped, xfs_check_page_type() stops and immediately returns 0. The stress test of generic/224 creates a scenario where the first several buffers of a page with delayed buffers are mapped & uptodate and some subsequent buffer is delayed. If the ->writepage() happens to fail for this page, xfs_aops_discard_page() incorrectly skips the entire page. This then causes later failures either when direct IO maps the range and finds the stale delayed buffer, or we evict the inode and find that the inode still has a delayed block reservation accounted to it. We can easily fix this xfs_aops_discard_page() failure by making xfs_check_page_type() check all buffers, but this breaks xfs_convert_page() more than it is already broken. Indeed, xfs_convert_page() wants xfs_check_page_type() to tell it if the first buffers on the pages are of a type that can be aggregated into the contiguous IO that is already being built. xfs_convert_page() should not be writing random buffers out of a page, but the current behaviour will cause it to do so if there are buffers that don't match the current specification on the page. Hence for xfs_convert_page() we need to: a) return "not ok" if the first buffer on the page does not match the specification provided so we don't write anything; and b) abort its buffer-add-to-io loop the moment we come across a buffer that does not match the specification. 
Hence we need to fix both xfs_check_page_type() and xfs_convert_page() to work correctly with pages that have mixed buffer types, whilst allowing xfs_aops_discard_page() to scan all buffers on the page for a type match. Reported-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_aops.c')
-rw-r--r--fs/xfs/xfs_aops.c81
1 files changed, 50 insertions, 31 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index db2cfb067d0b..5935cce8c26c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -632,38 +632,46 @@ xfs_map_at_offset(
632} 632}
633 633
634/* 634/*
635 * Test if a given page is suitable for writing as part of an unwritten 635 * Test if a given page contains at least one buffer of a given @type.
636 * or delayed allocate extent. 636 * If @check_all_buffers is true, then we walk all the buffers in the page to
637 * try to find one of the type passed in. If it is not set, then the caller only
638 * needs to check the first buffer on the page for a match.
637 */ 639 */
638STATIC int 640STATIC bool
639xfs_check_page_type( 641xfs_check_page_type(
640 struct page *page, 642 struct page *page,
641 unsigned int type) 643 unsigned int type,
644 bool check_all_buffers)
642{ 645{
643 if (PageWriteback(page)) 646 struct buffer_head *bh;
644 return 0; 647 struct buffer_head *head;
645 648
646 if (page->mapping && page_has_buffers(page)) { 649 if (PageWriteback(page))
647 struct buffer_head *bh, *head; 650 return false;
648 int acceptable = 0; 651 if (!page->mapping)
652 return false;
653 if (!page_has_buffers(page))
654 return false;
649 655
650 bh = head = page_buffers(page); 656 bh = head = page_buffers(page);
651 do { 657 do {
652 if (buffer_unwritten(bh)) 658 if (buffer_unwritten(bh)) {
653 acceptable += (type == XFS_IO_UNWRITTEN); 659 if (type == XFS_IO_UNWRITTEN)
654 else if (buffer_delay(bh)) 660 return true;
655 acceptable += (type == XFS_IO_DELALLOC); 661 } else if (buffer_delay(bh)) {
656 else if (buffer_dirty(bh) && buffer_mapped(bh)) 662 if (type == XFS_IO_DELALLOC);
657 acceptable += (type == XFS_IO_OVERWRITE); 663 return true;
658 else 664 } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
659 break; 665 if (type == XFS_IO_OVERWRITE);
660 } while ((bh = bh->b_this_page) != head); 666 return true;
667 }
661 668
662 if (acceptable) 669 /* If we are only checking the first buffer, we are done now. */
663 return 1; 670 if (!check_all_buffers)
664 } 671 break;
672 } while ((bh = bh->b_this_page) != head);
665 673
666 return 0; 674 return false;
667} 675}
668 676
669/* 677/*
@@ -697,7 +705,7 @@ xfs_convert_page(
697 goto fail_unlock_page; 705 goto fail_unlock_page;
698 if (page->mapping != inode->i_mapping) 706 if (page->mapping != inode->i_mapping)
699 goto fail_unlock_page; 707 goto fail_unlock_page;
700 if (!xfs_check_page_type(page, (*ioendp)->io_type)) 708 if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
701 goto fail_unlock_page; 709 goto fail_unlock_page;
702 710
703 /* 711 /*
@@ -742,6 +750,15 @@ xfs_convert_page(
742 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; 750 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
743 page_dirty = p_offset / len; 751 page_dirty = p_offset / len;
744 752
753 /*
754 * The moment we find a buffer that doesn't match our current type
755 * specification or can't be written, abort the loop and start
756 * writeback. As per the above xfs_imap_valid() check, only
757 * xfs_vm_writepage() can handle partial page writeback fully - we are
758 * limited here to the buffers that are contiguous with the current
759 * ioend, and hence a buffer we can't write breaks that contiguity and
760 * we have to defer the rest of the IO to xfs_vm_writepage().
761 */
745 bh = head = page_buffers(page); 762 bh = head = page_buffers(page);
746 do { 763 do {
747 if (offset >= end_offset) 764 if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
750 uptodate = 0; 767 uptodate = 0;
751 if (!(PageUptodate(page) || buffer_uptodate(bh))) { 768 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
752 done = 1; 769 done = 1;
753 continue; 770 break;
754 } 771 }
755 772
756 if (buffer_unwritten(bh) || buffer_delay(bh) || 773 if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
762 else 779 else
763 type = XFS_IO_OVERWRITE; 780 type = XFS_IO_OVERWRITE;
764 781
765 if (!xfs_imap_valid(inode, imap, offset)) { 782 /*
766 done = 1; 783 * imap should always be valid because of the above
767 continue; 784 * partial page end_offset check on the imap.
768 } 785 */
786 ASSERT(xfs_imap_valid(inode, imap, offset));
769 787
770 lock_buffer(bh); 788 lock_buffer(bh);
771 if (type != XFS_IO_OVERWRITE) 789 if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
777 count++; 795 count++;
778 } else { 796 } else {
779 done = 1; 797 done = 1;
798 break;
780 } 799 }
781 } while (offset += len, (bh = bh->b_this_page) != head); 800 } while (offset += len, (bh = bh->b_this_page) != head);
782 801
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
868 struct buffer_head *bh, *head; 887 struct buffer_head *bh, *head;
869 loff_t offset = page_offset(page); 888 loff_t offset = page_offset(page);
870 889
871 if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) 890 if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
872 goto out_invalidate; 891 goto out_invalidate;
873 892
874 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 893 if (XFS_FORCED_SHUTDOWN(ip->i_mount))