diff options
author | Christoph Hellwig <hch@lst.de> | 2017-09-02 12:53:41 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2017-09-03 13:40:45 -0400 |
commit | 8353a814f2518dcfa79a5bb77afd0e7dfa391bb1 (patch) | |
tree | c0acfb3c83991b4fba5b98a907a1cba0fe75310c | |
parent | dd60687ee541ca3f6df8758f38e6f22f57c42a37 (diff) |
xfs: open code end_buffer_async_write in xfs_finish_page_writeback
Our loop in xfs_finish_page_writeback, which iterates over all buffer
heads in a page and then calls end_buffer_async_write, which also
iterates over all buffers in the page to check if any I/O is in flight,
is not only inefficient, but also potentially dangerous as
end_buffer_async_write can cause the page and all buffers to be freed.
Replace it with a single loop that does the work of end_buffer_async_write
on a per-page basis.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/xfs_aops.c | 71 |
1 file changed, 47 insertions, 24 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6bf120bb1a17..f9efd67f6fa1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -85,11 +85,11 @@ xfs_find_bdev_for_inode( | |||
85 | * associated buffer_heads, paying attention to the start and end offsets that | 85 | * associated buffer_heads, paying attention to the start and end offsets that |
86 | * we need to process on the page. | 86 | * we need to process on the page. |
87 | * | 87 | * |
88 | * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last | 88 | * Note that we open code the action in end_buffer_async_write here so that we |
89 | * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or | 89 | * only have to iterate over the buffers attached to the page once. This is not |
90 | the page at all, as we may be racing with memory reclaim and it can free both | 90 | only more efficient, but also ensures that we only call end_page_writeback |
91 | * the bufferhead chain and the page as it will see the page as clean and | 91 | * at the end of the iteration, and thus avoids the pitfall of having the page |
92 | * unused. | 92 | * and buffers potentially freed after every call to end_buffer_async_write. |
93 | */ | 93 | */ |
94 | static void | 94 | static void |
95 | xfs_finish_page_writeback( | 95 | xfs_finish_page_writeback( |
@@ -97,29 +97,44 @@ xfs_finish_page_writeback( | |||
97 | struct bio_vec *bvec, | 97 | struct bio_vec *bvec, |
98 | int error) | 98 | int error) |
99 | { | 99 | { |
100 | unsigned int end = bvec->bv_offset + bvec->bv_len - 1; | 100 | struct buffer_head *head = page_buffers(bvec->bv_page), *bh = head; |
101 | struct buffer_head *head, *bh, *next; | 101 | bool busy = false; |
102 | unsigned int off = 0; | 102 | unsigned int off = 0; |
103 | unsigned int bsize; | 103 | unsigned long flags; |
104 | 104 | ||
105 | ASSERT(bvec->bv_offset < PAGE_SIZE); | 105 | ASSERT(bvec->bv_offset < PAGE_SIZE); |
106 | ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); | 106 | ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); |
107 | ASSERT(end < PAGE_SIZE); | 107 | ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE); |
108 | ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); | 108 | ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); |
109 | 109 | ||
110 | bh = head = page_buffers(bvec->bv_page); | 110 | local_irq_save(flags); |
111 | 111 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | |
112 | bsize = bh->b_size; | ||
113 | do { | 112 | do { |
114 | if (off > end) | 113 | if (off >= bvec->bv_offset && |
115 | break; | 114 | off < bvec->bv_offset + bvec->bv_len) { |
116 | next = bh->b_this_page; | 115 | ASSERT(buffer_async_write(bh)); |
117 | if (off < bvec->bv_offset) | 116 | ASSERT(bh->b_end_io == NULL); |
118 | goto next_bh; | 117 | |
119 | bh->b_end_io(bh, !error); | 118 | if (error) { |
120 | next_bh: | 119 | mark_buffer_write_io_error(bh); |
121 | off += bsize; | 120 | clear_buffer_uptodate(bh); |
122 | } while ((bh = next) != head); | 121 | SetPageError(bvec->bv_page); |
122 | } else { | ||
123 | set_buffer_uptodate(bh); | ||
124 | } | ||
125 | clear_buffer_async_write(bh); | ||
126 | unlock_buffer(bh); | ||
127 | } else if (buffer_async_write(bh)) { | ||
128 | ASSERT(buffer_locked(bh)); | ||
129 | busy = true; | ||
130 | } | ||
131 | off += bh->b_size; | ||
132 | } while ((bh = bh->b_this_page) != head); | ||
133 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
134 | local_irq_restore(flags); | ||
135 | |||
136 | if (!busy) | ||
137 | end_page_writeback(bvec->bv_page); | ||
123 | } | 138 | } |
124 | 139 | ||
125 | /* | 140 | /* |
@@ -133,8 +148,10 @@ xfs_destroy_ioend( | |||
133 | int error) | 148 | int error) |
134 | { | 149 | { |
135 | struct inode *inode = ioend->io_inode; | 150 | struct inode *inode = ioend->io_inode; |
136 | struct bio *last = ioend->io_bio; | 151 | struct bio *bio = &ioend->io_inline_bio; |
137 | struct bio *bio, *next; | 152 | struct bio *last = ioend->io_bio, *next; |
153 | u64 start = bio->bi_iter.bi_sector; | ||
154 | bool quiet = bio_flagged(bio, BIO_QUIET); | ||
138 | 155 | ||
139 | for (bio = &ioend->io_inline_bio; bio; bio = next) { | 156 | for (bio = &ioend->io_inline_bio; bio; bio = next) { |
140 | struct bio_vec *bvec; | 157 | struct bio_vec *bvec; |
@@ -155,6 +172,11 @@ xfs_destroy_ioend( | |||
155 | 172 | ||
156 | bio_put(bio); | 173 | bio_put(bio); |
157 | } | 174 | } |
175 | |||
176 | if (unlikely(error && !quiet)) { | ||
177 | xfs_err_ratelimited(XFS_I(inode)->i_mount, | ||
178 | "writeback error on sector %llu", start); | ||
179 | } | ||
158 | } | 180 | } |
159 | 181 | ||
160 | /* | 182 | /* |
@@ -423,7 +445,8 @@ xfs_start_buffer_writeback( | |||
423 | ASSERT(!buffer_delay(bh)); | 445 | ASSERT(!buffer_delay(bh)); |
424 | ASSERT(!buffer_unwritten(bh)); | 446 | ASSERT(!buffer_unwritten(bh)); |
425 | 447 | ||
426 | mark_buffer_async_write(bh); | 448 | bh->b_end_io = NULL; |
449 | set_buffer_async_write(bh); | ||
427 | set_buffer_uptodate(bh); | 450 | set_buffer_uptodate(bh); |
428 | clear_buffer_dirty(bh); | 451 | clear_buffer_dirty(bh); |
429 | } | 452 | } |