author    Christoph Hellwig <hch@lst.de>    2017-09-02 12:53:41 -0400
committer Darrick J. Wong <darrick.wong@oracle.com>    2017-09-03 13:40:45 -0400
commit    8353a814f2518dcfa79a5bb77afd0e7dfa391bb1 (patch)
tree      c0acfb3c83991b4fba5b98a907a1cba0fe75310c
parent    dd60687ee541ca3f6df8758f38e6f22f57c42a37 (diff)
xfs: open code end_buffer_async_write in xfs_finish_page_writeback
Our loop in xfs_finish_page_writeback iterates over all buffer heads in a page and then calls end_buffer_async_write, which itself iterates over all buffers in the page to check whether any I/O is still in flight. This is not only inefficient but also potentially dangerous, as end_buffer_async_write can cause the page and all of its buffers to be freed. Replace it with a single loop that does the work of end_buffer_async_write on a per-page basis.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
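For context on the double iteration the message describes: end_buffer_async_write() in fs/buffer.c ends I/O on one buffer and then walks every other buffer on the page to decide whether to call end_page_writeback(). The sketch below is a trimmed paraphrase of that helper (error reporting and sanity checks dropped), not the verbatim kernel source; it shows both the per-call sibling scan and the point after which the page may already be gone.

	/*
	 * Trimmed paraphrase of end_buffer_async_write() from fs/buffer.c;
	 * not the verbatim source.  Every call re-walks the page's buffer
	 * ring, and the final end_page_writeback() may allow the page (and
	 * its buffers) to be freed before the caller's own loop finishes.
	 */
	void end_buffer_async_write(struct buffer_head *bh, int uptodate)
	{
		struct buffer_head *first, *tmp;
		struct page *page = bh->b_page;
		unsigned long flags;

		if (uptodate) {
			set_buffer_uptodate(bh);
		} else {
			mark_buffer_write_io_error(bh);
			clear_buffer_uptodate(bh);
			SetPageError(page);
		}

		/* BH_Uptodate_Lock on the first buffer serializes the walk. */
		first = page_buffers(page);
		local_irq_save(flags);
		bit_spin_lock(BH_Uptodate_Lock, &first->b_state);

		clear_buffer_async_write(bh);
		unlock_buffer(bh);

		/* Sibling scan: any other buffer still under async writeback? */
		for (tmp = bh->b_this_page; tmp != bh; tmp = tmp->b_this_page)
			if (buffer_async_write(tmp))
				goto still_busy;

		bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
		local_irq_restore(flags);
		end_page_writeback(page);	/* page may be freed after this */
		return;

	still_busy:
		bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
		local_irq_restore(flags);
	}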
-rw-r--r--    fs/xfs/xfs_aops.c    71
1 file changed, 47 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6bf120bb1a17..f9efd67f6fa1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -85,11 +85,11 @@ xfs_find_bdev_for_inode(
  * associated buffer_heads, paying attention to the start and end offsets that
  * we need to process on the page.
  *
- * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
- * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
- * the page at all, as we may be racing with memory reclaim and it can free both
- * the bufferhead chain and the page as it will see the page as clean and
- * unused.
+ * Note that we open code the action in end_buffer_async_write here so that we
+ * only have to iterate over the buffers attached to the page once. This is not
+ * only more efficient, but also ensures that we only call end_page_writeback
+ * at the end of the iteration, and thus avoids the pitfall of having the page
+ * and buffers potentially freed after every call to end_buffer_async_write.
  */
 static void
 xfs_finish_page_writeback(
@@ -97,29 +97,44 @@ xfs_finish_page_writeback(
 	struct bio_vec		*bvec,
 	int			error)
 {
-	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
-	struct buffer_head	*head, *bh, *next;
+	struct buffer_head	*head = page_buffers(bvec->bv_page), *bh = head;
+	bool			busy = false;
 	unsigned int		off = 0;
-	unsigned int		bsize;
+	unsigned long		flags;
 
 	ASSERT(bvec->bv_offset < PAGE_SIZE);
 	ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
-	ASSERT(end < PAGE_SIZE);
+	ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE);
 	ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
 
-	bh = head = page_buffers(bvec->bv_page);
-
-	bsize = bh->b_size;
+	local_irq_save(flags);
+	bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
 	do {
-		if (off > end)
-			break;
-		next = bh->b_this_page;
-		if (off < bvec->bv_offset)
-			goto next_bh;
-		bh->b_end_io(bh, !error);
-next_bh:
-		off += bsize;
-	} while ((bh = next) != head);
+		if (off >= bvec->bv_offset &&
+		    off < bvec->bv_offset + bvec->bv_len) {
+			ASSERT(buffer_async_write(bh));
+			ASSERT(bh->b_end_io == NULL);
+
+			if (error) {
+				mark_buffer_write_io_error(bh);
+				clear_buffer_uptodate(bh);
+				SetPageError(bvec->bv_page);
+			} else {
+				set_buffer_uptodate(bh);
+			}
+			clear_buffer_async_write(bh);
+			unlock_buffer(bh);
+		} else if (buffer_async_write(bh)) {
+			ASSERT(buffer_locked(bh));
+			busy = true;
+		}
+		off += bh->b_size;
+	} while ((bh = bh->b_this_page) != head);
+	bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+	local_irq_restore(flags);
+
+	if (!busy)
+		end_page_writeback(bvec->bv_page);
 }
 
 /*
@@ -133,8 +148,10 @@ xfs_destroy_ioend(
 	int			error)
 {
 	struct inode		*inode = ioend->io_inode;
-	struct bio		*last = ioend->io_bio;
-	struct bio		*bio, *next;
+	struct bio		*bio = &ioend->io_inline_bio;
+	struct bio		*last = ioend->io_bio, *next;
+	u64			start = bio->bi_iter.bi_sector;
+	bool			quiet = bio_flagged(bio, BIO_QUIET);
 
 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
 		struct bio_vec	*bvec;
@@ -155,6 +172,11 @@ xfs_destroy_ioend(
 
 		bio_put(bio);
 	}
+
+	if (unlikely(error && !quiet)) {
+		xfs_err_ratelimited(XFS_I(inode)->i_mount,
+			"writeback error on sector %llu", start);
+	}
 }
 
 /*
@@ -423,7 +445,8 @@ xfs_start_buffer_writeback(
 	ASSERT(!buffer_delay(bh));
 	ASSERT(!buffer_unwritten(bh));
 
-	mark_buffer_async_write(bh);
+	bh->b_end_io = NULL;
+	set_buffer_async_write(bh);
 	set_buffer_uptodate(bh);
 	clear_buffer_dirty(bh);
 }