diff options
author | Christoph Hellwig <hch@lst.de> | 2017-09-02 12:53:41 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2017-09-03 13:40:45 -0400 |
commit | 8353a814f2518dcfa79a5bb77afd0e7dfa391bb1 (patch) | |
tree | c0acfb3c83991b4fba5b98a907a1cba0fe75310c | |
parent | dd60687ee541ca3f6df8758f38e6f22f57c42a37 (diff) |
xfs: open code end_buffer_async_write in xfs_finish_page_writeback
Our loop in xfs_finish_page_writeback, which iterates over all buffer
heads in a page and then calls end_buffer_async_write, which also
iterates over all buffers in the page to check if any I/O is in flight,
is not only inefficient, but also potentially dangerous as
end_buffer_async_write can cause the page and all buffers to be freed.
Replace it with a single loop that does the work of end_buffer_async_write
on a per-page basis.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/xfs_aops.c | 71 |
1 file changed, 47 insertions, 24 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6bf120bb1a17..f9efd67f6fa1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -85,11 +85,11 @@ xfs_find_bdev_for_inode( | |||
85 | * associated buffer_heads, paying attention to the start and end offsets that | 85 | * associated buffer_heads, paying attention to the start and end offsets that |
86 | * we need to process on the page. | 86 | * we need to process on the page. |
87 | * | 87 | * |
88 | * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last | 88 | * Note that we open code the action in end_buffer_async_write here so that we |
89 | * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or | 89 | * only have to iterate over the buffers attached to the page once. This is not |
90 | the page at all, as we may be racing with memory reclaim and it can free both | 90 | only more efficient, but also ensures that we only call end_page_writeback |
91 | * the bufferhead chain and the page as it will see the page as clean and | 91 | * at the end of the iteration, and thus avoids the pitfall of having the page |
92 | * unused. | 92 | * and buffers potentially freed after every call to end_buffer_async_write. |
93 | */ | 93 | */ |
94 | static void | 94 | static void |
95 | xfs_finish_page_writeback( | 95 | xfs_finish_page_writeback( |
@@ -97,29 +97,44 @@ xfs_finish_page_writeback( | |||
97 | struct bio_vec *bvec, | 97 | struct bio_vec *bvec, |
98 | int error) | 98 | int error) |
99 | { | 99 | { |
100 | unsigned int end = bvec->bv_offset + bvec->bv_len - 1; | 100 | struct buffer_head *head = page_buffers(bvec->bv_page), *bh = head; |
101 | struct buffer_head *head, *bh, *next; | 101 | bool busy = false; |
102 | unsigned int off = 0; | 102 | unsigned int off = 0; |
103 | unsigned int bsize; | 103 | unsigned long flags; |
104 | 104 | ||
105 | ASSERT(bvec->bv_offset < PAGE_SIZE); | 105 | ASSERT(bvec->bv_offset < PAGE_SIZE); |
106 | ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); | 106 | ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); |
107 | ASSERT(end < PAGE_SIZE); | 107 | ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE); |
108 | ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); | 108 | ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); |
109 | 109 | ||
110 | bh = head = page_buffers(bvec->bv_page); | 110 | local_irq_save(flags); |
111 | 111 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | |
112 | bsize = bh->b_size; | ||
113 | do { | 112 | do { |
114 | if (off > end) | 113 | if (off >= bvec->bv_offset && |
115 | break; | 114 | off < bvec->bv_offset + bvec->bv_len) { |
116 | next = bh->b_this_page; | 115 | ASSERT(buffer_async_write(bh)); |
117 | if (off < bvec->bv_offset) | 116 | ASSERT(bh->b_end_io == NULL); |
118 | goto next_bh; | 117 | |
119 | bh->b_end_io(bh, !error); | 118 | if (error) { |
120 | next_bh: | 119 | mark_buffer_write_io_error(bh); |
121 | off += bsize; | 120 | clear_buffer_uptodate(bh); |
122 | } while ((bh = next) != head); | 121 | SetPageError(bvec->bv_page); |
122 | } else { | ||
123 | set_buffer_uptodate(bh); | ||
124 | } | ||
125 | clear_buffer_async_write(bh); | ||
126 | unlock_buffer(bh); | ||
127 | } else if (buffer_async_write(bh)) { | ||
128 | ASSERT(buffer_locked(bh)); | ||
129 | busy = true; | ||
130 | } | ||
131 | off += bh->b_size; | ||
132 | } while ((bh = bh->b_this_page) != head); | ||
133 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
134 | local_irq_restore(flags); | ||
135 | |||
136 | if (!busy) | ||
137 | end_page_writeback(bvec->bv_page); | ||
123 | } | 138 | } |
124 | 139 | ||
125 | /* | 140 | /* |
@@ -133,8 +148,10 @@ xfs_destroy_ioend( | |||
133 | int error) | 148 | int error) |
134 | { | 149 | { |
135 | struct inode *inode = ioend->io_inode; | 150 | struct inode *inode = ioend->io_inode; |
136 | struct bio *last = ioend->io_bio; | 151 | struct bio *bio = &ioend->io_inline_bio; |
137 | struct bio *bio, *next; | 152 | struct bio *last = ioend->io_bio, *next; |
153 | u64 start = bio->bi_iter.bi_sector; | ||
154 | bool quiet = bio_flagged(bio, BIO_QUIET); | ||
138 | 155 | ||
139 | for (bio = &ioend->io_inline_bio; bio; bio = next) { | 156 | for (bio = &ioend->io_inline_bio; bio; bio = next) { |
140 | struct bio_vec *bvec; | 157 | struct bio_vec *bvec; |
@@ -155,6 +172,11 @@ xfs_destroy_ioend( | |||
155 | 172 | ||
156 | bio_put(bio); | 173 | bio_put(bio); |
157 | } | 174 | } |
175 | |||
176 | if (unlikely(error && !quiet)) { | ||
177 | xfs_err_ratelimited(XFS_I(inode)->i_mount, | ||
178 | "writeback error on sector %llu", start); | ||
179 | } | ||
158 | } | 180 | } |
159 | 181 | ||
160 | /* | 182 | /* |
@@ -423,7 +445,8 @@ xfs_start_buffer_writeback( | |||
423 | ASSERT(!buffer_delay(bh)); | 445 | ASSERT(!buffer_delay(bh)); |
424 | ASSERT(!buffer_unwritten(bh)); | 446 | ASSERT(!buffer_unwritten(bh)); |
425 | 447 | ||
426 | mark_buffer_async_write(bh); | 448 | bh->b_end_io = NULL; |
449 | set_buffer_async_write(bh); | ||
427 | set_buffer_uptodate(bh); | 450 | set_buffer_uptodate(bh); |
428 | clear_buffer_dirty(bh); | 451 | clear_buffer_dirty(bh); |
429 | } | 452 | } |