diff options
author | Jan Kara <jack@suse.cz> | 2013-06-04 11:58:58 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-06-04 11:58:58 -0400 |
commit | 97a851ed71cd9cc2542955e92a001c6ea3d21d35 (patch) | |
tree | 32a881fb66aa9058c4cdb3d279925a492f3689ca /fs/ext4/page-io.c | |
parent | 8af8eecc1331dbf5e8c662022272cf667e213da5 (diff) |
ext4: use io_end for multiple bios
Change writeback path to create just one io_end structure for the
extent to which we submit IO and share it among bios writing that
extent. This prevents needless splitting and joining of unwritten
extents when they cannot be submitted as a single bio.
Bugs in ENOMEM handling were found by the Linux File System Verification
project (linuxtesting.org) and fixed by Alexey Khoroshilov
<khoroshilov@ispras.ru>.
CC: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/page-io.c')
-rw-r--r-- | fs/ext4/page-io.c | 121 |
1 files changed, 76 insertions, 45 deletions
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4acf1f78881b..19599bded62a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,15 +62,28 @@ void ext4_ioend_shutdown(struct inode *inode) | |||
62 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | 62 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); |
63 | } | 63 | } |
64 | 64 | ||
65 | void ext4_free_io_end(ext4_io_end_t *io) | 65 | static void ext4_release_io_end(ext4_io_end_t *io_end) |
66 | { | 66 | { |
67 | BUG_ON(!io); | 67 | BUG_ON(!list_empty(&io_end->list)); |
68 | BUG_ON(!list_empty(&io->list)); | 68 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); |
69 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | 69 | |
70 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) | ||
71 | wake_up_all(ext4_ioend_wq(io_end->inode)); | ||
72 | if (io_end->flag & EXT4_IO_END_DIRECT) | ||
73 | inode_dio_done(io_end->inode); | ||
74 | if (io_end->iocb) | ||
75 | aio_complete(io_end->iocb, io_end->result, 0); | ||
76 | kmem_cache_free(io_end_cachep, io_end); | ||
77 | } | ||
78 | |||
79 | static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) | ||
80 | { | ||
81 | struct inode *inode = io_end->inode; | ||
70 | 82 | ||
71 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) | 83 | io_end->flag &= ~EXT4_IO_END_UNWRITTEN; |
72 | wake_up_all(ext4_ioend_wq(io->inode)); | 84 | /* Wake up anyone waiting on unwritten extent conversion */ |
73 | kmem_cache_free(io_end_cachep, io); | 85 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) |
86 | wake_up_all(ext4_ioend_wq(inode)); | ||
74 | } | 87 | } |
75 | 88 | ||
76 | /* check a range of space and convert unwritten extents to written. */ | 89 | /* check a range of space and convert unwritten extents to written. */ |
@@ -93,13 +106,8 @@ static int ext4_end_io(ext4_io_end_t *io) | |||
93 | "(inode %lu, offset %llu, size %zd, error %d)", | 106 | "(inode %lu, offset %llu, size %zd, error %d)", |
94 | inode->i_ino, offset, size, ret); | 107 | inode->i_ino, offset, size, ret); |
95 | } | 108 | } |
96 | /* Wake up anyone waiting on unwritten extent conversion */ | 109 | ext4_clear_io_unwritten_flag(io); |
97 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | 110 | ext4_release_io_end(io); |
98 | wake_up_all(ext4_ioend_wq(inode)); | ||
99 | if (io->flag & EXT4_IO_END_DIRECT) | ||
100 | inode_dio_done(inode); | ||
101 | if (io->iocb) | ||
102 | aio_complete(io->iocb, io->result, 0); | ||
103 | return ret; | 111 | return ret; |
104 | } | 112 | } |
105 | 113 | ||
@@ -130,7 +138,7 @@ static void dump_completed_IO(struct inode *inode) | |||
130 | } | 138 | } |
131 | 139 | ||
132 | /* Add the io_end to per-inode completed end_io list. */ | 140 | /* Add the io_end to per-inode completed end_io list. */ |
133 | void ext4_add_complete_io(ext4_io_end_t *io_end) | 141 | static void ext4_add_complete_io(ext4_io_end_t *io_end) |
134 | { | 142 | { |
135 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); | 143 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
136 | struct workqueue_struct *wq; | 144 | struct workqueue_struct *wq; |
@@ -167,8 +175,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
167 | err = ext4_end_io(io); | 175 | err = ext4_end_io(io); |
168 | if (unlikely(!ret && err)) | 176 | if (unlikely(!ret && err)) |
169 | ret = err; | 177 | ret = err; |
170 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
171 | ext4_free_io_end(io); | ||
172 | } | 178 | } |
173 | return ret; | 179 | return ret; |
174 | } | 180 | } |
@@ -200,10 +206,43 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | |||
200 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | 206 | atomic_inc(&EXT4_I(inode)->i_ioend_count); |
201 | io->inode = inode; | 207 | io->inode = inode; |
202 | INIT_LIST_HEAD(&io->list); | 208 | INIT_LIST_HEAD(&io->list); |
209 | atomic_set(&io->count, 1); | ||
203 | } | 210 | } |
204 | return io; | 211 | return io; |
205 | } | 212 | } |
206 | 213 | ||
214 | void ext4_put_io_end_defer(ext4_io_end_t *io_end) | ||
215 | { | ||
216 | if (atomic_dec_and_test(&io_end->count)) { | ||
217 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { | ||
218 | ext4_release_io_end(io_end); | ||
219 | return; | ||
220 | } | ||
221 | ext4_add_complete_io(io_end); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | int ext4_put_io_end(ext4_io_end_t *io_end) | ||
226 | { | ||
227 | int err = 0; | ||
228 | |||
229 | if (atomic_dec_and_test(&io_end->count)) { | ||
230 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { | ||
231 | err = ext4_convert_unwritten_extents(io_end->inode, | ||
232 | io_end->offset, io_end->size); | ||
233 | ext4_clear_io_unwritten_flag(io_end); | ||
234 | } | ||
235 | ext4_release_io_end(io_end); | ||
236 | } | ||
237 | return err; | ||
238 | } | ||
239 | |||
240 | ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) | ||
241 | { | ||
242 | atomic_inc(&io_end->count); | ||
243 | return io_end; | ||
244 | } | ||
245 | |||
207 | /* | 246 | /* |
208 | * Print an buffer I/O error compatible with the fs/buffer.c. This | 247 | * Print an buffer I/O error compatible with the fs/buffer.c. This |
209 | * provides compatibility with dmesg scrapers that look for a specific | 248 | * provides compatibility with dmesg scrapers that look for a specific |
@@ -286,12 +325,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
286 | bi_sector >> (inode->i_blkbits - 9)); | 325 | bi_sector >> (inode->i_blkbits - 9)); |
287 | } | 326 | } |
288 | 327 | ||
289 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 328 | ext4_put_io_end_defer(io_end); |
290 | ext4_free_io_end(io_end); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | ext4_add_complete_io(io_end); | ||
295 | } | 329 | } |
296 | 330 | ||
297 | void ext4_io_submit(struct ext4_io_submit *io) | 331 | void ext4_io_submit(struct ext4_io_submit *io) |
@@ -305,40 +339,37 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
305 | bio_put(io->io_bio); | 339 | bio_put(io->io_bio); |
306 | } | 340 | } |
307 | io->io_bio = NULL; | 341 | io->io_bio = NULL; |
308 | io->io_op = 0; | 342 | } |
343 | |||
344 | void ext4_io_submit_init(struct ext4_io_submit *io, | ||
345 | struct writeback_control *wbc) | ||
346 | { | ||
347 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
348 | io->io_bio = NULL; | ||
309 | io->io_end = NULL; | 349 | io->io_end = NULL; |
310 | } | 350 | } |
311 | 351 | ||
312 | static int io_submit_init(struct ext4_io_submit *io, | 352 | static int io_submit_init_bio(struct ext4_io_submit *io, |
313 | struct inode *inode, | 353 | struct buffer_head *bh) |
314 | struct writeback_control *wbc, | ||
315 | struct buffer_head *bh) | ||
316 | { | 354 | { |
317 | ext4_io_end_t *io_end; | ||
318 | struct page *page = bh->b_page; | ||
319 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 355 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
320 | struct bio *bio; | 356 | struct bio *bio; |
321 | 357 | ||
322 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
323 | if (!io_end) | ||
324 | return -ENOMEM; | ||
325 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); | 358 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); |
326 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 359 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
327 | bio->bi_bdev = bh->b_bdev; | 360 | bio->bi_bdev = bh->b_bdev; |
328 | bio->bi_private = io->io_end = io_end; | ||
329 | bio->bi_end_io = ext4_end_bio; | 361 | bio->bi_end_io = ext4_end_bio; |
330 | 362 | bio->bi_private = ext4_get_io_end(io->io_end); | |
331 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | 363 | if (!io->io_end->size) |
332 | 364 | io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT) | |
365 | + bh_offset(bh); | ||
333 | io->io_bio = bio; | 366 | io->io_bio = bio; |
334 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
335 | io->io_next_block = bh->b_blocknr; | 367 | io->io_next_block = bh->b_blocknr; |
336 | return 0; | 368 | return 0; |
337 | } | 369 | } |
338 | 370 | ||
339 | static int io_submit_add_bh(struct ext4_io_submit *io, | 371 | static int io_submit_add_bh(struct ext4_io_submit *io, |
340 | struct inode *inode, | 372 | struct inode *inode, |
341 | struct writeback_control *wbc, | ||
342 | struct buffer_head *bh) | 373 | struct buffer_head *bh) |
343 | { | 374 | { |
344 | ext4_io_end_t *io_end; | 375 | ext4_io_end_t *io_end; |
@@ -349,18 +380,18 @@ submit_and_retry: | |||
349 | ext4_io_submit(io); | 380 | ext4_io_submit(io); |
350 | } | 381 | } |
351 | if (io->io_bio == NULL) { | 382 | if (io->io_bio == NULL) { |
352 | ret = io_submit_init(io, inode, wbc, bh); | 383 | ret = io_submit_init_bio(io, bh); |
353 | if (ret) | 384 | if (ret) |
354 | return ret; | 385 | return ret; |
355 | } | 386 | } |
387 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
388 | if (ret != bh->b_size) | ||
389 | goto submit_and_retry; | ||
356 | io_end = io->io_end; | 390 | io_end = io->io_end; |
357 | if (test_clear_buffer_uninit(bh)) | 391 | if (test_clear_buffer_uninit(bh)) |
358 | ext4_set_io_unwritten_flag(inode, io_end); | 392 | ext4_set_io_unwritten_flag(inode, io_end); |
359 | io->io_end->size += bh->b_size; | 393 | io_end->size += bh->b_size; |
360 | io->io_next_block++; | 394 | io->io_next_block++; |
361 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | ||
362 | if (ret != bh->b_size) | ||
363 | goto submit_and_retry; | ||
364 | return 0; | 395 | return 0; |
365 | } | 396 | } |
366 | 397 | ||
@@ -432,7 +463,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
432 | do { | 463 | do { |
433 | if (!buffer_async_write(bh)) | 464 | if (!buffer_async_write(bh)) |
434 | continue; | 465 | continue; |
435 | ret = io_submit_add_bh(io, inode, wbc, bh); | 466 | ret = io_submit_add_bh(io, inode, bh); |
436 | if (ret) { | 467 | if (ret) { |
437 | /* | 468 | /* |
438 | * We only get here on ENOMEM. Not much else | 469 | * We only get here on ENOMEM. Not much else |