about summary refs log tree commit diff stats
path: root/fs/ext4/page-io.c
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2013-06-04 11:58:58 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2013-06-04 11:58:58 -0400
commit: 97a851ed71cd9cc2542955e92a001c6ea3d21d35 (patch)
tree: 32a881fb66aa9058c4cdb3d279925a492f3689ca /fs/ext4/page-io.c
parent: 8af8eecc1331dbf5e8c662022272cf667e213da5 (diff)
ext4: use io_end for multiple bios
Change the writeback path to create just one io_end structure for the extent to which we submit IO, and share it among the bios writing that extent. This prevents needless splitting and joining of unwritten extents when they cannot be submitted as a single bio.

Bugs in ENOMEM handling found by the Linux File System Verification project (linuxtesting.org) and fixed by Alexey Khoroshilov <khoroshilov@ispras.ru>.

CC: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/page-io.c')
-rw-r--r--  fs/ext4/page-io.c  121
1 file changed, 76 insertions(+), 45 deletions(-)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4acf1f78881b..19599bded62a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -62,15 +62,28 @@ void ext4_ioend_shutdown(struct inode *inode)
62 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); 62 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
63} 63}
64 64
65void ext4_free_io_end(ext4_io_end_t *io) 65static void ext4_release_io_end(ext4_io_end_t *io_end)
66{ 66{
67 BUG_ON(!io); 67 BUG_ON(!list_empty(&io_end->list));
68 BUG_ON(!list_empty(&io->list)); 68 BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
69 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 69
70 if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
71 wake_up_all(ext4_ioend_wq(io_end->inode));
72 if (io_end->flag & EXT4_IO_END_DIRECT)
73 inode_dio_done(io_end->inode);
74 if (io_end->iocb)
75 aio_complete(io_end->iocb, io_end->result, 0);
76 kmem_cache_free(io_end_cachep, io_end);
77}
78
79static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
80{
81 struct inode *inode = io_end->inode;
70 82
71 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) 83 io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
72 wake_up_all(ext4_ioend_wq(io->inode)); 84 /* Wake up anyone waiting on unwritten extent conversion */
73 kmem_cache_free(io_end_cachep, io); 85 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
86 wake_up_all(ext4_ioend_wq(inode));
74} 87}
75 88
76/* check a range of space and convert unwritten extents to written. */ 89/* check a range of space and convert unwritten extents to written. */
@@ -93,13 +106,8 @@ static int ext4_end_io(ext4_io_end_t *io)
93 "(inode %lu, offset %llu, size %zd, error %d)", 106 "(inode %lu, offset %llu, size %zd, error %d)",
94 inode->i_ino, offset, size, ret); 107 inode->i_ino, offset, size, ret);
95 } 108 }
96 /* Wake up anyone waiting on unwritten extent conversion */ 109 ext4_clear_io_unwritten_flag(io);
97 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) 110 ext4_release_io_end(io);
98 wake_up_all(ext4_ioend_wq(inode));
99 if (io->flag & EXT4_IO_END_DIRECT)
100 inode_dio_done(inode);
101 if (io->iocb)
102 aio_complete(io->iocb, io->result, 0);
103 return ret; 111 return ret;
104} 112}
105 113
@@ -130,7 +138,7 @@ static void dump_completed_IO(struct inode *inode)
130} 138}
131 139
132/* Add the io_end to per-inode completed end_io list. */ 140/* Add the io_end to per-inode completed end_io list. */
133void ext4_add_complete_io(ext4_io_end_t *io_end) 141static void ext4_add_complete_io(ext4_io_end_t *io_end)
134{ 142{
135 struct ext4_inode_info *ei = EXT4_I(io_end->inode); 143 struct ext4_inode_info *ei = EXT4_I(io_end->inode);
136 struct workqueue_struct *wq; 144 struct workqueue_struct *wq;
@@ -167,8 +175,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode)
167 err = ext4_end_io(io); 175 err = ext4_end_io(io);
168 if (unlikely(!ret && err)) 176 if (unlikely(!ret && err))
169 ret = err; 177 ret = err;
170 io->flag &= ~EXT4_IO_END_UNWRITTEN;
171 ext4_free_io_end(io);
172 } 178 }
173 return ret; 179 return ret;
174} 180}
@@ -200,10 +206,43 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
200 atomic_inc(&EXT4_I(inode)->i_ioend_count); 206 atomic_inc(&EXT4_I(inode)->i_ioend_count);
201 io->inode = inode; 207 io->inode = inode;
202 INIT_LIST_HEAD(&io->list); 208 INIT_LIST_HEAD(&io->list);
209 atomic_set(&io->count, 1);
203 } 210 }
204 return io; 211 return io;
205} 212}
206 213
214void ext4_put_io_end_defer(ext4_io_end_t *io_end)
215{
216 if (atomic_dec_and_test(&io_end->count)) {
217 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
218 ext4_release_io_end(io_end);
219 return;
220 }
221 ext4_add_complete_io(io_end);
222 }
223}
224
225int ext4_put_io_end(ext4_io_end_t *io_end)
226{
227 int err = 0;
228
229 if (atomic_dec_and_test(&io_end->count)) {
230 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
231 err = ext4_convert_unwritten_extents(io_end->inode,
232 io_end->offset, io_end->size);
233 ext4_clear_io_unwritten_flag(io_end);
234 }
235 ext4_release_io_end(io_end);
236 }
237 return err;
238}
239
240ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
241{
242 atomic_inc(&io_end->count);
243 return io_end;
244}
245
207/* 246/*
208 * Print an buffer I/O error compatible with the fs/buffer.c. This 247 * Print an buffer I/O error compatible with the fs/buffer.c. This
209 * provides compatibility with dmesg scrapers that look for a specific 248 * provides compatibility with dmesg scrapers that look for a specific
@@ -286,12 +325,7 @@ static void ext4_end_bio(struct bio *bio, int error)
286 bi_sector >> (inode->i_blkbits - 9)); 325 bi_sector >> (inode->i_blkbits - 9));
287 } 326 }
288 327
289 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 328 ext4_put_io_end_defer(io_end);
290 ext4_free_io_end(io_end);
291 return;
292 }
293
294 ext4_add_complete_io(io_end);
295} 329}
296 330
297void ext4_io_submit(struct ext4_io_submit *io) 331void ext4_io_submit(struct ext4_io_submit *io)
@@ -305,40 +339,37 @@ void ext4_io_submit(struct ext4_io_submit *io)
305 bio_put(io->io_bio); 339 bio_put(io->io_bio);
306 } 340 }
307 io->io_bio = NULL; 341 io->io_bio = NULL;
308 io->io_op = 0; 342}
343
344void ext4_io_submit_init(struct ext4_io_submit *io,
345 struct writeback_control *wbc)
346{
347 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
348 io->io_bio = NULL;
309 io->io_end = NULL; 349 io->io_end = NULL;
310} 350}
311 351
312static int io_submit_init(struct ext4_io_submit *io, 352static int io_submit_init_bio(struct ext4_io_submit *io,
313 struct inode *inode, 353 struct buffer_head *bh)
314 struct writeback_control *wbc,
315 struct buffer_head *bh)
316{ 354{
317 ext4_io_end_t *io_end;
318 struct page *page = bh->b_page;
319 int nvecs = bio_get_nr_vecs(bh->b_bdev); 355 int nvecs = bio_get_nr_vecs(bh->b_bdev);
320 struct bio *bio; 356 struct bio *bio;
321 357
322 io_end = ext4_init_io_end(inode, GFP_NOFS);
323 if (!io_end)
324 return -ENOMEM;
325 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); 358 bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
326 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 359 bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
327 bio->bi_bdev = bh->b_bdev; 360 bio->bi_bdev = bh->b_bdev;
328 bio->bi_private = io->io_end = io_end;
329 bio->bi_end_io = ext4_end_bio; 361 bio->bi_end_io = ext4_end_bio;
330 362 bio->bi_private = ext4_get_io_end(io->io_end);
331 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); 363 if (!io->io_end->size)
332 364 io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
365 + bh_offset(bh);
333 io->io_bio = bio; 366 io->io_bio = bio;
334 io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
335 io->io_next_block = bh->b_blocknr; 367 io->io_next_block = bh->b_blocknr;
336 return 0; 368 return 0;
337} 369}
338 370
339static int io_submit_add_bh(struct ext4_io_submit *io, 371static int io_submit_add_bh(struct ext4_io_submit *io,
340 struct inode *inode, 372 struct inode *inode,
341 struct writeback_control *wbc,
342 struct buffer_head *bh) 373 struct buffer_head *bh)
343{ 374{
344 ext4_io_end_t *io_end; 375 ext4_io_end_t *io_end;
@@ -349,18 +380,18 @@ submit_and_retry:
349 ext4_io_submit(io); 380 ext4_io_submit(io);
350 } 381 }
351 if (io->io_bio == NULL) { 382 if (io->io_bio == NULL) {
352 ret = io_submit_init(io, inode, wbc, bh); 383 ret = io_submit_init_bio(io, bh);
353 if (ret) 384 if (ret)
354 return ret; 385 return ret;
355 } 386 }
387 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
388 if (ret != bh->b_size)
389 goto submit_and_retry;
356 io_end = io->io_end; 390 io_end = io->io_end;
357 if (test_clear_buffer_uninit(bh)) 391 if (test_clear_buffer_uninit(bh))
358 ext4_set_io_unwritten_flag(inode, io_end); 392 ext4_set_io_unwritten_flag(inode, io_end);
359 io->io_end->size += bh->b_size; 393 io_end->size += bh->b_size;
360 io->io_next_block++; 394 io->io_next_block++;
361 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
362 if (ret != bh->b_size)
363 goto submit_and_retry;
364 return 0; 395 return 0;
365} 396}
366 397
@@ -432,7 +463,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
432 do { 463 do {
433 if (!buffer_async_write(bh)) 464 if (!buffer_async_write(bh))
434 continue; 465 continue;
435 ret = io_submit_add_bh(io, inode, wbc, bh); 466 ret = io_submit_add_bh(io, inode, bh);
436 if (ret) { 467 if (ret) {
437 /* 468 /*
438 * We only get here on ENOMEM. Not much else 469 * We only get here on ENOMEM. Not much else