diff options
author | Jan Kara <jack@suse.cz> | 2013-04-11 23:48:32 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-04-11 23:48:32 -0400 |
commit | 0058f9658c94037173f7603fc8bae2007cc10253 (patch) | |
tree | 8406b3704c0b7dc8f9057e9e8ae78187c528e594 /fs/ext4/page-io.c | |
parent | e1091b157c330a21bb0eaa881efe0489a1697ed7 (diff) |
ext4: make ext4_bio_write_page() use BH_Async_Write flags
So far ext4_bio_write_page() attached all the pages to ext4_io_end
structure. This makes that structure pretty heavy (1 KB for pointers
+ 16 bytes per page attached to the bio). Also later we would like to
share ext4_io_end structure among several bios in case IO to a single
extent needs to be split among several bios and pointing to pages from
ext4_io_end makes this complex.
We remove page pointers from ext4_io_end and use pointers from bio
itself instead. This isn't as easy when blocksize < pagesize because
then we can have several bios in flight for a single page and we have
to be careful when to call end_page_writeback(). However this is a
known problem already solved by block_write_full_page() /
end_buffer_async_write(), so we mimic their behavior here. We mark
buffers going to disk with BH_Async_Write flag and in
ext4_bio_end_io() we check whether there are any buffers with
BH_Async_Write flag left. If there are none, we can call
end_page_writeback().
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Diffstat (limited to 'fs/ext4/page-io.c')
-rw-r--r-- | fs/ext4/page-io.c | 163 |
1 file changed, 77 insertions, 86 deletions
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 047a6de04a0a..1d98fcfc2ff0 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -29,25 +29,19 @@ | |||
29 | #include "xattr.h" | 29 | #include "xattr.h" |
30 | #include "acl.h" | 30 | #include "acl.h" |
31 | 31 | ||
32 | static struct kmem_cache *io_page_cachep, *io_end_cachep; | 32 | static struct kmem_cache *io_end_cachep; |
33 | 33 | ||
34 | int __init ext4_init_pageio(void) | 34 | int __init ext4_init_pageio(void) |
35 | { | 35 | { |
36 | io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); | ||
37 | if (io_page_cachep == NULL) | ||
38 | return -ENOMEM; | ||
39 | io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); | 36 | io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); |
40 | if (io_end_cachep == NULL) { | 37 | if (io_end_cachep == NULL) |
41 | kmem_cache_destroy(io_page_cachep); | ||
42 | return -ENOMEM; | 38 | return -ENOMEM; |
43 | } | ||
44 | return 0; | 39 | return 0; |
45 | } | 40 | } |
46 | 41 | ||
47 | void ext4_exit_pageio(void) | 42 | void ext4_exit_pageio(void) |
48 | { | 43 | { |
49 | kmem_cache_destroy(io_end_cachep); | 44 | kmem_cache_destroy(io_end_cachep); |
50 | kmem_cache_destroy(io_page_cachep); | ||
51 | } | 45 | } |
52 | 46 | ||
53 | /* | 47 | /* |
@@ -67,15 +61,6 @@ void ext4_ioend_shutdown(struct inode *inode) | |||
67 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | 61 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); |
68 | } | 62 | } |
69 | 63 | ||
70 | static void put_io_page(struct ext4_io_page *io_page) | ||
71 | { | ||
72 | if (atomic_dec_and_test(&io_page->p_count)) { | ||
73 | end_page_writeback(io_page->p_page); | ||
74 | put_page(io_page->p_page); | ||
75 | kmem_cache_free(io_page_cachep, io_page); | ||
76 | } | ||
77 | } | ||
78 | |||
79 | void ext4_free_io_end(ext4_io_end_t *io) | 64 | void ext4_free_io_end(ext4_io_end_t *io) |
80 | { | 65 | { |
81 | int i; | 66 | int i; |
@@ -84,9 +69,6 @@ void ext4_free_io_end(ext4_io_end_t *io) | |||
84 | BUG_ON(!list_empty(&io->list)); | 69 | BUG_ON(!list_empty(&io->list)); |
85 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | 70 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); |
86 | 71 | ||
87 | for (i = 0; i < io->num_io_pages; i++) | ||
88 | put_io_page(io->pages[i]); | ||
89 | io->num_io_pages = 0; | ||
90 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) | 72 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) |
91 | wake_up_all(ext4_ioend_wq(io->inode)); | 73 | wake_up_all(ext4_ioend_wq(io->inode)); |
92 | kmem_cache_free(io_end_cachep, io); | 74 | kmem_cache_free(io_end_cachep, io); |
@@ -243,45 +225,56 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
243 | ext4_io_end_t *io_end = bio->bi_private; | 225 | ext4_io_end_t *io_end = bio->bi_private; |
244 | struct inode *inode; | 226 | struct inode *inode; |
245 | int i; | 227 | int i; |
228 | int blocksize; | ||
246 | sector_t bi_sector = bio->bi_sector; | 229 | sector_t bi_sector = bio->bi_sector; |
247 | 230 | ||
248 | BUG_ON(!io_end); | 231 | BUG_ON(!io_end); |
232 | inode = io_end->inode; | ||
233 | blocksize = 1 << inode->i_blkbits; | ||
249 | bio->bi_private = NULL; | 234 | bio->bi_private = NULL; |
250 | bio->bi_end_io = NULL; | 235 | bio->bi_end_io = NULL; |
251 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 236 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
252 | error = 0; | 237 | error = 0; |
253 | bio_put(bio); | 238 | for (i = 0; i < bio->bi_vcnt; i++) { |
254 | 239 | struct bio_vec *bvec = &bio->bi_io_vec[i]; | |
255 | for (i = 0; i < io_end->num_io_pages; i++) { | 240 | struct page *page = bvec->bv_page; |
256 | struct page *page = io_end->pages[i]->p_page; | ||
257 | struct buffer_head *bh, *head; | 241 | struct buffer_head *bh, *head; |
258 | loff_t offset; | 242 | unsigned bio_start = bvec->bv_offset; |
259 | loff_t io_end_offset; | 243 | unsigned bio_end = bio_start + bvec->bv_len; |
244 | unsigned under_io = 0; | ||
245 | unsigned long flags; | ||
246 | |||
247 | if (!page) | ||
248 | continue; | ||
260 | 249 | ||
261 | if (error) { | 250 | if (error) { |
262 | SetPageError(page); | 251 | SetPageError(page); |
263 | set_bit(AS_EIO, &page->mapping->flags); | 252 | set_bit(AS_EIO, &page->mapping->flags); |
264 | head = page_buffers(page); | ||
265 | BUG_ON(!head); | ||
266 | |||
267 | io_end_offset = io_end->offset + io_end->size; | ||
268 | |||
269 | offset = (sector_t) page->index << PAGE_CACHE_SHIFT; | ||
270 | bh = head; | ||
271 | do { | ||
272 | if ((offset >= io_end->offset) && | ||
273 | (offset+bh->b_size <= io_end_offset)) | ||
274 | buffer_io_error(bh); | ||
275 | |||
276 | offset += bh->b_size; | ||
277 | bh = bh->b_this_page; | ||
278 | } while (bh != head); | ||
279 | } | 253 | } |
280 | 254 | bh = head = page_buffers(page); | |
281 | put_io_page(io_end->pages[i]); | 255 | /* |
256 | * We check all buffers in the page under BH_Uptodate_Lock | ||
257 | * to avoid races with other end io clearing async_write flags | ||
258 | */ | ||
259 | local_irq_save(flags); | ||
260 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
261 | do { | ||
262 | if (bh_offset(bh) < bio_start || | ||
263 | bh_offset(bh) + blocksize > bio_end) { | ||
264 | if (buffer_async_write(bh)) | ||
265 | under_io++; | ||
266 | continue; | ||
267 | } | ||
268 | clear_buffer_async_write(bh); | ||
269 | if (error) | ||
270 | buffer_io_error(bh); | ||
271 | } while ((bh = bh->b_this_page) != head); | ||
272 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
273 | local_irq_restore(flags); | ||
274 | if (!under_io) | ||
275 | end_page_writeback(page); | ||
282 | } | 276 | } |
283 | io_end->num_io_pages = 0; | 277 | bio_put(bio); |
284 | inode = io_end->inode; | ||
285 | 278 | ||
286 | if (error) { | 279 | if (error) { |
287 | io_end->flag |= EXT4_IO_END_ERROR; | 280 | io_end->flag |= EXT4_IO_END_ERROR; |
@@ -345,7 +338,6 @@ static int io_submit_init(struct ext4_io_submit *io, | |||
345 | } | 338 | } |
346 | 339 | ||
347 | static int io_submit_add_bh(struct ext4_io_submit *io, | 340 | static int io_submit_add_bh(struct ext4_io_submit *io, |
348 | struct ext4_io_page *io_page, | ||
349 | struct inode *inode, | 341 | struct inode *inode, |
350 | struct writeback_control *wbc, | 342 | struct writeback_control *wbc, |
351 | struct buffer_head *bh) | 343 | struct buffer_head *bh) |
@@ -353,11 +345,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io, | |||
353 | ext4_io_end_t *io_end; | 345 | ext4_io_end_t *io_end; |
354 | int ret; | 346 | int ret; |
355 | 347 | ||
356 | if (buffer_new(bh)) { | ||
357 | clear_buffer_new(bh); | ||
358 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
359 | } | ||
360 | |||
361 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | 348 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { |
362 | submit_and_retry: | 349 | submit_and_retry: |
363 | ext4_io_submit(io); | 350 | ext4_io_submit(io); |
@@ -368,9 +355,6 @@ submit_and_retry: | |||
368 | return ret; | 355 | return ret; |
369 | } | 356 | } |
370 | io_end = io->io_end; | 357 | io_end = io->io_end; |
371 | if ((io_end->num_io_pages >= MAX_IO_PAGES) && | ||
372 | (io_end->pages[io_end->num_io_pages-1] != io_page)) | ||
373 | goto submit_and_retry; | ||
374 | if (buffer_uninit(bh)) | 358 | if (buffer_uninit(bh)) |
375 | ext4_set_io_unwritten_flag(inode, io_end); | 359 | ext4_set_io_unwritten_flag(inode, io_end); |
376 | io->io_end->size += bh->b_size; | 360 | io->io_end->size += bh->b_size; |
@@ -378,11 +362,6 @@ submit_and_retry: | |||
378 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 362 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
379 | if (ret != bh->b_size) | 363 | if (ret != bh->b_size) |
380 | goto submit_and_retry; | 364 | goto submit_and_retry; |
381 | if ((io_end->num_io_pages == 0) || | ||
382 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { | ||
383 | io_end->pages[io_end->num_io_pages++] = io_page; | ||
384 | atomic_inc(&io_page->p_count); | ||
385 | } | ||
386 | return 0; | 365 | return 0; |
387 | } | 366 | } |
388 | 367 | ||
@@ -392,33 +371,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
392 | struct writeback_control *wbc) | 371 | struct writeback_control *wbc) |
393 | { | 372 | { |
394 | struct inode *inode = page->mapping->host; | 373 | struct inode *inode = page->mapping->host; |
395 | unsigned block_start, block_end, blocksize; | 374 | unsigned block_start, blocksize; |
396 | struct ext4_io_page *io_page; | ||
397 | struct buffer_head *bh, *head; | 375 | struct buffer_head *bh, *head; |
398 | int ret = 0; | 376 | int ret = 0; |
377 | int nr_submitted = 0; | ||
399 | 378 | ||
400 | blocksize = 1 << inode->i_blkbits; | 379 | blocksize = 1 << inode->i_blkbits; |
401 | 380 | ||
402 | BUG_ON(!PageLocked(page)); | 381 | BUG_ON(!PageLocked(page)); |
403 | BUG_ON(PageWriteback(page)); | 382 | BUG_ON(PageWriteback(page)); |
404 | 383 | ||
405 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | ||
406 | if (!io_page) { | ||
407 | redirty_page_for_writepage(wbc, page); | ||
408 | unlock_page(page); | ||
409 | return -ENOMEM; | ||
410 | } | ||
411 | io_page->p_page = page; | ||
412 | atomic_set(&io_page->p_count, 1); | ||
413 | get_page(page); | ||
414 | set_page_writeback(page); | 384 | set_page_writeback(page); |
415 | ClearPageError(page); | 385 | ClearPageError(page); |
416 | 386 | ||
417 | for (bh = head = page_buffers(page), block_start = 0; | 387 | /* |
418 | bh != head || !block_start; | 388 | * In the first loop we prepare and mark buffers to submit. We have to |
419 | block_start = block_end, bh = bh->b_this_page) { | 389 | * mark all buffers in the page before submitting so that |
420 | 390 | * end_page_writeback() cannot be called from ext4_bio_end_io() when IO | |
421 | block_end = block_start + blocksize; | 391 | * on the first buffer finishes and we are still working on submitting |
392 | * the second buffer. | ||
393 | */ | ||
394 | bh = head = page_buffers(page); | ||
395 | do { | ||
396 | block_start = bh_offset(bh); | ||
422 | if (block_start >= len) { | 397 | if (block_start >= len) { |
423 | /* | 398 | /* |
424 | * Comments copied from block_write_full_page_endio: | 399 | * Comments copied from block_write_full_page_endio: |
@@ -431,7 +406,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
431 | * mapped, and writes to that region are not written | 406 | * mapped, and writes to that region are not written |
432 | * out to the file." | 407 | * out to the file." |
433 | */ | 408 | */ |
434 | zero_user_segment(page, block_start, block_end); | 409 | zero_user_segment(page, block_start, |
410 | block_start + blocksize); | ||
435 | clear_buffer_dirty(bh); | 411 | clear_buffer_dirty(bh); |
436 | set_buffer_uptodate(bh); | 412 | set_buffer_uptodate(bh); |
437 | continue; | 413 | continue; |
@@ -445,7 +421,19 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
445 | ext4_io_submit(io); | 421 | ext4_io_submit(io); |
446 | continue; | 422 | continue; |
447 | } | 423 | } |
448 | ret = io_submit_add_bh(io, io_page, inode, wbc, bh); | 424 | if (buffer_new(bh)) { |
425 | clear_buffer_new(bh); | ||
426 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); | ||
427 | } | ||
428 | set_buffer_async_write(bh); | ||
429 | } while ((bh = bh->b_this_page) != head); | ||
430 | |||
431 | /* Now submit buffers to write */ | ||
432 | bh = head = page_buffers(page); | ||
433 | do { | ||
434 | if (!buffer_async_write(bh)) | ||
435 | continue; | ||
436 | ret = io_submit_add_bh(io, inode, wbc, bh); | ||
449 | if (ret) { | 437 | if (ret) { |
450 | /* | 438 | /* |
451 | * We only get here on ENOMEM. Not much else | 439 | * We only get here on ENOMEM. Not much else |
@@ -455,17 +443,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
455 | redirty_page_for_writepage(wbc, page); | 443 | redirty_page_for_writepage(wbc, page); |
456 | break; | 444 | break; |
457 | } | 445 | } |
446 | nr_submitted++; | ||
458 | clear_buffer_dirty(bh); | 447 | clear_buffer_dirty(bh); |
448 | } while ((bh = bh->b_this_page) != head); | ||
449 | |||
450 | /* Error stopped previous loop? Clean up buffers... */ | ||
451 | if (ret) { | ||
452 | do { | ||
453 | clear_buffer_async_write(bh); | ||
454 | bh = bh->b_this_page; | ||
455 | } while (bh != head); | ||
459 | } | 456 | } |
460 | unlock_page(page); | 457 | unlock_page(page); |
461 | /* | 458 | /* Nothing submitted - we have to end page writeback */ |
462 | * If the page was truncated before we could do the writeback, | 459 | if (!nr_submitted) |
463 | * or we had a memory allocation error while trying to write | 460 | end_page_writeback(page); |
464 | * the first buffer head, we won't have submitted any pages for | ||
465 | * I/O. In that case we need to make sure we've cleared the | ||
466 | * PageWriteback bit from the page to prevent the system from | ||
467 | * wedging later on. | ||
468 | */ | ||
469 | put_io_page(io_page); | ||
470 | return ret; | 461 | return ret; |
471 | } | 462 | } |