aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/page-io.c
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2013-04-11 23:48:32 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-04-11 23:48:32 -0400
commit0058f9658c94037173f7603fc8bae2007cc10253 (patch)
tree8406b3704c0b7dc8f9057e9e8ae78187c528e594 /fs/ext4/page-io.c
parente1091b157c330a21bb0eaa881efe0489a1697ed7 (diff)
ext4: make ext4_bio_write_page() use BH_Async_Write flags
So far ext4_bio_write_page() attached all the pages to ext4_io_end structure. This makes that structure pretty heavy (1 KB for pointers + 16 bytes per page attached to the bio). Also later we would like to share ext4_io_end structure among several bios in case IO to a single extent needs to be split among several bios and pointing to pages from ext4_io_end makes this complex.

We remove page pointers from ext4_io_end and use pointers from bio itself instead. This isn't as easy when blocksize < pagesize because then we can have several bios in flight for a single page and we have to be careful when to call end_page_writeback(). However this is a known problem already solved by block_write_full_page() / end_buffer_async_write() so we mimic its behavior here. We mark buffers going to disk with BH_Async_Write flag and in ext4_bio_end_io() we check whether there are any buffers with BH_Async_Write flag left. If there are not, we can call end_page_writeback().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Diffstat (limited to 'fs/ext4/page-io.c')
-rw-r--r--fs/ext4/page-io.c163
1 file changed, 77 insertions(+), 86 deletions(-)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 047a6de04a0a..1d98fcfc2ff0 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -29,25 +29,19 @@
29#include "xattr.h" 29#include "xattr.h"
30#include "acl.h" 30#include "acl.h"
31 31
32static struct kmem_cache *io_page_cachep, *io_end_cachep; 32static struct kmem_cache *io_end_cachep;
33 33
34int __init ext4_init_pageio(void) 34int __init ext4_init_pageio(void)
35{ 35{
36 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
37 if (io_page_cachep == NULL)
38 return -ENOMEM;
39 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); 36 io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
40 if (io_end_cachep == NULL) { 37 if (io_end_cachep == NULL)
41 kmem_cache_destroy(io_page_cachep);
42 return -ENOMEM; 38 return -ENOMEM;
43 }
44 return 0; 39 return 0;
45} 40}
46 41
47void ext4_exit_pageio(void) 42void ext4_exit_pageio(void)
48{ 43{
49 kmem_cache_destroy(io_end_cachep); 44 kmem_cache_destroy(io_end_cachep);
50 kmem_cache_destroy(io_page_cachep);
51} 45}
52 46
53/* 47/*
@@ -67,15 +61,6 @@ void ext4_ioend_shutdown(struct inode *inode)
67 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); 61 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
68} 62}
69 63
70static void put_io_page(struct ext4_io_page *io_page)
71{
72 if (atomic_dec_and_test(&io_page->p_count)) {
73 end_page_writeback(io_page->p_page);
74 put_page(io_page->p_page);
75 kmem_cache_free(io_page_cachep, io_page);
76 }
77}
78
79void ext4_free_io_end(ext4_io_end_t *io) 64void ext4_free_io_end(ext4_io_end_t *io)
80{ 65{
81 int i; 66 int i;
@@ -84,9 +69,6 @@ void ext4_free_io_end(ext4_io_end_t *io)
84 BUG_ON(!list_empty(&io->list)); 69 BUG_ON(!list_empty(&io->list));
85 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); 70 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
86 71
87 for (i = 0; i < io->num_io_pages; i++)
88 put_io_page(io->pages[i]);
89 io->num_io_pages = 0;
90 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) 72 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count))
91 wake_up_all(ext4_ioend_wq(io->inode)); 73 wake_up_all(ext4_ioend_wq(io->inode));
92 kmem_cache_free(io_end_cachep, io); 74 kmem_cache_free(io_end_cachep, io);
@@ -243,45 +225,56 @@ static void ext4_end_bio(struct bio *bio, int error)
243 ext4_io_end_t *io_end = bio->bi_private; 225 ext4_io_end_t *io_end = bio->bi_private;
244 struct inode *inode; 226 struct inode *inode;
245 int i; 227 int i;
228 int blocksize;
246 sector_t bi_sector = bio->bi_sector; 229 sector_t bi_sector = bio->bi_sector;
247 230
248 BUG_ON(!io_end); 231 BUG_ON(!io_end);
232 inode = io_end->inode;
233 blocksize = 1 << inode->i_blkbits;
249 bio->bi_private = NULL; 234 bio->bi_private = NULL;
250 bio->bi_end_io = NULL; 235 bio->bi_end_io = NULL;
251 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 236 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
252 error = 0; 237 error = 0;
253 bio_put(bio); 238 for (i = 0; i < bio->bi_vcnt; i++) {
254 239 struct bio_vec *bvec = &bio->bi_io_vec[i];
255 for (i = 0; i < io_end->num_io_pages; i++) { 240 struct page *page = bvec->bv_page;
256 struct page *page = io_end->pages[i]->p_page;
257 struct buffer_head *bh, *head; 241 struct buffer_head *bh, *head;
258 loff_t offset; 242 unsigned bio_start = bvec->bv_offset;
259 loff_t io_end_offset; 243 unsigned bio_end = bio_start + bvec->bv_len;
244 unsigned under_io = 0;
245 unsigned long flags;
246
247 if (!page)
248 continue;
260 249
261 if (error) { 250 if (error) {
262 SetPageError(page); 251 SetPageError(page);
263 set_bit(AS_EIO, &page->mapping->flags); 252 set_bit(AS_EIO, &page->mapping->flags);
264 head = page_buffers(page);
265 BUG_ON(!head);
266
267 io_end_offset = io_end->offset + io_end->size;
268
269 offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
270 bh = head;
271 do {
272 if ((offset >= io_end->offset) &&
273 (offset+bh->b_size <= io_end_offset))
274 buffer_io_error(bh);
275
276 offset += bh->b_size;
277 bh = bh->b_this_page;
278 } while (bh != head);
279 } 253 }
280 254 bh = head = page_buffers(page);
281 put_io_page(io_end->pages[i]); 255 /*
256 * We check all buffers in the page under BH_Uptodate_Lock
257 * to avoid races with other end io clearing async_write flags
258 */
259 local_irq_save(flags);
260 bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
261 do {
262 if (bh_offset(bh) < bio_start ||
263 bh_offset(bh) + blocksize > bio_end) {
264 if (buffer_async_write(bh))
265 under_io++;
266 continue;
267 }
268 clear_buffer_async_write(bh);
269 if (error)
270 buffer_io_error(bh);
271 } while ((bh = bh->b_this_page) != head);
272 bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
273 local_irq_restore(flags);
274 if (!under_io)
275 end_page_writeback(page);
282 } 276 }
283 io_end->num_io_pages = 0; 277 bio_put(bio);
284 inode = io_end->inode;
285 278
286 if (error) { 279 if (error) {
287 io_end->flag |= EXT4_IO_END_ERROR; 280 io_end->flag |= EXT4_IO_END_ERROR;
@@ -345,7 +338,6 @@ static int io_submit_init(struct ext4_io_submit *io,
345} 338}
346 339
347static int io_submit_add_bh(struct ext4_io_submit *io, 340static int io_submit_add_bh(struct ext4_io_submit *io,
348 struct ext4_io_page *io_page,
349 struct inode *inode, 341 struct inode *inode,
350 struct writeback_control *wbc, 342 struct writeback_control *wbc,
351 struct buffer_head *bh) 343 struct buffer_head *bh)
@@ -353,11 +345,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
353 ext4_io_end_t *io_end; 345 ext4_io_end_t *io_end;
354 int ret; 346 int ret;
355 347
356 if (buffer_new(bh)) {
357 clear_buffer_new(bh);
358 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
359 }
360
361 if (io->io_bio && bh->b_blocknr != io->io_next_block) { 348 if (io->io_bio && bh->b_blocknr != io->io_next_block) {
362submit_and_retry: 349submit_and_retry:
363 ext4_io_submit(io); 350 ext4_io_submit(io);
@@ -368,9 +355,6 @@ submit_and_retry:
368 return ret; 355 return ret;
369 } 356 }
370 io_end = io->io_end; 357 io_end = io->io_end;
371 if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
372 (io_end->pages[io_end->num_io_pages-1] != io_page))
373 goto submit_and_retry;
374 if (buffer_uninit(bh)) 358 if (buffer_uninit(bh))
375 ext4_set_io_unwritten_flag(inode, io_end); 359 ext4_set_io_unwritten_flag(inode, io_end);
376 io->io_end->size += bh->b_size; 360 io->io_end->size += bh->b_size;
@@ -378,11 +362,6 @@ submit_and_retry:
378 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 362 ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
379 if (ret != bh->b_size) 363 if (ret != bh->b_size)
380 goto submit_and_retry; 364 goto submit_and_retry;
381 if ((io_end->num_io_pages == 0) ||
382 (io_end->pages[io_end->num_io_pages-1] != io_page)) {
383 io_end->pages[io_end->num_io_pages++] = io_page;
384 atomic_inc(&io_page->p_count);
385 }
386 return 0; 365 return 0;
387} 366}
388 367
@@ -392,33 +371,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
392 struct writeback_control *wbc) 371 struct writeback_control *wbc)
393{ 372{
394 struct inode *inode = page->mapping->host; 373 struct inode *inode = page->mapping->host;
395 unsigned block_start, block_end, blocksize; 374 unsigned block_start, blocksize;
396 struct ext4_io_page *io_page;
397 struct buffer_head *bh, *head; 375 struct buffer_head *bh, *head;
398 int ret = 0; 376 int ret = 0;
377 int nr_submitted = 0;
399 378
400 blocksize = 1 << inode->i_blkbits; 379 blocksize = 1 << inode->i_blkbits;
401 380
402 BUG_ON(!PageLocked(page)); 381 BUG_ON(!PageLocked(page));
403 BUG_ON(PageWriteback(page)); 382 BUG_ON(PageWriteback(page));
404 383
405 io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
406 if (!io_page) {
407 redirty_page_for_writepage(wbc, page);
408 unlock_page(page);
409 return -ENOMEM;
410 }
411 io_page->p_page = page;
412 atomic_set(&io_page->p_count, 1);
413 get_page(page);
414 set_page_writeback(page); 384 set_page_writeback(page);
415 ClearPageError(page); 385 ClearPageError(page);
416 386
417 for (bh = head = page_buffers(page), block_start = 0; 387 /*
418 bh != head || !block_start; 388 * In the first loop we prepare and mark buffers to submit. We have to
419 block_start = block_end, bh = bh->b_this_page) { 389 * mark all buffers in the page before submitting so that
420 390 * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
421 block_end = block_start + blocksize; 391 * on the first buffer finishes and we are still working on submitting
392 * the second buffer.
393 */
394 bh = head = page_buffers(page);
395 do {
396 block_start = bh_offset(bh);
422 if (block_start >= len) { 397 if (block_start >= len) {
423 /* 398 /*
424 * Comments copied from block_write_full_page_endio: 399 * Comments copied from block_write_full_page_endio:
@@ -431,7 +406,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
431 * mapped, and writes to that region are not written 406 * mapped, and writes to that region are not written
432 * out to the file." 407 * out to the file."
433 */ 408 */
434 zero_user_segment(page, block_start, block_end); 409 zero_user_segment(page, block_start,
410 block_start + blocksize);
435 clear_buffer_dirty(bh); 411 clear_buffer_dirty(bh);
436 set_buffer_uptodate(bh); 412 set_buffer_uptodate(bh);
437 continue; 413 continue;
@@ -445,7 +421,19 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
445 ext4_io_submit(io); 421 ext4_io_submit(io);
446 continue; 422 continue;
447 } 423 }
448 ret = io_submit_add_bh(io, io_page, inode, wbc, bh); 424 if (buffer_new(bh)) {
425 clear_buffer_new(bh);
426 unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
427 }
428 set_buffer_async_write(bh);
429 } while ((bh = bh->b_this_page) != head);
430
431 /* Now submit buffers to write */
432 bh = head = page_buffers(page);
433 do {
434 if (!buffer_async_write(bh))
435 continue;
436 ret = io_submit_add_bh(io, inode, wbc, bh);
449 if (ret) { 437 if (ret) {
450 /* 438 /*
451 * We only get here on ENOMEM. Not much else 439 * We only get here on ENOMEM. Not much else
@@ -455,17 +443,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
455 redirty_page_for_writepage(wbc, page); 443 redirty_page_for_writepage(wbc, page);
456 break; 444 break;
457 } 445 }
446 nr_submitted++;
458 clear_buffer_dirty(bh); 447 clear_buffer_dirty(bh);
448 } while ((bh = bh->b_this_page) != head);
449
450 /* Error stopped previous loop? Clean up buffers... */
451 if (ret) {
452 do {
453 clear_buffer_async_write(bh);
454 bh = bh->b_this_page;
455 } while (bh != head);
459 } 456 }
460 unlock_page(page); 457 unlock_page(page);
461 /* 458 /* Nothing submitted - we have to end page writeback */
462 * If the page was truncated before we could do the writeback, 459 if (!nr_submitted)
463 * or we had a memory allocation error while trying to write 460 end_page_writeback(page);
464 * the first buffer head, we won't have submitted any pages for
465 * I/O. In that case we need to make sure we've cleared the
466 * PageWriteback bit from the page to prevent the system from
467 * wedging later on.
468 */
469 put_io_page(io_page);
470 return ret; 461 return ret;
471} 462}