aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Theodore Ts'o <tytso@mit.edu> 2013-02-09 09:24:14 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2013-02-09 09:24:14 -0500
commit: 47564bfb95bf370d73906fc4ae57c271e8ba96cd (patch)
tree: 5e2bdd86884e37afbb397ff509d2755cea451372 /fs/ext4
parent: 9924a92a8c217576bd2a2b1bbbb854462f1a00ae (diff)
ext4: grab page before starting transaction handle in write_begin()
The grab_cache_page_write_begin() function can potentially sleep for a long time, since it may need to do memory allocation which can block if the system is under significant memory pressure, and because it may be blocked on page writeback. If it does take a long time to grab the page, it's better that we not hold an active jbd2 handle.

So grab a handle on the page first, and _then_ start the transaction handle.

This commit fixes the following long transaction handle hold time:

    postmark-2917 [000] .... 196.435786: jbd2_handle_stats: dev 254,32 tid 570 type 2 line_no 2541 interval 311 sync 0 requested_blocks 1 dirtied_blocks 0

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/inode.c 111
1 files changed, 68 insertions, 43 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5042c8773ad7..2fa18bb0bf3c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -875,32 +875,40 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
875 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 875 ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
876 flags, pagep); 876 flags, pagep);
877 if (ret < 0) 877 if (ret < 0)
878 goto out; 878 return ret;
879 if (ret == 1) { 879 if (ret == 1)
880 ret = 0; 880 return 0;
881 goto out;
882 }
883 } 881 }
884 882
885retry: 883 /*
884 * grab_cache_page_write_begin() can take a long time if the
885 * system is thrashing due to memory pressure, or if the page
886 * is being written back. So grab it first before we start
887 * the transaction handle. This also allows us to allocate
888 * the page (if needed) without using GFP_NOFS.
889 */
890retry_grab:
891 page = grab_cache_page_write_begin(mapping, index, flags);
892 if (!page)
893 return -ENOMEM;
894 unlock_page(page);
895
896retry_journal:
886 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 897 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
887 if (IS_ERR(handle)) { 898 if (IS_ERR(handle)) {
888 ret = PTR_ERR(handle); 899 page_cache_release(page);
889 goto out; 900 return PTR_ERR(handle);
890 } 901 }
891 902
892 /* We cannot recurse into the filesystem as the transaction is already 903 lock_page(page);
893 * started */ 904 if (page->mapping != mapping) {
894 flags |= AOP_FLAG_NOFS; 905 /* The page got truncated from under us */
895 906 unlock_page(page);
896 page = grab_cache_page_write_begin(mapping, index, flags); 907 page_cache_release(page);
897 if (!page) {
898 ext4_journal_stop(handle); 908 ext4_journal_stop(handle);
899 ret = -ENOMEM; 909 goto retry_grab;
900 goto out;
901 } 910 }
902 911 wait_on_page_writeback(page);
903 *pagep = page;
904 912
905 if (ext4_should_dioread_nolock(inode)) 913 if (ext4_should_dioread_nolock(inode))
906 ret = __block_write_begin(page, pos, len, ext4_get_block_write); 914 ret = __block_write_begin(page, pos, len, ext4_get_block_write);
@@ -915,7 +923,6 @@ retry:
915 923
916 if (ret) { 924 if (ret) {
917 unlock_page(page); 925 unlock_page(page);
918 page_cache_release(page);
919 /* 926 /*
920 * __block_write_begin may have instantiated a few blocks 927 * __block_write_begin may have instantiated a few blocks
921 * outside i_size. Trim these off again. Don't need 928 * outside i_size. Trim these off again. Don't need
@@ -939,11 +946,14 @@ retry:
939 if (inode->i_nlink) 946 if (inode->i_nlink)
940 ext4_orphan_del(NULL, inode); 947 ext4_orphan_del(NULL, inode);
941 } 948 }
942 }
943 949
944 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 950 if (ret == -ENOSPC &&
945 goto retry; 951 ext4_should_retry_alloc(inode->i_sb, &retries))
946out: 952 goto retry_journal;
953 page_cache_release(page);
954 return ret;
955 }
956 *pagep = page;
947 return ret; 957 return ret;
948} 958}
949 959
@@ -2458,42 +2468,52 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2458 pos, len, flags, 2468 pos, len, flags,
2459 pagep, fsdata); 2469 pagep, fsdata);
2460 if (ret < 0) 2470 if (ret < 0)
2461 goto out; 2471 return ret;
2462 if (ret == 1) { 2472 if (ret == 1)
2463 ret = 0; 2473 return 0;
2464 goto out;
2465 }
2466 } 2474 }
2467 2475
2468retry: 2476 /*
2477 * grab_cache_page_write_begin() can take a long time if the
2478 * system is thrashing due to memory pressure, or if the page
2479 * is being written back. So grab it first before we start
2480 * the transaction handle. This also allows us to allocate
2481 * the page (if needed) without using GFP_NOFS.
2482 */
2483retry_grab:
2484 page = grab_cache_page_write_begin(mapping, index, flags);
2485 if (!page)
2486 return -ENOMEM;
2487 unlock_page(page);
2488
2469 /* 2489 /*
2470 * With delayed allocation, we don't log the i_disksize update 2490 * With delayed allocation, we don't log the i_disksize update
2471 * if there is delayed block allocation. But we still need 2491 * if there is delayed block allocation. But we still need
2472 * to journalling the i_disksize update if writes to the end 2492 * to journalling the i_disksize update if writes to the end
2473 * of file which has an already mapped buffer. 2493 * of file which has an already mapped buffer.
2474 */ 2494 */
2495retry_journal:
2475 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1); 2496 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
2476 if (IS_ERR(handle)) { 2497 if (IS_ERR(handle)) {
2477 ret = PTR_ERR(handle); 2498 page_cache_release(page);
2478 goto out; 2499 return PTR_ERR(handle);
2479 } 2500 }
2480 /* We cannot recurse into the filesystem as the transaction is already
2481 * started */
2482 flags |= AOP_FLAG_NOFS;
2483 2501
2484 page = grab_cache_page_write_begin(mapping, index, flags); 2502 lock_page(page);
2485 if (!page) { 2503 if (page->mapping != mapping) {
2504 /* The page got truncated from under us */
2505 unlock_page(page);
2506 page_cache_release(page);
2486 ext4_journal_stop(handle); 2507 ext4_journal_stop(handle);
2487 ret = -ENOMEM; 2508 goto retry_grab;
2488 goto out;
2489 } 2509 }
2490 *pagep = page; 2510 /* In case writeback began while the page was unlocked */
2511 wait_on_page_writeback(page);
2491 2512
2492 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 2513 ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
2493 if (ret < 0) { 2514 if (ret < 0) {
2494 unlock_page(page); 2515 unlock_page(page);
2495 ext4_journal_stop(handle); 2516 ext4_journal_stop(handle);
2496 page_cache_release(page);
2497 /* 2517 /*
2498 * block_write_begin may have instantiated a few blocks 2518 * block_write_begin may have instantiated a few blocks
2499 * outside i_size. Trim these off again. Don't need 2519 * outside i_size. Trim these off again. Don't need
@@ -2501,11 +2521,16 @@ retry:
2501 */ 2521 */
2502 if (pos + len > inode->i_size) 2522 if (pos + len > inode->i_size)
2503 ext4_truncate_failed_write(inode); 2523 ext4_truncate_failed_write(inode);
2524
2525 if (ret == -ENOSPC &&
2526 ext4_should_retry_alloc(inode->i_sb, &retries))
2527 goto retry_journal;
2528
2529 page_cache_release(page);
2530 return ret;
2504 } 2531 }
2505 2532
2506 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2533 *pagep = page;
2507 goto retry;
2508out:
2509 return ret; 2534 return ret;
2510} 2535}
2511 2536