aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author: Frank Mayhar <fmayhar@google.com> 2009-01-07 00:06:22 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2009-01-07 00:06:22 -0500
commit: 0390131ba84fd3f726f9e24fc4553828125700bb (patch)
tree: 4c90afad4e8690e25aec0ce069fd450e92ab5f96 /fs/ext4/inode.c
parent: ff7ef329b268b603ea4a2303241ef1c3829fd574 (diff)
ext4: Allow ext4 to run without a journal
A few weeks ago I posted a patch for discussion that allowed ext4 to run without a journal. Since that time I've integrated the excellent comments from Andreas and fixed several serious bugs. We're currently running with this patch and generating some performance numbers against both ext2 (with backported reservations code) and ext4 with and without a journal. It just so happens that running without a journal is slightly faster for most everything.

We did

    iozone -T -t 4 -s 2g -r 256k -T -I -i0 -i1 -i2

which creates 4 threads, each of which creates and does reads and writes on a 2G file, with a buffer size of 256K, using O_DIRECT for all file opens to bypass the page cache. Results:

                      ext2         ext4, default   ext4, no journal
    initial writes    13.0 MB/s    15.4 MB/s       15.7 MB/s
    rewrites          13.1 MB/s    15.6 MB/s       15.9 MB/s
    reads             15.2 MB/s    16.9 MB/s       17.2 MB/s
    re-reads          15.3 MB/s    16.9 MB/s       17.2 MB/s
    random readers     5.6 MB/s     5.6 MB/s        5.7 MB/s
    random writers     5.1 MB/s     5.3 MB/s        5.4 MB/s

So it seems that, so far, this was a useful exercise.

Signed-off-by: Frank Mayhar <fmayhar@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 130
1 files changed, 87 insertions, 43 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c77a7ac753f3..45d0f70a1f04 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -72,12 +72,17 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
72 * "bh" may be NULL: a metadata block may have been freed from memory 72 * "bh" may be NULL: a metadata block may have been freed from memory
73 * but there may still be a record of it in the journal, and that record 73 * but there may still be a record of it in the journal, and that record
74 * still needs to be revoked. 74 * still needs to be revoked.
75 *
76 * If the handle isn't valid we're not journaling so there's nothing to do.
75 */ 77 */
76int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 78int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
77 struct buffer_head *bh, ext4_fsblk_t blocknr) 79 struct buffer_head *bh, ext4_fsblk_t blocknr)
78{ 80{
79 int err; 81 int err;
80 82
83 if (!ext4_handle_valid(handle))
84 return 0;
85
81 might_sleep(); 86 might_sleep();
82 87
83 BUFFER_TRACE(bh, "enter"); 88 BUFFER_TRACE(bh, "enter");
@@ -170,7 +175,9 @@ static handle_t *start_transaction(struct inode *inode)
170 */ 175 */
171static int try_to_extend_transaction(handle_t *handle, struct inode *inode) 176static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
172{ 177{
173 if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) 178 if (!ext4_handle_valid(handle))
179 return 0;
180 if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
174 return 0; 181 return 0;
175 if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) 182 if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
176 return 0; 183 return 0;
@@ -184,6 +191,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
184 */ 191 */
185static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) 192static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
186{ 193{
194 BUG_ON(EXT4_JOURNAL(inode) == NULL);
187 jbd_debug(2, "restarting handle %p\n", handle); 195 jbd_debug(2, "restarting handle %p\n", handle);
188 return ext4_journal_restart(handle, blocks_for_truncate(inode)); 196 return ext4_journal_restart(handle, blocks_for_truncate(inode));
189} 197}
@@ -216,7 +224,7 @@ void ext4_delete_inode(struct inode *inode)
216 } 224 }
217 225
218 if (IS_SYNC(inode)) 226 if (IS_SYNC(inode))
219 handle->h_sync = 1; 227 ext4_handle_sync(handle);
220 inode->i_size = 0; 228 inode->i_size = 0;
221 err = ext4_mark_inode_dirty(handle, inode); 229 err = ext4_mark_inode_dirty(handle, inode);
222 if (err) { 230 if (err) {
@@ -233,7 +241,7 @@ void ext4_delete_inode(struct inode *inode)
233 * enough credits left in the handle to remove the inode from 241 * enough credits left in the handle to remove the inode from
234 * the orphan list and set the dtime field. 242 * the orphan list and set the dtime field.
235 */ 243 */
236 if (handle->h_buffer_credits < 3) { 244 if (!ext4_handle_has_enough_credits(handle, 3)) {
237 err = ext4_journal_extend(handle, 3); 245 err = ext4_journal_extend(handle, 3);
238 if (err > 0) 246 if (err > 0)
239 err = ext4_journal_restart(handle, 3); 247 err = ext4_journal_restart(handle, 3);
@@ -717,8 +725,8 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
717 set_buffer_uptodate(bh); 725 set_buffer_uptodate(bh);
718 unlock_buffer(bh); 726 unlock_buffer(bh);
719 727
720 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 728 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
721 err = ext4_journal_dirty_metadata(handle, bh); 729 err = ext4_handle_dirty_metadata(handle, inode, bh);
722 if (err) 730 if (err)
723 goto failed; 731 goto failed;
724 } 732 }
@@ -800,8 +808,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
800 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. 808 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
801 */ 809 */
802 jbd_debug(5, "splicing indirect only\n"); 810 jbd_debug(5, "splicing indirect only\n");
803 BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata"); 811 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
804 err = ext4_journal_dirty_metadata(handle, where->bh); 812 err = ext4_handle_dirty_metadata(handle, inode, where->bh);
805 if (err) 813 if (err)
806 goto err_out; 814 goto err_out;
807 } else { 815 } else {
@@ -1229,8 +1237,8 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
1229 set_buffer_uptodate(bh); 1237 set_buffer_uptodate(bh);
1230 } 1238 }
1231 unlock_buffer(bh); 1239 unlock_buffer(bh);
1232 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 1240 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1233 err = ext4_journal_dirty_metadata(handle, bh); 1241 err = ext4_handle_dirty_metadata(handle, inode, bh);
1234 if (!fatal) 1242 if (!fatal)
1235 fatal = err; 1243 fatal = err;
1236 } else { 1244 } else {
@@ -1395,7 +1403,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1395 if (!buffer_mapped(bh) || buffer_freed(bh)) 1403 if (!buffer_mapped(bh) || buffer_freed(bh))
1396 return 0; 1404 return 0;
1397 set_buffer_uptodate(bh); 1405 set_buffer_uptodate(bh);
1398 return ext4_journal_dirty_metadata(handle, bh); 1406 return ext4_handle_dirty_metadata(handle, NULL, bh);
1399} 1407}
1400 1408
1401/* 1409/*
@@ -2762,7 +2770,10 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
2762 filemap_write_and_wait(mapping); 2770 filemap_write_and_wait(mapping);
2763 } 2771 }
2764 2772
2765 if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { 2773 BUG_ON(!EXT4_JOURNAL(inode) &&
2774 EXT4_I(inode)->i_state & EXT4_STATE_JDATA);
2775
2776 if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) {
2766 /* 2777 /*
2767 * This is a REALLY heavyweight approach, but the use of 2778 * This is a REALLY heavyweight approach, but the use of
2768 * bmap on dirty files is expected to be extremely rare: 2779 * bmap on dirty files is expected to be extremely rare:
@@ -3033,7 +3044,10 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
3033 if (offset == 0) 3044 if (offset == 0)
3034 ClearPageChecked(page); 3045 ClearPageChecked(page);
3035 3046
3036 jbd2_journal_invalidatepage(journal, page, offset); 3047 if (journal)
3048 jbd2_journal_invalidatepage(journal, page, offset);
3049 else
3050 block_invalidatepage(page, offset);
3037} 3051}
3038 3052
3039static int ext4_releasepage(struct page *page, gfp_t wait) 3053static int ext4_releasepage(struct page *page, gfp_t wait)
@@ -3043,7 +3057,10 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
3043 WARN_ON(PageChecked(page)); 3057 WARN_ON(PageChecked(page));
3044 if (!page_has_buffers(page)) 3058 if (!page_has_buffers(page))
3045 return 0; 3059 return 0;
3046 return jbd2_journal_try_to_free_buffers(journal, page, wait); 3060 if (journal)
3061 return jbd2_journal_try_to_free_buffers(journal, page, wait);
3062 else
3063 return try_to_free_buffers(page);
3047} 3064}
3048 3065
3049/* 3066/*
@@ -3315,7 +3332,7 @@ int ext4_block_truncate_page(handle_t *handle,
3315 3332
3316 err = 0; 3333 err = 0;
3317 if (ext4_should_journal_data(inode)) { 3334 if (ext4_should_journal_data(inode)) {
3318 err = ext4_journal_dirty_metadata(handle, bh); 3335 err = ext4_handle_dirty_metadata(handle, inode, bh);
3319 } else { 3336 } else {
3320 if (ext4_should_order_data(inode)) 3337 if (ext4_should_order_data(inode))
3321 err = ext4_jbd2_file_inode(handle, inode); 3338 err = ext4_jbd2_file_inode(handle, inode);
@@ -3439,8 +3456,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
3439 __le32 *p; 3456 __le32 *p;
3440 if (try_to_extend_transaction(handle, inode)) { 3457 if (try_to_extend_transaction(handle, inode)) {
3441 if (bh) { 3458 if (bh) {
3442 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 3459 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
3443 ext4_journal_dirty_metadata(handle, bh); 3460 ext4_handle_dirty_metadata(handle, inode, bh);
3444 } 3461 }
3445 ext4_mark_inode_dirty(handle, inode); 3462 ext4_mark_inode_dirty(handle, inode);
3446 ext4_journal_test_restart(handle, inode); 3463 ext4_journal_test_restart(handle, inode);
@@ -3540,7 +3557,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
3540 count, block_to_free_p, p); 3557 count, block_to_free_p, p);
3541 3558
3542 if (this_bh) { 3559 if (this_bh) {
3543 BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); 3560 BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
3544 3561
3545 /* 3562 /*
3546 * The buffer head should have an attached journal head at this 3563 * The buffer head should have an attached journal head at this
@@ -3549,7 +3566,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
3549 * the block was cleared. Check for this instead of OOPSing. 3566 * the block was cleared. Check for this instead of OOPSing.
3550 */ 3567 */
3551 if (bh2jh(this_bh)) 3568 if (bh2jh(this_bh))
3552 ext4_journal_dirty_metadata(handle, this_bh); 3569 ext4_handle_dirty_metadata(handle, inode, this_bh);
3553 else 3570 else
3554 ext4_error(inode->i_sb, __func__, 3571 ext4_error(inode->i_sb, __func__,
3555 "circular indirect block detected, " 3572 "circular indirect block detected, "
@@ -3579,7 +3596,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3579 ext4_fsblk_t nr; 3596 ext4_fsblk_t nr;
3580 __le32 *p; 3597 __le32 *p;
3581 3598
3582 if (is_handle_aborted(handle)) 3599 if (ext4_handle_is_aborted(handle))
3583 return; 3600 return;
3584 3601
3585 if (depth--) { 3602 if (depth--) {
@@ -3649,7 +3666,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3649 * will merely complain about releasing a free block, 3666 * will merely complain about releasing a free block,
3650 * rather than leaking blocks. 3667 * rather than leaking blocks.
3651 */ 3668 */
3652 if (is_handle_aborted(handle)) 3669 if (ext4_handle_is_aborted(handle))
3653 return; 3670 return;
3654 if (try_to_extend_transaction(handle, inode)) { 3671 if (try_to_extend_transaction(handle, inode)) {
3655 ext4_mark_inode_dirty(handle, inode); 3672 ext4_mark_inode_dirty(handle, inode);
@@ -3668,9 +3685,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
3668 parent_bh)){ 3685 parent_bh)){
3669 *p = 0; 3686 *p = 0;
3670 BUFFER_TRACE(parent_bh, 3687 BUFFER_TRACE(parent_bh,
3671 "call ext4_journal_dirty_metadata"); 3688 "call ext4_handle_dirty_metadata");
3672 ext4_journal_dirty_metadata(handle, 3689 ext4_handle_dirty_metadata(handle,
3673 parent_bh); 3690 inode,
3691 parent_bh);
3674 } 3692 }
3675 } 3693 }
3676 } 3694 }
@@ -3858,7 +3876,7 @@ do_indirects:
3858 * synchronous 3876 * synchronous
3859 */ 3877 */
3860 if (IS_SYNC(inode)) 3878 if (IS_SYNC(inode))
3861 handle->h_sync = 1; 3879 ext4_handle_sync(handle);
3862out_stop: 3880out_stop:
3863 /* 3881 /*
3864 * If this was a simple ftruncate(), and the file will remain alive 3882 * If this was a simple ftruncate(), and the file will remain alive
@@ -4357,8 +4375,8 @@ static int ext4_do_update_inode(handle_t *handle,
4357 EXT4_SET_RO_COMPAT_FEATURE(sb, 4375 EXT4_SET_RO_COMPAT_FEATURE(sb,
4358 EXT4_FEATURE_RO_COMPAT_LARGE_FILE); 4376 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
4359 sb->s_dirt = 1; 4377 sb->s_dirt = 1;
4360 handle->h_sync = 1; 4378 ext4_handle_sync(handle);
4361 err = ext4_journal_dirty_metadata(handle, 4379 err = ext4_handle_dirty_metadata(handle, inode,
4362 EXT4_SB(sb)->s_sbh); 4380 EXT4_SB(sb)->s_sbh);
4363 } 4381 }
4364 } 4382 }
@@ -4385,9 +4403,8 @@ static int ext4_do_update_inode(handle_t *handle,
4385 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); 4403 raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
4386 } 4404 }
4387 4405
4388 4406 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4389 BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); 4407 rc = ext4_handle_dirty_metadata(handle, inode, bh);
4390 rc = ext4_journal_dirty_metadata(handle, bh);
4391 if (!err) 4408 if (!err)
4392 err = rc; 4409 err = rc;
4393 ei->i_state &= ~EXT4_STATE_NEW; 4410 ei->i_state &= ~EXT4_STATE_NEW;
@@ -4450,6 +4467,25 @@ int ext4_write_inode(struct inode *inode, int wait)
4450 return ext4_force_commit(inode->i_sb); 4467 return ext4_force_commit(inode->i_sb);
4451} 4468}
4452 4469
4470int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
4471{
4472 int err = 0;
4473
4474 mark_buffer_dirty(bh);
4475 if (inode && inode_needs_sync(inode)) {
4476 sync_dirty_buffer(bh);
4477 if (buffer_req(bh) && !buffer_uptodate(bh)) {
4478 ext4_error(inode->i_sb, __func__,
4479 "IO error syncing inode, "
4480 "inode=%lu, block=%llu",
4481 inode->i_ino,
4482 (unsigned long long)bh->b_blocknr);
4483 err = -EIO;
4484 }
4485 }
4486 return err;
4487}
4488
4453/* 4489/*
4454 * ext4_setattr() 4490 * ext4_setattr()
4455 * 4491 *
@@ -4754,16 +4790,15 @@ int
4754ext4_reserve_inode_write(handle_t *handle, struct inode *inode, 4790ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
4755 struct ext4_iloc *iloc) 4791 struct ext4_iloc *iloc)
4756{ 4792{
4757 int err = 0; 4793 int err;
4758 if (handle) { 4794
4759 err = ext4_get_inode_loc(inode, iloc); 4795 err = ext4_get_inode_loc(inode, iloc);
4760 if (!err) { 4796 if (!err) {
4761 BUFFER_TRACE(iloc->bh, "get_write_access"); 4797 BUFFER_TRACE(iloc->bh, "get_write_access");
4762 err = ext4_journal_get_write_access(handle, iloc->bh); 4798 err = ext4_journal_get_write_access(handle, iloc->bh);
4763 if (err) { 4799 if (err) {
4764 brelse(iloc->bh); 4800 brelse(iloc->bh);
4765 iloc->bh = NULL; 4801 iloc->bh = NULL;
4766 }
4767 } 4802 }
4768 } 4803 }
4769 ext4_std_error(inode->i_sb, err); 4804 ext4_std_error(inode->i_sb, err);
@@ -4835,7 +4870,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
4835 4870
4836 might_sleep(); 4871 might_sleep();
4837 err = ext4_reserve_inode_write(handle, inode, &iloc); 4872 err = ext4_reserve_inode_write(handle, inode, &iloc);
4838 if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 4873 if (ext4_handle_valid(handle) &&
4874 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
4839 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { 4875 !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) {
4840 /* 4876 /*
4841 * We need extra buffer credits since we may write into EA block 4877 * We need extra buffer credits since we may write into EA block
@@ -4887,6 +4923,11 @@ void ext4_dirty_inode(struct inode *inode)
4887 handle_t *current_handle = ext4_journal_current_handle(); 4923 handle_t *current_handle = ext4_journal_current_handle();
4888 handle_t *handle; 4924 handle_t *handle;
4889 4925
4926 if (!ext4_handle_valid(current_handle)) {
4927 ext4_mark_inode_dirty(current_handle, inode);
4928 return;
4929 }
4930
4890 handle = ext4_journal_start(inode, 2); 4931 handle = ext4_journal_start(inode, 2);
4891 if (IS_ERR(handle)) 4932 if (IS_ERR(handle))
4892 goto out; 4933 goto out;
@@ -4924,8 +4965,9 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
4924 BUFFER_TRACE(iloc.bh, "get_write_access"); 4965 BUFFER_TRACE(iloc.bh, "get_write_access");
4925 err = jbd2_journal_get_write_access(handle, iloc.bh); 4966 err = jbd2_journal_get_write_access(handle, iloc.bh);
4926 if (!err) 4967 if (!err)
4927 err = ext4_journal_dirty_metadata(handle, 4968 err = ext4_handle_dirty_metadata(handle,
4928 iloc.bh); 4969 inode,
4970 iloc.bh);
4929 brelse(iloc.bh); 4971 brelse(iloc.bh);
4930 } 4972 }
4931 } 4973 }
@@ -4951,6 +4993,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4951 */ 4993 */
4952 4994
4953 journal = EXT4_JOURNAL(inode); 4995 journal = EXT4_JOURNAL(inode);
4996 if (!journal)
4997 return 0;
4954 if (is_journal_aborted(journal)) 4998 if (is_journal_aborted(journal))
4955 return -EROFS; 4999 return -EROFS;
4956 5000
@@ -4980,7 +5024,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4980 return PTR_ERR(handle); 5024 return PTR_ERR(handle);
4981 5025
4982 err = ext4_mark_inode_dirty(handle, inode); 5026 err = ext4_mark_inode_dirty(handle, inode);
4983 handle->h_sync = 1; 5027 ext4_handle_sync(handle);
4984 ext4_journal_stop(handle); 5028 ext4_journal_stop(handle);
4985 ext4_std_error(inode->i_sb, err); 5029 ext4_std_error(inode->i_sb, err);
4986 5030