diff options
author | Jan Kara <jack@suse.cz> | 2013-06-04 12:01:45 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-06-04 12:01:45 -0400 |
commit | f5113effc2a2ee6b86a4b345ce557353dcbcfffe (patch) | |
tree | a55e3eb4174ff0d17b9f0523619b9b647774b782 /fs/jbd2/commit.c | |
parent | 97a851ed71cd9cc2542955e92a001c6ea3d21d35 (diff) |
jbd2: don't create journal_head for temporary journal buffers
When writing metadata to the journal, we create temporary buffer heads
for that task. We also attach journal heads to these buffer heads, but
the only purpose of the journal heads is to keep the buffers linked in
the transaction's BJ_IO list. We remove the need for journal heads by
reusing buffer_head's b_assoc_buffers list for that purpose. Also,
since the BJ_IO list is just a temporary list for transaction commit, we
use a private list in jbd2_journal_commit_transaction() for it, thus
removing the BJ_IO list from the transaction completely.
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- | fs/jbd2/commit.c | 65 |
1 files changed, 22 insertions, 43 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index e61d7224a729..57bd2ff97888 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
369 | { | 369 | { |
370 | struct transaction_stats_s stats; | 370 | struct transaction_stats_s stats; |
371 | transaction_t *commit_transaction; | 371 | transaction_t *commit_transaction; |
372 | struct journal_head *jh, *new_jh, *descriptor; | 372 | struct journal_head *jh, *descriptor; |
373 | struct buffer_head **wbuf = journal->j_wbuf; | 373 | struct buffer_head **wbuf = journal->j_wbuf; |
374 | int bufs; | 374 | int bufs; |
375 | int flags; | 375 | int flags; |
@@ -393,6 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
393 | tid_t first_tid; | 393 | tid_t first_tid; |
394 | int update_tail; | 394 | int update_tail; |
395 | int csum_size = 0; | 395 | int csum_size = 0; |
396 | LIST_HEAD(io_bufs); | ||
396 | 397 | ||
397 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 398 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
398 | csum_size = sizeof(struct jbd2_journal_block_tail); | 399 | csum_size = sizeof(struct jbd2_journal_block_tail); |
@@ -659,29 +660,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
659 | 660 | ||
660 | /* Bump b_count to prevent truncate from stumbling over | 661 | /* Bump b_count to prevent truncate from stumbling over |
661 | the shadowed buffer! @@@ This can go if we ever get | 662 | the shadowed buffer! @@@ This can go if we ever get |
662 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ | 663 | rid of the shadow pairing of buffers. */ |
663 | atomic_inc(&jh2bh(jh)->b_count); | 664 | atomic_inc(&jh2bh(jh)->b_count); |
664 | 665 | ||
665 | /* Make a temporary IO buffer with which to write it out | ||
666 | (this will requeue both the metadata buffer and the | ||
667 | temporary IO buffer). new_bh goes on BJ_IO*/ | ||
668 | |||
669 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
670 | /* | 666 | /* |
671 | * akpm: jbd2_journal_write_metadata_buffer() sets | 667 | * Make a temporary IO buffer with which to write it out |
672 | * new_bh->b_transaction to commit_transaction. | 668 | * (this will requeue the metadata buffer to BJ_Shadow). |
673 | * We need to clean this up before we release new_bh | ||
674 | * (which is of type BJ_IO) | ||
675 | */ | 669 | */ |
670 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
676 | JBUFFER_TRACE(jh, "ph3: write metadata"); | 671 | JBUFFER_TRACE(jh, "ph3: write metadata"); |
677 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, | 672 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, |
678 | jh, &new_jh, blocknr); | 673 | jh, &wbuf[bufs], blocknr); |
679 | if (flags < 0) { | 674 | if (flags < 0) { |
680 | jbd2_journal_abort(journal, flags); | 675 | jbd2_journal_abort(journal, flags); |
681 | continue; | 676 | continue; |
682 | } | 677 | } |
683 | set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); | 678 | jbd2_file_log_bh(&io_bufs, wbuf[bufs]); |
684 | wbuf[bufs++] = jh2bh(new_jh); | ||
685 | 679 | ||
686 | /* Record the new block's tag in the current descriptor | 680 | /* Record the new block's tag in the current descriptor |
687 | buffer */ | 681 | buffer */ |
@@ -695,10 +689,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
695 | tag = (journal_block_tag_t *) tagp; | 689 | tag = (journal_block_tag_t *) tagp; |
696 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 690 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
697 | tag->t_flags = cpu_to_be16(tag_flag); | 691 | tag->t_flags = cpu_to_be16(tag_flag); |
698 | jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), | 692 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
699 | commit_transaction->t_tid); | 693 | commit_transaction->t_tid); |
700 | tagp += tag_bytes; | 694 | tagp += tag_bytes; |
701 | space_left -= tag_bytes; | 695 | space_left -= tag_bytes; |
696 | bufs++; | ||
702 | 697 | ||
703 | if (first_tag) { | 698 | if (first_tag) { |
704 | memcpy (tagp, journal->j_uuid, 16); | 699 | memcpy (tagp, journal->j_uuid, 16); |
@@ -810,7 +805,7 @@ start_journal_io: | |||
810 | the log. Before we can commit it, wait for the IO so far to | 805 | the log. Before we can commit it, wait for the IO so far to |
811 | complete. Control buffers being written are on the | 806 | complete. Control buffers being written are on the |
812 | transaction's t_log_list queue, and metadata buffers are on | 807 | transaction's t_log_list queue, and metadata buffers are on |
813 | the t_iobuf_list queue. | 808 | the io_bufs list. |
814 | 809 | ||
815 | Wait for the buffers in reverse order. That way we are | 810 | Wait for the buffers in reverse order. That way we are |
816 | less likely to be woken up until all IOs have completed, and | 811 | less likely to be woken up until all IOs have completed, and |
@@ -819,46 +814,31 @@ start_journal_io: | |||
819 | 814 | ||
820 | jbd_debug(3, "JBD2: commit phase 3\n"); | 815 | jbd_debug(3, "JBD2: commit phase 3\n"); |
821 | 816 | ||
822 | /* | 817 | while (!list_empty(&io_bufs)) { |
823 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 818 | struct buffer_head *bh = list_entry(io_bufs.prev, |
824 | * See __journal_try_to_free_buffer. | 819 | struct buffer_head, |
825 | */ | 820 | b_assoc_buffers); |
826 | wait_for_iobuf: | ||
827 | while (commit_transaction->t_iobuf_list != NULL) { | ||
828 | struct buffer_head *bh; | ||
829 | 821 | ||
830 | jh = commit_transaction->t_iobuf_list->b_tprev; | 822 | wait_on_buffer(bh); |
831 | bh = jh2bh(jh); | 823 | cond_resched(); |
832 | if (buffer_locked(bh)) { | ||
833 | wait_on_buffer(bh); | ||
834 | goto wait_for_iobuf; | ||
835 | } | ||
836 | if (cond_resched()) | ||
837 | goto wait_for_iobuf; | ||
838 | 824 | ||
839 | if (unlikely(!buffer_uptodate(bh))) | 825 | if (unlikely(!buffer_uptodate(bh))) |
840 | err = -EIO; | 826 | err = -EIO; |
841 | 827 | jbd2_unfile_log_bh(bh); | |
842 | clear_buffer_jwrite(bh); | ||
843 | |||
844 | JBUFFER_TRACE(jh, "ph4: unfile after journal write"); | ||
845 | jbd2_journal_unfile_buffer(journal, jh); | ||
846 | 828 | ||
847 | /* | 829 | /* |
848 | * ->t_iobuf_list should contain only dummy buffer_heads | 830 | * The list contains temporary buffer heads created by |
849 | * which were created by jbd2_journal_write_metadata_buffer(). | 831 | * jbd2_journal_write_metadata_buffer(). |
850 | */ | 832 | */ |
851 | BUFFER_TRACE(bh, "dumping temporary bh"); | 833 | BUFFER_TRACE(bh, "dumping temporary bh"); |
852 | jbd2_journal_put_journal_head(jh); | ||
853 | __brelse(bh); | 834 | __brelse(bh); |
854 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); | 835 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); |
855 | free_buffer_head(bh); | 836 | free_buffer_head(bh); |
856 | 837 | ||
857 | /* We also have to unlock and free the corresponding | 838 | /* We also have to refile the corresponding shadowed buffer */ |
858 | shadowed buffer */ | ||
859 | jh = commit_transaction->t_shadow_list->b_tprev; | 839 | jh = commit_transaction->t_shadow_list->b_tprev; |
860 | bh = jh2bh(jh); | 840 | bh = jh2bh(jh); |
861 | clear_bit(BH_JWrite, &bh->b_state); | 841 | clear_buffer_jwrite(bh); |
862 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); | 842 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); |
863 | 843 | ||
864 | /* The metadata is now released for reuse, but we need | 844 | /* The metadata is now released for reuse, but we need |
@@ -953,7 +933,6 @@ wait_for_iobuf: | |||
953 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 933 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
954 | J_ASSERT(commit_transaction->t_buffers == NULL); | 934 | J_ASSERT(commit_transaction->t_buffers == NULL); |
955 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 935 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
956 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | ||
957 | J_ASSERT(commit_transaction->t_shadow_list == NULL); | 936 | J_ASSERT(commit_transaction->t_shadow_list == NULL); |
958 | J_ASSERT(commit_transaction->t_log_list == NULL); | 937 | J_ASSERT(commit_transaction->t_log_list == NULL); |
959 | 938 | ||