diff options
author | Jan Kara <jack@suse.cz> | 2013-06-04 12:01:45 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-06-04 12:01:45 -0400 |
commit | f5113effc2a2ee6b86a4b345ce557353dcbcfffe (patch) | |
tree | a55e3eb4174ff0d17b9f0523619b9b647774b782 /fs/jbd2 | |
parent | 97a851ed71cd9cc2542955e92a001c6ea3d21d35 (diff) |
jbd2: don't create journal_head for temporary journal buffers

When writing metadata to the journal, we create temporary buffer heads
for that task. We also attach journal heads to these buffer heads, but
the only purpose of those journal heads is to keep the buffers linked in
the transaction's BJ_IO list. We remove the need for journal heads by
reusing the buffer_head's b_assoc_buffers list for that purpose. Also,
since the BJ_IO list is just a temporary list used during transaction
commit, we use a private list in jbd2_journal_commit_transaction()
instead, thus removing the BJ_IO list from the transaction completely.

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 1 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 65 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 36 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 14 |
4 files changed, 40 insertions, 76 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81cf..2735fef6e55e 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
690 | J_ASSERT(transaction->t_state == T_FINISHED); | 690 | J_ASSERT(transaction->t_state == T_FINISHED); |
691 | J_ASSERT(transaction->t_buffers == NULL); | 691 | J_ASSERT(transaction->t_buffers == NULL); |
692 | J_ASSERT(transaction->t_forget == NULL); | 692 | J_ASSERT(transaction->t_forget == NULL); |
693 | J_ASSERT(transaction->t_iobuf_list == NULL); | ||
694 | J_ASSERT(transaction->t_shadow_list == NULL); | 693 | J_ASSERT(transaction->t_shadow_list == NULL); |
695 | J_ASSERT(transaction->t_log_list == NULL); | 694 | J_ASSERT(transaction->t_log_list == NULL); |
696 | J_ASSERT(transaction->t_checkpoint_list == NULL); | 695 | J_ASSERT(transaction->t_checkpoint_list == NULL); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index e61d7224a729..57bd2ff97888 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
369 | { | 369 | { |
370 | struct transaction_stats_s stats; | 370 | struct transaction_stats_s stats; |
371 | transaction_t *commit_transaction; | 371 | transaction_t *commit_transaction; |
372 | struct journal_head *jh, *new_jh, *descriptor; | 372 | struct journal_head *jh, *descriptor; |
373 | struct buffer_head **wbuf = journal->j_wbuf; | 373 | struct buffer_head **wbuf = journal->j_wbuf; |
374 | int bufs; | 374 | int bufs; |
375 | int flags; | 375 | int flags; |
@@ -393,6 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
393 | tid_t first_tid; | 393 | tid_t first_tid; |
394 | int update_tail; | 394 | int update_tail; |
395 | int csum_size = 0; | 395 | int csum_size = 0; |
396 | LIST_HEAD(io_bufs); | ||
396 | 397 | ||
397 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 398 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
398 | csum_size = sizeof(struct jbd2_journal_block_tail); | 399 | csum_size = sizeof(struct jbd2_journal_block_tail); |
@@ -659,29 +660,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
659 | 660 | ||
660 | /* Bump b_count to prevent truncate from stumbling over | 661 | /* Bump b_count to prevent truncate from stumbling over |
661 | the shadowed buffer! @@@ This can go if we ever get | 662 | the shadowed buffer! @@@ This can go if we ever get |
662 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ | 663 | rid of the shadow pairing of buffers. */ |
663 | atomic_inc(&jh2bh(jh)->b_count); | 664 | atomic_inc(&jh2bh(jh)->b_count); |
664 | 665 | ||
665 | /* Make a temporary IO buffer with which to write it out | ||
666 | (this will requeue both the metadata buffer and the | ||
667 | temporary IO buffer). new_bh goes on BJ_IO*/ | ||
668 | |||
669 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
670 | /* | 666 | /* |
671 | * akpm: jbd2_journal_write_metadata_buffer() sets | 667 | * Make a temporary IO buffer with which to write it out |
672 | * new_bh->b_transaction to commit_transaction. | 668 | * (this will requeue the metadata buffer to BJ_Shadow). |
673 | * We need to clean this up before we release new_bh | ||
674 | * (which is of type BJ_IO) | ||
675 | */ | 669 | */ |
670 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
676 | JBUFFER_TRACE(jh, "ph3: write metadata"); | 671 | JBUFFER_TRACE(jh, "ph3: write metadata"); |
677 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, | 672 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, |
678 | jh, &new_jh, blocknr); | 673 | jh, &wbuf[bufs], blocknr); |
679 | if (flags < 0) { | 674 | if (flags < 0) { |
680 | jbd2_journal_abort(journal, flags); | 675 | jbd2_journal_abort(journal, flags); |
681 | continue; | 676 | continue; |
682 | } | 677 | } |
683 | set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); | 678 | jbd2_file_log_bh(&io_bufs, wbuf[bufs]); |
684 | wbuf[bufs++] = jh2bh(new_jh); | ||
685 | 679 | ||
686 | /* Record the new block's tag in the current descriptor | 680 | /* Record the new block's tag in the current descriptor |
687 | buffer */ | 681 | buffer */ |
@@ -695,10 +689,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
695 | tag = (journal_block_tag_t *) tagp; | 689 | tag = (journal_block_tag_t *) tagp; |
696 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 690 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
697 | tag->t_flags = cpu_to_be16(tag_flag); | 691 | tag->t_flags = cpu_to_be16(tag_flag); |
698 | jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), | 692 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
699 | commit_transaction->t_tid); | 693 | commit_transaction->t_tid); |
700 | tagp += tag_bytes; | 694 | tagp += tag_bytes; |
701 | space_left -= tag_bytes; | 695 | space_left -= tag_bytes; |
696 | bufs++; | ||
702 | 697 | ||
703 | if (first_tag) { | 698 | if (first_tag) { |
704 | memcpy (tagp, journal->j_uuid, 16); | 699 | memcpy (tagp, journal->j_uuid, 16); |
@@ -810,7 +805,7 @@ start_journal_io: | |||
810 | the log. Before we can commit it, wait for the IO so far to | 805 | the log. Before we can commit it, wait for the IO so far to |
811 | complete. Control buffers being written are on the | 806 | complete. Control buffers being written are on the |
812 | transaction's t_log_list queue, and metadata buffers are on | 807 | transaction's t_log_list queue, and metadata buffers are on |
813 | the t_iobuf_list queue. | 808 | the io_bufs list. |
814 | 809 | ||
815 | Wait for the buffers in reverse order. That way we are | 810 | Wait for the buffers in reverse order. That way we are |
816 | less likely to be woken up until all IOs have completed, and | 811 | less likely to be woken up until all IOs have completed, and |
@@ -819,46 +814,31 @@ start_journal_io: | |||
819 | 814 | ||
820 | jbd_debug(3, "JBD2: commit phase 3\n"); | 815 | jbd_debug(3, "JBD2: commit phase 3\n"); |
821 | 816 | ||
822 | /* | 817 | while (!list_empty(&io_bufs)) { |
823 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 818 | struct buffer_head *bh = list_entry(io_bufs.prev, |
824 | * See __journal_try_to_free_buffer. | 819 | struct buffer_head, |
825 | */ | 820 | b_assoc_buffers); |
826 | wait_for_iobuf: | ||
827 | while (commit_transaction->t_iobuf_list != NULL) { | ||
828 | struct buffer_head *bh; | ||
829 | 821 | ||
830 | jh = commit_transaction->t_iobuf_list->b_tprev; | 822 | wait_on_buffer(bh); |
831 | bh = jh2bh(jh); | 823 | cond_resched(); |
832 | if (buffer_locked(bh)) { | ||
833 | wait_on_buffer(bh); | ||
834 | goto wait_for_iobuf; | ||
835 | } | ||
836 | if (cond_resched()) | ||
837 | goto wait_for_iobuf; | ||
838 | 824 | ||
839 | if (unlikely(!buffer_uptodate(bh))) | 825 | if (unlikely(!buffer_uptodate(bh))) |
840 | err = -EIO; | 826 | err = -EIO; |
841 | 827 | jbd2_unfile_log_bh(bh); | |
842 | clear_buffer_jwrite(bh); | ||
843 | |||
844 | JBUFFER_TRACE(jh, "ph4: unfile after journal write"); | ||
845 | jbd2_journal_unfile_buffer(journal, jh); | ||
846 | 828 | ||
847 | /* | 829 | /* |
848 | * ->t_iobuf_list should contain only dummy buffer_heads | 830 | * The list contains temporary buffer heads created by |
849 | * which were created by jbd2_journal_write_metadata_buffer(). | 831 | * jbd2_journal_write_metadata_buffer(). |
850 | */ | 832 | */ |
851 | BUFFER_TRACE(bh, "dumping temporary bh"); | 833 | BUFFER_TRACE(bh, "dumping temporary bh"); |
852 | jbd2_journal_put_journal_head(jh); | ||
853 | __brelse(bh); | 834 | __brelse(bh); |
854 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); | 835 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); |
855 | free_buffer_head(bh); | 836 | free_buffer_head(bh); |
856 | 837 | ||
857 | /* We also have to unlock and free the corresponding | 838 | /* We also have to refile the corresponding shadowed buffer */ |
858 | shadowed buffer */ | ||
859 | jh = commit_transaction->t_shadow_list->b_tprev; | 839 | jh = commit_transaction->t_shadow_list->b_tprev; |
860 | bh = jh2bh(jh); | 840 | bh = jh2bh(jh); |
861 | clear_bit(BH_JWrite, &bh->b_state); | 841 | clear_buffer_jwrite(bh); |
862 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); | 842 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); |
863 | 843 | ||
864 | /* The metadata is now released for reuse, but we need | 844 | /* The metadata is now released for reuse, but we need |
@@ -953,7 +933,6 @@ wait_for_iobuf: | |||
953 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 933 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
954 | J_ASSERT(commit_transaction->t_buffers == NULL); | 934 | J_ASSERT(commit_transaction->t_buffers == NULL); |
955 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 935 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
956 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | ||
957 | J_ASSERT(commit_transaction->t_shadow_list == NULL); | 936 | J_ASSERT(commit_transaction->t_shadow_list == NULL); |
958 | J_ASSERT(commit_transaction->t_log_list == NULL); | 937 | J_ASSERT(commit_transaction->t_log_list == NULL); |
959 | 938 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 3cdd285df204..45cdc080e466 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal) | |||
310 | * | 310 | * |
311 | * If the source buffer has already been modified by a new transaction | 311 | * If the source buffer has already been modified by a new transaction |
312 | * since we took the last commit snapshot, we use the frozen copy of | 312 | * since we took the last commit snapshot, we use the frozen copy of |
313 | * that data for IO. If we end up using the existing buffer_head's data | 313 | * that data for IO. If we end up using the existing buffer_head's data |
314 | * for the write, then we *have* to lock the buffer to prevent anyone | 314 | * for the write, then we have to make sure nobody modifies it while the |
315 | * else from using and possibly modifying it while the IO is in | 315 | * IO is in progress. do_get_write_access() handles this. |
316 | * progress. | ||
317 | * | 316 | * |
318 | * The function returns a pointer to the buffer_heads to be used for IO. | 317 | * The function returns a pointer to the buffer_head to be used for IO. |
319 | * | 318 | * |
320 | * We assume that the journal has already been locked in this function. | ||
321 | * | 319 | * |
322 | * Return value: | 320 | * Return value: |
323 | * <0: Error | 321 | * <0: Error |
@@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal) | |||
330 | 328 | ||
331 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, | 329 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, |
332 | struct journal_head *jh_in, | 330 | struct journal_head *jh_in, |
333 | struct journal_head **jh_out, | 331 | struct buffer_head **bh_out, |
334 | unsigned long long blocknr) | 332 | sector_t blocknr) |
335 | { | 333 | { |
336 | int need_copy_out = 0; | 334 | int need_copy_out = 0; |
337 | int done_copy_out = 0; | 335 | int done_copy_out = 0; |
338 | int do_escape = 0; | 336 | int do_escape = 0; |
339 | char *mapped_data; | 337 | char *mapped_data; |
340 | struct buffer_head *new_bh; | 338 | struct buffer_head *new_bh; |
341 | struct journal_head *new_jh; | ||
342 | struct page *new_page; | 339 | struct page *new_page; |
343 | unsigned int new_offset; | 340 | unsigned int new_offset; |
344 | struct buffer_head *bh_in = jh2bh(jh_in); | 341 | struct buffer_head *bh_in = jh2bh(jh_in); |
@@ -368,14 +365,13 @@ retry_alloc: | |||
368 | 365 | ||
369 | /* keep subsequent assertions sane */ | 366 | /* keep subsequent assertions sane */ |
370 | atomic_set(&new_bh->b_count, 1); | 367 | atomic_set(&new_bh->b_count, 1); |
371 | new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ | ||
372 | 368 | ||
369 | jbd_lock_bh_state(bh_in); | ||
370 | repeat: | ||
373 | /* | 371 | /* |
374 | * If a new transaction has already done a buffer copy-out, then | 372 | * If a new transaction has already done a buffer copy-out, then |
375 | * we use that version of the data for the commit. | 373 | * we use that version of the data for the commit. |
376 | */ | 374 | */ |
377 | jbd_lock_bh_state(bh_in); | ||
378 | repeat: | ||
379 | if (jh_in->b_frozen_data) { | 375 | if (jh_in->b_frozen_data) { |
380 | done_copy_out = 1; | 376 | done_copy_out = 1; |
381 | new_page = virt_to_page(jh_in->b_frozen_data); | 377 | new_page = virt_to_page(jh_in->b_frozen_data); |
@@ -415,7 +411,7 @@ repeat: | |||
415 | jbd_unlock_bh_state(bh_in); | 411 | jbd_unlock_bh_state(bh_in); |
416 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); | 412 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); |
417 | if (!tmp) { | 413 | if (!tmp) { |
418 | jbd2_journal_put_journal_head(new_jh); | 414 | brelse(new_bh); |
419 | return -ENOMEM; | 415 | return -ENOMEM; |
420 | } | 416 | } |
421 | jbd_lock_bh_state(bh_in); | 417 | jbd_lock_bh_state(bh_in); |
@@ -426,7 +422,7 @@ repeat: | |||
426 | 422 | ||
427 | jh_in->b_frozen_data = tmp; | 423 | jh_in->b_frozen_data = tmp; |
428 | mapped_data = kmap_atomic(new_page); | 424 | mapped_data = kmap_atomic(new_page); |
429 | memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); | 425 | memcpy(tmp, mapped_data + new_offset, bh_in->b_size); |
430 | kunmap_atomic(mapped_data); | 426 | kunmap_atomic(mapped_data); |
431 | 427 | ||
432 | new_page = virt_to_page(tmp); | 428 | new_page = virt_to_page(tmp); |
@@ -452,14 +448,13 @@ repeat: | |||
452 | } | 448 | } |
453 | 449 | ||
454 | set_bh_page(new_bh, new_page, new_offset); | 450 | set_bh_page(new_bh, new_page, new_offset); |
455 | new_jh->b_transaction = NULL; | 451 | new_bh->b_size = bh_in->b_size; |
456 | new_bh->b_size = jh2bh(jh_in)->b_size; | 452 | new_bh->b_bdev = journal->j_dev; |
457 | new_bh->b_bdev = transaction->t_journal->j_dev; | ||
458 | new_bh->b_blocknr = blocknr; | 453 | new_bh->b_blocknr = blocknr; |
459 | set_buffer_mapped(new_bh); | 454 | set_buffer_mapped(new_bh); |
460 | set_buffer_dirty(new_bh); | 455 | set_buffer_dirty(new_bh); |
461 | 456 | ||
462 | *jh_out = new_jh; | 457 | *bh_out = new_bh; |
463 | 458 | ||
464 | /* | 459 | /* |
465 | * The to-be-written buffer needs to get moved to the io queue, | 460 | * The to-be-written buffer needs to get moved to the io queue, |
@@ -472,9 +467,6 @@ repeat: | |||
472 | spin_unlock(&journal->j_list_lock); | 467 | spin_unlock(&journal->j_list_lock); |
473 | jbd_unlock_bh_state(bh_in); | 468 | jbd_unlock_bh_state(bh_in); |
474 | 469 | ||
475 | JBUFFER_TRACE(new_jh, "file as BJ_IO"); | ||
476 | jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); | ||
477 | |||
478 | return do_escape | (done_copy_out << 1); | 470 | return do_escape | (done_copy_out << 1); |
479 | } | 471 | } |
480 | 472 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5d8268ad364a..983010900258 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1601,10 +1601,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
1601 | * Remove a buffer from the appropriate transaction list. | 1601 | * Remove a buffer from the appropriate transaction list. |
1602 | * | 1602 | * |
1603 | * Note that this function can *change* the value of | 1603 | * Note that this function can *change* the value of |
1604 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, | 1604 | * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or |
1605 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one | 1605 | * t_reserved_list. If the caller is holding onto a copy of one of these |
1606 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1606 | * pointers, it could go bad. Generally the caller needs to re-read the |
1607 | * the pointer from the transaction_t. | 1607 | * pointer from the transaction_t. |
1608 | * | 1608 | * |
1609 | * Called under j_list_lock. | 1609 | * Called under j_list_lock. |
1610 | */ | 1610 | */ |
@@ -1634,9 +1634,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
1634 | case BJ_Forget: | 1634 | case BJ_Forget: |
1635 | list = &transaction->t_forget; | 1635 | list = &transaction->t_forget; |
1636 | break; | 1636 | break; |
1637 | case BJ_IO: | ||
1638 | list = &transaction->t_iobuf_list; | ||
1639 | break; | ||
1640 | case BJ_Shadow: | 1637 | case BJ_Shadow: |
1641 | list = &transaction->t_shadow_list; | 1638 | list = &transaction->t_shadow_list; |
1642 | break; | 1639 | break; |
@@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
2148 | case BJ_Forget: | 2145 | case BJ_Forget: |
2149 | list = &transaction->t_forget; | 2146 | list = &transaction->t_forget; |
2150 | break; | 2147 | break; |
2151 | case BJ_IO: | ||
2152 | list = &transaction->t_iobuf_list; | ||
2153 | break; | ||
2154 | case BJ_Shadow: | 2148 | case BJ_Shadow: |
2155 | list = &transaction->t_shadow_list; | 2149 | list = &transaction->t_shadow_list; |
2156 | break; | 2150 | break; |