about | summary | refs | log | tree | commit | diff | stats
path: root/fs/jbd2
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2013-06-04 12:01:45 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2013-06-04 12:01:45 -0400
commit: f5113effc2a2ee6b86a4b345ce557353dcbcfffe (patch)
tree: a55e3eb4174ff0d17b9f0523619b9b647774b782 /fs/jbd2
parent: 97a851ed71cd9cc2542955e92a001c6ea3d21d35 (diff)
jbd2: don't create journal_head for temporary journal buffers
When writing metadata to the journal, we create temporary buffer heads for that task. We also attach journal heads to these buffer heads, but the only purpose of the journal heads is to keep the buffers linked in the transaction's BJ_IO list. We remove the need for journal heads by reusing buffer_head's b_assoc_buffers list for that purpose. Also, since the BJ_IO list is just a temporary list for transaction commit, we use a private list in jbd2_journal_commit_transaction() for that, thus removing the BJ_IO list from the transaction completely.

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/checkpoint.c 1
-rw-r--r-- fs/jbd2/commit.c 65
-rw-r--r-- fs/jbd2/journal.c 36
-rw-r--r-- fs/jbd2/transaction.c 14
4 files changed, 40 insertions, 76 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index c78841ee81cf..2735fef6e55e 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
690 J_ASSERT(transaction->t_state == T_FINISHED); 690 J_ASSERT(transaction->t_state == T_FINISHED);
691 J_ASSERT(transaction->t_buffers == NULL); 691 J_ASSERT(transaction->t_buffers == NULL);
692 J_ASSERT(transaction->t_forget == NULL); 692 J_ASSERT(transaction->t_forget == NULL);
693 J_ASSERT(transaction->t_iobuf_list == NULL);
694 J_ASSERT(transaction->t_shadow_list == NULL); 693 J_ASSERT(transaction->t_shadow_list == NULL);
695 J_ASSERT(transaction->t_log_list == NULL); 694 J_ASSERT(transaction->t_log_list == NULL);
696 J_ASSERT(transaction->t_checkpoint_list == NULL); 695 J_ASSERT(transaction->t_checkpoint_list == NULL);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index e61d7224a729..57bd2ff97888 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -369,7 +369,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
369{ 369{
370 struct transaction_stats_s stats; 370 struct transaction_stats_s stats;
371 transaction_t *commit_transaction; 371 transaction_t *commit_transaction;
372 struct journal_head *jh, *new_jh, *descriptor; 372 struct journal_head *jh, *descriptor;
373 struct buffer_head **wbuf = journal->j_wbuf; 373 struct buffer_head **wbuf = journal->j_wbuf;
374 int bufs; 374 int bufs;
375 int flags; 375 int flags;
@@ -393,6 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
393 tid_t first_tid; 393 tid_t first_tid;
394 int update_tail; 394 int update_tail;
395 int csum_size = 0; 395 int csum_size = 0;
396 LIST_HEAD(io_bufs);
396 397
397 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 398 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
398 csum_size = sizeof(struct jbd2_journal_block_tail); 399 csum_size = sizeof(struct jbd2_journal_block_tail);
@@ -659,29 +660,22 @@ void jbd2_journal_commit_transaction(journal_t *journal)
659 660
660 /* Bump b_count to prevent truncate from stumbling over 661 /* Bump b_count to prevent truncate from stumbling over
661 the shadowed buffer! @@@ This can go if we ever get 662 the shadowed buffer! @@@ This can go if we ever get
662 rid of the BJ_IO/BJ_Shadow pairing of buffers. */ 663 rid of the shadow pairing of buffers. */
663 atomic_inc(&jh2bh(jh)->b_count); 664 atomic_inc(&jh2bh(jh)->b_count);
664 665
665 /* Make a temporary IO buffer with which to write it out
666 (this will requeue both the metadata buffer and the
667 temporary IO buffer). new_bh goes on BJ_IO*/
668
669 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
670 /* 666 /*
671 * akpm: jbd2_journal_write_metadata_buffer() sets 667 * Make a temporary IO buffer with which to write it out
672 * new_bh->b_transaction to commit_transaction. 668 * (this will requeue the metadata buffer to BJ_Shadow).
673 * We need to clean this up before we release new_bh
674 * (which is of type BJ_IO)
675 */ 669 */
670 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
676 JBUFFER_TRACE(jh, "ph3: write metadata"); 671 JBUFFER_TRACE(jh, "ph3: write metadata");
677 flags = jbd2_journal_write_metadata_buffer(commit_transaction, 672 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
678 jh, &new_jh, blocknr); 673 jh, &wbuf[bufs], blocknr);
679 if (flags < 0) { 674 if (flags < 0) {
680 jbd2_journal_abort(journal, flags); 675 jbd2_journal_abort(journal, flags);
681 continue; 676 continue;
682 } 677 }
683 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 678 jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
684 wbuf[bufs++] = jh2bh(new_jh);
685 679
686 /* Record the new block's tag in the current descriptor 680 /* Record the new block's tag in the current descriptor
687 buffer */ 681 buffer */
@@ -695,10 +689,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
695 tag = (journal_block_tag_t *) tagp; 689 tag = (journal_block_tag_t *) tagp;
696 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 690 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
697 tag->t_flags = cpu_to_be16(tag_flag); 691 tag->t_flags = cpu_to_be16(tag_flag);
698 jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), 692 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
699 commit_transaction->t_tid); 693 commit_transaction->t_tid);
700 tagp += tag_bytes; 694 tagp += tag_bytes;
701 space_left -= tag_bytes; 695 space_left -= tag_bytes;
696 bufs++;
702 697
703 if (first_tag) { 698 if (first_tag) {
704 memcpy (tagp, journal->j_uuid, 16); 699 memcpy (tagp, journal->j_uuid, 16);
@@ -810,7 +805,7 @@ start_journal_io:
810 the log. Before we can commit it, wait for the IO so far to 805 the log. Before we can commit it, wait for the IO so far to
811 complete. Control buffers being written are on the 806 complete. Control buffers being written are on the
812 transaction's t_log_list queue, and metadata buffers are on 807 transaction's t_log_list queue, and metadata buffers are on
813 the t_iobuf_list queue. 808 the io_bufs list.
814 809
815 Wait for the buffers in reverse order. That way we are 810 Wait for the buffers in reverse order. That way we are
816 less likely to be woken up until all IOs have completed, and 811 less likely to be woken up until all IOs have completed, and
@@ -819,46 +814,31 @@ start_journal_io:
819 814
820 jbd_debug(3, "JBD2: commit phase 3\n"); 815 jbd_debug(3, "JBD2: commit phase 3\n");
821 816
822 /* 817 while (!list_empty(&io_bufs)) {
823 * akpm: these are BJ_IO, and j_list_lock is not needed. 818 struct buffer_head *bh = list_entry(io_bufs.prev,
824 * See __journal_try_to_free_buffer. 819 struct buffer_head,
825 */ 820 b_assoc_buffers);
826wait_for_iobuf:
827 while (commit_transaction->t_iobuf_list != NULL) {
828 struct buffer_head *bh;
829 821
830 jh = commit_transaction->t_iobuf_list->b_tprev; 822 wait_on_buffer(bh);
831 bh = jh2bh(jh); 823 cond_resched();
832 if (buffer_locked(bh)) {
833 wait_on_buffer(bh);
834 goto wait_for_iobuf;
835 }
836 if (cond_resched())
837 goto wait_for_iobuf;
838 824
839 if (unlikely(!buffer_uptodate(bh))) 825 if (unlikely(!buffer_uptodate(bh)))
840 err = -EIO; 826 err = -EIO;
841 827 jbd2_unfile_log_bh(bh);
842 clear_buffer_jwrite(bh);
843
844 JBUFFER_TRACE(jh, "ph4: unfile after journal write");
845 jbd2_journal_unfile_buffer(journal, jh);
846 828
847 /* 829 /*
848 * ->t_iobuf_list should contain only dummy buffer_heads 830 * The list contains temporary buffer heads created by
849 * which were created by jbd2_journal_write_metadata_buffer(). 831 * jbd2_journal_write_metadata_buffer().
850 */ 832 */
851 BUFFER_TRACE(bh, "dumping temporary bh"); 833 BUFFER_TRACE(bh, "dumping temporary bh");
852 jbd2_journal_put_journal_head(jh);
853 __brelse(bh); 834 __brelse(bh);
854 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); 835 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
855 free_buffer_head(bh); 836 free_buffer_head(bh);
856 837
857 /* We also have to unlock and free the corresponding 838 /* We also have to refile the corresponding shadowed buffer */
858 shadowed buffer */
859 jh = commit_transaction->t_shadow_list->b_tprev; 839 jh = commit_transaction->t_shadow_list->b_tprev;
860 bh = jh2bh(jh); 840 bh = jh2bh(jh);
861 clear_bit(BH_JWrite, &bh->b_state); 841 clear_buffer_jwrite(bh);
862 J_ASSERT_BH(bh, buffer_jbddirty(bh)); 842 J_ASSERT_BH(bh, buffer_jbddirty(bh));
863 843
864 /* The metadata is now released for reuse, but we need 844 /* The metadata is now released for reuse, but we need
@@ -953,7 +933,6 @@ wait_for_iobuf:
953 J_ASSERT(list_empty(&commit_transaction->t_inode_list)); 933 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
954 J_ASSERT(commit_transaction->t_buffers == NULL); 934 J_ASSERT(commit_transaction->t_buffers == NULL);
955 J_ASSERT(commit_transaction->t_checkpoint_list == NULL); 935 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
956 J_ASSERT(commit_transaction->t_iobuf_list == NULL);
957 J_ASSERT(commit_transaction->t_shadow_list == NULL); 936 J_ASSERT(commit_transaction->t_shadow_list == NULL);
958 J_ASSERT(commit_transaction->t_log_list == NULL); 937 J_ASSERT(commit_transaction->t_log_list == NULL);
959 938
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 3cdd285df204..45cdc080e466 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal)
310 * 310 *
311 * If the source buffer has already been modified by a new transaction 311 * If the source buffer has already been modified by a new transaction
312 * since we took the last commit snapshot, we use the frozen copy of 312 * since we took the last commit snapshot, we use the frozen copy of
313 * that data for IO. If we end up using the existing buffer_head's data 313 * that data for IO. If we end up using the existing buffer_head's data
314 * for the write, then we *have* to lock the buffer to prevent anyone 314 * for the write, then we have to make sure nobody modifies it while the
315 * else from using and possibly modifying it while the IO is in 315 * IO is in progress. do_get_write_access() handles this.
316 * progress.
317 * 316 *
318 * The function returns a pointer to the buffer_heads to be used for IO. 317 * The function returns a pointer to the buffer_head to be used for IO.
319 * 318 *
320 * We assume that the journal has already been locked in this function.
321 * 319 *
322 * Return value: 320 * Return value:
323 * <0: Error 321 * <0: Error
@@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal)
330 328
331int jbd2_journal_write_metadata_buffer(transaction_t *transaction, 329int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
332 struct journal_head *jh_in, 330 struct journal_head *jh_in,
333 struct journal_head **jh_out, 331 struct buffer_head **bh_out,
334 unsigned long long blocknr) 332 sector_t blocknr)
335{ 333{
336 int need_copy_out = 0; 334 int need_copy_out = 0;
337 int done_copy_out = 0; 335 int done_copy_out = 0;
338 int do_escape = 0; 336 int do_escape = 0;
339 char *mapped_data; 337 char *mapped_data;
340 struct buffer_head *new_bh; 338 struct buffer_head *new_bh;
341 struct journal_head *new_jh;
342 struct page *new_page; 339 struct page *new_page;
343 unsigned int new_offset; 340 unsigned int new_offset;
344 struct buffer_head *bh_in = jh2bh(jh_in); 341 struct buffer_head *bh_in = jh2bh(jh_in);
@@ -368,14 +365,13 @@ retry_alloc:
368 365
369 /* keep subsequent assertions sane */ 366 /* keep subsequent assertions sane */
370 atomic_set(&new_bh->b_count, 1); 367 atomic_set(&new_bh->b_count, 1);
371 new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
372 368
369 jbd_lock_bh_state(bh_in);
370repeat:
373 /* 371 /*
374 * If a new transaction has already done a buffer copy-out, then 372 * If a new transaction has already done a buffer copy-out, then
375 * we use that version of the data for the commit. 373 * we use that version of the data for the commit.
376 */ 374 */
377 jbd_lock_bh_state(bh_in);
378repeat:
379 if (jh_in->b_frozen_data) { 375 if (jh_in->b_frozen_data) {
380 done_copy_out = 1; 376 done_copy_out = 1;
381 new_page = virt_to_page(jh_in->b_frozen_data); 377 new_page = virt_to_page(jh_in->b_frozen_data);
@@ -415,7 +411,7 @@ repeat:
415 jbd_unlock_bh_state(bh_in); 411 jbd_unlock_bh_state(bh_in);
416 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 412 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
417 if (!tmp) { 413 if (!tmp) {
418 jbd2_journal_put_journal_head(new_jh); 414 brelse(new_bh);
419 return -ENOMEM; 415 return -ENOMEM;
420 } 416 }
421 jbd_lock_bh_state(bh_in); 417 jbd_lock_bh_state(bh_in);
@@ -426,7 +422,7 @@ repeat:
426 422
427 jh_in->b_frozen_data = tmp; 423 jh_in->b_frozen_data = tmp;
428 mapped_data = kmap_atomic(new_page); 424 mapped_data = kmap_atomic(new_page);
429 memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); 425 memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
430 kunmap_atomic(mapped_data); 426 kunmap_atomic(mapped_data);
431 427
432 new_page = virt_to_page(tmp); 428 new_page = virt_to_page(tmp);
@@ -452,14 +448,13 @@ repeat:
452 } 448 }
453 449
454 set_bh_page(new_bh, new_page, new_offset); 450 set_bh_page(new_bh, new_page, new_offset);
455 new_jh->b_transaction = NULL; 451 new_bh->b_size = bh_in->b_size;
456 new_bh->b_size = jh2bh(jh_in)->b_size; 452 new_bh->b_bdev = journal->j_dev;
457 new_bh->b_bdev = transaction->t_journal->j_dev;
458 new_bh->b_blocknr = blocknr; 453 new_bh->b_blocknr = blocknr;
459 set_buffer_mapped(new_bh); 454 set_buffer_mapped(new_bh);
460 set_buffer_dirty(new_bh); 455 set_buffer_dirty(new_bh);
461 456
462 *jh_out = new_jh; 457 *bh_out = new_bh;
463 458
464 /* 459 /*
465 * The to-be-written buffer needs to get moved to the io queue, 460 * The to-be-written buffer needs to get moved to the io queue,
@@ -472,9 +467,6 @@ repeat:
472 spin_unlock(&journal->j_list_lock); 467 spin_unlock(&journal->j_list_lock);
473 jbd_unlock_bh_state(bh_in); 468 jbd_unlock_bh_state(bh_in);
474 469
475 JBUFFER_TRACE(new_jh, "file as BJ_IO");
476 jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
477
478 return do_escape | (done_copy_out << 1); 470 return do_escape | (done_copy_out << 1);
479} 471}
480 472
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 5d8268ad364a..983010900258 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1601,10 +1601,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1601 * Remove a buffer from the appropriate transaction list. 1601 * Remove a buffer from the appropriate transaction list.
1602 * 1602 *
1603 * Note that this function can *change* the value of 1603 * Note that this function can *change* the value of
1604 * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, 1604 * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
1605 * t_log_list or t_reserved_list. If the caller is holding onto a copy of one 1605 * t_reserved_list. If the caller is holding onto a copy of one of these
1606 * of these pointers, it could go bad. Generally the caller needs to re-read 1606 * pointers, it could go bad. Generally the caller needs to re-read the
1607 * the pointer from the transaction_t. 1607 * pointer from the transaction_t.
1608 * 1608 *
1609 * Called under j_list_lock. 1609 * Called under j_list_lock.
1610 */ 1610 */
@@ -1634,9 +1634,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1634 case BJ_Forget: 1634 case BJ_Forget:
1635 list = &transaction->t_forget; 1635 list = &transaction->t_forget;
1636 break; 1636 break;
1637 case BJ_IO:
1638 list = &transaction->t_iobuf_list;
1639 break;
1640 case BJ_Shadow: 1637 case BJ_Shadow:
1641 list = &transaction->t_shadow_list; 1638 list = &transaction->t_shadow_list;
1642 break; 1639 break;
@@ -2148,9 +2145,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
2148 case BJ_Forget: 2145 case BJ_Forget:
2149 list = &transaction->t_forget; 2146 list = &transaction->t_forget;
2150 break; 2147 break;
2151 case BJ_IO:
2152 list = &transaction->t_iobuf_list;
2153 break;
2154 case BJ_Shadow: 2148 case BJ_Shadow:
2155 list = &transaction->t_shadow_list; 2149 list = &transaction->t_shadow_list;
2156 break; 2150 break;