author    Jan Kara <jack@suse.cz>    2013-06-04 12:08:56 -0400
committer Theodore Ts'o <tytso@mit.edu>    2013-06-04 12:08:56 -0400
commit    b34090e5e22a02fba0e4473056cce9420ad9dd0b (patch)
tree      7ffb9ecd10ada2aefe9079c2df91405592132e47 /fs/jbd2
parent    e5a120aeb57f40ae568a5ca1dd6ace53d0213582 (diff)
jbd2: refine waiting for shadow buffers
Currently, when we add a buffer to a transaction, we wait until the buffer is removed from the BJ_Shadow list (so that we prevent any changes to a buffer that is just being written to the journal). This can take unnecessarily long, because a lot happens between the time the buffer is submitted to the journal and the time it is removed from the BJ_Shadow list (e.g. we wait for all data buffers in the transaction, we issue a cache flush, and so on). This also creates a dependency of do_get_write_access() on transaction commit (namely, waiting for data IO to complete) which we want to avoid when implementing transaction reservation.

So we modify the commit code to set the new BH_Shadow flag when the temporary shadowing buffer is created, and to clear that flag once IO on that buffer is complete. This allows do_get_write_access() to wait only on the BH_Shadow bit, and thus removes the dependency on data IO completion.

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
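For illustration, here is a minimal sketch of the waiter-side pattern this patch adopts in do_get_write_access(). The sleep_on_shadow_bh() helper is the patch's own; wait_for_shadow() is a hypothetical wrapper added only for this sketch, and the 3.x-era wait_on_bit() signature (which takes an action callback) is assumed:

/* Action callback for wait_on_bit(); invoked while the bit is still set. */
static int sleep_on_shadow_bh(void *word)
{
	io_schedule();	/* sleep until journal IO completion wakes the bit waitqueue */
	return 0;	/* 0 = not interrupted, re-test the bit */
}

/* Hypothetical wrapper: block until the shadow copy of this buffer has
 * finished its journal IO, i.e. until the IO end handler clears BH_Shadow. */
static void wait_for_shadow(struct buffer_head *bh)
{
	wait_on_bit(&bh->b_state, BH_Shadow,
		    sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
}

Unlike the old loop, which waited for the buffer to leave the BJ_Shadow list (something that only happens late in commit, after data writes and the cache flush), this returns as soon as the write of the shadow copy itself completes.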
Diffstat (limited to 'fs/jbd2')
-rw-r--r--  fs/jbd2/commit.c       | 18
-rw-r--r--  fs/jbd2/journal.c      |  2
-rw-r--r--  fs/jbd2/transaction.c  | 44
3 files changed, 30 insertions(+), 34 deletions(-)
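The wake-up half of the handshake lives in the journal IO completion path. The sketch below condenses what journal_end_buffer_io_sync() does in the commit.c hunk that follows; shadow_io_done() is a hypothetical name used only here, and b_private of the shadow buffer is assumed to point at the original buffer_head, as the journal.c hunk arranges:

/* Hypothetical condensation of the completion-side logic in this patch. */
static void shadow_io_done(struct buffer_head *shadow_bh)
{
	struct buffer_head *orig_bh = shadow_bh->b_private;

	if (orig_bh) {
		/* clear_bit_unlock() has release semantics: the results of
		 * the IO are visible before waiters observe the bit drop. */
		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
		/* Full barrier before the waitqueue check inside
		 * wake_up_bit(), pairing with the wait_on_bit() side. */
		smp_mb__after_clear_bit();
		wake_up_bit(&orig_bh->b_state, BH_Shadow);
	}
}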
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7c6f7eea2316..d73a0d808ec1 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -30,15 +30,22 @@
 #include <trace/events/jbd2.h>
 
 /*
- * Default IO end handler for temporary BJ_IO buffer_heads.
+ * IO end handler for temporary buffer_heads handling writes to the journal.
  */
 static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 {
+	struct buffer_head *orig_bh = bh->b_private;
+
 	BUFFER_TRACE(bh, "");
 	if (uptodate)
 		set_buffer_uptodate(bh);
 	else
 		clear_buffer_uptodate(bh);
+	if (orig_bh) {
+		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
+		smp_mb__after_clear_bit();
+		wake_up_bit(&orig_bh->b_state, BH_Shadow);
+	}
 	unlock_buffer(bh);
 }
 
@@ -832,6 +839,7 @@ start_journal_io:
 		bh = jh2bh(jh);
 		clear_buffer_jwrite(bh);
 		J_ASSERT_BH(bh, buffer_jbddirty(bh));
+		J_ASSERT_BH(bh, !buffer_shadow(bh));
 
 		/* The metadata is now released for reuse, but we need
                    to remember it against this transaction so that when
@@ -839,14 +847,6 @@ start_journal_io:
 		   required. */
 		JBUFFER_TRACE(jh, "file as BJ_Forget");
 		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
-		/*
-		 * Wake up any transactions which were waiting for this IO to
-		 * complete. The barrier must be here so that changes by
-		 * jbd2_journal_file_buffer() take effect before wake_up_bit()
-		 * does the waitqueue check.
-		 */
-		smp_mb();
-		wake_up_bit(&bh->b_state, BH_Unshadow);
 		JBUFFER_TRACE(jh, "brelse shadowed buffer");
 		__brelse(bh);
 	}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b0a8d1e4703e..5ef0712e2f7a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -451,6 +451,7 @@ repeat:
 	new_bh->b_size = bh_in->b_size;
 	new_bh->b_bdev = journal->j_dev;
 	new_bh->b_blocknr = blocknr;
+	new_bh->b_private = bh_in;
 	set_buffer_mapped(new_bh);
 	set_buffer_dirty(new_bh);
 
@@ -465,6 +466,7 @@ repeat:
@@ -465,6 +466,7 @@ repeat:
 	spin_lock(&journal->j_list_lock);
 	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
 	spin_unlock(&journal->j_list_lock);
+	set_buffer_shadow(bh_in);
 	jbd_unlock_bh_state(bh_in);
 
 	return do_escape | (done_copy_out << 1);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f1c5392e62b6..6f4248dd8759 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -619,6 +619,12 @@ static void warn_dirty_buffer(struct buffer_head *bh)
 		 bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
+static int sleep_on_shadow_bh(void *word)
+{
+	io_schedule();
+	return 0;
+}
+
 /*
  * If the buffer is already part of the current transaction, then there
  * is nothing we need to do.  If it is already part of a prior
@@ -754,41 +760,29 @@ repeat:
 	 * journaled.  If the primary copy is already going to
 	 * disk then we cannot do copy-out here. */
 
-	if (jh->b_jlist == BJ_Shadow) {
-		DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
-		wait_queue_head_t *wqh;
-
-		wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
-
+	if (buffer_shadow(bh)) {
 		JBUFFER_TRACE(jh, "on shadow: sleep");
 		jbd_unlock_bh_state(bh);
-		/* commit wakes up all shadow buffers after IO */
-		for ( ; ; ) {
-			prepare_to_wait(wqh, &wait.wait,
-					TASK_UNINTERRUPTIBLE);
-			if (jh->b_jlist != BJ_Shadow)
-				break;
-			schedule();
-		}
-		finish_wait(wqh, &wait.wait);
+		wait_on_bit(&bh->b_state, BH_Shadow,
+			    sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE);
 		goto repeat;
 	}
 
-	/* Only do the copy if the currently-owning transaction
-	 * still needs it.  If it is on the Forget list, the
-	 * committing transaction is past that stage.  The
-	 * buffer had better remain locked during the kmalloc,
-	 * but that should be true --- we hold the journal lock
-	 * still and the buffer is already on the BUF_JOURNAL
-	 * list so won't be flushed.
+	/*
+	 * Only do the copy if the currently-owning transaction still
+	 * needs it. If buffer isn't on BJ_Metadata list, the
+	 * committing transaction is past that stage (here we use the
+	 * fact that BH_Shadow is set under bh_state lock together with
+	 * refiling to BJ_Shadow list and at this point we know the
+	 * buffer doesn't have BH_Shadow set).
 	 *
 	 * Subtle point, though: if this is a get_undo_access,
 	 * then we will be relying on the frozen_data to contain
 	 * the new value of the committed_data record after the
 	 * transaction, so we HAVE to force the frozen_data copy
-	 * in that case. */
-
-	if (jh->b_jlist != BJ_Forget || force_copy) {
+	 * in that case.
+	 */
+	if (jh->b_jlist == BJ_Metadata || force_copy) {
 		JBUFFER_TRACE(jh, "generate frozen data");
 		if (!frozen_buffer) {
 			JBUFFER_TRACE(jh, "allocate memory for buffer");