aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2011-02-21 11:25:37 -0500
committerJan Kara <jack@suse.cz>2012-04-11 05:12:44 -0400
commit2db938bee32e7469ca8ed9bfb3a05535f28c680d (patch)
tree7d175a486c2e02270839ba18da61455603c2205e
parent923e9a1399b620d063cd88537c64561bc3d5f905 (diff)
jbd: Refine commit writeout logic
Currently we write out all journal buffers in WRITE_SYNC mode. This improves performance for fsync heavy workloads but hinders performance when writes are mostly asynchronous, most noticably it slows down readers and users complain about slow desktop response etc. So submit writes as asynchronous in the normal case and only submit writes as WRITE_SYNC if we detect someone is waiting for current transaction commit. I've gathered some numbers to back this change. The first is the read latency test. It measures time to read 1 MB after several seconds of sleeping in presence of streaming writes. Top 10 times (out of 90) in us: Before After 2131586 697473 1709932 557487 1564598 535642 1480462 347573 1478579 323153 1408496 222181 1388960 181273 1329565 181070 1252486 172832 1223265 172278 Average: 619377 82180 So the improvement in both maximum and average latency is massive. I've measured fsync throughput by: fs_mark -n 100 -t 1 -s 16384 -d /mnt/fsync/ -S 1 -L 4 in presence of streaming reader. The numbers (fsyncs/s) are: Before After 9.9 6.3 6.8 6.0 6.3 6.2 5.8 6.1 So fsync performance seems unharmed by this change. Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r--fs/jbd/commit.c10
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd/transaction.c2
-rw-r--r--include/linux/jbd.h15
-rw-r--r--include/trace/events/jbd.h24
5 files changed, 26 insertions, 27 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index f2b9a571f4cf..9d31e6a39205 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -298,6 +298,7 @@ void journal_commit_transaction(journal_t *journal)
298 int tag_flag; 298 int tag_flag;
299 int i; 299 int i;
300 struct blk_plug plug; 300 struct blk_plug plug;
301 int write_op = WRITE;
301 302
302 /* 303 /*
303 * First job: lock down the current transaction and wait for 304 * First job: lock down the current transaction and wait for
@@ -413,13 +414,16 @@ void journal_commit_transaction(journal_t *journal)
413 414
414 jbd_debug (3, "JBD: commit phase 2\n"); 415 jbd_debug (3, "JBD: commit phase 2\n");
415 416
417 if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid))
418 write_op = WRITE_SYNC;
419
416 /* 420 /*
417 * Now start flushing things to disk, in the order they appear 421 * Now start flushing things to disk, in the order they appear
418 * on the transaction lists. Data blocks go first. 422 * on the transaction lists. Data blocks go first.
419 */ 423 */
420 blk_start_plug(&plug); 424 blk_start_plug(&plug);
421 err = journal_submit_data_buffers(journal, commit_transaction, 425 err = journal_submit_data_buffers(journal, commit_transaction,
422 WRITE_SYNC); 426 write_op);
423 blk_finish_plug(&plug); 427 blk_finish_plug(&plug);
424 428
425 /* 429 /*
@@ -478,7 +482,7 @@ void journal_commit_transaction(journal_t *journal)
478 482
479 blk_start_plug(&plug); 483 blk_start_plug(&plug);
480 484
481 journal_write_revoke_records(journal, commit_transaction, WRITE_SYNC); 485 journal_write_revoke_records(journal, commit_transaction, write_op);
482 486
483 /* 487 /*
484 * If we found any dirty or locked buffers, then we should have 488 * If we found any dirty or locked buffers, then we should have
@@ -649,7 +653,7 @@ start_journal_io:
649 clear_buffer_dirty(bh); 653 clear_buffer_dirty(bh);
650 set_buffer_uptodate(bh); 654 set_buffer_uptodate(bh);
651 bh->b_end_io = journal_end_buffer_io_sync; 655 bh->b_end_io = journal_end_buffer_io_sync;
652 submit_bh(WRITE_SYNC, bh); 656 submit_bh(write_op, bh);
653 } 657 }
654 cond_resched(); 658 cond_resched();
655 659
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 0971e9217808..2047fd77bf38 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -563,6 +563,8 @@ int log_wait_commit(journal_t *journal, tid_t tid)
563 spin_unlock(&journal->j_state_lock); 563 spin_unlock(&journal->j_state_lock);
564#endif 564#endif
565 spin_lock(&journal->j_state_lock); 565 spin_lock(&journal->j_state_lock);
566 if (!tid_geq(journal->j_commit_waited, tid))
567 journal->j_commit_waited = tid;
566 while (tid_gt(tid, journal->j_commit_sequence)) { 568 while (tid_gt(tid, journal->j_commit_sequence)) {
567 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", 569 jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
568 tid, journal->j_commit_sequence); 570 tid, journal->j_commit_sequence);
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index b2a7e5244e39..febc10db5ced 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1433,8 +1433,6 @@ int journal_stop(handle_t *handle)
1433 } 1433 }
1434 } 1434 }
1435 1435
1436 if (handle->h_sync)
1437 transaction->t_synchronous_commit = 1;
1438 current->journal_info = NULL; 1436 current->journal_info = NULL;
1439 spin_lock(&journal->j_state_lock); 1437 spin_lock(&journal->j_state_lock);
1440 spin_lock(&transaction->t_handle_lock); 1438 spin_lock(&transaction->t_handle_lock);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index d211732b9e99..f265682ae134 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -479,12 +479,6 @@ struct transaction_s
479 * How many handles used this transaction? [t_handle_lock] 479 * How many handles used this transaction? [t_handle_lock]
480 */ 480 */
481 int t_handle_count; 481 int t_handle_count;
482
483 /*
484 * This transaction is being forced and some process is
485 * waiting for it to finish.
486 */
487 unsigned int t_synchronous_commit:1;
488}; 482};
489 483
490/** 484/**
@@ -531,6 +525,8 @@ struct transaction_s
531 * transaction 525 * transaction
532 * @j_commit_request: Sequence number of the most recent transaction wanting 526 * @j_commit_request: Sequence number of the most recent transaction wanting
533 * commit 527 * commit
528 * @j_commit_waited: Sequence number of the most recent transaction someone
529 * is waiting for to commit.
534 * @j_uuid: Uuid of client object. 530 * @j_uuid: Uuid of client object.
535 * @j_task: Pointer to the current commit thread for this journal 531 * @j_task: Pointer to the current commit thread for this journal
536 * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a 532 * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a
@@ -696,6 +692,13 @@ struct journal_s
696 tid_t j_commit_request; 692 tid_t j_commit_request;
697 693
698 /* 694 /*
695 * Sequence number of the most recent transaction someone is waiting
696 * for to commit.
697 * [j_state_lock]
698 */
699 tid_t j_commit_waited;
700
701 /*
699 * Journal uuid: identifies the object (filesystem, LVM volume etc) 702 * Journal uuid: identifies the object (filesystem, LVM volume etc)
700 * backed by this journal. This will eventually be replaced by an array 703 * backed by this journal. This will eventually be replaced by an array
701 * of uuids, allowing us to index multiple devices within a single 704 * of uuids, allowing us to index multiple devices within a single
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
index aff64d82d713..9305e1b5edc3 100644
--- a/include/trace/events/jbd.h
+++ b/include/trace/events/jbd.h
@@ -36,19 +36,17 @@ DECLARE_EVENT_CLASS(jbd_commit,
36 36
37 TP_STRUCT__entry( 37 TP_STRUCT__entry(
38 __field( dev_t, dev ) 38 __field( dev_t, dev )
39 __field( char, sync_commit )
40 __field( int, transaction ) 39 __field( int, transaction )
41 ), 40 ),
42 41
43 TP_fast_assign( 42 TP_fast_assign(
44 __entry->dev = journal->j_fs_dev->bd_dev; 43 __entry->dev = journal->j_fs_dev->bd_dev;
45 __entry->sync_commit = commit_transaction->t_synchronous_commit;
46 __entry->transaction = commit_transaction->t_tid; 44 __entry->transaction = commit_transaction->t_tid;
47 ), 45 ),
48 46
49 TP_printk("dev %d,%d transaction %d sync %d", 47 TP_printk("dev %d,%d transaction %d",
50 MAJOR(__entry->dev), MINOR(__entry->dev), 48 MAJOR(__entry->dev), MINOR(__entry->dev),
51 __entry->transaction, __entry->sync_commit) 49 __entry->transaction)
52); 50);
53 51
54DEFINE_EVENT(jbd_commit, jbd_start_commit, 52DEFINE_EVENT(jbd_commit, jbd_start_commit,
@@ -87,19 +85,17 @@ TRACE_EVENT(jbd_drop_transaction,
87 85
88 TP_STRUCT__entry( 86 TP_STRUCT__entry(
89 __field( dev_t, dev ) 87 __field( dev_t, dev )
90 __field( char, sync_commit )
91 __field( int, transaction ) 88 __field( int, transaction )
92 ), 89 ),
93 90
94 TP_fast_assign( 91 TP_fast_assign(
95 __entry->dev = journal->j_fs_dev->bd_dev; 92 __entry->dev = journal->j_fs_dev->bd_dev;
96 __entry->sync_commit = commit_transaction->t_synchronous_commit;
97 __entry->transaction = commit_transaction->t_tid; 93 __entry->transaction = commit_transaction->t_tid;
98 ), 94 ),
99 95
100 TP_printk("dev %d,%d transaction %d sync %d", 96 TP_printk("dev %d,%d transaction %d",
101 MAJOR(__entry->dev), MINOR(__entry->dev), 97 MAJOR(__entry->dev), MINOR(__entry->dev),
102 __entry->transaction, __entry->sync_commit) 98 __entry->transaction)
103); 99);
104 100
105TRACE_EVENT(jbd_end_commit, 101TRACE_EVENT(jbd_end_commit,
@@ -109,21 +105,19 @@ TRACE_EVENT(jbd_end_commit,
109 105
110 TP_STRUCT__entry( 106 TP_STRUCT__entry(
111 __field( dev_t, dev ) 107 __field( dev_t, dev )
112 __field( char, sync_commit )
113 __field( int, transaction ) 108 __field( int, transaction )
114 __field( int, head ) 109 __field( int, head )
115 ), 110 ),
116 111
117 TP_fast_assign( 112 TP_fast_assign(
118 __entry->dev = journal->j_fs_dev->bd_dev; 113 __entry->dev = journal->j_fs_dev->bd_dev;
119 __entry->sync_commit = commit_transaction->t_synchronous_commit;
120 __entry->transaction = commit_transaction->t_tid; 114 __entry->transaction = commit_transaction->t_tid;
121 __entry->head = journal->j_tail_sequence; 115 __entry->head = journal->j_tail_sequence;
122 ), 116 ),
123 117
124 TP_printk("dev %d,%d transaction %d sync %d head %d", 118 TP_printk("dev %d,%d transaction %d head %d",
125 MAJOR(__entry->dev), MINOR(__entry->dev), 119 MAJOR(__entry->dev), MINOR(__entry->dev),
126 __entry->transaction, __entry->sync_commit, __entry->head) 120 __entry->transaction, __entry->head)
127); 121);
128 122
129TRACE_EVENT(jbd_do_submit_data, 123TRACE_EVENT(jbd_do_submit_data,
@@ -133,19 +127,17 @@ TRACE_EVENT(jbd_do_submit_data,
133 127
134 TP_STRUCT__entry( 128 TP_STRUCT__entry(
135 __field( dev_t, dev ) 129 __field( dev_t, dev )
136 __field( char, sync_commit )
137 __field( int, transaction ) 130 __field( int, transaction )
138 ), 131 ),
139 132
140 TP_fast_assign( 133 TP_fast_assign(
141 __entry->dev = journal->j_fs_dev->bd_dev; 134 __entry->dev = journal->j_fs_dev->bd_dev;
142 __entry->sync_commit = commit_transaction->t_synchronous_commit;
143 __entry->transaction = commit_transaction->t_tid; 135 __entry->transaction = commit_transaction->t_tid;
144 ), 136 ),
145 137
146 TP_printk("dev %d,%d transaction %d sync %d", 138 TP_printk("dev %d,%d transaction %d",
147 MAJOR(__entry->dev), MINOR(__entry->dev), 139 MAJOR(__entry->dev), MINOR(__entry->dev),
148 __entry->transaction, __entry->sync_commit) 140 __entry->transaction)
149); 141);
150 142
151TRACE_EVENT(jbd_cleanup_journal_tail, 143TRACE_EVENT(jbd_cleanup_journal_tail,