aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2011-02-21 11:25:37 -0500
committer: Jan Kara <jack@suse.cz> 2012-04-11 05:12:44 -0400
commit: 2db938bee32e7469ca8ed9bfb3a05535f28c680d (patch)
tree: 7d175a486c2e02270839ba18da61455603c2205e /include
parent: 923e9a1399b620d063cd88537c64561bc3d5f905 (diff)
jbd: Refine commit writeout logic
Currently we write out all journal buffers in WRITE_SYNC mode. This improves performance for fsync heavy workloads but hinders performance when writes are mostly asynchronous, most noticeably it slows down readers and users complain about slow desktop response etc.

So submit writes as asynchronous in the normal case and only submit writes as WRITE_SYNC if we detect someone is waiting for current transaction commit.

I've gathered some numbers to back this change. The first is the read latency test. It measures time to read 1 MB after several seconds of sleeping in presence of streaming writes.

Top 10 times (out of 90) in us:
Before          After
2131586         697473
1709932         557487
1564598         535642
1480462         347573
1478579         323153
1408496         222181
1388960         181273
1329565         181070
1252486         172832
1223265         172278

Average:
619377          82180

So the improvement in both maximum and average latency is massive.

I've measured fsync throughput by:
fs_mark -n 100 -t 1 -s 16384 -d /mnt/fsync/ -S 1 -L 4
in presence of streaming reader. The numbers (fsyncs/s) are:
Before          After
9.9             6.3
6.8             6.0
6.3             6.2
5.8             6.1

So fsync performance seems unharmed by this change.

Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/jbd.h 15
-rw-r--r-- include/trace/events/jbd.h 24
2 files changed, 17 insertions, 22 deletions
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index d211732b9e99..f265682ae134 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -479,12 +479,6 @@ struct transaction_s
479 * How many handles used this transaction? [t_handle_lock] 479 * How many handles used this transaction? [t_handle_lock]
480 */ 480 */
481 int t_handle_count; 481 int t_handle_count;
482
483 /*
484 * This transaction is being forced and some process is
485 * waiting for it to finish.
486 */
487 unsigned int t_synchronous_commit:1;
488}; 482};
489 483
490/** 484/**
@@ -531,6 +525,8 @@ struct transaction_s
531 * transaction 525 * transaction
532 * @j_commit_request: Sequence number of the most recent transaction wanting 526 * @j_commit_request: Sequence number of the most recent transaction wanting
533 * commit 527 * commit
528 * @j_commit_waited: Sequence number of the most recent transaction someone
529 * is waiting for to commit.
534 * @j_uuid: Uuid of client object. 530 * @j_uuid: Uuid of client object.
535 * @j_task: Pointer to the current commit thread for this journal 531 * @j_task: Pointer to the current commit thread for this journal
536 * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a 532 * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a
@@ -696,6 +692,13 @@ struct journal_s
696 tid_t j_commit_request; 692 tid_t j_commit_request;
697 693
698 /* 694 /*
695 * Sequence number of the most recent transaction someone is waiting
696 * for to commit.
697 * [j_state_lock]
698 */
699 tid_t j_commit_waited;
700
701 /*
699 * Journal uuid: identifies the object (filesystem, LVM volume etc) 702 * Journal uuid: identifies the object (filesystem, LVM volume etc)
700 * backed by this journal. This will eventually be replaced by an array 703 * backed by this journal. This will eventually be replaced by an array
701 * of uuids, allowing us to index multiple devices within a single 704 * of uuids, allowing us to index multiple devices within a single
diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h
index aff64d82d713..9305e1b5edc3 100644
--- a/include/trace/events/jbd.h
+++ b/include/trace/events/jbd.h
@@ -36,19 +36,17 @@ DECLARE_EVENT_CLASS(jbd_commit,
36 36
37 TP_STRUCT__entry( 37 TP_STRUCT__entry(
38 __field( dev_t, dev ) 38 __field( dev_t, dev )
39 __field( char, sync_commit )
40 __field( int, transaction ) 39 __field( int, transaction )
41 ), 40 ),
42 41
43 TP_fast_assign( 42 TP_fast_assign(
44 __entry->dev = journal->j_fs_dev->bd_dev; 43 __entry->dev = journal->j_fs_dev->bd_dev;
45 __entry->sync_commit = commit_transaction->t_synchronous_commit;
46 __entry->transaction = commit_transaction->t_tid; 44 __entry->transaction = commit_transaction->t_tid;
47 ), 45 ),
48 46
49 TP_printk("dev %d,%d transaction %d sync %d", 47 TP_printk("dev %d,%d transaction %d",
50 MAJOR(__entry->dev), MINOR(__entry->dev), 48 MAJOR(__entry->dev), MINOR(__entry->dev),
51 __entry->transaction, __entry->sync_commit) 49 __entry->transaction)
52); 50);
53 51
54DEFINE_EVENT(jbd_commit, jbd_start_commit, 52DEFINE_EVENT(jbd_commit, jbd_start_commit,
@@ -87,19 +85,17 @@ TRACE_EVENT(jbd_drop_transaction,
87 85
88 TP_STRUCT__entry( 86 TP_STRUCT__entry(
89 __field( dev_t, dev ) 87 __field( dev_t, dev )
90 __field( char, sync_commit )
91 __field( int, transaction ) 88 __field( int, transaction )
92 ), 89 ),
93 90
94 TP_fast_assign( 91 TP_fast_assign(
95 __entry->dev = journal->j_fs_dev->bd_dev; 92 __entry->dev = journal->j_fs_dev->bd_dev;
96 __entry->sync_commit = commit_transaction->t_synchronous_commit;
97 __entry->transaction = commit_transaction->t_tid; 93 __entry->transaction = commit_transaction->t_tid;
98 ), 94 ),
99 95
100 TP_printk("dev %d,%d transaction %d sync %d", 96 TP_printk("dev %d,%d transaction %d",
101 MAJOR(__entry->dev), MINOR(__entry->dev), 97 MAJOR(__entry->dev), MINOR(__entry->dev),
102 __entry->transaction, __entry->sync_commit) 98 __entry->transaction)
103); 99);
104 100
105TRACE_EVENT(jbd_end_commit, 101TRACE_EVENT(jbd_end_commit,
@@ -109,21 +105,19 @@ TRACE_EVENT(jbd_end_commit,
109 105
110 TP_STRUCT__entry( 106 TP_STRUCT__entry(
111 __field( dev_t, dev ) 107 __field( dev_t, dev )
112 __field( char, sync_commit )
113 __field( int, transaction ) 108 __field( int, transaction )
114 __field( int, head ) 109 __field( int, head )
115 ), 110 ),
116 111
117 TP_fast_assign( 112 TP_fast_assign(
118 __entry->dev = journal->j_fs_dev->bd_dev; 113 __entry->dev = journal->j_fs_dev->bd_dev;
119 __entry->sync_commit = commit_transaction->t_synchronous_commit;
120 __entry->transaction = commit_transaction->t_tid; 114 __entry->transaction = commit_transaction->t_tid;
121 __entry->head = journal->j_tail_sequence; 115 __entry->head = journal->j_tail_sequence;
122 ), 116 ),
123 117
124 TP_printk("dev %d,%d transaction %d sync %d head %d", 118 TP_printk("dev %d,%d transaction %d head %d",
125 MAJOR(__entry->dev), MINOR(__entry->dev), 119 MAJOR(__entry->dev), MINOR(__entry->dev),
126 __entry->transaction, __entry->sync_commit, __entry->head) 120 __entry->transaction, __entry->head)
127); 121);
128 122
129TRACE_EVENT(jbd_do_submit_data, 123TRACE_EVENT(jbd_do_submit_data,
@@ -133,19 +127,17 @@ TRACE_EVENT(jbd_do_submit_data,
133 127
134 TP_STRUCT__entry( 128 TP_STRUCT__entry(
135 __field( dev_t, dev ) 129 __field( dev_t, dev )
136 __field( char, sync_commit )
137 __field( int, transaction ) 130 __field( int, transaction )
138 ), 131 ),
139 132
140 TP_fast_assign( 133 TP_fast_assign(
141 __entry->dev = journal->j_fs_dev->bd_dev; 134 __entry->dev = journal->j_fs_dev->bd_dev;
142 __entry->sync_commit = commit_transaction->t_synchronous_commit;
143 __entry->transaction = commit_transaction->t_tid; 135 __entry->transaction = commit_transaction->t_tid;
144 ), 136 ),
145 137
146 TP_printk("dev %d,%d transaction %d sync %d", 138 TP_printk("dev %d,%d transaction %d",
147 MAJOR(__entry->dev), MINOR(__entry->dev), 139 MAJOR(__entry->dev), MINOR(__entry->dev),
148 __entry->transaction, __entry->sync_commit) 140 __entry->transaction)
149); 141);
150 142
151TRACE_EVENT(jbd_cleanup_journal_tail, 143TRACE_EVENT(jbd_cleanup_journal_tail,