aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/jbd2/commit.c14
-rw-r--r--fs/jbd2/transaction.c58
-rw-r--r--include/linux/jbd2.h15
3 files changed, 73 insertions, 14 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6393fd0d804e..f22d1828ea85 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -355,6 +355,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
355 int flags; 355 int flags;
356 int err; 356 int err;
357 unsigned long long blocknr; 357 unsigned long long blocknr;
358 ktime_t start_time;
359 u64 commit_time;
358 char *tagp = NULL; 360 char *tagp = NULL;
359 journal_header_t *header; 361 journal_header_t *header;
360 journal_block_tag_t *tag = NULL; 362 journal_block_tag_t *tag = NULL;
@@ -481,6 +483,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
481 commit_transaction->t_state = T_FLUSH; 483 commit_transaction->t_state = T_FLUSH;
482 journal->j_committing_transaction = commit_transaction; 484 journal->j_committing_transaction = commit_transaction;
483 journal->j_running_transaction = NULL; 485 journal->j_running_transaction = NULL;
486 start_time = ktime_get();
484 commit_transaction->t_log_start = journal->j_head; 487 commit_transaction->t_log_start = journal->j_head;
485 wake_up(&journal->j_wait_transaction_locked); 488 wake_up(&journal->j_wait_transaction_locked);
486 spin_unlock(&journal->j_state_lock); 489 spin_unlock(&journal->j_state_lock);
@@ -995,6 +998,17 @@ restart_loop:
995 J_ASSERT(commit_transaction == journal->j_committing_transaction); 998 J_ASSERT(commit_transaction == journal->j_committing_transaction);
996 journal->j_commit_sequence = commit_transaction->t_tid; 999 journal->j_commit_sequence = commit_transaction->t_tid;
997 journal->j_committing_transaction = NULL; 1000 journal->j_committing_transaction = NULL;
1001 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1002
1003 /*
1004 * weight the running average higher than the latest commit time so we
1005 * don't react too strongly to vast changes in the commit time
1006 */
1007 if (likely(journal->j_average_commit_time))
1008 journal->j_average_commit_time = (commit_time +
1009 journal->j_average_commit_time*3) / 4;
1010 else
1011 journal->j_average_commit_time = commit_time;
998 spin_unlock(&journal->j_state_lock); 1012 spin_unlock(&journal->j_state_lock);
999 1013
1000 if (journal->j_commit_callback) 1014 if (journal->j_commit_callback)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 39b7805a599a..13dcbc990f41 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 30static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -48,6 +49,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
48{ 49{
49 transaction->t_journal = journal; 50 transaction->t_journal = journal;
50 transaction->t_state = T_RUNNING; 51 transaction->t_state = T_RUNNING;
52 transaction->t_start_time = ktime_get();
51 transaction->t_tid = journal->j_transaction_sequence++; 53 transaction->t_tid = journal->j_transaction_sequence++;
52 transaction->t_expires = jiffies + journal->j_commit_interval; 54 transaction->t_expires = jiffies + journal->j_commit_interval;
53 spin_lock_init(&transaction->t_handle_lock); 55 spin_lock_init(&transaction->t_handle_lock);
@@ -1193,7 +1195,7 @@ int jbd2_journal_stop(handle_t *handle)
1193{ 1195{
1194 transaction_t *transaction = handle->h_transaction; 1196 transaction_t *transaction = handle->h_transaction;
1195 journal_t *journal = transaction->t_journal; 1197 journal_t *journal = transaction->t_journal;
1196 int old_handle_count, err; 1198 int err;
1197 pid_t pid; 1199 pid_t pid;
1198 1200
1199 J_ASSERT(journal_current_handle() == handle); 1201 J_ASSERT(journal_current_handle() == handle);
@@ -1216,24 +1218,52 @@ int jbd2_journal_stop(handle_t *handle)
1216 /* 1218 /*
1217 * Implement synchronous transaction batching. If the handle 1219 * Implement synchronous transaction batching. If the handle
1218 * was synchronous, don't force a commit immediately. Let's 1220 * was synchronous, don't force a commit immediately. Let's
1219 * yield and let another thread piggyback onto this transaction. 1221 * yield and let another thread piggyback onto this
1220 * Keep doing that while new threads continue to arrive. 1222 * transaction. Keep doing that while new threads continue to
1221 * It doesn't cost much - we're about to run a commit and sleep 1223 * arrive. It doesn't cost much - we're about to run a commit
1222 * on IO anyway. Speeds up many-threaded, many-dir operations 1224 * and sleep on IO anyway. Speeds up many-threaded, many-dir
1223 * by 30x or more... 1225 * operations by 30x or more...
1226 *
1227 * We try and optimize the sleep time against what the
1228 * underlying disk can do, instead of having a static sleep
1229 * time. This is useful for the case where our storage is so
1230 * fast that it is more optimal to go ahead and force a flush
1231 * and wait for the transaction to be committed than it is to
1232 * wait for an arbitrary amount of time for new writers to
1233 * join the transaction. We achieve this by measuring how
1234 * long it takes to commit a transaction, and compare it with
1235 * how long this transaction has been running, and if run time
1236 * < commit time then we sleep for the delta and commit. This
1237 * greatly helps super fast disks that would see slowdowns as
1238 * more threads started doing fsyncs.
1224 * 1239 *
1225 * But don't do this if this process was the most recent one to 1240 * But don't do this if this process was the most recent one
1226 * perform a synchronous write. We do this to detect the case where a 1241 * to perform a synchronous write. We do this to detect the
1227 * single process is doing a stream of sync writes. No point in waiting 1242 * case where a single process is doing a stream of sync
1228 * for joiners in that case. 1243 * writes. No point in waiting for joiners in that case.
1229 */ 1244 */
1230 pid = current->pid; 1245 pid = current->pid;
1231 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1246 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1247 u64 commit_time, trans_time;
1248
1232 journal->j_last_sync_writer = pid; 1249 journal->j_last_sync_writer = pid;
1233 do { 1250
1234 old_handle_count = transaction->t_handle_count; 1251 spin_lock(&journal->j_state_lock);
1235 schedule_timeout_uninterruptible(1); 1252 commit_time = journal->j_average_commit_time;
1236 } while (old_handle_count != transaction->t_handle_count); 1253 spin_unlock(&journal->j_state_lock);
1254
1255 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1256 transaction->t_start_time));
1257
1258 commit_time = min_t(u64, commit_time,
1259 1000*jiffies_to_usecs(1));
1260
1261 if (trans_time < commit_time) {
1262 ktime_t expires = ktime_add_ns(ktime_get(),
1263 commit_time);
1264 set_current_state(TASK_UNINTERRUPTIBLE);
1265 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1266 }
1237 } 1267 }
1238 1268
1239 current->journal_info = NULL; 1269 current->journal_info = NULL;
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index f36645745489..ab8cef130c28 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -638,6 +638,11 @@ struct transaction_s
638 unsigned long t_expires; 638 unsigned long t_expires;
639 639
640 /* 640 /*
641 * When this transaction started, in nanoseconds [no locking]
642 */
643 ktime_t t_start_time;
644
645 /*
641 * How many handles used this transaction? [t_handle_lock] 646 * How many handles used this transaction? [t_handle_lock]
642 */ 647 */
643 int t_handle_count; 648 int t_handle_count;
@@ -939,8 +944,18 @@ struct journal_s
939 struct buffer_head **j_wbuf; 944 struct buffer_head **j_wbuf;
940 int j_wbufsize; 945 int j_wbufsize;
941 946
947 /*
948 * this is the pid of the last person to run a synchronous operation
949 * through the journal
950 */
942 pid_t j_last_sync_writer; 951 pid_t j_last_sync_writer;
943 952
953 /*
954 * the average amount of time in nanoseconds it takes to commit a
955 * transaction to disk. [j_state_lock]
956 */
957 u64 j_average_commit_time;
958
944 /* This function is called when a transaction is closed */ 959 /* This function is called when a transaction is closed */
945 void (*j_commit_callback)(journal_t *, 960 void (*j_commit_callback)(journal_t *,
946 transaction_t *); 961 transaction_t *);