aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/jbd/commit.c15
-rw-r--r--fs/jbd/transaction.c38
-rw-r--r--include/linux/jbd.h15
3 files changed, 63 insertions, 5 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 25719d902c51..3fbffb1ea714 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -306,6 +306,8 @@ void journal_commit_transaction(journal_t *journal)
306 int flags; 306 int flags;
307 int err; 307 int err;
308 unsigned long blocknr; 308 unsigned long blocknr;
309 ktime_t start_time;
310 u64 commit_time;
309 char *tagp = NULL; 311 char *tagp = NULL;
310 journal_header_t *header; 312 journal_header_t *header;
311 journal_block_tag_t *tag = NULL; 313 journal_block_tag_t *tag = NULL;
@@ -418,6 +420,7 @@ void journal_commit_transaction(journal_t *journal)
418 commit_transaction->t_state = T_FLUSH; 420 commit_transaction->t_state = T_FLUSH;
419 journal->j_committing_transaction = commit_transaction; 421 journal->j_committing_transaction = commit_transaction;
420 journal->j_running_transaction = NULL; 422 journal->j_running_transaction = NULL;
423 start_time = ktime_get();
421 commit_transaction->t_log_start = journal->j_head; 424 commit_transaction->t_log_start = journal->j_head;
422 wake_up(&journal->j_wait_transaction_locked); 425 wake_up(&journal->j_wait_transaction_locked);
423 spin_unlock(&journal->j_state_lock); 426 spin_unlock(&journal->j_state_lock);
@@ -913,6 +916,18 @@ restart_loop:
913 J_ASSERT(commit_transaction == journal->j_committing_transaction); 916 J_ASSERT(commit_transaction == journal->j_committing_transaction);
914 journal->j_commit_sequence = commit_transaction->t_tid; 917 journal->j_commit_sequence = commit_transaction->t_tid;
915 journal->j_committing_transaction = NULL; 918 journal->j_committing_transaction = NULL;
919 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
920
921 /*
922 * weight the commit time higher than the average time so we don't
923 * react too strongly to vast changes in commit time
924 */
925 if (likely(journal->j_average_commit_time))
926 journal->j_average_commit_time = (commit_time*3 +
927 journal->j_average_commit_time) / 4;
928 else
929 journal->j_average_commit_time = commit_time;
930
916 spin_unlock(&journal->j_state_lock); 931 spin_unlock(&journal->j_state_lock);
917 932
918 if (commit_transaction->t_checkpoint_list == NULL && 933 if (commit_transaction->t_checkpoint_list == NULL &&
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d4c32c8808..b51fbd4b2913 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -25,6 +25,7 @@
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <linux/mm.h> 26#include <linux/mm.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/hrtimer.h>
28 29
29static void __journal_temp_unlink_buffer(struct journal_head *jh); 30static void __journal_temp_unlink_buffer(struct journal_head *jh);
30 31
@@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
49{ 50{
50 transaction->t_journal = journal; 51 transaction->t_journal = journal;
51 transaction->t_state = T_RUNNING; 52 transaction->t_state = T_RUNNING;
53 transaction->t_start_time = ktime_get();
52 transaction->t_tid = journal->j_transaction_sequence++; 54 transaction->t_tid = journal->j_transaction_sequence++;
53 transaction->t_expires = jiffies + journal->j_commit_interval; 55 transaction->t_expires = jiffies + journal->j_commit_interval;
54 spin_lock_init(&transaction->t_handle_lock); 56 spin_lock_init(&transaction->t_handle_lock);
@@ -1370,7 +1372,7 @@ int journal_stop(handle_t *handle)
1370{ 1372{
1371 transaction_t *transaction = handle->h_transaction; 1373 transaction_t *transaction = handle->h_transaction;
1372 journal_t *journal = transaction->t_journal; 1374 journal_t *journal = transaction->t_journal;
1373 int old_handle_count, err; 1375 int err;
1374 pid_t pid; 1376 pid_t pid;
1375 1377
1376 J_ASSERT(journal_current_handle() == handle); 1378 J_ASSERT(journal_current_handle() == handle);
@@ -1399,6 +1401,17 @@ int journal_stop(handle_t *handle)
1399 * on IO anyway. Speeds up many-threaded, many-dir operations 1401 * on IO anyway. Speeds up many-threaded, many-dir operations
1400 * by 30x or more... 1402 * by 30x or more...
1401 * 1403 *
1404 * We try and optimize the sleep time against what the underlying disk
1405 * can do, instead of having a static sleep time. This is useful for
1406 * the case where our storage is so fast that it is more optimal to go
1407 * ahead and force a flush and wait for the transaction to be committed
1408 * than it is to wait for an arbitrary amount of time for new writers to
1409 * join the transaction. We achieve this by measuring how long it takes
1410 * to commit a transaction, and compare it with how long this
1411 * transaction has been running, and if run time < commit time then we
1412 * sleep for the delta and commit. This greatly helps super fast disks
1413 * that would see slowdowns as more threads started doing fsyncs.
1414 *
1402 * But don't do this if this process was the most recent one to 1415 * But don't do this if this process was the most recent one to
1403 * perform a synchronous write. We do this to detect the case where a 1416 * perform a synchronous write. We do this to detect the case where a
1404 * single process is doing a stream of sync writes. No point in waiting 1417 * single process is doing a stream of sync writes. No point in waiting
@@ -1406,11 +1419,26 @@ int journal_stop(handle_t *handle)
1406 */ 1419 */
1407 pid = current->pid; 1420 pid = current->pid;
1408 if (handle->h_sync && journal->j_last_sync_writer != pid) { 1421 if (handle->h_sync && journal->j_last_sync_writer != pid) {
1422 u64 commit_time, trans_time;
1423
1409 journal->j_last_sync_writer = pid; 1424 journal->j_last_sync_writer = pid;
1410 do { 1425
1411 old_handle_count = transaction->t_handle_count; 1426 spin_lock(&journal->j_state_lock);
1412 schedule_timeout_uninterruptible(1); 1427 commit_time = journal->j_average_commit_time;
1413 } while (old_handle_count != transaction->t_handle_count); 1428 spin_unlock(&journal->j_state_lock);
1429
1430 trans_time = ktime_to_ns(ktime_sub(ktime_get(),
1431 transaction->t_start_time));
1432
1433 commit_time = min_t(u64, commit_time,
1434 1000*jiffies_to_usecs(1));
1435
1436 if (trans_time < commit_time) {
1437 ktime_t expires = ktime_add_ns(ktime_get(),
1438 commit_time);
1439 set_current_state(TASK_UNINTERRUPTIBLE);
1440 schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
1441 }
1414 } 1442 }
1415 1443
1416 current->journal_info = NULL; 1444 current->journal_info = NULL;
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 346e2b80be7d..6384b19efe64 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -543,6 +543,11 @@ struct transaction_s
543 unsigned long t_expires; 543 unsigned long t_expires;
544 544
545 /* 545 /*
546 * When this transaction started, in nanoseconds [no locking]
547 */
548 ktime_t t_start_time;
549
550 /*
546 * How many handles used this transaction? [t_handle_lock] 551 * How many handles used this transaction? [t_handle_lock]
547 */ 552 */
548 int t_handle_count; 553 int t_handle_count;
@@ -798,9 +803,19 @@ struct journal_s
798 struct buffer_head **j_wbuf; 803 struct buffer_head **j_wbuf;
799 int j_wbufsize; 804 int j_wbufsize;
800 805
806 /*
807 * this is the pid of the last person to run a synchronous operation
808 * through the journal.
809 */
801 pid_t j_last_sync_writer; 810 pid_t j_last_sync_writer;
802 811
803 /* 812 /*
813 * the average amount of time in nanoseconds it takes to commit a
814 * transaction to the disk. [j_state_lock]
815 */
816 u64 j_average_commit_time;
817
818 /*
804 * An opaque pointer to fs-private information. ext3 puts its 819 * An opaque pointer to fs-private information. ext3 puts its
805 * superblock pointer here 820 * superblock pointer here
806 */ 821 */