diff options
-rw-r--r-- | fs/jbd/commit.c | 15 | ||||
-rw-r--r-- | fs/jbd/transaction.c | 38 | ||||
-rw-r--r-- | include/linux/jbd.h | 15 |
3 files changed, 63 insertions, 5 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 25719d902c51..3fbffb1ea714 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -306,6 +306,8 @@ void journal_commit_transaction(journal_t *journal) | |||
306 | int flags; | 306 | int flags; |
307 | int err; | 307 | int err; |
308 | unsigned long blocknr; | 308 | unsigned long blocknr; |
309 | ktime_t start_time; | ||
310 | u64 commit_time; | ||
309 | char *tagp = NULL; | 311 | char *tagp = NULL; |
310 | journal_header_t *header; | 312 | journal_header_t *header; |
311 | journal_block_tag_t *tag = NULL; | 313 | journal_block_tag_t *tag = NULL; |
@@ -418,6 +420,7 @@ void journal_commit_transaction(journal_t *journal) | |||
418 | commit_transaction->t_state = T_FLUSH; | 420 | commit_transaction->t_state = T_FLUSH; |
419 | journal->j_committing_transaction = commit_transaction; | 421 | journal->j_committing_transaction = commit_transaction; |
420 | journal->j_running_transaction = NULL; | 422 | journal->j_running_transaction = NULL; |
423 | start_time = ktime_get(); | ||
421 | commit_transaction->t_log_start = journal->j_head; | 424 | commit_transaction->t_log_start = journal->j_head; |
422 | wake_up(&journal->j_wait_transaction_locked); | 425 | wake_up(&journal->j_wait_transaction_locked); |
423 | spin_unlock(&journal->j_state_lock); | 426 | spin_unlock(&journal->j_state_lock); |
@@ -913,6 +916,18 @@ restart_loop: | |||
913 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 916 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
914 | journal->j_commit_sequence = commit_transaction->t_tid; | 917 | journal->j_commit_sequence = commit_transaction->t_tid; |
915 | journal->j_committing_transaction = NULL; | 918 | journal->j_committing_transaction = NULL; |
919 | commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | ||
920 | |||
921 | /* | ||
922 | * weight the commit time higher than the average time so we don't | ||
923 | * react too strongly to vast changes in commit time | ||
924 | */ | ||
925 | if (likely(journal->j_average_commit_time)) | ||
926 | journal->j_average_commit_time = (commit_time*3 + | ||
927 | journal->j_average_commit_time) / 4; | ||
928 | else | ||
929 | journal->j_average_commit_time = commit_time; | ||
930 | |||
916 | spin_unlock(&journal->j_state_lock); | 931 | spin_unlock(&journal->j_state_lock); |
917 | 932 | ||
918 | if (commit_transaction->t_checkpoint_list == NULL && | 933 | if (commit_transaction->t_checkpoint_list == NULL && |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 60d4c32c8808..b51fbd4b2913 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | ||
28 | 29 | ||
29 | static void __journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __journal_temp_unlink_buffer(struct journal_head *jh); |
30 | 31 | ||
@@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction) | |||
49 | { | 50 | { |
50 | transaction->t_journal = journal; | 51 | transaction->t_journal = journal; |
51 | transaction->t_state = T_RUNNING; | 52 | transaction->t_state = T_RUNNING; |
53 | transaction->t_start_time = ktime_get(); | ||
52 | transaction->t_tid = journal->j_transaction_sequence++; | 54 | transaction->t_tid = journal->j_transaction_sequence++; |
53 | transaction->t_expires = jiffies + journal->j_commit_interval; | 55 | transaction->t_expires = jiffies + journal->j_commit_interval; |
54 | spin_lock_init(&transaction->t_handle_lock); | 56 | spin_lock_init(&transaction->t_handle_lock); |
@@ -1370,7 +1372,7 @@ int journal_stop(handle_t *handle) | |||
1370 | { | 1372 | { |
1371 | transaction_t *transaction = handle->h_transaction; | 1373 | transaction_t *transaction = handle->h_transaction; |
1372 | journal_t *journal = transaction->t_journal; | 1374 | journal_t *journal = transaction->t_journal; |
1373 | int old_handle_count, err; | 1375 | int err; |
1374 | pid_t pid; | 1376 | pid_t pid; |
1375 | 1377 | ||
1376 | J_ASSERT(journal_current_handle() == handle); | 1378 | J_ASSERT(journal_current_handle() == handle); |
@@ -1399,6 +1401,17 @@ int journal_stop(handle_t *handle) | |||
1399 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1401 | * on IO anyway. Speeds up many-threaded, many-dir operations |
1400 | * by 30x or more... | 1402 | * by 30x or more... |
1401 | * | 1403 | * |
1404 | * We try and optimize the sleep time against what the underlying disk | ||
1405 | * can do, instead of having a static sleep time. This is useful for | ||
1406 | * the case where our storage is so fast that it is more optimal to go | ||
1407 | * ahead and force a flush and wait for the transaction to be committed | ||
1408 | * than it is to wait for an arbitrary amount of time for new writers to | ||
1409 | * join the transaction. We achieve this by measuring how long it takes | ||
1410 | * to commit a transaction and comparing it with how long this | ||
1411 | * transaction has been running, and if run time < commit time then we | ||
1412 | * sleep for the commit time and commit. This greatly helps super fast disks | ||
1413 | * that would see slowdowns as more threads started doing fsyncs. | ||
1414 | * | ||
1402 | * But don't do this if this process was the most recent one to | 1415 | * But don't do this if this process was the most recent one to |
1403 | * perform a synchronous write. We do this to detect the case where a | 1416 | * perform a synchronous write. We do this to detect the case where a |
1404 | * single process is doing a stream of sync writes. No point in waiting | 1417 | * single process is doing a stream of sync writes. No point in waiting |
@@ -1406,11 +1419,26 @@ int journal_stop(handle_t *handle) | |||
1406 | */ | 1419 | */ |
1407 | pid = current->pid; | 1420 | pid = current->pid; |
1408 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1421 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
1422 | u64 commit_time, trans_time; | ||
1423 | |||
1409 | journal->j_last_sync_writer = pid; | 1424 | journal->j_last_sync_writer = pid; |
1410 | do { | 1425 | |
1411 | old_handle_count = transaction->t_handle_count; | 1426 | spin_lock(&journal->j_state_lock); |
1412 | schedule_timeout_uninterruptible(1); | 1427 | commit_time = journal->j_average_commit_time; |
1413 | } while (old_handle_count != transaction->t_handle_count); | 1428 | spin_unlock(&journal->j_state_lock); |
1429 | |||
1430 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
1431 | transaction->t_start_time)); | ||
1432 | |||
1433 | commit_time = min_t(u64, commit_time, | ||
1434 | 1000*jiffies_to_usecs(1)); | ||
1435 | |||
1436 | if (trans_time < commit_time) { | ||
1437 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
1438 | commit_time); | ||
1439 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1440 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1441 | } | ||
1414 | } | 1442 | } |
1415 | 1443 | ||
1416 | current->journal_info = NULL; | 1444 | current->journal_info = NULL; |
diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 346e2b80be7d..6384b19efe64 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h | |||
@@ -543,6 +543,11 @@ struct transaction_s | |||
543 | unsigned long t_expires; | 543 | unsigned long t_expires; |
544 | 544 | ||
545 | /* | 545 | /* |
546 | * When this transaction started, in nanoseconds [no locking] | ||
547 | */ | ||
548 | ktime_t t_start_time; | ||
549 | |||
550 | /* | ||
546 | * How many handles used this transaction? [t_handle_lock] | 551 | * How many handles used this transaction? [t_handle_lock] |
547 | */ | 552 | */ |
548 | int t_handle_count; | 553 | int t_handle_count; |
@@ -798,9 +803,19 @@ struct journal_s | |||
798 | struct buffer_head **j_wbuf; | 803 | struct buffer_head **j_wbuf; |
799 | int j_wbufsize; | 804 | int j_wbufsize; |
800 | 805 | ||
806 | /* | ||
807 | * this is the pid of the last person to run a synchronous operation | ||
808 | * through the journal. | ||
809 | */ | ||
801 | pid_t j_last_sync_writer; | 810 | pid_t j_last_sync_writer; |
802 | 811 | ||
803 | /* | 812 | /* |
813 | * the average amount of time in nanoseconds it takes to commit a | ||
814 | * transaction to the disk. [j_state_lock] | ||
815 | */ | ||
816 | u64 j_average_commit_time; | ||
817 | |||
818 | /* | ||
804 | * An opaque pointer to fs-private information. ext3 puts its | 819 | * An opaque pointer to fs-private information. ext3 puts its |
805 | * superblock pointer here | 820 | * superblock pointer here |
806 | */ | 821 | */ |