diff options
Diffstat (limited to 'fs/jbd')
-rw-r--r-- | fs/jbd/commit.c | 15 | ||||
-rw-r--r-- | fs/jbd/transaction.c | 39 |
2 files changed, 48 insertions, 6 deletions
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 25719d902c51..3fbffb1ea714 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -306,6 +306,8 @@ void journal_commit_transaction(journal_t *journal) | |||
306 | int flags; | 306 | int flags; |
307 | int err; | 307 | int err; |
308 | unsigned long blocknr; | 308 | unsigned long blocknr; |
309 | ktime_t start_time; | ||
310 | u64 commit_time; | ||
309 | char *tagp = NULL; | 311 | char *tagp = NULL; |
310 | journal_header_t *header; | 312 | journal_header_t *header; |
311 | journal_block_tag_t *tag = NULL; | 313 | journal_block_tag_t *tag = NULL; |
@@ -418,6 +420,7 @@ void journal_commit_transaction(journal_t *journal) | |||
418 | commit_transaction->t_state = T_FLUSH; | 420 | commit_transaction->t_state = T_FLUSH; |
419 | journal->j_committing_transaction = commit_transaction; | 421 | journal->j_committing_transaction = commit_transaction; |
420 | journal->j_running_transaction = NULL; | 422 | journal->j_running_transaction = NULL; |
423 | start_time = ktime_get(); | ||
421 | commit_transaction->t_log_start = journal->j_head; | 424 | commit_transaction->t_log_start = journal->j_head; |
422 | wake_up(&journal->j_wait_transaction_locked); | 425 | wake_up(&journal->j_wait_transaction_locked); |
423 | spin_unlock(&journal->j_state_lock); | 426 | spin_unlock(&journal->j_state_lock); |
@@ -913,6 +916,18 @@ restart_loop: | |||
913 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 916 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
914 | journal->j_commit_sequence = commit_transaction->t_tid; | 917 | journal->j_commit_sequence = commit_transaction->t_tid; |
915 | journal->j_committing_transaction = NULL; | 918 | journal->j_committing_transaction = NULL; |
919 | commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); | ||
920 | |||
921 | /* | ||
922 | * weight the commit time higher than the average time so we don't | ||
923 | * react too strongly to vast changes in commit time | ||
924 | */ | ||
925 | if (likely(journal->j_average_commit_time)) | ||
926 | journal->j_average_commit_time = (commit_time*3 + | ||
927 | journal->j_average_commit_time) / 4; | ||
928 | else | ||
929 | journal->j_average_commit_time = commit_time; | ||
930 | |||
916 | spin_unlock(&journal->j_state_lock); | 931 | spin_unlock(&journal->j_state_lock); |
917 | 932 | ||
918 | if (commit_transaction->t_checkpoint_list == NULL && | 933 | if (commit_transaction->t_checkpoint_list == NULL && |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 60d4c32c8808..e6a117431277 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
28 | #include <linux/hrtimer.h> | ||
28 | 29 | ||
29 | static void __journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __journal_temp_unlink_buffer(struct journal_head *jh); |
30 | 31 | ||
@@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction) | |||
49 | { | 50 | { |
50 | transaction->t_journal = journal; | 51 | transaction->t_journal = journal; |
51 | transaction->t_state = T_RUNNING; | 52 | transaction->t_state = T_RUNNING; |
53 | transaction->t_start_time = ktime_get(); | ||
52 | transaction->t_tid = journal->j_transaction_sequence++; | 54 | transaction->t_tid = journal->j_transaction_sequence++; |
53 | transaction->t_expires = jiffies + journal->j_commit_interval; | 55 | transaction->t_expires = jiffies + journal->j_commit_interval; |
54 | spin_lock_init(&transaction->t_handle_lock); | 56 | spin_lock_init(&transaction->t_handle_lock); |
@@ -752,7 +754,6 @@ out: | |||
752 | * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. | 754 | * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. |
753 | * @handle: transaction to add buffer modifications to | 755 | * @handle: transaction to add buffer modifications to |
754 | * @bh: bh to be used for metadata writes | 756 | * @bh: bh to be used for metadata writes |
755 | * @credits: variable that will receive credits for the buffer | ||
756 | * | 757 | * |
757 | * Returns an error code or 0 on success. | 758 | * Returns an error code or 0 on success. |
758 | * | 759 | * |
@@ -1370,7 +1371,7 @@ int journal_stop(handle_t *handle) | |||
1370 | { | 1371 | { |
1371 | transaction_t *transaction = handle->h_transaction; | 1372 | transaction_t *transaction = handle->h_transaction; |
1372 | journal_t *journal = transaction->t_journal; | 1373 | journal_t *journal = transaction->t_journal; |
1373 | int old_handle_count, err; | 1374 | int err; |
1374 | pid_t pid; | 1375 | pid_t pid; |
1375 | 1376 | ||
1376 | J_ASSERT(journal_current_handle() == handle); | 1377 | J_ASSERT(journal_current_handle() == handle); |
@@ -1399,6 +1400,17 @@ int journal_stop(handle_t *handle) | |||
1399 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1400 | * on IO anyway. Speeds up many-threaded, many-dir operations |
1400 | * by 30x or more... | 1401 | * by 30x or more... |
1401 | * | 1402 | * |
1403 | * We try and optimize the sleep time against what the underlying disk | ||
1404 | * can do, instead of having a static sleep time. This is usefull for | ||
1405 | * the case where our storage is so fast that it is more optimal to go | ||
1406 | * ahead and force a flush and wait for the transaction to be committed | ||
1407 | * than it is to wait for an arbitrary amount of time for new writers to | ||
1408 | * join the transaction. We acheive this by measuring how long it takes | ||
1409 | * to commit a transaction, and compare it with how long this | ||
1410 | * transaction has been running, and if run time < commit time then we | ||
1411 | * sleep for the delta and commit. This greatly helps super fast disks | ||
1412 | * that would see slowdowns as more threads started doing fsyncs. | ||
1413 | * | ||
1402 | * But don't do this if this process was the most recent one to | 1414 | * But don't do this if this process was the most recent one to |
1403 | * perform a synchronous write. We do this to detect the case where a | 1415 | * perform a synchronous write. We do this to detect the case where a |
1404 | * single process is doing a stream of sync writes. No point in waiting | 1416 | * single process is doing a stream of sync writes. No point in waiting |
@@ -1406,11 +1418,26 @@ int journal_stop(handle_t *handle) | |||
1406 | */ | 1418 | */ |
1407 | pid = current->pid; | 1419 | pid = current->pid; |
1408 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1420 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
1421 | u64 commit_time, trans_time; | ||
1422 | |||
1409 | journal->j_last_sync_writer = pid; | 1423 | journal->j_last_sync_writer = pid; |
1410 | do { | 1424 | |
1411 | old_handle_count = transaction->t_handle_count; | 1425 | spin_lock(&journal->j_state_lock); |
1412 | schedule_timeout_uninterruptible(1); | 1426 | commit_time = journal->j_average_commit_time; |
1413 | } while (old_handle_count != transaction->t_handle_count); | 1427 | spin_unlock(&journal->j_state_lock); |
1428 | |||
1429 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
1430 | transaction->t_start_time)); | ||
1431 | |||
1432 | commit_time = min_t(u64, commit_time, | ||
1433 | 1000*jiffies_to_usecs(1)); | ||
1434 | |||
1435 | if (trans_time < commit_time) { | ||
1436 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
1437 | commit_time); | ||
1438 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
1439 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
1440 | } | ||
1414 | } | 1441 | } |
1415 | 1442 | ||
1416 | current->journal_info = NULL; | 1443 | current->journal_info = NULL; |