diff options
Diffstat (limited to 'fs/jbd/transaction.c')
| -rw-r--r-- | fs/jbd/transaction.c | 39 |
1 files changed, 33 insertions, 6 deletions
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 60d4c32c8808..e6a117431277 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/timer.h> | 25 | #include <linux/timer.h> |
| 26 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
| 27 | #include <linux/highmem.h> | 27 | #include <linux/highmem.h> |
| 28 | #include <linux/hrtimer.h> | ||
| 28 | 29 | ||
| 29 | static void __journal_temp_unlink_buffer(struct journal_head *jh); | 30 | static void __journal_temp_unlink_buffer(struct journal_head *jh); |
| 30 | 31 | ||
| @@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction) | |||
| 49 | { | 50 | { |
| 50 | transaction->t_journal = journal; | 51 | transaction->t_journal = journal; |
| 51 | transaction->t_state = T_RUNNING; | 52 | transaction->t_state = T_RUNNING; |
| 53 | transaction->t_start_time = ktime_get(); | ||
| 52 | transaction->t_tid = journal->j_transaction_sequence++; | 54 | transaction->t_tid = journal->j_transaction_sequence++; |
| 53 | transaction->t_expires = jiffies + journal->j_commit_interval; | 55 | transaction->t_expires = jiffies + journal->j_commit_interval; |
| 54 | spin_lock_init(&transaction->t_handle_lock); | 56 | spin_lock_init(&transaction->t_handle_lock); |
| @@ -752,7 +754,6 @@ out: | |||
| 752 | * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. | 754 | * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. |
| 753 | * @handle: transaction to add buffer modifications to | 755 | * @handle: transaction to add buffer modifications to |
| 754 | * @bh: bh to be used for metadata writes | 756 | * @bh: bh to be used for metadata writes |
| 755 | * @credits: variable that will receive credits for the buffer | ||
| 756 | * | 757 | * |
| 757 | * Returns an error code or 0 on success. | 758 | * Returns an error code or 0 on success. |
| 758 | * | 759 | * |
| @@ -1370,7 +1371,7 @@ int journal_stop(handle_t *handle) | |||
| 1370 | { | 1371 | { |
| 1371 | transaction_t *transaction = handle->h_transaction; | 1372 | transaction_t *transaction = handle->h_transaction; |
| 1372 | journal_t *journal = transaction->t_journal; | 1373 | journal_t *journal = transaction->t_journal; |
| 1373 | int old_handle_count, err; | 1374 | int err; |
| 1374 | pid_t pid; | 1375 | pid_t pid; |
| 1375 | 1376 | ||
| 1376 | J_ASSERT(journal_current_handle() == handle); | 1377 | J_ASSERT(journal_current_handle() == handle); |
| @@ -1399,6 +1400,17 @@ int journal_stop(handle_t *handle) | |||
| 1399 | * on IO anyway. Speeds up many-threaded, many-dir operations | 1400 | * on IO anyway. Speeds up many-threaded, many-dir operations |
| 1400 | * by 30x or more... | 1401 | * by 30x or more... |
| 1401 | * | 1402 | * |
| 1403 | * We try and optimize the sleep time against what the underlying disk | ||
| 1404 | * can do, instead of having a static sleep time. This is useful for | ||
| 1405 | * the case where our storage is so fast that it is more optimal to go | ||
| 1406 | * ahead and force a flush and wait for the transaction to be committed | ||
| 1407 | * than it is to wait for an arbitrary amount of time for new writers to | ||
| 1408 | * join the transaction. We achieve this by measuring how long it takes | ||
| 1409 | * to commit a transaction, and comparing it with how long this | ||
| 1410 | * transaction has been running, and if run time < commit time then we | ||
| 1411 | * sleep for the delta and commit. This greatly helps super fast disks | ||
| 1412 | * that would see slowdowns as more threads started doing fsyncs. | ||
| 1413 | * | ||
| 1402 | * But don't do this if this process was the most recent one to | 1414 | * But don't do this if this process was the most recent one to |
| 1403 | * perform a synchronous write. We do this to detect the case where a | 1415 | * perform a synchronous write. We do this to detect the case where a |
| 1404 | * single process is doing a stream of sync writes. No point in waiting | 1416 | * single process is doing a stream of sync writes. No point in waiting |
| @@ -1406,11 +1418,26 @@ int journal_stop(handle_t *handle) | |||
| 1406 | */ | 1418 | */ |
| 1407 | pid = current->pid; | 1419 | pid = current->pid; |
| 1408 | if (handle->h_sync && journal->j_last_sync_writer != pid) { | 1420 | if (handle->h_sync && journal->j_last_sync_writer != pid) { |
| 1421 | u64 commit_time, trans_time; | ||
| 1422 | |||
| 1409 | journal->j_last_sync_writer = pid; | 1423 | journal->j_last_sync_writer = pid; |
| 1410 | do { | 1424 | |
| 1411 | old_handle_count = transaction->t_handle_count; | 1425 | spin_lock(&journal->j_state_lock); |
| 1412 | schedule_timeout_uninterruptible(1); | 1426 | commit_time = journal->j_average_commit_time; |
| 1413 | } while (old_handle_count != transaction->t_handle_count); | 1427 | spin_unlock(&journal->j_state_lock); |
| 1428 | |||
| 1429 | trans_time = ktime_to_ns(ktime_sub(ktime_get(), | ||
| 1430 | transaction->t_start_time)); | ||
| 1431 | |||
| 1432 | commit_time = min_t(u64, commit_time, | ||
| 1433 | 1000*jiffies_to_usecs(1)); | ||
| 1434 | |||
| 1435 | if (trans_time < commit_time) { | ||
| 1436 | ktime_t expires = ktime_add_ns(ktime_get(), | ||
| 1437 | commit_time); | ||
| 1438 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
| 1439 | schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); | ||
| 1440 | } | ||
| 1414 | } | 1441 | } |
| 1415 | 1442 | ||
| 1416 | current->journal_info = NULL; | 1443 | current->journal_info = NULL; |
